1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * TGSI to LLVM IR translation -- SoA.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  *
35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36  * Brian Paul, and others.
37  */
38 
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71 
72 #define DUMP_GS_EMITS 0
73 
74 /*
75  * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76  * instruction.
77  *
78  * TODO:
79  * - take execution masks into consideration
80  * - debug control-flow instructions
81  */
82 #define DEBUG_EXECUTION 0
83 
84 
85 /*
86  * Emit code to print a register value.
87  */
88 static void
89 emit_dump_reg(struct gallivm_state *gallivm,
90               unsigned file,
91               unsigned index,
92               unsigned chan,
93               LLVMValueRef value)
94 {
95    char buf[32];
96 
97    snprintf(buf, sizeof buf, "    %s[%u].%c = ",
98             tgsi_file_name(file),
99             index, "xyzw"[chan]);
100 
101    lp_build_print_value(gallivm, buf, value);
102 }
103 
104 static inline struct function_ctx *
105 func_ctx(struct lp_exec_mask *mask)
106 {
107    assert(mask->function_stack_size > 0);
108    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109    return &mask->function_stack[mask->function_stack_size - 1];
110 }
111 
112 /*
113  * combine the execution mask, if there is one, with the current mask.
114  */
115 static LLVMValueRef
116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120    struct lp_exec_mask *exec_mask = &bld->exec_mask;
121    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122    if (!exec_mask->has_mask) {
123       return bld_mask;
124    }
125    if (!bld_mask)
126       return exec_mask->exec_mask;
127    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128                        exec_mask->exec_mask, "");
129 }
130 
131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132                           struct lp_build_tgsi_context * bld_base)
133 {
134    enum tgsi_opcode opcode =
135       bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136    bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137                         opcode == TGSI_OPCODE_CASE);
138    lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140 
141 static void lp_exec_switch(struct lp_exec_mask *mask,
142                            LLVMValueRef switchval)
143 {
144    struct function_ctx *ctx = func_ctx(mask);
145 
146    if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147        ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148       ctx->switch_stack_size++;
149       return;
150    }
151 
152    ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153       ctx->break_type;
154    ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155 
156    ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157    ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158    ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159    ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160    ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161    ctx->switch_stack_size++;
162 
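   /* Start the new switch with an all-zero execution mask; lp_exec_case()
    * turns on the lanes whose switch value matches a case label.
    */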
163    mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164    ctx->switch_val = switchval;
165    ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166    ctx->switch_in_default = false;
167    ctx->switch_pc = 0;
168 
169    lp_exec_mask_update(mask);
170 }
171 
172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173                               struct lp_build_tgsi_context * bld_base)
174 {
175    LLVMBuilderRef builder = mask->bld->gallivm->builder;
176    struct function_ctx *ctx = func_ctx(mask);
177 
178    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179       ctx->switch_stack_size--;
180       return;
181    }
182 
183    /* check if there's a deferred default; if so, do it now */
184    if (ctx->switch_pc && !ctx->switch_in_default) {
185       LLVMValueRef prevmask, defaultmask;
186       unsigned tmp_pc;
187       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190       ctx->switch_in_default = true;
191 
192       lp_exec_mask_update(mask);
193 
194       assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195              TGSI_OPCODE_DEFAULT);
196 
197       tmp_pc = bld_base->pc;
198       bld_base->pc = ctx->switch_pc;
199       /*
200        * re-purpose switch_pc to point to here again, since we stop execution of
201        * the deferred default after the next break.
202        */
203       ctx->switch_pc = tmp_pc - 1;
204 
205       return;
206    }
207 
208    else if (ctx->switch_pc && ctx->switch_in_default) {
209       assert(bld_base->pc == ctx->switch_pc + 1);
210    }
211 
212    ctx->switch_stack_size--;
213    mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214    ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215    ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216    ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217    ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218 
219    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220 
221    lp_exec_mask_update(mask);
222 }
223 
224 static void lp_exec_case(struct lp_exec_mask *mask,
225                          LLVMValueRef caseval)
226 {
227    LLVMBuilderRef builder = mask->bld->gallivm->builder;
228    struct function_ctx *ctx = func_ctx(mask);
229 
230    LLVMValueRef casemask, prevmask;
231 
232    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233       return;
234    }
235 
236    /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
237    if (!ctx->switch_in_default) {
238       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239       casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
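      /* Accumulate the lanes that have matched any case so far; the default
       * clause will later run with the complement of this mask.
       */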
240       ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241                                              ctx->switch_mask_default, "sw_default_mask");
242       casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243       mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244 
245       lp_exec_mask_update(mask);
246    }
247 }
248 
249 /*
250  * Analyse default statement in a switch.
251  * \return true if default is last statement, false otherwise
252  * \param default_pc_start contains pc of instruction to jump to
253  *                         if default wasn't last but there's no
254  *                         fallthrough into default.
255  */
256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257                                        struct lp_build_tgsi_context * bld_base,
258                                        int *default_pc_start)
259 {
260    unsigned pc = bld_base->pc;
261    struct function_ctx *ctx = func_ctx(mask);
262    int curr_switch_stack = ctx->switch_stack_size;
263 
264    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265       return false;
266    }
267 
268    /* skip over case statements which are together with default */
269    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270       pc++;
271    }
272 
273    while (pc != ~0u && pc < bld_base->num_instructions) {
274       enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275       switch (opcode) {
276       case TGSI_OPCODE_CASE:
277          if (curr_switch_stack == ctx->switch_stack_size) {
278             *default_pc_start = pc - 1;
279             return false;
280          }
281          break;
282       case TGSI_OPCODE_SWITCH:
283          curr_switch_stack++;
284          break;
285       case TGSI_OPCODE_ENDSWITCH:
286          if (curr_switch_stack == ctx->switch_stack_size) {
287             *default_pc_start = pc - 1;
288             return true;
289          }
290          curr_switch_stack--;
291          break;
292       default:
293          ; /* nothing */
294       }
295       pc++;
296    }
297    /* should never arrive here */
298    assert(0);
299    return true;
300 }
301 
302 static void lp_exec_default(struct lp_exec_mask *mask,
303                             struct lp_build_tgsi_context * bld_base)
304 {
305    LLVMBuilderRef builder = mask->bld->gallivm->builder;
306    struct function_ctx *ctx = func_ctx(mask);
307 
308    int default_exec_pc = 0;
309    boolean default_is_last;
310 
311    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312       return;
313    }
314 
315    /*
316     * This is a messy opcode, because it may not always be at the end and
317     * there can be fallthrough in and out of it.
318     */
319 
320    default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321    /*
322     * If it is the last statement in the switch (note that case statements appearing
323     * "at the same time" as default don't change that), everything is just fine:
324     * update the switch mask and go on. This means we can handle default with
325     * fallthrough INTO it without overhead, if it is last.
326     */
327    if (default_is_last) {
328       LLVMValueRef prevmask, defaultmask;
329       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331       defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333       ctx->switch_in_default = true;
334 
335       lp_exec_mask_update(mask);
336    }
337    else {
338       /*
339        * Technically, a "case" immediately before default isn't really a
340        * fallthrough; however, we still have to count it as such, as we
341        * have already updated the masks.
342        * If that happens in practice, we could add a switch optimizer pass
343        * which just gets rid of all case statements appearing together with
344        * default (or could do switch analysis at switch start time instead).
345        */
346       enum tgsi_opcode opcode =
347          bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348       boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
349                          opcode != TGSI_OPCODE_SWITCH);
350       /*
351        * If it is not last statement and there was no fallthrough into it,
352        * we record the PC and continue execution at the next case (again, those
353        * cases encountered at the same time don't count). At endswitch
354        * time, we update switchmask, and go back executing the code we skipped
355        * until the next break (possibly re-executing some code with changed mask
356        * if there was a fallthrough out of default).
357        * Finally, if it is not last statement and there was a fallthrough into it,
358        * do the same as with the former case, except instead of skipping the code
359        * just execute it without updating the mask, then go back and re-execute.
360        */
361       ctx->switch_pc = bld_base->pc;
362       if (!ft_into) {
363          bld_base->pc = default_exec_pc;
364       }
365    }
366 }
367 
368 
369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370                               int func,
371                               int *pc)
372 {
373    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374       return;
375    }
376 
377    lp_exec_mask_function_init(mask, mask->function_stack_size);
378    mask->function_stack[mask->function_stack_size].pc = *pc;
379    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380    mask->function_stack_size++;
381    *pc = func;
382 }
383 
384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386    LLVMBuilderRef builder = mask->bld->gallivm->builder;
387    struct function_ctx *ctx = func_ctx(mask);
388    LLVMValueRef exec_mask;
389 
390    if (ctx->cond_stack_size == 0 &&
391        ctx->loop_stack_size == 0 &&
392        ctx->switch_stack_size == 0 &&
393        mask->function_stack_size == 1) {
394       /* returning from main() */
395       *pc = -1;
396       return;
397    }
398 
399    if (mask->function_stack_size == 1) {
400       /*
401        * This requires special handling since we need to ensure
402        * we don't drop the mask even if we have no call stack
403        * (e.g. after a ret in an if clause after the endif)
404        */
405       mask->ret_in_main = TRUE;
406    }
407 
408    exec_mask = LLVMBuildNot(builder,
409                             mask->exec_mask,
410                             "ret");
411 
412    mask->ret_mask = LLVMBuildAnd(builder,
413                                  mask->ret_mask,
414                                  exec_mask, "ret_full");
415 
416    lp_exec_mask_update(mask);
417 }
418 
419 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420 {
421 }
422 
423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425    struct function_ctx *ctx;
426 
427    assert(mask->function_stack_size > 1);
428    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429 
430    ctx = func_ctx(mask);
431    mask->function_stack_size--;
432 
433    *pc = ctx->pc;
434    mask->ret_mask = ctx->ret_mask;
435 
436    lp_exec_mask_update(mask);
437 }
438 
439 
440 static LLVMValueRef
441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442              unsigned file,
443              int index,
444              unsigned chan)
445 {
446    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448    LLVMValueRef var_of_array;
449 
450    switch (file) {
451    case TGSI_FILE_TEMPORARY:
452       array_of_vars = bld->temps;
453       var_of_array = bld->temps_array;
454       break;
455    case TGSI_FILE_OUTPUT:
456       array_of_vars = bld->outputs;
457       var_of_array = bld->outputs_array;
458       break;
459    default:
460       assert(0);
461       return NULL;
462    }
463 
464    assert(chan < 4);
465 
466    if (bld->indirect_files & (1 << file)) {
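      /* Indirectly addressed files are backed by one flat array, laid out as
       * index * 4 + channel.
       */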
467       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468       if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469          LLVMValueRef gep[2];
470          gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471          gep[1] = lindex;
472          return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473       } else {
474          return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475       }
476    }
477    else {
478       assert(index <= bld->bld_base.info->file_max[file]);
479       return array_of_vars[index][chan];
480    }
481 }
482 
483 
484 /**
485  * Return pointer to a temporary register channel (src or dest).
486  * Note that indirect addressing cannot be handled here.
487  * \param index  which temporary register
488  * \param chan  which channel of the temp register.
489  */
490 LLVMValueRef
491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492              unsigned index,
493              unsigned chan)
494 {
495    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497 
498 /**
499  * Return pointer to an output register channel (src or dest).
500  * Note that indirect addressing cannot be handled here.
501  * \param index  which output register
502  * \param chan  which channel of the output register.
503  */
504 LLVMValueRef
505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506                unsigned index,
507                unsigned chan)
508 {
509    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511 
512 /*
513  * If we have indirect addressing in outputs, copy our alloca array
514  * to the output slots specified by the caller to make sure
515  * our outputs are delivered consistently via the same interface.
516  */
517 static void
518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521       unsigned index, chan;
522       assert(bld->bld_base.info->num_outputs <=
523              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527          }
528       }
529    }
530 }
531 
532 /**
533  * Gather vector.
534  * XXX the lp_build_gather() function should be capable of doing this
535  * with a little work.
536  */
537 static LLVMValueRef
538 build_gather(struct lp_build_tgsi_context *bld_base,
539              LLVMValueRef base_ptr,
540              LLVMValueRef indexes,
541              LLVMValueRef overflow_mask,
542              LLVMValueRef indexes2)
543 {
544    struct gallivm_state *gallivm = bld_base->base.gallivm;
545    LLVMBuilderRef builder = gallivm->builder;
546    struct lp_build_context *uint_bld = &bld_base->uint_bld;
547    struct lp_build_context *bld = &bld_base->base;
548    LLVMValueRef res;
549    unsigned i;
550 
551    if (indexes2)
552       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553    else
554       res = bld->undef;
555    /*
556     * overflow_mask is a vector telling us which channels
557     * in the vector overflowed. We use the overflow behavior for
558     * constant buffers which is defined as:
559     * Out of bounds access to constant buffer returns 0 in all
560     * components. Out of bounds behavior is always with respect
561     * to the size of the buffer bound at that slot.
562     */
563 
564    if (overflow_mask) {
565       /*
566        * We avoid per-element control flow here (also due to llvm going crazy,
567        * though I suspect it's better anyway since overflow is likely rare).
568        * Note that since we still fetch from buffers even if num_elements was
569        * zero (in this case we'll fetch from index zero) the jit func callers
570        * MUST provide valid fake constant buffers of size 4x32 (the values do
571        * not matter), otherwise we'd still need (not per element though)
572        * control flow.
573        */
574       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575       if (indexes2)
576          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577    }
578 
579    /*
580     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581     */
582    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583       LLVMValueRef si, di;
584       LLVMValueRef index;
585       LLVMValueRef scalar_ptr, scalar;
586 
587       di = lp_build_const_int32(bld->gallivm, i);
588       if (indexes2)
589          si = lp_build_const_int32(bld->gallivm, i >> 1);
590       else
591          si = di;
592 
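      /* With two index vectors (64-bit fetch), even result elements are
       * gathered through 'indexes' and odd ones through 'indexes2'.
       */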
593       if (indexes2 && (i & 1)) {
594          index = LLVMBuildExtractElement(builder,
595                                          indexes2, si, "");
596       } else {
597          index = LLVMBuildExtractElement(builder,
598                                          indexes, si, "");
599       }
600       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601                                 &index, 1, "gather_ptr");
602       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603 
604       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605    }
606 
607    if (overflow_mask) {
608       if (indexes2) {
609          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611                                        bld_base->dbl_bld.int_vec_type, "");
612          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613                                bld_base->dbl_bld.zero, res);
614       } else
615          res = lp_build_select(bld, overflow_mask, bld->zero, res);
616    }
617 
618    return res;
619 }
620 
621 
622 /**
623  * Scatter/store vector.
624  */
625 static void
626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627                   LLVMValueRef base_ptr,
628                   LLVMValueRef indexes,
629                   LLVMValueRef values,
630                   struct lp_exec_mask *mask)
631 {
632    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633    LLVMBuilderRef builder = gallivm->builder;
634    unsigned i;
635    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636 
637    /*
638     * Loop over elements of index_vec, store scalar value.
639     */
640    for (i = 0; i < bld->bld_base.base.type.length; i++) {
641       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645       LLVMValueRef scalar_pred = pred ?
646          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647 
648       if (0)
649          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650                          ii, val, index, scalar_ptr);
651 
652       if (scalar_pred) {
653          LLVMValueRef real_val, dst_val;
654          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656          LLVMBuildStore(builder, real_val, scalar_ptr);
657       }
658       else {
659          LLVMBuildStore(builder, val, scalar_ptr);
660       }
661    }
662 }
663 
664 
665 /**
666  * Read the current value of the ADDR register, convert the floats to
667  * ints, add the base index and return the vector of offsets.
668  * The offsets will be used to index into the constant buffer or
669  * temporary register file.
670  */
671 static LLVMValueRef
672 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673                    unsigned reg_file, unsigned reg_index,
674                    const struct tgsi_ind_register *indirect_reg,
675                    int index_limit)
676 {
677    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
679    /* always use X component of address register */
680    unsigned swizzle = indirect_reg->Swizzle;
681    LLVMValueRef base;
682    LLVMValueRef rel;
683    LLVMValueRef max_index;
684    LLVMValueRef index;
685 
686    assert(bld->indirect_files & (1 << reg_file));
687 
688    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689 
690    assert(swizzle < 4);
691    switch (indirect_reg->File) {
692    case TGSI_FILE_ADDRESS:
693       rel = LLVMBuildLoad(builder,
694                           bld->addr[indirect_reg->Index][swizzle],
695                           "load addr reg");
696       /* ADDR LLVM values already have LLVM integer type. */
697       break;
698    case TGSI_FILE_TEMPORARY:
699       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700       rel = LLVMBuildLoad(builder, rel, "load temp reg");
701       /* TEMP LLVM values always have LLVM float type, but for indirection, the
702        * value actually stored is expected to be an integer */
703       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704       break;
705    default:
706       assert(0);
707       rel = uint_bld->zero;
708    }
709 
710    index = lp_build_add(uint_bld, base, rel);
711 
712    /*
713     * emit_fetch_constant handles constant buffer overflow so this code
714     * is pointless for them.
715     * Furthermore the D3D10 spec in section 6.5 says:
716     * If the constant buffer bound to a slot is larger than the size
717     * declared in the shader for that slot, implementations are allowed
718     * to return incorrect data (not necessarily 0) for indices that are
719     * larger than the declared size but smaller than the buffer size.
720     */
721    if (reg_file != TGSI_FILE_CONSTANT) {
722       assert(index_limit >= 0);
723       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724                                          uint_bld->type, index_limit);
725 
726       assert(!uint_bld->type.sign);
727       index = lp_build_min(uint_bld, index, max_index);
728    }
729 
730    return index;
731 }
732 
733 static struct lp_build_context *
734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 	       enum tgsi_opcode_type stype)
736 {
737    struct lp_build_context *bld_fetch;
738 
739    switch (stype) {
740    case TGSI_TYPE_FLOAT:
741    case TGSI_TYPE_UNTYPED:
742       bld_fetch = &bld_base->base;
743       break;
744    case TGSI_TYPE_UNSIGNED:
745       bld_fetch = &bld_base->uint_bld;
746       break;
747    case TGSI_TYPE_SIGNED:
748       bld_fetch = &bld_base->int_bld;
749       break;
750    case TGSI_TYPE_DOUBLE:
751       bld_fetch = &bld_base->dbl_bld;
752       break;
753    case TGSI_TYPE_UNSIGNED64:
754       bld_fetch = &bld_base->uint64_bld;
755       break;
756    case TGSI_TYPE_SIGNED64:
757       bld_fetch = &bld_base->int64_bld;
758       break;
759    case TGSI_TYPE_VOID:
760    default:
761       assert(0);
762       bld_fetch = NULL;
763       break;
764    }
765    return bld_fetch;
766 }
767 
768 static LLVMValueRef
769 get_soa_array_offsets(struct lp_build_context *uint_bld,
770                       LLVMValueRef indirect_index,
771                       unsigned chan_index,
772                       boolean need_perelement_offset)
773 {
774    struct gallivm_state *gallivm = uint_bld->gallivm;
775    LLVMValueRef chan_vec =
776       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777    LLVMValueRef length_vec =
778       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779    LLVMValueRef index_vec;
780 
781    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785 
786    if (need_perelement_offset) {
787       LLVMValueRef pixel_offsets;
788       unsigned i;
789      /* build pixel offset vector: {0, 1, 2, 3, ...} */
790       pixel_offsets = uint_bld->undef;
791       for (i = 0; i < uint_bld->type.length; i++) {
792          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794                                                 ii, ii, "");
795       }
796       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797    }
798    return index_vec;
799 }
800 
801 static LLVMValueRef
802 emit_fetch_constant(
803    struct lp_build_tgsi_context * bld_base,
804    const struct tgsi_full_src_register * reg,
805    enum tgsi_opcode_type stype,
806    unsigned swizzle_in)
807 {
808    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
809    struct gallivm_state *gallivm = bld_base->base.gallivm;
810    LLVMBuilderRef builder = gallivm->builder;
811    struct lp_build_context *uint_bld = &bld_base->uint_bld;
812    unsigned dimension = 0;
813    LLVMValueRef consts_ptr;
814    LLVMValueRef num_consts;
815    LLVMValueRef res;
816    unsigned swizzle = swizzle_in & 0xffff;
817 
818    /* XXX: Handle fetching xyzw components as a vector */
819    assert(swizzle != ~0u);
820 
821    if (reg->Register.Dimension) {
822       assert(!reg->Dimension.Indirect);
823       dimension = reg->Dimension.Index;
824       assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
825    }
826 
827    consts_ptr = bld->consts[dimension];
828    num_consts = bld->consts_sizes[dimension];
829 
830    if (reg->Register.Indirect) {
831       LLVMValueRef indirect_index;
832       LLVMValueRef swizzle_vec =
833          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
834       LLVMValueRef index_vec;  /* index into the const buffer */
835       LLVMValueRef overflow_mask;
836       LLVMValueRef index_vec2 = NULL;
837 
838       indirect_index = get_indirect_index(bld,
839                                           reg->Register.File,
840                                           reg->Register.Index,
841                                           &reg->Indirect,
842                                           bld->bld_base.info->file_max[reg->Register.File]);
843 
844       /* All fetches are from the same constant buffer, so
845        * we need to propagate the size to a vector to do a
846        * vector comparison */
847       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
848       /* Construct a boolean vector telling us which channels
849        * overflow the bound constant buffer */
850       overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
851                                        indirect_index, num_consts);
852 
853       /* index_vec = indirect_index * 4 + swizzle */
854       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
855       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
856 
857       if (tgsi_type_is_64bit(stype)) {
858          LLVMValueRef swizzle_vec2;
859          swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
860          index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
861          index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
862       }
863       /* Gather values from the constant buffer */
864       res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
865    }
866    else {
867       LLVMValueRef index;  /* index into the const buffer */
868       LLVMValueRef scalar, scalar_ptr;
869       struct lp_build_context *bld_broad = &bld_base->base;
870       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
871 
872       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
873                                 &index, 1, "");
874 
875       if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
876 
877          LLVMValueRef scalar2, scalar2_ptr;
878          LLVMValueRef shuffles[2];
879          index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
880 
881          scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
882                                     &index, 1, "");
883 
884          scalar = LLVMBuildLoad(builder, scalar_ptr, "");
885          scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
886          shuffles[0] = lp_build_const_int32(gallivm, 0);
887          shuffles[1] = lp_build_const_int32(gallivm, 1);
888 
889          res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
890          res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
891          res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
892       } else {
893         if (stype == TGSI_TYPE_DOUBLE) {
894            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
895            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
896            bld_broad = &bld_base->dbl_bld;
897         } else if (stype == TGSI_TYPE_UNSIGNED64) {
898            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
899            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
900            bld_broad = &bld_base->uint64_bld;
901         } else if (stype == TGSI_TYPE_SIGNED64) {
902            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
903            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
904            bld_broad = &bld_base->int64_bld;
905         }
906         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
907         res = lp_build_broadcast_scalar(bld_broad, scalar);
908       }
909 
910    }
911 
912    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
913       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
914       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
915    }
916 
917    return res;
918 }
919 
920 /**
921  * Fetch 64-bit values from two separate channels.
922  * 64-bit values are stored split across two channels, like xy and zw.
923  * This function creates a set of vec_length*2 floats,
924  * extracts the values from the two channels,
925  * puts them in the correct place, then casts the result to vec_length 64-bit values.
926  */
927 static LLVMValueRef
928 emit_fetch_64bit(
929    struct lp_build_tgsi_context * bld_base,
930    enum tgsi_opcode_type stype,
931    LLVMValueRef input,
932    LLVMValueRef input2)
933 {
934    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936    LLVMBuilderRef builder = gallivm->builder;
937    LLVMValueRef res;
938    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939    int i;
940    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941    int len = bld_base->base.type.length * 2;
942    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943 
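   /* Build an interleaving shuffle: result[2*i] = input[i] and
    * result[2*i+1] = input2[i]; shuffle indices >= length select from the
    * second vector.
    */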
944    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947    }
948    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949 
950    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952 
953 static LLVMValueRef
954 emit_fetch_immediate(
955    struct lp_build_tgsi_context * bld_base,
956    const struct tgsi_full_src_register * reg,
957    enum tgsi_opcode_type stype,
958    unsigned swizzle_in)
959 {
960    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
961    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
962    LLVMBuilderRef builder = gallivm->builder;
963    LLVMValueRef res = NULL;
964    unsigned swizzle = swizzle_in & 0xffff;
965 
966    if (bld->use_immediates_array || reg->Register.Indirect) {
967       LLVMValueRef imms_array;
968       LLVMTypeRef fptr_type;
969 
970       /* cast imms_array pointer to float* */
971       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
972       imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
973 
974       if (reg->Register.Indirect) {
975          LLVMValueRef indirect_index;
976          LLVMValueRef index_vec;  /* index into the immediate register array */
977          LLVMValueRef index_vec2 = NULL;
978          indirect_index = get_indirect_index(bld,
979                                              reg->Register.File,
980                                              reg->Register.Index,
981                                              &reg->Indirect,
982                                              bld->bld_base.info->file_max[reg->Register.File]);
983          /*
984           * Unlike for other reg classes, adding pixel offsets is unnecessary -
985           * immediates are stored as full vectors (FIXME??? - might be better
986           * to store them the same as constants) but all elements are the same
987           * in any case.
988           */
989          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
990                                            indirect_index,
991                                            swizzle,
992                                            FALSE);
993          if (tgsi_type_is_64bit(stype))
994             index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
995                                               indirect_index,
996                                               swizzle_in >> 16,
997                                               FALSE);
998          /* Gather values from the immediate register array */
999          res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1000       } else {
1001          LLVMValueRef gep[2];
1002          gep[0] = lp_build_const_int32(gallivm, 0);
1003          gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1004          LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1005                                               bld->imms_array, gep, 2, "");
1006          res = LLVMBuildLoad(builder, imms_ptr, "");
1007 
1008          if (tgsi_type_is_64bit(stype)) {
1009             LLVMValueRef imms_ptr2;
1010             LLVMValueRef res2;
1011             gep[1] = lp_build_const_int32(gallivm,
1012                                           reg->Register.Index * 4 + (swizzle_in >> 16));
1013             imms_ptr2 = LLVMBuildGEP(builder,
1014                                      bld->imms_array, gep, 2, "");
1015             res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1016             res = emit_fetch_64bit(bld_base, stype, res, res2);
1017          }
1018       }
1019    }
1020    else {
1021       res = bld->immediates[reg->Register.Index][swizzle];
1022       if (tgsi_type_is_64bit(stype))
1023          res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1024    }
1025 
1026    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1027       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1028       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1029    }
1030    return res;
1031 }
1032 
1033 static LLVMValueRef
1034 emit_fetch_input(
1035    struct lp_build_tgsi_context * bld_base,
1036    const struct tgsi_full_src_register * reg,
1037    enum tgsi_opcode_type stype,
1038    unsigned swizzle_in)
1039 {
1040    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1041    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042    LLVMBuilderRef builder = gallivm->builder;
1043    LLVMValueRef res;
1044    unsigned swizzle = swizzle_in & 0xffff;
1045 
1046    if (reg->Register.Indirect) {
1047       LLVMValueRef indirect_index;
1048       LLVMValueRef index_vec;  /* index into the input reg array */
1049       LLVMValueRef index_vec2 = NULL;
1050       LLVMValueRef inputs_array;
1051       LLVMTypeRef fptr_type;
1052 
1053       indirect_index = get_indirect_index(bld,
1054                                           reg->Register.File,
1055                                           reg->Register.Index,
1056                                           &reg->Indirect,
1057                                           bld->bld_base.info->file_max[reg->Register.File]);
1058 
1059       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1060                                         indirect_index,
1061                                         swizzle,
1062                                         TRUE);
1063       if (tgsi_type_is_64bit(stype)) {
1064          index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1065                                            indirect_index,
1066                                            swizzle_in >> 16,
1067                                            TRUE);
1068       }
1069       /* cast inputs_array pointer to float* */
1070       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1071       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1072 
1073       /* Gather values from the input register array */
1074       res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1075    } else {
1076       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1077          LLVMValueRef lindex = lp_build_const_int32(gallivm,
1078                                         reg->Register.Index * 4 + swizzle);
1079          LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1080                                                bld->inputs_array, &lindex, 1, "");
1081 
1082          res = LLVMBuildLoad(builder, input_ptr, "");
1083          if (tgsi_type_is_64bit(stype)) {
1084             LLVMValueRef lindex1;
1085             LLVMValueRef input_ptr2;
1086             LLVMValueRef res2;
1087 
1088             lindex1 = lp_build_const_int32(gallivm,
1089                                            reg->Register.Index * 4 + (swizzle_in >> 16));
1090             input_ptr2 = LLVMBuildGEP(builder,
1091                                       bld->inputs_array, &lindex1, 1, "");
1092             res2 = LLVMBuildLoad(builder, input_ptr2, "");
1093             res = emit_fetch_64bit(bld_base, stype, res, res2);
1094          }
1095       }
1096       else {
1097          res = bld->inputs[reg->Register.Index][swizzle];
1098          if (tgsi_type_is_64bit(stype))
1099             res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1100       }
1101    }
1102 
1103    assert(res);
1104 
1105    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1106       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1107       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1108    }
1109 
1110    return res;
1111 }
1112 
1113 
1114 static LLVMValueRef
1115 emit_fetch_gs_input(
1116    struct lp_build_tgsi_context * bld_base,
1117    const struct tgsi_full_src_register * reg,
1118    enum tgsi_opcode_type stype,
1119    unsigned swizzle_in)
1120 {
1121    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123    const struct tgsi_shader_info *info = bld->bld_base.info;
1124    LLVMBuilderRef builder = gallivm->builder;
1125    LLVMValueRef attrib_index = NULL;
1126    LLVMValueRef vertex_index = NULL;
1127    unsigned swizzle = swizzle_in & 0xffff;
1128    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1129    LLVMValueRef res;
1130 
1131    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1132       /* This is really a system value not a regular input */
1133       assert(!reg->Register.Indirect);
1134       assert(!reg->Dimension.Indirect);
1135       res = bld->system_values.prim_id;
1136       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1137          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1138       }
1139       return res;
1140    }
1141 
1142    if (reg->Register.Indirect) {
1143       /*
1144        * XXX: this is possibly not quite the right value, since file_max may be
1145        * larger than the max attrib index, due to it being the max of declared
1146        * inputs AND the max vertices per prim (which is 6 for tri adj).
1147        * It should however be safe to use (since we always allocate
1148        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1149        */
1150       int index_limit = info->file_max[reg->Register.File];
1151       attrib_index = get_indirect_index(bld,
1152                                         reg->Register.File,
1153                                         reg->Register.Index,
1154                                         &reg->Indirect,
1155                                         index_limit);
1156    } else {
1157       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1158    }
1159 
1160    if (reg->Dimension.Indirect) {
1161       /*
1162        * A fixed 6 should do as well (which is what we allocate).
1163        */
1164       int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1165       vertex_index = get_indirect_index(bld,
1166                                         reg->Register.File,
1167                                         reg->Dimension.Index,
1168                                         &reg->DimIndirect,
1169                                         index_limit);
1170    } else {
1171       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1172    }
1173 
1174    res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1175                                     reg->Dimension.Indirect,
1176                                     vertex_index,
1177                                     reg->Register.Indirect,
1178                                     attrib_index,
1179                                     swizzle_index);
1180 
1181    assert(res);
1182    if (tgsi_type_is_64bit(stype)) {
1183       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1184       LLVMValueRef res2;
1185       res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1186                                         reg->Dimension.Indirect,
1187                                         vertex_index,
1188                                         reg->Register.Indirect,
1189                                         attrib_index,
1190                                         swizzle_index);
1191       assert(res2);
1192       res = emit_fetch_64bit(bld_base, stype, res, res2);
1193    } else if (stype == TGSI_TYPE_UNSIGNED) {
1194       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1195    } else if (stype == TGSI_TYPE_SIGNED) {
1196       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1197    }
1198 
1199    return res;
1200 }
1201 
1202 static LLVMValueRef
1203 emit_fetch_tcs_input(
1204    struct lp_build_tgsi_context * bld_base,
1205    const struct tgsi_full_src_register * reg,
1206    enum tgsi_opcode_type stype,
1207    unsigned swizzle_in)
1208 {
1209    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1210    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1211    const struct tgsi_shader_info *info = bld->bld_base.info;
1212    LLVMBuilderRef builder = gallivm->builder;
1213    LLVMValueRef attrib_index = NULL;
1214    LLVMValueRef vertex_index = NULL;
1215    unsigned swizzle = swizzle_in & 0xffff;
1216    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1217    LLVMValueRef res;
1218 
1219    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1220       /* This is really a system value not a regular input */
1221       assert(!reg->Register.Indirect);
1222       assert(!reg->Dimension.Indirect);
1223       res = bld->system_values.prim_id;
1224       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1225          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1226       }
1227       return res;
1228    }
1229 
1230    if (reg->Register.Indirect) {
1231       int index_limit = info->file_max[reg->Register.File];
1232       attrib_index = get_indirect_index(bld,
1233                                         reg->Register.File,
1234                                         reg->Register.Index,
1235                                         &reg->Indirect,
1236                                         index_limit);
1237    } else {
1238       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1239    }
1240 
1241    if (reg->Dimension.Indirect) {
1242       vertex_index = get_indirect_index(bld,
1243                                         reg->Register.File,
1244                                         reg->Dimension.Index,
1245                                         &reg->DimIndirect,
1246                                         PIPE_MAX_SHADER_INPUTS);
1247    } else {
1248       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1249    }
1250 
1251    // TCS can read from its own outputs
1252    if (reg->Register.File == TGSI_FILE_OUTPUT) {
1253       res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1254                                               reg->Dimension.Indirect,
1255                                               vertex_index,
1256                                               reg->Register.Indirect,
1257                                               attrib_index,
1258                                               FALSE,
1259                                               swizzle_index,
1260                                               bld_base->info->output_semantic_name[reg->Register.Index]);
1261    } else {
1262       res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1263                                              reg->Dimension.Indirect,
1264                                              vertex_index,
1265                                              reg->Register.Indirect,
1266                                              attrib_index,
1267                                              FALSE,
1268                                              swizzle_index);
1269    }
1270 
1271 
1272    assert(res);
1273    if (tgsi_type_is_64bit(stype)) {
1274       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1275       LLVMValueRef res2;
1276       if (reg->Register.File == TGSI_FILE_OUTPUT) {
1277          res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1278                                                   reg->Dimension.Indirect,
1279                                                   vertex_index,
1280                                                   reg->Register.Indirect,
1281                                                   attrib_index,
1282                                                   FALSE,
1283                                                   swizzle_index,
1284                                                   bld_base->info->output_semantic_name[reg->Register.Index]);
1285       } else {
1286          res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1287                                                  reg->Dimension.Indirect,
1288                                                  vertex_index,
1289                                                  reg->Register.Indirect,
1290                                                  attrib_index,
1291                                                  FALSE,
1292                                                  swizzle_index);
1293       }
1294       assert(res2);
1295       res = emit_fetch_64bit(bld_base, stype, res, res2);
1296    } else if (stype == TGSI_TYPE_UNSIGNED) {
1297       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1298    } else if (stype == TGSI_TYPE_SIGNED) {
1299       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1300    }
1301 
1302    return res;
1303 }
1304 
1305 static LLVMValueRef
1306 emit_fetch_tes_input(
1307    struct lp_build_tgsi_context * bld_base,
1308    const struct tgsi_full_src_register * reg,
1309    enum tgsi_opcode_type stype,
1310    unsigned swizzle_in)
1311 {
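   /*
    * Note: swizzle_in packs two channel selectors - the low 16 bits pick
    * the (first) 32-bit channel, the high 16 bits pick the second channel
    * of a 64-bit source (used below when tgsi_type_is_64bit(stype)).
    */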
1312    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1313    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1314    const struct tgsi_shader_info *info = bld->bld_base.info;
1315    LLVMBuilderRef builder = gallivm->builder;
1316    LLVMValueRef attrib_index = NULL;
1317    LLVMValueRef vertex_index = NULL;
1318    unsigned swizzle = swizzle_in & 0xffff;
1319    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1320    LLVMValueRef res;
1321 
1322    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1323       /* This is really a system value, not a regular input */
1324       assert(!reg->Register.Indirect);
1325       assert(!reg->Dimension.Indirect);
1326       res = bld->system_values.prim_id;
1327       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1328          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1329       }
1330       return res;
1331    }
1332 
1333    if (reg->Register.Indirect) {
1334       int index_limit = info->file_max[reg->Register.File];
1335       attrib_index = get_indirect_index(bld,
1336                                         reg->Register.File,
1337                                         reg->Register.Index,
1338                                         &reg->Indirect,
1339                                         index_limit);
1340    } else {
1341       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1342    }
1343 
1344    if (reg->Dimension.Indirect) {
1345       vertex_index = get_indirect_index(bld,
1346                                         reg->Register.File,
1347                                         reg->Dimension.Index,
1348                                         &reg->DimIndirect,
1349                                         PIPE_MAX_SHADER_INPUTS);
1350    } else {
1351       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1352    }
1353 
1354    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1355       res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1356                                      reg->Register.Indirect,
1357                                      attrib_index,
1358                                      swizzle_index);
1359    } else {
1360       res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1361                                        reg->Dimension.Indirect,
1362                                        vertex_index,
1363                                        reg->Register.Indirect,
1364                                        attrib_index,
1365                                        FALSE,
1366                                        swizzle_index);
1367    }
1368 
1369    assert(res);
1370    if (tgsi_type_is_64bit(stype)) {
1371       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1372       LLVMValueRef res2;
1373       if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1374          res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1375                                     reg->Register.Indirect,
1376                                     attrib_index,
1377                                     swizzle_index);
1378       }
1379       else {
1380          res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1381                                              reg->Dimension.Indirect,
1382                                              vertex_index,
1383                                              reg->Register.Indirect,
1384                                              attrib_index,
1385                                              FALSE,
1386                                              swizzle_index);
1387       }
1388       assert(res2);
1389       res = emit_fetch_64bit(bld_base, stype, res, res2);
1390    } else if (stype == TGSI_TYPE_UNSIGNED) {
1391       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1392    } else if (stype == TGSI_TYPE_SIGNED) {
1393       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1394    }
1395 
1396    return res;
1397 }
1398 
1399 
1400 
1401 static LLVMValueRef
1402 emit_fetch_temporary(
1403    struct lp_build_tgsi_context * bld_base,
1404    const struct tgsi_full_src_register * reg,
1405    enum tgsi_opcode_type stype,
1406    unsigned swizzle_in)
1407 {
1408    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1409    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1410    LLVMBuilderRef builder = gallivm->builder;
1411    LLVMValueRef res;
1412    unsigned swizzle = swizzle_in & 0xffff;
1413 
1414    if (reg->Register.Indirect) {
1415       LLVMValueRef indirect_index;
1416       LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
1417       LLVMValueRef temps_array;
1418       LLVMTypeRef fptr_type;
1419 
1420       indirect_index = get_indirect_index(bld,
1421                                           reg->Register.File,
1422                                           reg->Register.Index,
1423                                           &reg->Indirect,
1424                                           bld->bld_base.info->file_max[reg->Register.File]);
1425 
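      /*
       * get_soa_array_offsets yields one element offset per lane into the
       * temps array when viewed as a flat float array (SoA layout).
       */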
1426       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1427                                         indirect_index,
1428                                         swizzle,
1429                                         TRUE);
1430       if (tgsi_type_is_64bit(stype)) {
1431                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1432                                                   indirect_index,
1433                                                   swizzle_in >> 16,
1434                                                   TRUE);
1435       }
1436 
1437       /* cast temps_array pointer to float* */
1438       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1439       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1440 
1441       /* Gather values from the temporary register array */
1442       res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1443    }
1444    else {
1445       LLVMValueRef temp_ptr;
1446       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1447       res = LLVMBuildLoad(builder, temp_ptr, "");
1448 
1449       if (tgsi_type_is_64bit(stype)) {
1450          LLVMValueRef temp_ptr2, res2;
1451 
1452          temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1453          res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1454          res = emit_fetch_64bit(bld_base, stype, res, res2);
1455       }
1456    }
1457 
1458    if (stype == TGSI_TYPE_SIGNED ||
1459        stype == TGSI_TYPE_UNSIGNED ||
1460        stype == TGSI_TYPE_DOUBLE ||
1461        stype == TGSI_TYPE_SIGNED64 ||
1462        stype == TGSI_TYPE_UNSIGNED64) {
1463       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1464       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1465    }
1466 
1467    return res;
1468 }
1469 
1470 static LLVMValueRef
1471 emit_fetch_system_value(
1472    struct lp_build_tgsi_context * bld_base,
1473    const struct tgsi_full_src_register * reg,
1474    enum tgsi_opcode_type stype,
1475    unsigned swizzle_in)
1476 {
1477    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1478    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1479    const struct tgsi_shader_info *info = bld->bld_base.info;
1480    LLVMBuilderRef builder = gallivm->builder;
1481    LLVMValueRef res;
1482    enum tgsi_opcode_type atype; // Actual type of the value
1483    unsigned swizzle = swizzle_in & 0xffff;
1484 
1485    assert(!reg->Register.Indirect);
1486 
1487    switch (info->system_value_semantic_name[reg->Register.Index]) {
1488    case TGSI_SEMANTIC_INSTANCEID:
1489       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1490       atype = TGSI_TYPE_UNSIGNED;
1491       break;
1492 
1493    case TGSI_SEMANTIC_VERTEXID:
1494       res = bld->system_values.vertex_id;
1495       atype = TGSI_TYPE_UNSIGNED;
1496       break;
1497 
1498    case TGSI_SEMANTIC_VERTEXID_NOBASE:
1499       res = bld->system_values.vertex_id_nobase;
1500       atype = TGSI_TYPE_UNSIGNED;
1501       break;
1502 
1503    case TGSI_SEMANTIC_BASEVERTEX:
1504       res = bld->system_values.basevertex;
1505       atype = TGSI_TYPE_UNSIGNED;
1506       break;
1507 
1508    case TGSI_SEMANTIC_BASEINSTANCE:
1509       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1510       atype = TGSI_TYPE_UNSIGNED;
1511       break;
1512 
1513    case TGSI_SEMANTIC_PRIMID:
1514       res = bld->system_values.prim_id;
1515       atype = TGSI_TYPE_UNSIGNED;
1516       break;
1517 
1518    case TGSI_SEMANTIC_INVOCATIONID:
1519       if (info->processor == PIPE_SHADER_TESS_CTRL)
1520          res = bld->system_values.invocation_id;
1521       else
1522          res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1523       atype = TGSI_TYPE_UNSIGNED;
1524       break;
1525 
1526    case TGSI_SEMANTIC_HELPER_INVOCATION:
1527       res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1528       atype = TGSI_TYPE_UNSIGNED;
1529       break;
1530 
1531    case TGSI_SEMANTIC_THREAD_ID:
1532       res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1533       atype = TGSI_TYPE_UNSIGNED;
1534       break;
1535 
1536    case TGSI_SEMANTIC_BLOCK_ID:
1537       res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1538       atype = TGSI_TYPE_UNSIGNED;
1539       break;
1540 
1541    case TGSI_SEMANTIC_GRID_SIZE:
1542       res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1543       atype = TGSI_TYPE_UNSIGNED;
1544       break;
1545 
1546    case TGSI_SEMANTIC_TESSCOORD:
1547       {
1548          LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1549          LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1550          res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
1551       }
1552       atype = TGSI_TYPE_FLOAT;
1553       break;
1554 
1555    case TGSI_SEMANTIC_FACE:
1556       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1557       atype = TGSI_TYPE_UNSIGNED;
1558       break;
1559 
1560   case TGSI_SEMANTIC_DRAWID:
1561       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1562       atype = TGSI_TYPE_UNSIGNED;
1563       break;
1564 
1565   case TGSI_SEMANTIC_SAMPLEID:
1566       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1567       atype = TGSI_TYPE_UNSIGNED;
1568       break;
1569 
1570    case TGSI_SEMANTIC_TESSOUTER:
1571       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1572                                        bld->system_values.tess_outer,
1573                                        lp_build_const_int32(gallivm, swizzle_in));
1574       atype = TGSI_TYPE_FLOAT;
1575       break;
1576 
1577    case TGSI_SEMANTIC_TESSINNER:
1578       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1579                                        bld->system_values.tess_inner,
1580                                        lp_build_const_int32(gallivm, swizzle_in));
1581       atype = TGSI_TYPE_FLOAT;
1582       break;
1583 
1584    case TGSI_SEMANTIC_VERTICESIN:
1585       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1586       atype = TGSI_TYPE_UNSIGNED;
1587       break;
1588 
1589    default:
1590       assert(!"unexpected semantic in emit_fetch_system_value");
1591       res = bld_base->base.zero;
1592       atype = TGSI_TYPE_FLOAT;
1593       break;
1594    }
1595 
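   /*
    * Reconcile the actual type of the system value (atype) with the type
    * requested by the instruction (stype); the bitcasts below only change
    * the LLVM vector type, not the bits.
    */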
1596    if (atype != stype) {
1597       if (stype == TGSI_TYPE_FLOAT) {
1598          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1599       } else if (stype == TGSI_TYPE_UNSIGNED) {
1600          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1601       } else if (stype == TGSI_TYPE_SIGNED) {
1602          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1603       }
1604    }
1605 
1606    return res;
1607 }
1608 
1609 /**
1610  * Register fetch with derivatives.
1611  */
1612 static void
1613 emit_fetch_deriv(
1614    struct lp_build_tgsi_soa_context *bld,
1615    LLVMValueRef src,
1616    LLVMValueRef *res,
1617    LLVMValueRef *ddx,
1618    LLVMValueRef *ddy)
1619 {
1620    if (res)
1621       *res = src;
1622 
1623    /* TODO: use interpolation coeffs for inputs */
1624 
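   /*
    * lp_build_ddx/ddy approximate the derivatives with finite differences
    * across the 2x2 quad layout of the SoA vectors.
    */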
1625    if (ddx)
1626       *ddx = lp_build_ddx(&bld->bld_base.base, src);
1627 
1628    if (ddy)
1629       *ddy = lp_build_ddy(&bld->bld_base.base, src);
1630 }
1631 
1632 /**
1633  * Store an array of vec-length 64-bit values into two arrays of vec-length floats,
1634  * i.e.
1635  * value is d0, d1, d2, d3 etc.
1636  * each 64-bit value has low and high 32-bit pieces x, y,
1637  * so they get stored into the separate channels as:
1638  * chan_ptr = d0.x, d1.x, d2.x, d3.x
1639  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1640  */
1641 static void
1642 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1643                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1644                       LLVMValueRef value)
1645 {
1646    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1647    struct gallivm_state *gallivm = bld_base->base.gallivm;
1648    LLVMBuilderRef builder = gallivm->builder;
1649    struct lp_build_context *float_bld = &bld_base->base;
1650    unsigned i;
1651    LLVMValueRef temp, temp2;
1652    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1653    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1654 
1655    for (i = 0; i < bld_base->base.type.length; i++) {
1656       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1657       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1658    }
1659 
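   /*
    * E.g. for a 4-wide vector this selects elements {0,2,4,6} (the low
    * 32-bit halves) into temp and {1,3,5,7} (the high halves) into temp2.
    */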
1660    temp = LLVMBuildShuffleVector(builder, value,
1661                                  LLVMGetUndef(LLVMTypeOf(value)),
1662                                  LLVMConstVector(shuffles,
1663                                                  bld_base->base.type.length),
1664                                  "");
1665    temp2 = LLVMBuildShuffleVector(builder, value,
1666                                   LLVMGetUndef(LLVMTypeOf(value)),
1667                                   LLVMConstVector(shuffles2,
1668                                                   bld_base->base.type.length),
1669                                   "");
1670 
1671    lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1672    lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1673 }
1674 
1675 static void
1676 emit_store_output(struct lp_build_tgsi_context *bld_base,
1677                   enum tgsi_opcode_type dtype,
1678                   const struct tgsi_full_dst_register *reg,
1679                   unsigned index,
1680                   unsigned chan_index,
1681                   LLVMValueRef indirect_index,
1682                   LLVMValueRef value)
1683 {
1684    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1685    struct gallivm_state *gallivm = bld_base->base.gallivm;
1686    LLVMBuilderRef builder = gallivm->builder;
1687    struct lp_build_context *float_bld = &bld_base->base;
1688 
1689    /* Outputs are always stored as floats */
1690    value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1691 
1692    if (reg->Register.Indirect) {
1693       LLVMValueRef index_vec;  /* indexes into the output registers */
1694       LLVMValueRef outputs_array;
1695       LLVMTypeRef fptr_type;
1696 
1697       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1698                                           indirect_index,
1699                                           chan_index,
1700                                           TRUE);
1701 
1702       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1703       outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1704 
1705       /* Scatter store values into output registers */
1706       emit_mask_scatter(bld, outputs_array, index_vec, value,
1707                         &bld->exec_mask);
1708    }
1709    else {
1710       assert(LLVMTypeOf(value) == float_bld->vec_type);
1711       LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1712                                                 chan_index);
1713 
1714       if (tgsi_type_is_64bit(dtype)) {
1715          LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1716                                                    chan_index + 1);
1717          emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1718                                  value);
1719       } else
1720          lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1721    }
1722 }
1723 
1724 static void
1725 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1726                       enum tgsi_opcode_type dtype,
1727                       const struct tgsi_full_dst_register *reg,
1728                       unsigned index,
1729                       unsigned chan_index,
1730                       LLVMValueRef indirect_index,
1731                       LLVMValueRef value)
1732 {
1733    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1734    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1735    const struct tgsi_shader_info *info = bld->bld_base.info;
1736    LLVMValueRef attrib_index = NULL;
1737    LLVMValueRef vertex_index = NULL;
1738    LLVMValueRef channel_index = NULL;
1739 
1740    if (reg->Register.Indirect) {
1741       /*
1742        * XXX: this is possibly not quite the right value, since file_max may be
1743        * larger than the max attrib index, due to it being the max of declared
1744        * inputs AND the max vertices per prim (which is 6 for tri adj).
1745        * It should however be safe to use (since we always allocate
1746        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1747        */
1748       int index_limit = info->file_max[reg->Register.File];
1749       attrib_index = get_indirect_index(bld,
1750                                         reg->Register.File,
1751                                         reg->Register.Index,
1752                                         &reg->Indirect,
1753                                         index_limit);
1754    } else {
1755       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1756    }
1757 
1758    if (reg->Dimension.Indirect) {
1759       vertex_index = get_indirect_index(bld,
1760                                         reg->Register.File,
1761                                         reg->Dimension.Index,
1762                                         &reg->DimIndirect,
1763                                         PIPE_MAX_SHADER_OUTPUTS);
1764    } else {
1765       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1766    }
1767 
1768    channel_index = lp_build_const_int32(gallivm, chan_index);
1769 
1770    assert(bld->tcs_iface->emit_store_output);
1771    bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1772                                           bld_base->info->output_semantic_name[reg->Register.Index],
1773                                           reg->Dimension.Indirect,
1774                                           vertex_index,
1775                                           reg->Register.Indirect,
1776                                           attrib_index,
1777                                           false,
1778                                           channel_index,
1779                                           value,
1780                                           mask_vec(bld_base));
1781 }
1782 
1783 static void
1784 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1785                   enum tgsi_opcode_type dtype,
1786                   const struct tgsi_full_dst_register *reg,
1787                   unsigned index,
1788                   unsigned chan_index,
1789                   LLVMValueRef indirect_index,
1790                   LLVMValueRef value)
1791 {
1792    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1793    struct gallivm_state *gallivm = bld_base->base.gallivm;
1794    LLVMBuilderRef builder = gallivm->builder;
1795    struct lp_build_context *float_bld = &bld_base->base;
1796 
1797    /* Temporaries are always stored as floats */
1798    if (!tgsi_type_is_64bit(dtype))
1799       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1800    else
1801       value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1802 
1803    if (reg->Register.Indirect) {
1804       LLVMValueRef index_vec;  /* indexes into the temp registers */
1805       LLVMValueRef temps_array;
1806       LLVMTypeRef fptr_type;
1807 
1808       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1809                                           indirect_index,
1810                                           chan_index,
1811                                           TRUE);
1812 
1813       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1814       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1815 
1816       /* Scatter store values into temp registers */
1817       emit_mask_scatter(bld, temps_array, index_vec, value,
1818                         &bld->exec_mask);
1819    }
1820    else {
1821       LLVMValueRef temp_ptr;
1822       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1823 
1824       if (tgsi_type_is_64bit(dtype)) {
1825          LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1826                                                       reg->Register.Index,
1827                                                       chan_index + 1);
1828          emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1829                                  value);
1830       }
1831       else
1832          lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1833    }
1834 }
1835 
1836 static void
1837 emit_store_address(struct lp_build_tgsi_context *bld_base,
1838                    enum tgsi_opcode_type dtype,
1839                    const struct tgsi_full_dst_register *reg,
1840                    unsigned index,
1841                    unsigned chan_index,
1842                    LLVMValueRef indirect_index,
1843                    LLVMValueRef value)
1844 {
1845    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1846    struct gallivm_state *gallivm = bld_base->base.gallivm;
1847    LLVMBuilderRef builder = gallivm->builder;
1848    struct lp_build_context *int_bld = &bld_base->int_bld;
1849 
1850    assert(dtype == TGSI_TYPE_SIGNED);
1851    assert(LLVMTypeOf(value) == int_bld->vec_type);
1852    value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1853    lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1854                         bld->addr[reg->Register.Index][chan_index]);
1855 }
1856 
1857 /**
1858  * Register store.
1859  */
1860 static void
1861 emit_store_chan(
1862    struct lp_build_tgsi_context *bld_base,
1863    const struct tgsi_full_instruction *inst,
1864    unsigned index,
1865    unsigned chan_index,
1866    LLVMValueRef value)
1867 {
1868    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1869    struct gallivm_state *gallivm = bld_base->base.gallivm;
1870    LLVMBuilderRef builder = gallivm->builder;
1871    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1872    struct lp_build_context *float_bld = &bld_base->base;
1873    LLVMValueRef indirect_index = NULL;
1874    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1875 
1876    /*
1877     * Apply saturation.
1878     *
1879     * The value is always assumed to be float.
1880     */
1881    if (inst->Instruction.Saturate) {
1882       assert(dtype == TGSI_TYPE_FLOAT ||
1883              dtype == TGSI_TYPE_UNTYPED);
1884       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1885       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1886    }
1887 
1888    if (reg->Register.Indirect) {
1889       /*
1890        * Currently mesa/st doesn't generate indirect stores
1891        * to 64-bit values; it normally uses MOV to do indirect stores.
1892        */
1893       assert(!tgsi_type_is_64bit(dtype));
1894       indirect_index = get_indirect_index(bld,
1895                                           reg->Register.File,
1896                                           reg->Register.Index,
1897                                           &reg->Indirect,
1898                                           bld->bld_base.info->file_max[reg->Register.File]);
1899    } else {
1900       assert(reg->Register.Index <=
1901                              bld_base->info->file_max[reg->Register.File]);
1902    }
1903 
1904    if (DEBUG_EXECUTION) {
1905       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1906    }
1907 
1908    assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1909    bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1910                                                       dtype,
1911                                                       reg,
1912                                                       index,
1913                                                       chan_index,
1914                                                       indirect_index,
1915                                                       value);
1916 
1917    (void)dtype;
1918 }
1919 
1920 /*
1921  * Called at the beginning of the translation of each TGSI instruction, to
1922  * emit some debug code.
1923  */
1924 static void
1925 emit_debug(
1926    struct lp_build_tgsi_context * bld_base,
1927    const struct tgsi_full_instruction * inst,
1928    const struct tgsi_opcode_info * info)
1929 
1930 {
1931    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1932 
1933    if (DEBUG_EXECUTION) {
1934       /*
1935        * Dump the TGSI instruction.
1936        */
1937 
1938       struct gallivm_state *gallivm = bld_base->base.gallivm;
1939       char buf[512];
1940       buf[0] = '$';
1941       buf[1] = ' ';
1942       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1943       lp_build_printf(gallivm, buf);
1944 
1945       /* Dump the execution mask.
1946        */
1947       if (bld->exec_mask.has_mask) {
1948          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1949       }
1950    }
1951 }
1952 
1953 static void
1954 emit_store(
1955    struct lp_build_tgsi_context * bld_base,
1956    const struct tgsi_full_instruction * inst,
1957    const struct tgsi_opcode_info * info,
1958    unsigned index,
1959    LLVMValueRef dst[4])
1960 
1961 {
1962    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1963 
1964    unsigned writemask = inst->Dst[index].Register.WriteMask;
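   /*
    * For 64-bit destination types each value covers two consecutive
    * channels (xy or zw), so only the even channel indices initiate a
    * store; emit_store_chan writes both halves.
    */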
1965    while (writemask) {
1966       unsigned chan_index = u_bit_scan(&writemask);
1967       if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1968           continue;
1969       emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1970    }
1971 }
1972 
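/*
 * Translate a TGSI texture target to the corresponding pipe texture target.
 */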
1973 static unsigned
1974 tgsi_to_pipe_tex_target(enum tgsi_texture_type tgsi_target)
1975 {
1976    switch (tgsi_target) {
1977    case TGSI_TEXTURE_BUFFER:
1978       return PIPE_BUFFER;
1979    case TGSI_TEXTURE_1D:
1980    case TGSI_TEXTURE_SHADOW1D:
1981       return PIPE_TEXTURE_1D;
1982    case TGSI_TEXTURE_2D:
1983    case TGSI_TEXTURE_SHADOW2D:
1984    case TGSI_TEXTURE_2D_MSAA:
1985       return PIPE_TEXTURE_2D;
1986    case TGSI_TEXTURE_3D:
1987       return PIPE_TEXTURE_3D;
1988    case TGSI_TEXTURE_CUBE:
1989    case TGSI_TEXTURE_SHADOWCUBE:
1990       return PIPE_TEXTURE_CUBE;
1991    case TGSI_TEXTURE_RECT:
1992    case TGSI_TEXTURE_SHADOWRECT:
1993       return PIPE_TEXTURE_RECT;
1994    case TGSI_TEXTURE_1D_ARRAY:
1995    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1996       return PIPE_TEXTURE_1D_ARRAY;
1997    case TGSI_TEXTURE_2D_ARRAY:
1998    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1999    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2000       return PIPE_TEXTURE_2D_ARRAY;
2001    case TGSI_TEXTURE_CUBE_ARRAY:
2002    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2003       return PIPE_TEXTURE_CUBE_ARRAY;
2004    default:
2005       assert(0);
2006       return PIPE_BUFFER;
2007    }
2008 }
2009 
2010 
2011 static enum lp_sampler_lod_property
2012 lp_build_lod_property(
2013    struct lp_build_tgsi_context *bld_base,
2014    const struct tgsi_full_instruction *inst,
2015    unsigned src_op)
2016 {
2017    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2018    enum lp_sampler_lod_property lod_property;
2019 
2020    /*
2021     * Not much we can do here. We could try catching inputs declared
2022     * with constant interpolation but it's not clear it's worth it - since for
2023     * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2024     * the coords, it could only work for SAMPLE/TXQ/SVIEWINFO - just
2025     * like the constant/immediate recognition below.
2026     * What would be of more value is to recognize temps holding
2027     * broadcast scalars, but there is no way we can do that.
2028     * Tried asking llvm but without any success (using LLVMIsConstant
2029     * even though this isn't exactly what we'd need), even as simple as
2030     * IMM[0] UINT32 (0,-1,0,0)
2031     * MOV TEMP[0] IMM[0].yyyy
2032     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2033     * doesn't work.
2034     * This means there's ZERO chance this will ever catch a scalar lod
2035     * with traditional tex opcodes as well as texel fetches, since the lod
2036     * comes from the same reg as coords (except some test shaders using
2037     * constant coords maybe).
2038     * There's at least hope for sample opcodes as well as size queries.
2039     */
2040    if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2041        reg->Register.File == TGSI_FILE_CONSTANT ||
2042        reg->Register.File == TGSI_FILE_IMMEDIATE) {
2043       lod_property = LP_SAMPLER_LOD_SCALAR;
2044    }
2045    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2046       if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2047          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2048       }
2049       else {
2050          lod_property = LP_SAMPLER_LOD_PER_QUAD;
2051       }
2052    }
2053    else {
2054       /* never use scalar (per-quad) lod; the results are just too wrong. */
2055       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2056    }
2057    return lod_property;
2058 }
2059 
2060 
2061 /**
2062  * High-level instruction translators.
2063  */
2064 
2065 static void
2066 emit_tex( struct lp_build_tgsi_soa_context *bld,
2067           const struct tgsi_full_instruction *inst,
2068           enum lp_build_tex_modifier modifier,
2069           LLVMValueRef *texel,
2070           unsigned sampler_reg,
2071           enum lp_sampler_op_type sampler_op)
2072 {
2073    unsigned unit = inst->Src[sampler_reg].Register.Index;
2074    LLVMValueRef oow = NULL;
2075    LLVMValueRef lod = NULL;
2076    LLVMValueRef coords[5];
2077    LLVMValueRef offsets[3] = { NULL };
2078    struct lp_derivatives derivs;
2079    struct lp_sampler_params params;
2080    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2081    unsigned num_derivs, num_offsets, i;
2082    unsigned shadow_coord = 0;
2083    unsigned layer_coord = 0;
2084    unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
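   /*
    * sample_key is a bitfield describing the sampling variant: op type,
    * lod control, shadow compare, offsets, gather component and lod
    * property are OR'ed in below using the LP_SAMPLER_* flags/shifts.
    */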
2085 
2086    memset(&params, 0, sizeof(params));
2087 
2088    if (!bld->sampler) {
2089       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2090       for (i = 0; i < 4; i++) {
2091          texel[i] = bld->bld_base.base.undef;
2092       }
2093       return;
2094    }
2095 
2096    switch (inst->Texture.Texture) {
2097    case TGSI_TEXTURE_1D_ARRAY:
2098       layer_coord = 1;
2099       FALLTHROUGH;
2100    case TGSI_TEXTURE_1D:
2101       num_offsets = 1;
2102       num_derivs = 1;
2103       break;
2104    case TGSI_TEXTURE_2D_ARRAY:
2105       layer_coord = 2;
2106       FALLTHROUGH;
2107    case TGSI_TEXTURE_2D:
2108    case TGSI_TEXTURE_RECT:
2109       num_offsets = 2;
2110       num_derivs = 2;
2111       break;
2112    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2113       layer_coord = 1;
2114       FALLTHROUGH;
2115    case TGSI_TEXTURE_SHADOW1D:
2116       shadow_coord = 2;
2117       num_offsets = 1;
2118       num_derivs = 1;
2119       break;
2120    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2121       layer_coord = 2;
2122       shadow_coord = 3;
2123       num_offsets = 2;
2124       num_derivs = 2;
2125       break;
2126    case TGSI_TEXTURE_SHADOW2D:
2127    case TGSI_TEXTURE_SHADOWRECT:
2128       shadow_coord = 2;
2129       num_offsets = 2;
2130       num_derivs = 2;
2131       break;
2132    case TGSI_TEXTURE_CUBE:
2133       num_offsets = 2;
2134       num_derivs = 3;
2135       break;
2136    case TGSI_TEXTURE_3D:
2137       num_offsets = 3;
2138       num_derivs = 3;
2139       break;
2140    case TGSI_TEXTURE_SHADOWCUBE:
2141       shadow_coord = 3;
2142       num_offsets = 2;
2143       num_derivs = 3;
2144       break;
2145    case TGSI_TEXTURE_CUBE_ARRAY:
2146       num_offsets = 2;
2147       num_derivs = 3;
2148       layer_coord = 3;
2149       break;
2150    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2151       num_offsets = 2;
2152       num_derivs = 3;
2153       layer_coord = 3;
2154       shadow_coord = 4; /* shadow coord special different reg */
2155       break;
2156    case TGSI_TEXTURE_2D_MSAA:
2157    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2158    default:
2159       assert(0);
2160       return;
2161    }
2162 
2163    /* Note that lod, and especially projected, are illegal in a LOT of cases. */
2164    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2165        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2166       if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2167          lod = bld->bld_base.base.zero;
2168       } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2169                  inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2170          /* note that shadow cube array with bias/explicit lod does not exist */
2171          lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2172       }
2173       else {
2174          lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2175       }
2176       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2177          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2178       }
2179       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2180          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2181       }
2182       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2183    }
2184 
2185    if (sampler_op == LP_SAMPLER_OP_GATHER) {
2186       uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2187       sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2188    }
2189    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2190       oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2191       oow = lp_build_rcp(&bld->bld_base.base, oow);
2192    }
2193 
2194    for (i = 0; i < num_derivs; i++) {
2195       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2196       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2197          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2198    }
2199    for (i = num_derivs; i < 5; i++) {
2200       coords[i] = bld->bld_base.base.undef;
2201    }
2202 
2203    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2204    if (layer_coord) {
2205       if (layer_coord == 3) {
2206          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2207       }
2208       else {
2209          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2210       }
2211       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2212          coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2213    }
2214    /* Shadow coord always occupies the 5th slot. */
2215    if (shadow_coord) {
2216       sample_key |= LP_SAMPLER_SHADOW;
2217       if (shadow_coord == 4) {
2218          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2219       }
2220       else {
2221          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2222       }
2223       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2224          coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2225    }
2226 
2227    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2228       unsigned dim;
2229       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2230       for (dim = 0; dim < num_derivs; ++dim) {
2231          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2232          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2233       }
2234       params.derivs = &derivs;
2235       /*
2236        * We could also check whether all src regs are constant, but I doubt
2237        * such cases exist in practice.
2238        */
2239       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2240          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2241             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2242          }
2243          else {
2244             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2245          }
2246       }
2247       else {
2248          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2249       }
2250    }
2251    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2252 
2253    /* we don't handle the 4 offset version of tg4 */
2254    if (inst->Texture.NumOffsets == 1) {
2255       unsigned dim;
2256       sample_key |= LP_SAMPLER_OFFSETS;
2257       for (dim = 0; dim < num_offsets; dim++) {
2258          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2259       }
2260    }
2261 
2262    params.type = bld->bld_base.base.type;
2263    params.sample_key = sample_key;
2264    params.texture_index = unit;
2265    params.sampler_index = unit;
2266    params.context_ptr = bld->context_ptr;
2267    params.thread_data_ptr = bld->thread_data_ptr;
2268    params.coords = coords;
2269    params.offsets = offsets;
2270    params.lod = lod;
2271    params.texel = texel;
2272 
2273    bld->sampler->emit_tex_sample(bld->sampler,
2274                                  bld->bld_base.base.gallivm,
2275                                  &params);
2276 }
2277 
2278 static void
2279 emit_sample(struct lp_build_tgsi_soa_context *bld,
2280             const struct tgsi_full_instruction *inst,
2281             enum lp_build_tex_modifier modifier,
2282             boolean compare,
2283             enum lp_sampler_op_type sample_type,
2284             LLVMValueRef *texel)
2285 {
2286    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2287    unsigned texture_unit, sampler_unit;
2288    LLVMValueRef lod = NULL;
2289    LLVMValueRef coords[5];
2290    LLVMValueRef offsets[3] = { NULL };
2291    struct lp_derivatives derivs;
2292    struct lp_sampler_params params;
2293    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2294 
2295    unsigned num_offsets, num_derivs, i;
2296    unsigned layer_coord = 0;
2297    unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2298 
2299    memset(&params, 0, sizeof(params));
2300 
2301    if (!bld->sampler) {
2302       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2303       for (i = 0; i < 4; i++) {
2304          texel[i] = bld->bld_base.base.undef;
2305       }
2306       return;
2307    }
2308 
2309    /*
2310     * Unlike old-style tex opcodes, the texture/sampler indices
2311     * always come from src1 and src2 respectively.
2312     */
2313    texture_unit = inst->Src[1].Register.Index;
2314    sampler_unit = inst->Src[2].Register.Index;
2315 
2316    /*
2317     * Note that inst->Texture.Texture will contain the number of offsets;
2318     * however, the target information is NOT there and comes from the
2319     * declared sampler views instead.
2320     */
2321    switch (bld->sv[texture_unit].Resource) {
2322    case TGSI_TEXTURE_1D:
2323       num_offsets = 1;
2324       num_derivs = 1;
2325       break;
2326    case TGSI_TEXTURE_1D_ARRAY:
2327       layer_coord = 1;
2328       num_offsets = 1;
2329       num_derivs = 1;
2330       break;
2331    case TGSI_TEXTURE_2D:
2332    case TGSI_TEXTURE_RECT:
2333       num_offsets = 2;
2334       num_derivs = 2;
2335       break;
2336    case TGSI_TEXTURE_2D_ARRAY:
2337       layer_coord = 2;
2338       num_offsets = 2;
2339       num_derivs = 2;
2340       break;
2341    case TGSI_TEXTURE_CUBE:
2342       num_offsets = 2;
2343       num_derivs = 3;
2344       break;
2345    case TGSI_TEXTURE_3D:
2346       num_offsets = 3;
2347       num_derivs = 3;
2348       break;
2349    case TGSI_TEXTURE_CUBE_ARRAY:
2350       layer_coord = 3;
2351       num_offsets = 2;
2352       num_derivs = 3;
2353       break;
2354    default:
2355       assert(0);
2356       return;
2357    }
2358 
2359    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2360        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2361       lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2362       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2363          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2364       }
2365       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2366          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2367       }
2368       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2369    }
2370    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2371       /* XXX might be better to explicitly pass the level zero information */
2372       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2373       lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2374    }
2375 
2376    for (i = 0; i < num_derivs; i++) {
2377       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2378    }
2379    for (i = num_derivs; i < 5; i++) {
2380       coords[i] = bld->bld_base.base.undef;
2381    }
2382 
2383    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2384    if (layer_coord) {
2385       if (layer_coord == 3)
2386          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2387       else
2388          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2389    }
2390    /* Shadow coord always occupies the 5th slot. */
2391    if (compare) {
2392       sample_key |= LP_SAMPLER_SHADOW;
2393       coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2394    }
2395 
2396    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2397       unsigned dim;
2398       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2399       for (dim = 0; dim < num_derivs; ++dim) {
2400          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2401          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2402       }
2403       params.derivs = &derivs;
2404       /*
2405        * We could also check whether all src regs are constant, but I doubt
2406        * such cases exist in practice.
2407        */
2408       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2409          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2410             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2411          }
2412          else {
2413             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2414          }
2415       }
2416       else {
2417          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2418       }
2419    }
2420 
2421    /* some advanced gather instructions (txgo) would require 4 offsets */
2422    if (inst->Texture.NumOffsets == 1) {
2423       unsigned dim;
2424       sample_key |= LP_SAMPLER_OFFSETS;
2425       for (dim = 0; dim < num_offsets; dim++) {
2426          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2427       }
2428    }
2429    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2430 
2431    params.type = bld->bld_base.base.type;
2432    params.sample_key = sample_key;
2433    params.texture_index = texture_unit;
2434    params.sampler_index = sampler_unit;
2435    params.context_ptr = bld->context_ptr;
2436    params.thread_data_ptr = bld->thread_data_ptr;
2437    params.coords = coords;
2438    params.offsets = offsets;
2439    params.lod = lod;
2440    params.texel = texel;
2441 
2442    bld->sampler->emit_tex_sample(bld->sampler,
2443                                  bld->bld_base.base.gallivm,
2444                                  &params);
2445 
2446    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2447        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2448        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2449        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2450       unsigned char swizzles[4];
2451       swizzles[0] = inst->Src[1].Register.SwizzleX;
2452       swizzles[1] = inst->Src[1].Register.SwizzleY;
2453       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2454       swizzles[3] = inst->Src[1].Register.SwizzleW;
2455 
2456       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2457    }
2458 }
2459 
2460 static void
2461 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2462                    const struct tgsi_full_instruction *inst,
2463                    LLVMValueRef *texel,
2464                    boolean is_samplei)
2465 {
2466    unsigned unit, target;
2467    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2468    LLVMValueRef explicit_lod = NULL;
2469    LLVMValueRef coords[5];
2470    LLVMValueRef offsets[3] = { NULL };
2471    LLVMValueRef ms_index = NULL;
2472    struct lp_sampler_params params;
2473    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2474    unsigned dims, i;
2475    unsigned layer_coord = 0;
2476    unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2477 
2478    memset(&params, 0, sizeof(params));
2479 
2480    if (!bld->sampler) {
2481       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2482       for (i = 0; i < 4; i++) {
2483          texel[i] = coord_undef;
2484       }
2485       return;
2486    }
2487 
2488    unit = inst->Src[1].Register.Index;
2489 
2490    if (is_samplei) {
2491       target = bld->sv[unit].Resource;
2492    }
2493    else {
2494       target = inst->Texture.Texture;
2495    }
2496 
2497    switch (target) {
2498    case TGSI_TEXTURE_1D:
2499    case TGSI_TEXTURE_BUFFER:
2500       dims = 1;
2501       break;
2502    case TGSI_TEXTURE_1D_ARRAY:
2503       layer_coord = 1;
2504       dims = 1;
2505       break;
2506    case TGSI_TEXTURE_2D:
2507    case TGSI_TEXTURE_RECT:
2508    case TGSI_TEXTURE_2D_MSAA:
2509       dims = 2;
2510       break;
2511    case TGSI_TEXTURE_2D_ARRAY:
2512    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2513       layer_coord = 2;
2514       dims = 2;
2515       break;
2516    case TGSI_TEXTURE_3D:
2517       dims = 3;
2518       break;
2519    default:
2520       assert(0);
2521       return;
2522    }
2523 
2524    /* always have an explicit lod except for buffers, msaa targets and TXF_LZ? */
2525    if (target != TGSI_TEXTURE_BUFFER &&
2526        target != TGSI_TEXTURE_2D_MSAA &&
2527        target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2528        inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2529       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2530       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2531       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2532    }
2533 
2534    if (target == TGSI_TEXTURE_2D_MSAA ||
2535        target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2536       sample_key |= LP_SAMPLER_FETCH_MS;
2537       ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2538    }
2539 
2540    /*
2541     * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2542     * would be the sample index.
2543     */
2544 
2545    for (i = 0; i < dims; i++) {
2546       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2547    }
2548    /* We never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway. */
2549    for (i = dims; i < 5; i++) {
2550       coords[i] = coord_undef;
2551    }
2552    if (layer_coord)
2553       coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2554 
2555    if (inst->Texture.NumOffsets == 1) {
2556       unsigned dim;
2557       sample_key |= LP_SAMPLER_OFFSETS;
2558       for (dim = 0; dim < dims; dim++) {
2559          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2560       }
2561    }
2562    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2563 
2564    params.type = bld->bld_base.base.type;
2565    params.sample_key = sample_key;
2566    params.texture_index = unit;
2567    /*
2568     * The sampler is not actually used; set it to 0 so it won't exceed PIPE_MAX_SAMPLERS
2569     * and trigger assertions with d3d10, where the sampler view number
2570     * can exceed this.
2571     */
2572    params.sampler_index = 0;
2573    params.context_ptr = bld->context_ptr;
2574    params.thread_data_ptr = bld->thread_data_ptr;
2575    params.coords = coords;
2576    params.offsets = offsets;
2577    params.derivs = NULL;
2578    params.lod = explicit_lod;
2579    params.texel = texel;
2580    params.ms_index = ms_index;
2581 
2582    bld->sampler->emit_tex_sample(bld->sampler,
2583                                  bld->bld_base.base.gallivm,
2584                                  &params);
2585 
2586    if (is_samplei &&
2587        (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2588         inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2589         inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2590         inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2591       unsigned char swizzles[4];
2592       swizzles[0] = inst->Src[1].Register.SwizzleX;
2593       swizzles[1] = inst->Src[1].Register.SwizzleY;
2594       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2595       swizzles[3] = inst->Src[1].Register.SwizzleW;
2596 
2597       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2598    }
2599 }
2600 
2601 static void
2602 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2603                  const struct tgsi_full_instruction *inst,
2604                  LLVMValueRef *sizes_out,
2605                  boolean is_sviewinfo)
2606 {
2607    LLVMValueRef explicit_lod;
2608    enum lp_sampler_lod_property lod_property;
2609    unsigned has_lod;
2610    unsigned i;
2611    unsigned unit = inst->Src[1].Register.Index;
2612    enum tgsi_texture_type target;
2613    enum pipe_texture_target pipe_target;
2614    struct lp_sampler_size_query_params params;
2615 
2616    if (is_sviewinfo) {
2617       target = bld->sv[unit].Resource;
2618    }
2619    else {
2620       target = inst->Texture.Texture;
2621    }
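   /* Buffer and rect textures are not mipmapped, so there is no lod argument. */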
2622    switch (target) {
2623    case TGSI_TEXTURE_BUFFER:
2624    case TGSI_TEXTURE_RECT:
2625    case TGSI_TEXTURE_SHADOWRECT:
2626       has_lod = 0;
2627       break;
2628    default:
2629       has_lod = 1;
2630       break;
2631    }
2632 
2633    if (!bld->sampler) {
2634       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2635       for (i = 0; i < 4; i++)
2636          sizes_out[i] = bld->bld_base.int_bld.undef;
2637       return;
2638    }
2639 
2640    if (has_lod) {
2641       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2642       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2643    }
2644    else {
2645       explicit_lod = NULL;
2646       lod_property = LP_SAMPLER_LOD_SCALAR;
2647    }
2648 
2649 
2650    pipe_target = tgsi_to_pipe_tex_target(target);
2651 
2652    params.int_type = bld->bld_base.int_bld.type;
2653    params.texture_unit = unit;
2654    params.target = pipe_target;
2655    params.context_ptr = bld->context_ptr;
2656    params.is_sviewinfo = TRUE;
2657    params.lod_property = lod_property;
2658    params.explicit_lod = explicit_lod;
2659    params.sizes_out = sizes_out;
2660    params.samples_only = false;
2661 
2662    bld->sampler->emit_size_query(bld->sampler,
2663                                  bld->bld_base.base.gallivm,
2664                                  &params);
2665 }
2666 
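/**
 * Heuristic: return TRUE if the instruction at pc is within a handful of
 * instructions of the end of the shader and none of the remaining
 * instructions use texturing or control flow.  Callers use this to skip
 * the mask check after a KILL when it can no longer pay off.
 */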
2667 static boolean
2668 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2669                    int pc)
2670 {
2671    unsigned i;
2672 
2673    for (i = 0; i < 5; i++) {
2674       enum tgsi_opcode opcode;
2675 
2676       if (pc + i >= bld->bld_base.info->num_instructions)
2677          return TRUE;
2678 
2679       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2680 
2681       if (opcode == TGSI_OPCODE_END)
2682          return TRUE;
2683 
2684       if (opcode == TGSI_OPCODE_TEX ||
2685          opcode == TGSI_OPCODE_TXP ||
2686          opcode == TGSI_OPCODE_TXD ||
2687          opcode == TGSI_OPCODE_TXB ||
2688          opcode == TGSI_OPCODE_TXL ||
2689          opcode == TGSI_OPCODE_TXF ||
2690          opcode == TGSI_OPCODE_TXQ ||
2691          opcode == TGSI_OPCODE_TEX2 ||
2692          opcode == TGSI_OPCODE_TXB2 ||
2693          opcode == TGSI_OPCODE_TXL2 ||
2694          opcode == TGSI_OPCODE_SAMPLE ||
2695          opcode == TGSI_OPCODE_SAMPLE_B ||
2696          opcode == TGSI_OPCODE_SAMPLE_C ||
2697          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2698          opcode == TGSI_OPCODE_SAMPLE_D ||
2699          opcode == TGSI_OPCODE_SAMPLE_I ||
2700          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2701          opcode == TGSI_OPCODE_SAMPLE_L ||
2702          opcode == TGSI_OPCODE_SVIEWINFO ||
2703          opcode == TGSI_OPCODE_CAL ||
2704          opcode == TGSI_OPCODE_IF ||
2705          opcode == TGSI_OPCODE_UIF ||
2706          opcode == TGSI_OPCODE_BGNLOOP ||
2707          opcode == TGSI_OPCODE_SWITCH)
2708          return FALSE;
2709    }
2710 
2711    return TRUE;
2712 }
2713 
2714 
2715 
2716 /**
2717  * Kill fragment if any of the src register values are negative.
2718  */
2719 static void
2720 emit_kill_if(
2721    struct lp_build_tgsi_soa_context *bld,
2722    const struct tgsi_full_instruction *inst,
2723    int pc)
2724 {
2725    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2726    const struct tgsi_full_src_register *reg = &inst->Src[0];
2727    LLVMValueRef terms[TGSI_NUM_CHANNELS];
2728    LLVMValueRef mask;
2729    unsigned chan_index;
2730 
2731    memset(&terms, 0, sizeof terms);
2732 
2733    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2734       unsigned swizzle;
2735 
2736       /* Unswizzle channel */
2737       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2738 
2739       /* Check if the component has not been already tested. */
2740       assert(swizzle < TGSI_NUM_CHANNELS);
2741       if( !terms[swizzle] )
2742          /* TODO: change the comparison operator instead of setting the sign */
2743          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2744    }
2745 
2746    mask = NULL;
2747    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2748       if(terms[chan_index]) {
2749          LLVMValueRef chan_mask;
2750 
2751          /*
2752           * If term < 0 then mask = 0 else mask = ~0.
2753           */
2754          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2755 
2756          if(mask)
2757             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2758          else
2759             mask = chan_mask;
2760       }
2761    }
2762 
2763    if (bld->exec_mask.has_mask) {
2764       LLVMValueRef invmask;
2765       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2766       mask = LLVMBuildOr(builder, mask, invmask, "");
2767    }
2768 
2769    lp_build_mask_update(bld->mask, mask);
2770    if (!near_end_of_shader(bld, pc))
2771       lp_build_mask_check(bld->mask);
2772 }
2773 
2774 
2775 /**
2776  * Unconditional fragment kill.
2777  * The only predication is the execution mask which will apply if
2778  * we're inside a loop or conditional.
2779  */
2780 static void
2781 emit_kill(struct lp_build_tgsi_soa_context *bld,
2782           int pc)
2783 {
2784    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2785    LLVMValueRef mask;
2786 
2787    /* For those channels which are "alive", disable fragment shader
2788     * execution.
2789     */
2790    if (bld->exec_mask.has_mask) {
2791       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2792    }
2793    else {
2794       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2795       mask = zero;
2796    }
2797 
2798    lp_build_mask_update(bld->mask, mask);
2799 
2800    if (!near_end_of_shader(bld, pc))
2801       lp_build_mask_check(bld->mask);
2802 }
2803 
2804 
2805 /**
2806  * Emit code which will dump the values of all the registers in the
2807  * given file to stdout.
2808  */
2809 static void
2810 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2811                unsigned file)
2812 {
2813    const struct tgsi_shader_info *info = bld->bld_base.info;
2814    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2815    LLVMBuilderRef builder = gallivm->builder;
2816    LLVMValueRef reg_ptr;
2817    int index;
2818    int max_index = info->file_max[file];
2819 
2820    /*
2821     * Some register files, particularly constants, can be very large,
2822     * and dumping everything could make this unusably slow.
2823     */
2824    max_index = MIN2(max_index, 32);
2825 
2826    for (index = 0; index <= max_index; index++) {
2827       LLVMValueRef res;
2828       unsigned mask;
2829       int chan;
2830 
2831       if (index < 8 * sizeof(unsigned) &&
2832           (info->file_mask[file] & (1u << index)) == 0)  {
2833          /* This was not declared.*/
2834          continue;
2835       }
2836 
2837       if (file == TGSI_FILE_INPUT) {
2838          mask = info->input_usage_mask[index];
2839       } else {
2840          mask = TGSI_WRITEMASK_XYZW;
2841       }
2842 
2843       for (chan = 0; chan < 4; chan++) {
2844          if ((mask & (1 << chan)) == 0) {
2845             /* This channel is not used.*/
2846             continue;
2847          }
2848 
2849          if (file == TGSI_FILE_CONSTANT) {
2850             struct tgsi_full_src_register reg;
2851             memset(&reg, 0, sizeof reg);
2852             reg.Register.File = file;
2853             reg.Register.Index = index;
2854             reg.Register.SwizzleX = 0;
2855             reg.Register.SwizzleY = 1;
2856             reg.Register.SwizzleZ = 2;
2857             reg.Register.SwizzleW = 3;
2858 
2859             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2860             if (!res) {
2861                continue;
2862             }
2863          } else if (file == TGSI_FILE_INPUT) {
2864             res = bld->inputs[index][chan];
2865             if (!res) {
2866                continue;
2867             }
2868          } else if (file == TGSI_FILE_TEMPORARY) {
2869             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2870             assert(reg_ptr);
2871             res = LLVMBuildLoad(builder, reg_ptr, "");
2872          } else if (file == TGSI_FILE_OUTPUT) {
2873             reg_ptr = lp_get_output_ptr(bld, index, chan);
2874             assert(reg_ptr);
2875             res = LLVMBuildLoad(builder, reg_ptr, "");
2876          } else {
2877             assert(0);
2878             continue;
2879          }
2880 
2881          emit_dump_reg(gallivm, file, index, chan, res);
2882       }
2883    }
2884 }
2885 
2886 
2887 
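/**
 * Process a TGSI declaration: allocate storage for temporaries and outputs
 * (unless those files are indirectly addressed) and for address registers,
 * record the declared sampler view targets, and fetch the per-buffer
 * pointers for constant buffers and shader buffers.
 */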
2888 void
2889 lp_emit_declaration_soa(
2890    struct lp_build_tgsi_context *bld_base,
2891    const struct tgsi_full_declaration *decl)
2892 {
2893    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2894    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2895    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2896    const unsigned first = decl->Range.First;
2897    const unsigned last = decl->Range.Last;
2898    unsigned idx, i;
2899 
2900    assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2901 
2902    switch (decl->Declaration.File) {
2903    case TGSI_FILE_TEMPORARY:
2904       if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2905          assert(last < LP_MAX_INLINED_TEMPS);
2906          for (idx = first; idx <= last; ++idx) {
2907             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2908                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2909          }
2910       }
2911       break;
2912 
2913    case TGSI_FILE_OUTPUT:
2914       if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2915          for (idx = first; idx <= last; ++idx) {
2916             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2917                bld->outputs[idx][i] = lp_build_alloca(gallivm,
2918                                                       vec_type, "output");
2919          }
2920       }
2921       break;
2922 
2923    case TGSI_FILE_ADDRESS:
2924       /* ADDR registers are only allocated with an integer LLVM IR type,
2925        * as they are guaranteed to always hold integer values.
2926        * XXX: Not sure if this exception is worthwhile (or the whole idea of
2927        * an ADDR register for that matter).
2928        */
2929       assert(last < LP_MAX_TGSI_ADDRS);
2930       for (idx = first; idx <= last; ++idx) {
2931          assert(idx < LP_MAX_TGSI_ADDRS);
2932          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2933             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2934       }
2935       break;
2936 
2937    case TGSI_FILE_SAMPLER_VIEW:
2938       /*
2939        * The target stored here MUST match whatever is actually set in
2940        * the sampler views (what about the return type?).
2941        */
2942       assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2943       for (idx = first; idx <= last; ++idx) {
2944          bld->sv[idx] = decl->SamplerView;
2945       }
2946       break;
2947 
2948    case TGSI_FILE_CONSTANT:
2949    {
2950       /*
2951        * We could trivially fetch the per-buffer pointer when fetching the
2952        * constant, relying on llvm to figure out it's always the same pointer
2953        * anyway. However, doing so results in a huge (more than a factor of 10)
2954        * slowdown in llvm compilation times for some (but not all) shaders
2955        * (more specifically, the IR optimization spends way more time in
2956        * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2957        */
2958       unsigned idx2D = decl->Dim.Index2D;
2959       LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2960       assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2961       bld->consts[idx2D] =
2962          lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2963       bld->consts_sizes[idx2D] =
2964          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2965    }
2966    break;
2967    case TGSI_FILE_BUFFER:
2968    {
2969       unsigned idx = decl->Range.First;
2970       LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2971       assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2972       bld->ssbos[idx] =
2973          lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2974       bld->ssbo_sizes[idx] =
2975          lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2976 
2977    }
2978    break;
2979    case TGSI_FILE_MEMORY:
2980       break;
2981    default:
2982       /* don't need to declare other vars */
2983       break;
2984    }
2985 }
2986 
2987 
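/**
 * Translate a TGSI immediate into per-channel constant vectors, padding the
 * unused channels with undef.  Depending on whether immediates are accessed
 * indirectly, the values are stored into the imms_array alloca and/or kept
 * in bld->immediates[].
 */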
2988 void lp_emit_immediate_soa(
2989    struct lp_build_tgsi_context *bld_base,
2990    const struct tgsi_full_immediate *imm)
2991 {
2992    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2993    struct gallivm_state * gallivm = bld_base->base.gallivm;
2994    LLVMValueRef imms[4];
2995    unsigned i;
2996    const uint size = imm->Immediate.NrTokens - 1;
2997    assert(size <= 4);
2998    switch (imm->Immediate.DataType) {
2999    case TGSI_IMM_FLOAT32:
3000       for( i = 0; i < size; ++i )
3001          imms[i] =
3002                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3003 
3004       break;
3005    case TGSI_IMM_FLOAT64:
3006    case TGSI_IMM_UINT64:
3007    case TGSI_IMM_INT64:
3008    case TGSI_IMM_UINT32:
3009       for( i = 0; i < size; ++i ) {
3010          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3011          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3012       }
3013 
3014       break;
3015    case TGSI_IMM_INT32:
3016       for( i = 0; i < size; ++i ) {
3017          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3018          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3019       }
3020 
3021       break;
3022    }
3023    for( i = size; i < 4; ++i )
3024       imms[i] = bld_base->base.undef;
3025 
3026    if (bld->use_immediates_array) {
3027       unsigned index = bld->num_immediates;
3028       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3029       LLVMBuilderRef builder = gallivm->builder;
3030       LLVMValueRef gep[2];
3031       gep[0] = lp_build_const_int32(gallivm, 0);
3032 
3033       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3034       for (i = 0; i < 4; ++i ) {
3035          gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3036          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3037                                              bld->imms_array, gep, 2, "");
3038          LLVMBuildStore(builder, imms[i], imm_ptr);
3039       }
3040    } else {
3041       /* simply copy the immediate values into the next immediates[] slot */
3042       unsigned i;
3043       assert(imm->Immediate.NrTokens - 1 <= 4);
3044       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3045 
3046       for(i = 0; i < 4; ++i )
3047          bld->immediates[bld->num_immediates][i] = imms[i];
3048 
3049       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3050          unsigned index = bld->num_immediates;
3051          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3052          LLVMBuilderRef builder = gallivm->builder;
3053          LLVMValueRef gep[2];
3054          gep[0] = lp_build_const_int32(gallivm, 0);
3055          for (i = 0; i < 4; ++i ) {
3056             gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3057             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3058                                                 bld->imms_array, gep, 2, "");
3059             LLVMBuildStore(builder,
3060                            bld->immediates[index][i],
3061                            imm_ptr);
3062          }
3063       }
3064    }
3065 
3066    bld->num_immediates++;
3067 }
3068 
3069 static void
3070 ddx_emit(
3071    const struct lp_build_tgsi_action * action,
3072    struct lp_build_tgsi_context * bld_base,
3073    struct lp_build_emit_data * emit_data)
3074 {
3075    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3076 
3077    emit_fetch_deriv(bld, emit_data->args[0], NULL,
3078                     &emit_data->output[emit_data->chan], NULL);
3079 }
3080 
3081 static void
3082 ddy_emit(
3083    const struct lp_build_tgsi_action * action,
3084    struct lp_build_tgsi_context * bld_base,
3085    struct lp_build_emit_data * emit_data)
3086 {
3087    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3088 
3089    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3090                     &emit_data->output[emit_data->chan]);
3091 }
3092 
3093 static void
3094 kill_emit(
3095    const struct lp_build_tgsi_action * action,
3096    struct lp_build_tgsi_context * bld_base,
3097    struct lp_build_emit_data * emit_data)
3098 {
3099    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3100 
3101    emit_kill(bld, bld_base->pc - 1);
3102 }
3103 
3104 static void
3105 kill_if_emit(
3106    const struct lp_build_tgsi_action * action,
3107    struct lp_build_tgsi_context * bld_base,
3108    struct lp_build_emit_data * emit_data)
3109 {
3110    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3111 
3112    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3113 }
3114 
3115 static void
3116 tex_emit(
3117    const struct lp_build_tgsi_action * action,
3118    struct lp_build_tgsi_context * bld_base,
3119    struct lp_build_emit_data * emit_data)
3120 {
3121    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3122 
3123    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3124             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3125 }
3126 
3127 static void
3128 tex2_emit(
3129    const struct lp_build_tgsi_action * action,
3130    struct lp_build_tgsi_context * bld_base,
3131    struct lp_build_emit_data * emit_data)
3132 {
3133    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3134 
3135    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3136             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3137 }
3138 
3139 static void
3140 txb_emit(
3141    const struct lp_build_tgsi_action * action,
3142    struct lp_build_tgsi_context * bld_base,
3143    struct lp_build_emit_data * emit_data)
3144 {
3145    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3146 
3147    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3148             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3149 }
3150 
3151 static void
3152 txb2_emit(
3153    const struct lp_build_tgsi_action * action,
3154    struct lp_build_tgsi_context * bld_base,
3155    struct lp_build_emit_data * emit_data)
3156 {
3157    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3158 
3159    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3160             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3161 }
3162 
3163 static void
3164 txd_emit(
3165    const struct lp_build_tgsi_action * action,
3166    struct lp_build_tgsi_context * bld_base,
3167    struct lp_build_emit_data * emit_data)
3168 {
3169    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3170 
3171    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3172             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3173 }
3174 
3175 static void
3176 txl_emit(
3177    const struct lp_build_tgsi_action * action,
3178    struct lp_build_tgsi_context * bld_base,
3179    struct lp_build_emit_data * emit_data)
3180 {
3181    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3182 
3183    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3184             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3185 }
3186 
3187 static void
3188 txl2_emit(
3189    const struct lp_build_tgsi_action * action,
3190    struct lp_build_tgsi_context * bld_base,
3191    struct lp_build_emit_data * emit_data)
3192 {
3193    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3194 
3195    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3196             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3197 }
3198 
3199 static void
3200 txp_emit(
3201    const struct lp_build_tgsi_action * action,
3202    struct lp_build_tgsi_context * bld_base,
3203    struct lp_build_emit_data * emit_data)
3204 {
3205    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3206 
3207    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3208             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3209 }
3210 
3211 static void
3212 tg4_emit(
3213    const struct lp_build_tgsi_action * action,
3214    struct lp_build_tgsi_context * bld_base,
3215    struct lp_build_emit_data * emit_data)
3216 {
3217    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3218 
3219    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3220             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3221 }
3222 
3223 static void
3224 lodq_emit(
3225    const struct lp_build_tgsi_action * action,
3226    struct lp_build_tgsi_context * bld_base,
3227    struct lp_build_emit_data * emit_data)
3228 {
3229    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3230 
3231    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3232             emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3233 }
3234 
3235 static void
3236 txq_emit(
3237    const struct lp_build_tgsi_action * action,
3238    struct lp_build_tgsi_context * bld_base,
3239    struct lp_build_emit_data * emit_data)
3240 {
3241    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3242 
3243    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3244 }
3245 
3246 static void
3247 txf_emit(
3248    const struct lp_build_tgsi_action * action,
3249    struct lp_build_tgsi_context * bld_base,
3250    struct lp_build_emit_data * emit_data)
3251 {
3252    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3253 
3254    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3255 }
3256 
3257 static void
3258 sample_i_emit(
3259    const struct lp_build_tgsi_action * action,
3260    struct lp_build_tgsi_context * bld_base,
3261    struct lp_build_emit_data * emit_data)
3262 {
3263    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3264 
3265    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3266 }
3267 
3268 static void
3269 sample_emit(
3270    const struct lp_build_tgsi_action * action,
3271    struct lp_build_tgsi_context * bld_base,
3272    struct lp_build_emit_data * emit_data)
3273 {
3274    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3275 
3276    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3277                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3278 }
3279 
3280 static void
3281 sample_b_emit(
3282    const struct lp_build_tgsi_action * action,
3283    struct lp_build_tgsi_context * bld_base,
3284    struct lp_build_emit_data * emit_data)
3285 {
3286    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3287 
3288    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3289                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3290 }
3291 
3292 static void
3293 sample_c_emit(
3294    const struct lp_build_tgsi_action * action,
3295    struct lp_build_tgsi_context * bld_base,
3296    struct lp_build_emit_data * emit_data)
3297 {
3298    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3299 
3300    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3301                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3302 }
3303 
3304 static void
3305 sample_c_lz_emit(
3306    const struct lp_build_tgsi_action * action,
3307    struct lp_build_tgsi_context * bld_base,
3308    struct lp_build_emit_data * emit_data)
3309 {
3310    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3311 
3312    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3313                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3314 }
3315 
3316 static void
3317 sample_d_emit(
3318    const struct lp_build_tgsi_action * action,
3319    struct lp_build_tgsi_context * bld_base,
3320    struct lp_build_emit_data * emit_data)
3321 {
3322    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3323 
3324    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3325                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3326 }
3327 
3328 static void
3329 sample_l_emit(
3330    const struct lp_build_tgsi_action * action,
3331    struct lp_build_tgsi_context * bld_base,
3332    struct lp_build_emit_data * emit_data)
3333 {
3334    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3335 
3336    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3337                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3338 }
3339 
3340 static void
3341 gather4_emit(
3342    const struct lp_build_tgsi_action * action,
3343    struct lp_build_tgsi_context * bld_base,
3344    struct lp_build_emit_data * emit_data)
3345 {
3346    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3347 
3348    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3349                FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3350 }
3351 
3352 static void
3353 sviewinfo_emit(
3354    const struct lp_build_tgsi_action * action,
3355    struct lp_build_tgsi_context * bld_base,
3356    struct lp_build_emit_data * emit_data)
3357 {
3358    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3359 
3360    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3361 }
3362 
3363 static void
3364 lod_emit(
3365    const struct lp_build_tgsi_action * action,
3366    struct lp_build_tgsi_context * bld_base,
3367    struct lp_build_emit_data * emit_data)
3368 {
3369    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3370 
3371    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3372                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3373 }
3374 
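/**
 * Map a TGSI texture target to the number of coordinate dimensions and to
 * the coordinate slot holding the array layer (0 if the target is not an
 * array texture).
 */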
3375 static void
3376 target_to_dims_layer(enum tgsi_texture_type target,
3377                      unsigned *dims,
3378                      unsigned *layer_coord)
3379 {
3380    *layer_coord = 0;
3381    switch (target) {
3382    case TGSI_TEXTURE_1D:
3383    case TGSI_TEXTURE_BUFFER:
3384       *dims = 1;
3385       break;
3386    case TGSI_TEXTURE_1D_ARRAY:
3387       *layer_coord = 1;
3388       *dims = 1;
3389       break;
3390    case TGSI_TEXTURE_2D:
3391    case TGSI_TEXTURE_RECT:
3392       *dims = 2;
3393       break;
3394    case TGSI_TEXTURE_2D_ARRAY:
3395       *layer_coord = 2;
3396       *dims = 2;
3397       break;
3398    case TGSI_TEXTURE_3D:
3399    case TGSI_TEXTURE_CUBE:
3400    case TGSI_TEXTURE_CUBE_ARRAY:
3401       *dims = 3;
3402       break;
3403    default:
3404       assert(0);
3405       *dims = 0;
3406       return;
3407    }
3408 }
3409 
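/**
 * Emit an image LOAD: fetch the coordinates, fill in lp_img_params and hand
 * the operation to the image code generator.
 */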
3410 static void
3411 img_load_emit(
3412    const struct lp_build_tgsi_action * action,
3413    struct lp_build_tgsi_context * bld_base,
3414    struct lp_build_emit_data * emit_data)
3415 {
3416    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3417    struct lp_img_params params;
3418    LLVMValueRef coords[5];
3419    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3420    unsigned dims;
3421    enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3422    unsigned layer_coord;
3423 
3424    target_to_dims_layer(target, &dims, &layer_coord);
3425 
3426    for (unsigned i = 0; i < dims; i++) {
3427       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3428    }
3429    for (unsigned i = dims; i < 5; i++) {
3430       coords[i] = coord_undef;
3431    }
3432    if (layer_coord)
3433       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3434 
3435    memset(&params, 0, sizeof(params));
3436 
3437    params.type = bld->bld_base.base.type;
3438    params.context_ptr = bld->context_ptr;
3439    params.thread_data_ptr = bld->thread_data_ptr;
3440    params.coords = coords;
3441    params.outdata = emit_data->output;
3442    params.target = tgsi_to_pipe_tex_target(target);
3443    params.image_index = emit_data->inst->Src[0].Register.Index;
3444    params.img_op = LP_IMG_LOAD;
3445    bld->image->emit_op(bld->image,
3446                          bld->bld_base.base.gallivm,
3447                          &params);
3448 }
3449 
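/**
 * Emit a LOAD instruction.  Image loads are dispatched to img_load_emit,
 * constant buffer loads are gathered with an overflow mask, and SSBO /
 * shared memory loads are scalarized in a per-lane loop guarded by the
 * execution mask (inactive or out-of-bounds lanes yield 0).
 */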
3450 static void
3451 load_emit(
3452    const struct lp_build_tgsi_action * action,
3453    struct lp_build_tgsi_context * bld_base,
3454    struct lp_build_emit_data * emit_data)
3455 {
3456    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3457    struct gallivm_state * gallivm = bld_base->base.gallivm;
3458    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3459    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3460    unsigned buf = bufreg->Register.Index;
3461    assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3462           bufreg->Register.File == TGSI_FILE_IMAGE ||
3463           bufreg->Register.File == TGSI_FILE_MEMORY ||
3464           bufreg->Register.File == TGSI_FILE_CONSTBUF);
3465    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3466    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3467 
3468    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3469       img_load_emit(action, bld_base, emit_data);
3470    } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3471       LLVMValueRef consts_ptr = bld->consts[buf];
3472       LLVMValueRef num_consts = bld->consts_sizes[buf];
3473 
3474       LLVMValueRef indirect_index;
3475       LLVMValueRef overflow_mask;
3476 
3477       indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3478       indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3479 
3480       /* All fetches are from the same constant buffer, so
3481        * we need to propagate the size to a vector to do a
3482        * vector comparison */
3483       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3484 
3485       /* Gather values from the constant buffer */
3486       unsigned chan_index;
3487       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3488          /* Construct a boolean vector telling us which channels
3489           * overflow the bound constant buffer */
3490          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3491                                           indirect_index, num_consts);
3492 
3493          /* index_vec = indirect_index * 4 */
3494          LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3495          index_vec = lp_build_add(uint_bld, index_vec,
3496                                   lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3497 
3498          emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3499       }
3500    } else if (0) {
3501       /* for indirect support with ARB_gpu_shader5 */
3502    } else {
3503       LLVMValueRef index;
3504       LLVMValueRef scalar, scalar_ptr;
3505       unsigned chan_index;
3506 
3507       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3508       index = lp_build_shr_imm(uint_bld, index, 2);
3509 
3510       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3511 
3512       LLVMValueRef ssbo_limit = NULL;
3513 
3514       if (!is_shared) {
3515          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3516          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3517       }
3518 
3519       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3520          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3521 
3522          LLVMValueRef exec_mask = mask_vec(bld_base);
3523          if (!is_shared) {
3524             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3525             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3526          }
3527 
3528          LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3529          struct lp_build_loop_state loop_state;
3530          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3531 
3532          struct lp_build_if_state ifthen;
3533          LLVMValueRef cond, temp_res;
3534 
3535          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3536                                               loop_state.counter, "");
3537 
3538          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3539          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3540 
3541          lp_build_if(&ifthen, gallivm, cond);
3542          scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3543 
3544          temp_res = LLVMBuildLoad(builder, result, "");
3545          temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3546          LLVMBuildStore(builder, temp_res, result);
3547          lp_build_else(&ifthen);
3548          temp_res = LLVMBuildLoad(builder, result, "");
3549          temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3550          LLVMBuildStore(builder, temp_res, result);
3551          lp_build_endif(&ifthen);
3552          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3553                                 NULL, LLVMIntUGE);
3554          emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3555       }
3556    }
3557 }
3558 
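/**
 * Emit an image STORE: fetch the coordinates and the value to store, fill
 * in lp_img_params (including the execution mask) and hand the operation to
 * the image code generator.
 */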
3559 static void
3560 img_store_emit(
3561    const struct lp_build_tgsi_action * action,
3562    struct lp_build_tgsi_context * bld_base,
3563    struct lp_build_emit_data * emit_data)
3564 {
3565    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3566    struct lp_img_params params;
3567    LLVMValueRef coords[5];
3568    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3569    unsigned dims;
3570    enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3571    unsigned layer_coord;
3572 
3573    target_to_dims_layer(target, &dims, &layer_coord);
3574    for (unsigned i = 0; i < dims; i++) {
3575       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3576    }
3577    for (unsigned i = dims; i < 5; i++) {
3578       coords[i] = coord_undef;
3579    }
3580    if (layer_coord)
3581       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3582    memset(&params, 0, sizeof(params));
3583 
3584    params.type = bld->bld_base.base.type;
3585    params.context_ptr = bld->context_ptr;
3586    params.thread_data_ptr = bld->thread_data_ptr;
3587    params.coords = coords;
3588    params.outdata = NULL;
3589    params.exec_mask = mask_vec(bld_base);
3590    params.target = tgsi_to_pipe_tex_target(target);
3591    params.image_index = emit_data->inst->Dst[0].Register.Index;
3592    params.img_op = LP_IMG_STORE;
3593    for (unsigned i = 0; i < 4; i++)
3594       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3595 
3596    bld->image->emit_op(bld->image,
3597                        bld->bld_base.base.gallivm,
3598                        &params);
3599 }
3600 
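/**
 * Emit a STORE instruction.  Image stores are dispatched to img_store_emit;
 * SSBO / shared memory stores are scalarized in a per-lane loop so that
 * only lanes enabled in the execution mask (and within the SSBO bounds)
 * actually write.
 */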
3601 static void
3602 store_emit(
3603    const struct lp_build_tgsi_action * action,
3604    struct lp_build_tgsi_context * bld_base,
3605    struct lp_build_emit_data * emit_data)
3606 {
3607    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3608    struct gallivm_state * gallivm = bld_base->base.gallivm;
3609    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3610    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3611    const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3612    unsigned buf = bufreg->Register.Index;
3613    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3614    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3615 
3616    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3617       img_store_emit(action, bld_base, emit_data);
3618    } else if (0) {
3619 
3620    } else {
3621       LLVMValueRef index;  /* index into the buffer */
3622       LLVMValueRef scalar_ptr;
3623       LLVMValueRef value;
3624       unsigned chan_index;
3625 
3626       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3627       index = lp_build_shr_imm(uint_bld, index, 2);
3628 
3629       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3630 
3631       LLVMValueRef ssbo_limit = NULL;
3632 
3633       if (!is_shared) {
3634          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3635          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3636       }
3637 
3638       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3639          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3640 
3641          value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3642 
3643          LLVMValueRef exec_mask = mask_vec(bld_base);
3644          if (!is_shared) {
3645             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3646             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3647          }
3648 
3649          struct lp_build_loop_state loop_state;
3650          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3651 
3652          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3653                                                           loop_state.counter, "");
3654          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3655 
3656          struct lp_build_if_state ifthen;
3657          LLVMValueRef cond;
3658 
3659          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3660                                               loop_state.counter, "");
3661 
3662          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3663          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3664          lp_build_if(&ifthen, gallivm, cond);
3665 
3666          lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3667 
3668          lp_build_endif(&ifthen);
3669          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3670                                 NULL, LLVMIntUGE);
3671       }
3672    }
3673 }
3674 
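/**
 * Emit a RESQ resource size query: image targets are forwarded to the image
 * size query, while for buffers the SSBO size is simply broadcast into a
 * vector.
 */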
3675 static void
3676 resq_emit(
3677    const struct lp_build_tgsi_action * action,
3678    struct lp_build_tgsi_context * bld_base,
3679    struct lp_build_emit_data * emit_data)
3680 {
3681    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3682    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3683    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3684 
3685    unsigned buf = bufreg->Register.Index;
3686    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3687 
3688    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3689       enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3690       struct lp_sampler_size_query_params params = { 0 };
3691       params.int_type = bld->bld_base.int_bld.type;
3692       params.texture_unit = buf;
3693       params.target = tgsi_to_pipe_tex_target(target);
3694       params.context_ptr = bld->context_ptr;
3695       params.sizes_out = emit_data->output;
3696 
3697       bld->image->emit_size_query(bld->image,
3698                                   bld->bld_base.base.gallivm,
3699                                   &params);
3700    } else {
3701       LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3702 
3703       emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3704    }
3705 }
3706 
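/**
 * Emit an image atomic: fetch the coordinates and source operands (a second
 * set for ATOMCAS), fill in lp_img_params and hand the operation to the
 * image code generator.
 */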
3707 static void
3708 img_atomic_emit(
3709    const struct lp_build_tgsi_action * action,
3710    struct lp_build_tgsi_context * bld_base,
3711    struct lp_build_emit_data * emit_data,
3712    LLVMAtomicRMWBinOp op)
3713 {
3714    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3715    struct lp_img_params params;
3716    LLVMValueRef coords[5];
3717    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3718    unsigned dims;
3719    unsigned layer_coord;
3720    enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3721 
3722    target_to_dims_layer(target, &dims, &layer_coord);
3723 
3724    for (unsigned i = 0; i < dims; i++) {
3725       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3726    }
3727    for (unsigned i = dims; i < 5; i++) {
3728       coords[i] = coord_undef;
3729    }
3730    if (layer_coord)
3731       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3732    memset(&params, 0, sizeof(params));
3733 
3734    params.type = bld->bld_base.base.type;
3735    params.context_ptr = bld->context_ptr;
3736    params.thread_data_ptr = bld->thread_data_ptr;
3737    params.exec_mask = mask_vec(bld_base);
3738    params.image_index = emit_data->inst->Src[0].Register.Index;
3739    params.coords = coords;
3740    params.target = tgsi_to_pipe_tex_target(target);
3741    params.op = op;
3742    params.outdata = emit_data->output;
3743    params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3744 
3745    for (unsigned i = 0; i < 4; i++)
3746       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3747    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3748       for (unsigned i = 0; i < 4; i++)
3749          params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3750    }
3751    bld->image->emit_op(bld->image,
3752                        bld->bld_base.base.gallivm,
3753                        &params);
3754 }
3755 
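/**
 * Emit a buffer / shared memory atomic.  The TGSI opcode is mapped to the
 * corresponding LLVM atomicrmw operation (cmpxchg for ATOMCAS) and the
 * operation is scalarized in a per-lane loop; inactive or out-of-bounds
 * lanes return 0.
 */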
3756 static void
3757 atomic_emit(
3758    const struct lp_build_tgsi_action * action,
3759    struct lp_build_tgsi_context * bld_base,
3760    struct lp_build_emit_data * emit_data)
3761 {
3762    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3763    struct gallivm_state * gallivm = bld_base->base.gallivm;
3764    LLVMBuilderRef builder = gallivm->builder;
3765    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3766    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3767 
3768    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3769    unsigned buf = bufreg->Register.Index;
3770    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3771 
3772    LLVMAtomicRMWBinOp op = -1;
3773    switch (emit_data->inst->Instruction.Opcode) {
3774    case TGSI_OPCODE_ATOMUADD:
3775       op = LLVMAtomicRMWBinOpAdd;
3776       break;
3777    case TGSI_OPCODE_ATOMXCHG:
3778       op = LLVMAtomicRMWBinOpXchg;
3779       break;
3780    case TGSI_OPCODE_ATOMAND:
3781       op = LLVMAtomicRMWBinOpAnd;
3782       break;
3783    case TGSI_OPCODE_ATOMOR:
3784       op = LLVMAtomicRMWBinOpOr;
3785       break;
3786    case TGSI_OPCODE_ATOMXOR:
3787       op = LLVMAtomicRMWBinOpXor;
3788       break;
3789    case TGSI_OPCODE_ATOMUMIN:
3790       op = LLVMAtomicRMWBinOpUMin;
3791       break;
3792    case TGSI_OPCODE_ATOMUMAX:
3793       op = LLVMAtomicRMWBinOpUMax;
3794       break;
3795    case TGSI_OPCODE_ATOMIMIN:
3796       op = LLVMAtomicRMWBinOpMin;
3797       break;
3798    case TGSI_OPCODE_ATOMIMAX:
3799       op = LLVMAtomicRMWBinOpMax;
3800       break;
3801    case TGSI_OPCODE_ATOMCAS:
3802       break;
3803    default:
3804       assert(0);
3805       return;
3806    }
3807 
3808    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3809       img_atomic_emit(action, bld_base, emit_data, op);
3810    } else if (0) {
3811    } else {
3812       LLVMValueRef index;  /* index into the buffer */
3813       LLVMValueRef scalar, scalar_ptr;
3814       LLVMValueRef value;
3815 
3816       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3817       value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3818 
3819       index = lp_build_shr_imm(uint_bld, index, 2);
3820 
3821       if (!is_shared) {
3822          index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3823          scalar_ptr = bld->ssbos[buf];
3824       } else
3825          scalar_ptr = bld->shared_ptr;
3826 
3827       LLVMValueRef atom_res = lp_build_alloca(gallivm,
3828                                               uint_bld->vec_type, "");
3829 
3830       LLVMValueRef ssbo_limit;
3831       if (!is_shared) {
3832          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3833          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3834       }
3835 
3836       LLVMValueRef exec_mask = mask_vec(bld_base);
3837 
3838       if (!is_shared) {
3839          LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3840          exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3841       }
3842 
3843       struct lp_build_loop_state loop_state;
3844       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3845 
3846       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3847                                                        loop_state.counter, "");
3848       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3849 
3850       index = LLVMBuildExtractElement(gallivm->builder, index,
3851                                       loop_state.counter, "");
3852 
3853       scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3854                                 &index, 1, "");
3855 
3856       struct lp_build_if_state ifthen;
3857       LLVMValueRef cond, temp_res;
3858 
3859       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3860       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3861       lp_build_if(&ifthen, gallivm, cond);
3862 
3863       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3864          LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3865          LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3866                                                             loop_state.counter, "");
3867          cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3868          scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3869                                          cas_src_ptr,
3870                                          LLVMAtomicOrderingSequentiallyConsistent,
3871                                          LLVMAtomicOrderingSequentiallyConsistent,
3872                                          false);
3873          scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3874       } else {
3875          scalar = LLVMBuildAtomicRMW(builder, op,
3876                                      scalar_ptr, value_ptr,
3877                                      LLVMAtomicOrderingSequentiallyConsistent,
3878                                      false);
3879       }
3880       temp_res = LLVMBuildLoad(builder, atom_res, "");
3881       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3882       LLVMBuildStore(builder, temp_res, atom_res);
3883       lp_build_else(&ifthen);
3884       temp_res = LLVMBuildLoad(builder, atom_res, "");
3885       temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3886       LLVMBuildStore(builder, temp_res, atom_res);
3887       lp_build_endif(&ifthen);
3888 
3889       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3890                              NULL, LLVMIntUGE);
3891       emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3892    }
3893 }
3894 
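/**
 * Emit a BARRIER: suspend the coroutine and continue code generation in a
 * fresh "resume" block for when the coroutine is switched back in.
 */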
3895 static void
3896 barrier_emit(
3897    const struct lp_build_tgsi_action * action,
3898    struct lp_build_tgsi_context * bld_base,
3899    struct lp_build_emit_data * emit_data)
3900 {
3901    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3902    struct gallivm_state * gallivm = bld_base->base.gallivm;
3903 
3904    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3905 
3906    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3907    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3908 }
3909 
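/**
 * Emit a memory barrier as a sequentially consistent LLVM fence.
 */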
3910 static void
3911 membar_emit(
3912    const struct lp_build_tgsi_action * action,
3913    struct lp_build_tgsi_context * bld_base,
3914    struct lp_build_emit_data * emit_data)
3915 {
3916    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3917    LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3918 }
3919 
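/**
 * Increment the masked lanes of the vector stored at ptr by one.  The mask
 * is ~0 (i.e. -1) in active lanes, so subtracting it adds 1 to exactly
 * those lanes.
 */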
3920 static void
3921 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3922                           LLVMValueRef ptr,
3923                           LLVMValueRef mask)
3924 {
3925    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3926    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3927 
3928    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3929 
3930    LLVMBuildStore(builder, current_vec, ptr);
3931 }
3932 
3933 static void
3934 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3935                              LLVMValueRef ptr,
3936                              LLVMValueRef mask)
3937 {
3938    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3939    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3940 
3941    current_vec = lp_build_select(&bld_base->uint_bld,
3942                                  mask,
3943                                  bld_base->uint_bld.zero,
3944                                  current_vec);
3945 
3946    LLVMBuildStore(builder, current_vec, ptr);
3947 }
3948 
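/**
 * Disable the lanes that have already emitted the maximum number of output
 * vertices by ANDing the current mask with a comparison against
 * max_output_vertices_vec.
 */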
3949 static LLVMValueRef
3950 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3951                                   LLVMValueRef current_mask_vec,
3952                                   LLVMValueRef total_emitted_vertices_vec)
3953 {
3954    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3955    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3956    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3957                                         total_emitted_vertices_vec,
3958                                         bld->max_output_vertices_vec);
3959 
3960    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3961 }
3962 
3963 static void
3964 emit_vertex(
3965    const struct lp_build_tgsi_action * action,
3966    struct lp_build_tgsi_context * bld_base,
3967    struct lp_build_emit_data * emit_data)
3968 {
3969    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3970    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3971 
3972    if (bld->gs_iface->emit_vertex) {
3973       LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3974                                                     TGSI_TYPE_UNSIGNED,
3975                                                     emit_data->inst->Src[0].Register.SwizzleX);
3976       LLVMValueRef mask = mask_vec(bld_base);
3977       LLVMValueRef total_emitted_vertices_vec =
3978          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3979 
3980       mask = clamp_mask_to_max_output_vertices(bld, mask,
3981                                                total_emitted_vertices_vec);
3982       gather_outputs(bld);
3983       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3984                                  bld->outputs,
3985                                  total_emitted_vertices_vec,
3986                                  mask,
3987                                  stream_id);
3988       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3989                                 mask);
3990       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3991                                 mask);
3992 #if DUMP_GS_EMITS
3993       lp_build_print_value(bld->bld_base.base.gallivm,
3994                            " +++ emit vertex masked ones = ",
3995                            mask);
3996       lp_build_print_value(bld->bld_base.base.gallivm,
3997                            " +++ emit vertex emitted = ",
3998                            total_emitted_vertices_vec);
3999 #endif
4000    }
4001 }
4002 
4003 
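/*
 * Flush the primitive currently being built: only lanes that have actually
 * emitted vertices since the last end_primitive take part, the per-lane
 * primitive counter is bumped, and the per-primitive vertex counter is
 * reset.
 */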
4004 static void
4005 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
4006                      LLVMValueRef mask)
4007 {
4008    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4009    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
4010 
4011    if (bld->gs_iface->end_primitive) {
4012       struct lp_build_context *uint_bld = &bld_base->uint_bld;
4013       LLVMValueRef emitted_vertices_vec =
4014          LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
4015       LLVMValueRef emitted_prims_vec =
4016          LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4017       LLVMValueRef total_emitted_vertices_vec =
4018          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4019       LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4020                                                emitted_vertices_vec,
4021                                                uint_bld->zero);
4022       /* We need to combine the current execution mask with the mask
4023          telling us which, if any, execution slots actually have
4024          unemitted primitives; this way we make sure that end_primitive
4025          executes only on the paths that have unflushed vertices. */
4026       mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4027 
4028       bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4029                                    total_emitted_vertices_vec,
4030                                    emitted_vertices_vec,
4031                                    emitted_prims_vec,
4032                                    mask_vec(bld_base), 0);
4033 
4034 #if DUMP_GS_EMITS
4035       lp_build_print_value(bld->bld_base.base.gallivm,
4036                            " +++ end prim masked ones = ",
4037                            mask);
4038       lp_build_print_value(bld->bld_base.base.gallivm,
4039                            " +++ end prim emitted verts1 = ",
4040                            emitted_vertices_vec);
4041       lp_build_print_value(bld->bld_base.base.gallivm,
4042                            " +++ end prim emitted prims1 = ",
4043                            LLVMBuildLoad(builder,
4044                                          bld->emitted_prims_vec_ptr, ""));
4045 #endif
4046       increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4047                                 mask);
4048       clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4049                                    mask);
4050 #if DUMP_GS_EMITS
4051       lp_build_print_value(bld->bld_base.base.gallivm,
4052                            " +++ end prim emitted verts2 = ",
4053                            LLVMBuildLoad(builder,
4054                                          bld->emitted_vertices_vec_ptr, ""));
4055 #endif
4056    }
4057 
4058 }
4059 
4060 static void
4061 end_primitive(
4062    const struct lp_build_tgsi_action * action,
4063    struct lp_build_tgsi_context * bld_base,
4064    struct lp_build_emit_data * emit_data)
4065 {
4066    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4067 
4068    if (bld->gs_iface->end_primitive) {
4069       LLVMValueRef mask = mask_vec(bld_base);
4070       end_primitive_masked(bld_base, mask);
4071    }
4072 }
4073 
4074 static void
4075 barrier_emit_tcs(
4076    const struct lp_build_tgsi_action * action,
4077    struct lp_build_tgsi_context * bld_base,
4078    struct lp_build_emit_data * emit_data)
4079 {
4080    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4081 
4082    if (bld->tcs_iface->emit_barrier) {
4083       bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4084    }
4085 }
4086 
4087 
4088 static void
4089 cal_emit(
4090    const struct lp_build_tgsi_action * action,
4091    struct lp_build_tgsi_context * bld_base,
4092    struct lp_build_emit_data * emit_data)
4093 {
4094    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4095 
4096    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4097                      &bld_base->pc);
4098 }
4099 
4100 static void
4101 ret_emit(
4102    const struct lp_build_tgsi_action * action,
4103    struct lp_build_tgsi_context * bld_base,
4104    struct lp_build_emit_data * emit_data)
4105 {
4106    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4107 
4108    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4109 }
4110 
4111 static void
4112 brk_emit(
4113    const struct lp_build_tgsi_action * action,
4114    struct lp_build_tgsi_context * bld_base,
4115    struct lp_build_emit_data * emit_data)
4116 {
4117    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4118 
4119    lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4120 }
4121 
4122 static void
4123 if_emit(
4124    const struct lp_build_tgsi_action * action,
4125    struct lp_build_tgsi_context * bld_base,
4126    struct lp_build_emit_data * emit_data)
4127 {
4128    LLVMValueRef tmp;
4129    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4130 
4131    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4132                       emit_data->args[0], bld->bld_base.base.zero);
4133    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4134 }
4135 
4136 static void
4137 uif_emit(
4138    const struct lp_build_tgsi_action * action,
4139    struct lp_build_tgsi_context * bld_base,
4140    struct lp_build_emit_data * emit_data)
4141 {
4142    LLVMValueRef tmp;
4143    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4144    struct lp_build_context *uint_bld = &bld_base->uint_bld;
4145 
4146    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4147                       emit_data->args[0], uint_bld->zero);
4148    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4149 }
4150 
4151 static void
4152 case_emit(
4153    const struct lp_build_tgsi_action * action,
4154    struct lp_build_tgsi_context * bld_base,
4155    struct lp_build_emit_data * emit_data)
4156 {
4157    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4158 
4159    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4160 }
4161 
4162 static void
4163 default_emit(
4164    const struct lp_build_tgsi_action * action,
4165    struct lp_build_tgsi_context * bld_base,
4166    struct lp_build_emit_data * emit_data)
4167 {
4168    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4169 
4170    lp_exec_default(&bld->exec_mask, bld_base);
4171 }
4172 
4173 static void
4174 switch_emit(
4175    const struct lp_build_tgsi_action * action,
4176    struct lp_build_tgsi_context * bld_base,
4177    struct lp_build_emit_data * emit_data)
4178 {
4179    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4180 
4181    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4182 }
4183 
4184 static void
4185 endswitch_emit(
4186    const struct lp_build_tgsi_action * action,
4187    struct lp_build_tgsi_context * bld_base,
4188    struct lp_build_emit_data * emit_data)
4189 {
4190    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4191 
4192    lp_exec_endswitch(&bld->exec_mask, bld_base);
4193 }
4194 
4195 static void
4196 bgnloop_emit(
4197    const struct lp_build_tgsi_action * action,
4198    struct lp_build_tgsi_context * bld_base,
4199    struct lp_build_emit_data * emit_data)
4200 {
4201    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4202 
4203    lp_exec_bgnloop(&bld->exec_mask, true);
4204 }
4205 
4206 static void
4207 bgnsub_emit(
4208    const struct lp_build_tgsi_action * action,
4209    struct lp_build_tgsi_context * bld_base,
4210    struct lp_build_emit_data * emit_data)
4211 {
4212    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4213 
4214    lp_exec_mask_bgnsub(&bld->exec_mask);
4215 }
4216 
4217 static void
4218 else_emit(
4219    const struct lp_build_tgsi_action * action,
4220    struct lp_build_tgsi_context * bld_base,
4221    struct lp_build_emit_data * emit_data)
4222 {
4223    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4224 
4225    lp_exec_mask_cond_invert(&bld->exec_mask);
4226 }
4227 
4228 static void
4229 endif_emit(
4230    const struct lp_build_tgsi_action * action,
4231    struct lp_build_tgsi_context * bld_base,
4232    struct lp_build_emit_data * emit_data)
4233 {
4234    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4235 
4236    lp_exec_mask_cond_pop(&bld->exec_mask);
4237 }
4238 
4239 static void
4240 endloop_emit(
4241    const struct lp_build_tgsi_action * action,
4242    struct lp_build_tgsi_context * bld_base,
4243    struct lp_build_emit_data * emit_data)
4244 {
4245    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4246 
4247    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
4248 }
4249 
4250 static void
4251 endsub_emit(
4252    const struct lp_build_tgsi_action * action,
4253    struct lp_build_tgsi_context * bld_base,
4254    struct lp_build_emit_data * emit_data)
4255 {
4256    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4257 
4258    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4259 }
4260 
4261 static void
4262 cont_emit(
4263    const struct lp_build_tgsi_action * action,
4264    struct lp_build_tgsi_context * bld_base,
4265    struct lp_build_emit_data * emit_data)
4266 {
4267    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4268 
4269    lp_exec_continue(&bld->exec_mask);
4270 }
4271 
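/*
 * Shader prologue: allocate stack arrays for any register file that is
 * accessed with indirect addressing (temporaries, outputs, immediates, and
 * inputs for non-GS/TCS/TES shaders), and zero the per-lane vertex and
 * primitive counters used by geometry shaders.
 */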
4272 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4273 {
4274    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4275    struct gallivm_state * gallivm = bld_base->base.gallivm;
4276 
4277    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4278       unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4279       bld->temps_array = lp_build_alloca_undef(gallivm,
4280                                                LLVMArrayType(bld_base->base.vec_type, array_size),
4281                                                "temp_array");
4282    }
4283 
4284    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4285       LLVMValueRef array_size =
4286          lp_build_const_int32(gallivm,
4287                             bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4288       bld->outputs_array = lp_build_array_alloca(gallivm,
4289                                                 bld_base->base.vec_type, array_size,
4290                                                 "output_array");
4291    }
4292 
4293    if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4294       unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4295       bld->imms_array = lp_build_alloca_undef(gallivm,
4296                                               LLVMArrayType(bld_base->base.vec_type, array_size),
4297                                               "imms_array");
4298    }
4299 
4300    /* If we have indirect addressing in inputs, we need to copy them into
4301     * our alloca array so that they can be indexed indirectly. */
4302    if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4303        !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4304       unsigned index, chan;
4305       LLVMTypeRef vec_type = bld_base->base.vec_type;
4306       LLVMValueRef array_size = lp_build_const_int32(gallivm,
4307             bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4308       bld->inputs_array = lp_build_array_alloca(gallivm,
4309                                                vec_type, array_size,
4310                                                "input_array");
4311 
4312       assert(bld_base->info->num_inputs
4313                         <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4314 
4315       for (index = 0; index < bld_base->info->num_inputs; ++index) {
4316          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4317             LLVMValueRef lindex =
4318                lp_build_const_int32(gallivm, index * 4 + chan);
4319             LLVMValueRef input_ptr =
4320                LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4321                             &lindex, 1, "");
4322             LLVMValueRef value = bld->inputs[index][chan];
4323             if (value)
4324                LLVMBuildStore(gallivm->builder, value, input_ptr);
4325          }
4326       }
4327    }
4328 
4329    if (bld->gs_iface) {
4330       struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4331       bld->emitted_prims_vec_ptr =
4332          lp_build_alloca(gallivm,
4333                          uint_bld->vec_type,
4334                          "emitted_prims_ptr");
4335       bld->emitted_vertices_vec_ptr =
4336          lp_build_alloca(gallivm,
4337                          uint_bld->vec_type,
4338                          "emitted_vertices_ptr");
4339       bld->total_emitted_vertices_vec_ptr =
4340          lp_build_alloca(gallivm,
4341                          uint_bld->vec_type,
4342                          "total_emitted_vertices_ptr");
4343 
4344       LLVMBuildStore(gallivm->builder, uint_bld->zero,
4345                      bld->emitted_prims_vec_ptr);
4346       LLVMBuildStore(gallivm->builder, uint_bld->zero,
4347                      bld->emitted_vertices_vec_ptr);
4348       LLVMBuildStore(gallivm->builder, uint_bld->zero,
4349                      bld->total_emitted_vertices_vec_ptr);
4350    }
4351 
4352    if (DEBUG_EXECUTION) {
4353       lp_build_printf(gallivm, "\n");
4354       emit_dump_file(bld, TGSI_FILE_CONSTANT);
4355       if (!bld->gs_iface)
4356          emit_dump_file(bld, TGSI_FILE_INPUT);
4357    }
4358 }
4359 
4360 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4361 {
4362    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4363 
4364    if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4365       bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4366    }
4367 }
4368 
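/*
 * Shader epilogue: for geometry shaders flush any unfinished primitive and
 * report the final vertex/primitive counts to the GS interface; for all
 * other shader types copy the (possibly indirectly addressed) outputs back
 * into the caller-provided output slots.
 */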
4369 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4370 {
4371    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4372    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4373 
4374    if (DEBUG_EXECUTION) {
4375       /* for debugging */
4376       if (0) {
4377          emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4378       }
4379       emit_dump_file(bld, TGSI_FILE_OUTPUT);
4380       lp_build_printf(bld_base->base.gallivm, "\n");
4381    }
4382 
4383    if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4384       bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4385    }
4386 
4387    /* If we have indirect addressing in outputs, we need to copy our alloca
4388     * array back into the output slots specified by the caller. */
4389    if (bld->gs_iface) {
4390       LLVMValueRef total_emitted_vertices_vec;
4391       LLVMValueRef emitted_prims_vec;
4392       /* Implicit end_primitive, needed in case there are any unflushed
4393          vertices in the cache. Note: we must not call end_primitive() here
4394          since the exec_mask is not valid at this point. */
4395       end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4396 
4397       total_emitted_vertices_vec =
4398          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4399       emitted_prims_vec =
4400          LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4401 
4402       bld->gs_iface->gs_epilogue(bld->gs_iface,
4403                                  total_emitted_vertices_vec,
4404                                  emitted_prims_vec, 0);
4405    } else {
4406       gather_outputs(bld);
4407    }
4408 }
4409 
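/**
 * Translate a TGSI shader into LLVM IR, keeping values in SoA form
 * (one vector per channel, one lane per pixel/vertex/invocation).
 *
 * A minimal, illustrative call sequence (driver-side setup such as the
 * gallivm context, the scanned shader info and the sampler interface is
 * assumed to exist already):
 *
 *    struct lp_build_tgsi_params params;
 *    memset(&params, 0, sizeof params);
 *    params.type = type;                    // SoA vector type, e.g. 8 x float32
 *    params.info = &shader_info;            // from tgsi_scan_shader()
 *    params.mask = mask;                    // execution/kill mask context
 *    params.consts_ptr = consts_ptr;        // constant buffer pointers
 *    params.system_values = &system_values; // prefilled system value vectors
 *    params.sampler = sampler;              // texture sampling interface
 *    lp_build_tgsi_soa(gallivm, tokens, &params, outputs);
 */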
4410 void
4411 lp_build_tgsi_soa(struct gallivm_state *gallivm,
4412                   const struct tgsi_token *tokens,
4413                   const struct lp_build_tgsi_params *params,
4414                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
4415 {
4416    struct lp_build_tgsi_soa_context bld;
4417    struct lp_type type = params->type;
4418    struct lp_type res_type;
4419 
4420    assert(type.length <= LP_MAX_VECTOR_LENGTH);
4421    memset(&res_type, 0, sizeof res_type);
4422    res_type.width = type.width;
4423    res_type.length = type.length;
4424    res_type.sign = 1;
4425 
4426    /* Setup build context */
4427    memset(&bld, 0, sizeof bld);
4428    lp_build_context_init(&bld.bld_base.base, gallivm, type);
4429    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4430    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4431    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
4432    {
4433       struct lp_type dbl_type;
4434       dbl_type = type;
4435       dbl_type.width *= 2;
4436       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4437    }
4438    {
4439       struct lp_type uint64_type;
4440       uint64_type = lp_uint_type(type);
4441       uint64_type.width *= 2;
4442       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4443    }
4444    {
4445       struct lp_type int64_type;
4446       int64_type = lp_int_type(type);
4447       int64_type.width *= 2;
4448       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4449    }
4450    bld.mask = params->mask;
4451    bld.inputs = params->inputs;
4452    bld.outputs = outputs;
4453    bld.consts_ptr = params->consts_ptr;
4454    bld.const_sizes_ptr = params->const_sizes_ptr;
4455    bld.ssbo_ptr = params->ssbo_ptr;
4456    bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
4457    bld.sampler = params->sampler;
4458    bld.bld_base.info = params->info;
4459    bld.indirect_files = params->info->indirect_files;
4460    bld.context_ptr = params->context_ptr;
4461    bld.thread_data_ptr = params->thread_data_ptr;
4462    bld.image = params->image;
4463    bld.shared_ptr = params->shared_ptr;
4464    bld.coro = params->coro;
4465 
4466    /*
4467     * If the number of temporaries is rather large then we just
4468     * allocate them as an array right from the start and treat
4469     * them like indirect temporaries.
4470     */
4471    if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4472       bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4473    }
4474    /*
4475     * For performance reasons immediates are always backed by a static
4476     * array, but if there are too many of them we have to fall back to
4477     * a dynamically allocated array.
4478     */
4479    bld.use_immediates_array =
4480          (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4481    if (bld.use_immediates_array) {
4482       bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4483    }
4484 
4485 
4486    bld.bld_base.soa = TRUE;
4487    bld.bld_base.emit_debug = emit_debug;
4488    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4489    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4490    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4491    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4492    bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4493 
4494    bld.bld_base.emit_store = emit_store;
4495    bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
4496    bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
4497    bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;
4498 
4499    bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4500    bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4501 
4502    bld.bld_base.emit_prologue = emit_prologue;
4503    bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
4504    bld.bld_base.emit_epilogue = emit_epilogue;
4505 
4506    /* Set opcode actions */
4507    lp_set_default_actions_cpu(&bld.bld_base);
4508 
4509    bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4510    bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4511    bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4512    bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4513    bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4514    bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4515    bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4516    bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4517    bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4518    bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4519    bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4520    bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4521    bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4522    bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4523    bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4524    bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4525    bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4526    bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4527    bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4528    bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4529    bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4530    bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4531    bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4532    bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4533    bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
4534    bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4535    bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4536    bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4537    bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
4538    bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4539    bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4540    bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4541    bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4542    bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4543    /* DX10 sampling ops */
4544    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4545    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4546    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4547    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4548    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4549    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4550    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4551    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4552    bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4553    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4554    bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4555 
4556    bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4557    bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4558    bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4559 
4560    bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4561    bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4562    bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4563    bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4564    bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4565    bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4566    bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4567    bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4568    bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4569    bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4570 
4571    bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4572    bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4573 
4574    if (params->gs_iface) {
4575       /* There's no specific default for this because it should always
4576        * be set, but apps using ext_geometry_shader4 quite often forgot
4577        * to set it, so we fall back to MAX_VERTEX_VARYING from that spec;
4578        * we could assert if it's not set, but that would be a lot uglier. */
4580       uint max_output_vertices;
4581 
4582       /* inputs are always indirect with gs */
4583       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4584       bld.gs_iface = params->gs_iface;
4585       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4586       bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4587       bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4588 
4589       max_output_vertices =
4590          params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4591       if (!max_output_vertices)
4592          max_output_vertices = 32;
4593 
4594       bld.max_output_vertices_vec =
4595          lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4596                                 max_output_vertices);
4597    }
4598 
4599    if (params->tes_iface) {
4600       /* inputs are always indirect with tes */
4601       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4602       bld.tes_iface = params->tes_iface;
4603       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
4604    }
4605 
4606    if (params->tcs_iface) {
4607       bld.tcs_iface = params->tcs_iface;
4608       /* outputs and inputs are always indirect with tcs */
4609       bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
4610       bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
4611       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4612       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
4613       bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
4614       bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
4615    }
4616 
4617    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4618 
4619    bld.system_values = *params->system_values;
4620 
4621    lp_build_tgsi_llvm(&bld.bld_base, tokens);
4622 
4623    if (0) {
4624       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4625       LLVMValueRef function = LLVMGetBasicBlockParent(block);
4626       debug_printf("11111111111111111111111111111 \n");
4627       tgsi_dump(tokens, 0);
4628       lp_debug_dump_value(function);
4629       debug_printf("2222222222222222222222222222 \n");
4630    }
4631 
4632    if (0) {
4633       LLVMModuleRef module = LLVMGetGlobalParent(
4634          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4635       LLVMDumpModule(module);
4636 
4637    }
4638    lp_exec_mask_fini(&bld.exec_mask);
4639 }
4640