• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * TGSI to LLVM IR translation -- SoA.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  *
35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36  * Brian Paul, and others.
37  */
38 
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68 
69 /* SM 4.0 says that subroutines can nest 32 deep and
70  * we need one more for our main function */
71 #define LP_MAX_NUM_FUNCS 33
72 
73 #define DUMP_GS_EMITS 0
74 
75 /*
76  * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
77  * instruction.
78  *
79  * TODO:
80  * - take execution masks in consideration
81  * - debug control-flow instructions
82  */
83 #define DEBUG_EXECUTION 0
84 
85 
86 /*
87  * Emit code to print a register value.
88  */
89 static void
emit_dump_reg(struct gallivm_state * gallivm,unsigned file,unsigned index,unsigned chan,LLVMValueRef value)90 emit_dump_reg(struct gallivm_state *gallivm,
91               unsigned file,
92               unsigned index,
93               unsigned chan,
94               LLVMValueRef value)
95 {
96    char buf[32];
97 
98    util_snprintf(buf, sizeof buf, "    %s[%u].%c = ",
99                  tgsi_file_name(file),
100                  index, "xyzw"[chan]);
101 
102    lp_build_print_value(gallivm, buf, value);
103 }
104 
105 /*
106  * Return the context for the current function.
107  * (always 'main', if shader doesn't do any function calls)
108  */
109 static inline struct function_ctx *
func_ctx(struct lp_exec_mask * mask)110 func_ctx(struct lp_exec_mask *mask)
111 {
112    assert(mask->function_stack_size > 0);
113    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114    return &mask->function_stack[mask->function_stack_size - 1];
115 }
116 
117 /*
118  * Returns true if we're in a loop.
119  * It's global, meaning that it returns true even if there's
120  * no loop inside the current function, but we were inside
121  * a loop inside another function, from which this one was called.
122  */
123 static inline boolean
mask_has_loop(struct lp_exec_mask * mask)124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126    int i;
127    for (i = mask->function_stack_size - 1; i >= 0; --i) {
128       const struct function_ctx *ctx = &mask->function_stack[i];
129       if (ctx->loop_stack_size > 0)
130          return TRUE;
131    }
132    return FALSE;
133 }
134 
135 /*
136  * Returns true if we're inside a switch statement.
137  * It's global, meaning that it returns true even if there's
138  * no switch in the current function, but we were inside
139  * a switch inside another function, from which this one was called.
140  */
141 static inline boolean
mask_has_switch(struct lp_exec_mask * mask)142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144    int i;
145    for (i = mask->function_stack_size - 1; i >= 0; --i) {
146       const struct function_ctx *ctx = &mask->function_stack[i];
147       if (ctx->switch_stack_size > 0)
148          return TRUE;
149    }
150    return FALSE;
151 }
152 
153 /*
154  * Returns true if we're inside a conditional.
155  * It's global, meaning that it returns true even if there's
156  * no conditional in the current function, but we were inside
157  * a conditional inside another function, from which this one was called.
158  */
159 static inline boolean
mask_has_cond(struct lp_exec_mask * mask)160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162    int i;
163    for (i = mask->function_stack_size - 1; i >= 0; --i) {
164       const struct function_ctx *ctx = &mask->function_stack[i];
165       if (ctx->cond_stack_size > 0)
166          return TRUE;
167    }
168    return FALSE;
169 }
170 
171 
172 /*
173  * Initialize a function context at the specified index.
174  */
175 static void
lp_exec_mask_function_init(struct lp_exec_mask * mask,int function_idx)176 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
177 {
178    LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
179    LLVMBuilderRef builder = mask->bld->gallivm->builder;
180    struct function_ctx *ctx =  &mask->function_stack[function_idx];
181 
182    ctx->cond_stack_size = 0;
183    ctx->loop_stack_size = 0;
184    ctx->switch_stack_size = 0;
185 
186    if (function_idx == 0) {
187       ctx->ret_mask = mask->ret_mask;
188    }
189 
190    ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
191                                        int_type, "looplimiter");
192    LLVMBuildStore(
193       builder,
194       LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
195       ctx->loop_limiter);
196 }
197 
/*
 * Initialize the execution-mask state: every mask starts fully enabled
 * (all channels active) and the function stack holds just 'main'.
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   /* All-ones means "all channels executing". */
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   /* One frame per possible nested subroutine, plus main
    * (see LP_MAX_NUM_FUNCS).  Freed in lp_exec_mask_fini(). */
   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}
215 
216 static void
lp_exec_mask_fini(struct lp_exec_mask * mask)217 lp_exec_mask_fini(struct lp_exec_mask *mask)
218 {
219    FREE(mask->function_stack);
220 }
221 
/*
 * Recompute exec_mask as the AND of all active partial masks
 * (cond, loop cont/break, switch, return), and update has_mask,
 * which tells stores whether they need to be predicated at all.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   /* A return mask is live inside any subroutine, or in main once a
    * conditional RET has been seen there (ret_in_main). */
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   /* If no partial mask is active, exec_mask is all-ones and stores
    * can skip the select (see lp_exec_mask_store). */
   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
265 
/*
 * Enter an IF: push the current cond mask and AND in the new
 * condition 'val' (an int vector mask, all-ones per true channel).
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Nesting deeper than LP_MAX_TGSI_NESTING is only counted, not
    * masked; the matching pop just decrements the counter again. */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
287 
/*
 * ELSE: replace the current cond mask with its complement, still
 * restricted to the mask that was active before the matching IF
 * (the one saved on top of the cond stack).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   /* Overflowed nesting levels carry no real mask - nothing to invert. */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
310 
lp_exec_mask_cond_pop(struct lp_exec_mask * mask)311 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
312 {
313    struct function_ctx *ctx = func_ctx(mask);
314    assert(ctx->cond_stack_size);
315    --ctx->cond_stack_size;
316    if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
317       return;
318    mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
319    lp_exec_mask_update(mask);
320 }
321 
/*
 * BGNLOOP: save the enclosing loop's state, reset break/cont masks for
 * the new loop, and start a new basic block that the matching ENDLOOP
 * will branch back to.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Beyond the nesting limit, only count the level (matching
    * lp_exec_endloop just decrements it again). */
   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   /* break_type_stack is shared between loops and switches, indexed by
    * the combined nesting depth. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* The break mask must survive across iterations, so it lives in an
    * alloca'd variable reloaded at the top of each iteration. */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
354 
/*
 * BRK: disable the currently-executing channels in either the loop
 * break mask or the switch mask, depending on what the innermost
 * breakable construct is.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* A break immediately followed by ENDSWITCH or CASE is
       * unconditional: every active channel leaves the switch. */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* Jump back to where lp_exec_endswitch deferred from. */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
404 
/*
 * Conditional break (BREAKC): channels where both the current exec
 * mask and 'cond' are set leave the innermost loop or switch.
 */
static void lp_exec_break_condition(struct lp_exec_mask *mask,
                                    LLVMValueRef cond)
{
   LLVMBuilderRef b = mask->bld->gallivm->builder;
   struct function_ctx *fctx = func_ctx(mask);
   LLVMValueRef keep;

   /* keep = ~(exec & cond): channels that do NOT take the break. */
   keep = LLVMBuildAnd(b, mask->exec_mask, cond, "cond_mask");
   keep = LLVMBuildNot(b, keep, "break_cond");

   if (fctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP)
      mask->break_mask = LLVMBuildAnd(b, mask->break_mask, keep,
                                      "breakc_full");
   else
      mask->switch_mask = LLVMBuildAnd(b, mask->switch_mask, keep,
                                       "breakc_switch");

   lp_exec_mask_update(mask);
}
428 
lp_exec_continue(struct lp_exec_mask * mask)429 static void lp_exec_continue(struct lp_exec_mask *mask)
430 {
431    LLVMBuilderRef builder = mask->bld->gallivm->builder;
432    LLVMValueRef exec_mask = LLVMBuildNot(builder,
433                                          mask->exec_mask,
434                                          "");
435 
436    mask->cont_mask = LLVMBuildAnd(builder,
437                                   mask->cont_mask,
438                                   exec_mask, "");
439 
440    lp_exec_mask_update(mask);
441 }
442 
443 
/*
 * ENDLOOP: emit the back-edge.  Loop again while any channel is still
 * active AND the iteration limiter hasn't run out; then pop the saved
 * loop state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* One wide integer covering the whole mask vector, so "any channel
    * active" is a single scalar compare. */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   /* Matching bgnloop only counted this level - nothing was emitted. */
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* Pop: restore the enclosing loop's masks, blocks and break type. */
   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
523 
/*
 * SWITCH: save the enclosing switch state and start a new one.  The
 * switch mask starts all-zero; CASE statements OR matching channels in.
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   /* Beyond the nesting limit only count the level (lp_exec_endswitch
    * decrements it again without touching any state). */
   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   /* break_type_stack is shared with loops, indexed by combined depth. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* No channel executes until a CASE matches; switch_mask_default
    * accumulates every channel that matched some case, so default can
    * later take the complement. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
554 
/*
 * ENDSWITCH: if a default statement was deferred (it appeared mid-
 * switch with no fallthrough into it), jump back and execute it now
 * with the default mask; otherwise pop the saved switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Overflowed nesting level: nothing was emitted, just un-count it. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default executes every channel no case matched. */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* Rewind the interpreter PC to just after the DEFAULT opcode. */
      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      /* Don't pop yet - we'll come through here a second time. */
      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* Second pass: we just finished re-executing the deferred default. */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* Pop: restore the enclosing switch's state and break type. */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
606 
/*
 * CASE: enable the channels whose switch value equals 'caseval'
 * (keeping channels already active, for fallthrough), and record them
 * in the default mask so DEFAULT can execute the complement.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* Remember which channels matched some case. */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* OR in already-active channels (fallthrough from previous case). */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
631 
632 /*
633  * Analyse default statement in a switch.
634  * \return true if default is last statement, false otherwise
635  * \param default_pc_start contains pc of instruction to jump to
636  *                         if default wasn't last but there's no
637  *                         fallthrough into default.
638  */
default_analyse_is_last(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base,int * default_pc_start)639 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
640                                        struct lp_build_tgsi_context * bld_base,
641                                        int *default_pc_start)
642 {
643    unsigned pc = bld_base->pc;
644    struct function_ctx *ctx = func_ctx(mask);
645    int curr_switch_stack = ctx->switch_stack_size;
646 
647    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
648       return false;
649    }
650 
651    /* skip over case statements which are together with default */
652    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
653       pc++;
654    }
655 
656    while (pc != ~0u && pc < bld_base->num_instructions) {
657       unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
658       switch (opcode) {
659       case TGSI_OPCODE_CASE:
660          if (curr_switch_stack == ctx->switch_stack_size) {
661             *default_pc_start = pc - 1;
662             return false;
663          }
664          break;
665       case TGSI_OPCODE_SWITCH:
666          curr_switch_stack++;
667          break;
668       case TGSI_OPCODE_ENDSWITCH:
669          if (curr_switch_stack == ctx->switch_stack_size) {
670             *default_pc_start = pc - 1;
671             return true;
672          }
673          curr_switch_stack--;
674          break;
675       }
676       pc++;
677    }
678    /* should never arrive here */
679    assert(0);
680    return true;
681 }
682 
/*
 * DEFAULT: if it is the last statement of the switch, just set the
 * switch mask to the channels no case matched.  Otherwise defer it:
 * record the PC so lp_exec_endswitch can come back and execute it.
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* Channels no case matched... */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      /* ...plus channels already active (fallthrough into default). */
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
747 
748 
749 /* stores val into an address pointed to by dst_ptr.
750  * mask->exec_mask is used to figure out which bits of val
751  * should be stored into the address
752  * (0 means don't store this bit, 1 means do store).
753  */
lp_exec_mask_store(struct lp_exec_mask * mask,struct lp_build_context * bld_store,LLVMValueRef pred,LLVMValueRef val,LLVMValueRef dst_ptr)754 static void lp_exec_mask_store(struct lp_exec_mask *mask,
755                                struct lp_build_context *bld_store,
756                                LLVMValueRef pred,
757                                LLVMValueRef val,
758                                LLVMValueRef dst_ptr)
759 {
760    LLVMBuilderRef builder = mask->bld->gallivm->builder;
761 
762    assert(lp_check_value(bld_store->type, val));
763    assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
764    assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
765 
766    /* Mix the predicate and execution mask */
767    if (mask->has_mask) {
768       if (pred) {
769          pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
770       } else {
771          pred = mask->exec_mask;
772       }
773    }
774 
775    if (pred) {
776       LLVMValueRef res, dst;
777 
778       dst = LLVMBuildLoad(builder, dst_ptr, "");
779       res = lp_build_select(bld_store, pred, val, dst);
780       LLVMBuildStore(builder, res, dst_ptr);
781    } else
782       LLVMBuildStore(builder, val, dst_ptr);
783 }
784 
/*
 * CAL: push a new function frame (saving the return PC and the
 * caller's ret mask) and redirect the interpreter PC to 'func'.
 * Calls beyond LP_MAX_NUM_FUNCS deep are silently dropped.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}
799 
/*
 * RET: an unconditional return at the top level of main ends
 * translation (*pc = -1).  Otherwise, disable the currently-executing
 * channels in the ret mask until the function (or main) ends.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
834 
/* Subroutine entry (TGSI BGNSUB) needs no mask work: the callee simply
 * continues with the mask state the CAL left in place. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
838 
lp_exec_mask_endsub(struct lp_exec_mask * mask,int * pc)839 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
840 {
841    struct function_ctx *ctx;
842 
843    assert(mask->function_stack_size > 1);
844    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
845 
846    ctx = func_ctx(mask);
847    mask->function_stack_size--;
848 
849    *pc = ctx->pc;
850    mask->ret_mask = ctx->ret_mask;
851 
852    lp_exec_mask_update(mask);
853 }
854 
855 
856 static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context * bld,unsigned file,int index,unsigned chan)857 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
858              unsigned file,
859              int index,
860              unsigned chan)
861 {
862    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
863    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
864    LLVMValueRef var_of_array;
865 
866    switch (file) {
867    case TGSI_FILE_TEMPORARY:
868       array_of_vars = bld->temps;
869       var_of_array = bld->temps_array;
870       break;
871    case TGSI_FILE_OUTPUT:
872       array_of_vars = bld->outputs;
873       var_of_array = bld->outputs_array;
874       break;
875    default:
876       assert(0);
877       return NULL;
878    }
879 
880    assert(chan < 4);
881 
882    if (bld->indirect_files & (1 << file)) {
883       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
884       return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
885    }
886    else {
887       assert(index <= bld->bld_base.info->file_max[file]);
888       return array_of_vars[index][chan];
889    }
890 }
891 
892 
893 /**
894  * Return pointer to a temporary register channel (src or dest).
895  * Note that indirect addressing cannot be handled here.
896  * \param index  which temporary register
897  * \param chan  which channel of the temp register.
898  */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   /* Thin wrapper: direct vs. flat-array storage is resolved inside
    * get_file_ptr(). */
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}
906 
907 /**
908  * Return pointer to a output register channel (src or dest).
909  * Note that indirect addressing cannot be handled here.
910  * \param index  which output register
911  * \param chan  which channel of the output register.
912  */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
               unsigned index,
               unsigned chan)
{
   /* Thin wrapper: direct vs. flat-array storage is resolved inside
    * get_file_ptr(). */
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}
920 
921 /*
922  * If we have indirect addressing in outputs copy our alloca array
923  * to the outputs slots specified by the caller to make sure
924  * our outputs are delivered consistently via the same interface.
925  */
926 static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)927 gather_outputs(struct lp_build_tgsi_soa_context * bld)
928 {
929    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
930       unsigned index, chan;
931       assert(bld->bld_base.info->num_outputs <=
932              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
933       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
934          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
935             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
936          }
937       }
938    }
939 }
940 
941 /**
942  * Gather vector.
943  * XXX the lp_build_gather() function should be capable of doing this
944  * with a little work.
945  */
/**
 * Gather one scalar per lane from base_ptr at the given per-lane indexes
 * and assemble them into a vector.
 *
 * \param base_ptr       pointer to the (scalar) backing array
 * \param indexes        per-lane element indexes
 * \param overflow_mask  optional; lanes whose index overran the bound
 *                       buffer are forced to read index 0 and their result
 *                       replaced with 0 (constant-buffer semantics)
 * \param indexes2       optional second index vector for 64-bit fetches:
 *                       when present, a double-width float vector is built
 *                       with odd elements taken via indexes2
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   /* 64-bit fetches need twice as many 32-bit elements. */
   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di: destination lane; si: source lane (pairs share one source
       * lane in the 64-bit case). */
      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      /* Odd destination lanes of a 64-bit fetch use the second index set. */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      /* Zero out the lanes that overflowed.  For 64-bit results the mask
       * must first be widened to the double-wide int vector type. */
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
1029 
1030 
1031 /**
1032  * Scatter/store vector.
1033  */
/**
 * Scatter/store the lanes of 'values' into base_ptr at per-lane 'indexes',
 * honoring both the execution mask and an optional per-lane predicate.
 * Masked-off lanes keep the value already in memory (load/select/store).
 *
 * \param pred  optional per-lane predicate, combined with mask->exec_mask
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* Debug aid, disabled by default. */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* Predicated lane: blend new value with what is already stored. */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         /* No mask at all: plain store. */
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
1082 
1083 
1084 /**
1085  * Read the current value of the ADDR register, convert the floats to
1086  * ints, add the base index and return the vector of offsets.
1087  * The offsets will be used to index into the constant buffer or
1088  * temporary register file.
1089  */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   /* The file must have been set up for indirect addressing (flat array). */
   assert(bld->indirect_files & (1 << reg_file));

   /* Broadcast the static register index to all lanes. */
   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   /* offset = static index + per-lane relative index */
   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      /* Clamp to the file's declared maximum so we never read/write
       * past the register storage. */
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
1150 
1151 static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype)1152 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1153 	       enum tgsi_opcode_type stype)
1154 {
1155    struct lp_build_context *bld_fetch;
1156 
1157    switch (stype) {
1158    case TGSI_TYPE_FLOAT:
1159    case TGSI_TYPE_UNTYPED:
1160       bld_fetch = &bld_base->base;
1161       break;
1162    case TGSI_TYPE_UNSIGNED:
1163       bld_fetch = &bld_base->uint_bld;
1164       break;
1165    case TGSI_TYPE_SIGNED:
1166       bld_fetch = &bld_base->int_bld;
1167       break;
1168    case TGSI_TYPE_DOUBLE:
1169       bld_fetch = &bld_base->dbl_bld;
1170       break;
1171    case TGSI_TYPE_UNSIGNED64:
1172       bld_fetch = &bld_base->uint64_bld;
1173       break;
1174    case TGSI_TYPE_SIGNED64:
1175       bld_fetch = &bld_base->int64_bld;
1176       break;
1177    case TGSI_TYPE_VOID:
1178    default:
1179       assert(0);
1180       bld_fetch = NULL;
1181       break;
1182    }
1183    return bld_fetch;
1184 }
1185 
1186 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,unsigned chan_index,boolean need_perelement_offset)1187 get_soa_array_offsets(struct lp_build_context *uint_bld,
1188                       LLVMValueRef indirect_index,
1189                       unsigned chan_index,
1190                       boolean need_perelement_offset)
1191 {
1192    struct gallivm_state *gallivm = uint_bld->gallivm;
1193    LLVMValueRef chan_vec =
1194       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1195    LLVMValueRef length_vec =
1196       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1197    LLVMValueRef index_vec;
1198 
1199    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1200    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1201    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1202    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1203 
1204    if (need_perelement_offset) {
1205       LLVMValueRef pixel_offsets;
1206       unsigned i;
1207      /* build pixel offset vector: {0, 1, 2, 3, ...} */
1208       pixel_offsets = uint_bld->undef;
1209       for (i = 0; i < uint_bld->type.length; i++) {
1210          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1211          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1212                                                 ii, ii, "");
1213       }
1214       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1215    }
1216    return index_vec;
1217 }
1218 
/**
 * Fetch one channel of a constant-buffer source register, broadcast to a
 * SoA vector.  Handles 2D (multi-buffer) addressing, indirect addressing
 * with out-of-bounds lanes forced to 0, and 64-bit source types.
 *
 * \param reg      TGSI source register being fetched
 * \param stype    TGSI value type of the fetch
 * \param swizzle  which channel (x/y/z/w) to fetch
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   /* 2D constants: Dimension selects which bound constant buffer. */
   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* 64-bit values occupy two channels: build a second index vector
       * pointing at swizzle + 1 for the high halves. */
      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      /* Direct access: load a single scalar and broadcast to all lanes. */
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      /* For 64-bit types, reinterpret the pointer so one load grabs both
       * 32-bit halves, and broadcast with the matching build context. */
      if (stype == TGSI_TYPE_DOUBLE) {
         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
         bld_broad = &bld_base->dbl_bld;
      } else if (stype == TGSI_TYPE_UNSIGNED64) {
         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
         bld_broad = &bld_base->uint64_bld;
      } else if (stype == TGSI_TYPE_SIGNED64) {
         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
         bld_broad = &bld_base->int64_bld;
      }
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(bld_broad, scalar);
   }

   /* Reinterpret the result to the vector type the consumer expects. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1314 
1315 /**
1316  * Fetch 64-bit values from two separate channels.
1317  * 64-bit values are stored split across two channels, like xy and zw.
1318  * This function creates a set of 16 floats,
1319  * extracts the values from the two channels,
1320  * puts them in the correct place, then casts to 8 64-bits.
1321  */
1322 static LLVMValueRef
emit_fetch_64bit(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype,LLVMValueRef input,LLVMValueRef input2)1323 emit_fetch_64bit(
1324    struct lp_build_tgsi_context * bld_base,
1325    enum tgsi_opcode_type stype,
1326    LLVMValueRef input,
1327    LLVMValueRef input2)
1328 {
1329    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1330    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1331    LLVMBuilderRef builder = gallivm->builder;
1332    LLVMValueRef res;
1333    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1334    int i;
1335    LLVMValueRef shuffles[16];
1336    int len = bld_base->base.type.length * 2;
1337    assert(len <= 16);
1338 
1339    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1340       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1341       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1342    }
1343    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1344 
1345    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1346 }
1347 
/**
 * Fetch one channel of an immediate register.
 *
 * Immediates normally live as pre-built vectors in bld->immediates; when
 * they were spilled to an array (use_immediates_array) or are addressed
 * indirectly, they are loaded/gathered from bld->imms_array instead.
 * Handles 64-bit types by fetching the adjacent high channel as well.
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                              indirect_index,
                                              swizzle + 1,
                                              FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* Direct access into the spilled immediates array. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
                                                bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* 64-bit values span two channels: also load swizzle + 1
             * and combine the halves. */
            LLVMValueRef lindex1;
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            imms_ptr2 = LLVMBuildGEP(builder,
                                      bld->imms_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      /* Common case: immediates already live as per-channel vectors. */
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
   }

   /* Reinterpret the result to the vector type the consumer expects. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
1426 
/**
 * Fetch one channel of a shader input register.
 *
 * Inputs are either pre-loaded per-channel vectors (bld->inputs) or, when
 * the INPUT file is indirectly addressed, an alloca array (bld->inputs_array)
 * that is loaded or gathered from.  Handles 64-bit types by also fetching
 * the adjacent high channel.
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* Per-lane offsets (need_perelement_offset=TRUE) since each lane may
       * address a different register. */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle + 1,
                                           TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Direct access, but the file was spilled to an array because it
          * is indirectly addressed elsewhere: load from the array. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            /* 64-bit values span two channels: also load swizzle + 1. */
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         /* Common case: inputs already live as per-channel vectors. */
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
      }
   }

   assert(res);

   /* Reinterpret the result to the vector type the consumer expects. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1504 
1505 
/**
 * Fetch one channel of a geometry-shader input via the gs_iface callback,
 * which resolves the (vertex, attribute, channel) triple.  PRIMID inputs
 * are special-cased as a system value.  Handles 64-bit types by fetching
 * the adjacent high channel as well.
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      /* prim_id is integer data; bitcast to float vectors unless an
       * integer type was requested. */
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   /* Attribute index: dynamic when indirectly addressed, constant otherwise. */
   if (reg->Register.Indirect) {
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   /* Vertex index comes from the Dimension part of the 2D register. */
   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* 64-bit values span two channels: fetch swizzle + 1 and combine. */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1578 
/**
 * Fetch a temporary register value.
 *
 * Directly indexed temps are loaded from their per-channel storage via
 * lp_get_temp_ptr_soa(); an indirect index requires a per-lane gather
 * from the flat temps array instead.  64-bit types combine two
 * consecutive channels (swizzle and swizzle + 1).
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* Per-lane offsets into the SoA-laid-out temp array. */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
               /* second-half offsets for the high 32 bits */
               index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                                  indirect_index,
                                                  swizzle + 1,
                                                  TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         /* 64-bit value: also load the next channel and combine the halves */
         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   /* Temps are stored as floats; reinterpret the bits for other consumers. */
   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1645 
1646 static LLVMValueRef
emit_fetch_system_value(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)1647 emit_fetch_system_value(
1648    struct lp_build_tgsi_context * bld_base,
1649    const struct tgsi_full_src_register * reg,
1650    enum tgsi_opcode_type stype,
1651    unsigned swizzle)
1652 {
1653    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1654    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1655    const struct tgsi_shader_info *info = bld->bld_base.info;
1656    LLVMBuilderRef builder = gallivm->builder;
1657    LLVMValueRef res;
1658    enum tgsi_opcode_type atype; // Actual type of the value
1659 
1660    assert(!reg->Register.Indirect);
1661 
1662    switch (info->system_value_semantic_name[reg->Register.Index]) {
1663    case TGSI_SEMANTIC_INSTANCEID:
1664       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1665       atype = TGSI_TYPE_UNSIGNED;
1666       break;
1667 
1668    case TGSI_SEMANTIC_VERTEXID:
1669       res = bld->system_values.vertex_id;
1670       atype = TGSI_TYPE_UNSIGNED;
1671       break;
1672 
1673    case TGSI_SEMANTIC_VERTEXID_NOBASE:
1674       res = bld->system_values.vertex_id_nobase;
1675       atype = TGSI_TYPE_UNSIGNED;
1676       break;
1677 
1678    case TGSI_SEMANTIC_BASEVERTEX:
1679       res = bld->system_values.basevertex;
1680       atype = TGSI_TYPE_UNSIGNED;
1681       break;
1682 
1683    case TGSI_SEMANTIC_PRIMID:
1684       res = bld->system_values.prim_id;
1685       atype = TGSI_TYPE_UNSIGNED;
1686       break;
1687 
1688    case TGSI_SEMANTIC_INVOCATIONID:
1689       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1690       atype = TGSI_TYPE_UNSIGNED;
1691       break;
1692 
1693    default:
1694       assert(!"unexpected semantic in emit_fetch_system_value");
1695       res = bld_base->base.zero;
1696       atype = TGSI_TYPE_FLOAT;
1697       break;
1698    }
1699 
1700    if (atype != stype) {
1701       if (stype == TGSI_TYPE_FLOAT) {
1702          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1703       } else if (stype == TGSI_TYPE_UNSIGNED) {
1704          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1705       } else if (stype == TGSI_TYPE_SIGNED) {
1706          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1707       }
1708    }
1709 
1710    return res;
1711 }
1712 
1713 /**
1714  * Register fetch with derivatives.
1715  */
1716 static void
emit_fetch_deriv(struct lp_build_tgsi_soa_context * bld,LLVMValueRef src,LLVMValueRef * res,LLVMValueRef * ddx,LLVMValueRef * ddy)1717 emit_fetch_deriv(
1718    struct lp_build_tgsi_soa_context *bld,
1719    LLVMValueRef src,
1720    LLVMValueRef *res,
1721    LLVMValueRef *ddx,
1722    LLVMValueRef *ddy)
1723 {
1724    if (res)
1725       *res = src;
1726 
1727    /* TODO: use interpolation coeffs for inputs */
1728 
1729    if (ddx)
1730       *ddx = lp_build_ddx(&bld->bld_base.base, src);
1731 
1732    if (ddy)
1733       *ddy = lp_build_ddy(&bld->bld_base.base, src);
1734 }
1735 
1736 
1737 /**
1738  * Predicate.
1739  */
1740 static void
emit_fetch_predicate(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * pred)1741 emit_fetch_predicate(
1742    struct lp_build_tgsi_soa_context *bld,
1743    const struct tgsi_full_instruction *inst,
1744    LLVMValueRef *pred)
1745 {
1746    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1747    unsigned index;
1748    unsigned char swizzles[4];
1749    LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1750    LLVMValueRef value;
1751    unsigned chan;
1752 
1753    if (!inst->Instruction.Predicate) {
1754       TGSI_FOR_EACH_CHANNEL( chan ) {
1755          pred[chan] = NULL;
1756       }
1757       return;
1758    }
1759 
1760    swizzles[0] = inst->Predicate.SwizzleX;
1761    swizzles[1] = inst->Predicate.SwizzleY;
1762    swizzles[2] = inst->Predicate.SwizzleZ;
1763    swizzles[3] = inst->Predicate.SwizzleW;
1764 
1765    index = inst->Predicate.Index;
1766    assert(index < LP_MAX_TGSI_PREDS);
1767 
1768    TGSI_FOR_EACH_CHANNEL( chan ) {
1769       unsigned swizzle = swizzles[chan];
1770 
1771       /*
1772        * Only fetch the predicate register channels that are actually listed
1773        * in the swizzles
1774        */
1775       if (!unswizzled[swizzle]) {
1776          value = LLVMBuildLoad(builder,
1777                                bld->preds[index][swizzle], "");
1778 
1779          /*
1780           * Convert the value to an integer mask.
1781           *
1782           * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1783           * is needlessly causing two comparisons due to storing the intermediate
1784           * result as float vector instead of an integer mask vector.
1785           */
1786          value = lp_build_compare(bld->bld_base.base.gallivm,
1787                                   bld->bld_base.base.type,
1788                                   PIPE_FUNC_NOTEQUAL,
1789                                   value,
1790                                   bld->bld_base.base.zero);
1791          if (inst->Predicate.Negate) {
1792             value = LLVMBuildNot(builder, value, "");
1793          }
1794 
1795          unswizzled[swizzle] = value;
1796       } else {
1797          value = unswizzled[swizzle];
1798       }
1799 
1800       pred[chan] = value;
1801    }
1802 }
1803 
1804 /**
1805  * store an array of 8 64-bit into two arrays of 8 floats
1806  * i.e.
1807  * value is d0, d1, d2, d3 etc.
1808  * each 64-bit has high and low pieces x, y
1809  * so gets stored into the separate channels as:
1810  * chan_ptr = d0.x, d1.x, d2.x, d3.x
1811  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1812  */
1813 static void
emit_store_64bit_chan(struct lp_build_tgsi_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef pred,LLVMValueRef value)1814 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1815                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1816                       LLVMValueRef pred,
1817                       LLVMValueRef value)
1818 {
1819    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1820    struct gallivm_state *gallivm = bld_base->base.gallivm;
1821    LLVMBuilderRef builder = gallivm->builder;
1822    struct lp_build_context *float_bld = &bld_base->base;
1823    unsigned i;
1824    LLVMValueRef temp, temp2;
1825    LLVMValueRef shuffles[8];
1826    LLVMValueRef shuffles2[8];
1827 
1828    for (i = 0; i < bld_base->base.type.length; i++) {
1829       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1830       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1831    }
1832 
1833    temp = LLVMBuildShuffleVector(builder, value,
1834                                  LLVMGetUndef(LLVMTypeOf(value)),
1835                                  LLVMConstVector(shuffles,
1836                                                  bld_base->base.type.length),
1837                                  "");
1838    temp2 = LLVMBuildShuffleVector(builder, value,
1839                                   LLVMGetUndef(LLVMTypeOf(value)),
1840                                   LLVMConstVector(shuffles2,
1841                                                   bld_base->base.type.length),
1842                                   "");
1843 
1844    lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr);
1845    lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2);
1846 }
1847 
/**
 * Register store.
 *
 * Store one channel of a computed value into the destination register of
 * an instruction, honouring saturation, predication, the execution mask
 * and (for OUTPUT/TEMPORARY files) indirect addressing.  64-bit values
 * are split across a pair of adjacent channels.
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently the mesa/st doesn't generate indirect stores
       * to 64-bit values, it normally uses MOV to do indirect stores.
       */
      assert(!tgsi_type_is_64bit(dtype));
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
                             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      /* Outputs are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         /* view the outputs array as a flat float array for scattering */
         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            /* 64-bit: split value across this channel and the next */
            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                      chan_index + 1);
            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                                  pred, value);
         } else
            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      if (!tgsi_type_is_64bit(dtype))
         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      else
         /* 64-bit value occupies twice as many float lanes */
         value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            /* 64-bit: split value across this channel and the next */
            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
                                                         reg->Register.Index,
                                                         chan_index + 1);
            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
                                  pred, value);
         }
         else
            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      /* NOTE(review): given the assert above this bitcast is a no-op */
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      assert(LLVMTypeOf(value) == float_bld->vec_type);
      /* NOTE(review): given the assert above this bitcast is a no-op */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }

   (void)dtype;
}
1997 
1998 /*
1999  * Called at the beginning of the translation of each TGSI instruction, to
2000  * emit some debug code.
2001  */
2002 static void
emit_debug(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info)2003 emit_debug(
2004    struct lp_build_tgsi_context * bld_base,
2005    const struct tgsi_full_instruction * inst,
2006    const struct tgsi_opcode_info * info)
2007 
2008 {
2009    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2010 
2011    if (DEBUG_EXECUTION) {
2012       /*
2013        * Dump the TGSI instruction.
2014        */
2015 
2016       struct gallivm_state *gallivm = bld_base->base.gallivm;
2017       char buf[512];
2018       buf[0] = '$';
2019       buf[1] = ' ';
2020       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
2021       lp_build_printf(gallivm, buf);
2022 
2023       /* Dump the execution mask.
2024        */
2025       if (bld->exec_mask.has_mask) {
2026          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
2027       }
2028    }
2029 }
2030 
2031 static void
emit_store(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,LLVMValueRef dst[4])2032 emit_store(
2033    struct lp_build_tgsi_context * bld_base,
2034    const struct tgsi_full_instruction * inst,
2035    const struct tgsi_opcode_info * info,
2036    LLVMValueRef dst[4])
2037 
2038 {
2039    unsigned chan_index;
2040    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2041    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
2042    if(info->num_dst) {
2043       LLVMValueRef pred[TGSI_NUM_CHANNELS];
2044 
2045       emit_fetch_predicate( bld, inst, pred );
2046 
2047       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2048 
2049          if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
2050              continue;
2051          emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
2052       }
2053    }
2054 }
2055 
2056 static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)2057 tgsi_to_pipe_tex_target(unsigned tgsi_target)
2058 {
2059    switch (tgsi_target) {
2060    case TGSI_TEXTURE_BUFFER:
2061       return PIPE_BUFFER;
2062    case TGSI_TEXTURE_1D:
2063    case TGSI_TEXTURE_SHADOW1D:
2064       return PIPE_TEXTURE_1D;
2065    case TGSI_TEXTURE_2D:
2066    case TGSI_TEXTURE_SHADOW2D:
2067    case TGSI_TEXTURE_2D_MSAA:
2068       return PIPE_TEXTURE_2D;
2069    case TGSI_TEXTURE_3D:
2070       return PIPE_TEXTURE_3D;
2071    case TGSI_TEXTURE_CUBE:
2072    case TGSI_TEXTURE_SHADOWCUBE:
2073       return PIPE_TEXTURE_CUBE;
2074    case TGSI_TEXTURE_RECT:
2075    case TGSI_TEXTURE_SHADOWRECT:
2076       return PIPE_TEXTURE_RECT;
2077    case TGSI_TEXTURE_1D_ARRAY:
2078    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2079       return PIPE_TEXTURE_1D_ARRAY;
2080    case TGSI_TEXTURE_2D_ARRAY:
2081    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2082    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2083       return PIPE_TEXTURE_2D_ARRAY;
2084    case TGSI_TEXTURE_CUBE_ARRAY:
2085    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2086       return PIPE_TEXTURE_CUBE_ARRAY;
2087    default:
2088       assert(0);
2089       return PIPE_BUFFER;
2090    }
2091 }
2092 
2093 
2094 static enum lp_sampler_lod_property
lp_build_lod_property(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned src_op)2095 lp_build_lod_property(
2096    struct lp_build_tgsi_context *bld_base,
2097    const struct tgsi_full_instruction *inst,
2098    unsigned src_op)
2099 {
2100    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2101    enum lp_sampler_lod_property lod_property;
2102 
2103    /*
2104     * Not much we can do here. We could try catching inputs declared
2105     * with constant interpolation but not sure it's worth it - since for
2106     * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2107     * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2108     * like the constant/immediate recognition below.
2109     * What seems to be of more value would be to recognize temps holding
2110     * broadcasted scalars but no way we can do it.
2111     * Tried asking llvm but without any success (using LLVMIsConstant
2112     * even though this isn't exactly what we'd need), even as simple as
2113     * IMM[0] UINT32 (0,-1,0,0)
2114     * MOV TEMP[0] IMM[0].yyyy
2115     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2116     * doesn't work.
2117     * This means there's ZERO chance this will ever catch a scalar lod
2118     * with traditional tex opcodes as well as texel fetches, since the lod
2119     * comes from the same reg as coords (except some test shaders using
2120     * constant coords maybe).
2121     * There's at least hope for sample opcodes as well as size queries.
2122     */
2123    if (reg->Register.File == TGSI_FILE_CONSTANT ||
2124        reg->Register.File == TGSI_FILE_IMMEDIATE) {
2125       lod_property = LP_SAMPLER_LOD_SCALAR;
2126    }
2127    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2128       if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2129          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2130       }
2131       else {
2132          lod_property = LP_SAMPLER_LOD_PER_QUAD;
2133       }
2134    }
2135    else {
2136       /* never use scalar (per-quad) lod the results are just too wrong. */
2137       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2138    }
2139    return lod_property;
2140 }
2141 
2142 
2143 /**
2144  * High-level instruction translators.
2145  */
2146 
/**
 * Emit code to sample a texture for a traditional TEX-style opcode.
 *
 * Gathers coordinates, projection, lod/bias, explicit derivatives and
 * texel offsets from the instruction sources as dictated by \p modifier
 * and the texture target, packs them into lp_sampler_params, and hands
 * off to the bound sampler generator.
 *
 * \param texel        receives the 4 resulting texel channel vectors
 * \param sampler_reg  source register index holding the sampler unit
 * \param sampler_op   sample operation kind (encoded into sample_key)
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;      /* 1/w, for projective texturing */
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;    /* source channel holding the shadow ref, 0 = none */
   unsigned layer_coord = 0;     /* source channel holding the array layer, 0 = none */
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      /* No sampler generator bound: return undef rather than crash. */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Per-target layout: how many coords carry derivatives/offsets, and
    * which channels hold the layer and shadow reference values.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      /* MSAA targets cannot be sampled with TEX-style opcodes */
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         /* lod shares the coordinate register, in the .w channel */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* projective: divide all coords by w (multiply by 1/w) */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   /* pad unused coord slots with undef */
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         /* shadow cube array: reference value lives in src1 */
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* explicit derivatives come from src1 (ddx) and src2 (ddy) */
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* Pack everything up and let the sampler generator emit the fetch. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2353 
2354 static void
emit_sample(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,boolean compare,LLVMValueRef * texel)2355 emit_sample(struct lp_build_tgsi_soa_context *bld,
2356             const struct tgsi_full_instruction *inst,
2357             enum lp_build_tex_modifier modifier,
2358             boolean compare,
2359             LLVMValueRef *texel)
2360 {
2361    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2362    unsigned texture_unit, sampler_unit;
2363    LLVMValueRef lod = NULL;
2364    LLVMValueRef coords[5];
2365    LLVMValueRef offsets[3] = { NULL };
2366    struct lp_derivatives derivs;
2367    struct lp_sampler_params params;
2368    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2369 
2370    unsigned num_offsets, num_derivs, i;
2371    unsigned layer_coord = 0;
2372    unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
2373 
2374    memset(&params, 0, sizeof(params));
2375 
2376    if (!bld->sampler) {
2377       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2378       for (i = 0; i < 4; i++) {
2379          texel[i] = bld->bld_base.base.undef;
2380       }
2381       return;
2382    }
2383 
2384    /*
2385     * unlike old-style tex opcodes the texture/sampler indices
2386     * always come from src1 and src2 respectively.
2387     */
2388    texture_unit = inst->Src[1].Register.Index;
2389    sampler_unit = inst->Src[2].Register.Index;
2390 
2391    /*
2392     * Note inst->Texture.Texture will contain the number of offsets,
2393     * however the target information is NOT there and comes from the
2394     * declared sampler views instead.
2395     */
2396    switch (bld->sv[texture_unit].Resource) {
2397    case TGSI_TEXTURE_1D:
2398       num_offsets = 1;
2399       num_derivs = 1;
2400       break;
2401    case TGSI_TEXTURE_1D_ARRAY:
2402       layer_coord = 1;
2403       num_offsets = 1;
2404       num_derivs = 1;
2405       break;
2406    case TGSI_TEXTURE_2D:
2407    case TGSI_TEXTURE_RECT:
2408       num_offsets = 2;
2409       num_derivs = 2;
2410       break;
2411    case TGSI_TEXTURE_2D_ARRAY:
2412       layer_coord = 2;
2413       num_offsets = 2;
2414       num_derivs = 2;
2415       break;
2416    case TGSI_TEXTURE_CUBE:
2417       num_offsets = 2;
2418       num_derivs = 3;
2419       break;
2420    case TGSI_TEXTURE_3D:
2421       num_offsets = 3;
2422       num_derivs = 3;
2423       break;
2424    case TGSI_TEXTURE_CUBE_ARRAY:
2425       layer_coord = 3;
2426       num_offsets = 2;
2427       num_derivs = 3;
2428       break;
2429    default:
2430       assert(0);
2431       return;
2432    }
2433 
2434    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2435        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2436       lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2437       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2438          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2439       }
2440       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2441          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2442       }
2443       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2444    }
2445    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2446       /* XXX might be better to explicitly pass the level zero information */
2447       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2448       lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2449    }
2450 
2451    for (i = 0; i < num_derivs; i++) {
2452       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2453    }
2454    for (i = num_derivs; i < 5; i++) {
2455       coords[i] = bld->bld_base.base.undef;
2456    }
2457 
2458    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2459    if (layer_coord) {
2460       if (layer_coord == 3)
2461          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2462       else
2463          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2464    }
2465    /* Shadow coord occupies always 5th slot. */
2466    if (compare) {
2467       sample_key |= LP_SAMPLER_SHADOW;
2468       coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2469    }
2470 
2471    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2472       unsigned dim;
2473       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2474       for (dim = 0; dim < num_derivs; ++dim) {
2475          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2476          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2477       }
2478       params.derivs = &derivs;
2479       /*
2480        * could also check all src regs if constant but I doubt such
2481        * cases exist in practice.
2482        */
2483       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2484          if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2485             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2486          }
2487          else {
2488             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2489          }
2490       }
2491       else {
2492          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2493       }
2494    }
2495 
2496    /* some advanced gather instructions (txgo) would require 4 offsets */
2497    if (inst->Texture.NumOffsets == 1) {
2498       unsigned dim;
2499       sample_key |= LP_SAMPLER_OFFSETS;
2500       for (dim = 0; dim < num_offsets; dim++) {
2501          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2502       }
2503    }
2504    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2505 
2506    params.type = bld->bld_base.base.type;
2507    params.sample_key = sample_key;
2508    params.texture_index = texture_unit;
2509    params.sampler_index = sampler_unit;
2510    params.context_ptr = bld->context_ptr;
2511    params.thread_data_ptr = bld->thread_data_ptr;
2512    params.coords = coords;
2513    params.offsets = offsets;
2514    params.lod = lod;
2515    params.texel = texel;
2516 
2517    bld->sampler->emit_tex_sample(bld->sampler,
2518                                  bld->bld_base.base.gallivm,
2519                                  &params);
2520 
2521    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2522        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2523        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2524        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2525       unsigned char swizzles[4];
2526       swizzles[0] = inst->Src[1].Register.SwizzleX;
2527       swizzles[1] = inst->Src[1].Register.SwizzleY;
2528       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2529       swizzles[3] = inst->Src[1].Register.SwizzleW;
2530 
2531       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2532    }
2533 }
2534 
2535 static void
emit_fetch_texels(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * texel,boolean is_samplei)2536 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2537                    const struct tgsi_full_instruction *inst,
2538                    LLVMValueRef *texel,
2539                    boolean is_samplei)
2540 {
2541    unsigned unit, target;
2542    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2543    LLVMValueRef explicit_lod = NULL;
2544    LLVMValueRef coords[5];
2545    LLVMValueRef offsets[3] = { NULL };
2546    struct lp_sampler_params params;
2547    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2548    unsigned dims, i;
2549    unsigned layer_coord = 0;
2550    unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2551 
2552    memset(&params, 0, sizeof(params));
2553 
2554    if (!bld->sampler) {
2555       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2556       for (i = 0; i < 4; i++) {
2557          texel[i] = coord_undef;
2558       }
2559       return;
2560    }
2561 
2562    unit = inst->Src[1].Register.Index;
2563 
2564    if (is_samplei) {
2565       target = bld->sv[unit].Resource;
2566    }
2567    else {
2568       target = inst->Texture.Texture;
2569    }
2570 
2571    switch (target) {
2572    case TGSI_TEXTURE_1D:
2573    case TGSI_TEXTURE_BUFFER:
2574       dims = 1;
2575       break;
2576    case TGSI_TEXTURE_1D_ARRAY:
2577       layer_coord = 1;
2578       dims = 1;
2579       break;
2580    case TGSI_TEXTURE_2D:
2581    case TGSI_TEXTURE_RECT:
2582    case TGSI_TEXTURE_2D_MSAA:
2583       dims = 2;
2584       break;
2585    case TGSI_TEXTURE_2D_ARRAY:
2586    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2587       layer_coord = 2;
2588       dims = 2;
2589       break;
2590    case TGSI_TEXTURE_3D:
2591       dims = 3;
2592       break;
2593    default:
2594       assert(0);
2595       return;
2596    }
2597 
2598    /* always have lod except for buffers and msaa targets ? */
2599    if (target != TGSI_TEXTURE_BUFFER &&
2600        target != TGSI_TEXTURE_2D_MSAA &&
2601        target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2602       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2603       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2604       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2605    }
2606    /*
2607     * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2608     * would be the sample index.
2609     */
2610 
2611    for (i = 0; i < dims; i++) {
2612       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2613    }
2614    /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2615    for (i = dims; i < 5; i++) {
2616       coords[i] = coord_undef;
2617    }
2618    if (layer_coord)
2619       coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2620 
2621    if (inst->Texture.NumOffsets == 1) {
2622       unsigned dim;
2623       sample_key |= LP_SAMPLER_OFFSETS;
2624       for (dim = 0; dim < dims; dim++) {
2625          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2626       }
2627    }
2628    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2629 
2630    params.type = bld->bld_base.base.type;
2631    params.sample_key = sample_key;
2632    params.texture_index = unit;
2633    /*
2634     * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2635     * and trigger some assertions with d3d10 where the sampler view number
2636     * can exceed this.
2637     */
2638    params.sampler_index = 0;
2639    params.context_ptr = bld->context_ptr;
2640    params.thread_data_ptr = bld->thread_data_ptr;
2641    params.coords = coords;
2642    params.offsets = offsets;
2643    params.derivs = NULL;
2644    params.lod = explicit_lod;
2645    params.texel = texel;
2646 
2647    bld->sampler->emit_tex_sample(bld->sampler,
2648                                  bld->bld_base.base.gallivm,
2649                                  &params);
2650 
2651    if (is_samplei &&
2652        (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2653         inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2654         inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2655         inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2656       unsigned char swizzles[4];
2657       swizzles[0] = inst->Src[1].Register.SwizzleX;
2658       swizzles[1] = inst->Src[1].Register.SwizzleY;
2659       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2660       swizzles[3] = inst->Src[1].Register.SwizzleW;
2661 
2662       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2663    }
2664 }
2665 
2666 static void
emit_size_query(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * sizes_out,boolean is_sviewinfo)2667 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2668                  const struct tgsi_full_instruction *inst,
2669                  LLVMValueRef *sizes_out,
2670                  boolean is_sviewinfo)
2671 {
2672    LLVMValueRef explicit_lod;
2673    enum lp_sampler_lod_property lod_property;
2674    unsigned has_lod;
2675    unsigned i;
2676    unsigned unit = inst->Src[1].Register.Index;
2677    unsigned target, pipe_target;
2678    struct lp_sampler_size_query_params params;
2679 
2680    if (is_sviewinfo) {
2681       target = bld->sv[unit].Resource;
2682    }
2683    else {
2684       target = inst->Texture.Texture;
2685    }
2686    switch (target) {
2687    case TGSI_TEXTURE_BUFFER:
2688    case TGSI_TEXTURE_RECT:
2689    case TGSI_TEXTURE_SHADOWRECT:
2690       has_lod = 0;
2691       break;
2692    default:
2693       has_lod = 1;
2694       break;
2695    }
2696 
2697    if (!bld->sampler) {
2698       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2699       for (i = 0; i < 4; i++)
2700          sizes_out[i] = bld->bld_base.int_bld.undef;
2701       return;
2702    }
2703 
2704    if (has_lod) {
2705       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2706       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2707    }
2708    else {
2709       explicit_lod = NULL;
2710       lod_property = LP_SAMPLER_LOD_SCALAR;
2711    }
2712 
2713 
2714    pipe_target = tgsi_to_pipe_tex_target(target);
2715 
2716    params.int_type = bld->bld_base.int_bld.type;
2717    params.texture_unit = unit;
2718    params.target = pipe_target;
2719    params.context_ptr = bld->context_ptr;
2720    params.is_sviewinfo = TRUE;
2721    params.lod_property = lod_property;
2722    params.explicit_lod = explicit_lod;
2723    params.sizes_out = sizes_out;
2724 
2725    bld->sampler->emit_size_query(bld->sampler,
2726                                  bld->bld_base.base.gallivm,
2727                                  &params);
2728 }
2729 
2730 static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context * bld,int pc)2731 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2732                    int pc)
2733 {
2734    unsigned i;
2735 
2736    for (i = 0; i < 5; i++) {
2737       unsigned opcode;
2738 
2739       if (pc + i >= bld->bld_base.info->num_instructions)
2740          return TRUE;
2741 
2742       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2743 
2744       if (opcode == TGSI_OPCODE_END)
2745          return TRUE;
2746 
2747       if (opcode == TGSI_OPCODE_TEX ||
2748          opcode == TGSI_OPCODE_TXP ||
2749          opcode == TGSI_OPCODE_TXD ||
2750          opcode == TGSI_OPCODE_TXB ||
2751          opcode == TGSI_OPCODE_TXL ||
2752          opcode == TGSI_OPCODE_TXF ||
2753          opcode == TGSI_OPCODE_TXQ ||
2754          opcode == TGSI_OPCODE_TEX2 ||
2755          opcode == TGSI_OPCODE_TXB2 ||
2756          opcode == TGSI_OPCODE_TXL2 ||
2757          opcode == TGSI_OPCODE_SAMPLE ||
2758          opcode == TGSI_OPCODE_SAMPLE_B ||
2759          opcode == TGSI_OPCODE_SAMPLE_C ||
2760          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2761          opcode == TGSI_OPCODE_SAMPLE_D ||
2762          opcode == TGSI_OPCODE_SAMPLE_I ||
2763          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2764          opcode == TGSI_OPCODE_SAMPLE_L ||
2765          opcode == TGSI_OPCODE_SVIEWINFO ||
2766          opcode == TGSI_OPCODE_CAL ||
2767          opcode == TGSI_OPCODE_CALLNZ ||
2768          opcode == TGSI_OPCODE_IF ||
2769          opcode == TGSI_OPCODE_UIF ||
2770          opcode == TGSI_OPCODE_BGNLOOP ||
2771          opcode == TGSI_OPCODE_SWITCH)
2772          return FALSE;
2773    }
2774 
2775    return TRUE;
2776 }
2777 
2778 
2779 
2780 /**
2781  * Kill fragment if any of the src register values are negative.
2782  */
2783 static void
emit_kill_if(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,int pc)2784 emit_kill_if(
2785    struct lp_build_tgsi_soa_context *bld,
2786    const struct tgsi_full_instruction *inst,
2787    int pc)
2788 {
2789    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2790    const struct tgsi_full_src_register *reg = &inst->Src[0];
2791    LLVMValueRef terms[TGSI_NUM_CHANNELS];
2792    LLVMValueRef mask;
2793    unsigned chan_index;
2794 
2795    memset(&terms, 0, sizeof terms);
2796 
2797    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2798       unsigned swizzle;
2799 
2800       /* Unswizzle channel */
2801       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2802 
2803       /* Check if the component has not been already tested. */
2804       assert(swizzle < TGSI_NUM_CHANNELS);
2805       if( !terms[swizzle] )
2806          /* TODO: change the comparison operator instead of setting the sign */
2807          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2808    }
2809 
2810    mask = NULL;
2811    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2812       if(terms[chan_index]) {
2813          LLVMValueRef chan_mask;
2814 
2815          /*
2816           * If term < 0 then mask = 0 else mask = ~0.
2817           */
2818          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2819 
2820          if(mask)
2821             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2822          else
2823             mask = chan_mask;
2824       }
2825    }
2826 
2827    if (bld->exec_mask.has_mask) {
2828       LLVMValueRef invmask;
2829       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2830       mask = LLVMBuildOr(builder, mask, invmask, "");
2831    }
2832 
2833    lp_build_mask_update(bld->mask, mask);
2834    if (!near_end_of_shader(bld, pc))
2835       lp_build_mask_check(bld->mask);
2836 }
2837 
2838 
2839 /**
2840  * Unconditional fragment kill.
2841  * The only predication is the execution mask which will apply if
2842  * we're inside a loop or conditional.
2843  */
2844 static void
emit_kill(struct lp_build_tgsi_soa_context * bld,int pc)2845 emit_kill(struct lp_build_tgsi_soa_context *bld,
2846           int pc)
2847 {
2848    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2849    LLVMValueRef mask;
2850 
2851    /* For those channels which are "alive", disable fragment shader
2852     * execution.
2853     */
2854    if (bld->exec_mask.has_mask) {
2855       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2856    }
2857    else {
2858       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2859       mask = zero;
2860    }
2861 
2862    lp_build_mask_update(bld->mask, mask);
2863 
2864    if (!near_end_of_shader(bld, pc))
2865       lp_build_mask_check(bld->mask);
2866 }
2867 
2868 
2869 /**
2870  * Emit code which will dump the value of all the temporary registers
2871  * to stdout.
2872  */
2873 static void
emit_dump_file(struct lp_build_tgsi_soa_context * bld,unsigned file)2874 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2875                unsigned file)
2876 {
2877    const struct tgsi_shader_info *info = bld->bld_base.info;
2878    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2879    LLVMBuilderRef builder = gallivm->builder;
2880    LLVMValueRef reg_ptr;
2881    int index;
2882    int max_index = info->file_max[file];
2883 
2884    /*
2885     * Some register files, particularly constants, can be very large,
2886     * and dumping everything could make this unusably slow.
2887     */
2888    max_index = MIN2(max_index, 32);
2889 
2890    for (index = 0; index <= max_index; index++) {
2891       LLVMValueRef res;
2892       unsigned mask;
2893       int chan;
2894 
2895       if (index < 8 * sizeof(unsigned) &&
2896           (info->file_mask[file] & (1u << index)) == 0)  {
2897          /* This was not declared.*/
2898          continue;
2899       }
2900 
2901       if (file == TGSI_FILE_INPUT) {
2902          mask = info->input_usage_mask[index];
2903       } else {
2904          mask = TGSI_WRITEMASK_XYZW;
2905       }
2906 
2907       for (chan = 0; chan < 4; chan++) {
2908          if ((mask & (1 << chan)) == 0) {
2909             /* This channel is not used.*/
2910             continue;
2911          }
2912 
2913          if (file == TGSI_FILE_CONSTANT) {
2914             struct tgsi_full_src_register reg;
2915             memset(&reg, 0, sizeof reg);
2916             reg.Register.File = file;
2917             reg.Register.Index = index;
2918             reg.Register.SwizzleX = 0;
2919             reg.Register.SwizzleY = 1;
2920             reg.Register.SwizzleZ = 2;
2921             reg.Register.SwizzleW = 3;
2922 
2923             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2924             if (!res) {
2925                continue;
2926             }
2927          } else if (file == TGSI_FILE_INPUT) {
2928             res = bld->inputs[index][chan];
2929             if (!res) {
2930                continue;
2931             }
2932          } else if (file == TGSI_FILE_TEMPORARY) {
2933             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2934             assert(reg_ptr);
2935             res = LLVMBuildLoad(builder, reg_ptr, "");
2936          } else if (file == TGSI_FILE_OUTPUT) {
2937             reg_ptr = lp_get_output_ptr(bld, index, chan);
2938             assert(reg_ptr);
2939             res = LLVMBuildLoad(builder, reg_ptr, "");
2940          } else {
2941             assert(0);
2942             continue;
2943          }
2944 
2945          emit_dump_reg(gallivm, file, index, chan, res);
2946       }
2947    }
2948 }
2949 
2950 
2951 
2952 void
lp_emit_declaration_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_declaration * decl)2953 lp_emit_declaration_soa(
2954    struct lp_build_tgsi_context *bld_base,
2955    const struct tgsi_full_declaration *decl)
2956 {
2957    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2958    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2959    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2960    const unsigned first = decl->Range.First;
2961    const unsigned last = decl->Range.Last;
2962    unsigned idx, i;
2963 
2964    assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2965 
2966    switch (decl->Declaration.File) {
2967    case TGSI_FILE_TEMPORARY:
2968       if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2969          assert(last < LP_MAX_INLINED_TEMPS);
2970          for (idx = first; idx <= last; ++idx) {
2971             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2972                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2973          }
2974       }
2975       break;
2976 
2977    case TGSI_FILE_OUTPUT:
2978       if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2979          for (idx = first; idx <= last; ++idx) {
2980             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2981                bld->outputs[idx][i] = lp_build_alloca(gallivm,
2982                                                       vec_type, "output");
2983          }
2984       }
2985       break;
2986 
2987    case TGSI_FILE_ADDRESS:
2988       /* ADDR registers are only allocated with an integer LLVM IR type,
2989        * as they are guaranteed to always have integers.
2990        * XXX: Not sure if this exception is worthwhile (or the whole idea of
2991        * an ADDR register for that matter).
2992        */
2993       assert(last < LP_MAX_TGSI_ADDRS);
2994       for (idx = first; idx <= last; ++idx) {
2995          assert(idx < LP_MAX_TGSI_ADDRS);
2996          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2997             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2998       }
2999       break;
3000 
3001    case TGSI_FILE_PREDICATE:
3002       assert(last < LP_MAX_TGSI_PREDS);
3003       for (idx = first; idx <= last; ++idx) {
3004          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
3005             bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
3006                                                  "predicate");
3007       }
3008       break;
3009 
3010    case TGSI_FILE_SAMPLER_VIEW:
3011       /*
3012        * The target stored here MUST match whatever there actually
3013        * is in the set sampler views (what about return type?).
3014        */
3015       assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3016       for (idx = first; idx <= last; ++idx) {
3017          bld->sv[idx] = decl->SamplerView;
3018       }
3019       break;
3020 
3021    case TGSI_FILE_CONSTANT:
3022    {
3023       /*
3024        * We could trivially fetch the per-buffer pointer when fetching the
3025        * constant, relying on llvm to figure out it's always the same pointer
3026        * anyway. However, doing so results in a huge (more than factor of 10)
3027        * slowdown in llvm compilation times for some (but not all) shaders
3028        * (more specifically, the IR optimization spends way more time in
3029        * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
3030        */
3031       unsigned idx2D = decl->Dim.Index2D;
3032       LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
3033       assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
3034       bld->consts[idx2D] =
3035          lp_build_array_get(gallivm, bld->consts_ptr, index2D);
3036       bld->consts_sizes[idx2D] =
3037          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
3038    }
3039       break;
3040 
3041    default:
3042       /* don't need to declare other vars */
3043       break;
3044    }
3045 }
3046 
3047 
lp_emit_immediate_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)3048 void lp_emit_immediate_soa(
3049    struct lp_build_tgsi_context *bld_base,
3050    const struct tgsi_full_immediate *imm)
3051 {
3052    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3053    struct gallivm_state * gallivm = bld_base->base.gallivm;
3054    LLVMValueRef imms[4];
3055    unsigned i;
3056    const uint size = imm->Immediate.NrTokens - 1;
3057    assert(size <= 4);
3058    switch (imm->Immediate.DataType) {
3059    case TGSI_IMM_FLOAT32:
3060       for( i = 0; i < size; ++i )
3061          imms[i] =
3062                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3063 
3064       break;
3065    case TGSI_IMM_FLOAT64:
3066    case TGSI_IMM_UINT64:
3067    case TGSI_IMM_INT64:
3068    case TGSI_IMM_UINT32:
3069       for( i = 0; i < size; ++i ) {
3070          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3071          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3072       }
3073 
3074       break;
3075    case TGSI_IMM_INT32:
3076       for( i = 0; i < size; ++i ) {
3077          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3078          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3079       }
3080 
3081       break;
3082    }
3083    for( i = size; i < 4; ++i )
3084       imms[i] = bld_base->base.undef;
3085 
3086    if (bld->use_immediates_array) {
3087       unsigned index = bld->num_immediates;
3088       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3089       LLVMBuilderRef builder = gallivm->builder;
3090 
3091       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3092       for (i = 0; i < 4; ++i ) {
3093          LLVMValueRef lindex = lp_build_const_int32(
3094                   bld->bld_base.base.gallivm, index * 4 + i);
3095          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3096                                              bld->imms_array, &lindex, 1, "");
3097          LLVMBuildStore(builder, imms[i], imm_ptr);
3098       }
3099    } else {
3100       /* simply copy the immediate values into the next immediates[] slot */
3101       unsigned i;
3102       assert(imm->Immediate.NrTokens - 1 <= 4);
3103       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3104 
3105       for(i = 0; i < 4; ++i )
3106          bld->immediates[bld->num_immediates][i] = imms[i];
3107 
3108       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3109          unsigned index = bld->num_immediates;
3110          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3111          LLVMBuilderRef builder = gallivm->builder;
3112          for (i = 0; i < 4; ++i ) {
3113             LLVMValueRef lindex = lp_build_const_int32(
3114                      bld->bld_base.base.gallivm, index * 4 + i);
3115             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3116                                                 bld->imms_array, &lindex, 1, "");
3117             LLVMBuildStore(builder,
3118                            bld->immediates[index][i],
3119                            imm_ptr);
3120          }
3121       }
3122    }
3123 
3124    bld->num_immediates++;
3125 }
3126 
3127 static void
ddx_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3128 ddx_emit(
3129    const struct lp_build_tgsi_action * action,
3130    struct lp_build_tgsi_context * bld_base,
3131    struct lp_build_emit_data * emit_data)
3132 {
3133    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3134 
3135    emit_fetch_deriv(bld, emit_data->args[0], NULL,
3136                     &emit_data->output[emit_data->chan], NULL);
3137 }
3138 
3139 static void
ddy_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3140 ddy_emit(
3141    const struct lp_build_tgsi_action * action,
3142    struct lp_build_tgsi_context * bld_base,
3143    struct lp_build_emit_data * emit_data)
3144 {
3145    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3146 
3147    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3148                     &emit_data->output[emit_data->chan]);
3149 }
3150 
3151 static void
kill_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3152 kill_emit(
3153    const struct lp_build_tgsi_action * action,
3154    struct lp_build_tgsi_context * bld_base,
3155    struct lp_build_emit_data * emit_data)
3156 {
3157    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3158 
3159    emit_kill(bld, bld_base->pc - 1);
3160 }
3161 
3162 static void
kill_if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3163 kill_if_emit(
3164    const struct lp_build_tgsi_action * action,
3165    struct lp_build_tgsi_context * bld_base,
3166    struct lp_build_emit_data * emit_data)
3167 {
3168    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3169 
3170    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3171 }
3172 
3173 static void
tex_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3174 tex_emit(
3175    const struct lp_build_tgsi_action * action,
3176    struct lp_build_tgsi_context * bld_base,
3177    struct lp_build_emit_data * emit_data)
3178 {
3179    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3180 
3181    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3182             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3183 }
3184 
3185 static void
tex2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3186 tex2_emit(
3187    const struct lp_build_tgsi_action * action,
3188    struct lp_build_tgsi_context * bld_base,
3189    struct lp_build_emit_data * emit_data)
3190 {
3191    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3192 
3193    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3194             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3195 }
3196 
3197 static void
txb_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3198 txb_emit(
3199    const struct lp_build_tgsi_action * action,
3200    struct lp_build_tgsi_context * bld_base,
3201    struct lp_build_emit_data * emit_data)
3202 {
3203    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3204 
3205    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3206             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3207 }
3208 
3209 static void
txb2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3210 txb2_emit(
3211    const struct lp_build_tgsi_action * action,
3212    struct lp_build_tgsi_context * bld_base,
3213    struct lp_build_emit_data * emit_data)
3214 {
3215    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3216 
3217    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3218             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3219 }
3220 
3221 static void
txd_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3222 txd_emit(
3223    const struct lp_build_tgsi_action * action,
3224    struct lp_build_tgsi_context * bld_base,
3225    struct lp_build_emit_data * emit_data)
3226 {
3227    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3228 
3229    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3230             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3231 }
3232 
3233 static void
txl_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3234 txl_emit(
3235    const struct lp_build_tgsi_action * action,
3236    struct lp_build_tgsi_context * bld_base,
3237    struct lp_build_emit_data * emit_data)
3238 {
3239    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3240 
3241    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3242             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3243 }
3244 
3245 static void
txl2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3246 txl2_emit(
3247    const struct lp_build_tgsi_action * action,
3248    struct lp_build_tgsi_context * bld_base,
3249    struct lp_build_emit_data * emit_data)
3250 {
3251    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3252 
3253    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3254             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3255 }
3256 
3257 static void
txp_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3258 txp_emit(
3259    const struct lp_build_tgsi_action * action,
3260    struct lp_build_tgsi_context * bld_base,
3261    struct lp_build_emit_data * emit_data)
3262 {
3263    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3264 
3265    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3266             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3267 }
3268 
3269 static void
tg4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3270 tg4_emit(
3271    const struct lp_build_tgsi_action * action,
3272    struct lp_build_tgsi_context * bld_base,
3273    struct lp_build_emit_data * emit_data)
3274 {
3275    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3276 
3277    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3278             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3279 }
3280 
3281 static void
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3282 txq_emit(
3283    const struct lp_build_tgsi_action * action,
3284    struct lp_build_tgsi_context * bld_base,
3285    struct lp_build_emit_data * emit_data)
3286 {
3287    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3288 
3289    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3290 }
3291 
3292 static void
txf_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3293 txf_emit(
3294    const struct lp_build_tgsi_action * action,
3295    struct lp_build_tgsi_context * bld_base,
3296    struct lp_build_emit_data * emit_data)
3297 {
3298    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3299 
3300    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3301 }
3302 
3303 static void
sample_i_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3304 sample_i_emit(
3305    const struct lp_build_tgsi_action * action,
3306    struct lp_build_tgsi_context * bld_base,
3307    struct lp_build_emit_data * emit_data)
3308 {
3309    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3310 
3311    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3312 }
3313 
3314 static void
sample_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3315 sample_emit(
3316    const struct lp_build_tgsi_action * action,
3317    struct lp_build_tgsi_context * bld_base,
3318    struct lp_build_emit_data * emit_data)
3319 {
3320    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3321 
3322    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3323                FALSE, emit_data->output);
3324 }
3325 
3326 static void
sample_b_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3327 sample_b_emit(
3328    const struct lp_build_tgsi_action * action,
3329    struct lp_build_tgsi_context * bld_base,
3330    struct lp_build_emit_data * emit_data)
3331 {
3332    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3333 
3334    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3335                FALSE, emit_data->output);
3336 }
3337 
3338 static void
sample_c_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3339 sample_c_emit(
3340    const struct lp_build_tgsi_action * action,
3341    struct lp_build_tgsi_context * bld_base,
3342    struct lp_build_emit_data * emit_data)
3343 {
3344    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3345 
3346    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3347                TRUE, emit_data->output);
3348 }
3349 
3350 static void
sample_c_lz_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3351 sample_c_lz_emit(
3352    const struct lp_build_tgsi_action * action,
3353    struct lp_build_tgsi_context * bld_base,
3354    struct lp_build_emit_data * emit_data)
3355 {
3356    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3357 
3358    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3359                TRUE, emit_data->output);
3360 }
3361 
3362 static void
sample_d_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3363 sample_d_emit(
3364    const struct lp_build_tgsi_action * action,
3365    struct lp_build_tgsi_context * bld_base,
3366    struct lp_build_emit_data * emit_data)
3367 {
3368    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3369 
3370    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3371                FALSE, emit_data->output);
3372 }
3373 
3374 static void
sample_l_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3375 sample_l_emit(
3376    const struct lp_build_tgsi_action * action,
3377    struct lp_build_tgsi_context * bld_base,
3378    struct lp_build_emit_data * emit_data)
3379 {
3380    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3381 
3382    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3383                FALSE, emit_data->output);
3384 }
3385 
3386 static void
sviewinfo_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3387 sviewinfo_emit(
3388    const struct lp_build_tgsi_action * action,
3389    struct lp_build_tgsi_context * bld_base,
3390    struct lp_build_emit_data * emit_data)
3391 {
3392    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3393 
3394    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3395 }
3396 
3397 static LLVMValueRef
mask_vec(struct lp_build_tgsi_context * bld_base)3398 mask_vec(struct lp_build_tgsi_context *bld_base)
3399 {
3400    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3401    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3402    struct lp_exec_mask *exec_mask = &bld->exec_mask;
3403 
3404    if (!exec_mask->has_mask) {
3405       return lp_build_mask_value(bld->mask);
3406    }
3407    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3408                        exec_mask->exec_mask, "");
3409 }
3410 
3411 static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3412 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3413                           LLVMValueRef ptr,
3414                           LLVMValueRef mask)
3415 {
3416    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3417    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3418 
3419    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3420 
3421    LLVMBuildStore(builder, current_vec, ptr);
3422 }
3423 
3424 static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3425 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3426                              LLVMValueRef ptr,
3427                              LLVMValueRef mask)
3428 {
3429    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3430    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3431 
3432    current_vec = lp_build_select(&bld_base->uint_bld,
3433                                  mask,
3434                                  bld_base->uint_bld.zero,
3435                                  current_vec);
3436 
3437    LLVMBuildStore(builder, current_vec, ptr);
3438 }
3439 
3440 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)3441 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3442                                   LLVMValueRef current_mask_vec,
3443                                   LLVMValueRef total_emitted_vertices_vec)
3444 {
3445    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3446    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3447    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3448                                         total_emitted_vertices_vec,
3449                                         bld->max_output_vertices_vec);
3450 
3451    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3452 }
3453 
/* TGSI EMIT: emit one geometry-shader vertex on all live lanes.
 * Gathers the current output registers and hands them to the GS
 * interface, then bumps the per-primitive and total vertex counters
 * only for lanes that are live and below the max-output-vertices limit. */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      /* live lanes, clamped so lanes past the declared vertex limit
       * are dropped */
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* count the vertex on the active lanes only */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
3487 
3488 
/* Close the current geometry-shader primitive on the lanes selected by
 * mask. Restricts the mask to lanes that actually have unflushed
 * vertices, notifies the GS interface, bumps the emitted-primitives
 * counter and resets the per-primitive vertex counter. Also called from
 * emit_epilogue with the shader-wide mask (the exec mask is not valid
 * there), so the mask is taken as a parameter. */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* lanes whose current primitive has at least one vertex */
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* one more primitive on the lanes that just closed one ... */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      /* ... and their per-primitive vertex count starts over at zero */
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
3541 
3542 static void
end_primitive(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3543 end_primitive(
3544    const struct lp_build_tgsi_action * action,
3545    struct lp_build_tgsi_context * bld_base,
3546    struct lp_build_emit_data * emit_data)
3547 {
3548    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3549 
3550    if (bld->gs_iface->end_primitive) {
3551       LLVMValueRef mask = mask_vec(bld_base);
3552       end_primitive_masked(bld_base, mask);
3553    }
3554 }
3555 
3556 static void
cal_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3557 cal_emit(
3558    const struct lp_build_tgsi_action * action,
3559    struct lp_build_tgsi_context * bld_base,
3560    struct lp_build_emit_data * emit_data)
3561 {
3562    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3563 
3564    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3565                      &bld_base->pc);
3566 }
3567 
3568 static void
ret_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3569 ret_emit(
3570    const struct lp_build_tgsi_action * action,
3571    struct lp_build_tgsi_context * bld_base,
3572    struct lp_build_emit_data * emit_data)
3573 {
3574    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3575 
3576    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3577 }
3578 
3579 static void
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3580 brk_emit(
3581    const struct lp_build_tgsi_action * action,
3582    struct lp_build_tgsi_context * bld_base,
3583    struct lp_build_emit_data * emit_data)
3584 {
3585    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3586 
3587    lp_exec_break(&bld->exec_mask, bld_base);
3588 }
3589 
3590 static void
breakc_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3591 breakc_emit(
3592    const struct lp_build_tgsi_action * action,
3593    struct lp_build_tgsi_context * bld_base,
3594    struct lp_build_emit_data * emit_data)
3595 {
3596    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3597    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3598    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3599    LLVMValueRef unsigned_cond =
3600       LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
3601    LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3602                                     unsigned_cond,
3603                                     uint_bld->zero);
3604 
3605    lp_exec_break_condition(&bld->exec_mask, cond);
3606 }
3607 
3608 static void
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3609 if_emit(
3610    const struct lp_build_tgsi_action * action,
3611    struct lp_build_tgsi_context * bld_base,
3612    struct lp_build_emit_data * emit_data)
3613 {
3614    LLVMValueRef tmp;
3615    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3616 
3617    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3618                       emit_data->args[0], bld->bld_base.base.zero);
3619    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3620 }
3621 
3622 static void
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3623 uif_emit(
3624    const struct lp_build_tgsi_action * action,
3625    struct lp_build_tgsi_context * bld_base,
3626    struct lp_build_emit_data * emit_data)
3627 {
3628    LLVMValueRef tmp;
3629    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3630    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3631 
3632    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3633                       emit_data->args[0], uint_bld->zero);
3634    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3635 }
3636 
3637 static void
case_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3638 case_emit(
3639    const struct lp_build_tgsi_action * action,
3640    struct lp_build_tgsi_context * bld_base,
3641    struct lp_build_emit_data * emit_data)
3642 {
3643    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3644 
3645    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3646 }
3647 
3648 static void
default_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3649 default_emit(
3650    const struct lp_build_tgsi_action * action,
3651    struct lp_build_tgsi_context * bld_base,
3652    struct lp_build_emit_data * emit_data)
3653 {
3654    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3655 
3656    lp_exec_default(&bld->exec_mask, bld_base);
3657 }
3658 
3659 static void
switch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3660 switch_emit(
3661    const struct lp_build_tgsi_action * action,
3662    struct lp_build_tgsi_context * bld_base,
3663    struct lp_build_emit_data * emit_data)
3664 {
3665    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3666 
3667    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3668 }
3669 
3670 static void
endswitch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3671 endswitch_emit(
3672    const struct lp_build_tgsi_action * action,
3673    struct lp_build_tgsi_context * bld_base,
3674    struct lp_build_emit_data * emit_data)
3675 {
3676    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3677 
3678    lp_exec_endswitch(&bld->exec_mask, bld_base);
3679 }
3680 
3681 static void
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3682 bgnloop_emit(
3683    const struct lp_build_tgsi_action * action,
3684    struct lp_build_tgsi_context * bld_base,
3685    struct lp_build_emit_data * emit_data)
3686 {
3687    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3688 
3689    lp_exec_bgnloop(&bld->exec_mask);
3690 }
3691 
3692 static void
bgnsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3693 bgnsub_emit(
3694    const struct lp_build_tgsi_action * action,
3695    struct lp_build_tgsi_context * bld_base,
3696    struct lp_build_emit_data * emit_data)
3697 {
3698    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3699 
3700    lp_exec_mask_bgnsub(&bld->exec_mask);
3701 }
3702 
3703 static void
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3704 else_emit(
3705    const struct lp_build_tgsi_action * action,
3706    struct lp_build_tgsi_context * bld_base,
3707    struct lp_build_emit_data * emit_data)
3708 {
3709    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3710 
3711    lp_exec_mask_cond_invert(&bld->exec_mask);
3712 }
3713 
3714 static void
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3715 endif_emit(
3716    const struct lp_build_tgsi_action * action,
3717    struct lp_build_tgsi_context * bld_base,
3718    struct lp_build_emit_data * emit_data)
3719 {
3720    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3721 
3722    lp_exec_mask_cond_pop(&bld->exec_mask);
3723 }
3724 
3725 static void
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3726 endloop_emit(
3727    const struct lp_build_tgsi_action * action,
3728    struct lp_build_tgsi_context * bld_base,
3729    struct lp_build_emit_data * emit_data)
3730 {
3731    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3732 
3733    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3734 }
3735 
3736 static void
endsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3737 endsub_emit(
3738    const struct lp_build_tgsi_action * action,
3739    struct lp_build_tgsi_context * bld_base,
3740    struct lp_build_emit_data * emit_data)
3741 {
3742    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3743 
3744    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3745 }
3746 
3747 static void
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3748 cont_emit(
3749    const struct lp_build_tgsi_action * action,
3750    struct lp_build_tgsi_context * bld_base,
3751    struct lp_build_emit_data * emit_data)
3752 {
3753    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3754 
3755    lp_exec_continue(&bld->exec_mask);
3756 }
3757 
/* Shader prologue: allocate backing storage for register files that are
 * addressed indirectly (they can't live in SSA values then), copy inputs
 * into their array when needed, and zero the GS emit counters.
 * Array sizes are (file_max + 1) registers * 4 channels, i.e.
 * file_max * 4 + 4 vec-typed slots. */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* indirectly addressed temporaries live in an alloca'd array */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   /* same for indirectly addressed outputs */
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   /* and for indirectly addressed immediates */
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* store each defined input channel at slot index * 4 + chan */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   /* geometry shaders track emitted primitives/vertices in allocas,
    * all starting at zero */
   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   /* optional runtime dump of the constant/input files for debugging */
   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
3848 
/* Shader epilogue: for geometry shaders, flush any unfinished primitive
 * and report final emit counts to the GS interface; otherwise copy the
 * output values back into the caller-visible output slots. */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* hand the final counts to the GS interface */
      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}
3886 
/**
 * Translate a TGSI shader into LLVM IR, SoA style: each TGSI channel is
 * represented by one LLVM vector holding that channel for 'type.length'
 * executions at once.
 *
 * \param gallivm          gallivm/LLVM state used for all IR emission
 * \param tokens           the TGSI token stream to translate
 * \param type             the basic execution vector type (width/length)
 * \param mask             execution mask context
 * \param consts_ptr       pointer to the constant buffer data
 * \param const_sizes_ptr  sizes of the constant buffers
 * \param system_values    pre-built system values, copied into the context
 * \param inputs           input values, indexed [attrib][channel]
 * \param outputs          output value slots, indexed [attrib][channel]
 * \param context_ptr      opaque jit context pointer, stored for callbacks
 * \param thread_data_ptr  opaque per-thread data pointer, stored for callbacks
 * \param sampler          texture sampling code generation interface
 * \param info             pre-gathered shader info (file limits, indirection)
 * \param gs_iface         geometry shader interface, or NULL for non-GS shaders
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* Signed type with the same width/length as 'type'. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   /* Double-width (2x 'type.width') contexts for double / 64-bit int ops. */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always baked in a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   /* Register-file fetch/store hooks. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions; start from the CPU defaults, then override the
    * opcodes that need SoA-specific handling (flow control, texturing). */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   /* Walk the token stream and emit the IR. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Debug aid (disabled): dump the TGSI and the generated function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   /* Debug aid (disabled): dump the whole containing LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
   lp_exec_mask_fini(&bld.exec_mask);
}
4077