• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
/**
 * @file
 * TGSI to LLVM IR translation -- AoS.
 *
 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
39 
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60 
61 
62 /**
63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64  * ordering.
65  */
66 static LLVMValueRef
swizzle_aos(struct lp_build_tgsi_context * bld_base,LLVMValueRef a,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68             LLVMValueRef a,
69             unsigned swizzle_x,
70             unsigned swizzle_y,
71             unsigned swizzle_z,
72             unsigned swizzle_w)
73 {
74    unsigned char swizzles[4];
75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76 
77    assert(swizzle_x < 4);
78    assert(swizzle_y < 4);
79    assert(swizzle_z < 4);
80    assert(swizzle_w < 4);
81 
82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86 
87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89 
90 
91 static LLVMValueRef
swizzle_scalar_aos(struct lp_build_tgsi_aos_context * bld,LLVMValueRef a,unsigned chan)92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93                    LLVMValueRef a,
94                    unsigned chan)
95 {
96    chan = bld->swizzles[chan];
97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98 }
99 
100 
101 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)102 emit_fetch_constant(
103    struct lp_build_tgsi_context * bld_base,
104    const struct tgsi_full_src_register * reg,
105    enum tgsi_opcode_type stype,
106    unsigned swizzle)
107 {
108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110    struct lp_type type = bld_base->base.type;
111    LLVMValueRef res;
112    unsigned chan;
113 
114    assert(!reg->Register.Indirect);
115 
116    /*
117     * Get the constants components
118     */
119 
120    res = bld->bld_base.base.undef;
121    for (chan = 0; chan < 4; ++chan) {
122       LLVMValueRef index;
123       LLVMValueRef scalar_ptr;
124       LLVMValueRef scalar;
125       LLVMValueRef swizzle;
126 
127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
128                                    reg->Register.Index * 4 + chan);
129 
130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131 
132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133 
134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135 
136       /*
137        * NOTE: constants array is always assumed to be RGBA
138        */
139 
140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141                                      bld->swizzles[chan]);
142 
143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144    }
145 
146    /*
147     * Broadcast the first quaternion to all others.
148     *
149     * XXX: could be factored into a reusable function.
150     */
151 
152    if (type.length > 4) {
153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154       unsigned i;
155 
156       for (chan = 0; chan < 4; ++chan) {
157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158       }
159 
160       for (i = 4; i < type.length; ++i) {
161          shuffles[i] = shuffles[i % 4];
162       }
163 
164       res = LLVMBuildShuffleVector(builder,
165                                    res, bld->bld_base.base.undef,
166                                    LLVMConstVector(shuffles, type.length),
167                                    "");
168    }
169    return res;
170 }
171 
172 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)173 emit_fetch_immediate(
174    struct lp_build_tgsi_context * bld_base,
175    const struct tgsi_full_src_register * reg,
176    enum tgsi_opcode_type stype,
177    unsigned swizzle)
178 {
179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180    LLVMValueRef res = bld->immediates[reg->Register.Index];
181    assert(res);
182    return res;
183 }
184 
185 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)186 emit_fetch_input(
187    struct lp_build_tgsi_context * bld_base,
188    const struct tgsi_full_src_register * reg,
189    enum tgsi_opcode_type stype,
190    unsigned swizzle)
191 {
192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193    LLVMValueRef res = bld->inputs[reg->Register.Index];
194    assert(!reg->Register.Indirect);
195    assert(res);
196    return res;
197 }
198 
199 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)200 emit_fetch_temporary(
201    struct lp_build_tgsi_context * bld_base,
202    const struct tgsi_full_src_register * reg,
203    enum tgsi_opcode_type stype,
204    unsigned swizzle)
205 {
206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209    LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210    assert(!reg->Register.Indirect);
211    if (!res)
212       return bld->bld_base.base.undef;
213 
214    return res;
215 }
216 
217 /**
218  * Register store.
219  */
220 void
lp_emit_store_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,unsigned index,LLVMValueRef value)221 lp_emit_store_aos(
222    struct lp_build_tgsi_aos_context *bld,
223    const struct tgsi_full_instruction *inst,
224    unsigned index,
225    LLVMValueRef value)
226 {
227    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229    LLVMValueRef mask = NULL;
230    LLVMValueRef ptr;
231 
232    /*
233     * Saturate the value
234     */
235    if (inst->Instruction.Saturate) {
236       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
237       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
238    }
239 
240    /*
241     * Translate the register file
242     */
243 
244    assert(!reg->Register.Indirect);
245 
246    switch (reg->Register.File) {
247    case TGSI_FILE_OUTPUT:
248       ptr = bld->outputs[reg->Register.Index];
249       break;
250 
251    case TGSI_FILE_TEMPORARY:
252       ptr = bld->temps[reg->Register.Index];
253       break;
254 
255    case TGSI_FILE_ADDRESS:
256       ptr = bld->addr[reg->Indirect.Index];
257       break;
258 
259    case TGSI_FILE_PREDICATE:
260       ptr = bld->preds[reg->Register.Index];
261       break;
262 
263    default:
264       assert(0);
265       return;
266    }
267 
268    if (!ptr)
269       return;
270    /*
271     * Predicate
272     */
273 
274    if (inst->Instruction.Predicate) {
275       LLVMValueRef pred;
276 
277       assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
278 
279       pred = LLVMBuildLoad(builder,
280                            bld->preds[inst->Predicate.Index], "");
281 
282       /*
283        * Convert the value to an integer mask.
284        */
285       pred = lp_build_compare(bld->bld_base.base.gallivm,
286                                bld->bld_base.base.type,
287                                PIPE_FUNC_NOTEQUAL,
288                                pred,
289                                bld->bld_base.base.zero);
290 
291       if (inst->Predicate.Negate) {
292          pred = LLVMBuildNot(builder, pred, "");
293       }
294 
295       pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
296                          inst->Predicate.SwizzleX,
297                          inst->Predicate.SwizzleY,
298                          inst->Predicate.SwizzleZ,
299                          inst->Predicate.SwizzleW);
300 
301       if (mask) {
302          mask = LLVMBuildAnd(builder, mask, pred, "");
303       } else {
304          mask = pred;
305       }
306    }
307 
308    /*
309     * Writemask
310     */
311 
312    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
313       LLVMValueRef writemask;
314 
315       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
316                                                    bld->bld_base.base.type,
317                                                    reg->Register.WriteMask,
318                                                    TGSI_NUM_CHANNELS,
319                                                    bld->swizzles);
320 
321       if (mask) {
322          mask = LLVMBuildAnd(builder, mask, writemask, "");
323       } else {
324          mask = writemask;
325       }
326    }
327 
328    if (mask) {
329       LLVMValueRef orig_value;
330 
331       orig_value = LLVMBuildLoad(builder, ptr, "");
332       value = lp_build_select(&bld->bld_base.base,
333                               mask, value, orig_value);
334    }
335 
336    LLVMBuildStore(builder, value, ptr);
337 }
338 
339 
340 /**
341  * High-level instruction translators.
342  */
343 
344 static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)345 emit_tex(struct lp_build_tgsi_aos_context *bld,
346          const struct tgsi_full_instruction *inst,
347          enum lp_build_tex_modifier modifier)
348 {
349    unsigned target;
350    unsigned unit;
351    LLVMValueRef coords;
352    struct lp_derivatives derivs = { {NULL}, {NULL} };
353 
354    if (!bld->sampler) {
355       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
356       return bld->bld_base.base.undef;
357    }
358 
359    target = inst->Texture.Texture;
360 
361    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
362 
363    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
364       /* probably not going to work */
365       derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
366       derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
367       unit = inst->Src[3].Register.Index;
368    }
369    else {
370       unit = inst->Src[1].Register.Index;
371    }
372    return bld->sampler->emit_fetch_texel(bld->sampler,
373                                          &bld->bld_base.base,
374                                          target, unit,
375                                          coords, derivs,
376                                          modifier);
377 }
378 
379 
380 static LLVMValueRef
emit_sample(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)381 emit_sample(struct lp_build_tgsi_aos_context *bld,
382             const struct tgsi_full_instruction *inst,
383             enum lp_build_tex_modifier modifier)
384 {
385    unsigned target;
386    unsigned unit;
387    LLVMValueRef coords;
388    struct lp_derivatives derivs = { {NULL}, {NULL} };
389 
390    if (!bld->sampler) {
391       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
392       return bld->bld_base.base.undef;
393    }
394 
395    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
396 
397    /* ignore modifiers, can't handle different sampler / sampler view, etc... */
398    unit = inst->Src[1].Register.Index;
399    assert(inst->Src[2].Register.Index == unit);
400 
401    target = bld->sv[unit].Resource;
402 
403    return bld->sampler->emit_fetch_texel(bld->sampler,
404                                          &bld->bld_base.base,
405                                          target, unit,
406                                          coords, derivs,
407                                          modifier);
408 }
409 
410 
411 void
lp_emit_declaration_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_declaration * decl)412 lp_emit_declaration_aos(
413    struct lp_build_tgsi_aos_context *bld,
414    const struct tgsi_full_declaration *decl)
415 {
416    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
417    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
418 
419    unsigned first = decl->Range.First;
420    unsigned last = decl->Range.Last;
421    unsigned idx;
422 
423    for (idx = first; idx <= last; ++idx) {
424       switch (decl->Declaration.File) {
425       case TGSI_FILE_TEMPORARY:
426          assert(idx < LP_MAX_INLINED_TEMPS);
427          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
428             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
429             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
430                                                      vec_type, array_size, "");
431          } else {
432             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
433          }
434          break;
435 
436       case TGSI_FILE_OUTPUT:
437          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
438          break;
439 
440       case TGSI_FILE_ADDRESS:
441          assert(idx < LP_MAX_TGSI_ADDRS);
442          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
443          break;
444 
445       case TGSI_FILE_PREDICATE:
446          assert(idx < LP_MAX_TGSI_PREDS);
447          bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
448          break;
449 
450       case TGSI_FILE_SAMPLER_VIEW:
451          /*
452           * The target stored here MUST match whatever there actually
453           * is in the set sampler views (what about return type?).
454           */
455          assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
456          for (idx = first; idx <= last; ++idx) {
457             bld->sv[idx] = decl->SamplerView;
458          }
459          break;
460 
461       default:
462          /* don't need to declare other vars */
463          break;
464       }
465    }
466 }
467 
468 
469 /**
470  * Emit LLVM for one TGSI instruction.
471  * \param return TRUE for success, FALSE otherwise
472  */
473 boolean
lp_emit_instruction_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,int * pc)474 lp_emit_instruction_aos(
475    struct lp_build_tgsi_aos_context *bld,
476    const struct tgsi_full_instruction *inst,
477    const struct tgsi_opcode_info *info,
478    int *pc)
479 {
480    LLVMValueRef src0, src1, src2;
481    LLVMValueRef tmp0;
482    LLVMValueRef dst0 = NULL;
483 
484    /*
485     * Stores and write masks are handled in a general fashion after the long
486     * instruction opcode switch statement.
487     *
488     * Although not stricitly necessary, we avoid generating instructions for
489     * channels which won't be stored, in cases where's that easy. For some
490     * complex instructions, like texture sampling, it is more convenient to
491     * assume a full writemask and then let LLVM optimization passes eliminate
492     * redundant code.
493     */
494 
495    (*pc)++;
496 
497    assert(info->num_dst <= 1);
498    if (info->num_dst) {
499       dst0 = bld->bld_base.base.undef;
500    }
501 
502    switch (inst->Instruction.Opcode) {
503    case TGSI_OPCODE_ARL:
504       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
505       dst0 = lp_build_floor(&bld->bld_base.base, src0);
506       break;
507 
508    case TGSI_OPCODE_MOV:
509       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
510       break;
511 
512    case TGSI_OPCODE_LIT:
513       return FALSE;
514 
515    case TGSI_OPCODE_RCP:
516    /* TGSI_OPCODE_RECIP */
517       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
518       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
519       break;
520 
521    case TGSI_OPCODE_RSQ:
522    /* TGSI_OPCODE_RECIPSQRT */
523       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
524       tmp0 = lp_build_abs(&bld->bld_base.base, src0);
525       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
526       break;
527 
528    case TGSI_OPCODE_EXP:
529       return FALSE;
530 
531    case TGSI_OPCODE_LOG:
532       return FALSE;
533 
534    case TGSI_OPCODE_MUL:
535       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
536       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
537       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
538       break;
539 
540    case TGSI_OPCODE_ADD:
541       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
542       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
543       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
544       break;
545 
546    case TGSI_OPCODE_DP3:
547    /* TGSI_OPCODE_DOT3 */
548       return FALSE;
549 
550    case TGSI_OPCODE_DP4:
551    /* TGSI_OPCODE_DOT4 */
552       return FALSE;
553 
554    case TGSI_OPCODE_DST:
555       return FALSE;
556 
557    case TGSI_OPCODE_MIN:
558       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
559       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
560       dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
561       break;
562 
563    case TGSI_OPCODE_MAX:
564       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
565       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
566       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
567       break;
568 
569    case TGSI_OPCODE_SLT:
570    /* TGSI_OPCODE_SETLT */
571       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
572       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
573       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
574       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
575       break;
576 
577    case TGSI_OPCODE_SGE:
578    /* TGSI_OPCODE_SETGE */
579       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
580       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
581       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
582       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
583       break;
584 
585    case TGSI_OPCODE_MAD:
586    /* TGSI_OPCODE_MADD */
587       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
588       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
589       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
590       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
591       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
592       break;
593 
594    case TGSI_OPCODE_LRP:
595       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
596       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
597       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
598       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
599       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
600       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
601       break;
602 
603    case TGSI_OPCODE_DP2A:
604       return FALSE;
605 
606    case TGSI_OPCODE_FRC:
607       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
608       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
609       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
610       break;
611 
612    case TGSI_OPCODE_CLAMP:
613       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
614       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
615       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
616       tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
617       dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
618       break;
619 
620    case TGSI_OPCODE_FLR:
621       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
622       dst0 = lp_build_floor(&bld->bld_base.base, src0);
623       break;
624 
625    case TGSI_OPCODE_ROUND:
626       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
627       dst0 = lp_build_round(&bld->bld_base.base, src0);
628       break;
629 
630    case TGSI_OPCODE_EX2:
631       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
632       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
633       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
634       break;
635 
636    case TGSI_OPCODE_LG2:
637       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
638       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
639       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
640       break;
641 
642    case TGSI_OPCODE_POW:
643       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
644       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
645       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
646       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
647       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
648       break;
649 
650    case TGSI_OPCODE_XPD:
651       return FALSE;
652 
653    case TGSI_OPCODE_DPH:
654       return FALSE;
655 
656    case TGSI_OPCODE_COS:
657       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
658       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
659       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
660       break;
661 
662    case TGSI_OPCODE_DDX:
663       return FALSE;
664 
665    case TGSI_OPCODE_DDY:
666       return FALSE;
667 
668    case TGSI_OPCODE_KILL:
669       return FALSE;
670 
671    case TGSI_OPCODE_KILL_IF:
672       return FALSE;
673 
674    case TGSI_OPCODE_PK2H:
675       return FALSE;
676       break;
677 
678    case TGSI_OPCODE_PK2US:
679       return FALSE;
680       break;
681 
682    case TGSI_OPCODE_PK4B:
683       return FALSE;
684       break;
685 
686    case TGSI_OPCODE_PK4UB:
687       return FALSE;
688 
689    case TGSI_OPCODE_SEQ:
690       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
691       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
692       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
693       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
694       break;
695 
696    case TGSI_OPCODE_SGT:
697       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
698       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
699       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
700       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
701       break;
702 
703    case TGSI_OPCODE_SIN:
704       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
705       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
706       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
707       break;
708 
709    case TGSI_OPCODE_SLE:
710       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
711       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
712       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
713       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
714       break;
715 
716    case TGSI_OPCODE_SNE:
717       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
718       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
719       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
720       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
721       break;
722 
723    case TGSI_OPCODE_TEX:
724       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
725       break;
726 
727    case TGSI_OPCODE_TXD:
728       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
729       break;
730 
731    case TGSI_OPCODE_UP2H:
732       /* deprecated */
733       assert (0);
734       return FALSE;
735       break;
736 
737    case TGSI_OPCODE_UP2US:
738       /* deprecated */
739       assert(0);
740       return FALSE;
741       break;
742 
743    case TGSI_OPCODE_UP4B:
744       /* deprecated */
745       assert(0);
746       return FALSE;
747       break;
748 
749    case TGSI_OPCODE_UP4UB:
750       /* deprecated */
751       assert(0);
752       return FALSE;
753       break;
754 
755    case TGSI_OPCODE_ARR:
756       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
757       dst0 = lp_build_round(&bld->bld_base.base, src0);
758       break;
759 
760    case TGSI_OPCODE_CAL:
761       return FALSE;
762 
763    case TGSI_OPCODE_RET:
764       /* safe to ignore at end */
765       break;
766 
767    case TGSI_OPCODE_END:
768       *pc = -1;
769       break;
770 
771    case TGSI_OPCODE_SSG:
772    /* TGSI_OPCODE_SGN */
773       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
774       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
775       break;
776 
777    case TGSI_OPCODE_CMP:
778       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
779       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
780       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
781       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
782       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
783       break;
784 
785    case TGSI_OPCODE_SCS:
786       return FALSE;
787 
788    case TGSI_OPCODE_TXB:
789       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
790       break;
791 
792    case TGSI_OPCODE_DIV:
793       assert(0);
794       return FALSE;
795       break;
796 
797    case TGSI_OPCODE_DP2:
798       return FALSE;
799 
800    case TGSI_OPCODE_TXL:
801       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
802       break;
803 
804    case TGSI_OPCODE_TXP:
805       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
806       break;
807 
808    case TGSI_OPCODE_BRK:
809       return FALSE;
810 
811    case TGSI_OPCODE_IF:
812    case TGSI_OPCODE_UIF:
813       return FALSE;
814 
815    case TGSI_OPCODE_BGNLOOP:
816       return FALSE;
817 
818    case TGSI_OPCODE_BGNSUB:
819       return FALSE;
820 
821    case TGSI_OPCODE_ELSE:
822       return FALSE;
823 
824    case TGSI_OPCODE_ENDIF:
825       return FALSE;
826 
827    case TGSI_OPCODE_ENDLOOP:
828       return FALSE;
829 
830    case TGSI_OPCODE_ENDSUB:
831       return FALSE;
832 
833    case TGSI_OPCODE_PUSHA:
834       /* deprecated? */
835       assert(0);
836       return FALSE;
837       break;
838 
839    case TGSI_OPCODE_POPA:
840       /* deprecated? */
841       assert(0);
842       return FALSE;
843       break;
844 
845    case TGSI_OPCODE_CEIL:
846       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
847       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
848       break;
849 
850    case TGSI_OPCODE_I2F:
851       assert(0);
852       return FALSE;
853       break;
854 
855    case TGSI_OPCODE_NOT:
856       assert(0);
857       return FALSE;
858       break;
859 
860    case TGSI_OPCODE_TRUNC:
861       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
862       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
863       break;
864 
865    case TGSI_OPCODE_SHL:
866       assert(0);
867       return FALSE;
868       break;
869 
870    case TGSI_OPCODE_ISHR:
871       assert(0);
872       return FALSE;
873       break;
874 
875    case TGSI_OPCODE_AND:
876       assert(0);
877       return FALSE;
878       break;
879 
880    case TGSI_OPCODE_OR:
881       assert(0);
882       return FALSE;
883       break;
884 
885    case TGSI_OPCODE_MOD:
886       assert(0);
887       return FALSE;
888       break;
889 
890    case TGSI_OPCODE_XOR:
891       assert(0);
892       return FALSE;
893       break;
894 
895    case TGSI_OPCODE_SAD:
896       assert(0);
897       return FALSE;
898       break;
899 
900    case TGSI_OPCODE_TXF:
901       assert(0);
902       return FALSE;
903       break;
904 
905    case TGSI_OPCODE_TXQ:
906       assert(0);
907       return FALSE;
908       break;
909 
910    case TGSI_OPCODE_CONT:
911       return FALSE;
912 
913    case TGSI_OPCODE_EMIT:
914       return FALSE;
915       break;
916 
917    case TGSI_OPCODE_ENDPRIM:
918       return FALSE;
919       break;
920 
921    case TGSI_OPCODE_NOP:
922       break;
923 
924    case TGSI_OPCODE_SAMPLE:
925       dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
926       break;
927 
928    default:
929       return FALSE;
930    }
931 
932    if (info->num_dst) {
933       lp_emit_store_aos(bld, inst, 0, dst0);
934    }
935 
936    return TRUE;
937 }
938 
939 
940 void
lp_build_tgsi_aos(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,const unsigned char swizzles[4],LLVMValueRef consts_ptr,const LLVMValueRef * inputs,LLVMValueRef * outputs,struct lp_build_sampler_aos * sampler,const struct tgsi_shader_info * info)941 lp_build_tgsi_aos(struct gallivm_state *gallivm,
942                   const struct tgsi_token *tokens,
943                   struct lp_type type,
944                   const unsigned char swizzles[4],
945                   LLVMValueRef consts_ptr,
946                   const LLVMValueRef *inputs,
947                   LLVMValueRef *outputs,
948                   struct lp_build_sampler_aos *sampler,
949                   const struct tgsi_shader_info *info)
950 {
951    struct lp_build_tgsi_aos_context bld;
952    struct tgsi_parse_context parse;
953    uint num_immediates = 0;
954    unsigned chan;
955    int pc = 0;
956 
957    /* Setup build context */
958    memset(&bld, 0, sizeof bld);
959    lp_build_context_init(&bld.bld_base.base, gallivm, type);
960    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
961    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
962    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
963 
964    for (chan = 0; chan < 4; ++chan) {
965       bld.swizzles[chan] = swizzles[chan];
966       bld.inv_swizzles[swizzles[chan]] = chan;
967    }
968 
969    bld.inputs = inputs;
970    bld.outputs = outputs;
971    bld.consts_ptr = consts_ptr;
972    bld.sampler = sampler;
973    bld.indirect_files = info->indirect_files;
974    bld.bld_base.emit_swizzle = swizzle_aos;
975    bld.bld_base.info = info;
976 
977    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
978    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
979    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
980    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
981 
982    /* Set opcode actions */
983    lp_set_default_actions_cpu(&bld.bld_base);
984 
985    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
986       return;
987    }
988 
989    tgsi_parse_init(&parse, tokens);
990 
991    while (!tgsi_parse_end_of_tokens(&parse)) {
992       tgsi_parse_token(&parse);
993 
994       switch(parse.FullToken.Token.Type) {
995       case TGSI_TOKEN_TYPE_DECLARATION:
996          /* Inputs already interpolated */
997          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
998          break;
999 
1000       case TGSI_TOKEN_TYPE_INSTRUCTION:
1001          /* save expanded instruction */
1002          lp_bld_tgsi_add_instruction(&bld.bld_base,
1003                                      &parse.FullToken.FullInstruction);
1004          break;
1005 
1006       case TGSI_TOKEN_TYPE_IMMEDIATE:
1007          /* simply copy the immediate values into the next immediates[] slot */
1008          {
1009             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1010             float imm[4];
1011             assert(size <= 4);
1012             assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
1013             for (chan = 0; chan < 4; ++chan) {
1014                imm[chan] = 0.0f;
1015             }
1016             for (chan = 0; chan < size; ++chan) {
1017                unsigned swizzle = bld.swizzles[chan];
1018                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1019             }
1020             bld.immediates[num_immediates] =
1021                      lp_build_const_aos(gallivm, type,
1022                                         imm[0], imm[1], imm[2], imm[3],
1023                                         NULL);
1024             num_immediates++;
1025          }
1026          break;
1027 
1028       case TGSI_TOKEN_TYPE_PROPERTY:
1029          break;
1030 
1031       default:
1032          assert(0);
1033       }
1034    }
1035 
1036    while (pc != -1) {
1037       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1038       const struct tgsi_opcode_info *opcode_info =
1039          tgsi_get_opcode_info(instr->Instruction.Opcode);
1040       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1041          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1042                        opcode_info->mnemonic);
1043    }
1044 
1045    if (0) {
1046       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1047       LLVMValueRef function = LLVMGetBasicBlockParent(block);
1048       debug_printf("11111111111111111111111111111 \n");
1049       tgsi_dump(tokens, 0);
1050       lp_debug_dump_value(function);
1051       debug_printf("2222222222222222222222222222 \n");
1052    }
1053    tgsi_parse_free(&parse);
1054    FREE(bld.bld_base.instructions);
1055 
1056    if (0) {
1057       LLVMModuleRef module = LLVMGetGlobalParent(
1058          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1059       LLVMDumpModule(module);
1060    }
1061 
1062 }
1063 
1064