• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
/**
 * @file
 * TGSI to LLVM IR translation -- AoS.
 *
 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the
 *   samplers.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
39 
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60 
61 
62 /**
63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64  * ordering.
65  */
66 static LLVMValueRef
swizzle_aos(struct lp_build_tgsi_context * bld_base,LLVMValueRef a,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68             LLVMValueRef a,
69             unsigned swizzle_x,
70             unsigned swizzle_y,
71             unsigned swizzle_z,
72             unsigned swizzle_w)
73 {
74    unsigned char swizzles[4];
75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76 
77    assert(swizzle_x < 4);
78    assert(swizzle_y < 4);
79    assert(swizzle_z < 4);
80    assert(swizzle_w < 4);
81 
82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86 
87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89 
90 
91 static LLVMValueRef
swizzle_scalar_aos(struct lp_build_tgsi_aos_context * bld,LLVMValueRef a,unsigned chan)92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93                    LLVMValueRef a,
94                    unsigned chan)
95 {
96    chan = bld->swizzles[chan];
97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98 }
99 
100 
101 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)102 emit_fetch_constant(
103    struct lp_build_tgsi_context * bld_base,
104    const struct tgsi_full_src_register * reg,
105    enum tgsi_opcode_type stype,
106    unsigned swizzle)
107 {
108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110    struct lp_type type = bld_base->base.type;
111    LLVMValueRef res;
112    unsigned chan;
113 
114    assert(!reg->Register.Indirect);
115 
116    /*
117     * Get the constants components
118     */
119 
120    res = bld->bld_base.base.undef;
121    for (chan = 0; chan < 4; ++chan) {
122       LLVMValueRef index;
123       LLVMValueRef scalar_ptr;
124       LLVMValueRef scalar;
125       LLVMValueRef swizzle;
126 
127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
128                                    reg->Register.Index * 4 + chan);
129 
130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131 
132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133 
134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135 
136       /*
137        * NOTE: constants array is always assumed to be RGBA
138        */
139 
140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141                                      bld->swizzles[chan]);
142 
143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144    }
145 
146    /*
147     * Broadcast the first quaternion to all others.
148     *
149     * XXX: could be factored into a reusable function.
150     */
151 
152    if (type.length > 4) {
153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154       unsigned i;
155 
156       for (chan = 0; chan < 4; ++chan) {
157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158       }
159 
160       for (i = 4; i < type.length; ++i) {
161          shuffles[i] = shuffles[i % 4];
162       }
163 
164       res = LLVMBuildShuffleVector(builder,
165                                    res, bld->bld_base.base.undef,
166                                    LLVMConstVector(shuffles, type.length),
167                                    "");
168    }
169    return res;
170 }
171 
172 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)173 emit_fetch_immediate(
174    struct lp_build_tgsi_context * bld_base,
175    const struct tgsi_full_src_register * reg,
176    enum tgsi_opcode_type stype,
177    unsigned swizzle)
178 {
179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180    LLVMValueRef res = bld->immediates[reg->Register.Index];
181    assert(res);
182    return res;
183 }
184 
185 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)186 emit_fetch_input(
187    struct lp_build_tgsi_context * bld_base,
188    const struct tgsi_full_src_register * reg,
189    enum tgsi_opcode_type stype,
190    unsigned swizzle)
191 {
192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193    LLVMValueRef res = bld->inputs[reg->Register.Index];
194    assert(!reg->Register.Indirect);
195    assert(res);
196    return res;
197 }
198 
199 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)200 emit_fetch_temporary(
201    struct lp_build_tgsi_context * bld_base,
202    const struct tgsi_full_src_register * reg,
203    enum tgsi_opcode_type stype,
204    unsigned swizzle)
205 {
206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
210    LLVMValueRef res = LLVMBuildLoad2(builder, vec_type, temp_ptr, "");
211    assert(!reg->Register.Indirect);
212    if (!res)
213       return bld->bld_base.base.undef;
214 
215    return res;
216 }
217 
218 /**
219  * Register store.
220  */
221 void
lp_emit_store_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,unsigned index,LLVMValueRef value)222 lp_emit_store_aos(
223    struct lp_build_tgsi_aos_context *bld,
224    const struct tgsi_full_instruction *inst,
225    unsigned index,
226    LLVMValueRef value)
227 {
228    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
229    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
230    LLVMValueRef mask = NULL;
231    LLVMValueRef ptr;
232 
233    /*
234     * Saturate the value
235     */
236    if (inst->Instruction.Saturate) {
237       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
238       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
239    }
240 
241    /*
242     * Translate the register file
243     */
244 
245    assert(!reg->Register.Indirect);
246 
247    switch (reg->Register.File) {
248    case TGSI_FILE_OUTPUT:
249       ptr = bld->outputs[reg->Register.Index];
250       break;
251 
252    case TGSI_FILE_TEMPORARY:
253       ptr = bld->temps[reg->Register.Index];
254       break;
255 
256    case TGSI_FILE_ADDRESS:
257       ptr = bld->addr[reg->Indirect.Index];
258       break;
259 
260    default:
261       assert(0);
262       return;
263    }
264 
265    if (!ptr)
266       return;
267 
268    /*
269     * Writemask
270     */
271 
272    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
273       LLVMValueRef writemask;
274 
275       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
276                                                    bld->bld_base.base.type,
277                                                    reg->Register.WriteMask,
278                                                    TGSI_NUM_CHANNELS,
279                                                    bld->swizzles);
280 
281       if (mask) {
282          mask = LLVMBuildAnd(builder, mask, writemask, "");
283       } else {
284          mask = writemask;
285       }
286    }
287 
288    if (mask) {
289       LLVMValueRef orig_value;
290       LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
291       orig_value = LLVMBuildLoad2(builder, vec_type, ptr, "");
292       value = lp_build_select(&bld->bld_base.base,
293                               mask, value, orig_value);
294    }
295 
296    LLVMBuildStore(builder, value, ptr);
297 }
298 
299 
300 /**
301  * High-level instruction translators.
302  */
303 
304 static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)305 emit_tex(struct lp_build_tgsi_aos_context *bld,
306          const struct tgsi_full_instruction *inst,
307          enum lp_build_tex_modifier modifier)
308 {
309    unsigned target;
310    unsigned unit;
311    LLVMValueRef coords;
312    struct lp_derivatives derivs = { {NULL}, {NULL} };
313 
314    if (!bld->sampler) {
315       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
316       return bld->bld_base.base.undef;
317    }
318 
319    target = inst->Texture.Texture;
320 
321    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
322 
323    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
324       /* probably not going to work */
325       derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
326       derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
327       unit = inst->Src[3].Register.Index;
328    }
329    else {
330       unit = inst->Src[1].Register.Index;
331    }
332    return bld->sampler->emit_fetch_texel(bld->sampler,
333                                          &bld->bld_base.base,
334                                          target, unit,
335                                          coords, derivs,
336                                          modifier);
337 }
338 
339 
340 static LLVMValueRef
emit_sample(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)341 emit_sample(struct lp_build_tgsi_aos_context *bld,
342             const struct tgsi_full_instruction *inst,
343             enum lp_build_tex_modifier modifier)
344 {
345    unsigned target;
346    unsigned unit;
347    LLVMValueRef coords;
348    struct lp_derivatives derivs = { {NULL}, {NULL} };
349 
350    if (!bld->sampler) {
351       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
352       return bld->bld_base.base.undef;
353    }
354 
355    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
356 
357    /* ignore modifiers, can't handle different sampler / sampler view, etc... */
358    unit = inst->Src[1].Register.Index;
359    assert(inst->Src[2].Register.Index == unit);
360 
361    target = bld->sv[unit].Resource;
362 
363    return bld->sampler->emit_fetch_texel(bld->sampler,
364                                          &bld->bld_base.base,
365                                          target, unit,
366                                          coords, derivs,
367                                          modifier);
368 }
369 
370 
371 void
lp_emit_declaration_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_declaration * decl)372 lp_emit_declaration_aos(
373    struct lp_build_tgsi_aos_context *bld,
374    const struct tgsi_full_declaration *decl)
375 {
376    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
377    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
378 
379    unsigned first = decl->Range.First;
380    unsigned last = decl->Range.Last;
381    unsigned idx;
382 
383    for (idx = first; idx <= last; ++idx) {
384       switch (decl->Declaration.File) {
385       case TGSI_FILE_TEMPORARY:
386          assert(idx < LP_MAX_INLINED_TEMPS);
387          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
388             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
389             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
390                                                      vec_type, array_size, "");
391          } else {
392             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
393          }
394          break;
395 
396       case TGSI_FILE_OUTPUT:
397          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
398          break;
399 
400       case TGSI_FILE_ADDRESS:
401          assert(idx < LP_MAX_TGSI_ADDRS);
402          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
403          break;
404 
405       case TGSI_FILE_SAMPLER_VIEW:
406          /*
407           * The target stored here MUST match whatever there actually
408           * is in the set sampler views (what about return type?).
409           */
410          assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
411          for (idx = first; idx <= last; ++idx) {
412             bld->sv[idx] = decl->SamplerView;
413          }
414          break;
415 
416       default:
417          /* don't need to declare other vars */
418          break;
419       }
420    }
421 }
422 
423 
424 /**
425  * Emit LLVM for one TGSI instruction.
426  * \param return TRUE for success, FALSE otherwise
427  */
428 boolean
lp_emit_instruction_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,int * pc)429 lp_emit_instruction_aos(
430    struct lp_build_tgsi_aos_context *bld,
431    const struct tgsi_full_instruction *inst,
432    const struct tgsi_opcode_info *info,
433    int *pc)
434 {
435    LLVMValueRef src0, src1, src2;
436    LLVMValueRef tmp0;
437    LLVMValueRef dst0 = NULL;
438 
439    /*
440     * Stores and write masks are handled in a general fashion after the long
441     * instruction opcode switch statement.
442     *
443     * Although not stricitly necessary, we avoid generating instructions for
444     * channels which won't be stored, in cases where's that easy. For some
445     * complex instructions, like texture sampling, it is more convenient to
446     * assume a full writemask and then let LLVM optimization passes eliminate
447     * redundant code.
448     */
449 
450    (*pc)++;
451 
452    assert(info->num_dst <= 1);
453    if (info->num_dst) {
454       dst0 = bld->bld_base.base.undef;
455    }
456 
457    switch (inst->Instruction.Opcode) {
458    case TGSI_OPCODE_ARL:
459       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
460       dst0 = lp_build_floor(&bld->bld_base.base, src0);
461       break;
462 
463    case TGSI_OPCODE_MOV:
464       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
465       break;
466 
467    case TGSI_OPCODE_LIT:
468       return FALSE;
469 
470    case TGSI_OPCODE_RCP:
471    /* TGSI_OPCODE_RECIP */
472       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
473       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
474       break;
475 
476    case TGSI_OPCODE_RSQ:
477    /* TGSI_OPCODE_RECIPSQRT */
478       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
479       tmp0 = lp_build_abs(&bld->bld_base.base, src0);
480       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
481       break;
482 
483    case TGSI_OPCODE_EXP:
484       return FALSE;
485 
486    case TGSI_OPCODE_LOG:
487       return FALSE;
488 
489    case TGSI_OPCODE_MUL:
490       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
491       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
492       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
493       break;
494 
495    case TGSI_OPCODE_ADD:
496       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
497       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
498       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
499       break;
500 
501    case TGSI_OPCODE_DP3:
502    /* TGSI_OPCODE_DOT3 */
503       return FALSE;
504 
505    case TGSI_OPCODE_DP4:
506    /* TGSI_OPCODE_DOT4 */
507       return FALSE;
508 
509    case TGSI_OPCODE_DST:
510       return FALSE;
511 
512    case TGSI_OPCODE_MIN:
513       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
514       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
515       dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
516       break;
517 
518    case TGSI_OPCODE_MAX:
519       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
520       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
521       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
522       break;
523 
524    case TGSI_OPCODE_SLT:
525    /* TGSI_OPCODE_SETLT */
526       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
527       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
528       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
529       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
530       break;
531 
532    case TGSI_OPCODE_SGE:
533    /* TGSI_OPCODE_SETGE */
534       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
535       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
536       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
537       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
538       break;
539 
540    case TGSI_OPCODE_MAD:
541    /* TGSI_OPCODE_MADD */
542       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
543       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
544       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
545       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
546       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
547       break;
548 
549    case TGSI_OPCODE_LRP:
550       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
551       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
552       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
553       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
554       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
555       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
556       break;
557 
558    case TGSI_OPCODE_FRC:
559       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
560       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
561       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
562       break;
563 
564    case TGSI_OPCODE_FLR:
565       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
566       dst0 = lp_build_floor(&bld->bld_base.base, src0);
567       break;
568 
569    case TGSI_OPCODE_ROUND:
570       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
571       dst0 = lp_build_round(&bld->bld_base.base, src0);
572       break;
573 
574    case TGSI_OPCODE_EX2:
575       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
576       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
577       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
578       break;
579 
580    case TGSI_OPCODE_LG2:
581       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
582       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
583       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
584       break;
585 
586    case TGSI_OPCODE_POW:
587       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
588       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
589       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
590       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
591       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
592       break;
593 
594    case TGSI_OPCODE_COS:
595       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
596       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
597       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
598       break;
599 
600    case TGSI_OPCODE_DDX:
601       return FALSE;
602 
603    case TGSI_OPCODE_DDY:
604       return FALSE;
605 
606    case TGSI_OPCODE_KILL:
607       return FALSE;
608 
609    case TGSI_OPCODE_KILL_IF:
610       return FALSE;
611 
612    case TGSI_OPCODE_PK2H:
613       return FALSE;
614       break;
615 
616    case TGSI_OPCODE_PK2US:
617       return FALSE;
618       break;
619 
620    case TGSI_OPCODE_PK4B:
621       return FALSE;
622       break;
623 
624    case TGSI_OPCODE_PK4UB:
625       return FALSE;
626 
627    case TGSI_OPCODE_SEQ:
628       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
629       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
630       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
631       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
632       break;
633 
634    case TGSI_OPCODE_SGT:
635       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
636       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
637       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
638       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
639       break;
640 
641    case TGSI_OPCODE_SIN:
642       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
643       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
644       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
645       break;
646 
647    case TGSI_OPCODE_SLE:
648       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
649       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
650       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
651       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
652       break;
653 
654    case TGSI_OPCODE_SNE:
655       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
656       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
657       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
658       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
659       break;
660 
661    case TGSI_OPCODE_TEX:
662       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
663       break;
664 
665    case TGSI_OPCODE_TXD:
666       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
667       break;
668 
669    case TGSI_OPCODE_UP2H:
670       /* deprecated */
671       assert (0);
672       return FALSE;
673       break;
674 
675    case TGSI_OPCODE_UP2US:
676       /* deprecated */
677       assert(0);
678       return FALSE;
679       break;
680 
681    case TGSI_OPCODE_UP4B:
682       /* deprecated */
683       assert(0);
684       return FALSE;
685       break;
686 
687    case TGSI_OPCODE_UP4UB:
688       /* deprecated */
689       assert(0);
690       return FALSE;
691       break;
692 
693    case TGSI_OPCODE_ARR:
694       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
695       dst0 = lp_build_round(&bld->bld_base.base, src0);
696       break;
697 
698    case TGSI_OPCODE_CAL:
699       return FALSE;
700 
701    case TGSI_OPCODE_RET:
702       /* safe to ignore at end */
703       break;
704 
705    case TGSI_OPCODE_END:
706       *pc = -1;
707       break;
708 
709    case TGSI_OPCODE_SSG:
710    /* TGSI_OPCODE_SGN */
711       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
712       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
713       break;
714 
715    case TGSI_OPCODE_CMP:
716       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
717       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
718       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
719       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
720       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
721       break;
722 
723    case TGSI_OPCODE_TXB:
724       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
725       break;
726 
727    case TGSI_OPCODE_DIV:
728       assert(0);
729       return FALSE;
730       break;
731 
732    case TGSI_OPCODE_DP2:
733       return FALSE;
734 
735    case TGSI_OPCODE_TXL:
736       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
737       break;
738 
739    case TGSI_OPCODE_TXP:
740       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
741       break;
742 
743    case TGSI_OPCODE_BRK:
744       return FALSE;
745 
746    case TGSI_OPCODE_IF:
747    case TGSI_OPCODE_UIF:
748       return FALSE;
749 
750    case TGSI_OPCODE_BGNLOOP:
751       return FALSE;
752 
753    case TGSI_OPCODE_BGNSUB:
754       return FALSE;
755 
756    case TGSI_OPCODE_ELSE:
757       return FALSE;
758 
759    case TGSI_OPCODE_ENDIF:
760       return FALSE;
761 
762    case TGSI_OPCODE_ENDLOOP:
763       return FALSE;
764 
765    case TGSI_OPCODE_ENDSUB:
766       return FALSE;
767 
768    case TGSI_OPCODE_CEIL:
769       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
770       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
771       break;
772 
773    case TGSI_OPCODE_I2F:
774       assert(0);
775       return FALSE;
776       break;
777 
778    case TGSI_OPCODE_NOT:
779       assert(0);
780       return FALSE;
781       break;
782 
783    case TGSI_OPCODE_TRUNC:
784       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
785       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
786       break;
787 
788    case TGSI_OPCODE_SHL:
789       assert(0);
790       return FALSE;
791       break;
792 
793    case TGSI_OPCODE_ISHR:
794       assert(0);
795       return FALSE;
796       break;
797 
798    case TGSI_OPCODE_AND:
799       assert(0);
800       return FALSE;
801       break;
802 
803    case TGSI_OPCODE_OR:
804       assert(0);
805       return FALSE;
806       break;
807 
808    case TGSI_OPCODE_MOD:
809       assert(0);
810       return FALSE;
811       break;
812 
813    case TGSI_OPCODE_XOR:
814       assert(0);
815       return FALSE;
816       break;
817 
818    case TGSI_OPCODE_TXF:
819       assert(0);
820       return FALSE;
821       break;
822 
823    case TGSI_OPCODE_TXQ:
824       assert(0);
825       return FALSE;
826       break;
827 
828    case TGSI_OPCODE_CONT:
829       return FALSE;
830 
831    case TGSI_OPCODE_EMIT:
832       return FALSE;
833       break;
834 
835    case TGSI_OPCODE_ENDPRIM:
836       return FALSE;
837       break;
838 
839    case TGSI_OPCODE_NOP:
840       break;
841 
842    case TGSI_OPCODE_SAMPLE:
843       dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
844       break;
845 
846    default:
847       return FALSE;
848    }
849 
850    if (info->num_dst) {
851       lp_emit_store_aos(bld, inst, 0, dst0);
852    }
853 
854    return TRUE;
855 }
856 
857 
858 void
lp_build_tgsi_aos(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,const unsigned char swizzles[4],LLVMValueRef consts_ptr,const LLVMValueRef * inputs,LLVMValueRef * outputs,const struct lp_build_sampler_aos * sampler,const struct tgsi_shader_info * info)859 lp_build_tgsi_aos(struct gallivm_state *gallivm,
860                   const struct tgsi_token *tokens,
861                   struct lp_type type,
862                   const unsigned char swizzles[4],
863                   LLVMValueRef consts_ptr,
864                   const LLVMValueRef *inputs,
865                   LLVMValueRef *outputs,
866                   const struct lp_build_sampler_aos *sampler,
867                   const struct tgsi_shader_info *info)
868 {
869    struct lp_build_tgsi_aos_context bld;
870    struct tgsi_parse_context parse;
871    uint num_immediates = 0;
872    unsigned chan;
873    int pc = 0;
874 
875    /* Setup build context */
876    memset(&bld, 0, sizeof bld);
877    lp_build_context_init(&bld.bld_base.base, gallivm, type);
878    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
879    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
880    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
881 
882    for (chan = 0; chan < 4; ++chan) {
883       bld.swizzles[chan] = swizzles[chan];
884       bld.inv_swizzles[swizzles[chan]] = chan;
885    }
886 
887    bld.inputs = inputs;
888    bld.outputs = outputs;
889    bld.consts_ptr = consts_ptr;
890    bld.sampler = sampler;
891    bld.indirect_files = info->indirect_files;
892    bld.bld_base.emit_swizzle = swizzle_aos;
893    bld.bld_base.info = info;
894 
895    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
896    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
897    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
898    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
899 
900    /* Set opcode actions */
901    lp_set_default_actions_cpu(&bld.bld_base);
902 
903    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
904       return;
905    }
906 
907    tgsi_parse_init(&parse, tokens);
908 
909    while (!tgsi_parse_end_of_tokens(&parse)) {
910       tgsi_parse_token(&parse);
911 
912       switch(parse.FullToken.Token.Type) {
913       case TGSI_TOKEN_TYPE_DECLARATION:
914          /* Inputs already interpolated */
915          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
916          break;
917 
918       case TGSI_TOKEN_TYPE_INSTRUCTION:
919          /* save expanded instruction */
920          lp_bld_tgsi_add_instruction(&bld.bld_base,
921                                      &parse.FullToken.FullInstruction);
922          break;
923 
924       case TGSI_TOKEN_TYPE_IMMEDIATE:
925          /* simply copy the immediate values into the next immediates[] slot */
926          {
927             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
928             float imm[4];
929             assert(size <= 4);
930             assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
931             for (chan = 0; chan < 4; ++chan) {
932                imm[chan] = 0.0f;
933             }
934             for (chan = 0; chan < size; ++chan) {
935                unsigned swizzle = bld.swizzles[chan];
936                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
937             }
938             bld.immediates[num_immediates] =
939                      lp_build_const_aos(gallivm, type,
940                                         imm[0], imm[1], imm[2], imm[3],
941                                         NULL);
942             num_immediates++;
943          }
944          break;
945 
946       case TGSI_TOKEN_TYPE_PROPERTY:
947          break;
948 
949       default:
950          assert(0);
951       }
952    }
953 
954    while (pc != -1) {
955       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
956       const struct tgsi_opcode_info *opcode_info =
957          tgsi_get_opcode_info(instr->Instruction.Opcode);
958       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
959          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
960                        tgsi_get_opcode_name(instr->Instruction.Opcode));
961    }
962 
963    if (0) {
964       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
965       LLVMValueRef function = LLVMGetBasicBlockParent(block);
966       debug_printf("11111111111111111111111111111 \n");
967       tgsi_dump(tokens, 0);
968       lp_debug_dump_value(function);
969       debug_printf("2222222222222222222222222222 \n");
970    }
971    tgsi_parse_free(&parse);
972    FREE(bld.bld_base.instructions);
973 
974    if (0) {
975       LLVMModuleRef module = LLVMGetGlobalParent(
976          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
977       LLVMDumpModule(module);
978    }
979 
980 }
981 
982