1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60
61
62 /**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66 static LLVMValueRef
swizzle_aos(struct lp_build_tgsi_context * bld_base,LLVMValueRef a,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68 LLVMValueRef a,
69 unsigned swizzle_x,
70 unsigned swizzle_y,
71 unsigned swizzle_z,
72 unsigned swizzle_w)
73 {
74 unsigned char swizzles[4];
75 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77 assert(swizzle_x < 4);
78 assert(swizzle_y < 4);
79 assert(swizzle_z < 4);
80 assert(swizzle_w < 4);
81
82 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89
90
91 static LLVMValueRef
swizzle_scalar_aos(struct lp_build_tgsi_aos_context * bld,LLVMValueRef a,unsigned chan)92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93 LLVMValueRef a,
94 unsigned chan)
95 {
96 chan = bld->swizzles[chan];
97 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98 }
99
100
101 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)102 emit_fetch_constant(
103 struct lp_build_tgsi_context * bld_base,
104 const struct tgsi_full_src_register * reg,
105 enum tgsi_opcode_type stype,
106 unsigned swizzle)
107 {
108 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110 struct lp_type type = bld_base->base.type;
111 LLVMValueRef res;
112 unsigned chan;
113
114 assert(!reg->Register.Indirect);
115
116 /*
117 * Get the constants components
118 */
119
120 res = bld->bld_base.base.undef;
121 for (chan = 0; chan < 4; ++chan) {
122 LLVMValueRef index;
123 LLVMValueRef scalar_ptr;
124 LLVMValueRef scalar;
125 LLVMValueRef swizzle;
126
127 index = lp_build_const_int32(bld->bld_base.base.gallivm,
128 reg->Register.Index * 4 + chan);
129
130 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136 /*
137 * NOTE: constants array is always assumed to be RGBA
138 */
139
140 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141 bld->swizzles[chan]);
142
143 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144 }
145
146 /*
147 * Broadcast the first quaternion to all others.
148 *
149 * XXX: could be factored into a reusable function.
150 */
151
152 if (type.length > 4) {
153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154 unsigned i;
155
156 for (chan = 0; chan < 4; ++chan) {
157 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158 }
159
160 for (i = 4; i < type.length; ++i) {
161 shuffles[i] = shuffles[i % 4];
162 }
163
164 res = LLVMBuildShuffleVector(builder,
165 res, bld->bld_base.base.undef,
166 LLVMConstVector(shuffles, type.length),
167 "");
168 }
169 return res;
170 }
171
172 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)173 emit_fetch_immediate(
174 struct lp_build_tgsi_context * bld_base,
175 const struct tgsi_full_src_register * reg,
176 enum tgsi_opcode_type stype,
177 unsigned swizzle)
178 {
179 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180 LLVMValueRef res = bld->immediates[reg->Register.Index];
181 assert(res);
182 return res;
183 }
184
185 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)186 emit_fetch_input(
187 struct lp_build_tgsi_context * bld_base,
188 const struct tgsi_full_src_register * reg,
189 enum tgsi_opcode_type stype,
190 unsigned swizzle)
191 {
192 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193 LLVMValueRef res = bld->inputs[reg->Register.Index];
194 assert(!reg->Register.Indirect);
195 assert(res);
196 return res;
197 }
198
199 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)200 emit_fetch_temporary(
201 struct lp_build_tgsi_context * bld_base,
202 const struct tgsi_full_src_register * reg,
203 enum tgsi_opcode_type stype,
204 unsigned swizzle)
205 {
206 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210 assert(!reg->Register.Indirect);
211 if (!res)
212 return bld->bld_base.base.undef;
213
214 return res;
215 }
216
217 /**
218 * Register store.
219 */
220 void
lp_emit_store_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,unsigned index,LLVMValueRef value)221 lp_emit_store_aos(
222 struct lp_build_tgsi_aos_context *bld,
223 const struct tgsi_full_instruction *inst,
224 unsigned index,
225 LLVMValueRef value)
226 {
227 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229 LLVMValueRef mask = NULL;
230 LLVMValueRef ptr;
231
232 /*
233 * Saturate the value
234 */
235 if (inst->Instruction.Saturate) {
236 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
237 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
238 }
239
240 /*
241 * Translate the register file
242 */
243
244 assert(!reg->Register.Indirect);
245
246 switch (reg->Register.File) {
247 case TGSI_FILE_OUTPUT:
248 ptr = bld->outputs[reg->Register.Index];
249 break;
250
251 case TGSI_FILE_TEMPORARY:
252 ptr = bld->temps[reg->Register.Index];
253 break;
254
255 case TGSI_FILE_ADDRESS:
256 ptr = bld->addr[reg->Indirect.Index];
257 break;
258
259 default:
260 assert(0);
261 return;
262 }
263
264 if (!ptr)
265 return;
266
267 /*
268 * Writemask
269 */
270
271 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
272 LLVMValueRef writemask;
273
274 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
275 bld->bld_base.base.type,
276 reg->Register.WriteMask,
277 TGSI_NUM_CHANNELS,
278 bld->swizzles);
279
280 if (mask) {
281 mask = LLVMBuildAnd(builder, mask, writemask, "");
282 } else {
283 mask = writemask;
284 }
285 }
286
287 if (mask) {
288 LLVMValueRef orig_value;
289
290 orig_value = LLVMBuildLoad(builder, ptr, "");
291 value = lp_build_select(&bld->bld_base.base,
292 mask, value, orig_value);
293 }
294
295 LLVMBuildStore(builder, value, ptr);
296 }
297
298
299 /**
300 * High-level instruction translators.
301 */
302
303 static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)304 emit_tex(struct lp_build_tgsi_aos_context *bld,
305 const struct tgsi_full_instruction *inst,
306 enum lp_build_tex_modifier modifier)
307 {
308 unsigned target;
309 unsigned unit;
310 LLVMValueRef coords;
311 struct lp_derivatives derivs = { {NULL}, {NULL} };
312
313 if (!bld->sampler) {
314 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
315 return bld->bld_base.base.undef;
316 }
317
318 target = inst->Texture.Texture;
319
320 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
321
322 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
323 /* probably not going to work */
324 derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
325 derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
326 unit = inst->Src[3].Register.Index;
327 }
328 else {
329 unit = inst->Src[1].Register.Index;
330 }
331 return bld->sampler->emit_fetch_texel(bld->sampler,
332 &bld->bld_base.base,
333 target, unit,
334 coords, derivs,
335 modifier);
336 }
337
338
339 static LLVMValueRef
emit_sample(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)340 emit_sample(struct lp_build_tgsi_aos_context *bld,
341 const struct tgsi_full_instruction *inst,
342 enum lp_build_tex_modifier modifier)
343 {
344 unsigned target;
345 unsigned unit;
346 LLVMValueRef coords;
347 struct lp_derivatives derivs = { {NULL}, {NULL} };
348
349 if (!bld->sampler) {
350 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
351 return bld->bld_base.base.undef;
352 }
353
354 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
355
356 /* ignore modifiers, can't handle different sampler / sampler view, etc... */
357 unit = inst->Src[1].Register.Index;
358 assert(inst->Src[2].Register.Index == unit);
359
360 target = bld->sv[unit].Resource;
361
362 return bld->sampler->emit_fetch_texel(bld->sampler,
363 &bld->bld_base.base,
364 target, unit,
365 coords, derivs,
366 modifier);
367 }
368
369
370 void
lp_emit_declaration_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_declaration * decl)371 lp_emit_declaration_aos(
372 struct lp_build_tgsi_aos_context *bld,
373 const struct tgsi_full_declaration *decl)
374 {
375 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
376 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
377
378 unsigned first = decl->Range.First;
379 unsigned last = decl->Range.Last;
380 unsigned idx;
381
382 for (idx = first; idx <= last; ++idx) {
383 switch (decl->Declaration.File) {
384 case TGSI_FILE_TEMPORARY:
385 assert(idx < LP_MAX_INLINED_TEMPS);
386 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
387 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
388 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
389 vec_type, array_size, "");
390 } else {
391 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
392 }
393 break;
394
395 case TGSI_FILE_OUTPUT:
396 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
397 break;
398
399 case TGSI_FILE_ADDRESS:
400 assert(idx < LP_MAX_TGSI_ADDRS);
401 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
402 break;
403
404 case TGSI_FILE_SAMPLER_VIEW:
405 /*
406 * The target stored here MUST match whatever there actually
407 * is in the set sampler views (what about return type?).
408 */
409 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
410 for (idx = first; idx <= last; ++idx) {
411 bld->sv[idx] = decl->SamplerView;
412 }
413 break;
414
415 default:
416 /* don't need to declare other vars */
417 break;
418 }
419 }
420 }
421
422
423 /**
424 * Emit LLVM for one TGSI instruction.
425 * \param return TRUE for success, FALSE otherwise
426 */
427 boolean
lp_emit_instruction_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,int * pc)428 lp_emit_instruction_aos(
429 struct lp_build_tgsi_aos_context *bld,
430 const struct tgsi_full_instruction *inst,
431 const struct tgsi_opcode_info *info,
432 int *pc)
433 {
434 LLVMValueRef src0, src1, src2;
435 LLVMValueRef tmp0;
436 LLVMValueRef dst0 = NULL;
437
438 /*
439 * Stores and write masks are handled in a general fashion after the long
440 * instruction opcode switch statement.
441 *
442 * Although not stricitly necessary, we avoid generating instructions for
443 * channels which won't be stored, in cases where's that easy. For some
444 * complex instructions, like texture sampling, it is more convenient to
445 * assume a full writemask and then let LLVM optimization passes eliminate
446 * redundant code.
447 */
448
449 (*pc)++;
450
451 assert(info->num_dst <= 1);
452 if (info->num_dst) {
453 dst0 = bld->bld_base.base.undef;
454 }
455
456 switch (inst->Instruction.Opcode) {
457 case TGSI_OPCODE_ARL:
458 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
459 dst0 = lp_build_floor(&bld->bld_base.base, src0);
460 break;
461
462 case TGSI_OPCODE_MOV:
463 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
464 break;
465
466 case TGSI_OPCODE_LIT:
467 return FALSE;
468
469 case TGSI_OPCODE_RCP:
470 /* TGSI_OPCODE_RECIP */
471 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
472 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
473 break;
474
475 case TGSI_OPCODE_RSQ:
476 /* TGSI_OPCODE_RECIPSQRT */
477 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
478 tmp0 = lp_build_abs(&bld->bld_base.base, src0);
479 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
480 break;
481
482 case TGSI_OPCODE_EXP:
483 return FALSE;
484
485 case TGSI_OPCODE_LOG:
486 return FALSE;
487
488 case TGSI_OPCODE_MUL:
489 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
491 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
492 break;
493
494 case TGSI_OPCODE_ADD:
495 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
496 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
497 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
498 break;
499
500 case TGSI_OPCODE_DP3:
501 /* TGSI_OPCODE_DOT3 */
502 return FALSE;
503
504 case TGSI_OPCODE_DP4:
505 /* TGSI_OPCODE_DOT4 */
506 return FALSE;
507
508 case TGSI_OPCODE_DST:
509 return FALSE;
510
511 case TGSI_OPCODE_MIN:
512 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
513 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
514 dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
515 break;
516
517 case TGSI_OPCODE_MAX:
518 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
519 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
520 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
521 break;
522
523 case TGSI_OPCODE_SLT:
524 /* TGSI_OPCODE_SETLT */
525 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
526 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
527 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
528 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
529 break;
530
531 case TGSI_OPCODE_SGE:
532 /* TGSI_OPCODE_SETGE */
533 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
534 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
535 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
536 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
537 break;
538
539 case TGSI_OPCODE_MAD:
540 /* TGSI_OPCODE_MADD */
541 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
542 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
543 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
544 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
545 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
546 break;
547
548 case TGSI_OPCODE_LRP:
549 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
550 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
551 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
552 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
553 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
554 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
555 break;
556
557 case TGSI_OPCODE_FRC:
558 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
559 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
560 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
561 break;
562
563 case TGSI_OPCODE_FLR:
564 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
565 dst0 = lp_build_floor(&bld->bld_base.base, src0);
566 break;
567
568 case TGSI_OPCODE_ROUND:
569 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
570 dst0 = lp_build_round(&bld->bld_base.base, src0);
571 break;
572
573 case TGSI_OPCODE_EX2:
574 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
575 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
576 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
577 break;
578
579 case TGSI_OPCODE_LG2:
580 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
581 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
582 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
583 break;
584
585 case TGSI_OPCODE_POW:
586 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
587 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
588 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
589 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
590 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
591 break;
592
593 case TGSI_OPCODE_COS:
594 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
595 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
596 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
597 break;
598
599 case TGSI_OPCODE_DDX:
600 return FALSE;
601
602 case TGSI_OPCODE_DDY:
603 return FALSE;
604
605 case TGSI_OPCODE_KILL:
606 return FALSE;
607
608 case TGSI_OPCODE_KILL_IF:
609 return FALSE;
610
611 case TGSI_OPCODE_PK2H:
612 return FALSE;
613 break;
614
615 case TGSI_OPCODE_PK2US:
616 return FALSE;
617 break;
618
619 case TGSI_OPCODE_PK4B:
620 return FALSE;
621 break;
622
623 case TGSI_OPCODE_PK4UB:
624 return FALSE;
625
626 case TGSI_OPCODE_SEQ:
627 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
628 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
629 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
630 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
631 break;
632
633 case TGSI_OPCODE_SGT:
634 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
635 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
636 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
637 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
638 break;
639
640 case TGSI_OPCODE_SIN:
641 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
642 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
643 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
644 break;
645
646 case TGSI_OPCODE_SLE:
647 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
648 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
649 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
650 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
651 break;
652
653 case TGSI_OPCODE_SNE:
654 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
655 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
656 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
657 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
658 break;
659
660 case TGSI_OPCODE_TEX:
661 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
662 break;
663
664 case TGSI_OPCODE_TXD:
665 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
666 break;
667
668 case TGSI_OPCODE_UP2H:
669 /* deprecated */
670 assert (0);
671 return FALSE;
672 break;
673
674 case TGSI_OPCODE_UP2US:
675 /* deprecated */
676 assert(0);
677 return FALSE;
678 break;
679
680 case TGSI_OPCODE_UP4B:
681 /* deprecated */
682 assert(0);
683 return FALSE;
684 break;
685
686 case TGSI_OPCODE_UP4UB:
687 /* deprecated */
688 assert(0);
689 return FALSE;
690 break;
691
692 case TGSI_OPCODE_ARR:
693 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
694 dst0 = lp_build_round(&bld->bld_base.base, src0);
695 break;
696
697 case TGSI_OPCODE_CAL:
698 return FALSE;
699
700 case TGSI_OPCODE_RET:
701 /* safe to ignore at end */
702 break;
703
704 case TGSI_OPCODE_END:
705 *pc = -1;
706 break;
707
708 case TGSI_OPCODE_SSG:
709 /* TGSI_OPCODE_SGN */
710 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
711 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
712 break;
713
714 case TGSI_OPCODE_CMP:
715 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
716 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
717 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
718 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
719 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
720 break;
721
722 case TGSI_OPCODE_TXB:
723 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
724 break;
725
726 case TGSI_OPCODE_DIV:
727 assert(0);
728 return FALSE;
729 break;
730
731 case TGSI_OPCODE_DP2:
732 return FALSE;
733
734 case TGSI_OPCODE_TXL:
735 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
736 break;
737
738 case TGSI_OPCODE_TXP:
739 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
740 break;
741
742 case TGSI_OPCODE_BRK:
743 return FALSE;
744
745 case TGSI_OPCODE_IF:
746 case TGSI_OPCODE_UIF:
747 return FALSE;
748
749 case TGSI_OPCODE_BGNLOOP:
750 return FALSE;
751
752 case TGSI_OPCODE_BGNSUB:
753 return FALSE;
754
755 case TGSI_OPCODE_ELSE:
756 return FALSE;
757
758 case TGSI_OPCODE_ENDIF:
759 return FALSE;
760
761 case TGSI_OPCODE_ENDLOOP:
762 return FALSE;
763
764 case TGSI_OPCODE_ENDSUB:
765 return FALSE;
766
767 case TGSI_OPCODE_CEIL:
768 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
769 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
770 break;
771
772 case TGSI_OPCODE_I2F:
773 assert(0);
774 return FALSE;
775 break;
776
777 case TGSI_OPCODE_NOT:
778 assert(0);
779 return FALSE;
780 break;
781
782 case TGSI_OPCODE_TRUNC:
783 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
784 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
785 break;
786
787 case TGSI_OPCODE_SHL:
788 assert(0);
789 return FALSE;
790 break;
791
792 case TGSI_OPCODE_ISHR:
793 assert(0);
794 return FALSE;
795 break;
796
797 case TGSI_OPCODE_AND:
798 assert(0);
799 return FALSE;
800 break;
801
802 case TGSI_OPCODE_OR:
803 assert(0);
804 return FALSE;
805 break;
806
807 case TGSI_OPCODE_MOD:
808 assert(0);
809 return FALSE;
810 break;
811
812 case TGSI_OPCODE_XOR:
813 assert(0);
814 return FALSE;
815 break;
816
817 case TGSI_OPCODE_TXF:
818 assert(0);
819 return FALSE;
820 break;
821
822 case TGSI_OPCODE_TXQ:
823 assert(0);
824 return FALSE;
825 break;
826
827 case TGSI_OPCODE_CONT:
828 return FALSE;
829
830 case TGSI_OPCODE_EMIT:
831 return FALSE;
832 break;
833
834 case TGSI_OPCODE_ENDPRIM:
835 return FALSE;
836 break;
837
838 case TGSI_OPCODE_NOP:
839 break;
840
841 case TGSI_OPCODE_SAMPLE:
842 dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
843 break;
844
845 default:
846 return FALSE;
847 }
848
849 if (info->num_dst) {
850 lp_emit_store_aos(bld, inst, 0, dst0);
851 }
852
853 return TRUE;
854 }
855
856
857 void
lp_build_tgsi_aos(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,const unsigned char swizzles[4],LLVMValueRef consts_ptr,const LLVMValueRef * inputs,LLVMValueRef * outputs,struct lp_build_sampler_aos * sampler,const struct tgsi_shader_info * info)858 lp_build_tgsi_aos(struct gallivm_state *gallivm,
859 const struct tgsi_token *tokens,
860 struct lp_type type,
861 const unsigned char swizzles[4],
862 LLVMValueRef consts_ptr,
863 const LLVMValueRef *inputs,
864 LLVMValueRef *outputs,
865 struct lp_build_sampler_aos *sampler,
866 const struct tgsi_shader_info *info)
867 {
868 struct lp_build_tgsi_aos_context bld;
869 struct tgsi_parse_context parse;
870 uint num_immediates = 0;
871 unsigned chan;
872 int pc = 0;
873
874 /* Setup build context */
875 memset(&bld, 0, sizeof bld);
876 lp_build_context_init(&bld.bld_base.base, gallivm, type);
877 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
878 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
879 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
880
881 for (chan = 0; chan < 4; ++chan) {
882 bld.swizzles[chan] = swizzles[chan];
883 bld.inv_swizzles[swizzles[chan]] = chan;
884 }
885
886 bld.inputs = inputs;
887 bld.outputs = outputs;
888 bld.consts_ptr = consts_ptr;
889 bld.sampler = sampler;
890 bld.indirect_files = info->indirect_files;
891 bld.bld_base.emit_swizzle = swizzle_aos;
892 bld.bld_base.info = info;
893
894 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
895 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
896 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
897 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
898
899 /* Set opcode actions */
900 lp_set_default_actions_cpu(&bld.bld_base);
901
902 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
903 return;
904 }
905
906 tgsi_parse_init(&parse, tokens);
907
908 while (!tgsi_parse_end_of_tokens(&parse)) {
909 tgsi_parse_token(&parse);
910
911 switch(parse.FullToken.Token.Type) {
912 case TGSI_TOKEN_TYPE_DECLARATION:
913 /* Inputs already interpolated */
914 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
915 break;
916
917 case TGSI_TOKEN_TYPE_INSTRUCTION:
918 /* save expanded instruction */
919 lp_bld_tgsi_add_instruction(&bld.bld_base,
920 &parse.FullToken.FullInstruction);
921 break;
922
923 case TGSI_TOKEN_TYPE_IMMEDIATE:
924 /* simply copy the immediate values into the next immediates[] slot */
925 {
926 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
927 float imm[4];
928 assert(size <= 4);
929 assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
930 for (chan = 0; chan < 4; ++chan) {
931 imm[chan] = 0.0f;
932 }
933 for (chan = 0; chan < size; ++chan) {
934 unsigned swizzle = bld.swizzles[chan];
935 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
936 }
937 bld.immediates[num_immediates] =
938 lp_build_const_aos(gallivm, type,
939 imm[0], imm[1], imm[2], imm[3],
940 NULL);
941 num_immediates++;
942 }
943 break;
944
945 case TGSI_TOKEN_TYPE_PROPERTY:
946 break;
947
948 default:
949 assert(0);
950 }
951 }
952
953 while (pc != -1) {
954 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
955 const struct tgsi_opcode_info *opcode_info =
956 tgsi_get_opcode_info(instr->Instruction.Opcode);
957 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
958 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
959 tgsi_get_opcode_name(instr->Instruction.Opcode));
960 }
961
962 if (0) {
963 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
964 LLVMValueRef function = LLVMGetBasicBlockParent(block);
965 debug_printf("11111111111111111111111111111 \n");
966 tgsi_dump(tokens, 0);
967 lp_debug_dump_value(function);
968 debug_printf("2222222222222222222222222222 \n");
969 }
970 tgsi_parse_free(&parse);
971 FREE(bld.bld_base.instructions);
972
973 if (0) {
974 LLVMModuleRef module = LLVMGetGlobalParent(
975 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
976 LLVMDumpModule(module);
977 }
978
979 }
980
981