1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60
61
62 /**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66 static LLVMValueRef
swizzle_aos(struct lp_build_tgsi_context * bld_base,LLVMValueRef a,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68 LLVMValueRef a,
69 unsigned swizzle_x,
70 unsigned swizzle_y,
71 unsigned swizzle_z,
72 unsigned swizzle_w)
73 {
74 unsigned char swizzles[4];
75 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77 assert(swizzle_x < 4);
78 assert(swizzle_y < 4);
79 assert(swizzle_z < 4);
80 assert(swizzle_w < 4);
81
82 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89
90
91 static LLVMValueRef
swizzle_scalar_aos(struct lp_build_tgsi_aos_context * bld,LLVMValueRef a,unsigned chan)92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93 LLVMValueRef a,
94 unsigned chan)
95 {
96 chan = bld->swizzles[chan];
97 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98 }
99
100
101 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)102 emit_fetch_constant(
103 struct lp_build_tgsi_context * bld_base,
104 const struct tgsi_full_src_register * reg,
105 enum tgsi_opcode_type stype,
106 unsigned swizzle)
107 {
108 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110 struct lp_type type = bld_base->base.type;
111 LLVMValueRef res;
112 unsigned chan;
113
114 assert(!reg->Register.Indirect);
115
116 /*
117 * Get the constants components
118 */
119
120 res = bld->bld_base.base.undef;
121 for (chan = 0; chan < 4; ++chan) {
122 LLVMValueRef index;
123 LLVMValueRef scalar_ptr;
124 LLVMValueRef scalar;
125 LLVMValueRef swizzle;
126
127 index = lp_build_const_int32(bld->bld_base.base.gallivm,
128 reg->Register.Index * 4 + chan);
129
130 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136 /*
137 * NOTE: constants array is always assumed to be RGBA
138 */
139
140 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141 bld->swizzles[chan]);
142
143 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144 }
145
146 /*
147 * Broadcast the first quaternion to all others.
148 *
149 * XXX: could be factored into a reusable function.
150 */
151
152 if (type.length > 4) {
153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154 unsigned i;
155
156 for (chan = 0; chan < 4; ++chan) {
157 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158 }
159
160 for (i = 4; i < type.length; ++i) {
161 shuffles[i] = shuffles[i % 4];
162 }
163
164 res = LLVMBuildShuffleVector(builder,
165 res, bld->bld_base.base.undef,
166 LLVMConstVector(shuffles, type.length),
167 "");
168 }
169 return res;
170 }
171
172 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)173 emit_fetch_immediate(
174 struct lp_build_tgsi_context * bld_base,
175 const struct tgsi_full_src_register * reg,
176 enum tgsi_opcode_type stype,
177 unsigned swizzle)
178 {
179 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180 LLVMValueRef res = bld->immediates[reg->Register.Index];
181 assert(res);
182 return res;
183 }
184
185 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)186 emit_fetch_input(
187 struct lp_build_tgsi_context * bld_base,
188 const struct tgsi_full_src_register * reg,
189 enum tgsi_opcode_type stype,
190 unsigned swizzle)
191 {
192 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193 LLVMValueRef res = bld->inputs[reg->Register.Index];
194 assert(!reg->Register.Indirect);
195 assert(res);
196 return res;
197 }
198
199 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle)200 emit_fetch_temporary(
201 struct lp_build_tgsi_context * bld_base,
202 const struct tgsi_full_src_register * reg,
203 enum tgsi_opcode_type stype,
204 unsigned swizzle)
205 {
206 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
210 LLVMValueRef res = LLVMBuildLoad2(builder, vec_type, temp_ptr, "");
211 assert(!reg->Register.Indirect);
212 if (!res)
213 return bld->bld_base.base.undef;
214
215 return res;
216 }
217
218 /**
219 * Register store.
220 */
221 void
lp_emit_store_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,unsigned index,LLVMValueRef value)222 lp_emit_store_aos(
223 struct lp_build_tgsi_aos_context *bld,
224 const struct tgsi_full_instruction *inst,
225 unsigned index,
226 LLVMValueRef value)
227 {
228 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
229 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
230 LLVMValueRef mask = NULL;
231 LLVMValueRef ptr;
232
233 /*
234 * Saturate the value
235 */
236 if (inst->Instruction.Saturate) {
237 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
238 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
239 }
240
241 /*
242 * Translate the register file
243 */
244
245 assert(!reg->Register.Indirect);
246
247 switch (reg->Register.File) {
248 case TGSI_FILE_OUTPUT:
249 ptr = bld->outputs[reg->Register.Index];
250 break;
251
252 case TGSI_FILE_TEMPORARY:
253 ptr = bld->temps[reg->Register.Index];
254 break;
255
256 case TGSI_FILE_ADDRESS:
257 ptr = bld->addr[reg->Indirect.Index];
258 break;
259
260 default:
261 assert(0);
262 return;
263 }
264
265 if (!ptr)
266 return;
267
268 /*
269 * Writemask
270 */
271
272 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
273 LLVMValueRef writemask;
274
275 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
276 bld->bld_base.base.type,
277 reg->Register.WriteMask,
278 TGSI_NUM_CHANNELS,
279 bld->swizzles);
280
281 if (mask) {
282 mask = LLVMBuildAnd(builder, mask, writemask, "");
283 } else {
284 mask = writemask;
285 }
286 }
287
288 if (mask) {
289 LLVMValueRef orig_value;
290 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
291 orig_value = LLVMBuildLoad2(builder, vec_type, ptr, "");
292 value = lp_build_select(&bld->bld_base.base,
293 mask, value, orig_value);
294 }
295
296 LLVMBuildStore(builder, value, ptr);
297 }
298
299
300 /**
301 * High-level instruction translators.
302 */
303
304 static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)305 emit_tex(struct lp_build_tgsi_aos_context *bld,
306 const struct tgsi_full_instruction *inst,
307 enum lp_build_tex_modifier modifier)
308 {
309 unsigned target;
310 unsigned unit;
311 LLVMValueRef coords;
312 struct lp_derivatives derivs = { {NULL}, {NULL} };
313
314 if (!bld->sampler) {
315 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
316 return bld->bld_base.base.undef;
317 }
318
319 target = inst->Texture.Texture;
320
321 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
322
323 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
324 /* probably not going to work */
325 derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
326 derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
327 unit = inst->Src[3].Register.Index;
328 }
329 else {
330 unit = inst->Src[1].Register.Index;
331 }
332 return bld->sampler->emit_fetch_texel(bld->sampler,
333 &bld->bld_base.base,
334 target, unit,
335 coords, derivs,
336 modifier);
337 }
338
339
340 static LLVMValueRef
emit_sample(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier)341 emit_sample(struct lp_build_tgsi_aos_context *bld,
342 const struct tgsi_full_instruction *inst,
343 enum lp_build_tex_modifier modifier)
344 {
345 unsigned target;
346 unsigned unit;
347 LLVMValueRef coords;
348 struct lp_derivatives derivs = { {NULL}, {NULL} };
349
350 if (!bld->sampler) {
351 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
352 return bld->bld_base.base.undef;
353 }
354
355 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
356
357 /* ignore modifiers, can't handle different sampler / sampler view, etc... */
358 unit = inst->Src[1].Register.Index;
359 assert(inst->Src[2].Register.Index == unit);
360
361 target = bld->sv[unit].Resource;
362
363 return bld->sampler->emit_fetch_texel(bld->sampler,
364 &bld->bld_base.base,
365 target, unit,
366 coords, derivs,
367 modifier);
368 }
369
370
371 void
lp_emit_declaration_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_declaration * decl)372 lp_emit_declaration_aos(
373 struct lp_build_tgsi_aos_context *bld,
374 const struct tgsi_full_declaration *decl)
375 {
376 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
377 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
378
379 unsigned first = decl->Range.First;
380 unsigned last = decl->Range.Last;
381 unsigned idx;
382
383 for (idx = first; idx <= last; ++idx) {
384 switch (decl->Declaration.File) {
385 case TGSI_FILE_TEMPORARY:
386 assert(idx < LP_MAX_INLINED_TEMPS);
387 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
388 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
389 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
390 vec_type, array_size, "");
391 } else {
392 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
393 }
394 break;
395
396 case TGSI_FILE_OUTPUT:
397 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
398 break;
399
400 case TGSI_FILE_ADDRESS:
401 assert(idx < LP_MAX_TGSI_ADDRS);
402 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
403 break;
404
405 case TGSI_FILE_SAMPLER_VIEW:
406 /*
407 * The target stored here MUST match whatever there actually
408 * is in the set sampler views (what about return type?).
409 */
410 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
411 for (idx = first; idx <= last; ++idx) {
412 bld->sv[idx] = decl->SamplerView;
413 }
414 break;
415
416 default:
417 /* don't need to declare other vars */
418 break;
419 }
420 }
421 }
422
423
424 /**
425 * Emit LLVM for one TGSI instruction.
426 * \param return TRUE for success, FALSE otherwise
427 */
428 boolean
lp_emit_instruction_aos(struct lp_build_tgsi_aos_context * bld,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,int * pc)429 lp_emit_instruction_aos(
430 struct lp_build_tgsi_aos_context *bld,
431 const struct tgsi_full_instruction *inst,
432 const struct tgsi_opcode_info *info,
433 int *pc)
434 {
435 LLVMValueRef src0, src1, src2;
436 LLVMValueRef tmp0;
437 LLVMValueRef dst0 = NULL;
438
439 /*
440 * Stores and write masks are handled in a general fashion after the long
441 * instruction opcode switch statement.
442 *
443 * Although not stricitly necessary, we avoid generating instructions for
444 * channels which won't be stored, in cases where's that easy. For some
445 * complex instructions, like texture sampling, it is more convenient to
446 * assume a full writemask and then let LLVM optimization passes eliminate
447 * redundant code.
448 */
449
450 (*pc)++;
451
452 assert(info->num_dst <= 1);
453 if (info->num_dst) {
454 dst0 = bld->bld_base.base.undef;
455 }
456
457 switch (inst->Instruction.Opcode) {
458 case TGSI_OPCODE_ARL:
459 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
460 dst0 = lp_build_floor(&bld->bld_base.base, src0);
461 break;
462
463 case TGSI_OPCODE_MOV:
464 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
465 break;
466
467 case TGSI_OPCODE_LIT:
468 return FALSE;
469
470 case TGSI_OPCODE_RCP:
471 /* TGSI_OPCODE_RECIP */
472 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
473 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
474 break;
475
476 case TGSI_OPCODE_RSQ:
477 /* TGSI_OPCODE_RECIPSQRT */
478 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
479 tmp0 = lp_build_abs(&bld->bld_base.base, src0);
480 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
481 break;
482
483 case TGSI_OPCODE_EXP:
484 return FALSE;
485
486 case TGSI_OPCODE_LOG:
487 return FALSE;
488
489 case TGSI_OPCODE_MUL:
490 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
491 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
492 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
493 break;
494
495 case TGSI_OPCODE_ADD:
496 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
497 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
498 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
499 break;
500
501 case TGSI_OPCODE_DP3:
502 /* TGSI_OPCODE_DOT3 */
503 return FALSE;
504
505 case TGSI_OPCODE_DP4:
506 /* TGSI_OPCODE_DOT4 */
507 return FALSE;
508
509 case TGSI_OPCODE_DST:
510 return FALSE;
511
512 case TGSI_OPCODE_MIN:
513 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
514 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
515 dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
516 break;
517
518 case TGSI_OPCODE_MAX:
519 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
520 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
521 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
522 break;
523
524 case TGSI_OPCODE_SLT:
525 /* TGSI_OPCODE_SETLT */
526 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
527 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
528 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
529 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
530 break;
531
532 case TGSI_OPCODE_SGE:
533 /* TGSI_OPCODE_SETGE */
534 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
535 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
536 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
537 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
538 break;
539
540 case TGSI_OPCODE_MAD:
541 /* TGSI_OPCODE_MADD */
542 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
543 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
544 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
545 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
546 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
547 break;
548
549 case TGSI_OPCODE_LRP:
550 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
551 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
552 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
553 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
554 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
555 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
556 break;
557
558 case TGSI_OPCODE_FRC:
559 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
560 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
561 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
562 break;
563
564 case TGSI_OPCODE_FLR:
565 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
566 dst0 = lp_build_floor(&bld->bld_base.base, src0);
567 break;
568
569 case TGSI_OPCODE_ROUND:
570 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
571 dst0 = lp_build_round(&bld->bld_base.base, src0);
572 break;
573
574 case TGSI_OPCODE_EX2:
575 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
576 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
577 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
578 break;
579
580 case TGSI_OPCODE_LG2:
581 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
582 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
583 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
584 break;
585
586 case TGSI_OPCODE_POW:
587 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
588 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
589 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
590 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
591 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
592 break;
593
594 case TGSI_OPCODE_COS:
595 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
596 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
597 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
598 break;
599
600 case TGSI_OPCODE_DDX:
601 return FALSE;
602
603 case TGSI_OPCODE_DDY:
604 return FALSE;
605
606 case TGSI_OPCODE_KILL:
607 return FALSE;
608
609 case TGSI_OPCODE_KILL_IF:
610 return FALSE;
611
612 case TGSI_OPCODE_PK2H:
613 return FALSE;
614 break;
615
616 case TGSI_OPCODE_PK2US:
617 return FALSE;
618 break;
619
620 case TGSI_OPCODE_PK4B:
621 return FALSE;
622 break;
623
624 case TGSI_OPCODE_PK4UB:
625 return FALSE;
626
627 case TGSI_OPCODE_SEQ:
628 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
629 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
630 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
631 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
632 break;
633
634 case TGSI_OPCODE_SGT:
635 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
636 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
637 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
638 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
639 break;
640
641 case TGSI_OPCODE_SIN:
642 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
643 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
644 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
645 break;
646
647 case TGSI_OPCODE_SLE:
648 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
649 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
650 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
651 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
652 break;
653
654 case TGSI_OPCODE_SNE:
655 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
656 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
657 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
658 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
659 break;
660
661 case TGSI_OPCODE_TEX:
662 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
663 break;
664
665 case TGSI_OPCODE_TXD:
666 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
667 break;
668
669 case TGSI_OPCODE_UP2H:
670 /* deprecated */
671 assert (0);
672 return FALSE;
673 break;
674
675 case TGSI_OPCODE_UP2US:
676 /* deprecated */
677 assert(0);
678 return FALSE;
679 break;
680
681 case TGSI_OPCODE_UP4B:
682 /* deprecated */
683 assert(0);
684 return FALSE;
685 break;
686
687 case TGSI_OPCODE_UP4UB:
688 /* deprecated */
689 assert(0);
690 return FALSE;
691 break;
692
693 case TGSI_OPCODE_ARR:
694 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
695 dst0 = lp_build_round(&bld->bld_base.base, src0);
696 break;
697
698 case TGSI_OPCODE_CAL:
699 return FALSE;
700
701 case TGSI_OPCODE_RET:
702 /* safe to ignore at end */
703 break;
704
705 case TGSI_OPCODE_END:
706 *pc = -1;
707 break;
708
709 case TGSI_OPCODE_SSG:
710 /* TGSI_OPCODE_SGN */
711 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
712 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
713 break;
714
715 case TGSI_OPCODE_CMP:
716 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
717 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
718 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
719 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
720 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
721 break;
722
723 case TGSI_OPCODE_TXB:
724 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
725 break;
726
727 case TGSI_OPCODE_DIV:
728 assert(0);
729 return FALSE;
730 break;
731
732 case TGSI_OPCODE_DP2:
733 return FALSE;
734
735 case TGSI_OPCODE_TXL:
736 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
737 break;
738
739 case TGSI_OPCODE_TXP:
740 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
741 break;
742
743 case TGSI_OPCODE_BRK:
744 return FALSE;
745
746 case TGSI_OPCODE_IF:
747 case TGSI_OPCODE_UIF:
748 return FALSE;
749
750 case TGSI_OPCODE_BGNLOOP:
751 return FALSE;
752
753 case TGSI_OPCODE_BGNSUB:
754 return FALSE;
755
756 case TGSI_OPCODE_ELSE:
757 return FALSE;
758
759 case TGSI_OPCODE_ENDIF:
760 return FALSE;
761
762 case TGSI_OPCODE_ENDLOOP:
763 return FALSE;
764
765 case TGSI_OPCODE_ENDSUB:
766 return FALSE;
767
768 case TGSI_OPCODE_CEIL:
769 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
770 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
771 break;
772
773 case TGSI_OPCODE_I2F:
774 assert(0);
775 return FALSE;
776 break;
777
778 case TGSI_OPCODE_NOT:
779 assert(0);
780 return FALSE;
781 break;
782
783 case TGSI_OPCODE_TRUNC:
784 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
785 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
786 break;
787
788 case TGSI_OPCODE_SHL:
789 assert(0);
790 return FALSE;
791 break;
792
793 case TGSI_OPCODE_ISHR:
794 assert(0);
795 return FALSE;
796 break;
797
798 case TGSI_OPCODE_AND:
799 assert(0);
800 return FALSE;
801 break;
802
803 case TGSI_OPCODE_OR:
804 assert(0);
805 return FALSE;
806 break;
807
808 case TGSI_OPCODE_MOD:
809 assert(0);
810 return FALSE;
811 break;
812
813 case TGSI_OPCODE_XOR:
814 assert(0);
815 return FALSE;
816 break;
817
818 case TGSI_OPCODE_TXF:
819 assert(0);
820 return FALSE;
821 break;
822
823 case TGSI_OPCODE_TXQ:
824 assert(0);
825 return FALSE;
826 break;
827
828 case TGSI_OPCODE_CONT:
829 return FALSE;
830
831 case TGSI_OPCODE_EMIT:
832 return FALSE;
833 break;
834
835 case TGSI_OPCODE_ENDPRIM:
836 return FALSE;
837 break;
838
839 case TGSI_OPCODE_NOP:
840 break;
841
842 case TGSI_OPCODE_SAMPLE:
843 dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
844 break;
845
846 default:
847 return FALSE;
848 }
849
850 if (info->num_dst) {
851 lp_emit_store_aos(bld, inst, 0, dst0);
852 }
853
854 return TRUE;
855 }
856
857
858 void
lp_build_tgsi_aos(struct gallivm_state * gallivm,const struct tgsi_token * tokens,struct lp_type type,const unsigned char swizzles[4],LLVMValueRef consts_ptr,const LLVMValueRef * inputs,LLVMValueRef * outputs,const struct lp_build_sampler_aos * sampler,const struct tgsi_shader_info * info)859 lp_build_tgsi_aos(struct gallivm_state *gallivm,
860 const struct tgsi_token *tokens,
861 struct lp_type type,
862 const unsigned char swizzles[4],
863 LLVMValueRef consts_ptr,
864 const LLVMValueRef *inputs,
865 LLVMValueRef *outputs,
866 const struct lp_build_sampler_aos *sampler,
867 const struct tgsi_shader_info *info)
868 {
869 struct lp_build_tgsi_aos_context bld;
870 struct tgsi_parse_context parse;
871 uint num_immediates = 0;
872 unsigned chan;
873 int pc = 0;
874
875 /* Setup build context */
876 memset(&bld, 0, sizeof bld);
877 lp_build_context_init(&bld.bld_base.base, gallivm, type);
878 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
879 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
880 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
881
882 for (chan = 0; chan < 4; ++chan) {
883 bld.swizzles[chan] = swizzles[chan];
884 bld.inv_swizzles[swizzles[chan]] = chan;
885 }
886
887 bld.inputs = inputs;
888 bld.outputs = outputs;
889 bld.consts_ptr = consts_ptr;
890 bld.sampler = sampler;
891 bld.indirect_files = info->indirect_files;
892 bld.bld_base.emit_swizzle = swizzle_aos;
893 bld.bld_base.info = info;
894
895 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
896 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
897 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
898 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
899
900 /* Set opcode actions */
901 lp_set_default_actions_cpu(&bld.bld_base);
902
903 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
904 return;
905 }
906
907 tgsi_parse_init(&parse, tokens);
908
909 while (!tgsi_parse_end_of_tokens(&parse)) {
910 tgsi_parse_token(&parse);
911
912 switch(parse.FullToken.Token.Type) {
913 case TGSI_TOKEN_TYPE_DECLARATION:
914 /* Inputs already interpolated */
915 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
916 break;
917
918 case TGSI_TOKEN_TYPE_INSTRUCTION:
919 /* save expanded instruction */
920 lp_bld_tgsi_add_instruction(&bld.bld_base,
921 &parse.FullToken.FullInstruction);
922 break;
923
924 case TGSI_TOKEN_TYPE_IMMEDIATE:
925 /* simply copy the immediate values into the next immediates[] slot */
926 {
927 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
928 float imm[4];
929 assert(size <= 4);
930 assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
931 for (chan = 0; chan < 4; ++chan) {
932 imm[chan] = 0.0f;
933 }
934 for (chan = 0; chan < size; ++chan) {
935 unsigned swizzle = bld.swizzles[chan];
936 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
937 }
938 bld.immediates[num_immediates] =
939 lp_build_const_aos(gallivm, type,
940 imm[0], imm[1], imm[2], imm[3],
941 NULL);
942 num_immediates++;
943 }
944 break;
945
946 case TGSI_TOKEN_TYPE_PROPERTY:
947 break;
948
949 default:
950 assert(0);
951 }
952 }
953
954 while (pc != -1) {
955 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
956 const struct tgsi_opcode_info *opcode_info =
957 tgsi_get_opcode_info(instr->Instruction.Opcode);
958 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
959 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
960 tgsi_get_opcode_name(instr->Instruction.Opcode));
961 }
962
963 if (0) {
964 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
965 LLVMValueRef function = LLVMGetBasicBlockParent(block);
966 debug_printf("11111111111111111111111111111 \n");
967 tgsi_dump(tokens, 0);
968 lp_debug_dump_value(function);
969 debug_printf("2222222222222222222222222222 \n");
970 }
971 tgsi_parse_free(&parse);
972 FREE(bld.bld_base.instructions);
973
974 if (0) {
975 LLVMModuleRef module = LLVMGetGlobalParent(
976 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
977 LLVMDumpModule(module);
978 }
979
980 }
981
982