• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include "r600_sq.h"
24 #include "r600_llvm.h"
25 #include "r600_formats.h"
26 #include "r600_opcodes.h"
27 #include "r600d.h"
28 
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_info.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_scan.h"
33 #include "tgsi/tgsi_dump.h"
34 #include "util/u_memory.h"
35 #include <stdio.h>
36 #include <errno.h>
37 #include <byteswap.h>
38 
/* CAYMAN notes
The reason CAYMAN needs loops for so many instructions is explained here.
41 
42 -These 8xx t-slot only ops are implemented in all vector slots.
43 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44 These 8xx t-slot only opcodes become vector ops, with all four
45 slots expecting the arguments on sources a and b. Result is
46 broadcast to all channels.
47 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48 These 8xx t-slot only opcodes become vector ops in the z, y, and
49 x slots.
50 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52 SQRT_IEEE/_64
53 SIN/COS
54 The w slot may have an independent co-issued operation, or if the
55 result is required to be in the w slot, the opcode above may be
56 issued in the w slot as well.
57 The compiler must issue the source argument to slots z, y, and x
58 */
59 
r600_pipe_shader(struct pipe_context * ctx,struct r600_pipe_shader * shader)60 static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61 {
62 	struct r600_context *rctx = (struct r600_context *)ctx;
63 	struct r600_shader *rshader = &shader->shader;
64 	uint32_t *ptr;
65 	int	i;
66 
67 	/* copy new shader */
68 	if (shader->bo == NULL) {
69 		shader->bo = (struct r600_resource*)
70 			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71 		if (shader->bo == NULL) {
72 			return -ENOMEM;
73 		}
74 		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
75 		if (R600_BIG_ENDIAN) {
76 			for (i = 0; i < rshader->bc.ndw; ++i) {
77 				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78 			}
79 		} else {
80 			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81 		}
82 		rctx->ws->buffer_unmap(shader->bo->cs_buf);
83 	}
84 	/* build state */
85 	switch (rshader->processor_type) {
86 	case TGSI_PROCESSOR_VERTEX:
87 		if (rctx->chip_class >= EVERGREEN) {
88 			evergreen_pipe_shader_vs(ctx, shader);
89 		} else {
90 			r600_pipe_shader_vs(ctx, shader);
91 		}
92 		break;
93 	case TGSI_PROCESSOR_FRAGMENT:
94 		if (rctx->chip_class >= EVERGREEN) {
95 			evergreen_pipe_shader_ps(ctx, shader);
96 		} else {
97 			r600_pipe_shader_ps(ctx, shader);
98 		}
99 		break;
100 	default:
101 		return -EINVAL;
102 	}
103 	return 0;
104 }
105 
106 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader);
107 
r600_pipe_shader_create(struct pipe_context * ctx,struct r600_pipe_shader * shader)108 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109 {
110 	static int dump_shaders = -1;
111 	struct r600_context *rctx = (struct r600_context *)ctx;
112 	struct r600_pipe_shader_selector *sel = shader->selector;
113 	int r;
114 
115 	/* Would like some magic "get_bool_option_once" routine.
116 	*/
117 	if (dump_shaders == -1)
118 		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
119 
120 	if (dump_shaders) {
121 		fprintf(stderr, "--------------------------------------------------------------\n");
122 		tgsi_dump(sel->tokens, 0);
123 
124 		if (sel->so.num_outputs) {
125 			unsigned i;
126 			fprintf(stderr, "STREAMOUT\n");
127 			for (i = 0; i < sel->so.num_outputs; i++) {
128 				unsigned mask = ((1 << sel->so.output[i].num_components) - 1) <<
129 						sel->so.output[i].start_component;
130 				fprintf(stderr, "  %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
131 					sel->so.output[i].output_buffer, sel->so.output[i].register_index,
132 				        mask & 1 ? "x" : "_",
133 				        (mask >> 1) & 1 ? "y" : "_",
134 				        (mask >> 2) & 1 ? "z" : "_",
135 				        (mask >> 3) & 1 ? "w" : "_");
136 			}
137 		}
138 	}
139 	r = r600_shader_from_tgsi(rctx, shader);
140 	if (r) {
141 		R600_ERR("translation from TGSI failed !\n");
142 		return r;
143 	}
144 	r = r600_bytecode_build(&shader->shader.bc);
145 	if (r) {
146 		R600_ERR("building bytecode failed !\n");
147 		return r;
148 	}
149 	if (dump_shaders) {
150 		r600_bytecode_dump(&shader->shader.bc);
151 		fprintf(stderr, "______________________________________________________________\n");
152 	}
153 	return r600_pipe_shader(ctx, shader);
154 }
155 
r600_pipe_shader_destroy(struct pipe_context * ctx,struct r600_pipe_shader * shader)156 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
157 {
158 	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
159 	r600_bytecode_clear(&shader->shader.bc);
160 }
161 
162 /*
163  * tgsi -> r600 shader
164  */
165 struct r600_shader_tgsi_instruction;
166 
/* One source operand as filled in by tgsi_src() from a TGSI source register. */
struct r600_shader_src {
	/* source selector; for immediates it can be set by
	 * r600_bytecode_special_constants() */
	unsigned				sel;
	/* per-channel swizzle, copied from the TGSI SwizzleX/Y/Z/W fields */
	unsigned				swizzle[4];
	/* copied from the TGSI Negate flag */
	unsigned				neg;
	/* copied from the TGSI Absolute flag */
	unsigned				abs;
	/* NOTE(review): presumably a relative-addressing flag - confirm */
	unsigned				rel;
	/* NOTE(review): presumably per-channel literal values - confirm */
	uint32_t				value[4];
};
175 
/* Working state shared by the shader translation helpers in this file. */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;
	/* parser state; the current declaration/instruction is read from
	 * parse.FullToken */
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	/* compared against TGSI_PROCESSOR_* values */
	unsigned				type;
	/* per-TGSI-file offset added to a register index to form a GPR number */
	unsigned				file_offset[TGSI_FILE_COUNT];
	/* base register for the temporaries handed out by r600_get_temp() */
	unsigned				temp_reg;
	/* entry of the per-chip instruction table for the current opcode */
	struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bytecode			*bc;
	struct r600_shader			*shader;
	struct r600_shader_src			src[4];
	/* immediate values; tgsi_src() indexes it with Index * 4 + swizzle */
	uint32_t				*literals;
	uint32_t				nliterals;
	/* number of temporaries r600_get_temp() has handed out so far */
	uint32_t				max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean                                 input_centroid;
	boolean                                 input_linear;
	boolean                                 input_perspective;
	/* computed by evergreen_gpr_count() */
	int					num_interp_gpr;
	/* GPR of the fragment shader FACE input */
	int					face_gpr;
	/* number of fragment shader COLOR inputs declared */
	int					colors_used;
	/* set when a vertex shader declares a CLIPVERTEX output; cv_output is
	 * the index of that output */
	boolean                 clip_vertex_write;
	unsigned                cv_output;
	/* index of the fragment shader POSITION input */
	int					fragcoord_input;
	/* when zero, tgsi_declaration() emits an INT_TO_FLT for the instance id */
	int					native_integers;
};
202 
/*
 * One entry of a per-chip translation table. The tables declared below are
 * indexed by TGSI opcode.
 */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably marks three-operand hardware opcodes - confirm */
	unsigned	is_op3;
	/* NOTE(review): presumably the hardware opcode to emit - confirm */
	unsigned	r600_opcode;
	/* handler called with the translation context */
	int (*process)(struct r600_shader_ctx *ctx);
};
209 
210 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
211 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
212 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
213 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
214 static int tgsi_else(struct r600_shader_ctx *ctx);
215 static int tgsi_endif(struct r600_shader_ctx *ctx);
216 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
217 static int tgsi_endloop(struct r600_shader_ctx *ctx);
218 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
219 
220 /*
221  * bytestream -> r600 shader
222  *
223  * These functions are used to transform the output of the LLVM backend into
224  * struct r600_bytecode.
225  */
226 
227 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
228 				unsigned char * bytes,	unsigned num_bytes);
229 
230 #ifdef HAVE_OPENCL
/*
 * Compile an LLVM module into a byte stream, convert that stream into
 * `bytecode` and build it.
 *
 * ctx:      pipe context, cast to struct r600_context.
 * mod:      LLVM module handed to r600_llvm_compile().
 * bytecode: output; initialised here for the context's chip class and family.
 *
 * Always returns 1, as before. A failing r600_bytecode_build() is now
 * reported through R600_ERR instead of being dropped silently.
 */
int r600_compute_shader_create(struct pipe_context * ctx,
	LLVMModuleRef mod,  struct r600_bytecode * bytecode)
{
	struct r600_context *r600_ctx = (struct r600_context *)ctx;
	unsigned char * bytes;
	unsigned byte_count;
	struct r600_shader_ctx shader_ctx;
	unsigned dump = 0;

	/* The byte-stream parsers read more of the context than just .bc
	 * (for example ctx->shader and ctx->inst_info), so make sure every
	 * field starts out as zero/NULL rather than stack garbage. */
	memset(&shader_ctx, 0, sizeof(shader_ctx));

	if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
		dump = 1;
	}

	r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
	shader_ctx.bc = bytecode;
	r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
	shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
	r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
	if (shader_ctx.bc->chip_class == CAYMAN) {
		cm_bytecode_add_cf_end(shader_ctx.bc);
	}
	if (r600_bytecode_build(shader_ctx.bc)) {
		R600_ERR("building bytecode failed !\n");
	}
	if (dump) {
		r600_bytecode_dump(shader_ctx.bc);
	}
	return 1;
}
258 
259 #endif /* HAVE_OPENCL */
260 
/*
 * Read a little-endian 32-bit value from `bytes`, starting at *bytes_read.
 *
 * bytes:      stream to read from.
 * bytes_read: in/out cursor; advanced by four.
 *
 * Returns the assembled value. No bounds checking is done here.
 */
static uint32_t i32_from_byte_stream(unsigned char * bytes,
		unsigned * bytes_read)
{
	unsigned i;
	uint32_t out = 0;
	for (i = 0; i < 4; i++) {
		/* Widen before shifting: an unsigned char promotes to int, and
		 * shifting a value >= 0x80 left by 24 overflows a signed int. */
		out |= (uint32_t)bytes[(*bytes_read)++] << (8 * i);
	}
	return out;
}
271 
r600_src_from_byte_stream(unsigned char * bytes,unsigned bytes_read,struct r600_bytecode_alu * alu,unsigned src_idx)272 static unsigned r600_src_from_byte_stream(unsigned char * bytes,
273 		unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
274 {
275 	unsigned i;
276 	unsigned sel0, sel1;
277 	sel0 = bytes[bytes_read++];
278 	sel1 = bytes[bytes_read++];
279 	alu->src[src_idx].sel = sel0 | (sel1 << 8);
280 	alu->src[src_idx].chan = bytes[bytes_read++];
281 	alu->src[src_idx].neg = bytes[bytes_read++];
282 	alu->src[src_idx].abs = bytes[bytes_read++];
283 	alu->src[src_idx].rel = bytes[bytes_read++];
284 	alu->src[src_idx].kc_bank = bytes[bytes_read++];
285 	for (i = 0; i < 4; i++) {
286 		alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
287 	}
288 	return bytes_read;
289 }
290 
r600_alu_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)291 static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
292 				unsigned char * bytes, unsigned bytes_read)
293 {
294 	unsigned src_idx;
295 	unsigned inst0, inst1;
296 	unsigned push_modifier;
297 	struct r600_bytecode_alu alu;
298 	memset(&alu, 0, sizeof(alu));
299 	for(src_idx = 0; src_idx < 3; src_idx++) {
300 		bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
301 								&alu, src_idx);
302 	}
303 
304 	alu.dst.sel = bytes[bytes_read++];
305 	alu.dst.chan = bytes[bytes_read++];
306 	alu.dst.clamp = bytes[bytes_read++];
307 	alu.dst.write = bytes[bytes_read++];
308 	alu.dst.rel = bytes[bytes_read++];
309 	inst0 = bytes[bytes_read++];
310 	inst1 = bytes[bytes_read++];
311 	alu.inst = inst0 | (inst1 << 8);
312 	alu.last = bytes[bytes_read++];
313 	alu.is_op3 = bytes[bytes_read++];
314 	push_modifier = bytes[bytes_read++];
315 	alu.pred_sel = bytes[bytes_read++];
316 	alu.bank_swizzle = bytes[bytes_read++];
317 	alu.bank_swizzle_force = bytes[bytes_read++];
318 	alu.omod = bytes[bytes_read++];
319 	alu.index_mode = bytes[bytes_read++];
320 
321 
322 	if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
323 	    alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
324 	    alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) ||
325 	    alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) {
326 		alu.update_pred = 1;
327 		alu.dst.write = 0;
328 		alu.src[1].sel = V_SQ_ALU_SRC_0;
329 		alu.src[1].chan = 0;
330 		alu.last = 1;
331     }
332 
333     if (push_modifier) {
334         alu.pred_sel = 0;
335 		alu.execute_mask = 1;
336 		r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
337 	} else
338 		r600_bytecode_add_alu(ctx->bc, &alu);
339 
340 
341 	/* XXX: Handle other KILL instructions */
342 	if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
343 		ctx->shader->uses_kill = 1;
344 		/* XXX: This should be enforced in the LLVM backend. */
345 		ctx->bc->force_add_cf = 1;
346 	}
347 	return bytes_read;
348 }
349 
llvm_if(struct r600_shader_ctx * ctx,struct r600_bytecode_alu * alu,unsigned pred_inst)350 static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
351 	unsigned pred_inst)
352 {
353 	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
354 	fc_pushlevel(ctx, FC_IF);
355 	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
356 }
357 
r600_break_from_byte_stream(struct r600_shader_ctx * ctx,struct r600_bytecode_alu * alu,unsigned compare_opcode)358 static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx,
359 			struct r600_bytecode_alu *alu, unsigned compare_opcode)
360 {
361 	unsigned opcode = TGSI_OPCODE_BRK;
362 	if (ctx->bc->chip_class == CAYMAN)
363 		ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
364 	else if (ctx->bc->chip_class >= EVERGREEN)
365 		ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
366 	else
367 		ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
368 	llvm_if(ctx, alu, compare_opcode);
369 	tgsi_loop_brk_cont(ctx);
370 	tgsi_endif(ctx);
371 }
372 
r600_fc_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)373 static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
374 				unsigned char * bytes, unsigned bytes_read)
375 {
376 	struct r600_bytecode_alu alu;
377 	unsigned inst;
378 	memset(&alu, 0, sizeof(alu));
379 	bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
380 	inst = bytes[bytes_read++];
381 	switch (inst) {
382 	case 0: /* FC_IF */
383 		llvm_if(ctx, &alu,
384 			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
385 		break;
386 	case 1: /* FC_IF_INT */
387 		llvm_if(ctx, &alu,
388 			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
389 		break;
390 	case 2: /* FC_ELSE */
391 		tgsi_else(ctx);
392 		break;
393 	case 3: /* FC_ENDIF */
394 		tgsi_endif(ctx);
395 		break;
396 	case 4: /* FC_BGNLOOP */
397 		tgsi_bgnloop(ctx);
398 		break;
399 	case 5: /* FC_ENDLOOP */
400 		tgsi_endloop(ctx);
401 		break;
402 	case 6: /* FC_BREAK */
403 		r600_break_from_byte_stream(ctx, &alu,
404 			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
405 		break;
406 	case 7: /* FC_BREAK_NZ_INT */
407 		r600_break_from_byte_stream(ctx, &alu,
408 			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
409 		break;
410 	case 8: /* FC_CONTINUE */
411 		{
412 			unsigned opcode = TGSI_OPCODE_CONT;
413 			if (ctx->bc->chip_class == CAYMAN) {
414 				ctx->inst_info =
415 					&cm_shader_tgsi_instruction[opcode];
416 			} else if (ctx->bc->chip_class >= EVERGREEN) {
417 				ctx->inst_info =
418 					&eg_shader_tgsi_instruction[opcode];
419 			} else {
420 				ctx->inst_info =
421 					&r600_shader_tgsi_instruction[opcode];
422 			}
423 			tgsi_loop_brk_cont(ctx);
424 		}
425 		break;
426 	case 9: /* FC_BREAK_Z_INT */
427 		r600_break_from_byte_stream(ctx, &alu,
428 			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
429 		break;
430 	case 10: /* FC_BREAK_NZ */
431 		r600_break_from_byte_stream(ctx, &alu,
432 			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
433 		break;
434 	}
435 
436 	return bytes_read;
437 }
438 
r600_tex_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)439 static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
440 				unsigned char * bytes, unsigned bytes_read)
441 {
442 	struct r600_bytecode_tex tex;
443 
444 	tex.inst = bytes[bytes_read++];
445 	tex.resource_id = bytes[bytes_read++];
446 	tex.src_gpr = bytes[bytes_read++];
447 	tex.src_rel = bytes[bytes_read++];
448 	tex.dst_gpr = bytes[bytes_read++];
449 	tex.dst_rel = bytes[bytes_read++];
450 	tex.dst_sel_x = bytes[bytes_read++];
451 	tex.dst_sel_y = bytes[bytes_read++];
452 	tex.dst_sel_z = bytes[bytes_read++];
453 	tex.dst_sel_w = bytes[bytes_read++];
454 	tex.lod_bias = bytes[bytes_read++];
455 	tex.coord_type_x = bytes[bytes_read++];
456 	tex.coord_type_y = bytes[bytes_read++];
457 	tex.coord_type_z = bytes[bytes_read++];
458 	tex.coord_type_w = bytes[bytes_read++];
459 	tex.offset_x = bytes[bytes_read++];
460 	tex.offset_y = bytes[bytes_read++];
461 	tex.offset_z = bytes[bytes_read++];
462 	tex.sampler_id = bytes[bytes_read++];
463 	tex.src_sel_x = bytes[bytes_read++];
464 	tex.src_sel_y = bytes[bytes_read++];
465 	tex.src_sel_z = bytes[bytes_read++];
466 	tex.src_sel_w = bytes[bytes_read++];
467 
468 	r600_bytecode_add_tex(ctx->bc, &tex);
469 
470 	return bytes_read;
471 }
472 
r600_vtx_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned bytes_read)473 static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
474 	unsigned char * bytes, unsigned bytes_read)
475 {
476 	struct r600_bytecode_vtx vtx;
477 
478 	uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
479         uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
480 	uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
481 
482 	memset(&vtx, 0, sizeof(vtx));
483 
484 	/* WORD0 */
485 	vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0);
486 	vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
487 	vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
488 	vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
489 	vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
490 	vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
491 
492 	/* WORD1 */
493 	vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
494 	vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
495 	vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
496 	vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
497 	vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
498 	vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
499 	vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
500 	vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
501 	vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
502 	vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
503 
504 	/* WORD 2*/
505 	vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
506 	vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
507 
508 	if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
509 		fprintf(stderr, "Error adding vtx\n");
510 	}
511 	/* Use the Texture Cache */
512 	ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
513 	return bytes_read;
514 }
515 
r600_bytecode_from_byte_stream(struct r600_shader_ctx * ctx,unsigned char * bytes,unsigned num_bytes)516 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
517 				unsigned char * bytes,	unsigned num_bytes)
518 {
519 	unsigned bytes_read = 0;
520 	unsigned i, byte;
521 	while (bytes_read < num_bytes) {
522 		char inst_type = bytes[bytes_read++];
523 		switch (inst_type) {
524 		case 0:
525 			bytes_read = r600_alu_from_byte_stream(ctx, bytes,
526 								bytes_read);
527 			break;
528 		case 1:
529 			bytes_read = r600_tex_from_byte_stream(ctx, bytes,
530 								bytes_read);
531 			break;
532 		case 2:
533 			bytes_read = r600_fc_from_byte_stream(ctx, bytes,
534 								bytes_read);
535 			break;
536 		case 3:
537 			r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
538 			for (i = 0; i < 2; i++) {
539 				for (byte = 0 ; byte < 4; byte++) {
540 					ctx->bc->cf_last->isa[i] |=
541 					(bytes[bytes_read++] << (byte * 8));
542 				}
543 			}
544 			break;
545 
546 		case 4:
547 			bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
548 								bytes_read);
549 			break;
550 		default:
551 			/* XXX: Error here */
552 			break;
553 		}
554 	}
555 }
556 
557 /* End bytestream -> r600 shader functions*/
558 
tgsi_is_supported(struct r600_shader_ctx * ctx)559 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
560 {
561 	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
562 	int j;
563 
564 	if (i->Instruction.NumDstRegs > 1) {
565 		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
566 		return -EINVAL;
567 	}
568 	if (i->Instruction.Predicate) {
569 		R600_ERR("predicate unsupported\n");
570 		return -EINVAL;
571 	}
572 #if 0
573 	if (i->Instruction.Label) {
574 		R600_ERR("label unsupported\n");
575 		return -EINVAL;
576 	}
577 #endif
578 	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
579 		if (i->Src[j].Register.Dimension) {
580 			R600_ERR("unsupported src %d (dimension %d)\n", j,
581 				 i->Src[j].Register.Dimension);
582 			return -EINVAL;
583 		}
584 	}
585 	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
586 		if (i->Dst[j].Register.Dimension) {
587 			R600_ERR("unsupported dst (dimension)\n");
588 			return -EINVAL;
589 		}
590 	}
591 	return 0;
592 }
593 
evergreen_interp_alu(struct r600_shader_ctx * ctx,int input)594 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
595 {
596 	int i, r;
597 	struct r600_bytecode_alu alu;
598 	int gpr = 0, base_chan = 0;
599 	int ij_index = 0;
600 
601 	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
602 		ij_index = 0;
603 		if (ctx->shader->input[input].centroid)
604 			ij_index++;
605 	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
606 		ij_index = 0;
607 		/* if we have perspective add one */
608 		if (ctx->input_perspective)  {
609 			ij_index++;
610 			/* if we have perspective centroid */
611 			if (ctx->input_centroid)
612 				ij_index++;
613 		}
614 		if (ctx->shader->input[input].centroid)
615 			ij_index++;
616 	}
617 
618 	/* work out gpr and base_chan from index */
619 	gpr = ij_index / 2;
620 	base_chan = (2 * (ij_index % 2)) + 1;
621 
622 	for (i = 0; i < 8; i++) {
623 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
624 
625 		if (i < 4)
626 			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW;
627 		else
628 			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY;
629 
630 		if ((i > 1) && (i < 6)) {
631 			alu.dst.sel = ctx->shader->input[input].gpr;
632 			alu.dst.write = 1;
633 		}
634 
635 		alu.dst.chan = i % 4;
636 
637 		alu.src[0].sel = gpr;
638 		alu.src[0].chan = (base_chan - (i % 2));
639 
640 		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
641 
642 		alu.bank_swizzle_force = SQ_ALU_VEC_210;
643 		if ((i % 4) == 3)
644 			alu.last = 1;
645 		r = r600_bytecode_add_alu(ctx->bc, &alu);
646 		if (r)
647 			return r;
648 	}
649 	return 0;
650 }
651 
evergreen_interp_flat(struct r600_shader_ctx * ctx,int input)652 static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
653 {
654 	int i, r;
655 	struct r600_bytecode_alu alu;
656 
657 	for (i = 0; i < 4; i++) {
658 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
659 
660 		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0;
661 
662 		alu.dst.sel = ctx->shader->input[input].gpr;
663 		alu.dst.write = 1;
664 
665 		alu.dst.chan = i;
666 
667 		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
668 		alu.src[0].chan = i;
669 
670 		if (i == 3)
671 			alu.last = 1;
672 		r = r600_bytecode_add_alu(ctx->bc, &alu);
673 		if (r)
674 			return r;
675 	}
676 	return 0;
677 }
678 
679 /*
680  * Special export handling in shaders
681  *
682  * shader export ARRAY_BASE for EXPORT_POS:
683  * 60 is position
684  * 61 is misc vector
685  * 62, 63 are clip distance vectors
686  *
687  * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
688  * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
689  * USE_VTX_POINT_SIZE - point size in the X channel of export 61
690  * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
691  * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
692  * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
693  * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
694  * exclusive from render target index)
695  * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
696  *
697  *
698  * shader export ARRAY_BASE for EXPORT_PIXEL:
699  * 0-7 CB targets
700  * 61 computed Z vector
701  *
702  * The use of the values exported in the computed Z vector are controlled
703  * by DB_SHADER_CONTROL:
704  * Z_EXPORT_ENABLE - Z as a float in RED
705  * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
706  * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
707  * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
708  * DB_SOURCE_FORMAT - export control restrictions
709  *
710  */
711 
712 
713 /* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
r600_spi_sid(struct r600_shader_io * io)714 static int r600_spi_sid(struct r600_shader_io * io)
715 {
716 	int index, name = io->name;
717 
718 	/* These params are handled differently, they don't need
719 	 * semantic indices, so we'll use 0 for them.
720 	 */
721 	if (name == TGSI_SEMANTIC_POSITION ||
722 		name == TGSI_SEMANTIC_PSIZE ||
723 		name == TGSI_SEMANTIC_FACE)
724 		index = 0;
725 	else {
726 		if (name == TGSI_SEMANTIC_GENERIC) {
727 			/* For generic params simply use sid from tgsi */
728 			index = io->sid;
729 		} else {
730 			/* For non-generic params - pack name and sid into 8 bits */
731 			index = 0x80 | (name<<3) | (io->sid);
732 		}
733 
734 		/* Make sure that all really used indices have nonzero value, so
735 		 * we can just compare it to 0 later instead of comparing the name
736 		 * with different values to detect special cases. */
737 		index++;
738 	}
739 
740 	return index;
741 };
742 
743 /* turn input into interpolate on EG */
evergreen_interp_input(struct r600_shader_ctx * ctx,int index)744 static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
745 {
746 	int r = 0;
747 
748 	if (ctx->shader->input[index].spi_sid) {
749 		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
750 		if (ctx->shader->input[index].interpolate > 0) {
751 			r = evergreen_interp_alu(ctx, index);
752 		} else {
753 			r = evergreen_interp_flat(ctx, index);
754 		}
755 	}
756 	return r;
757 }
758 
select_twoside_color(struct r600_shader_ctx * ctx,int front,int back)759 static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
760 {
761 	struct r600_bytecode_alu alu;
762 	int i, r;
763 	int gpr_front = ctx->shader->input[front].gpr;
764 	int gpr_back = ctx->shader->input[back].gpr;
765 
766 	for (i = 0; i < 4; i++) {
767 		memset(&alu, 0, sizeof(alu));
768 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
769 		alu.is_op3 = 1;
770 		alu.dst.write = 1;
771 		alu.dst.sel = gpr_front;
772 		alu.src[0].sel = ctx->face_gpr;
773 		alu.src[1].sel = gpr_front;
774 		alu.src[2].sel = gpr_back;
775 
776 		alu.dst.chan = i;
777 		alu.src[1].chan = i;
778 		alu.src[2].chan = i;
779 		alu.last = (i==3);
780 
781 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
782 			return r;
783 	}
784 
785 	return 0;
786 }
787 
tgsi_declaration(struct r600_shader_ctx * ctx)788 static int tgsi_declaration(struct r600_shader_ctx *ctx)
789 {
790 	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
791 	unsigned i;
792 	int r;
793 
794 	switch (d->Declaration.File) {
795 	case TGSI_FILE_INPUT:
796 		i = ctx->shader->ninput++;
797 		ctx->shader->input[i].name = d->Semantic.Name;
798 		ctx->shader->input[i].sid = d->Semantic.Index;
799 		ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
800 		ctx->shader->input[i].interpolate = d->Interp.Interpolate;
801 		ctx->shader->input[i].centroid = d->Interp.Centroid;
802 		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
803 		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
804 			switch (ctx->shader->input[i].name) {
805 			case TGSI_SEMANTIC_FACE:
806 				ctx->face_gpr = ctx->shader->input[i].gpr;
807 				break;
808 			case TGSI_SEMANTIC_COLOR:
809 				ctx->colors_used++;
810 				break;
811 			case TGSI_SEMANTIC_POSITION:
812 				ctx->fragcoord_input = i;
813 				break;
814 			}
815 			if (ctx->bc->chip_class >= EVERGREEN) {
816 				if ((r = evergreen_interp_input(ctx, i)))
817 					return r;
818 			}
819 		}
820 		break;
821 	case TGSI_FILE_OUTPUT:
822 		i = ctx->shader->noutput++;
823 		ctx->shader->output[i].name = d->Semantic.Name;
824 		ctx->shader->output[i].sid = d->Semantic.Index;
825 		ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
826 		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
827 		ctx->shader->output[i].interpolate = d->Interp.Interpolate;
828 		ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
829 		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
830 			switch (d->Semantic.Name) {
831 			case TGSI_SEMANTIC_CLIPDIST:
832 				ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
833 				break;
834 			case TGSI_SEMANTIC_PSIZE:
835 				ctx->shader->vs_out_misc_write = 1;
836 				ctx->shader->vs_out_point_size = 1;
837 				break;
838 			case TGSI_SEMANTIC_CLIPVERTEX:
839 				ctx->clip_vertex_write = TRUE;
840 				ctx->cv_output = i;
841 				break;
842 			}
843 		} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
844 			switch (d->Semantic.Name) {
845 			case TGSI_SEMANTIC_COLOR:
846 				ctx->shader->nr_ps_max_color_exports++;
847 				break;
848 			}
849 		}
850 		break;
851 	case TGSI_FILE_CONSTANT:
852 	case TGSI_FILE_TEMPORARY:
853 	case TGSI_FILE_SAMPLER:
854 	case TGSI_FILE_ADDRESS:
855 		break;
856 
857 	case TGSI_FILE_SYSTEM_VALUE:
858 		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
859 			if (!ctx->native_integers) {
860 				struct r600_bytecode_alu alu;
861 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
862 
863 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
864 				alu.src[0].sel = 0;
865 				alu.src[0].chan = 3;
866 
867 				alu.dst.sel = 0;
868 				alu.dst.chan = 3;
869 				alu.dst.write = 1;
870 				alu.last = 1;
871 
872 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
873 					return r;
874 			}
875 			break;
876 		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
877 			break;
878 	default:
879 		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
880 		return -EINVAL;
881 	}
882 	return 0;
883 }
884 
r600_get_temp(struct r600_shader_ctx * ctx)885 static int r600_get_temp(struct r600_shader_ctx *ctx)
886 {
887 	return ctx->temp_reg + ctx->max_driver_temp_used++;
888 }
889 
890 /*
891  * for evergreen we need to scan the shader to find the number of GPRs we need to
892  * reserve for interpolation.
893  *
894  * we need to know if we are going to emit
895  * any centroid inputs
896  * if perspective and linear are required
897 */
evergreen_gpr_count(struct r600_shader_ctx * ctx)898 static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
899 {
900 	int i;
901 	int num_baryc;
902 
903 	ctx->input_linear = FALSE;
904 	ctx->input_perspective = FALSE;
905 	ctx->input_centroid = FALSE;
906 	ctx->num_interp_gpr = 1;
907 
908 	/* any centroid inputs */
909 	for (i = 0; i < ctx->info.num_inputs; i++) {
910 		/* skip position/face */
911 		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
912 		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
913 			continue;
914 		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
915 			ctx->input_linear = TRUE;
916 		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
917 			ctx->input_perspective = TRUE;
918 		if (ctx->info.input_centroid[i])
919 			ctx->input_centroid = TRUE;
920 	}
921 
922 	num_baryc = 0;
923 	/* ignoring sample for now */
924 	if (ctx->input_perspective)
925 		num_baryc++;
926 	if (ctx->input_linear)
927 		num_baryc++;
928 	if (ctx->input_centroid)
929 		num_baryc *= 2;
930 
931 	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
932 
933 	/* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
934 	return ctx->num_interp_gpr;
935 }
936 
tgsi_src(struct r600_shader_ctx * ctx,const struct tgsi_full_src_register * tgsi_src,struct r600_shader_src * r600_src)937 static void tgsi_src(struct r600_shader_ctx *ctx,
938 		     const struct tgsi_full_src_register *tgsi_src,
939 		     struct r600_shader_src *r600_src)
940 {
941 	memset(r600_src, 0, sizeof(*r600_src));
942 	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
943 	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
944 	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
945 	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
946 	r600_src->neg = tgsi_src->Register.Negate;
947 	r600_src->abs = tgsi_src->Register.Absolute;
948 
949 	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
950 		int index;
951 		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
952 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
953 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
954 
955 			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
956 			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
957 			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
958 				return;
959 		}
960 		index = tgsi_src->Register.Index;
961 		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
962 		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
963 	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
964 		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
965 			r600_src->swizzle[0] = 3;
966 			r600_src->swizzle[1] = 3;
967 			r600_src->swizzle[2] = 3;
968 			r600_src->swizzle[3] = 3;
969 			r600_src->sel = 0;
970 		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
971 			r600_src->swizzle[0] = 0;
972 			r600_src->swizzle[1] = 0;
973 			r600_src->swizzle[2] = 0;
974 			r600_src->swizzle[3] = 0;
975 			r600_src->sel = 0;
976 		}
977 	} else {
978 		if (tgsi_src->Register.Indirect)
979 			r600_src->rel = V_SQ_REL_RELATIVE;
980 		r600_src->sel = tgsi_src->Register.Index;
981 		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
982 	}
983 }
984 
tgsi_fetch_rel_const(struct r600_shader_ctx * ctx,unsigned int offset,unsigned int dst_reg)985 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
986 {
987 	struct r600_bytecode_vtx vtx;
988 	unsigned int ar_reg;
989 	int r;
990 
991 	if (offset) {
992 		struct r600_bytecode_alu alu;
993 
994 		memset(&alu, 0, sizeof(alu));
995 
996 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
997 		alu.src[0].sel = ctx->bc->ar_reg;
998 
999 		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1000 		alu.src[1].value = offset;
1001 
1002 		alu.dst.sel = dst_reg;
1003 		alu.dst.write = 1;
1004 		alu.last = 1;
1005 
1006 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
1007 			return r;
1008 
1009 		ar_reg = dst_reg;
1010 	} else {
1011 		ar_reg = ctx->bc->ar_reg;
1012 	}
1013 
1014 	memset(&vtx, 0, sizeof(vtx));
1015 	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
1016 	vtx.src_gpr = ar_reg;
1017 	vtx.mega_fetch_count = 16;
1018 	vtx.dst_gpr = dst_reg;
1019 	vtx.dst_sel_x = 0;		/* SEL_X */
1020 	vtx.dst_sel_y = 1;		/* SEL_Y */
1021 	vtx.dst_sel_z = 2;		/* SEL_Z */
1022 	vtx.dst_sel_w = 3;		/* SEL_W */
1023 	vtx.data_format = FMT_32_32_32_32_FLOAT;
1024 	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
1025 	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
1026 	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
1027 	vtx.endian = r600_endian_swap(32);
1028 
1029 	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
1030 		return r;
1031 
1032 	return 0;
1033 }
1034 
tgsi_split_constant(struct r600_shader_ctx * ctx)1035 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
1036 {
1037 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1038 	struct r600_bytecode_alu alu;
1039 	int i, j, k, nconst, r;
1040 
1041 	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
1042 		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
1043 			nconst++;
1044 		}
1045 		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
1046 	}
1047 	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
1048 		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
1049 			continue;
1050 		}
1051 
1052 		if (ctx->src[i].rel) {
1053 			int treg = r600_get_temp(ctx);
1054 			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
1055 				return r;
1056 
1057 			ctx->src[i].sel = treg;
1058 			ctx->src[i].rel = 0;
1059 			j--;
1060 		} else if (j > 0) {
1061 			int treg = r600_get_temp(ctx);
1062 			for (k = 0; k < 4; k++) {
1063 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1064 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1065 				alu.src[0].sel = ctx->src[i].sel;
1066 				alu.src[0].chan = k;
1067 				alu.src[0].rel = ctx->src[i].rel;
1068 				alu.dst.sel = treg;
1069 				alu.dst.chan = k;
1070 				alu.dst.write = 1;
1071 				if (k == 3)
1072 					alu.last = 1;
1073 				r = r600_bytecode_add_alu(ctx->bc, &alu);
1074 				if (r)
1075 					return r;
1076 			}
1077 			ctx->src[i].sel = treg;
1078 			ctx->src[i].rel =0;
1079 			j--;
1080 		}
1081 	}
1082 	return 0;
1083 }
1084 
1085 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
tgsi_split_literal_constant(struct r600_shader_ctx * ctx)1086 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
1087 {
1088 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1089 	struct r600_bytecode_alu alu;
1090 	int i, j, k, nliteral, r;
1091 
1092 	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
1093 		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1094 			nliteral++;
1095 		}
1096 	}
1097 	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
1098 		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
1099 			int treg = r600_get_temp(ctx);
1100 			for (k = 0; k < 4; k++) {
1101 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1102 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1103 				alu.src[0].sel = ctx->src[i].sel;
1104 				alu.src[0].chan = k;
1105 				alu.src[0].value = ctx->src[i].value[k];
1106 				alu.dst.sel = treg;
1107 				alu.dst.chan = k;
1108 				alu.dst.write = 1;
1109 				if (k == 3)
1110 					alu.last = 1;
1111 				r = r600_bytecode_add_alu(ctx->bc, &alu);
1112 				if (r)
1113 					return r;
1114 			}
1115 			ctx->src[i].sel = treg;
1116 			j--;
1117 		}
1118 	}
1119 	return 0;
1120 }
1121 
process_twoside_color_inputs(struct r600_shader_ctx * ctx)1122 static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
1123 {
1124 	int i, r, count = ctx->shader->ninput;
1125 
1126 	/* additional inputs will be allocated right after the existing inputs,
1127 	 * we won't need them after the color selection, so we don't need to
1128 	 * reserve these gprs for the rest of the shader code and to adjust
1129 	 * output offsets etc. */
1130 	int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
1131 			ctx->info.file_max[TGSI_FILE_INPUT] + 1;
1132 
1133 	if (ctx->face_gpr == -1) {
1134 		i = ctx->shader->ninput++;
1135 		ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
1136 		ctx->shader->input[i].spi_sid = 0;
1137 		ctx->shader->input[i].gpr = gpr++;
1138 		ctx->face_gpr = ctx->shader->input[i].gpr;
1139 	}
1140 
1141 	for (i = 0; i < count; i++) {
1142 		if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
1143 			int ni = ctx->shader->ninput++;
1144 			memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
1145 			ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
1146 			ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
1147 			ctx->shader->input[ni].gpr = gpr++;
1148 
1149 			if (ctx->bc->chip_class >= EVERGREEN) {
1150 				r = evergreen_interp_input(ctx, ni);
1151 				if (r)
1152 					return r;
1153 			}
1154 
1155 			r = select_twoside_color(ctx, i, ni);
1156 			if (r)
1157 				return r;
1158 		}
1159 	}
1160 	return 0;
1161 }
1162 
r600_shader_from_tgsi(struct r600_context * rctx,struct r600_pipe_shader * pipeshader)1163 static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
1164 {
1165 	struct r600_shader *shader = &pipeshader->shader;
1166 	struct tgsi_token *tokens = pipeshader->selector->tokens;
1167 	struct pipe_stream_output_info so = pipeshader->selector->so;
1168 	struct tgsi_full_immediate *immediate;
1169 	struct tgsi_full_property *property;
1170 	struct r600_shader_ctx ctx;
1171 	struct r600_bytecode_output output[32];
1172 	unsigned output_done, noutput;
1173 	unsigned opcode;
1174 	int i, j, k, r = 0;
1175 	int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
1176 	/* Declarations used by llvm code */
1177 	bool use_llvm = false;
1178 	unsigned char * inst_bytes = NULL;
1179 	unsigned inst_byte_count = 0;
1180 
1181 #ifdef R600_USE_LLVM
1182 	use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
1183 #endif
1184 	ctx.bc = &shader->bc;
1185 	ctx.shader = shader;
1186 	ctx.native_integers = true;
1187 
1188 	r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
1189 	ctx.tokens = tokens;
1190 	tgsi_scan_shader(tokens, &ctx.info);
1191 	tgsi_parse_init(&ctx.parse, tokens);
1192 	ctx.type = ctx.parse.FullHeader.Processor.Processor;
1193 	shader->processor_type = ctx.type;
1194 	ctx.bc->type = shader->processor_type;
1195 
1196 	ctx.face_gpr = -1;
1197 	ctx.fragcoord_input = -1;
1198 	ctx.colors_used = 0;
1199 	ctx.clip_vertex_write = 0;
1200 
1201 	shader->nr_ps_color_exports = 0;
1202 	shader->nr_ps_max_color_exports = 0;
1203 
1204 	shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
1205 
1206 	/* register allocations */
1207 	/* Values [0,127] correspond to GPR[0..127].
1208 	 * Values [128,159] correspond to constant buffer bank 0
1209 	 * Values [160,191] correspond to constant buffer bank 1
1210 	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
1211 	 * Values [256,287] correspond to constant buffer bank 2 (EG)
1212 	 * Values [288,319] correspond to constant buffer bank 3 (EG)
1213 	 * Other special values are shown in the list below.
1214 	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
1215 	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
1216 	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
1217 	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
1218 	 * 248	SQ_ALU_SRC_0: special constant 0.0.
1219 	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
1220 	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
1221 	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1222 	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
1223 	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
1224 	 * 254	SQ_ALU_SRC_PV: previous vector result.
1225 	 * 255	SQ_ALU_SRC_PS: previous scalar result.
1226 	 */
1227 	for (i = 0; i < TGSI_FILE_COUNT; i++) {
1228 		ctx.file_offset[i] = 0;
1229 	}
1230 	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1231 		ctx.file_offset[TGSI_FILE_INPUT] = 1;
1232 		if (ctx.bc->chip_class >= EVERGREEN) {
1233 			r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1234 		} else {
1235 			r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
1236 		}
1237 	}
1238 	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
1239 		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
1240 	}
1241 
1242 	/* LLVM backend setup */
1243 #ifdef R600_USE_LLVM
1244 	if (use_llvm && ctx.info.indirect_files) {
1245 		fprintf(stderr, "Warning: R600 LLVM backend does not support "
1246 				"indirect adressing.  Falling back to TGSI "
1247 				"backend.\n");
1248 		use_llvm = 0;
1249 	}
1250 	if (use_llvm) {
1251 		struct radeon_llvm_context radeon_llvm_ctx;
1252 		LLVMModuleRef mod;
1253 		unsigned dump = 0;
1254 		memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
1255 		radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
1256 		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
1257 		if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
1258 			dump = 1;
1259 		}
1260 		if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
1261 							rctx->family, dump)) {
1262 			FREE(inst_bytes);
1263 			radeon_llvm_dispose(&radeon_llvm_ctx);
1264 			use_llvm = 0;
1265 			fprintf(stderr, "R600 LLVM backend failed to compile "
1266 				"shader.  Falling back to TGSI\n");
1267 		} else {
1268 			ctx.file_offset[TGSI_FILE_OUTPUT] =
1269 					ctx.file_offset[TGSI_FILE_INPUT];
1270 		}
1271 		radeon_llvm_dispose(&radeon_llvm_ctx);
1272 	}
1273 #endif
1274 	/* End of LLVM backend setup */
1275 
1276 	if (!use_llvm) {
1277 		ctx.file_offset[TGSI_FILE_OUTPUT] =
1278 			ctx.file_offset[TGSI_FILE_INPUT] +
1279 			ctx.info.file_max[TGSI_FILE_INPUT] + 1;
1280 	}
1281 	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
1282 						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
1283 
1284 	/* Outside the GPR range. This will be translated to one of the
1285 	 * kcache banks later. */
1286 	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
1287 
1288 	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
1289 	ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
1290 			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
1291 	ctx.temp_reg = ctx.bc->ar_reg + 1;
1292 
1293 	ctx.nliterals = 0;
1294 	ctx.literals = NULL;
1295 	shader->fs_write_all = FALSE;
1296 	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1297 		tgsi_parse_token(&ctx.parse);
1298 		switch (ctx.parse.FullToken.Token.Type) {
1299 		case TGSI_TOKEN_TYPE_IMMEDIATE:
1300 			immediate = &ctx.parse.FullToken.FullImmediate;
1301 			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
1302 			if(ctx.literals == NULL) {
1303 				r = -ENOMEM;
1304 				goto out_err;
1305 			}
1306 			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
1307 			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
1308 			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
1309 			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
1310 			ctx.nliterals++;
1311 			break;
1312 		case TGSI_TOKEN_TYPE_DECLARATION:
1313 			r = tgsi_declaration(&ctx);
1314 			if (r)
1315 				goto out_err;
1316 			break;
1317 		case TGSI_TOKEN_TYPE_INSTRUCTION:
1318 			break;
1319 		case TGSI_TOKEN_TYPE_PROPERTY:
1320 			property = &ctx.parse.FullToken.FullProperty;
1321 			switch (property->Property.PropertyName) {
1322 			case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1323 				if (property->u[0].Data == 1)
1324 					shader->fs_write_all = TRUE;
1325 				break;
1326 			case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1327 				if (property->u[0].Data == 1)
1328 					shader->vs_prohibit_ucps = TRUE;
1329 				break;
1330 			}
1331 			break;
1332 		default:
1333 			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
1334 			r = -EINVAL;
1335 			goto out_err;
1336 		}
1337 	}
1338 
1339 	if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
1340 		shader->nr_ps_max_color_exports = 8;
1341 
1342 	if (ctx.fragcoord_input >= 0) {
1343 		if (ctx.bc->chip_class == CAYMAN) {
1344 			for (j = 0 ; j < 4; j++) {
1345 				struct r600_bytecode_alu alu;
1346 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1347 				alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1348 				alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1349 				alu.src[0].chan = 3;
1350 
1351 				alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1352 				alu.dst.chan = j;
1353 				alu.dst.write = (j == 3);
1354 				alu.last = 1;
1355 				if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1356 					return r;
1357 			}
1358 		} else {
1359 			struct r600_bytecode_alu alu;
1360 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1361 			alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1362 			alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
1363 			alu.src[0].chan = 3;
1364 
1365 			alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
1366 			alu.dst.chan = 3;
1367 			alu.dst.write = 1;
1368 			alu.last = 1;
1369 			if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
1370 				return r;
1371 		}
1372 	}
1373 
1374 	if (shader->two_side && ctx.colors_used) {
1375 		if ((r = process_twoside_color_inputs(&ctx)))
1376 			return r;
1377 	}
1378 
1379 	tgsi_parse_init(&ctx.parse, tokens);
1380 	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
1381 		tgsi_parse_token(&ctx.parse);
1382 		switch (ctx.parse.FullToken.Token.Type) {
1383 		case TGSI_TOKEN_TYPE_INSTRUCTION:
1384 			if (use_llvm) {
1385 				continue;
1386 			}
1387 			r = tgsi_is_supported(&ctx);
1388 			if (r)
1389 				goto out_err;
1390 			ctx.max_driver_temp_used = 0;
1391 			/* reserve first tmp for everyone */
1392 			r600_get_temp(&ctx);
1393 
1394 			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
1395 			if ((r = tgsi_split_constant(&ctx)))
1396 				goto out_err;
1397 			if ((r = tgsi_split_literal_constant(&ctx)))
1398 				goto out_err;
1399 			if (ctx.bc->chip_class == CAYMAN)
1400 				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
1401 			else if (ctx.bc->chip_class >= EVERGREEN)
1402 				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
1403 			else
1404 				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
1405 			r = ctx.inst_info->process(&ctx);
1406 			if (r)
1407 				goto out_err;
1408 			break;
1409 		default:
1410 			break;
1411 		}
1412 	}
1413 
1414 	/* Get instructions if we are using the LLVM backend. */
1415 	if (use_llvm) {
1416 		r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
1417 		FREE(inst_bytes);
1418 	}
1419 
1420 	noutput = shader->noutput;
1421 
1422 	if (ctx.clip_vertex_write) {
1423 		/* need to convert a clipvertex write into clipdistance writes and not export
1424 		   the clip vertex anymore */
1425 
1426 		memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
1427 		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1428 		shader->output[noutput].gpr = ctx.temp_reg;
1429 		noutput++;
1430 		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
1431 		shader->output[noutput].gpr = ctx.temp_reg+1;
1432 		noutput++;
1433 
1434 		/* reset spi_sid for clipvertex output to avoid confusing spi */
1435 		shader->output[ctx.cv_output].spi_sid = 0;
1436 
1437 		shader->clip_dist_write = 0xFF;
1438 
1439 		for (i = 0; i < 8; i++) {
1440 			int oreg = i >> 2;
1441 			int ochan = i & 3;
1442 
1443 			for (j = 0; j < 4; j++) {
1444 				struct r600_bytecode_alu alu;
1445 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1446 				alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4);
1447 				alu.src[0].sel = shader->output[ctx.cv_output].gpr;
1448 				alu.src[0].chan = j;
1449 
1450 				alu.src[1].sel = 512 + i;
1451 				alu.src[1].kc_bank = 1;
1452 				alu.src[1].chan = j;
1453 
1454 				alu.dst.sel = ctx.temp_reg + oreg;
1455 				alu.dst.chan = j;
1456 				alu.dst.write = (j == ochan);
1457 				if (j == 3)
1458 					alu.last = 1;
1459 				r = r600_bytecode_add_alu(ctx.bc, &alu);
1460 				if (r)
1461 					return r;
1462 			}
1463 		}
1464 	}
1465 
1466 	/* Add stream outputs. */
1467 	if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
1468 		for (i = 0; i < so.num_outputs; i++) {
1469 			struct r600_bytecode_output output;
1470 
1471 			if (so.output[i].output_buffer >= 4) {
1472 				R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
1473 					 so.output[i].output_buffer);
1474 				r = -EINVAL;
1475 				goto out_err;
1476 			}
1477 			if (so.output[i].dst_offset < so.output[i].start_component) {
1478 			   R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
1479 			   r = -EINVAL;
1480 			   goto out_err;
1481 			}
1482 
1483 			memset(&output, 0, sizeof(struct r600_bytecode_output));
1484 			output.gpr = shader->output[so.output[i].register_index].gpr;
1485 			output.elem_size = 0;
1486 			output.array_base = so.output[i].dst_offset - so.output[i].start_component;
1487 			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
1488 			output.burst_count = 1;
1489 			output.barrier = 1;
1490 			/* array_size is an upper limit for the burst_count
1491 			 * with MEM_STREAM instructions */
1492 			output.array_size = 0xFFF;
1493 			output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
1494 			if (ctx.bc->chip_class >= EVERGREEN) {
1495 				switch (so.output[i].output_buffer) {
1496 				case 0:
1497 					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
1498 					break;
1499 				case 1:
1500 					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
1501 					break;
1502 				case 2:
1503 					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
1504 					break;
1505 				case 3:
1506 					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
1507 					break;
1508 				}
1509 			} else {
1510 				switch (so.output[i].output_buffer) {
1511 				case 0:
1512 					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
1513 					break;
1514 				case 1:
1515 					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
1516 					break;
1517 				case 2:
1518 					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
1519 					break;
1520 				case 3:
1521 					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
1522 					break;
1523 				}
1524 			}
1525 			r = r600_bytecode_add_output(ctx.bc, &output);
1526 			if (r)
1527 				goto out_err;
1528 		}
1529 	}
1530 
1531 	/* export output */
1532 	for (i = 0, j = 0; i < noutput; i++, j++) {
1533 		memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1534 		output[j].gpr = shader->output[i].gpr;
1535 		output[j].elem_size = 3;
1536 		output[j].swizzle_x = 0;
1537 		output[j].swizzle_y = 1;
1538 		output[j].swizzle_z = 2;
1539 		output[j].swizzle_w = 3;
1540 		output[j].burst_count = 1;
1541 		output[j].barrier = 1;
1542 		output[j].type = -1;
1543 		output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1544 		switch (ctx.type) {
1545 		case TGSI_PROCESSOR_VERTEX:
1546 			switch (shader->output[i].name) {
1547 			case TGSI_SEMANTIC_POSITION:
1548 				output[j].array_base = next_pos_base++;
1549 				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1550 				break;
1551 
1552 			case TGSI_SEMANTIC_PSIZE:
1553 				output[j].array_base = next_pos_base++;
1554 				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1555 				break;
1556 			case TGSI_SEMANTIC_CLIPVERTEX:
1557 				j--;
1558 				break;
1559 			case TGSI_SEMANTIC_CLIPDIST:
1560 				output[j].array_base = next_pos_base++;
1561 				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
1562 				/* spi_sid is 0 for clipdistance outputs that were generated
1563 				 * for clipvertex - we don't need to pass them to PS */
1564 				if (shader->output[i].spi_sid) {
1565 					j++;
1566 					/* duplicate it as PARAM to pass to the pixel shader */
1567 					memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
1568 					output[j].array_base = next_param_base++;
1569 					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1570 				}
1571 				break;
1572 			case TGSI_SEMANTIC_FOG:
1573 				output[j].swizzle_y = 4; /* 0 */
1574 				output[j].swizzle_z = 4; /* 0 */
1575 				output[j].swizzle_w = 5; /* 1 */
1576 				break;
1577 			}
1578 			break;
1579 		case TGSI_PROCESSOR_FRAGMENT:
1580 			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
1581 				/* never export more colors than the number of CBs */
1582 				if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
1583 					/* skip export */
1584 					j--;
1585 					continue;
1586 				}
1587 				output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
1588 				output[j].array_base = next_pixel_base++;
1589 				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1590 				shader->nr_ps_color_exports++;
1591 				if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
1592 					for (k = 1; k < rctx->nr_cbufs; k++) {
1593 						j++;
1594 						memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1595 						output[j].gpr = shader->output[i].gpr;
1596 						output[j].elem_size = 3;
1597 						output[j].swizzle_x = 0;
1598 						output[j].swizzle_y = 1;
1599 						output[j].swizzle_z = 2;
1600 						output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
1601 						output[j].burst_count = 1;
1602 						output[j].barrier = 1;
1603 						output[j].array_base = next_pixel_base++;
1604 						output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1605 						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1606 						shader->nr_ps_color_exports++;
1607 					}
1608 				}
1609 			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
1610 				output[j].array_base = 61;
1611 				output[j].swizzle_x = 2;
1612 				output[j].swizzle_y = 7;
1613 				output[j].swizzle_z = output[j].swizzle_w = 7;
1614 				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1615 			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
1616 				output[j].array_base = 61;
1617 				output[j].swizzle_x = 7;
1618 				output[j].swizzle_y = 1;
1619 				output[j].swizzle_z = output[j].swizzle_w = 7;
1620 				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1621 			} else {
1622 				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1623 				r = -EINVAL;
1624 				goto out_err;
1625 			}
1626 			break;
1627 		default:
1628 			R600_ERR("unsupported processor type %d\n", ctx.type);
1629 			r = -EINVAL;
1630 			goto out_err;
1631 		}
1632 
1633 		if (output[j].type==-1) {
1634 			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1635 			output[j].array_base = next_param_base++;
1636 		}
1637 	}
1638 
1639 	/* add fake param output for vertex shader if no param is exported */
1640 	if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
1641 			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1642 			output[j].gpr = 0;
1643 			output[j].elem_size = 3;
1644 			output[j].swizzle_x = 7;
1645 			output[j].swizzle_y = 7;
1646 			output[j].swizzle_z = 7;
1647 			output[j].swizzle_w = 7;
1648 			output[j].burst_count = 1;
1649 			output[j].barrier = 1;
1650 			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1651 			output[j].array_base = 0;
1652 			output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1653 			j++;
1654 	}
1655 
1656 	/* add fake pixel export */
1657 	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
1658 		memset(&output[j], 0, sizeof(struct r600_bytecode_output));
1659 		output[j].gpr = 0;
1660 		output[j].elem_size = 3;
1661 		output[j].swizzle_x = 7;
1662 		output[j].swizzle_y = 7;
1663 		output[j].swizzle_z = 7;
1664 		output[j].swizzle_w = 7;
1665 		output[j].burst_count = 1;
1666 		output[j].barrier = 1;
1667 		output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1668 		output[j].array_base = 0;
1669 		output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1670 		j++;
1671 	}
1672 
1673 	noutput = j;
1674 
1675 	/* set export done on last export of each type */
1676 	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1677 		if (ctx.bc->chip_class < CAYMAN) {
1678 			if (i == (noutput - 1)) {
1679 				output[i].end_of_program = 1;
1680 			}
1681 		}
1682 		if (!(output_done & (1 << output[i].type))) {
1683 			output_done |= (1 << output[i].type);
1684 			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1685 		}
1686 	}
1687 	/* add output to bytecode */
1688 	for (i = 0; i < noutput; i++) {
1689 		r = r600_bytecode_add_output(ctx.bc, &output[i]);
1690 		if (r)
1691 			goto out_err;
1692 	}
1693 	/* add program end */
1694 	if (ctx.bc->chip_class == CAYMAN)
1695 		cm_bytecode_add_cf_end(ctx.bc);
1696 
1697 	/* check GPR limit - we have 124 = 128 - 4
1698 	 * (4 are reserved as alu clause temporary registers) */
1699 	if (ctx.bc->ngpr > 124) {
1700 		R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
1701 		r = -ENOMEM;
1702 		goto out_err;
1703 	}
1704 
1705 	free(ctx.literals);
1706 	tgsi_parse_free(&ctx.parse);
1707 	return 0;
1708 out_err:
1709 	free(ctx.literals);
1710 	tgsi_parse_free(&ctx.parse);
1711 	return r;
1712 }
1713 
tgsi_unsupported(struct r600_shader_ctx * ctx)1714 static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1715 {
1716 	R600_ERR("%s tgsi opcode unsupported\n",
1717 		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1718 	return -EINVAL;
1719 }
1720 
/*
 * Handler that emits nothing and always succeeds; ctx is not used.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;

	return 0;
}
1725 
r600_bytecode_src(struct r600_bytecode_alu_src * bc_src,const struct r600_shader_src * shader_src,unsigned chan)1726 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1727 			const struct r600_shader_src *shader_src,
1728 			unsigned chan)
1729 {
1730 	bc_src->sel = shader_src->sel;
1731 	bc_src->chan = shader_src->swizzle[chan];
1732 	bc_src->neg = shader_src->neg;
1733 	bc_src->abs = shader_src->abs;
1734 	bc_src->rel = shader_src->rel;
1735 	bc_src->value = shader_src->value[bc_src->chan];
1736 }
1737 
r600_bytecode_src_set_abs(struct r600_bytecode_alu_src * bc_src)1738 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1739 {
1740 	bc_src->abs = 1;
1741 	bc_src->neg = 0;
1742 }
1743 
r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src * bc_src)1744 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1745 {
1746 	bc_src->neg = !bc_src->neg;
1747 }
1748 
tgsi_dst(struct r600_shader_ctx * ctx,const struct tgsi_full_dst_register * tgsi_dst,unsigned swizzle,struct r600_bytecode_alu_dst * r600_dst)1749 static void tgsi_dst(struct r600_shader_ctx *ctx,
1750 		     const struct tgsi_full_dst_register *tgsi_dst,
1751 		     unsigned swizzle,
1752 		     struct r600_bytecode_alu_dst *r600_dst)
1753 {
1754 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1755 
1756 	r600_dst->sel = tgsi_dst->Register.Index;
1757 	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1758 	r600_dst->chan = swizzle;
1759 	r600_dst->write = 1;
1760 	if (tgsi_dst->Register.Indirect)
1761 		r600_dst->rel = V_SQ_REL_RELATIVE;
1762 	if (inst->Instruction.Saturate) {
1763 		r600_dst->clamp = 1;
1764 	}
1765 }
1766 
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X and "nothing set" both yield 0 */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1u)
			return chan;
	}
	return 0;
}
1778 
tgsi_op2_s(struct r600_shader_ctx * ctx,int swap,int trans_only)1779 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1780 {
1781 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1782 	struct r600_bytecode_alu alu;
1783 	int i, j, r;
1784 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1785 
1786 	for (i = 0; i < lasti + 1; i++) {
1787 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1788 			continue;
1789 
1790 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1791 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1792 
1793 		alu.inst = ctx->inst_info->r600_opcode;
1794 		if (!swap) {
1795 			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1796 				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1797 			}
1798 		} else {
1799 			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1800 			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1801 		}
1802 		/* handle some special cases */
1803 		switch (ctx->inst_info->tgsi_opcode) {
1804 		case TGSI_OPCODE_SUB:
1805 			r600_bytecode_src_toggle_neg(&alu.src[1]);
1806 			break;
1807 		case TGSI_OPCODE_ABS:
1808 			r600_bytecode_src_set_abs(&alu.src[0]);
1809 			break;
1810 		default:
1811 			break;
1812 		}
1813 		if (i == lasti || trans_only) {
1814 			alu.last = 1;
1815 		}
1816 		r = r600_bytecode_add_alu(ctx->bc, &alu);
1817 		if (r)
1818 			return r;
1819 	}
1820 	return 0;
1821 }
1822 
/* Per-channel op with sources in TGSI order; only the final op is `last`. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1827 
/* Per-channel op with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1832 
/* Per-channel op where every emitted op is flagged `last`. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1837 
tgsi_ineg(struct r600_shader_ctx * ctx)1838 static int tgsi_ineg(struct r600_shader_ctx *ctx)
1839 {
1840 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1841 	struct r600_bytecode_alu alu;
1842 	int i, r;
1843 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1844 
1845 	for (i = 0; i < lasti + 1; i++) {
1846 
1847 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1848 			continue;
1849 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1850 		alu.inst = ctx->inst_info->r600_opcode;
1851 
1852 		alu.src[0].sel = V_SQ_ALU_SRC_0;
1853 
1854 		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1855 
1856 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1857 
1858 		if (i == lasti) {
1859 			alu.last = 1;
1860 		}
1861 		r = r600_bytecode_add_alu(ctx->bc, &alu);
1862 		if (r)
1863 			return r;
1864 	}
1865 	return 0;
1866 
1867 }
1868 
cayman_emit_float_instr(struct r600_shader_ctx * ctx)1869 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1870 {
1871 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1872 	int i, j, r;
1873 	struct r600_bytecode_alu alu;
1874 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1875 
1876 	for (i = 0 ; i < last_slot; i++) {
1877 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1878 		alu.inst = ctx->inst_info->r600_opcode;
1879 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1880 			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1881 
1882 			/* RSQ should take the absolute value of src */
1883 			if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
1884 				r600_bytecode_src_set_abs(&alu.src[j]);
1885 			}
1886 		}
1887 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1888 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1889 
1890 		if (i == last_slot - 1)
1891 			alu.last = 1;
1892 		r = r600_bytecode_add_alu(ctx->bc, &alu);
1893 		if (r)
1894 			return r;
1895 	}
1896 	return 0;
1897 }
1898 
cayman_mul_int_instr(struct r600_shader_ctx * ctx)1899 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
1900 {
1901 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1902 	int i, j, k, r;
1903 	struct r600_bytecode_alu alu;
1904 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1905 	for (k = 0; k < last_slot; k++) {
1906 		if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
1907 			continue;
1908 
1909 		for (i = 0 ; i < 4; i++) {
1910 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1911 			alu.inst = ctx->inst_info->r600_opcode;
1912 			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1913 				r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
1914 			}
1915 			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1916 			alu.dst.write = (i == k);
1917 			if (i == 3)
1918 				alu.last = 1;
1919 			r = r600_bytecode_add_alu(ctx->bc, &alu);
1920 			if (r)
1921 				return r;
1922 		}
1923 	}
1924 	return 0;
1925 }
1926 
1927 /*
1928  * r600 - trunc to -PI..PI range
1929  * r700 - normalize by dividing by 2PI
1930  * see fdo bug 27901
1931  */
tgsi_setup_trig(struct r600_shader_ctx * ctx)1932 static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1933 {
1934 	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1935 	static float double_pi = 3.1415926535 * 2;
1936 	static float neg_pi = -3.1415926535;
1937 
1938 	int r;
1939 	struct r600_bytecode_alu alu;
1940 
1941 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1942 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1943 	alu.is_op3 = 1;
1944 
1945 	alu.dst.chan = 0;
1946 	alu.dst.sel = ctx->temp_reg;
1947 	alu.dst.write = 1;
1948 
1949 	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1950 
1951 	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1952 	alu.src[1].chan = 0;
1953 	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1954 	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1955 	alu.src[2].chan = 0;
1956 	alu.last = 1;
1957 	r = r600_bytecode_add_alu(ctx->bc, &alu);
1958 	if (r)
1959 		return r;
1960 
1961 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1962 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1963 
1964 	alu.dst.chan = 0;
1965 	alu.dst.sel = ctx->temp_reg;
1966 	alu.dst.write = 1;
1967 
1968 	alu.src[0].sel = ctx->temp_reg;
1969 	alu.src[0].chan = 0;
1970 	alu.last = 1;
1971 	r = r600_bytecode_add_alu(ctx->bc, &alu);
1972 	if (r)
1973 		return r;
1974 
1975 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1976 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1977 	alu.is_op3 = 1;
1978 
1979 	alu.dst.chan = 0;
1980 	alu.dst.sel = ctx->temp_reg;
1981 	alu.dst.write = 1;
1982 
1983 	alu.src[0].sel = ctx->temp_reg;
1984 	alu.src[0].chan = 0;
1985 
1986 	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1987 	alu.src[1].chan = 0;
1988 	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1989 	alu.src[2].chan = 0;
1990 
1991 	if (ctx->bc->chip_class == R600) {
1992 		alu.src[1].value = *(uint32_t *)&double_pi;
1993 		alu.src[2].value = *(uint32_t *)&neg_pi;
1994 	} else {
1995 		alu.src[1].sel = V_SQ_ALU_SRC_1;
1996 		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1997 		alu.src[2].neg = 1;
1998 	}
1999 
2000 	alu.last = 1;
2001 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2002 	if (r)
2003 		return r;
2004 	return 0;
2005 }
2006 
cayman_trig(struct r600_shader_ctx * ctx)2007 static int cayman_trig(struct r600_shader_ctx *ctx)
2008 {
2009 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2010 	struct r600_bytecode_alu alu;
2011 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2012 	int i, r;
2013 
2014 	r = tgsi_setup_trig(ctx);
2015 	if (r)
2016 		return r;
2017 
2018 
2019 	for (i = 0; i < last_slot; i++) {
2020 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2021 		alu.inst = ctx->inst_info->r600_opcode;
2022 		alu.dst.chan = i;
2023 
2024 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2025 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2026 
2027 		alu.src[0].sel = ctx->temp_reg;
2028 		alu.src[0].chan = 0;
2029 		if (i == last_slot - 1)
2030 			alu.last = 1;
2031 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2032 		if (r)
2033 			return r;
2034 	}
2035 	return 0;
2036 }
2037 
tgsi_trig(struct r600_shader_ctx * ctx)2038 static int tgsi_trig(struct r600_shader_ctx *ctx)
2039 {
2040 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2041 	struct r600_bytecode_alu alu;
2042 	int i, r;
2043 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2044 
2045 	r = tgsi_setup_trig(ctx);
2046 	if (r)
2047 		return r;
2048 
2049 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2050 	alu.inst = ctx->inst_info->r600_opcode;
2051 	alu.dst.chan = 0;
2052 	alu.dst.sel = ctx->temp_reg;
2053 	alu.dst.write = 1;
2054 
2055 	alu.src[0].sel = ctx->temp_reg;
2056 	alu.src[0].chan = 0;
2057 	alu.last = 1;
2058 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2059 	if (r)
2060 		return r;
2061 
2062 	/* replicate result */
2063 	for (i = 0; i < lasti + 1; i++) {
2064 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2065 			continue;
2066 
2067 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2068 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2069 
2070 		alu.src[0].sel = ctx->temp_reg;
2071 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2072 		if (i == lasti)
2073 			alu.last = 1;
2074 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2075 		if (r)
2076 			return r;
2077 	}
2078 	return 0;
2079 }
2080 
tgsi_scs(struct r600_shader_ctx * ctx)2081 static int tgsi_scs(struct r600_shader_ctx *ctx)
2082 {
2083 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2084 	struct r600_bytecode_alu alu;
2085 	int i, r;
2086 
2087 	/* We'll only need the trig stuff if we are going to write to the
2088 	 * X or Y components of the destination vector.
2089 	 */
2090 	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
2091 		r = tgsi_setup_trig(ctx);
2092 		if (r)
2093 			return r;
2094 	}
2095 
2096 	/* dst.x = COS */
2097 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2098 		if (ctx->bc->chip_class == CAYMAN) {
2099 			for (i = 0 ; i < 3; i++) {
2100 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2101 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2102 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2103 
2104 				if (i == 0)
2105 					alu.dst.write = 1;
2106 				else
2107 					alu.dst.write = 0;
2108 				alu.src[0].sel = ctx->temp_reg;
2109 				alu.src[0].chan = 0;
2110 				if (i == 2)
2111 					alu.last = 1;
2112 				r = r600_bytecode_add_alu(ctx->bc, &alu);
2113 				if (r)
2114 					return r;
2115 			}
2116 		} else {
2117 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2118 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
2119 			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2120 
2121 			alu.src[0].sel = ctx->temp_reg;
2122 			alu.src[0].chan = 0;
2123 			alu.last = 1;
2124 			r = r600_bytecode_add_alu(ctx->bc, &alu);
2125 			if (r)
2126 				return r;
2127 		}
2128 	}
2129 
2130 	/* dst.y = SIN */
2131 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2132 		if (ctx->bc->chip_class == CAYMAN) {
2133 			for (i = 0 ; i < 3; i++) {
2134 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2135 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2136 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2137 				if (i == 1)
2138 					alu.dst.write = 1;
2139 				else
2140 					alu.dst.write = 0;
2141 				alu.src[0].sel = ctx->temp_reg;
2142 				alu.src[0].chan = 0;
2143 				if (i == 2)
2144 					alu.last = 1;
2145 				r = r600_bytecode_add_alu(ctx->bc, &alu);
2146 				if (r)
2147 					return r;
2148 			}
2149 		} else {
2150 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2151 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
2152 			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2153 
2154 			alu.src[0].sel = ctx->temp_reg;
2155 			alu.src[0].chan = 0;
2156 			alu.last = 1;
2157 			r = r600_bytecode_add_alu(ctx->bc, &alu);
2158 			if (r)
2159 				return r;
2160 		}
2161 	}
2162 
2163 	/* dst.z = 0.0; */
2164 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2165 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2166 
2167 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2168 
2169 		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2170 
2171 		alu.src[0].sel = V_SQ_ALU_SRC_0;
2172 		alu.src[0].chan = 0;
2173 
2174 		alu.last = 1;
2175 
2176 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2177 		if (r)
2178 			return r;
2179 	}
2180 
2181 	/* dst.w = 1.0; */
2182 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2183 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2184 
2185 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2186 
2187 		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2188 
2189 		alu.src[0].sel = V_SQ_ALU_SRC_1;
2190 		alu.src[0].chan = 0;
2191 
2192 		alu.last = 1;
2193 
2194 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2195 		if (r)
2196 			return r;
2197 	}
2198 
2199 	return 0;
2200 }
2201 
tgsi_kill(struct r600_shader_ctx * ctx)2202 static int tgsi_kill(struct r600_shader_ctx *ctx)
2203 {
2204 	struct r600_bytecode_alu alu;
2205 	int i, r;
2206 
2207 	for (i = 0; i < 4; i++) {
2208 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2209 		alu.inst = ctx->inst_info->r600_opcode;
2210 
2211 		alu.dst.chan = i;
2212 
2213 		alu.src[0].sel = V_SQ_ALU_SRC_0;
2214 
2215 		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
2216 			alu.src[1].sel = V_SQ_ALU_SRC_1;
2217 			alu.src[1].neg = 1;
2218 		} else {
2219 			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2220 		}
2221 		if (i == 3) {
2222 			alu.last = 1;
2223 		}
2224 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2225 		if (r)
2226 			return r;
2227 	}
2228 
2229 	/* kill must be last in ALU */
2230 	ctx->bc->force_add_cf = 1;
2231 	ctx->shader->uses_kill = TRUE;
2232 	return 0;
2233 }
2234 
tgsi_lit(struct r600_shader_ctx * ctx)2235 static int tgsi_lit(struct r600_shader_ctx *ctx)
2236 {
2237 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2238 	struct r600_bytecode_alu alu;
2239 	int r;
2240 
2241 	/* tmp.x = max(src.y, 0.0) */
2242 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2243 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2244 	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
2245 	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
2246 	alu.src[1].chan = 1;
2247 
2248 	alu.dst.sel = ctx->temp_reg;
2249 	alu.dst.chan = 0;
2250 	alu.dst.write = 1;
2251 
2252 	alu.last = 1;
2253 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2254 	if (r)
2255 		return r;
2256 
2257 	if (inst->Dst[0].Register.WriteMask & (1 << 2))
2258 	{
2259 		int chan;
2260 		int sel;
2261 		int i;
2262 
2263 		if (ctx->bc->chip_class == CAYMAN) {
2264 			for (i = 0; i < 3; i++) {
2265 				/* tmp.z = log(tmp.x) */
2266 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2267 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2268 				alu.src[0].sel = ctx->temp_reg;
2269 				alu.src[0].chan = 0;
2270 				alu.dst.sel = ctx->temp_reg;
2271 				alu.dst.chan = i;
2272 				if (i == 2) {
2273 					alu.dst.write = 1;
2274 					alu.last = 1;
2275 				} else
2276 					alu.dst.write = 0;
2277 
2278 				r = r600_bytecode_add_alu(ctx->bc, &alu);
2279 				if (r)
2280 					return r;
2281 			}
2282 		} else {
2283 			/* tmp.z = log(tmp.x) */
2284 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2285 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
2286 			alu.src[0].sel = ctx->temp_reg;
2287 			alu.src[0].chan = 0;
2288 			alu.dst.sel = ctx->temp_reg;
2289 			alu.dst.chan = 2;
2290 			alu.dst.write = 1;
2291 			alu.last = 1;
2292 			r = r600_bytecode_add_alu(ctx->bc, &alu);
2293 			if (r)
2294 				return r;
2295 		}
2296 
2297 		chan = alu.dst.chan;
2298 		sel = alu.dst.sel;
2299 
2300 		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
2301 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2302 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
2303 		alu.src[0].sel  = sel;
2304 		alu.src[0].chan = chan;
2305 		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
2306 		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
2307 		alu.dst.sel = ctx->temp_reg;
2308 		alu.dst.chan = 0;
2309 		alu.dst.write = 1;
2310 		alu.is_op3 = 1;
2311 		alu.last = 1;
2312 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2313 		if (r)
2314 			return r;
2315 
2316 		if (ctx->bc->chip_class == CAYMAN) {
2317 			for (i = 0; i < 3; i++) {
2318 				/* dst.z = exp(tmp.x) */
2319 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2320 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2321 				alu.src[0].sel = ctx->temp_reg;
2322 				alu.src[0].chan = 0;
2323 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2324 				if (i == 2) {
2325 					alu.dst.write = 1;
2326 					alu.last = 1;
2327 				} else
2328 					alu.dst.write = 0;
2329 				r = r600_bytecode_add_alu(ctx->bc, &alu);
2330 				if (r)
2331 					return r;
2332 			}
2333 		} else {
2334 			/* dst.z = exp(tmp.x) */
2335 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2336 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2337 			alu.src[0].sel = ctx->temp_reg;
2338 			alu.src[0].chan = 0;
2339 			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
2340 			alu.last = 1;
2341 			r = r600_bytecode_add_alu(ctx->bc, &alu);
2342 			if (r)
2343 				return r;
2344 		}
2345 	}
2346 
2347 	/* dst.x, <- 1.0  */
2348 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2349 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2350 	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
2351 	alu.src[0].chan = 0;
2352 	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
2353 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
2354 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2355 	if (r)
2356 		return r;
2357 
2358 	/* dst.y = max(src.x, 0.0) */
2359 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2360 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
2361 	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2362 	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
2363 	alu.src[1].chan = 0;
2364 	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
2365 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
2366 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2367 	if (r)
2368 		return r;
2369 
2370 	/* dst.w, <- 1.0  */
2371 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2372 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2373 	alu.src[0].sel  = V_SQ_ALU_SRC_1;
2374 	alu.src[0].chan = 0;
2375 	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
2376 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
2377 	alu.last = 1;
2378 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2379 	if (r)
2380 		return r;
2381 
2382 	return 0;
2383 }
2384 
tgsi_rsq(struct r600_shader_ctx * ctx)2385 static int tgsi_rsq(struct r600_shader_ctx *ctx)
2386 {
2387 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2388 	struct r600_bytecode_alu alu;
2389 	int i, r;
2390 
2391 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2392 
2393 	/* XXX:
2394 	 * For state trackers other than OpenGL, we'll want to use
2395 	 * _RECIPSQRT_IEEE instead.
2396 	 */
2397 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
2398 
2399 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2400 		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2401 		r600_bytecode_src_set_abs(&alu.src[i]);
2402 	}
2403 	alu.dst.sel = ctx->temp_reg;
2404 	alu.dst.write = 1;
2405 	alu.last = 1;
2406 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2407 	if (r)
2408 		return r;
2409 	/* replicate result */
2410 	return tgsi_helper_tempx_replicate(ctx);
2411 }
2412 
tgsi_helper_tempx_replicate(struct r600_shader_ctx * ctx)2413 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
2414 {
2415 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2416 	struct r600_bytecode_alu alu;
2417 	int i, r;
2418 
2419 	for (i = 0; i < 4; i++) {
2420 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2421 		alu.src[0].sel = ctx->temp_reg;
2422 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2423 		alu.dst.chan = i;
2424 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2425 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2426 		if (i == 3)
2427 			alu.last = 1;
2428 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2429 		if (r)
2430 			return r;
2431 	}
2432 	return 0;
2433 }
2434 
tgsi_trans_srcx_replicate(struct r600_shader_ctx * ctx)2435 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
2436 {
2437 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2438 	struct r600_bytecode_alu alu;
2439 	int i, r;
2440 
2441 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2442 	alu.inst = ctx->inst_info->r600_opcode;
2443 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
2444 		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
2445 	}
2446 	alu.dst.sel = ctx->temp_reg;
2447 	alu.dst.write = 1;
2448 	alu.last = 1;
2449 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2450 	if (r)
2451 		return r;
2452 	/* replicate result */
2453 	return tgsi_helper_tempx_replicate(ctx);
2454 }
2455 
cayman_pow(struct r600_shader_ctx * ctx)2456 static int cayman_pow(struct r600_shader_ctx *ctx)
2457 {
2458 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2459 	int i, r;
2460 	struct r600_bytecode_alu alu;
2461 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
2462 
2463 	for (i = 0; i < 3; i++) {
2464 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2465 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2466 		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2467 		alu.dst.sel = ctx->temp_reg;
2468 		alu.dst.chan = i;
2469 		alu.dst.write = 1;
2470 		if (i == 2)
2471 			alu.last = 1;
2472 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2473 		if (r)
2474 			return r;
2475 	}
2476 
2477 	/* b * LOG2(a) */
2478 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2479 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2480 	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2481 	alu.src[1].sel = ctx->temp_reg;
2482 	alu.dst.sel = ctx->temp_reg;
2483 	alu.dst.write = 1;
2484 	alu.last = 1;
2485 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2486 	if (r)
2487 		return r;
2488 
2489 	for (i = 0; i < last_slot; i++) {
2490 		/* POW(a,b) = EXP2(b * LOG2(a))*/
2491 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2492 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2493 		alu.src[0].sel = ctx->temp_reg;
2494 
2495 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2496 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2497 		if (i == last_slot - 1)
2498 			alu.last = 1;
2499 		r = r600_bytecode_add_alu(ctx->bc, &alu);
2500 		if (r)
2501 			return r;
2502 	}
2503 	return 0;
2504 }
2505 
tgsi_pow(struct r600_shader_ctx * ctx)2506 static int tgsi_pow(struct r600_shader_ctx *ctx)
2507 {
2508 	struct r600_bytecode_alu alu;
2509 	int r;
2510 
2511 	/* LOG2(a) */
2512 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2513 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2514 	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2515 	alu.dst.sel = ctx->temp_reg;
2516 	alu.dst.write = 1;
2517 	alu.last = 1;
2518 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2519 	if (r)
2520 		return r;
2521 	/* b * LOG2(a) */
2522 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2523 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2524 	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
2525 	alu.src[1].sel = ctx->temp_reg;
2526 	alu.dst.sel = ctx->temp_reg;
2527 	alu.dst.write = 1;
2528 	alu.last = 1;
2529 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2530 	if (r)
2531 		return r;
2532 	/* POW(a,b) = EXP2(b * LOG2(a))*/
2533 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2534 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2535 	alu.src[0].sel = ctx->temp_reg;
2536 	alu.dst.sel = ctx->temp_reg;
2537 	alu.dst.write = 1;
2538 	alu.last = 1;
2539 	r = r600_bytecode_add_alu(ctx->bc, &alu);
2540 	if (r)
2541 		return r;
2542 	return tgsi_helper_tempx_replicate(ctx);
2543 }
2544 
tgsi_divmod(struct r600_shader_ctx * ctx,int mod,int signed_op)2545 static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
2546 {
2547 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2548 	struct r600_bytecode_alu alu;
2549 	int i, r, j;
2550 	unsigned write_mask = inst->Dst[0].Register.WriteMask;
2551 	int tmp0 = ctx->temp_reg;
2552 	int tmp1 = r600_get_temp(ctx);
2553 	int tmp2 = r600_get_temp(ctx);
2554 	int tmp3 = r600_get_temp(ctx);
2555 	/* Unsigned path:
2556 	 *
2557 	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
2558 	 *
2559 	 * 1. tmp0.x = rcp (src2)     = 2^32/src2 + e, where e is rounding error
2560 	 * 2. tmp0.z = lo (tmp0.x * src2)
2561 	 * 3. tmp0.w = -tmp0.z
2562 	 * 4. tmp0.y = hi (tmp0.x * src2)
2563 	 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z)      = abs(lo(rcp*src2))
2564 	 * 6. tmp0.w = hi (tmp0.z * tmp0.x)    = e, rounding error
2565 	 * 7. tmp1.x = tmp0.x - tmp0.w
2566 	 * 8. tmp1.y = tmp0.x + tmp0.w
2567 	 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
2568 	 * 10. tmp0.z = hi(tmp0.x * src1)     = q
2569 	 * 11. tmp0.y = lo (tmp0.z * src2)     = src2*q = src1 - r
2570 	 *
2571 	 * 12. tmp0.w = src1 - tmp0.y       = r
2572 	 * 13. tmp1.x = tmp0.w >= src2		= r >= src2 (uint comparison)
2573 	 * 14. tmp1.y = src1 >= tmp0.y      = r >= 0 (uint comparison)
2574 	 *
2575 	 * if DIV
2576 	 *
2577 	 *   15. tmp1.z = tmp0.z + 1			= q + 1
2578 	 *   16. tmp1.w = tmp0.z - 1			= q - 1
2579 	 *
2580 	 * else MOD
2581 	 *
2582 	 *   15. tmp1.z = tmp0.w - src2			= r - src2
2583 	 *   16. tmp1.w = tmp0.w + src2			= r + src2
2584 	 *
2585 	 * endif
2586 	 *
2587 	 * 17. tmp1.x = tmp1.x & tmp1.y
2588 	 *
2589 	 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
2590 	 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
2591 	 *
2592 	 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
2593 	 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
2594 	 *
2595 	 * Signed path:
2596 	 *
2597 	 * Same as unsigned, using abs values of the operands,
2598 	 * and fixing the sign of the result in the end.
2599 	 */
2600 
2601 	for (i = 0; i < 4; i++) {
2602 		if (!(write_mask & (1<<i)))
2603 			continue;
2604 
2605 		if (signed_op) {
2606 
2607 			/* tmp2.x = -src0 */
2608 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2609 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2610 
2611 			alu.dst.sel = tmp2;
2612 			alu.dst.chan = 0;
2613 			alu.dst.write = 1;
2614 
2615 			alu.src[0].sel = V_SQ_ALU_SRC_0;
2616 
2617 			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2618 
2619 			alu.last = 1;
2620 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2621 				return r;
2622 
2623 			/* tmp2.y = -src1 */
2624 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2625 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2626 
2627 			alu.dst.sel = tmp2;
2628 			alu.dst.chan = 1;
2629 			alu.dst.write = 1;
2630 
2631 			alu.src[0].sel = V_SQ_ALU_SRC_0;
2632 
2633 			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2634 
2635 			alu.last = 1;
2636 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2637 				return r;
2638 
2639 			/* tmp2.z sign bit is set if src0 and src2 signs are different */
2640 			/* it will be a sign of the quotient */
2641 			if (!mod) {
2642 
2643 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2644 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT);
2645 
2646 				alu.dst.sel = tmp2;
2647 				alu.dst.chan = 2;
2648 				alu.dst.write = 1;
2649 
2650 				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2651 				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2652 
2653 				alu.last = 1;
2654 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2655 					return r;
2656 			}
2657 
2658 			/* tmp2.x = |src0| */
2659 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2660 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2661 			alu.is_op3 = 1;
2662 
2663 			alu.dst.sel = tmp2;
2664 			alu.dst.chan = 0;
2665 			alu.dst.write = 1;
2666 
2667 			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2668 			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2669 			alu.src[2].sel = tmp2;
2670 			alu.src[2].chan = 0;
2671 
2672 			alu.last = 1;
2673 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2674 				return r;
2675 
2676 			/* tmp2.y = |src1| */
2677 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2678 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2679 			alu.is_op3 = 1;
2680 
2681 			alu.dst.sel = tmp2;
2682 			alu.dst.chan = 1;
2683 			alu.dst.write = 1;
2684 
2685 			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2686 			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2687 			alu.src[2].sel = tmp2;
2688 			alu.src[2].chan = 1;
2689 
2690 			alu.last = 1;
2691 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2692 				return r;
2693 
2694 		}
2695 
2696 		/* 1. tmp0.x = rcp_u (src2)     = 2^32/src2 + e, where e is rounding error */
2697 		if (ctx->bc->chip_class == CAYMAN) {
2698 			/* tmp3.x = u2f(src2) */
2699 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2700 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
2701 
2702 			alu.dst.sel = tmp3;
2703 			alu.dst.chan = 0;
2704 			alu.dst.write = 1;
2705 
2706 			if (signed_op) {
2707 				alu.src[0].sel = tmp2;
2708 				alu.src[0].chan = 1;
2709 			} else {
2710 				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2711 			}
2712 
2713 			alu.last = 1;
2714 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2715 				return r;
2716 
2717 			/* tmp0.x = recip(tmp3.x) */
2718 			for (j = 0 ; j < 3; j++) {
2719 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2720 				alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
2721 
2722 				alu.dst.sel = tmp0;
2723 				alu.dst.chan = j;
2724 				alu.dst.write = (j == 0);
2725 
2726 				alu.src[0].sel = tmp3;
2727 				alu.src[0].chan = 0;
2728 
2729 				if (j == 2)
2730 					alu.last = 1;
2731 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2732 					return r;
2733 			}
2734 
2735 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2736 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2737 
2738 			alu.src[0].sel = tmp0;
2739 			alu.src[0].chan = 0;
2740 
2741 			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2742 			alu.src[1].value = 0x4f800000;
2743 
2744 			alu.dst.sel = tmp3;
2745 			alu.dst.write = 1;
2746 			alu.last = 1;
2747 			r = r600_bytecode_add_alu(ctx->bc, &alu);
2748 			if (r)
2749 				return r;
2750 
2751 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2752 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2753 
2754 			alu.dst.sel = tmp0;
2755 			alu.dst.chan = 0;
2756 			alu.dst.write = 1;
2757 
2758 			alu.src[0].sel = tmp3;
2759 			alu.src[0].chan = 0;
2760 
2761 			alu.last = 1;
2762 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2763 				return r;
2764 
2765 		} else {
2766 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2767 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT);
2768 
2769 			alu.dst.sel = tmp0;
2770 			alu.dst.chan = 0;
2771 			alu.dst.write = 1;
2772 
2773 			if (signed_op) {
2774 				alu.src[0].sel = tmp2;
2775 				alu.src[0].chan = 1;
2776 			} else {
2777 				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2778 			}
2779 
2780 			alu.last = 1;
2781 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2782 				return r;
2783 		}
2784 
2785 		/* 2. tmp0.z = lo (tmp0.x * src2) */
2786 		if (ctx->bc->chip_class == CAYMAN) {
2787 			for (j = 0 ; j < 4; j++) {
2788 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2789 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2790 
2791 				alu.dst.sel = tmp0;
2792 				alu.dst.chan = j;
2793 				alu.dst.write = (j == 2);
2794 
2795 				alu.src[0].sel = tmp0;
2796 				alu.src[0].chan = 0;
2797 				if (signed_op) {
2798 					alu.src[1].sel = tmp2;
2799 					alu.src[1].chan = 1;
2800 				} else {
2801 					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2802 				}
2803 
2804 				alu.last = (j == 3);
2805 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2806 					return r;
2807 			}
2808 		} else {
2809 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2810 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
2811 
2812 			alu.dst.sel = tmp0;
2813 			alu.dst.chan = 2;
2814 			alu.dst.write = 1;
2815 
2816 			alu.src[0].sel = tmp0;
2817 			alu.src[0].chan = 0;
2818 			if (signed_op) {
2819 				alu.src[1].sel = tmp2;
2820 				alu.src[1].chan = 1;
2821 			} else {
2822 				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2823 			}
2824 
2825 			alu.last = 1;
2826 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2827 				return r;
2828 		}
2829 
2830 		/* 3. tmp0.w = -tmp0.z */
2831 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2832 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2833 
2834 		alu.dst.sel = tmp0;
2835 		alu.dst.chan = 3;
2836 		alu.dst.write = 1;
2837 
2838 		alu.src[0].sel = V_SQ_ALU_SRC_0;
2839 		alu.src[1].sel = tmp0;
2840 		alu.src[1].chan = 2;
2841 
2842 		alu.last = 1;
2843 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2844 			return r;
2845 
2846 		/* 4. tmp0.y = hi (tmp0.x * src2) */
2847 		if (ctx->bc->chip_class == CAYMAN) {
2848 			for (j = 0 ; j < 4; j++) {
2849 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2850 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2851 
2852 				alu.dst.sel = tmp0;
2853 				alu.dst.chan = j;
2854 				alu.dst.write = (j == 1);
2855 
2856 				alu.src[0].sel = tmp0;
2857 				alu.src[0].chan = 0;
2858 
2859 				if (signed_op) {
2860 					alu.src[1].sel = tmp2;
2861 					alu.src[1].chan = 1;
2862 				} else {
2863 					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2864 				}
2865 				alu.last = (j == 3);
2866 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2867 					return r;
2868 			}
2869 		} else {
2870 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2871 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2872 
2873 			alu.dst.sel = tmp0;
2874 			alu.dst.chan = 1;
2875 			alu.dst.write = 1;
2876 
2877 			alu.src[0].sel = tmp0;
2878 			alu.src[0].chan = 0;
2879 
2880 			if (signed_op) {
2881 				alu.src[1].sel = tmp2;
2882 				alu.src[1].chan = 1;
2883 			} else {
2884 				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2885 			}
2886 
2887 			alu.last = 1;
2888 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2889 				return r;
2890 		}
2891 
2892 		/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z)      = abs(lo(rcp*src)) */
2893 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2894 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2895 		alu.is_op3 = 1;
2896 
2897 		alu.dst.sel = tmp0;
2898 		alu.dst.chan = 2;
2899 		alu.dst.write = 1;
2900 
2901 		alu.src[0].sel = tmp0;
2902 		alu.src[0].chan = 1;
2903 		alu.src[1].sel = tmp0;
2904 		alu.src[1].chan = 3;
2905 		alu.src[2].sel = tmp0;
2906 		alu.src[2].chan = 2;
2907 
2908 		alu.last = 1;
2909 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2910 			return r;
2911 
2912 		/* 6. tmp0.w = hi (tmp0.z * tmp0.x)    = e, rounding error */
2913 		if (ctx->bc->chip_class == CAYMAN) {
2914 			for (j = 0 ; j < 4; j++) {
2915 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2916 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2917 
2918 				alu.dst.sel = tmp0;
2919 				alu.dst.chan = j;
2920 				alu.dst.write = (j == 3);
2921 
2922 				alu.src[0].sel = tmp0;
2923 				alu.src[0].chan = 2;
2924 
2925 				alu.src[1].sel = tmp0;
2926 				alu.src[1].chan = 0;
2927 
2928 				alu.last = (j == 3);
2929 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2930 					return r;
2931 			}
2932 		} else {
2933 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2934 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
2935 
2936 			alu.dst.sel = tmp0;
2937 			alu.dst.chan = 3;
2938 			alu.dst.write = 1;
2939 
2940 			alu.src[0].sel = tmp0;
2941 			alu.src[0].chan = 2;
2942 
2943 			alu.src[1].sel = tmp0;
2944 			alu.src[1].chan = 0;
2945 
2946 			alu.last = 1;
2947 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2948 				return r;
2949 		}
2950 
2951 		/* 7. tmp1.x = tmp0.x - tmp0.w */
2952 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2953 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2954 
2955 		alu.dst.sel = tmp1;
2956 		alu.dst.chan = 0;
2957 		alu.dst.write = 1;
2958 
2959 		alu.src[0].sel = tmp0;
2960 		alu.src[0].chan = 0;
2961 		alu.src[1].sel = tmp0;
2962 		alu.src[1].chan = 3;
2963 
2964 		alu.last = 1;
2965 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2966 			return r;
2967 
2968 		/* 8. tmp1.y = tmp0.x + tmp0.w */
2969 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2970 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
2971 
2972 		alu.dst.sel = tmp1;
2973 		alu.dst.chan = 1;
2974 		alu.dst.write = 1;
2975 
2976 		alu.src[0].sel = tmp0;
2977 		alu.src[0].chan = 0;
2978 		alu.src[1].sel = tmp0;
2979 		alu.src[1].chan = 3;
2980 
2981 		alu.last = 1;
2982 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2983 			return r;
2984 
2985 		/* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
2986 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2987 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
2988 		alu.is_op3 = 1;
2989 
2990 		alu.dst.sel = tmp0;
2991 		alu.dst.chan = 0;
2992 		alu.dst.write = 1;
2993 
2994 		alu.src[0].sel = tmp0;
2995 		alu.src[0].chan = 1;
2996 		alu.src[1].sel = tmp1;
2997 		alu.src[1].chan = 1;
2998 		alu.src[2].sel = tmp1;
2999 		alu.src[2].chan = 0;
3000 
3001 		alu.last = 1;
3002 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3003 			return r;
3004 
3005 		/* 10. tmp0.z = hi(tmp0.x * src1)     = q */
3006 		if (ctx->bc->chip_class == CAYMAN) {
3007 			for (j = 0 ; j < 4; j++) {
3008 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3009 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3010 
3011 				alu.dst.sel = tmp0;
3012 				alu.dst.chan = j;
3013 				alu.dst.write = (j == 2);
3014 
3015 				alu.src[0].sel = tmp0;
3016 				alu.src[0].chan = 0;
3017 
3018 				if (signed_op) {
3019 					alu.src[1].sel = tmp2;
3020 					alu.src[1].chan = 0;
3021 				} else {
3022 					r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3023 				}
3024 
3025 				alu.last = (j == 3);
3026 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3027 					return r;
3028 			}
3029 		} else {
3030 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3031 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
3032 
3033 			alu.dst.sel = tmp0;
3034 			alu.dst.chan = 2;
3035 			alu.dst.write = 1;
3036 
3037 			alu.src[0].sel = tmp0;
3038 			alu.src[0].chan = 0;
3039 
3040 			if (signed_op) {
3041 				alu.src[1].sel = tmp2;
3042 				alu.src[1].chan = 0;
3043 			} else {
3044 				r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3045 			}
3046 
3047 			alu.last = 1;
3048 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3049 				return r;
3050 		}
3051 
3052 		/* 11. tmp0.y = lo (src2 * tmp0.z)     = src2*q = src1 - r */
3053 		if (ctx->bc->chip_class == CAYMAN) {
3054 			for (j = 0 ; j < 4; j++) {
3055 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3056 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3057 
3058 				alu.dst.sel = tmp0;
3059 				alu.dst.chan = j;
3060 				alu.dst.write = (j == 1);
3061 
3062 				if (signed_op) {
3063 					alu.src[0].sel = tmp2;
3064 					alu.src[0].chan = 1;
3065 				} else {
3066 					r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3067 				}
3068 
3069 				alu.src[1].sel = tmp0;
3070 				alu.src[1].chan = 2;
3071 
3072 				alu.last = (j == 3);
3073 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3074 					return r;
3075 			}
3076 		} else {
3077 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3078 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3079 
3080 			alu.dst.sel = tmp0;
3081 			alu.dst.chan = 1;
3082 			alu.dst.write = 1;
3083 
3084 			if (signed_op) {
3085 				alu.src[0].sel = tmp2;
3086 				alu.src[0].chan = 1;
3087 			} else {
3088 				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
3089 			}
3090 
3091 			alu.src[1].sel = tmp0;
3092 			alu.src[1].chan = 2;
3093 
3094 			alu.last = 1;
3095 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3096 				return r;
3097 		}
3098 
3099 		/* 12. tmp0.w = src1 - tmp0.y       = r */
3100 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3101 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3102 
3103 		alu.dst.sel = tmp0;
3104 		alu.dst.chan = 3;
3105 		alu.dst.write = 1;
3106 
3107 		if (signed_op) {
3108 			alu.src[0].sel = tmp2;
3109 			alu.src[0].chan = 0;
3110 		} else {
3111 			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3112 		}
3113 
3114 		alu.src[1].sel = tmp0;
3115 		alu.src[1].chan = 1;
3116 
3117 		alu.last = 1;
3118 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3119 			return r;
3120 
3121 		/* 13. tmp1.x = tmp0.w >= src2		= r >= src2 */
3122 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3123 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3124 
3125 		alu.dst.sel = tmp1;
3126 		alu.dst.chan = 0;
3127 		alu.dst.write = 1;
3128 
3129 		alu.src[0].sel = tmp0;
3130 		alu.src[0].chan = 3;
3131 		if (signed_op) {
3132 			alu.src[1].sel = tmp2;
3133 			alu.src[1].chan = 1;
3134 		} else {
3135 			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3136 		}
3137 
3138 		alu.last = 1;
3139 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3140 			return r;
3141 
3142 		/* 14. tmp1.y = src1 >= tmp0.y       = r >= 0 */
3143 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3144 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT);
3145 
3146 		alu.dst.sel = tmp1;
3147 		alu.dst.chan = 1;
3148 		alu.dst.write = 1;
3149 
3150 		if (signed_op) {
3151 			alu.src[0].sel = tmp2;
3152 			alu.src[0].chan = 0;
3153 		} else {
3154 			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3155 		}
3156 
3157 		alu.src[1].sel = tmp0;
3158 		alu.src[1].chan = 1;
3159 
3160 		alu.last = 1;
3161 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3162 			return r;
3163 
3164 		if (mod) { /* UMOD */
3165 
3166 			/* 15. tmp1.z = tmp0.w - src2			= r - src2 */
3167 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3168 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3169 
3170 			alu.dst.sel = tmp1;
3171 			alu.dst.chan = 2;
3172 			alu.dst.write = 1;
3173 
3174 			alu.src[0].sel = tmp0;
3175 			alu.src[0].chan = 3;
3176 
3177 			if (signed_op) {
3178 				alu.src[1].sel = tmp2;
3179 				alu.src[1].chan = 1;
3180 			} else {
3181 				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3182 			}
3183 
3184 			alu.last = 1;
3185 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3186 				return r;
3187 
3188 			/* 16. tmp1.w = tmp0.w + src2			= r + src2 */
3189 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3190 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3191 
3192 			alu.dst.sel = tmp1;
3193 			alu.dst.chan = 3;
3194 			alu.dst.write = 1;
3195 
3196 			alu.src[0].sel = tmp0;
3197 			alu.src[0].chan = 3;
3198 			if (signed_op) {
3199 				alu.src[1].sel = tmp2;
3200 				alu.src[1].chan = 1;
3201 			} else {
3202 				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3203 			}
3204 
3205 			alu.last = 1;
3206 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3207 				return r;
3208 
3209 		} else { /* UDIV */
3210 
3211 			/* 15. tmp1.z = tmp0.z + 1       = q + 1       DIV */
3212 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3213 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3214 
3215 			alu.dst.sel = tmp1;
3216 			alu.dst.chan = 2;
3217 			alu.dst.write = 1;
3218 
3219 			alu.src[0].sel = tmp0;
3220 			alu.src[0].chan = 2;
3221 			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3222 
3223 			alu.last = 1;
3224 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3225 				return r;
3226 
3227 			/* 16. tmp1.w = tmp0.z - 1			= q - 1 */
3228 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3229 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3230 
3231 			alu.dst.sel = tmp1;
3232 			alu.dst.chan = 3;
3233 			alu.dst.write = 1;
3234 
3235 			alu.src[0].sel = tmp0;
3236 			alu.src[0].chan = 2;
3237 			alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;
3238 
3239 			alu.last = 1;
3240 			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3241 				return r;
3242 
3243 		}
3244 
3245 		/* 17. tmp1.x = tmp1.x & tmp1.y */
3246 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3247 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
3248 
3249 		alu.dst.sel = tmp1;
3250 		alu.dst.chan = 0;
3251 		alu.dst.write = 1;
3252 
3253 		alu.src[0].sel = tmp1;
3254 		alu.src[0].chan = 0;
3255 		alu.src[1].sel = tmp1;
3256 		alu.src[1].chan = 1;
3257 
3258 		alu.last = 1;
3259 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3260 			return r;
3261 
3262 		/* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z    DIV */
3263 		/* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z    MOD */
3264 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3265 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3266 		alu.is_op3 = 1;
3267 
3268 		alu.dst.sel = tmp0;
3269 		alu.dst.chan = 2;
3270 		alu.dst.write = 1;
3271 
3272 		alu.src[0].sel = tmp1;
3273 		alu.src[0].chan = 0;
3274 		alu.src[1].sel = tmp0;
3275 		alu.src[1].chan = mod ? 3 : 2;
3276 		alu.src[2].sel = tmp1;
3277 		alu.src[2].chan = 2;
3278 
3279 		alu.last = 1;
3280 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3281 			return r;
3282 
3283 		/* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
3284 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3285 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT);
3286 		alu.is_op3 = 1;
3287 
3288 		if (signed_op) {
3289 			alu.dst.sel = tmp0;
3290 			alu.dst.chan = 2;
3291 			alu.dst.write = 1;
3292 		} else {
3293 			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3294 		}
3295 
3296 		alu.src[0].sel = tmp1;
3297 		alu.src[0].chan = 1;
3298 		alu.src[1].sel = tmp1;
3299 		alu.src[1].chan = 3;
3300 		alu.src[2].sel = tmp0;
3301 		alu.src[2].chan = 2;
3302 
3303 		alu.last = 1;
3304 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3305 			return r;
3306 
3307 		if (signed_op) {
3308 
3309 			/* fix the sign of the result */
3310 
3311 			if (mod) {
3312 
3313 				/* tmp0.x = -tmp0.z */
3314 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3315 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3316 
3317 				alu.dst.sel = tmp0;
3318 				alu.dst.chan = 0;
3319 				alu.dst.write = 1;
3320 
3321 				alu.src[0].sel = V_SQ_ALU_SRC_0;
3322 				alu.src[1].sel = tmp0;
3323 				alu.src[1].chan = 2;
3324 
3325 				alu.last = 1;
3326 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3327 					return r;
3328 
3329 				/* sign of the remainder is the same as the sign of src0 */
3330 				/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
3331 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3332 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3333 				alu.is_op3 = 1;
3334 
3335 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3336 
3337 				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3338 				alu.src[1].sel = tmp0;
3339 				alu.src[1].chan = 2;
3340 				alu.src[2].sel = tmp0;
3341 				alu.src[2].chan = 0;
3342 
3343 				alu.last = 1;
3344 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3345 					return r;
3346 
3347 			} else {
3348 
3349 				/* tmp0.x = -tmp0.z */
3350 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3351 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3352 
3353 				alu.dst.sel = tmp0;
3354 				alu.dst.chan = 0;
3355 				alu.dst.write = 1;
3356 
3357 				alu.src[0].sel = V_SQ_ALU_SRC_0;
3358 				alu.src[1].sel = tmp0;
3359 				alu.src[1].chan = 2;
3360 
3361 				alu.last = 1;
3362 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3363 					return r;
3364 
3365 				/* fix the quotient sign (same as the sign of src0*src1) */
3366 				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
3367 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3368 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3369 				alu.is_op3 = 1;
3370 
3371 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3372 
3373 				alu.src[0].sel = tmp2;
3374 				alu.src[0].chan = 2;
3375 				alu.src[1].sel = tmp0;
3376 				alu.src[1].chan = 2;
3377 				alu.src[2].sel = tmp0;
3378 				alu.src[2].chan = 0;
3379 
3380 				alu.last = 1;
3381 				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3382 					return r;
3383 			}
3384 		}
3385 	}
3386 	return 0;
3387 }
3388 
/*
 * TGSI UDIV handler: forwards to the shared tgsi_divmod() lowering with
 * both selector flags cleared (quotient, unsigned operands).
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;	/* 0: produce the quotient */
	const int operands_signed = 0;	/* 0: treat operands as unsigned */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3393 
/*
 * TGSI UMOD handler: forwards to the shared tgsi_divmod() lowering,
 * asking for the remainder of an unsigned division.
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;	/* 1: produce the remainder */
	const int operands_signed = 0;	/* 0: treat operands as unsigned */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3398 
/*
 * TGSI IDIV handler: forwards to the shared tgsi_divmod() lowering,
 * asking for the quotient of a signed division.
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 0;	/* 0: produce the quotient */
	const int operands_signed = 1;	/* 1: treat operands as signed */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3403 
/*
 * TGSI IMOD handler: forwards to the shared tgsi_divmod() lowering,
 * asking for the remainder of a signed division.
 * Returns whatever tgsi_divmod() returns.
 */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	const int want_remainder = 1;	/* 1: produce the remainder */
	const int operands_signed = 1;	/* 1: treat operands as signed */

	return tgsi_divmod(ctx, want_remainder, operands_signed);
}
3408 
3409 
tgsi_f2i(struct r600_shader_ctx * ctx)3410 static int tgsi_f2i(struct r600_shader_ctx *ctx)
3411 {
3412 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3413 	struct r600_bytecode_alu alu;
3414 	int i, r;
3415 	unsigned write_mask = inst->Dst[0].Register.WriteMask;
3416 	int last_inst = tgsi_last_instruction(write_mask);
3417 
3418 	for (i = 0; i < 4; i++) {
3419 		if (!(write_mask & (1<<i)))
3420 			continue;
3421 
3422 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3423 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
3424 
3425 		alu.dst.sel = ctx->temp_reg;
3426 		alu.dst.chan = i;
3427 		alu.dst.write = 1;
3428 
3429 		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3430 		if (i == last_inst)
3431 			alu.last = 1;
3432 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3433 		if (r)
3434 			return r;
3435 	}
3436 
3437 	for (i = 0; i < 4; i++) {
3438 		if (!(write_mask & (1<<i)))
3439 			continue;
3440 
3441 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3442 		alu.inst = ctx->inst_info->r600_opcode;
3443 
3444 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3445 
3446 		alu.src[0].sel = ctx->temp_reg;
3447 		alu.src[0].chan = i;
3448 
3449 		if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT)
3450 			alu.last = 1;
3451 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3452 		if (r)
3453 			return r;
3454 	}
3455 
3456 	return 0;
3457 }
3458 
tgsi_iabs(struct r600_shader_ctx * ctx)3459 static int tgsi_iabs(struct r600_shader_ctx *ctx)
3460 {
3461 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3462 	struct r600_bytecode_alu alu;
3463 	int i, r;
3464 	unsigned write_mask = inst->Dst[0].Register.WriteMask;
3465 	int last_inst = tgsi_last_instruction(write_mask);
3466 
3467 	/* tmp = -src */
3468 	for (i = 0; i < 4; i++) {
3469 		if (!(write_mask & (1<<i)))
3470 			continue;
3471 
3472 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3473 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
3474 
3475 		alu.dst.sel = ctx->temp_reg;
3476 		alu.dst.chan = i;
3477 		alu.dst.write = 1;
3478 
3479 		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3480 		alu.src[0].sel = V_SQ_ALU_SRC_0;
3481 
3482 		if (i == last_inst)
3483 			alu.last = 1;
3484 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3485 		if (r)
3486 			return r;
3487 	}
3488 
3489 	/* dst = (src >= 0 ? src : tmp) */
3490 	for (i = 0; i < 4; i++) {
3491 		if (!(write_mask & (1<<i)))
3492 			continue;
3493 
3494 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3495 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3496 		alu.is_op3 = 1;
3497 		alu.dst.write = 1;
3498 
3499 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3500 
3501 		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3502 		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3503 		alu.src[2].sel = ctx->temp_reg;
3504 		alu.src[2].chan = i;
3505 
3506 		if (i == last_inst)
3507 			alu.last = 1;
3508 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3509 		if (r)
3510 			return r;
3511 	}
3512 	return 0;
3513 }
3514 
tgsi_issg(struct r600_shader_ctx * ctx)3515 static int tgsi_issg(struct r600_shader_ctx *ctx)
3516 {
3517 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3518 	struct r600_bytecode_alu alu;
3519 	int i, r;
3520 	unsigned write_mask = inst->Dst[0].Register.WriteMask;
3521 	int last_inst = tgsi_last_instruction(write_mask);
3522 
3523 	/* tmp = (src >= 0 ? src : -1) */
3524 	for (i = 0; i < 4; i++) {
3525 		if (!(write_mask & (1<<i)))
3526 			continue;
3527 
3528 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3529 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
3530 		alu.is_op3 = 1;
3531 
3532 		alu.dst.sel = ctx->temp_reg;
3533 		alu.dst.chan = i;
3534 		alu.dst.write = 1;
3535 
3536 		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3537 		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3538 		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
3539 
3540 		if (i == last_inst)
3541 			alu.last = 1;
3542 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3543 		if (r)
3544 			return r;
3545 	}
3546 
3547 	/* dst = (tmp > 0 ? 1 : tmp) */
3548 	for (i = 0; i < 4; i++) {
3549 		if (!(write_mask & (1<<i)))
3550 			continue;
3551 
3552 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3553 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
3554 		alu.is_op3 = 1;
3555 		alu.dst.write = 1;
3556 
3557 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3558 
3559 		alu.src[0].sel = ctx->temp_reg;
3560 		alu.src[0].chan = i;
3561 
3562 		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
3563 
3564 		alu.src[2].sel = ctx->temp_reg;
3565 		alu.src[2].chan = i;
3566 
3567 		if (i == last_inst)
3568 			alu.last = 1;
3569 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3570 		if (r)
3571 			return r;
3572 	}
3573 	return 0;
3574 }
3575 
3576 
3577 
tgsi_ssg(struct r600_shader_ctx * ctx)3578 static int tgsi_ssg(struct r600_shader_ctx *ctx)
3579 {
3580 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3581 	struct r600_bytecode_alu alu;
3582 	int i, r;
3583 
3584 	/* tmp = (src > 0 ? 1 : src) */
3585 	for (i = 0; i < 4; i++) {
3586 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3587 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3588 		alu.is_op3 = 1;
3589 
3590 		alu.dst.sel = ctx->temp_reg;
3591 		alu.dst.chan = i;
3592 
3593 		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3594 		alu.src[1].sel = V_SQ_ALU_SRC_1;
3595 		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
3596 
3597 		if (i == 3)
3598 			alu.last = 1;
3599 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3600 		if (r)
3601 			return r;
3602 	}
3603 
3604 	/* dst = (-tmp > 0 ? -1 : tmp) */
3605 	for (i = 0; i < 4; i++) {
3606 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3607 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
3608 		alu.is_op3 = 1;
3609 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3610 
3611 		alu.src[0].sel = ctx->temp_reg;
3612 		alu.src[0].chan = i;
3613 		alu.src[0].neg = 1;
3614 
3615 		alu.src[1].sel = V_SQ_ALU_SRC_1;
3616 		alu.src[1].neg = 1;
3617 
3618 		alu.src[2].sel = ctx->temp_reg;
3619 		alu.src[2].chan = i;
3620 
3621 		if (i == 3)
3622 			alu.last = 1;
3623 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3624 		if (r)
3625 			return r;
3626 	}
3627 	return 0;
3628 }
3629 
tgsi_helper_copy(struct r600_shader_ctx * ctx,struct tgsi_full_instruction * inst)3630 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
3631 {
3632 	struct r600_bytecode_alu alu;
3633 	int i, r;
3634 
3635 	for (i = 0; i < 4; i++) {
3636 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3637 		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
3638 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
3639 			alu.dst.chan = i;
3640 		} else {
3641 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3642 			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3643 			alu.src[0].sel = ctx->temp_reg;
3644 			alu.src[0].chan = i;
3645 		}
3646 		if (i == 3) {
3647 			alu.last = 1;
3648 		}
3649 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3650 		if (r)
3651 			return r;
3652 	}
3653 	return 0;
3654 }
3655 
tgsi_op3(struct r600_shader_ctx * ctx)3656 static int tgsi_op3(struct r600_shader_ctx *ctx)
3657 {
3658 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3659 	struct r600_bytecode_alu alu;
3660 	int i, j, r;
3661 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3662 
3663 	for (i = 0; i < lasti + 1; i++) {
3664 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3665 			continue;
3666 
3667 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3668 		alu.inst = ctx->inst_info->r600_opcode;
3669 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3670 			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3671 		}
3672 
3673 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3674 		alu.dst.chan = i;
3675 		alu.dst.write = 1;
3676 		alu.is_op3 = 1;
3677 		if (i == lasti) {
3678 			alu.last = 1;
3679 		}
3680 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3681 		if (r)
3682 			return r;
3683 	}
3684 	return 0;
3685 }
3686 
tgsi_dp(struct r600_shader_ctx * ctx)3687 static int tgsi_dp(struct r600_shader_ctx *ctx)
3688 {
3689 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3690 	struct r600_bytecode_alu alu;
3691 	int i, j, r;
3692 
3693 	for (i = 0; i < 4; i++) {
3694 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3695 		alu.inst = ctx->inst_info->r600_opcode;
3696 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
3697 			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3698 		}
3699 
3700 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3701 		alu.dst.chan = i;
3702 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
3703 		/* handle some special cases */
3704 		switch (ctx->inst_info->tgsi_opcode) {
3705 		case TGSI_OPCODE_DP2:
3706 			if (i > 1) {
3707 				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3708 				alu.src[0].chan = alu.src[1].chan = 0;
3709 			}
3710 			break;
3711 		case TGSI_OPCODE_DP3:
3712 			if (i > 2) {
3713 				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
3714 				alu.src[0].chan = alu.src[1].chan = 0;
3715 			}
3716 			break;
3717 		case TGSI_OPCODE_DPH:
3718 			if (i == 3) {
3719 				alu.src[0].sel = V_SQ_ALU_SRC_1;
3720 				alu.src[0].chan = 0;
3721 				alu.src[0].neg = 0;
3722 			}
3723 			break;
3724 		default:
3725 			break;
3726 		}
3727 		if (i == 3) {
3728 			alu.last = 1;
3729 		}
3730 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3731 		if (r)
3732 			return r;
3733 	}
3734 	return 0;
3735 }
3736 
tgsi_tex_src_requires_loading(struct r600_shader_ctx * ctx,unsigned index)3737 static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
3738 						    unsigned index)
3739 {
3740 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3741 	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
3742 		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
3743 		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
3744 		ctx->src[index].neg || ctx->src[index].abs;
3745 }
3746 
tgsi_tex_get_src_gpr(struct r600_shader_ctx * ctx,unsigned index)3747 static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
3748 					unsigned index)
3749 {
3750 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3751 	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
3752 }
3753 
tgsi_tex(struct r600_shader_ctx * ctx)3754 static int tgsi_tex(struct r600_shader_ctx *ctx)
3755 {
3756 	static float one_point_five = 1.5f;
3757 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3758 	struct r600_bytecode_tex tex;
3759 	struct r600_bytecode_alu alu;
3760 	unsigned src_gpr;
3761 	int r, i, j;
3762 	int opcode;
3763 	/* Texture fetch instructions can only use gprs as source.
3764 	 * Also they cannot negate the source or take the absolute value */
3765 	const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3766                                              tgsi_tex_src_requires_loading(ctx, 0);
3767 	boolean src_loaded = FALSE;
3768 	unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3769 	uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
3770 
3771 	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
3772 
3773 	if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
3774 		/* get offset values */
3775 		if (inst->Texture.NumOffsets) {
3776 			assert(inst->Texture.NumOffsets == 1);
3777 
3778 			offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
3779 			offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
3780 			offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
3781 		}
3782 	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
3783 		/* TGSI moves the sampler to src reg 3 for TXD */
3784 		sampler_src_reg = 3;
3785 
3786 		for (i = 1; i < 3; i++) {
3787 			/* set gradients h/v */
3788 			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
3789 			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
3790 				SQ_TEX_INST_SET_GRADIENTS_V;
3791 			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
3792 			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
3793 
3794 			if (tgsi_tex_src_requires_loading(ctx, i)) {
3795 				tex.src_gpr = r600_get_temp(ctx);
3796 				tex.src_sel_x = 0;
3797 				tex.src_sel_y = 1;
3798 				tex.src_sel_z = 2;
3799 				tex.src_sel_w = 3;
3800 
3801 				for (j = 0; j < 4; j++) {
3802 					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3803 					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3804                                         r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
3805                                         alu.dst.sel = tex.src_gpr;
3806                                         alu.dst.chan = j;
3807                                         if (j == 3)
3808                                                 alu.last = 1;
3809                                         alu.dst.write = 1;
3810                                         r = r600_bytecode_add_alu(ctx->bc, &alu);
3811                                         if (r)
3812                                                 return r;
3813 				}
3814 
3815 			} else {
3816 				tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
3817 				tex.src_sel_x = ctx->src[i].swizzle[0];
3818 				tex.src_sel_y = ctx->src[i].swizzle[1];
3819 				tex.src_sel_z = ctx->src[i].swizzle[2];
3820 				tex.src_sel_w = ctx->src[i].swizzle[3];
3821 				tex.src_rel = ctx->src[i].rel;
3822 			}
3823 			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
3824 			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
3825 			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
3826 				tex.coord_type_x = 1;
3827 				tex.coord_type_y = 1;
3828 				tex.coord_type_z = 1;
3829 				tex.coord_type_w = 1;
3830 			}
3831 			r = r600_bytecode_add_tex(ctx->bc, &tex);
3832 			if (r)
3833 				return r;
3834 		}
3835 	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
3836 		int out_chan;
3837 		/* Add perspective divide */
3838 		if (ctx->bc->chip_class == CAYMAN) {
3839 			out_chan = 2;
3840 			for (i = 0; i < 3; i++) {
3841 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3842 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3843 				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3844 
3845 				alu.dst.sel = ctx->temp_reg;
3846 				alu.dst.chan = i;
3847 				if (i == 2)
3848 					alu.last = 1;
3849 				if (out_chan == i)
3850 					alu.dst.write = 1;
3851 				r = r600_bytecode_add_alu(ctx->bc, &alu);
3852 				if (r)
3853 					return r;
3854 			}
3855 
3856 		} else {
3857 			out_chan = 3;
3858 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3859 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3860 			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
3861 
3862 			alu.dst.sel = ctx->temp_reg;
3863 			alu.dst.chan = out_chan;
3864 			alu.last = 1;
3865 			alu.dst.write = 1;
3866 			r = r600_bytecode_add_alu(ctx->bc, &alu);
3867 			if (r)
3868 				return r;
3869 		}
3870 
3871 		for (i = 0; i < 3; i++) {
3872 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3873 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3874 			alu.src[0].sel = ctx->temp_reg;
3875 			alu.src[0].chan = out_chan;
3876 			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
3877 			alu.dst.sel = ctx->temp_reg;
3878 			alu.dst.chan = i;
3879 			alu.dst.write = 1;
3880 			r = r600_bytecode_add_alu(ctx->bc, &alu);
3881 			if (r)
3882 				return r;
3883 		}
3884 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3885 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3886 		alu.src[0].sel = V_SQ_ALU_SRC_1;
3887 		alu.src[0].chan = 0;
3888 		alu.dst.sel = ctx->temp_reg;
3889 		alu.dst.chan = 3;
3890 		alu.last = 1;
3891 		alu.dst.write = 1;
3892 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3893 		if (r)
3894 			return r;
3895 		src_loaded = TRUE;
3896 		src_gpr = ctx->temp_reg;
3897 	}
3898 
3899 	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
3900 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
3901 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
3902 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
3903 
3904 		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
3905 		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
3906 
3907 		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3908 		for (i = 0; i < 4; i++) {
3909 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3910 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
3911 			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
3912 			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
3913 			alu.dst.sel = ctx->temp_reg;
3914 			alu.dst.chan = i;
3915 			if (i == 3)
3916 				alu.last = 1;
3917 			alu.dst.write = 1;
3918 			r = r600_bytecode_add_alu(ctx->bc, &alu);
3919 			if (r)
3920 				return r;
3921 		}
3922 
3923 		/* tmp1.z = RCP_e(|tmp1.z|) */
3924 		if (ctx->bc->chip_class == CAYMAN) {
3925 			for (i = 0; i < 3; i++) {
3926 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3927 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3928 				alu.src[0].sel = ctx->temp_reg;
3929 				alu.src[0].chan = 2;
3930 				alu.src[0].abs = 1;
3931 				alu.dst.sel = ctx->temp_reg;
3932 				alu.dst.chan = i;
3933 				if (i == 2)
3934 					alu.dst.write = 1;
3935 				if (i == 2)
3936 					alu.last = 1;
3937 				r = r600_bytecode_add_alu(ctx->bc, &alu);
3938 				if (r)
3939 					return r;
3940 			}
3941 		} else {
3942 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3943 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3944 			alu.src[0].sel = ctx->temp_reg;
3945 			alu.src[0].chan = 2;
3946 			alu.src[0].abs = 1;
3947 			alu.dst.sel = ctx->temp_reg;
3948 			alu.dst.chan = 2;
3949 			alu.dst.write = 1;
3950 			alu.last = 1;
3951 			r = r600_bytecode_add_alu(ctx->bc, &alu);
3952 			if (r)
3953 				return r;
3954 		}
3955 
3956 		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
3957 		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
3958 		 * muladd has no writemask, have to use another temp
3959 		 */
3960 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3961 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3962 		alu.is_op3 = 1;
3963 
3964 		alu.src[0].sel = ctx->temp_reg;
3965 		alu.src[0].chan = 0;
3966 		alu.src[1].sel = ctx->temp_reg;
3967 		alu.src[1].chan = 2;
3968 
3969 		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3970 		alu.src[2].chan = 0;
3971 		alu.src[2].value = *(uint32_t *)&one_point_five;
3972 
3973 		alu.dst.sel = ctx->temp_reg;
3974 		alu.dst.chan = 0;
3975 		alu.dst.write = 1;
3976 
3977 		r = r600_bytecode_add_alu(ctx->bc, &alu);
3978 		if (r)
3979 			return r;
3980 
3981 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3982 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
3983 		alu.is_op3 = 1;
3984 
3985 		alu.src[0].sel = ctx->temp_reg;
3986 		alu.src[0].chan = 1;
3987 		alu.src[1].sel = ctx->temp_reg;
3988 		alu.src[1].chan = 2;
3989 
3990 		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
3991 		alu.src[2].chan = 0;
3992 		alu.src[2].value = *(uint32_t *)&one_point_five;
3993 
3994 		alu.dst.sel = ctx->temp_reg;
3995 		alu.dst.chan = 1;
3996 		alu.dst.write = 1;
3997 
3998 		alu.last = 1;
3999 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4000 		if (r)
4001 			return r;
4002 		/* write initial W value into Z component */
4003 		if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4004 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4005 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4006 			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4007 			alu.dst.sel = ctx->temp_reg;
4008 			alu.dst.chan = 2;
4009 			alu.dst.write = 1;
4010 			alu.last = 1;
4011 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4012 			if (r)
4013 				return r;
4014 		}
4015 
4016 		/* for cube forms of lod and bias we need to route the lod
4017 		   value into Z */
4018 		if (inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
4019 		    inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
4020 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4021 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4022 			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
4023 			alu.dst.sel = ctx->temp_reg;
4024 			alu.dst.chan = 2;
4025 			alu.last = 1;
4026 			alu.dst.write = 1;
4027 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4028 			if (r)
4029 				return r;
4030 		}
4031 
4032 		src_loaded = TRUE;
4033 		src_gpr = ctx->temp_reg;
4034 	}
4035 
4036 	if (src_requires_loading && !src_loaded) {
4037 		for (i = 0; i < 4; i++) {
4038 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4039 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4040 			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4041 			alu.dst.sel = ctx->temp_reg;
4042 			alu.dst.chan = i;
4043 			if (i == 3)
4044 				alu.last = 1;
4045 			alu.dst.write = 1;
4046 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4047 			if (r)
4048 				return r;
4049 		}
4050 		src_loaded = TRUE;
4051 		src_gpr = ctx->temp_reg;
4052 	}
4053 
4054 	opcode = ctx->inst_info->r600_opcode;
4055 	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4056 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4057 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4058 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
4059 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
4060 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
4061 		switch (opcode) {
4062 		case SQ_TEX_INST_SAMPLE:
4063 			opcode = SQ_TEX_INST_SAMPLE_C;
4064 			break;
4065 		case SQ_TEX_INST_SAMPLE_L:
4066 			opcode = SQ_TEX_INST_SAMPLE_C_L;
4067 			break;
4068 		case SQ_TEX_INST_SAMPLE_LB:
4069 			opcode = SQ_TEX_INST_SAMPLE_C_LB;
4070 			break;
4071 		case SQ_TEX_INST_SAMPLE_G:
4072 			opcode = SQ_TEX_INST_SAMPLE_C_G;
4073 			break;
4074 		}
4075 	}
4076 
4077 	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4078 	tex.inst = opcode;
4079 
4080 	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4081 	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4082 	tex.src_gpr = src_gpr;
4083 	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4084 	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
4085 	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
4086 	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
4087 	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
4088 
4089 	if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
4090 		tex.src_sel_x = 4;
4091 		tex.src_sel_y = 4;
4092 		tex.src_sel_z = 4;
4093 		tex.src_sel_w = 4;
4094 	} else if (src_loaded) {
4095 		tex.src_sel_x = 0;
4096 		tex.src_sel_y = 1;
4097 		tex.src_sel_z = 2;
4098 		tex.src_sel_w = 3;
4099 	} else {
4100 		tex.src_sel_x = ctx->src[0].swizzle[0];
4101 		tex.src_sel_y = ctx->src[0].swizzle[1];
4102 		tex.src_sel_z = ctx->src[0].swizzle[2];
4103 		tex.src_sel_w = ctx->src[0].swizzle[3];
4104 		tex.src_rel = ctx->src[0].rel;
4105 	}
4106 
4107 	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
4108 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
4109 		tex.src_sel_x = 1;
4110 		tex.src_sel_y = 0;
4111 		tex.src_sel_z = 3;
4112 		tex.src_sel_w = 2; /* route Z compare or Lod value into W */
4113 	}
4114 
4115 	if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
4116 	    inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
4117 		tex.coord_type_x = 1;
4118 		tex.coord_type_y = 1;
4119 	}
4120 	tex.coord_type_z = 1;
4121 	tex.coord_type_w = 1;
4122 
4123 	tex.offset_x = offset_x;
4124 	tex.offset_y = offset_y;
4125 	tex.offset_z = offset_z;
4126 
4127 	/* Put the depth for comparison in W.
4128 	 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
4129 	 * Some instructions expect the depth in Z. */
4130 	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4131 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
4132 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
4133 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
4134 	    opcode != SQ_TEX_INST_SAMPLE_C_L &&
4135 	    opcode != SQ_TEX_INST_SAMPLE_C_LB) {
4136 		tex.src_sel_w = tex.src_sel_z;
4137 	}
4138 
4139 	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
4140 	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
4141 		if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
4142 		    opcode == SQ_TEX_INST_SAMPLE_C_LB) {
4143 			/* the array index is read from Y */
4144 			tex.coord_type_y = 0;
4145 		} else {
4146 			/* the array index is read from Z */
4147 			tex.coord_type_z = 0;
4148 			tex.src_sel_z = tex.src_sel_y;
4149 		}
4150 	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
4151 		   inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
4152 		/* the array index is read from Z */
4153 		tex.coord_type_z = 0;
4154 
4155 	r = r600_bytecode_add_tex(ctx->bc, &tex);
4156 	if (r)
4157 		return r;
4158 
4159 	/* add shadow ambient support  - gallium doesn't do it yet */
4160 	return 0;
4161 }
4162 
tgsi_lrp(struct r600_shader_ctx * ctx)4163 static int tgsi_lrp(struct r600_shader_ctx *ctx)
4164 {
4165 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4166 	struct r600_bytecode_alu alu;
4167 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4168 	unsigned i;
4169 	int r;
4170 
4171 	/* optimize if it's just an equal balance */
4172 	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
4173 		for (i = 0; i < lasti + 1; i++) {
4174 			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4175 				continue;
4176 
4177 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4178 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4179 			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
4180 			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4181 			alu.omod = 3;
4182 			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4183 			alu.dst.chan = i;
4184 			if (i == lasti) {
4185 				alu.last = 1;
4186 			}
4187 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4188 			if (r)
4189 				return r;
4190 		}
4191 		return 0;
4192 	}
4193 
4194 	/* 1 - src0 */
4195 	for (i = 0; i < lasti + 1; i++) {
4196 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4197 			continue;
4198 
4199 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4200 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
4201 		alu.src[0].sel = V_SQ_ALU_SRC_1;
4202 		alu.src[0].chan = 0;
4203 		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
4204 		r600_bytecode_src_toggle_neg(&alu.src[1]);
4205 		alu.dst.sel = ctx->temp_reg;
4206 		alu.dst.chan = i;
4207 		if (i == lasti) {
4208 			alu.last = 1;
4209 		}
4210 		alu.dst.write = 1;
4211 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4212 		if (r)
4213 			return r;
4214 	}
4215 
4216 	/* (1 - src0) * src2 */
4217 	for (i = 0; i < lasti + 1; i++) {
4218 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4219 			continue;
4220 
4221 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4222 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4223 		alu.src[0].sel = ctx->temp_reg;
4224 		alu.src[0].chan = i;
4225 		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4226 		alu.dst.sel = ctx->temp_reg;
4227 		alu.dst.chan = i;
4228 		if (i == lasti) {
4229 			alu.last = 1;
4230 		}
4231 		alu.dst.write = 1;
4232 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4233 		if (r)
4234 			return r;
4235 	}
4236 
4237 	/* src0 * src1 + (1 - src0) * src2 */
4238 	for (i = 0; i < lasti + 1; i++) {
4239 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4240 			continue;
4241 
4242 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4243 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4244 		alu.is_op3 = 1;
4245 		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4246 		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4247 		alu.src[2].sel = ctx->temp_reg;
4248 		alu.src[2].chan = i;
4249 
4250 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4251 		alu.dst.chan = i;
4252 		if (i == lasti) {
4253 			alu.last = 1;
4254 		}
4255 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4256 		if (r)
4257 			return r;
4258 	}
4259 	return 0;
4260 }
4261 
tgsi_cmp(struct r600_shader_ctx * ctx)4262 static int tgsi_cmp(struct r600_shader_ctx *ctx)
4263 {
4264 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4265 	struct r600_bytecode_alu alu;
4266 	int i, r;
4267 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
4268 
4269 	for (i = 0; i < lasti + 1; i++) {
4270 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
4271 			continue;
4272 
4273 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4274 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
4275 		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4276 		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
4277 		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
4278 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4279 		alu.dst.chan = i;
4280 		alu.dst.write = 1;
4281 		alu.is_op3 = 1;
4282 		if (i == lasti)
4283 			alu.last = 1;
4284 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4285 		if (r)
4286 			return r;
4287 	}
4288 	return 0;
4289 }
4290 
tgsi_xpd(struct r600_shader_ctx * ctx)4291 static int tgsi_xpd(struct r600_shader_ctx *ctx)
4292 {
4293 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4294 	static const unsigned int src0_swizzle[] = {2, 0, 1};
4295 	static const unsigned int src1_swizzle[] = {1, 2, 0};
4296 	struct r600_bytecode_alu alu;
4297 	uint32_t use_temp = 0;
4298 	int i, r;
4299 
4300 	if (inst->Dst[0].Register.WriteMask != 0xf)
4301 		use_temp = 1;
4302 
4303 	for (i = 0; i < 4; i++) {
4304 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4305 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4306 		if (i < 3) {
4307 			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
4308 			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
4309 		} else {
4310 			alu.src[0].sel = V_SQ_ALU_SRC_0;
4311 			alu.src[0].chan = i;
4312 			alu.src[1].sel = V_SQ_ALU_SRC_0;
4313 			alu.src[1].chan = i;
4314 		}
4315 
4316 		alu.dst.sel = ctx->temp_reg;
4317 		alu.dst.chan = i;
4318 		alu.dst.write = 1;
4319 
4320 		if (i == 3)
4321 			alu.last = 1;
4322 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4323 		if (r)
4324 			return r;
4325 	}
4326 
4327 	for (i = 0; i < 4; i++) {
4328 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4329 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
4330 
4331 		if (i < 3) {
4332 			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
4333 			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
4334 		} else {
4335 			alu.src[0].sel = V_SQ_ALU_SRC_0;
4336 			alu.src[0].chan = i;
4337 			alu.src[1].sel = V_SQ_ALU_SRC_0;
4338 			alu.src[1].chan = i;
4339 		}
4340 
4341 		alu.src[2].sel = ctx->temp_reg;
4342 		alu.src[2].neg = 1;
4343 		alu.src[2].chan = i;
4344 
4345 		if (use_temp)
4346 			alu.dst.sel = ctx->temp_reg;
4347 		else
4348 			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4349 		alu.dst.chan = i;
4350 		alu.dst.write = 1;
4351 		alu.is_op3 = 1;
4352 		if (i == 3)
4353 			alu.last = 1;
4354 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4355 		if (r)
4356 			return r;
4357 	}
4358 	if (use_temp)
4359 		return tgsi_helper_copy(ctx, inst);
4360 	return 0;
4361 }
4362 
tgsi_exp(struct r600_shader_ctx * ctx)4363 static int tgsi_exp(struct r600_shader_ctx *ctx)
4364 {
4365 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4366 	struct r600_bytecode_alu alu;
4367 	int r;
4368 	int i;
4369 
4370 	/* result.x = 2^floor(src); */
4371 	if (inst->Dst[0].Register.WriteMask & 1) {
4372 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4373 
4374 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4375 		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4376 
4377 		alu.dst.sel = ctx->temp_reg;
4378 		alu.dst.chan = 0;
4379 		alu.dst.write = 1;
4380 		alu.last = 1;
4381 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4382 		if (r)
4383 			return r;
4384 
4385 		if (ctx->bc->chip_class == CAYMAN) {
4386 			for (i = 0; i < 3; i++) {
4387 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4388 				alu.src[0].sel = ctx->temp_reg;
4389 				alu.src[0].chan = 0;
4390 
4391 				alu.dst.sel = ctx->temp_reg;
4392 				alu.dst.chan = i;
4393 				alu.dst.write = i == 0;
4394 				alu.last = i == 2;
4395 				r = r600_bytecode_add_alu(ctx->bc, &alu);
4396 				if (r)
4397 					return r;
4398 			}
4399 		} else {
4400 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4401 			alu.src[0].sel = ctx->temp_reg;
4402 			alu.src[0].chan = 0;
4403 
4404 			alu.dst.sel = ctx->temp_reg;
4405 			alu.dst.chan = 0;
4406 			alu.dst.write = 1;
4407 			alu.last = 1;
4408 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4409 			if (r)
4410 				return r;
4411 		}
4412 	}
4413 
4414 	/* result.y = tmp - floor(tmp); */
4415 	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4416 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4417 
4418 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
4419 		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4420 
4421 		alu.dst.sel = ctx->temp_reg;
4422 #if 0
4423 		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4424 		if (r)
4425 			return r;
4426 #endif
4427 		alu.dst.write = 1;
4428 		alu.dst.chan = 1;
4429 
4430 		alu.last = 1;
4431 
4432 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4433 		if (r)
4434 			return r;
4435 	}
4436 
4437 	/* result.z = RoughApprox2ToX(tmp);*/
4438 	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
4439 		if (ctx->bc->chip_class == CAYMAN) {
4440 			for (i = 0; i < 3; i++) {
4441 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4442 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4443 				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4444 
4445 				alu.dst.sel = ctx->temp_reg;
4446 				alu.dst.chan = i;
4447 				if (i == 2) {
4448 					alu.dst.write = 1;
4449 					alu.last = 1;
4450 				}
4451 
4452 				r = r600_bytecode_add_alu(ctx->bc, &alu);
4453 				if (r)
4454 					return r;
4455 			}
4456 		} else {
4457 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4458 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4459 			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4460 
4461 			alu.dst.sel = ctx->temp_reg;
4462 			alu.dst.write = 1;
4463 			alu.dst.chan = 2;
4464 
4465 			alu.last = 1;
4466 
4467 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4468 			if (r)
4469 				return r;
4470 		}
4471 	}
4472 
4473 	/* result.w = 1.0;*/
4474 	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
4475 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4476 
4477 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4478 		alu.src[0].sel = V_SQ_ALU_SRC_1;
4479 		alu.src[0].chan = 0;
4480 
4481 		alu.dst.sel = ctx->temp_reg;
4482 		alu.dst.chan = 3;
4483 		alu.dst.write = 1;
4484 		alu.last = 1;
4485 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4486 		if (r)
4487 			return r;
4488 	}
4489 	return tgsi_helper_copy(ctx, inst);
4490 }
4491 
tgsi_log(struct r600_shader_ctx * ctx)4492 static int tgsi_log(struct r600_shader_ctx *ctx)
4493 {
4494 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4495 	struct r600_bytecode_alu alu;
4496 	int r;
4497 	int i;
4498 
4499 	/* result.x = floor(log2(|src|)); */
4500 	if (inst->Dst[0].Register.WriteMask & 1) {
4501 		if (ctx->bc->chip_class == CAYMAN) {
4502 			for (i = 0; i < 3; i++) {
4503 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4504 
4505 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4506 				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4507 				r600_bytecode_src_set_abs(&alu.src[0]);
4508 
4509 				alu.dst.sel = ctx->temp_reg;
4510 				alu.dst.chan = i;
4511 				if (i == 0)
4512 					alu.dst.write = 1;
4513 				if (i == 2)
4514 					alu.last = 1;
4515 				r = r600_bytecode_add_alu(ctx->bc, &alu);
4516 				if (r)
4517 					return r;
4518 			}
4519 
4520 		} else {
4521 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4522 
4523 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4524 			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4525 			r600_bytecode_src_set_abs(&alu.src[0]);
4526 
4527 			alu.dst.sel = ctx->temp_reg;
4528 			alu.dst.chan = 0;
4529 			alu.dst.write = 1;
4530 			alu.last = 1;
4531 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4532 			if (r)
4533 				return r;
4534 		}
4535 
4536 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4537 		alu.src[0].sel = ctx->temp_reg;
4538 		alu.src[0].chan = 0;
4539 
4540 		alu.dst.sel = ctx->temp_reg;
4541 		alu.dst.chan = 0;
4542 		alu.dst.write = 1;
4543 		alu.last = 1;
4544 
4545 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4546 		if (r)
4547 			return r;
4548 	}
4549 
4550 	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
4551 	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
4552 
4553 		if (ctx->bc->chip_class == CAYMAN) {
4554 			for (i = 0; i < 3; i++) {
4555 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4556 
4557 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4558 				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4559 				r600_bytecode_src_set_abs(&alu.src[0]);
4560 
4561 				alu.dst.sel = ctx->temp_reg;
4562 				alu.dst.chan = i;
4563 				if (i == 1)
4564 					alu.dst.write = 1;
4565 				if (i == 2)
4566 					alu.last = 1;
4567 
4568 				r = r600_bytecode_add_alu(ctx->bc, &alu);
4569 				if (r)
4570 					return r;
4571 			}
4572 		} else {
4573 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4574 
4575 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4576 			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4577 			r600_bytecode_src_set_abs(&alu.src[0]);
4578 
4579 			alu.dst.sel = ctx->temp_reg;
4580 			alu.dst.chan = 1;
4581 			alu.dst.write = 1;
4582 			alu.last = 1;
4583 
4584 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4585 			if (r)
4586 				return r;
4587 		}
4588 
4589 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4590 
4591 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
4592 		alu.src[0].sel = ctx->temp_reg;
4593 		alu.src[0].chan = 1;
4594 
4595 		alu.dst.sel = ctx->temp_reg;
4596 		alu.dst.chan = 1;
4597 		alu.dst.write = 1;
4598 		alu.last = 1;
4599 
4600 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4601 		if (r)
4602 			return r;
4603 
4604 		if (ctx->bc->chip_class == CAYMAN) {
4605 			for (i = 0; i < 3; i++) {
4606 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4607 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4608 				alu.src[0].sel = ctx->temp_reg;
4609 				alu.src[0].chan = 1;
4610 
4611 				alu.dst.sel = ctx->temp_reg;
4612 				alu.dst.chan = i;
4613 				if (i == 1)
4614 					alu.dst.write = 1;
4615 				if (i == 2)
4616 					alu.last = 1;
4617 
4618 				r = r600_bytecode_add_alu(ctx->bc, &alu);
4619 				if (r)
4620 					return r;
4621 			}
4622 		} else {
4623 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4624 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
4625 			alu.src[0].sel = ctx->temp_reg;
4626 			alu.src[0].chan = 1;
4627 
4628 			alu.dst.sel = ctx->temp_reg;
4629 			alu.dst.chan = 1;
4630 			alu.dst.write = 1;
4631 			alu.last = 1;
4632 
4633 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4634 			if (r)
4635 				return r;
4636 		}
4637 
4638 		if (ctx->bc->chip_class == CAYMAN) {
4639 			for (i = 0; i < 3; i++) {
4640 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4641 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4642 				alu.src[0].sel = ctx->temp_reg;
4643 				alu.src[0].chan = 1;
4644 
4645 				alu.dst.sel = ctx->temp_reg;
4646 				alu.dst.chan = i;
4647 				if (i == 1)
4648 					alu.dst.write = 1;
4649 				if (i == 2)
4650 					alu.last = 1;
4651 
4652 				r = r600_bytecode_add_alu(ctx->bc, &alu);
4653 				if (r)
4654 					return r;
4655 			}
4656 		} else {
4657 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4658 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
4659 			alu.src[0].sel = ctx->temp_reg;
4660 			alu.src[0].chan = 1;
4661 
4662 			alu.dst.sel = ctx->temp_reg;
4663 			alu.dst.chan = 1;
4664 			alu.dst.write = 1;
4665 			alu.last = 1;
4666 
4667 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4668 			if (r)
4669 				return r;
4670 		}
4671 
4672 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4673 
4674 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4675 
4676 		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4677 		r600_bytecode_src_set_abs(&alu.src[0]);
4678 
4679 		alu.src[1].sel = ctx->temp_reg;
4680 		alu.src[1].chan = 1;
4681 
4682 		alu.dst.sel = ctx->temp_reg;
4683 		alu.dst.chan = 1;
4684 		alu.dst.write = 1;
4685 		alu.last = 1;
4686 
4687 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4688 		if (r)
4689 			return r;
4690 	}
4691 
4692 	/* result.z = log2(|src|);*/
4693 	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
4694 		if (ctx->bc->chip_class == CAYMAN) {
4695 			for (i = 0; i < 3; i++) {
4696 				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4697 
4698 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4699 				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4700 				r600_bytecode_src_set_abs(&alu.src[0]);
4701 
4702 				alu.dst.sel = ctx->temp_reg;
4703 				if (i == 2)
4704 					alu.dst.write = 1;
4705 				alu.dst.chan = i;
4706 				if (i == 2)
4707 					alu.last = 1;
4708 
4709 				r = r600_bytecode_add_alu(ctx->bc, &alu);
4710 				if (r)
4711 					return r;
4712 			}
4713 		} else {
4714 			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4715 
4716 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
4717 			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4718 			r600_bytecode_src_set_abs(&alu.src[0]);
4719 
4720 			alu.dst.sel = ctx->temp_reg;
4721 			alu.dst.write = 1;
4722 			alu.dst.chan = 2;
4723 			alu.last = 1;
4724 
4725 			r = r600_bytecode_add_alu(ctx->bc, &alu);
4726 			if (r)
4727 				return r;
4728 		}
4729 	}
4730 
4731 	/* result.w = 1.0; */
4732 	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
4733 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4734 
4735 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
4736 		alu.src[0].sel = V_SQ_ALU_SRC_1;
4737 		alu.src[0].chan = 0;
4738 
4739 		alu.dst.sel = ctx->temp_reg;
4740 		alu.dst.chan = 3;
4741 		alu.dst.write = 1;
4742 		alu.last = 1;
4743 
4744 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4745 		if (r)
4746 			return r;
4747 	}
4748 
4749 	return tgsi_helper_copy(ctx, inst);
4750 }
4751 
tgsi_eg_arl(struct r600_shader_ctx * ctx)4752 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
4753 {
4754 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4755 	struct r600_bytecode_alu alu;
4756 	int r;
4757 
4758 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4759 
4760 	switch (inst->Instruction.Opcode) {
4761 	case TGSI_OPCODE_ARL:
4762 		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
4763 		break;
4764 	case TGSI_OPCODE_ARR:
4765 		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4766 		break;
4767 	case TGSI_OPCODE_UARL:
4768 		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4769 		break;
4770 	default:
4771 		assert(0);
4772 		return -1;
4773 	}
4774 
4775 	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4776 	alu.last = 1;
4777 	alu.dst.sel = ctx->bc->ar_reg;
4778 	alu.dst.write = 1;
4779 	r = r600_bytecode_add_alu(ctx->bc, &alu);
4780 	if (r)
4781 		return r;
4782 
4783 	ctx->bc->ar_loaded = 0;
4784 	return 0;
4785 }
tgsi_r600_arl(struct r600_shader_ctx * ctx)4786 static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
4787 {
4788 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4789 	struct r600_bytecode_alu alu;
4790 	int r;
4791 
4792 	switch (inst->Instruction.Opcode) {
4793 	case TGSI_OPCODE_ARL:
4794 		memset(&alu, 0, sizeof(alu));
4795 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
4796 		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4797 		alu.dst.sel = ctx->bc->ar_reg;
4798 		alu.dst.write = 1;
4799 		alu.last = 1;
4800 
4801 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4802 			return r;
4803 
4804 		memset(&alu, 0, sizeof(alu));
4805 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4806 		alu.src[0].sel = ctx->bc->ar_reg;
4807 		alu.dst.sel = ctx->bc->ar_reg;
4808 		alu.dst.write = 1;
4809 		alu.last = 1;
4810 
4811 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4812 			return r;
4813 		break;
4814 	case TGSI_OPCODE_ARR:
4815 		memset(&alu, 0, sizeof(alu));
4816 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
4817 		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4818 		alu.dst.sel = ctx->bc->ar_reg;
4819 		alu.dst.write = 1;
4820 		alu.last = 1;
4821 
4822 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4823 			return r;
4824 		break;
4825 	case TGSI_OPCODE_UARL:
4826 		memset(&alu, 0, sizeof(alu));
4827 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
4828 		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4829 		alu.dst.sel = ctx->bc->ar_reg;
4830 		alu.dst.write = 1;
4831 		alu.last = 1;
4832 
4833 		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
4834 			return r;
4835 		break;
4836 	default:
4837 		assert(0);
4838 		return -1;
4839 	}
4840 
4841 	ctx->bc->ar_loaded = 0;
4842 	return 0;
4843 }
4844 
tgsi_opdst(struct r600_shader_ctx * ctx)4845 static int tgsi_opdst(struct r600_shader_ctx *ctx)
4846 {
4847 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
4848 	struct r600_bytecode_alu alu;
4849 	int i, r = 0;
4850 
4851 	for (i = 0; i < 4; i++) {
4852 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4853 
4854 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
4855 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
4856 
4857 		if (i == 0 || i == 3) {
4858 			alu.src[0].sel = V_SQ_ALU_SRC_1;
4859 		} else {
4860 			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
4861 		}
4862 
4863 		if (i == 0 || i == 2) {
4864 			alu.src[1].sel = V_SQ_ALU_SRC_1;
4865 		} else {
4866 			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
4867 		}
4868 		if (i == 3)
4869 			alu.last = 1;
4870 		r = r600_bytecode_add_alu(ctx->bc, &alu);
4871 		if (r)
4872 			return r;
4873 	}
4874 	return 0;
4875 }
4876 
emit_logic_pred(struct r600_shader_ctx * ctx,int opcode)4877 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
4878 {
4879 	struct r600_bytecode_alu alu;
4880 	int r;
4881 
4882 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4883 	alu.inst = opcode;
4884 	alu.execute_mask = 1;
4885 	alu.update_pred = 1;
4886 
4887 	alu.dst.sel = ctx->temp_reg;
4888 	alu.dst.write = 1;
4889 	alu.dst.chan = 0;
4890 
4891 	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
4892 	alu.src[1].sel = V_SQ_ALU_SRC_0;
4893 	alu.src[1].chan = 0;
4894 
4895 	alu.last = 1;
4896 
4897 	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
4898 	if (r)
4899 		return r;
4900 	return 0;
4901 }
4902 
pops(struct r600_shader_ctx * ctx,int pops)4903 static int pops(struct r600_shader_ctx *ctx, int pops)
4904 {
4905 	unsigned force_pop = ctx->bc->force_add_cf;
4906 
4907 	if (!force_pop) {
4908 		int alu_pop = 3;
4909 		if (ctx->bc->cf_last) {
4910 			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
4911 				alu_pop = 0;
4912 			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
4913 				alu_pop = 1;
4914 		}
4915 		alu_pop += pops;
4916 		if (alu_pop == 1) {
4917 			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
4918 			ctx->bc->force_add_cf = 1;
4919 		} else if (alu_pop == 2) {
4920 			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
4921 			ctx->bc->force_add_cf = 1;
4922 		} else {
4923 			force_pop = 1;
4924 		}
4925 	}
4926 
4927 	if (force_pop) {
4928 		r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
4929 		ctx->bc->cf_last->pop_count = pops;
4930 		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
4931 	}
4932 
4933 	return 0;
4934 }
4935 
callstack_decrease_current(struct r600_shader_ctx * ctx,unsigned reason)4936 static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
4937 {
4938 	switch(reason) {
4939 	case FC_PUSH_VPM:
4940 		ctx->bc->callstack[ctx->bc->call_sp].current--;
4941 		break;
4942 	case FC_PUSH_WQM:
4943 	case FC_LOOP:
4944 		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
4945 		break;
4946 	case FC_REP:
4947 		/* TOODO : for 16 vp asic should -= 2; */
4948 		ctx->bc->callstack[ctx->bc->call_sp].current --;
4949 		break;
4950 	}
4951 }
4952 
callstack_check_depth(struct r600_shader_ctx * ctx,unsigned reason,unsigned check_max_only)4953 static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
4954 {
4955 	if (check_max_only) {
4956 		int diff;
4957 		switch (reason) {
4958 		case FC_PUSH_VPM:
4959 			diff = 1;
4960 			break;
4961 		case FC_PUSH_WQM:
4962 			diff = 4;
4963 			break;
4964 		default:
4965 			assert(0);
4966 			diff = 0;
4967 		}
4968 		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
4969 		    ctx->bc->callstack[ctx->bc->call_sp].max) {
4970 			ctx->bc->callstack[ctx->bc->call_sp].max =
4971 				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
4972 		}
4973 		return;
4974 	}
4975 	switch (reason) {
4976 	case FC_PUSH_VPM:
4977 		ctx->bc->callstack[ctx->bc->call_sp].current++;
4978 		break;
4979 	case FC_PUSH_WQM:
4980 	case FC_LOOP:
4981 		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
4982 		break;
4983 	case FC_REP:
4984 		ctx->bc->callstack[ctx->bc->call_sp].current++;
4985 		break;
4986 	}
4987 
4988 	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
4989 	    ctx->bc->callstack[ctx->bc->call_sp].max) {
4990 		ctx->bc->callstack[ctx->bc->call_sp].max =
4991 			ctx->bc->callstack[ctx->bc->call_sp].current;
4992 	}
4993 }
4994 
fc_set_mid(struct r600_shader_ctx * ctx,int fc_sp)4995 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
4996 {
4997 	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
4998 
4999 	sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
5000 						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
5001 	sp->mid[sp->num_mid] = ctx->bc->cf_last;
5002 	sp->num_mid++;
5003 }
5004 
fc_pushlevel(struct r600_shader_ctx * ctx,int type)5005 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
5006 {
5007 	ctx->bc->fc_sp++;
5008 	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
5009 	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
5010 }
5011 
fc_poplevel(struct r600_shader_ctx * ctx)5012 static void fc_poplevel(struct r600_shader_ctx *ctx)
5013 {
5014 	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
5015 	if (sp->mid) {
5016 		free(sp->mid);
5017 		sp->mid = NULL;
5018 	}
5019 	sp->num_mid = 0;
5020 	sp->start = NULL;
5021 	sp->type = 0;
5022 	ctx->bc->fc_sp--;
5023 }
5024 
/*
 * Everything up to the matching #endif is compiled out.  It is an
 * unfinished sketch of RETURN / flag-driven early exit: two of the helpers
 * are empty stubs and the jump offset is never computed.
 */
5025 #if 0
/* Emit a RETURN control-flow instruction; always reports success. */
5026 static int emit_return(struct r600_shader_ctx *ctx)
5027 {
5028 	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
5029 	return 0;
5030 }
5031 
/* Emit a JUMP with pop_count = pops; the offset argument is not used yet. */
5032 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
5033 {
5034 
5035 	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5036 	ctx->bc->cf_last->pop_count = pops;
5037 	/* XXX work out offset */
5038 	return 0;
5039 }
5040 
/* Stub: emits nothing and returns 0. */
5041 static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
5042 {
5043 	return 0;
5044 }
5045 
/* Stub: emits nothing. */
5046 static void emit_testflag(struct r600_shader_ctx *ctx)
5047 {
5048 
5049 }
5050 
/* Test the flag, jump, reset the flag, pop ifidx + 1 levels, then RETURN. */
5051 static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
5052 {
5053 	emit_testflag(ctx);
5054 	emit_jump_to_offset(ctx, 1, 4);
5055 	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
5056 	pops(ctx, ifidx + 1);
5057 	emit_return(ctx);
5058 }
5059 
/*
 * Test the flag, emit the current instruction's CF opcode with
 * pop_count = 1, record it as a fixup of level fc_sp, then pop one level.
 */
5060 static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
5061 {
5062 	emit_testflag(ctx);
5063 
5064 	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5065 	ctx->bc->cf_last->pop_count = 1;
5066 
5067 	fc_set_mid(ctx, fc_sp);
5068 
5069 	pops(ctx, 1);
5070 }
5071 #endif
5072 
tgsi_if(struct r600_shader_ctx * ctx)5073 static int tgsi_if(struct r600_shader_ctx *ctx)
5074 {
5075 	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
5076 
5077 	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
5078 
5079 	fc_pushlevel(ctx, FC_IF);
5080 
5081 	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
5082 	return 0;
5083 }
5084 
tgsi_else(struct r600_shader_ctx * ctx)5085 static int tgsi_else(struct r600_shader_ctx *ctx)
5086 {
5087 	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
5088 	ctx->bc->cf_last->pop_count = 1;
5089 
5090 	fc_set_mid(ctx, ctx->bc->fc_sp);
5091 	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
5092 	return 0;
5093 }
5094 
tgsi_endif(struct r600_shader_ctx * ctx)5095 static int tgsi_endif(struct r600_shader_ctx *ctx)
5096 {
5097 	pops(ctx, 1);
5098 	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
5099 		R600_ERR("if/endif unbalanced in shader\n");
5100 		return -1;
5101 	}
5102 
5103 	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
5104 		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5105 		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
5106 	} else {
5107 		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
5108 	}
5109 	fc_poplevel(ctx);
5110 
5111 	callstack_decrease_current(ctx, FC_PUSH_VPM);
5112 	return 0;
5113 }
5114 
tgsi_bgnloop(struct r600_shader_ctx * ctx)5115 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
5116 {
5117 	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
5118 	 * limited to 4096 iterations, like the other LOOP_* instructions. */
5119 	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
5120 
5121 	fc_pushlevel(ctx, FC_LOOP);
5122 
5123 	/* check stack depth */
5124 	callstack_check_depth(ctx, FC_LOOP, 0);
5125 	return 0;
5126 }
5127 
tgsi_endloop(struct r600_shader_ctx * ctx)5128 static int tgsi_endloop(struct r600_shader_ctx *ctx)
5129 {
5130 	int i;
5131 
5132 	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
5133 
5134 	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
5135 		R600_ERR("loop/endloop in shader code are not paired.\n");
5136 		return -EINVAL;
5137 	}
5138 
5139 	/* fixup loop pointers - from r600isa
5140 	   LOOP END points to CF after LOOP START,
5141 	   LOOP START point to CF after LOOP END
5142 	   BRK/CONT point to LOOP END CF
5143 	*/
5144 	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
5145 
5146 	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
5147 
5148 	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
5149 		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
5150 	}
5151 	/* XXX add LOOPRET support */
5152 	fc_poplevel(ctx);
5153 	callstack_decrease_current(ctx, FC_LOOP);
5154 	return 0;
5155 }
5156 
tgsi_loop_brk_cont(struct r600_shader_ctx * ctx)5157 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
5158 {
5159 	unsigned int fscp;
5160 
5161 	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
5162 	{
5163 		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
5164 			break;
5165 	}
5166 
5167 	if (fscp == 0) {
5168 		R600_ERR("Break not inside loop/endloop pair\n");
5169 		return -EINVAL;
5170 	}
5171 
5172 	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
5173 
5174 	fc_set_mid(ctx, fscp);
5175 
5176 	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
5177 	return 0;
5178 }
5179 
tgsi_umad(struct r600_shader_ctx * ctx)5180 static int tgsi_umad(struct r600_shader_ctx *ctx)
5181 {
5182 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
5183 	struct r600_bytecode_alu alu;
5184 	int i, j, r;
5185 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
5186 
5187 	/* src0 * src1 */
5188 	for (i = 0; i < lasti + 1; i++) {
5189 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5190 			continue;
5191 
5192 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5193 
5194 		alu.dst.chan = i;
5195 		alu.dst.sel = ctx->temp_reg;
5196 		alu.dst.write = 1;
5197 
5198 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
5199 		for (j = 0; j < 2; j++) {
5200 		        r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
5201 		}
5202 
5203 		alu.last = 1;
5204 		r = r600_bytecode_add_alu(ctx->bc, &alu);
5205 		if (r)
5206 			return r;
5207 	}
5208 
5209 
5210 	for (i = 0; i < lasti + 1; i++) {
5211 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
5212 			continue;
5213 
5214 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
5215 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
5216 
5217 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
5218 
5219 		alu.src[0].sel = ctx->temp_reg;
5220 		alu.src[0].chan = i;
5221 
5222 		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
5223 		if (i == lasti) {
5224 			alu.last = 1;
5225 		}
5226 		r = r600_bytecode_add_alu(ctx->bc, &alu);
5227 		if (r)
5228 			return r;
5229 	}
5230 	return 0;
5231 }
5232 
/*
 * TGSI -> r600 translation table.
 *
 * Each entry lists, in order: the TGSI opcode (a bare number where the
 * TGSI opcode space has a gap), a flag that is 1 only for MAD (the one
 * entry whose hardware opcode is an OP3 constant -- presumably an
 * "is three-operand" marker, confirm against the struct definition), the
 * hardware opcode handed to the handler, and the handler function.
 * Opcodes this backend cannot translate use tgsi_unsupported.
 *
 * NOTE(review): entries appear in ascending TGSI opcode order with
 * explicit placeholders for gaps, which suggests the table is indexed
 * directly by opcode number -- do not reorder or remove entries without
 * checking the lookup code.
 */
5233 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
5234 	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5235 	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5236 	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5237 
5238 	/* XXX:
5239 	 * For state trackers other than OpenGL, we'll want to use
5240 	 * _RECIP_IEEE instead.
5241 	 */
5242 	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
5243 
5244 	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
5245 	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5246 	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5247 	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5248 	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5249 	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5250 	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5251 	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5252 	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5253 	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5254 	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5255 	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5256 	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5257 	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5258 	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5259 	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5260 	/* gap */
5261 	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5262 	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5263 	/* gap */
5264 	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5265 	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5266 	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5267 	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5268 	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5269 	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5270 	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5271 	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5272 	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5273 	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5274 	/* gap */
5275 	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5276 	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5277 	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5278 	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5279 	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5280 	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5281 	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5282 	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
5283 	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5284 	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5285 	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5286 	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5287 	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5288 	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5289 	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5290 	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5291 	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5292 	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5293 	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5294 	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5295 	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5296 	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5297 	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5298 	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5299 	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5300 	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5301 	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5302 	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5303 	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5304 	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
5305 	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5306 	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5307 	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5308 	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5309 	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5310 	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5311 	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5312 	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5313 	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5314 	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5315 	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5316 	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5317 	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5318 	/* gap */
5319 	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5320 	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5321 	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5322 	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5323 	/* gap */
5324 	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5325 	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5326 	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5327 	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5328 	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5329 	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5330 	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5331 	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5332 	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
5333 	/* gap */
5334 	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5335 	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5336 	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5337 	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5338 	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5339 	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5340 	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
5341 	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5342 	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5343 	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5344 	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5345 	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5346 	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5347 	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5348 	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5349 	{TGSI_OPCODE_TXQ_LZ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5350 	/* gap */
5351 	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5352 	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5353 	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5354 	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5355 	/* gap */
5356 	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5357 	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5358 	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5359 	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5360 	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5361 	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5362 	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5363 	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5364 	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
5365 	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
5366 	/* gap */
5367 	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5368 	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
5369 	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5370 	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5371 	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5372 	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5373 	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5374 	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
5375 	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5376 	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
5377 	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5378 	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5379 	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5380 	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5381 	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5382 	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5383 	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5384 	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5385 	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5386 	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5387 	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
5388 	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5389 	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
5390 	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5391 	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5392 	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5393 	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5394 	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
5395 	{TGSI_OPCODE_SAMPLE_I,  0, 0, tgsi_unsupported},
5396 	{TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
5397 	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
5398 	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
5399 	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5400 	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
5401 	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
5402 	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
5403 	{TGSI_OPCODE_SVIEWINFO,	0, 0, tgsi_unsupported},
5404 	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5405 	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5406 	{TGSI_OPCODE_UARL,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
5407 	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
5408 	{TGSI_OPCODE_IABS,      0, 0, tgsi_iabs},
5409 	{TGSI_OPCODE_ISSG,      0, 0, tgsi_issg},
5410 	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5411 };
5412 
5413 static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
5414 	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5415 	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5416 	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
5417 	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
5418 	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
5419 	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5420 	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5421 	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5422 	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5423 	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5424 	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5425 	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5426 	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5427 	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
5428 	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5429 	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
5430 	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
5431 	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5432 	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5433 	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5434 	/* gap */
5435 	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5436 	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5437 	/* gap */
5438 	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5439 	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5440 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5441 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5442 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5443 	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5444 	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
5445 	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
5446 	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
5447 	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5448 	/* gap */
5449 	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5450 	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5451 	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5452 	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5453 	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
5454 	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5455 	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5456 	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
5457 	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5458 	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5459 	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5460 	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5461 	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5462 	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5463 	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5464 	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5465 	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
5466 	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5467 	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5468 	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5469 	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5470 	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5471 	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5472 	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5473 	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5474 	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5475 	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5476 	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5477 	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5478 	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5479 	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5480 	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5481 	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5482 	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5483 	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5484 	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5485 	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5486 	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5487 	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5488 	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5489 	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
5490 	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5491 	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5492 	/* gap */
5493 	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5494 	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5495 	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5496 	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5497 	/* gap */
5498 	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5499 	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5500 	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5501 	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5502 	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
5503 	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
5504 	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5505 	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5506 	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5507 	/* gap */
5508 	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5509 	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5510 	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5511 	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5512 	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5513 	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5514 	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
5515 	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5516 	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5517 	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5518 	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5519 	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5520 	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5521 	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5522 	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5523 	{TGSI_OPCODE_TXQ_LZ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5524 	/* gap */
5525 	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5526 	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5527 	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5528 	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5529 	/* gap */
5530 	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5531 	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5532 	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5533 	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5534 	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5535 	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5536 	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5537 	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5538 	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
5539 	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
5540 	/* gap */
5541 	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5542 	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
5543 	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5544 	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5545 	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5546 	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5547 	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5548 	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5549 	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5550 	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
5551 	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
5552 	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5553 	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5554 	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5555 	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5556 	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5557 	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
5558 	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
5559 	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5560 	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5561 	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5562 	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5563 	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5564 	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5565 	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5566 	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5567 	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5568 	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
5569 	{TGSI_OPCODE_SAMPLE_I,      0, 0, tgsi_unsupported},
5570 	{TGSI_OPCODE_SAMPLE_I_MS,   0, 0, tgsi_unsupported},
5571 	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
5572 	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
5573 	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5574 	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
5575 	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
5576 	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
5577 	{TGSI_OPCODE_SVIEWINFO,	0, 0, tgsi_unsupported},
5578 	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5579 	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5580 	{TGSI_OPCODE_UARL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5581 	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
5582 	{TGSI_OPCODE_IABS,      0, 0, tgsi_iabs},
5583 	{TGSI_OPCODE_ISSG,      0, 0, tgsi_issg},
5584 	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5585 };
5586 
/*
 * TGSI-to-hardware translation table carrying the "cm_" prefix (presumably
 * the CAYMAN variant described in the notes at the top of this file -
 * confirm against the code that selects between the per-chip tables).
 *
 * Every row holds four values, in this order:
 *   1. the TGSI opcode (a bare number where no TGSI_OPCODE_* name is used),
 *   2. a flag that is 1 only for MAD, the single row whose hardware opcode
 *      is an OP3 constant (presumably an "is three-source op" marker; the
 *      struct field names are not visible from here),
 *   3. the hardware opcode constant associated with the row (ALU OP2/OP3,
 *      CF or SQ_TEX_INST_* value; many rows carry NOP or 0),
 *   4. the handler function for the opcode; tgsi_unsupported marks opcodes
 *      that have no translation in this table.
 *
 * Rows are listed in opcode order with numeric placeholders filling the
 * holes, which suggests the table is indexed directly by opcode. Do not
 * reorder, insert or drop rows without checking the lookup site.
 *
 * Differences from the preceding table that are visible in this file:
 * I2F, U2F, F2I and F2U use plain tgsi_op2 here (instead of tgsi_op2_trans /
 * tgsi_f2i), and UMUL uses MULLO_INT with cayman_mul_int_instr (instead of
 * MULLO_UINT with tgsi_op2_trans).
 */
5587 static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
5588 	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5589 	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5590 	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
	/* RCP/RSQ (and EX2/LG2 below) go through the cayman-specific float helper. */
5591 	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
5592 	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
5593 	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
5594 	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
5595 	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
5596 	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	/* DP3, DP4 (and DPH, DP2 further down) all share the DOT4 opcode via tgsi_dp. */
5597 	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5598 	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5599 	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
5600 	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
5601 	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/*
	 * "Less than" style opcodes (SLT, SLE, ISLT, USLT) reuse the SETGT/SETGE
	 * hardware opcodes through tgsi_op2_swap - presumably with the two
	 * sources exchanged; confirm in tgsi_op2_swap.
	 */
5602 	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
5603 	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	/* Only row with the second value set to 1; it is also the only OP3 opcode. */
5604 	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	/* SUB is given the ADD opcode; NOTE(review): negation presumably happens in tgsi_op2 - confirm. */
5605 	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
5606 	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
5607 	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5608 	/* gap */
5609 	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5610 	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5611 	/* gap */
5612 	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5613 	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5614 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
5615 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5616 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
5617 	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
5618 	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
5619 	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
5620 	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
5621 	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
5622 	/* gap */
5623 	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* ABS is given the MOV opcode; NOTE(review): the absolute-value modifier is presumably applied in tgsi_op2 - confirm. */
5624 	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
5625 	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5626 	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5627 	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
	/* Derivatives are routed to the texture handler with the gradient fetch instructions. */
5628 	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
5629 	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
5630 	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
5631 	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5632 	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5633 	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5634 	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5635 	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5636 	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
5637 	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5638 	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
5639 	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
5640 	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
5641 	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
5642 	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* TEX and TXP share SQ_TEX_INST_SAMPLE; any projection handling is left to tgsi_tex (not visible here). */
5643 	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5644 	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
5645 	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
5646 	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5647 	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5648 	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5649 	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5650 	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5651 	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5652 	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
5653 	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5654 	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5655 	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5656 	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
5657 	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
5658 	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
5659 	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
5660 	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5661 	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5662 	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
5663 	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	/* BRK (and CONT below) carry a CF opcode rather than an ALU one. */
5664 	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
5665 	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
5666 	/* gap */
5667 	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5668 	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5669 	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
5670 	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
5671 	/* gap */
5672 	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5673 	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5674 	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5675 	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5676 	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
	/*
	 * I2F uses plain tgsi_op2 here, whereas the preceding table uses
	 * tgsi_op2_trans. The CAYMAN notes at the top of the file list
	 * INT_TO_FLT among the t-slot ops that become vector ops.
	 */
5677 	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
5678 	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
5679 	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
5680 	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
5681 	/* gap */
5682 	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5683 	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
5684 	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
5685 	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod},
5686 	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
5687 	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5688 	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
5689 	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5690 	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
5691 	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5692 	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5693 	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
5694 	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5695 	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
5696 	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5697 	{TGSI_OPCODE_TXQ_LZ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
5698 	/* gap */
5699 	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5700 	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5701 	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* NOTE(review): TGSI NOP is routed to tgsi_unsupported, not to a no-op handler - confirm this is intended. */
5702 	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5703 	/* gap */
5704 	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5705 	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5706 	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5707 	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5708 	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5709 	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5710 	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5711 	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5712 	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
5713 	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
5714 	/* gap */
5715 	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* F2I uses plain tgsi_op2 here; the preceding table uses tgsi_f2i (same for F2U below). */
5716 	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
5717 	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
5718 	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
5719 	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
5720 	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
5721 	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
5722 	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
5723 	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
5724 	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
5725 	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
5726 	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
5727 	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
5728 	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
5729 	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
5730 	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
5731 	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod},
	/*
	 * UMUL is paired with MULLO_INT and the cayman integer-multiply helper;
	 * the preceding table uses MULLO_UINT with tgsi_op2_trans.
	 * NOTE(review): confirm the signed MULLO variant is intended here.
	 */
5732 	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr},
5733 	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
5734 	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
5735 	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
5736 	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap},
5737 	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
5738 	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5739 	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5740 	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5741 	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* The SAMPLE*, GATHER4 and SVIEWINFO rows are all unsupported and carry a literal 0 as hardware opcode. */
5742 	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
5743 	{TGSI_OPCODE_SAMPLE_I,      0, 0, tgsi_unsupported},
5744 	{TGSI_OPCODE_SAMPLE_I_MS,   0, 0, tgsi_unsupported},
5745 	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
5746 	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
5747 	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
5748 	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
5749 	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
5750 	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
5751 	{TGSI_OPCODE_SVIEWINFO,	0, 0, tgsi_unsupported},
5752 	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
5753 	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
5754 	{TGSI_OPCODE_UARL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
5755 	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
5756 	{TGSI_OPCODE_IABS,      0, 0, tgsi_iabs},
5757 	{TGSI_OPCODE_ISSG,      0, 0, tgsi_issg},
	/* Final row for TGSI_OPCODE_LAST itself, also unsupported. */
5758 	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
5759 };
5760