• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include "r600_asm.h"
24 #include "r600_sq.h"
25 #include "r600_opcodes.h"
26 #include "r600_formats.h"
27 #include "r600d.h"
28 #include "r600d_common.h"
29 
30 #include <errno.h>
31 #include <string.h>
32 #include "compiler/shader_enums.h"
33 #include "util/u_memory.h"
34 #include "util/u_math.h"
35 
36 #define NUM_OF_CYCLES 3
37 #define NUM_OF_COMPONENTS 4
38 
alu_writes(struct r600_bytecode_alu * alu)39 static inline bool alu_writes(struct r600_bytecode_alu *alu)
40 {
41 	return alu->dst.write || alu->is_op3;
42 }
43 
r600_bytecode_get_num_operands(const struct r600_bytecode_alu * alu)44 static inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
45 {
46 	return r600_isa_alu(alu->op)->src_count;
47 }
48 
r600_bytecode_cf(void)49 static struct r600_bytecode_cf *r600_bytecode_cf(void)
50 {
51 	struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
52 
53 	if (!cf)
54 		return NULL;
55 	list_inithead(&cf->list);
56 	list_inithead(&cf->alu);
57 	list_inithead(&cf->vtx);
58 	list_inithead(&cf->tex);
59 	list_inithead(&cf->gds);
60 	return cf;
61 }
62 
r600_bytecode_alu(void)63 static struct r600_bytecode_alu *r600_bytecode_alu(void)
64 {
65 	struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
66 
67 	if (!alu)
68 		return NULL;
69 	list_inithead(&alu->list);
70 	return alu;
71 }
72 
r600_bytecode_vtx(void)73 static struct r600_bytecode_vtx *r600_bytecode_vtx(void)
74 {
75 	struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx);
76 
77 	if (!vtx)
78 		return NULL;
79 	list_inithead(&vtx->list);
80 	return vtx;
81 }
82 
r600_bytecode_tex(void)83 static struct r600_bytecode_tex *r600_bytecode_tex(void)
84 {
85 	struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex);
86 
87 	if (!tex)
88 		return NULL;
89 	list_inithead(&tex->list);
90 	return tex;
91 }
92 
r600_bytecode_gds(void)93 static struct r600_bytecode_gds *r600_bytecode_gds(void)
94 {
95 	struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds);
96 
97 	if (gds == NULL)
98 		return NULL;
99 	list_inithead(&gds->list);
100 	return gds;
101 }
102 
/* Size (in sub-entries) of one stack entry for the given chip.
 * Determined by the chip's wavefront size, per the tables below.
 *
 * Wavefront size:
 *   64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
 *       Aruba/Sumo/Sumo2/redwood/juniper
 *   32: R630/R730/R710/Palm/Cedar
 *   16: R610/Rs780
 *
 * Stack row size:
 * 	Wavefront Size                        16  32  48  64
 * 	Columns per Row (R6xx/R7xx/R8xx only)  8   8   4   4
 * 	Columns per Row (R9xx+)                8   4   4   4 */
static unsigned stack_entry_size(enum radeon_family chip) {
	switch (chip) {
	/* FIXME: are some chips missing here? */
	/* wavefront size 16 */
	case CHIP_RV610:
	case CHIP_RS780:
	case CHIP_RV620:
	case CHIP_RS880:
	/* wavefront size 32 */
	case CHIP_RV630:
	case CHIP_RV635:
	case CHIP_RV730:
	case CHIP_RV710:
	case CHIP_PALM:
	case CHIP_CEDAR:
		return 8;

	/* wavefront size 64 */
	default:
		return 4;
	}
}
136 
/* Initialize a bytecode container for a new shader.
 *
 * Chooses the AR-register handling strategy and whether a NOP must follow a
 * relative-destination write — both chip-family-specific quirks — and sets
 * the per-chip stack entry size.
 */
void r600_bytecode_init(struct r600_bytecode *bc,
			enum amd_gfx_level gfx_level,
			enum radeon_family family,
			bool has_compressed_msaa_texturing)
{
	/* Monotonically increasing id, used for debug identification only. */
	static unsigned next_shader_id = 0;

	bc->debug_id = ++next_shader_id;

	if ((gfx_level == R600) &&
	    (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
		bc->ar_handling = AR_HANDLE_RV6XX;

		/* Insert a nop after a relative temp write so that a read in
		 * the following instruction group gets the right value.  The
		 * r600 and EG ISA specs both say that read-after-rel-write of a
		 * register in the next instr group is illegal, but apparently
		 * that's not true on all chips (see commit
		 * c96b9834032952492efbd2d1f5511fe225704918).
		 */
		bc->r6xx_nop_after_rel_dst = 1;
	} else if (family == CHIP_RV770) {
		bc->ar_handling = AR_HANDLE_NORMAL;
		bc->r6xx_nop_after_rel_dst = 1;
	} else {
		bc->ar_handling = AR_HANDLE_NORMAL;
		bc->r6xx_nop_after_rel_dst = 0;
	}

	list_inithead(&bc->cf);
	bc->gfx_level = gfx_level;
	bc->family = family;
	bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
	/* Stack entry size depends on the chip's wavefront size (see above). */
	bc->stack.entry_size = stack_entry_size(family);
}
172 
/* Append a fresh CF instruction to the shader and make it the current one
 * (bc->cf_last).  Returns 0 on success, -ENOMEM on allocation failure. */
int r600_bytecode_add_cf(struct r600_bytecode *bc)
{
	struct r600_bytecode_cf *cf = r600_bytecode_cf();

	if (!cf)
		return -ENOMEM;
	list_addtail(&cf->list, &bc->cf);
	if (bc->cf_last) {
		/* Each CF instruction occupies two dwords. */
		cf->id = bc->cf_last->id + 2;
		if (bc->cf_last->eg_alu_extended) {
			/* take into account extended alu size */
			cf->id += 2;
			bc->ndw += 2;
		}
	}
	bc->cf_last = cf;
	bc->ncf++;
	bc->ndw += 2;
	bc->force_add_cf = 0;
	/* The AR register does not survive across CF boundaries. */
	bc->ar_loaded = 0;
	return 0;
}
195 
/* Add an export/output CF instruction.
 *
 * When the new output is compatible with the current CF (same op — treating
 * EXPORT followed by EXPORT_DONE as mergeable — same type, element size,
 * swizzles and component mask, and the combined burst fits in 16), it is
 * merged into the existing CF as a burst instead of emitting a new one.
 * Returns 0 on success or a negative error code from CF allocation. */
int r600_bytecode_add_output(struct r600_bytecode *bc,
		const struct r600_bytecode_output *output)
{
	int r;

	/* Grow the GPR count to cover the exported register. */
	if (output->gpr >= bc->ngpr)
		bc->ngpr = output->gpr + 1;

	if (bc->cf_last && (bc->cf_last->op == output->op ||
		(bc->cf_last->op == CF_OP_EXPORT &&
		output->op == CF_OP_EXPORT_DONE)) &&
		output->type == bc->cf_last->output.type &&
		output->elem_size == bc->cf_last->output.elem_size &&
		output->swizzle_x == bc->cf_last->output.swizzle_x &&
		output->swizzle_y == bc->cf_last->output.swizzle_y &&
		output->swizzle_z == bc->cf_last->output.swizzle_z &&
		output->swizzle_w == bc->cf_last->output.swizzle_w &&
		output->comp_mask == bc->cf_last->output.comp_mask &&
		(output->burst_count + bc->cf_last->output.burst_count) <= 16) {

		/* New output immediately precedes the existing burst:
		 * extend the burst downwards (rebase gpr/array_base). */
		if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
			(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {

			bc->cf_last->op = bc->cf_last->output.op = output->op;
			bc->cf_last->output.gpr = output->gpr;
			bc->cf_last->output.array_base = output->array_base;
			bc->cf_last->output.burst_count += output->burst_count;
			return 0;

		/* New output immediately follows the existing burst:
		 * extend the burst upwards (base stays). */
		} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
			output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {

			bc->cf_last->op = bc->cf_last->output.op = output->op;
			bc->cf_last->output.burst_count += output->burst_count;
			return 0;
		}
	}

	/* Not mergeable: emit a new CF instruction for this output. */
	r = r600_bytecode_add_cf(bc);
	if (r)
		return r;
	bc->cf_last->op = output->op;
	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
	bc->cf_last->barrier = 1;
	return 0;
}
242 
r600_bytecode_add_pending_output(struct r600_bytecode * bc,const struct r600_bytecode_output * output)243 int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
244 		const struct r600_bytecode_output *output)
245 {
246 	assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
247 	bc->pending_outputs[bc->n_pending_outputs++] = *output;
248 
249 	return 0;
250 }
251 
252 void
r600_bytecode_add_ack(struct r600_bytecode * bc)253 r600_bytecode_add_ack(struct r600_bytecode *bc)
254 {
255 	bc->need_wait_ack = true;
256 }
257 
258 int
r600_bytecode_wait_acks(struct r600_bytecode * bc)259 r600_bytecode_wait_acks(struct r600_bytecode *bc)
260 {
261 	/* Store acks are an R700+ feature. */
262 	if (bc->gfx_level < R700)
263 		return 0;
264 
265 	if (!bc->need_wait_ack)
266 		return 0;
267 
268 	int ret = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
269 	if (ret != 0)
270 		return ret;
271 
272 	struct r600_bytecode_cf *cf = bc->cf_last;
273 	cf->barrier = 1;
274 	/* Request a wait if the number of outstanding acks is > 0 */
275 	cf->cf_addr = 0;
276 
277 	return 0;
278 }
279 
280 uint32_t
r600_bytecode_write_export_ack_type(struct r600_bytecode * bc,bool indirect)281 r600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect)
282 {
283 	if (bc->gfx_level >= R700) {
284 		if (indirect)
285 			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK_EG;
286 		else
287 			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK_EG;
288 	} else {
289 		if (indirect)
290 			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
291 		else
292 			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
293 	}
294 }
295 
296 /* alu instructions that can only exits once per group */
is_alu_once_inst(struct r600_bytecode_alu * alu)297 static int is_alu_once_inst(struct r600_bytecode_alu *alu)
298 {
299 	return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
300 }
301 
is_alu_reduction_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)302 static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
303 {
304 	return (r600_isa_alu(alu->op)->flags & AF_REPL) &&
305 			(r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
306 }
307 
is_alu_mova_inst(struct r600_bytecode_alu * alu)308 static int is_alu_mova_inst(struct r600_bytecode_alu *alu)
309 {
310 	return r600_isa_alu(alu->op)->flags & AF_MOVA;
311 }
312 
alu_uses_rel(struct r600_bytecode_alu * alu)313 static int alu_uses_rel(struct r600_bytecode_alu *alu)
314 {
315 	unsigned num_src = r600_bytecode_get_num_operands(alu);
316 	unsigned src;
317 
318 	if (alu->dst.rel) {
319 		return 1;
320 	}
321 
322 	for (src = 0; src < num_src; ++src) {
323 		if (alu->src[src].rel) {
324 			return 1;
325 		}
326 	}
327 	return 0;
328 }
329 
is_lds_read(int sel)330 static int is_lds_read(int sel)
331 {
332   return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
333 }
334 
alu_uses_lds(struct r600_bytecode_alu * alu)335 static int alu_uses_lds(struct r600_bytecode_alu *alu)
336 {
337 	unsigned num_src = r600_bytecode_get_num_operands(alu);
338 	unsigned src;
339 
340 	for (src = 0; src < num_src; ++src) {
341 		if (is_lds_read(alu->src[src].sel)) {
342 			return 1;
343 		}
344 	}
345 	return 0;
346 }
347 
is_alu_64bit_inst(struct r600_bytecode_alu * alu)348 static int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
349 {
350 	const struct alu_op_info *op = r600_isa_alu(alu->op);
351 	return (op->flags & AF_64);
352 }
353 
is_alu_vec_unit_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)354 static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
355 {
356 	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
357 	return !(slots & AF_S);
358 }
359 
is_alu_trans_unit_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)360 static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
361 {
362 	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
363 	return !(slots & AF_V);
364 }
365 
366 /* alu instructions that can execute on any unit */
is_alu_any_unit_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)367 static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
368 {
369 	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
370 	return slots == AF_VS;
371 }
372 
is_nop_inst(struct r600_bytecode_alu * alu)373 static int is_nop_inst(struct r600_bytecode_alu *alu)
374 {
375 	return alu->op == ALU_OP0_NOP;
376 }
377 
/* Distribute one ALU instruction group (starting at alu_first, ending at the
 * instruction with ->last set) over the hardware slots:
 * assignment[0..3] are the vector units x/y/z/w, assignment[4] the trans
 * unit (Cayman has no trans unit, so only 4 slots there).
 * Returns 0 on success, -1 if a unit would be double-booked. */
static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
			    struct r600_bytecode_alu *assignment[5])
{
	struct r600_bytecode_alu *alu;
	unsigned i, chan, trans;
	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;

	for (i = 0; i < max_slots; i++)
		assignment[i] = NULL;

	for (alu = alu_first; alu; alu = list_entry(alu->list.next, struct r600_bytecode_alu, list)) {
		chan = alu->dst.chan;
		/* Decide vector slot vs. trans unit for this instruction. */
		if (max_slots == 4)
			trans = 0;
		else if (is_alu_trans_unit_inst(bc, alu))
			trans = 1;
		else if (is_alu_vec_unit_inst(bc, alu))
			trans = 0;
		else if (assignment[chan])
			trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */
		else
			trans = 0;

		if (trans) {
			if (assignment[4]) {
				assert(0); /* ALU.Trans has already been allocated. */
				return -1;
			}
			assignment[4] = alu;
		} else {
			if (assignment[chan]) {
				assert(0); /* ALU.chan has already been allocated. */
				return -1;
			}
			assignment[chan] = alu;
		}

		/* ->last marks the end of the instruction group. */
		if (alu->last)
			break;
	}
	return 0;
}
420 
/* Per-group reservation state for one candidate bank swizzle: which GPR and
 * constant-file read ports are already claimed. -1 means "free". */
struct alu_bank_swizzle {
	int	hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];	/* GPR sel per (cycle, read port) */
	int	hw_cfile_addr[4];				/* cfile address per read port */
	int	hw_cfile_elem[4];				/* cfile element per read port */
};

/* Load cycle for each of the three sources, per vector bank-swizzle mode. */
static const unsigned cycle_for_bank_swizzle_vec[][3] = {
	[SQ_ALU_VEC_012] = { 0, 1, 2 },
	[SQ_ALU_VEC_021] = { 0, 2, 1 },
	[SQ_ALU_VEC_120] = { 1, 2, 0 },
	[SQ_ALU_VEC_102] = { 1, 0, 2 },
	[SQ_ALU_VEC_201] = { 2, 0, 1 },
	[SQ_ALU_VEC_210] = { 2, 1, 0 }
};

/* Load cycle for each of the three sources, per scalar bank-swizzle mode. */
static const unsigned cycle_for_bank_swizzle_scl[][3] = {
	[SQ_ALU_SCL_210] = { 2, 1, 0 },
	[SQ_ALU_SCL_122] = { 1, 2, 2 },
	[SQ_ALU_SCL_212] = { 2, 1, 2 },
	[SQ_ALU_SCL_221] = { 2, 2, 1 }
};
442 
init_bank_swizzle(struct alu_bank_swizzle * bs)443 static void init_bank_swizzle(struct alu_bank_swizzle *bs)
444 {
445 	int i, cycle, component;
446 	/* set up gpr use */
447 	for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
448 		for (component = 0; component < NUM_OF_COMPONENTS; component++)
449 			 bs->hw_gpr[cycle][component] = -1;
450 	for (i = 0; i < 4; i++)
451 		bs->hw_cfile_addr[i] = -1;
452 	for (i = 0; i < 4; i++)
453 		bs->hw_cfile_elem[i] = -1;
454 }
455 
reserve_gpr(struct alu_bank_swizzle * bs,unsigned sel,unsigned chan,unsigned cycle)456 static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
457 {
458 	if (bs->hw_gpr[cycle][chan] == -1)
459 		bs->hw_gpr[cycle][chan] = sel;
460 	else if (bs->hw_gpr[cycle][chan] != (int)sel) {
461 		/* Another scalar operation has already used the GPR read port for the channel. */
462 		return -1;
463 	}
464 	return 0;
465 }
466 
reserve_cfile(const struct r600_bytecode * bc,struct alu_bank_swizzle * bs,unsigned sel,unsigned chan)467 static int reserve_cfile(const struct r600_bytecode *bc,
468 			 struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
469 {
470 	int res, num_res = 4;
471 	if (bc->gfx_level >= R700) {
472 		num_res = 2;
473 		chan /= 2;
474 	}
475 	for (res = 0; res < num_res; ++res) {
476 		if (bs->hw_cfile_addr[res] == -1) {
477 			bs->hw_cfile_addr[res] = sel;
478 			bs->hw_cfile_elem[res] = chan;
479 			return 0;
480 		} else if (bs->hw_cfile_addr[res] == sel &&
481 			bs->hw_cfile_elem[res] == chan)
482 			return 0; /* Read for this scalar element already reserved, nothing to do here. */
483 	}
484 	/* All cfile read ports are used, cannot reference vector element. */
485 	return -1;
486 }
487 
/* GPRs occupy source selectors 0..127. */
static int is_gpr(unsigned sel)
{
	return sel < 128;
}
492 
/* CB constants start at 512, and get translated to a kcache index when ALU
 * clauses are constructed. Note that we handle kcache constants the same way
 * as (the now gone) cfile constants, is that really required? */
static int is_kcache(unsigned sel)
{
	/* Kcache before translation. */
	if (sel > 511 && sel < 4607)
		return 1;
	/* Kcache 0 & 1 after translation. */
	if (sel > 127 && sel < 192)
		return 1;
	/* Kcache 2 & 3 after translation (EG). */
	return sel > 256 && sel < 320;
}
502 
is_const(int sel)503 static int is_const(int sel)
504 {
505    return is_kcache(sel) ||
506 		(sel >= V_SQ_ALU_SRC_0 &&
507 		sel <= V_SQ_ALU_SRC_LITERAL);
508 }
509 
/* Validate one vector-slot instruction against a candidate bank swizzle:
 * reserve the GPR read port (at the cycle dictated by the swizzle) and the
 * cfile port for each source.  Returns 0 if all reads fit, -1 on conflict. */
static int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
			struct alu_bank_swizzle *bs, int bank_swizzle)
{
	int r, src, num_src, sel, elem, cycle;

	num_src = r600_bytecode_get_num_operands(alu);
	for (src = 0; src < num_src; src++) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_gpr(sel)) {
			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
				/* Nothing to do; special-case optimization,
				 * second source uses first source’s reservation. */
				continue;
			else {
				r = reserve_gpr(bs, sel, elem, cycle);
				if (r)
					return r;
			}
		} else if (is_kcache(sel)) {
			/* Key the cfile reservation on (kcache bank, selector). */
			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
			if (r)
				return r;
		}
		/* No restrictions on PV, PS, literal or special constants. */
	}
	return 0;
}
539 
/* Validate one trans-slot (scalar) instruction against a candidate scalar
 * bank swizzle.  Counts constant references (at most two allowed), reserves
 * cfile ports, and then checks that every GPR (and PV/PS) load cycle does
 * not collide with the constant-load cycles.  Returns 0 if OK, -1 on
 * conflict. */
static int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
			struct alu_bank_swizzle *bs, int bank_swizzle)
{
	int r, src, num_src, const_count, sel, elem, cycle;

	num_src = r600_bytecode_get_num_operands(alu);
	for (const_count = 0, src = 0; src < num_src; ++src) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_const(sel)) { /* Any constant, including literal and inline constants. */
			if (const_count >= 2)
				/* More than two references to a constant in
				 * transcendental operation. */
				return -1;
			else
				const_count++;
		}
		if (is_kcache(sel)) {
			/* Key the cfile reservation on (kcache bank, selector). */
			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
			if (r)
				return r;
		}
	}
	for (src = 0; src < num_src; ++src) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_gpr(sel)) {
			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
			if (cycle < const_count)
				/* Cycle for GPR load conflicts with
				 * constant load in transcendental operation. */
				return -1;
			r = reserve_gpr(bs, sel, elem, cycle);
			if (r)
				return r;
		}
		/* PV PS restrictions */
		if (const_count && (sel == 254 || sel == 255)) {
			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
			if (cycle < const_count)
				return -1;
		}
	}
	return 0;
}
585 
/* Find a combination of per-slot bank swizzles under which every read port
 * reservation in the instruction group succeeds, and store it into the
 * instructions.  Performs a bounded brute-force search over all swizzle
 * combinations.  Returns 0 on success, -1 if no working combination exists
 * (or the search budget is exhausted). */
static int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
				      struct r600_bytecode_alu *slots[5])
{
	struct alu_bank_swizzle bs;
	int bank_swizzle[5];
	int i, r = 0, forced = 1;
	bool scalar_only = bc->gfx_level == CAYMAN ? false : true;
	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
	/* Hard cap on search iterations so pathological groups terminate. */
	int max_checks = max_slots * 1000;

	/* If every occupied slot has a forced swizzle there is nothing to do;
	 * also note whether any vector slot is occupied at all. */
	for (i = 0; i < max_slots; i++) {
		if (slots[i]) {
			if (slots[i]->bank_swizzle_force) {
				slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
			} else {
				forced = 0;
			}
		}

		if (i < 4 && slots[i])
			scalar_only = false;
	}
	if (forced)
		return 0;

	/* Just check every possible combination of bank swizzle.
	 * Not very efficient, but works on the first try in most of the cases. */
	for (i = 0; i < 4; i++)
		if (!slots[i] || !slots[i]->bank_swizzle_force || slots[i]->is_lds_idx_op)
			bank_swizzle[i] = SQ_ALU_VEC_012;
		else
			bank_swizzle[i] = slots[i]->bank_swizzle;

	bank_swizzle[4] = SQ_ALU_SCL_210;

	while(bank_swizzle[4] <= SQ_ALU_SCL_221 && max_checks--) {
		init_bank_swizzle(&bs);
		/* Try the current combination: all vector slots first... */
		if (scalar_only == false) {
			for (i = 0; i < 4; i++) {
				if (slots[i]) {
					r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
					if (r)
						break;
				}
			}
		} else
			r = 0;

		/* ...then the trans slot against the same reservations. */
		if (!r && max_slots == 5 && slots[4]) {
			r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
		}
		if (!r) {
			/* Found a working combination: commit it. */
			for (i = 0; i < max_slots; i++) {
				if (slots[i])
					slots[i]->bank_swizzle = bank_swizzle[i];
			}
			return 0;
		}

		/* Advance to the next combination (odometer-style increment,
		 * skipping slots whose swizzle is forced or LDS-indexed). */
		if (scalar_only) {
			bank_swizzle[4]++;
		} else {
			for (i = 0; i < max_slots; i++) {
				if (!slots[i] || (!slots[i]->bank_swizzle_force && !slots[i]->is_lds_idx_op)) {
					bank_swizzle[i]++;
					if (bank_swizzle[i] <= SQ_ALU_VEC_210)
						break;
					else if (i < max_slots - 1)
						bank_swizzle[i] = SQ_ALU_VEC_012;
					else
						return -1;
				}
			}
		}
	}

	/* Couldn't find a working swizzle. */
	return -1;
}
665 
/* Rewrite sources of the current group that read a GPR written by the
 * previous group to read the PV (previous vector) / PS (previous scalar)
 * forwarding registers instead, avoiding the GPR read ports.
 * Returns 0 on success or an error from unit assignment. */
static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
				  struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev)
{
	struct r600_bytecode_alu *prev[5];
	int gpr[5], chan[5];
	int i, j, r, src, num_src;
	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;

	r = assign_alu_units(bc, alu_prev, prev);
	if (r)
		return r;

	/* Record which GPR/channel each previous slot wrote (-1 = nothing
	 * forwardable: no write, relative write, or 64-bit op). */
	for (i = 0; i < max_slots; ++i) {
		if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {

			if (is_alu_64bit_inst(prev[i])) {
				gpr[i] = -1;
				continue;
			}

			gpr[i] = prev[i]->dst.sel;
			/* cube writes more than PV.X */
			if (is_alu_reduction_inst(bc, prev[i]))
				chan[i] = 0;
			else
				chan[i] = prev[i]->dst.chan;
		} else
			gpr[i] = -1;
	}

	for (i = 0; i < max_slots; ++i) {
		struct r600_bytecode_alu *alu = slots[i];
		if (!alu)
			continue;

		if (is_alu_64bit_inst(alu))
			continue;
		num_src = r600_bytecode_get_num_operands(alu);
		for (src = 0; src < num_src; ++src) {
			/* Only direct GPR reads can be forwarded. */
			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
				continue;

			/* PS forwards the trans-slot result (not on Cayman,
			 * which has no trans unit). */
			if (bc->gfx_level < CAYMAN) {
				if (alu->src[src].sel == gpr[4] &&
				    alu->src[src].chan == chan[4] &&
				    alu_prev->pred_sel == alu->pred_sel) {
					alu->src[src].sel = V_SQ_ALU_SRC_PS;
					alu->src[src].chan = 0;
					continue;
				}
			}

			/* PV.chan forwards the vector-slot results. */
			for (j = 0; j < 4; ++j) {
				if (alu->src[src].sel == gpr[j] &&
					alu->src[src].chan == j &&
				      alu_prev->pred_sel == alu->pred_sel) {
					alu->src[src].sel = V_SQ_ALU_SRC_PV;
					alu->src[src].chan = chan[j];
					break;
				}
			}
		}
	}

	return 0;
}
732 
r600_bytecode_special_constants(uint32_t value,unsigned * sel)733 void r600_bytecode_special_constants(uint32_t value, unsigned *sel)
734 {
735 	switch(value) {
736 	case 0:
737 		*sel = V_SQ_ALU_SRC_0;
738 		break;
739 	case 1:
740 		*sel = V_SQ_ALU_SRC_1_INT;
741 		break;
742 	case -1:
743 		*sel = V_SQ_ALU_SRC_M_1_INT;
744 		break;
745 	case 0x3F800000: /* 1.0f */
746 		*sel = V_SQ_ALU_SRC_1;
747 		break;
748 	case 0x3F000000: /* 0.5f */
749 		*sel = V_SQ_ALU_SRC_0_5;
750 		break;
751 	default:
752 		*sel = V_SQ_ALU_SRC_LITERAL;
753 		break;
754 	}
755 }
756 
757 /* compute how many literal are needed */
r600_bytecode_alu_nliterals(struct r600_bytecode_alu * alu,uint32_t literal[4],unsigned * nliteral)758 static int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
759 				 uint32_t literal[4], unsigned *nliteral)
760 {
761 	unsigned num_src = r600_bytecode_get_num_operands(alu);
762 	unsigned i, j;
763 
764 	for (i = 0; i < num_src; ++i) {
765 		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
766 			uint32_t value = alu->src[i].value;
767 			unsigned found = 0;
768 			for (j = 0; j < *nliteral; ++j) {
769 				if (literal[j] == value) {
770 					found = 1;
771 					break;
772 				}
773 			}
774 			if (!found) {
775 				if (*nliteral >= 4)
776 					return -EINVAL;
777 				literal[(*nliteral)++] = value;
778 			}
779 		}
780 	}
781 	return 0;
782 }
783 
r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu * alu,uint32_t literal[4],unsigned nliteral)784 static void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
785 					      uint32_t literal[4], unsigned nliteral)
786 {
787 	unsigned num_src = r600_bytecode_get_num_operands(alu);
788 	unsigned i, j;
789 
790 	for (i = 0; i < num_src; ++i) {
791 		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
792 			uint32_t value = alu->src[i].value;
793 			for (j = 0; j < nliteral; ++j) {
794 				if (literal[j] == value) {
795 					alu->src[i].chan = j;
796 					break;
797 				}
798 			}
799 		}
800 	}
801 }
802 
merge_inst_groups(struct r600_bytecode * bc,struct r600_bytecode_alu * slots[5],struct r600_bytecode_alu * alu_prev)803 static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5],
804 			     struct r600_bytecode_alu *alu_prev)
805 {
806 	struct r600_bytecode_alu *prev[5];
807 	struct r600_bytecode_alu *result[5] = { NULL };
808 
809         uint8_t interp_xz = 0;
810 
811 	uint32_t literal[4], prev_literal[4];
812 	unsigned nliteral = 0, prev_nliteral = 0;
813 
814 	int i, j, r, src, num_src;
815 	int num_once_inst = 0;
816 	int have_mova = 0, have_rel = 0;
817 	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
818 
819    bool has_dot = false;
820 
821 	r = assign_alu_units(bc, alu_prev, prev);
822 	if (r)
823 		return r;
824 
825 	for (i = 0; i < max_slots; ++i) {
826 		if (prev[i]) {
827 		      if (prev[i]->pred_sel)
828 			      return 0;
829 		      if (is_alu_once_inst(prev[i]))
830 			      return 0;
831 				has_dot |= prev[i]->op == ALU_OP2_DOT || prev[i]->op == ALU_OP2_DOT_IEEE;
832 
833 
834                       if (prev[i]->op == ALU_OP1_INTERP_LOAD_P0)
835                          interp_xz |= 3;
836                       if (prev[i]->op == ALU_OP2_INTERP_X)
837                          interp_xz |= 1;
838                       if (prev[i]->op == ALU_OP2_INTERP_Z)
839                          interp_xz |= 2;
840 		}
841 		if (slots[i]) {
842 			if (slots[i]->pred_sel)
843 				return 0;
844 			if (is_alu_once_inst(slots[i]))
845 				return 0;
846          has_dot |= slots[i]->op == ALU_OP2_DOT || slots[i]->op == ALU_OP2_DOT_IEEE;
847 				return 0;
848                         if (slots[i]->op == ALU_OP1_INTERP_LOAD_P0)
849                            interp_xz |= 3;
850                         if (slots[i]->op == ALU_OP2_INTERP_X)
851                            interp_xz |= 1;
852                         if (slots[i]->op == ALU_OP2_INTERP_Z)
853                            interp_xz |= 2;
854 		}
855                 if (interp_xz == 3)
856                    return 0;
857 	}
858 
859 	for (i = 0; i < max_slots; ++i) {
860 		struct r600_bytecode_alu *alu;
861 
862 		if (num_once_inst > 0)
863 		   return 0;
864 
865 		/* check number of literals */
866 		if (prev[i]) {
867 			if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
868 				return 0;
869 			if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
870 				return 0;
871 			if (is_alu_mova_inst(prev[i])) {
872 				if (have_rel)
873 					return 0;
874 				have_mova = 1;
875 			}
876 
877 			if (alu_uses_rel(prev[i])) {
878 				if (have_mova) {
879 					return 0;
880 				}
881 				have_rel = 1;
882 			}
883 			if (alu_uses_lds(prev[i]))
884 				return 0;
885 
886 			num_once_inst += is_alu_once_inst(prev[i]);
887 		}
888 		if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
889 			return 0;
890 
891 		/* Let's check used slots. */
892 		if (prev[i] && !slots[i]) {
893 			result[i] = prev[i];
894 			continue;
895 		} else if (prev[i] && slots[i]) {
896 			if (max_slots == 5 && !has_dot && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
897 				/* Trans unit is still free try to use it. */
898 				if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
899 					result[i] = prev[i];
900 					result[4] = slots[i];
901 				} else if (is_alu_any_unit_inst(bc, prev[i])) {
902 					if (slots[i]->dst.sel == prev[i]->dst.sel &&
903 					    alu_writes(slots[i]) &&
904 					    alu_writes(prev[i]))
905 						return 0;
906 
907 					result[i] = slots[i];
908 					result[4] = prev[i];
909 				} else
910 					return 0;
911 			} else
912 				return 0;
913 		} else if(!slots[i]) {
914 			continue;
915 		} else {
916 			if (max_slots == 5 && slots[i] && prev[4] &&
917 					slots[i]->dst.sel == prev[4]->dst.sel &&
918 					slots[i]->dst.chan == prev[4]->dst.chan &&
919 					alu_writes(slots[i]) &&
920 					alu_writes(prev[4]))
921 				return 0;
922 
923 			result[i] = slots[i];
924 		}
925 
926 		alu = slots[i];
927 		num_once_inst += is_alu_once_inst(alu);
928 
929 		/* don't reschedule NOPs */
930 		if (is_nop_inst(alu))
931 			return 0;
932 
933 		if (is_alu_mova_inst(alu)) {
934 			if (have_rel) {
935 				return 0;
936 			}
937 			have_mova = 1;
938 		}
939 
940 		if (alu_uses_rel(alu)) {
941 			if (have_mova) {
942 				return 0;
943 			}
944 			have_rel = 1;
945 		}
946 
947 		if (alu->op == ALU_OP0_SET_CF_IDX0 ||
948 			alu->op == ALU_OP0_SET_CF_IDX1)
949 			return 0; /* data hazard with MOVA */
950 
951 		/* Let's check source gprs */
952 		num_src = r600_bytecode_get_num_operands(alu);
953 		for (src = 0; src < num_src; ++src) {
954 
955 			/* Constants don't matter. */
956 			if (!is_gpr(alu->src[src].sel))
957 				continue;
958 
959 			for (j = 0; j < max_slots; ++j) {
960 				if (!prev[j] || !alu_writes(prev[j]))
961 					continue;
962 
963 				/* If it's relative then we can't determine which gpr is really used. */
964 				if (prev[j]->dst.chan == alu->src[src].chan &&
965 					(prev[j]->dst.sel == alu->src[src].sel ||
966 					prev[j]->dst.rel || alu->src[src].rel))
967 					return 0;
968 			}
969 		}
970 	}
971 
972 	/* more than one PRED_ or KILL_ ? */
973 	if (num_once_inst > 1)
974 		return 0;
975 
976 	/* check if the result can still be swizzlet */
977 	r = check_and_set_bank_swizzle(bc, result);
978 	if (r)
979 		return 0;
980 
981 	/* looks like everything worked out right, apply the changes */
982 
983 	/* undo adding previous literals */
984 	bc->cf_last->ndw -= align(prev_nliteral, 2);
985 
986 	/* sort instructions */
987 	for (i = 0; i < max_slots; ++i) {
988 		slots[i] = result[i];
989 		if (result[i]) {
990 			list_del(&result[i]->list);
991 			result[i]->last = 0;
992 			list_addtail(&result[i]->list, &bc->cf_last->alu);
993 		}
994 	}
995 
996 	/* determine new last instruction */
997 	list_entry(bc->cf_last->alu.prev, struct r600_bytecode_alu, list)->last = 1;
998 
999 	/* determine new first instruction */
1000 	for (i = 0; i < max_slots; ++i) {
1001 		if (result[i]) {
1002 			bc->cf_last->curr_bs_head = result[i];
1003 			break;
1004 		}
1005 	}
1006 
1007 	bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
1008 	bc->cf_last->prev2_bs_head = NULL;
1009 
1010 	return 0;
1011 }
1012 
/* we'll keep kcache sets sorted by bank & addr */
static int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc,
		struct r600_bytecode_kcache *kcache,
		unsigned bank, unsigned line, unsigned index_mode)
{
	/* Evergreen+ CF ALU clauses expose 4 kcache lock sets, older parts 2. */
	int i, kcache_banks = bc->gfx_level >= EVERGREEN ? 4 : 2;

	for (i = 0; i < kcache_banks; i++) {
		if (kcache[i].mode) {
			int d;

			/* skip sets for lower banks; array is sorted by (bank, addr) */
			if (kcache[i].bank < bank)
				continue;

			if ((kcache[i].bank == bank && kcache[i].addr > line+1) ||
					kcache[i].bank > bank) {
				/* try to insert new line */
				if (kcache[kcache_banks-1].mode) {
					/* all sets are in use */
					return -ENOMEM;
				}

				memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache));
				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
				kcache[i].bank = bank;
				kcache[i].addr = line;
				kcache[i].index_mode = index_mode;
				return 0;
			}

			/* distance of the wanted line from this set's base line */
			d = line - kcache[i].addr;

			if (d == -1) {
				kcache[i].addr--;
				if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) {
					/* we are prepending the line to the current set,
					 * discarding the existing second line,
					 * so we'll have to insert line+2 after it */
					line += 2;
					continue;
				} else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) {
					kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
					return 0;
				} else {
					/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
					return -ENOMEM;
				}
			} else if (d == 1) {
				/* line directly after the base: widen the lock to 2 lines */
				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
				return 0;
			} else if (d == 0)
				/* already covered by this set */
				return 0;
		} else { /* free kcache set - use it */
			kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
			kcache[i].bank = bank;
			kcache[i].addr = line;
			kcache[i].index_mode = index_mode;
			return 0;
		}
	}
	return -ENOMEM;
}
1075 
r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode * bc,struct r600_bytecode_kcache * kcache,struct r600_bytecode_alu * alu)1076 static int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc,
1077 		struct r600_bytecode_kcache *kcache,
1078 		struct r600_bytecode_alu *alu)
1079 {
1080 	int i, r;
1081 
1082 	for (i = 0; i < 3; i++) {
1083 		unsigned bank, line, sel = alu->src[i].sel, index_mode;
1084 
1085 		if (sel < 512)
1086 			continue;
1087 
1088 		bank = alu->src[i].kc_bank;
1089 		assert(bank < R600_MAX_ALU_CONST_BUFFERS);
1090 		line = (sel-512)>>4;
1091 		index_mode = alu->src[i].kc_rel;
1092 
1093 		if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode)))
1094 			return r;
1095 	}
1096 	return 0;
1097 }
1098 
r600_bytecode_assign_kcache_banks(struct r600_bytecode_alu * alu,struct r600_bytecode_kcache * kcache)1099 static int r600_bytecode_assign_kcache_banks(
1100 		struct r600_bytecode_alu *alu,
1101 		struct r600_bytecode_kcache * kcache)
1102 {
1103 	int i, j;
1104 
1105 	/* Alter the src operands to refer to the kcache. */
1106 	for (i = 0; i < 3; ++i) {
1107 		static const unsigned int base[] = {128, 160, 256, 288};
1108 		unsigned int line, sel = alu->src[i].sel, found = 0;
1109 
1110 		if (sel < 512)
1111 			continue;
1112 
1113 		sel -= 512;
1114 		line = sel>>4;
1115 
1116 		for (j = 0; j < 4 && !found; ++j) {
1117 			switch (kcache[j].mode) {
1118 			case V_SQ_CF_KCACHE_NOP:
1119 			case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX:
1120 				R600_ASM_ERR("unexpected kcache line mode\n");
1121 				return -ENOMEM;
1122 			default:
1123 				if (kcache[j].bank == alu->src[i].kc_bank &&
1124 						kcache[j].addr <= line &&
1125 						line < kcache[j].addr + kcache[j].mode) {
1126 					alu->src[i].sel = sel - (kcache[j].addr<<4);
1127 					alu->src[i].sel += base[j];
1128 					found=1;
1129 			    }
1130 			}
1131 		}
1132 	}
1133 	return 0;
1134 }
1135 
static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu,
		unsigned type)
{
	struct r600_bytecode_kcache kcache_sets[4];
	struct r600_bytecode_kcache *kcache = kcache_sets;
	int r;

	/* work on a scratch copy so a failed allocation leaves the current
	 * CF's kcache state untouched */
	memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache));

	if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
		/* can't alloc, need to start new clause */

		/* Make sure the CF ends with an "last" instruction when
		 * we split an ALU group because of a new CF */
		if (!list_is_empty(&bc->cf_last->alu))  {
			struct r600_bytecode_alu *last_submitted =
				list_last_entry(&bc->cf_last->alu, struct r600_bytecode_alu, list);
				last_submitted->last = 1;
		}

		if ((r = r600_bytecode_add_cf(bc))) {
			return r;
		}
		bc->cf_last->op = type;

		/* retry with the new clause */
		kcache = bc->cf_last->kcache;
		if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
			/* can't alloc again- should never happen */
			return r;
		}
	} else {
		/* update kcache sets */
		memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache));
	}

	/* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */
	if (kcache[2].mode != V_SQ_CF_KCACHE_NOP ||
		kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) {
		if (bc->gfx_level < EVERGREEN)
			return -ENOMEM;
		bc->cf_last->eg_alu_extended = 1;
	}

	return 0;
}
1183 
insert_nop_r6xx(struct r600_bytecode * bc,int max_slots)1184 static int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots)
1185 {
1186 	struct r600_bytecode_alu alu;
1187 	int r, i;
1188 
1189 	for (i = 0; i < max_slots; i++) {
1190 		memset(&alu, 0, sizeof(alu));
1191 		alu.op = ALU_OP0_NOP;
1192 		alu.src[0].chan = i & 3;
1193 		alu.dst.chan = i & 3;
1194 		alu.last = (i == max_slots - 1);
1195 		r = r600_bytecode_add_alu(bc, &alu);
1196 		if (r)
1197 			return r;
1198 	}
1199 	return 0;
1200 }
1201 
1202 /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
load_ar_r6xx(struct r600_bytecode * bc,bool for_src)1203 static int load_ar_r6xx(struct r600_bytecode *bc, bool for_src)
1204 {
1205 	struct r600_bytecode_alu alu;
1206 	int r;
1207 
1208 	if (bc->ar_loaded)
1209 		return 0;
1210 
1211 	/* hack to avoid making MOVA the last instruction in the clause */
1212 	if (bc->cf_last == NULL || (bc->cf_last->ndw>>1) >= 110)
1213 		bc->force_add_cf = 1;
1214    else if (for_src) {
1215       insert_nop_r6xx(bc, 4);
1216       bc->nalu_groups++;
1217    }
1218 
1219 	memset(&alu, 0, sizeof(alu));
1220 	alu.op = ALU_OP1_MOVA_GPR_INT;
1221 	alu.src[0].sel = bc->ar_reg;
1222 	alu.src[0].chan = bc->ar_chan;
1223 	alu.last = 1;
1224 	alu.index_mode = INDEX_MODE_LOOP;
1225 	r = r600_bytecode_add_alu(bc, &alu);
1226 	if (r)
1227 		return r;
1228 
1229 	/* no requirement to set uses waterfall on MOVA_GPR_INT */
1230 	bc->ar_loaded = 1;
1231 	return 0;
1232 }
1233 
1234 /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
r600_load_ar(struct r600_bytecode * bc,bool for_src)1235 int r600_load_ar(struct r600_bytecode *bc, bool for_src)
1236 {
1237 	struct r600_bytecode_alu alu;
1238 	int r;
1239 
1240 	if (bc->ar_handling)
1241 		return load_ar_r6xx(bc, for_src);
1242 
1243 	if (bc->ar_loaded)
1244 		return 0;
1245 
1246 	/* hack to avoid making MOVA the last instruction in the clause */
1247 	if (bc->cf_last == NULL || (bc->cf_last->ndw>>1) >= 110)
1248 		bc->force_add_cf = 1;
1249 
1250 	memset(&alu, 0, sizeof(alu));
1251 	alu.op = ALU_OP1_MOVA_INT;
1252 	alu.src[0].sel = bc->ar_reg;
1253 	alu.src[0].chan = bc->ar_chan;
1254 	alu.last = 1;
1255 	r = r600_bytecode_add_alu(bc, &alu);
1256 	if (r)
1257 		return r;
1258 
1259 	bc->cf_last->r6xx_uses_waterfall = 1;
1260 	bc->ar_loaded = 1;
1261 	return 0;
1262 }
1263 
r600_bytecode_add_alu_type(struct r600_bytecode * bc,const struct r600_bytecode_alu * alu,unsigned type)1264 int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
1265 		const struct r600_bytecode_alu *alu, unsigned type)
1266 {
1267 	struct r600_bytecode_alu *nalu = r600_bytecode_alu();
1268 	struct r600_bytecode_alu *lalu;
1269 	int i, r;
1270 
1271 	if (!nalu)
1272 		return -ENOMEM;
1273 	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
1274 
1275 	if (alu->is_op3) {
1276 		/* will fail later since alu does not support it. */
1277 		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1278 	}
1279 
1280 	if (bc->cf_last != NULL && bc->cf_last->op != type) {
1281 		/* check if we could add it anyway */
1282 		if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) ||
1283 		 	(bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) {
1284 		 	LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
1285 		 		if (lalu->execute_mask) {
1286                                         assert(bc->force_add_cf || !"no force cf");
1287 					bc->force_add_cf = 1;
1288 					break;
1289 				}
1290 		 		type = CF_OP_ALU_PUSH_BEFORE;
1291 			}
1292 		} else  {
1293                    assert(bc->force_add_cf ||!"no force cf");
1294 			bc->force_add_cf = 1;
1295                 }
1296 	}
1297 
1298 	/* cf can contains only alu or only vtx or only tex */
1299 	if (bc->cf_last == NULL || bc->force_add_cf) {
1300                if (bc->cf_last && bc->cf_last->curr_bs_head)
1301                   bc->cf_last->curr_bs_head->last = 1;
1302 		r = r600_bytecode_add_cf(bc);
1303 		if (r) {
1304 			free(nalu);
1305 			return r;
1306 		}
1307 	}
1308 	bc->cf_last->op = type;
1309 
1310 	if (bc->gfx_level >= EVERGREEN) {
1311 		for (i = 0; i < 3; i++)
1312 			if (nalu->src[i].kc_bank &&  nalu->src[i].kc_rel)
1313 				assert(bc->index_loaded[nalu->src[i].kc_rel - 1]);
1314 	}
1315 
1316 	/* Check AR usage and load it if required */
1317 	for (i = 0; i < 3; i++)
1318 		if (nalu->src[i].rel && !bc->ar_loaded)
1319 			r600_load_ar(bc, true);
1320 
1321 	if (nalu->dst.rel && !bc->ar_loaded)
1322 		r600_load_ar(bc, false);
1323 
1324 	/* Setup the kcache for this ALU instruction. This will start a new
1325 	 * ALU clause if needed. */
1326 	if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
1327 		free(nalu);
1328 		return r;
1329 	}
1330 
1331 	if (!bc->cf_last->curr_bs_head) {
1332 		bc->cf_last->curr_bs_head = nalu;
1333 	}
1334 	/* number of gpr == the last gpr used in any alu */
1335 	for (i = 0; i < 3; i++) {
1336 		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 123) {
1337 			bc->ngpr = nalu->src[i].sel + 1;
1338 		}
1339 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
1340 			r600_bytecode_special_constants(nalu->src[i].value,
1341 				&nalu->src[i].sel);
1342 	}
1343 	if (nalu->dst.write && nalu->dst.sel >= bc->ngpr && nalu->dst.sel < 123) {
1344 		bc->ngpr = nalu->dst.sel + 1;
1345 	}
1346 	list_addtail(&nalu->list, &bc->cf_last->alu);
1347 	/* each alu use 2 dwords */
1348 	bc->cf_last->ndw += 2;
1349 	bc->ndw += 2;
1350 
1351 	/* process cur ALU instructions for bank swizzle */
1352 	if (nalu->last) {
1353 		uint32_t literal[4];
1354 		unsigned nliteral;
1355 		struct r600_bytecode_alu *slots[5];
1356 		int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
1357 		r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
1358 		if (r)
1359 			return r;
1360 
1361 		if (bc->cf_last->prev_bs_head) {
1362          struct r600_bytecode_alu *cur_prev_head = bc->cf_last->prev_bs_head;
1363 			r = merge_inst_groups(bc, slots, cur_prev_head);
1364 			if (r)
1365 				return r;
1366          if (cur_prev_head != bc->cf_last->prev_bs_head)
1367             bc->nalu_groups--;
1368 		}
1369 
1370 		if (bc->cf_last->prev_bs_head) {
1371 			r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
1372 			if (r)
1373 				return r;
1374 		}
1375 
1376 		r = check_and_set_bank_swizzle(bc, slots);
1377 		if (r)
1378 			return r;
1379 
1380 		for (i = 0, nliteral = 0; i < max_slots; i++) {
1381 			if (slots[i]) {
1382 				r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
1383 				if (r)
1384 					return r;
1385 			}
1386 		}
1387 		bc->cf_last->ndw += align(nliteral, 2);
1388 
1389 		bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
1390 		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
1391 		bc->cf_last->curr_bs_head = NULL;
1392 
1393 		bc->nalu_groups++;
1394 
1395 		if (bc->r6xx_nop_after_rel_dst) {
1396 			for (int i = 0; i < max_slots; ++i) {
1397 				if (slots[i] && slots[i]->dst.rel) {
1398 					insert_nop_r6xx(bc, max_slots);
1399 					bc->nalu_groups++;
1400 					break;
1401 				}
1402 			}
1403 		}
1404 	}
1405 
1406 	/* Might need to insert spill write ops after current clause */
1407 	if (nalu->last && bc->n_pending_outputs) {
1408 		while (bc->n_pending_outputs) {
1409 			r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
1410 			if (r)
1411 				return r;
1412 		}
1413 	}
1414 
1415 	return 0;
1416 }
1417 
/* Convenience wrapper: append an ALU instruction to a plain CF_OP_ALU clause. */
int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
{
	return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU);
}
1422 
r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode * bc)1423 static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc)
1424 {
1425 	switch (bc->gfx_level) {
1426 	case R600:
1427 		return 8;
1428 
1429 	case R700:
1430 	case EVERGREEN:
1431 	case CAYMAN:
1432 		return 16;
1433 
1434 	default:
1435 		R600_ASM_ERR("Unknown gfx level %d.\n", bc->gfx_level);
1436 		return 8;
1437 	}
1438 }
1439 
last_inst_was_not_vtx_fetch(struct r600_bytecode * bc,bool use_tc)1440 static inline bool last_inst_was_not_vtx_fetch(struct r600_bytecode *bc, bool use_tc)
1441 {
1442 	return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) &&
1443 		 bc->cf_last->op != CF_OP_GDS &&
1444 		 (bc->gfx_level == CAYMAN || use_tc ||
1445 		  bc->cf_last->op != CF_OP_TEX));
1446 }
1447 
r600_bytecode_add_vtx_internal(struct r600_bytecode * bc,const struct r600_bytecode_vtx * vtx,bool use_tc)1448 static int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx,
1449 					  bool use_tc)
1450 {
1451 	struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
1452 	int r;
1453 
1454 	if (!nvtx)
1455 		return -ENOMEM;
1456 	memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx));
1457 
1458 	if (bc->gfx_level >= EVERGREEN) {
1459 		assert(!vtx->buffer_index_mode ||
1460 		       bc->index_loaded[vtx->buffer_index_mode - 1]);
1461 	}
1462 
1463 
1464 	/* cf can contains only alu or only vtx or only tex */
1465 	if (bc->cf_last == NULL ||
1466 	    last_inst_was_not_vtx_fetch(bc, use_tc) ||
1467 	    bc->force_add_cf) {
1468 		r = r600_bytecode_add_cf(bc);
1469 		if (r) {
1470 			free(nvtx);
1471 			return r;
1472 		}
1473 		switch (bc->gfx_level) {
1474 		case R600:
1475 		case R700:
1476 			bc->cf_last->op = CF_OP_VTX;
1477 			break;
1478 		case EVERGREEN:
1479 			if (use_tc)
1480 				bc->cf_last->op = CF_OP_TEX;
1481 			else
1482 				bc->cf_last->op = CF_OP_VTX;
1483 			break;
1484 		case CAYMAN:
1485 			bc->cf_last->op = CF_OP_TEX;
1486 			break;
1487 		default:
1488 			R600_ASM_ERR("Unknown gfx level %d.\n", bc->gfx_level);
1489 			free(nvtx);
1490 			return -EINVAL;
1491 		}
1492 	}
1493 	list_addtail(&nvtx->list, &bc->cf_last->vtx);
1494 	/* each fetch use 4 dwords */
1495 	bc->cf_last->ndw += 4;
1496 	bc->ndw += 4;
1497 	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1498 		bc->force_add_cf = 1;
1499 
1500 	bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1);
1501 	bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1);
1502 
1503 	return 0;
1504 }
1505 
/* Append a vertex fetch using the default (non-TC) path. */
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	return r600_bytecode_add_vtx_internal(bc, vtx, false);
}
1510 
/* Append a vertex fetch routed through the TC (use_tc = true). */
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	return r600_bytecode_add_vtx_internal(bc, vtx, true);
}
1515 
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
{
	struct r600_bytecode_tex *ntex = r600_bytecode_tex();
	int r;

	if (!ntex)
		return -ENOMEM;
	memcpy(ntex, tex, sizeof(struct r600_bytecode_tex));

	/* relative sampler/resource indices need the index register loaded first */
	if (bc->gfx_level >= EVERGREEN) {
		assert(!tex->sampler_index_mode ||
		       bc->index_loaded[tex->sampler_index_mode - 1]);
		assert(!tex->resource_index_mode ||
		       bc->index_loaded[tex->resource_index_mode - 1]);
	}

	/* we can't fetch data and use it as texture lookup address in the same TEX clause */
	if (bc->cf_last != NULL &&
		bc->cf_last->op == CF_OP_TEX) {
		struct r600_bytecode_tex *ttex;
		/* channels this fetch reads from its source GPR */
		uint8_t use_mask = ((1 << ntex->src_sel_x) |
				    (1 << ntex->src_sel_y) |
				    (1 << ntex->src_sel_z) |
				    (1 << ntex->src_sel_w)) & 0xf;

		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
			if (ttex->dst_gpr == ntex->src_gpr) {
				/* channels the earlier fetch writes (dst_sel >= 6 doesn't write here) */
				uint8_t write_mask = (ttex->dst_sel_x < 6 ? 1 : 0) |
						     (ttex->dst_sel_y < 6 ? 2 : 0) |
						     (ttex->dst_sel_z < 6 ? 4 : 0) |
						     (ttex->dst_sel_w < 6 ? 8 : 0);
				/* read-after-write hazard within the clause - split it */
				if (use_mask & write_mask) {
					bc->force_add_cf = 1;
					break;
				}
			}
		}
		/* vtx instrs get inserted after tex, so make sure we aren't moving the tex
		 * before (say) the instr fetching the texcoord.
		 */
		if (!list_is_empty(&bc->cf_last->vtx))
			bc->force_add_cf = 1;

		/* slight hack to make gradients always go into same cf */
		if (ntex->op == FETCH_OP_SET_GRADIENTS_H)
			bc->force_add_cf = 1;
	}

	/* cf can contains only alu or only vtx or only tex */
	if (bc->cf_last == NULL ||
		bc->cf_last->op != CF_OP_TEX ||
	        bc->force_add_cf) {
		r = r600_bytecode_add_cf(bc);
		if (r) {
			free(ntex);
			return r;
		}
		bc->cf_last->op = CF_OP_TEX;
	}
	if (ntex->src_gpr >= bc->ngpr) {
		bc->ngpr = ntex->src_gpr + 1;
	}
	if (ntex->dst_gpr >= bc->ngpr) {
		bc->ngpr = ntex->dst_gpr + 1;
	}
	list_addtail(&ntex->list, &bc->cf_last->tex);
	/* each texture fetch use 4 dwords */
	bc->cf_last->ndw += 4;
	bc->ndw += 4;
	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
		bc->force_add_cf = 1;
	return 0;
}
1589 
r600_bytecode_add_gds(struct r600_bytecode * bc,const struct r600_bytecode_gds * gds)1590 int r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds)
1591 {
1592 	struct r600_bytecode_gds *ngds = r600_bytecode_gds();
1593 	int r;
1594 
1595 	if (ngds == NULL)
1596 		return -ENOMEM;
1597 	memcpy(ngds, gds, sizeof(struct r600_bytecode_gds));
1598 
1599 	if (bc->gfx_level >= EVERGREEN) {
1600 		assert(!gds->uav_index_mode ||
1601 		       bc->index_loaded[gds->uav_index_mode - 1]);
1602 	}
1603 
1604 	if (bc->cf_last == NULL ||
1605 	    bc->cf_last->op != CF_OP_GDS ||
1606 	    bc->force_add_cf) {
1607 		r = r600_bytecode_add_cf(bc);
1608 		if (r) {
1609 			free(ngds);
1610 			return r;
1611 		}
1612 		bc->cf_last->op = CF_OP_GDS;
1613 	}
1614 
1615 	list_addtail(&ngds->list, &bc->cf_last->gds);
1616 	bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
1617 	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1618 		bc->force_add_cf = 1;
1619 	return 0;
1620 }
1621 
r600_bytecode_add_cfinst(struct r600_bytecode * bc,unsigned op)1622 int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
1623 {
1624 	int r;
1625 
1626 	/* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
1627 	if (op != CF_OP_WAIT_ACK && op != CF_OP_MEM_SCRATCH)
1628 		r600_bytecode_wait_acks(bc);
1629 
1630 	r = r600_bytecode_add_cf(bc);
1631 	if (r)
1632 		return r;
1633 
1634 	bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
1635 	bc->cf_last->op = op;
1636 	return 0;
1637 }
1638 
/* Cayman: terminate the program with an explicit CF_END instruction. */
int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
{
	return r600_bytecode_add_cfinst(bc, CF_OP_CF_END);
}
1643 
/* common to all 3 families */
/* Encode one vertex fetch instruction into 4 dwords at bc->bytecode[id]. */
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
{
	/* memory fetches use the r700+ encoding path */
	if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
		return r700_bytecode_fetch_mem_build(bc, vtx, id);
	bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
			S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
			S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
			S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
			S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
	if (bc->gfx_level < CAYMAN)
		bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
	id++;
	bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
				S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
				S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
				S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
				S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
				S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
				S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
				S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
				S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
				S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
	bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)|
				S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian);
	if (bc->gfx_level >= EVERGREEN)
		bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode);
	if (bc->gfx_level < CAYMAN)
		bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1);
	id++;
	/* word 3 is reserved/padding */
	bc->bytecode[id++] = 0;
	return 0;
}
1677 
/* common to all 3 families */
/* Encode one texture fetch instruction into 4 dwords at bc->bytecode[id]. */
static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
{
	bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST(
					r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) |
			    EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
				S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
				S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
	if (bc->gfx_level >= EVERGREEN)
		/* sampler/resource index-mode fields only exist on eg+ */
		bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode);
				((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode)
	id++;
	bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
				S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
				S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
				S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
				S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
				S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
				S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
				S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
				S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
				S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
				S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
	bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
				S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
				S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
				S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
				S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
				S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
				S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
				S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
	/* word 3 is reserved/padding */
	bc->bytecode[id++] = 0;
	return 0;
}
1713 
/* r600 only, r700/eg bits in r700_asm.c */
/* Encode one ALU instruction into 2 dwords at bc->bytecode[id]; word 1
 * uses the OP3 or OP2 layout depending on alu->is_op3. */
static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
{
	unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op);

	/* don't replace gpr by pv or ps for destination register */
	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
				S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
				S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
				S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
				S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
				S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
				S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
				S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
				S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
				S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
				S_SQ_ALU_WORD0_LAST(alu->last);

	if (alu->is_op3) {
		/* OP3 has no abs modifier bits */
		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
					S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
					S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) |
					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
	} else {
		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
					S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) |
					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
	}
	return 0;
}
1760 
/* Encode an r600 fetch-clause CF instruction (address + count word pair). */
static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
{
	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
	*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
			S_SQ_CF_WORD1_BARRIER(1) |
			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
			S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
1769 
/* common for r600/r700 - eg in eg_asm.c */
/* Encode one CF instruction (2 dwords) at bc->bytecode[cf->id], choosing
 * the word layout from the CF op class (native/alu/fetch/export/mem/flow). */
static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
{
	unsigned id = cf->id;
	const struct cf_op_info *cfop = r600_isa_cf(cf->op);
	unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op);


	if (cf->op == CF_NATIVE) {
		/* pre-encoded raw words, copy through */
		bc->bytecode[id++] = cf->isa[0];
		bc->bytecode[id++] = cf->isa[1];
	} else if (cfop->flags & CF_ALU) {
		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);

		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) |
			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
					S_SQ_CF_ALU_WORD1_BARRIER(1) |
					S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->gfx_level == R600 ? cf->r6xx_uses_waterfall : 0) |
					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
	} else if (cfop->flags & CF_FETCH) {
		if (bc->gfx_level == R700)
			r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
		else
			r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
	} else if (cfop->flags & CF_EXP) {
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
	} else if (cfop->flags & CF_MEM) {
		/* memory exports use array_size/comp_mask instead of swizzles */
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
			S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
	} else {
		/* plain control-flow instruction */
		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
					S_SQ_CF_WORD1_BARRIER(1) |
			                S_SQ_CF_WORD1_COND(cf->cond) |
			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
					S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
	}
	return 0;
}
1835 
r600_bytecode_build(struct r600_bytecode * bc)1836 int r600_bytecode_build(struct r600_bytecode *bc)
1837 {
1838 	struct r600_bytecode_cf *cf;
1839 	struct r600_bytecode_alu *alu;
1840 	struct r600_bytecode_vtx *vtx;
1841 	struct r600_bytecode_tex *tex;
1842 	struct r600_bytecode_gds *gds;
1843 	uint32_t literal[4];
1844 	unsigned nliteral;
1845 	unsigned addr;
1846 	int i, r;
1847 
1848 	if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
1849 		if (bc->stack.max_entries)
1850 			bc->nstack = bc->stack.max_entries;
1851 		else if (bc->type == PIPE_SHADER_VERTEX ||
1852 			 bc->type == PIPE_SHADER_TESS_EVAL ||
1853 			 bc->type == PIPE_SHADER_TESS_CTRL)
1854 			bc->nstack = 1;
1855 	}
1856 
1857 	/* first path compute addr of each CF block */
1858 	/* addr start after all the CF instructions */
1859 	addr = bc->cf_last->id + 2;
1860 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1861 		if (r600_isa_cf(cf->op)->flags & CF_FETCH) {
1862 			addr += 3;
1863 			addr &= 0xFFFFFFFCUL;
1864 		}
1865 		cf->addr = addr;
1866 		addr += cf->ndw;
1867 		bc->ndw = cf->addr + cf->ndw;
1868 	}
1869 	free(bc->bytecode);
1870 	bc->bytecode = calloc(4, bc->ndw);
1871 	if (bc->bytecode == NULL)
1872 		return -ENOMEM;
1873 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1874 		const struct cf_op_info *cfop = r600_isa_cf(cf->op);
1875 		addr = cf->addr;
1876 		if (bc->gfx_level >= EVERGREEN)
1877 			r = eg_bytecode_cf_build(bc, cf);
1878 		else
1879 			r = r600_bytecode_cf_build(bc, cf);
1880 		if (r)
1881 			return r;
1882 		if (cfop->flags & CF_ALU) {
1883 			nliteral = 0;
1884 			memset(literal, 0, sizeof(literal));
1885 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
1886 				r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
1887 				if (r)
1888 					return r;
1889 				r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
1890 				r600_bytecode_assign_kcache_banks(alu, cf->kcache);
1891 
1892 				switch(bc->gfx_level) {
1893 				case R600:
1894 					r = r600_bytecode_alu_build(bc, alu, addr);
1895 					break;
1896 				case R700:
1897 					r = r700_bytecode_alu_build(bc, alu, addr);
1898 					break;
1899 				case EVERGREEN:
1900 				case CAYMAN:
1901 					r = eg_bytecode_alu_build(bc, alu, addr);
1902 					break;
1903 				default:
1904 					R600_ASM_ERR("unknown gfx level %d.\n", bc->gfx_level);
1905 					return -EINVAL;
1906 				}
1907 				if (r)
1908 					return r;
1909 				addr += 2;
1910 				if (alu->last) {
1911 					for (i = 0; i < align(nliteral, 2); ++i) {
1912 						bc->bytecode[addr++] = literal[i];
1913 					}
1914 					nliteral = 0;
1915 					memset(literal, 0, sizeof(literal));
1916 				}
1917 			}
1918 		} else if (cf->op == CF_OP_VTX) {
1919 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1920 				r = r600_bytecode_vtx_build(bc, vtx, addr);
1921 				if (r)
1922 					return r;
1923 				addr += 4;
1924 			}
1925 		} else if (cf->op == CF_OP_GDS) {
1926 			assert(bc->gfx_level >= EVERGREEN);
1927 			LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
1928 				r = eg_bytecode_gds_build(bc, gds, addr);
1929 				if (r)
1930 					return r;
1931 				addr += 4;
1932 			}
1933 		} else if (cf->op == CF_OP_TEX) {
1934 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1935 				assert(bc->gfx_level >= EVERGREEN);
1936 				r = r600_bytecode_vtx_build(bc, vtx, addr);
1937 				if (r)
1938 					return r;
1939 				addr += 4;
1940 			}
1941 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
1942 				r = r600_bytecode_tex_build(bc, tex, addr);
1943 				if (r)
1944 					return r;
1945 				addr += 4;
1946 			}
1947 		}
1948 	}
1949 	return 0;
1950 }
1951 
r600_bytecode_clear(struct r600_bytecode * bc)1952 void r600_bytecode_clear(struct r600_bytecode *bc)
1953 {
1954 	struct r600_bytecode_cf *cf = NULL, *next_cf;
1955 
1956 	free(bc->bytecode);
1957 	bc->bytecode = NULL;
1958 
1959 	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
1960 		struct r600_bytecode_alu *alu = NULL, *next_alu;
1961 		struct r600_bytecode_tex *tex = NULL, *next_tex;
1962 		struct r600_bytecode_tex *vtx = NULL, *next_vtx;
1963 		struct r600_bytecode_gds *gds = NULL, *next_gds;
1964 
1965 		LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
1966 			free(alu);
1967 		}
1968 
1969 		list_inithead(&cf->alu);
1970 
1971 		LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
1972 			free(tex);
1973 		}
1974 
1975 		list_inithead(&cf->tex);
1976 
1977 		LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
1978 			free(vtx);
1979 		}
1980 
1981 		list_inithead(&cf->vtx);
1982 
1983 		LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
1984 			free(gds);
1985 		}
1986 
1987 		list_inithead(&cf->gds);
1988 
1989 		free(cf);
1990 	}
1991 
1992 	list_inithead(&cf->list);
1993 }
1994 
print_swizzle(unsigned swz)1995 static int print_swizzle(unsigned swz)
1996 {
1997 	const char * swzchars = "xyzw01?_";
1998 	assert(swz<8 && swz != 6);
1999 	return fprintf(stderr, "%c", swzchars[swz]);
2000 }
2001 
print_sel(unsigned sel,unsigned rel,unsigned index_mode,unsigned need_brackets)2002 static int print_sel(unsigned sel, unsigned rel, unsigned index_mode,
2003 		unsigned need_brackets)
2004 {
2005 	int o = 0;
2006 	if (rel && index_mode >= 5 && sel < 128)
2007 		o += fprintf(stderr, "G");
2008 	if (rel || need_brackets) {
2009 		o += fprintf(stderr, "[");
2010 	}
2011 	o += fprintf(stderr, "%d", sel);
2012 	if (rel) {
2013 		if (index_mode == 0 || index_mode == 6)
2014 			o += fprintf(stderr, "+AR");
2015 		else if (index_mode == 4)
2016 			o += fprintf(stderr, "+AL");
2017 	}
2018 	if (rel || need_brackets) {
2019 		o += fprintf(stderr, "]");
2020 	}
2021 	return o;
2022 }
2023 
print_dst(struct r600_bytecode_alu * alu)2024 static int print_dst(struct r600_bytecode_alu *alu)
2025 {
2026 	int o = 0;
2027 	unsigned sel = alu->dst.sel;
2028 	char reg_char = 'R';
2029 	if (sel >= 128 - 4) { /* clause temporary gpr */
2030 		sel -= 128 - 4;
2031 		reg_char = 'T';
2032 	}
2033 
2034 	if (alu_writes(alu)) {
2035 		o += fprintf(stderr, "%c", reg_char);
2036 		o += print_sel(sel, alu->dst.rel, alu->index_mode, 0);
2037 	} else {
2038 		o += fprintf(stderr, "__");
2039 	}
2040 	o += fprintf(stderr, ".");
2041 	o += print_swizzle(alu->dst.chan);
2042 	return o;
2043 }
2044 
print_src(struct r600_bytecode_alu * alu,unsigned idx)2045 static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
2046 {
2047 	int o = 0;
2048 	struct r600_bytecode_alu_src *src = &alu->src[idx];
2049 	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
2050 
2051 	if (src->neg)
2052 		o += fprintf(stderr,"-");
2053 	if (src->abs)
2054 		o += fprintf(stderr,"|");
2055 
2056 	if (sel < 128 - 4) {
2057 		o += fprintf(stderr, "R");
2058 	} else if (sel < 128) {
2059 		o += fprintf(stderr, "T");
2060 		sel -= 128 - 4;
2061 	} else if (sel < 160) {
2062 		o += fprintf(stderr, "KC0");
2063 		need_brackets = 1;
2064 		sel -= 128;
2065 	} else if (sel < 192) {
2066 		o += fprintf(stderr, "KC1");
2067 		need_brackets = 1;
2068 		sel -= 160;
2069 	} else if (sel >= 512) {
2070 		o += fprintf(stderr, "C%d", src->kc_bank);
2071 		need_brackets = 1;
2072 		sel -= 512;
2073 	} else if (sel >= 448) {
2074 		o += fprintf(stderr, "Param");
2075 		sel -= 448;
2076 		need_chan = 0;
2077 	} else if (sel >= 288) {
2078 		o += fprintf(stderr, "KC3");
2079 		need_brackets = 1;
2080 		sel -= 288;
2081 	} else if (sel >= 256) {
2082 		o += fprintf(stderr, "KC2");
2083 		need_brackets = 1;
2084 		sel -= 256;
2085 	} else {
2086 		need_sel = 0;
2087 		need_chan = 0;
2088 		switch (sel) {
2089 		case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
2090 			o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
2091 			break;
2092 		case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
2093 			o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
2094 			break;
2095 		case EG_V_SQ_ALU_SRC_LDS_OQ_A:
2096 			o += fprintf(stderr, "LDS_OQ_A");
2097 			need_chan = 1;
2098 			break;
2099 		case EG_V_SQ_ALU_SRC_LDS_OQ_B:
2100 			o += fprintf(stderr, "LDS_OQ_B");
2101 			need_chan = 1;
2102 			break;
2103 		case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
2104 			o += fprintf(stderr, "LDS_OQ_A_POP");
2105 			need_chan = 1;
2106 			break;
2107 		case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
2108 			o += fprintf(stderr, "LDS_OQ_B_POP");
2109 			need_chan = 1;
2110 			break;
2111 		case EG_V_SQ_ALU_SRC_TIME_LO:
2112 			o += fprintf(stderr, "TIME_LO");
2113 			break;
2114 		case EG_V_SQ_ALU_SRC_TIME_HI:
2115 			o += fprintf(stderr, "TIME_HI");
2116 			break;
2117 		case EG_V_SQ_ALU_SRC_SE_ID:
2118 			o += fprintf(stderr, "SE_ID");
2119 			break;
2120 		case EG_V_SQ_ALU_SRC_SIMD_ID:
2121 			o += fprintf(stderr, "SIMD_ID");
2122 			break;
2123 		case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
2124 			o += fprintf(stderr, "HW_WAVE_ID");
2125 			break;
2126 		case V_SQ_ALU_SRC_PS:
2127 			o += fprintf(stderr, "PS");
2128 			break;
2129 		case V_SQ_ALU_SRC_PV:
2130 			o += fprintf(stderr, "PV");
2131 			need_chan = 1;
2132 			break;
2133 		case V_SQ_ALU_SRC_LITERAL:
2134 			{
2135 				const uint32_t value_uint32 = src->value;
2136 				float value_float;
2137 				memcpy(&value_float, &value_uint32, sizeof(float));
2138 				o += fprintf(stderr, "[0x%08X %f]", value_uint32, value_float);
2139 			}
2140 			break;
2141 		case V_SQ_ALU_SRC_0_5:
2142 			o += fprintf(stderr, "0.5");
2143 			break;
2144 		case V_SQ_ALU_SRC_M_1_INT:
2145 			o += fprintf(stderr, "-1");
2146 			break;
2147 		case V_SQ_ALU_SRC_1_INT:
2148 			o += fprintf(stderr, "1");
2149 			break;
2150 		case V_SQ_ALU_SRC_1:
2151 			o += fprintf(stderr, "1.0");
2152 			break;
2153 		case V_SQ_ALU_SRC_0:
2154 			o += fprintf(stderr, "0");
2155 			break;
2156 		default:
2157 			o += fprintf(stderr, "??IMM_%d", sel);
2158 			break;
2159 		}
2160 	}
2161 
2162 	if (need_sel)
2163 		o += print_sel(sel, src->rel, alu->index_mode, need_brackets);
2164 
2165 	if (need_chan) {
2166 		o += fprintf(stderr, ".");
2167 		o += print_swizzle(src->chan);
2168 	}
2169 
2170 	if (src->abs)
2171 		o += fprintf(stderr,"|");
2172 
2173 	return o;
2174 }
2175 
print_indent(int p,int c)2176 static int print_indent(int p, int c)
2177 {
2178 	int o = 0;
2179 	while (p++ < c)
2180 		o += fprintf(stderr, " ");
2181 	return o;
2182 }
2183 
/* Mnemonics for the RAT (Random Access Target) memory-export
 * operations, indexed by cf->rat.inst as printed by
 * r600_bytecode_disasm().  Entries 32 and up are the _RTN variants. */
const char *rat_instr_name[] = {
   "NOP",
   "STORE_TYPED",
   "STORE_RAW",
   "STORE_RAW_FDENORM",
   "CMP_XCHG_INT",
   "CMP_XCHG_FLT",
   "CMP_XCHG_FDENORM",
   "ADD",
   "SUB",
   "RSUB",
   "MIN_INT",
   "MIN_UINT",
   "MAX_INT",
   "MAX_UINT",
   "AND",
   "OR",
   "XOR",
   "MSKOR",
   "INC_UINT",
   "DEC_UINT",
   "RESERVED20",
   "RESERVED21",
   "RESERVED22",
   "RESERVED23",
   "RESERVED24",
   "RESERVED25",
   "RESERVED26",
   "RESERVED27",
   "RESERVED28",
   "RESERVED29",
   "RESERVED30",
   "RESERVED31",
   "NOP_RTN",
   "RESERVED33",
   "XCHG_RTN",
   "XCHG_FDENORM_RTN",
   "CMPXCHG_INT_RTN",
   "CMPXCHG_FLT_RTN",
   "CMPXCHG_FDENORM_RTN",
   "ADD_RTN",
   "SUB_RTN",
   "RSUB_RTN",
   "MIN_INT_RTN",
   "MIN_UINT_RTN",
   "MAX_INT_RTN",
   "MAX_UINT_RTN",
   "AND_RTN",
   "OR_RTN",
   "XOR_RTN",
   "MSKOR_RTN",
   "INC_UINT_RTN",
   "DEC_UINT_RTN",
};
2238 
2239 
r600_bytecode_disasm(struct r600_bytecode * bc)2240 void r600_bytecode_disasm(struct r600_bytecode *bc)
2241 {
2242 	const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"};
2243 	static int index = 0;
2244 	struct r600_bytecode_cf *cf = NULL;
2245 	struct r600_bytecode_alu *alu = NULL;
2246 	struct r600_bytecode_vtx *vtx = NULL;
2247 	struct r600_bytecode_tex *tex = NULL;
2248 	struct r600_bytecode_gds *gds = NULL;
2249 
2250 	unsigned id, ngr = 0, last;
2251 	uint32_t literal[4];
2252 	unsigned nliteral;
2253 	char chip = '6';
2254 
2255 	switch (bc->gfx_level) {
2256 	case R700:
2257 		chip = '7';
2258 		break;
2259 	case EVERGREEN:
2260 		chip = 'E';
2261 		break;
2262 	case CAYMAN:
2263 		chip = 'C';
2264 		break;
2265 	case R600:
2266 	default:
2267 		chip = '6';
2268 		break;
2269 	}
2270 	fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
2271 	        bc->ndw, bc->ngpr, bc->nstack);
2272 	fprintf(stderr, "shader %d -- %c\n", index++, chip);
2273 
2274 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
2275 		id = cf->id;
2276 		if (cf->op == CF_NATIVE) {
2277 			fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id],
2278 					bc->bytecode[id + 1]);
2279 		} else {
2280 			const struct cf_op_info *cfop = r600_isa_cf(cf->op);
2281 			if (cfop->flags & CF_ALU) {
2282 				if (cf->eg_alu_extended) {
2283 					fprintf(stderr, "%04d %08X %08X  %s\n", id, bc->bytecode[id],
2284 							bc->bytecode[id + 1], "ALU_EXT");
2285 					id += 2;
2286 				}
2287 				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2288 						bc->bytecode[id + 1], cfop->name);
2289 				fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr);
2290 				for (int i = 0; i < 4; ++i) {
2291 					if (cf->kcache[i].mode) {
2292 						int c_start = (cf->kcache[i].addr << 4);
2293 						int c_end = c_start + (cf->kcache[i].mode << 4);
2294 						fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ",
2295 						        i, cf->kcache[i].bank, c_start, c_end,
2296 						        cf->kcache[i].index_mode ? " " : "",
2297 						        cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : "");
2298 					}
2299 				}
2300 				fprintf(stderr, "\n");
2301 			} else if (cfop->flags & CF_FETCH) {
2302 				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2303 						bc->bytecode[id + 1], cfop->name);
2304 				fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
2305 				if (cf->vpm)
2306 					fprintf(stderr, "VPM ");
2307 				if (cf->end_of_program)
2308 					fprintf(stderr, "EOP ");
2309 				fprintf(stderr, "\n");
2310 
2311 			} else if (cfop->flags & CF_EXP) {
2312 				int o = 0;
2313 				const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
2314 				o += fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2315 						bc->bytecode[id + 1], cfop->name);
2316 				o += print_indent(o, 43);
2317 				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2318 				if (cf->output.burst_count > 1) {
2319 					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2320 							cf->output.array_base + cf->output.burst_count - 1);
2321 
2322 					o += print_indent(o, 55);
2323 					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2324 							cf->output.gpr + cf->output.burst_count - 1);
2325 				} else {
2326 					o += fprintf(stderr, "%d ", cf->output.array_base);
2327 					o += print_indent(o, 55);
2328 					o += fprintf(stderr, "R%d.", cf->output.gpr);
2329 				}
2330 
2331 				o += print_swizzle(cf->output.swizzle_x);
2332 				o += print_swizzle(cf->output.swizzle_y);
2333 				o += print_swizzle(cf->output.swizzle_z);
2334 				o += print_swizzle(cf->output.swizzle_w);
2335 
2336 				print_indent(o, 67);
2337 
2338 				fprintf(stderr, " ES:%X ", cf->output.elem_size);
2339 				if (cf->mark)
2340 					fprintf(stderr, "MARK ");
2341 				if (!cf->barrier)
2342 					fprintf(stderr, "NO_BARRIER ");
2343 				if (cf->end_of_program)
2344 					fprintf(stderr, "EOP ");
2345 				fprintf(stderr, "\n");
2346 			} else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
2347 				int o = 0;
2348 				const char *exp_type_r600[] = {"WRITE", "WRITE_IND", "READ",
2349 				                               "READ_IND"};
2350 				const char *exp_type_r700[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
2351 				                               "WRITE_IND_ACK"};
2352 
2353 				const char **exp_type = bc->gfx_level >= R700 ?
2354                                        exp_type_r700 : exp_type_r600;
2355 
2356 				o += fprintf(stderr, "%04d %08X %08X  %s ", id,
2357 						bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
2358 				o += print_indent(o, 43);
2359 				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2360 
2361 				if (r600_isa_cf(cf->op)->flags & CF_RAT) {
2362 					o += fprintf(stderr, "RAT%d", cf->rat.id);
2363 					if (cf->rat.index_mode) {
2364 						o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
2365 					}
2366                assert(ARRAY_SIZE(rat_instr_name) > cf->rat.inst);
2367 					o += fprintf(stderr, " %s ", rat_instr_name[cf->rat.inst]);
2368 				}
2369 
2370 				if (cf->output.burst_count > 1) {
2371 					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2372 							cf->output.array_base + cf->output.burst_count - 1);
2373 					o += print_indent(o, 55);
2374 					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2375 							cf->output.gpr + cf->output.burst_count - 1);
2376 				} else {
2377 					o += fprintf(stderr, "%d ", cf->output.array_base);
2378 					o += print_indent(o, 55);
2379 					o += fprintf(stderr, "R%d.", cf->output.gpr);
2380 				}
2381 				for (int i = 0; i < 4; ++i) {
2382 					if (cf->output.comp_mask & (1 << i))
2383 						o += print_swizzle(i);
2384 					else
2385 						o += print_swizzle(7);
2386 				}
2387 
2388 				if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
2389 				    cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
2390 					o += fprintf(stderr, " R%d.xyz", cf->output.index_gpr);
2391 
2392 				o += print_indent(o, 67);
2393 
2394 				fprintf(stderr, " ES:%i ", cf->output.elem_size);
2395 				if (cf->output.array_size != 0xFFF)
2396 					fprintf(stderr, "AS:%i ", cf->output.array_size);
2397 				if (cf->mark)
2398 					fprintf(stderr, "MARK ");
2399 				if (!cf->barrier)
2400 					fprintf(stderr, "NO_BARRIER ");
2401 				if (cf->end_of_program)
2402 					fprintf(stderr, "EOP ");
2403 
2404 				if (cf->output.mark)
2405 					fprintf(stderr, "MARK ");
2406 
2407 				fprintf(stderr, "\n");
2408 			} else {
2409 				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2410 						bc->bytecode[id + 1], cfop->name);
2411 				fprintf(stderr, "@%d ", cf->cf_addr);
2412 				if (cf->cond)
2413 					fprintf(stderr, "CND:%X ", cf->cond);
2414 				if (cf->pop_count)
2415 					fprintf(stderr, "POP:%X ", cf->pop_count);
2416 				if (cf->count && (cfop->flags & CF_EMIT))
2417 					fprintf(stderr, "STREAM%d ", cf->count);
2418 				if (cf->vpm)
2419 					fprintf(stderr, "VPM ");
2420 				if (cf->end_of_program)
2421 					fprintf(stderr, "EOP ");
2422 				fprintf(stderr, "\n");
2423 			}
2424 		}
2425 
2426 		id = cf->addr;
2427 		nliteral = 0;
2428 		last = 1;
2429 		int chan_mask = 0;
2430 		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
2431 			const char chan[] = "xyzwt";
2432 			const char *omod_str[] = {"","*2","*4","/2"};
2433 			const struct alu_op_info *aop = r600_isa_alu(alu->op);
2434 			int o = 0;
2435 
2436 			r600_bytecode_alu_nliterals(alu, literal, &nliteral);
2437 			o += fprintf(stderr, " %04d %08X %08X  ", id, bc->bytecode[id], bc->bytecode[id+1]);
2438 			if (last)
2439 				o += fprintf(stderr, "%4d ", ++ngr);
2440 			else
2441 				o += fprintf(stderr, "     ");
2442 
2443 			if ((chan_mask & (1 << alu->dst.chan)) ||
2444 				((aop->slots[bc->isa->hw_class] == AF_S) && !(bc->isa->hw_class == ISA_CC_CAYMAN)))
2445 				o += fprintf(stderr, "t:");
2446 			else
2447 				o += fprintf(stderr, "%c:", chan[alu->dst.chan]);
2448 			chan_mask |= 1 << alu->dst.chan;
2449 
2450 			o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ',
2451 					alu->update_pred ? 'P':' ',
2452 					alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');
2453 
2454 			o += fprintf(stderr, "%s%s%s ", aop->name,
2455 					omod_str[alu->omod], alu->dst.clamp ? "_sat":"");
2456 
2457 			o += print_indent(o,60);
2458 			if (bc->isa->hw_class == ISA_CC_CAYMAN && alu->op == ALU_OP1_MOVA_INT) {
2459 				switch (alu->dst.sel) {
2460 				case 0: fprintf(stderr, "AR"); break;
2461 				case 2: fprintf(stderr, "CF_IDX0"); break;
2462 				case 3: fprintf(stderr, "CF_IDX1"); break;
2463 				}
2464 			} else {
2465 				o += print_dst(alu);
2466 			}
2467 			for (int i = 0; i < aop->src_count; ++i) {
2468 				o += fprintf(stderr, i == 0 ? ",  ": ", ");
2469 				o += print_src(alu, i);
2470 			}
2471 
2472 			if (alu->bank_swizzle) {
2473 				o += print_indent(o,75);
2474 				o += fprintf(stderr, "  BS:%d", alu->bank_swizzle);
2475 			}
2476 
2477 			fprintf(stderr, "\n");
2478 			id += 2;
2479 
2480 			if (alu->last) {
2481 				for (unsigned i = 0; i < nliteral; i++, id++) {
2482 					float *f = (float*)(bc->bytecode + id);
2483 					o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]);
2484 					print_indent(o, 60);
2485 					fprintf(stderr, " %f (%d)\n", *f, *(bc->bytecode + id));
2486 				}
2487 				id += nliteral & 1;
2488 				nliteral = 0;
2489 				chan_mask = 0;
2490 			}
2491 			last = alu->last;
2492 		}
2493 
2494 		LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
2495 			int o = 0;
2496 			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2497 					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2498 
2499 			o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name);
2500 
2501 			o += print_indent(o, 50);
2502 
2503 			o += fprintf(stderr, "R%d.", tex->dst_gpr);
2504 			o += print_swizzle(tex->dst_sel_x);
2505 			o += print_swizzle(tex->dst_sel_y);
2506 			o += print_swizzle(tex->dst_sel_z);
2507 			o += print_swizzle(tex->dst_sel_w);
2508 
2509 			o += fprintf(stderr, ", R%d.", tex->src_gpr);
2510 			o += print_swizzle(tex->src_sel_x);
2511 			o += print_swizzle(tex->src_sel_y);
2512 			o += print_swizzle(tex->src_sel_z);
2513 			o += print_swizzle(tex->src_sel_w);
2514 
2515 			o += fprintf(stderr, ",  RID:%d ", tex->resource_id);
2516                         if (tex->resource_index_mode)
2517 				fprintf(stderr, "RQ_%s", index_mode[tex->resource_index_mode]);
2518 
2519 			o += fprintf(stderr, ", SID:%d  ", tex->sampler_id);
2520 
2521 			if (tex->sampler_index_mode)
2522 				fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]);
2523 
2524 
2525 
2526 			if (tex->lod_bias)
2527 				fprintf(stderr, "LB:%d ", tex->lod_bias);
2528 
2529 			fprintf(stderr, "CT:%c%c%c%c ",
2530 					tex->coord_type_x ? 'N' : 'U',
2531 					tex->coord_type_y ? 'N' : 'U',
2532 					tex->coord_type_z ? 'N' : 'U',
2533 					tex->coord_type_w ? 'N' : 'U');
2534 
2535 			if (tex->offset_x)
2536 				fprintf(stderr, "OX:%d ", tex->offset_x);
2537 			if (tex->offset_y)
2538 				fprintf(stderr, "OY:%d ", tex->offset_y);
2539 			if (tex->offset_z)
2540 				fprintf(stderr, "OZ:%d ", tex->offset_z);
2541 
2542 			id += 4;
2543 			fprintf(stderr, "\n");
2544 		}
2545 
2546 		LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
2547 			int o = 0;
2548 			const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
2549 			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2550 					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2551 
2552 			o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name);
2553 
2554 			o += print_indent(o, 50);
2555 
2556 			o += fprintf(stderr, "R%d.", vtx->dst_gpr);
2557 			o += print_swizzle(vtx->dst_sel_x);
2558 			o += print_swizzle(vtx->dst_sel_y);
2559 			o += print_swizzle(vtx->dst_sel_z);
2560 			o += print_swizzle(vtx->dst_sel_w);
2561 
2562 			o += fprintf(stderr, ", R%d.", vtx->src_gpr);
2563 			o += print_swizzle(vtx->src_sel_x);
2564 			if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
2565 				o += print_swizzle(vtx->src_sel_y);
2566 
2567 			if (vtx->offset)
2568 				fprintf(stderr, " +%db", vtx->offset);
2569 
2570 			o += print_indent(o, 55);
2571 
2572 			fprintf(stderr, ",  RID:%d ", vtx->buffer_id);
2573 
2574 			fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]);
2575 
2576 			if (bc->gfx_level < CAYMAN && vtx->mega_fetch_count)
2577 				fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count);
2578 
2579 			if (bc->gfx_level >= EVERGREEN && vtx->buffer_index_mode)
2580 				fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
2581 
2582 			if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
2583 				if (vtx->uncached)
2584 					fprintf(stderr, "UNCACHED ");
2585 				if (vtx->indexed)
2586 					fprintf(stderr, "INDEXED:%d ", vtx->indexed);
2587 
2588 				fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
2589 				if (vtx->burst_count)
2590 					fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
2591 				fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
2592 				fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
2593 			}
2594 
2595 			fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
2596 			fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
2597 			fprintf(stderr, "NUM:%d ", vtx->num_format_all);
2598 			fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
2599 			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
2600 
2601 			id += 4;
2602 		}
2603 
2604 		LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
2605 			UNUSED int o = 0;
2606 			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2607 					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2608 
2609 			o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name);
2610 
2611 			if (gds->op != FETCH_OP_TF_WRITE) {
2612 				o += fprintf(stderr, "R%d.", gds->dst_gpr);
2613 				o += print_swizzle(gds->dst_sel_x);
2614 				o += print_swizzle(gds->dst_sel_y);
2615 				o += print_swizzle(gds->dst_sel_z);
2616 				o += print_swizzle(gds->dst_sel_w);
2617 			}
2618 
2619 			o += fprintf(stderr, ", R%d.", gds->src_gpr);
2620 			o += print_swizzle(gds->src_sel_x);
2621 			o += print_swizzle(gds->src_sel_y);
2622 			o += print_swizzle(gds->src_sel_z);
2623 
2624 			if (gds->op != FETCH_OP_TF_WRITE) {
2625 				o += fprintf(stderr, ", R%d.", gds->src_gpr2);
2626 			}
2627 			if (gds->alloc_consume) {
2628 				o += fprintf(stderr, " UAV: %d", gds->uav_id);
2629 				if (gds->uav_index_mode)
2630 					o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
2631 			}
2632 			fprintf(stderr, "\n");
2633 			id += 4;
2634 		}
2635 	}
2636 
2637 	fprintf(stderr, "--------------------------------------\n");
2638 }
2639 
r600_vertex_data_type(enum pipe_format pformat,unsigned * format,unsigned * num_format,unsigned * format_comp,unsigned * endian)2640 void r600_vertex_data_type(enum pipe_format pformat,
2641 				  unsigned *format,
2642 				  unsigned *num_format, unsigned *format_comp, unsigned *endian)
2643 {
2644 	const struct util_format_description *desc;
2645 	unsigned i;
2646 
2647 	*format = 0;
2648 	*num_format = 0;
2649 	*format_comp = 0;
2650 	*endian = ENDIAN_NONE;
2651 
2652 	if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) {
2653 		*format = FMT_10_11_11_FLOAT;
2654 		*endian = r600_endian_swap(32);
2655 		return;
2656 	}
2657 
2658 	if (pformat == PIPE_FORMAT_B5G6R5_UNORM) {
2659 		*format = FMT_5_6_5;
2660 		*endian = r600_endian_swap(16);
2661 		return;
2662 	}
2663 
2664 	if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
2665 		*format = FMT_1_5_5_5;
2666 		*endian = r600_endian_swap(16);
2667 		return;
2668 	}
2669 
2670 	if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
2671 		*format = FMT_5_5_5_1;
2672 		return;
2673 	}
2674 
2675 	desc = util_format_description(pformat);
2676 	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
2677 		goto out_unknown;
2678 	}
2679 
2680 	i = util_format_get_first_non_void_channel(pformat);
2681 
2682 	*endian = r600_endian_swap(desc->channel[i].size);
2683 
2684 	switch (desc->channel[i].type) {
2685 	/* Half-floats, floats, ints */
2686 	case UTIL_FORMAT_TYPE_FLOAT:
2687 		switch (desc->channel[i].size) {
2688 		case 16:
2689 			switch (desc->nr_channels) {
2690 			case 1:
2691 				*format = FMT_16_FLOAT;
2692 				break;
2693 			case 2:
2694 				*format = FMT_16_16_FLOAT;
2695 				break;
2696 			case 3:
2697 			case 4:
2698 				*format = FMT_16_16_16_16_FLOAT;
2699 				break;
2700 			}
2701 			break;
2702 		case 32:
2703 			switch (desc->nr_channels) {
2704 			case 1:
2705 				*format = FMT_32_FLOAT;
2706 				break;
2707 			case 2:
2708 				*format = FMT_32_32_FLOAT;
2709 				break;
2710 			case 3:
2711 				*format = FMT_32_32_32_FLOAT;
2712 				break;
2713 			case 4:
2714 				*format = FMT_32_32_32_32_FLOAT;
2715 				break;
2716 			}
2717 			break;
2718 		default:
2719 			goto out_unknown;
2720 		}
2721 		break;
2722 		/* Unsigned ints */
2723 	case UTIL_FORMAT_TYPE_UNSIGNED:
2724 		/* Signed ints */
2725 	case UTIL_FORMAT_TYPE_SIGNED:
2726 		switch (desc->channel[i].size) {
2727 		case 4:
2728 			switch (desc->nr_channels) {
2729 			case 2:
2730 				*format = FMT_4_4;
2731 				break;
2732 			case 4:
2733 				*format = FMT_4_4_4_4;
2734 				break;
2735 			}
2736 			break;
2737 		case 8:
2738 			switch (desc->nr_channels) {
2739 			case 1:
2740 				*format = FMT_8;
2741 				break;
2742 			case 2:
2743 				*format = FMT_8_8;
2744 				break;
2745 			case 3:
2746 			case 4:
2747 				*format = FMT_8_8_8_8;
2748 				break;
2749 			}
2750 			break;
2751 		case 10:
2752 			if (desc->nr_channels != 4)
2753 				goto out_unknown;
2754 
2755 			*format = FMT_2_10_10_10;
2756 			break;
2757 		case 16:
2758 			switch (desc->nr_channels) {
2759 			case 1:
2760 				*format = FMT_16;
2761 				break;
2762 			case 2:
2763 				*format = FMT_16_16;
2764 				break;
2765 			case 3:
2766 			case 4:
2767 				*format = FMT_16_16_16_16;
2768 				break;
2769 			}
2770 			break;
2771 		case 32:
2772 			switch (desc->nr_channels) {
2773 			case 1:
2774 				*format = FMT_32;
2775 				break;
2776 			case 2:
2777 				*format = FMT_32_32;
2778 				break;
2779 			case 3:
2780 				*format = FMT_32_32_32;
2781 				break;
2782 			case 4:
2783 				*format = FMT_32_32_32_32;
2784 				break;
2785 			}
2786 			break;
2787 		default:
2788 			goto out_unknown;
2789 		}
2790 		break;
2791 	default:
2792 		goto out_unknown;
2793 	}
2794 
2795 	if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2796 		*format_comp = 1;
2797 	}
2798 
2799 	*num_format = 0;
2800 	if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
2801 	    desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2802 		if (!desc->channel[i].normalized) {
2803 			if (desc->channel[i].pure_integer)
2804 				*num_format = 1;
2805 			else
2806 				*num_format = 2;
2807 		}
2808 	}
2809 	return;
2810 out_unknown:
2811 	R600_ASM_ERR("unsupported vertex format %s\n", util_format_name(pformat));
2812 }
2813 
r600_bytecode_alu_read(struct r600_bytecode * bc,struct r600_bytecode_alu * alu,uint32_t word0,uint32_t word1)2814 void r600_bytecode_alu_read(struct r600_bytecode *bc,
2815 		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
2816 {
2817 	/* WORD0 */
2818 	alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
2819 	alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
2820 	alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
2821 	alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
2822 	alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
2823 	alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
2824 	alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
2825 	alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
2826 	alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
2827 	alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
2828 	alu->last = G_SQ_ALU_WORD0_LAST(word0);
2829 
2830 	/* WORD1 */
2831 	alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
2832 	if (alu->bank_swizzle)
2833 		alu->bank_swizzle_force = alu->bank_swizzle;
2834 	alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
2835 	alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
2836 	alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
2837 	alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
2838 	if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/
2839 	{
2840 		alu->is_op3 = 1;
2841 		alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
2842 		alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
2843 		alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
2844 		alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
2845 		alu->op = r600_isa_alu_by_opcode(bc->isa,
2846 				G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1);
2847 
2848 	}
2849 	else /*ALU_DWORD1_OP2*/
2850 	{
2851 		alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
2852 		alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
2853 		alu->op = r600_isa_alu_by_opcode(bc->isa,
2854 				G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0);
2855 		alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
2856 		alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
2857 		alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
2858 		alu->execute_mask =
2859 			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
2860 	}
2861 }
2862 
#if 0
/* Decode a CF ALLOC_EXPORT instruction pair of dwords into *output
 * (inverse of the export encoder).
 *
 * NOTE(review): compiled out with #if 0 — dead code retained for
 * reference; presumably useful for a future disassembler path, but
 * consider deleting if nothing revives it. */
void r600_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);

	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
	/* Fixed: this line was indented with spaces, unlike the tabs used
	 * everywhere else in the file. */
	output->op = r600_isa_cf_by_opcode(bc->isa,
			G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0);
	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
	output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
	output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
#endif
2885