1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include "sb_bc.h"
28 #include "sb_shader.h"
29 #include "sb_pass.h"
30
31 namespace r600_sb {
32
bc_builder(shader & s)33 bc_builder::bc_builder(shader &s)
34 : sh(s), ctx(s.get_ctx()), bb(ctx.hw_class_bit()), error(0) {}
35
build()36 int bc_builder::build() {
37
38 container_node *root = sh.root;
39 int cf_cnt = 0;
40
41 // FIXME reserve total size to avoid reallocs
42
43 for (node_iterator it = root->begin(), end = root->end();
44 it != end; ++it) {
45
46 cf_node *cf = static_cast<cf_node*>(*it);
47 assert(cf->is_cf_inst() || cf->is_alu_clause() || cf->is_fetch_clause());
48
49 cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
50
51 cf->bc.id = cf_cnt++;
52
53 if (flags & CF_ALU) {
54 if (cf->bc.is_alu_extended())
55 cf_cnt++;
56 }
57 }
58
59 bb.set_size(cf_cnt << 1);
60 bb.seek(cf_cnt << 1);
61
62 unsigned cf_pos = 0;
63
64 for (node_iterator I = root->begin(), end = root->end();
65 I != end; ++I) {
66
67 cf_node *cf = static_cast<cf_node*>(*I);
68 cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
69
70 if (flags & CF_ALU) {
71 bb.seek(bb.ndw());
72 cf->bc.addr = bb.ndw() >> 1;
73 build_alu_clause(cf);
74 cf->bc.count = (bb.ndw() >> 1) - cf->bc.addr - 1;
75 } else if (flags & CF_FETCH) {
76 bb.align(4);
77 bb.seek(bb.ndw());
78 cf->bc.addr = bb.ndw() >> 1;
79 build_fetch_clause(cf);
80 cf->bc.count = (((bb.ndw() >> 1) - cf->bc.addr) >> 1) - 1;
81 } else if (cf->jump_target) {
82 cf->bc.addr = cf->jump_target->bc.id;
83 if (cf->jump_after_target)
84 cf->bc.addr += 1;
85 }
86
87 bb.seek(cf_pos);
88 build_cf(cf);
89 cf_pos = bb.get_pos();
90 }
91
92 return 0;
93 }
94
build_alu_clause(cf_node * n)95 int bc_builder::build_alu_clause(cf_node* n) {
96 for (node_iterator I = n->begin(), E = n->end();
97 I != E; ++I) {
98
99 alu_group_node *g = static_cast<alu_group_node*>(*I);
100 assert(g->is_valid());
101
102 build_alu_group(g);
103 }
104 return 0;
105 }
106
build_alu_group(alu_group_node * n)107 int bc_builder::build_alu_group(alu_group_node* n) {
108
109 for (node_iterator I = n->begin(), E = n->end();
110 I != E; ++I) {
111
112 alu_node *a = static_cast<alu_node*>(*I);
113 assert(a->is_valid());
114 build_alu(a);
115 }
116
117 for(int i = 0, ls = n->literals.size(); i < ls; ++i) {
118 bb << n->literals.at(i).u;
119 }
120
121 bb.align(2);
122 bb.seek(bb.ndw());
123
124 return 0;
125 }
126
build_fetch_clause(cf_node * n)127 int bc_builder::build_fetch_clause(cf_node* n) {
128 for (node_iterator I = n->begin(), E = n->end();
129 I != E; ++I) {
130 fetch_node *f = static_cast<fetch_node*>(*I);
131
132 if (f->bc.op_ptr->flags & FF_GDS)
133 build_fetch_gds(f);
134 else if (f->bc.op_ptr->flags & FF_MEM)
135 build_fetch_mem(f);
136 else if (f->bc.op_ptr->flags & FF_VTX)
137 build_fetch_vtx(f);
138 else
139 build_fetch_tex(f);
140 }
141 return 0;
142 }
143
144
build_cf(cf_node * n)145 int bc_builder::build_cf(cf_node* n) {
146 const bc_cf &bc = n->bc;
147 const cf_op_info *cfop = bc.op_ptr;
148
149 if (cfop->flags & CF_ALU)
150 return build_cf_alu(n);
151 if (cfop->flags & (CF_EXP | CF_MEM))
152 return build_cf_exp(n);
153
154 if (ctx.is_egcm()) {
155 bb << CF_WORD0_EGCM()
156 .ADDR(bc.addr)
157 .JUMPTABLE_SEL(bc.jumptable_sel);
158
159 if (ctx.is_evergreen())
160
161 bb << CF_WORD1_EG()
162 .BARRIER(bc.barrier)
163 .CF_CONST(bc.cf_const)
164 .CF_INST(ctx.cf_opcode(bc.op))
165 .COND(bc.cond)
166 .COUNT(bc.count)
167 .END_OF_PROGRAM(bc.end_of_program)
168 .POP_COUNT(bc.pop_count)
169 .VALID_PIXEL_MODE(bc.valid_pixel_mode)
170 .WHOLE_QUAD_MODE(bc.whole_quad_mode);
171
172 else //cayman
173
174 bb << CF_WORD1_CM()
175 .BARRIER(bc.barrier)
176 .CF_CONST(bc.cf_const)
177 .CF_INST(ctx.cf_opcode(bc.op))
178 .COND(bc.cond)
179 .COUNT(bc.count)
180 .POP_COUNT(bc.pop_count)
181 .VALID_PIXEL_MODE(bc.valid_pixel_mode);
182 } else {
183 bb << CF_WORD0_R6R7()
184 .ADDR(bc.addr);
185
186 assert(bc.count < ctx.max_fetch);
187
188 bb << CF_WORD1_R6R7()
189 .BARRIER(bc.barrier)
190 .CALL_COUNT(bc.call_count)
191 .CF_CONST(bc.cf_const)
192 .CF_INST(ctx.cf_opcode(bc.op))
193 .COND(bc.cond)
194 .COUNT(bc.count & 7)
195 .COUNT_3(bc.count >> 3)
196 .END_OF_PROGRAM(bc.end_of_program)
197 .POP_COUNT(bc.pop_count)
198 .VALID_PIXEL_MODE(bc.valid_pixel_mode)
199 .WHOLE_QUAD_MODE(bc.whole_quad_mode);
200 }
201
202 return 0;
203 }
204
build_cf_alu(cf_node * n)205 int bc_builder::build_cf_alu(cf_node* n) {
206 const bc_cf &bc = n->bc;
207
208 assert(bc.count < 128);
209
210 if (n->bc.is_alu_extended()) {
211 assert(ctx.is_egcm());
212
213 bb << CF_ALU_WORD0_EXT_EGCM()
214 .KCACHE_BANK2(bc.kc[2].bank)
215 .KCACHE_BANK3(bc.kc[3].bank)
216 .KCACHE_BANK_INDEX_MODE0(bc.kc[0].index_mode)
217 .KCACHE_BANK_INDEX_MODE1(bc.kc[1].index_mode)
218 .KCACHE_BANK_INDEX_MODE2(bc.kc[2].index_mode)
219 .KCACHE_BANK_INDEX_MODE3(bc.kc[3].index_mode)
220 .KCACHE_MODE2(bc.kc[2].mode);
221
222 bb << CF_ALU_WORD1_EXT_EGCM()
223 .BARRIER(bc.barrier)
224 .CF_INST(ctx.cf_opcode(CF_OP_ALU_EXT))
225 .KCACHE_ADDR2(bc.kc[2].addr)
226 .KCACHE_ADDR3(bc.kc[3].addr)
227 .KCACHE_MODE3(bc.kc[3].mode);
228 }
229
230 bb << CF_ALU_WORD0_ALL()
231 .ADDR(bc.addr)
232 .KCACHE_BANK0(bc.kc[0].bank)
233 .KCACHE_BANK1(bc.kc[1].bank)
234 .KCACHE_MODE0(bc.kc[0].mode);
235
236 assert(bc.count < 128);
237
238 if (ctx.is_r600())
239 bb << CF_ALU_WORD1_R6()
240 .BARRIER(bc.barrier)
241 .CF_INST(ctx.cf_opcode(bc.op))
242 .COUNT(bc.count)
243 .KCACHE_ADDR0(bc.kc[0].addr)
244 .KCACHE_ADDR1(bc.kc[1].addr)
245 .KCACHE_MODE1(bc.kc[1].mode)
246 .USES_WATERFALL(bc.uses_waterfall)
247 .WHOLE_QUAD_MODE(bc.whole_quad_mode);
248 else
249 bb << CF_ALU_WORD1_R7EGCM()
250 .ALT_CONST(bc.alt_const)
251 .BARRIER(bc.barrier)
252 .CF_INST(ctx.cf_opcode(bc.op))
253 .COUNT(bc.count)
254 .KCACHE_ADDR0(bc.kc[0].addr)
255 .KCACHE_ADDR1(bc.kc[1].addr)
256 .KCACHE_MODE1(bc.kc[1].mode)
257 .WHOLE_QUAD_MODE(bc.whole_quad_mode);
258
259 return 0;
260 }
261
build_cf_exp(cf_node * n)262 int bc_builder::build_cf_exp(cf_node* n) {
263 const bc_cf &bc = n->bc;
264 const cf_op_info *cfop = bc.op_ptr;
265
266 if (cfop->flags & CF_RAT) {
267 assert(ctx.is_egcm());
268
269 bb << CF_ALLOC_EXPORT_WORD0_RAT_EGCM()
270 .ELEM_SIZE(bc.elem_size)
271 .INDEX_GPR(bc.index_gpr)
272 .RAT_ID(bc.rat_id)
273 .RAT_INDEX_MODE(bc.rat_index_mode)
274 .RAT_INST(bc.rat_inst)
275 .RW_GPR(bc.rw_gpr)
276 .RW_REL(bc.rw_rel)
277 .TYPE(bc.type);
278 } else {
279
280 bb << CF_ALLOC_EXPORT_WORD0_ALL()
281 .ARRAY_BASE(bc.array_base)
282 .ELEM_SIZE(bc.elem_size)
283 .INDEX_GPR(bc.index_gpr)
284 .RW_GPR(bc.rw_gpr)
285 .RW_REL(bc.rw_rel)
286 .TYPE(bc.type);
287 }
288
289 if (cfop->flags & CF_EXP) {
290
291 if (!ctx.is_egcm())
292 bb << CF_ALLOC_EXPORT_WORD1_SWIZ_R6R7()
293 .BARRIER(bc.barrier)
294 .BURST_COUNT(bc.burst_count)
295 .CF_INST(ctx.cf_opcode(bc.op))
296 .END_OF_PROGRAM(bc.end_of_program)
297 .SEL_X(bc.sel[0])
298 .SEL_Y(bc.sel[1])
299 .SEL_Z(bc.sel[2])
300 .SEL_W(bc.sel[3])
301 .VALID_PIXEL_MODE(bc.valid_pixel_mode)
302 .WHOLE_QUAD_MODE(bc.whole_quad_mode);
303
304 else if (ctx.is_evergreen())
305 bb << CF_ALLOC_EXPORT_WORD1_SWIZ_EG()
306 .BARRIER(bc.barrier)
307 .BURST_COUNT(bc.burst_count)
308 .CF_INST(ctx.cf_opcode(bc.op))
309 .END_OF_PROGRAM(bc.end_of_program)
310 .MARK(bc.mark)
311 .SEL_X(bc.sel[0])
312 .SEL_Y(bc.sel[1])
313 .SEL_Z(bc.sel[2])
314 .SEL_W(bc.sel[3])
315 .VALID_PIXEL_MODE(bc.valid_pixel_mode);
316
317 else // cayman
318 bb << CF_ALLOC_EXPORT_WORD1_SWIZ_CM()
319 .BARRIER(bc.barrier)
320 .BURST_COUNT(bc.burst_count)
321 .CF_INST(ctx.cf_opcode(bc.op))
322 .MARK(bc.mark)
323 .SEL_X(bc.sel[0])
324 .SEL_Y(bc.sel[1])
325 .SEL_Z(bc.sel[2])
326 .SEL_W(bc.sel[3])
327 .VALID_PIXEL_MODE(bc.valid_pixel_mode);
328
329 } else if (cfop->flags & CF_MEM) {
330 return build_cf_mem(n);
331 }
332
333 return 0;
334 }
335
build_cf_mem(cf_node * n)336 int bc_builder::build_cf_mem(cf_node* n) {
337 const bc_cf &bc = n->bc;
338
339 if (!ctx.is_egcm())
340 bb << CF_ALLOC_EXPORT_WORD1_BUF_R6R7()
341 .ARR_SIZE(bc.array_size)
342 .BARRIER(bc.barrier)
343 .BURST_COUNT(bc.burst_count)
344 .CF_INST(ctx.cf_opcode(bc.op))
345 .COMP_MASK(bc.comp_mask)
346 .END_OF_PROGRAM(bc.end_of_program)
347 .VALID_PIXEL_MODE(bc.valid_pixel_mode)
348 .WHOLE_QUAD_MODE(bc.whole_quad_mode);
349
350 else if (ctx.is_evergreen())
351 bb << CF_ALLOC_EXPORT_WORD1_BUF_EG()
352 .ARR_SIZE(bc.array_size)
353 .BARRIER(bc.barrier)
354 .BURST_COUNT(bc.burst_count)
355 .CF_INST(ctx.cf_opcode(bc.op))
356 .COMP_MASK(bc.comp_mask)
357 .END_OF_PROGRAM(bc.end_of_program)
358 .MARK(bc.mark)
359 .VALID_PIXEL_MODE(bc.valid_pixel_mode);
360
361 else // cayman
362 bb << CF_ALLOC_EXPORT_WORD1_BUF_CM()
363 .ARR_SIZE(bc.array_size)
364 .BARRIER(bc.barrier)
365 .BURST_COUNT(bc.burst_count)
366 .CF_INST(ctx.cf_opcode(bc.op))
367 .COMP_MASK(bc.comp_mask)
368 .MARK(bc.mark)
369 .VALID_PIXEL_MODE(bc.valid_pixel_mode);
370
371 return 0;
372 }
373
build_alu(alu_node * n)374 int bc_builder::build_alu(alu_node* n) {
375 const bc_alu &bc = n->bc;
376 const alu_op_info *aop = bc.op_ptr;
377
378 if (n->bc.op_ptr->flags & AF_LDS) {
379 assert(ctx.is_egcm());
380 bb << ALU_WORD0_LDS_IDX_OP_EGCM()
381 .SRC0_SEL(bc.src[0].sel)
382 .SRC0_REL(bc.src[0].rel)
383 .SRC0_CHAN(bc.src[0].chan)
384 .IDX_OFFSET_4((bc.lds_idx_offset >> 4) & 1)
385 .SRC1_SEL(bc.src[1].sel)
386 .SRC1_REL(bc.src[1].rel)
387 .SRC1_CHAN(bc.src[1].chan)
388 .IDX_OFFSET_5((bc.lds_idx_offset >> 5) & 1)
389 .INDEX_MODE(bc.index_mode)
390 .PRED_SEL(bc.pred_sel)
391 .LAST(bc.last);
392
393 bb << ALU_WORD1_LDS_IDX_OP_EGCM()
394 .SRC2_SEL(bc.src[2].sel)
395 .SRC2_REL(bc.src[2].rel)
396 .SRC2_CHAN(bc.src[2].chan)
397 .IDX_OFFSET_1((bc.lds_idx_offset >> 1) & 1)
398 .ALU_INST(ctx.alu_opcode(ALU_OP3_LDS_IDX_OP))
399 .BANK_SWIZZLE(bc.bank_swizzle)
400 .LDS_OP((bc.op_ptr->opcode[1] >> 8) & 0xff)
401 .IDX_OFFSET_0((bc.lds_idx_offset >> 0) & 1)
402 .IDX_OFFSET_2((bc.lds_idx_offset >> 2) & 1)
403 .DST_CHAN(bc.dst_chan)
404 .IDX_OFFSET_3((bc.lds_idx_offset >> 3) & 1);
405
406 return 0;
407 }
408
409 bb << ALU_WORD0_ALL()
410 .INDEX_MODE(bc.index_mode)
411 .LAST(bc.last)
412 .PRED_SEL(bc.pred_sel)
413 .SRC0_SEL(bc.src[0].sel)
414 .SRC0_CHAN(bc.src[0].chan)
415 .SRC0_NEG(bc.src[0].neg)
416 .SRC0_REL(bc.src[0].rel)
417 .SRC1_SEL(bc.src[1].sel)
418 .SRC1_CHAN(bc.src[1].chan)
419 .SRC1_NEG(bc.src[1].neg)
420 .SRC1_REL(bc.src[1].rel);
421
422 if (aop->src_count<3) {
423 if (ctx.is_r600())
424 bb << ALU_WORD1_OP2_R6()
425 .ALU_INST(ctx.alu_opcode(bc.op))
426 .BANK_SWIZZLE(bc.bank_swizzle)
427 .CLAMP(bc.clamp)
428 .DST_GPR(bc.dst_gpr)
429 .DST_CHAN(bc.dst_chan)
430 .DST_REL(bc.dst_rel)
431 .FOG_MERGE(bc.fog_merge)
432 .OMOD(bc.omod)
433 .SRC0_ABS(bc.src[0].abs)
434 .SRC1_ABS(bc.src[1].abs)
435 .UPDATE_EXEC_MASK(bc.update_exec_mask)
436 .UPDATE_PRED(bc.update_pred)
437 .WRITE_MASK(bc.write_mask);
438 else {
439
440 if (ctx.is_cayman() && (aop->flags & AF_MOVA)) {
441
442 bb << ALU_WORD1_OP2_MOVA_CM()
443 .ALU_INST(ctx.alu_opcode(bc.op))
444 .BANK_SWIZZLE(bc.bank_swizzle)
445 .CLAMP(bc.clamp)
446 .MOVA_DST(bc.dst_gpr)
447 .DST_CHAN(bc.dst_chan)
448 .DST_REL(bc.dst_rel)
449 .OMOD(bc.omod)
450 .UPDATE_EXEC_MASK(bc.update_exec_mask)
451 .UPDATE_PRED(bc.update_pred)
452 .WRITE_MASK(bc.write_mask)
453 .SRC0_ABS(bc.src[0].abs)
454 .SRC1_ABS(bc.src[1].abs);
455
456 } else if (ctx.is_cayman() && (aop->flags & (AF_PRED|AF_KILL))) {
457 bb << ALU_WORD1_OP2_EXEC_MASK_CM()
458 .ALU_INST(ctx.alu_opcode(bc.op))
459 .BANK_SWIZZLE(bc.bank_swizzle)
460 .CLAMP(bc.clamp)
461 .DST_CHAN(bc.dst_chan)
462 .DST_REL(bc.dst_rel)
463 .EXECUTE_MASK_OP(bc.omod)
464 .UPDATE_EXEC_MASK(bc.update_exec_mask)
465 .UPDATE_PRED(bc.update_pred)
466 .WRITE_MASK(bc.write_mask)
467 .SRC0_ABS(bc.src[0].abs)
468 .SRC1_ABS(bc.src[1].abs);
469
470 } else
471 bb << ALU_WORD1_OP2_R7EGCM()
472 .ALU_INST(ctx.alu_opcode(bc.op))
473 .BANK_SWIZZLE(bc.bank_swizzle)
474 .CLAMP(bc.clamp)
475 .DST_GPR(bc.dst_gpr)
476 .DST_CHAN(bc.dst_chan)
477 .DST_REL(bc.dst_rel)
478 .OMOD(bc.omod)
479 .UPDATE_EXEC_MASK(bc.update_exec_mask)
480 .UPDATE_PRED(bc.update_pred)
481 .WRITE_MASK(bc.write_mask)
482 .SRC0_ABS(bc.src[0].abs)
483 .SRC1_ABS(bc.src[1].abs);
484
485 }
486 } else
487 bb << ALU_WORD1_OP3_ALL()
488 .ALU_INST(ctx.alu_opcode(bc.op))
489 .BANK_SWIZZLE(bc.bank_swizzle)
490 .CLAMP(bc.clamp)
491 .DST_GPR(bc.dst_gpr)
492 .DST_CHAN(bc.dst_chan)
493 .DST_REL(bc.dst_rel)
494 .SRC2_SEL(bc.src[2].sel)
495 .SRC2_CHAN(bc.src[2].chan)
496 .SRC2_NEG(bc.src[2].neg)
497 .SRC2_REL(bc.src[2].rel);
498 return 0;
499 }
500
build_fetch_tex(fetch_node * n)501 int bc_builder::build_fetch_tex(fetch_node* n) {
502 const bc_fetch &bc = n->bc;
503 const fetch_op_info *fop = bc.op_ptr;
504
505 assert(!(fop->flags & FF_VTX));
506
507 if (ctx.is_r600())
508 bb << TEX_WORD0_R6()
509 .BC_FRAC_MODE(bc.bc_frac_mode)
510 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad)
511 .RESOURCE_ID(bc.resource_id)
512 .SRC_GPR(bc.src_gpr)
513 .SRC_REL(bc.src_rel)
514 .TEX_INST(ctx.fetch_opcode(bc.op));
515
516 else if (ctx.is_r700())
517 bb << TEX_WORD0_R7()
518 .ALT_CONST(bc.alt_const)
519 .BC_FRAC_MODE(bc.bc_frac_mode)
520 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad)
521 .RESOURCE_ID(bc.resource_id)
522 .SRC_GPR(bc.src_gpr)
523 .SRC_REL(bc.src_rel)
524 .TEX_INST(ctx.fetch_opcode(bc.op));
525
526 else
527 bb << TEX_WORD0_EGCM()
528 .ALT_CONST(bc.alt_const)
529 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad)
530 .INST_MOD(bc.inst_mod)
531 .RESOURCE_ID(bc.resource_id)
532 .RESOURCE_INDEX_MODE(bc.resource_index_mode)
533 .SAMPLER_INDEX_MODE(bc.sampler_index_mode)
534 .SRC_GPR(bc.src_gpr)
535 .SRC_REL(bc.src_rel)
536 .TEX_INST(ctx.fetch_opcode(bc.op));
537
538 bb << TEX_WORD1_ALL()
539 .COORD_TYPE_X(bc.coord_type[0])
540 .COORD_TYPE_Y(bc.coord_type[1])
541 .COORD_TYPE_Z(bc.coord_type[2])
542 .COORD_TYPE_W(bc.coord_type[3])
543 .DST_GPR(bc.dst_gpr)
544 .DST_REL(bc.dst_rel)
545 .DST_SEL_X(bc.dst_sel[0])
546 .DST_SEL_Y(bc.dst_sel[1])
547 .DST_SEL_Z(bc.dst_sel[2])
548 .DST_SEL_W(bc.dst_sel[3])
549 .LOD_BIAS(bc.lod_bias);
550
551 bb << TEX_WORD2_ALL()
552 .OFFSET_X(bc.offset[0])
553 .OFFSET_Y(bc.offset[1])
554 .OFFSET_Z(bc.offset[2])
555 .SAMPLER_ID(bc.sampler_id)
556 .SRC_SEL_X(bc.src_sel[0])
557 .SRC_SEL_Y(bc.src_sel[1])
558 .SRC_SEL_Z(bc.src_sel[2])
559 .SRC_SEL_W(bc.src_sel[3]);
560
561 bb << 0;
562 return 0;
563 }
564
build_fetch_gds(fetch_node * n)565 int bc_builder::build_fetch_gds(fetch_node *n) {
566 const bc_fetch &bc = n->bc;
567 const fetch_op_info *fop = bc.op_ptr;
568 unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f;
569 unsigned mem_op = 4;
570 assert(fop->flags & FF_GDS);
571
572 if (bc.op == FETCH_OP_TF_WRITE) {
573 mem_op = 5;
574 gds_op = 0;
575 }
576
577 bb << MEM_GDS_WORD0_EGCM()
578 .MEM_INST(2)
579 .MEM_OP(mem_op)
580 .SRC_GPR(bc.src_gpr)
581 .SRC_SEL_X(bc.src_sel[0])
582 .SRC_SEL_Y(bc.src_sel[1])
583 .SRC_SEL_Z(bc.src_sel[2]);
584
585 bb << MEM_GDS_WORD1_EGCM()
586 .DST_GPR(bc.dst_gpr)
587 .DST_REL_MODE(bc.dst_rel)
588 .GDS_OP(gds_op)
589 .SRC_GPR(bc.src2_gpr)
590 .UAV_INDEX_MODE(bc.uav_index_mode)
591 .UAV_ID(bc.uav_id)
592 .ALLOC_CONSUME(bc.alloc_consume)
593 .BCAST_FIRST_REQ(bc.bcast_first_req);
594
595 bb << MEM_GDS_WORD2_EGCM()
596 .DST_SEL_X(bc.dst_sel[0])
597 .DST_SEL_Y(bc.dst_sel[1])
598 .DST_SEL_Z(bc.dst_sel[2])
599 .DST_SEL_W(bc.dst_sel[3]);
600
601 bb << 0;
602 return 0;
603 }
604
build_fetch_vtx(fetch_node * n)605 int bc_builder::build_fetch_vtx(fetch_node* n) {
606 const bc_fetch &bc = n->bc;
607 const fetch_op_info *fop = bc.op_ptr;
608
609 assert(fop->flags & FF_VTX);
610
611 if (!ctx.is_cayman())
612 bb << VTX_WORD0_R6R7EG()
613 .BUFFER_ID(bc.resource_id)
614 .FETCH_TYPE(bc.fetch_type)
615 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad)
616 .MEGA_FETCH_COUNT(bc.mega_fetch_count)
617 .SRC_GPR(bc.src_gpr)
618 .SRC_REL(bc.src_rel)
619 .SRC_SEL_X(bc.src_sel[0])
620 .VC_INST(ctx.fetch_opcode(bc.op));
621
622 else
623 bb << VTX_WORD0_CM()
624 .BUFFER_ID(bc.resource_id)
625 .COALESCED_READ(bc.coalesced_read)
626 .FETCH_TYPE(bc.fetch_type)
627 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad)
628 .LDS_REQ(bc.lds_req)
629 .SRC_GPR(bc.src_gpr)
630 .SRC_REL(bc.src_rel)
631 .SRC_SEL_X(bc.src_sel[0])
632 .SRC_SEL_Y(bc.src_sel[1])
633 .STRUCTURED_READ(bc.structured_read)
634 .VC_INST(ctx.fetch_opcode(bc.op));
635
636 if (bc.op == FETCH_OP_SEMFETCH)
637 bb << VTX_WORD1_SEM_ALL()
638 .DATA_FORMAT(bc.data_format)
639 .DST_SEL_X(bc.dst_sel[0])
640 .DST_SEL_Y(bc.dst_sel[1])
641 .DST_SEL_Z(bc.dst_sel[2])
642 .DST_SEL_W(bc.dst_sel[3])
643 .FORMAT_COMP_ALL(bc.format_comp_all)
644 .NUM_FORMAT_ALL(bc.num_format_all)
645 .SEMANTIC_ID(bc.semantic_id)
646 .SRF_MODE_ALL(bc.srf_mode_all)
647 .USE_CONST_FIELDS(bc.use_const_fields);
648 else
649 bb << VTX_WORD1_GPR_ALL()
650 .DATA_FORMAT(bc.data_format)
651 .DST_GPR(bc.dst_gpr)
652 .DST_REL(bc.dst_rel)
653 .DST_SEL_X(bc.dst_sel[0])
654 .DST_SEL_Y(bc.dst_sel[1])
655 .DST_SEL_Z(bc.dst_sel[2])
656 .DST_SEL_W(bc.dst_sel[3])
657 .FORMAT_COMP_ALL(bc.format_comp_all)
658 .NUM_FORMAT_ALL(bc.num_format_all)
659 .SRF_MODE_ALL(bc.srf_mode_all)
660 .USE_CONST_FIELDS(bc.use_const_fields);
661
662 switch (ctx.hw_class) {
663 case HW_CLASS_R600:
664 bb << VTX_WORD2_R6()
665 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride)
666 .ENDIAN_SWAP(bc.endian_swap)
667 .MEGA_FETCH(bc.mega_fetch)
668 .OFFSET(bc.offset[0]);
669 break;
670 case HW_CLASS_R700:
671 bb << VTX_WORD2_R7()
672 .ALT_CONST(bc.alt_const)
673 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride)
674 .ENDIAN_SWAP(bc.endian_swap)
675 .MEGA_FETCH(bc.mega_fetch)
676 .OFFSET(bc.offset[0]);
677 break;
678 case HW_CLASS_EVERGREEN:
679 bb << VTX_WORD2_EG()
680 .ALT_CONST(bc.alt_const)
681 .BUFFER_INDEX_MODE(bc.resource_index_mode)
682 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride)
683 .ENDIAN_SWAP(bc.endian_swap)
684 .MEGA_FETCH(bc.mega_fetch)
685 .OFFSET(bc.offset[0]);
686 break;
687 case HW_CLASS_CAYMAN:
688 bb << VTX_WORD2_CM()
689 .ALT_CONST(bc.alt_const)
690 .BUFFER_INDEX_MODE(bc.resource_index_mode)
691 .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride)
692 .ENDIAN_SWAP(bc.endian_swap)
693 .OFFSET(bc.offset[0]);
694 break;
695 default:
696 assert(!"unknown hw class");
697 return -1;
698 }
699
700 bb << 0;
701 return 0;
702 }
703
build_fetch_mem(fetch_node * n)704 int bc_builder::build_fetch_mem(fetch_node* n) {
705 const bc_fetch &bc = n->bc;
706 const fetch_op_info *fop = bc.op_ptr;
707
708 assert(fop->flags & FF_MEM);
709
710 bb << MEM_RD_WORD0_R7EGCM()
711 .MEM_INST(2)
712 .ELEM_SIZE(bc.elem_size)
713 .FETCH_WHOLE_QUAD(bc.fetch_whole_quad)
714 .MEM_OP(0)
715 .UNCACHED(bc.uncached)
716 .INDEXED(bc.indexed)
717 .SRC_SEL_Y(bc.src_sel[1])
718 .SRC_GPR(bc.src_gpr)
719 .SRC_REL(bc.src_rel)
720 .SRC_SEL_X(bc.src_sel[0])
721 .BURST_COUNT(bc.burst_count)
722 .LDS_REQ(bc.lds_req)
723 .COALESCED_READ(bc.coalesced_read);
724
725 bb << MEM_RD_WORD1_R7EGCM()
726 .DST_GPR(bc.dst_gpr)
727 .DST_REL(bc.dst_rel)
728 .DST_SEL_X(bc.dst_sel[0])
729 .DST_SEL_Y(bc.dst_sel[1])
730 .DST_SEL_Z(bc.dst_sel[2])
731 .DST_SEL_W(bc.dst_sel[3])
732 .DATA_FORMAT(bc.data_format)
733 .NUM_FORMAT_ALL(bc.num_format_all)
734 .FORMAT_COMP_ALL(bc.format_comp_all)
735 .SRF_MODE_ALL(bc.srf_mode_all);
736
737 bb << MEM_RD_WORD2_R7EGCM()
738 .ARRAY_BASE(bc.array_base)
739 .ENDIAN_SWAP(bc.endian_swap)
740 .ARR_SIZE(bc.array_size);
741
742 bb << 0;
743 return 0;
744 }
745
746 }
747