/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/ralloc.h"
#include "util/half_float.h"
#include "util/bitscan.h"

#include "ppir.h"
#include "codegen.h"
#include "lima_context.h"
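
/* Pack a 4-component swizzle into the 2-bit-per-component hardware field.
 * 'shift' offsets each source component to account for the source register's
 * component offset, and 'dest_shift' places the result at the destination's
 * component offset. */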
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned ret = 0;
   for (int i = 0; i < 4; i++)
      ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
   return ret;
}
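
/* Scalar slots address individual register components: return the base
 * source register index plus the component selected by the swizzle. */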
static int get_scl_reg_index(ppir_src *src, int component)
{
   int ret = ppir_target_get_src_reg_index(src);
   ret += src->swizzle[component];
   return ret;
}
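
/* Encode the varying/load slot. Immediate-index loads (varyings, texture
 * coords, gl_FragCoord, gl_PointCoord, gl_FrontFacing) use the imm form;
 * ppir_op_load_coords_reg fetches coordinates from a register and uses the
 * reg form. */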
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   int index = ppir_target_get_dest_reg_index(dest);
   int num_components = load->num_components;

   if (node->op != ppir_op_load_coords_reg) {
      assert(node->op == ppir_op_load_varying ||
             node->op == ppir_op_load_coords ||
             node->op == ppir_op_load_fragcoord ||
             node->op == ppir_op_load_pointcoord ||
             node->op == ppir_op_load_frontface);

      f->imm.dest = index >> 2;
      f->imm.mask = dest->write_mask << (index & 0x3);

      int alignment = num_components == 3 ? 3 : num_components - 1;
      f->imm.alignment = alignment;

      if (load->num_src) {
         index = ppir_target_get_src_reg_index(&load->src);
         f->imm.offset_vector = index >> 2;
         f->imm.offset_scalar = index & 0x3;
      } else
         f->imm.offset_vector = 0xf;

      if (alignment == 3)
         f->imm.index = load->index >> 2;
      else
         f->imm.index = load->index >> alignment;

      switch (node->op) {
      case ppir_op_load_fragcoord:
         f->imm.source_type = 2;
         f->imm.perspective = 3;
         break;
      case ppir_op_load_pointcoord:
         f->imm.source_type = 3;
         break;
      case ppir_op_load_frontface:
         f->imm.source_type = 3;
         f->imm.perspective = 1;
         break;
      case ppir_op_load_coords:
         /* num_components == 3 implies cubemap as we don't support 3D textures */
         f->imm.source_type = num_components == 3 ? 2 : 0;
         break;
      default:
         break;
      }
   }
   else { /* node->op == ppir_op_load_coords_reg */
      f->reg.dest = index >> 2;
      f->reg.mask = dest->write_mask << (index & 0x3);

      if (load->num_src) {
         /* num_components == 3 implies cubemap as we don't support 3D textures */
         if (num_components == 3) {
            f->reg.source_type = 2;
            f->reg.perspective = 1;
         } else {
            f->reg.source_type = 1;
         }
         ppir_src *src = &load->src;
         index = ppir_target_get_src_reg_index(src);
         f->reg.source = index >> 2;
         f->reg.negate = src->negate;
         f->reg.absolute = src->absolute;
         f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
      }
   }
}
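
/* Encode the texture sampler slot: sampler index, optional lod bias source
 * and the sampler type (2D-like targets vs. cubemaps). */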
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_codegen_field_sampler *f = code;
   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);

   f->index = ldtex->sampler;

   f->lod_bias_en = ldtex->lod_bias_en;
   f->explicit_lod = ldtex->explicit_lod;
   if (ldtex->lod_bias_en)
      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);

   switch (ldtex->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      f->type = ppir_codegen_sampler_type_2d;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      f->type = ppir_codegen_sampler_type_cube;
      break;
   default:
      break;
   }

   f->offset_en = 0;
   f->unknown_2 = 0x39001;
}
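
/* Encode the uniform/temporary load slot. Both sources share the same field
 * layout and differ only in the source selector. */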
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_codegen_field_uniform *f = code;
   ppir_load_node *load = ppir_node_to_load(node);

   switch (node->op) {
   case ppir_op_load_uniform:
      f->source = ppir_codegen_uniform_src_uniform;
      break;
   case ppir_op_load_temp:
      f->source = ppir_codegen_uniform_src_temporary;
      break;
   default:
      assert(0);
   }

   /* Uniforms are always aligned to vec4 boundary */
   f->alignment = 2;
   f->index = load->index;

   if (load->num_src) {
      f->offset_en = 1;
      f->offset_reg = ppir_target_get_src_reg_index(&load->src);
   }
}
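
/* For ppir_op_mul/ppir_op_add the op field doubles as a power-of-two output
 * shift: the shift value (-3..3) is encoded as 3-bit two's complement. */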
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);
   return shift < 0 ? shift + 8 : shift;
}
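
/* Encode the vector multiply slot. When the destination is a pipeline
 * register no register destination or write mask is emitted. */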
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_shift = 0;
   if (dest->type != ppir_target_pipeline) {
      int index = ppir_target_get_dest_reg_index(dest);
      dest_shift = index & 0x3;
      f->dest = index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_mul:
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_mul_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_vec4_mul_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_vec4_mul_op_min;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_vec4_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_vec4_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_vec4_mul_op_xor;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_mul_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_mul_op_ne;
      break;
   case ppir_op_not:
      f->op = ppir_codegen_vec4_mul_op_not;
      break;
   default:
      break;
   }

   ppir_src *src = alu->src;
   int index = ppir_target_get_src_reg_index(src);
   f->arg0_source = index >> 2;
   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (alu->num_src == 2) {
      src = alu->src + 1;
      index = ppir_target_get_src_reg_index(src);
      f->arg1_source = index >> 2;
      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
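
/* Encode the scalar (float) multiply slot. It operates on the single
 * component selected by the destination write mask. */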
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_component = ffs(dest->write_mask) - 1;
   assert(dest_component >= 0);

   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
      f->output_en = true;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_mul:
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_float_mul_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_mul_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_mul_op_min;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_float_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_float_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_float_mul_op_xor;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_mul_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_float_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_mul_op_ne;
      break;
   case ppir_op_not:
      f->op = ppir_codegen_float_mul_op_not;
      break;
   default:
      break;
   }

   ppir_src *src = alu->src;
   f->arg0_source = get_scl_reg_index(src, dest_component);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (alu->num_src == 2) {
      src = alu->src + 1;
      f->arg1_source = get_scl_reg_index(src, dest_component);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
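
/* Encode the vector add/accumulate slot. For ppir_op_select the first
 * source (the condition) is handled elsewhere and skipped here, so arg0
 * starts at src[1]; arg0 may also be forwarded from the vmul pipeline
 * register via mul_in. */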
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int index = ppir_target_get_dest_reg_index(dest);
   int dest_shift = index & 0x3;
   f->dest = index >> 2;
   f->mask = dest->write_mask << dest_shift;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_add:
      f->op = ppir_codegen_vec4_acc_op_add;
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_acc_op_mov;
      break;
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      dest_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      dest_shift = 0;
      break;
   case ppir_op_floor:
      f->op = ppir_codegen_vec4_acc_op_floor;
      break;
   case ppir_op_ceil:
      f->op = ppir_codegen_vec4_acc_op_ceil;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_vec4_acc_op_fract;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_acc_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_acc_op_ne;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_vec4_acc_op_sel;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_vec4_acc_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_vec4_acc_op_min;
      break;
   case ppir_op_ddx:
      f->op = ppir_codegen_vec4_acc_op_dFdx;
      break;
   case ppir_op_ddy:
      f->op = ppir_codegen_vec4_acc_op_dFdy;
      break;
   default:
      break;
   }

   ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
   index = ppir_target_get_src_reg_index(src);

   if (src->type == ppir_target_pipeline &&
       src->pipeline == ppir_pipeline_reg_vmul)
      f->mul_in = true;
   else
      f->arg0_source = index >> 2;

   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (++src < alu->src + alu->num_src) {
      index = ppir_target_get_src_reg_index(src);
      f->arg1_source = index >> 2;
      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
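
/* Encode the scalar (float) add/accumulate slot. Like the vector variant,
 * ppir_op_select skips its first source and arg0 may be taken from the
 * fmul pipeline register. */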
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   int dest_component = ffs(dest->write_mask) - 1;
   assert(dest_component >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
   f->output_en = true;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_add:
      f->op = shift_to_op(alu->shift);
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_float_acc_op_mov;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_acc_op_max;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_acc_op_min;
      break;
   case ppir_op_floor:
      f->op = ppir_codegen_float_acc_op_floor;
      break;
   case ppir_op_ceil:
      f->op = ppir_codegen_float_acc_op_ceil;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_float_acc_op_fract;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_acc_op_ge;
      break;
   case ppir_op_eq:
      f->op = ppir_codegen_float_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_acc_op_ne;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_float_acc_op_sel;
      break;
   case ppir_op_ddx:
      f->op = ppir_codegen_float_acc_op_dFdx;
      break;
   case ppir_op_ddy:
      f->op = ppir_codegen_float_acc_op_dFdy;
      break;
   default:
      break;
   }

   ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
   if (src->type == ppir_target_pipeline &&
       src->pipeline == ppir_pipeline_reg_fmul)
      f->mul_in = true;
   else
      f->arg0_source = get_scl_reg_index(src, dest_component);
   f->arg0_absolute = src->absolute;
   f->arg0_negate = src->negate;

   if (++src < alu->src + alu->num_src) {
      f->arg1_source = get_scl_reg_index(src, dest_component);
      f->arg1_absolute = src->absolute;
      f->arg1_negate = src->negate;
   }
}
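
/* Encode the combine slot, used here only for scalar transcendental ops
 * (rcp, rsqrt, sqrt, log2, exp2, sin, cos). */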
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   switch (node->op) {
   case ppir_op_rsqrt:
   case ppir_op_log2:
   case ppir_op_exp2:
   case ppir_op_rcp:
   case ppir_op_sqrt:
   case ppir_op_sin:
   case ppir_op_cos:
   {
      f->scalar.dest_vec = false;
      f->scalar.arg1_en = false;

      ppir_dest *dest = &alu->dest;
      int dest_component = ffs(dest->write_mask) - 1;
      assert(dest_component >= 0);
      f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
      f->scalar.dest_modifier = dest->modifier;

      ppir_src *src = alu->src;
      f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
      f->scalar.arg0_absolute = src->absolute;
      f->scalar.arg0_negate = src->negate;

      switch (node->op) {
      case ppir_op_rsqrt:
         f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
         break;
      case ppir_op_log2:
         f->scalar.op = ppir_codegen_combine_scalar_op_log2;
         break;
      case ppir_op_exp2:
         f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
         break;
      case ppir_op_rcp:
         f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
         break;
      case ppir_op_sqrt:
         f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
         break;
      case ppir_op_sin:
         f->scalar.op = ppir_codegen_combine_scalar_op_sin;
         break;
      case ppir_op_cos:
         f->scalar.op = ppir_codegen_combine_scalar_op_cos;
         break;
      default:
         break;
      }
      break;
   }
   default:
      break;
   }
}
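
/* Encode the temporary write slot, used to store a register to temporary
 * memory. The write index is scaled according to the store alignment. */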
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_codegen_field_temp_write *f = code;
   ppir_store_node *snode = ppir_node_to_store(node);
   int num_components = snode->num_components;

   f->temp_write.dest = 0x03; // 11 - temporary
   f->temp_write.source = snode->src.reg->index;

   int alignment = num_components == 4 ? 2 : num_components - 1;
   f->temp_write.alignment = alignment;
   f->temp_write.index = snode->index << (2 - alignment);

   f->temp_write.offset_reg = snode->index >> 2;
}
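
/* Constants are embedded in the instruction word as fp16 values. */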
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   for (int i = 0; i < constant->num; i++)
      code[i] = _mesa_float_to_half(constant->value[i].f);
}
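
/* Discard is encoded as a branch slot with a fixed word pattern. */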
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;
   assert(node->op == ppir_op_discard);

   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
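
/* Encode the branch slot. Conditional branches compare two scalar sources
 * using the gt/eq/lt condition bits; an unconditional branch sets all three.
 * The target is an instruction offset relative to the current instruction,
 * skipping over empty blocks. */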
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch;
   ppir_instr *target_instr;
   ppir_block *target;
   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   assert(node->op == ppir_op_branch);
   branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.unknown_1 = 0x0;

   if (branch->num_src == 2) {
      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
      b->branch.cond_gt = branch->cond_gt;
      b->branch.cond_eq = branch->cond_eq;
      b->branch.cond_lt = branch->cond_lt;
   } else if (branch->num_src == 0) {
      /* Unconditional branch */
      b->branch.arg0_source = 0;
      b->branch.arg1_source = 0;
      b->branch.cond_gt = true;
      b->branch.cond_eq = true;
      b->branch.cond_lt = true;
   } else {
      assert(false);
   }

   target = branch->target;
   while (list_is_empty(&target->instr_list)) {
      if (!target->list.next)
         break;
      target = LIST_ENTRY(ppir_block, target->list.next, list);
   }

   assert(!list_is_empty(&target->instr_list));

   target_instr = list_first_entry(&target->instr_list, ppir_instr, list);
   b->branch.target = target_instr->offset - node->instr->offset;
   b->branch.next_count = target_instr->encode_size;
}
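
/* Per-slot encoders, indexed by PPIR_INSTR_SLOT_*. */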
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);

static const ppir_codegen_instr_slot_encode_func
ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
   [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
};
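
/* Encoded size in bits of each slot's field, in the same slot order. */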
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};
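
/* Round a bit count up to a number of 32-bit words. */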
static inline int align_to_word(int size)
{
   return ((size + 0x1f) >> 5);
}
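
/* Size of an encoded instruction in 32-bit words: the enabled slot fields
 * plus any embedded constant vectors, rounded up, plus one control word. */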
static int get_instr_encode_size(ppir_instr *instr)
{
   int size = 0;

   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
      if (instr->slots[i])
         size += ppir_codegen_field_size[i];
   }

   for (int i = 0; i < 2; i++) {
      if (instr->constant[i].num)
         size += 64;
   }

   return align_to_word(size) + 1;
}
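
/* Append 'src_size' bits of 'src' into 'dst' at bit offset 'dst_offset'.
 * Both buffers are treated as arrays of 32-bit words; the destination is
 * expected to be zero-initialized since bits are OR-ed in. */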
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   int off1 = dst_offset & 0x1f;
   uint32_t *cpy_dst = dst, *cpy_src = src;

   cpy_dst += (dst_offset >> 5);

   if (off1) {
      int off2 = 32 - off1;
      int cpy_size = 0;
      while (1) {
         *cpy_dst |= *cpy_src << off1;
         cpy_dst++;

         cpy_size += off2;
         if (cpy_size >= src_size)
            break;

         *cpy_dst |= *cpy_src >> off2;
         cpy_src++;

         cpy_size += off1;
         if (cpy_size >= src_size)
            break;
      }
   }
   else
      memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
}
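
/* Encode one instruction: pack the enabled slot fields and constants after
 * the control word, set the field-presence and sync bits, and patch the
 * previous instruction's prefetch information. Returns the encoded size in
 * 32-bit words. */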
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   int size = 0;
   ppir_codegen_ctrl *ctrl = code;

   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
      if (instr->slots[i]) {
         /* max field size (73), align to dword */
         uint8_t output[12] = {0};

         ppir_codegen_encode_slot[i](instr->slots[i], output);
         bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);

         size += ppir_codegen_field_size[i];
         ctrl->fields |= 1 << i;
      }
   }

   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   for (int i = 0; i < 2; i++) {
      if (instr->constant[i].num) {
         uint16_t output[4] = {0};

         ppir_codegen_encode_const(instr->constant + i, output);
         bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);

         size += 64;
         ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
      }
   }

   size = align_to_word(size) + 1;

   ctrl->count = size;
   if (instr->is_end)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *last_ctrl = last_code;
      last_ctrl->next_count = size;
      last_ctrl->prefetch = true;
   }

   return size;
}
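
/* Debug dump: print each encoded instruction as hex words followed by its
 * disassembly. */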
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *prog = comp->prog->shader;
   unsigned offset = 0;

   printf("========ppir codegen========\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         printf("%03d (@%6d): ", instr->index, instr->offset);
         int n = prog[0] & 0x1f;
         for (int i = 0; i < n; i++) {
            if (i && i % 6 == 0)
               printf("\n ");
            printf("%08x ", prog[i]);
         }
         printf("\n");
         ppir_disassemble_instr(prog, offset, stdout);
         prog += n;
         offset += n;
      }
   }
   printf("-----------------------\n");
}
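
/* Top-level codegen: lay out instruction offsets and sizes, then encode
 * every instruction of every block into the program buffer. */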
bool ppir_codegen_prog(ppir_compiler *comp)
{
   int size = 0;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->offset = size;
         instr->encode_size = get_instr_encode_size(instr);
         size += instr->encode_size;
      }
   }

   uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
   if (!prog)
      return false;

   uint32_t *code = prog, *last_code = NULL;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int offset = encode_instr(instr, code, last_code);
         last_code = code;
         code += offset;
      }
   }

   if (comp->prog->shader)
      ralloc_free(comp->prog->shader);

   comp->prog->shader = prog;
   comp->prog->state.shader_size = size * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}