1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "util/half_float.h"
27 #include "util/bitscan.h"
28
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32
/*
 * Pack a four-entry swizzle into consecutive 2-bit fields.
 *
 * Each entry is offset by `shift`, wrapped to two bits, and stored at bit
 * position (entry index + dest_shift) * 2 of the returned value.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int chan = 0; chan < 4; chan++, bit_pos += 2) {
      unsigned sel = (swizzle[chan] + shift) & 0x3;
      packed |= sel << bit_pos;
   }

   return packed;
}
40
/*
 * Scalar register index read by `src` for `component`: the source's
 * register index plus the swizzle entry selected for that component.
 */
static int get_scl_reg_index(ppir_src *src, int component)
{
   const int base = ppir_target_get_src_reg_index(src);

   return base + src->swizzle[component];
}
47
/*
 * Encode the varying slot for a load node into `code`.
 *
 * ppir_op_load_coords_reg is written through the register form of the
 * field (f->reg); every other op accepted here is written through the
 * immediate form (f->imm).
 */
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   int dest_index = ppir_target_get_dest_reg_index(dest);

   if (node->op == ppir_op_load_coords_reg) {
      f->reg.dest = dest_index >> 2;
      f->reg.mask = dest->write_mask << (dest_index & 0x3);

      /* Without a source there is nothing more to describe. */
      if (!load->num_src)
         return;

      if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
         f->reg.source_type = 2;
         f->reg.perspective = 1;
      } else {
         f->reg.source_type = 1;
         switch (load->perspective) {
         case ppir_perspective_none:
            f->reg.perspective = 0;
            break;
         case ppir_perspective_z:
            f->reg.perspective = 2;
            break;
         case ppir_perspective_w:
            f->reg.perspective = 3;
            break;
         }
      }

      ppir_src *src = &load->src;
      int src_index = ppir_target_get_src_reg_index(src);
      f->reg.source = src_index >> 2;
      f->reg.negate = src->negate;
      f->reg.absolute = src->absolute;
      f->reg.swizzle = encode_swizzle(src->swizzle, src_index & 0x3, 0);
      return;
   }

   /* Immediate form: only these load ops are expected. */
   assert(node->op == ppir_op_load_varying ||
          node->op == ppir_op_load_coords ||
          node->op == ppir_op_load_fragcoord ||
          node->op == ppir_op_load_pointcoord ||
          node->op == ppir_op_load_frontface);

   f->imm.dest = dest_index >> 2;
   f->imm.mask = dest->write_mask << (dest_index & 0x3);

   /* Three components use alignment 3; otherwise num_components - 1. */
   int num_components = load->num_components;
   int alignment = num_components == 3 ? 3 : num_components - 1;
   f->imm.alignment = alignment;

   if (load->num_src) {
      int offset_index = ppir_target_get_src_reg_index(&load->src);
      f->imm.offset_vector = offset_index >> 2;
      f->imm.offset_scalar = offset_index & 0x3;
   } else {
      f->imm.offset_vector = 0xf;
   }

   /* Alignment 3 scales the index like alignment 2 does. */
   f->imm.index = load->index >> (alignment == 3 ? 2 : alignment);

   switch (node->op) {
   case ppir_op_load_fragcoord:
      f->imm.source_type = 2;
      f->imm.perspective = 3;
      break;
   case ppir_op_load_pointcoord:
      f->imm.source_type = 3;
      break;
   case ppir_op_load_frontface:
      f->imm.source_type = 3;
      f->imm.perspective = 1;
      break;
   case ppir_op_load_coords:
      if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         f->imm.source_type = 2;

      switch (load->perspective) {
      case ppir_perspective_none:
         f->imm.perspective = 0;
         break;
      case ppir_perspective_z:
         f->imm.perspective = 2;
         break;
      case ppir_perspective_w:
         f->imm.perspective = 3;
         break;
      }
      break;
   default:
      break;
   }
}
144
/*
 * Encode the texture-load (sampler) slot for a load_texture node.
 *
 * The LOD bias register is only filled in when lod_bias_en is set, and the
 * sampler type is only written for the sampler dimensions handled below.
 */
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_codegen_field_sampler *f = code;
   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);

   f->index = ldtex->sampler;

   f->lod_bias_en = ldtex->lod_bias_en;
   f->explicit_lod = ldtex->explicit_lod;
   if (ldtex->lod_bias_en) {
      /* The bias comes from the second source of the node. */
      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);
   }

   if (ldtex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      f->type = ppir_codegen_sampler_type_cube;
   } else if (ldtex->sampler_dim == GLSL_SAMPLER_DIM_2D ||
              ldtex->sampler_dim == GLSL_SAMPLER_DIM_3D ||
              ldtex->sampler_dim == GLSL_SAMPLER_DIM_RECT ||
              ldtex->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL) {
      f->type = ppir_codegen_sampler_type_generic;
   }

   f->offset_en = 0;
   f->unknown_2 = 0x39001;
}
174
/*
 * Encode the uniform slot for a load_uniform or load_temp node.
 *
 * Any other op trips the assertion. When the load has a source, it is
 * used as the offset register and the offset is enabled.
 */
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_codegen_field_uniform *f = code;
   ppir_load_node *load = ppir_node_to_load(node);

   if (node->op == ppir_op_load_uniform)
      f->source = ppir_codegen_uniform_src_uniform;
   else if (node->op == ppir_op_load_temp)
      f->source = ppir_codegen_uniform_src_temporary;
   else
      assert(0);

   /* Uniforms are always aligned to vec4 boundary */
   f->alignment = 2;
   f->index = load->index;

   if (!load->num_src)
      return;

   f->offset_en = 1;
   f->offset_reg = ppir_target_get_src_reg_index(&load->src);
}
200
ppir_codegen_get_outmod(ppir_outmod outmod)201 static ppir_codegen_outmod ppir_codegen_get_outmod(ppir_outmod outmod)
202 {
203 switch (outmod) {
204 case ppir_outmod_none:
205 return ppir_codegen_outmod_none;
206 case ppir_outmod_clamp_fraction:
207 return ppir_codegen_outmod_clamp_fraction;
208 case ppir_outmod_clamp_positive:
209 return ppir_codegen_outmod_clamp_positive;
210 case ppir_outmod_round:
211 return ppir_codegen_outmod_round;
212 default:
213 unreachable("invalid ppir_outmod");
214 }
215 }
216
/*
 * Map a shift in [-3, 3] to an op value: non-negative shifts map to
 * themselves, negative shifts map to shift + 8.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
222
/*
 * Encode the vec4 multiply slot for an ALU node.
 *
 * When the destination is not a pipeline register, its register index is
 * split into a vec4 register (index >> 2) and a component shift
 * (index & 3); the shift is applied to the write mask and to both source
 * swizzles. Ops not listed in the switch leave f->op untouched.
 */
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;
   int dest_shift = 0;

   if (dest->type != ppir_target_pipeline) {
      int dest_index = ppir_target_get_dest_reg_index(dest);
      dest_shift = dest_index & 0x3;
      f->dest = dest_index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   /* multiply: the op value is derived from the node's shift */
   case ppir_op_mul:
      f->op = shift_to_op(alu->shift);
      break;

   /* move / min / max */
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_mul_op_mov;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_vec4_mul_op_min;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_vec4_mul_op_max;
      break;

   /* logic */
   case ppir_op_not:
      f->op = ppir_codegen_vec4_mul_op_not;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_vec4_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_vec4_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_vec4_mul_op_xor;
      break;

   /* comparisons */
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_mul_op_ne;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_mul_op_ge;
      break;

   default:
      break;
   }

   /* First source is always encoded. */
   ppir_src *arg0 = &alu->src[0];
   int arg0_index = ppir_target_get_src_reg_index(arg0);
   f->arg0_source = arg0_index >> 2;
   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* Second source only for two-operand nodes. */
   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   int arg1_index = ppir_target_get_src_reg_index(arg1);
   f->arg1_source = arg1_index >> 2;
   f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
295
/*
 * Encode the scalar multiply slot for an ALU node.
 *
 * The component written is the lowest set bit of the destination write
 * mask; the same component selects the swizzle entry of each source.
 * Output is only enabled when the destination is not a pipeline register.
 * Ops not listed in the switch leave f->op untouched.
 */
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);

   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + component;
      f->output_en = true;
   }
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   /* multiply: the op value is derived from the node's shift */
   case ppir_op_mul:
      f->op = shift_to_op(alu->shift);
      break;

   /* move / min / max */
   case ppir_op_mov:
      f->op = ppir_codegen_float_mul_op_mov;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_mul_op_min;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_mul_op_max;
      break;

   /* logic */
   case ppir_op_not:
      f->op = ppir_codegen_float_mul_op_not;
      break;
   case ppir_op_and:
      f->op = ppir_codegen_float_mul_op_and;
      break;
   case ppir_op_or:
      f->op = ppir_codegen_float_mul_op_or;
      break;
   case ppir_op_xor:
      f->op = ppir_codegen_float_mul_op_xor;
      break;

   /* comparisons */
   case ppir_op_eq:
      f->op = ppir_codegen_float_mul_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_mul_op_ne;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_mul_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_mul_op_ge;
      break;

   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   f->arg0_source = get_scl_reg_index(arg0, component);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   f->arg1_source = get_scl_reg_index(arg1, component);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
364
/*
 * Encode the vec4 add/accumulate slot for an ALU node.
 *
 * The destination register index is split into a vec4 register and a
 * component shift. sum3/sum4 reset the shift to 0 after the write mask
 * has been computed, so their source swizzles are not shifted. For
 * select, encoding starts at the second source. A first argument coming
 * from the vmul pipeline register sets mul_in instead of a source
 * register. Ops not listed in the switch leave f->op untouched.
 */
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   int dest_index = ppir_target_get_dest_reg_index(dest);
   int dest_shift = dest_index & 0x3;
   f->dest = dest_index >> 2;
   f->mask = dest->write_mask << dest_shift;
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   /* arithmetic */
   case ppir_op_add:
      f->op = ppir_codegen_vec4_acc_op_add;
      break;
   case ppir_op_mov:
      f->op = ppir_codegen_vec4_acc_op_mov;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_vec4_acc_op_min;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_vec4_acc_op_max;
      break;

   /* horizontal sums: source swizzles are encoded without a shift */
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      dest_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      dest_shift = 0;
      break;

   /* rounding */
   case ppir_op_floor:
      f->op = ppir_codegen_vec4_acc_op_floor;
      break;
   case ppir_op_ceil:
      f->op = ppir_codegen_vec4_acc_op_ceil;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_vec4_acc_op_fract;
      break;

   /* comparisons and select */
   case ppir_op_eq:
      f->op = ppir_codegen_vec4_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_vec4_acc_op_ne;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_vec4_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_vec4_acc_op_ge;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_vec4_acc_op_sel;
      break;

   /* derivatives */
   case ppir_op_ddx:
      f->op = ppir_codegen_vec4_acc_op_dFdx;
      break;
   case ppir_op_ddy:
      f->op = ppir_codegen_vec4_acc_op_dFdy;
      break;

   default:
      break;
   }

   /* select skips its first source here */
   ppir_src *arg0 = alu->src;
   if (node->op == ppir_op_select)
      arg0 = alu->src + 1;

   int arg0_index = ppir_target_get_src_reg_index(arg0);
   bool from_vmul = arg0->type == ppir_target_pipeline &&
                    arg0->pipeline == ppir_pipeline_reg_vmul;
   if (from_vmul)
      f->mul_in = true;
   else
      f->arg0_source = arg0_index >> 2;

   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* Encode the following source if the node has one. */
   ppir_src *arg1 = arg0 + 1;
   if (arg1 >= alu->src + alu->num_src)
      return;

   int arg1_index = ppir_target_get_src_reg_index(arg1);
   f->arg1_source = arg1_index >> 2;
   f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
453
/*
 * Encode the scalar add/accumulate slot for an ALU node.
 *
 * The component written is the lowest set bit of the destination write
 * mask; output is always enabled. For select, encoding starts at the
 * second source. A first argument coming from the fmul pipeline register
 * sets mul_in instead of a source register. Ops not listed in the switch
 * leave f->op untouched.
 */
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + component;
   f->output_en = true;
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   /* add: the op value is derived from the node's shift */
   case ppir_op_add:
      f->op = shift_to_op(alu->shift);
      break;

   /* move / min / max */
   case ppir_op_mov:
      f->op = ppir_codegen_float_acc_op_mov;
      break;
   case ppir_op_min:
      f->op = ppir_codegen_float_acc_op_min;
      break;
   case ppir_op_max:
      f->op = ppir_codegen_float_acc_op_max;
      break;

   /* rounding */
   case ppir_op_floor:
      f->op = ppir_codegen_float_acc_op_floor;
      break;
   case ppir_op_ceil:
      f->op = ppir_codegen_float_acc_op_ceil;
      break;
   case ppir_op_fract:
      f->op = ppir_codegen_float_acc_op_fract;
      break;

   /* comparisons and select */
   case ppir_op_eq:
      f->op = ppir_codegen_float_acc_op_eq;
      break;
   case ppir_op_ne:
      f->op = ppir_codegen_float_acc_op_ne;
      break;
   case ppir_op_gt:
      f->op = ppir_codegen_float_acc_op_gt;
      break;
   case ppir_op_ge:
      f->op = ppir_codegen_float_acc_op_ge;
      break;
   case ppir_op_select:
      f->op = ppir_codegen_float_acc_op_sel;
      break;

   /* derivatives */
   case ppir_op_ddx:
      f->op = ppir_codegen_float_acc_op_dFdx;
      break;
   case ppir_op_ddy:
      f->op = ppir_codegen_float_acc_op_dFdy;
      break;

   default:
      break;
   }

   /* select skips its first source here */
   ppir_src *arg0 = alu->src;
   if (node->op == ppir_op_select)
      arg0 = alu->src + 1;

   bool from_fmul = arg0->type == ppir_target_pipeline &&
                    arg0->pipeline == ppir_pipeline_reg_fmul;
   if (from_fmul)
      f->mul_in = true;
   else
      f->arg0_source = get_scl_reg_index(arg0, component);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* Encode the following source if the node has one. */
   ppir_src *arg1 = arg0 + 1;
   if (arg1 >= alu->src + alu->num_src)
      return;

   f->arg1_source = get_scl_reg_index(arg1, component);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
529
/*
 * Encode the combine slot for a scalar special-function ALU node
 * (rsqrt, log2, exp2, rcp, sqrt, sin, cos). Any other op leaves the
 * field untouched.
 */
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   unsigned scalar_op;

   /* Pick the scalar op first; bail out for anything not handled here. */
   switch (node->op) {
   case ppir_op_rsqrt:
      scalar_op = ppir_codegen_combine_scalar_op_rsqrt;
      break;
   case ppir_op_log2:
      scalar_op = ppir_codegen_combine_scalar_op_log2;
      break;
   case ppir_op_exp2:
      scalar_op = ppir_codegen_combine_scalar_op_exp2;
      break;
   case ppir_op_rcp:
      scalar_op = ppir_codegen_combine_scalar_op_rcp;
      break;
   case ppir_op_sqrt:
      scalar_op = ppir_codegen_combine_scalar_op_sqrt;
      break;
   case ppir_op_sin:
      scalar_op = ppir_codegen_combine_scalar_op_sin;
      break;
   case ppir_op_cos:
      scalar_op = ppir_codegen_combine_scalar_op_cos;
      break;
   default:
      return;
   }

   f->scalar.dest_vec = false;
   f->scalar.arg1_en = false;

   /* The component written is the lowest set bit of the write mask. */
   ppir_dest *dest = &alu->dest;
   int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);
   f->scalar.dest = ppir_target_get_dest_reg_index(dest) + component;
   f->scalar.dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   ppir_src *arg0 = alu->src;
   f->scalar.arg0_src = get_scl_reg_index(arg0, component);
   f->scalar.arg0_absolute = arg0->absolute;
   f->scalar.arg0_negate = arg0->negate;

   f->scalar.op = scalar_op;
}
589
/*
 * Encode the temporary-store slot for a store_temp node.
 *
 * Alignment is 2 for four components and num_components - 1 otherwise;
 * the stored index is scaled up by (2 - alignment) bits.
 */
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_codegen_field_temp_write *f = code;
   ppir_store_node *store = ppir_node_to_store(node);
   int num_components = store->num_components;

   f->temp_write.dest = 0x03; /* 11 - temporary */
   f->temp_write.source = store->src.reg->index;

   int alignment;
   if (num_components == 4)
      alignment = 2;
   else
      alignment = num_components - 1;

   f->temp_write.alignment = alignment;
   f->temp_write.index = store->index << (2 - alignment);

   f->temp_write.offset_reg = store->index >> 2;
}
607
/*
 * Convert each of the constant's `num` float values to a 16-bit half
 * float and store them consecutively in `code`.
 */
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   int idx = 0;

   while (idx < constant->num) {
      code[idx] = _mesa_float_to_half(constant->value[idx].f);
      idx++;
   }
}
613
/*
 * Encode a discard node: the branch field is filled with the three fixed
 * PPIR_CODEGEN_DISCARD_WORD* values.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_discard);

   ppir_codegen_field_branch *field = code;

   field->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   field->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   field->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
623
/*
 * Encode the branch slot.
 *
 * Discard nodes are handed to ppir_codegen_encode_discard(). For a real
 * branch, a two-source node is conditional on its sources and the
 * cond_gt/eq/lt flags; a zero-source node sets all three flags
 * (unconditional). The target is the first instruction of the target
 * block, skipping forward over blocks that have no instructions, and is
 * stored as an offset relative to this node's instruction.
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   assert(node->op == ppir_op_branch);

   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.unknown_1 = 0x0;

   switch (branch->num_src) {
   case 2:
      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
      b->branch.cond_gt = branch->cond_gt;
      b->branch.cond_eq = branch->cond_eq;
      b->branch.cond_lt = branch->cond_lt;
      break;
   case 0:
      /* Unconditional branch */
      b->branch.arg0_source = 0;
      b->branch.arg1_source = 0;
      b->branch.cond_gt = true;
      b->branch.cond_eq = true;
      b->branch.cond_lt = true;
      break;
   default:
      assert(false);
      break;
   }

   /*
    * Skip over empty blocks until one with instructions is found.
    * NOTE(review): the walk only stops on a NULL list.next; if the block
    * list is circular this check never fires at the end of the list.
    * Confirm against the list implementation.
    */
   ppir_block *target = branch->target;
   while (list_is_empty(&target->instr_list) && target->list.next)
      target = list_entry(target->list.next, ppir_block, list);

   assert(!list_is_empty(&target->instr_list));

   ppir_instr *first = list_first_entry(&target->instr_list, ppir_instr, list);
   b->branch.target = first->offset - node->instr->offset;
   b->branch.next_count = first->encode_size;
}
671
672 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
673
674 static const ppir_codegen_instr_slot_encode_func
675 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
676 [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
677 [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
678 [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
679 [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
680 [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
681 [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
682 [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
683 [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
684 [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
685 [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
686 };
687
/*
 * Size in bits of each encoded field. The table is indexed with the
 * instruction slot number by its users in this file.
 * NOTE(review): the positional order presumably mirrors the
 * PPIR_INSTR_SLOT_* enum order — confirm against ppir.h.
 */
static const int ppir_codegen_field_size[] = {
   34,
   62,
   41,
   43,
   30,
   44,
   31,
   30,
   41,
   73,
};
691
/* Number of 32-bit words needed to hold `size` bits (rounded up). */
static inline int align_to_word(int size)
{
   const int word_bits_log2 = 5;
   const int round_up = (1 << word_bits_log2) - 1;

   return (size + round_up) >> word_bits_log2;
}
696
/*
 * Encoded size of an instruction in 32-bit words: the bit sizes of all
 * occupied slots plus 64 bits for each non-empty constant, rounded up to
 * whole words, plus one extra word.
 */
static int get_instr_encode_size(ppir_instr *instr)
{
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++)
      bits += instr->slots[slot] ? ppir_codegen_field_size[slot] : 0;

   for (int c = 0; c < 2; c++)
      bits += instr->constant[c].num ? 64 : 0;

   return align_to_word(bits) + 1;
}
713
/*
 * Copy `src_size` bits from the start of `src` into `dst`, beginning at
 * bit offset `dst_offset`.
 *
 * When the destination offset is not byte-aligned, the source bytes are
 * shifted and OR-ed into the destination, so the destination bits must be
 * zero beforehand. When it is byte-aligned, the bytes holding the field
 * are copied directly.
 *
 * Copying is done at byte granularity: any bits in the last source byte
 * beyond `src_size` are transferred as well, so callers are expected to
 * keep them zero.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   unsigned char *cpy_dst = dst, *cpy_src = src;
   int off1 = dst_offset & 0x07;

   cpy_dst += (dst_offset >> 3);

   if (off1) {
      /* Each source byte is split across two destination bytes. */
      int off2 = 0x08 - off1;
      int cpy_size = 0;
      while (1) {
         /* low off2 bits of the source byte -> high part of dst byte */
         *cpy_dst |= *cpy_src << off1;
         cpy_dst++;

         cpy_size += off2;
         if (cpy_size >= src_size)
            break;

         /* high off1 bits of the source byte -> low part of next byte */
         *cpy_dst |= *cpy_src >> off2;
         cpy_src++;

         cpy_size += off1;
         if (cpy_size >= src_size)
            break;
      }
   }
   else {
      /*
       * Copy only the bytes that actually hold the field. Rounding the
       * length up to whole 32-bit words would touch up to three bytes
       * beyond the field on both buffers, which can run past the end of
       * the destination when it is byte- but not word-aligned.
       */
      memcpy(cpy_dst, cpy_src, (src_size + 7) >> 3);
   }
}
743
/*
 * Encode one instruction at `code` and return its size in 32-bit words.
 *
 * The control word sits at `code`; the fields of all occupied slots are
 * packed bit-contiguously right after it, followed by up to two 64-bit
 * constant fields. If `last_code` is non-NULL it is treated as the
 * control word of the previously encoded instruction, which gets this
 * instruction's size as next_count and has prefetch enabled.
 */
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   ppir_codegen_ctrl *ctrl = code;
   void *payload = ctrl + 1;
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++) {
      ppir_node *node = instr->slots[slot];
      if (!node)
         continue;

      /* max field size (73), align to dword */
      uint8_t field[12] = {0};

      ppir_codegen_encode_slot[slot](node, field);
      bitcopy(payload, bits, field, ppir_codegen_field_size[slot]);

      bits += ppir_codegen_field_size[slot];
      ctrl->fields |= 1 << slot;
   }

   /* Texture loads request sync. */
   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   /* So do derivatives in either add slot. */
   static const int add_slots[2] = {
      PPIR_INSTR_SLOT_ALU_VEC_ADD,
      PPIR_INSTR_SLOT_ALU_SCL_ADD,
   };
   for (int k = 0; k < 2; k++) {
      ppir_node *node = instr->slots[add_slots[k]];
      if (node && (node->op == ppir_op_ddx || node->op == ppir_op_ddy))
         ctrl->sync = true;
   }

   for (int c = 0; c < 2; c++) {
      if (!instr->constant[c].num)
         continue;

      uint16_t halves[4] = {0};

      ppir_codegen_encode_const(instr->constant + c, halves);
      bitcopy(payload, bits, halves, instr->constant[c].num * 16);

      /* A constant field always occupies 64 bits. */
      bits += 64;
      ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + c);
   }

   /* Payload words plus the control word. */
   int words = align_to_word(bits) + 1;

   ctrl->count = words;
   if (instr->stop)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *prev_ctrl = last_code;
      prev_ctrl->next_count = words;
      prev_ctrl->prefetch = true;
   }

   return words;
}
803
/*
 * Dump the encoded program to stdout: for every instruction, its index
 * and offset, its raw words in hex (six per line), then its disassembly.
 * The word count of each instruction is read from the low five bits of
 * its first word.
 */
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *cursor = comp->prog->shader;
   unsigned word_offset = 0;

   printf("========ppir codegen========\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         printf("%03d (@%6d): ", instr->index, instr->offset);

         int num_words = cursor[0] & 0x1f;
         for (int w = 0; w < num_words; w++) {
            if (w && w % 6 == 0)
               printf("\n ");
            printf("%08x ", cursor[w]);
         }
         printf("\n");

         ppir_disassemble_instr(cursor, word_offset, stdout);

         cursor += num_words;
         word_offset += num_words;
      }
   }
   printf("-----------------------\n");
}
827
/*
 * Generate the final binary for a compiled program.
 *
 * Pass 1 assigns every instruction its word offset and encoded size and
 * propagates each block's stop flag to that block's last instruction.
 * Pass 2 encodes all instructions back to back into a freshly allocated,
 * zero-initialized buffer owned by comp->prog. Any previous shader buffer
 * is freed and replaced.
 *
 * Returns false if the buffer cannot be allocated, true otherwise.
 */
bool ppir_codegen_prog(ppir_compiler *comp)
{
   int size = 0;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->offset = size;
         instr->encode_size = get_instr_encode_size(instr);
         size += instr->encode_size;
      }
      /*
       * Set stop flag for the last instruction if block has stop flag.
       * An empty block has no last instruction: list_last_entry() would
       * then return a pointer derived from the list head itself, and
       * writing through it would corrupt memory, so skip such blocks.
       */
      if (block->stop && !list_is_empty(&block->instr_list)) {
         ppir_instr *instr = list_last_entry(&block->instr_list, ppir_instr, list);
         instr->stop = true;
      }
   }

   uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
   if (!prog)
      return false;

   /* last_code tracks the previous instruction's control word. */
   uint32_t *code = prog, *last_code = NULL;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int offset = encode_instr(instr, code, last_code);
         last_code = code;
         code += offset;
      }
   }

   if (comp->prog->shader)
      ralloc_free(comp->prog->shader);

   comp->prog->shader = prog;
   comp->prog->state.shader_size = size * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}
868