• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/ralloc.h"
26 #include "util/half_float.h"
27 #include "util/bitscan.h"
28 
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32 
/*
 * Pack a four-entry swizzle into two-bit lanes of the returned value.
 *
 * Each entry is offset by `shift`, reduced to its low two bits, and stored
 * in the lane numbered (entry position + dest_shift); lane k occupies bits
 * 2k and 2k+1 of the result.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int lane = dest_shift;

   for (int comp = 0; comp < 4; comp++, lane++) {
      int selector = (swizzle[comp] + shift) & 0x3;
      packed |= selector << (lane * 2);
   }

   return packed;
}
40 
/*
 * Scalar register index of one component of a source: the source's register
 * index as reported by ppir_target_get_src_reg_index(), plus the swizzle
 * entry selected by `component`.
 */
static int get_scl_reg_index(ppir_src *src, int component)
{
   return ppir_target_get_src_reg_index(src) + src->swizzle[component];
}
47 
/*
 * Encode a load node into the varying-slot field at `code`.
 *
 * ppir_op_load_coords_reg fills the `reg` layout of the field; every other
 * accepted op (listed in the assert below) fills the `imm` layout.
 */
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   int dest_index = ppir_target_get_dest_reg_index(dest);

   if (node->op == ppir_op_load_coords_reg) {
      f->reg.dest = dest_index >> 2;
      f->reg.mask = dest->write_mask << (dest_index & 0x3);

      /* The remaining reg fields are only written when a source exists. */
      if (!load->num_src)
         return;

      if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
         f->reg.source_type = 2;
         f->reg.perspective = 1;
      } else {
         f->reg.source_type = 1;
         if (load->perspective == ppir_perspective_none)
            f->reg.perspective = 0;
         else if (load->perspective == ppir_perspective_z)
            f->reg.perspective = 2;
         else if (load->perspective == ppir_perspective_w)
            f->reg.perspective = 3;
      }

      ppir_src *src = &load->src;
      int src_index = ppir_target_get_src_reg_index(src);
      f->reg.source = src_index >> 2;
      f->reg.negate = src->negate;
      f->reg.absolute = src->absolute;
      f->reg.swizzle = encode_swizzle(src->swizzle, src_index & 0x3, 0);
      return;
   }

   assert(node->op == ppir_op_load_varying ||
          node->op == ppir_op_load_coords ||
          node->op == ppir_op_load_fragcoord ||
          node->op == ppir_op_load_pointcoord ||
          node->op == ppir_op_load_frontface);

   /* The register index splits into a vec4 number and a component offset. */
   f->imm.dest = dest_index >> 2;
   f->imm.mask = dest->write_mask << (dest_index & 0x3);

   /* Three components use alignment code 3; otherwise it is count - 1. */
   int num_components = load->num_components;
   int alignment = (num_components == 3) ? 3 : num_components - 1;
   f->imm.alignment = alignment;

   if (load->num_src) {
      int offset_index = ppir_target_get_src_reg_index(&load->src);
      f->imm.offset_vector = offset_index >> 2;
      f->imm.offset_scalar = offset_index & 0x3;
   } else {
      f->imm.offset_vector = 0xf;
   }

   /* Alignment code 3 shifts the index by 2; other codes shift by the code. */
   f->imm.index = load->index >> (alignment == 3 ? 2 : alignment);

   switch (node->op) {
   case ppir_op_load_fragcoord:
      f->imm.source_type = 2;
      f->imm.perspective = 3;
      break;
   case ppir_op_load_pointcoord:
      f->imm.source_type = 3;
      break;
   case ppir_op_load_frontface:
      f->imm.source_type = 3;
      f->imm.perspective = 1;
      break;
   case ppir_op_load_coords:
      if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         f->imm.source_type = 2;

      if (load->perspective == ppir_perspective_none)
         f->imm.perspective = 0;
      else if (load->perspective == ppir_perspective_z)
         f->imm.perspective = 2;
      else if (load->perspective == ppir_perspective_w)
         f->imm.perspective = 3;
      break;
   default:
      break;
   }
}
144 
/*
 * Encode a texture-load node into the sampler field at `code`.
 *
 * The sampler index and LOD flags are copied from the node. The LOD bias
 * register is taken from the node's second source only when lod_bias_en is
 * set. Sampler dimensions other than 2D/3D/RECT/EXTERNAL/CUBE leave `type`
 * untouched.
 */
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_load_texture_node *tex = ppir_node_to_load_texture(node);
   ppir_codegen_field_sampler *f = code;

   f->index = tex->sampler;

   f->lod_bias_en = tex->lod_bias_en;
   f->explicit_lod = tex->explicit_lod;
   if (tex->lod_bias_en)
      f->lod_bias = ppir_target_get_src_reg_index(&tex->src[1]);

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      f->type = ppir_codegen_sampler_type_cube;
   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_2D ||
              tex->sampler_dim == GLSL_SAMPLER_DIM_3D ||
              tex->sampler_dim == GLSL_SAMPLER_DIM_RECT ||
              tex->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL) {
      f->type = ppir_codegen_sampler_type_generic;
   }

   f->offset_en = 0;
   /* Fixed value; the field is named "unknown" in the encoding struct. */
   f->unknown_2 = 0x39001;
}
174 
/*
 * Encode a uniform or temporary load into the uniform field at `code`.
 *
 * Only ppir_op_load_uniform and ppir_op_load_temp are accepted; any other
 * op trips the assert. The offset register is written only when the load
 * has a source.
 */
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_codegen_field_uniform *f = code;

   if (node->op == ppir_op_load_uniform)
      f->source = ppir_codegen_uniform_src_uniform;
   else if (node->op == ppir_op_load_temp)
      f->source = ppir_codegen_uniform_src_temporary;
   else
      assert(0);

   /* Uniforms are always aligned to vec4 boundary */
   f->alignment = 2;
   f->index = load->index;

   if (!load->num_src)
      return;

   f->offset_en = 1;
   f->offset_reg = ppir_target_get_src_reg_index(&load->src);
}
200 
ppir_codegen_get_outmod(ppir_outmod outmod)201 static ppir_codegen_outmod ppir_codegen_get_outmod(ppir_outmod outmod)
202 {
203    switch (outmod) {
204       case ppir_outmod_none:
205          return ppir_codegen_outmod_none;
206       case ppir_outmod_clamp_fraction:
207          return ppir_codegen_outmod_clamp_fraction;
208       case ppir_outmod_clamp_positive:
209          return ppir_codegen_outmod_clamp_positive;
210       case ppir_outmod_round:
211          return ppir_codegen_outmod_round;
212       default:
213          unreachable("invalid ppir_outmod");
214    }
215 }
216 
/*
 * Convert a shift amount into an op value.
 *
 * The shift is asserted to lie in [-3, 3]. Non-negative shifts map to
 * themselves; negative shifts map to shift + 8.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift < 0)
      return shift + 8;

   return shift;
}
222 
/*
 * Encode an ALU node into the vec4 multiply field at `code`.
 *
 * Destination register and mask are written only when the destination is
 * not a pipeline target. Ops not listed in the switch leave `op` untouched.
 * The second argument is encoded only for two-source nodes.
 */
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* Component offset of the destination inside its vec4 register. */
   int dest_shift = 0;
   if (dest->type != ppir_target_pipeline) {
      int dest_index = ppir_target_get_dest_reg_index(dest);
      dest_shift = dest_index & 0x3;
      f->dest = dest_index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   case ppir_op_mul: f->op = shift_to_op(alu->shift);         break;
   case ppir_op_mov: f->op = ppir_codegen_vec4_mul_op_mov;    break;
   case ppir_op_max: f->op = ppir_codegen_vec4_mul_op_max;    break;
   case ppir_op_min: f->op = ppir_codegen_vec4_mul_op_min;    break;
   case ppir_op_and: f->op = ppir_codegen_vec4_mul_op_and;    break;
   case ppir_op_or:  f->op = ppir_codegen_vec4_mul_op_or;     break;
   case ppir_op_xor: f->op = ppir_codegen_vec4_mul_op_xor;    break;
   case ppir_op_gt:  f->op = ppir_codegen_vec4_mul_op_gt;     break;
   case ppir_op_ge:  f->op = ppir_codegen_vec4_mul_op_ge;     break;
   case ppir_op_eq:  f->op = ppir_codegen_vec4_mul_op_eq;     break;
   case ppir_op_ne:  f->op = ppir_codegen_vec4_mul_op_ne;     break;
   case ppir_op_not: f->op = ppir_codegen_vec4_mul_op_not;    break;
   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   int arg0_index = ppir_target_get_src_reg_index(arg0);
   f->arg0_source = arg0_index >> 2;
   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   int arg1_index = ppir_target_get_src_reg_index(arg1);
   f->arg1_source = arg1_index >> 2;
   f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
295 
/*
 * Encode an ALU node into the scalar multiply field at `code`.
 *
 * The component operated on is the lowest set bit of the destination write
 * mask. Destination and output enable are written only for non-pipeline
 * destinations. Ops not listed in the switch leave `op` untouched.
 */
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* ffs() is 1-based; a zero mask would yield -1 and trip the assert. */
   int comp = ffs(dest->write_mask) - 1;
   assert(comp >= 0);

   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + comp;
      f->output_en = true;
   }
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   case ppir_op_mul: f->op = shift_to_op(alu->shift);          break;
   case ppir_op_mov: f->op = ppir_codegen_float_mul_op_mov;    break;
   case ppir_op_max: f->op = ppir_codegen_float_mul_op_max;    break;
   case ppir_op_min: f->op = ppir_codegen_float_mul_op_min;    break;
   case ppir_op_and: f->op = ppir_codegen_float_mul_op_and;    break;
   case ppir_op_or:  f->op = ppir_codegen_float_mul_op_or;     break;
   case ppir_op_xor: f->op = ppir_codegen_float_mul_op_xor;    break;
   case ppir_op_gt:  f->op = ppir_codegen_float_mul_op_gt;     break;
   case ppir_op_ge:  f->op = ppir_codegen_float_mul_op_ge;     break;
   case ppir_op_eq:  f->op = ppir_codegen_float_mul_op_eq;     break;
   case ppir_op_ne:  f->op = ppir_codegen_float_mul_op_ne;     break;
   case ppir_op_not: f->op = ppir_codegen_float_mul_op_not;    break;
   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   f->arg0_source = get_scl_reg_index(arg0, comp);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   f->arg1_source = get_scl_reg_index(arg1, comp);
   f->arg1_absolute = arg1->absolute;
   f->arg1_negate = arg1->negate;
}
364 
/*
 * Encode an ALU node into the vec4 accumulate (add) field at `code`.
 *
 * For ppir_op_select the encoded arguments start at the node's second
 * source; for every other op they start at the first. When the first
 * encoded argument is the vmul pipeline register, `mul_in` is set instead
 * of arg0_source. sum3/sum4 encode their swizzles with no destination shift.
 */
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   int dest_index = ppir_target_get_dest_reg_index(dest);
   /* Starts as the destination's component offset; sum3/sum4 reset it. */
   int swz_shift = dest_index & 0x3;

   f->dest = dest_index >> 2;
   f->mask = dest->write_mask << swz_shift;
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   case ppir_op_add:    f->op = ppir_codegen_vec4_acc_op_add;    break;
   case ppir_op_mov:    f->op = ppir_codegen_vec4_acc_op_mov;    break;
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      swz_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      swz_shift = 0;
      break;
   case ppir_op_floor:  f->op = ppir_codegen_vec4_acc_op_floor;  break;
   case ppir_op_ceil:   f->op = ppir_codegen_vec4_acc_op_ceil;   break;
   case ppir_op_fract:  f->op = ppir_codegen_vec4_acc_op_fract;  break;
   case ppir_op_gt:     f->op = ppir_codegen_vec4_acc_op_gt;     break;
   case ppir_op_ge:     f->op = ppir_codegen_vec4_acc_op_ge;     break;
   case ppir_op_eq:     f->op = ppir_codegen_vec4_acc_op_eq;     break;
   case ppir_op_ne:     f->op = ppir_codegen_vec4_acc_op_ne;     break;
   case ppir_op_select: f->op = ppir_codegen_vec4_acc_op_sel;    break;
   case ppir_op_max:    f->op = ppir_codegen_vec4_acc_op_max;    break;
   case ppir_op_min:    f->op = ppir_codegen_vec4_acc_op_min;    break;
   case ppir_op_ddx:    f->op = ppir_codegen_vec4_acc_op_dFdx;   break;
   case ppir_op_ddy:    f->op = ppir_codegen_vec4_acc_op_dFdy;   break;
   default:
      break;
   }

   ppir_src *arg0 = alu->src;
   if (node->op == ppir_op_select)
      arg0 = alu->src + 1;

   int arg0_index = ppir_target_get_src_reg_index(arg0);
   bool from_vmul = arg0->type == ppir_target_pipeline &&
                    arg0->pipeline == ppir_pipeline_reg_vmul;
   if (from_vmul)
      f->mul_in = true;
   else
      f->arg0_source = arg0_index >> 2;

   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, swz_shift);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* The second argument is the source right after arg0, if there is one. */
   ppir_src *arg1 = arg0 + 1;
   if (arg1 < alu->src + alu->num_src) {
      int arg1_index = ppir_target_get_src_reg_index(arg1);
      f->arg1_source = arg1_index >> 2;
      f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, swz_shift);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
453 
/*
 * Encode an ALU node into the scalar accumulate (add) field at `code`.
 *
 * The component operated on is the lowest set bit of the destination write
 * mask. For ppir_op_select the encoded arguments start at the node's second
 * source. When the first encoded argument is the fmul pipeline register,
 * `mul_in` is set instead of arg0_source.
 */
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* ffs() is 1-based; a zero mask would yield -1 and trip the assert. */
   int comp = ffs(dest->write_mask) - 1;
   assert(comp >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + comp;
   f->output_en = true;
   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   switch (node->op) {
   case ppir_op_add:    f->op = shift_to_op(alu->shift);           break;
   case ppir_op_mov:    f->op = ppir_codegen_float_acc_op_mov;     break;
   case ppir_op_max:    f->op = ppir_codegen_float_acc_op_max;     break;
   case ppir_op_min:    f->op = ppir_codegen_float_acc_op_min;     break;
   case ppir_op_floor:  f->op = ppir_codegen_float_acc_op_floor;   break;
   case ppir_op_ceil:   f->op = ppir_codegen_float_acc_op_ceil;    break;
   case ppir_op_fract:  f->op = ppir_codegen_float_acc_op_fract;   break;
   case ppir_op_gt:     f->op = ppir_codegen_float_acc_op_gt;      break;
   case ppir_op_ge:     f->op = ppir_codegen_float_acc_op_ge;      break;
   case ppir_op_eq:     f->op = ppir_codegen_float_acc_op_eq;      break;
   case ppir_op_ne:     f->op = ppir_codegen_float_acc_op_ne;      break;
   case ppir_op_select: f->op = ppir_codegen_float_acc_op_sel;     break;
   case ppir_op_ddx:    f->op = ppir_codegen_float_acc_op_dFdx;    break;
   case ppir_op_ddy:    f->op = ppir_codegen_float_acc_op_dFdy;    break;
   default:
      break;
   }

   ppir_src *arg0 = alu->src;
   if (node->op == ppir_op_select)
      arg0 = alu->src + 1;

   bool from_fmul = arg0->type == ppir_target_pipeline &&
                    arg0->pipeline == ppir_pipeline_reg_fmul;
   if (from_fmul)
      f->mul_in = true;
   else
      f->arg0_source = get_scl_reg_index(arg0, comp);
   f->arg0_absolute = arg0->absolute;
   f->arg0_negate = arg0->negate;

   /* The second argument is the source right after arg0, if there is one. */
   ppir_src *arg1 = arg0 + 1;
   if (arg1 < alu->src + alu->num_src) {
      f->arg1_source = get_scl_reg_index(arg1, comp);
      f->arg1_absolute = arg1->absolute;
      f->arg1_negate = arg1->negate;
   }
}
529 
/*
 * Encode a single-source scalar op (rsqrt, log2, exp2, rcp, sqrt, sin, cos)
 * into the combine field at `code`. Any other op writes nothing.
 */
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   /* Only the scalar ops below are encoded here. */
   switch (node->op) {
   case ppir_op_rsqrt:
   case ppir_op_log2:
   case ppir_op_exp2:
   case ppir_op_rcp:
   case ppir_op_sqrt:
   case ppir_op_sin:
   case ppir_op_cos:
      break;
   default:
      return;
   }

   f->scalar.dest_vec = false;
   f->scalar.arg1_en = false;

   ppir_dest *dest = &alu->dest;
   /* ffs() is 1-based; a zero mask would yield -1 and trip the assert. */
   int comp = ffs(dest->write_mask) - 1;
   assert(comp >= 0);
   f->scalar.dest = ppir_target_get_dest_reg_index(dest) + comp;
   f->scalar.dest_modifier = ppir_codegen_get_outmod(dest->modifier);

   ppir_src *arg0 = alu->src;
   f->scalar.arg0_src = get_scl_reg_index(arg0, comp);
   f->scalar.arg0_absolute = arg0->absolute;
   f->scalar.arg0_negate = arg0->negate;

   switch (node->op) {
   case ppir_op_rsqrt: f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt; break;
   case ppir_op_log2:  f->scalar.op = ppir_codegen_combine_scalar_op_log2;  break;
   case ppir_op_exp2:  f->scalar.op = ppir_codegen_combine_scalar_op_exp2;  break;
   case ppir_op_rcp:   f->scalar.op = ppir_codegen_combine_scalar_op_rcp;   break;
   case ppir_op_sqrt:  f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;  break;
   case ppir_op_sin:   f->scalar.op = ppir_codegen_combine_scalar_op_sin;   break;
   case ppir_op_cos:   f->scalar.op = ppir_codegen_combine_scalar_op_cos;   break;
   default:
      break;
   }
}
589 
/*
 * Encode a ppir_op_store_temp node into the temp-write field at `code`.
 *
 * The alignment code is 2 for four components and (count - 1) otherwise;
 * the encoded index is the store index shifted left by (2 - alignment).
 */
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_store_node *store = ppir_node_to_store(node);
   ppir_codegen_field_temp_write *f = code;

   /* 0x03 (binary 11) selects the temporary destination. */
   f->temp_write.dest = 0x03;
   f->temp_write.source = store->src.reg->index;

   int count = store->num_components;
   int alignment = (count == 4) ? 2 : count - 1;
   f->temp_write.alignment = alignment;
   f->temp_write.index = store->index << (2 - alignment);

   f->temp_write.offset_reg = store->index >> 2;
}
607 
/*
 * Convert each of the constant's `num` float values to half precision and
 * store them, in order, into `code`.
 */
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   int slot = 0;

   while (slot < constant->num) {
      code[slot] = _mesa_float_to_half(constant->value[slot].f);
      slot++;
   }
}
613 
/*
 * Encode a ppir_op_discard node: the three words of the branch field are
 * set to the fixed PPIR_CODEGEN_DISCARD_WORD0..2 values.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_discard);

   ppir_codegen_field_branch *field = code;

   field->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   field->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   field->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
623 
/*
 * Encode a branch-slot node into the branch field at `code`.
 *
 * Discard nodes are delegated to ppir_codegen_encode_discard(). A branch
 * with two sources is conditional and copies the node's cond_* flags; a
 * branch with no sources sets all three flags (unconditional). Any other
 * source count trips an assert.
 *
 * The encoded target is the offset of the first instruction of the target
 * block relative to this node's instruction. Empty target blocks are
 * skipped by following the block list.
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   assert(node->op == ppir_op_branch);

   ppir_codegen_field_branch *field = code;
   ppir_branch_node *branch = ppir_node_to_branch(node);

   field->branch.unknown_0 = 0x0;
   field->branch.unknown_1 = 0x0;

   switch (branch->num_src) {
   case 2:
      field->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
      field->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
      field->branch.cond_gt = branch->cond_gt;
      field->branch.cond_eq = branch->cond_eq;
      field->branch.cond_lt = branch->cond_lt;
      break;
   case 0:
      /* Unconditional branch */
      field->branch.arg0_source = 0;
      field->branch.arg1_source = 0;
      field->branch.cond_gt = true;
      field->branch.cond_eq = true;
      field->branch.cond_lt = true;
      break;
   default:
      assert(false);
      break;
   }

   /* Walk forward past empty blocks until one has an instruction. */
   ppir_block *dest_block = branch->target;
   while (list_is_empty(&dest_block->instr_list) && dest_block->list.next)
      dest_block = list_entry(dest_block->list.next, ppir_block, list);

   assert(!list_is_empty(&dest_block->instr_list));

   ppir_instr *first =
      list_first_entry(&dest_block->instr_list, ppir_instr, list);
   field->branch.target = first->offset - node->instr->offset;
   field->branch.next_count = first->encode_size;
}
671 
672 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
673 
674 static const ppir_codegen_instr_slot_encode_func
675 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
676    [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
677    [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
678    [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
679    [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
680    [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
681    [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
682    [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
683    [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
684    [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
685    [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
686 };
687 
/*
 * Size in bits of each slot's encoded field, indexed by slot number.
 * NOTE(review): the entries are positional and presumably follow the
 * PPIR_INSTR_SLOT_* enum order - confirm against ppir.h before reordering.
 */
static const int ppir_codegen_field_size[] = {
   34,
   62,
   41,
   43,
   30,
   44,
   31,
   30,
   41,
   73,
};
691 
/* Number of 32-bit words needed to hold `size` bits, rounding up. */
static inline int align_to_word(int size)
{
   const int round_up = 0x1f;
   int padded = size + round_up;

   return padded >> 5;
}
696 
/*
 * Encoded size of an instruction in 32-bit words.
 *
 * Adds up the field size of every occupied slot plus 64 bits for each of
 * the two constant groups that has entries, rounds the total up to whole
 * words, and adds one more word.
 */
static int get_instr_encode_size(ppir_instr *instr)
{
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++)
      bits += instr->slots[slot] ? ppir_codegen_field_size[slot] : 0;

   for (int group = 0; group < 2; group++)
      bits += instr->constant[group].num ? 64 : 0;

   return align_to_word(bits) + 1;
}
713 
/*
 * Copy `src_size` bits from the start of `src` into `dst`, beginning at bit
 * offset `dst_offset` (bit 0 is the least significant bit of byte 0).
 *
 * Whole source bytes are transferred, so bits of the last source byte above
 * `src_size` are carried along too. When the destination offset is not
 * byte-aligned the data is OR-ed in, so the destination bits must already
 * be zero. A non-positive `src_size` copies nothing.
 *
 * The byte-aligned path copies exactly ceil(src_size / 8) bytes. Rounding
 * that length up to a whole 32-bit word could write up to three bytes past
 * the space reserved for the data when the destination is byte-aligned but
 * not word-aligned.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   unsigned char *out = dst;
   const unsigned char *in = src;
   int bit_off = dst_offset & 0x07;

   if (src_size <= 0)
      return;

   out += dst_offset >> 3;

   if (!bit_off) {
      memcpy(out, in, (size_t)((src_size + 7) >> 3));
      return;
   }

   /* Each source byte is split across two consecutive destination bytes. */
   int high_bits = 0x08 - bit_off;
   int copied = 0;
   for (;;) {
      /* Low part of the source byte fills the rest of the current byte. */
      *out |= (unsigned char)(*in << bit_off);
      out++;

      copied += high_bits;
      if (copied >= src_size)
         break;

      /* High part of the source byte starts the next destination byte. */
      *out |= (unsigned char)(*in >> high_bits);
      in++;

      copied += bit_off;
      if (copied >= src_size)
         break;
   }
}
743 
/*
 * Encode one instruction at `code` and return its size in 32-bit words.
 *
 * The control word sits at `code`; slot fields and then constants are
 * bit-packed directly after it. `sync` is set when the instruction has a
 * texture load or a ddx/ddy in either add slot. If `last_code` is non-NULL
 * it is treated as the previous instruction's control word, which gets this
 * instruction's size as next_count and has prefetch enabled.
 */
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   ppir_codegen_ctrl *ctrl = code;
   void *payload = ctrl + 1;
   int bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++) {
      if (!instr->slots[slot])
         continue;

      /* max field size (73), align to dword */
      uint8_t output[12] = {0};

      ppir_codegen_encode_slot[slot](instr->slots[slot], output);
      bitcopy(payload, bits, output, ppir_codegen_field_size[slot]);

      bits += ppir_codegen_field_size[slot];
      ctrl->fields |= 1 << slot;
   }

   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   /* Derivative ops in either add slot also set sync. */
   static const int add_slots[2] = {
      PPIR_INSTR_SLOT_ALU_VEC_ADD,
      PPIR_INSTR_SLOT_ALU_SCL_ADD,
   };
   for (int k = 0; k < 2; k++) {
      ppir_node *add = instr->slots[add_slots[k]];
      if (add && (add->op == ppir_op_ddx || add->op == ppir_op_ddy))
         ctrl->sync = true;
   }

   for (int group = 0; group < 2; group++) {
      if (!instr->constant[group].num)
         continue;

      uint16_t output[4] = {0};

      ppir_codegen_encode_const(instr->constant + group, output);
      bitcopy(payload, bits, output, instr->constant[group].num * 16);

      /* A constant group always takes 64 bits, whatever its count. */
      bits += 64;
      ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + group);
   }

   /* Payload words plus one for the control word. */
   int words = align_to_word(bits) + 1;

   ctrl->count = words;
   if (instr->stop)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *prev_ctrl = last_code;
      prev_ctrl->next_count = words;
      prev_ctrl->prefetch = true;
   }

   return words;
}
803 
/*
 * Dump the encoded program to stdout: for every instruction, its index and
 * offset, its raw words in hex (six per line), and its disassembly.
 *
 * The word count of each instruction is read from the low five bits of its
 * first encoded word.
 */
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *cursor = comp->prog->shader;
   unsigned word_offset = 0;

   printf("========ppir codegen========\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int words = cursor[0] & 0x1f;

         printf("%03d (@%6d): ", instr->index, instr->offset);
         for (int w = 0; w < words; w++) {
            /* Start a new indented line after every six words. */
            if (w > 0 && (w % 6) == 0)
               printf("\n    ");
            printf("%08x ", cursor[w]);
         }
         printf("\n");

         ppir_disassemble_instr(cursor, word_offset, stdout);

         cursor += words;
         word_offset += words;
      }
   }
   printf("-----------------------\n");
}
827 
/*
 * Generate the binary program for a compiled shader.
 *
 * Pass 1 assigns each instruction its word offset and encoded size, and
 * marks the last instruction of every block flagged `stop`. Pass 2 encodes
 * the instructions back to back into a zeroed buffer allocated under
 * comp->prog. Any previous shader buffer is freed, then the new buffer and
 * its size in bytes are stored on comp->prog.
 *
 * Returns false if the buffer cannot be allocated, true otherwise.
 */
bool ppir_codegen_prog(ppir_compiler *comp)
{
   int total_words = 0;

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->offset = total_words;
         instr->encode_size = get_instr_encode_size(instr);
         total_words += instr->encode_size;
      }

      /* Set stop flag for the last instruction if block has stop flag */
      if (block->stop) {
         ppir_instr *last =
            list_last_entry(&block->instr_list, ppir_instr, list);
         last->stop = true;
      }
   }

   uint32_t *prog = rzalloc_size(comp->prog, total_words * sizeof(uint32_t));
   if (!prog)
      return false;

   /* `prev` is handed to encode_instr() as the previous instruction. */
   uint32_t *prev = NULL;
   uint32_t *cursor = prog;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int words = encode_instr(instr, cursor, prev);
         prev = cursor;
         cursor += words;
      }
   }

   if (comp->prog->shader)
      ralloc_free(comp->prog->shader);

   comp->prog->shader = prog;
   comp->prog->state.shader_size = total_words * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}
868