/*
 * Copyright © 2024 Imagination Technologies Ltd.
 *
 * SPDX-License-Identifier: MIT
 */

/**
 * \file pco_trans_nir.c
 *
 * \brief NIR translation functions.
 */

#include "compiler/glsl/list.h"
#include "compiler/shader_enums.h"
#include "hwdef/rogue_hw_defs.h"
#include "pco.h"
#include "pco_builder.h"
#include "pco_internal.h"
#include "util/bitset.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/ralloc.h"

#include <assert.h>
#include <stdio.h>

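/* Translation proceeds top-down: pco_trans_nir() visits each NIR function,
 * trans_func()/trans_cf_nodes() mirror the control flow into PCO blocks, and
 * trans_instr() lowers the individual ALU, intrinsic and constant
 * instructions.
 */
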
/** Translation context. */
typedef struct _trans_ctx {
   pco_ctx *pco_ctx; /** PCO compiler context. */
   pco_shader *shader; /** Current shader. */
   pco_func *func; /** Current function. */
   pco_builder b; /** Builder. */
   gl_shader_stage stage; /** Shader stage. */

   BITSET_WORD *float_types; /** NIR SSA float vars. */
   BITSET_WORD *int_types; /** NIR SSA int vars. */
} trans_ctx;

/* Forward declarations. */
static pco_block *trans_cf_nodes(trans_ctx *tctx,
                                 pco_cf_node *parent_cf_node,
                                 struct list_head *cf_node_list,
                                 struct exec_list *nir_cf_node_list);

/**
 * \brief Splits a vector destination into scalar components.
 *
 * \param[in,out] tctx Translation context.
 * \param[in] instr Instruction producing the vector destination.
 * \param[in] dest Instruction destination.
 */
static void split_dest_comps(trans_ctx *tctx, pco_instr *instr, pco_ref dest)
{
   unsigned chans = pco_ref_get_chans(dest);
   assert(chans > 1);

   pco_func *func = tctx->func;

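   /* Track the vector and the per-channel comp instructions emitted below so
    * that later instructions (e.g. ALU sources) can reference individual
    * channels of this SSA def.
    */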
   pco_vec_info *vec_info = rzalloc_size(func->vec_infos, sizeof(*vec_info));
   vec_info->instr = instr;
   vec_info->comps =
      rzalloc_array_size(vec_info, sizeof(*vec_info->comps), chans);

   for (unsigned u = 0; u < chans; ++u) {
      pco_ref comp = pco_ref_new_ssa(func, pco_ref_get_bits(dest), 1);
      vec_info->comps[u] = pco_comp(&tctx->b, comp, dest, pco_ref_val16(u));
   }

   _mesa_hash_table_u64_insert(func->vec_infos, dest.val, vec_info);
}

/**
 * \brief Translates a NIR def into a PCO reference.
 *
 * \param[in] def The nir def.
 * \return The PCO reference.
 */
static inline pco_ref pco_ref_nir_def(const nir_def *def)
{
   return pco_ref_ssa(def->index, def->bit_size, def->num_components);
}

/**
 * \brief Translates a NIR src into a PCO reference.
 *
 * \param[in] src The nir src.
 * \return The PCO reference.
 */
static inline pco_ref pco_ref_nir_src(const nir_src *src)
{
   return pco_ref_nir_def(src->ssa);
}

/**
 * \brief Translates a NIR def into a PCO reference with type information.
 *
 * \param[in] def The nir def.
 * \param[in] tctx Translation context.
 * \return The PCO reference.
 */
static inline pco_ref pco_ref_nir_def_t(const nir_def *def, trans_ctx *tctx)
{
   pco_ref ref = pco_ref_nir_def(def);

   bool is_float = BITSET_TEST(tctx->float_types, def->index);
   bool is_int = BITSET_TEST(tctx->int_types, def->index);

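   /* Defs classified as neither float nor int keep the default (untyped)
    * dtype.
    */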
   if (is_float)
      ref.dtype = PCO_DTYPE_FLOAT;
   else if (is_int)
      ref.dtype = PCO_DTYPE_UNSIGNED;

   return ref;
}

/**
 * \brief Translates a NIR src into a PCO reference with type information.
 *
 * \param[in] src The nir src.
 * \param[in] tctx Translation context.
 * \return The PCO reference.
 */
static inline pco_ref pco_ref_nir_src_t(const nir_src *src, trans_ctx *tctx)
{
   return pco_ref_nir_def_t(src->ssa, tctx);
}

/**
 * \brief Translates a NIR alu src into a PCO reference with type information,
 * extracting from and/or building new vectors as needed.
 *
 * \param[in] alu The nir alu instruction.
 * \param[in] src The alu source index.
 * \param[in,out] tctx Translation context.
 * \return The PCO reference.
 */
static inline pco_ref
pco_ref_nir_alu_src_t(const nir_alu_instr *alu, unsigned src, trans_ctx *tctx)
{
   const nir_alu_src *alu_src = &alu->src[src];
   /* unsigned chans = nir_src_num_components(alu_src->src); */
   unsigned chans = nir_ssa_alu_instr_src_components(alu, src);

   bool seq_comps =
      nir_is_sequential_comp_swizzle((uint8_t *)alu_src->swizzle, chans);
   pco_ref ref = pco_ref_nir_src_t(&alu_src->src, tctx);
   uint8_t swizzle0 = alu_src->swizzle[0];

   /* Multiple channels, but referencing the entire vector; return as-is. */
   if (!swizzle0 && seq_comps && chans == nir_src_num_components(alu_src->src))
      return ref;

   pco_vec_info *vec_info =
      _mesa_hash_table_u64_search(tctx->func->vec_infos, ref.val);
   assert(vec_info);

   /* One channel; just return its component. */
   if (chans == 1)
      return vec_info->comps[swizzle0]->dest[0];

   /* Multiple channels, either a partial vec and/or swizzling; we need to
    * build a new vec for this.
    */
   pco_ref comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
   for (unsigned u = 0; u < chans; ++u)
      comps[u] = vec_info->comps[alu_src->swizzle[u]]->dest[0];

   pco_ref vec = pco_ref_new_ssa(tctx->func, pco_ref_get_bits(ref), chans);
   pco_instr *instr = pco_vec(&tctx->b, vec, chans, comps);

   split_dest_comps(tctx, instr, vec);

   return vec;
}

/**
 * \brief Translates a NIR vs load_input intrinsic into PCO.
 *
 * \param[in,out] tctx Translation context.
 * \param[in] intr load_input intrinsic.
 * \param[in] dest Instruction destination.
 * \return The translated PCO instruction.
 */
static pco_instr *
trans_load_input_vs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
{
   ASSERTED unsigned base = nir_intrinsic_base(intr);
   assert(!base);

   ASSERTED nir_alu_type type = nir_intrinsic_dest_type(intr);
   assert(type == nir_type_float32);
   /* TODO: f16 support. */

   ASSERTED const nir_src offset = intr->src[0];
   assert(nir_src_as_uint(offset) == 0);

   gl_vert_attrib location = nir_intrinsic_io_semantics(intr).location;
   unsigned component = nir_intrinsic_component(intr);
   unsigned chans = pco_ref_get_chans(dest);

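   /* Vertex inputs live in contiguous vtxin registers; attribs[] maps the
    * input location to its register range.
    */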
   const pco_range *range = &tctx->shader->data.vs.attribs[location];
   assert(component + chans <= range->count);

   pco_ref src =
      pco_ref_hwreg_vec(range->start + component, PCO_REG_CLASS_VTXIN, chans);
   return pco_mov(&tctx->b, dest, src, .rpt = chans);
}

/**
 * \brief Translates a NIR vs store_output intrinsic into PCO.
 *
 * \param[in,out] tctx Translation context.
 * \param[in] intr store_output intrinsic.
 * \param[in] src Instruction source.
 * \return The translated PCO instruction.
 */
static pco_instr *
trans_store_output_vs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
{
   ASSERTED unsigned base = nir_intrinsic_base(intr);
   assert(!base);

   ASSERTED nir_alu_type type = nir_intrinsic_src_type(intr);
   assert(type == nir_type_float32);
   /* TODO: f16 support. */

   ASSERTED const nir_src offset = intr->src[1];
   assert(nir_src_as_uint(offset) == 0);

   gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
   unsigned component = nir_intrinsic_component(intr);
   unsigned chans = pco_ref_get_chans(src);

   /* Only contiguous write masks supported. */
   ASSERTED unsigned write_mask = nir_intrinsic_write_mask(intr);
   assert(write_mask == BITFIELD_MASK(chans));

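   /* Vertex outputs are emitted with UVS writes; varyings[] maps the varying
    * slot to its vertex-output address range.
    */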
   const pco_range *range = &tctx->shader->data.vs.varyings[location];
   assert(component + chans <= range->count);

   pco_ref vtxout_addr = pco_ref_val8(range->start + component);
   return pco_uvsw_write(&tctx->b, src, vtxout_addr, .rpt = chans);
}

/**
 * \brief Translates a NIR fs load_input intrinsic into PCO.
 *
 * \param[in,out] tctx Translation context.
 * \param[in] intr load_input intrinsic.
 * \param[in] dest Instruction destination.
 * \return The translated PCO instruction.
 */
static pco_instr *
trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
{
   pco_fs_data *fs_data = &tctx->shader->data.fs;
   ASSERTED unsigned base = nir_intrinsic_base(intr);
   assert(!base);

   unsigned component = nir_intrinsic_component(intr);
   unsigned chans = pco_ref_get_chans(dest);

   ASSERTED const nir_src offset = intr->src[0];
   assert(nir_src_as_uint(offset) == 0);

   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
   gl_varying_slot location = io_semantics.location;

   nir_variable *var = nir_find_variable_with_location(tctx->shader->nir,
                                                       nir_var_shader_in,
                                                       location);

   enum pco_itr_mode itr_mode = PCO_ITR_MODE_PIXEL;
   assert(!(var->data.sample && var->data.centroid));
   if (var->data.sample)
      itr_mode = PCO_ITR_MODE_SAMPLE;
   else if (var->data.centroid)
      itr_mode = PCO_ITR_MODE_CENTROID;

   if (location == VARYING_SLOT_POS) {
      /* Only scalar supported for now. */
      /* TODO: support vector for zw. */
      assert(chans == 1);

      /* TODO: support packing/partial vars. */
      assert(!var->data.location_frac);

      assert(var->data.interpolation == INTERP_MODE_NOPERSPECTIVE);

      /* Special case: x and y are loaded from special registers. */
      /* TODO: select appropriate regs if sample rate shading. */
      switch (component) {
      case 0: /* x */
         return pco_mov(&tctx->b,
                        dest,
                        pco_ref_hwreg(PCO_SR_X_P, PCO_REG_CLASS_SPEC));

      case 1: /* y */
         return pco_mov(&tctx->b,
                        dest,
                        pco_ref_hwreg(PCO_SR_Y_P, PCO_REG_CLASS_SPEC));

      case 2:
         assert(fs_data->uses.z);
         component = 0;
         break;

      case 3:
         assert(fs_data->uses.w);
         component = fs_data->uses.z ? 1 : 0;
         break;

      default:
         unreachable("Unsupported gl_FragCoord component.");
      }
   }

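   /* Each iterated varying component occupies a full coefficient set in the
    * coefficient register bank, hence the ROGUE_USC_COEFFICIENT_SET_SIZE
    * scaling below.
    */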
   const pco_range *range = &fs_data->varyings[location];
   assert(component + (ROGUE_USC_COEFFICIENT_SET_SIZE * chans) <= range->count);

   unsigned coeffs_index =
      range->start + (ROGUE_USC_COEFFICIENT_SET_SIZE * component);

   pco_ref coeffs = pco_ref_hwreg_vec(coeffs_index,
                                      PCO_REG_CLASS_COEFF,
                                      ROGUE_USC_COEFFICIENT_SET_SIZE * chans);
   pco_ref itr_count = pco_ref_val16(chans);

   bool usc_itrsmp_enhanced =
      PVR_HAS_FEATURE(tctx->pco_ctx->dev_info, usc_itrsmp_enhanced);

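   /* Smooth (perspective-correct) varyings also need the W coefficient set;
    * noperspective varyings iterate their coefficients directly.
    */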
   switch (var->data.interpolation) {
   case INTERP_MODE_SMOOTH: {
      assert(fs_data->uses.w);

      unsigned wcoeffs_index = fs_data->uses.z ? ROGUE_USC_COEFFICIENT_SET_SIZE
                                               : 0;

      pco_ref wcoeffs = pco_ref_hwreg_vec(wcoeffs_index,
                                          PCO_REG_CLASS_COEFF,
                                          ROGUE_USC_COEFFICIENT_SET_SIZE);

      return usc_itrsmp_enhanced ? pco_ditrp(&tctx->b,
                                             dest,
                                             pco_ref_drc(PCO_DRC_0),
                                             coeffs,
                                             wcoeffs,
                                             itr_count,
                                             .itr_mode = itr_mode)
                                 : pco_fitrp(&tctx->b,
                                             dest,
                                             pco_ref_drc(PCO_DRC_0),
                                             coeffs,
                                             wcoeffs,
                                             itr_count,
                                             .itr_mode = itr_mode);
   }

   case INTERP_MODE_NOPERSPECTIVE:
      return usc_itrsmp_enhanced ? pco_ditr(&tctx->b,
                                            dest,
                                            pco_ref_drc(PCO_DRC_0),
                                            coeffs,
                                            itr_count,
                                            .itr_mode = itr_mode)
                                 : pco_fitr(&tctx->b,
                                            dest,
                                            pco_ref_drc(PCO_DRC_0),
                                            coeffs,
                                            itr_count,
                                            .itr_mode = itr_mode);

   default:
      /* Should have been previously lowered. */
      unreachable("Unsupported interpolation mode.");
   }
}

/**
 * \brief Translates a NIR fs store_output intrinsic into PCO.
 *
 * \param[in,out] tctx Translation context.
 * \param[in] intr store_output intrinsic.
 * \param[in] src Instruction source.
 * \return The translated PCO instruction.
 */
static pco_instr *
trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
{
   ASSERTED unsigned base = nir_intrinsic_base(intr);
   assert(!base);

   assert(pco_ref_is_scalar(src));
   unsigned component = nir_intrinsic_component(intr);

   ASSERTED const nir_src offset = intr->src[1];
   assert(nir_src_as_uint(offset) == 0);

   gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;

   const pco_range *range = &tctx->shader->data.fs.outputs[location];
   assert(component < range->count);

   ASSERTED bool output_reg = tctx->shader->data.fs.output_reg[location];
   assert(output_reg);
   /* TODO: tile buffer support. */

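   /* Fragment outputs currently go to pixel output registers; the move is
    * emitted with the olchk (overlap check) flag set.
    */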
   pco_ref dest = pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
   return pco_mov(&tctx->b, dest, src, .olchk = true);
}

/**
 * \brief Translates a NIR intrinsic instruction into PCO.
 *
 * \param[in,out] tctx Translation context.
 * \param[in] intr The nir intrinsic instruction.
 * \return The PCO instruction.
 */
static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];

   pco_ref dest = info->has_dest ? pco_ref_nir_def_t(&intr->def, tctx)
                                 : pco_ref_null();

   pco_ref src[NIR_MAX_VEC_COMPONENTS] = { 0 };
   for (unsigned s = 0; s < info->num_srcs; ++s)
      src[s] = pco_ref_nir_src_t(&intr->src[s], tctx);

   pco_instr *instr;
   switch (intr->intrinsic) {
   case nir_intrinsic_load_input:
      if (tctx->stage == MESA_SHADER_VERTEX)
         instr = trans_load_input_vs(tctx, intr, dest);
      else if (tctx->stage == MESA_SHADER_FRAGMENT)
         instr = trans_load_input_fs(tctx, intr, dest);
      else
         unreachable("Unsupported stage for \"nir_intrinsic_load_input\".");
      break;

   case nir_intrinsic_store_output:
      if (tctx->stage == MESA_SHADER_VERTEX)
         instr = trans_store_output_vs(tctx, intr, src[0]);
      else if (tctx->stage == MESA_SHADER_FRAGMENT)
         instr = trans_store_output_fs(tctx, intr, src[0]);
      else
         unreachable("Unsupported stage for \"nir_intrinsic_store_output\".");
      break;

   default:
      printf("Unsupported intrinsic: \"");
      nir_print_instr(&intr->instr, stdout);
      printf("\"\n");
      unreachable("Unsupported intrinsic.");
   }

   if (!pco_ref_is_scalar(dest))
      split_dest_comps(tctx, instr, dest);

   return instr;
}

/**
 * \brief Attempts to collate a vector within a vector.
 *
 * If a vector references another vector in its entirety in order/without
 * swizzling, we try to store a reference to said vector rather than its
 * individual components.
 *
 * \param[in] src The vector sources, starting from the reference channel.
 * \param[in] from The instruction to search back from.
 * \param[in] vec The potential vector reference from the parent instruction.
 * \param[in] vec_chans The number of sources/vector channels.
 * \return The vector reference on success, or a null reference if collation
 *         failed.
 */
static pco_ref
try_collate_vec(pco_ref *src, pco_instr *from, pco_ref vec, unsigned vec_chans)
{
   /* Skip the first one since it's our reference (and we already know its
    * component is 0).
    */
   for (unsigned s = 1; s < vec_chans; ++s) {
      pco_instr *parent_instr = find_parent_instr_from(src[s], from);
      assert(parent_instr);

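      /* Every channel must come from a comp of the same vector, at the
       * matching index; otherwise collation fails.
       */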
      if (parent_instr->op != PCO_OP_COMP)
         return pco_ref_null();

      pco_ref comp_src = parent_instr->src[0];
      unsigned comp_idx = pco_ref_get_imm(parent_instr->src[1]);
      ASSERTED unsigned chans = pco_ref_get_chans(comp_src);

      if (!pco_refs_are_equal(comp_src, vec))
         return pco_ref_null();

      assert(chans == vec_chans);

      if (comp_idx != s)
         return pco_ref_null();
   }

   return vec;
}

/**
 * \brief Attempts to collate vector sources.
 *
 * \param[in] tctx Translation context.
 * \param[in] num_srcs The number of sources/vector channels.
 * \param[in] src The sources/vector components.
 * \param[out] collated_src The collated sources.
 * \return The number of collated sources, or 0 if collation failed.
 */
static unsigned try_collate_vec_srcs(trans_ctx *tctx,
                                     unsigned num_srcs,
                                     pco_ref *src,
                                     pco_ref *collated_src)
{
   bool collated_vector = false;
   unsigned num_srcs_collated = 0;
   pco_instr *from = pco_cursor_instr(tctx->b.cursor);

   for (unsigned s = 0; s < num_srcs; ++s) {
      pco_instr *parent_instr = find_parent_instr_from(src[s], from);
      assert(parent_instr);

      /* This is a purely scalar source; append it and continue. */
      if (parent_instr->op != PCO_OP_COMP) {
         collated_src[num_srcs_collated++] = src[s];
         continue;
      }

      pco_ref comp_src = parent_instr->src[0];
      unsigned comp_idx = pco_ref_get_imm(parent_instr->src[1]);
      unsigned chans = pco_ref_get_chans(comp_src);

      /* We have a vector source, but it either:
       * - doesn't start from the first element
       * - is bigger than the remaining channels of *this* vec
       * so it's impossible for it to be contained in its entirety;
       * append the component and continue.
       */
      if (comp_idx != 0 || chans > (num_srcs - s)) {
         collated_src[num_srcs_collated++] = src[s];
         continue;
      }

      /* We have a candidate for an entire vector to be inserted. */
      pco_ref collated_ref = try_collate_vec(&src[s], from, comp_src, chans);
      if (pco_ref_is_null(collated_ref)) {
         collated_src[num_srcs_collated++] = src[s];
         continue;
      }

      /* We were successful; record this and increment accordingly. */
      collated_src[num_srcs_collated++] = collated_ref;

      s += chans - 1;
      collated_vector = true;
   }

   return collated_vector ? num_srcs_collated : 0;
}

/**
 * \brief Translates a NIR vec instruction into PCO, attempting collation.
 *
 * \param[in] tctx Translation context.
 * \param[in] dest Instruction destination.
 * \param[in] num_srcs The number of sources/vector components.
 * \param[in] src The sources/vector components.
 * \return The PCO instruction.
 */
static pco_instr *pco_trans_nir_vec(trans_ctx *tctx,
                                    pco_ref dest,
                                    unsigned num_srcs,
                                    pco_ref *src)
{
   /* If a vec contains entire other vecs, try to reference them directly. */
   pco_ref collated_src[NIR_MAX_VEC_COMPONENTS] = { 0 };
   unsigned num_srcs_collated =
      try_collate_vec_srcs(tctx, num_srcs, src, collated_src);
   if (!num_srcs_collated)
      return pco_vec(&tctx->b, dest, num_srcs, src);

   pco_instr *instr = pco_vec(&tctx->b, dest, num_srcs_collated, collated_src);

   /* Record the collated vectors. */
   for (unsigned s = 0; s < num_srcs_collated; ++s) {
      if (pco_ref_is_scalar(collated_src[s]))
         continue;

      pco_vec_info *vec_info =
         _mesa_hash_table_u64_search(tctx->func->vec_infos,
                                     collated_src[s].val);
      assert(vec_info);

      /* Record the vec user, or flag that there are multiple users. */
      vec_info->vec_user = vec_info->vec_user ? VEC_USER_MULTI : instr;
   }

   return instr;
}

/**
 * \brief Translates a NIR alu instruction into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] alu The nir alu instruction.
 * \return The PCO instruction.
 */
static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned num_srcs = info->num_inputs;

   pco_ref dest = pco_ref_nir_def_t(&alu->def, tctx);

   pco_ref src[NIR_MAX_VEC_COMPONENTS] = { 0 };
   for (unsigned s = 0; s < num_srcs; ++s)
      src[s] = pco_ref_nir_alu_src_t(alu, s, tctx);

   pco_instr *instr;
   switch (alu->op) {
   case nir_op_fneg:
      instr = pco_neg(&tctx->b, dest, src[0]);
      break;

   case nir_op_fabs:
      instr = pco_abs(&tctx->b, dest, src[0]);
      break;

   case nir_op_ffloor:
      instr = pco_flr(&tctx->b, dest, src[0]);
      break;

   case nir_op_fadd:
      instr = pco_fadd(&tctx->b, dest, src[0], src[1]);
      break;

   case nir_op_fmul:
      instr = pco_fmul(&tctx->b, dest, src[0], src[1]);
      break;

   case nir_op_ffma:
      instr = pco_fmad(&tctx->b, dest, src[0], src[1], src[2]);
      break;

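   /* pack_unorm_4x8: scale and pack four float channels into a single U8888
    * dword with a repeated pck.
    */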
   case nir_op_pack_unorm_4x8:
      instr = pco_pck(&tctx->b,
                      dest,
                      src[0],
                      .rpt = 4,
                      .pck_fmt = PCO_PCK_FMT_U8888,
                      .scale = true);
      break;

   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec5:
   case nir_op_vec8:
   case nir_op_vec16:
      instr = pco_trans_nir_vec(tctx, dest, num_srcs, src);
      break;

   default:
      printf("Unsupported alu instruction: \"");
      nir_print_instr(&alu->instr, stdout);
      printf("\"\n");
      unreachable("Unsupported ALU instruction.");
   }

   if (!pco_ref_is_scalar(dest))
      split_dest_comps(tctx, instr, dest);

   return instr;
}

/**
 * \brief Translates a NIR load constant instruction into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] nconst The nir load constant instruction.
 * \return The PCO instruction.
 */
static pco_instr *trans_const(trans_ctx *tctx, nir_load_const_instr *nconst)
{
   unsigned num_bits = nconst->def.bit_size;
   unsigned chans = nconst->def.num_components;

   /* TODO: support more bit sizes/components. */
   assert(num_bits == 32);

   pco_ref dest = pco_ref_nir_def_t(&nconst->def, tctx);

   if (pco_ref_is_scalar(dest)) {
      assert(chans == 1);

      uint64_t val = nir_const_value_as_uint(nconst->value[0], num_bits);
      pco_ref imm =
         pco_ref_imm(val, pco_bits(num_bits), pco_ref_get_dtype(dest));

      return pco_movi32(&tctx->b, dest, imm);
   }

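   /* Vector constant: emit an immediate move per channel, then gather the
    * channels with a vec.
    */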
   pco_ref comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
   for (unsigned c = 0; c < chans; ++c) {
      comps[c] = pco_ref_new_ssa(tctx->func, pco_ref_get_bits(dest), 1);

      uint64_t val = nir_const_value_as_uint(nconst->value[c], num_bits);
      pco_ref imm =
         pco_ref_imm(val, pco_bits(num_bits), pco_ref_get_dtype(dest));

      pco_movi32(&tctx->b, comps[c], imm);
   }

   pco_instr *instr = pco_vec(&tctx->b, dest, chans, comps);

   split_dest_comps(tctx, instr, dest);

   return instr;
}

/**
 * \brief Translates a NIR instruction into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] ninstr The nir instruction.
 * \return The PCO instruction.
 */
static pco_instr *trans_instr(trans_ctx *tctx, nir_instr *ninstr)
{
   switch (ninstr->type) {
   case nir_instr_type_intrinsic:
      return trans_intr(tctx, nir_instr_as_intrinsic(ninstr));

   case nir_instr_type_load_const:
      return trans_const(tctx, nir_instr_as_load_const(ninstr));

   case nir_instr_type_alu:
      return trans_alu(tctx, nir_instr_as_alu(ninstr));

   default:
      break;
   }

   unreachable("Unsupported NIR instruction type.");
}

/**
 * \brief Translates a NIR block into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] nblock The nir block.
 * \return The PCO block.
 */
static pco_block *trans_block(trans_ctx *tctx, nir_block *nblock)
{
   pco_block *block = pco_block_create(tctx->func);
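   /* Point the builder at the end of the new block; translated instructions
    * are appended to it.
    */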
   tctx->b = pco_builder_create(tctx->func, pco_cursor_after_block(block));

   nir_foreach_instr (ninstr, nblock) {
      trans_instr(tctx, ninstr);
   }

   return block;
}

/**
 * \brief Translates a NIR if into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] nif The nir if.
 * \return The PCO if.
 */
static pco_if *trans_if(trans_ctx *tctx, nir_if *nif)
{
   pco_if *pif = pco_if_create(tctx->func);

   unreachable("finishme: trans_if");

   return pif;
}

/**
 * \brief Translates a NIR loop into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] nloop The nir loop.
 * \return The PCO loop.
 */
static pco_loop *trans_loop(trans_ctx *tctx, nir_loop *nloop)
{
   pco_loop *loop = pco_loop_create(tctx->func);

   unreachable("finishme: trans_loop");

   return loop;
}

/**
 * \brief Translates a NIR function into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] impl The nir function impl.
 * \return The PCO function.
 */
static pco_func *trans_func(trans_ctx *tctx, nir_function_impl *impl)
{
   nir_function *nfunc = impl->function;
   enum pco_func_type func_type = PCO_FUNC_TYPE_CALLABLE;

   if (nfunc->is_preamble)
      func_type = PCO_FUNC_TYPE_PREAMBLE;
   else if (nfunc->is_entrypoint)
      func_type = PCO_FUNC_TYPE_ENTRYPOINT;

   pco_func *func = pco_func_create(tctx->shader, func_type, nfunc->num_params);
   tctx->func = func;

   func->name = ralloc_strdup(func, nfunc->name);
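   /* Carry over NIR's SSA index space so pco_ref_nir_def() indices stay
    * valid; new PCO SSA defs are allocated after it.
    */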
   func->next_ssa = impl->ssa_alloc;

   /* TODO: Function parameter support. */
   assert(func->num_params == 0 && func->params == NULL);

   /* Gather types. */
   tctx->float_types =
      rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   tctx->int_types =
      rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   nir_gather_types(impl, tctx->float_types, tctx->int_types);

   trans_cf_nodes(tctx, &func->cf_node, &func->body, &impl->body);

   ralloc_free(tctx->float_types);
   ralloc_free(tctx->int_types);

   return func;
}

/**
 * \brief Translates NIR control flow nodes into PCO.
 *
 * \param[in] tctx Translation context.
 * \param[in] parent_cf_node The parent cf node.
 * \param[in,out] cf_node_list The PCO cf node list.
 * \param[in] nir_cf_node_list The NIR cf node list.
 * \return The first block from the cf nodes.
 */
static pco_block *trans_cf_nodes(trans_ctx *tctx,
                                 pco_cf_node *parent_cf_node,
                                 struct list_head *cf_node_list,
                                 struct exec_list *nir_cf_node_list)
{
   pco_block *start_block = NULL;

   pco_cf_node *cf_node;
   foreach_list_typed (nir_cf_node, ncf_node, node, nir_cf_node_list) {
      switch (ncf_node->type) {
      case nir_cf_node_block: {
         pco_block *block = trans_block(tctx, nir_cf_node_as_block(ncf_node));
         cf_node = &block->cf_node;

         if (!start_block)
            start_block = block;
         break;
      }

      case nir_cf_node_if: {
         pco_if *pif = trans_if(tctx, nir_cf_node_as_if(ncf_node));
         cf_node = &pif->cf_node;
         break;
      }

      case nir_cf_node_loop: {
         pco_loop *loop = trans_loop(tctx, nir_cf_node_as_loop(ncf_node));
         cf_node = &loop->cf_node;
         break;
      }

      default:
         unreachable("Unsupported NIR cf node type.");
      }

      cf_node->parent = parent_cf_node;
      list_addtail(&cf_node->link, cf_node_list);
   }

   return start_block;
}

/**
 * \brief Translates a NIR shader into a PCO shader.
 *
 * \param[in] ctx PCO compiler context.
 * \param[in] nir NIR shader.
 * \param[in] data Shader-specific data.
 * \param[in] mem_ctx Ralloc memory allocation context.
 * \return The PCO shader.
 */
pco_shader *
pco_trans_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data, void *mem_ctx)
{
   pco_shader *shader = pco_shader_create(ctx, nir, mem_ctx);

   if (data)
      memcpy(&shader->data, data, sizeof(*data));

   trans_ctx tctx = {
      .pco_ctx = ctx,
      .shader = shader,
      .stage = shader->stage,
   };

   nir_foreach_function_with_impl (func, impl, nir) {
      trans_func(&tctx, impl);
   }

   if (pco_should_print_shader(shader))
      pco_print_shader(shader, stdout, "before passes");

   return shader;
}