/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include "compiler/shader_enums.h"
#include "nir/nir.h"
#include "rogue_build_data.h"
#include "rogue_nir_helpers.h"
#include "rogue_operand.h"
#include "util/macros.h"

#define __pvr_address_type uint64_t
#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr)
#define __pvr_make_address(addr_u64) (addr_u64)

#include "csbgen/rogue_pds.h"

#undef __pvr_make_address
#undef __pvr_get_address
#undef __pvr_address_type
/**
 * \brief Allocates the coefficient registers that will contain the iterator
 * data for the fragment shader input varyings.
 *
 * \param[in] args The iterator argument data.
 * \return The total number of coefficient registers required by the iterators.
 */
static size_t alloc_iterator_regs(struct rogue_iterator_args *args)
{
   size_t coeffs = 0;

   for (size_t u = 0; u < args->num_fpu_iterators; ++u) {
      /* Ensure there aren't any gaps. */
      assert(args->base[u] == ~0);

      args->base[u] = coeffs;
      coeffs += ROGUE_COEFF_ALIGN * args->components[u];
   }

   return coeffs;
}
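
/* Worked example for the allocation above (a sketch; the value of
 * ROGUE_COEFF_ALIGN is assumed to be 4 here):
 *
 *    Iterator 0: W,    1 component  -> base[0] = 0,  coeffs 0..3
 *    Iterator 1: vec4, 4 components -> base[1] = 4,  coeffs 4..19
 *    Iterator 2: vec2, 2 components -> base[2] = 20, coeffs 20..27
 *
 * alloc_iterator_regs() would then return 28 coefficient registers in total.
 */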

/**
 * \brief Reserves an iterator for a fragment shader input varying,
 * and calculates its setup data.
 *
 * \param[in] args The iterator argument data.
 * \param[in] i The iterator index.
 * \param[in] type The interpolation type of the varying.
 * \param[in] f16 Whether the data type is F16 or F32.
 * \param[in] components The number of components in the varying.
 */
static void reserve_iterator(struct rogue_iterator_args *args,
                             size_t i,
                             enum glsl_interp_mode type,
                             bool f16,
                             size_t components)
{
   struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 };

   assert(components >= 1 && components <= 4);

   /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */
   assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE);
   assert(i < ARRAY_SIZE(args->fpu_iterators));

   switch (type) {
   /* Default interpolation is smooth. */
   case INTERP_MODE_NONE:
      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
      data.perspective = true;
      break;

   case INTERP_MODE_NOPERSPECTIVE:
      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
      data.perspective = false;
      break;

   default:
      unreachable("Unimplemented interpolation type.");
   }

   /* Number of components in this varying
    * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D).
    */
   data.size = (components - 1);

   /* TODO: Investigate F16 support. */
   assert(!f16);
   data.f16 = f16;

   /* Offsets within the vertex. */
   data.f32_offset = 2 * i;
   data.f16_offset = data.f32_offset;

   ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
   args->destination[i] = i;
   args->base[i] = ~0;
   args->components[i] = components;
   ++args->num_fpu_iterators;
}
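
/* Example (a sketch, not taken from a real shader): reserving iterator 2 for
 * a smooth-shaded vec3 varying packs a DOUTI source with
 * shademodel = GOURUAD, perspective = true, size = 2 (i.e. 3D), f16 = false
 * and f32_offset = 4, then records destination[2] = 2 and components[2] = 3.
 * base[2] stays ~0 until alloc_iterator_regs() assigns it.
 */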

/**
 * \brief Collects the fragment shader I/O data to feed back to the driver.
 *
 * \sa #collect_io_data()
 *
 * \param[in] common_data Common build data.
 * \param[in] fs_data Fragment-specific build data.
 * \param[in] nir NIR fragment shader.
 * \return true if successful, otherwise false.
 */
static bool collect_io_data_fs(struct rogue_common_build_data *common_data,
                               struct rogue_fs_build_data *fs_data,
                               nir_shader *nir)
{
   size_t num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
   assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));

   /* Process inputs (if present). */
   if (num_inputs) {
      /* If the fragment shader has inputs, the first iterator
       * must be used for the W component.
       */
      reserve_iterator(&fs_data->iterator_args,
                       0,
                       INTERP_MODE_NOPERSPECTIVE,
                       false,
                       1);

      nir_foreach_shader_in_variable (var, nir) {
         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
         size_t components = glsl_get_components(var->type);
         enum glsl_interp_mode interp = var->data.interpolation;
         bool f16 = glsl_type_is_16bit(var->type);

         /* Check that inputs are either F16 or F32. */
         assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
         assert(f16 || glsl_type_is_32bit(var->type));

         /* Check input location. */
         assert(var->data.location >= VARYING_SLOT_VAR0 &&
                var->data.location <= VARYING_SLOT_VAR31);

         reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
      }

      common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
      assert(common_data->coeffs);
      assert(common_data->coeffs < ROGUE_MAX_REG_COEFF);
   }

   /* TODO: Process outputs. */

   return true;
}
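
/* Iterator index mapping used above (illustrative):
 *
 *    iterator 0            -> W (INTERP_MODE_NOPERSPECTIVE, 1 component)
 *    VARYING_SLOT_VAR0 + n -> iterator n + 1
 *
 * e.g. a shader reading VARYING_SLOT_VAR3 reserves iterator 4.
 */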

/**
 * \brief Allocates the vertex shader input registers.
 *
 * \param[in] inputs The vertex shader input data.
 * \return The total number of vertex input registers required.
 */
static size_t alloc_vs_inputs(struct rogue_vertex_inputs *inputs)
{
   size_t vs_inputs = 0;

   for (size_t u = 0; u < inputs->num_input_vars; ++u) {
      /* Ensure there aren't any gaps. */
      assert(inputs->base[u] == ~0);

      inputs->base[u] = vs_inputs;
      vs_inputs += inputs->components[u];
   }

   return vs_inputs;
}

/**
 * \brief Allocates the vertex shader outputs.
 *
 * \param[in] outputs The vertex shader output data.
 * \return The total number of vertex outputs required.
 */
static size_t alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
{
   size_t vs_outputs = 0;

   for (size_t u = 0; u < outputs->num_output_vars; ++u) {
      /* Ensure there aren't any gaps. */
      assert(outputs->base[u] == ~0);

      outputs->base[u] = vs_outputs;
      vs_outputs += outputs->components[u];
   }

   return vs_outputs;
}
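
/* Worked example for the two allocators above (a sketch): given three inputs
 * of 4, 2 and 3 components, alloc_vs_inputs() assigns base = {0, 4, 6} and
 * returns 9 vertex input registers. alloc_vs_outputs() packs outputs the same
 * way; unlike the coefficient registers, there is no alignment padding
 * between entries.
 */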

/**
 * \brief Counts the varyings used by the vertex shader.
 *
 * \param[in] outputs The vertex shader output data.
 * \return The number of varyings used.
 */
static size_t count_vs_varyings(struct rogue_vertex_outputs *outputs)
{
   size_t varyings = 0;

   /* Skip the position. */
   for (size_t u = 1; u < outputs->num_output_vars; ++u)
      varyings += outputs->components[u];

   return varyings;
}

/**
 * \brief Reserves space for a vertex shader input.
 *
 * \param[in] inputs The vertex input data.
 * \param[in] i The vertex input index.
 * \param[in] components The number of components in the input.
 */
static void reserve_vs_input(struct rogue_vertex_inputs *inputs,
                             size_t i,
                             size_t components)
{
   assert(components >= 1 && components <= 4);

   assert(i < ARRAY_SIZE(inputs->base));

   inputs->base[i] = ~0;
   inputs->components[i] = components;
   ++inputs->num_input_vars;
}

/**
 * \brief Reserves space for a vertex shader output.
 *
 * \param[in] outputs The vertex output data.
 * \param[in] i The vertex output index.
 * \param[in] components The number of components in the output.
 */
static void reserve_vs_output(struct rogue_vertex_outputs *outputs,
                              size_t i,
                              size_t components)
{
   assert(components >= 1 && components <= 4);

   assert(i < ARRAY_SIZE(outputs->base));

   outputs->base[i] = ~0;
   outputs->components[i] = components;
   ++outputs->num_output_vars;
}

/**
 * \brief Collects the vertex shader I/O data to feed back to the driver.
 *
 * \sa #collect_io_data()
 *
 * \param[in] common_data Common build data.
 * \param[in] vs_data Vertex-specific build data.
 * \param[in] nir NIR vertex shader.
 * \return true if successful, otherwise false.
 */
static bool collect_io_data_vs(struct rogue_common_build_data *common_data,
                               struct rogue_vs_build_data *vs_data,
                               nir_shader *nir)
{
   ASSERTED bool out_pos_present = false;
   ASSERTED size_t num_outputs =
      nir_count_variables_with_modes(nir, nir_var_shader_out);

   /* Process inputs. */
   nir_foreach_shader_in_variable (var, nir) {
      size_t components = glsl_get_components(var->type);
      size_t i = var->data.location - VERT_ATTRIB_GENERIC0;

      /* Check that inputs are F32. */
      /* TODO: Support other types. */
      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
      assert(glsl_type_is_32bit(var->type));

      /* Check input location. */
      assert(var->data.location >= VERT_ATTRIB_GENERIC0 &&
             var->data.location <= VERT_ATTRIB_GENERIC15);

      reserve_vs_input(&vs_data->inputs, i, components);
   }

   vs_data->num_vertex_input_regs = alloc_vs_inputs(&vs_data->inputs);
   assert(vs_data->num_vertex_input_regs);
   assert(vs_data->num_vertex_input_regs < ROGUE_MAX_REG_VERTEX_IN);

   /* Process outputs. */

   /* We should always have at least a position variable. */
   assert(num_outputs > 0 && "Invalid number of vertex shader outputs.");

   nir_foreach_shader_out_variable (var, nir) {
      size_t components = glsl_get_components(var->type);

      /* Check that outputs are F32. */
      /* TODO: Support other types. */
      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
      assert(glsl_type_is_32bit(var->type));

      if (var->data.location == VARYING_SLOT_POS) {
         assert(components == 4);
         out_pos_present = true;

         reserve_vs_output(&vs_data->outputs, 0, components);
      } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
                 (var->data.location <= VARYING_SLOT_VAR31)) {
         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
         reserve_vs_output(&vs_data->outputs, i, components);
      } else {
         unreachable("Unsupported vertex output type.");
      }
   }

   /* The output position must always be present. */
   assert(out_pos_present);

   vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
   assert(vs_data->num_vertex_outputs);
   assert(vs_data->num_vertex_outputs < ROGUE_MAX_VERTEX_OUTPUTS);

   vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);

   return true;
}
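
/* Output slot mapping used above (illustrative):
 *
 *    VARYING_SLOT_POS      -> output 0 (always present, always vec4)
 *    VARYING_SLOT_VAR0 + n -> output n + 1
 *
 * so a shader writing gl_Position plus VAR0 and VAR1 reserves outputs 0, 1
 * and 2.
 */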

/**
 * \brief Allocates the shared registers that will contain the UBOs.
 *
 * \param[in] ubo_data The UBO data.
 * \return The total number of shared registers required by the UBOs.
 */
static size_t alloc_ubos(struct rogue_ubo_data *ubo_data)
{
   size_t shareds = 0;

   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
      /* Ensure there aren't any gaps. */
      assert(ubo_data->dest[u] == ~0);

      ubo_data->dest[u] = shareds;
      shareds += ubo_data->size[u];
   }

   return shareds;
}

/**
 * \brief Reserves a UBO and calculates its data.
 *
 * \param[in] ubo_data The UBO data.
 * \param[in] desc_set The UBO descriptor set.
 * \param[in] binding The UBO binding.
 * \param[in] size The size required by the UBO (in dwords).
 */
static void reserve_ubo(struct rogue_ubo_data *ubo_data,
                        size_t desc_set,
                        size_t binding,
                        size_t size)
{
   size_t i = ubo_data->num_ubo_entries;
   assert(i < ARRAY_SIZE(ubo_data->desc_set));

   ubo_data->desc_set[i] = desc_set;
   ubo_data->binding[i] = binding;
   ubo_data->dest[i] = ~0;
   ubo_data->size[i] = size;
   ++ubo_data->num_ubo_entries;
}

/**
 * \brief Collects UBO data to feed back to the driver.
 *
 * \param[in] common_data Common build data.
 * \param[in] nir NIR shader.
 * \return true if successful, otherwise false.
 */
static bool collect_ubo_data(struct rogue_common_build_data *common_data,
                             nir_shader *nir)
{
   /* Iterate over each UBO. */
   nir_foreach_variable_with_modes (var, nir, nir_var_mem_ubo) {
      size_t desc_set = var->data.driver_location;
      size_t binding = var->data.binding;
      size_t ubo_size_regs = 0;

      nir_function_impl *entry = nir_shader_get_entrypoint(nir);
      /* Iterate over each load_ubo that uses this UBO. */
      nir_foreach_block (block, entry) {
         nir_foreach_instr (instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_ubo)
               continue;

            assert(nir_src_num_components(intr->src[0]) == 2);
            assert(nir_intr_src_is_const(intr, 0));

            size_t load_desc_set = nir_intr_src_comp_const(intr, 0, 0);
            size_t load_binding = nir_intr_src_comp_const(intr, 0, 1);

            if (load_desc_set != desc_set || load_binding != binding)
               continue;

            ASSERTED size_t size_bytes = nir_intrinsic_range(intr);
            assert(size_bytes == ROGUE_REG_SIZE_BYTES);

            size_t offset_bytes = nir_intrinsic_range_base(intr);
            assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));

            size_t offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;

            /* TODO: Put offsets in a BITSET_DECLARE and check for gaps. */

            /* Find the largest load offset. */
            ubo_size_regs = MAX2(ubo_size_regs, offset_regs);
         }
      }

      /* UBO size = largest offset + 1. */
      ++ubo_size_regs;

      reserve_ubo(&common_data->ubo_data, desc_set, binding, ubo_size_regs);
   }

   common_data->shareds = alloc_ubos(&common_data->ubo_data);
   assert(common_data->shareds < ROGUE_MAX_REG_SHARED);

   return true;
}
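
/* Worked example (a sketch, assuming 4-byte registers, i.e.
 * ROGUE_REG_SIZE_BYTES == 4): a UBO accessed with load_ubo range_base offsets
 * of 0, 4 and 12 bytes yields register offsets 0, 1 and 3; the largest offset
 * is 3, so reserve_ubo() is called with a size of 4 dwords. Note that gaps
 * (register offset 2 here) are still counted; detecting them is covered by
 * the TODO above.
 */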

/**
 * \brief Collects I/O data to feed back to the driver.
 *
 * Collects the inputs/outputs/memory required, and feeds that back to the
 * driver. This is done here rather than at the start of rogue_to_binary so
 * that the I/O of all shader stages is known before backend compilation,
 * which makes optimizations such as culling unused inputs possible.
 *
 * \param[in] ctx Shared multi-stage build context.
 * \param[in] nir NIR shader.
 * \return true if successful, otherwise false.
 */
bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
{
   gl_shader_stage stage = nir->info.stage;
   struct rogue_common_build_data *common_data = &ctx->common_data[stage];

   /* Collect stage-agnostic data. */
   if (!collect_ubo_data(common_data, nir))
      return false;

   /* Collect stage-specific data. */
   switch (stage) {
   case MESA_SHADER_FRAGMENT:
      return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);

   case MESA_SHADER_VERTEX:
      return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);

   default:
      break;
   }

   return false;
}
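
/* Usage sketch (hypothetical driver-side call, assuming a populated build
 * context and a lowered NIR shader):
 *
 *    if (!rogue_collect_io_data(ctx, nir))
 *       return false; // Unsupported stage or I/O layout.
 *
 * The results land in ctx->common_data[stage] and ctx->stage_data.
 */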

/**
 * \brief Returns the allocated coefficient register index for a component of
 * an input varying location.
 *
 * \param[in] args The allocated iterator argument data.
 * \param[in] location The input varying location, or ~0 for the W coefficient.
 * \param[in] component The requested component.
 * \return The coefficient register index.
 */
size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
                            gl_varying_slot location,
                            size_t component)
{
   size_t i;

   /* Special case: W coefficient. */
   if (location == ~0) {
      /* The W component shouldn't be the only one. */
      assert(args->num_fpu_iterators > 1);
      assert(args->destination[0] == 0);
      return 0;
   }

   i = (location - VARYING_SLOT_VAR0) + 1;
   assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
   assert(i < args->num_fpu_iterators);
   assert(component < args->components[i]);
   assert(args->base[i] != ~0);

   return args->base[i] + (ROGUE_COEFF_ALIGN * component);
}
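
/* Worked example (a sketch, assuming ROGUE_COEFF_ALIGN == 4): with W in
 * iterator 0 and a vec4 in VARYING_SLOT_VAR0 (iterator 1, base[1] = 4),
 * rogue_coeff_index_fs(args, VARYING_SLOT_VAR0, 2) returns 4 + 4 * 2 = 12,
 * i.e. the first coefficient register of the varying's z component.
 */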

/**
 * \brief Returns the allocated vertex output index for a component of an
 * output varying location.
 *
 * \param[in] outputs The vertex output data.
 * \param[in] location The output varying location.
 * \param[in] component The requested component.
 * \return The vertex output index.
 */
size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
                             gl_varying_slot location,
                             size_t component)
{
   size_t i;

   if (location == VARYING_SLOT_POS) {
      /* Always at location 0. */
      assert(outputs->base[0] == 0);
      i = 0;
   } else if ((location >= VARYING_SLOT_VAR0) &&
              (location <= VARYING_SLOT_VAR31)) {
      i = (location - VARYING_SLOT_VAR0) + 1;
   } else {
      unreachable("Unsupported vertex output type.");
   }

   assert(i < outputs->num_output_vars);
   assert(component < outputs->components[i]);
   assert(outputs->base[i] != ~0);

   return outputs->base[i] + component;
}

/**
 * \brief Returns the allocated shared register index for a given UBO offset.
 *
 * \param[in] ubo_data The UBO data.
 * \param[in] desc_set The UBO descriptor set.
 * \param[in] binding The UBO binding.
 * \param[in] offset_bytes The UBO offset in bytes.
 * \return The shared register index for the UBO offset.
 */
size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
                     size_t desc_set,
                     size_t binding,
                     size_t offset_bytes)
{
   size_t ubo_index = ~0;
   size_t offset_regs;

   /* Find the UBO located at (desc_set, binding). */
   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
      if (ubo_data->dest[u] == ~0)
         continue;

      if (ubo_data->desc_set[u] != desc_set || ubo_data->binding[u] != binding)
         continue;

      ubo_index = u;
      break;
   }

   assert(ubo_index != ~0);

   assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
   offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;

   return ubo_data->dest[ubo_index] + offset_regs;
}
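
/* Worked example (a sketch, assuming 4-byte registers): if the UBO at
 * (desc_set 0, binding 1) was allocated dest = 8 by alloc_ubos(), then
 * rogue_ubo_reg(ubo_data, 0, 1, 12) returns 8 + 12 / ROGUE_REG_SIZE_BYTES
 * = 11.
 */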