• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 
30 #include "compiler/shader_enums.h"
31 #include "nir/nir.h"
32 #include "rogue_build_data.h"
33 #include "rogue_nir_helpers.h"
34 #include "rogue_operand.h"
35 #include "util/macros.h"
36 
37 #define __pvr_address_type uint64_t
38 #define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr)
39 #define __pvr_make_address(addr_u64) (addr_u64)
40 
41 #include "csbgen/rogue_pds.h"
42 
43 #undef __pvr_make_address
44 #undef __pvr_get_address
45 #undef __pvr_address_type
46 
47 /**
48  * \brief Allocates the coefficient registers that will contain the iterator
49  * data for the fragment shader input varyings.
50  *
51  * \param[in] args The iterator argument data.
52  * \return The total number of coefficient registers required by the iterators.
53  */
alloc_iterator_regs(struct rogue_iterator_args * args)54 static size_t alloc_iterator_regs(struct rogue_iterator_args *args)
55 {
56    size_t coeffs = 0;
57 
58    for (size_t u = 0; u < args->num_fpu_iterators; ++u) {
59       /* Ensure there aren't any gaps. */
60       assert(args->base[u] == ~0);
61 
62       args->base[u] = coeffs;
63       coeffs += ROGUE_COEFF_ALIGN * args->components[u];
64    }
65 
66    return coeffs;
67 }
68 
69 /**
70  * \brief Reserves an iterator for a fragment shader input varying,
71  * and calculates its setup data.
72  *
73  * \param[in] args The iterator argument data.
74  * \param[in] i The iterator index.
75  * \param[in] type The interpolation type of the varying.
76  * \param[in] f16 Whether the data type is F16 or F32.
77  * \param[in] components The number of components in the varying.
78  */
reserve_iterator(struct rogue_iterator_args * args,size_t i,enum glsl_interp_mode type,bool f16,size_t components)79 static void reserve_iterator(struct rogue_iterator_args *args,
80                              size_t i,
81                              enum glsl_interp_mode type,
82                              bool f16,
83                              size_t components)
84 {
85    struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 };
86 
87    assert(components >= 1 && components <= 4);
88 
89    /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */
90    assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE);
91    assert(i < ARRAY_SIZE(args->fpu_iterators));
92 
93    switch (type) {
94    /* Default interpolation is smooth. */
95    case INTERP_MODE_NONE:
96       data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
97       data.perspective = true;
98       break;
99 
100    case INTERP_MODE_NOPERSPECTIVE:
101       data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
102       data.perspective = false;
103       break;
104 
105    default:
106       unreachable("Unimplemented interpolation type.");
107    }
108 
109    /* Number of components in this varying
110     * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D).
111     */
112    data.size = (components - 1);
113 
114    /* TODO: Investigate F16 support. */
115    assert(!f16);
116    data.f16 = f16;
117 
118    /* Offsets within the vertex. */
119    data.f32_offset = 2 * i;
120    data.f16_offset = data.f32_offset;
121 
122    ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
123    args->destination[i] = i;
124    args->base[i] = ~0;
125    args->components[i] = components;
126    ++args->num_fpu_iterators;
127 }
128 
129 /**
130  * \brief Collects the fragment shader I/O data to feed-back to the driver.
131  *
132  * \sa #collect_io_data()
133  *
134  * \param[in] common_data Common build data.
135  * \param[in] fs_data Fragment-specific build data.
136  * \param[in] nir NIR fragment shader.
137  * \return true if successful, otherwise false.
138  */
collect_io_data_fs(struct rogue_common_build_data * common_data,struct rogue_fs_build_data * fs_data,nir_shader * nir)139 static bool collect_io_data_fs(struct rogue_common_build_data *common_data,
140                                struct rogue_fs_build_data *fs_data,
141                                nir_shader *nir)
142 {
143    size_t num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
144    assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));
145 
146    /* Process inputs (if present). */
147    if (num_inputs) {
148       /* If the fragment shader has inputs, the first iterator
149        * must be used for the W component.
150        */
151       reserve_iterator(&fs_data->iterator_args,
152                        0,
153                        INTERP_MODE_NOPERSPECTIVE,
154                        false,
155                        1);
156 
157       nir_foreach_shader_in_variable (var, nir) {
158          size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
159          size_t components = glsl_get_components(var->type);
160          enum glsl_interp_mode interp = var->data.interpolation;
161          bool f16 = glsl_type_is_16bit(var->type);
162 
163          /* Check that arguments are either F16 or F32. */
164          assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
165          assert(f16 || glsl_type_is_32bit(var->type));
166 
167          /* Check input location. */
168          assert(var->data.location >= VARYING_SLOT_VAR0 &&
169                 var->data.location <= VARYING_SLOT_VAR31);
170 
171          reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
172       }
173 
174       common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
175       assert(common_data->coeffs);
176       assert(common_data->coeffs < ROGUE_MAX_REG_COEFF);
177    }
178 
179    /* TODO: Process outputs. */
180 
181    return true;
182 }
183 
184 /**
185  * \brief Allocates the vertex shader input registers.
186  *
187  * \param[in] inputs The vertex shader input data.
188  * \return The total number of vertex input registers required.
189  */
alloc_vs_inputs(struct rogue_vertex_inputs * inputs)190 static size_t alloc_vs_inputs(struct rogue_vertex_inputs *inputs)
191 {
192    size_t vs_inputs = 0;
193 
194    for (size_t u = 0; u < inputs->num_input_vars; ++u) {
195       /* Ensure there aren't any gaps. */
196       assert(inputs->base[u] == ~0);
197 
198       inputs->base[u] = vs_inputs;
199       vs_inputs += inputs->components[u];
200    }
201 
202    return vs_inputs;
203 }
204 
205 /**
206  * \brief Allocates the vertex shader outputs.
207  *
208  * \param[in] outputs The vertex shader output data.
209  * \return The total number of vertex outputs required.
210  */
alloc_vs_outputs(struct rogue_vertex_outputs * outputs)211 static size_t alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
212 {
213    size_t vs_outputs = 0;
214 
215    for (size_t u = 0; u < outputs->num_output_vars; ++u) {
216       /* Ensure there aren't any gaps. */
217       assert(outputs->base[u] == ~0);
218 
219       outputs->base[u] = vs_outputs;
220       vs_outputs += outputs->components[u];
221    }
222 
223    return vs_outputs;
224 }
225 
226 /**
227  * \brief Counts the varyings used by the vertex shader.
228  *
229  * \param[in] outputs The vertex shader output data.
230  * \return The number of varyings used.
231  */
count_vs_varyings(struct rogue_vertex_outputs * outputs)232 static size_t count_vs_varyings(struct rogue_vertex_outputs *outputs)
233 {
234    size_t varyings = 0;
235 
236    /* Skip the position. */
237    for (size_t u = 1; u < outputs->num_output_vars; ++u)
238       varyings += outputs->components[u];
239 
240    return varyings;
241 }
242 
243 /**
244  * \brief Reserves space for a vertex shader input.
245  *
246  * \param[in] inputs The vertex input data.
247  * \param[in] i The vertex input index.
248  * \param[in] components The number of components in the input.
249  */
reserve_vs_input(struct rogue_vertex_inputs * inputs,size_t i,size_t components)250 static void reserve_vs_input(struct rogue_vertex_inputs *inputs,
251                              size_t i,
252                              size_t components)
253 {
254    assert(components >= 1 && components <= 4);
255 
256    assert(i < ARRAY_SIZE(inputs->base));
257 
258    inputs->base[i] = ~0;
259    inputs->components[i] = components;
260    ++inputs->num_input_vars;
261 }
262 
263 /**
264  * \brief Reserves space for a vertex shader output.
265  *
266  * \param[in] outputs The vertex output data.
267  * \param[in] i The vertex output index.
268  * \param[in] components The number of components in the output.
269  */
reserve_vs_output(struct rogue_vertex_outputs * outputs,size_t i,size_t components)270 static void reserve_vs_output(struct rogue_vertex_outputs *outputs,
271                               size_t i,
272                               size_t components)
273 {
274    assert(components >= 1 && components <= 4);
275 
276    assert(i < ARRAY_SIZE(outputs->base));
277 
278    outputs->base[i] = ~0;
279    outputs->components[i] = components;
280    ++outputs->num_output_vars;
281 }
282 
283 /**
284  * \brief Collects the vertex shader I/O data to feed-back to the driver.
285  *
286  * \sa #collect_io_data()
287  *
288  * \param[in] common_data Common build data.
289  * \param[in] vs_data Vertex-specific build data.
290  * \param[in] nir NIR vertex shader.
291  * \return true if successful, otherwise false.
292  */
collect_io_data_vs(struct rogue_common_build_data * common_data,struct rogue_vs_build_data * vs_data,nir_shader * nir)293 static bool collect_io_data_vs(struct rogue_common_build_data *common_data,
294                                struct rogue_vs_build_data *vs_data,
295                                nir_shader *nir)
296 {
297    ASSERTED bool out_pos_present = false;
298    ASSERTED size_t num_outputs =
299       nir_count_variables_with_modes(nir, nir_var_shader_out);
300 
301    /* Process inputs. */
302    nir_foreach_shader_in_variable (var, nir) {
303       size_t components = glsl_get_components(var->type);
304       size_t i = var->data.location - VERT_ATTRIB_GENERIC0;
305 
306       /* Check that inputs are F32. */
307       /* TODO: Support other types. */
308       assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
309       assert(glsl_type_is_32bit(var->type));
310 
311       /* Check input location. */
312       assert(var->data.location >= VERT_ATTRIB_GENERIC0 &&
313              var->data.location <= VERT_ATTRIB_GENERIC15);
314 
315       reserve_vs_input(&vs_data->inputs, i, components);
316    }
317 
318    vs_data->num_vertex_input_regs = alloc_vs_inputs(&vs_data->inputs);
319    assert(vs_data->num_vertex_input_regs);
320    assert(vs_data->num_vertex_input_regs < ROGUE_MAX_REG_VERTEX_IN);
321 
322    /* Process outputs. */
323 
324    /* We should always have at least a position variable. */
325    assert(num_outputs > 0 && "Invalid number of vertex shader outputs.");
326 
327    nir_foreach_shader_out_variable (var, nir) {
328       size_t components = glsl_get_components(var->type);
329 
330       /* Check that outputs are F32. */
331       /* TODO: Support other types. */
332       assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
333       assert(glsl_type_is_32bit(var->type));
334 
335       if (var->data.location == VARYING_SLOT_POS) {
336          assert(components == 4);
337          out_pos_present = true;
338 
339          reserve_vs_output(&vs_data->outputs, 0, components);
340       } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
341                  (var->data.location <= VARYING_SLOT_VAR31)) {
342          size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
343          reserve_vs_output(&vs_data->outputs, i, components);
344       } else {
345          unreachable("Unsupported vertex output type.");
346       }
347    }
348 
349    /* Always need the output position to be present. */
350    assert(out_pos_present);
351 
352    vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
353    assert(vs_data->num_vertex_outputs);
354    assert(vs_data->num_vertex_outputs < ROGUE_MAX_VERTEX_OUTPUTS);
355 
356    vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);
357 
358    return true;
359 }
360 
361 /**
362  * \brief Allocates the shared registers that will contain the UBOs.
363  *
364  * \param[in] ubo_data The UBO data.
365  * \return The total number of coefficient registers required by the iterators.
366  */
alloc_ubos(struct rogue_ubo_data * ubo_data)367 static size_t alloc_ubos(struct rogue_ubo_data *ubo_data)
368 {
369    size_t shareds = 0;
370 
371    for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
372       /* Ensure there aren't any gaps. */
373       assert(ubo_data->dest[u] == ~0);
374 
375       ubo_data->dest[u] = shareds;
376       shareds += ubo_data->size[u];
377    }
378 
379    return shareds;
380 }
381 
382 /**
383  * \brief Reserves a UBO and calculates its data.
384  *
385  * \param[in] ubo_data The UBO data.
386  * \param[in] desc_set The UBO descriptor set.
387  * \param[in] binding The UBO binding.
388  * \param[in] size The size required by the UBO (in dwords).
389  */
reserve_ubo(struct rogue_ubo_data * ubo_data,size_t desc_set,size_t binding,size_t size)390 static void reserve_ubo(struct rogue_ubo_data *ubo_data,
391                         size_t desc_set,
392                         size_t binding,
393                         size_t size)
394 {
395    size_t i = ubo_data->num_ubo_entries;
396    assert(i < ARRAY_SIZE(ubo_data->desc_set));
397 
398    ubo_data->desc_set[i] = desc_set;
399    ubo_data->binding[i] = binding;
400    ubo_data->dest[i] = ~0;
401    ubo_data->size[i] = size;
402    ++ubo_data->num_ubo_entries;
403 }
404 
405 /**
406  * \brief Collects UBO data to feed-back to the driver.
407  *
408  * \param[in] common_data Common build data.
409  * \param[in] nir NIR shader.
410  * \return true if successful, otherwise false.
411  */
collect_ubo_data(struct rogue_common_build_data * common_data,nir_shader * nir)412 static bool collect_ubo_data(struct rogue_common_build_data *common_data,
413                              nir_shader *nir)
414 {
415    /* Iterate over each UBO. */
416    nir_foreach_variable_with_modes (var, nir, nir_var_mem_ubo) {
417       size_t desc_set = var->data.driver_location;
418       size_t binding = var->data.binding;
419       size_t ubo_size_regs = 0;
420 
421       nir_function_impl *entry = nir_shader_get_entrypoint(nir);
422       /* Iterate over each load_ubo that uses this UBO. */
423       nir_foreach_block (block, entry) {
424          nir_foreach_instr (instr, block) {
425             if (instr->type != nir_instr_type_intrinsic)
426                continue;
427 
428             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
429             if (intr->intrinsic != nir_intrinsic_load_ubo)
430                continue;
431 
432             assert(nir_src_num_components(intr->src[0]) == 2);
433             assert(nir_intr_src_is_const(intr, 0));
434 
435             size_t load_desc_set = nir_intr_src_comp_const(intr, 0, 0);
436             size_t load_binding = nir_intr_src_comp_const(intr, 0, 1);
437 
438             if (load_desc_set != desc_set || load_binding != binding)
439                continue;
440 
441             ASSERTED size_t size_bytes = nir_intrinsic_range(intr);
442             assert(size_bytes == ROGUE_REG_SIZE_BYTES);
443 
444             size_t offset_bytes = nir_intrinsic_range_base(intr);
445             assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
446 
447             size_t offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
448 
449             /* TODO: Put offsets in a BITSET_DECLARE and check for gaps. */
450 
451             /* Find the largest load offset. */
452             ubo_size_regs = MAX2(ubo_size_regs, offset_regs);
453          }
454       }
455 
456       /* UBO size = largest offset + 1. */
457       ++ubo_size_regs;
458 
459       reserve_ubo(&common_data->ubo_data, desc_set, binding, ubo_size_regs);
460    }
461 
462    common_data->shareds = alloc_ubos(&common_data->ubo_data);
463    assert(common_data->shareds < ROGUE_MAX_REG_SHARED);
464 
465    return true;
466 }
467 
468 /**
469  * \brief Collects I/O data to feed-back to the driver.
470  *
471  * Collects the inputs/outputs/memory required, and feeds that back to the
472  * driver. Done at this stage rather than at the start of rogue_to_binary, so
473  * that all the I/O of all the shader stages is known before backend
474  * compilation, which would let us do things like cull unused inputs.
475  *
476  * \param[in] ctx Shared multi-stage build context.
477  * \param[in] nir NIR shader.
478  * \return true if successful, otherwise false.
479  */
rogue_collect_io_data(struct rogue_build_ctx * ctx,nir_shader * nir)480 bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
481 {
482    gl_shader_stage stage = nir->info.stage;
483    struct rogue_common_build_data *common_data = &ctx->common_data[stage];
484 
485    /* Collect stage-agnostic data. */
486    if (!collect_ubo_data(common_data, nir))
487       return false;
488 
489    /* Collect stage-specific data. */
490    switch (stage) {
491    case MESA_SHADER_FRAGMENT:
492       return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);
493 
494    case MESA_SHADER_VERTEX:
495       return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);
496 
497    default:
498       break;
499    }
500 
501    return false;
502 }
503 
504 /**
505  * \brief Returns the allocated coefficient register index for a component of an
506  * input varying location.
507  *
508  * \param[in] args The allocated iterator argument data.
509  * \param[in] location The input varying location, or ~0 for the W coefficient.
510  * \param[in] component The requested component.
511  * \return The coefficient register index.
512  */
rogue_coeff_index_fs(struct rogue_iterator_args * args,gl_varying_slot location,size_t component)513 size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
514                             gl_varying_slot location,
515                             size_t component)
516 {
517    size_t i;
518 
519    /* Special case: W coefficient. */
520    if (location == ~0) {
521       /* The W component shouldn't be the only one. */
522       assert(args->num_fpu_iterators > 1);
523       assert(args->destination[0] == 0);
524       return 0;
525    }
526 
527    i = (location - VARYING_SLOT_VAR0) + 1;
528    assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
529    assert(i < args->num_fpu_iterators);
530    assert(component < args->components[i]);
531    assert(args->base[i] != ~0);
532 
533    return args->base[i] + (ROGUE_COEFF_ALIGN * component);
534 }
535 
536 /**
537  * \brief Returns the allocated vertex output index for a component of an input
538  * varying location.
539  *
540  * \param[in] outputs The vertex output data.
541  * \param[in] location The output varying location.
542  * \param[in] component The requested component.
543  * \return The vertex output index.
544  */
rogue_output_index_vs(struct rogue_vertex_outputs * outputs,gl_varying_slot location,size_t component)545 size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
546                              gl_varying_slot location,
547                              size_t component)
548 {
549    size_t i;
550 
551    if (location == VARYING_SLOT_POS) {
552       /* Always at location 0. */
553       assert(outputs->base[0] == 0);
554       i = 0;
555    } else if ((location >= VARYING_SLOT_VAR0) &&
556               (location <= VARYING_SLOT_VAR31)) {
557       i = (location - VARYING_SLOT_VAR0) + 1;
558    } else {
559       unreachable("Unsupported vertex output type.");
560    }
561 
562    assert(i < outputs->num_output_vars);
563    assert(component < outputs->components[i]);
564    assert(outputs->base[i] != ~0);
565 
566    return outputs->base[i] + component;
567 }
568 
569 /**
570  * \brief Returns the allocated shared register index for a given UBO offset.
571  *
572  * \param[in] ubo_data The UBO data.
573  * \param[in] desc_set The UBO descriptor set.
574  * \param[in] binding The UBO binding.
575  * \param[in] offset_bytes The UBO offset in bytes.
576  * \return The UBO offset shared register index.
577  */
rogue_ubo_reg(struct rogue_ubo_data * ubo_data,size_t desc_set,size_t binding,size_t offset_bytes)578 size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
579                      size_t desc_set,
580                      size_t binding,
581                      size_t offset_bytes)
582 {
583    size_t ubo_index = ~0;
584    size_t offset_regs;
585 
586    /* Find UBO located at (desc_set, binding). */
587    for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
588       if (ubo_data->dest[u] == ~0)
589          continue;
590 
591       if (ubo_data->desc_set[u] != desc_set || ubo_data->binding[u] != binding)
592          continue;
593 
594       ubo_index = u;
595       break;
596    }
597 
598    assert(ubo_index != ~0);
599 
600    assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
601    offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
602 
603    return ubo_data->dest[ubo_index] + offset_regs;
604 }
605