• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Collabora Ltd.
3  *
4  * Derived from tu_shader.c which is:
5  * Copyright © 2019 Google LLC
6  *
7  * Also derived from anv_pipeline.c which is
8  * Copyright © 2015 Intel Corporation
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice (including the next
18  * paragraph) shall be included in all copies or substantial portions of the
19  * Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  */
29 
30 #include "genxml/gen_macros.h"
31 
32 #include "panvk_private.h"
33 
34 #include "spirv/nir_spirv.h"
35 #include "util/mesa-sha1.h"
36 #include "nir_builder.h"
37 #include "nir_conversion_builder.h"
38 #include "nir_deref.h"
39 #include "nir_lower_blend.h"
40 #include "vk_shader_module.h"
41 
42 #include "compiler/bifrost_nir.h"
43 #include "util/pan_lower_framebuffer.h"
44 #include "pan_shader.h"
45 
46 #include "vk_util.h"
47 
48 static nir_def *
load_sysval_from_ubo(nir_builder * b,nir_intrinsic_instr * intr,unsigned offset)49 load_sysval_from_ubo(nir_builder *b, nir_intrinsic_instr *intr, unsigned offset)
50 {
51    return nir_load_ubo(b, intr->def.num_components, intr->def.bit_size,
52                        nir_imm_int(b, PANVK_SYSVAL_UBO_INDEX),
53                        nir_imm_int(b, offset),
54                        .align_mul = intr->def.bit_size / 8, .align_offset = 0,
55                        .range_base = offset, .range = intr->def.bit_size / 8);
56 }
57 
/* Per-shader options consumed by the panvk_lower_sysvals() callback. */
struct sysval_options {
   /* If non-null, a vec4 of blend constants known at pipeline compile time. If
    * null, blend constants are dynamic.
    */
   float *static_blend_constants;
};
64 
65 static bool
panvk_lower_sysvals(nir_builder * b,nir_instr * instr,void * data)66 panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
67 {
68    if (instr->type != nir_instr_type_intrinsic)
69       return false;
70 
71    struct sysval_options *opts = data;
72    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
73    nir_def *val = NULL;
74    b->cursor = nir_before_instr(instr);
75 
76 #define SYSVAL(name) offsetof(struct panvk_sysvals, name)
77    switch (intr->intrinsic) {
78    case nir_intrinsic_load_num_workgroups:
79       val = load_sysval_from_ubo(b, intr, SYSVAL(num_work_groups));
80       break;
81    case nir_intrinsic_load_workgroup_size:
82       val = load_sysval_from_ubo(b, intr, SYSVAL(local_group_size));
83       break;
84    case nir_intrinsic_load_viewport_scale:
85       val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_scale));
86       break;
87    case nir_intrinsic_load_viewport_offset:
88       val = load_sysval_from_ubo(b, intr, SYSVAL(viewport_offset));
89       break;
90    case nir_intrinsic_load_first_vertex:
91       val = load_sysval_from_ubo(b, intr, SYSVAL(first_vertex));
92       break;
93    case nir_intrinsic_load_base_vertex:
94       val = load_sysval_from_ubo(b, intr, SYSVAL(base_vertex));
95       break;
96    case nir_intrinsic_load_base_instance:
97       val = load_sysval_from_ubo(b, intr, SYSVAL(base_instance));
98       break;
99    case nir_intrinsic_load_blend_const_color_rgba:
100       if (opts->static_blend_constants) {
101          const nir_const_value constants[4] = {
102             {.f32 = opts->static_blend_constants[0]},
103             {.f32 = opts->static_blend_constants[1]},
104             {.f32 = opts->static_blend_constants[2]},
105             {.f32 = opts->static_blend_constants[3]},
106          };
107 
108          val = nir_build_imm(b, 4, 32, constants);
109       } else {
110          val = load_sysval_from_ubo(b, intr, SYSVAL(blend_constants));
111       }
112       break;
113    default:
114       return false;
115    }
116 #undef SYSVAL
117 
118    b->cursor = nir_after_instr(instr);
119    nir_def_rewrite_uses(&intr->def, val);
120    return true;
121 }
122 
123 static void
panvk_lower_blend(struct panvk_device * dev,nir_shader * nir,struct panfrost_compile_inputs * inputs,struct pan_blend_state * blend_state)124 panvk_lower_blend(struct panvk_device *dev, nir_shader *nir,
125                   struct panfrost_compile_inputs *inputs,
126                   struct pan_blend_state *blend_state)
127 {
128    nir_lower_blend_options options = {
129       .logicop_enable = blend_state->logicop_enable,
130       .logicop_func = blend_state->logicop_func,
131    };
132 
133    bool lower_blend = false;
134 
135    for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
136       struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];
137 
138       if (!panvk_per_arch(blend_needs_lowering)(dev, blend_state, rt))
139          continue;
140 
141       enum pipe_format fmt = rt_state->format;
142 
143       options.format[rt] = fmt;
144       options.rt[rt].colormask = rt_state->equation.color_mask;
145 
146       if (!rt_state->equation.blend_enable) {
147          static const nir_lower_blend_channel replace = {
148             .func = PIPE_BLEND_ADD,
149             .src_factor = PIPE_BLENDFACTOR_ONE,
150             .dst_factor = PIPE_BLENDFACTOR_ZERO,
151          };
152 
153          options.rt[rt].rgb = replace;
154          options.rt[rt].alpha = replace;
155       } else {
156          options.rt[rt].rgb.func = rt_state->equation.rgb_func;
157          options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
158          options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
159          options.rt[rt].alpha.func = rt_state->equation.alpha_func;
160          options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
161          options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
162       }
163 
164       /* Update the equation to force a color replacement */
165       rt_state->equation.color_mask = 0xf;
166       rt_state->equation.rgb_func = PIPE_BLEND_ADD;
167       rt_state->equation.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
168       rt_state->equation.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
169       rt_state->equation.alpha_func = PIPE_BLEND_ADD;
170       rt_state->equation.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
171       rt_state->equation.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
172       lower_blend = true;
173    }
174 
175    if (lower_blend) {
176       NIR_PASS_V(nir, nir_lower_blend, &options);
177       NIR_PASS_V(nir, bifrost_nir_lower_load_output);
178    }
179 }
180 
181 static bool
panvk_lower_load_push_constant(nir_builder * b,nir_intrinsic_instr * intr,void * data)182 panvk_lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *intr,
183                                void *data)
184 {
185    if (intr->intrinsic != nir_intrinsic_load_push_constant)
186       return false;
187 
188    b->cursor = nir_before_instr(&intr->instr);
189    nir_def *ubo_load =
190       nir_load_ubo(b, intr->def.num_components, intr->def.bit_size,
191                    nir_imm_int(b, PANVK_PUSH_CONST_UBO_INDEX), intr->src[0].ssa,
192                    .align_mul = intr->def.bit_size / 8, .align_offset = 0,
193                    .range_base = nir_intrinsic_base(intr),
194                    .range = nir_intrinsic_range(intr));
195    nir_def_rewrite_uses(&intr->def, ubo_load);
196    nir_instr_remove(&intr->instr);
197    return true;
198 }
199 
/* Size/alignment callback for nir_lower_vars_to_explicit_types() on shared
 * memory.  Only vectors and scalars are expected here (asserted below).
 *
 * type:  GLSL type being laid out.
 * size:  out — byte size of the type in shared memory.
 * align: out — required byte alignment in shared memory.
 */
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   /* Booleans occupy a full 32-bit slot in shared memory. */
   uint32_t comp_size =
      glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);

   /* Split the original comma-expression into two statements: these are two
    * independent outputs, not one computation.
    */
   *size = comp_size * length;
   /* vec3 is aligned like vec4. */
   *align = comp_size * (length == 3 ? 4 : length);
}
210 
/* Build a panvk_shader from a Vulkan pipeline shader stage: translate the
 * SPIR-V module to NIR, run the panvk-specific lowering passes (descriptors,
 * push constants, sysvals, blending for fragment shaders), then compile to a
 * GPU binary with the per-gen backend compiler.
 *
 * Returns the newly allocated shader, or NULL on allocation or translation
 * failure.  The caller owns the returned shader and must destroy it with the
 * matching per-arch destroy entry point.
 */
struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev, gl_shader_stage stage,
                              const VkPipelineShaderStageCreateInfo *stage_info,
                              const struct panvk_pipeline_layout *layout,
                              unsigned sysval_ubo,
                              struct pan_blend_state *blend_state,
                              bool static_blend_constants,
                              const VkAllocationCallbacks *alloc)
{
   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct panvk_shader *shader;

   shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   util_dynarray_init(&shader->binary, NULL);

   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .caps =
         {
            .variable_pointers = true,
         },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      /* Bounded global addressing is only needed when robustBufferAccess
       * requires out-of-range SSBO accesses to be defined.
       */
      .ssbo_addr_format = dev->vk.enabled_features.robustBufferAccess
                             ? nir_address_format_64bit_bounded_global
                             : nir_address_format_64bit_global_32bit_offset,
   };

   /* Translate SPIR-V to NIR with the backend's compiler options. */
   nir_shader *nir;
   VkResult result = vk_shader_module_to_nir(
      &dev->vk, module, stage, stage_info->pName,
      stage_info->pSpecializationInfo, &spirv_options,
      GENX(pan_shader_get_compiler_options)(), NULL, &nir);
   if (result != VK_SUCCESS) {
      vk_free2(&dev->vk.alloc, alloc, shader);
      return NULL;
   }

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir),
              true, true);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = dev->physical_device->kmod.props.gpu_prod_id,
      .no_ubo_to_push = true,
      .no_idvs = true, /* TODO */
   };

   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_shader_in | nir_var_shader_out, UINT32_MAX);

   /* Light optimization before the lowering passes below. */
   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
   NIR_PASS_V(nir, nir_opt_loop);

   /* Do texture lowering here.  Yes, it's a duplication of the texture
    * lowering in bifrost_compile.  However, we need to lower texture stuff
    * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
    * of the texture lowering generates nir_texop_txs which we handle as part
    * of descriptor lowering.
    *
    * TODO: We really should be doing this in common code, not duplicated in
    * panvk.  In order to do that, we need to rework the panfrost compile
    * flow to look more like the Intel flow:
    *
    *  1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs
    *     to be done really early.
    *
    *  2. bi_preprocess_nir: Does common lowering and runs the optimization
    *     loop.  Nothing here should be API-specific.
    *
    *  3. Do additional lowering in panvk
    *
    *  4. bi_postprocess_nir: Does final lowering and runs the optimization
    *     loop again.  This can happen as part of the final compile.
    *
    * This would give us a better place to do panvk-specific lowering.
    */
   nir_lower_tex_options lower_tex_options = {
      .lower_txs_lod = true,
      .lower_txp = ~0,
      .lower_tg4_broadcom_swizzle = true,
      .lower_txd = true,
      .lower_invalid_implicit_lod = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);

   /* Map Vulkan descriptors to the panvk binding model; also records whether
    * the shader accesses images (used to patch attribute_count below).
    */
   NIR_PASS_V(nir, panvk_per_arch(nir_lower_descriptors), dev, layout,
              &shader->has_img_access);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              spirv_options.ssbo_addr_format);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   if (gl_shader_stage_uses_workgroup(stage)) {
      /* Shared memory needs an explicit layout before explicit-IO lowering,
       * unless the shader already declared one.
       */
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
                    shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   }

   /* Push constants are sourced from a dedicated builtin UBO. */
   NIR_PASS_V(nir, nir_shader_intrinsics_pass, panvk_lower_load_push_constant,
              nir_metadata_block_index | nir_metadata_dominance,
              (void *)layout);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               stage);

   /* Needed to turn shader_temp into function_temp since the backend only
    * handles the latter for now.
    */
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (unlikely(dev->physical_device->instance->debug_flags &
                PANVK_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   pan_shader_preprocess(nir, inputs.gpu_id);

   if (stage == MESA_SHADER_FRAGMENT) {
      panvk_lower_blend(dev, nir, &inputs, blend_state);
   }

   struct sysval_options sysval_options = {
      .static_blend_constants =
         static_blend_constants ? blend_state->constants : NULL,
   };

   NIR_PASS_V(nir, nir_shader_instructions_pass, panvk_lower_sysvals,
              nir_metadata_block_index | nir_metadata_dominance,
              &sysval_options);

   if (stage == MESA_SHADER_FRAGMENT) {
      /* Inline per-RT format conversion so the shader writes raw tilebuffer
       * values.
       */
      enum pipe_format rt_formats[MAX_RTS] = {PIPE_FORMAT_NONE};

      for (unsigned rt = 0; rt < MAX_RTS; ++rt)
         rt_formats[rt] = blend_state->rts[rt].format;

      NIR_PASS_V(nir, GENX(pan_inline_rt_conversion), rt_formats);
   }

   GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);

   /* Patch the descriptor count */
   shader->info.ubo_count =
      PANVK_NUM_BUILTIN_UBOS + layout->num_ubos + layout->num_dyn_ubos;
   shader->info.sampler_count = layout->num_samplers;
   shader->info.texture_count = layout->num_textures;
   if (shader->has_img_access)
      shader->info.attribute_count += layout->num_imgs;

   shader->sysval_ubo = sysval_ubo;
   shader->local_size.x = nir->info.workgroup_size[0];
   shader->local_size.y = nir->info.workgroup_size[1];
   shader->local_size.z = nir->info.workgroup_size[2];

   /* The NIR shader is no longer needed once the binary is produced. */
   ralloc_free(nir);

   return shader;
}
389