• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Collabora Ltd.
3  *
4  * Derived from tu_shader.c which is:
5  * Copyright © 2019 Google LLC
6  *
7  * Also derived from anv_pipeline.c which is
8  * Copyright © 2015 Intel Corporation
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice (including the next
18  * paragraph) shall be included in all copies or substantial portions of the
19  * Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  */
29 
30 #include "genxml/gen_macros.h"
31 
32 #include "panvk_private.h"
33 
34 #include "nir_builder.h"
35 #include "nir_deref.h"
36 #include "nir_lower_blend.h"
37 #include "nir_conversion_builder.h"
38 #include "spirv/nir_spirv.h"
39 #include "util/mesa-sha1.h"
40 #include "vk_shader_module.h"
41 
42 #include "pan_shader.h"
43 #include "util/pan_lower_framebuffer.h"
44 
45 #include "vk_util.h"
46 
47 static void
panvk_init_sysvals(struct panfrost_sysvals * sysvals,gl_shader_stage stage)48 panvk_init_sysvals(struct panfrost_sysvals *sysvals,
49                    gl_shader_stage stage)
50 {
51    memset(sysvals, 0, sizeof(*sysvals));
52 
53 #define SYSVAL_SLOT(name) \
54    (assert(offsetof(struct panvk_sysvals, name) % 16 == 0), \
55     offsetof(struct panvk_sysvals, name) / 16)
56 
57 #define INIT_SYSVAL(name, SYSVAL) \
58    sysvals->sysvals[SYSVAL_SLOT(name)] = PAN_SYSVAL_##SYSVAL
59 
60    if (gl_shader_stage_is_compute(stage)) {
61       INIT_SYSVAL(num_work_groups, NUM_WORK_GROUPS);
62       INIT_SYSVAL(local_group_size, LOCAL_GROUP_SIZE);
63    } else {
64       INIT_SYSVAL(viewport_scale, VIEWPORT_SCALE);
65       INIT_SYSVAL(viewport_offset, VIEWPORT_OFFSET);
66       INIT_SYSVAL(vertex_instance_offsets, VERTEX_INSTANCE_OFFSETS);
67       INIT_SYSVAL(blend_constants, BLEND_CONSTANTS);
68    }
69    sysvals->sysval_count = SYSVAL_SLOT(dyn_ssbos);
70 
71 #undef SYSVAL_SLOT
72 #undef INIT_SYSVAL
73 }
74 
75 static bool
panvk_inline_blend_constants(nir_builder * b,nir_instr * instr,void * data)76 panvk_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data)
77 {
78    if (instr->type != nir_instr_type_intrinsic)
79       return false;
80 
81    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
82    if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
83       return false;
84 
85    const nir_const_value *constants = data;
86 
87    b->cursor = nir_after_instr(instr);
88    nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants);
89    nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant);
90    nir_instr_remove(instr);
91    return true;
92 }
93 
/* Lower blending into the fragment shader for every render target the
 * hardware can't blend natively (as reported by
 * panvk_per_arch(blend_needs_lowering)).  For each lowered RT the original
 * blend equation is copied into the nir_lower_blend options, then the
 * fixed-function equation in 'blend_state' is rewritten to a plain color
 * replace, since the shader now does the blending itself.  When
 * 'static_blend_constants' is set, the blend-constant loads emitted by
 * nir_lower_blend are additionally inlined as immediates.
 */
static void
panvk_lower_blend(struct panfrost_device *pdev,
                  nir_shader *nir,
                  struct panfrost_compile_inputs *inputs,
                  struct pan_blend_state *blend_state,
                  bool static_blend_constants)
{
   nir_lower_blend_options options = {
      .logicop_enable = blend_state->logicop_enable,
      .logicop_func = blend_state->logicop_func,
   };

   bool lower_blend = false;

   for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
      struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];

      if (!panvk_per_arch(blend_needs_lowering)(pdev, blend_state, rt))
         continue;

      enum pipe_format fmt = rt_state->format;

      options.format[rt] = fmt;
      options.rt[rt].colormask = rt_state->equation.color_mask;

      if (!rt_state->equation.blend_enable) {
         /* Blending disabled: express "write source unchanged" as
          * src * (1 - ZERO) + dst * ZERO.
          */
         static const nir_lower_blend_channel replace = {
            .func = BLEND_FUNC_ADD,
            .src_factor = BLEND_FACTOR_ZERO,
            .invert_src_factor = true,
            .dst_factor = BLEND_FACTOR_ZERO,
            .invert_dst_factor = false,
         };

         options.rt[rt].rgb = replace;
         options.rt[rt].alpha = replace;
      } else {
         /* Copy the full RGB and alpha equations so the shader-side
          * lowering reproduces the configured blend exactly.
          */
         options.rt[rt].rgb.func = rt_state->equation.rgb_func;
         options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
         options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor;
         options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
         options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor;
         options.rt[rt].alpha.func = rt_state->equation.alpha_func;
         options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
         options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor;
         options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
         options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
      }

      /* Update the equation to force a color replacement */
      rt_state->equation.color_mask = 0xf;
      rt_state->equation.rgb_func = BLEND_FUNC_ADD;
      rt_state->equation.rgb_src_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.rgb_invert_src_factor = true;
      rt_state->equation.rgb_dst_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.rgb_invert_dst_factor = false;
      rt_state->equation.alpha_func = BLEND_FUNC_ADD;
      rt_state->equation.alpha_src_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.alpha_invert_src_factor = true;
      rt_state->equation.alpha_dst_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.alpha_invert_dst_factor = false;
      lower_blend = true;

      /* Record the internal render-target conversion descriptor (upper
       * 32 bits of the blend descriptor) the compiler needs for this RT.
       */
      inputs->bifrost.static_rt_conv = true;
      inputs->bifrost.rt_conv[rt] =
         GENX(pan_blend_get_internal_desc)(pdev, fmt, rt, 32, false) >> 32;
   }

   if (lower_blend) {
      NIR_PASS_V(nir, nir_lower_blend, &options);

      if (static_blend_constants) {
         /* Constants are clamped to [0, 1], matching what fixed-function
          * blending would do with them.
          */
         const nir_const_value constants[4] = {
            { .f32 = CLAMP(blend_state->constants[0], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[1], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[2], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[3], 0.0f, 1.0f) },
         };
         NIR_PASS_V(nir, nir_shader_instructions_pass,
                    panvk_inline_blend_constants,
                    nir_metadata_block_index |
                    nir_metadata_dominance,
                    (void *)constants);
      }
   }
}
180 
181 static bool
panvk_lower_load_push_constant(nir_builder * b,nir_instr * instr,void * data)182 panvk_lower_load_push_constant(nir_builder *b, nir_instr *instr, void *data)
183 {
184    if (instr->type != nir_instr_type_intrinsic)
185       return false;
186 
187    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
188    if (intr->intrinsic != nir_intrinsic_load_push_constant)
189       return false;
190 
191    b->cursor = nir_before_instr(instr);
192    nir_ssa_def *ubo_load =
193       nir_load_ubo(b, nir_dest_num_components(intr->dest),
194                    nir_dest_bit_size(intr->dest),
195                    nir_imm_int(b, PANVK_PUSH_CONST_UBO_INDEX),
196                    intr->src[0].ssa,
197                    .align_mul = nir_dest_bit_size(intr->dest) / 8,
198                    .align_offset = 0,
199                    .range_base = nir_intrinsic_base(intr),
200                    .range = nir_intrinsic_range(intr));
201    nir_ssa_def_rewrite_uses(&intr->dest.ssa, ubo_load);
202    nir_instr_remove(instr);
203    return true;
204 }
205 
/* Size/alignment callback for nir_lower_vars_to_explicit_types on shared
 * memory.  Only vector or scalar types are expected (asserted).  Booleans
 * occupy 4 bytes per component; 3-component vectors are aligned as if they
 * had 4 components.
 */
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   /* Fixed: the two output assignments were previously joined with a comma
    * operator ("*size = ...,"), which happened to work but read like a typo
    * and was fragile under future edits.  Use separate statements.
    */
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}
217 
/* Build a panvk_shader from a Vulkan shader stage: translate the module's
 * SPIR-V to NIR, run the panvk-specific lowering pipeline, then hand the
 * result to the per-arch panfrost backend compiler.  Returns NULL on
 * allocation or SPIR-V translation failure.  The returned shader is
 * allocated from dev->vk.alloc/alloc and owned by the caller.
 *
 * NOTE(review): the ordering of the NIR passes below is load-bearing (see
 * the in-line comments, e.g. texture lowering before descriptor lowering,
 * blend lowering before output-location assignment) — do not reorder
 * without checking each dependency.
 */
struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev,
                              gl_shader_stage stage,
                              const VkPipelineShaderStageCreateInfo *stage_info,
                              const struct panvk_pipeline_layout *layout,
                              unsigned sysval_ubo,
                              struct pan_blend_state *blend_state,
                              bool static_blend_constants,
                              const VkAllocationCallbacks *alloc)
{
   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct panfrost_device *pdev = &dev->physical_device->pdev;
   struct panvk_shader *shader;

   shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   util_dynarray_init(&shader->binary, NULL);

   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .caps = {
         .variable_pointers = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      /* Bounded-global addressing gives robustBufferAccess semantics for
       * SSBOs when the feature is enabled.
       */
      .ssbo_addr_format = dev->vk.enabled_features.robustBufferAccess ?
                          nir_address_format_64bit_bounded_global :
                          nir_address_format_64bit_global_32bit_offset,
   };

   nir_shader *nir;
   VkResult result = vk_shader_module_to_nir(&dev->vk, module, stage,
                                             stage_info->pName,
                                             stage_info->pSpecializationInfo,
                                             &spirv_options,
                                             GENX(pan_shader_get_compiler_options)(),
                                             NULL, &nir);
   if (result != VK_SUCCESS) {
      vk_free2(&dev->vk.alloc, alloc, shader);
      return NULL;
   }

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, true);

   /* Snapshot the expected sysval layout so we can assert the backend
    * compiler didn't deviate from it (see the memcmp below).
    */
   struct panfrost_sysvals fixed_sysvals;
   panvk_init_sysvals(&fixed_sysvals, stage);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .no_ubo_to_push = true,
      .no_idvs = true, /* TODO */
      .fixed_sysval_ubo = sysval_ubo,
      .fixed_sysval_layout = &fixed_sysvals,
   };

   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_shader_in | nir_var_shader_out,
              UINT32_MAX);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
   NIR_PASS_V(nir, nir_opt_trivial_continues);

   /* Do texture lowering here.  Yes, it's a duplication of the texture
    * lowering in bifrost_compile.  However, we need to lower texture stuff
    * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
    * of the texture lowering generates nir_texop_txs which we handle as part
    * of descriptor lowering.
    *
    * TODO: We really should be doing this in common code, not dpulicated in
    * panvk.  In order to do that, we need to rework the panfrost compile
    * flow to look more like the Intel flow:
    *
    *  1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs
    *     to be done really early.
    *
    *  2. bi_preprocess_nir: Does common lowering and runs the optimization
    *     loop.  Nothing here should be API-specific.
    *
    *  3. Do additional lowering in panvk
    *
    *  4. bi_postprocess_nir: Does final lowering and runs the optimization
    *     loop again.  This can happen as part of the final compile.
    *
    * This would give us a better place to do panvk-specific lowering.
    */
   nir_lower_tex_options lower_tex_options = {
      .lower_txs_lod = true,
      .lower_txp = ~0,
      .lower_tg4_broadcom_swizzle = true,
      .lower_txd = true,
      .lower_invalid_implicit_lod = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);

   NIR_PASS_V(nir, panvk_per_arch(nir_lower_descriptors),
              dev, layout, &shader->has_img_access);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              spirv_options.ssbo_addr_format);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   if (gl_shader_stage_uses_workgroup(stage)) {
      /* Only assign offsets ourselves if the layout wasn't already fixed
       * by the SPIR-V (VK_KHR_workgroup_memory_explicit_layout).
       */
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared,
                    shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   }

   /* Push constants are accessed through a reserved UBO slot on panvk. */
   NIR_PASS_V(nir, nir_shader_instructions_pass,
              panvk_lower_load_push_constant,
              nir_metadata_block_index |
              nir_metadata_dominance,
              (void *)layout);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   /* We have to run nir_lower_blend() after we've gotten rid of copies (it
    * requires load/store) and before we assign output locations.
    */
   if (stage == MESA_SHADER_FRAGMENT) {
      /* This is required for nir_lower_blend */
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
      panvk_lower_blend(pdev, nir, &inputs, blend_state, static_blend_constants);
   }

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage);

   /* Needed to turn shader_temp into function_temp since the backend only
    * handles the latter for now.
    */
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);

   /* System values shouldn't have changed */
   assert(memcmp(&shader->info.sysvals, &fixed_sysvals,
                 sizeof(fixed_sysvals)) == 0);

   /* Patch the descriptor count */
   shader->info.ubo_count = PANVK_NUM_BUILTIN_UBOS +
                            layout->num_ubos + layout->num_dyn_ubos;
   shader->info.sampler_count = layout->num_samplers;
   shader->info.texture_count = layout->num_textures;
   /* Images are exposed to the shader as attributes on this hardware, so
    * grow the attribute count when the shader accesses images.
    */
   if (shader->has_img_access)
      shader->info.attribute_count += layout->num_imgs;

   shader->sysval_ubo = sysval_ubo;
   shader->local_size.x = nir->info.workgroup_size[0];
   shader->local_size.y = nir->info.workgroup_size[1];
   shader->local_size.z = nir->info.workgroup_size[2];

   /* The NIR is fully consumed by pan_shader_compile; only the binary and
    * shader info survive.
    */
   ralloc_free(nir);

   return shader;
}
397