/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "nir/nir_vulkan.h"
#include "vk_util.h"

#include "ir3/ir3_nir.h"

nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
                const VkPipelineShaderStageCreateInfo *stage_info,
                gl_shader_stage stage)
{
   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
      .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,

      /* Accessed via stg/ldg */
      .phys_ssbo_addr_format = nir_address_format_64bit_global,

      /* Accessed via the const register file */
      .push_const_addr_format = nir_address_format_logical,

      /* Accessed via ldl/stl */
      .shared_addr_format = nir_address_format_32bit_offset,

      /* Accessed via stg/ldg (not used with Vulkan?) */
      .global_addr_format = nir_address_format_64bit_global,

      /* ViewID is a sysval in geometry stages and an input in the FS */
      .view_index_is_input = stage == MESA_SHADER_FRAGMENT,
      .caps = {
         .transform_feedback = true,
         .tessellation = true,
         .draw_parameters = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .variable_pointers = true,
         .stencil_export = true,
         .multiview = true,
         .shader_viewport_index_layer = true,
         .geometry_streams = true,
         .device_group = true,
         .descriptor_indexing = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .runtime_descriptor_array = true,
         .float_controls = true,
         .float16 = true,
         .int16 = true,
         .storage_16bit = dev->physical_device->info->a6xx.storage_16bit,
         .demote_to_helper_invocation = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_vote = true,
      },
   };

   const struct nir_lower_compute_system_values_options compute_sysval_options = {
      .has_base_workgroup_id = true,
   };

   const nir_shader_compiler_options *nir_options =
      ir3_get_compiler_options(dev->compiler);

   /* convert VkSpecializationInfo */
   const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
   uint32_t num_spec = 0;
   struct nir_spirv_specialization *spec =
      vk_spec_info_to_nir_spirv(spec_info, &num_spec);

   struct vk_shader_module *module =
      vk_shader_module_from_handle(stage_info->module);
   assert(module->size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir((void*)module->data, module->size / 4,
                   spec, num_spec, stage, stage_info->pName,
                   &spirv_options, nir_options);

   free(spec);

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   /* multi-step inlining procedure */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);

   /* Split member structs. We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);

   NIR_PASS_V(nir, nir_lower_is_helper_invocation);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);

   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   NIR_PASS_V(nir, nir_lower_frexp);

   ir3_optimize_loop(dev->compiler, nir);

   return nir;
}

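/* load_push_constant is turned into a load_uniform from the range reserved
 * by gather_push_constants() below. The intrinsic's base and offset are in
 * bytes, push_consts.lo is in units of vec4 (16 bytes), and the base/offset
 * passed to load_uniform are in dwords, hence the "/ 4" on the base and the
 * ">> 2" on the offset.
 */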
static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct tu_shader *shader)
{
   uint32_t base = nir_intrinsic_base(instr);
   assert(base % 4 == 0);
   assert(base >= shader->push_consts.lo * 16);
   base -= shader->push_consts.lo * 16;

   nir_ssa_def *load =
      nir_load_uniform(b, instr->num_components, instr->dest.ssa.bit_size,
                       nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)),
                       .base = base / 4);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);

   nir_instr_remove(&instr->instr);
}

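/* vulkan_resource_index is lowered to the three-component "pointer" used by
 * nir_address_format_vec2_index_32bit_offset: .x is the descriptor set
 * (remapped to MAX_SETS for dynamic uniform/storage buffers), .y is the
 * descriptor index within that set, and .z is the 32-bit offset, which
 * starts out as 0.
 */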
static void
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
                            struct tu_shader *shader,
                            const struct tu_pipeline_layout *layout)
{
   nir_ssa_def *vulkan_idx = instr->src[0].ssa;

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct tu_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   uint32_t base;

   shader->active_desc_sets |= 1u << set;

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      base = layout->set[set].dynamic_offset_start +
             binding_layout->dynamic_offset_offset;
      set = MAX_SETS;
      break;
   default:
      base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
      break;
   }

   nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
                               nir_iadd(b, nir_imm_int(b, base), vulkan_idx),
                               nir_imm_int(b, 0));

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   nir_instr_remove(&instr->instr);
}

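/* Reindexing only has to adjust the descriptor index, i.e. the .y component
 * of the vec3 built by lower_vulkan_resource_index() above.
 */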
static void
lower_vulkan_resource_reindex(nir_builder *b, nir_intrinsic_instr *instr)
{
   nir_ssa_def *old_index = instr->src[0].ssa;
   nir_ssa_def *delta = instr->src[1].ssa;

   nir_ssa_def *new_index =
      nir_vec3(b, nir_channel(b, old_index, 0),
               nir_iadd(b, nir_channel(b, old_index, 1), delta),
               nir_channel(b, old_index, 2));

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_index);
   nir_instr_remove(&instr->instr);
}

static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin)
{
   /* Loading the descriptor happens as part of the load/store instruction so
    * this is a no-op.
    */
   nir_ssa_def_rewrite_uses_src(&intrin->dest.ssa, intrin->src[0]);
   nir_instr_remove(&intrin->instr);
}

static void
lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];

   /* The bindless base is part of the instruction, which means that part of
    * the "pointer" has to be constant. We solve this in the same way the blob
    * does, by generating a bunch of if-statements. In the usual case where
    * the descriptor set is constant we can skip that, though.
    */

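   /* Roughly, the generated control flow looks like this (one branch per
    * possible descriptor set, plus one for the dynamic-descriptor "set"):
    *
    *    if (set == 0) {
    *       result0 = intrinsic(bindless_resource_ir3(desc_set=0), ...);
    *    } else if (set == 1) {
    *       result1 = intrinsic(bindless_resource_ir3(desc_set=1), ...);
    *    } else ...
    *    result = phi(result0, result1, ..., undef);
    */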
   unsigned buffer_src;
   if (intrin->intrinsic == nir_intrinsic_store_ssbo) {
      /* This has the value first */
      buffer_src = 1;
   } else {
      buffer_src = 0;
   }

   nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intrin->src[buffer_src].ssa, 0);
   nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);

   nir_ssa_def *results[MAX_SETS + 1] = { NULL };

   if (nir_ssa_scalar_is_const(scalar_idx)) {
      nir_ssa_def *bindless =
         nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = nir_ssa_scalar_as_uint(scalar_idx));
      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[buffer_src], bindless);
      return;
   }

   nir_ssa_def *base_idx = nir_channel(b, scalar_idx.def, scalar_idx.comp);
   for (unsigned i = 0; i < MAX_SETS + 1; i++) {
      /* if (base_idx == i) { ... */
      nir_if *nif = nir_push_if(b, nir_ieq_imm(b, base_idx, i));

      nir_ssa_def *bindless =
         nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = i);

      nir_intrinsic_instr *copy =
         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);

      copy->num_components = intrin->num_components;

      for (unsigned src = 0; src < info->num_srcs; src++) {
         if (src == buffer_src)
            copy->src[src] = nir_src_for_ssa(bindless);
         else
            copy->src[src] = nir_src_for_ssa(intrin->src[src].ssa);
      }

      for (unsigned idx = 0; idx < info->num_indices; idx++) {
         copy->const_index[idx] = intrin->const_index[idx];
      }

      if (info->has_dest) {
         nir_ssa_dest_init(&copy->instr, &copy->dest,
                           intrin->dest.ssa.num_components,
                           intrin->dest.ssa.bit_size,
                           NULL);
         results[i] = &copy->dest.ssa;
      }

      nir_builder_instr_insert(b, &copy->instr);

      /* } else { ... */
      nir_push_else(b, nif);
   }

   nir_ssa_def *result =
      nir_ssa_undef(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
   for (int i = MAX_SETS; i >= 0; i--) {
      nir_pop_if(b, NULL);
      if (info->has_dest)
         result = nir_if_phi(b, results[i], result);
   }

   if (info->has_dest)
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, result);
   nir_instr_remove(&intrin->instr);
}

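/* Build a bindless handle for a texture/sampler/image deref: the descriptor
 * set plus an offset into that set counted in descriptors (which is
 * presumably why the byte-based bind_layout->offset and size are divided by
 * 4 * A6XX_TEX_CONST_DWORDS). Input attachments are special-cased and return
 * a plain texture index instead.
 */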
static nir_ssa_def *
build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
               struct tu_shader *shader,
               const struct tu_pipeline_layout *layout)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct tu_descriptor_set_binding_layout *bind_layout =
      &layout->set[set].layout->binding[binding];

   /* input attachments use a non-bindless workaround */
   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);
      uint32_t idx = var->data.index * 2;

      BITSET_SET_RANGE_INSIDE_WORD(b->shader->info.textures_used, idx * 2, ((idx * 2) + (bind_layout->array_size * 2)) - 1);

      /* D24S8 workaround: stencil of D24S8 will be sampled as uint */
      if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
         idx += 1;

      if (deref->deref_type == nir_deref_type_var)
         return nir_imm_int(b, idx);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      return nir_iadd(b, nir_imm_int(b, idx),
                      nir_imul_imm(b, arr_index, 2));
   }

   shader->active_desc_sets |= 1u << set;

   nir_ssa_def *desc_offset;
   unsigned descriptor_stride;
   unsigned offset = 0;
   /* Samplers come second in combined image/sampler descriptors, see
    * write_combined_image_sampler_descriptor().
    */
   if (is_sampler && bind_layout->type ==
       VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
      offset = 1;
   }
   desc_offset =
      nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
                  offset);
   descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);

   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_stride));
   }

   return nir_bindless_resource_ir3(b, 32, desc_offset, .desc_set = set);
}

static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr, struct tu_shader *shader,
                  const struct tu_pipeline_layout *layout)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
   nir_rewrite_image_intrinsic(instr, bindless, true);
}

static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                struct tu_shader *shader,
                const struct tu_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, shader);
      return true;

   case nir_intrinsic_load_vulkan_descriptor:
      lower_load_vulkan_descriptor(instr);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, layout);
      return true;
   case nir_intrinsic_vulkan_resource_reindex:
      lower_vulkan_resource_reindex(b, instr);
      return true;

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_get_ssbo_size:
      lower_ssbo_ubo_intrinsic(b, instr);
      return true;

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, layout);
      return true;

   default:
      return false;
   }
}

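/* If the sampled variable has an immutable YCbCr conversion attached to its
 * binding, append a color-space conversion to the texture result; query ops
 * (txs, query_levels, lod) are left untouched.
 */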
static void
lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
                nir_builder *builder,
                nir_tex_instr *tex)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);
   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);

   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct tu_descriptor_set_layout *set_layout =
      layout->set[var->data.descriptor_set].layout;
   const struct tu_descriptor_set_binding_layout *binding =
      &set_layout->binding[var->data.binding];
   const struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
      tu_immutable_ycbcr_samplers(set_layout, binding);

   if (!ycbcr_samplers)
      return;

   /* For the following instructions, we don't apply any change */
   if (tex->op == nir_texop_txs ||
       tex->op == nir_texop_query_levels ||
       tex->op == nir_texop_lod)
      return;

   assert(tex->texture_index == 0);
   unsigned array_index = 0;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      if (!nir_src_is_const(deref->arr.index))
         return;
      array_index = nir_src_as_uint(deref->arr.index);
      array_index = MIN2(array_index, binding->array_size - 1);
   }
   const struct tu_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;

   if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
      return;

   builder->cursor = nir_after_instr(&tex->instr);

   uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               PIPE_SWIZZLE_X);
   uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use the right bpc for each channel? */
   nir_ssa_def *result = nir_convert_ycbcr_to_rgb(builder,
                                                  ycbcr_sampler->ycbcr_model,
                                                  ycbcr_sampler->ycbcr_range,
                                                  &tex->dest.ssa,
                                                  bpcs);
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
                                  result->parent_instr);

   builder->cursor = nir_before_instr(&tex->instr);
}

static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct tu_shader *shader, const struct tu_pipeline_layout *layout)
{
   lower_tex_ycbcr(layout, b, tex);

   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   if (tex_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;

      /* for the input attachment case: */
      if (bindless->parent_instr->type != nir_instr_type_intrinsic)
         tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
   }

   return true;
}

struct lower_instr_params {
   struct tu_shader *shader;
   const struct tu_pipeline_layout *layout;
};

static bool
lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
{
   struct lower_instr_params *params = cb_data;
   b->cursor = nir_before_instr(instr);
   switch (instr->type) {
   case nir_instr_type_tex:
      return lower_tex(b, nir_instr_as_tex(instr), params->shader, params->layout);
   case nir_instr_type_intrinsic:
      return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->shader, params->layout);
   default:
      return false;
   }
}

/* Figure out the range of push constants that we're actually going to push to
 * the shader, and tell the backend to reserve this range when pushing UBO
 * constants.
 */

static void
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
{
   uint32_t min = UINT32_MAX, max = 0;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
               continue;

            uint32_t base = nir_intrinsic_base(intrin);
            uint32_t range = nir_intrinsic_range(intrin);
            min = MIN2(min, base);
            max = MAX2(max, base + range);
            break;
         }
      }
   }

   if (min >= max) {
      tu_shader->push_consts.lo = 0;
      tu_shader->push_consts.count = 0;
      return;
   }

   /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
    * however there's an alignment requirement of 4 on OFFSET. Expand the
    * range and change units accordingly.
    */
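   /* For example (illustrative numbers): loads covering bytes [32, 80) give
    * min / 16 = 2 vec4s, rounded down to lo = 0 by the OFFSET alignment, and
    * align(80, 16) / 16 = 5, so count = 5 - 0 = 5 vec4s.
    */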
   tu_shader->push_consts.lo = (min / 16) / 4 * 4;
   tu_shader->push_consts.count =
      align(max, 16) / 16 - tu_shader->push_consts.lo;
}

static bool
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
            const struct tu_pipeline_layout *layout)
{
   gather_push_constants(shader, tu_shader);

   struct lower_instr_params params = {
      .shader = tu_shader,
      .layout = layout,
   };

   bool progress = nir_shader_instructions_pass(shader,
                                                lower_instr,
                                                nir_metadata_none,
                                                &params);

   /* Remove now-unused variables so that when we gather the shader info later
    * they won't be counted.
    */

   if (progress)
      nir_opt_dce(shader);

   progress |=
      nir_remove_dead_variables(shader,
                                nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo,
                                NULL);

   return progress;
}

static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   unsigned comp_size =
      glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

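/* Translate NIR's transform feedback info into ir3_stream_output_info.
 * output_map maps gl_varying_slot locations to driver_locations so that
 * register_index refers to the output as the backend numbers it.
 */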
static void
tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
{
   nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);

   if (!xfb)
      return;

   uint8_t output_map[VARYING_SLOT_TESS_MAX];
   memset(output_map, 0, sizeof(output_map));

   nir_foreach_shader_out_variable(var, nir) {
      unsigned slots =
         var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
                           : glsl_count_attribute_slots(var->type, false);
      for (unsigned i = 0; i < slots; i++)
         output_map[var->data.location + i] = var->data.driver_location + i;
   }

   assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
   info->num_outputs = xfb->output_count;

   for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
      info->stride[i] = xfb->buffers[i].stride / 4;
      info->buffer_to_stream[i] = xfb->buffer_to_stream[i];
   }

   info->streams_written = xfb->streams_written;

   for (int i = 0; i < xfb->output_count; i++) {
      info->output[i].register_index = output_map[xfb->outputs[i].location];
      info->output[i].start_component = xfb->outputs[i].component_offset;
      info->output[i].num_components =
         util_bitcount(xfb->outputs[i].component_mask);
      info->output[i].output_buffer = xfb->outputs[i].buffer;
      info->output[i].dst_offset = xfb->outputs[i].offset / 4;
      info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
   }

   ralloc_free(xfb);
}

struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 nir_shader *nir,
                 unsigned multiview_mask,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc)
{
   struct tu_shader *shader;

   shader = vk_zalloc2(
      &dev->vk.alloc, alloc,
      sizeof(*shader),
      8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                     .use_fragcoord_sysval = true,
                     .use_layer_id_sysval = false,
                     /* When using multiview rendering, we must use
                      * gl_ViewIndex as the layer id to pass to the texture
                      * sampling function. gl_Layer doesn't work when
                      * multiview is enabled.
                      */
                     .use_view_id_for_layer = multiview_mask != 0,
                 });
   }

   /* This needs to happen before multiview lowering which rewrites store
    * instructions of the position variable, so that we can just rewrite one
    * store at the end instead of having to rewrite every store specified by
    * the user.
    */
   ir3_nir_lower_io_to_temporaries(nir);

   if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
      tu_nir_lower_multiview(nir, multiview_mask,
                             &shader->multi_pos_output, dev);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_vec2_index_32bit_offset);

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_type_info);
      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   }

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);

   /* Gather information for transform feedback. This should be called after:
    * - nir_split_per_member_structs.
    * - nir_remove_dead_variables with varyings, so that we could align
    *   stream outputs correctly.
    * - nir_assign_io_var_locations - to have valid driver_location
    */
   struct ir3_stream_output_info so_info = {};
   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY)
      tu_gather_xfb_info(nir, &so_info);

   NIR_PASS_V(nir, tu_lower_io, shader, layout);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   ir3_finalize_nir(dev->compiler, nir);

   shader->ir3_shader =
      ir3_shader_from_nir(dev->compiler, nir,
                          align(shader->push_consts.count, 4),
                          &so_info);

   return shader;
}

void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc)
{
   ir3_shader_destroy(shader->ir3_shader);

   vk_free2(&dev->vk.alloc, alloc, shader);
}
798