/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "spirv_to_dxil.h"
#include "nir_to_dxil.h"
#include "dxil_nir.h"
#include "dxil_nir_lower_int_cubemaps.h"
#include "shader_enums.h"
#include "spirv/nir_spirv.h"
#include "spirv/spirv_info.h"
#include "util/blob.h"
#include "dxil_spirv_nir.h"

#include "git_sha1.h"
#include "vulkan/vulkan.h"

static const struct spirv_capabilities spirv_caps = {
   .Shader = true,
   .Geometry = true,
   .DrawParameters = true,
   .MultiView = true,
   .GroupNonUniform = true,
   .GroupNonUniformBallot = true,
   .GroupNonUniformVote = true,
   .GroupNonUniformShuffle = true,
   .GroupNonUniformQuad = true,
   .GroupNonUniformArithmetic = true,
   .InputAttachmentArrayDynamicIndexingEXT = true,
   .UniformTexelBufferArrayDynamicIndexingEXT = true,
   .StorageTexelBufferArrayDynamicIndexingEXT = true,
   .DenormFlushToZero = true,
   .DenormPreserve = true,
   .SignedZeroInfNanPreserve = true,
   .RoundingModeRTE = true,
   .RoundingModeRTZ = true,
   .Float16 = true,
   .Int16 = true,
   .StorageBuffer8BitAccess = true,
   .UniformAndStorageBuffer8BitAccess = true,
   .StoragePushConstant8 = true,
   .StorageUniformBufferBlock16 = true,
   .StorageUniform16 = true,
   .StoragePushConstant16 = true,
   .StorageInputOutput16 = true,
   .ShaderNonUniformEXT = true,
   .RuntimeDescriptorArray = true,
   .UniformBufferArrayNonUniformIndexingEXT = true,
   .SampledImageArrayNonUniformIndexingEXT = true,
   .StorageBufferArrayNonUniformIndexingEXT = true,
   .StorageImageArrayNonUniformIndexingEXT = true,
   .InputAttachmentArrayNonUniformIndexingEXT = true,
   .UniformTexelBufferArrayNonUniformIndexingEXT = true,
   .StorageTexelBufferArrayNonUniformIndexingEXT = true,
   .StorageImageReadWithoutFormat = true,
   .StorageImageWriteWithoutFormat = true,
   .ImageQuery = true,
   .Int64 = true,
   .Float64 = true,
   .Tessellation = true,
   .PhysicalStorageBufferAddresses = true,
};

static const struct spirv_to_nir_options spirv_to_nir_options = {
   .capabilities = &spirv_caps,
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .shared_addr_format = nir_address_format_logical,
   .phys_ssbo_addr_format = nir_address_format_32bit_index_offset_pack64,
   .min_ubo_alignment = 256, /* D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT */
   .min_ssbo_alignment = 16, /* D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT */
   .mediump_16bit_alu = true,
   .mediump_16bit_derivatives = true,
};

const struct spirv_to_nir_options *
dxil_spirv_nir_get_spirv_options(void)
{
   return &spirv_to_nir_options;
}
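/* Note on the address formats above: with
 * nir_address_format_32bit_index_offset, a UBO/SSBO pointer is carried as a
 * vec2 of (binding-table index, byte offset), which maps naturally onto
 * D3D12's descriptor-relative addressing. As an illustrative sketch (not
 * exact NIR syntax), a Vulkan-style "set/binding + offset" access becomes:
 *
 *    %idx  = @vulkan_resource_index(0)  (desc_set, binding)
 *    %desc = @load_vulkan_descriptor(%idx)
 *    %val  = @load_ubo(%desc.x, byte_offset)
 */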
/*
 * Logic extracted from vk_spirv_to_nir() so we have the same preparation
 * steps for both the vulkan driver and the lib used by the WebGPU
 * implementation.
 * Maybe we should move those steps out of vk_spirv_to_nir() and make
 * them vk agnostic (right now, the only vk specific thing is the vk_device
 * object that's used for the debug callback passed to spirv_to_nir()).
 */
void
dxil_spirv_nir_prep(nir_shader *nir)
{
   /* We have to lower away local constant initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   nir_remove_non_entrypoints(nir);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers. We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs. We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
                 nir_var_shader_call_data | nir_var_ray_hit_attrib,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
}

static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size =
      glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

static void
temp_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   uint32_t base_size, base_align;
   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_ARRAY:
      temp_var_info(glsl_get_array_element(type), &base_size, align);
      *size = base_size * glsl_array_size(type);
      break;
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE:
      *size = 0;
      *align = 0;
      for (uint32_t i = 0; i < glsl_get_length(type); ++i) {
         temp_var_info(glsl_get_struct_field(type, i), &base_size, &base_align);
         *size = ALIGN_POT(*size, base_align) + base_size;
         *align = MAX2(*align, base_align);
      }
      break;
   default:
      glsl_get_natural_size_align_bytes(type, &base_size, &base_align);
      *align = MAX2(base_align, 4);
      *size = ALIGN_POT(base_size, *align);
      break;
   }
}
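/* Worked example for temp_var_info() (illustrative, derived from the rules
 * above): for
 *
 *    struct { float v[3]; float16_t h; }
 *
 * each float has natural size/align 4/4, so the array is size 12, align 4.
 * The float16_t has natural size/align 2/2 but gets padded up to the 4-byte
 * minimum enforced in the default case. The struct therefore packs to
 * size 16 (12 aligned to 4, plus 4) with align 4.
 */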
static nir_variable *
add_runtime_data_var(nir_shader *nir, unsigned desc_set, unsigned binding)
{
   unsigned runtime_data_size =
      nir->info.stage == MESA_SHADER_COMPUTE
         ? sizeof(struct dxil_spirv_compute_runtime_data)
         : sizeof(struct dxil_spirv_vertex_runtime_data);

   const struct glsl_type *array_type =
      glsl_array_type(glsl_uint_type(), runtime_data_size / sizeof(unsigned),
                      sizeof(unsigned));
   const struct glsl_struct_field field = {array_type, "arr"};
   nir_variable *var = nir_variable_create(
      nir, nir_var_mem_ubo,
      glsl_struct_type(&field, 1, "runtime_data", false), "runtime_data");
   var->data.descriptor_set = desc_set;
   // Check that desc_set fits in the descriptor_set bitfield
   assert(var->data.descriptor_set == desc_set);
   var->data.binding = binding;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static bool
lower_shader_system_values(struct nir_builder *builder, nir_instr *instr,
                           void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic) {
      return false;
   }

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   /* All the intrinsics we care about are loads */
   if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
      return false;

   const struct dxil_spirv_runtime_conf *conf =
      (const struct dxil_spirv_runtime_conf *)cb_data;

   int offset = 0;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_num_workgroups:
      offset =
         offsetof(struct dxil_spirv_compute_runtime_data, group_count_x);
      break;
   case nir_intrinsic_load_base_workgroup_id:
      offset =
         offsetof(struct dxil_spirv_compute_runtime_data, base_group_x);
      break;
   case nir_intrinsic_load_first_vertex:
      if (conf->first_vertex_and_base_instance_mode ==
          DXIL_SPIRV_SYSVAL_TYPE_NATIVE)
         return false;
      offset = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex);
      break;
   case nir_intrinsic_load_is_indexed_draw:
      offset =
         offsetof(struct dxil_spirv_vertex_runtime_data, is_indexed_draw);
      break;
   case nir_intrinsic_load_base_instance:
      if (conf->first_vertex_and_base_instance_mode ==
          DXIL_SPIRV_SYSVAL_TYPE_NATIVE)
         return false;
      offset = offsetof(struct dxil_spirv_vertex_runtime_data, base_instance);
      break;
   case nir_intrinsic_load_draw_id:
      offset = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id);
      break;
   case nir_intrinsic_load_view_index:
      if (!conf->lower_view_index)
         return false;
      offset = offsetof(struct dxil_spirv_vertex_runtime_data, view_index);
      break;
   default:
      return false;
   }

   builder->cursor = nir_after_instr(instr);
   nir_address_format ubo_format = nir_address_format_32bit_index_offset;

   nir_def *index = nir_vulkan_resource_index(
      builder, nir_address_format_num_components(ubo_format),
      nir_address_format_bit_size(ubo_format),
      nir_imm_int(builder, 0),
      .desc_set = conf->runtime_data_cbv.register_space,
      .binding = conf->runtime_data_cbv.base_shader_register,
      .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

   nir_def *load_desc = nir_load_vulkan_descriptor(
      builder, nir_address_format_num_components(ubo_format),
      nir_address_format_bit_size(ubo_format),
      index, .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

   nir_def *load_data = nir_load_ubo(
      builder, intrin->def.num_components, intrin->def.bit_size,
      nir_channel(builder, load_desc, 0),
      nir_imm_int(builder, offset),
      .align_mul = 256,
      .align_offset = offset,
      .range_base = offset,
      .range = intrin->def.bit_size * intrin->def.num_components / 8);

   nir_def_rewrite_uses(&intrin->def, load_data);
   nir_instr_remove(instr);
   return true;
}

static bool
dxil_spirv_nir_lower_shader_system_values(nir_shader *shader,
                                          const struct dxil_spirv_runtime_conf *conf)
{
   return nir_shader_instructions_pass(shader, lower_shader_system_values,
                                       nir_metadata_control_flow |
                                          nir_metadata_loop_analysis,
                                       (void *)conf);
}
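/* Sketch of the rewrite performed above (illustrative pseudo-NIR, not exact
 * syntax): a vertex-stage
 *
 *    %fv = @load_first_vertex
 *
 * becomes a load from the hidden runtime-data CBV at the matching struct
 * offset:
 *
 *    %idx  = @vulkan_resource_index(0)  (desc_set = register_space,
 *                                        binding = base_shader_register)
 *    %desc = @load_vulkan_descriptor(%idx)
 *    %fv   = @load_ubo(%desc.x, offsetof(vertex_runtime_data, first_vertex))
 */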
static nir_variable *
add_push_constant_var(nir_shader *nir, unsigned size, unsigned desc_set,
                      unsigned binding)
{
   /* Size must be a multiple of 16 as buffer loads fetch 16 bytes at a time */
   unsigned num_32bit_words = ALIGN_POT(size, 16) / 4;

   const struct glsl_type *array_type =
      glsl_array_type(glsl_uint_type(), num_32bit_words, 4);
   const struct glsl_struct_field field = {array_type, "arr"};
   nir_variable *var = nir_variable_create(
      nir, nir_var_mem_ubo, glsl_struct_type(&field, 1, "block", false),
      "push_constants");
   var->data.descriptor_set = desc_set;
   var->data.binding = binding;
   var->data.how_declared = nir_var_hidden;
   return var;
}

struct lower_load_push_constant_data {
   nir_address_format ubo_format;
   unsigned desc_set;
   unsigned binding;
   unsigned size;
};

static bool
lower_load_push_constant(struct nir_builder *builder, nir_instr *instr,
                         void *cb_data)
{
   struct lower_load_push_constant_data *data =
      (struct lower_load_push_constant_data *)cb_data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   /* All the intrinsics we care about are loads */
   if (intrin->intrinsic != nir_intrinsic_load_push_constant)
      return false;

   uint32_t base = nir_intrinsic_base(intrin);
   uint32_t range = nir_intrinsic_range(intrin);

   data->size = MAX2(base + range, data->size);

   builder->cursor = nir_after_instr(instr);
   nir_address_format ubo_format = data->ubo_format;

   nir_def *index = nir_vulkan_resource_index(
      builder, nir_address_format_num_components(ubo_format),
      nir_address_format_bit_size(ubo_format),
      nir_imm_int(builder, 0),
      .desc_set = data->desc_set, .binding = data->binding,
      .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

   nir_def *load_desc = nir_load_vulkan_descriptor(
      builder, nir_address_format_num_components(ubo_format),
      nir_address_format_bit_size(ubo_format),
      index, .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

   nir_def *offset = intrin->src[0].ssa;
   nir_def *load_data = nir_load_ubo(
      builder, intrin->def.num_components, intrin->def.bit_size,
      nir_channel(builder, load_desc, 0),
      nir_iadd_imm(builder, offset, base),
      .align_mul = nir_intrinsic_align_mul(intrin),
      .align_offset = nir_intrinsic_align_offset(intrin),
      .range_base = base,
      .range = range);

   nir_def_replace(&intrin->def, load_data);
   return true;
}

static bool
dxil_spirv_nir_lower_load_push_constant(nir_shader *shader,
                                        nir_address_format ubo_format,
                                        unsigned desc_set, unsigned binding,
                                        uint32_t *size)
{
   bool ret;
   struct lower_load_push_constant_data data = {
      .ubo_format = ubo_format,
      .desc_set = desc_set,
      .binding = binding,
   };
   ret = nir_shader_instructions_pass(shader, lower_load_push_constant,
                                      nir_metadata_control_flow |
                                         nir_metadata_loop_analysis,
                                      &data);

   *size = data.size;

   assert(ret == (*size > 0));

   return ret;
}
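/* Illustrative before/after for the push-constant lowering (pseudo-NIR):
 *
 *    %v = @load_push_constant(%off)  (base = 16, range = 8)
 *
 * becomes, with the push-constant block exposed as a hidden CBV at
 * (desc_set, binding):
 *
 *    %idx  = @vulkan_resource_index(0)  (desc_set, binding)
 *    %desc = @load_vulkan_descriptor(%idx)
 *    %v    = @load_ubo(%desc.x, %off + 16)
 *
 * The pass also records the highest base + range it sees so the caller can
 * size the replacement CBV.
 */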
struct lower_yz_flip_data {
   bool *reads_sysval_ubo;
   const struct dxil_spirv_runtime_conf *rt_conf;
};

static bool
lower_yz_flip(struct nir_builder *builder, nir_instr *instr, void *cb_data)
{
   struct lower_yz_flip_data *data = (struct lower_yz_flip_data *)cb_data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   if (intrin->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_variable *var = nir_intrinsic_get_var(intrin, 0);
   if (var->data.mode != nir_var_shader_out ||
       var->data.location != VARYING_SLOT_POS)
      return false;

   builder->cursor = nir_before_instr(instr);

   const struct dxil_spirv_runtime_conf *rt_conf = data->rt_conf;

   nir_def *pos = intrin->src[1].ssa;
   nir_def *y_pos = nir_channel(builder, pos, 1);
   nir_def *z_pos = nir_channel(builder, pos, 2);
   nir_def *y_flip_mask = NULL, *z_flip_mask = NULL, *dyn_yz_flip_mask = NULL;

   if (rt_conf->yz_flip.mode & DXIL_SPIRV_YZ_FLIP_CONDITIONAL) {
      // conditional YZ-flip. The flip bitmask is passed through the vertex
      // runtime data UBO.
      unsigned offset =
         offsetof(struct dxil_spirv_vertex_runtime_data, yz_flip_mask);
      nir_address_format ubo_format = nir_address_format_32bit_index_offset;

      nir_def *index = nir_vulkan_resource_index(
         builder, nir_address_format_num_components(ubo_format),
         nir_address_format_bit_size(ubo_format),
         nir_imm_int(builder, 0),
         .desc_set = rt_conf->runtime_data_cbv.register_space,
         .binding = rt_conf->runtime_data_cbv.base_shader_register,
         .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

      nir_def *load_desc = nir_load_vulkan_descriptor(
         builder, nir_address_format_num_components(ubo_format),
         nir_address_format_bit_size(ubo_format),
         index, .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

      dyn_yz_flip_mask =
         nir_load_ubo(builder, 1, 32,
                      nir_channel(builder, load_desc, 0),
                      nir_imm_int(builder, offset),
                      .align_mul = 256,
                      .align_offset = offset,
                      .range_base = offset,
                      .range = 4);
      *data->reads_sysval_ubo = true;
   }

   if (rt_conf->yz_flip.mode & DXIL_SPIRV_Y_FLIP_UNCONDITIONAL)
      y_flip_mask = nir_imm_int(builder, rt_conf->yz_flip.y_mask);
   else if (rt_conf->yz_flip.mode & DXIL_SPIRV_Y_FLIP_CONDITIONAL)
      y_flip_mask = nir_iand_imm(builder, dyn_yz_flip_mask,
                                 DXIL_SPIRV_Y_FLIP_MASK);

   if (rt_conf->yz_flip.mode & DXIL_SPIRV_Z_FLIP_UNCONDITIONAL)
      z_flip_mask = nir_imm_int(builder, rt_conf->yz_flip.z_mask);
   else if (rt_conf->yz_flip.mode & DXIL_SPIRV_Z_FLIP_CONDITIONAL)
      z_flip_mask = nir_ushr_imm(builder, dyn_yz_flip_mask,
                                 DXIL_SPIRV_Z_FLIP_SHIFT);

   /* TODO: Multi-viewport */

   if (y_flip_mask) {
      nir_def *flip = nir_test_mask(builder, y_flip_mask, 1);

      // Y-flip => pos.y = -pos.y
      y_pos = nir_bcsel(builder, flip, nir_fneg(builder, y_pos), y_pos);
   }

   if (z_flip_mask) {
      nir_def *flip = nir_test_mask(builder, z_flip_mask, 1);

      // Z-flip => pos.z = -pos.z + 1.0f
      z_pos = nir_bcsel(builder, flip,
                        nir_fadd_imm(builder, nir_fneg(builder, z_pos), 1.0f),
                        z_pos);
   }

   nir_def *def = nir_vec4(builder,
                           nir_channel(builder, pos, 0),
                           y_pos, z_pos,
                           nir_channel(builder, pos, 3));
   nir_src_rewrite(&intrin->src[1], def);
   return true;
}

bool
dxil_spirv_nir_lower_yz_flip(nir_shader *shader,
                             const struct dxil_spirv_runtime_conf *rt_conf,
                             bool *reads_sysval_ubo)
{
   struct lower_yz_flip_data data = {
      .rt_conf = rt_conf,
      .reads_sysval_ubo = reads_sysval_ubo,
   };

   return nir_shader_instructions_pass(shader, lower_yz_flip,
                                       nir_metadata_control_flow |
                                          nir_metadata_loop_analysis,
                                       &data);
}
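/* Worked example: with an unconditional Y-flip and Z-flip for viewport 0
 * (bit 0 of y_mask and z_mask set), a clip-space position (x, y, z, w) is
 * rewritten to (x, -y, 1.0 - z, w), reconciling the Vulkan and D3D12
 * coordinate conventions. In the conditional modes the same math applies,
 * but the per-viewport bit comes from yz_flip_mask in the runtime-data UBO
 * instead of a compile-time mask.
 */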
static bool
discard_psiz_access(struct nir_builder *builder, nir_intrinsic_instr *intrin,
                    void *cb_data)
{
   if (intrin->intrinsic != nir_intrinsic_store_deref &&
       intrin->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_variable *var = nir_intrinsic_get_var(intrin, 0);
   if (!var || var->data.mode != nir_var_shader_out ||
       var->data.location != VARYING_SLOT_PSIZ)
      return false;

   builder->cursor = nir_before_instr(&intrin->instr);

   if (intrin->intrinsic == nir_intrinsic_load_deref)
      nir_def_rewrite_uses(&intrin->def, nir_imm_float(builder, 1.0));

   nir_instr_remove(&intrin->instr);
   return true;
}

static bool
dxil_spirv_nir_discard_point_size_var(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX &&
       shader->info.stage != MESA_SHADER_TESS_EVAL &&
       shader->info.stage != MESA_SHADER_GEOMETRY)
      return false;

   nir_variable *psiz = NULL;
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         psiz = var;
         break;
      }
   }

   if (!psiz)
      return false;

   if (!nir_shader_intrinsics_pass(shader, discard_psiz_access,
                                   nir_metadata_control_flow |
                                      nir_metadata_loop_analysis,
                                   NULL))
      return false;

   nir_remove_dead_derefs(shader);
   return true;
}

struct lower_pntc_data {
   const struct dxil_spirv_runtime_conf *conf;
   nir_variable *pntc;
};

static bool
write_pntc_with_pos(nir_builder *b, nir_instr *instr, void *_data)
{
   struct lower_pntc_data *data = (struct lower_pntc_data *)_data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return false;
   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (!var || var->data.location != VARYING_SLOT_POS)
      return false;

   nir_def *pos = intr->src[1].ssa;

   unsigned offset =
      offsetof(struct dxil_spirv_vertex_runtime_data, viewport_width) - 4;
   static_assert(offsetof(struct dxil_spirv_vertex_runtime_data, viewport_width) % 16 == 4,
                 "Doing vector unpacking with this assumption");
   nir_address_format ubo_format = nir_address_format_32bit_index_offset;

   b->cursor = nir_before_instr(instr);
   nir_def *index = nir_vulkan_resource_index(
      b, nir_address_format_num_components(ubo_format),
      nir_address_format_bit_size(ubo_format),
      nir_imm_int(b, 0),
      .desc_set = data->conf->runtime_data_cbv.register_space,
      .binding = data->conf->runtime_data_cbv.base_shader_register,
      .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

   nir_def *load_desc = nir_load_vulkan_descriptor(
      b, nir_address_format_num_components(ubo_format),
      nir_address_format_bit_size(ubo_format),
      index, .desc_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

   nir_def *transform =
      nir_channels(b,
                   nir_load_ubo(b, 4, 32,
                                nir_channel(b, load_desc, 0),
                                nir_imm_int(b, offset),
                                .align_mul = 16,
                                .range_base = offset,
                                .range = 16),
                   0x6);
   nir_def *point_center_in_clip =
      nir_fmul(b, nir_trim_vector(b, pos, 2),
               nir_frcp(b, nir_channel(b, pos, 3)));
   nir_def *point_center =
      nir_fmul(b,
               nir_fadd_imm(b,
                            nir_fmul(b, point_center_in_clip,
                                     nir_vec2(b, nir_imm_float(b, 0.5),
                                              nir_imm_float(b, -0.5f))),
                            0.5),
               transform);
   nir_store_var(b, data->pntc, nir_pad_vec4(b, point_center), 0xf);
   return true;
}

static void
dxil_spirv_write_pntc(nir_shader *nir,
                      const struct dxil_spirv_runtime_conf *conf)
{
   struct lower_pntc_data data = { .conf = conf };
   data.pntc = nir_variable_create(nir, nir_var_shader_out, glsl_vec4_type(),
                                   "gl_PointCoord");
   data.pntc->data.location = VARYING_SLOT_PNTC;
   nir_shader_instructions_pass(nir, write_pntc_with_pos,
                                nir_metadata_control_flow |
                                   nir_metadata_loop_analysis,
                                &data);
   nir->info.outputs_written |= VARYING_BIT_PNTC;

   /* Add the runtime data var if it's not already there */
   nir_binding binding = {
      .binding = conf->runtime_data_cbv.base_shader_register,
      .desc_set = conf->runtime_data_cbv.register_space,
      .success = true,
   };
   nir_variable *ubo_var = nir_get_binding_variable(nir, binding);
   if (!ubo_var)
      add_runtime_data_var(nir, conf->runtime_data_cbv.register_space,
                           conf->runtime_data_cbv.base_shader_register);
}
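/* The math above, in GLSL-ish terms (illustrative): the vertex stage emits
 * the point's center in framebuffer pixels,
 *
 *    center = (pos.xy / pos.w * vec2(0.5, -0.5) + 0.5) * viewport_size
 *
 * where viewport_size is the (width, height) pair pulled from the
 * runtime-data UBO via the 0x6 channel mask. lower_pntc_read() below then
 * reconstructs gl_PointCoord in the fragment stage as
 * fragcoord.xy - center + 0.5, the fragment's position within the point
 * relative to its center.
 */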
static bool
lower_pntc_read(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (!var || var->data.location != VARYING_SLOT_PNTC)
      return false;

   nir_def *point_center = &intr->def;
   nir_variable *pos_var = (nir_variable *)data;

   b->cursor = nir_after_instr(&intr->instr);

   nir_def *pos;
   if (var->data.sample == pos_var->data.sample)
      pos = nir_load_var(b, pos_var);
   else if (var->data.sample)
      pos = nir_interp_deref_at_sample(b, 4, 32,
                                       &nir_build_deref_var(b, pos_var)->def,
                                       nir_load_sample_id(b));
   else
      pos = nir_interp_deref_at_offset(b, 4, 32,
                                       &nir_build_deref_var(b, pos_var)->def,
                                       nir_imm_zero(b, 2, 32));

   nir_def *pntc = nir_fadd_imm(b,
                                nir_fsub(b, nir_trim_vector(b, pos, 2),
                                         nir_trim_vector(b, point_center, 2)),
                                0.5);
   nir_def_rewrite_uses_after(point_center, pntc, pntc->parent_instr);
   return true;
}

static void
dxil_spirv_compute_pntc(nir_shader *nir)
{
   nir_variable *pos = nir_find_variable_with_location(nir, nir_var_shader_in,
                                                       VARYING_SLOT_POS);
   if (!pos) {
      pos = nir_variable_create(nir, nir_var_shader_in, glsl_vec4_type(),
                                "gl_FragCoord");
      pos->data.location = VARYING_SLOT_POS;
      pos->data.sample =
         nir_find_variable_with_location(nir, nir_var_shader_in,
                                         VARYING_SLOT_PNTC)->data.sample;
   }
   nir_shader_intrinsics_pass(nir, lower_pntc_read,
                              nir_metadata_control_flow |
                                 nir_metadata_loop_analysis,
                              pos);
}

static bool
lower_view_index_to_rt_layer_instr(nir_builder *b, nir_intrinsic_instr *intr,
                                   void *data)
{
   if (intr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   if (!var || var->data.mode != nir_var_shader_out ||
       var->data.location != VARYING_SLOT_LAYER)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *layer = intr->src[1].ssa;
   nir_def *new_layer = nir_iadd(b, layer, nir_load_view_index(b));
   nir_src_rewrite(&intr->src[1], new_layer);
   return true;
}

static bool
add_layer_write(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr) {
      if (instr->type != nir_instr_type_intrinsic)
         return false;
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_emit_vertex &&
          intr->intrinsic != nir_intrinsic_emit_vertex_with_counter)
         return false;
      b->cursor = nir_before_instr(instr);
   }
   nir_variable *var = (nir_variable *)data;
   nir_store_var(b, var, nir_load_view_index(b), 0x1);
   return true;
}

static void
lower_view_index_to_rt_layer(nir_shader *nir)
{
   bool existing_write =
      nir_shader_intrinsics_pass(nir, lower_view_index_to_rt_layer_instr,
                                 nir_metadata_control_flow |
                                    nir_metadata_loop_analysis,
                                 NULL);

   if (existing_write)
      return;

   nir_variable *var = nir_variable_create(nir, nir_var_shader_out,
                                           glsl_uint_type(), "gl_Layer");
   var->data.location = VARYING_SLOT_LAYER;
   var->data.interpolation = INTERP_MODE_FLAT;
   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      nir_shader_instructions_pass(nir, add_layer_write,
                                   nir_metadata_control_flow |
                                      nir_metadata_loop_analysis,
                                   var);
   } else {
      nir_function_impl *func = nir_shader_get_entrypoint(nir);
      nir_builder b = nir_builder_at(nir_after_impl(func));
      add_layer_write(&b, NULL, var);
   }
}
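/* Illustrative effect of the view-index lowering: a multiview shader that
 * writes gl_Layer = L ends up writing gl_Layer = L + gl_ViewIndex, while a
 * shader that never writes gl_Layer gets a flat gl_Layer = gl_ViewIndex
 * store injected (before each emitted vertex in geometry shaders, at the
 * end of the entrypoint otherwise), so each view renders to its own
 * render-target slice.
 */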
void
dxil_spirv_nir_link(nir_shader *nir, nir_shader *prev_stage_nir,
                    const struct dxil_spirv_runtime_conf *conf,
                    struct dxil_spirv_metadata *metadata)
{
   glsl_type_singleton_init_or_ref();

   metadata->requires_runtime_data = false;
   if (prev_stage_nir) {
      if (nir->info.stage == MESA_SHADER_FRAGMENT) {
         nir->info.clip_distance_array_size =
            prev_stage_nir->info.clip_distance_array_size;

         if (nir->info.inputs_read & VARYING_BIT_PNTC) {
            NIR_PASS_V(prev_stage_nir, dxil_spirv_write_pntc, conf);
            NIR_PASS_V(nir, dxil_spirv_compute_pntc);
            metadata->requires_runtime_data = true;
         }
      }

      NIR_PASS_V(nir, dxil_nir_kill_undefined_varyings,
                 prev_stage_nir->info.outputs_written,
                 prev_stage_nir->info.patch_outputs_written,
                 NULL);
      NIR_PASS_V(prev_stage_nir, dxil_nir_kill_unused_outputs,
                 nir->info.inputs_read, nir->info.patch_inputs_read,
                 NULL);

      dxil_reassign_driver_locations(nir, nir_var_shader_in,
                                     prev_stage_nir->info.outputs_written,
                                     NULL);
      dxil_reassign_driver_locations(prev_stage_nir, nir_var_shader_out,
                                     nir->info.inputs_read, NULL);

      if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
         assert(prev_stage_nir->info.stage == MESA_SHADER_TESS_CTRL);
         nir->info.tess.tcs_vertices_out =
            prev_stage_nir->info.tess.tcs_vertices_out;
         prev_stage_nir->info.tess = nir->info.tess;

         for (uint32_t i = 0; i < 2; ++i) {
            unsigned loc = i == 0 ? VARYING_SLOT_TESS_LEVEL_OUTER
                                  : VARYING_SLOT_TESS_LEVEL_INNER;
            nir_variable *var =
               nir_find_variable_with_location(nir, nir_var_shader_in, loc);
            if (!var) {
               var = nir_variable_create(nir, nir_var_shader_in,
                                         glsl_array_type(glsl_float_type(),
                                                         i == 0 ? 4 : 2, 0),
                                         i == 0 ? "outer" : "inner");
               var->data.location = loc;
               var->data.patch = true;
               var->data.compact = true;
            }
         }
      }
   }

   glsl_type_singleton_decref();
}

static unsigned
lower_bit_size_callback(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return 0;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      return intr->def.bit_size == 1 ? 32 : 0;
   default:
      return 0;
   }
}

static bool
merge_ubos_and_ssbos(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_variable_with_modes_safe(var, nir,
                                        nir_var_mem_ubo | nir_var_mem_ssbo) {
      nir_variable *other_var = NULL;
      nir_foreach_variable_with_modes(var2, nir, var->data.mode) {
         if (var == var2)
            continue;
         if (var->data.descriptor_set == var2->data.descriptor_set &&
             var->data.binding == var2->data.binding) {
            other_var = var2;
            break;
         }
      }

      if (!other_var)
         continue;

      progress = true;

      /* Merge types */
      if (var->type != other_var->type) {
         /* Pick the larger array size */
         uint32_t desc_array_size = 1;
         if (glsl_type_is_array(var->type))
            desc_array_size = glsl_get_aoa_size(var->type);
         if (glsl_type_is_array(other_var->type))
            desc_array_size = MAX2(desc_array_size,
                                   glsl_get_aoa_size(other_var->type));

         const struct glsl_type *struct_type = glsl_without_array(var->type);
         if (var->data.mode == nir_var_mem_ubo) {
            /* Pick the larger struct type; doesn't matter for ssbos */
            uint32_t size = glsl_get_explicit_size(struct_type, false);
            const struct glsl_type *other_type =
               glsl_without_array(other_var->type);
            if (glsl_get_explicit_size(other_type, false) > size)
               struct_type = other_type;
         }

         var->type = glsl_array_type(struct_type, desc_array_size, 0);

         /* An ssbo is non-writeable if all aliased vars are non-writeable */
         if (var->data.mode == nir_var_mem_ssbo &&
             !(other_var->data.access & ACCESS_NON_WRITEABLE))
            var->data.access &= ~ACCESS_NON_WRITEABLE;

         exec_node_remove(&other_var->node);
      }
   }
   nir_shader_preserve_all_metadata(nir);
   return progress;
}
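/* Example of the situation merge_ubos_and_ssbos() handles (illustrative
 * GLSL): two SSBO variables that alias the same (set, binding) pair,
 *
 *    layout(set = 0, binding = 1) buffer A { uint a[]; } arr_a[2];
 *    layout(set = 0, binding = 1) readonly buffer B { uint b[]; } arr_b[4];
 *
 * collapse into a single variable with the larger descriptor array size
 * (4 here), and the merged variable only keeps ACCESS_NON_WRITEABLE if
 * every alias had it (B is readonly but A is not, so the result stays
 * writeable).
 */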
void
dxil_spirv_nir_passes(nir_shader *nir,
                      const struct dxil_spirv_runtime_conf *conf,
                      struct dxil_spirv_metadata *metadata)
{
   glsl_type_singleton_init_or_ref();

   NIR_PASS_V(nir, nir_lower_io_to_vector,
              nir_var_shader_out |
                 (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in
                                                        : 0));
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_shader_out);
   NIR_PASS_V(nir, nir_remove_dead_derefs);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
      .front_face = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   NIR_PASS_V(nir, nir_lower_system_values);

   nir_lower_compute_system_values_options compute_options = {
      .has_base_workgroup_id =
         conf->workgroup_id_mode != DXIL_SPIRV_SYSVAL_TYPE_ZERO,
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_options);
   NIR_PASS_V(nir, dxil_nir_lower_subgroup_id);
   NIR_PASS_V(nir, dxil_nir_lower_num_subgroups);

   nir_lower_subgroups_options subgroup_options = {
      .ballot_bit_size = 32,
      .ballot_components = 4,
      .lower_subgroup_masks = true,
      .lower_to_scalar = true,
      .lower_relative_shuffle = true,
      .lower_inverse_ballot = true,
   };
   if (nir->info.stage != MESA_SHADER_FRAGMENT &&
       nir->info.stage != MESA_SHADER_COMPUTE)
      subgroup_options.lower_quad = true;
   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
   NIR_PASS_V(nir, nir_lower_bit_size, lower_bit_size_callback, NULL);
   // Ensure subgroup scans on bools are gone
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, dxil_nir_lower_unsupported_subgroup_scan);

   // Force sample-rate shading if we're asked to.
   if (conf->force_sample_rate_shading) {
      assert(nir->info.stage == MESA_SHADER_FRAGMENT);
      nir->info.fs.uses_sample_shading = true;
   }

   if (conf->first_vertex_and_base_instance_mode ==
       DXIL_SPIRV_SYSVAL_TYPE_ZERO) {
      // vertex_id and instance_id should have already been transformed to
      // base zero before spirv_to_dxil was called. Therefore, we can zero out
      // base/firstVertex/Instance.
      gl_system_value system_values[] = {SYSTEM_VALUE_FIRST_VERTEX,
                                         SYSTEM_VALUE_BASE_VERTEX,
                                         SYSTEM_VALUE_BASE_INSTANCE};
      NIR_PASS_V(nir, dxil_nir_lower_system_values_to_zero, system_values,
                 ARRAY_SIZE(system_values));
   }

   if (conf->lower_view_index_to_rt_layer)
      NIR_PASS_V(nir, lower_view_index_to_rt_layer);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   metadata->needs_draw_sysvals =
      BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
      BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);

   NIR_PASS(metadata->requires_runtime_data, nir,
            dxil_spirv_nir_lower_shader_system_values, conf);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options){
                    .use_fragcoord_sysval = false,
                    .use_layer_id_sysval = !conf->lower_view_index,
                    .use_view_id_for_layer = !conf->lower_view_index,
                 });

      NIR_PASS_V(nir, dxil_nir_lower_discard_and_terminate);
      NIR_PASS_V(nir, nir_lower_returns);
      NIR_PASS_V(nir, dxil_nir_lower_sample_pos);
      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
   }

   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_memory_model);
   NIR_PASS_V(nir, dxil_nir_lower_coherent_loads_and_stores);

   if (conf->inferred_read_only_images_as_srvs) {
      const nir_opt_access_options opt_access_options = {
         .is_vulkan = true,
      };
      NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
   }

   NIR_PASS_V(nir, dxil_spirv_nir_discard_point_size_var);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
                 nir_var_mem_shared,
              NULL);

   uint32_t push_constant_size = 0;
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);
   NIR_PASS_V(nir, dxil_spirv_nir_lower_load_push_constant,
              nir_address_format_32bit_index_offset,
              conf->push_constant_cbv.register_space,
              conf->push_constant_cbv.base_shader_register,
              &push_constant_size);

   NIR_PASS_V(nir, dxil_spirv_nir_lower_buffer_device_address);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
              nir_address_format_32bit_index_offset_pack64);

   if (nir->info.shared_memory_explicit_layout) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
                 shared_var_info);
      NIR_PASS_V(nir, dxil_nir_split_unaligned_loads_stores,
                 nir_var_mem_shared);
      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   } else {
      NIR_PASS_V(nir, nir_split_struct_vars, nir_var_mem_shared);
      NIR_PASS_V(nir, dxil_nir_flatten_var_arrays, nir_var_mem_shared);
      NIR_PASS_V(nir, dxil_nir_lower_var_bit_size, nir_var_mem_shared,
                 conf->shader_model_max >= SHADER_MODEL_6_2 ? 16 : 32, 64);
   }

   NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, false);

   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, true);
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   if (conf->yz_flip.mode != DXIL_SPIRV_YZ_FLIP_NONE) {
      assert(nir->info.stage == MESA_SHADER_VERTEX ||
             nir->info.stage == MESA_SHADER_GEOMETRY ||
             nir->info.stage == MESA_SHADER_TESS_EVAL);
      NIR_PASS_V(nir, dxil_spirv_nir_lower_yz_flip, conf,
                 &metadata->requires_runtime_data);
   }

   if (metadata->requires_runtime_data) {
      add_runtime_data_var(nir, conf->runtime_data_cbv.register_space,
                           conf->runtime_data_cbv.base_shader_register);
   }

   if (push_constant_size > 0) {
      add_push_constant_var(nir, push_constant_size,
                            conf->push_constant_cbv.register_space,
                            conf->push_constant_cbv.base_shader_register);
   }

   NIR_PASS_V(nir, nir_lower_fp16_casts,
              nir_lower_fp16_all & ~nir_lower_fp16_rtz);
   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, dxil_nir_lower_double_math);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         if (nir_opt_loop(nir)) {
            progress = true;
            NIR_PASS(progress, nir, nir_copy_prop);
            NIR_PASS(progress, nir, nir_opt_dce);
         }
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
   NIR_PASS_V(nir, dxil_nir_flatten_var_arrays, nir_var_function_temp);
   NIR_PASS_V(nir, dxil_nir_lower_var_bit_size, nir_var_function_temp,
              conf->shader_model_max >= SHADER_MODEL_6_2 ? 16 : 32, 64);

   NIR_PASS_V(nir, nir_lower_doubles, NULL,
              nir->options->lower_doubles_options);

   if (conf->declared_read_only_images_as_srvs)
      NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, true);

   nir_lower_tex_options lower_tex_options = {
      .lower_txp = UINT32_MAX,
      .lower_invalid_implicit_lod = true,
      .lower_tg4_offsets = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);

   NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
   const struct dxil_nir_lower_loads_stores_options loads_stores_options = {
      .use_16bit_ssbo = conf->shader_model_max >= SHADER_MODEL_6_2,
   };
   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil,
              &loads_stores_options);
   NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
   NIR_PASS_V(nir, dxil_nir_lower_ubo_array_one_to_static);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_remove_dead_derefs);
   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_uniform | nir_var_shader_in | nir_var_shader_out,
              NULL);
   NIR_PASS_V(nir, merge_ubos_and_ssbos);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      dxil_sort_ps_outputs(nir);
   } else {
      /* Dummy linking step so we get different driver_location
       * assigned even if there's just a single vertex shader in the
       * pipeline. The real linking happens in dxil_spirv_nir_link().
       */
      dxil_reassign_driver_locations(nir, nir_var_shader_out, 0, NULL);
   }

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
         /* spirv_to_dxil() only emits generic vertex attributes. */
         assert(var->data.location >= VERT_ATTRIB_GENERIC0);
         var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
      }

      dxil_sort_by_driver_location(nir, nir_var_shader_in);
   } else {
      dxil_reassign_driver_locations(nir, nir_var_shader_in, 0, NULL);
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   glsl_type_singleton_decref();
}