1 /* 2 * Copyright © 2024 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 8 #ifndef AC_NIR_HELPERS_H 9 #define AC_NIR_HELPERS_H 10 11 #include "ac_hw_stage.h" 12 #include "ac_shader_args.h" 13 #include "ac_shader_util.h" 14 #include "nir.h" 15 16 #ifdef __cplusplus 17 extern "C" { 18 #endif 19 20 #define AC_NIR_STORE_IO(b, store_val, const_offset, write_mask, hi_16bit, func, ...) \ 21 do { \ 22 if ((store_val)->bit_size >= 32) { \ 23 const unsigned store_write_mask = (write_mask); \ 24 const unsigned store_const_offset = (const_offset); \ 25 func((b), (store_val), __VA_ARGS__); \ 26 } else { \ 27 u_foreach_bit(c, (write_mask)) { \ 28 const unsigned store_write_mask = 1; \ 29 const unsigned store_const_offset = (const_offset) + c * 4 + ((hi_16bit) ? 2 : 0); \ 30 nir_def *store_component = nir_channel(b, (store_val), c); \ 31 func((b), store_component, __VA_ARGS__); \ 32 } \ 33 } \ 34 } while (0) 35 36 #define AC_NIR_LOAD_IO(load, b, num_components, bit_size, hi_16bit, func, ...) \ 37 do { \ 38 const unsigned load_bit_size = MAX2(32, (bit_size)); \ 39 (load) = func((b), (num_components), load_bit_size, __VA_ARGS__); \ 40 if ((bit_size) < load_bit_size) { \ 41 if ((hi_16bit)) { \ 42 (load) = nir_unpack_32_2x16_split_y(b, load); \ 43 } else { \ 44 (load) = nir_unpack_32_2x16_split_x(b, load); \ 45 } \ 46 } \ 47 } while (0) 48 49 typedef struct 50 { 51 /* GS output stream index, 2 bit per component */ 52 uint8_t stream; 53 /* Bitmask of components used: 4 bits per slot, 1 bit per component. */ 54 uint8_t components_mask : 4; 55 /* Bitmask of components that are used as varying, 1 bit per component. */ 56 uint8_t as_varying_mask : 4; 57 /* Bitmask of components that are used as sysval, 1 bit per component. */ 58 uint8_t as_sysval_mask : 4; 59 } ac_nir_prerast_per_output_info; 60 61 typedef struct 62 { 63 nir_def *outputs[VARYING_SLOT_MAX][4]; 64 nir_def *outputs_16bit_lo[16][4]; 65 nir_def *outputs_16bit_hi[16][4]; 66 67 nir_alu_type types[VARYING_SLOT_MAX][4]; 68 nir_alu_type types_16bit_lo[16][4]; 69 nir_alu_type types_16bit_hi[16][4]; 70 71 ac_nir_prerast_per_output_info infos[VARYING_SLOT_MAX]; 72 ac_nir_prerast_per_output_info infos_16bit_lo[16]; 73 ac_nir_prerast_per_output_info infos_16bit_hi[16]; 74 } ac_nir_prerast_out; 75 76 typedef struct { 77 nir_def *num_repacked_invocations; 78 nir_def *repacked_invocation_index; 79 } ac_nir_wg_repack_result; 80 81 /* Maps I/O semantics to the actual location used by the lowering pass. */ 82 typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic); 83 84 /* Forward declaration of nir_builder so we don't have to include nir_builder.h here */ 85 struct nir_builder; 86 typedef struct nir_builder nir_builder; 87 88 struct nir_xfb_info; 89 typedef struct nir_xfb_info nir_xfb_info; 90 91 /* Executed by ac_nir_cull when the current primitive is accepted. */ 92 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state); 93 94 nir_def * 95 ac_nir_unpack_value(nir_builder *b, nir_def *value, unsigned rshift, unsigned bitwidth); 96 97 void 98 ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value, 99 unsigned component, unsigned writemask); 100 101 void 102 ac_nir_gather_prerast_store_output_info(nir_builder *b, 103 nir_intrinsic_instr *intrin, 104 ac_nir_prerast_out *out); 105 106 void 107 ac_nir_export_primitive(nir_builder *b, nir_def *prim, nir_def *row); 108 109 void 110 ac_nir_export_position(nir_builder *b, 111 enum amd_gfx_level gfx_level, 112 uint32_t clip_cull_mask, 113 bool no_param_export, 114 bool force_vrs, 115 bool done, 116 uint64_t outputs_written, 117 ac_nir_prerast_out *out, 118 nir_def *row); 119 120 void 121 ac_nir_export_parameters(nir_builder *b, 122 const uint8_t *param_offsets, 123 uint64_t outputs_written, 124 uint16_t outputs_written_16bit, 125 ac_nir_prerast_out *out); 126 127 void 128 ac_nir_store_parameters_to_attr_ring(nir_builder *b, 129 const uint8_t *param_offsets, 130 const uint64_t outputs_written, 131 const uint16_t outputs_written_16bit, 132 ac_nir_prerast_out *out, 133 nir_def *num_export_threads_in_wave); 134 135 nir_def * 136 ac_nir_calc_io_off(nir_builder *b, 137 nir_intrinsic_instr *intrin, 138 nir_def *base_stride, 139 unsigned component_stride, 140 unsigned mapped_location); 141 142 unsigned 143 ac_nir_map_io_location(unsigned location, 144 uint64_t mask, 145 ac_nir_map_io_driver_location map_io); 146 147 nir_def * 148 ac_nir_cull_primitive(nir_builder *b, 149 nir_def *initially_accepted, 150 nir_def *pos[3][4], 151 unsigned num_vertices, 152 ac_nir_cull_accepted accept_func, 153 void *state); 154 155 void 156 ac_nir_sleep(nir_builder *b, unsigned num_cycles); 157 158 nir_def * 159 ac_average_samples(nir_builder *b, nir_def **samples, unsigned num_samples); 160 161 void 162 ac_optimization_barrier_vgpr_array(const struct radeon_info *info, nir_builder *b, 163 nir_def **array, unsigned num_elements, 164 unsigned num_components); 165 166 nir_def * 167 ac_get_global_ids(nir_builder *b, unsigned num_components, unsigned bit_size); 168 169 void 170 ac_nir_emit_legacy_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info, ac_nir_prerast_out *out); 171 172 bool 173 ac_nir_gs_shader_query(nir_builder *b, 174 bool has_gen_prim_query, 175 bool has_gs_invocations_query, 176 bool has_gs_primitives_query, 177 unsigned num_vertices_per_primitive, 178 unsigned wave_size, 179 nir_def *vertex_count[4], 180 nir_def *primitive_count[4]); 181 182 nir_def * 183 ac_nir_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives, 184 nir_def *vertex_indices[3], nir_def *is_null_prim, 185 enum amd_gfx_level gfx_level); 186 187 void 188 ac_nir_clamp_vertex_color_outputs(nir_builder *b, ac_nir_prerast_out *out); 189 190 void 191 ac_nir_ngg_alloc_vertices_and_primitives(nir_builder *b, 192 nir_def *num_vtx, 193 nir_def *num_prim, 194 bool fully_culled_workaround); 195 196 void 197 ac_nir_create_output_phis(nir_builder *b, 198 const uint64_t outputs_written, 199 const uint64_t outputs_written_16bit, 200 ac_nir_prerast_out *out); 201 202 void 203 ac_nir_ngg_build_streamout_buffer_info(nir_builder *b, 204 nir_xfb_info *info, 205 enum amd_gfx_level gfx_level, 206 bool has_xfb_prim_query, 207 bool use_gfx12_xfb_intrinsic, 208 nir_def *scratch_base, 209 nir_def *tid_in_tg, 210 nir_def *gen_prim[4], 211 nir_def *so_buffer_ret[4], 212 nir_def *buffer_offsets_ret[4], 213 nir_def *emit_prim_ret[4]); 214 215 void 216 ac_nir_ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info, 217 unsigned stream, nir_def *so_buffer[4], 218 nir_def *buffer_offsets[4], 219 unsigned vertex_index, nir_def *vtx_lds_addr, 220 ac_nir_prerast_out *pr_out, 221 bool skip_primitive_id); 222 223 void 224 ac_nir_repack_invocations_in_workgroup(nir_builder *b, nir_def **input_bool, 225 ac_nir_wg_repack_result *results, const unsigned num_repacks, 226 nir_def *lds_addr_base, unsigned max_num_waves, 227 unsigned wave_size); 228 229 #ifdef __cplusplus 230 } 231 #endif 232 233 #endif /* AC_NIR_HELPERS_H */ 234