1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 #ifndef AC_SHADER_UTIL_H 8 #define AC_SHADER_UTIL_H 9 10 #include "ac_binary.h" 11 #include "amd_family.h" 12 #include "compiler/nir/nir.h" 13 #include "compiler/shader_enums.h" 14 #include "util/format/u_format.h" 15 16 #include <stdbool.h> 17 #include <stdint.h> 18 19 #ifdef __cplusplus 20 extern "C" { 21 #endif 22 23 #define AC_SENDMSG_GS 2 24 #define AC_SENDMSG_GS_DONE 3 25 #define AC_SENDMSG_GS_ALLOC_REQ 9 26 27 #define AC_SENDMSG_GS_OP_NOP (0 << 4) 28 #define AC_SENDMSG_GS_OP_CUT (1 << 4) 29 #define AC_SENDMSG_GS_OP_EMIT (2 << 4) 30 #define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4) 31 32 /* An extension of gl_access_qualifier describing other aspects of memory operations 33 * for code generation. 34 */ 35 enum { 36 /* Only one of LOAD/STORE/ATOMIC can be set. */ 37 ACCESS_TYPE_LOAD = BITFIELD_BIT(27), 38 ACCESS_TYPE_STORE = BITFIELD_BIT(28), 39 ACCESS_TYPE_ATOMIC = BITFIELD_BIT(29), 40 41 /* This access is expected to use an SMEM instruction if source operands are non-divergent. 42 * Only loads can set this. 43 */ 44 ACCESS_TYPE_SMEM = BITFIELD_BIT(30), 45 46 /* Whether a store offset or size alignment is less than 4. */ 47 ACCESS_MAY_STORE_SUBDWORD = BITFIELD_BIT(31), 48 }; 49 50 /* The meaning of these enums is different between chips. They match LLVM definitions, 51 * but they can also be used by ACO. Use ac_get_hw_cache_flags to get these. 52 */ 53 enum ac_cache_flags 54 { 55 ac_glc = BITFIELD_BIT(0), 56 ac_slc = BITFIELD_BIT(1), 57 ac_dlc = BITFIELD_BIT(2), 58 ac_swizzled = BITFIELD_BIT(3), 59 }; 60 61 union ac_hw_cache_flags 62 { 63 /* NOTE: This will contain more fields in the future. */ 64 enum ac_cache_flags value; 65 }; 66 67 enum ac_image_dim 68 { 69 ac_image_1d, 70 ac_image_2d, 71 ac_image_3d, 72 ac_image_cube, // includes cube arrays 73 ac_image_1darray, 74 ac_image_2darray, 75 ac_image_2dmsaa, 76 ac_image_2darraymsaa, 77 }; 78 79 struct ac_data_format_info { 80 uint8_t element_size; 81 uint8_t num_channels; 82 uint8_t chan_byte_size; 83 uint8_t chan_format; 84 }; 85 86 enum ac_vs_input_alpha_adjust { 87 AC_ALPHA_ADJUST_NONE = 0, 88 AC_ALPHA_ADJUST_SNORM = 1, 89 AC_ALPHA_ADJUST_SSCALED = 2, 90 AC_ALPHA_ADJUST_SINT = 3, 91 }; 92 93 struct ac_vtx_format_info { 94 uint16_t dst_sel; 95 uint8_t element_size; 96 uint8_t num_channels; 97 uint8_t chan_byte_size; /* 0 for packed formats */ 98 99 /* These last three are dependent on the family. */ 100 101 uint8_t has_hw_format; 102 /* Index is number of channels minus one. Use any index for packed formats. 103 * GFX6-8 is dfmt[0:3],nfmt[4:7]. 104 */ 105 uint8_t hw_format[4]; 106 enum ac_vs_input_alpha_adjust alpha_adjust : 8; 107 }; 108 109 struct ac_spi_color_formats { 110 unsigned normal : 8; 111 unsigned alpha : 8; 112 unsigned blend : 8; 113 unsigned blend_alpha : 8; 114 }; 115 116 /* For ac_build_fetch_format. 117 * 118 * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi). 119 */ 120 enum ac_fetch_format 121 { 122 AC_FETCH_FORMAT_FLOAT = 0, 123 AC_FETCH_FORMAT_FIXED, 124 AC_FETCH_FORMAT_UNORM, 125 AC_FETCH_FORMAT_SNORM, 126 AC_FETCH_FORMAT_USCALED, 127 AC_FETCH_FORMAT_SSCALED, 128 AC_FETCH_FORMAT_UINT, 129 AC_FETCH_FORMAT_SINT, 130 AC_FETCH_FORMAT_NONE, 131 }; 132 133 enum ac_descriptor_type 134 { 135 AC_DESC_IMAGE, 136 AC_DESC_FMASK, 137 AC_DESC_SAMPLER, 138 AC_DESC_BUFFER, 139 AC_DESC_PLANE_0, 140 AC_DESC_PLANE_1, 141 AC_DESC_PLANE_2, 142 }; 143 144 void ac_set_nir_options(struct radeon_info *info, bool use_llvm, 145 nir_shader_compiler_options *options); 146 147 unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask, 148 bool writes_mrt0_alpha); 149 150 unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format); 151 152 uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum amd_gfx_level gfx_level); 153 154 unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsigned nfmt); 155 156 const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt); 157 158 const struct ac_vtx_format_info *ac_get_vtx_format_info_table(enum amd_gfx_level level, 159 enum radeon_family family); 160 161 const struct ac_vtx_format_info *ac_get_vtx_format_info(enum amd_gfx_level level, 162 enum radeon_family family, 163 enum pipe_format fmt); 164 165 unsigned ac_get_safe_fetch_size(const enum amd_gfx_level gfx_level, const struct ac_vtx_format_info* vtx_info, 166 const unsigned offset, const unsigned max_channels, const unsigned alignment, 167 const unsigned num_channels); 168 169 enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim, 170 bool is_array); 171 172 enum ac_image_dim ac_get_image_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim sdim, 173 bool is_array); 174 175 unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config, 176 uint8_t *num_fragcoord_components); 177 178 uint16_t ac_get_ps_iter_mask(unsigned ps_iter_samples); 179 180 void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, 181 bool is_depth, bool use_rbplus, 182 struct ac_spi_color_formats *formats); 183 184 void ac_compute_late_alloc(const struct radeon_info *info, bool ngg, bool ngg_culling, 185 bool uses_scratch, unsigned *late_alloc_wave64, unsigned *cu_mask); 186 187 unsigned ac_compute_cs_workgroup_size(const uint16_t sizes[3], bool variable, unsigned max); 188 189 unsigned ac_compute_lshs_workgroup_size(enum amd_gfx_level gfx_level, gl_shader_stage stage, 190 unsigned tess_num_patches, 191 unsigned tess_patch_in_vtx, 192 unsigned tess_patch_out_vtx); 193 194 unsigned ac_compute_esgs_workgroup_size(enum amd_gfx_level gfx_level, unsigned wave_size, 195 unsigned es_verts, unsigned gs_inst_prims); 196 197 unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims, 198 unsigned max_vtx_out, unsigned prim_amp_factor); 199 200 uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift, 201 const struct radeon_info *info); 202 203 void ac_get_scratch_tmpring_size(const struct radeon_info *info, 204 unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave, 205 uint32_t *tmpring_size); 206 207 unsigned 208 ac_ngg_nogs_get_pervertex_lds_size(gl_shader_stage stage, 209 unsigned shader_num_outputs, 210 bool streamout_enabled, 211 bool export_prim_id, 212 bool has_user_edgeflags, 213 bool can_cull, 214 bool uses_instance_id, 215 bool uses_primitive_id); 216 217 unsigned 218 ac_ngg_get_scratch_lds_size(gl_shader_stage stage, 219 unsigned workgroup_size, 220 unsigned wave_size, 221 bool streamout_enabled, 222 bool can_cull); 223 224 enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr); 225 226 union ac_hw_cache_flags ac_get_hw_cache_flags(const struct radeon_info *info, 227 enum gl_access_qualifier access); 228 229 unsigned ac_get_all_edge_flag_bits(void); 230 231 unsigned ac_shader_io_get_unique_index_patch(unsigned semantic); 232 233 #ifdef __cplusplus 234 } 235 #endif 236 237 #endif 238