1 /* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 #ifndef ACO_SHADER_INFO_H 28 #define ACO_SHADER_INFO_H 29 30 #include "ac_hw_stage.h" 31 #include "ac_shader_args.h" 32 #include "amd_family.h" 33 #include "shader_enums.h" 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 #define ACO_MAX_SO_OUTPUTS 128 40 #define ACO_MAX_SO_BUFFERS 4 41 #define ACO_MAX_VERTEX_ATTRIBS 32 42 #define ACO_MAX_VBS 32 43 44 struct aco_vs_prolog_info { 45 struct ac_arg inputs; 46 47 uint32_t instance_rate_inputs; 48 uint32_t nontrivial_divisors; 49 uint32_t zero_divisors; 50 uint32_t post_shuffle; 51 /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes 52 * using bitwise arithmetic. 53 */ 54 uint32_t alpha_adjust_lo; 55 uint32_t alpha_adjust_hi; 56 57 uint8_t formats[ACO_MAX_VERTEX_ATTRIBS]; 58 59 unsigned num_attributes; 60 uint32_t misaligned_mask; 61 bool is_ngg; 62 gl_shader_stage next_stage; 63 }; 64 65 struct aco_ps_epilog_info { 66 struct ac_arg colors[MAX_DRAW_BUFFERS]; 67 68 uint32_t spi_shader_col_format; 69 70 /* Bitmasks, each bit represents one of the 8 MRTs. */ 71 uint8_t color_is_int8; 72 uint8_t color_is_int10; 73 74 bool mrt0_is_dual_src; 75 76 bool alpha_to_coverage_via_mrtz; 77 78 /* OpenGL only */ 79 uint16_t color_types; 80 bool clamp_color; 81 bool alpha_to_one; 82 bool skip_null_export; 83 unsigned broadcast_last_cbuf; 84 enum compare_func alpha_func; 85 struct ac_arg alpha_reference; 86 struct ac_arg depth; 87 struct ac_arg stencil; 88 struct ac_arg samplemask; 89 }; 90 91 struct aco_tcs_epilog_info { 92 bool pass_tessfactors_by_reg; 93 bool tcs_out_patch_fits_subgroup; 94 enum tess_primitive_mode primitive_mode; 95 unsigned tess_offchip_ring_size; 96 bool tes_reads_tessfactors; 97 98 struct ac_arg invocation_id; 99 struct ac_arg rel_patch_id; 100 struct ac_arg tcs_out_current_patch_data_offset; 101 struct ac_arg patch_base; 102 struct ac_arg tess_lvl_in[2]; 103 struct ac_arg tess_lvl_out[4]; 104 struct ac_arg tcs_out_lds_layout; 105 struct ac_arg tcs_offchip_layout; 106 }; 107 108 struct aco_ps_prolog_info { 109 bool poly_stipple; 110 unsigned poly_stipple_buf_offset; 111 112 bool bc_optimize_for_persp; 113 bool bc_optimize_for_linear; 114 bool force_persp_sample_interp; 115 bool force_linear_sample_interp; 116 bool force_persp_center_interp; 117 bool force_linear_center_interp; 118 119 unsigned samplemask_log_ps_iter; 120 unsigned num_interp_inputs; 121 unsigned colors_read; 122 int color_interp_vgpr_index[2]; 123 int color_attr_index[2]; 124 bool color_two_side; 125 bool needs_wqm; 126 127 struct ac_arg internal_bindings; 128 }; 129 130 struct aco_shader_info { 131 enum ac_hw_stage hw_stage; 132 uint8_t wave_size; 133 bool has_ngg_culling; 134 bool has_ngg_early_prim_export; 135 bool image_2d_view_of_3d; 136 unsigned workgroup_size; 137 bool has_epilog; /* Only for TCS or PS. */ 138 bool merged_shader_compiled_separately; /* GFX9+ */ 139 struct ac_arg next_stage_pc; 140 struct { 141 bool tcs_in_out_eq; 142 uint64_t tcs_temp_only_input_mask; 143 bool has_prolog; 144 } vs; 145 struct { 146 struct ac_arg tcs_offchip_layout; 147 148 /* Vulkan only */ 149 uint32_t num_lds_blocks; 150 struct ac_arg epilog_pc; 151 uint32_t num_linked_outputs; 152 uint32_t num_linked_patch_outputs; 153 uint32_t tcs_vertices_out; 154 155 /* OpenGL only */ 156 bool pass_tessfactors_by_reg; 157 unsigned patch_stride; 158 struct ac_arg tes_offchip_addr; 159 struct ac_arg vs_state_bits; 160 } tcs; 161 struct { 162 uint32_t num_interp; 163 unsigned spi_ps_input_ena; 164 unsigned spi_ps_input_addr; 165 166 /* Vulkan only */ 167 struct ac_arg epilog_pc; 168 169 /* OpenGL only */ 170 struct ac_arg alpha_reference; 171 } ps; 172 struct { 173 bool uses_full_subgroups; 174 } cs; 175 176 uint32_t gfx9_gs_ring_lds_size; 177 178 bool is_trap_handler_shader; 179 }; 180 181 enum aco_compiler_debug_level { 182 ACO_COMPILER_DEBUG_LEVEL_PERFWARN, 183 ACO_COMPILER_DEBUG_LEVEL_ERROR, 184 }; 185 186 struct aco_compiler_options { 187 bool dump_shader; 188 bool dump_preoptir; 189 bool record_ir; 190 bool record_stats; 191 bool has_ls_vgpr_init_bug; 192 bool load_grid_size_from_user_sgpr; 193 bool optimisations_disabled; 194 uint8_t enable_mrt_output_nan_fixup; 195 bool wgp_mode; 196 bool is_opengl; 197 enum radeon_family family; 198 enum amd_gfx_level gfx_level; 199 uint32_t address32_hi; 200 struct { 201 void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message); 202 void* private_data; 203 } debug; 204 }; 205 206 enum aco_statistic { 207 aco_statistic_hash, 208 aco_statistic_instructions, 209 aco_statistic_copies, 210 aco_statistic_branches, 211 aco_statistic_latency, 212 aco_statistic_inv_throughput, 213 aco_statistic_vmem_clauses, 214 aco_statistic_smem_clauses, 215 aco_statistic_sgpr_presched, 216 aco_statistic_vgpr_presched, 217 aco_statistic_valu, 218 aco_statistic_salu, 219 aco_statistic_vmem, 220 aco_statistic_smem, 221 aco_statistic_vopd, 222 aco_num_statistics 223 }; 224 225 enum aco_symbol_id { 226 aco_symbol_invalid, 227 aco_symbol_scratch_addr_lo, 228 aco_symbol_scratch_addr_hi, 229 aco_symbol_lds_ngg_scratch_base, 230 aco_symbol_lds_ngg_gs_out_vertex_base, 231 aco_symbol_const_data_addr, 232 }; 233 234 struct aco_symbol { 235 enum aco_symbol_id id; 236 unsigned offset; 237 }; 238 239 #ifdef __cplusplus 240 } 241 #endif 242 #endif 243