1 /* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 #ifndef ACO_SHADER_INFO_H 11 #define ACO_SHADER_INFO_H 12 13 #include "ac_hw_stage.h" 14 #include "ac_shader_args.h" 15 #include "amd_family.h" 16 #include "shader_enums.h" 17 18 #ifdef __cplusplus 19 extern "C" { 20 #endif 21 22 #define ACO_MAX_SO_OUTPUTS 128 23 #define ACO_MAX_SO_BUFFERS 4 24 #define ACO_MAX_VERTEX_ATTRIBS 32 25 #define ACO_MAX_VBS 32 26 27 struct aco_vs_prolog_info { 28 struct ac_arg inputs; 29 30 uint32_t instance_rate_inputs; 31 uint32_t nontrivial_divisors; 32 uint32_t zero_divisors; 33 uint32_t post_shuffle; 34 /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes 35 * using bitwise arithmetic. 36 */ 37 uint32_t alpha_adjust_lo; 38 uint32_t alpha_adjust_hi; 39 40 uint8_t formats[ACO_MAX_VERTEX_ATTRIBS]; 41 42 unsigned num_attributes; 43 uint32_t misaligned_mask; 44 uint32_t unaligned_mask; 45 bool is_ngg; 46 gl_shader_stage next_stage; 47 }; 48 49 struct aco_ps_epilog_info { 50 struct ac_arg colors[MAX_DRAW_BUFFERS]; 51 uint8_t color_map[MAX_DRAW_BUFFERS]; 52 53 uint32_t spi_shader_col_format; 54 55 /* Bitmasks, each bit represents one of the 8 MRTs. */ 56 uint8_t color_is_int8; 57 uint8_t color_is_int10; 58 59 bool mrt0_is_dual_src; 60 61 bool alpha_to_coverage_via_mrtz; 62 bool alpha_to_one; 63 64 /* OpenGL only */ 65 uint16_t color_types; 66 bool clamp_color; 67 bool skip_null_export; 68 unsigned broadcast_last_cbuf; 69 enum compare_func alpha_func; 70 /* Depth/stencil/samplemask are always passed via VGPRs, and the epilog key can choose 71 * not to export them using these flags, which can be dynamic states. 72 */ 73 bool kill_depth; 74 bool kill_stencil; 75 bool kill_samplemask; 76 77 struct ac_arg alpha_reference; 78 struct ac_arg depth; 79 struct ac_arg stencil; 80 struct ac_arg samplemask; 81 }; 82 83 struct aco_ps_prolog_info { 84 bool poly_stipple; 85 unsigned poly_stipple_buf_offset; 86 87 bool bc_optimize_for_persp; 88 bool bc_optimize_for_linear; 89 bool force_persp_sample_interp; 90 bool force_linear_sample_interp; 91 bool force_persp_center_interp; 92 bool force_linear_center_interp; 93 94 unsigned samplemask_log_ps_iter; 95 unsigned num_interp_inputs; 96 unsigned colors_read; 97 int color_interp_vgpr_index[2]; 98 int color_attr_index[2]; 99 bool color_two_side; 100 bool needs_wqm; 101 102 struct ac_arg internal_bindings; 103 }; 104 105 struct aco_shader_info { 106 enum ac_hw_stage hw_stage; 107 uint8_t wave_size; 108 bool has_ngg_culling; 109 bool has_ngg_early_prim_export; 110 bool image_2d_view_of_3d; 111 unsigned workgroup_size; 112 bool merged_shader_compiled_separately; /* GFX9+ */ 113 struct ac_arg next_stage_pc; 114 struct ac_arg epilog_pc; /* Vulkan only */ 115 struct { 116 bool tcs_in_out_eq; 117 bool any_tcs_inputs_via_lds; 118 bool has_prolog; 119 } vs; 120 struct { 121 struct ac_arg tcs_offchip_layout; 122 123 /* Vulkan only */ 124 uint32_t num_lds_blocks; 125 } tcs; 126 struct { 127 uint32_t num_inputs; 128 unsigned spi_ps_input_ena; 129 unsigned spi_ps_input_addr; 130 bool has_prolog; 131 bool has_epilog; 132 133 /* OpenGL only */ 134 struct ac_arg alpha_reference; 135 } ps; 136 struct { 137 bool uses_full_subgroups; 138 } cs; 139 140 uint32_t gfx9_gs_ring_lds_size; 141 }; 142 143 enum aco_compiler_debug_level { 144 ACO_COMPILER_DEBUG_LEVEL_ERROR, 145 }; 146 147 struct aco_compiler_options { 148 bool dump_ir; 149 bool dump_preoptir; 150 bool record_asm; 151 bool record_ir; 152 bool record_stats; 153 bool has_ls_vgpr_init_bug; 154 bool load_grid_size_from_user_sgpr; 155 bool optimisations_disabled; 156 uint8_t enable_mrt_output_nan_fixup; 157 bool wgp_mode; 158 bool is_opengl; 159 enum radeon_family family; 160 enum amd_gfx_level gfx_level; 161 uint32_t address32_hi; 162 struct { 163 void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message); 164 void* private_data; 165 } debug; 166 }; 167 168 enum aco_statistic { 169 aco_statistic_hash, 170 aco_statistic_instructions, 171 aco_statistic_copies, 172 aco_statistic_branches, 173 aco_statistic_latency, 174 aco_statistic_inv_throughput, 175 aco_statistic_vmem_clauses, 176 aco_statistic_smem_clauses, 177 aco_statistic_sgpr_presched, 178 aco_statistic_vgpr_presched, 179 aco_statistic_valu, 180 aco_statistic_salu, 181 aco_statistic_vmem, 182 aco_statistic_smem, 183 aco_statistic_vopd, 184 aco_num_statistics 185 }; 186 187 enum aco_symbol_id { 188 aco_symbol_invalid, 189 aco_symbol_scratch_addr_lo, 190 aco_symbol_scratch_addr_hi, 191 aco_symbol_lds_ngg_scratch_base, 192 aco_symbol_lds_ngg_gs_out_vertex_base, 193 aco_symbol_const_data_addr, 194 }; 195 196 struct aco_symbol { 197 enum aco_symbol_id id; 198 unsigned offset; 199 }; 200 201 #define MAX_SGPRS 108 202 #define MAX_VGPRS 256 203 #define MAX_LDS_SIZE 65536 /* 64 KiB */ 204 #define NUM_SAVED_VGPRS 2 205 206 struct aco_trap_handler_layout { 207 uint32_t saved_vgprs[NUM_SAVED_VGPRS * 64]; 208 209 uint32_t ttmp0; 210 uint32_t ttmp1; 211 212 struct { 213 uint32_t status; 214 uint32_t mode; 215 uint32_t trap_sts; 216 uint32_t hw_id1; 217 uint32_t gpr_alloc; 218 uint32_t lds_alloc; 219 uint32_t ib_sts; 220 } sq_wave_regs; 221 222 uint32_t m0; 223 uint32_t exec_lo; 224 uint32_t exec_hi; 225 uint32_t sgprs[MAX_SGPRS]; 226 uint32_t vgprs[MAX_VGPRS * 64]; 227 uint32_t lds[MAX_LDS_SIZE / 4]; 228 }; 229 230 #ifdef __cplusplus 231 } 232 #endif 233 #endif 234