1 /* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 #ifndef ACO_SHADER_INFO_H 11 #define ACO_SHADER_INFO_H 12 13 #include "ac_hw_stage.h" 14 #include "ac_shader_args.h" 15 #include "amd_family.h" 16 #include "shader_enums.h" 17 18 #ifdef __cplusplus 19 extern "C" { 20 #endif 21 22 #define ACO_MAX_SO_OUTPUTS 128 23 #define ACO_MAX_SO_BUFFERS 4 24 #define ACO_MAX_VERTEX_ATTRIBS 32 25 #define ACO_MAX_VBS 32 26 27 struct aco_vs_prolog_info { 28 struct ac_arg inputs; 29 30 uint32_t instance_rate_inputs; 31 uint32_t nontrivial_divisors; 32 uint32_t zero_divisors; 33 uint32_t post_shuffle; 34 /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes 35 * using bitwise arithmetic. 36 */ 37 uint32_t alpha_adjust_lo; 38 uint32_t alpha_adjust_hi; 39 40 uint8_t formats[ACO_MAX_VERTEX_ATTRIBS]; 41 42 unsigned num_attributes; 43 uint32_t misaligned_mask; 44 uint32_t unaligned_mask; 45 bool is_ngg; 46 gl_shader_stage next_stage; 47 }; 48 49 struct aco_ps_epilog_info { 50 struct ac_arg colors[MAX_DRAW_BUFFERS]; 51 uint8_t color_map[MAX_DRAW_BUFFERS]; 52 53 uint32_t spi_shader_col_format; 54 55 /* Bitmasks, each bit represents one of the 8 MRTs. */ 56 uint8_t color_is_int8; 57 uint8_t color_is_int10; 58 59 bool mrt0_is_dual_src; 60 61 bool alpha_to_coverage_via_mrtz; 62 bool alpha_to_one; 63 64 /* OpenGL only */ 65 uint16_t color_types; 66 bool clamp_color; 67 bool skip_null_export; 68 bool writes_all_cbufs; 69 enum compare_func alpha_func; 70 /* Depth/stencil/samplemask are always passed via VGPRs, and the epilog key can choose 71 * not to export them using these flags, which can be dynamic states. 72 */ 73 bool kill_depth; 74 bool kill_stencil; 75 bool kill_samplemask; 76 77 struct ac_arg alpha_reference; 78 struct ac_arg depth; 79 struct ac_arg stencil; 80 struct ac_arg samplemask; 81 }; 82 83 struct aco_ps_prolog_info { 84 bool poly_stipple; 85 unsigned poly_stipple_buf_offset; 86 87 bool bc_optimize_for_persp; 88 bool bc_optimize_for_linear; 89 bool force_persp_sample_interp; 90 bool force_linear_sample_interp; 91 bool force_persp_center_interp; 92 bool force_linear_center_interp; 93 94 unsigned samplemask_log_ps_iter; 95 bool get_frag_coord_from_pixel_coord; 96 bool pixel_center_integer; 97 bool force_samplemask_to_helper_invocation; 98 unsigned num_interp_inputs; 99 unsigned colors_read; 100 int color_interp_vgpr_index[2]; 101 int color_attr_index[2]; 102 bool color_two_side; 103 bool needs_wqm; 104 105 struct ac_arg internal_bindings; 106 }; 107 108 struct aco_shader_info { 109 enum ac_hw_stage hw_stage; 110 uint8_t wave_size; 111 bool schedule_ngg_pos_exports; /* Whether we should schedule position exports up or not. */ 112 bool image_2d_view_of_3d; 113 unsigned workgroup_size; 114 bool merged_shader_compiled_separately; /* GFX9+ */ 115 struct ac_arg next_stage_pc; 116 struct ac_arg epilog_pc; /* Vulkan only */ 117 struct { 118 bool tcs_in_out_eq; 119 bool any_tcs_inputs_via_lds; 120 bool has_prolog; 121 } vs; 122 struct { 123 struct ac_arg tcs_offchip_layout; 124 125 /* Vulkan only */ 126 uint32_t num_lds_blocks; 127 } tcs; 128 struct { 129 uint32_t num_inputs; 130 unsigned spi_ps_input_ena; 131 unsigned spi_ps_input_addr; 132 bool has_prolog; 133 bool has_epilog; 134 135 /* OpenGL only */ 136 struct ac_arg alpha_reference; 137 } ps; 138 struct { 139 bool uses_full_subgroups; 140 } cs; 141 142 uint32_t gfx9_gs_ring_lds_size; 143 }; 144 145 enum aco_compiler_debug_level { 146 ACO_COMPILER_DEBUG_LEVEL_ERROR, 147 }; 148 149 struct aco_compiler_options { 150 bool dump_ir; 151 bool dump_preoptir; 152 bool record_asm; 153 bool record_ir; 154 bool record_stats; 155 bool has_ls_vgpr_init_bug; 156 bool load_grid_size_from_user_sgpr; 157 bool optimisations_disabled; 158 uint8_t enable_mrt_output_nan_fixup; 159 bool wgp_mode; 160 bool is_opengl; 161 enum radeon_family family; 162 enum amd_gfx_level gfx_level; 163 uint32_t address32_hi; 164 struct { 165 void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message); 166 void* private_data; 167 } debug; 168 }; 169 170 enum aco_statistic { 171 aco_statistic_hash, 172 aco_statistic_instructions, 173 aco_statistic_copies, 174 aco_statistic_branches, 175 aco_statistic_latency, 176 aco_statistic_inv_throughput, 177 aco_statistic_vmem_clauses, 178 aco_statistic_smem_clauses, 179 aco_statistic_sgpr_presched, 180 aco_statistic_vgpr_presched, 181 aco_statistic_valu, 182 aco_statistic_salu, 183 aco_statistic_vmem, 184 aco_statistic_smem, 185 aco_statistic_vopd, 186 aco_num_statistics 187 }; 188 189 enum aco_symbol_id { 190 aco_symbol_invalid, 191 aco_symbol_scratch_addr_lo, 192 aco_symbol_scratch_addr_hi, 193 aco_symbol_lds_ngg_scratch_base, 194 aco_symbol_lds_ngg_gs_out_vertex_base, 195 aco_symbol_const_data_addr, 196 }; 197 198 struct aco_symbol { 199 enum aco_symbol_id id; 200 unsigned offset; 201 }; 202 203 #define MAX_SGPRS 108 204 #define MAX_VGPRS 256 205 #define MAX_LDS_SIZE 65536 /* 64 KiB */ 206 #define NUM_SAVED_VGPRS 2 207 208 struct aco_trap_handler_layout { 209 uint32_t saved_vgprs[NUM_SAVED_VGPRS * 64]; 210 211 uint32_t ttmp0; 212 uint32_t ttmp1; 213 214 struct { 215 uint32_t status; 216 uint32_t mode; 217 uint32_t trap_sts; 218 uint32_t hw_id1; 219 uint32_t gpr_alloc; 220 uint32_t lds_alloc; 221 uint32_t ib_sts; 222 } sq_wave_regs; 223 224 uint32_t m0; 225 uint32_t exec_lo; 226 uint32_t exec_hi; 227 uint32_t sgprs[MAX_SGPRS]; 228 uint32_t vgprs[MAX_VGPRS * 64]; 229 uint32_t lds[MAX_LDS_SIZE / 4]; 230 }; 231 232 #ifdef __cplusplus 233 } 234 #endif 235 #endif 236