1 /* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 #ifndef ACO_SHADER_INFO_H 28 #define ACO_SHADER_INFO_H 29 30 #include "shader_enums.h" 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 #define ACO_MAX_SO_OUTPUTS 64 37 #define ACO_MAX_SO_BUFFERS 4 38 #define ACO_MAX_VERTEX_ATTRIBS 32 39 #define ACO_MAX_VBS 32 40 41 struct aco_vs_input_state { 42 uint32_t instance_rate_inputs; 43 uint32_t nontrivial_divisors; 44 uint32_t post_shuffle; 45 /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes 46 * using bitwise arithmetic. 47 */ 48 uint32_t alpha_adjust_lo; 49 uint32_t alpha_adjust_hi; 50 51 uint32_t divisors[ACO_MAX_VERTEX_ATTRIBS]; 52 uint8_t formats[ACO_MAX_VERTEX_ATTRIBS]; 53 }; 54 55 struct aco_vs_prolog_key { 56 struct aco_vs_input_state state; 57 unsigned num_attributes; 58 uint32_t misaligned_mask; 59 bool is_ngg; 60 gl_shader_stage next_stage; 61 }; 62 63 struct aco_ps_epilog_key { 64 uint32_t spi_shader_col_format; 65 66 /* Bitmasks, each bit represents one of the 8 MRTs. */ 67 uint8_t color_is_int8; 68 uint8_t color_is_int10; 69 uint8_t enable_mrt_output_nan_fixup; 70 }; 71 72 struct aco_vp_output_info { 73 uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; 74 uint8_t clip_dist_mask; 75 uint8_t cull_dist_mask; 76 uint8_t param_exports; 77 uint8_t prim_param_exports; 78 bool writes_pointsize; 79 bool writes_layer; 80 bool writes_layer_per_primitive; 81 bool writes_viewport_index; 82 bool writes_viewport_index_per_primitive; 83 bool writes_primitive_shading_rate; 84 bool writes_primitive_shading_rate_per_primitive; 85 bool export_prim_id; 86 bool export_clip_dists; 87 }; 88 89 struct aco_stream_output { 90 uint8_t location; 91 uint8_t buffer; 92 uint16_t offset; 93 uint8_t component_mask; 94 uint8_t stream; 95 }; 96 97 struct aco_streamout_info { 98 uint16_t num_outputs; 99 struct aco_stream_output outputs[ACO_MAX_SO_OUTPUTS]; 100 uint16_t strides[ACO_MAX_SO_BUFFERS]; 101 }; 102 103 struct aco_shader_info { 104 uint8_t wave_size; 105 bool is_ngg; 106 bool has_ngg_culling; 107 bool has_ngg_early_prim_export; 108 uint32_t num_tess_patches; 109 unsigned workgroup_size; 110 struct { 111 struct aco_vp_output_info outinfo; 112 bool as_es; 113 bool as_ls; 114 bool tcs_in_out_eq; 115 uint64_t tcs_temp_only_input_mask; 116 bool use_per_attribute_vb_descs; 117 uint32_t vb_desc_usage_mask; 118 bool has_prolog; 119 bool dynamic_inputs; 120 } vs; 121 struct { 122 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 123 uint8_t num_stream_output_components[4]; 124 uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; 125 unsigned vertices_out; 126 } gs; 127 struct { 128 uint32_t num_lds_blocks; 129 } tcs; 130 struct { 131 struct aco_vp_output_info outinfo; 132 bool as_es; 133 } tes; 134 struct { 135 bool writes_z; 136 bool writes_stencil; 137 bool writes_sample_mask; 138 bool has_epilog; 139 uint32_t num_interp; 140 unsigned spi_ps_input; 141 } ps; 142 struct { 143 uint8_t subgroup_size; 144 } cs; 145 struct { 146 struct aco_vp_output_info outinfo; 147 } ms; 148 struct aco_streamout_info so; 149 150 uint32_t gfx9_gs_ring_lds_size; 151 }; 152 153 enum aco_compiler_debug_level { 154 ACO_COMPILER_DEBUG_LEVEL_PERFWARN, 155 ACO_COMPILER_DEBUG_LEVEL_ERROR, 156 }; 157 158 struct aco_stage_input { 159 uint32_t optimisations_disabled : 1; 160 uint32_t image_2d_view_of_3d : 1; 161 struct { 162 uint32_t instance_rate_inputs; 163 uint32_t instance_rate_divisors[ACO_MAX_VERTEX_ATTRIBS]; 164 uint8_t vertex_attribute_formats[ACO_MAX_VERTEX_ATTRIBS]; 165 uint32_t vertex_attribute_bindings[ACO_MAX_VERTEX_ATTRIBS]; 166 uint32_t vertex_attribute_offsets[ACO_MAX_VERTEX_ATTRIBS]; 167 uint32_t vertex_attribute_strides[ACO_MAX_VERTEX_ATTRIBS]; 168 uint8_t vertex_binding_align[ACO_MAX_VBS]; 169 } vs; 170 171 struct { 172 unsigned tess_input_vertices; 173 } tcs; 174 175 struct { 176 uint32_t col_format; 177 uint8_t num_samples; 178 179 /* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */ 180 bool alpha_to_coverage_via_mrtz; 181 } ps; 182 }; 183 184 struct aco_compiler_options { 185 struct aco_stage_input key; 186 bool robust_buffer_access; 187 bool dump_shader; 188 bool dump_preoptir; 189 bool record_ir; 190 bool record_stats; 191 bool has_ls_vgpr_init_bug; 192 bool wgp_mode; 193 enum radeon_family family; 194 enum amd_gfx_level gfx_level; 195 uint32_t address32_hi; 196 struct { 197 void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message); 198 void *private_data; 199 } debug; 200 }; 201 202 #ifdef __cplusplus 203 } 204 #endif 205 #endif 206