1 /* 2 * Copyright 2024 Alyssa Rosenzweig 3 * SPDX-License-Identifier: MIT 4 */ 5 6 #pragma once 7 8 #include "agx_bo.h" 9 #include "agx_compile.h" 10 #include "agx_nir_lower_vbo.h" 11 #include "agx_pack.h" 12 #include "nir_lower_blend.h" 13 14 struct agx_linked_shader { 15 /* Mapped executable memory */ 16 struct agx_bo *bo; 17 18 /* Set if the linked SW vertex shader reads base vertex/instance. The VS 19 * prolog can read base instance even when the API VS does not, which is why 20 * this needs to be aggregated in the linker. 21 */ 22 bool uses_base_param; 23 24 /* Set if the linked shader uses txf. The epilog may even if the main shader 25 * does not, in the case of spilled render targets. 26 */ 27 bool uses_txf; 28 29 /* Coefficient register bindings */ 30 struct agx_varyings_fs cf; 31 32 /* Data structures packed for the linked program */ 33 struct agx_usc_shader_packed shader; 34 struct agx_usc_registers_packed regs; 35 struct agx_usc_fragment_properties_packed fragment_props; 36 struct agx_output_select_packed osel; 37 struct agx_fragment_control_packed fragment_control; 38 }; 39 40 void agx_fast_link(struct agx_linked_shader *linked, struct agx_device *dev, 41 bool fragment, struct agx_shader_part *main, 42 struct agx_shader_part *prolog, 43 struct agx_shader_part *epilog, unsigned nr_samples_shaded); 44 45 /* These parts of the vertex element affect the generated code */ 46 struct agx_velem_key { 47 uint32_t divisor; 48 uint16_t stride; 49 uint8_t format; 50 bool instanced; 51 }; 52 53 struct agx_vs_prolog_key { 54 struct agx_velem_key attribs[AGX_MAX_VBUFS]; 55 56 /* Bit mask of attribute components to load */ 57 BITSET_DECLARE(component_mask, AGX_MAX_ATTRIBS * 4); 58 59 /* Whether running as a hardware vertex shader (versus compute) */ 60 bool hw; 61 62 /* If !hw and the draw call is indexed, the index size */ 63 uint8_t sw_index_size_B; 64 65 /* Adjacency primitive to emulate */ 66 enum mesa_prim adjacency; 67 68 /* Robustness settings for the vertex fetch */ 69 struct agx_robustness robustness; 70 }; 71 72 struct agx_fs_prolog_key { 73 /* glSampleMask() mask */ 74 uint8_t api_sample_mask; 75 76 /* Number of cull planes requiring lowering */ 77 uint8_t cull_distance_size; 78 79 /* Need to count FRAGMENT_SHADER_INVOCATIONS */ 80 bool statistics; 81 82 /* Need to lower desktop OpenGL polygon stipple */ 83 bool polygon_stipple; 84 85 /* If we discard, whether we need to run Z/S tests */ 86 bool run_zs_tests; 87 88 /* If we emulate cull distance, the base offset for our allocated coefficient 89 * registers so we don't interfere with the main shader. 90 */ 91 unsigned cf_base; 92 }; 93 94 struct agx_blend_rt_key { 95 enum pipe_blend_func rgb_func : 3; 96 enum pipe_blendfactor rgb_src_factor : 5; 97 enum pipe_blendfactor rgb_dst_factor : 5; 98 enum pipe_blend_func alpha_func : 3; 99 enum pipe_blendfactor alpha_src_factor : 5; 100 enum pipe_blendfactor alpha_dst_factor : 5; 101 unsigned colormask : 4; 102 unsigned pad : 2; 103 }; 104 static_assert(sizeof(struct agx_blend_rt_key) == 4, "packed"); 105 106 struct agx_blend_key { 107 struct agx_blend_rt_key rt[8]; 108 uint8_t logicop_func; 109 bool alpha_to_coverage, alpha_to_one; 110 bool padding; 111 }; 112 static_assert(sizeof(struct agx_blend_key) == 36, "packed"); 113 114 struct agx_fs_epilog_link_info { 115 /* Base index of spilled render targets in the binding table */ 116 uint8_t rt_spill_base; 117 118 /* Bit mask of the bit size written to each render target. Bit i set if RT i 119 * uses 32-bit registers, else 16-bit registers. 120 */ 121 uint8_t size_32; 122 123 /* Mask of locations written by the main shader */ 124 uint8_t loc_written; 125 126 /* If set, the API fragment shader uses sample shading. This means the epilog 127 * will be invoked per-sample as well. 128 */ 129 unsigned sample_shading : 1; 130 131 /* If set, broadcast location #0 value to all render targets. This 132 * implements gl_FragColor semantics. This tells the driver to set remap 133 * appropriately. 134 */ 135 unsigned broadcast_rt0 : 1; 136 137 /* If set, force location 0's W channel to 1.0. This optimizes blending 138 * calculations in some applications. 139 */ 140 unsigned loc0_w_1 : 1; 141 142 /* If set, the API fragment shader wants to write depth/stencil respectively. 143 * This happens in the epilog for correctness when the epilog discards. 144 */ 145 unsigned write_z : 1; 146 unsigned write_s : 1; 147 148 /* Whether the fragment prolog or main fragment shader already ran tests due 149 * to early_fragment_tests. In this case, the epilog must not run tests. 150 */ 151 unsigned already_ran_zs : 1; 152 153 /* Whether the main fragment shader ran tests before discards due to 154 * early_fragment_tests. In this case, the epilog must mask the stores in 155 * software instead. 156 */ 157 bool sample_mask_after_force_early : 1; 158 159 unsigned padding : 1; 160 }; 161 static_assert(sizeof(struct agx_fs_epilog_link_info) == 4, "packed"); 162 163 struct agx_fs_epilog_key { 164 struct agx_fs_epilog_link_info link; 165 166 /* Blend state. Blending happens in the epilog. */ 167 struct agx_blend_key blend; 168 169 /* Colour attachment remapping for Vulkan. Negative values indicate that an 170 * attachment is discarded. Positive values indicate the output location we 171 * want to store at the indexed colour attachment. 172 */ 173 int8_t remap[8]; 174 175 /* Tilebuffer configuration */ 176 enum pipe_format rt_formats[8]; 177 uint8_t nr_samples; 178 bool force_small_tile; 179 }; 180 181 void agx_nir_vs_prolog(struct nir_builder *b, const void *key_); 182 void agx_nir_fs_epilog(struct nir_builder *b, const void *key_); 183 void agx_nir_fs_prolog(struct nir_builder *b, const void *key_); 184 185 bool agx_nir_lower_vs_input_to_prolog(nir_shader *s, 186 BITSET_WORD *attrib_components_read); 187 188 bool agx_nir_lower_fs_output_to_epilog(nir_shader *s, 189 struct agx_fs_epilog_link_info *out); 190 191 bool agx_nir_lower_fs_active_samples_to_register(nir_shader *s); 192