• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2024 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 
8 #ifndef AC_NIR_HELPERS_H
9 #define AC_NIR_HELPERS_H
10 
11 #include "ac_hw_stage.h"
12 #include "ac_shader_args.h"
13 #include "ac_shader_util.h"
14 #include "nir.h"
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
/*
 * Emit a store through "func", splitting values narrower than 32 bits into
 * one store per component.
 *
 * "func" is invoked as func(b, value, __VA_ARGS__). The variadic arguments may
 * refer to two locals that this macro declares around every call:
 *   - store_write_mask:   the write mask to use for that call
 *   - store_const_offset: the constant offset to use for that call
 *
 * When store_val->bit_size is 32 or more, a single call is made with
 * "write_mask" and "const_offset" passed through unchanged.
 *
 * Otherwise, for every bit c set in "write_mask", channel c is extracted with
 * nir_channel() and stored with a write mask of 1 at
 * const_offset + c * 4, plus 2 when "hi_16bit" is non-zero.
 *
 * "store_val", "const_offset" and "write_mask" are evaluated exactly once and
 * "hi_16bit" at most once, outside the component loop, so argument expressions
 * cannot be re-run per component or captured by the loop variable "c".
 */
#define AC_NIR_STORE_IO(b, store_val, const_offset, write_mask, hi_16bit, func, ...) \
   do { \
      nir_def *const ac_store_val_ = (store_val); \
      const unsigned ac_store_mask_ = (write_mask); \
      const unsigned ac_store_offset_ = (const_offset); \
      if (ac_store_val_->bit_size >= 32) { \
         const unsigned store_write_mask = ac_store_mask_; \
         const unsigned store_const_offset = ac_store_offset_; \
         func((b), ac_store_val_, __VA_ARGS__); \
      } else { \
         const unsigned ac_store_half_offset_ = (hi_16bit) ? 2 : 0; \
         u_foreach_bit(c, ac_store_mask_) { \
            const unsigned store_write_mask = 1; \
            const unsigned store_const_offset = \
               ac_store_offset_ + c * 4 + ac_store_half_offset_; \
            nir_def *store_component = nir_channel((b), ac_store_val_, c); \
            func((b), store_component, __VA_ARGS__); \
         } \
      } \
   } while (0)
35 
/*
 * Emit a load through "func" and assign the result to "load".
 *
 * "func" is invoked as func(b, num_components, load_bit_size, __VA_ARGS__),
 * where load_bit_size is "bit_size" raised to at least 32. When the requested
 * "bit_size" is smaller than that, the loaded value is narrowed afterwards
 * with nir_unpack_32_2x16_split_y() if "hi_16bit" is non-zero, or with
 * nir_unpack_32_2x16_split_x() otherwise.
 *
 * "load" must be an lvalue. "bit_size" is evaluated exactly once.
 */
#define AC_NIR_LOAD_IO(load, b, num_components, bit_size, hi_16bit, func, ...) \
   do { \
      const unsigned ac_load_req_bit_size_ = (bit_size); \
      const unsigned load_bit_size = MAX2(32, ac_load_req_bit_size_); \
      (load) = func((b), (num_components), load_bit_size, __VA_ARGS__); \
      if (ac_load_req_bit_size_ < load_bit_size) { \
         (load) = (hi_16bit) ? nir_unpack_32_2x16_split_y((b), (load)) \
                             : nir_unpack_32_2x16_split_x((b), (load)); \
      } \
   } while (0)
48 
/*
 * Per-output bookkeeping record; used as the element type of the "infos"
 * arrays in ac_nir_prerast_out.
 */
typedef struct {
   /* GS output stream index, packed at 2 bits per component. */
   uint8_t stream;

   /* Components of the slot that are used: 1 bit per component, 4 bits per slot. */
   uint8_t components_mask : 4;

   /* Components that are used as a varying, 1 bit per component. */
   uint8_t as_varying_mask : 4;

   /* Components that are used as a sysval, 1 bit per component. */
   uint8_t as_sysval_mask : 4;
} ac_nir_prerast_per_output_info;
60 
61 typedef struct
62 {
63    nir_def *outputs[VARYING_SLOT_MAX][4];
64    nir_def *outputs_16bit_lo[16][4];
65    nir_def *outputs_16bit_hi[16][4];
66 
67    nir_alu_type types[VARYING_SLOT_MAX][4];
68    nir_alu_type types_16bit_lo[16][4];
69    nir_alu_type types_16bit_hi[16][4];
70 
71    ac_nir_prerast_per_output_info infos[VARYING_SLOT_MAX];
72    ac_nir_prerast_per_output_info infos_16bit_lo[16];
73    ac_nir_prerast_per_output_info infos_16bit_hi[16];
74 } ac_nir_prerast_out;
75 
76 typedef struct {
77    nir_def *num_repacked_invocations;
78    nir_def *repacked_invocation_index;
79 } ac_nir_wg_repack_result;
80 
/*
 * Callback that translates an I/O semantic into the location actually used by
 * the lowering pass.
 */
typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
83 
/*
 * Forward declarations, so that this header does not have to include
 * nir_builder.h. Each typedef also declares the struct tag it names.
 */
typedef struct nir_builder nir_builder;
typedef struct nir_xfb_info nir_xfb_info;
90 
91 /* Executed by ac_nir_cull when the current primitive is accepted. */
92 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
93 
94 nir_def *
95 ac_nir_unpack_value(nir_builder *b, nir_def *value, unsigned rshift, unsigned bitwidth);
96 
97 void
98 ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value,
99                             unsigned component, unsigned writemask);
100 
101 void
102 ac_nir_gather_prerast_store_output_info(nir_builder *b,
103                                         nir_intrinsic_instr *intrin,
104                                         ac_nir_prerast_out *out);
105 
106 void
107 ac_nir_export_primitive(nir_builder *b, nir_def *prim, nir_def *row);
108 
109 void
110 ac_nir_export_position(nir_builder *b,
111                        enum amd_gfx_level gfx_level,
112                        uint32_t clip_cull_mask,
113                        bool no_param_export,
114                        bool force_vrs,
115                        bool done,
116                        uint64_t outputs_written,
117                        ac_nir_prerast_out *out,
118                        nir_def *row);
119 
120 void
121 ac_nir_export_parameters(nir_builder *b,
122                          const uint8_t *param_offsets,
123                          uint64_t outputs_written,
124                          uint16_t outputs_written_16bit,
125                          ac_nir_prerast_out *out);
126 
127 void
128 ac_nir_store_parameters_to_attr_ring(nir_builder *b,
129                                      const uint8_t *param_offsets,
130                                      const uint64_t outputs_written,
131                                      const uint16_t outputs_written_16bit,
132                                      ac_nir_prerast_out *out,
133                                      nir_def *num_export_threads_in_wave);
134 
135 nir_def *
136 ac_nir_calc_io_off(nir_builder *b,
137                              nir_intrinsic_instr *intrin,
138                              nir_def *base_stride,
139                              unsigned component_stride,
140                              unsigned mapped_location);
141 
142 unsigned
143 ac_nir_map_io_location(unsigned location,
144                        uint64_t mask,
145                        ac_nir_map_io_driver_location map_io);
146 
147 nir_def *
148 ac_nir_cull_primitive(nir_builder *b,
149                       nir_def *initially_accepted,
150                       nir_def *pos[3][4],
151                       unsigned num_vertices,
152                       ac_nir_cull_accepted accept_func,
153                       void *state);
154 
155 void
156 ac_nir_sleep(nir_builder *b, unsigned num_cycles);
157 
158 nir_def *
159 ac_average_samples(nir_builder *b, nir_def **samples, unsigned num_samples);
160 
161 void
162 ac_optimization_barrier_vgpr_array(const struct radeon_info *info, nir_builder *b,
163                                    nir_def **array, unsigned num_elements,
164                                    unsigned num_components);
165 
166 nir_def *
167 ac_get_global_ids(nir_builder *b, unsigned num_components, unsigned bit_size);
168 
169 void
170 ac_nir_emit_legacy_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info, ac_nir_prerast_out *out);
171 
172 bool
173 ac_nir_gs_shader_query(nir_builder *b,
174                        bool has_gen_prim_query,
175                        bool has_gs_invocations_query,
176                        bool has_gs_primitives_query,
177                        unsigned num_vertices_per_primitive,
178                        unsigned wave_size,
179                        nir_def *vertex_count[4],
180                        nir_def *primitive_count[4]);
181 
182 nir_def *
183 ac_nir_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives,
184                              nir_def *vertex_indices[3], nir_def *is_null_prim,
185                              enum amd_gfx_level gfx_level);
186 
187 void
188 ac_nir_clamp_vertex_color_outputs(nir_builder *b, ac_nir_prerast_out *out);
189 
190 void
191 ac_nir_ngg_alloc_vertices_and_primitives(nir_builder *b,
192                                          nir_def *num_vtx,
193                                          nir_def *num_prim,
194                                          bool fully_culled_workaround);
195 
196 void
197 ac_nir_create_output_phis(nir_builder *b,
198                           const uint64_t outputs_written,
199                           const uint64_t outputs_written_16bit,
200                           ac_nir_prerast_out *out);
201 
202 void
203 ac_nir_ngg_build_streamout_buffer_info(nir_builder *b,
204                                        nir_xfb_info *info,
205                                        enum amd_gfx_level gfx_level,
206                                        bool has_xfb_prim_query,
207                                        bool use_gfx12_xfb_intrinsic,
208                                        nir_def *scratch_base,
209                                        nir_def *tid_in_tg,
210                                        nir_def *gen_prim[4],
211                                        nir_def *so_buffer_ret[4],
212                                        nir_def *buffer_offsets_ret[4],
213                                        nir_def *emit_prim_ret[4]);
214 
215 void
216 ac_nir_ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info,
217                                   unsigned stream, nir_def *so_buffer[4],
218                                   nir_def *buffer_offsets[4],
219                                   unsigned vertex_index, nir_def *vtx_lds_addr,
220                                   ac_nir_prerast_out *pr_out,
221                                   bool skip_primitive_id);
222 
223 void
224 ac_nir_repack_invocations_in_workgroup(nir_builder *b, nir_def **input_bool,
225                                        ac_nir_wg_repack_result *results, const unsigned num_repacks,
226                                        nir_def *lds_addr_base, unsigned max_num_waves,
227                                        unsigned wave_size);
228 
229 #ifdef __cplusplus
230 }
231 #endif
232 
233 #endif /* AC_NIR_HELPERS_H */
234