/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "compiler/nir/nir.h"
#include "elk_compiler.h"
#include "nir_builder.h"

#ifdef __cplusplus
extern "C" {
#endif

int elk_type_size_vec4(const struct glsl_type *type, bool bindless);
int elk_type_size_dvec4(const struct glsl_type *type, bool bindless);

static inline int
elk_type_size_scalar_bytes(const struct glsl_type *type, bool bindless)
{
   return glsl_count_dword_slots(type, bindless) * 4;
}

static inline int
elk_type_size_vec4_bytes(const struct glsl_type *type, bool bindless)
{
   return elk_type_size_vec4(type, bindless) * 16;
}

/* Flags set in the instr->pass_flags field by i965 analysis passes */
enum {
   ELK_NIR_NON_BOOLEAN           = 0x0,

   /* Indicates that the given instruction's destination is a boolean
    * value but that it needs to be resolved before it can be used.
    * On Gen <= 5, CMP instructions return a 32-bit value where the bottom
    * bit represents the actual true/false value of the compare and the top
    * 31 bits are undefined.  In order to use this value, we have to do a
    * "resolve" operation by replacing the value of the CMP with -(x & 1)
    * to sign-extend the bottom bit to 0/~0 (see the sketch after this
    * enum).
    */
   ELK_NIR_BOOLEAN_NEEDS_RESOLVE = 0x1,

   /* Indicates that the given instruction's destination is a boolean
    * value that has intentionally been left unresolved.  Not all boolean
    * values need to be resolved immediately.  For instance, if we have
    *
    *    CMP r1 r2 r3
    *    CMP r4 r5 r6
    *    AND r7 r1 r4
    *
    * We don't have to resolve the result of the two CMP instructions
    * immediately because the AND still does an AND of the bottom bits.
    * Instead, we can save ourselves instructions by delaying the resolve
    * until after the AND.  The result of the two CMP instructions is left
    * as ELK_NIR_BOOLEAN_UNRESOLVED.
    */
   ELK_NIR_BOOLEAN_UNRESOLVED    = 0x2,

   /* Indicates that the given instruction's destination is a boolean
    * value that does not need a resolve.  For instance, if you AND two
    * values that are ELK_NIR_BOOLEAN_NEEDS_RESOLVE then we know that both
    * values will be 0/~0 before we get them and the result of the AND is
    * also guaranteed to be 0/~0 and does not need a resolve.
    */
   ELK_NIR_BOOLEAN_NO_RESOLVE    = 0x3,

   /* A mask to extract the boolean status values from instr->pass_flags */
   ELK_NIR_BOOLEAN_MASK          = 0x3,
};
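
/* As a sketch of the resolve mentioned above: given a 32-bit CMP result x
 * whose bottom bit holds the true/false value, the resolve computes
 *
 *    resolved = -(x & 1);
 *
 * which masks off the undefined top 31 bits and negates bit 0, sign-extending
 * it to 0x00000000 for false and 0xffffffff for true.
 */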

void elk_nir_analyze_boolean_resolves(nir_shader *nir);

struct elk_nir_compiler_opts {
   /* Soft floating point implementation shader */
   const nir_shader *softfp64;

   /* Whether robust image access is enabled */
   bool robust_image_access;

   /* Input vertices for TCS stage (0 means dynamic) */
   unsigned input_vertices;
};

/* UBO surface index can come in two flavors:
 *    - nir_intrinsic_resource_intel
 *    - anything else
 *
 * In the first case, checking that the surface index is const requires
 * checking resource_intel::src[1]. In any other case it's a simple
 * nir_src_is_const().
 *
 * This function should only be called on src[0] of load_ubo intrinsics.
 */
static inline bool
elk_nir_ubo_surface_index_is_pushable(nir_src src)
{
   nir_intrinsic_instr *intrin =
      src.ssa->parent_instr->type == nir_instr_type_intrinsic ?
      nir_instr_as_intrinsic(src.ssa->parent_instr) : NULL;

   if (intrin && intrin->intrinsic == nir_intrinsic_resource_intel) {
      return (nir_intrinsic_resource_access_intel(intrin) &
              nir_resource_intel_pushable);
   }

   return nir_src_is_const(src);
}

static inline unsigned
elk_nir_ubo_surface_index_get_push_block(nir_src src)
{
   if (nir_src_is_const(src))
      return nir_src_as_uint(src);

   if (!elk_nir_ubo_surface_index_is_pushable(src))
      return UINT32_MAX;

   assert(src.ssa->parent_instr->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src.ssa->parent_instr);
   assert(intrin->intrinsic == nir_intrinsic_resource_intel);

   return nir_intrinsic_resource_block_intel(intrin);
}
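
/* A usage sketch for the two helpers above (hypothetical caller; assumes
 * "intrin" is a load_ubo intrinsic, whose src[0] is the surface index):
 *
 *    if (elk_nir_ubo_surface_index_is_pushable(intrin->src[0])) {
 *       unsigned block =
 *          elk_nir_ubo_surface_index_get_push_block(intrin->src[0]);
 *       ...  // use "block" when gathering UBO push-constant ranges
 *    }
 */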

/* This helper returns the binding table index of a surface access (any
 * buffer/image/etc...). It works off the source of one of the intrinsics
 * (load_ubo, load_ssbo, store_ssbo, load_image, store_image, etc...).
 *
 * If the source is constant, then this is the binding table index. If we're
 * going through a resource_intel intrinsic, then we need to check src[1] of
 * that intrinsic.
 */
static inline unsigned
elk_nir_ubo_surface_index_get_bti(nir_src src)
{
   if (nir_src_is_const(src))
      return nir_src_as_uint(src);

   assert(src.ssa->parent_instr->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src.ssa->parent_instr);
   if (!intrin || intrin->intrinsic != nir_intrinsic_resource_intel)
      return UINT32_MAX;

   /* In practice we could even skip this check because bindless accesses
    * always operate from a base offset coming from a push constant, so
    * they can never be constant.
    */
   if (nir_intrinsic_resource_access_intel(intrin) &
       nir_resource_intel_bindless)
      return UINT32_MAX;

   if (!nir_src_is_const(intrin->src[1]))
      return UINT32_MAX;

   return nir_src_as_uint(intrin->src[1]);
}
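
/* For example (a sketch; assumes "intrin" is a load_ssbo intrinsic whose
 * src[0] holds the surface index):
 *
 *    unsigned bti = elk_nir_ubo_surface_index_get_bti(intrin->src[0]);
 *    if (bti != UINT32_MAX)
 *       ...  // the access can use binding table entry "bti" directly
 */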

void elk_preprocess_nir(const struct elk_compiler *compiler,
                        nir_shader *nir,
                        const struct elk_nir_compiler_opts *opts);
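
/* A hypothetical call site for elk_preprocess_nir (a sketch; "compiler" and
 * "nir" are assumed to exist already):
 *
 *    struct elk_nir_compiler_opts opts = {
 *       .softfp64 = NULL,             // no soft-fp64 shader provided
 *       .robust_image_access = false,
 *       .input_vertices = 0,          // dynamic TCS input vertex count
 *    };
 *    elk_preprocess_nir(compiler, nir, &opts);
 */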

void
elk_nir_link_shaders(const struct elk_compiler *compiler,
                     nir_shader *producer, nir_shader *consumer);

bool elk_nir_lower_cs_intrinsics(nir_shader *nir,
                                 const struct intel_device_info *devinfo,
                                 struct elk_cs_prog_data *prog_data);
bool elk_nir_lower_alpha_to_coverage(nir_shader *shader,
                                     const struct elk_wm_prog_key *key,
                                     const struct elk_wm_prog_data *prog_data);
void elk_nir_lower_vs_inputs(nir_shader *nir,
                             bool edgeflag_is_last,
                             const uint8_t *vs_attrib_wa_flags);
void elk_nir_lower_vue_inputs(nir_shader *nir,
                              const struct intel_vue_map *vue_map);
void elk_nir_lower_tes_inputs(nir_shader *nir, const struct intel_vue_map *vue);
void elk_nir_lower_fs_inputs(nir_shader *nir,
                             const struct intel_device_info *devinfo,
                             const struct elk_wm_prog_key *key);
void elk_nir_lower_vue_outputs(nir_shader *nir);
void elk_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue,
                               enum tess_primitive_mode tes_primitive_mode);
void elk_nir_lower_fs_outputs(nir_shader *nir);

bool elk_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);

bool elk_nir_lower_shading_rate_output(nir_shader *nir);

bool elk_nir_lower_sparse_intrinsics(nir_shader *nir);

struct elk_nir_lower_storage_image_opts {
   const struct intel_device_info *devinfo;

   bool lower_loads;
   bool lower_stores;
   bool lower_atomics;
   bool lower_get_size;
};

bool elk_nir_lower_storage_image(nir_shader *nir,
                                 const struct elk_nir_lower_storage_image_opts *opts);

bool elk_nir_lower_mem_access_bit_sizes(nir_shader *shader,
                                        const struct intel_device_info *devinfo);

void elk_postprocess_nir(nir_shader *nir,
                         const struct elk_compiler *compiler,
                         bool debug_enabled,
                         enum elk_robustness_flags robust_flags);

bool elk_nir_apply_attribute_workarounds(nir_shader *nir,
                                         const uint8_t *attrib_wa_flags);

bool elk_nir_apply_trig_workarounds(nir_shader *nir);

bool elk_nir_limit_trig_input_range_workaround(nir_shader *nir);

void elk_nir_apply_key(nir_shader *nir,
                       const struct elk_compiler *compiler,
                       const struct elk_base_prog_key *key,
                       unsigned max_subgroup_size);

unsigned elk_nir_api_subgroup_size(const nir_shader *nir,
                                   unsigned hw_subgroup_size);

void elk_nir_analyze_ubo_ranges(const struct elk_compiler *compiler,
                                nir_shader *nir,
                                struct elk_ubo_range out_ranges[4]);

void elk_nir_optimize(nir_shader *nir, bool is_scalar,
                      const struct intel_device_info *devinfo);

nir_shader *elk_nir_create_passthrough_tcs(void *mem_ctx,
                                           const struct elk_compiler *compiler,
                                           const struct elk_tcs_prog_key *key);

#define ELK_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
#define ELK_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
#define ELK_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1
#define ELK_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1)

bool elk_nir_move_interpolation_to_top(nir_shader *nir);
nir_def *elk_nir_load_global_const(nir_builder *b,
                                   nir_intrinsic_instr *load_uniform,
                                   nir_def *base_addr,
                                   unsigned off);

const struct glsl_type *elk_nir_get_var_type(const struct nir_shader *nir,
                                             nir_variable *var);

void elk_nir_adjust_payload(nir_shader *shader);

#ifdef __cplusplus
}
#endif