/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef __PAN_IR_H
#define __PAN_IR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include "compiler/nir/nir.h"
#include "util/hash_table.h"
#include "util/u_dynarray.h"

/* Indices for named (non-XFB) varyings that are present. These are packed
 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
 * of a given special field given a shift S by:
 *
 *      idx = popcount(P & ((1 << S) - 1))
 *
 * That is... count all of the varyings that come earlier; that count is the
 * buffer index. Likewise, the total number of special buffers required is
 * simply popcount(P).
 */

enum pan_special_varying {
   PAN_VARY_GENERAL = 0,
   PAN_VARY_POSITION = 1,
   PAN_VARY_PSIZ = 2,
   PAN_VARY_PNTCOORD = 3,
   PAN_VARY_FACE = 4,
   PAN_VARY_FRAGCOORD = 5,

   /* Keep last */
   PAN_VARY_MAX,
};

/* Maximum number of attribute descriptors required for varyings. These include
 * up to MAX_VARYING source level varyings plus one descriptor for each
 * non-GENERAL special varying */
#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1)

/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be
 * consistent with the blob so we can compare traces easier. */

enum { PAN_VERTEX_ID = 16, PAN_INSTANCE_ID = 17, PAN_MAX_ATTRIBUTE };

/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each.
 * In practice, the maximum number of FAU slots is limited by implementation.
 * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the
 * maximum number of 32-bit words is 128, since there are 2 words per FAU slot.
 *
 * Midgard can push at most 92 words, so this bound suffices. The Midgard
 * compiler pushes less than this, as Midgard uses register-mapped uniforms
 * instead of FAU, preventing large numbers of uniforms to be pushed for
 * nontrivial programs.
 */
#define PAN_MAX_PUSH 128

/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so
 * an offset to a word must be < 2^16. There are fewer than 2^8 UBOs */

struct panfrost_ubo_word {
   /* Index of the UBO the word is sourced from */
   uint16_t ubo;

   /* Byte offset of the word within that UBO */
   uint16_t offset;
};

86 struct panfrost_ubo_push {
87    unsigned count;
88    struct panfrost_ubo_word words[PAN_MAX_PUSH];
89 };
90 
/* Helper for searching the above. Note this is O(N) to the number of pushed
 * constants, do not run in the draw call hot path */

unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo,
                               unsigned offs);

/* Per-compile options and state handed to the backend compilers */
struct panfrost_compile_inputs {
   /* Sink for compiler debug messages */
   struct util_debug_callback *debug;

   unsigned gpu_id;
   bool is_blend, is_blit;
   struct {
      unsigned nr_samples;
      uint64_t bifrost_blend_desc;
   } blend;
   bool no_idvs;
   bool no_ubo_to_push;
   uint32_t view_mask;

   /* Used on Valhall.
    *
    * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0)
    * written by the previous stage (fragment shader) or written by this
    * stage (vertex shader). Bits are slots from gl_varying_slot.
    *
    * For modern APIs (GLES or VK), this should be 0.
    */
   uint32_t fixed_varying_mask;

   union {
      struct {
         uint32_t rt_conv[8];
      } bifrost;
      struct {
         /* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. */
         bool use_ld_var_buf;
      } valhall;
   };
};

131 struct pan_shader_varying {
132    gl_varying_slot location;
133    enum pipe_format format;
134 };
135 
136 struct bifrost_shader_blend_info {
137    nir_alu_type type;
138    uint32_t return_offset;
139 
140    /* mali_bifrost_register_file_format corresponding to nir_alu_type */
141    unsigned format;
142 };
143 
/*
 * Unpacked form of a v7 message preload descriptor, produced by the compiler's
 * message preload optimization. By splitting out this struct, the compiler does
 * not need to know about data structure packing, avoiding a dependency on
 * GenXML.
 */
struct bifrost_message_preload {
   /* Whether to preload this message */
   bool enabled;

   /* Varying to load from */
   unsigned varying_index;

   /* Register type, FP32 otherwise */
   bool fp16;

   /* Number of components, ignored if texturing */
   unsigned num_components;

   /* If texture is set, performs a texture instruction according to
    * texture_index, skip, and zero_lod. If texture is unset, only the
    * varying load is performed.
    */
   bool texture, skip, zero_lod;
   unsigned texture_index;
};

171 struct bifrost_shader_info {
172    struct bifrost_shader_blend_info blend[8];
173    nir_alu_type blend_src1_type;
174    bool wait_6, wait_7;
175    struct bifrost_message_preload messages[2];
176 
177    /* Whether any flat varyings are loaded. This may disable optimizations
178     * that change the provoking vertex, since that would load incorrect
179     * values for flat varyings.
180     */
181    bool uses_flat_shading;
182 };
183 
/* Midgard-specific metadata produced by the compiler for the driver */
struct midgard_shader_info {
   /* Tag of the first bundle, needed to launch the shader */
   unsigned first_tag;

   union {
      struct {
         bool reads_raw_vertex_id;
      } vs;
   };
};

193 struct pan_shader_info {
194    gl_shader_stage stage;
195    unsigned work_reg_count;
196    unsigned tls_size;
197    unsigned wls_size;
198 
199    /* Bit mask of preloaded registers */
200    uint64_t preload;
201 
202    union {
203       struct {
204          bool reads_frag_coord;
205          bool reads_point_coord;
206          bool reads_face;
207          bool can_discard;
208          bool writes_depth;
209          bool writes_stencil;
210          bool writes_coverage;
211          bool sidefx;
212          bool sample_shading;
213          bool early_fragment_tests;
214          bool can_early_z, can_fpk;
215          bool untyped_color_outputs;
216          BITSET_WORD outputs_read;
217          BITSET_WORD outputs_written;
218       } fs;
219 
220       struct {
221          bool writes_point_size;
222 
223          /* If the primary shader writes point size, the Valhall
224           * driver may need a variant that does not write point
225           * size. Offset to such a shader in the program binary.
226           *
227           * Zero if no such variant is required.
228           *
229           * Only used with IDVS on Valhall.
230           */
231          unsigned no_psiz_offset;
232 
233          /* Set if Index-Driven Vertex Shading is in use */
234          bool idvs;
235 
236          /* If IDVS is used, whether a varying shader is used */
237          bool secondary_enable;
238 
239          /* If a varying shader is used, the varying shader's
240           * offset in the program binary
241           */
242          unsigned secondary_offset;
243 
244          /* If IDVS is in use, number of work registers used by
245           * the varying shader
246           */
247          unsigned secondary_work_reg_count;
248 
249          /* If IDVS is in use, bit mask of preloaded registers
250           * used by the varying shader
251           */
252          uint64_t secondary_preload;
253       } vs;
254 
255       struct {
256          /* Is it legal to merge workgroups? This is true if the
257           * shader uses neither barriers nor shared memory. This
258           * requires caution: if the API allows specifying shared
259           * memory at launch time (instead of compile time), that
260           * memory will not be accounted for by the compiler.
261           *
262           * Used by the Valhall hardware.
263           */
264          bool allow_merging_workgroups;
265       } cs;
266    };
267 
268    /* Does the shader contains a barrier? or (for fragment shaders) does it
269     * require helper invocations, which demand the same ordering guarantees
270     * of the hardware? These notions are unified in the hardware, so we
271     * unify them here as well.
272     */
273    bool contains_barrier;
274    bool separable;
275    bool writes_global;
276    uint64_t outputs_written;
277 
278    /* Floating point controls that the driver should try to honour */
279    bool ftz_fp16, ftz_fp32;
280 
281    unsigned sampler_count;
282    unsigned texture_count;
283    unsigned ubo_count;
284    unsigned attributes_read_count;
285    unsigned attribute_count;
286    unsigned attributes_read;
287 
288    struct {
289       unsigned input_count;
290       struct pan_shader_varying input[PAN_MAX_VARYINGS];
291       unsigned output_count;
292       struct pan_shader_varying output[PAN_MAX_VARYINGS];
293 
294       /* Bitfield of noperspective varyings, starting at VARYING_SLOT_VAR0 */
295       uint32_t noperspective;
296    } varyings;
297 
298    /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
299     * Uniforms (Bifrost) */
300    struct panfrost_ubo_push push;
301 
302    uint32_t ubo_mask;
303 
304    /* Quirk for GPUs that does not support auto32 types. */
305    bool quirk_no_auto32;
306 
307    union {
308       struct bifrost_shader_info bifrost;
309       struct midgard_shader_info midgard;
310    };
311 };
312 
313 typedef struct pan_block {
314    /* Link to next block. Must be first for mir_get_block */
315    struct list_head link;
316 
317    /* List of instructions emitted for the current block */
318    struct list_head instructions;
319 
320    /* Index of the block in source order */
321    unsigned name;
322 
323    /* Control flow graph */
324    struct pan_block *successors[2];
325    struct set *predecessors;
326    bool unconditional_jumps;
327 
328    /* In liveness analysis, these are live masks (per-component) for
329     * indices for the block. Scalar compilers have the luxury of using
330     * simple bit fields, but for us, liveness is a vector idea. */
331    uint16_t *live_in;
332    uint16_t *live_out;
333 } pan_block;
334 
335 struct pan_instruction {
336    struct list_head link;
337 };
338 
/* Iterates over a block's instructions in reverse emission order */
#define pan_foreach_instr_in_block_rev(block, v)                               \
   list_for_each_entry_rev(struct pan_instruction, v, &block->instructions,    \
                           link)

/* Iterates over the non-NULL successors of a block (at most two) */
#define pan_foreach_successor(blk, v)                                          \
   pan_block *v;                                                               \
   pan_block **_v;                                                             \
   for (_v = (pan_block **)&blk->successors[0], v = *_v;                       \
        v != NULL && _v < (pan_block **)&blk->successors[2]; _v++, v = *_v)

/* Iterates over every predecessor stored in the block's predecessor set */
#define pan_foreach_predecessor(blk, v)                                        \
   struct set_entry *_entry_##v;                                               \
   struct pan_block *v;                                                        \
   for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL),            \
       v = (struct pan_block *)(_entry_##v ? _entry_##v->key : NULL);          \
        _entry_##v != NULL;                                                    \
        _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v),      \
       v = (struct pan_block *)(_entry_##v ? _entry_##v->key : NULL))

358 static inline pan_block *
pan_exit_block(struct list_head * blocks)359 pan_exit_block(struct list_head *blocks)
360 {
361    pan_block *last = list_last_entry(blocks, pan_block, link);
362    assert(!last->successors[0] && !last->successors[1]);
363    return last;
364 }
365 
366 typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);
367 
368 void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max,
369                       uint16_t mask);
370 void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max,
371                        uint16_t mask);
372 bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);
373 
374 void pan_compute_liveness(struct list_head *blocks, unsigned temp_count,
375                           pan_liveness_update callback);
376 
377 void pan_free_liveness(struct list_head *blocks);
378 
379 uint16_t pan_to_bytemask(unsigned bytes, unsigned mask);
380 
381 void pan_block_add_successor(pan_block *block, pan_block *successor);
382 
383 /* IR indexing */
384 #define PAN_IS_REG (1)
385 
386 /* IR printing helpers */
387 void pan_print_alu_type(nir_alu_type t, FILE *fp);
388 
/* NIR passes to do some backend-specific lowering */

/* Writeout targets, combinable as a bitmask: colour, depth, stencil, and a
 * second colour source (dual-source blending) */
#define PAN_WRITEOUT_C 1
#define PAN_WRITEOUT_Z 2
#define PAN_WRITEOUT_S 4
#define PAN_WRITEOUT_2 8

396 bool pan_nir_lower_zs_store(nir_shader *nir);
397 bool pan_nir_lower_store_component(nir_shader *shader);
398 
399 bool pan_nir_lower_vertex_id(nir_shader *shader);
400 
401 bool pan_nir_lower_image_ms(nir_shader *shader);
402 
403 bool pan_nir_lower_frag_coord_zw(nir_shader *shader);
404 bool pan_nir_lower_noperspective_vs(nir_shader *shader);
405 bool pan_nir_lower_noperspective_fs(nir_shader *shader);
406 bool pan_nir_lower_static_noperspective(nir_shader *shader,
407                                         uint32_t noperspective_varyings);
408 
409 bool pan_lower_helper_invocation(nir_shader *shader);
410 bool pan_lower_sample_pos(nir_shader *shader);
411 bool pan_lower_xfb(nir_shader *nir);
412 
413 bool pan_lower_image_index(nir_shader *shader, unsigned vs_img_attrib_offset);
414 
415 uint32_t pan_nir_collect_noperspective_varyings_fs(nir_shader *s);
416 void pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info);
417 
/*
 * Helper returning the subgroup size. Generally, this is equal to the number of
 * threads in a warp. For Midgard (including warping models), this returns 1, as
 * subgroups are not supported.
 */
static inline unsigned
pan_subgroup_size(unsigned arch)
{
   if (arch >= 9)
      return 16;
   else if (arch >= 7)
      return 8;
   else if (arch >= 6)
      return 4;
   else
      return 1;
}

/*
 * Helper extracting the table from a given handle of the Valhall descriptor
 * model (the top 8 bits of the handle; only 64 tables exist).
 */
static inline unsigned
pan_res_handle_get_table(unsigned handle)
{
   unsigned table = handle >> 24;

   assert(table < 64);
   return table;
}

/*
 * Helper returning the index from a given handle of the Valhall descriptor
 * model (the low 24 bits of the handle).
 */
static inline unsigned
pan_res_handle_get_index(unsigned handle)
{
   return handle & BITFIELD_MASK(24);
}

/*
 * Helper creating a handle for the Valhall descriptor model from a table and
 * an index; inverse of pan_res_handle_get_table/pan_res_handle_get_index.
 */
static inline unsigned
pan_res_handle(unsigned table, unsigned index)
{
   assert(table < 64);
   assert(index < (1u << 24));

   return (table << 24) | index;
}

#endif /* __PAN_IR_H */