1 /*
2  * © Copyright 2018 Alyssa Rosenzweig
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  */
24 
25 #ifndef __BUILDER_H__
26 #define __BUILDER_H__
27 
28 #define _LARGEFILE64_SOURCE 1
29 #include <assert.h>
30 #include <sys/mman.h>
31 #include "pan_afbc_cso.h"
32 #include "pan_blend_cso.h"
33 #include "pan_earlyzs.h"
34 #include "pan_encoder.h"
35 #include "pan_job.h"
36 #include "pan_resource.h"
37 #include "pan_texture.h"
38 
39 #include "pipe/p_context.h"
40 #include "pipe/p_defines.h"
41 #include "pipe/p_screen.h"
42 #include "pipe/p_state.h"
43 #include "util/compiler.h"
44 #include "util/detect.h"
45 #include "util/format/u_formats.h"
46 #include "util/hash_table.h"
47 #include "util/simple_mtx.h"
48 #include "util/u_blitter.h"
49 
50 #include "compiler/shader_enums.h"
51 #include "midgard/midgard_compile.h"
52 
53 #include "pan_csf.h"
54 
/* Set the bits of `bit` in `lval` when `cond` is true, clear them otherwise.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement: `SET_BIT(x, b, c);` can then be used as the unbraced body of an
 * if/else without the macro's own `else` capturing, or being cut off from, the
 * caller's. The invocation must be followed by a semicolon.
 *
 * Only one of the two branches runs, so `lval` and `bit` are each evaluated
 * once per invocation. */
#define SET_BIT(lval, bit, cond)                                               \
   do {                                                                        \
      if (cond)                                                                \
         (lval) |= (bit);                                                      \
      else                                                                     \
         (lval) &= ~(bit);                                                     \
   } while (0)
60 
/* Dirty tracking flags. 3D is for general 3D state. Shader flags are
 * per-stage. Renderer refers to Renderer State Descriptors. Vertex refers to
 * vertex attributes/elements. */

/* Flags for context-wide 3D state. Each enumerator is BITFIELD_BIT(n) for a
 * distinct n, so flags can be combined into one mask; that mask is kept in
 * panfrost_context::dirty. */
enum pan_dirty_3d {
   PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0),
   PAN_DIRTY_SCISSOR = BITFIELD_BIT(1),
   PAN_DIRTY_VERTEX = BITFIELD_BIT(2),
   PAN_DIRTY_PARAMS = BITFIELD_BIT(3),
   PAN_DIRTY_DRAWID = BITFIELD_BIT(4),
   PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5),
   PAN_DIRTY_ZS = BITFIELD_BIT(6),
   PAN_DIRTY_BLEND = BITFIELD_BIT(7),
   PAN_DIRTY_MSAA = BITFIELD_BIT(8),
   PAN_DIRTY_OQ = BITFIELD_BIT(9),
   PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10),
   PAN_DIRTY_POINTS = BITFIELD_BIT(11),
   PAN_DIRTY_SO = BITFIELD_BIT(12),
};
80 
/* Per-shader-stage dirty flags. Each enumerator is BITFIELD_BIT(n) for a
 * distinct n; one mask of these is kept per stage in
 * panfrost_context::dirty_shader[]. */
enum pan_dirty_shader {
   PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0),
   PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1),
   PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2),
   PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3),
   PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4),
   PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5),
};
89 
/* Constant buffer bindings for one shader stage (panfrost_context keeps one
 * of these per stage). */
struct panfrost_constant_buffer {
   /* One slot per Gallium constant buffer index */
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];

   /* NOTE(review): presumably bit i is set when cb[i] is bound -- confirm
    * against the code that updates it */
   uint32_t enabled_mask;
};
94 
/* Driver-side query object. */
struct panfrost_query {
   /* Passthrough from Gallium */
   unsigned type;
   unsigned index;

   /* For computed queries. 64-bit to prevent overflow */
   struct {
      uint64_t start;
      uint64_t end;
   };

   /* Memory for the GPU to writeback the value of the query */
   struct pipe_resource *rsrc;

   /* Whether an occlusion query is for a MSAA framebuffer */
   bool msaa;
};
112 
/* Driver subclass of the Gallium stream output target. `base` must remain the
 * first member: pan_so_target() converts a pipe_stream_output_target pointer
 * to this type with a plain cast. */
struct panfrost_streamout_target {
   struct pipe_stream_output_target base;

   /* NOTE(review): units and meaning of this offset are not visible in this
    * header -- confirm against the users */
   uint32_t offset;
};
117 
/* Stream output target bindings held by the context. */
struct panfrost_streamout {
   /* Target pointers, up to PIPE_MAX_SO_BUFFERS of them */
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];

   /* NOTE(review): presumably the number of valid leading entries in
    * targets[] -- confirm */
   unsigned num_targets;
};
122 
/* Driver context. `base` must remain the first member: pan_context() converts
 * a pipe_context pointer to this type with a plain cast. */
struct panfrost_context {
   /* Gallium context */
   struct pipe_context base;

   /* Context flags */
   unsigned flags;

   /* Dirty global state */
   enum pan_dirty_3d dirty;

   /* Per shader stage dirty state */
   enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES];

   /* Unowned pools, so manage yourself. */
   struct panfrost_pool descs, shaders;

   /* Sync obj used to keep track of in-flight jobs. */
   uint32_t syncobj;

   /* Set of 32 batches. When the set is full, the LRU entry (the batch
    * with the smallest seqnum) is flushed to free a slot.
    */
   struct {
      uint64_t seqnum;
      struct panfrost_batch slots[PAN_MAX_BATCHES];

      /** Set of active batches for faster traversal */
      BITSET_DECLARE(active, PAN_MAX_BATCHES);
   } batches;

   /* Map from resources to panfrost_batches */
   struct hash_table *writers;

   /* Bound job batch */
   struct panfrost_batch *batch;

   /* Within a launch_grid call.. */
   const struct pipe_grid_info *compute_grid;

   struct pipe_framebuffer_state pipe_framebuffer;
   struct panfrost_streamout streamout;

   /* Query bookkeeping and counters */
   bool active_queries;
   uint64_t prims_generated;
   uint64_t tf_prims_generated;
   uint64_t draw_calls;
   struct panfrost_query *occlusion_query;

   /* NOTE(review): the fields below look like parameters of the draw being
    * processed -- confirm against the draw path */
   unsigned drawid;
   unsigned vertex_count;
   unsigned instance_count;
   unsigned offset_start;
   unsigned base_vertex;
   unsigned base_instance;
   enum mesa_prim active_prim;

   /* If instancing is enabled, vertex count padded for instance; if
    * it is disabled, just equal to plain vertex count */
   unsigned padded_count;

   /* One constant buffer set per shader stage */
   struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
   struct panfrost_rasterizer *rasterizer;
   struct panfrost_vertex_state *vertex;

   /* Per-stage shader CSO and compiled shader, indexed by shader stage */
   struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES];
   struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES];

   struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
   uint32_t vb_mask;

   /* Bound CL global buffers */
   struct util_dynarray global_buffers;

   /* Per-stage SSBO bindings and their mask */
   struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
   uint32_t ssbo_mask[PIPE_SHADER_TYPES];

   /* Per-stage image bindings and their mask */
   struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
   uint32_t image_mask[PIPE_SHADER_TYPES];

   /* Per-stage sampler state bindings */
   struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
   unsigned sampler_count[PIPE_SHADER_TYPES];
   uint32_t valid_samplers[PIPE_SHADER_TYPES];

   /* Per-stage sampler view bindings */
   struct panfrost_sampler_view
      *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
   unsigned sampler_view_count[PIPE_SHADER_TYPES];

   struct blitter_context *blitter;

   struct pan_afbc_shaders afbc_shaders;

   struct panfrost_blend_state *blend;

   /* On Valhall, does the current blend state use a blend shader for any
    * output? We need this information in a hot path to decide if
    * per-sample shading should be enabled.
    */
   bool valhall_has_blend_shader;

   struct pipe_viewport_state pipe_viewport;
   struct pipe_scissor_state scissor;
   struct pipe_blend_color blend_color;
   struct panfrost_zsa_state *depth_stencil;
   struct pipe_stencil_ref stencil_ref;
   uint16_t sample_mask;
   unsigned min_samples;

   /* Conditional rendering state */
   struct panfrost_query *cond_query;
   bool cond_cond;
   enum pipe_render_cond_flag cond_mode;

   bool is_noop;

   /* Mask of active render targets */
   uint8_t fb_rt_mask;

   /* NOTE(review): presumably an incoming sync file descriptor and the sync
    * object associated with it -- confirm against the fence code */
   int in_sync_fd;
   uint32_t in_sync_obj;

   /* Backend-specific context state; csf is the only member at present */
   union {
      struct panfrost_csf_context csf;
   };
};
246 
/* Corresponds to the CSO */

/* Forward declaration only; the definition is not in this header. */
struct panfrost_rasterizer;
250 
/* Linked varyings */
struct pan_linkage {
   /* If the upload is owned by the CSO instead
    * of the pool, the referenced BO. Else,
    * NULL. */
   struct panfrost_bo *bo;

   /* Uploaded attribute descriptors */
   uint64_t producer, consumer;

   /* Varyings buffers required */
   uint32_t present;

   /* Per-vertex stride for general varying buffer */
   uint32_t stride;
};
267 
/* System value infrastructure */

/* Capacity of panfrost_sysvals::sysvals[] */
#define MAX_SYSVAL_COUNT 32

/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal
 * their class for equal comparison */

/* A sysval is packed as (no << 16) | type: the low 16 bits hold the
 * PAN_SYSVAL_<type> enumerator and the bits above hold the per-type ID.
 * With no == 0 the packed value equals the bare enumerator. */
#define PAN_SYSVAL(type, no)    (((no) << 16) | PAN_SYSVAL_##type)
#define PAN_SYSVAL_TYPE(sysval) ((sysval)&0xffff)
#define PAN_SYSVAL_ID(sysval)   ((sysval) >> 16)
277 
/* Define some common types. We start at one for easy indexing of hash
 * tables internal to the compiler */

/* Sysval type codes; these are the PAN_SYSVAL_<type> names that PAN_SYSVAL()
 * pastes together. Note that the value 6 is not assigned. */
enum {
   PAN_SYSVAL_VIEWPORT_SCALE = 1,
   PAN_SYSVAL_VIEWPORT_OFFSET = 2,
   PAN_SYSVAL_TEXTURE_SIZE = 3,
   PAN_SYSVAL_SSBO = 4,
   PAN_SYSVAL_NUM_WORK_GROUPS = 5,
   PAN_SYSVAL_SAMPLER = 7,
   PAN_SYSVAL_LOCAL_GROUP_SIZE = 8,
   PAN_SYSVAL_WORK_DIM = 9,
   PAN_SYSVAL_IMAGE_SIZE = 10,
   PAN_SYSVAL_SAMPLE_POSITIONS = 11,
   PAN_SYSVAL_MULTISAMPLED = 12,
   PAN_SYSVAL_RT_CONVERSION = 13,
   PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
   PAN_SYSVAL_DRAWID = 15,
   PAN_SYSVAL_BLEND_CONSTANTS = 16,
   PAN_SYSVAL_XFB = 17,
   PAN_SYSVAL_NUM_VERTICES = 18,
};
300 
/* Pack the parameters of a TXS sysval into a single ID:
 *
 *    bits 0..6   texture index
 *    bits 7..8   dimension
 *    bit  9      set when is_array is non-zero
 *
 * texidx and dim are OR'd in without masking, so callers must pass values
 * that fit in their field. */
#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array)                               \
   ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

/* Unpack the fields written by PAN_TXS_SYSVAL_ID. Every expansion is fully
 * parenthesized so it can be used inside any larger expression. IS_ARRAY
 * yields exactly 0 or 1. */
#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id)  ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id)      (((id) >> 7) & 0x3)
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) (!!((id) & (1 << 9)))
307 
/* Table of the sysvals used by a shader. */
struct panfrost_sysvals {
   /* The mapping of sysvals to uniforms and the number of entries in use.
    * NOTE(review): the previous comment also mentioned an "off-by-one
    * inverse", but no such member exists in this struct. */
   unsigned sysvals[MAX_SYSVAL_COUNT];
   unsigned sysval_count;
};
313 
/* On Valhall, the driver gives the hardware a table of resource tables.
 * Resources are addressed as the index of the table together with the index of
 * the resource within the table. For simplicity, we put one type of resource
 * in each table and fix the numbering of the tables.
 *
 * This numbering is arbitrary.
 */
enum panfrost_resource_table {
   PAN_TABLE_UBO = 0,
   PAN_TABLE_ATTRIBUTE,
   PAN_TABLE_ATTRIBUTE_BUFFER,
   PAN_TABLE_SAMPLER,
   PAN_TABLE_TEXTURE,
   PAN_TABLE_IMAGE,
   PAN_TABLE_SSBO,

   /* Count of the tables above; must stay last */
   PAN_NUM_RESOURCE_TABLES
};
332 
/* Number of 32-bit words in panfrost_compiled_shader::partial_rsd */
#define RSD_WORDS 16
334 
/* Variants bundle together to form the backing CSO, bundling multiple
 * shaders with varying emulated features baked in
 */

/* Fragment shader half of the variant key (see the fs member of the
 * panfrost_shader_key union). */
struct panfrost_fs_key {
   /* Number of colour buffers if gl_FragColor is written */
   unsigned nr_cbufs_for_fragcolor;

   /* On Valhall, fixed_varying_mask of the linked vertex shader */
   uint32_t fixed_varying_mask;

   /* Midgard shaders that read the tilebuffer must be keyed for
    * non-blendable formats
    *
    * NOTE(review): the literal 8 presumably matches the maximum number of
    * colour buffers -- confirm, and consider a named constant.
    */
   enum pipe_format rt_formats[8];

   /* From rasterize state, to lower point sprites */
   uint16_t sprite_coord_enable;

   /* User clip plane lowering */
   uint8_t clip_plane_enable;

   bool line_smooth;
};
358 
/* Vertex shader half of the variant key (see the vs member of the
 * panfrost_shader_key union). */
struct panfrost_vs_key {
   /* We have a special "transform feedback" vertex program derived from a
    * vertex shader. If is_xfb is set on a vertex shader, this is a transform
    * feedback shader, else it is a regular vertex shader. */
   bool is_xfb;

   /* Bit mask of varyings in the linked FS that use noperspective
    * interpolation, starting at VARYING_SLOT_VAR0 */
   uint32_t noperspective_varyings;
};
369 
/* Variant key for a compiled shader. The vs and fs keys share storage, so
 * only the member matching the shader's stage is meaningful. */
struct panfrost_shader_key {
   union {
      struct panfrost_vs_key vs;
      struct panfrost_fs_key fs;
   };
};
376 
/* One compiled shader: a variant of a panfrost_uncompiled_shader built for a
 * particular key. */
struct panfrost_compiled_shader {
   /* Respectively, shader binary and Renderer State Descriptor */
   struct panfrost_pool_ref bin, state;

   /* For fragment shaders, a prepared (but not uploaded RSD) */
   uint32_t partial_rsd[RSD_WORDS];

   struct pan_shader_info info;
   struct panfrost_sysvals sysvals;

   struct pan_earlyzs_lut earlyzs;

   /* Linked varyings, for non-separable programs */
   struct pan_linkage linkage;

   struct pipe_stream_output_info stream_output;

   /* The key this variant was compiled for */
   struct panfrost_shader_key key;

   /* Mask of state that dirties the sysvals */
   unsigned dirty_3d, dirty_shader;
};
399 
/* Shader CSO */
struct panfrost_uncompiled_shader {
   /* NIR for the shader. For graphics, this will be non-NULL even for
    * TGSI. For compute, this will be NULL after the shader is compiled,
    * as we don't need any compute variants.
    */
   const nir_shader *nir;

   /* A SHA1 of the serialized NIR for the disk cache. */
   unsigned char nir_sha1[20];

   /* Stream output information */
   struct pipe_stream_output_info stream_output;

   /** Lock for the variants array */
   simple_mtx_t lock;

   /* Array of panfrost_compiled_shader */
   struct util_dynarray variants;

   /* Compiled transform feedback program, if one is required */
   struct panfrost_compiled_shader *xfb;

   /* On vertex shaders, bit mask of special desktop-only varyings to link
    * with the fragment shader. Used on Valhall to implement separable
    * shaders for desktop GL.
    */
   uint32_t fixed_varying_mask;

   /* On fragment shaders, bit mask of varyings using noperspective
    * interpolation, starting at VARYING_SLOT_VAR0 */
   uint32_t noperspective_varyings;

   /* If gl_FragColor was lowered, we need to optimize the stores later */
   bool fragcolor_lowered;
};
436 
/* The binary artefacts of compiling a shader. This differs from
 * panfrost_compiled_shader, which adds extra metadata beyond compiling but
 * throws away information not needed after the initial compile.
 *
 * This structure is serialized for the shader disk cache, so changes to its
 * members affect what is stored in the cache.
 */
struct panfrost_shader_binary {
   /* Collected information about the compiled shader */
   struct pan_shader_info info;
   struct panfrost_sysvals sysvals;

   /* The binary itself */
   struct util_dynarray binary;
};
451 
/* Shader disk cache entry points; the implementations are not in this header.
 *
 * Store and retrieve both take the uncompiled shader and the variant key next
 * to the binary. Store only reads the binary (const), retrieve writes it.
 * NOTE(review): presumably retrieve returns true on a cache hit -- confirm
 * against the implementation. */
void
panfrost_disk_cache_store(struct disk_cache *cache,
                          const struct panfrost_uncompiled_shader *uncompiled,
                          const struct panfrost_shader_key *key,
                          const struct panfrost_shader_binary *binary);

bool panfrost_disk_cache_retrieve(
   struct disk_cache *cache,
   const struct panfrost_uncompiled_shader *uncompiled,
   const struct panfrost_shader_key *key,
   struct panfrost_shader_binary *binary);

void panfrost_disk_cache_init(struct panfrost_screen *screen);

/* NIR passes, implemented elsewhere.
 * NOTE(review): the bool return presumably reports whether the shader was
 * modified, as is conventional for NIR passes -- confirm. */
bool panfrost_nir_remove_fragcolor_stores(nir_shader *s, unsigned nr_cbufs);

bool panfrost_nir_lower_sysvals(nir_shader *s, unsigned arch,
                                struct panfrost_sysvals *sysvals);

bool panfrost_nir_lower_res_indices(nir_shader *shader,
                                    struct panfrost_compile_inputs *inputs);
473 
/** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer
 * Descriptor at draw-time on Midgard
 */
struct pan_vertex_buffer {
   /* Vertex buffer index */
   unsigned vbi;

   /* Divisor paired with that vertex buffer */
   unsigned divisor;
};
481 
/* Implemented elsewhere.
 * NOTE(review): presumably looks up or appends the (vbi, divisor) pair in
 * buffers[], updating *nr_bufs, and returns its index -- confirm against the
 * implementation. */
unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
                                  unsigned *nr_bufs, unsigned vbi,
                                  unsigned divisor);
485 
/* Forward declarations of types this header only refers to by pointer; the
 * definitions are not in this header. */
struct panfrost_zsa_state;
struct panfrost_sampler_state;
struct panfrost_sampler_view;
struct panfrost_vertex_state;
490 
/* Convert a Gallium context pointer to the driver context.
 *
 * This is a plain pointer cast with no checking; it is valid because
 * struct panfrost_context has its pipe_context as its first member. */
static inline struct panfrost_context *
pan_context(struct pipe_context *pcontext)
{
   return (struct panfrost_context *)pcontext;
}
496 
/* Convert a Gallium stream output target pointer to the driver's target type.
 *
 * This is a plain pointer cast with no checking; it is valid because
 * struct panfrost_streamout_target has the Gallium target as its first
 * member. */
static inline struct panfrost_streamout_target *
pan_so_target(struct pipe_stream_output_target *target)
{
   return (struct panfrost_streamout_target *)target;
}
502 
/* Context-level entry points; the implementations are not in this header, so
 * only what the signatures show is described here. */

/* Returns the new context as a generic pipe_context pointer. */
struct pipe_context *panfrost_create_context(struct pipe_screen *screen,
                                             void *priv, unsigned flags);

bool panfrost_writes_point_size(struct panfrost_context *ctx);

struct panfrost_ptr panfrost_vertex_tiler_job(struct panfrost_context *ctx,
                                              bool is_tiler);

void panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
                    unsigned flags);

bool panfrost_render_condition_check(struct panfrost_context *ctx);

void panfrost_update_shader_variant(struct panfrost_context *ctx,
                                    enum pipe_shader_type type);

void panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss);

/* NOTE(review): the uint64_t results below are presumably GPU addresses --
 * confirm against the implementations. */
uint64_t
panfrost_get_index_buffer(struct panfrost_batch *batch,
                          const struct pipe_draw_info *info,
                          const struct pipe_draw_start_count_bias *draw);

/* Same inputs as panfrost_get_index_buffer, with two extra out-parameters for
 * the minimum and maximum index. */
uint64_t
panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
                                  const struct pipe_draw_info *info,
                                  const struct pipe_draw_start_count_bias *draw,
                                  unsigned *min_index, unsigned *max_index);

/* Instancing */

uint64_t panfrost_vertex_buffer_address(struct panfrost_context *ctx,
                                        unsigned i);

void panfrost_shader_context_init(struct pipe_context *pctx);
538 
539 static inline void
panfrost_dirty_state_all(struct panfrost_context * ctx)540 panfrost_dirty_state_all(struct panfrost_context *ctx)
541 {
542    ctx->dirty = ~0;
543 
544    for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
545       ctx->dirty_shader[i] = ~0;
546 }
547 
548 static inline void
panfrost_clean_state_3d(struct panfrost_context * ctx)549 panfrost_clean_state_3d(struct panfrost_context *ctx)
550 {
551    ctx->dirty = 0;
552 
553    for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
554       if (i != PIPE_SHADER_COMPUTE)
555          ctx->dirty_shader[i] = 0;
556    }
557 }
558 
/* Batch and context helpers; the implementations are not in this header. */
void panfrost_set_batch_masks_blend(struct panfrost_batch *batch);

void panfrost_set_batch_masks_zs(struct panfrost_batch *batch);

void panfrost_track_image_access(struct panfrost_batch *batch,
                                 enum pipe_shader_type stage,
                                 struct pipe_image_view *image);

void panfrost_context_reinit(struct panfrost_context *ctx);
568 
569 #endif
570