/*
 * Copyright © 2014 Broadcom
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

25 #ifndef VC4_CONTEXT_H
26 #define VC4_CONTEXT_H
27 
28 #include <stdio.h>
29 
30 #include "pipe/p_context.h"
31 #include "pipe/p_state.h"
32 #include "util/slab.h"
33 #include "util/u_debug_cb.h"
34 #include "xf86drm.h"
35 
36 #define __user
37 #include "drm-uapi/vc4_drm.h"
38 #include "vc4_bufmgr.h"
39 #include "vc4_resource.h"
40 #include "vc4_cl.h"
41 #include "vc4_qir.h"
42 
/* Fallback values for DRM_VC4_PARAM_* identifiers, used only when the
 * included headers have not already defined them.
 */
#ifndef DRM_VC4_PARAM_SUPPORTS_ETC1
#define DRM_VC4_PARAM_SUPPORTS_ETC1		4
#endif
#ifndef DRM_VC4_PARAM_SUPPORTS_THREADED_FS
#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
#endif

/* Compile-time switch: true when built with USE_VC4_SIMULATOR, false
 * otherwise.  vc4_ioctl() branches on it.
 */
#ifdef USE_VC4_SIMULATOR
#define using_vc4_simulator true
#else
#define using_vc4_simulator false
#endif

/* Dirty-state flags, combined into the vc4_context::dirty bitfield.
 *
 * Bits 5, 6 and 16 are not assigned in this list.
 */
#define VC4_DIRTY_BLEND         (1 <<  0)
#define VC4_DIRTY_RASTERIZER    (1 <<  1)
#define VC4_DIRTY_ZSA           (1 <<  2)
#define VC4_DIRTY_FRAGTEX       (1 <<  3)
#define VC4_DIRTY_VERTTEX       (1 <<  4)

#define VC4_DIRTY_BLEND_COLOR   (1 <<  7)
#define VC4_DIRTY_STENCIL_REF   (1 <<  8)
#define VC4_DIRTY_SAMPLE_MASK   (1 <<  9)
#define VC4_DIRTY_FRAMEBUFFER   (1 << 10)
#define VC4_DIRTY_STIPPLE       (1 << 11)
#define VC4_DIRTY_VIEWPORT      (1 << 12)
#define VC4_DIRTY_CONSTBUF      (1 << 13)
#define VC4_DIRTY_VTXSTATE      (1 << 14)
#define VC4_DIRTY_VTXBUF        (1 << 15)

#define VC4_DIRTY_SCISSOR       (1 << 17)
#define VC4_DIRTY_FLAT_SHADE_FLAGS (1 << 18)
#define VC4_DIRTY_PRIM_MODE     (1 << 19)
#define VC4_DIRTY_CLIP          (1 << 20)
#define VC4_DIRTY_UNCOMPILED_VS (1 << 21)
#define VC4_DIRTY_UNCOMPILED_FS (1 << 22)
#define VC4_DIRTY_COMPILED_CS   (1 << 23)
#define VC4_DIRTY_COMPILED_VS   (1 << 24)
#define VC4_DIRTY_COMPILED_FS   (1 << 25)
#define VC4_DIRTY_FS_INPUTS     (1 << 26)
#define VC4_DIRTY_UBO_1_SIZE    (1 << 27)

84 struct vc4_sampler_view {
85         struct pipe_sampler_view base;
86         uint32_t texture_p0;
87         uint32_t texture_p1;
88         bool force_first_level;
89         /**
90          * Resource containing the actual texture that will be sampled.
91          *
92          * We may need to rebase the .base.texture resource to work around the
93          * lack of GL_TEXTURE_BASE_LEVEL, or to upload the texture as tiled.
94          */
95         struct pipe_resource *texture;
96 };
97 
98 struct vc4_sampler_state {
99         struct pipe_sampler_state base;
100         uint32_t texture_p1;
101 };
102 
103 struct vc4_texture_stateobj {
104         struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
105         unsigned num_textures;
106         struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
107         unsigned num_samplers;
108 };
109 
/**
 * Uniform description embedded in vc4_compiled_shader.
 *
 * NOTE(review): contents and data look like parallel arrays with `count`
 * entries each -- confirm against the code that allocates them.
 */
struct vc4_shader_uniform_info {
        enum quniform_contents *contents;
        uint32_t *data;
        uint32_t count;
        uint32_t num_texture_samples;
};

117 struct vc4_uncompiled_shader {
118         /** A name for this program, so you can track it in shader-db output. */
119         uint32_t program_id;
120         /** How many variants of this program were compiled, for shader-db. */
121         uint32_t compiled_variant_count;
122         struct pipe_shader_state base;
123 };
124 
/** The list of VPM inputs a shader needs; see vc4_compiled_shader::fs_inputs. */
struct vc4_fs_inputs {
        /**
         * Array of the meanings of the VPM inputs this shader needs.
         *
         * It doesn't include those that aren't part of the VPM, like
         * point/line coordinates.
         */
        struct vc4_varying_slot *input_slots;
        /* NOTE(review): presumably the number of entries in input_slots --
         * confirm.
         */
        uint32_t num_inputs;
};

136 struct vc4_compiled_shader {
137         uint64_t program_id;
138         struct vc4_bo *bo;
139 
140         struct vc4_shader_uniform_info uniforms;
141 
142         /**
143          * VC4_DIRTY_* flags that, when set in vc4->dirty, mean that the
144          * uniforms have to be rewritten (and therefore the shader state
145          * reemitted).
146          */
147         uint32_t uniform_dirty_bits;
148 
149         /** bitmask of which inputs are color inputs, for flat shade handling. */
150         uint32_t color_inputs;
151 
152         bool disable_early_z;
153 
154         /* Set if the compile failed, likely due to register allocation
155          * failure.  In this case, we have no shader to run and should not try
156          * to do any draws.
157          */
158         bool failed;
159 
160         bool fs_threaded;
161 
162         uint8_t num_inputs;
163 
164         /* Byte offsets for the start of the vertex attributes 0-7, and the
165          * total size as "attribute" 8.
166          */
167         uint8_t vattr_offsets[9];
168         uint8_t vattrs_live;
169 
170         const struct vc4_fs_inputs *fs_inputs;
171 };
172 
/**
 * Shader bindings held in vc4_context::prog: the uncompiled vertex/fragment
 * shaders and the compiled cs/vs/fs variants.
 *
 * NOTE(review): "cs" presumably stands for the coordinate shader -- confirm.
 */
struct vc4_program_stateobj {
        struct vc4_uncompiled_shader *bind_vs;
        struct vc4_uncompiled_shader *bind_fs;
        struct vc4_compiled_shader *cs;
        struct vc4_compiled_shader *vs;
        struct vc4_compiled_shader *fs;
};

178 struct vc4_constbuf_stateobj {
179         struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
180         uint32_t enabled_mask;
181         uint32_t dirty_mask;
182 };
183 
184 struct vc4_vertexbuf_stateobj {
185         struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
186         unsigned count;
187         uint32_t enabled_mask;
188         uint32_t dirty_mask;
189 };
190 
191 struct vc4_vertex_stateobj {
192         struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
193         unsigned num_elements;
194 };
195 
/* Hash table key for vc4->jobs: the color and depth/stencil surface pair. */
struct vc4_job_key {
        struct pipe_surface *cbuf;
        struct pipe_surface *zsbuf;
};

202 struct vc4_hwperfmon {
203         uint32_t id;
204         uint64_t last_seqno;
205         uint8_t events[DRM_VC4_MAX_PERF_COUNTERS];
206         uint64_t counters[DRM_VC4_MAX_PERF_COUNTERS];
207 };
208 
209 /**
210  * A complete bin/render job.
211  *
212  * This is all of the state necessary to submit a bin/render to the kernel.
213  * We want to be able to have multiple in progress at a time, so that we don't
214  * need to flush an existing CL just to switch to rendering to a new render
215  * target (which would mean reading back from the old render target when
216  * starting to render to it again).
217  */
218 struct vc4_job {
219         struct vc4_cl bcl;
220         struct vc4_cl shader_rec;
221         struct vc4_cl uniforms;
222         struct vc4_cl bo_handles;
223         struct vc4_cl bo_pointers;
224         uint32_t shader_rec_count;
225         /**
226          * Amount of memory used by the BOs in bo_pointers.
227          *
228          * Used for checking when we should flush the job early so we don't
229          * OOM.
230          */
231         uint32_t bo_space;
232 
233         /* Last BO hindex referenced from VC4_PACKET_GEM_HANDLES. */
234         uint32_t last_gem_handle_hindex;
235 
236         /** @{ Surfaces to submit rendering for. */
237         struct pipe_surface *color_read;
238         struct pipe_surface *color_write;
239         struct pipe_surface *zs_read;
240         struct pipe_surface *zs_write;
241         struct pipe_surface *msaa_color_write;
242         struct pipe_surface *msaa_zs_write;
243         /** @} */
244         /** @{
245          * Bounding box of the scissor across all queued drawing.
246          *
247          * Note that the max values are exclusive.
248          */
249         uint32_t draw_min_x;
250         uint32_t draw_min_y;
251         uint32_t draw_max_x;
252         uint32_t draw_max_y;
253         /** @} */
254         /** @{
255          * Width/height of the color framebuffer being rendered to,
256          * for VC4_TILE_RENDERING_MODE_CONFIG.
257         */
258         uint32_t draw_width;
259         uint32_t draw_height;
260         /** @} */
261         /** @{ Tile information, depending on MSAA and float color buffer. */
262         uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */
263         uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */
264 
265         uint32_t tile_width; /** @< Width of a tile. */
266         uint32_t tile_height; /** @< Height of a tile. */
267         /** Whether the current rendering is in a 4X MSAA tile buffer. */
268         bool msaa;
269 	/** @} */
270 
271         /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
272          * first rendering.
273          */
274         uint32_t cleared;
275         /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to
276          * (either clears or draws).
277          */
278         uint32_t resolve;
279         uint32_t clear_color[2];
280         uint32_t clear_depth; /**< 24-bit unorm depth */
281         uint8_t clear_stencil;
282 
283         /**
284          * Set if some drawing (triangles, blits, or just a glClear()) has
285          * been done to the FBO, meaning that we need to
286          * DRM_IOCTL_VC4_SUBMIT_CL.
287          */
288         bool needs_flush;
289 
290         /**
291          * Number of draw calls (not counting full buffer clears) queued in
292          * the current job.
293          */
294         uint32_t draw_calls_queued;
295 
296         /** Any flags to be passed in drm_vc4_submit_cl.flags. */
297         uint32_t flags;
298 
299 	/* Performance monitor attached to this job. */
300 	struct vc4_hwperfmon *perfmon;
301 
302         struct vc4_job_key key;
303 };
304 
305 struct vc4_context {
306         struct pipe_context base;
307 
308         int fd;
309         struct vc4_screen *screen;
310 
311         /** The 3D rendering job for the currently bound FBO. */
312         struct vc4_job *job;
313 
314         /* Map from struct vc4_job_key to the job for that FBO.
315          */
316         struct hash_table *jobs;
317 
318         /**
319          * Map from vc4_resource to a job writing to that resource.
320          *
321          * Primarily for flushing jobs rendering to textures that are now
322          * being read from.
323          */
324         struct hash_table *write_jobs;
325 
326         struct slab_child_pool transfer_pool;
327         struct blitter_context *blitter;
328 
329         /** bitfield of VC4_DIRTY_* */
330         uint32_t dirty;
331 
332         struct hash_table *fs_cache, *vs_cache;
333         struct set *fs_inputs_set;
334         uint32_t next_uncompiled_program_id;
335         uint64_t next_compiled_program_id;
336 
337         struct ra_regs *regs;
338         struct ra_class *reg_class_any[2];
339         struct ra_class *reg_class_a_or_b[2];
340         struct ra_class *reg_class_a_or_b_or_acc[2];
341         struct ra_class *reg_class_r0_r3;
342         struct ra_class *reg_class_r4_or_a[2];
343         struct ra_class *reg_class_a[2];
344 
345         uint8_t prim_mode;
346 
347         /** Maximum index buffer valid for the current shader_rec. */
348         uint32_t max_index;
349         /** Last index bias baked into the current shader_rec. */
350         uint32_t last_index_bias;
351 
352         /** Seqno of the last CL flush's job. */
353         uint64_t last_emit_seqno;
354 
355         struct u_upload_mgr *uploader;
356 
357         struct pipe_shader_state *yuv_linear_blit_vs;
358         struct pipe_shader_state *yuv_linear_blit_fs_8bit;
359         struct pipe_shader_state *yuv_linear_blit_fs_16bit;
360 
361         /** @{ Current pipeline state objects */
362         struct pipe_scissor_state scissor;
363         struct pipe_blend_state *blend;
364         struct vc4_rasterizer_state *rasterizer;
365         struct vc4_depth_stencil_alpha_state *zsa;
366 
367         struct vc4_texture_stateobj verttex, fragtex;
368 
369         struct vc4_program_stateobj prog;
370 
371         struct vc4_vertex_stateobj *vtx;
372 
373         struct {
374                 struct pipe_blend_color f;
375                 uint8_t ub[4];
376         } blend_color;
377         struct pipe_stencil_ref stencil_ref;
378         unsigned sample_mask;
379         struct pipe_framebuffer_state framebuffer;
380         struct pipe_poly_stipple stipple;
381         struct pipe_clip_state clip;
382         struct pipe_viewport_state viewport;
383         struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
384         struct vc4_vertexbuf_stateobj vertexbuf;
385 
386         struct vc4_hwperfmon *perfmon;
387         /** @} */
388 
389         /** Handle of syncobj containing the last submitted job fence. */
390         uint32_t job_syncobj;
391 
392         int in_fence_fd;
393         /** Handle of the syncobj that holds in_fence_fd for submission. */
394         uint32_t in_syncobj;
395 };
396 
397 struct vc4_rasterizer_state {
398         struct pipe_rasterizer_state base;
399 
400         /* VC4_CONFIGURATION_BITS */
401         uint8_t config_bits[V3D21_CONFIGURATION_BITS_length];
402 
403         struct PACKED {
404                 uint8_t depth_offset[V3D21_DEPTH_OFFSET_length];
405                 uint8_t point_size[V3D21_POINT_SIZE_length];
406                 uint8_t line_width[V3D21_LINE_WIDTH_length];
407         } packed;
408 
409         /** Raster order flags to be passed in struct drm_vc4_submit_cl.flags. */
410         uint32_t tile_raster_order_flags;
411 };
412 
413 struct vc4_depth_stencil_alpha_state {
414         struct pipe_depth_stencil_alpha_state base;
415 
416         /* VC4_CONFIGURATION_BITS */
417         uint8_t config_bits[V3D21_CONFIGURATION_BITS_length];
418 
419         /** Uniforms for stencil state.
420          *
421          * Index 0 is either the front config, or the front-and-back config.
422          * Index 1 is the back config if doing separate back stencil.
423          * Index 2 is the writemask config if it's not a common mask value.
424          */
425         uint32_t stencil_uniforms[3];
426 };
427 
/**
 * Report a performance-related message.
 *
 * Takes printf-style arguments.  The message is written to stderr when
 * VC4_DBG(PERF) is true, and is additionally handed to util_debug_message()
 * when vc4->base.debug.debug_message is set.
 *
 * The macro reads a variable named "vc4" from the calling scope.  The
 * arguments are expanded once in each branch, so an argument with side
 * effects may be evaluated twice.
 */
#define perf_debug(...) do {                            \
        if (VC4_DBG(PERF))                            \
                fprintf(stderr, __VA_ARGS__);           \
        if (unlikely(vc4->base.debug.debug_message))         \
                util_debug_message(&vc4->base.debug, PERF_INFO, __VA_ARGS__); \
} while (0)

/**
 * Convert a generic pipe_context pointer to the driver's vc4_context.
 *
 * This is a plain pointer cast; struct vc4_context has the pipe_context as
 * its first member.
 */
static inline struct vc4_context *
vc4_context(struct pipe_context *pcontext)
{
        return (struct vc4_context *)pcontext;
}

/**
 * Convert a generic pipe_sampler_view pointer to the driver's
 * vc4_sampler_view.
 *
 * This is a plain pointer cast; struct vc4_sampler_view has the
 * pipe_sampler_view as its first member.
 */
static inline struct vc4_sampler_view *
vc4_sampler_view(struct pipe_sampler_view *psview)
{
        return (struct vc4_sampler_view *)psview;
}

/**
 * Convert a generic pipe_sampler_state pointer to the driver's
 * vc4_sampler_state.
 *
 * This is a plain pointer cast; struct vc4_sampler_state has the
 * pipe_sampler_state as its first member.
 */
static inline struct vc4_sampler_state *
vc4_sampler_state(struct pipe_sampler_state *psampler)
{
        return (struct vc4_sampler_state *)psampler;
}

/* Declarations only; none of these functions is defined in this header. */
int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
                                    unsigned index,
                                    struct pipe_driver_query_group_info *info);
int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                              struct pipe_driver_query_info *info);

struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
                                        void *priv, unsigned flags);
void vc4_draw_init(struct pipe_context *pctx);
void vc4_state_init(struct pipe_context *pctx);
void vc4_program_init(struct pipe_context *pctx);
void vc4_program_fini(struct pipe_context *pctx);
void vc4_query_init(struct pipe_context *pctx);
void vc4_simulator_init(struct vc4_screen *screen);
void vc4_simulator_destroy(struct vc4_screen *screen);
int vc4_simulator_ioctl(int fd, unsigned long request, void *arg);
void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size);

471 static inline int
vc4_ioctl(int fd,unsigned long request,void * arg)472 vc4_ioctl(int fd, unsigned long request, void *arg)
473 {
474         if (using_vc4_simulator)
475                 return vc4_simulator_ioctl(fd, request, arg);
476         else
477                 return drmIoctl(fd, request, arg);
478 }
479 
480 void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader);
481 void vc4_write_uniforms(struct vc4_context *vc4,
482                         struct vc4_compiled_shader *shader,
483                         struct vc4_constbuf_stateobj *cb,
484                         struct vc4_texture_stateobj *texstate);
485 
486 void vc4_flush(struct pipe_context *pctx);
487 int vc4_job_init(struct vc4_context *vc4);
488 int vc4_fence_context_init(struct vc4_context *vc4);
489 struct vc4_job *vc4_get_job(struct vc4_context *vc4,
490                             struct pipe_surface *cbuf,
491                             struct pipe_surface *zsbuf);
492 struct vc4_job *vc4_get_job_for_fbo(struct vc4_context *vc4);
493 
494 void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job);
495 void vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
496                                      struct pipe_resource *prsc);
497 void vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
498                                      struct pipe_resource *prsc);
499 void vc4_emit_state(struct pipe_context *pctx);
500 void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c);
501 struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c);
502 bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode);
503 
504 bool vc4_rt_format_supported(enum pipe_format f);
505 bool vc4_rt_format_is_565(enum pipe_format f);
506 bool vc4_tex_format_supported(enum pipe_format f);
507 uint8_t vc4_get_tex_format(enum pipe_format f);
508 const uint8_t *vc4_get_format_swizzle(enum pipe_format f);
509 void vc4_init_query_functions(struct vc4_context *vc4);
510 void vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info);
511 void vc4_blitter_save(struct vc4_context *vc4);
512 #endif /* VC4_CONTEXT_H */
513