• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #ifndef FREEDRENO_UTIL_H_
28 #define FREEDRENO_UTIL_H_
29 
30 #include "common/freedreno_common.h"
31 
32 #include "drm/freedreno_drmif.h"
33 #include "drm/freedreno_ringbuffer.h"
34 
35 #include "util/format/u_formats.h"
36 #include "pipe/p_state.h"
37 #include "util/compiler.h"
38 #include "util/half_float.h"
39 #include "util/log.h"
40 #ifndef __cplusplus  // TODO fix cpu_trace.h to be c++ friendly
41 #include "util/perf/cpu_trace.h"
42 #endif
43 #include "util/u_debug.h"
44 #include "util/u_dynarray.h"
45 #include "util/u_math.h"
46 #include "util/u_pack_color.h"
47 
48 #include "adreno_common.xml.h"
49 #include "adreno_pm4.xml.h"
50 #include "disasm.h"
51 
52 #ifdef __cplusplus
53 extern "C" {
54 #endif
55 
56 enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format);
57 enum pc_di_index_size fd_pipe2index(enum pipe_format format);
58 enum pipe_format fd_gmem_restore_format(enum pipe_format format);
59 enum adreno_rb_blend_factor fd_blend_factor(unsigned factor);
60 enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
61 enum adreno_stencil_op fd_stencil_op(unsigned op);
62 
63 #define A3XX_MAX_MIP_LEVELS 14
64 
65 #define A2XX_MAX_RENDER_TARGETS 1
66 #define A3XX_MAX_RENDER_TARGETS 4
67 #define A4XX_MAX_RENDER_TARGETS 8
68 #define A5XX_MAX_RENDER_TARGETS 8
69 #define A6XX_MAX_RENDER_TARGETS 8
70 
71 #define MAX_RENDER_TARGETS A6XX_MAX_RENDER_TARGETS
72 
/* clang-format off */
/* Debug-flag bitmask values stored in fd_mesa_debug and tested via the
 * FD_DBG() macro below (pass the name without the FD_DBG_ prefix).
 */
enum fd_debug_flag {
   FD_DBG_MSGS         = BITFIELD_BIT(0),
   FD_DBG_DISASM       = BITFIELD_BIT(1),
   FD_DBG_DCLEAR       = BITFIELD_BIT(2),
   FD_DBG_DDRAW        = BITFIELD_BIT(3),
   FD_DBG_NOSCIS       = BITFIELD_BIT(4),
   FD_DBG_DIRECT       = BITFIELD_BIT(5),
   FD_DBG_GMEM         = BITFIELD_BIT(6),
   FD_DBG_PERF         = BITFIELD_BIT(7),
   FD_DBG_NOBIN        = BITFIELD_BIT(8),
   FD_DBG_SYSMEM       = BITFIELD_BIT(9),
   FD_DBG_SERIALC      = BITFIELD_BIT(10),
   FD_DBG_SHADERDB     = BITFIELD_BIT(11),
   FD_DBG_FLUSH        = BITFIELD_BIT(12),
   FD_DBG_DEQP         = BITFIELD_BIT(13),
   FD_DBG_INORDER      = BITFIELD_BIT(14),
   FD_DBG_BSTAT        = BITFIELD_BIT(15),
   FD_DBG_NOGROW       = BITFIELD_BIT(16),
   FD_DBG_LRZ          = BITFIELD_BIT(17),
   FD_DBG_NOINDR       = BITFIELD_BIT(18),
   FD_DBG_NOBLIT       = BITFIELD_BIT(19),
   FD_DBG_HIPRIO       = BITFIELD_BIT(20),
   FD_DBG_TTILE        = BITFIELD_BIT(21),
   FD_DBG_PERFC        = BITFIELD_BIT(22),
   FD_DBG_NOUBWC       = BITFIELD_BIT(23),
   FD_DBG_NOLRZ        = BITFIELD_BIT(24),
   FD_DBG_NOTILE       = BITFIELD_BIT(25),
   FD_DBG_LAYOUT       = BITFIELD_BIT(26),
   FD_DBG_NOFP16       = BITFIELD_BIT(27),
   FD_DBG_NOHW         = BITFIELD_BIT(28),
   FD_DBG_NOSBIN       = BITFIELD_BIT(29),
};
/* clang-format on */
107 
/* Global debug-flag bitmask (see enum fd_debug_flag) and binning toggle;
 * defined elsewhere in the driver.
 */
extern int fd_mesa_debug;
extern bool fd_binning_enabled;

/* True when the given debug category (name without FD_DBG_ prefix) is set */
#define FD_DBG(category) unlikely(fd_mesa_debug &FD_DBG_##category)
112 
113 #include <unistd.h>
114 #include <sys/types.h>
115 #include <sys/syscall.h>
116 
/* Debug logging, emitted only when the MSGS debug flag is enabled.
 * Each message is prefixed with the thread id, function, and line number.
 */
#define DBG(fmt, ...)                                                          \
   do {                                                                        \
      if (FD_DBG(MSGS))                                                        \
         mesa_logi("%5d: %s:%d: " fmt, ((pid_t)syscall(SYS_gettid)),           \
                                        __func__, __LINE__,                    \
                                        ##__VA_ARGS__);                        \
   } while (0)
124 
/* Report a performance warning: logged when the PERF debug flag is set,
 * and additionally forwarded to the given util_debug_callback when it is
 * non-NULL.
 */
#define perf_debug_message(debug, type, ...)                                   \
   do {                                                                        \
      if (FD_DBG(PERF))                                                        \
         mesa_logw(__VA_ARGS__);                                               \
      struct util_debug_callback *__d = (debug);                               \
      if (__d)                                                                 \
         util_debug_message(__d, type, __VA_ARGS__);                           \
   } while (0)

/* Convenience wrapper: pull the debug callback out of an fd_context
 * (which may be NULL) and emit a PERF_INFO message.
 */
#define perf_debug_ctx(ctx, ...)                                               \
   do {                                                                        \
      struct fd_context *__c = (ctx);                                          \
      perf_debug_message(__c ? &__c->debug : NULL, PERF_INFO, __VA_ARGS__);    \
   } while (0)

/* perf_debug_ctx() without a context (log-only path) */
#define perf_debug(...) perf_debug_ctx(NULL, __VA_ARGS__)
141 
/* Runs the following statement/block exactly once, timing it; if it takes
 * longer than limit_ns nanoseconds, a perf warning is emitted with the
 * elapsed time (in ms) appended.  Timestamps read as zero unless perf
 * debugging is active (see __perf_get_time()), making the check free in
 * the common case.
 */
#define perf_time_ctx(ctx, limit_ns, fmt, ...)                                 \
   for (struct __perf_time_state __s =                                         \
           {                                                                   \
              .t = -__perf_get_time(ctx),                                      \
           };                                                                  \
        !__s.done; ({                                                          \
           __s.t += __perf_get_time(ctx);                                      \
           __s.done = true;                                                    \
           if (__s.t > (limit_ns)) {                                           \
              perf_debug_ctx(ctx, fmt " (%.03f ms)", ##__VA_ARGS__,            \
                             (double)__s.t / 1000000.0);                       \
           }                                                                   \
        }))

/* perf_time_ctx() without a context */
#define perf_time(limit_ns, fmt, ...)                                          \
   perf_time_ctx(NULL, limit_ns, fmt, ##__VA_ARGS__)

/* Loop-control state for perf_time_ctx(); not intended for direct use. */
struct __perf_time_state {
   int64_t t;   /* accumulated elapsed time (ns) */
   bool done;   /* terminates the single-iteration for-loop */
};
163 
/* Returns the current time in ns when perf messages are enabled (either
 * via the PERF debug flag or a debug callback installed on the context),
 * otherwise 0 so the timer syscall is skipped.
 *
 * static inline would be nice here, except 'struct fd_context' is not
 * defined yet:
 */
#define __perf_get_time(ctx)                                                   \
   ((FD_DBG(PERF) || ({                                                        \
        struct fd_context *__c = (ctx);                                        \
        unlikely(__c && __c->debug.debug_message);                             \
     }))                                                                       \
       ? os_time_get_nano()                                                    \
       : 0)
174 
/* Generates a static inline downcast helper named after the child type,
 * e.g. DEFINE_CAST(pipe_context, fd_context) defines
 * 'struct fd_context *fd_context(struct pipe_context *x)'.
 */
#define DEFINE_CAST(parent, child)                                             \
   static inline struct child *child(struct parent *x)                         \
   {                                                                           \
      return (struct child *)x;                                                \
   }
180 
181 struct fd_context;
182 
183 /**
 * A pseudo-variable for defining where various parts of the fd_context
185  * can be safely accessed.
186  *
187  * With threaded_context, certain pctx funcs are called from gallium
188  * front-end/state-tracker (eg. CSO creation), while others are called
189  * from the driver thread.  Things called from driver thread can safely
190  * access anything in the ctx, while things called from the fe/st thread
191  * must limit themselves to "safe" things (ie. ctx->screen is safe as it
192  * is immutable, but the blitter_context is not).
193  */
194 extern lock_cap_t fd_context_access_cap;
195 
196 /**
197  * Make the annotation a bit less verbose.. mark fields which should only
198  * be accessed by driver-thread with 'dt'
199  */
200 #define dt guarded_by(fd_context_access_cap)
201 
202 /**
203  * Annotation for entry-point functions only called in driver thread.
204  *
205  * For static functions, apply the annotation to the function declaration.
206  * Otherwise apply to the function prototype.
207  */
208 #define in_dt assert_cap(fd_context_access_cap)
209 
210 /**
211  * Annotation for internal functions which are only called from entry-
212  * point functions (with 'in_dt' annotation) or other internal functions
213  * with the 'assert_dt' annotation.
214  *
215  * For static functions, apply the annotation to the function declaration.
216  * Otherwise apply to the function prototype.
217  */
218 #define assert_dt requires_cap(fd_context_access_cap)
219 
220 /**
221  * Special helpers for context access outside of driver thread.  For ex,
222  * pctx->get_query_result() is not called on driver thread, but the
223  * query is guaranteed to be flushed, or the driver thread queue is
224  * guaranteed to be flushed.
225  *
226  * Use with caution!
227  */
/* Acquire the (compile-time only) context-access capability; pairs with
 * fd_context_access_end().  The body is intentionally empty -- this exists
 * purely to satisfy the thread-safety annotations on 'dt'-guarded fields.
 */
static inline void
fd_context_access_begin(struct fd_context *ctx)
   acquire_cap(fd_context_access_cap)
{
}
233 
/* Release the capability taken by fd_context_access_begin(); no runtime
 * effect (annotation-only).
 */
static inline void
fd_context_access_end(struct fd_context *ctx) release_cap(fd_context_access_cap)
{
}
238 
239 #define CP_REG(reg) ((0x4 << 16) | ((unsigned int)((reg) - (0x2000))))
240 
241 static inline uint32_t
DRAW(enum pc_di_primtype prim_type,enum pc_di_src_sel source_select,enum pc_di_index_size index_size,enum pc_di_vis_cull_mode vis_cull_mode,uint8_t instances)242 DRAW(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select,
243      enum pc_di_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode,
244      uint8_t instances)
245 {
246    return (prim_type << 0) | (source_select << 6) | ((index_size & 1) << 11) |
247           ((index_size >> 1) << 13) | (vis_cull_mode << 9) | (1 << 14) |
248           (instances << 24);
249 }
250 
251 static inline uint32_t
DRAW_A20X(enum pc_di_primtype prim_type,enum pc_di_face_cull_sel faceness_cull_select,enum pc_di_src_sel source_select,enum pc_di_index_size index_size,bool pre_fetch_cull_enable,bool grp_cull_enable,uint16_t count)252 DRAW_A20X(enum pc_di_primtype prim_type,
253           enum pc_di_face_cull_sel faceness_cull_select,
254           enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
255           bool pre_fetch_cull_enable, bool grp_cull_enable, uint16_t count)
256 {
257    return (prim_type << 0) | (source_select << 6) |
258           (faceness_cull_select << 8) | ((index_size & 1) << 11) |
259           ((index_size >> 1) << 13) | (pre_fetch_cull_enable << 14) |
260           (grp_cull_enable << 15) | (count << 16);
261 }
262 
/* for tracking cmdstream positions that need to be patched: */
struct fd_cs_patch {
   uint32_t *cs;   /* location in the cmdstream to patch */
   uint32_t val;   /* value to write there */
};
/* number of patch entries accumulated in a util_dynarray: */
#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))
/* access the i'th patch entry of a util_dynarray: */
#define fd_patch_element(buf, i)                                               \
   util_dynarray_element(buf, struct fd_cs_patch, i)
271 
272 static inline enum pipe_format
pipe_surface_format(struct pipe_surface * psurf)273 pipe_surface_format(struct pipe_surface *psurf)
274 {
275    if (!psurf)
276       return PIPE_FORMAT_NONE;
277    return psurf->format;
278 }
279 
280 static inline bool
fd_surface_half_precision(const struct pipe_surface * psurf)281 fd_surface_half_precision(const struct pipe_surface *psurf)
282 {
283    enum pipe_format format;
284 
285    if (!psurf)
286       return true;
287 
288    format = psurf->format;
289 
290    /* colors are provided in consts, which go through cov.f32f16, which will
291     * break these values
292     */
293    if (util_format_is_pure_integer(format))
294       return false;
295 
296    /* avoid losing precision on 32-bit float formats */
297    if (util_format_is_float(format) &&
298        util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) ==
299           32)
300       return false;
301 
302    return true;
303 }
304 
305 static inline unsigned
fd_sampler_first_level(const struct pipe_sampler_view * view)306 fd_sampler_first_level(const struct pipe_sampler_view *view)
307 {
308    if (view->target == PIPE_BUFFER)
309       return 0;
310    return view->u.tex.first_level;
311 }
312 
313 static inline unsigned
fd_sampler_last_level(const struct pipe_sampler_view * view)314 fd_sampler_last_level(const struct pipe_sampler_view *view)
315 {
316    if (view->target == PIPE_BUFFER)
317       return 0;
318    return view->u.tex.last_level;
319 }
320 
321 static inline bool
fd_half_precision(struct pipe_framebuffer_state * pfb)322 fd_half_precision(struct pipe_framebuffer_state *pfb)
323 {
324    unsigned i;
325 
326    for (i = 0; i < pfb->nr_cbufs; i++)
327       if (!fd_surface_half_precision(pfb->cbufs[i]))
328          return false;
329 
330    return true;
331 }
332 
static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);

/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
static inline void
OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, struct util_dynarray *buf)
{
   if (LOG_DWORDS) {
      DBG("ring[%p]: OUT_RINGP  %04x:  %08x", ring,
          (uint32_t)(ring->cur - ring->start), data);
   }
   /* Record the current ring position (advancing it past the dword) along
    * with the provisional value; the real value gets patched in later.
    */
   util_dynarray_append(buf, struct fd_cs_patch,
                        ((struct fd_cs_patch){
                           .cs = ring->cur++,
                           .val = data,
                        }));
}
349 
/* Emit CP_INDIRECT_BUFFER_PFE/PFD packets that call into each cmd of the
 * target ring (pre-a5xx style IBs).  A no-op for an empty target.
 */
static inline void
__OUT_IB(struct fd_ringbuffer *ring, bool prefetch,
         struct fd_ringbuffer *target)
{
   if (target->cur == target->start)
      return;

   unsigned count = fd_ringbuffer_cmd_count(target);

   /* for debug after a lock up, write a unique counter value
    * to scratch6 for each IB, to make it easier to match up
    * register dumps to cmdstream.  The combination of IB and
    * DRAW (scratch7) is enough to "triangulate" the particular
    * draw that caused lockup.
    */
   emit_marker(ring, 6);

   for (unsigned i = 0; i < count; i++) {
      uint32_t dwords;
      OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD,
               2);
      /* reloc emits the IB address; its size in dwords follows: */
      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
      assert(dwords > 0);
      OUT_RING(ring, dwords);
      OUT_PKT2(ring);
   }

   emit_marker(ring, 6);
}
379 
/* Emit CP_INDIRECT_BUFFER packets that call into each cmd of the target
 * ring (a5xx+ style IBs).  A no-op for an empty target.
 */
static inline void
__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   if (target->cur == target->start)
      return;

   unsigned count = fd_ringbuffer_cmd_count(target);

   for (unsigned i = 0; i < count; i++) {
      uint32_t dwords;
      OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
      /* reloc emits the IB address; its size in dwords follows: */
      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
      assert(dwords > 0);
      OUT_RING(ring, dwords);
   }
}
396 
397 /* CP_SCRATCH_REG4 is used to hold base address for query results:
398  * Note the scratch register move on a5xx+ but this is only used
399  * for pre-a5xx hw queries where we cannot allocate the query buf
400  * until the # of tiles is known.
401  */
402 #define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
403 
404 #ifdef DEBUG
405 #define __EMIT_MARKER 1
406 #else
407 #define __EMIT_MARKER 0
408 #endif
409 
/* Write an incrementing counter value to the given CP scratch register,
 * used to correlate register dumps with cmdstream position after a GPU
 * hang (see __OUT_IB()).  Only emits anything when __EMIT_MARKER is set
 * (debug builds).
 */
static inline void
emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
{
   extern int32_t marker_cnt;
   unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
   /* that scratch reg is reserved for hw query results: */
   assert(reg != HW_QUERY_BASE_REG);
   if (reg == HW_QUERY_BASE_REG)
      return;
   if (__EMIT_MARKER) {
      OUT_WFI(ring);
      OUT_PKT0(ring, reg, 1);
      OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
   }
}
424 
425 
426 /*
427  * a3xx+ helpers:
428  */
429 
430 static inline enum a3xx_msaa_samples
fd_msaa_samples(unsigned samples)431 fd_msaa_samples(unsigned samples)
432 {
433    switch (samples) {
434    default:
435       unreachable("Unsupported samples");
436    case 0:
437    case 1:
438       return MSAA_ONE;
439    case 2:
440       return MSAA_TWO;
441    case 4:
442       return MSAA_FOUR;
443    case 8:
444       return MSAA_EIGHT;
445    }
446 }
447 
448 #define A3XX_MAX_TEXEL_BUFFER_ELEMENTS_UINT (1 << 13)
449 
450 /* Note that the Vulkan blob on a540 and 640 report a
451  * maxTexelBufferElements of just 65536 (the GLES3.2 and Vulkan
452  * minimum).
453  */
454 #define A4XX_MAX_TEXEL_BUFFER_ELEMENTS_UINT (1 << 27)
455 
456 static inline uint32_t
fd_clamp_buffer_size(enum pipe_format format,uint32_t size,unsigned max_texel_buffer_elements)457 fd_clamp_buffer_size(enum pipe_format format, uint32_t size,
458                      unsigned max_texel_buffer_elements)
459 {
460    /* The spec says:
461     *    The number of texels in the texel array is then clamped to the value of
462     *    the implementation-dependent limit GL_MAX_TEXTURE_BUFFER_SIZE.
463     *
464     * So compute the number of texels, compare to GL_MAX_TEXTURE_BUFFER_SIZE and update it.
465     */
466    unsigned blocksize = util_format_get_blocksize(format);
467    unsigned elements = MIN2(max_texel_buffer_elements, size / blocksize);
468 
469    return elements * blocksize;
470 }
471 
472 
473 /*
474  * a4xx+ helpers:
475  */
476 
477 static inline enum a4xx_state_block
fd4_stage2shadersb(gl_shader_stage type)478 fd4_stage2shadersb(gl_shader_stage type)
479 {
480    switch (type) {
481    case MESA_SHADER_VERTEX:
482       return SB4_VS_SHADER;
483    case MESA_SHADER_FRAGMENT:
484       return SB4_FS_SHADER;
485    case MESA_SHADER_COMPUTE:
486    case MESA_SHADER_KERNEL:
487       return SB4_CS_SHADER;
488    default:
489       unreachable("bad shader type");
490       return (enum a4xx_state_block) ~0;
491    }
492 }
493 
494 static inline enum a4xx_index_size
fd4_size2indextype(unsigned index_size)495 fd4_size2indextype(unsigned index_size)
496 {
497    switch (index_size) {
498    case 1:
499       return INDEX4_SIZE_8_BIT;
500    case 2:
501       return INDEX4_SIZE_16_BIT;
502    case 4:
503       return INDEX4_SIZE_32_BIT;
504    }
505    DBG("unsupported index size: %d", index_size);
506    assert(0);
507    return INDEX4_SIZE_32_BIT;
508 }
509 
/* Convert 19.2MHz RBBM always-on timer ticks to ns.
 *
 * The ratio 1e9 / 19.2e6 reduces exactly to 625 / 12, so multiply before
 * dividing rather than using the truncated 52 ns/tick factor (which is
 * off by ~0.16%); e.g. 12 ticks is exactly 625ns.  ts * 625 only
 * overflows after roughly 48 years worth of ticks.
 */
static inline uint64_t
ticks_to_ns(uint64_t ts)
{
   return ts * 625 / 12;
}
516 
517 #ifdef __cplusplus
518 }
519 #endif
520 
521 #endif /* FREEDRENO_UTIL_H_ */
522