/*
 * Copyright 2010 Red Hat Inc.
 * Copyright 2014-2017 Broadcom
 * Copyright 2019-2020 Collabora, Ltd.
 * Copyright 2006 VMware, Inc.
 * SPDX-License-Identifier: MIT
 */
#include <errno.h>
#include <stdio.h>
#include <xf86drm.h>
#include "asahi/compiler/agx_compile.h"
#include "asahi/layout/layout.h"
#include "asahi/lib/agx_formats.h"
#include "asahi/lib/decode.h"
#include "drm-uapi/drm_fourcc.h"
#include "frontend/winsys_handle.h"
#include "gallium/auxiliary/renderonly/renderonly.h"
#include "gallium/auxiliary/util/u_debug_cb.h"
#include "gallium/auxiliary/util/u_framebuffer.h"
#include "gallium/auxiliary/util/u_sample_positions.h"
#include "gallium/auxiliary/util/u_surface.h"
#include "gallium/auxiliary/util/u_transfer.h"
#include "gallium/auxiliary/util/u_transfer_helper.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/half_float.h"
#include "util/macros.h"
#include "util/timespec.h"
#include "util/u_drm.h"
#include "util/u_gen_mipmap.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_resource.h"
#include "util/u_screen.h"
#include "util/u_upload_mgr.h"
#include "util/xmlconfig.h"
#include "agx_device.h"
#include "agx_disk_cache.h"
#include "agx_fence.h"
#include "agx_helpers.h"
#include "agx_pack.h"
#include "agx_public.h"
#include "agx_state.h"
#include "agx_tilebuffer.h"
#include "shader_enums.h"

/* Fake values, pending UAPI upstreaming */
#ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
#define DRM_FORMAT_MOD_APPLE_TWIDDLED (2)
#endif
#ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED
#define DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED (3)
#endif

/* clang-format off */
static const struct debug_named_value agx_debug_options[] = {
   {"trace",     AGX_DBG_TRACE,    "Trace the command stream"},
   {"no16",      AGX_DBG_NO16,     "Disable 16-bit support"},
   {"perf",      AGX_DBG_PERF,     "Print performance warnings"},
#ifndef NDEBUG
   {"dirty",     AGX_DBG_DIRTY,    "Disable dirty tracking"},
#endif
   {"compblit",  AGX_DBG_COMPBLIT, "Enable compute blitter"},
   {"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"},
   {"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"},
   {"nocluster", AGX_DBG_NOCLUSTER,"Disable vertex clustering"},
   {"sync",      AGX_DBG_SYNC,     "Synchronously wait for all submissions"},
   {"stats",     AGX_DBG_STATS,    "Show command execution statistics"},
   {"resource",  AGX_DBG_RESOURCE, "Log resource operations"},
   {"batch",     AGX_DBG_BATCH,    "Log batches"},
   {"nowc",      AGX_DBG_NOWC,     "Disable write-combining"},
   {"synctvb",   AGX_DBG_SYNCTVB,  "Synchronous TVB growth"},
   {"smalltile", AGX_DBG_SMALLTILE,"Force 16x16 tiles"},
   {"feedback",  AGX_DBG_FEEDBACK, "Debug feedback loops"},
   {"nomsaa",    AGX_DBG_NOMSAA,   "Force disable MSAA"},
   {"noshadow",  AGX_DBG_NOSHADOW, "Force disable resource shadowing"},
   {"varyings",  AGX_DBG_VARYINGS, "Validate varying linkage"},
   {"scratch",   AGX_DBG_SCRATCH,  "Debug scratch memory usage"},
   DEBUG_NAMED_VALUE_END
};
/* clang-format on */
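
/*
 * Illustrative note (an addition, not from the original file): tables like
 * the one above are conventionally hooked up with DEBUG_GET_ONCE_FLAGS_OPTION,
 * so the flags would be enabled at runtime via an environment variable, e.g.:
 *
 *    ASAHI_MESA_DEBUG=trace,sync ./app
 *
 * The exact variable name is an assumption here; it is defined wherever this
 * table is parsed.
 */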

uint64_t agx_best_modifiers[] = {
   DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED,
   DRM_FORMAT_MOD_APPLE_TWIDDLED,
   DRM_FORMAT_MOD_LINEAR,
};

/* These limits are arbitrarily chosen and subject to change as
 * we discover more workloads with heavy shadowing.
 *
 * Maximum size of a shadowed object in bytes.
 * Hint: 1024x1024xRGBA8 = 4 MiB. Go higher for compression.
 */
#define MAX_SHADOW_BYTES (6 * 1024 * 1024)

/* Maximum cumulative size to shadow an object before we flush.
 * Allows shadowing a 4 MiB + meta object 8 times with the logic
 * below (+1 shadow offset implied).
 */
#define MAX_TOTAL_SHADOW_BYTES (32 * 1024 * 1024)
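
/* Worked example (illustrative): with the limits above, a maximal 4 MiB
 * object can be shadowed roughly 32 MiB / 4 MiB = 8 times before the
 * cumulative counter trips and we fall back to syncing instead.
 */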

void agx_init_state_functions(struct pipe_context *ctx);

/*
 * resource
 */

static enum ail_tiling
ail_modifier_to_tiling(uint64_t modifier)
{
   switch (modifier) {
   case DRM_FORMAT_MOD_LINEAR:
      return AIL_TILING_LINEAR;
   case DRM_FORMAT_MOD_APPLE_TWIDDLED:
      return AIL_TILING_TWIDDLED;
   case DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED:
      return AIL_TILING_TWIDDLED_COMPRESSED;
   default:
      unreachable("Unsupported modifier");
   }
}

static const char *s_tiling[] = {
   [AIL_TILING_LINEAR] = "LINR",
   [AIL_TILING_TWIDDLED] = "TWID",
   [AIL_TILING_TWIDDLED_COMPRESSED] = "COMP",
};

#define rsrc_debug(res, ...)                                                   \
   do {                                                                        \
      if (agx_device((res)->base.screen)->debug & AGX_DBG_RESOURCE)            \
         agx_msg(__VA_ARGS__);                                                 \
   } while (0)

static void
agx_resource_debug(struct agx_resource *res, const char *msg)
{
   if (!(agx_device(res->base.screen)->debug & AGX_DBG_RESOURCE))
      return;

   int ino = -1;
   if (res->bo->prime_fd >= 0) {
      struct stat sb;
      if (!fstat(res->bo->prime_fd, &sb))
         ino = sb.st_ino;
   }

   agx_msg(
      "%s%s %dx%dx%d %dL %d/%dM %dS M:%llx %s %s%s S:0x%llx LS:0x%llx CS:0x%llx "
      "Base=0x%llx Size=0x%llx Meta=0x%llx/0x%llx (%s) %s%s%s%s%s%sfd:%d(%d) @ %p\n",
      msg ?: "", util_format_short_name(res->base.format), res->base.width0,
      res->base.height0, res->base.depth0, res->base.array_size,
      res->base.last_level, res->layout.levels, res->layout.sample_count_sa,
      (long long)res->modifier, s_tiling[res->layout.tiling],
      res->layout.mipmapped_z ? "MZ " : "",
      res->layout.page_aligned_layers ? "PL " : "",
      (long long)res->layout.linear_stride_B,
      (long long)res->layout.layer_stride_B,
      (long long)res->layout.compression_layer_stride_B,
      (long long)res->bo->ptr.gpu, (long long)res->layout.size_B,
      res->layout.metadata_offset_B
         ? ((long long)res->bo->ptr.gpu + res->layout.metadata_offset_B)
         : 0,
      (long long)res->layout.metadata_offset_B, res->bo->label,
      res->bo->flags & AGX_BO_SHARED ? "SH " : "",
      res->bo->flags & AGX_BO_LOW_VA ? "LO " : "",
      res->bo->flags & AGX_BO_EXEC ? "EX " : "",
      res->bo->flags & AGX_BO_WRITEBACK ? "WB " : "",
      res->bo->flags & AGX_BO_SHAREABLE ? "SA " : "",
      res->bo->flags & AGX_BO_READONLY ? "RO " : "", res->bo->prime_fd, ino,
      res);
}

static void
agx_resource_setup(struct agx_device *dev, struct agx_resource *nresource)
{
   struct pipe_resource *templ = &nresource->base;

   nresource->layout = (struct ail_layout){
      .tiling = ail_modifier_to_tiling(nresource->modifier),
      .mipmapped_z = templ->target == PIPE_TEXTURE_3D,
      .format = templ->format,
      .width_px = templ->width0,
      .height_px = templ->height0,
      .depth_px = templ->depth0 * templ->array_size,
      .sample_count_sa = MAX2(templ->nr_samples, 1),
      .levels = templ->last_level + 1,
      .writeable_image = templ->bind & PIPE_BIND_SHADER_IMAGE,

      /* Ostensibly this should be based on the bind, but Gallium bind flags are
       * notoriously unreliable. The only cost of setting this excessively is a
       * bit of extra memory use for layered textures, which isn't worth trying
       * to optimize.
       */
      .renderable = true,
   };
}

static struct pipe_resource *
agx_resource_from_handle(struct pipe_screen *pscreen,
                         const struct pipe_resource *templat,
                         struct winsys_handle *whandle, unsigned usage)
{
   struct agx_device *dev = agx_device(pscreen);
   struct agx_resource *rsc;
   struct pipe_resource *prsc;

   assert(whandle->type == WINSYS_HANDLE_TYPE_FD);

   rsc = CALLOC_STRUCT(agx_resource);
   if (!rsc)
      return NULL;

   rsc->modifier = whandle->modifier == DRM_FORMAT_MOD_INVALID
                      ? DRM_FORMAT_MOD_LINEAR
                      : whandle->modifier;

   /* We need strides to be aligned. ail asserts this, but we want to fail
    * gracefully so the app can handle the error.
    */
   if (rsc->modifier == DRM_FORMAT_MOD_LINEAR && (whandle->stride % 16) != 0) {
      FREE(rsc);
      return NULL;
   }

   prsc = &rsc->base;

   *prsc = *templat;

   pipe_reference_init(&prsc->reference, 1);
   prsc->screen = pscreen;

   rsc->bo = agx_bo_import(dev, whandle->handle);
   /* Sometimes an import can fail, e.g. due to an invalid buffer fd or no
    * memory left to mmap it.
    */
   if (!rsc->bo) {
      FREE(rsc);
      return NULL;
   }

   agx_resource_setup(dev, rsc);

   if (rsc->layout.tiling == AIL_TILING_LINEAR) {
      rsc->layout.linear_stride_B = whandle->stride;
   } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0)) {
      FREE(rsc);
      return NULL;
   }

   assert(whandle->offset == 0);

   ail_make_miptree(&rsc->layout);

   if (prsc->target == PIPE_BUFFER) {
      assert(rsc->layout.tiling == AIL_TILING_LINEAR);
      util_range_init(&rsc->valid_buffer_range);
   }

   agx_resource_debug(rsc, "Import: ");

   return prsc;
}

static bool
agx_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *ctx,
                        struct pipe_resource *pt, struct winsys_handle *handle,
                        unsigned usage)
{
   struct agx_device *dev = agx_device(pscreen);
   struct pipe_resource *cur = pt;

   /* Even though asahi doesn't support multi-planar formats, we
    * can get here through GBM, which does. Walk the list of planes
    * to find the right one.
    */
   for (int i = 0; i < handle->plane; i++) {
      cur = cur->next;
      if (!cur)
         return false;
   }

   struct agx_resource *rsrc = agx_resource(cur);

   if (handle->type == WINSYS_HANDLE_TYPE_KMS && dev->ro) {
      rsrc_debug(rsrc, "Get handle: %p (KMS RO)\n", rsrc);

      if (!rsrc->scanout && dev->ro && (rsrc->base.bind & PIPE_BIND_SCANOUT)) {
         rsrc->scanout =
            renderonly_scanout_for_resource(&rsrc->base, dev->ro, NULL);
      }

      if (!rsrc->scanout)
         return false;

      return renderonly_get_handle(rsrc->scanout, handle);
   } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) {
      rsrc_debug(rsrc, "Get handle: %p (KMS)\n", rsrc);

      handle->handle = rsrc->bo->handle;
   } else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
      int fd = agx_bo_export(rsrc->bo);

      if (fd < 0)
         return false;

      handle->handle = fd;
      if (dev->debug & AGX_DBG_RESOURCE) {
         struct stat sb;
         fstat(rsrc->bo->prime_fd, &sb);
         agx_msg("Get handle: %p (FD %d/%ld)\n", rsrc, fd, (long)sb.st_ino);
      }
   } else {
      /* Other handle types not supported */
      return false;
   }

   handle->stride = ail_get_wsi_stride_B(&rsrc->layout, 0);
   handle->size = rsrc->layout.size_B;
   handle->offset = rsrc->layout.level_offsets_B[0];
   handle->format = rsrc->layout.format;
   handle->modifier = rsrc->modifier;

   return true;
}

static bool
agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
                       struct pipe_resource *prsc, unsigned plane,
                       unsigned layer, unsigned level,
                       enum pipe_resource_param param, unsigned usage,
                       uint64_t *value)
{
   struct agx_resource *rsrc = (struct agx_resource *)prsc;

   switch (param) {
   case PIPE_RESOURCE_PARAM_STRIDE:
      *value = ail_get_wsi_stride_B(&rsrc->layout, level);
      return true;
   case PIPE_RESOURCE_PARAM_OFFSET:
      *value = rsrc->layout.level_offsets_B[level];
      return true;
   case PIPE_RESOURCE_PARAM_MODIFIER:
      *value = rsrc->modifier;
      return true;
   case PIPE_RESOURCE_PARAM_NPLANES:
      /* We don't support multi-planar formats, but we should still handle
       * this case for GBM shared resources.
       */
      *value = util_resource_num(prsc);
      return true;
   default:
      return false;
   }
}

static bool
agx_is_2d(enum pipe_texture_target target)
{
   return (target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_RECT);
}

static bool
agx_linear_allowed(const struct agx_resource *pres)
{
   /* Mipmapping not allowed with linear */
   if (pres->base.last_level != 0)
      return false;

   /* Depth/stencil buffers must not be linear */
   if (pres->base.bind & PIPE_BIND_DEPTH_STENCIL)
      return false;

   /* Multisampling not allowed with linear */
   if (pres->base.nr_samples > 1)
      return false;

   /* Block compression not allowed with linear */
   if (util_format_is_compressed(pres->base.format))
      return false;

   switch (pres->base.target) {
   /* Buffers are always linear, even with image atomics */
   case PIPE_BUFFER:

   /* Linear textures require specifying their strides explicitly, which only
    * works for 2D textures. Rectangle textures are a special case of 2D.
    *
    * 1D textures only exist in GLES and are lowered to 2D to bypass hardware
    * limitations.
    *
    * However, we don't want to support this case in the image atomic
    * implementation, so linear shader images are specially forbidden.
    */
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_RECT:
      if (pres->base.bind & PIPE_BIND_SHADER_IMAGE)
         return false;

      break;

   /* No other texture type can specify a stride */
   default:
      return false;
   }

   return true;
}

static bool
agx_twiddled_allowed(const struct agx_resource *pres)
{
   /* Certain binds force linear */
   if (pres->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_LINEAR))
      return false;

   /* Buffers must be linear */
   if (pres->base.target == PIPE_BUFFER)
      return false;

   /* Anything else may be twiddled */
   return true;
}

static bool
agx_compression_allowed(const struct agx_resource *pres)
{
   /* Allow disabling compression for debugging */
   if (agx_device(pres->base.screen)->debug & AGX_DBG_NOCOMPRESS) {
      rsrc_debug(pres, "No compression: disabled\n");
      return false;
   }

   /* Limited to renderable */
   if (pres->base.bind &
       ~(PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
         PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) {
      rsrc_debug(pres, "No compression: not renderable\n");
      return false;
   }

   /* We use the PBE for compression via staging blits, so we can only compress
    * renderable formats. Since this is framebuffer compression, other formats
    * wouldn't make much sense to compress anyway.
    */
   if (agx_pixel_format[pres->base.format].renderable == PIPE_FORMAT_NONE &&
       !util_format_is_depth_or_stencil(pres->base.format)) {
      rsrc_debug(pres, "No compression: format not renderable\n");
      return false;
   }

   /* Lossy-compressed texture formats cannot be compressed */
   assert(!util_format_is_compressed(pres->base.format) &&
          "block-compressed formats are not renderable");

   if (!ail_can_compress(pres->base.width0, pres->base.height0,
                         MAX2(pres->base.nr_samples, 1))) {
      rsrc_debug(pres, "No compression: too small\n");
      return false;
   }

   return true;
}

static uint64_t
agx_select_modifier_from_list(const struct agx_resource *pres,
                              const uint64_t *modifiers, int count)
{
   if (agx_twiddled_allowed(pres) && agx_compression_allowed(pres) &&
       drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers,
                         count))
      return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;

   if (agx_twiddled_allowed(pres) &&
       drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED, modifiers, count))
      return DRM_FORMAT_MOD_APPLE_TWIDDLED;

   if (agx_linear_allowed(pres) &&
       drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
      return DRM_FORMAT_MOD_LINEAR;

   /* We didn't find anything */
   return DRM_FORMAT_MOD_INVALID;
}

static uint64_t
agx_select_best_modifier(const struct agx_resource *pres)
{
   /* Prefer linear for staging resources, which should be as fast as possible
    * to write from the CPU.
    */
   if (agx_linear_allowed(pres) && pres->base.usage == PIPE_USAGE_STAGING)
      return DRM_FORMAT_MOD_LINEAR;

   /* For SCANOUT or SHARED resources with no explicit modifier selection, force
    * linear since we cannot expect consumers to correctly pass through the
    * modifier (unless linear is not allowed at all).
    */
   if (agx_linear_allowed(pres) &&
       pres->base.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) {
      return DRM_FORMAT_MOD_LINEAR;
   }

   if (agx_twiddled_allowed(pres)) {
      if (agx_compression_allowed(pres))
         return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
      else
         return DRM_FORMAT_MOD_APPLE_TWIDDLED;
   }

   if (agx_linear_allowed(pres))
      return DRM_FORMAT_MOD_LINEAR;
   else
      return DRM_FORMAT_MOD_INVALID;
}
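
/*
 * Illustrative examples (an addition, not from the original file) of how the
 * selection above plays out:
 *
 *  - PIPE_USAGE_STAGING 2D texture: linear is allowed, so we return
 *    DRM_FORMAT_MOD_LINEAR for fast CPU access.
 *  - Plain render target + sampler view: twiddled and compression are both
 *    allowed, so we return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED.
 *  - PIPE_BIND_SCANOUT surface: forced to DRM_FORMAT_MOD_LINEAR, since the
 *    consumer may not pass the modifier through.
 *  - Multisampled depth buffer: linear is never allowed, so we return a
 *    twiddled modifier (compressed if permitted).
 */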

static struct pipe_resource *
agx_resource_create_with_modifiers(struct pipe_screen *screen,
                                   const struct pipe_resource *templ,
                                   const uint64_t *modifiers, int count)
{
   struct agx_device *dev = agx_device(screen);
   struct agx_resource *nresource;

   nresource = CALLOC_STRUCT(agx_resource);
   if (!nresource)
      return NULL;

   nresource->base = *templ;
   nresource->base.screen = screen;

   if (modifiers) {
      nresource->modifier =
         agx_select_modifier_from_list(nresource, modifiers, count);
   } else {
      nresource->modifier = agx_select_best_modifier(nresource);
   }

   /* There may not be a matching modifier, bail if so */
   if (nresource->modifier == DRM_FORMAT_MOD_INVALID) {
      free(nresource);
      return NULL;
   }

   /* If there's only 1 layer and there's no compression, there's no harm in
    * inferring the shader image flag. Do so to avoid reallocation in case the
    * resource is later used as an image.
    */
   if (nresource->modifier != DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED &&
       templ->depth0 == 1) {

      nresource->base.bind |= PIPE_BIND_SHADER_IMAGE;
   }

   nresource->mipmapped = (templ->last_level > 0);

   assert(templ->format != PIPE_FORMAT_Z24X8_UNORM &&
          templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT &&
          "u_transfer_helper should have lowered");

   agx_resource_setup(dev, nresource);

   pipe_reference_init(&nresource->base.reference, 1);

   ail_make_miptree(&nresource->layout);

   /* Fail Piglit's obnoxious allocations */
   if (nresource->layout.size_B >= (1ull << 32)) {
      free(nresource);
      return NULL;
   }

   if (templ->target == PIPE_BUFFER) {
      assert(nresource->layout.tiling == AIL_TILING_LINEAR);
      util_range_init(&nresource->valid_buffer_range);
   }

   /* Guess a label based on the bind */
   unsigned bind = templ->bind;

   const char *label = (bind & PIPE_BIND_INDEX_BUFFER)     ? "Index buffer"
                       : (bind & PIPE_BIND_SCANOUT)        ? "Scanout"
                       : (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target"
                       : (bind & PIPE_BIND_SHARED)         ? "Shared resource"
                       : (bind & PIPE_BIND_RENDER_TARGET)  ? "Render target"
                       : (bind & PIPE_BIND_DEPTH_STENCIL)
                          ? "Depth/stencil buffer"
                       : (bind & PIPE_BIND_SAMPLER_VIEW)    ? "Texture"
                       : (bind & PIPE_BIND_VERTEX_BUFFER)   ? "Vertex buffer"
                       : (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer"
                       : (bind & PIPE_BIND_GLOBAL)          ? "Global memory"
                       : (bind & PIPE_BIND_SHADER_BUFFER)   ? "Shader buffer"
                       : (bind & PIPE_BIND_SHADER_IMAGE)    ? "Shader image"
                                                            : "Other resource";

   uint32_t create_flags = 0;

   /* Default to write-combine resources, but use writeback if that is expected
    * to be beneficial.
    */
   if (nresource->base.usage == PIPE_USAGE_STAGING ||
       (nresource->base.flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) {

      create_flags |= AGX_BO_WRITEBACK;
   }

   /* Allow disabling write-combine to debug performance issues */
   if (dev->debug & AGX_DBG_NOWC) {
      create_flags |= AGX_BO_WRITEBACK;
   }

   /* Create buffers that might be shared with the SHAREABLE flag */
   if (bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SHARED))
      create_flags |= AGX_BO_SHAREABLE;

   nresource->bo =
      agx_bo_create(dev, nresource->layout.size_B, create_flags, label);

   if (!nresource->bo) {
      FREE(nresource);
      return NULL;
   }

   agx_resource_debug(nresource, "New: ");
   return &nresource->base;
}

static struct pipe_resource *
agx_resource_create(struct pipe_screen *screen,
                    const struct pipe_resource *templ)
{
   return agx_resource_create_with_modifiers(screen, templ, NULL, 0);
}

static void
agx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *prsrc)
{
   struct agx_resource *rsrc = (struct agx_resource *)prsrc;
   struct agx_screen *agx_screen = (struct agx_screen *)screen;

   agx_resource_debug(rsrc, "Destroy: ");

   if (prsrc->target == PIPE_BUFFER)
      util_range_destroy(&rsrc->valid_buffer_range);

   if (rsrc->scanout)
      renderonly_scanout_destroy(rsrc->scanout, agx_screen->dev.ro);

   agx_bo_unreference(rsrc->bo);
   FREE(rsrc);
}

void
agx_batch_track_image(struct agx_batch *batch, struct pipe_image_view *image)
{
   struct agx_resource *rsrc = agx_resource(image->resource);

   if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
      batch->incoherent_writes = true;

      if (rsrc->base.target == PIPE_BUFFER) {
         agx_batch_writes_range(batch, rsrc, image->u.buf.offset,
                                image->u.buf.size);
      } else {
         agx_batch_writes(batch, rsrc, image->u.tex.level);
      }
   } else {
      agx_batch_reads(batch, rsrc);
   }
}

/*
 * transfer
 */

static void
agx_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}

/* Reallocate the backing buffer of a resource, returns true if successful */
static bool
agx_shadow(struct agx_context *ctx, struct agx_resource *rsrc, bool needs_copy)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   struct agx_bo *old = rsrc->bo;
   size_t size = rsrc->layout.size_B;
   unsigned flags = old->flags;

   if (dev->debug & AGX_DBG_NOSHADOW)
      return false;

   /* If a resource is (or could be) shared, shadowing would desync across
    * processes. (It's also not what this path is for.)
    */
   if (flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))
      return false;

   /* Do not shadow resources that are too large */
   if (size > MAX_SHADOW_BYTES && needs_copy)
      return false;

   /* Do not shadow resources too much */
   if (rsrc->shadowed_bytes >= MAX_TOTAL_SHADOW_BYTES && needs_copy)
      return false;

   rsrc->shadowed_bytes += size;

   /* If we need to copy, we reallocate the resource with cached-coherent
    * memory. This is a heuristic: it assumes that if the app needs a shadow
    * (with a copy) now, it will again need to shadow-and-copy the same resource
    * in the future. This accelerates the later copies, since otherwise the copy
    * involves reading uncached memory.
    */
   if (needs_copy)
      flags |= AGX_BO_WRITEBACK;

   struct agx_bo *new_ = agx_bo_create(dev, size, flags, old->label);

   /* If allocation failed, we can fall back on a flush gracefully */
   if (new_ == NULL)
      return false;

   if (needs_copy) {
      perf_debug_ctx(ctx, "Shadowing %zu bytes on the CPU (%s)", size,
                     (old->flags & AGX_BO_WRITEBACK) ? "cached" : "uncached");
      agx_resource_debug(rsrc, "Shadowed: ");

      memcpy(new_->ptr.cpu, old->ptr.cpu, size);
   }

   /* Swap the pointers, dropping a reference */
   agx_bo_unreference(rsrc->bo);
   rsrc->bo = new_;

   /* Reemit descriptors using this resource */
   agx_dirty_all(ctx);
   return true;
}

/*
 * Perform the required synchronization before a transfer_map operation can
 * complete. This may require syncing batches.
 */
static void
agx_prepare_for_map(struct agx_context *ctx, struct agx_resource *rsrc,
                    unsigned level,
                    unsigned usage, /* a combination of PIPE_MAP_x */
                    const struct pipe_box *box, bool staging_blit)
{
   /* GPU access does not require explicit syncs, as the batch tracking logic
    * will ensure correct ordering automatically.
    */
   if (staging_blit)
      return;

   /* If the level has not been written, we may freely do CPU access (writes),
    * even if other levels are being written by the GPU. This lets us write some
    * mip levels on the CPU and some on the GPU, without stalling.
    */
   if (!agx_resource_valid(rsrc, level))
      return;

   /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
    * being mapped.
    */
   if ((usage & PIPE_MAP_DISCARD_RANGE) &&
       !(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
       rsrc->base.last_level == 0 &&
       util_texrange_covers_whole_level(&rsrc->base, 0, box->x, box->y, box->z,
                                        box->width, box->height, box->depth)) {

      usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
   }

   /* Shadowing doesn't work for separate stencil or shared resources */
   if (rsrc->separate_stencil || (rsrc->bo->flags & AGX_BO_SHARED))
      usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;

   /* If the access is unsynchronized, there's nothing to do */
   if (usage & PIPE_MAP_UNSYNCHRONIZED)
      return;

   /* If the range being accessed is uninitialized, we do not need to sync. */
   if (rsrc->base.target == PIPE_BUFFER && !(rsrc->bo->flags & AGX_BO_SHARED) &&
       !util_ranges_intersect(&rsrc->valid_buffer_range, box->x,
                              box->x + box->width))
      return;

   /* Everything after this needs the context, which is not safe for
    * unsynchronized transfers when we claim
    * PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE.
    */
   assert(!(usage & PIPE_MAP_UNSYNCHRONIZED));

   /* Reading or writing from the CPU requires syncing writers. */
   agx_sync_writer(ctx, rsrc, "Unsynchronized CPU transfer");

   /* Additionally, writing needs readers synced. */
   if (!(usage & PIPE_MAP_WRITE))
      return;

   /* If there are no readers, we're done. We check at the start to
    * avoid expensive shadowing paths or duplicated checks in this happy path.
    */
   if (!agx_any_batch_uses_resource(ctx, rsrc)) {
      rsrc->shadowed_bytes = 0;
      return;
   }

   /* There are readers. Try to invalidate the resource to avoid a sync */
   if ((usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
       agx_shadow(ctx, rsrc, false))
      return;

   /* Or try to shadow it */
   if (!(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
       agx_shadow(ctx, rsrc, true))
      return;

   /* Otherwise, we need to sync */
   agx_sync_readers(ctx, rsrc, "Unsynchronized write");

   rsrc->shadowed_bytes = 0;
}
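
/*
 * Illustrative walkthrough (an addition, not from the original file) of the
 * ladder above, for a synchronized CPU write to a busy, non-persistent
 * texture:
 *
 *    1. The level is valid, so we sync any GPU writer first.
 *    2. A batch still reads the texture, so we try to dodge the stall: with
 *       DISCARD_WHOLE_RESOURCE we swap in a fresh BO without copying via
 *       agx_shadow(ctx, rsrc, false); otherwise we shadow with a copy.
 *    3. Only if shadowing is refused (too large, shared, or noshadow set) do
 *       we pay for agx_sync_readers() and stall.
 */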

/*
 * Return a colour-renderable format compatible with a depth/stencil format,
 * to be used as an interchange format for depth/stencil blits. For
 * non-depth/stencil formats, return the format itself, unless that format
 * would not round trip, in which case return a compatible format that does.
 */
static enum pipe_format
agx_staging_format(enum pipe_format format)
{
   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
      return PIPE_FORMAT_R16_UNORM;
   case PIPE_FORMAT_Z32_FLOAT:
      return PIPE_FORMAT_R32_FLOAT;
   case PIPE_FORMAT_S8_UINT:
      return PIPE_FORMAT_R8_UINT;
   default:
      /* Z24 and combined Z/S are lowered to one of the above formats by
       * u_transfer_helper. The caller needs to pass in the rsrc->layout.format
       * and not the rsrc->base.format to get the lowered physical format
       * (rather than the API logical format).
       */
      assert(!util_format_is_depth_or_stencil(format) &&
             "no other depth/stencil formats allowed for staging");

      /* However, snorm does not round trip, so don't use that for staging */
      return util_format_snorm_to_sint(format);
   }
}
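
/* Example mappings (illustrative): PIPE_FORMAT_Z16_UNORM stages as
 * PIPE_FORMAT_R16_UNORM and PIPE_FORMAT_S8_UINT as PIPE_FORMAT_R8_UINT,
 * while an snorm format such as PIPE_FORMAT_R8_SNORM maps through
 * util_format_snorm_to_sint() to PIPE_FORMAT_R8_SINT, since snorm encodings
 * do not round trip exactly through rendering.
 */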

/* Most of the time we can do CPU-side transfers, but sometimes we need to use
 * the 3D pipe for this. Let's wrap u_blitter to blit to/from staging textures.
 * Code adapted from panfrost */

static struct agx_resource *
agx_alloc_staging(struct pipe_screen *screen, struct agx_resource *rsc,
                  unsigned level, const struct pipe_box *box)
{
   struct pipe_resource tmpl = rsc->base;

   tmpl.usage = PIPE_USAGE_STAGING;
   tmpl.width0 = box->width;
   tmpl.height0 = box->height;
   tmpl.depth0 = 1;

   /* We need a linear staging resource. We have linear 2D arrays, but not
    * linear 3D or cube textures. So switch to 2D arrays if needed.
    */
   switch (tmpl.target) {
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
   case PIPE_TEXTURE_3D:
      tmpl.target = PIPE_TEXTURE_2D_ARRAY;
      tmpl.array_size = box->depth;
      break;
   default:
      assert(tmpl.array_size == 1);
      assert(box->depth == 1);
      break;
   }

   tmpl.last_level = 0;

   /* Linear is incompatible with depth/stencil, so we convert */
   tmpl.format = agx_staging_format(rsc->layout.format);
   tmpl.bind =
      PIPE_BIND_LINEAR | PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;

   struct pipe_resource *pstaging = screen->resource_create(screen, &tmpl);
   if (!pstaging)
      return NULL;

   return agx_resource(pstaging);
}

static void
agx_blit_from_staging(struct pipe_context *pctx, struct agx_transfer *trans)
{
   struct pipe_resource *dst = trans->base.resource;
   struct pipe_blit_info blit = {0};

   blit.dst.resource = dst;
   blit.dst.format = agx_staging_format(agx_resource(dst)->layout.format);
   blit.dst.level = trans->base.level;
   blit.dst.box = trans->base.box;
   blit.src.resource = trans->staging.rsrc;
   blit.src.format = blit.dst.format;
   blit.src.level = 0;
   blit.src.box = trans->staging.box;
   blit.mask = util_format_get_mask(blit.src.format);
   blit.filter = PIPE_TEX_FILTER_NEAREST;

   agx_blit(pctx, &blit);
}

static void
agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans)
{
   struct pipe_resource *src = trans->base.resource;
   struct pipe_blit_info blit = {0};

   blit.src.resource = src;
   blit.src.format = agx_staging_format(agx_resource(src)->layout.format);
   blit.src.level = trans->base.level;
   blit.src.box = trans->base.box;
   blit.dst.resource = trans->staging.rsrc;
   blit.dst.format = blit.src.format;
   blit.dst.level = 0;
   blit.dst.box = trans->staging.box;
   blit.mask = util_format_get_mask(blit.dst.format);
   blit.filter = PIPE_TEX_FILTER_NEAREST;

   agx_blit(pctx, &blit);
}

static void *
agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource,
                 unsigned level,
                 unsigned usage, /* a combination of PIPE_MAP_x */
                 const struct pipe_box *box,
                 struct pipe_transfer **out_transfer)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_resource *rsrc = agx_resource(resource);

   /* Can't map tiled/compressed directly */
   if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
      return NULL;

   /* Can't transfer out of bounds mip levels */
   if (level >= rsrc->layout.levels)
      return NULL;

   /* For compression, we use a staging blit as we do not implement AGX
    * compression in software. In some cases, we could use this path for
    * twiddled too, but we don't have a use case for that yet.
    */
   bool staging_blit = ail_is_level_compressed(&rsrc->layout, level);

   agx_prepare_for_map(ctx, rsrc, level, usage, box, staging_blit);

   /* Track the written buffer range */
   if (resource->target == PIPE_BUFFER) {
      /* Note the ordering: DISCARD|WRITE is valid, so clear before adding. */
      if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
         util_range_set_empty(&rsrc->valid_buffer_range);
      if (usage & PIPE_MAP_WRITE) {
         util_range_add(resource, &rsrc->valid_buffer_range, box->x,
                        box->x + box->width);
      }
   }

   struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer);
   transfer->base.level = level;
   transfer->base.usage = usage;
   transfer->base.box = *box;

   pipe_resource_reference(&transfer->base.resource, resource);
   *out_transfer = &transfer->base;

   if (staging_blit) {
      /* Should never happen for buffers, and it's not safe */
      assert(resource->target != PIPE_BUFFER);

      struct agx_resource *staging =
         agx_alloc_staging(pctx->screen, rsrc, level, box);
      assert(staging);

      /* Staging resources have one LOD: level 0. Query the strides
       * on this LOD.
       */
      transfer->base.stride = ail_get_linear_stride_B(&staging->layout, 0);
      transfer->base.layer_stride = staging->layout.layer_stride_B;
      transfer->staging.rsrc = &staging->base;

      transfer->staging.box = *box;
      transfer->staging.box.x = 0;
      transfer->staging.box.y = 0;
      transfer->staging.box.z = 0;

      assert(transfer->staging.rsrc != NULL);

      if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
         agx_blit_to_staging(pctx, transfer);
         agx_sync_writer(ctx, staging, "GPU read staging blit");
      }

      agx_bo_mmap(staging->bo);
      return staging->bo->ptr.cpu;
   }

   agx_bo_mmap(rsrc->bo);

   if (ail_is_level_twiddled_uncompressed(&rsrc->layout, level)) {
      /* Should never happen for buffers, and it's not safe */
      assert(resource->target != PIPE_BUFFER);

      transfer->base.stride =
         util_format_get_stride(rsrc->layout.format, box->width);

      transfer->base.layer_stride = util_format_get_2d_size(
         rsrc->layout.format, transfer->base.stride, box->height);

      transfer->map = calloc(transfer->base.layer_stride, box->depth);

      if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
         for (unsigned z = 0; z < box->depth; ++z) {
            uint8_t *map = agx_map_texture_cpu(rsrc, level, box->z + z);
            uint8_t *dst =
               (uint8_t *)transfer->map + transfer->base.layer_stride * z;

            ail_detile(map, dst, &rsrc->layout, level, transfer->base.stride,
                       box->x, box->y, box->width, box->height);
         }
      }

      return transfer->map;
   } else {
      assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR);

      transfer->base.stride = ail_get_linear_stride_B(&rsrc->layout, level);
      transfer->base.layer_stride = rsrc->layout.layer_stride_B;

      /* Be conservative for direct writes */
      if ((usage & PIPE_MAP_WRITE) &&
          (usage &
           (PIPE_MAP_DIRECTLY | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT))) {
         BITSET_SET(rsrc->data_valid, level);
      }

      uint32_t offset =
         ail_get_linear_pixel_B(&rsrc->layout, level, box->x, box->y, box->z);

      return ((uint8_t *)rsrc->bo->ptr.cpu) + offset;
   }
}
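
/*
 * Summary of the three map paths above (an addition, not from the original
 * file): compressed levels bounce through a GPU staging blit; twiddled
 * levels are detiled into a malloc'd linear shadow that unmap retiles back;
 * linear levels return a direct CPU pointer into the BO, offset by
 * ail_get_linear_pixel_B(&rsrc->layout, level, x, y, z).
 */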

static void
agx_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
{
   /* Gallium expects writeback here, so we tile */

   struct agx_transfer *trans = agx_transfer(transfer);
   struct pipe_resource *prsrc = transfer->resource;
   struct agx_resource *rsrc = (struct agx_resource *)prsrc;

   if (trans->staging.rsrc && (transfer->usage & PIPE_MAP_WRITE)) {
      assert(prsrc->target != PIPE_BUFFER);
      agx_blit_from_staging(pctx, trans);
      agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc),
                        "GPU write staging blit");
   } else if (trans->map && (transfer->usage & PIPE_MAP_WRITE)) {
      assert(
         ail_is_level_twiddled_uncompressed(&rsrc->layout, transfer->level));

      for (unsigned z = 0; z < transfer->box.depth; ++z) {
         uint8_t *map =
            agx_map_texture_cpu(rsrc, transfer->level, transfer->box.z + z);
         uint8_t *src = (uint8_t *)trans->map + transfer->layer_stride * z;

         ail_tile(map, src, &rsrc->layout, transfer->level, transfer->stride,
                  transfer->box.x, transfer->box.y, transfer->box.width,
                  transfer->box.height);
      }
   }

   /* The level we wrote is now initialized. We do this at the end so
    * blit_from_staging can avoid reloading existing contents.
    */
   if (transfer->usage & PIPE_MAP_WRITE)
      BITSET_SET(rsrc->data_valid, transfer->level);

   /* Free the transfer */
   free(trans->map);
   pipe_resource_reference(&trans->staging.rsrc, NULL);
   pipe_resource_reference(&transfer->resource, NULL);
   FREE(transfer);
}

/*
 * clear/copy
 */
static void
agx_clear(struct pipe_context *pctx, unsigned buffers,
          const struct pipe_scissor_state *scissor_state,
          const union pipe_color_union *color, double depth, unsigned stencil)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_batch *batch = agx_get_batch(ctx);

   if (unlikely(!agx_render_condition_check(ctx)))
      return;

   unsigned fastclear = buffers & ~(batch->draw | batch->load);
   unsigned slowclear = buffers & ~fastclear;

   assert(scissor_state == NULL && "we don't support PIPE_CAP_CLEAR_SCISSORED");

   /* Fast clears configure the batch */
   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
      if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))
         continue;

      static_assert(sizeof(color->f) == 16, "mismatched structure");

      /* Clear colour must be clamped to properly handle signed ints. */
      union pipe_color_union clamped =
         util_clamp_color(batch->key.cbufs[rt]->format, color);

      batch->uploaded_clear_color[rt] = agx_pool_upload_aligned(
         &batch->pool, clamped.f, sizeof(clamped.f), 16);
   }

   if (fastclear & PIPE_CLEAR_DEPTH)
      batch->clear_depth = depth;

   if (fastclear & PIPE_CLEAR_STENCIL)
      batch->clear_stencil = stencil;

   /* Slow clears draw a fullscreen rectangle */
   if (slowclear) {
      agx_blitter_save(ctx, ctx->blitter, false /* render cond */);
      util_blitter_clear(
         ctx->blitter, ctx->framebuffer.width, ctx->framebuffer.height,
         util_framebuffer_get_num_layers(&ctx->framebuffer), slowclear, color,
         depth, stencil,
         util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
   }

   if (fastclear)
      agx_batch_init_state(batch);

   batch->clear |= fastclear;
   batch->resolve |= buffers;
   assert((batch->draw & slowclear) == slowclear);
}
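
/*
 * Illustrative example (an addition, not from the original file): a glClear
 * on a freshly started batch takes the fast path, since no draw or load has
 * touched the buffers yet, so the batch merely records the clamped clear
 * colour and depth/stencil values. Clearing again after a draw lands in
 * slowclear and is emitted as a fullscreen rectangle via
 * util_blitter_clear().
 */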

static void
transition_resource(struct pipe_context *pctx, struct agx_resource *rsrc,
                    struct pipe_resource *templ)
{
   struct agx_resource *new_res =
      agx_resource(pctx->screen->resource_create(pctx->screen, templ));

   assert(new_res);
   assert(!(rsrc->base.bind & PIPE_BIND_SHARED) && "cannot swap BOs if shared");

   int level;
   BITSET_FOREACH_SET(level, rsrc->data_valid, PIPE_MAX_TEXTURE_LEVELS) {
      /* Copy each valid level */
      struct pipe_box box;
      u_box_3d(0, 0, 0, u_minify(rsrc->layout.width_px, level),
               u_minify(rsrc->layout.height_px, level),
               util_num_layers(&rsrc->base, level), &box);

      agx_resource_copy_region(pctx, &new_res->base, level, 0, 0, 0,
                               &rsrc->base, level, &box);
   }

   /* Flush the blits out, to make sure the old resource is no longer used */
   agx_flush_writer(agx_context(pctx), new_res, "flush_resource");

   /* Copy the bind flags and swap the BOs */
   struct agx_bo *old = rsrc->bo;
   rsrc->base.bind = new_res->base.bind;
   rsrc->layout = new_res->layout;
   rsrc->modifier = new_res->modifier;
   rsrc->bo = new_res->bo;
   new_res->bo = old;

   /* Free the new resource, which now owns the old BO */
   pipe_resource_reference((struct pipe_resource **)&new_res, NULL);
}

void
agx_decompress(struct agx_context *ctx, struct agx_resource *rsrc,
               const char *reason)
{
   if (rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED) {
      perf_debug_ctx(ctx, "Decompressing resource due to %s", reason);
   } else if (!rsrc->layout.writeable_image) {
      perf_debug_ctx(ctx, "Reallocating image due to %s", reason);
   }

   struct pipe_resource templ = rsrc->base;
   assert(!(templ.bind & PIPE_BIND_SHADER_IMAGE) && "currently compressed");
   templ.bind |= PIPE_BIND_SHADER_IMAGE /* forces off compression */;
   transition_resource(&ctx->base, rsrc, &templ);
}
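
/*
 * Illustrative trigger (an addition, not from the original file): binding a
 * compressed texture as a writeable shader image, e.g. through
 * glBindImageTexture(), requires agx_decompress(). The added
 * PIPE_BIND_SHADER_IMAGE bind makes agx_compression_allowed() fail, so
 * transition_resource() reallocates the texture uncompressed and copies each
 * valid level across on the GPU.
 */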

static void
agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres)
{
   struct agx_resource *rsrc = agx_resource(pres);

   /* flush_resource is used to prepare resources for sharing, so if this is
    * not already a shareable resource, make it so
    */
   struct agx_bo *old = rsrc->bo;
   if (!(old->flags & AGX_BO_SHAREABLE)) {
      assert(rsrc->layout.levels == 1 &&
             "Shared resources must not be mipmapped");
      assert(rsrc->layout.sample_count_sa == 1 &&
             "Shared resources must not be multisampled");
      assert(rsrc->bo);
      assert(!(pres->bind & PIPE_BIND_SHARED));

      struct pipe_resource templ = *pres;
      templ.bind |= PIPE_BIND_SHARED;
      transition_resource(pctx, rsrc, &templ);
   } else {
      /* Otherwise just claim it's already shared */
      pres->bind |= PIPE_BIND_SHARED;
      agx_flush_writer(agx_context(pctx), rsrc, "flush_resource");
   }
}

/*
 * context
 */
static void
agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
          unsigned flags)
{
   struct agx_context *ctx = agx_context(pctx);

   agx_flush_all(ctx, "Gallium flush");

   /* At this point all pending work has been submitted. Since jobs are
    * started and completed sequentially from a UAPI perspective, and since
    * we submit all jobs with compute+render barriers on the prior job,
    * waiting on the last submitted job is sufficient to guarantee completion
    * of all GPU work thus far, so we can create a fence out of the latest
    * syncobj.
    *
    * See this page for more info on how the GPU/UAPI queueing works:
    * https://github.com/AsahiLinux/docs/wiki/SW:AGX-driver-notes#queues
    */

   if (fence) {
      struct pipe_fence_handle *f = agx_fence_create(ctx);
      pctx->screen->fence_reference(pctx->screen, fence, NULL);
      *fence = f;
   }
}

void
agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   assert(agx_batch_is_active(batch));
   assert(!agx_batch_is_submitted(batch));

   /* Make sure there's something to submit. */
   if (!batch->clear) {
      agx_batch_reset(ctx, batch);
      return;
   }

   if (batch->cs_scratch)
      agx_batch_add_bo(batch, ctx->scratch_cs.buf);

   assert(batch->initialized);

   /* Finalize the encoder */
   uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
   memcpy(batch->vdm.current, stop, sizeof(stop));

   uint64_t pipeline_background = agx_build_meta(batch, false, false);
   uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
   uint64_t pipeline_store = agx_build_meta(batch, true, false);

   bool clear_pipeline_textures =
      agx_tilebuffer_spills(&batch->tilebuffer_layout);

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      struct pipe_surface *surf = batch->key.cbufs[i];

      clear_pipeline_textures |=
         surf && surf->texture && !(batch->clear & (PIPE_CLEAR_COLOR0 << i));
   }

   /* Scissor and depth bias arrays are staged to dynamic arrays on the CPU. At
    * submit time, they're done growing and are uploaded to GPU memory attached
    * to the batch.
    */
   uint64_t scissor = agx_pool_upload_aligned(&batch->pool, batch->scissor.data,
                                              batch->scissor.size, 64);
   uint64_t zbias = agx_pool_upload_aligned(
      &batch->pool, batch->depth_bias.data, batch->depth_bias.size, 64);

   /* BO list for a given batch consists of:
    *  - BOs for the batch's pools
    *  - BOs for the encoder
    *  - BO for internal shaders
    *  - BOs added to the batch explicitly
    */
   agx_batch_add_bo(batch, batch->vdm.bo);

   if (batch->vs_scratch)
      agx_batch_add_bo(batch, ctx->scratch_vs.buf);
   if (batch->fs_scratch)
      agx_batch_add_bo(batch, ctx->scratch_fs.buf);

   /* TODO: Linux UAPI submission */
   (void)dev;
   (void)zbias;
   (void)scissor;
   (void)clear_pipeline_textures;
   (void)pipeline_store;
   (void)pipeline_background;
   (void)pipeline_background_partial;

   unreachable("Linux UAPI not yet upstream");
   agx_batch_submit(ctx, batch, 0, 0, NULL);
}

static void
agx_destroy_context(struct pipe_context *pctx)
{
   struct agx_device *dev = agx_device(pctx->screen);
   struct agx_context *ctx = agx_context(pctx);

   /* Batch state needs to be freed on completion, and we don't want to yank
    * buffers out from in-progress GPU jobs to avoid faults, so just wait until
    * everything in progress is actually done on context destroy. This will
    * ensure everything is cleaned up properly.
    */
   agx_sync_all(ctx, "destroy context");

   if (pctx->stream_uploader)
      u_upload_destroy(pctx->stream_uploader);

   if (ctx->blitter)
      util_blitter_destroy(ctx->blitter);

   util_unreference_framebuffer_state(&ctx->framebuffer);

   agx_meta_cleanup(&ctx->meta);
   agx_destroy_meta_shaders(ctx);

   agx_bo_unreference(ctx->result_buf);

   drmSyncobjDestroy(dev->fd, ctx->in_sync_obj);
   drmSyncobjDestroy(dev->fd, ctx->dummy_syncobj);
   if (ctx->in_sync_fd != -1)
      close(ctx->in_sync_fd);

   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (ctx->batches.slots[i].syncobj)
         drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
   }

   pipe_resource_reference(&ctx->heap, NULL);

   agx_scratch_fini(&ctx->scratch_vs);
   agx_scratch_fini(&ctx->scratch_fs);
   agx_scratch_fini(&ctx->scratch_cs);

   ralloc_free(ctx);
}

static void
agx_invalidate_resource(struct pipe_context *pctx,
                        struct pipe_resource *resource)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_batch *batch = agx_get_batch(ctx);

   /* Handle the glInvalidateFramebuffer case */
   if (batch->key.zsbuf && batch->key.zsbuf->texture == resource)
      batch->resolve &= ~PIPE_CLEAR_DEPTHSTENCIL;

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      struct pipe_surface *surf = batch->key.cbufs[i];

      if (surf && surf->texture == resource)
         batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
   }
}

static enum pipe_reset_status
asahi_get_device_reset_status(struct pipe_context *pipe)
{
   struct agx_context *ctx = agx_context(pipe);

   return ctx->any_faults ? PIPE_GUILTY_CONTEXT_RESET : PIPE_NO_RESET;
}
1427 
1428 static struct pipe_context *
agx_create_context(struct pipe_screen * screen,void * priv,unsigned flags)1429 agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
1430 {
1431    struct agx_context *ctx = rzalloc(NULL, struct agx_context);
1432    struct pipe_context *pctx = &ctx->base;
1433    int ret;
1434 
1435    if (!ctx)
1436       return NULL;
1437 
1438    pctx->screen = screen;
1439    pctx->priv = priv;
1440 
1441    util_dynarray_init(&ctx->writer, ctx);
1442    util_dynarray_init(&ctx->global_buffers, ctx);
1443 
1444    pctx->stream_uploader = u_upload_create_default(pctx);
1445    if (!pctx->stream_uploader) {
1446       FREE(pctx);
1447       return NULL;
1448    }
1449    pctx->const_uploader = pctx->stream_uploader;
1450 
1451    pctx->destroy = agx_destroy_context;
1452    pctx->flush = agx_flush;
1453    pctx->clear = agx_clear;
1454    pctx->resource_copy_region = agx_resource_copy_region;
1455    pctx->blit = agx_blit;
1456    pctx->flush_resource = agx_flush_resource;
1457 
1458    pctx->buffer_map = u_transfer_helper_transfer_map;
1459    pctx->buffer_unmap = u_transfer_helper_transfer_unmap;
1460    pctx->texture_map = u_transfer_helper_transfer_map;
1461    pctx->texture_unmap = u_transfer_helper_transfer_unmap;
1462    pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
1463 
1464    pctx->buffer_subdata = u_default_buffer_subdata;
1465    pctx->clear_buffer = u_default_clear_buffer;
1466    pctx->texture_subdata = u_default_texture_subdata;
1467    pctx->set_debug_callback = u_default_set_debug_callback;
1468    pctx->get_sample_position = u_default_get_sample_position;
1469    pctx->invalidate_resource = agx_invalidate_resource;
1470    pctx->memory_barrier = agx_memory_barrier;
1471 
1472    pctx->create_fence_fd = agx_create_fence_fd;
1473    pctx->fence_server_sync = agx_fence_server_sync;
1474 
1475    pctx->get_device_reset_status = asahi_get_device_reset_status;
1476 
1477    agx_init_state_functions(pctx);
1478    agx_init_query_functions(pctx);
1479    agx_init_streamout_functions(pctx);
1480 
1481    agx_meta_init(&ctx->meta, agx_device(screen));
1482    agx_init_meta_shaders(ctx);
1483 
1484    ctx->blitter = util_blitter_create(pctx);
1485 
1486    ctx->result_buf = agx_bo_create(
1487       agx_device(screen), sizeof(union agx_batch_result) * AGX_MAX_BATCHES,
1488       AGX_BO_WRITEBACK, "Batch result buffer");
1489    assert(ctx->result_buf);
1490 
1491    /* Sync object/FD used for NATIVE_FENCE_FD. */
1492    ctx->in_sync_fd = -1;
1493    ret = drmSyncobjCreate(agx_device(screen)->fd, 0, &ctx->in_sync_obj);
1494    assert(!ret);
1495 
1496    /* Dummy sync object used before any work has been submitted. */
1497    ret = drmSyncobjCreate(agx_device(screen)->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
1498                           &ctx->dummy_syncobj);
1499    assert(!ret);
1500    ctx->syncobj = ctx->dummy_syncobj;
1501 
1502    /* By default all samples are enabled */
1503    ctx->sample_mask = ~0;
1504 
1505    ctx->support_lod_bias = !(flags & PIPE_CONTEXT_NO_LOD_BIAS);
1506    ctx->robust = (flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
1507 
1508    agx_scratch_init(agx_device(screen), &ctx->scratch_vs);
1509    agx_scratch_init(agx_device(screen), &ctx->scratch_fs);
1510    agx_scratch_init(agx_device(screen), &ctx->scratch_cs);
1511 
1512    return pctx;
1513 }
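/* Hedged usage sketch (frontend side, not part of this driver): contexts are
 * reached through the screen vtable rather than by calling
 * agx_create_context() directly:
 *
 *    struct pipe_context *pctx =
 *       screen->context_create(screen, NULL, PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
 *    ...
 *    pctx->destroy(pctx);
 */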
1514 
1515 static const char *
1516 agx_get_vendor(struct pipe_screen *pscreen)
1517 {
1518    return "Mesa";
1519 }
1520 
1521 static const char *
1522 agx_get_device_vendor(struct pipe_screen *pscreen)
1523 {
1524    return "Apple";
1525 }
1526 
1527 static const char *
1528 agx_get_name(struct pipe_screen *pscreen)
1529 {
1530    struct agx_device *dev = agx_device(pscreen);
1531 
1532    return dev->name;
1533 }
1534 
1535 static int
1536 agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
1537 {
1538    struct agx_device *dev = agx_device(pscreen);
1539 
1540    switch (param) {
1541    case PIPE_CAP_CLIP_HALFZ:
1542    case PIPE_CAP_NPOT_TEXTURES:
1543    case PIPE_CAP_SHADER_STENCIL_EXPORT:
1544    case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
1545    case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
1546    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
1547    case PIPE_CAP_DEPTH_CLIP_DISABLE:
1548    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
1549    case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
1550    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
1551    case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
1552    case PIPE_CAP_FS_FINE_DERIVATIVE:
1553    case PIPE_CAP_CULL_DISTANCE_NOCOMBINE:
1554    case PIPE_CAP_NIR_COMPACT_ARRAYS:
1555    case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
1556    case PIPE_CAP_DOUBLES:
1557       return 1;
1558 
1559    case PIPE_CAP_MAX_RENDER_TARGETS:
1560    case PIPE_CAP_FBFETCH:
1561    case PIPE_CAP_FBFETCH_COHERENT:
1562       return 8;
1563    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
1564       return 1;
1565 
1566    case PIPE_CAP_OCCLUSION_QUERY:
1567    case PIPE_CAP_QUERY_TIMESTAMP:
1568    case PIPE_CAP_QUERY_TIME_ELAPSED:
1569    case PIPE_CAP_QUERY_SO_OVERFLOW:
1570    case PIPE_CAP_PRIMITIVE_RESTART:
1571    case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
1572    case PIPE_CAP_ANISOTROPIC_FILTER:
1573    case PIPE_CAP_NATIVE_FENCE_FD:
1574    case PIPE_CAP_TEXTURE_BARRIER:
1575       return true;
1576 
1577    case PIPE_CAP_TIMER_RESOLUTION:
1578       /* Timer resolution is the length of a single tick in nanoseconds */
1579       return agx_gpu_time_to_ns(dev, 1);
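      /* Illustrative arithmetic only (the 24 MHz timebase here is an
       * assumption, not read from the device): one tick of a 24 MHz counter
       * is 1e9 / 24e6, i.e. about 41.7 ns, which is what
       * agx_gpu_time_to_ns(dev, 1) would return for such a clock.
       */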
1580 
1581    case PIPE_CAP_SAMPLER_VIEW_TARGET:
1582    case PIPE_CAP_TEXTURE_SWIZZLE:
1583    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
1584    case PIPE_CAP_INDEP_BLEND_ENABLE:
1585    case PIPE_CAP_INDEP_BLEND_FUNC:
1586    case PIPE_CAP_ACCELERATED:
1587    case PIPE_CAP_UMA:
1588    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
1589    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
1590    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
1591    case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
1592    case PIPE_CAP_PACKED_UNIFORMS:
1593    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
1594    case PIPE_CAP_VS_INSTANCEID:
1595    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
1596    case PIPE_CAP_CONDITIONAL_RENDER:
1597    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
1598    case PIPE_CAP_SEAMLESS_CUBE_MAP:
1599    case PIPE_CAP_LOAD_CONSTBUF:
1600    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
1601    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
1602    case PIPE_CAP_NULL_TEXTURES:
1603    case PIPE_CAP_TEXTURE_MULTISAMPLE:
1604    case PIPE_CAP_IMAGE_LOAD_FORMATTED:
1605    case PIPE_CAP_IMAGE_STORE_FORMATTED:
1606    case PIPE_CAP_COMPUTE:
1607    case PIPE_CAP_INT64:
1608    case PIPE_CAP_SAMPLE_SHADING:
1609    case PIPE_CAP_START_INSTANCE:
1610    case PIPE_CAP_DRAW_PARAMETERS:
1611    case PIPE_CAP_MULTI_DRAW_INDIRECT:
1612    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
1613    case PIPE_CAP_CULL_DISTANCE:
1614    case PIPE_CAP_GL_SPIRV:
1615    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
1616       return 1;
1617    case PIPE_CAP_SURFACE_SAMPLE_COUNT:
1618       /* TODO: MSRTT */
1619       return 0;
1620 
1621    case PIPE_CAP_CUBE_MAP_ARRAY:
1622       return 1;
1623 
1624    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
1625       return 1;
1626 
1627    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
1628       return PIPE_MAX_SO_BUFFERS;
1629 
1630    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
1631    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
1632       return PIPE_MAX_SO_OUTPUTS;
1633 
1634    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
1635    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
1636       return 1;
1637 
1638    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
1639       return 2048;
1640 
1641    case PIPE_CAP_GLSL_FEATURE_LEVEL:
1642    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
1643       return 460;
1644    case PIPE_CAP_ESSL_FEATURE_LEVEL:
1645       return 320;
1646 
1647    /* Settings from iris, may need tuning */
1648    case PIPE_CAP_MAX_VERTEX_STREAMS:
1649       return 4;
1650    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
1651       return 256;
1652    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
1653       return 1024;
1654    case PIPE_CAP_MAX_GS_INVOCATIONS:
1655       return 32;
1656    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
1657       return 16;
1658 
1659    case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
1660       return AGX_TEXTURE_BUFFER_MAX_SIZE;
1661 
1662    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
1663       return 64;
1664 
1665    case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
1666       return 1;
1667 
1668    case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
1669       return true;
1670 
1671    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
1672       return 16384;
1673    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
1674       /* Max 16384x16384 */
1675       return 15;
1676    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
1677       /* Max 2048x2048x2048 */
1678       return 12;
1679 
1680    case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
1681    case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
1682    case PIPE_CAP_TGSI_TEXCOORD:
1683    case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
1684    case PIPE_CAP_FS_POSITION_IS_SYSVAL:
1685       return true;
1686    case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
1687    case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
1688    case PIPE_CAP_FS_POINT_IS_SYSVAL:
1689       return false;
1690 
1691    case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
1692       return 0xffff;
1693 
1694    case PIPE_CAP_TEXTURE_TRANSFER_MODES:
1695       return PIPE_TEXTURE_TRANSFER_BLIT;
1696 
1697    case PIPE_CAP_ENDIANNESS:
1698       return PIPE_ENDIAN_LITTLE;
1699 
1700    case PIPE_CAP_SHADER_GROUP_VOTE:
1701    case PIPE_CAP_SHADER_BALLOT:
1702       return true;
1703 
1704    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
1705       return 4;
1706    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
1707       return -8;
1708    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
1709       return 7;
1710    case PIPE_CAP_DRAW_INDIRECT:
1711    case PIPE_CAP_TEXTURE_QUERY_SAMPLES:
1712    case PIPE_CAP_TEXTURE_QUERY_LOD:
1713    case PIPE_CAP_TEXTURE_SHADOW_LOD:
1714       return true;
1715 
1716    case PIPE_CAP_MAX_VIEWPORTS:
1717       return AGX_MAX_VIEWPORTS;
1718 
1719    case PIPE_CAP_VIDEO_MEMORY: {
1720       uint64_t system_memory;
1721 
1722       if (!os_get_total_physical_memory(&system_memory))
1723          return 0;
1724 
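      /* PIPE_CAP_VIDEO_MEMORY is expressed in MiB, hence the >> 20. */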
1725       return (int)(system_memory >> 20);
1726    }
1727 
1728    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
1729    case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
1730       return true;
1731 
1732    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
1733       return 4;
1734 
1735    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
1736       return 32;
1737    case PIPE_CAP_MAX_VARYINGS:
1738       /* TODO: Probably should bump to 32? */
1739       return 16;
1740 
1741    case PIPE_CAP_FLATSHADE:
1742    case PIPE_CAP_TWO_SIDED_COLOR:
1743    case PIPE_CAP_ALPHA_TEST:
1744    case PIPE_CAP_CLIP_PLANES:
1745    case PIPE_CAP_NIR_IMAGES_AS_DEREF:
1746       return 0;
1747 
1748    case PIPE_CAP_QUERY_BUFFER_OBJECT:
1749       return true;
1750 
1751    case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
1752       return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
1753 
1754    case PIPE_CAP_SUPPORTED_PRIM_MODES:
1755    case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:
1756       return BITFIELD_BIT(MESA_PRIM_POINTS) | BITFIELD_BIT(MESA_PRIM_LINES) |
1757              BITFIELD_BIT(MESA_PRIM_LINE_STRIP) |
1758              BITFIELD_BIT(MESA_PRIM_LINE_LOOP) |
1759              BITFIELD_BIT(MESA_PRIM_TRIANGLES) |
1760              BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP) |
1761              BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN) |
1762              BITFIELD_BIT(MESA_PRIM_LINES_ADJACENCY) |
1763              BITFIELD_BIT(MESA_PRIM_LINE_STRIP_ADJACENCY) |
1764              BITFIELD_BIT(MESA_PRIM_TRIANGLES_ADJACENCY) |
1765              BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) |
1766              BITFIELD_BIT(MESA_PRIM_PATCHES);
1767 
1768    case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
1769       return 1;
1770 
1771    case PIPE_CAP_VS_LAYER_VIEWPORT:
1772    case PIPE_CAP_TES_LAYER_VIEWPORT:
1773       return true;
1774 
1775    default:
1776       return u_pipe_screen_get_param_defaults(pscreen, param);
1777    }
1778 }
1779 
1780 static float
1781 agx_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
1782 {
1783    switch (param) {
1784    case PIPE_CAPF_MIN_LINE_WIDTH:
1785    case PIPE_CAPF_MIN_LINE_WIDTH_AA:
1786    case PIPE_CAPF_MIN_POINT_SIZE:
1787    case PIPE_CAPF_MIN_POINT_SIZE_AA:
1788       return 1;
1789 
1790    case PIPE_CAPF_POINT_SIZE_GRANULARITY:
1791    case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
1792       return 0.1;
1793 
1794    case PIPE_CAPF_MAX_LINE_WIDTH:
1795    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
1796       return 16.0; /* Off-by-one fixed point 4:4 encoding */
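      /* Worked example, assuming the encoding stores (width * 16) - 1: a
       * width of 16.0 maps to 255, exactly the top of an 8-bit 4:4
       * fixed-point field, which is where the 16.0 limit comes from.
       */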
1797 
1798    case PIPE_CAPF_MAX_POINT_SIZE:
1799    case PIPE_CAPF_MAX_POINT_SIZE_AA:
1800       return 511.95f;
1801 
1802    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
1803       return 16.0;
1804 
1805    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
1806       return 16.0; /* arbitrary */
1807 
1808    case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
1809    case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
1810    case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
1811       return 0.0f;
1812 
1813    default:
1814       debug_printf("Unexpected PIPE_CAPF %d query\n", param);
1815       return 0.0;
1816    }
1817 }
1818 
1819 static int
1820 agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
1821                      enum pipe_shader_cap param)
1822 {
1823    bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16;
1824 
1825    switch (shader) {
1826    case PIPE_SHADER_VERTEX:
1827    case PIPE_SHADER_FRAGMENT:
1828    case PIPE_SHADER_COMPUTE:
1829    case PIPE_SHADER_GEOMETRY:
1830    case PIPE_SHADER_TESS_CTRL:
1831    case PIPE_SHADER_TESS_EVAL:
1832       break;
1833    default:
1834       return false;
1835    }
1836 
1837    /* This is probably not totally correct, but it's a start: */
1838    switch (param) {
1839    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
1840    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
1841    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
1842    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
1843       return 16384;
1844 
1845    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
1846       return 1024;
1847 
1848    case PIPE_SHADER_CAP_MAX_INPUTS:
1849       return shader == PIPE_SHADER_VERTEX ? 16 : 32;
1850 
1851    case PIPE_SHADER_CAP_MAX_OUTPUTS:
1852       /* For vertex, the spec min/max is 16. We need more to handle dmat3
1853        * correctly, though. The full 32 is undesirable since it would require
1854        * shenanigans to handle.
1855        */
1856       return shader == PIPE_SHADER_FRAGMENT ? 8
1857              : shader == PIPE_SHADER_VERTEX ? 24
1858                                             : 32;
1859 
1860    case PIPE_SHADER_CAP_MAX_TEMPS:
1861       return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
1862 
1863    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
1864       return 16 * 1024 * sizeof(float);
1865 
1866    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
1867       return 16;
1868 
1869    case PIPE_SHADER_CAP_CONT_SUPPORTED:
1870       return 1;
1871 
1872    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
1873       return shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL;
1874 
1875    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
1876       return shader == PIPE_SHADER_TESS_CTRL;
1877 
1878    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
1879    case PIPE_SHADER_CAP_SUBROUTINES:
1880    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
1881       return 0;
1882 
1883    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
1884    case PIPE_SHADER_CAP_INTEGERS:
1885       return true;
1886 
1887    case PIPE_SHADER_CAP_FP16:
1888    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
1889    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
1890       return !is_no16;
1891    case PIPE_SHADER_CAP_INT16:
1892       /* GLSL compiler is broken. Flip this on when Panfrost does. */
1893       return false;
1894    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
1895       /* This cap is broken, see 9a38dab2d18 ("zink: disable
1896        * PIPE_SHADER_CAP_FP16_CONST_BUFFERS") */
1897       return false;
1898 
1899    case PIPE_SHADER_CAP_INT64_ATOMICS:
1900    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
1901       return 0;
1902 
1903    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
1904       return 16;
1905 
1906    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
1907       return PIPE_MAX_SHADER_SAMPLER_VIEWS;
1908 
1909    case PIPE_SHADER_CAP_SUPPORTED_IRS:
1910       return (1 << PIPE_SHADER_IR_NIR);
1911 
1912    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
1913       return PIPE_MAX_SHADER_BUFFERS;
1914 
1915    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
1916       return PIPE_MAX_SHADER_IMAGES;
1917 
1918    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
1919    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
1920       return 0;
1921 
1922    default:
1923       /* Other params are unknown */
1924       return 0;
1925    }
1926 
1927    return 0;
1928 }
1929 
1930 static int
1931 agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
1932                       enum pipe_compute_cap param, void *ret)
1933 {
1934 #define RET(x)                                                                 \
1935    do {                                                                        \
1936       if (ret)                                                                 \
1937          memcpy(ret, x, sizeof(x));                                            \
1938       return sizeof(x);                                                        \
1939    } while (0)
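   /* RET() copies the value out only if the caller provided storage; passing
    * ret == NULL is the conventional way to query just the size of a compute
    * cap without fetching it.
    */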
1940 
1941    switch (param) {
1942    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
1943       RET((uint32_t[]){64});
1944 
1945    case PIPE_COMPUTE_CAP_IR_TARGET:
1946       if (ret)
1947          sprintf(ret, "agx");
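      /* Note: the size returned below excludes the NUL terminator that
       * sprintf() writes into ret.
       */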
1948       return strlen("agx") * sizeof(char);
1949 
1950    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
1951       RET((uint64_t[]){3});
1952 
1953    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
1954       RET(((uint64_t[]){65535, 65535, 65535}));
1955 
1956    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
1957       RET(((uint64_t[]){1024, 1024, 1024}));
1958 
1959    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
1960       RET((uint64_t[]){1024});
1961 
1962    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
1963    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
1964       uint64_t system_memory;
1965 
1966       if (!os_get_total_physical_memory(&system_memory))
1967          return 0;
1968 
1969       RET((uint64_t[]){system_memory});
1970    }
1971 
1972    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
1973       RET((uint64_t[]){32768});
1974 
1975    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
1976    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
1977       RET((uint64_t[]){4096});
1978 
1979    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
1980       RET((uint32_t[]){800 /* MHz -- TODO */});
1981 
1982    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
1983       RET((uint32_t[]){4 /* TODO */});
1984 
1985    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
1986       RET((uint32_t[]){1});
1987 
1988    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
1989       RET((uint32_t[]){32});
1990 
1991    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
1992       RET((uint32_t[]){0 /* TODO */});
1993 
1994    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
1995       RET((uint64_t[]){1024}); // TODO
1996    }
1997 
1998    return 0;
1999 }
2000 
2001 static bool
2002 agx_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format,
2003                         enum pipe_texture_target target, unsigned sample_count,
2004                         unsigned storage_sample_count, unsigned usage)
2005 {
2006    assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D ||
2007           target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D ||
2008           target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT ||
2009           target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE ||
2010           target == PIPE_TEXTURE_CUBE_ARRAY);
2011 
2012    if (sample_count > 1 && sample_count != 4 && sample_count != 2)
2013       return false;
2014 
2015    if (sample_count > 1 && agx_device(pscreen)->debug & AGX_DBG_NOMSAA)
2016       return false;
2017 
2018    if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
2019       return false;
2020 
2021    if ((usage & PIPE_BIND_VERTEX_BUFFER) && !agx_vbo_supports_format(format))
2022       return false;
2023 
2024    /* For framebuffer_no_attachments, fake support for "none" images */
2025    if (format == PIPE_FORMAT_NONE)
2026       return true;
2027 
2028    if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
2029                 PIPE_BIND_SHADER_IMAGE)) {
2030       enum pipe_format tex_format = format;
2031 
2032       /* Mimic the fixup done in create_sampler_view and u_transfer_helper so we
2033        * advertise GL_OES_texture_stencil8. Alternatively, we could make mesa/st
2034        * less stupid?
2035        */
2036       if (tex_format == PIPE_FORMAT_X24S8_UINT)
2037          tex_format = PIPE_FORMAT_S8_UINT;
2038 
2039       struct agx_pixel_format_entry ent = agx_pixel_format[tex_format];
2040 
2041       if (!agx_is_valid_pixel_format(tex_format))
2042          return false;
2043 
2044       /* RGB32, luminance/alpha/intensity emulated for texture buffers only */
2045       if ((ent.channels == AGX_CHANNELS_R32G32B32_EMULATED ||
2046            util_format_is_luminance(tex_format) ||
2047            util_format_is_alpha(tex_format) ||
2048            util_format_is_luminance_alpha(tex_format) ||
2049            util_format_is_intensity(tex_format)) &&
2050           target != PIPE_BUFFER)
2051          return false;
2052 
2053       if ((usage & PIPE_BIND_RENDER_TARGET) && !ent.renderable)
2054          return false;
2055    }
2056 
2057    if (usage & PIPE_BIND_DEPTH_STENCIL) {
2058       switch (format) {
2059       /* natively supported */
2060       case PIPE_FORMAT_Z16_UNORM:
2061       case PIPE_FORMAT_Z32_FLOAT:
2062       case PIPE_FORMAT_S8_UINT:
2063 
2064       /* lowered by u_transfer_helper to one of the above */
2065       case PIPE_FORMAT_Z24X8_UNORM:
2066       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2067       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2068          break;
2069 
2070       default:
2071          return false;
2072       }
2073    }
2074 
2075    return true;
2076 }
2077 
2078 static void
2079 agx_query_dmabuf_modifiers(struct pipe_screen *screen, enum pipe_format format,
2080                            int max, uint64_t *modifiers,
2081                            unsigned int *external_only, int *out_count)
2082 {
2083    int i;
2084 
2085    if (max == 0) {
2086       *out_count = ARRAY_SIZE(agx_best_modifiers);
2087       return;
2088    }
2089 
2090    for (i = 0; i < ARRAY_SIZE(agx_best_modifiers) && i < max; i++) {
2091       if (external_only)
2092          external_only[i] = 0;
2093 
2094       modifiers[i] = agx_best_modifiers[i];
2095    }
2096 
2097    /* Return the number of modifiers copied */
2098    *out_count = i;
2099 }
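/* Hedged usage sketch (frontend side, not part of this driver): callers
 * conventionally use a two-call pattern, querying the count first with
 * max == 0 and then fetching the list:
 *
 *    int count = 0;
 *    screen->query_dmabuf_modifiers(screen, format, 0, NULL, NULL, &count);
 *    uint64_t *mods = calloc(count, sizeof(uint64_t));
 *    screen->query_dmabuf_modifiers(screen, format, count, mods, NULL,
 *                                   &count);
 */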
2100 
2101 static bool
2102 agx_is_dmabuf_modifier_supported(struct pipe_screen *screen, uint64_t modifier,
2103                                  enum pipe_format format, bool *external_only)
2104 {
2105    if (external_only)
2106       *external_only = false;
2107 
2108    for (unsigned i = 0; i < ARRAY_SIZE(agx_best_modifiers); ++i) {
2109       if (agx_best_modifiers[i] == modifier)
2110          return true;
2111    }
2112 
2113    return false;
2114 }
2115 
2116 static void
2117 agx_destroy_screen(struct pipe_screen *pscreen)
2118 {
2119    struct agx_screen *screen = agx_screen(pscreen);
2120 
2121    if (screen->dev.ro)
2122       screen->dev.ro->destroy(screen->dev.ro);
2123 
2124    u_transfer_helper_destroy(pscreen->transfer_helper);
2125    agx_close_device(&screen->dev);
2126    disk_cache_destroy(screen->disk_cache);
2127    ralloc_free(screen);
2128 }
2129 
2130 static const void *
2131 agx_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,
2132                          enum pipe_shader_type shader)
2133 {
2134    return &agx_nir_options;
2135 }
2136 
2137 static void
2138 agx_resource_set_stencil(struct pipe_resource *prsrc,
2139                          struct pipe_resource *stencil)
2140 {
2141    agx_resource(prsrc)->separate_stencil = agx_resource(stencil);
2142 }
2143 
2144 static struct pipe_resource *
2145 agx_resource_get_stencil(struct pipe_resource *prsrc)
2146 {
2147    return (struct pipe_resource *)agx_resource(prsrc)->separate_stencil;
2148 }
2149 
2150 static enum pipe_format
2151 agx_resource_get_internal_format(struct pipe_resource *prsrc)
2152 {
2153    return agx_resource(prsrc)->layout.format;
2154 }
2155 
2156 static struct disk_cache *
2157 agx_get_disk_shader_cache(struct pipe_screen *pscreen)
2158 {
2159    return agx_screen(pscreen)->disk_cache;
2160 }
2161 
2162 static const struct u_transfer_vtbl transfer_vtbl = {
2163    .resource_create = agx_resource_create,
2164    .resource_destroy = agx_resource_destroy,
2165    .transfer_map = agx_transfer_map,
2166    .transfer_unmap = agx_transfer_unmap,
2167    .transfer_flush_region = agx_transfer_flush_region,
2168    .get_internal_format = agx_resource_get_internal_format,
2169    .set_stencil = agx_resource_set_stencil,
2170    .get_stencil = agx_resource_get_stencil,
2171 };
2172 
2173 static int
2174 agx_screen_get_fd(struct pipe_screen *pscreen)
2175 {
2176    return agx_device(pscreen)->fd;
2177 }
2178 
2179 static uint64_t
2180 agx_get_timestamp(struct pipe_screen *pscreen)
2181 {
2182    struct agx_device *dev = agx_device(pscreen);
2183    return agx_gpu_time_to_ns(dev, agx_get_gpu_timestamp(dev));
2184 }
2185 
2186 struct pipe_screen *
2187 agx_screen_create(int fd, struct renderonly *ro,
2188                   const struct pipe_screen_config *config)
2189 {
2190    struct agx_screen *agx_screen;
2191    struct pipe_screen *screen;
2192 
2193    agx_screen = rzalloc(NULL, struct agx_screen);
2194    if (!agx_screen)
2195       return NULL;
2196 
2197    screen = &agx_screen->pscreen;
2198 
2199    /* Set debug before opening */
2200    agx_screen->dev.debug =
2201       debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0);
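   /* For example, setting ASAHI_MESA_DEBUG to a comma-separated list of the
    * names in agx_debug_options ORs the matching AGX_DBG_* bits into
    * dev.debug.
    */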
2202 
2203    /* Parse driconf configuration now for device-specific overrides */
2204    driParseConfigFiles(config->options, config->options_info, 0, "asahi", NULL,
2205                        NULL, NULL, 0, NULL, 0);
2206 
2207    /* Forward no16 flag from driconf */
2208    if (driQueryOptionb(config->options, "no_fp16"))
2209       agx_screen->dev.debug |= AGX_DBG_NO16;
2210 
2211    agx_screen->dev.fd = fd;
2212    agx_screen->dev.ro = ro;
2213 
2214    /* Try to open an AGX device */
2215    if (!agx_open_device(agx_screen, &agx_screen->dev)) {
2216       ralloc_free(agx_screen);
2217       return NULL;
2218    }
2219 
2220    agx_screen->queue_id = agx_create_command_queue(&agx_screen->dev, 0);
2221 
2222    screen->destroy = agx_destroy_screen;
2223    screen->get_screen_fd = agx_screen_get_fd;
2224    screen->get_name = agx_get_name;
2225    screen->get_vendor = agx_get_vendor;
2226    screen->get_device_vendor = agx_get_device_vendor;
2227    screen->get_param = agx_get_param;
2228    screen->get_shader_param = agx_get_shader_param;
2229    screen->get_compute_param = agx_get_compute_param;
2230    screen->get_paramf = agx_get_paramf;
2231    screen->is_format_supported = agx_is_format_supported;
2232    screen->query_dmabuf_modifiers = agx_query_dmabuf_modifiers;
2233    screen->is_dmabuf_modifier_supported = agx_is_dmabuf_modifier_supported;
2234    screen->context_create = agx_create_context;
2235    screen->resource_from_handle = agx_resource_from_handle;
2236    screen->resource_get_handle = agx_resource_get_handle;
2237    screen->resource_get_param = agx_resource_get_param;
2238    screen->resource_create_with_modifiers = agx_resource_create_with_modifiers;
2239    screen->get_timestamp = agx_get_timestamp;
2240    screen->fence_reference = agx_fence_reference;
2241    screen->fence_finish = agx_fence_finish;
2242    screen->fence_get_fd = agx_fence_get_fd;
2243    screen->get_compiler_options = agx_get_compiler_options;
2244    screen->get_disk_shader_cache = agx_get_disk_shader_cache;
2245 
2246    screen->resource_create = u_transfer_helper_resource_create;
2247    screen->resource_destroy = u_transfer_helper_resource_destroy;
2248    screen->transfer_helper = u_transfer_helper_create(
2249       &transfer_vtbl,
2250       U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL |
2251          U_TRANSFER_HELPER_MSAA_MAP | U_TRANSFER_HELPER_Z24_IN_Z32F);
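   /* A rough gloss on these flags (u_transfer_helper is authoritative):
    * SEPARATE_Z32S8 and SEPARATE_STENCIL split packed depth/stencil into the
    * separate planes the hardware uses, MSAA_MAP services CPU maps of
    * multisampled resources via a resolve blit, and Z24_IN_Z32F backs Z24
    * formats with a float depth resource.
    */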
2252 
2253    agx_disk_cache_init(agx_screen);
2254 
2255    return screen;
2256 }
2257