1 /*
2  * Copyright 2010 Red Hat Inc.
3  * Copyright 2014-2017 Broadcom
4  * Copyright 2019-2020 Collabora, Ltd.
5  * Copyright 2006 VMware, Inc.
6  * SPDX-License-Identifier: MIT
7  */
8 #include <errno.h>
9 #include <stdio.h>
10 #include <xf86drm.h>
11 #include "asahi/compiler/agx_compile.h"
12 #include "asahi/layout/layout.h"
13 #include "asahi/lib/decode.h"
14 #include "asahi/lib/unstable_asahi_drm.h"
15 #include "drm-uapi/drm_fourcc.h"
16 #include "frontend/winsys_handle.h"
17 #include "gallium/auxiliary/renderonly/renderonly.h"
18 #include "gallium/auxiliary/util/u_debug_cb.h"
19 #include "gallium/auxiliary/util/u_framebuffer.h"
20 #include "gallium/auxiliary/util/u_sample_positions.h"
21 #include "gallium/auxiliary/util/u_surface.h"
22 #include "gallium/auxiliary/util/u_transfer.h"
23 #include "gallium/auxiliary/util/u_transfer_helper.h"
24 #include "pipe/p_context.h"
25 #include "pipe/p_defines.h"
26 #include "pipe/p_screen.h"
27 #include "pipe/p_state.h"
28 #include "util/bitscan.h"
29 #include "util/format/u_format.h"
30 #include "util/format/u_formats.h"
31 #include "util/half_float.h"
32 #include "util/macros.h"
33 #include "util/simple_mtx.h"
34 #include "util/timespec.h"
35 #include "util/u_drm.h"
36 #include "util/u_gen_mipmap.h"
37 #include "util/u_helpers.h"
38 #include "util/u_inlines.h"
39 #include "util/u_memory.h"
40 #include "util/u_process.h"
41 #include "util/u_resource.h"
42 #include "util/u_screen.h"
43 #include "util/u_upload_mgr.h"
44 #include "util/xmlconfig.h"
45 #include "agx_bg_eot.h"
46 #include "agx_bo.h"
47 #include "agx_device.h"
48 #include "agx_disk_cache.h"
49 #include "agx_fence.h"
50 #include "agx_helpers.h"
51 #include "agx_pack.h"
52 #include "agx_public.h"
53 #include "agx_state.h"
54 #include "agx_tilebuffer.h"
55 #include "shader_enums.h"
56 
57 /* Fake values, pending UAPI upstreaming */
58 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
59 #define DRM_FORMAT_MOD_APPLE_TWIDDLED (2)
60 #endif
61 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED
62 #define DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED (3)
63 #endif
64 
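/* In decreasing order of preference (see agx_select_best_modifier below):
 * compressed twiddled, plain twiddled, then linear.
 */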
65 uint64_t agx_best_modifiers[] = {
66    DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED,
67    DRM_FORMAT_MOD_APPLE_TWIDDLED,
68    DRM_FORMAT_MOD_LINEAR,
69 };
70 
71 /* These limits are arbitrarily chosen and subject to change as
72  * we discover more workloads with heavy shadowing.
73  *
74  * Maximum size of a shadowed object in bytes.
75  * Hint: 1024x1024xRGBA8 = 4 MiB. Go higher for compression.
76  */
77 #define MAX_SHADOW_BYTES (6 * 1024 * 1024)
78 
79 /* Maximum cumulative size to shadow an object before we flush.
80  * Allows shadowing a 4MiB + meta object 8 times with the logic
81  * below (+1 shadow offset implied).
82  */
83 #define MAX_TOTAL_SHADOW_BYTES (32 * 1024 * 1024)
84 
85 void agx_init_state_functions(struct pipe_context *ctx);
86 
87 /*
88  * resource
89  */
90 
91 const static char *s_tiling[] = {
92    [AIL_TILING_LINEAR] = "LINR",
93    [AIL_TILING_TWIDDLED] = "TWID",
94    [AIL_TILING_TWIDDLED_COMPRESSED] = "COMP",
95 };
96 
97 #define rsrc_debug(res, ...)                                                   \
98    do {                                                                        \
99       if (agx_device((res)->base.screen)->debug & AGX_DBG_RESOURCE)            \
100          agx_msg(__VA_ARGS__);                                                 \
101    } while (0)
102 
103 static void
104 agx_resource_debug(struct agx_resource *res, const char *msg)
105 {
106    if (!(agx_device(res->base.screen)->debug & AGX_DBG_RESOURCE))
107       return;
108 
109    int ino = -1;
110    if (res->bo->prime_fd >= 0) {
111       struct stat sb;
112       if (!fstat(res->bo->prime_fd, &sb))
113          ino = sb.st_ino;
114    }
115 
116    agx_msg(
117       "%s%s %dx%dx%d %dL %d/%dM %dS M:%llx %s %s%s S:0x%llx LS:0x%llx CS:0x%llx "
118       "Base=0x%llx Size=0x%llx Meta=0x%llx/0x%llx (%s) %s%s%s%s%s%sfd:%d(%d) B:%x @ %p\n",
119       msg ?: "", util_format_short_name(res->base.format), res->base.width0,
120       res->base.height0, res->base.depth0, res->base.array_size,
121       res->base.last_level, res->layout.levels, res->layout.sample_count_sa,
122       (long long)res->modifier, s_tiling[res->layout.tiling],
123       res->layout.mipmapped_z ? "MZ " : "",
124       res->layout.page_aligned_layers ? "PL " : "",
125       (long long)res->layout.linear_stride_B,
126       (long long)res->layout.layer_stride_B,
127       (long long)res->layout.compression_layer_stride_B,
128       (long long)res->bo->va->addr, (long long)res->layout.size_B,
129       res->layout.metadata_offset_B
130          ? ((long long)res->bo->va->addr + res->layout.metadata_offset_B)
131          : 0,
132       (long long)res->layout.metadata_offset_B, res->bo->label,
133       res->bo->flags & AGX_BO_SHARED ? "SH " : "",
134       res->bo->flags & AGX_BO_LOW_VA ? "LO " : "",
135       res->bo->flags & AGX_BO_EXEC ? "EX " : "",
136       res->bo->flags & AGX_BO_WRITEBACK ? "WB " : "",
137       res->bo->flags & AGX_BO_SHAREABLE ? "SA " : "",
138       res->bo->flags & AGX_BO_READONLY ? "RO " : "", res->bo->prime_fd, ino,
139       res->base.bind, res);
140 }
141 
142 static void
143 agx_resource_setup(struct agx_device *dev, struct agx_resource *nresource)
144 {
145    struct pipe_resource *templ = &nresource->base;
146 
147    nresource->layout = (struct ail_layout){
148       .tiling = ail_drm_modifier_to_tiling(nresource->modifier),
149       .mipmapped_z = templ->target == PIPE_TEXTURE_3D,
150       .format = templ->format,
151       .width_px = templ->width0,
152       .height_px = templ->height0,
153       .depth_px = templ->depth0 * templ->array_size,
154       .sample_count_sa = MAX2(templ->nr_samples, 1),
155       .levels = templ->last_level + 1,
156       .writeable_image = templ->bind & PIPE_BIND_SHADER_IMAGE,
157 
158       /* Ostensibly this should be based on the bind, but Gallium bind flags are
159        * notoriously unreliable. The only cost of setting this excessively is a
160        * bit of extra memory use for layered textures, which isn't worth trying
161        * to optimize.
162        */
163       .renderable = true,
164    };
165 }
166 
167 static struct pipe_resource *
168 agx_resource_from_handle(struct pipe_screen *pscreen,
169                          const struct pipe_resource *templat,
170                          struct winsys_handle *whandle, unsigned usage)
171 {
172    struct agx_device *dev = agx_device(pscreen);
173    struct agx_resource *rsc;
174    struct pipe_resource *prsc;
175 
176    assert(whandle->type == WINSYS_HANDLE_TYPE_FD);
177 
178    rsc = CALLOC_STRUCT(agx_resource);
179    if (!rsc)
180       return NULL;
181 
182    rsc->modifier = whandle->modifier == DRM_FORMAT_MOD_INVALID
183                       ? DRM_FORMAT_MOD_LINEAR
184                       : whandle->modifier;
185 
186    /* We need strides to be aligned. ail asserts this, but we want to fail
187     * gracefully so the app can handle the error.
188     */
189    if (rsc->modifier == DRM_FORMAT_MOD_LINEAR && (whandle->stride % 16) != 0) {
190       FREE(rsc);
191       return NULL;
192    }
193 
194    prsc = &rsc->base;
195 
196    *prsc = *templat;
197 
198    pipe_reference_init(&prsc->reference, 1);
199    prsc->screen = pscreen;
200 
201    prsc->bind |= PIPE_BIND_SHARED;
202 
203    rsc->bo = agx_bo_import(dev, whandle->handle);
204    /* Sometimes an import can fail, e.g. due to an invalid buffer fd or
205     * insufficient address space to mmap it.
206     */
207    if (!rsc->bo) {
208       FREE(rsc);
209       return NULL;
210    }
211 
212    agx_resource_setup(dev, rsc);
213 
214    if (rsc->layout.tiling == AIL_TILING_LINEAR) {
215       rsc->layout.linear_stride_B = whandle->stride;
216    } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0)) {
217       FREE(rsc);
218       return NULL;
219    }
220 
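   /* Only imports that start at offset 0 within the dma-buf are supported */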
221    assert(whandle->offset == 0);
222 
223    ail_make_miptree(&rsc->layout);
224 
225    if (prsc->target == PIPE_BUFFER) {
226       assert(rsc->layout.tiling == AIL_TILING_LINEAR);
227       util_range_init(&rsc->valid_buffer_range);
228    }
229 
230    agx_resource_debug(rsc, "Import: ");
231 
232    return prsc;
233 }
234 
235 static bool
236 agx_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *ctx,
237                         struct pipe_resource *pt, struct winsys_handle *handle,
238                         unsigned usage)
239 {
240    struct agx_device *dev = agx_device(pscreen);
241    struct pipe_resource *cur = pt;
242 
243    /* Even though asahi doesn't support multi-planar formats, we
244     * can get here through GBM, which does. Walk the list of planes
245     * to find the right one.
246     */
247    for (int i = 0; i < handle->plane; i++) {
248       cur = cur->next;
249       if (!cur)
250          return false;
251    }
252 
253    struct agx_resource *rsrc = agx_resource(cur);
254 
255    if (handle->type == WINSYS_HANDLE_TYPE_KMS && dev->ro) {
256       rsrc_debug(rsrc, "Get handle: %p (KMS RO)\n", rsrc);
257 
258       if (!rsrc->scanout && dev->ro && (rsrc->base.bind & PIPE_BIND_SCANOUT)) {
259          rsrc->scanout =
260             renderonly_scanout_for_resource(&rsrc->base, dev->ro, NULL);
261       }
262 
263       if (!rsrc->scanout)
264          return false;
265 
266       return renderonly_get_handle(rsrc->scanout, handle);
267    } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) {
268       rsrc_debug(rsrc, "Get handle: %p (KMS)\n", rsrc);
269 
270       handle->handle = rsrc->bo->handle;
271    } else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
272       int fd = agx_bo_export(dev, rsrc->bo);
273 
274       if (fd < 0)
275          return false;
276 
277       handle->handle = fd;
278       if (dev->debug & AGX_DBG_RESOURCE) {
279          struct stat sb;
280          fstat(rsrc->bo->prime_fd, &sb);
281          agx_msg("Get handle: %p (FD %d/%ld)\n", rsrc, fd, (long)sb.st_ino);
282       }
283    } else {
284       /* Other handle types not supported */
285       return false;
286    }
287 
288    handle->stride = ail_get_wsi_stride_B(&rsrc->layout, 0);
289    handle->size = rsrc->layout.size_B;
290    handle->offset = rsrc->layout.level_offsets_B[0];
291    handle->format = rsrc->layout.format;
292    handle->modifier = rsrc->modifier;
293 
294    return true;
295 }
296 
297 static bool
298 agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
299                        struct pipe_resource *prsc, unsigned plane,
300                        unsigned layer, unsigned level,
301                        enum pipe_resource_param param, unsigned usage,
302                        uint64_t *value)
303 {
304    struct agx_resource *rsrc = (struct agx_resource *)prsc;
305 
306    switch (param) {
307    case PIPE_RESOURCE_PARAM_STRIDE:
308       *value = ail_get_wsi_stride_B(&rsrc->layout, level);
309       return true;
310    case PIPE_RESOURCE_PARAM_OFFSET:
311       *value = rsrc->layout.level_offsets_B[level];
312       return true;
313    case PIPE_RESOURCE_PARAM_MODIFIER:
314       *value = rsrc->modifier;
315       return true;
316    case PIPE_RESOURCE_PARAM_NPLANES:
317       /* We don't support multi-planar formats, but we should still handle
318        * this case for GBM shared resources.
319        */
320       *value = util_resource_num(prsc);
321       return true;
322    default:
323       return false;
324    }
325 }
326 
327 static bool
328 agx_is_2d(enum pipe_texture_target target)
329 {
330    return (target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_RECT);
331 }
332 
333 static bool
334 agx_linear_allowed(const struct agx_resource *pres)
335 {
336    /* Mipmapping not allowed with linear */
337    if (pres->base.last_level != 0)
338       return false;
339 
340    /* Depth/stencil buffers must not be linear */
341    if (pres->base.bind & PIPE_BIND_DEPTH_STENCIL)
342       return false;
343 
344    /* Multisampling not allowed with linear */
345    if (pres->base.nr_samples > 1)
346       return false;
347 
348    /* Block compression not allowed with linear */
349    if (util_format_is_compressed(pres->base.format))
350       return false;
351 
352    switch (pres->base.target) {
353    /* Buffers are always linear, even with image atomics */
354    case PIPE_BUFFER:
355 
356    /* Linear textures require specifying their strides explicitly, which only
357     * works for 2D textures. Rectangle textures are a special case of 2D.
358     *
359     * 1D textures only exist in GLES and are lowered to 2D to bypass hardware
360     * limitations.
361     *
362     * However, we don't want to support this case in the image atomic
363     * implementation, so linear shader images are specially forbidden.
364     */
365    case PIPE_TEXTURE_1D:
366    case PIPE_TEXTURE_1D_ARRAY:
367    case PIPE_TEXTURE_2D:
368    case PIPE_TEXTURE_2D_ARRAY:
369    case PIPE_TEXTURE_RECT:
370       if (pres->base.bind & PIPE_BIND_SHADER_IMAGE)
371          return false;
372 
373       break;
374 
375    /* No other texture type can specify a stride */
376    default:
377       return false;
378    }
379 
380    return true;
381 }
382 
383 static bool
384 agx_twiddled_allowed(const struct agx_resource *pres)
385 {
386    /* Certain binds force linear */
387    if (pres->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_LINEAR))
388       return false;
389 
390    /* Buffers must be linear */
391    if (pres->base.target == PIPE_BUFFER)
392       return false;
393 
394    /* Anything else may be twiddled */
395    return true;
396 }
397 
398 static bool
399 agx_compression_allowed(const struct agx_resource *pres)
400 {
401    /* Allow disabling compression for debugging */
402    if (agx_device(pres->base.screen)->debug & AGX_DBG_NOCOMPRESS) {
403       rsrc_debug(pres, "No compression: disabled\n");
404       return false;
405    }
406 
407    /* Limited to renderable */
408    if (pres->base.bind &
409        ~(PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
410          PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) {
411       rsrc_debug(pres, "No compression: not renderable\n");
412       return false;
413    }
414 
415    if (!ail_can_compress(pres->base.format, pres->base.width0,
416                          pres->base.height0, MAX2(pres->base.nr_samples, 1))) {
417       rsrc_debug(pres, "No compression: incompatible layout\n");
418       return false;
419    }
420 
421    if (pres->base.format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
422       rsrc_debug(pres, "No compression: RGB9E5 copies need work\n");
423       return false;
424    }
425 
426    return true;
427 }
428 
429 static uint64_t
430 agx_select_modifier_from_list(const struct agx_resource *pres,
431                               const uint64_t *modifiers, int count)
432 {
433    if (agx_twiddled_allowed(pres) && agx_compression_allowed(pres) &&
434        drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers,
435                          count))
436       return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
437 
438    if (agx_twiddled_allowed(pres) &&
439        drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED, modifiers, count))
440       return DRM_FORMAT_MOD_APPLE_TWIDDLED;
441 
442    if (agx_linear_allowed(pres) &&
443        drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
444       return DRM_FORMAT_MOD_LINEAR;
445 
446    /* We didn't find anything */
447    return DRM_FORMAT_MOD_INVALID;
448 }
449 
450 static uint64_t
451 agx_select_best_modifier(const struct agx_resource *pres)
452 {
453    /* Prefer linear for staging resources, which should be as fast as possible
454     * to write from the CPU.
455     */
456    if (agx_linear_allowed(pres) && pres->base.usage == PIPE_USAGE_STAGING)
457       return DRM_FORMAT_MOD_LINEAR;
458 
459    /* For SCANOUT or SHARED resources with no explicit modifier selection, force
460     * linear since we cannot expect consumers to correctly pass through the
461     * modifier (unless linear is not allowed at all).
462     */
463    if (agx_linear_allowed(pres) &&
464        pres->base.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) {
465       return DRM_FORMAT_MOD_LINEAR;
466    }
467 
468    if (agx_twiddled_allowed(pres)) {
469       if (agx_compression_allowed(pres))
470          return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
471       else
472          return DRM_FORMAT_MOD_APPLE_TWIDDLED;
473    }
474 
475    if (agx_linear_allowed(pres))
476       return DRM_FORMAT_MOD_LINEAR;
477    else
478       return DRM_FORMAT_MOD_INVALID;
479 }
480 
481 static struct pipe_resource *
482 agx_resource_create_with_modifiers(struct pipe_screen *screen,
483                                    const struct pipe_resource *templ,
484                                    const uint64_t *modifiers, int count)
485 {
486    struct agx_device *dev = agx_device(screen);
487    struct agx_resource *nresource;
488 
489    nresource = CALLOC_STRUCT(agx_resource);
490    if (!nresource)
491       return NULL;
492 
493    nresource->base = *templ;
494    nresource->base.screen = screen;
495 
496    if (modifiers) {
497       nresource->modifier =
498          agx_select_modifier_from_list(nresource, modifiers, count);
499    } else {
500       nresource->modifier = agx_select_best_modifier(nresource);
501    }
502 
503    /* There may not be a matching modifier, bail if so */
504    if (nresource->modifier == DRM_FORMAT_MOD_INVALID) {
505       free(nresource);
506       return NULL;
507    }
508 
509    /* If there's only 1 layer and there's no compression, there's no harm in
510     * inferring the shader image flag. Do so to avoid reallocation in case the
511     * resource is later used as an image.
512     */
513    if (nresource->modifier != DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED &&
514        templ->depth0 == 1) {
515 
516       nresource->base.bind |= PIPE_BIND_SHADER_IMAGE;
517    }
518 
519    nresource->mipmapped = (templ->last_level > 0);
520 
521    assert(templ->format != PIPE_FORMAT_Z24X8_UNORM &&
522           templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT &&
523           "u_transfer_helper should have lowered");
524 
525    agx_resource_setup(dev, nresource);
526 
527    pipe_reference_init(&nresource->base.reference, 1);
528 
529    ail_make_miptree(&nresource->layout);
530 
531    /* Fail Piglit's obnoxious allocations */
532    if (nresource->layout.size_B >= (1ull << 32)) {
533       free(nresource);
534       return NULL;
535    }
536 
537    if (templ->target == PIPE_BUFFER) {
538       assert(nresource->layout.tiling == AIL_TILING_LINEAR);
539       util_range_init(&nresource->valid_buffer_range);
540    }
541 
542    /* Guess a label based on the bind */
543    unsigned bind = templ->bind;
544 
545    const char *label = (bind & PIPE_BIND_INDEX_BUFFER)     ? "Index buffer"
546                        : (bind & PIPE_BIND_SCANOUT)        ? "Scanout"
547                        : (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target"
548                        : (bind & PIPE_BIND_SHARED)         ? "Shared resource"
549                        : (bind & PIPE_BIND_RENDER_TARGET)  ? "Render target"
550                        : (bind & PIPE_BIND_DEPTH_STENCIL)
551                           ? "Depth/stencil buffer"
552                        : (bind & PIPE_BIND_SAMPLER_VIEW)    ? "Texture"
553                        : (bind & PIPE_BIND_VERTEX_BUFFER)   ? "Vertex buffer"
554                        : (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer"
555                        : (bind & PIPE_BIND_GLOBAL)          ? "Global memory"
556                        : (bind & PIPE_BIND_SHADER_BUFFER)   ? "Shader buffer"
557                        : (bind & PIPE_BIND_SHADER_IMAGE)    ? "Shader image"
558                                                             : "Other resource";
559 
560    uint32_t create_flags = 0;
561 
562    /* Default to write-combine resources, but use writeback if that is expected
563     * to be beneficial.
564     */
565    if (nresource->base.usage == PIPE_USAGE_STAGING ||
566        (nresource->base.flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
567 
568       create_flags |= AGX_BO_WRITEBACK;
569    }
570 
571    /* Allow disabling write-combine to debug performance issues */
572    if (dev->debug & AGX_DBG_NOWC) {
573       create_flags |= AGX_BO_WRITEBACK;
574    }
575 
576    /* Create buffers that might be shared with the SHAREABLE flag */
577    if (bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SHARED))
578       create_flags |= AGX_BO_SHAREABLE;
579 
580    nresource->bo =
581       agx_bo_create(dev, nresource->layout.size_B, 0, create_flags, label);
582 
583    if (!nresource->bo) {
584       FREE(nresource);
585       return NULL;
586    }
587 
588    agx_resource_debug(nresource, "New: ");
589    return &nresource->base;
590 }
591 
592 static struct pipe_resource *
593 agx_resource_create(struct pipe_screen *screen,
594                     const struct pipe_resource *templ)
595 {
596    return agx_resource_create_with_modifiers(screen, templ, NULL, 0);
597 }
598 
599 static void
600 agx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *prsrc)
601 {
602    struct agx_resource *rsrc = (struct agx_resource *)prsrc;
603    struct agx_screen *agx_screen = (struct agx_screen *)screen;
604 
605    agx_resource_debug(rsrc, "Destroy: ");
606 
607    if (prsrc->target == PIPE_BUFFER)
608       util_range_destroy(&rsrc->valid_buffer_range);
609 
610    if (rsrc->scanout)
611       renderonly_scanout_destroy(rsrc->scanout, agx_screen->dev.ro);
612 
613    agx_bo_unreference(&agx_screen->dev, rsrc->bo);
614    FREE(rsrc);
615 }
616 
617 void
618 agx_batch_track_image(struct agx_batch *batch, struct pipe_image_view *image)
619 {
620    struct agx_resource *rsrc = agx_resource(image->resource);
621 
622    if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
623       batch->incoherent_writes = true;
624 
625       if (rsrc->base.target == PIPE_BUFFER) {
626          agx_batch_writes_range(batch, rsrc, image->u.buf.offset,
627                                 image->u.buf.size);
628       } else {
629          agx_batch_writes(batch, rsrc, image->u.tex.level);
630       }
631    } else {
632       agx_batch_reads(batch, rsrc);
633    }
634 }
635 
636 /*
637  * transfer
638  */
639 
640 static void
641 agx_transfer_flush_region(struct pipe_context *pipe,
642                           struct pipe_transfer *transfer,
643                           const struct pipe_box *box)
644 {
645 }
646 
647 /* Reallocate the backing buffer of a resource, returning true if successful */
648 static bool
649 agx_shadow(struct agx_context *ctx, struct agx_resource *rsrc, bool needs_copy)
650 {
651    struct agx_device *dev = agx_device(ctx->base.screen);
652    struct agx_bo *old = rsrc->bo;
653    size_t size = rsrc->layout.size_B;
654    unsigned flags = old->flags;
655 
656    if (dev->debug & AGX_DBG_NOSHADOW)
657       return false;
658 
659    /* If a resource is (or could be) shared, shadowing would desync across
660     * processes. (It's also not what this path is for.)
661     */
662    if (flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))
663       return false;
664 
665    /* Do not shadow resources that are too large */
666    if (size > MAX_SHADOW_BYTES && needs_copy)
667       return false;
668 
669    /* Do not shadow resources too much */
670    if (rsrc->shadowed_bytes >= MAX_TOTAL_SHADOW_BYTES && needs_copy)
671       return false;
672 
673    rsrc->shadowed_bytes += size;
674 
675    /* If we need to copy, we reallocate the resource with cached-coherent
676     * memory. This is a heuristic: it assumes that if the app needs a shadow
677     * (with a copy) now, it will again need to shadow-and-copy the same resource
678     * in the future. This accelerates the later copies, since otherwise the copy
679     * involves reading uncached memory.
680     */
681    if (needs_copy)
682       flags |= AGX_BO_WRITEBACK;
683 
684    struct agx_bo *new_ = agx_bo_create(dev, size, 0, flags, old->label);
685 
686    /* If allocation failed, we can fall back on a flush gracefully */
687    if (new_ == NULL)
688       return false;
689 
690    if (needs_copy) {
691       perf_debug_ctx(ctx, "Shadowing %zu bytes on the CPU (%s)", size,
692                      (old->flags & AGX_BO_WRITEBACK) ? "cached" : "uncached");
693       agx_resource_debug(rsrc, "Shadowed: ");
694 
695       memcpy(agx_bo_map(new_), agx_bo_map(old), size);
696    }
697 
698    /* Swap the pointers, dropping a reference */
699    agx_bo_unreference(dev, rsrc->bo);
700    rsrc->bo = new_;
701 
702    /* Reemit descriptors using this resource */
703    agx_dirty_all(ctx);
704    return true;
705 }
706 
707 /*
708  * Perform the required synchronization before a transfer_map operation can
709  * complete. This may require syncing batches.
710  */
711 static void
712 agx_prepare_for_map(struct agx_context *ctx, struct agx_resource *rsrc,
713                     unsigned level,
714                     unsigned usage, /* a combination of PIPE_MAP_x */
715                     const struct pipe_box *box, bool staging_blit)
716 {
717    /* GPU access does not require explicit syncs, as the batch tracking logic
718     * will ensure correct ordering automatically.
719     */
720    if (staging_blit)
721       return;
722 
723    /* If the level has not been written, we may freely do CPU access (writes),
724     * even if other levels are being written by the GPU. This lets us write some
725     * mip levels on the CPU and some on the GPU, without stalling.
726     */
727    if (!agx_resource_valid(rsrc, level))
728       return;
729 
730    /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
731     * being mapped.
732     */
733    if ((usage & PIPE_MAP_DISCARD_RANGE) &&
734        !(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
735        rsrc->base.last_level == 0 &&
736        util_texrange_covers_whole_level(&rsrc->base, 0, box->x, box->y, box->z,
737                                         box->width, box->height, box->depth)) {
738 
739       usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
740    }
741 
742    /* Shadowing doesn't work for separate stencil or shared resources */
743    if (rsrc->separate_stencil || (rsrc->bo->flags & AGX_BO_SHARED))
744       usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
745 
746    /* If the access is unsynchronized, there's nothing to do */
747    if (usage & PIPE_MAP_UNSYNCHRONIZED)
748       return;
749 
750    /* If the range being accessed is uninitialized, we do not need to sync. */
751    if (rsrc->base.target == PIPE_BUFFER && !(rsrc->bo->flags & AGX_BO_SHARED) &&
752        !util_ranges_intersect(&rsrc->valid_buffer_range, box->x,
753                               box->x + box->width))
754       return;
755 
756    /* Everything after this needs the context, which is not safe for
757     * unsynchronized transfers when we claim
758     * pipe_caps.map_unsynchronized_thread_safe.
759     */
760    assert(!(usage & PIPE_MAP_UNSYNCHRONIZED));
761 
762    /* Reading or writing from the CPU requires syncing writers. */
763    agx_sync_writer(ctx, rsrc, "Unsynchronized CPU transfer");
764 
765    /* Additionally, writing needs readers synced. */
766    if (!(usage & PIPE_MAP_WRITE))
767       return;
768 
769    /* If there are no readers, we're done. We check at the start to
770     * avoid expensive shadowing paths or duplicated checks in this happy path.
771     */
772    if (!agx_any_batch_uses_resource(ctx, rsrc)) {
773       rsrc->shadowed_bytes = 0;
774       return;
775    }
776 
777    /* There are readers. Try to invalidate the resource to avoid a sync */
778    if ((usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
779        agx_shadow(ctx, rsrc, false))
780       return;
781 
782    /* Or try to shadow it */
783    if (!(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
784        agx_shadow(ctx, rsrc, true))
785       return;
786 
787    /* Otherwise, we need to sync */
788    agx_sync_readers(ctx, rsrc, "Unsynchronized write");
789 
790    rsrc->shadowed_bytes = 0;
791 }
792 
793 /*
794  * Return a colour-renderable format compatible with a depth/stencil format, to
795  * be used as an interchange format for depth/stencil blits. For
796  * non-depth/stencil formats, returns the format itself, unless that format
797  * would not round trip, in which case a compatible round-trippable format is returned.
798  */
799 static enum pipe_format
800 agx_staging_format(enum pipe_format format)
801 {
802    switch (format) {
803    case PIPE_FORMAT_Z16_UNORM:
804       return PIPE_FORMAT_R16_UNORM;
805    case PIPE_FORMAT_Z32_FLOAT:
806       return PIPE_FORMAT_R32_FLOAT;
807    case PIPE_FORMAT_S8_UINT:
808       return PIPE_FORMAT_R8_UINT;
809    default:
810       /* Z24 and combined Z/S are lowered to one of the above formats by
811        * u_transfer_helper. The caller needs to pass in the rsrc->layout.format
812        * and not the rsrc->base.format to get the lowered physical format
813        * (rather than the API logical format).
814        */
815       assert(!util_format_is_depth_or_stencil(format) &&
816              "no other depth/stencil formats allowed for staging");
817 
818       /* However, snorm does not round trip, so don't use that for staging */
819       return util_format_snorm_to_sint(format);
820    }
821 }
822 
823 /* Most of the time we can do CPU-side transfers, but sometimes we need to use
824  * the 3D pipe for this. Let's wrap u_blitter to blit to/from staging textures.
825  * Code adapted from panfrost */
826 
827 static struct agx_resource *
828 agx_alloc_staging(struct pipe_screen *screen, struct agx_resource *rsc,
829                   unsigned level, const struct pipe_box *box)
830 {
831    struct pipe_resource tmpl = rsc->base;
832 
833    tmpl.usage = PIPE_USAGE_STAGING;
834    tmpl.width0 = box->width;
835    tmpl.height0 = box->height;
836    tmpl.depth0 = 1;
837 
838    /* We need a linear staging resource. We have linear 2D arrays, but not
839     * linear 3D or cube textures. So switch to 2D arrays if needed.
840     */
841    switch (tmpl.target) {
842    case PIPE_TEXTURE_2D_ARRAY:
843    case PIPE_TEXTURE_CUBE:
844    case PIPE_TEXTURE_CUBE_ARRAY:
845    case PIPE_TEXTURE_3D:
846       tmpl.target = PIPE_TEXTURE_2D_ARRAY;
847       tmpl.array_size = box->depth;
848       break;
849    default:
850       assert(tmpl.array_size == 1);
851       assert(box->depth == 1);
852       break;
853    }
854 
855    tmpl.last_level = 0;
856 
857    /* Linear is incompatible with depth/stencil, so we convert */
858    tmpl.format = agx_staging_format(rsc->layout.format);
859    tmpl.bind =
860       PIPE_BIND_LINEAR | PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
861 
862    struct pipe_resource *pstaging = screen->resource_create(screen, &tmpl);
863    if (!pstaging)
864       return NULL;
865 
866    return agx_resource(pstaging);
867 }
868 
869 static void
870 agx_blit_from_staging(struct pipe_context *pctx, struct agx_transfer *trans)
871 {
872    struct pipe_resource *dst = trans->base.resource;
873    struct pipe_blit_info blit = {0};
874 
875    blit.dst.resource = dst;
876    blit.dst.format = agx_staging_format(agx_resource(dst)->layout.format);
877    blit.dst.level = trans->base.level;
878    blit.dst.box = trans->base.box;
879    blit.src.resource = trans->staging.rsrc;
880    blit.src.format = blit.dst.format;
881    blit.src.level = 0;
882    blit.src.box = trans->staging.box;
883    blit.mask = util_format_get_mask(blit.src.format);
884    blit.filter = PIPE_TEX_FILTER_NEAREST;
885 
886    agx_blit(pctx, &blit);
887 }
888 
889 static void
890 agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans)
891 {
892    struct pipe_resource *src = trans->base.resource;
893    struct pipe_blit_info blit = {0};
894 
895    blit.src.resource = src;
896    blit.src.format = agx_staging_format(agx_resource(src)->layout.format);
897    blit.src.level = trans->base.level;
898    blit.src.box = trans->base.box;
899    blit.dst.resource = trans->staging.rsrc;
900    blit.dst.format = blit.src.format;
901    blit.dst.level = 0;
902    blit.dst.box = trans->staging.box;
903    blit.mask = util_format_get_mask(blit.dst.format);
904    blit.filter = PIPE_TEX_FILTER_NEAREST;
905 
906    agx_blit(pctx, &blit);
907 }
908 
909 static void *
910 agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource,
911                  unsigned level,
912                  unsigned usage, /* a combination of PIPE_MAP_x */
913                  const struct pipe_box *box,
914                  struct pipe_transfer **out_transfer)
915 {
916    struct agx_context *ctx = agx_context(pctx);
917    struct agx_resource *rsrc = agx_resource(resource);
918 
919    /* Can't map tiled/compressed directly */
920    if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
921       return NULL;
922 
923    /* Can't transfer out of bounds mip levels */
924    if (level >= rsrc->layout.levels)
925       return NULL;
926 
927    /* For compression, we use a staging blit as we do not implement AGX
928     * compression in software. In some cases, we could use this path for
929     * twiddled too, but we don't have a use case for that yet.
930     */
931    bool staging_blit = ail_is_level_compressed(&rsrc->layout, level);
932 
933    agx_prepare_for_map(ctx, rsrc, level, usage, box, staging_blit);
934 
935    /* Track the written buffer range */
936    if (resource->target == PIPE_BUFFER) {
937       /* Note the ordering: DISCARD|WRITE is valid, so clear before adding. */
938       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
939          util_range_set_empty(&rsrc->valid_buffer_range);
940       if (usage & PIPE_MAP_WRITE) {
941          util_range_add(resource, &rsrc->valid_buffer_range, box->x,
942                         box->x + box->width);
943       }
944    }
945 
946    struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer);
947    transfer->base.level = level;
948    transfer->base.usage = usage;
949    transfer->base.box = *box;
950 
951    pipe_resource_reference(&transfer->base.resource, resource);
952    *out_transfer = &transfer->base;
953 
954    if (staging_blit) {
955       /* Should never happen for buffers, and it's not safe */
956       assert(resource->target != PIPE_BUFFER);
957 
958       struct agx_resource *staging =
959          agx_alloc_staging(pctx->screen, rsrc, level, box);
960       assert(staging);
961 
962       /* Staging resources have one LOD: level 0. Query the strides
963        * on this LOD.
964        */
965       transfer->base.stride = ail_get_linear_stride_B(&staging->layout, 0);
966       transfer->base.layer_stride = staging->layout.layer_stride_B;
967       transfer->staging.rsrc = &staging->base;
968 
969       transfer->staging.box = *box;
970       transfer->staging.box.x = 0;
971       transfer->staging.box.y = 0;
972       transfer->staging.box.z = 0;
973 
974       assert(transfer->staging.rsrc != NULL);
975 
976       if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
977          agx_blit_to_staging(pctx, transfer);
978          agx_sync_writer(ctx, staging, "GPU read staging blit");
979       }
980 
981       return agx_bo_map(staging->bo);
982    }
983 
984    if (ail_is_level_twiddled_uncompressed(&rsrc->layout, level)) {
985       /* Should never happen for buffers, and it's not safe */
986       assert(resource->target != PIPE_BUFFER);
987 
988       transfer->base.stride =
989          util_format_get_stride(rsrc->layout.format, box->width);
990 
991       transfer->base.layer_stride = util_format_get_2d_size(
992          rsrc->layout.format, transfer->base.stride, box->height);
993 
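      /* Allocate a CPU-side bounce buffer: reads detile into it below, and
       * writes are tiled back into the resource at unmap time.
       */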
994       transfer->map = calloc(transfer->base.layer_stride, box->depth);
995 
996       if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
997          for (unsigned z = 0; z < box->depth; ++z) {
998             uint8_t *map = agx_map_texture_cpu(rsrc, level, box->z + z);
999             uint8_t *dst =
1000                (uint8_t *)transfer->map + transfer->base.layer_stride * z;
1001 
1002             ail_detile(map, dst, &rsrc->layout, level, transfer->base.stride,
1003                        box->x, box->y, box->width, box->height);
1004          }
1005       }
1006 
1007       return transfer->map;
1008    } else {
1009       assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
1010 
1011       transfer->base.stride = ail_get_linear_stride_B(&rsrc->layout, level);
1012       transfer->base.layer_stride = rsrc->layout.layer_stride_B;
1013 
1014       /* Be conservative for direct writes */
1015       if ((usage & PIPE_MAP_WRITE) &&
1016           (usage &
1017            (PIPE_MAP_DIRECTLY | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT))) {
1018          BITSET_SET(rsrc->data_valid, level);
1019       }
1020 
1021       uint32_t offset =
1022          ail_get_linear_pixel_B(&rsrc->layout, level, box->x, box->y, box->z);
1023 
1024       return ((uint8_t *)agx_bo_map(rsrc->bo)) + offset;
1025    }
1026 }
1027 
1028 static void
1029 agx_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
1030 {
1031    /* Gallium expects writeback here, so we tile */
1032 
1033    struct agx_transfer *trans = agx_transfer(transfer);
1034    struct pipe_resource *prsrc = transfer->resource;
1035    struct agx_resource *rsrc = (struct agx_resource *)prsrc;
1036 
1037    if (trans->staging.rsrc && (transfer->usage & PIPE_MAP_WRITE)) {
1038       assert(prsrc->target != PIPE_BUFFER);
1039       agx_blit_from_staging(pctx, trans);
1040       agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc),
1041                         "GPU write staging blit");
1042    } else if (trans->map && (transfer->usage & PIPE_MAP_WRITE)) {
1043       assert(
1044          ail_is_level_twiddled_uncompressed(&rsrc->layout, transfer->level));
1045 
1046       for (unsigned z = 0; z < transfer->box.depth; ++z) {
1047          uint8_t *map =
1048             agx_map_texture_cpu(rsrc, transfer->level, transfer->box.z + z);
1049          uint8_t *src = (uint8_t *)trans->map + transfer->layer_stride * z;
1050 
1051          ail_tile(map, src, &rsrc->layout, transfer->level, transfer->stride,
1052                   transfer->box.x, transfer->box.y, transfer->box.width,
1053                   transfer->box.height);
1054       }
1055    }
1056 
1057    /* The level we wrote is now initialized. We do this at the end so
1058     * blit_from_staging can avoid reloading existing contents.
1059     */
1060    if (transfer->usage & PIPE_MAP_WRITE)
1061       BITSET_SET(rsrc->data_valid, transfer->level);
1062 
1063    /* Free the transfer */
1064    free(trans->map);
1065    pipe_resource_reference(&trans->staging.rsrc, NULL);
1066    pipe_resource_reference(&transfer->resource, NULL);
1067    FREE(transfer);
1068 }
1069 
1070 /*
1071  * clear/copy
1072  */
1073 static void
1074 agx_clear(struct pipe_context *pctx, unsigned buffers,
1075           const struct pipe_scissor_state *scissor_state,
1076           const union pipe_color_union *color, double depth, unsigned stencil)
1077 {
1078    struct agx_context *ctx = agx_context(pctx);
1079    struct agx_batch *batch = agx_get_batch(ctx);
1080 
1081    if (unlikely(!agx_render_condition_check(ctx)))
1082       return;
1083 
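   /* A buffer that has not yet been drawn to or loaded in this batch can be
    * cleared "for free" by programming the batch's clear state; anything else
    * needs a real clear draw via u_blitter below.
    */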
1084    unsigned fastclear = buffers & ~(batch->draw | batch->load);
1085    unsigned slowclear = buffers & ~fastclear;
1086 
1087    assert(scissor_state == NULL && "we don't support pipe_caps.clear_scissored");
1088 
1089    /* Fast clears configure the batch */
1090    for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
1091       if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))
1092          continue;
1093 
1094       static_assert(sizeof(color->f) == 16, "mismatched structure");
1095 
1096       /* Clear colour must be clamped to properly handle signed ints. */
1097       union pipe_color_union clamped =
1098          util_clamp_color(batch->key.cbufs[rt]->format, color);
1099 
1100       batch->uploaded_clear_color[rt] = agx_pool_upload_aligned(
1101          &batch->pool, clamped.f, sizeof(clamped.f), 16);
1102    }
1103 
1104    if (fastclear & PIPE_CLEAR_DEPTH)
1105       batch->clear_depth = depth;
1106 
1107    if (fastclear & PIPE_CLEAR_STENCIL)
1108       batch->clear_stencil = stencil;
1109 
1110    /* Slow clears draw a fullscreen rectangle */
1111    if (slowclear) {
1112       agx_blitter_save(ctx, ctx->blitter, ASAHI_CLEAR);
1113       util_blitter_clear(
1114          ctx->blitter, ctx->framebuffer.width, ctx->framebuffer.height,
1115          util_framebuffer_get_num_layers(&ctx->framebuffer), slowclear, color,
1116          depth, stencil,
1117          util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
1118    }
1119 
1120    if (fastclear)
1121       agx_batch_init_state(batch);
1122 
1123    batch->clear |= fastclear;
1124    batch->resolve |= buffers;
1125    assert((batch->draw & slowclear) == slowclear);
1126 }
1127 
1128 static void
1129 transition_resource(struct pipe_context *pctx, struct agx_resource *rsrc,
1130                     struct pipe_resource *templ)
1131 {
1132    struct agx_resource *new_res =
1133       agx_resource(pctx->screen->resource_create(pctx->screen, templ));
1134 
1135    assert(new_res);
1136    assert(!(rsrc->base.bind & PIPE_BIND_SHARED) && "cannot swap BOs if shared");
1137 
1138    int level;
1139    BITSET_FOREACH_SET(level, rsrc->data_valid, PIPE_MAX_TEXTURE_LEVELS) {
1140       /* Copy each valid level */
1141       struct pipe_box box;
1142       u_box_3d(0, 0, 0, u_minify(rsrc->layout.width_px, level),
1143                u_minify(rsrc->layout.height_px, level),
1144                util_num_layers(&rsrc->base, level), &box);
1145 
1146       agx_resource_copy_region(pctx, &new_res->base, level, 0, 0, 0,
1147                                &rsrc->base, level, &box);
1148    }
1149 
1150    /* Flush the blits out, to make sure the old resource is no longer used */
1151    agx_flush_writer(agx_context(pctx), new_res, "flush_resource");
1152 
1153    /* Copy the bind flags and swap the BOs */
1154    struct agx_bo *old = rsrc->bo;
1155    rsrc->base.bind = new_res->base.bind;
1156    rsrc->layout = new_res->layout;
1157    rsrc->modifier = new_res->modifier;
1158    rsrc->bo = new_res->bo;
1159    new_res->bo = old;
1160 
1161    /* Free the new resource, which now owns the old BO */
1162    pipe_resource_reference((struct pipe_resource **)&new_res, NULL);
1163 }
1164 
1165 void
1166 agx_decompress(struct agx_context *ctx, struct agx_resource *rsrc,
1167                const char *reason)
1168 {
1169    if (rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED) {
1170       perf_debug_ctx(ctx, "Decompressing resource due to %s", reason);
1171    } else if (!rsrc->layout.writeable_image) {
1172       perf_debug_ctx(ctx, "Reallocating image due to %s", reason);
1173    }
1174 
1175    struct pipe_resource templ = rsrc->base;
1176    assert(!(templ.bind & PIPE_BIND_SHADER_IMAGE) && "currently compressed");
1177    templ.bind |= PIPE_BIND_SHADER_IMAGE /* forces off compression */;
1178    transition_resource(&ctx->base, rsrc, &templ);
1179 }
1180 
1181 static void
1182 agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres)
1183 {
1184    struct agx_resource *rsrc = agx_resource(pres);
1185 
1186    /* flush_resource is used to prepare resources for sharing, so if this is not
1187     * already a shareable resource, make it so
1188     */
1189    struct agx_bo *old = rsrc->bo;
1190    if (!(old->flags & AGX_BO_SHAREABLE)) {
1191       assert(rsrc->layout.levels == 1 &&
1192              "Shared resources must not be mipmapped");
1193       assert(rsrc->layout.sample_count_sa == 1 &&
1194              "Shared resources must not be multisampled");
1195       assert(rsrc->bo);
1196       assert(!(pres->bind & PIPE_BIND_SHARED));
1197 
1198       struct pipe_resource templ = *pres;
1199       templ.bind |= PIPE_BIND_SHARED;
1200       transition_resource(pctx, rsrc, &templ);
1201    } else {
1202       /* Otherwise just claim it's already shared */
1203       pres->bind |= PIPE_BIND_SHARED;
1204       agx_flush_writer(agx_context(pctx), rsrc, "flush_resource");
1205    }
1206 }
1207 
1208 #define MAX_ATTACHMENTS 16
1209 
1210 struct attachments {
1211    struct drm_asahi_attachment list[MAX_ATTACHMENTS];
1212    size_t count;
1213 };
1214 
1215 static void
1216 asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc,
1217                      struct pipe_surface *surf)
1218 {
1219    assert(att->count < MAX_ATTACHMENTS);
1220    int idx = att->count++;
1221 
1222    att->list[idx].size = rsrc->layout.size_B;
1223    att->list[idx].pointer = rsrc->bo->va->addr;
1224    att->list[idx].order = 1; // TODO: What does this do?
1225    att->list[idx].flags = 0;
1226 }
1227 
1228 static bool
1229 is_aligned(unsigned x, unsigned pot_alignment)
1230 {
1231    assert(util_is_power_of_two_nonzero(pot_alignment));
1232    return (x & (pot_alignment - 1)) == 0;
1233 }
1234 
1235 static void
1236 agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
1237            struct attachments *att, struct agx_pool *pool,
1238            struct agx_batch *batch, struct pipe_framebuffer_state *framebuffer,
1239            uint64_t encoder_ptr, uint64_t encoder_id, uint64_t cmd_ta_id,
1240            uint64_t cmd_3d_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr,
1241            uint64_t visibility_result_ptr, struct asahi_bg_eot pipeline_clear,
1242            struct asahi_bg_eot pipeline_load,
1243            struct asahi_bg_eot pipeline_store, bool clear_pipeline_textures,
1244            double clear_depth, unsigned clear_stencil,
1245            struct agx_tilebuffer_layout *tib)
1246 {
1247    memset(c, 0, sizeof(*c));
1248 
1249    c->encoder_ptr = encoder_ptr;
1250    c->encoder_id = encoder_id;
1251    c->cmd_3d_id = cmd_3d_id;
1252    c->cmd_ta_id = cmd_ta_id;
1253 
1254    c->fragment_usc_base = dev->shader_base;
1255    c->vertex_usc_base = dev->shader_base;
1256 
1257    /* bit 0 specifies OpenGL clip behaviour. Since ARB_clip_control is
1258     * advertised, we don't set it and lower in the vertex shader.
1259     */
1260    c->ppp_ctrl = 0x202;
1261 
1262    c->fb_width = framebuffer->width;
1263    c->fb_height = framebuffer->height;
1264 
1265    c->iogpu_unk_214 = 0xc000;
1266 
1267    c->isp_bgobjvals = 0x300;
1268 
1269    struct agx_resource *zres = NULL, *sres = NULL;
1270 
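   /* ZLS (Z/stencil load/store) control: describes how the depth and stencil
    * attachments are loaded, stored, and (de)compressed for this render pass.
    */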
1271    agx_pack(&c->zls_ctrl, ZLS_CONTROL, zls_control) {
1272 
1273       if (framebuffer->zsbuf) {
1274          struct pipe_surface *zsbuf = framebuffer->zsbuf;
1275          struct agx_resource *zsres = agx_resource(zsbuf->texture);
1276 
1277          unsigned level = zsbuf->u.tex.level;
1278          unsigned first_layer = zsbuf->u.tex.first_layer;
1279 
1280          const struct util_format_description *desc = util_format_description(
1281             agx_resource(zsbuf->texture)->layout.format);
1282 
1283          assert(desc->format == PIPE_FORMAT_Z32_FLOAT ||
1284                 desc->format == PIPE_FORMAT_Z16_UNORM ||
1285                 desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
1286                 desc->format == PIPE_FORMAT_S8_UINT);
1287 
1288          c->depth_dimensions =
1289             (framebuffer->width - 1) | ((framebuffer->height - 1) << 15);
1290 
1291          if (util_format_has_depth(desc))
1292             zres = zsres;
1293          else
1294             sres = zsres;
1295 
1296          if (zsres->separate_stencil)
1297             sres = zsres->separate_stencil;
1298 
1299          if (zres) {
1300             bool clear = (batch->clear & PIPE_CLEAR_DEPTH);
1301             bool load = (batch->load & PIPE_CLEAR_DEPTH);
1302 
1303             zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH);
1304             zls_control.z_load_enable = !clear && load;
1305 
1306             c->depth_buffer_load = agx_map_texture_gpu(zres, first_layer) +
1307                                    ail_get_level_offset_B(&zres->layout, level);
1308 
1309             c->depth_buffer_store = c->depth_buffer_load;
1310             c->depth_buffer_partial = c->depth_buffer_load;
1311 
1312             /* Main stride in pages */
1313             assert((zres->layout.depth_px == 1 ||
1314                     is_aligned(zres->layout.layer_stride_B, AIL_PAGESIZE)) &&
1315                    "Page aligned Z layers");
1316 
1317             unsigned stride_pages = zres->layout.layer_stride_B / AIL_PAGESIZE;
1318             c->depth_buffer_load_stride = ((stride_pages - 1) << 14) | 1;
1319             c->depth_buffer_store_stride = c->depth_buffer_load_stride;
1320             c->depth_buffer_partial_stride = c->depth_buffer_load_stride;
1321 
1322             assert(zres->layout.tiling != AIL_TILING_LINEAR && "must tile");
1323 
1324             if (ail_is_compressed(&zres->layout)) {
1325                c->depth_meta_buffer_load =
1326                   agx_map_texture_gpu(zres, 0) +
1327                   zres->layout.metadata_offset_B +
1328                   (first_layer * zres->layout.compression_layer_stride_B) +
1329                   zres->layout.level_offsets_compressed_B[level];
1330 
1331                /* Meta stride in cache lines */
1332                assert(is_aligned(zres->layout.compression_layer_stride_B,
1333                                  AIL_CACHELINE) &&
1334                       "Cacheline aligned Z meta layers");
1335                unsigned stride_lines =
1336                   zres->layout.compression_layer_stride_B / AIL_CACHELINE;
1337                c->depth_meta_buffer_load_stride = (stride_lines - 1) << 14;
1338 
1339                c->depth_meta_buffer_store = c->depth_meta_buffer_load;
1340                c->depth_meta_buffer_store_stride =
1341                   c->depth_meta_buffer_load_stride;
1342                c->depth_meta_buffer_partial = c->depth_meta_buffer_load;
1343                c->depth_meta_buffer_partial_stride =
1344                   c->depth_meta_buffer_load_stride;
1345 
1346                zls_control.z_compress_1 = true;
1347                zls_control.z_compress_2 = true;
1348             }
1349 
1350             if (zres->base.format == PIPE_FORMAT_Z16_UNORM) {
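               /* Quantize the clear depth to 16-bit unorm (round to nearest) */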
1351                const float scale = 0xffff;
1352                c->isp_bgobjdepth =
1353                   (uint16_t)(SATURATE(clear_depth) * scale + 0.5f);
1354                zls_control.z_format = AGX_ZLS_FORMAT_16;
1355                c->iogpu_unk_214 |= 0x40000;
1356             } else {
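               /* 32-bit float depth: pass the clear value's raw IEEE-754 bits */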
1357                c->isp_bgobjdepth = fui(clear_depth);
1358                zls_control.z_format = AGX_ZLS_FORMAT_32F;
1359             }
1360          }
1361 
1362          if (sres) {
1363             bool clear = (batch->clear & PIPE_CLEAR_STENCIL);
1364             bool load = (batch->load & PIPE_CLEAR_STENCIL);
1365 
1366             zls_control.s_store_enable = (batch->resolve & PIPE_CLEAR_STENCIL);
1367             zls_control.s_load_enable = !clear && load;
1368 
1369             c->stencil_buffer_load =
1370                agx_map_texture_gpu(sres, first_layer) +
1371                ail_get_level_offset_B(&sres->layout, level);
1372 
1373             c->stencil_buffer_store = c->stencil_buffer_load;
1374             c->stencil_buffer_partial = c->stencil_buffer_load;
1375 
1376             /* Main stride in pages */
1377             assert((sres->layout.depth_px == 1 ||
1378                     is_aligned(sres->layout.layer_stride_B, AIL_PAGESIZE)) &&
1379                    "Page aligned S layers");
1380             unsigned stride_pages = sres->layout.layer_stride_B / AIL_PAGESIZE;
1381             c->stencil_buffer_load_stride = ((stride_pages - 1) << 14) | 1;
1382             c->stencil_buffer_store_stride = c->stencil_buffer_load_stride;
1383             c->stencil_buffer_partial_stride = c->stencil_buffer_load_stride;
1384 
1385             if (ail_is_compressed(&sres->layout)) {
1386                c->stencil_meta_buffer_load =
1387                   agx_map_texture_gpu(sres, 0) +
1388                   sres->layout.metadata_offset_B +
1389                   (first_layer * sres->layout.compression_layer_stride_B) +
1390                   sres->layout.level_offsets_compressed_B[level];
1391 
1392                /* Meta stride in cache lines */
1393                assert(is_aligned(sres->layout.compression_layer_stride_B,
1394                                  AIL_CACHELINE) &&
1395                       "Cacheline aligned S meta layers");
1396                unsigned stride_lines =
1397                   sres->layout.compression_layer_stride_B / AIL_CACHELINE;
1398                c->stencil_meta_buffer_load_stride = (stride_lines - 1) << 14;
1399 
1400                c->stencil_meta_buffer_store = c->stencil_meta_buffer_load;
1401                c->stencil_meta_buffer_store_stride =
1402                   c->stencil_meta_buffer_load_stride;
1403                c->stencil_meta_buffer_partial = c->stencil_meta_buffer_load;
1404                c->stencil_meta_buffer_partial_stride =
1405                   c->stencil_meta_buffer_load_stride;
1406 
1407                zls_control.s_compress_1 = true;
1408                zls_control.s_compress_2 = true;
1409             }
1410 
1411             c->isp_bgobjvals |= clear_stencil;
1412          }
1413       }
1414    }
1415 
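   /* The precise semantics of ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S are not
    * documented here; judging from the conditions below, it appears to be
    * needed whenever the load/partial-reload pipelines read attachments back
    * from memory (spilled tilebuffer, or depth/stencil that is loaded rather
    * than fully cleared).
    */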
1416    if (clear_pipeline_textures)
1417       c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
1418    else
1419       c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;
1420 
1421    if (zres && !(batch->clear & PIPE_CLEAR_DEPTH))
1422       c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
1423 
1424    if (sres && !(batch->clear & PIPE_CLEAR_STENCIL))
1425       c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
1426 
1427    if (dev->debug & AGX_DBG_NOCLUSTER)
1428       c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING;
1429 
1430    /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */
1431    if (tib->nr_samples > 1 && framebuffer->zsbuf)
1432       c->flags |= ASAHI_RENDER_MSAA_ZS;
1433 
1434    memcpy(&c->load_pipeline_bind, &pipeline_clear.counts,
1435           sizeof(struct agx_counts_packed));
1436 
1437    memcpy(&c->store_pipeline_bind, &pipeline_store.counts,
1438           sizeof(struct agx_counts_packed));
1439 
1440    memcpy(&c->partial_reload_pipeline_bind, &pipeline_load.counts,
1441           sizeof(struct agx_counts_packed));
1442 
1443    memcpy(&c->partial_store_pipeline_bind, &pipeline_store.counts,
1444           sizeof(struct agx_counts_packed));
1445 
1446    /* XXX is this correct? */
1447    c->load_pipeline = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4);
1448    c->store_pipeline = pipeline_store.usc | 4;
1449    c->partial_reload_pipeline = pipeline_load.usc | 4;
1450    c->partial_store_pipeline = pipeline_store.usc | 4;
1451 
1452    c->utile_width = tib->tile_size.width;
1453    c->utile_height = tib->tile_size.height;
1454 
1455    c->samples = tib->nr_samples;
1456    c->layers = MAX2(util_framebuffer_get_num_layers(framebuffer), 1);
1457 
1458    c->ppp_multisamplectl = batch->uniforms.ppp_multisamplectl;
1459    c->sample_size = tib->sample_size_B;
1460 
1461    /* XXX OR 0x80 with eMRT? */
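   /* Tile buffer allocation expressed in 2048-byte blocks, rounded up */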
1462    c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(tib), 2048) / 2048;
1463 
1464    float tan_60 = 1.732051f;
1465    c->merge_upper_x = fui(tan_60 / framebuffer->width);
1466    c->merge_upper_y = fui(tan_60 / framebuffer->height);
1467 
1468    c->scissor_array = scissor_ptr;
1469    c->depth_bias_array = depth_bias_ptr;
1470    c->visibility_result_buffer = visibility_result_ptr;
1471 
1472    c->vertex_sampler_array =
1473       batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0;
1474    c->vertex_sampler_count = batch->sampler_heap.count;
1475    c->vertex_sampler_max = batch->sampler_heap.count + 1;
1476 
1477    /* In the future we could split the heaps if useful */
1478    c->fragment_sampler_array = c->vertex_sampler_array;
1479    c->fragment_sampler_count = c->vertex_sampler_count;
1480    c->fragment_sampler_max = c->vertex_sampler_max;
1481 
1482    /* If a tile is empty, we do not want to process it, as the redundant
1483     * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of
1484     * memory bandwidth. Any draw marks a tile as non-empty, so we only need to
1485     * process empty tiles if the background+EOT programs have a side effect.
1486     * This is the case exactly when there is an attachment we are clearing (some
1487     * attachment A in clear and in resolve <==> non-empty intersection).
1488     *
1489     * This case matters a LOT for performance in workloads that split batches.
1490     */
1491    if (batch->clear & batch->resolve)
1492       c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;
1493 
1494    for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
1495       if (!framebuffer->cbufs[i])
1496          continue;
1497 
1498       asahi_add_attachment(att, agx_resource(framebuffer->cbufs[i]->texture),
1499                            framebuffer->cbufs[i]);
1500    }
1501 
1502    if (framebuffer->zsbuf) {
1503       struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
1504 
1505       asahi_add_attachment(att, rsrc, framebuffer->zsbuf);
1506 
1507       if (rsrc->separate_stencil) {
1508          asahi_add_attachment(att, rsrc->separate_stencil, framebuffer->zsbuf);
1509       }
1510    }
1511 
1512    c->fragment_attachments = (uint64_t)(uintptr_t)&att->list[0];
1513    c->fragment_attachment_count = att->count;
1514 
1515    if (batch->vs_scratch) {
1516       c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
1517       c->vertex_helper_arg = batch->ctx->scratch_vs.buf->va->addr;
1518       c->vertex_helper_cfg = batch->vs_preamble_scratch << 16;
1519       c->vertex_helper_program = agx_helper_program(&batch->ctx->bg_eot);
1520    }
1521    if (batch->fs_scratch) {
1522       c->fragment_helper_arg = batch->ctx->scratch_fs.buf->va->addr;
1523       c->fragment_helper_cfg = batch->fs_preamble_scratch << 16;
1524       c->fragment_helper_program = agx_helper_program(&batch->ctx->bg_eot);
1525    }
1526 }
1527 
1528 /*
1529  * context
1530  */
1531 static void
1532 agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
1533           unsigned flags)
1534 {
1535    struct agx_context *ctx = agx_context(pctx);
1536    struct agx_screen *screen = agx_screen(ctx->base.screen);
1537 
1538    agx_flush_all(ctx, "Gallium flush");
1539 
1540    if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC)) &&
1541        ctx->flush_last_seqid) {
1542       /* Ensure other contexts in this screen serialize against the last
1543        * submission (and all prior submissions).
1544        */
1545       simple_mtx_lock(&screen->flush_seqid_lock);
1546 
1547       uint64_t val = p_atomic_read(&screen->flush_wait_seqid);
1548       if (val < ctx->flush_last_seqid)
1549          p_atomic_set(&screen->flush_wait_seqid, ctx->flush_last_seqid);
1550 
1551       /* Note: it's possible for the max() logic above to be "wrong" due
1552        * to a race in agx_batch_submit causing out-of-order timeline point
1553        * updates, making the larger value not actually a later submission.
1554        * However, see the comment in agx_batch.c for why this doesn't matter
1555        * because this corner case is handled conservatively in the kernel.
1556        */
1557 
1558       simple_mtx_unlock(&screen->flush_seqid_lock);
1559 
1560       /* Optimization: Avoid serializing against our own queue by
1561        * recording the last seen foreign seqid when flushing, and our own
1562        * flush seqid. If we then try to sync against our own seqid, we'll
1563        * instead sync against the last possible foreign one. This is *not*
1564        * the `val` we got above, because another context might flush with a
1565        * seqid between `val` and `flush_last_seqid` (which would not update
1566        * `flush_wait_seqid` per the logic above). This is somewhat
1567        * conservative: it means that if *any* foreign context flushes, then
1568        * on next flush of this context we will start waiting for *all*
1569        * prior submits on *all* contexts (even if unflushed) at that point,
1570        * including any local submissions prior to the latest one. That's
1571        * probably fine, it creates a one-time "wait for the second-previous
1572        * batch" wait on this queue but that still allows for at least
1573        * the previous batch to pipeline on the GPU and it's one-time
1574        * until another foreign flush happens. Phew.
1575        */
1576       if (val && val != ctx->flush_my_seqid)
1577          ctx->flush_other_seqid = ctx->flush_last_seqid - 1;
1578 
1579       ctx->flush_my_seqid = ctx->flush_last_seqid;
1580    }
1581 
1582    /* At this point all pending work has been submitted. Since jobs are
1583     * started and completed sequentially from a UAPI perspective, and since
1584     * we submit all jobs with compute+render barriers on the prior job,
1585     * waiting on the last submitted job is sufficient to guarantee completion
1586     * of all GPU work thus far, so we can create a fence out of the latest
1587     * syncobj.
1588     *
1589     * See this page for more info on how the GPU/UAPI queueing works:
1590     * https://github.com/AsahiLinux/docs/wiki/SW:AGX-driver-notes#queues
1591     */
1592 
1593    if (fence) {
1594       struct pipe_fence_handle *f = agx_fence_create(ctx);
1595       pctx->screen->fence_reference(pctx->screen, fence, NULL);
1596       *fence = f;
1597    }
1598 }
1599 
1600 static void
1601 agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
1602                   struct drm_asahi_cmd_compute *cmdbuf)
1603 {
1604    struct agx_device *dev = agx_device(ctx->base.screen);
1605 
1606    /* Finalize the encoder */
1607    agx_pack(batch->cdm.current, CDM_STREAM_TERMINATE, _)
1608       ;
1609 
1610    agx_batch_add_bo(batch, batch->cdm.bo);
1611 
1612    if (batch->cs_scratch)
1613       agx_batch_add_bo(batch, ctx->scratch_cs.buf);
1614 
1615    unsigned cmdbuf_id = agx_get_global_id(dev);
1616    unsigned encoder_id = agx_get_global_id(dev);
1617 
1618    *cmdbuf = (struct drm_asahi_cmd_compute){
1619       .flags = 0,
1620       .encoder_ptr = batch->cdm.bo->va->addr,
1621       .encoder_end =
1622          batch->cdm.bo->va->addr +
1623          (batch->cdm.current - (uint8_t *)agx_bo_map(batch->cdm.bo)),
1624       .usc_base = dev->shader_base,
1625       .helper_arg = 0,
1626       .helper_cfg = 0,
1627       .helper_program = 0,
1628       .iogpu_unk_40 = 0,
1629       .sampler_array =
1630          batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0,
1631       .sampler_count = batch->sampler_heap.count,
1632       .sampler_max = batch->sampler_heap.count + 1,
1633       .encoder_id = encoder_id,
1634       .cmd_id = cmdbuf_id,
1635       .unk_mask = 0xffffffff,
1636    };
1637 
1638    if (batch->cs_scratch) {
1639       // The commented out lines *may* be related to subgroup-level preemption,
1640       // which we can't support without implementing threadgroup memory in the
1641       // helper. Disable them for now.
1642 
1643       // cmdbuf->iogpu_unk_40 = 0x1c;
1644       cmdbuf->helper_arg = ctx->scratch_cs.buf->va->addr;
1645       cmdbuf->helper_cfg = batch->cs_preamble_scratch << 16;
1646       // cmdbuf->helper_cfg |= 0x40;
1647       cmdbuf->helper_program = agx_helper_program(&batch->ctx->bg_eot);
1648    }
1649 }
1650 
1651 static void
1652 agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
1653                  struct drm_asahi_cmd_render *cmdbuf, struct attachments *att)
1654 {
1655    struct agx_device *dev = agx_device(ctx->base.screen);
1656 
1657    if (batch->vs_scratch)
1658       agx_batch_add_bo(batch, ctx->scratch_vs.buf);
1659    if (batch->fs_scratch)
1660       agx_batch_add_bo(batch, ctx->scratch_fs.buf);
1661 
1662    assert(batch->initialized);
1663 
1664    /* Finalize the encoder */
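   /* The first bytes below presumably encode the VDM stream terminator; the
    * trailing zeros look like padding so that command-stream prefetch past
    * the terminator stays inside the mapping.
    */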
1665    uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
1666    memcpy(batch->vdm.current, stop, sizeof(stop));
1667 
1668    struct asahi_bg_eot pipeline_background =
1669       agx_build_bg_eot(batch, false, false);
1670 
1671    struct asahi_bg_eot pipeline_background_partial =
1672       agx_build_bg_eot(batch, false, true);
1673 
1674    struct asahi_bg_eot pipeline_store = agx_build_bg_eot(batch, true, false);
1675 
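   /* The background (load) program samples the framebuffer as textures when
    * the tilebuffer spills to memory or, per the loop below, when any colour
    * attachment is loaded rather than cleared.
    */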
1676    bool clear_pipeline_textures =
1677       agx_tilebuffer_spills(&batch->tilebuffer_layout);
1678 
1679    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
1680       struct pipe_surface *surf = batch->key.cbufs[i];
1681 
1682       clear_pipeline_textures |=
1683          surf && surf->texture && !(batch->clear & (PIPE_CLEAR_COLOR0 << i));
1684    }
1685 
1686    /* Scissor and depth bias arrays are staged to dynamic arrays on the CPU. At
1687     * submit time, they're done growing and are uploaded to GPU memory attached
1688     * to the batch.
1689     */
1690    uint64_t scissor = agx_pool_upload_aligned(&batch->pool, batch->scissor.data,
1691                                               batch->scissor.size, 64);
1692    uint64_t zbias = agx_pool_upload_aligned(
1693       &batch->pool, batch->depth_bias.data, batch->depth_bias.size, 64);
1694 
1695    /* BO list for a given batch consists of:
1696     *  - BOs for the batch's pools
1697     *  - BOs for the encoder
1698     *  - BO for internal shaders
1699     *  - BOs added to the batch explicitly
1700     */
1701    agx_batch_add_bo(batch, batch->vdm.bo);
1702 
1703    unsigned cmd_ta_id = agx_get_global_id(dev);
1704    unsigned cmd_3d_id = agx_get_global_id(dev);
1705    unsigned encoder_id = agx_get_global_id(dev);
1706 
1707    agx_cmdbuf(dev, cmdbuf, att, &batch->pool, batch, &batch->key,
1708               batch->vdm.bo->va->addr, encoder_id, cmd_ta_id, cmd_3d_id,
1709               scissor, zbias, agx_get_occlusion_heap(batch),
1710               pipeline_background, pipeline_background_partial, pipeline_store,
1711               clear_pipeline_textures, batch->clear_depth, batch->clear_stencil,
1712               &batch->tilebuffer_layout);
1713 }
1714 
1715 void
1716 agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
1717 {
1718    assert(agx_batch_is_active(batch));
1719    assert(!agx_batch_is_submitted(batch));
1720 
1721    struct attachments att = {.count = 0};
1722    struct drm_asahi_cmd_render render;
1723    struct drm_asahi_cmd_compute compute;
1724    bool has_vdm = false, has_cdm = false;
1725 
1726    if (batch->cdm.bo) {
1727       agx_flush_compute(ctx, batch, &compute);
1728       has_cdm = true;
1729    }
1730 
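   /* Only submit render work if the batch recorded any draws or has a clear
    * to perform.
    */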
1731    if (batch->vdm.bo && (batch->clear || batch->initialized)) {
1732       agx_flush_render(ctx, batch, &render, &att);
1733       has_vdm = true;
1734    }
1735 
1736    if (!has_cdm && !has_vdm) {
1737       agx_batch_reset(ctx, batch);
1738       return;
1739    }
1740 
1741    agx_batch_submit(ctx, batch, has_cdm ? &compute : NULL,
1742                     has_vdm ? &render : NULL);
1743 }
1744 
1745 static void
1746 agx_destroy_context(struct pipe_context *pctx)
1747 {
1748    struct agx_device *dev = agx_device(pctx->screen);
1749    struct agx_context *ctx = agx_context(pctx);
1750    struct agx_screen *screen = agx_screen(pctx->screen);
1751 
1752    /* Batch state needs to be freed on completion, and we don't want to yank
1753     * buffers out from in-progress GPU jobs to avoid faults, so just wait until
1754     * everything in progress is actually done on context destroy. This will
1755     * ensure everything is cleaned up properly.
1756     */
1757    agx_sync_all(ctx, "destroy context");
1758 
1759    if (pctx->stream_uploader)
1760       u_upload_destroy(pctx->stream_uploader);
1761 
1762    if (ctx->blitter)
1763       util_blitter_destroy(ctx->blitter);
1764 
1765    util_unreference_framebuffer_state(&ctx->framebuffer);
1766 
1767    agx_bg_eot_cleanup(&ctx->bg_eot);
1768    agx_destroy_meta_shaders(ctx);
1769 
1770    agx_bo_unreference(dev, ctx->result_buf);
1771 
1772    /* Lock around the syncobj destruction to avoid racing command
1773     * submission in another context.
1774     */
1775    u_rwlock_wrlock(&screen->destroy_lock);
1776 
1777    drmSyncobjDestroy(dev->fd, ctx->in_sync_obj);
1778    drmSyncobjDestroy(dev->fd, ctx->dummy_syncobj);
1779    if (ctx->in_sync_fd != -1)
1780       close(ctx->in_sync_fd);
1781 
1782    for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
1783       if (ctx->batches.slots[i].syncobj)
1784          drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
1785    }
1786 
1787    u_rwlock_wrunlock(&screen->destroy_lock);
1788 
1789    pipe_resource_reference(&ctx->heap, NULL);
1790 
1791    agx_scratch_fini(&ctx->scratch_vs);
1792    agx_scratch_fini(&ctx->scratch_fs);
1793    agx_scratch_fini(&ctx->scratch_cs);
1794 
1795    agx_destroy_command_queue(dev, ctx->queue_id);
1796 
1797    ralloc_free(ctx);
1798 }
1799 
1800 static void
1801 agx_invalidate_resource(struct pipe_context *pctx,
1802                         struct pipe_resource *resource)
1803 {
1804    struct agx_context *ctx = agx_context(pctx);
1805    struct agx_batch *batch = agx_get_batch(ctx);
1806 
1807    /* Handle the glInvalidateFramebuffer case */
1808    if (batch->key.zsbuf && batch->key.zsbuf->texture == resource)
1809       batch->resolve &= ~PIPE_CLEAR_DEPTHSTENCIL;
1810 
1811    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
1812       struct pipe_surface *surf = batch->key.cbufs[i];
1813 
1814       if (surf && surf->texture == resource)
1815          batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
1816    }
1817 }
1818 
1819 static enum pipe_reset_status
1820 asahi_get_device_reset_status(struct pipe_context *pipe)
1821 {
1822    struct agx_context *ctx = agx_context(pipe);
1823 
1824    return ctx->any_faults ? PIPE_GUILTY_CONTEXT_RESET : PIPE_NO_RESET;
1825 }
1826 
1827 static struct pipe_context *
1828 agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
1829 {
1830    struct agx_context *ctx = rzalloc(NULL, struct agx_context);
1831    struct pipe_context *pctx = &ctx->base;
1832    int ret;
1833 
1834    if (!ctx)
1835       return NULL;
1836 
1837    pctx->screen = screen;
1838    pctx->priv = priv;
1839 
1840    util_dynarray_init(&ctx->writer, ctx);
1841    util_dynarray_init(&ctx->global_buffers, ctx);
1842 
1843    pctx->stream_uploader = u_upload_create_default(pctx);
1844    if (!pctx->stream_uploader) {
1845       FREE(pctx);
1846       return NULL;
1847    }
1848    pctx->const_uploader = pctx->stream_uploader;
1849 
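   /* Map the Gallium priority flags onto the kernel queue priority; smaller
    * values appear to mean higher priority (0 = realtime, 3 = low), with
    * medium as the default.
    */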
1850    uint32_t priority = 2;
1851    if (flags & PIPE_CONTEXT_PRIORITY_LOW)
1852       priority = 3;
1853    else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
1854       priority = 2;
1855    else if (flags & PIPE_CONTEXT_PRIORITY_HIGH)
1856       priority = 1;
1857    else if (flags & PIPE_CONTEXT_PRIORITY_REALTIME)
1858       priority = 0;
1859 
1860    ctx->queue_id = agx_create_command_queue(agx_device(screen),
1861                                             DRM_ASAHI_QUEUE_CAP_RENDER |
1862                                                DRM_ASAHI_QUEUE_CAP_BLIT |
1863                                                DRM_ASAHI_QUEUE_CAP_COMPUTE,
1864                                             priority);
1865 
1866    pctx->destroy = agx_destroy_context;
1867    pctx->flush = agx_flush;
1868    pctx->clear = agx_clear;
1869    pctx->resource_copy_region = agx_resource_copy_region;
1870    pctx->blit = agx_blit;
1871    pctx->flush_resource = agx_flush_resource;
1872 
1873    pctx->buffer_map = u_transfer_helper_transfer_map;
1874    pctx->buffer_unmap = u_transfer_helper_transfer_unmap;
1875    pctx->texture_map = u_transfer_helper_transfer_map;
1876    pctx->texture_unmap = u_transfer_helper_transfer_unmap;
1877    pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
1878 
1879    pctx->buffer_subdata = u_default_buffer_subdata;
1880    pctx->clear_buffer = u_default_clear_buffer;
1881    pctx->texture_subdata = u_default_texture_subdata;
1882    pctx->set_debug_callback = u_default_set_debug_callback;
1883    pctx->get_sample_position = u_default_get_sample_position;
1884    pctx->invalidate_resource = agx_invalidate_resource;
1885    pctx->memory_barrier = agx_memory_barrier;
1886 
1887    pctx->create_fence_fd = agx_create_fence_fd;
1888    pctx->fence_server_sync = agx_fence_server_sync;
1889 
1890    pctx->get_device_reset_status = asahi_get_device_reset_status;
1891 
1892    agx_init_state_functions(pctx);
1893    agx_init_query_functions(pctx);
1894    agx_init_streamout_functions(pctx);
1895 
1896    agx_bg_eot_init(&ctx->bg_eot, agx_device(screen));
1897    agx_init_meta_shaders(ctx);
1898 
1899    ctx->blitter = util_blitter_create(pctx);
1900    ctx->compute_blitter.blit_cs = asahi_blit_key_table_create(ctx);
1901 
1902    ctx->result_buf =
1903       agx_bo_create(agx_device(screen),
1904                     (2 * sizeof(union agx_batch_result)) * AGX_MAX_BATCHES, 0,
1905                     AGX_BO_WRITEBACK, "Batch result buffer");
1906    assert(ctx->result_buf);
1907 
1908    /* Sync object/FD used for NATIVE_FENCE_FD. */
1909    ctx->in_sync_fd = -1;
1910    ret = drmSyncobjCreate(agx_device(screen)->fd, 0, &ctx->in_sync_obj);
1911    assert(!ret);
1912 
1913    /* Dummy sync object used before any work has been submitted. */
1914    ret = drmSyncobjCreate(agx_device(screen)->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
1915                           &ctx->dummy_syncobj);
1916    assert(!ret);
1917    ctx->syncobj = ctx->dummy_syncobj;
1918 
1919    /* By default all samples are enabled */
1920    ctx->sample_mask = ~0;
1921 
1922    ctx->support_lod_bias = !(flags & PIPE_CONTEXT_NO_LOD_BIAS);
1923    ctx->robust = (flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
1924 
1925    agx_scratch_init(agx_device(screen), &ctx->scratch_vs);
1926    agx_scratch_init(agx_device(screen), &ctx->scratch_fs);
1927    agx_scratch_init(agx_device(screen), &ctx->scratch_cs);
1928 
1929    return pctx;
1930 }
1931 
1932 static const char *
1933 agx_get_vendor(struct pipe_screen *pscreen)
1934 {
1935    return "Mesa";
1936 }
1937 
1938 static const char *
1939 agx_get_device_vendor(struct pipe_screen *pscreen)
1940 {
1941    return "Apple";
1942 }
1943 
1944 static const char *
1945 agx_get_name(struct pipe_screen *pscreen)
1946 {
1947    struct agx_device *dev = agx_device(pscreen);
1948 
1949    return dev->name;
1950 }
1951 
1952 static void
1953 agx_query_memory_info(struct pipe_screen *pscreen,
1954                       struct pipe_memory_info *info)
1955 {
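   /* Unified memory: the GPU shares system RAM, so report total physical
    * memory for both fields.
    */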
1956    uint64_t mem_B = 0;
1957    os_get_total_physical_memory(&mem_B);
1958 
1959    uint64_t mem_kB = mem_B / 1024;
1960 
1961    *info = (struct pipe_memory_info){
1962       .total_device_memory = mem_kB,
1963       .avail_device_memory = mem_kB,
1964    };
1965 }
1966 
1967 static int
1968 agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
1969                      enum pipe_shader_cap param)
1970 {
1971    bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16;
1972 
1973    switch (shader) {
1974    case PIPE_SHADER_VERTEX:
1975    case PIPE_SHADER_FRAGMENT:
1976    case PIPE_SHADER_COMPUTE:
1977    case PIPE_SHADER_GEOMETRY:
1978    case PIPE_SHADER_TESS_CTRL:
1979    case PIPE_SHADER_TESS_EVAL:
1980       break;
1981    default:
1982       return false;
1983    }
1984 
1985    /* This is probably not totally correct, but it's a start: */
1986    switch (param) {
1987    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
1988    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
1989    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
1990    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
1991       return 16384;
1992 
1993    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
1994       return 1024;
1995 
1996    case PIPE_SHADER_CAP_MAX_INPUTS:
1997       return shader == PIPE_SHADER_VERTEX ? 16 : 32;
1998 
1999    case PIPE_SHADER_CAP_MAX_OUTPUTS:
2000       /* For vertex, the spec min/max is 16. We need more to handle dmat3
2001        * correctly, though. The full 32 is undesirable since it would require
2002        * shenanigans to handle.
2003        */
2004       return shader == PIPE_SHADER_FRAGMENT ? 8
2005              : shader == PIPE_SHADER_VERTEX ? 24
2006                                             : 32;
2007 
2008    case PIPE_SHADER_CAP_MAX_TEMPS:
2009       return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
2010 
2011    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
2012       return 16 * 1024 * sizeof(float);
2013 
2014    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
2015       return 16;
2016 
2017    case PIPE_SHADER_CAP_CONT_SUPPORTED:
2018       return 1;
2019 
2020    case PIPE_SHADER_CAP_SUBROUTINES:
2021    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
2022       return 0;
2023 
2024    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
2025    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
2026    case PIPE_SHADER_CAP_INTEGERS:
2027       return true;
2028 
2029    case PIPE_SHADER_CAP_FP16:
2030    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
2031    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
2032       return !is_no16;
2033    case PIPE_SHADER_CAP_INT16:
2034       /* GLSL compiler is broken. Flip this on when Panfrost does. */
2035       return false;
2036    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
2037       /* This cap is broken, see 9a38dab2d18 ("zink: disable
2038        * PIPE_SHADER_CAP_FP16_CONST_BUFFERS") */
2039       return false;
2040 
2041    case PIPE_SHADER_CAP_INT64_ATOMICS:
2042    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
2043       return 0;
2044 
2045    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
2046       /* TODO: Enable when fully baked */
2047       if (strcmp(util_get_process_name(), "blender") == 0)
2048          return PIPE_MAX_SAMPLERS;
2049       else if (strcmp(util_get_process_name(), "run") == 0)
2050          return PIPE_MAX_SAMPLERS;
2051       else if (strcasestr(util_get_process_name(), "ryujinx") != NULL)
2052          return PIPE_MAX_SAMPLERS;
2053       else
2054          return 16;
2055 
2056    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
2057       return PIPE_MAX_SHADER_SAMPLER_VIEWS;
2058 
2059    case PIPE_SHADER_CAP_SUPPORTED_IRS:
2060       return (1 << PIPE_SHADER_IR_NIR);
2061 
2062    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
2063       return PIPE_MAX_SHADER_BUFFERS;
2064 
2065    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
2066       return PIPE_MAX_SHADER_IMAGES;
2067 
2068    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
2069    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
2070       return 0;
2071 
2072    default:
2073       /* Other params are unknown */
2074       return 0;
2075    }
2076 
2077    return 0;
2078 }
2079 
2080 static int
2081 agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
2082                       enum pipe_compute_cap param, void *ret)
2083 {
2084    struct agx_device *dev = agx_device(pscreen);
2085 
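/* Per the get_compute_param contract: copy the value into *ret when the
 * caller provided storage, and return the size of the value in bytes.
 */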
2086 #define RET(x)                                                                 \
2087    do {                                                                        \
2088       if (ret)                                                                 \
2089          memcpy(ret, x, sizeof(x));                                            \
2090       return sizeof(x);                                                        \
2091    } while (0)
2092 
2093    switch (param) {
2094    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
2095       RET((uint32_t[]){64});
2096 
2097    case PIPE_COMPUTE_CAP_IR_TARGET:
2098       if (ret)
2099          sprintf(ret, "agx");
2100       return strlen("agx") * sizeof(char);
2101 
2102    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
2103       RET((uint64_t[]){3});
2104 
2105    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
2106       RET(((uint64_t[]){65535, 65535, 65535}));
2107 
2108    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
2109       RET(((uint64_t[]){1024, 1024, 1024}));
2110 
2111    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
2112       RET((uint64_t[]){1024});
2113 
2114    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
2115    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
2116       uint64_t system_memory;
2117 
2118       if (!os_get_total_physical_memory(&system_memory))
2119          return 0;
2120 
2121       RET((uint64_t[]){system_memory});
2122    }
2123 
2124    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
2125       RET((uint64_t[]){32768});
2126 
2127    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
2128    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
2129       RET((uint64_t[]){4096});
2130 
2131    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
2132       RET((uint32_t[]){dev->params.max_frequency_khz / 1000});
2133 
2134    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
2135       RET((uint32_t[]){agx_get_num_cores(dev)});
2136 
2137    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
2138       RET((uint32_t[]){1});
2139 
2140    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
2141       RET((uint32_t[]){32});
2142 
2143    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
2144       RET((uint32_t[]){0 /* TODO */});
2145 
2146    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
2147       RET((uint64_t[]){1024}); // TODO
2148    }
2149 
2150    return 0;
2151 }
2152 
2153 static void
2154 agx_init_screen_caps(struct pipe_screen *pscreen)
2155 {
2156    struct pipe_caps *caps = (struct pipe_caps *)&pscreen->caps;
2157 
2158    u_init_pipe_screen_caps(pscreen, 1);
2159 
2160    caps->clip_halfz = true;
2161    caps->npot_textures = true;
2162    caps->shader_stencil_export = true;
2163    caps->mixed_color_depth_bits = true;
2164    caps->fragment_shader_texture_lod = true;
2165    caps->vertex_color_unclamped = true;
2166    caps->depth_clip_disable = true;
2167    caps->mixed_framebuffer_sizes = true;
2168    caps->fragment_shader_derivatives = true;
2169    caps->framebuffer_no_attachment = true;
2170    caps->shader_pack_half_float = true;
2171    caps->fs_fine_derivative = true;
2172    caps->glsl_tess_levels_as_inputs = true;
2173    caps->doubles = true;
2174 
2175    caps->max_render_targets =
2176    caps->fbfetch = 8;
2177    caps->fbfetch_coherent = true;
2178 
2179    caps->max_dual_source_render_targets = 1;
2180 
2181    caps->occlusion_query = true;
2182    caps->query_timestamp = true;
2183    caps->query_time_elapsed = true;
2184    caps->query_so_overflow = true;
2185    caps->query_memory_info = true;
2186    caps->primitive_restart = true;
2187    caps->primitive_restart_fixed_index = true;
2188    caps->anisotropic_filter = true;
2189    caps->native_fence_fd = true;
2190    caps->texture_barrier = true;
2191 
2192    /* Timer resolution is the length of a single tick in nanos */
2193    caps->timer_resolution = agx_gpu_time_to_ns(agx_device(pscreen), 1);
2194 
2195    caps->sampler_view_target = true;
2196    caps->texture_swizzle = true;
2197    caps->blend_equation_separate = true;
2198    caps->indep_blend_enable = true;
2199    caps->indep_blend_func = true;
2200    caps->uma = true;
2201    caps->texture_float_linear = true;
2202    caps->texture_half_float_linear = true;
2203    caps->texture_mirror_clamp_to_edge = true;
2204    caps->shader_array_components = true;
2205    caps->packed_uniforms = true;
2206    caps->quads_follow_provoking_vertex_convention = true;
2207    caps->vs_instanceid = true;
2208    caps->vertex_element_instance_divisor = true;
2209    caps->conditional_render = true;
2210    caps->conditional_render_inverted = true;
2211    caps->seamless_cube_map = true;
2212    caps->load_constbuf = true;
2213    caps->seamless_cube_map_per_texture = true;
2214    caps->texture_buffer_objects = true;
2215    caps->null_textures = true;
2216    caps->texture_multisample = true;
2217    caps->image_load_formatted = true;
2218    caps->image_store_formatted = true;
2219    caps->compute = true;
2220    caps->int64 = true;
2221    caps->sample_shading = true;
2222    caps->start_instance = true;
2223    caps->draw_parameters = true;
2224    caps->multi_draw_indirect = true;
2225    caps->multi_draw_indirect_params = true;
2226    caps->cull_distance = true;
2227    caps->gl_spirv = true;
2228    caps->polygon_offset_clamp = true;
2229 
2230    /* TODO: MSRTT */
2231    caps->surface_sample_count = false;
2232 
2233    caps->cube_map_array = true;
2234 
2235    caps->copy_between_compressed_and_plain_formats = true;
2236 
2237    caps->max_stream_output_buffers = PIPE_MAX_SO_BUFFERS;
2238 
2239    caps->max_stream_output_separate_components =
2240    caps->max_stream_output_interleaved_components = PIPE_MAX_SO_OUTPUTS;
2241 
2242    caps->stream_output_pause_resume = true;
2243    caps->stream_output_interleave_buffers = true;
2244 
2245    caps->max_texture_array_layers = 2048;
2246 
2247    caps->glsl_feature_level =
2248    caps->glsl_feature_level_compatibility = 460;
2249    caps->essl_feature_level = 320;
2250 
2251    /* Settings from iris, may need tuning */
2252    caps->max_vertex_streams = 4;
2253    caps->max_geometry_output_vertices = 256;
2254    caps->max_geometry_total_output_components = 1024;
2255    caps->max_gs_invocations = 32;
2256    caps->constant_buffer_offset_alignment = 16;
2257 
2258    caps->max_texel_buffer_elements = AGX_TEXTURE_BUFFER_MAX_SIZE;
2259 
2260    caps->texture_buffer_offset_alignment = 64;
2261 
2262    caps->vertex_input_alignment = PIPE_VERTEX_INPUT_ALIGNMENT_ELEMENT;
2263 
2264    caps->query_pipeline_statistics_single = true;
2265 
2266    caps->max_texture_2d_size = 16384;
2267    caps->max_texture_cube_levels = 15; /* Max 16384x16384 */
2268    caps->max_texture_3d_levels = 12; /* Max 2048x2048x2048 */
2269 
2270    caps->fs_coord_origin_upper_left = true;
2271    caps->fs_coord_pixel_center_integer = true;
2272    caps->tgsi_texcoord = true;
2273    caps->fs_face_is_integer_sysval = true;
2274    caps->fs_position_is_sysval = true;
2275 
2276    caps->fs_coord_origin_lower_left = false;
2277    caps->fs_coord_pixel_center_half_integer = false;
2278    caps->fs_point_is_sysval = false;
2279 
2280    caps->max_vertex_element_src_offset = 0xffff;
2281 
2282    caps->texture_transfer_modes = PIPE_TEXTURE_TRANSFER_BLIT;
2283 
2284    caps->endianness = PIPE_ENDIAN_LITTLE;
2285 
2286    caps->shader_group_vote = true;
2287    caps->shader_ballot = true;
2288 
2289    caps->max_texture_gather_components = 4;
2290    caps->min_texture_gather_offset = -8;
2291    caps->max_texture_gather_offset = 7;
2292    caps->draw_indirect = true;
2293    caps->texture_query_samples = true;
2294    caps->texture_query_lod = true;
2295    caps->texture_shadow_lod = true;
2296 
2297    caps->max_viewports = AGX_MAX_VIEWPORTS;
2298 
2299    uint64_t system_memory;
2300    caps->video_memory = os_get_total_physical_memory(&system_memory) ?
2301       (system_memory >> 20) : 0;
2302 
2303    caps->device_reset_status_query = true;
2304    caps->robust_buffer_access_behavior = true;
2305 
2306    caps->shader_buffer_offset_alignment = 4;
2307 
2308    caps->max_shader_patch_varyings = 32;
2309    /* TODO: Probably should bump to 32? */
2310    caps->max_varyings = 16;
2311 
2312    caps->flatshade = false;
2313    caps->two_sided_color = false;
2314    caps->alpha_test = false;
2315    caps->clip_planes = 0;
2316    caps->nir_images_as_deref = false;
2317 
2318    caps->query_buffer_object = true;
2319 
2320    caps->texture_border_color_quirk = PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
2321 
2322    caps->supported_prim_modes =
2323    caps->supported_prim_modes_with_restart =
2324       BITFIELD_BIT(MESA_PRIM_POINTS) | BITFIELD_BIT(MESA_PRIM_LINES) |
2325       BITFIELD_BIT(MESA_PRIM_LINE_STRIP) |
2326       BITFIELD_BIT(MESA_PRIM_LINE_LOOP) |
2327       BITFIELD_BIT(MESA_PRIM_TRIANGLES) |
2328       BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP) |
2329       BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN) |
2330       BITFIELD_BIT(MESA_PRIM_LINES_ADJACENCY) |
2331       BITFIELD_BIT(MESA_PRIM_LINE_STRIP_ADJACENCY) |
2332       BITFIELD_BIT(MESA_PRIM_TRIANGLES_ADJACENCY) |
2333       BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) |
2334       BITFIELD_BIT(MESA_PRIM_PATCHES);
2335 
2336    caps->map_unsynchronized_thread_safe = true;
2337 
2338    caps->vs_layer_viewport = true;
2339    caps->tes_layer_viewport = true;
2340 
2341    caps->context_priority_mask =
2342       PIPE_CONTEXT_PRIORITY_LOW | PIPE_CONTEXT_PRIORITY_MEDIUM |
2343       PIPE_CONTEXT_PRIORITY_HIGH | PIPE_CONTEXT_PRIORITY_REALTIME;
2344 
2345    caps->min_line_width =
2346    caps->min_line_width_aa =
2347    caps->min_point_size =
2348    caps->min_point_size_aa = 1;
2349 
2350    caps->point_size_granularity =
2351    caps->line_width_granularity = 0.1;
2352 
2353    caps->max_line_width =
2354    caps->max_line_width_aa = 16.0; /* Off-by-one fixed point 4:4 encoding */
2355 
2356    caps->max_point_size =
2357    caps->max_point_size_aa = 511.95f;
2358 
2359    caps->max_texture_anisotropy = 16.0;
2360 
2361    caps->max_texture_lod_bias = 16.0; /* arbitrary */
2362 }
2363 
2364 static bool
2365 agx_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format,
2366                         enum pipe_texture_target target, unsigned sample_count,
2367                         unsigned storage_sample_count, unsigned usage)
2368 {
2369    assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D ||
2370           target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D ||
2371           target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT ||
2372           target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE ||
2373           target == PIPE_TEXTURE_CUBE_ARRAY);
2374 
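   /* Only 2x and 4x MSAA are supported */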
2375    if (sample_count > 1 && sample_count != 4 && sample_count != 2)
2376       return false;
2377 
2378    if (sample_count > 1 && agx_device(pscreen)->debug & AGX_DBG_NOMSAA)
2379       return false;
2380 
2381    if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
2382       return false;
2383 
2384    if ((usage & PIPE_BIND_VERTEX_BUFFER) && !agx_vbo_supports_format(format))
2385       return false;
2386 
2387    /* For framebuffer_no_attachments, fake support for "none" images */
2388    if (format == PIPE_FORMAT_NONE)
2389       return true;
2390 
2391    if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
2392                 PIPE_BIND_SHADER_IMAGE)) {
2393       enum pipe_format tex_format = format;
2394 
2395       /* Mimic the fixup done in create_sampler_view and u_transfer_helper so we
2396        * advertise GL_OES_texture_stencil8. Alternatively, we could make mesa/st
2397        * less stupid?
2398        */
2399       if (tex_format == PIPE_FORMAT_X24S8_UINT)
2400          tex_format = PIPE_FORMAT_S8_UINT;
2401 
2402       struct ail_pixel_format_entry ent = ail_pixel_format[tex_format];
2403 
2404       if (!ail_is_valid_pixel_format(tex_format))
2405          return false;
2406 
2407       /* RGB32, luminance/alpha/intensity emulated for texture buffers only */
2408       if ((ent.channels == AGX_CHANNELS_R32G32B32_EMULATED ||
2409            util_format_is_luminance(tex_format) ||
2410            util_format_is_alpha(tex_format) ||
2411            util_format_is_luminance_alpha(tex_format) ||
2412            util_format_is_intensity(tex_format)) &&
2413           target != PIPE_BUFFER)
2414          return false;
2415 
2416       /* XXX: sort out rgb9e5 rendering */
2417       if ((usage & PIPE_BIND_RENDER_TARGET) &&
2418           (!ent.renderable || (tex_format == PIPE_FORMAT_R9G9B9E5_FLOAT)))
2419          return false;
2420    }
2421 
2422    if (usage & PIPE_BIND_DEPTH_STENCIL) {
2423       switch (format) {
2424       /* natively supported */
2425       case PIPE_FORMAT_Z16_UNORM:
2426       case PIPE_FORMAT_Z32_FLOAT:
2427       case PIPE_FORMAT_S8_UINT:
2428 
2429       /* lowered by u_transfer_helper to one of the above */
2430       case PIPE_FORMAT_Z24X8_UNORM:
2431       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2432       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2433          break;
2434 
2435       default:
2436          return false;
2437       }
2438    }
2439 
2440    return true;
2441 }
2442 
2443 static void
2444 agx_query_dmabuf_modifiers(struct pipe_screen *screen, enum pipe_format format,
2445                            int max, uint64_t *modifiers,
2446                            unsigned int *external_only, int *out_count)
2447 {
2448    int i;
2449 
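   /* max == 0 means the caller only wants the number of supported modifiers */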
2450    if (max == 0) {
2451       *out_count = ARRAY_SIZE(agx_best_modifiers);
2452       return;
2453    }
2454 
2455    for (i = 0; i < ARRAY_SIZE(agx_best_modifiers) && i < max; i++) {
2456       if (external_only)
2457          external_only[i] = 0;
2458 
2459       modifiers[i] = agx_best_modifiers[i];
2460    }
2461 
2462    /* Return the number of modifiers copied */
2463    *out_count = i;
2464 }
2465 
2466 static bool
2467 agx_is_dmabuf_modifier_supported(struct pipe_screen *screen, uint64_t modifier,
2468                                  enum pipe_format format, bool *external_only)
2469 {
2470    if (external_only)
2471       *external_only = false;
2472 
2473    for (unsigned i = 0; i < ARRAY_SIZE(agx_best_modifiers); ++i) {
2474       if (agx_best_modifiers[i] == modifier)
2475          return true;
2476    }
2477 
2478    return false;
2479 }
2480 
2481 static void
2482 agx_destroy_screen(struct pipe_screen *pscreen)
2483 {
2484    struct agx_screen *screen = agx_screen(pscreen);
2485 
2486    drmSyncobjDestroy(screen->dev.fd, screen->flush_syncobj);
2487 
2488    if (screen->dev.ro)
2489       screen->dev.ro->destroy(screen->dev.ro);
2490 
2491    agx_bo_unreference(&screen->dev, screen->rodata);
2492    u_transfer_helper_destroy(pscreen->transfer_helper);
2493    agx_close_device(&screen->dev);
2494    disk_cache_destroy(screen->disk_cache);
2495    ralloc_free(screen);
2496 }
2497 
2498 static const void *
2499 agx_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,
2500                          enum pipe_shader_type shader)
2501 {
2502    return &agx_nir_options;
2503 }
2504 
2505 static void
2506 agx_resource_set_stencil(struct pipe_resource *prsrc,
2507                          struct pipe_resource *stencil)
2508 {
2509    agx_resource(prsrc)->separate_stencil = agx_resource(stencil);
2510 }
2511 
2512 static struct pipe_resource *
2513 agx_resource_get_stencil(struct pipe_resource *prsrc)
2514 {
2515    return (struct pipe_resource *)agx_resource(prsrc)->separate_stencil;
2516 }
2517 
2518 static enum pipe_format
2519 agx_resource_get_internal_format(struct pipe_resource *prsrc)
2520 {
2521    return agx_resource(prsrc)->layout.format;
2522 }
2523 
2524 static struct disk_cache *
2525 agx_get_disk_shader_cache(struct pipe_screen *pscreen)
2526 {
2527    return agx_screen(pscreen)->disk_cache;
2528 }
2529 
2530 static const struct u_transfer_vtbl transfer_vtbl = {
2531    .resource_create = agx_resource_create,
2532    .resource_destroy = agx_resource_destroy,
2533    .transfer_map = agx_transfer_map,
2534    .transfer_unmap = agx_transfer_unmap,
2535    .transfer_flush_region = agx_transfer_flush_region,
2536    .get_internal_format = agx_resource_get_internal_format,
2537    .set_stencil = agx_resource_set_stencil,
2538    .get_stencil = agx_resource_get_stencil,
2539 };
2540 
2541 static int
2542 agx_screen_get_fd(struct pipe_screen *pscreen)
2543 {
2544    return agx_device(pscreen)->fd;
2545 }
2546 
2547 static uint64_t
2548 agx_get_timestamp(struct pipe_screen *pscreen)
2549 {
2550    struct agx_device *dev = agx_device(pscreen);
2551    return agx_gpu_time_to_ns(dev, agx_get_gpu_timestamp(dev));
2552 }
2553 
2554 static void
2555 agx_screen_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
2556 {
2557    agx_get_device_uuid(agx_device(pscreen), uuid);
2558 }
2559 
2560 static void
2561 agx_screen_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
2562 {
2563    agx_get_driver_uuid(uuid);
2564 }
2565 
2566 static const char *
2567 agx_get_cl_cts_version(struct pipe_screen *pscreen)
2568 {
2569    struct agx_device *dev = agx_device(pscreen);
2570 
2571    /* https://www.khronos.org/conformance/adopters/conformant-products/opencl#submission_433
2572     */
2573    if (dev->params.gpu_generation < 15)
2574       return "v2024-08-08-00";
2575 
2576    return NULL;
2577 }
2578 
2579 struct pipe_screen *
2580 agx_screen_create(int fd, struct renderonly *ro,
2581                   const struct pipe_screen_config *config)
2582 {
2583    struct agx_screen *agx_screen;
2584    struct pipe_screen *screen;
2585 
2586    /* Refuse to probe. There is no stable UAPI yet. Upstream Mesa cannot be used
2587     * yet with Asahi. Do not try. Do not patch out this check. Do not teach
2588     * others about patching this check. Do not distribute upstream Mesa with
2589     * this check patched out.
2590     */
2591    return NULL;
2592 
2593    agx_screen = rzalloc(NULL, struct agx_screen);
2594    if (!agx_screen)
2595       return NULL;
2596 
2597    screen = &agx_screen->pscreen;
2598 
2599    /* parse driconf configuration now for device specific overrides */
2600    driParseConfigFiles(config->options, config->options_info, 0, "asahi", NULL,
2601                        NULL, NULL, 0, NULL, 0);
2602 
2603    agx_screen->dev.fd = fd;
2604    agx_screen->dev.ro = ro;
2605    u_rwlock_init(&agx_screen->destroy_lock);
2606 
2607    /* Try to open an AGX device */
2608    if (!agx_open_device(agx_screen, &agx_screen->dev)) {
2609       ralloc_free(agx_screen);
2610       return NULL;
2611    }
2612 
2613    /* Forward no16 flag from driconf. This must happen after opening the device,
2614     * since agx_open_device sets debug.
2615     */
2616    if (driQueryOptionb(config->options, "no_fp16"))
2617       agx_screen->dev.debug |= AGX_DBG_NO16;
2618 
2619    int ret =
2620       drmSyncobjCreate(agx_device(screen)->fd, 0, &agx_screen->flush_syncobj);
2621    assert(!ret);
2622 
2623    simple_mtx_init(&agx_screen->flush_seqid_lock, mtx_plain);
2624 
2625    screen->destroy = agx_destroy_screen;
2626    screen->get_screen_fd = agx_screen_get_fd;
2627    screen->get_name = agx_get_name;
2628    screen->get_vendor = agx_get_vendor;
2629    screen->get_device_vendor = agx_get_device_vendor;
2630    screen->get_shader_param = agx_get_shader_param;
2631    screen->get_compute_param = agx_get_compute_param;
2632    screen->get_device_uuid = agx_screen_get_device_uuid;
2633    screen->get_driver_uuid = agx_screen_get_driver_uuid;
2634    screen->is_format_supported = agx_is_format_supported;
2635    screen->query_dmabuf_modifiers = agx_query_dmabuf_modifiers;
2636    screen->query_memory_info = agx_query_memory_info;
2637    screen->is_dmabuf_modifier_supported = agx_is_dmabuf_modifier_supported;
2638    screen->context_create = agx_create_context;
2639    screen->resource_from_handle = agx_resource_from_handle;
2640    screen->resource_get_handle = agx_resource_get_handle;
2641    screen->resource_get_param = agx_resource_get_param;
2642    screen->resource_create_with_modifiers = agx_resource_create_with_modifiers;
2643    screen->get_timestamp = agx_get_timestamp;
2644    screen->fence_reference = agx_fence_reference;
2645    screen->fence_finish = agx_fence_finish;
2646    screen->fence_get_fd = agx_fence_get_fd;
2647    screen->get_compiler_options = agx_get_compiler_options;
2648    screen->get_disk_shader_cache = agx_get_disk_shader_cache;
2649    screen->get_cl_cts_version = agx_get_cl_cts_version;
2650 
2651    screen->resource_create = u_transfer_helper_resource_create;
2652    screen->resource_destroy = u_transfer_helper_resource_destroy;
2653    screen->transfer_helper = u_transfer_helper_create(
2654       &transfer_vtbl,
2655       U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL |
2656          U_TRANSFER_HELPER_MSAA_MAP | U_TRANSFER_HELPER_Z24_IN_Z32F);
2657 
2658    agx_init_screen_caps(screen);
2659 
2660    agx_disk_cache_init(agx_screen);
2661 
2662    /* TODO: Refactor readonly data? */
2663    {
2664       struct agx_bo *bo =
2665          agx_bo_create(&agx_screen->dev, 16384, 0, 0, "Rodata");
2666 
2667       agx_pack_txf_sampler((struct agx_sampler_packed *)agx_bo_map(bo));
2668 
2669       agx_pack(&agx_screen->dev.txf_sampler, USC_SAMPLER, cfg) {
2670          cfg.start = 0;
2671          cfg.count = 1;
2672          cfg.buffer = bo->va->addr;
2673       }
2674 
2675       agx_screen->rodata = bo;
2676    }
2677 
2678    return screen;
2679 }
2680