/*
 * Copyright (c) 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * The aux map provides a multi-level lookup of the main surface address,
 * which ends up providing information about the auxiliary surface data,
 * including the address where the auxiliary data resides.
 *
 * The sections below depict the address splitting and the table entry
 * formats for the TGL platform. These may vary on other platforms.
 *
 * The 48-bit VMA (GPU) address of the main surface is split to do the
 * address lookup:
 *
 *  48 bit address of main surface
 * +--------+--------+--------+------+
 * | 47:36  | 35:24  | 23:16  | 15:0 |
 * | L3-idx | L2-idx | L1-idx | ...  |
 * +--------+--------+--------+------+
 *
 * GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is
 * located by indexing into this buffer as a uint64_t array using the L3-idx
 * value. The 64-bit L3 entry is defined as:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:15       | 14:1 | 0 |
 * |  ...  | L2-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` (valid) bit is set, then L2-tbl-addr gives the address of the
 * level-2 table, with the lower address bits filled with zero. The L2 Table
 * Entry is located by indexing into that table as a uint64_t array using
 * the L2-idx value. The 64-bit L2 entry is similar to the L3 entry, except
 * with 2 additional address bits:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:13       | 12:1 | 0 |
 * |  ...  | L1-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` bit is set, then L1-tbl-addr gives the address of the level-1
 * table, with the lower address bits filled with zero. The L1 Table Entry
 * is located by indexing into that table as a uint64_t array using the
 * L1-idx value. The 64-bit L1 entry is defined as:
 *
 * +--------+------+-------+-------+-------+---------------+-----+---+
 * | 63:58  | 57   | 56:54 | 53:52 | 51:48 | 47:8          | 7:1 | 0 |
 * | Format | Y/Cr | Depth |  TM   |  ...  | aux-data-addr | ... | V |
 * +--------+------+-------+-------+-------+---------------+-----+---+
 *
 * Where:
 *  - Format: See `get_format_encoding`
 *  - Y/Cr: 0=Y(Luma), 1=Cr(Chroma)
 *  - (bit) Depth: See `get_bpp_encoding`
 *  - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd
 *  - aux-data-addr: VMA/GPU address of the aux data
 *  - V: entry is valid
 *
 * BSpec 44930
 */
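
/*
 * A minimal sketch of the TGL walk described above, assuming the tables are
 * CPU-visible. The helper below (and gpu_addr_to_map()) is hypothetical and
 * not part of this file:
 *
 *    uint64_t
 *    walk_aux_map_tgl(const uint64_t *l3_table, uint64_t main_address)
 *    {
 *       uint64_t l3e = l3_table[(main_address >> 36) & 0xfff];
 *       if ((l3e & 1) == 0)
 *          return 0;
 *       const uint64_t *l2_table = gpu_addr_to_map(l3e & 0xffffffff8000ull);
 *       uint64_t l2e = l2_table[(main_address >> 24) & 0xfff];
 *       if ((l2e & 1) == 0)
 *          return 0;
 *       const uint64_t *l1_table = gpu_addr_to_map(l2e & 0xffffffffe000ull);
 *       return l1_table[(main_address >> 16) & 0xff];
 *    }
 *
 * where gpu_addr_to_map() stands in for however the caller reaches the table
 * memory, and the returned L1 entry still has to be decoded per the diagram
 * above.
 */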

#include "intel_aux_map.h"
#include "intel_gem.h"

#include "dev/intel_device_info.h"
#include "isl/isl.h"

#include "util/list.h"
#include "util/ralloc.h"
#include "util/u_atomic.h"
#include "util/u_math.h"

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>

#define INTEL_AUX_MAP_FORMAT_BITS_MASK   0xfff0000000000000ull

/* Mask with the first 48 bits set */
#define VALID_ADDRESS_MASK ((1ull << 48) - 1)

#define L3_ENTRY_L2_ADDR_MASK 0xffffffff8000ull

#define L3_L2_BITS_PER_LEVEL 12
#define L3_L2_SUB_TABLE_LEN (sizeof(uint64_t) * (1ull << L3_L2_BITS_PER_LEVEL))
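/* L3_L2_SUB_TABLE_LEN works out to 4096 uint64_t entries, i.e. each L3/L2
 * sub-table occupies 32KB.
 */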

static const bool aux_map_debug = false;

/**
 * Auxiliary surface mapping formats
 *
 * Several AUX mapping formats exist. The supported formats are designated
 * here by generation and granularity. A device can support more than one
 * format, depending on the hardware, but we expect only one of them to be
 * needed per device. Otherwise, this enum would have to be turned into a
 * bit map later.
 */
enum intel_aux_map_format {
   /**
    * 64KB granularity format on GFX12 devices
    */
   INTEL_AUX_MAP_GFX12_64KB = 0,

   /**
    * 1MB granularity format on GFX125 devices
    */
   INTEL_AUX_MAP_GFX125_1MB,

   INTEL_AUX_MAP_LAST,
};

/**
 * An incomplete description of AUX mapping formats
 *
 * Theoretically, many things can differ depending on the hardware design:
 * the number of page table levels, the address splitting, the format bits,
 * etc. For now we only manage the known deltas, to keep the implementation
 * simple.
 */
struct aux_format_info {
   /**
    * Granularity of the main surface in compression. Must be a power of 2.
    */
   uint64_t main_page_size;
   /**
    * The ratio of main surface data to its AUX data.
    */
   uint64_t main_to_aux_ratio;
   /**
    * Page size of the level-1 page table. Must be a power of 2.
    */
   uint64_t l1_page_size;
   /**
    * Mask of the level-1 page table index bits in the address split.
    */
   uint64_t l1_index_mask;
   /**
    * Offset of the level-1 page table index bits in the address split.
    */
   uint64_t l1_index_offset;
};

static const struct aux_format_info aux_formats[] = {
   [INTEL_AUX_MAP_GFX12_64KB] = {
      .main_page_size = 64 * 1024,
      .main_to_aux_ratio = 256,
      .l1_page_size = 8 * 1024,
      .l1_index_mask = 0xff,
      .l1_index_offset = 16,
   },
   [INTEL_AUX_MAP_GFX125_1MB] = {
      .main_page_size = 1024 * 1024,
      .main_to_aux_ratio = 256,
      .l1_page_size = 2 * 1024,
      .l1_index_mask = 0xf,
      .l1_index_offset = 20,
   },
};
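
/* To make these numbers concrete: for INTEL_AUX_MAP_GFX12_64KB each 64KB
 * main page is covered by 64KB / 256 = 256B of aux data, and the L1 index
 * comes from main address bits 23:16 ((addr >> 16) & 0xff). For
 * INTEL_AUX_MAP_GFX125_1MB the granule is 1MB, giving 1MB / 256 = 4KB of
 * aux data per page and an L1 index in bits 23:20.
 */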

struct aux_map_buffer {
   struct list_head link;
   struct intel_buffer *buffer;
};

struct intel_aux_level {
   /* GPU address of the current level */
   uint64_t address;

   /* Pointer to the GPU entries of this level */
   uint64_t *entries;

   union {
      /* Host tracking of a parent level to its children (only used on the
       * L3/L2 levels, which have 4096 entries)
       */
      struct intel_aux_level *children[4096];

      /* Refcount of AUX pages at the L1 level (MTL has only 16 entries in
       * L1, while Gfx12 has 256 entries)
       */
      uint32_t ref_counts[256];
   };
};

struct intel_aux_map_context {
   void *driver_ctx;
   pthread_mutex_t mutex;
   struct intel_aux_level *l3_level;
   struct intel_mapped_pinned_buffer_alloc *buffer_alloc;
   uint32_t num_buffers;
   struct list_head buffers;
   uint32_t tail_offset, tail_remaining;
   uint32_t state_num;
   const struct aux_format_info *format;
};

static inline uint64_t
get_page_mask(const uint64_t page_size)
{
   return page_size - 1;
}

static inline uint64_t
get_meta_page_size(const struct aux_format_info *info)
{
   return info->main_page_size / info->main_to_aux_ratio;
}

static inline uint64_t
get_index(const uint64_t main_address,
      const uint64_t index_mask, const uint64_t index_offset)
{
   return (main_address >> index_offset) & index_mask;
}

uint64_t
intel_aux_get_meta_address_mask(struct intel_aux_map_context *ctx)
{
   return (~get_page_mask(get_meta_page_size(ctx->format))) & VALID_ADDRESS_MASK;
}

uint64_t
intel_aux_get_main_to_aux_ratio(struct intel_aux_map_context *ctx)
{
   return ctx->format->main_to_aux_ratio;
}

uint64_t
intel_aux_main_to_aux_offset(struct intel_aux_map_context *ctx,
                             uint64_t main_offset)
{
   return main_offset / ctx->format->main_to_aux_ratio;
}
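
/* For example (GFX12 64KB format, as a sketch): a main surface offset of
 * 0x30000 (three 64KB pages) corresponds to an aux offset of
 * 0x30000 / 256 = 0x300, and intel_aux_get_meta_address_mask() keeps bits
 * 47:8 of an aux address, since the meta page size is 256B.
 */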

static const struct aux_format_info *
get_format(enum intel_aux_map_format format)
{
   assert(format < INTEL_AUX_MAP_LAST);
   assert(ARRAY_SIZE(aux_formats) == INTEL_AUX_MAP_LAST);
   return &aux_formats[format];
}

static enum intel_aux_map_format
select_format(const struct intel_device_info *devinfo)
{
   if (devinfo->verx10 >= 125)
      return INTEL_AUX_MAP_GFX125_1MB;
   else if (devinfo->verx10 == 120)
      return INTEL_AUX_MAP_GFX12_64KB;
   else
      return INTEL_AUX_MAP_LAST;
}

static bool
add_buffer(struct intel_aux_map_context *ctx)
{
   struct aux_map_buffer *buf = rzalloc(ctx, struct aux_map_buffer);
   if (!buf)
      return false;

   const uint32_t size = 0x100000;
   buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size);
   if (!buf->buffer) {
      ralloc_free(buf);
      return false;
   }

   assert(buf->buffer->map != NULL);

   list_addtail(&buf->link, &ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = size;
   p_atomic_inc(&ctx->num_buffers);

   return true;
}

static void
advance_current_pos(struct intel_aux_map_context *ctx, uint32_t size)
{
   assert(ctx->tail_remaining >= size);
   ctx->tail_remaining -= size;
   ctx->tail_offset += size;
}

static bool
align_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size,
                       uint32_t alignment)
{
   if (ctx->tail_remaining < size)
      return false;

   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   uint64_t gpu = tail->buffer->gpu + ctx->tail_offset;
   uint64_t aligned = align64(gpu, alignment);

   if ((aligned - gpu) + size > ctx->tail_remaining) {
      return false;
   } else {
      if (aligned - gpu > 0)
         advance_current_pos(ctx, aligned - gpu);
      return true;
   }
}
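
/* E.g. if the tail of the current buffer sits at GPU address 0x10100 and
 * the caller asks for size = 0x2000 aligned to 0x2000, the position first
 * advances by 0x1f00 to reach 0x12000, and the caller then consumes its
 * 0x2000 bytes from there.
 */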

static void
get_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map)
{
   assert(!list_is_empty(&ctx->buffers));
   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   if (gpu)
      *gpu = tail->buffer->gpu + ctx->tail_offset;
   if (map)
      *map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset);
}

static struct intel_aux_level *
add_sub_table(struct intel_aux_map_context *ctx,
              struct intel_aux_level *parent,
              uint32_t parent_index,
              uint32_t size, uint32_t align)
{
   if (!align_and_verify_space(ctx, size, align)) {
      if (!add_buffer(ctx))
         return NULL;
      UNUSED bool aligned = align_and_verify_space(ctx, size, align);
      assert(aligned);
   }

   struct intel_aux_level *level = rzalloc(ctx, struct intel_aux_level);

   get_current_pos(ctx, &level->address, &level->entries);
   memset(level->entries, 0, size);
   advance_current_pos(ctx, size);

   if (parent != NULL) {
      assert(parent->children[parent_index] == NULL);
      parent->children[parent_index] = level;
   }

   return level;
}

uint32_t
intel_aux_map_get_state_num(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->state_num);
}

struct intel_aux_map_context *
intel_aux_map_init(void *driver_ctx,
                   struct intel_mapped_pinned_buffer_alloc *buffer_alloc,
                   const struct intel_device_info *devinfo)
{
   struct intel_aux_map_context *ctx;

   enum intel_aux_map_format format = select_format(devinfo);
   if (format == INTEL_AUX_MAP_LAST)
      return NULL;

   ctx = ralloc(NULL, struct intel_aux_map_context);
   if (!ctx)
      return NULL;

   if (pthread_mutex_init(&ctx->mutex, NULL)) {
      /* Don't leak the context if mutex initialization fails. */
      ralloc_free(ctx);
      return NULL;
   }

   ctx->format = get_format(format);
   ctx->driver_ctx = driver_ctx;
   ctx->buffer_alloc = buffer_alloc;
   ctx->num_buffers = 0;
   list_inithead(&ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = 0;
   ctx->state_num = 0;

   ctx->l3_level = add_sub_table(ctx, NULL, 0,
                                 L3_L2_SUB_TABLE_LEN, L3_L2_SUB_TABLE_LEN);
   if (ctx->l3_level != NULL) {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n",
                 ctx->l3_level->address, ctx->l3_level->entries);
      p_atomic_inc(&ctx->state_num);
      return ctx;
   } else {
      ralloc_free(ctx);
      return NULL;
   }
}

void
intel_aux_map_finish(struct intel_aux_map_context *ctx)
{
   if (!ctx)
      return;

   pthread_mutex_destroy(&ctx->mutex);
   list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) {
      ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer);
      list_del(&buf->link);
      p_atomic_dec(&ctx->num_buffers);
      ralloc_free(buf);
   }

   ralloc_free(ctx);
}

uint32_t
intel_aux_map_get_alignment(struct intel_aux_map_context *ctx)
{
   return ctx->format->main_page_size;
}

uint64_t
intel_aux_map_get_base(struct intel_aux_map_context *ctx)
{
   /**
    * This gets initialized in intel_aux_map_init, and never changes, so
    * there is no need to lock the mutex.
    */
   return ctx->l3_level->address;
}

static uint8_t
get_bpp_encoding(enum isl_format format)
{
   if (isl_format_is_yuv(format)) {
      switch (format) {
      case ISL_FORMAT_YCRCB_NORMAL:
      case ISL_FORMAT_YCRCB_SWAPY:
      case ISL_FORMAT_PLANAR_420_8: return 3;
      case ISL_FORMAT_PLANAR_420_12: return 2;
      case ISL_FORMAT_PLANAR_420_10: return 1;
      case ISL_FORMAT_PLANAR_420_16: return 0;
      default:
         unreachable("Unsupported format!");
         return 0;
      }
   } else {
      switch (isl_format_get_layout(format)->bpb) {
      case 16:  return 0;
      case 8:   return 4;
      case 32:  return 5;
      case 64:  return 6;
      case 128: return 7;
      default:
         unreachable("Unsupported bpp!");
         return 0;
      }
   }
}
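
/* For instance, a 32bpp format such as ISL_FORMAT_R8G8B8A8_UNORM (bpb = 32)
 * encodes as 5; the value lands in bits 56:54 (Depth) of the L1 entry.
 */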

#define INTEL_AUX_MAP_ENTRY_Ys_TILED_BIT  (0x0ull << 52)
#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT   (0x1ull << 52)

uint64_t
intel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format,
                          uint8_t plane)
{
   /* gfx12.5+ uses tile-4 rather than y-tiling, and gfx12.5+ also uses
    * compression info from the surface state and ignores the aux-map format
    * bits metadata.
    */
   if (!isl_tiling_is_any_y(tiling))
      return 0;

   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP entry %s, bpp_enc=%d\n",
              isl_format_get_name(format),
              isl_format_get_aux_map_encoding(format));

   assert(tiling == ISL_TILING_ICL_Ys ||
          tiling == ISL_TILING_ICL_Yf ||
          tiling == ISL_TILING_Y0);

   uint64_t format_bits =
      ((uint64_t)isl_format_get_aux_map_encoding(format) << 58) |
      ((uint64_t)(plane > 0) << 57) |
      ((uint64_t)get_bpp_encoding(format) << 54) |
      /* TODO: We assume that Yf is not Tiled-Ys, but we are waiting on
       *       clarification
       */
      (tiling == ISL_TILING_ICL_Ys ? INTEL_AUX_MAP_ENTRY_Ys_TILED_BIT :
                                     INTEL_AUX_MAP_ENTRY_Y_TILED_BIT);

   assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits);

   return format_bits;
}
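
/* As a sketch of the packing above: for a single-plane, Y-tiled, 32bpp
 * surface this composes to
 *
 *    (encoding << 58) | (0ull << 57) | (5ull << 54) | (1ull << 52)
 *
 * which stays within INTEL_AUX_MAP_FORMAT_BITS_MASK (bits 63:52) and is
 * OR'ed into the L1 entry by add_mapping() below.
 */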

uint64_t
intel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf)
{
   assert(!isl_format_is_planar(isl_surf->format));
   return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0);
}

static uint64_t
get_l1_addr_mask(struct intel_aux_map_context *ctx)
{
   uint64_t l1_addr = ~get_page_mask(ctx->format->l1_page_size);
   return l1_addr & VALID_ADDRESS_MASK;
}

static void
get_aux_entry(struct intel_aux_map_context *ctx, uint64_t main_address,
              uint32_t *l1_index_out, uint64_t *l1_entry_addr_out,
              uint64_t **l1_entry_map_out,
              struct intel_aux_level **l1_aux_level_out)
{
   struct intel_aux_level *l3_level = ctx->l3_level;
   struct intel_aux_level *l2_level;
   struct intel_aux_level *l1_level;

   uint32_t l3_index = (main_address >> 36) & 0xfff;

   if (l3_level->children[l3_index] == NULL) {
      l2_level =
         add_sub_table(ctx, ctx->l3_level, l3_index,
                       L3_L2_SUB_TABLE_LEN, L3_L2_SUB_TABLE_LEN);
      if (l2_level != NULL) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n",
                    l3_index, l2_level->address, l2_level->entries);
      } else {
         unreachable("Failed to add L2 Aux-Map Page Table!");
      }
      l3_level->entries[l3_index] = (l2_level->address & L3_ENTRY_L2_ADDR_MASK) |
                                    INTEL_AUX_MAP_ENTRY_VALID_BIT;
   } else {
      l2_level = l3_level->children[l3_index];
   }
   uint32_t l2_index = (main_address >> 24) & 0xfff;
   uint64_t l1_page_size = ctx->format->l1_page_size;
   if (l2_level->children[l2_index] == NULL) {
      l1_level = add_sub_table(ctx, l2_level, l2_index, l1_page_size, l1_page_size);
      if (l1_level != NULL) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n",
                    l2_index, l1_level->address, l1_level->entries);
      } else {
         unreachable("Failed to add L1 Aux-Map Page Table!");
      }
      l2_level->entries[l2_index] = (l1_level->address & get_l1_addr_mask(ctx)) |
                                    INTEL_AUX_MAP_ENTRY_VALID_BIT;
   } else {
      l1_level = l2_level->children[l2_index];
   }
   uint32_t l1_index = get_index(main_address, ctx->format->l1_index_mask,
                                 ctx->format->l1_index_offset);
   if (l1_index_out)
      *l1_index_out = l1_index;
   if (l1_entry_addr_out)
      *l1_entry_addr_out = intel_canonical_address(l1_level->address +
                                                   l1_index * sizeof(uint64_t));
   if (l1_entry_map_out)
      *l1_entry_map_out = &l1_level->entries[l1_index];
   if (l1_aux_level_out)
      *l1_aux_level_out = l1_level;
}
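
/* As a worked example (GFX12 64KB format): the first lookup of main address
 * 0x123456780000 allocates an L2 table for L3 index 0x123 and an L1 table
 * for L2 index 0x456, then hands back the L1 slot at index 0x78. Later
 * lookups in the same 16MB region reuse those tables via the host
 * `children` pointers.
 */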

static bool
add_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
            uint64_t aux_address, uint64_t format_bits,
            bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", main_address,
              aux_address);

   uint32_t l1_index;
   uint64_t *l1_entry;
   struct intel_aux_level *l1_aux_level;
   get_aux_entry(ctx, main_address, &l1_index, NULL, &l1_entry, &l1_aux_level);

   const uint64_t l1_data =
      (aux_address & intel_aux_get_meta_address_mask(ctx)) |
      format_bits |
      INTEL_AUX_MAP_ENTRY_VALID_BIT;

   const uint64_t current_l1_data = *l1_entry;
   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      assert(l1_aux_level->ref_counts[l1_index] == 0);
      assert((aux_address & 0xffULL) == 0);
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n",
                 l1_index, current_l1_data, l1_data);
      /**
       * We use non-zero bits in 63:1 to indicate the entry had been filled
       * previously. If these bits are non-zero and they don't exactly match
       * what we want to program into the entry, then we must force the
       * aux-map tables to be flushed.
       */
      if (current_l1_data != 0 &&
          (current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data)
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n",
                 l1_index);

      if (*l1_entry != l1_data) {
         if (aux_map_debug)
            fprintf(stderr,
                    "AUX-MAP L1[0x%x] overwrite 0x%"PRIx64" != 0x%"PRIx64"\n",
                    l1_index, current_l1_data, l1_data);

         return false;
      }
   }

   l1_aux_level->ref_counts[l1_index]++;

   return true;
}

uint64_t *
intel_aux_map_get_entry(struct intel_aux_map_context *ctx,
                        uint64_t main_address,
                        uint64_t *aux_entry_address)
{
   pthread_mutex_lock(&ctx->mutex);
   uint64_t *l1_entry_map;
   get_aux_entry(ctx, main_address, NULL, aux_entry_address, &l1_entry_map, NULL);
   pthread_mutex_unlock(&ctx->mutex);

   return l1_entry_map;
}

/**
 * We mark the leaf entry as invalid, but we don't attempt to clean up the
 * other levels of translation mappings. Since we attempt to reuse VMA
 * ranges, hopefully this will not lead to unbounded growth of the
 * translation tables.
 */
static void
remove_l1_mapping_locked(struct intel_aux_map_context *ctx, uint64_t main_address,
                         bool reset_refcount, bool *state_changed)
{
   uint32_t l1_index;
   uint64_t *l1_entry;
   struct intel_aux_level *l1_aux_level;
   get_aux_entry(ctx, main_address, &l1_index, NULL, &l1_entry, &l1_aux_level);

   const uint64_t current_l1_data = *l1_entry;
   const uint64_t l1_data = current_l1_data & ~INTEL_AUX_MAP_ENTRY_VALID_BIT;

   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      assert(l1_aux_level->ref_counts[l1_index] == 0);
      return;
   } else if (reset_refcount) {
      l1_aux_level->ref_counts[l1_index] = 0;
      if (unlikely(l1_data == 0))
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      assert(l1_aux_level->ref_counts[l1_index] > 0);
      if (--l1_aux_level->ref_counts[l1_index] == 0) {
         /**
          * We use non-zero bits in 63:1 to indicate the entry had been
          * filled previously. In the unlikely event that these are all
          * zero, we force a flush of the aux-map tables.
          */
         if (unlikely(l1_data == 0))
            *state_changed = true;
         *l1_entry = l1_data;
      }
   }
}
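
/* E.g. if two mappings share the same 64KB main page, the L1 slot's
 * refcount reaches 2; deleting one mapping only decrements it, and the
 * entry is invalidated once the count drops back to 0 (or immediately when
 * reset_refcount is set, as intel_aux_map_unmap_range() does below).
 */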

static void
remove_mapping_locked(struct intel_aux_map_context *ctx, uint64_t main_address,
                      uint64_t size, bool reset_refcount, bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", main_address,
              main_address + size);

   uint64_t main_inc_addr = main_address;
   uint64_t main_page_size = ctx->format->main_page_size;
   assert((main_address & get_page_mask(main_page_size)) == 0);
   while (main_inc_addr - main_address < size) {
      remove_l1_mapping_locked(ctx, main_inc_addr, reset_refcount,
                               state_changed);
      main_inc_addr += main_page_size;
   }
}

bool
intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t aux_address, uint64_t main_size_B,
                          uint64_t format_bits)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   uint64_t main_inc_addr = main_address;
   uint64_t aux_inc_addr = aux_address;
   const uint64_t main_page_size = ctx->format->main_page_size;
   assert((main_address & get_page_mask(main_page_size)) == 0);
   const uint64_t aux_page_size = get_meta_page_size(ctx->format);
   assert((aux_address & get_page_mask(aux_page_size)) == 0);
   while (main_inc_addr - main_address < main_size_B) {
      if (!add_mapping(ctx, main_inc_addr, aux_inc_addr, format_bits,
                       &state_changed)) {
         break;
      }
      main_inc_addr = main_inc_addr + main_page_size;
      aux_inc_addr = aux_inc_addr + aux_page_size;
   }
   bool success = main_inc_addr - main_address >= main_size_B;
   if (!success && (main_inc_addr - main_address) > 0) {
      /* If the mapping failed, remove the mapped portion. */
      remove_mapping_locked(ctx, main_address,
                            main_inc_addr - main_address,
                            false /* reset_refcount */, &state_changed);
   }
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);

   return success;
}
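
/* A hedged usage sketch (driver side; the ctx/surf/address variables here
 * are hypothetical): map 128KB of a main surface to its aux data and later
 * tear it down:
 *
 *    uint64_t bits = intel_aux_map_format_bits_for_isl_surf(&surf);
 *    if (intel_aux_map_add_mapping(ctx, main_addr, aux_addr,
 *                                  128 * 1024, bits)) {
 *       ... use the surface ...
 *       intel_aux_map_del_mapping(ctx, main_addr, 128 * 1024);
 *    }
 *
 * main_addr must be aligned to intel_aux_map_get_alignment(ctx) and
 * aux_addr to the meta page size.
 */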

void
intel_aux_map_del_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t size)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   remove_mapping_locked(ctx, main_address, size, false /* reset_refcount */,
                         &state_changed);
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}

void
intel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t size)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   remove_mapping_locked(ctx, main_address, size, true /* reset_refcount */,
                         &state_changed);
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}

uint32_t
intel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->num_buffers);
}

void
intel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos,
                       uint32_t max_bos)
{
   assert(p_atomic_read(&ctx->num_buffers) >= max_bos);
   uint32_t i = 0;
   list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {
      if (i >= max_bos)
         return;
      driver_bos[i++] = buf->buffer->driver_bo;
   }
}