• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2023 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_private.h"
25 
26 #include "util/u_math.h"
27 
28 static uint64_t
va_add(struct anv_va_range * range,uint64_t addr,uint64_t size)29 va_add(struct anv_va_range *range, uint64_t addr, uint64_t size)
30 {
31    range->addr = addr;
32    range->size = size;
33 
34    return addr + size;
35 }
36 
37 static void
va_at(struct anv_va_range * range,uint64_t addr,uint64_t size)38 va_at(struct anv_va_range *range, uint64_t addr, uint64_t size)
39 {
40    range->addr = addr;
41    range->size = size;
42 }
43 
44 static void
anv_device_print_vas(struct anv_physical_device * device)45 anv_device_print_vas(struct anv_physical_device *device)
46 {
47    fprintf(stderr, "Driver heaps:\n");
48 #define PRINT_HEAP(name) \
49    fprintf(stderr, "   0x%016"PRIx64"-0x%016"PRIx64": %s\n", \
50            device->va.name.addr, \
51            device->va.name.addr + device->va.name.size, \
52            #name);
53    PRINT_HEAP(general_state_pool);
54    PRINT_HEAP(low_heap);
55    PRINT_HEAP(dynamic_state_pool);
56    PRINT_HEAP(sampler_state_pool);
57    PRINT_HEAP(binding_table_pool);
58    PRINT_HEAP(internal_surface_state_pool);
59    PRINT_HEAP(scratch_surface_state_pool);
60    PRINT_HEAP(bindless_surface_state_pool);
61    PRINT_HEAP(indirect_descriptor_pool);
62    PRINT_HEAP(indirect_push_descriptor_pool);
63    PRINT_HEAP(instruction_state_pool);
64    PRINT_HEAP(high_heap);
65    PRINT_HEAP(trtt);
66 }
67 
/* Carve the canonical 48-bit GPU address space into the driver's fixed
 * heaps, filling in every member of device->va.  va_add() advances a
 * running cursor; va_at() places a range at a fixed (possibly
 * overlapping) address.
 */
void
anv_physical_device_init_va_ranges(struct anv_physical_device *device)
{
   /* anv Virtual Memory Layout
    * =========================
    *
    * When the anv driver is determining the virtual graphics addresses of
    * memory objects itself using the softpin mechanism, the following memory
    * ranges will be used.
    *
    * Three special considerations to notice:
    *
    * (1) the dynamic state pool is located within the same 4 GiB as the low
    * heap. This is to work around a VF cache issue described in a comment in
    * anv_physical_device_init_heaps.
    *
    * (2) the binding table pool is located at lower addresses than the BT
    * (binding table) surface state pool, within a 4 GiB range which also
    * contains the bindless surface state pool. This allows surface state base
    * addresses to cover both binding tables (16 bit offsets), the internal
    * surface states (32 bit offsets) and the bindless surface states.
    *
    * (3) the last 4 GiB of the address space is withheld from the high heap.
    * Various hardware units will read past the end of an object for various
    * reasons. This healthy margin prevents reads from wrapping around 48-bit
    * addresses.
    */
   uint64_t _1Mb = 1ull * 1024 * 1024;
   uint64_t _1Gb = 1ull * 1024 * 1024 * 1024;
   uint64_t _4Gb = 4ull * 1024 * 1024 * 1024;

   uint64_t address = 0x000000200000ULL; /* 2MiB */

   /* General state pool spans from 2MiB up to the 1Gb boundary. */
   address = va_add(&device->va.general_state_pool, address,
                    _1Gb - address);

   address = va_add(&device->va.low_heap, address, _1Gb);

   /* The binding table pool has to be located directly in front of the
    * surface states.
    */
   /* Skip a 1Gb hole so the internal surface state pool below lands on a
    * 2Gb boundary (checked by the assert that follows it).
    */
   address += _1Gb;
   address = va_add(&device->va.binding_table_pool, address, _1Gb);
   address = va_add(&device->va.internal_surface_state_pool, address, 1 * _1Gb);
   assert(device->va.internal_surface_state_pool.addr ==
          align64(device->va.internal_surface_state_pool.addr, 2 * _1Gb));
   /* Scratch surface state overlaps with the internal surface state */
   va_at(&device->va.scratch_surface_state_pool,
         device->va.internal_surface_state_pool.addr,
         8 * _1Mb);
   address = va_add(&device->va.bindless_surface_state_pool, address, 2 * _1Gb);


   /* PRMs & simulation disagrees on the actual size of this heap. Take the
    * smallest (simulation) so that it works everywhere.
    */
   address = align64(address, _4Gb);
   address = va_add(&device->va.dynamic_state_pool, address, _1Gb);
   address = va_add(&device->va.sampler_state_pool, address, 2 * _1Gb);

   if (device->indirect_descriptors) {
      /* With indirect descriptors, descriptor buffers can go anywhere, they
       * just need to be in a 4Gb aligned range, so all shader accesses can
       * use a relocatable upper dword for the 64bit address.
       */
      address = align64(address, _4Gb);
      address = va_add(&device->va.indirect_descriptor_pool, address, 3 * _1Gb);
      address = va_add(&device->va.indirect_push_descriptor_pool, address, _1Gb);
   }

   /* We use a trick to compute constant data offsets in the shaders to avoid
    * unnecessary 64bit address computations (see lower_load_constant() in
    * anv_nir_apply_pipeline_layout.c). This assumes the instruction pool is
    * located at an address with the lower 32bits at 0.
    */
   address = align64(address, _4Gb);
   address = va_add(&device->va.instruction_state_pool, address, 2 * _1Gb);

   /* What's left to do for us is to set va.high_heap and va.trtt without
    * overlap, but there are a few things to be considered:
    *
    * The TR-TT address space is governed by the GFX_TRTT_VA_RANGE register,
    * which carves out part of the address space for TR-TT and is independent
    * of device->gtt_size. We use 47:44 for gen9+, the values we set here
    * should be in sync with what we write to the register.
    *
    * If we ever gain the capability to use more than 48 bits of address space
    * we'll have to adjust where we put the TR-TT space (and how we set
    * GFX_TRTT_VA_RANGE).
    *
    * We have to leave the last 4GiB out of the high vma range, so that no
    * state base address + size can overflow 48 bits. For more information see
    * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
    *
    * Despite the comment above, before we had TR-TT we were not only avoiding
    * the last 4GiB of the 48bit address space, but also avoiding the last
    * 4GiB from gtt_size, so let's be on the safe side and do the 4GiB
    * avoiding for both the TR-TT space top and the gtt top.
    */
   assert(device->gtt_size <= (1uLL << 48));
   /* Addresses with bits 47:44 == 0xF belong to TR-TT (per the comment
    * above, this must match what is written to GFX_TRTT_VA_RANGE).
    */
   uint64_t trtt_start = 0xFuLL << 44;
   uint64_t trtt_end = (1uLL << 48) - 4 * _1Gb;
   uint64_t addressable_top = MIN2(device->gtt_size, trtt_start) - 4 * _1Gb;

   /* Everything between the last fixed pool and the 4GiB safety margin
    * below the TR-TT/gtt top becomes the user-visible high heap.
    */
   uint64_t user_heaps_size = addressable_top - address;
   address = va_add(&device->va.high_heap, address, user_heaps_size);
   assert(address <= trtt_start);
   address = va_add(&device->va.trtt, trtt_start, trtt_end - trtt_start);

   /* Optional debug dump of the final layout. */
   if (INTEL_DEBUG(DEBUG_HEAPS))
      anv_device_print_vas(device);
}
180