/*
 * Copyright © 2016-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "aub_mem.h"
#include "util/anon_file.h"

struct bo_map {
   struct list_head link;
   struct intel_batch_decode_bo bo;
   bool unmap_after_use;
   bool ppgtt;
};

struct ggtt_entry {
   struct rb_node node;
   uint64_t virt_addr;
   uint64_t phys_addr;
};

struct phys_mem {
   struct rb_node node;
   uint64_t fd_offset;
   uint64_t phys_addr;
   uint8_t *data;
   const uint8_t *aub_data;
};

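/* Record a BO mapping so the lookup functions below can hand it out again.
 * "ppgtt" selects which address space the mapping belongs to;
 * "unmap_after_use" marks mappings created by this file that must be
 * munmap()ed again in aub_mem_clear_bo_maps().
 */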
static void
add_gtt_bo_map(struct aub_mem *mem, struct intel_batch_decode_bo bo, bool ppgtt, bool unmap_after_use)
{
   struct bo_map *m = calloc(1, sizeof(*m));

   m->ppgtt = ppgtt;
   m->bo = bo;
   m->unmap_after_use = unmap_after_use;
   list_add(&m->link, &mem->maps);
}

void
aub_mem_clear_bo_maps(struct aub_mem *mem)
{
   list_for_each_entry_safe(struct bo_map, i, &mem->maps, link) {
      if (i->unmap_after_use)
         munmap((void *)i->bo.map, i->bo.size);
      list_del(&i->link);
      free(i);
   }
}

static inline struct ggtt_entry *
ggtt_entry_next(struct ggtt_entry *entry)
{
   if (!entry)
      return NULL;
   struct rb_node *node = rb_node_next(&entry->node);
   if (!node)
      return NULL;
   return rb_node_data(struct ggtt_entry, node, node);
}

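/* Comparator shared by the GGTT and physical-memory red-black trees.  Note
 * the inverted sign convention: positive when a < b, negative when a > b.
 */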
static inline int
cmp_uint64(uint64_t a, uint64_t b)
{
   if (a < b)
      return 1;
   if (a > b)
      return -1;
   return 0;
}

static inline int
cmp_ggtt_entry(const struct rb_node *node, const void *addr)
{
   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
}

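/* Return the GGTT entry tracking virt_addr, inserting a new zero-initialized
 * one if none exists yet.
 */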
static struct ggtt_entry *
ensure_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->ggtt, &virt_addr,
                                                cmp_ggtt_entry);
   int cmp = 0;
   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
      struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
      new_entry->virt_addr = virt_addr;
      rb_tree_insert_at(&mem->ggtt, node, &new_entry->node, cmp < 0);
      node = &new_entry->node;
   }

   return rb_node_data(struct ggtt_entry, node, node);
}

static struct ggtt_entry *
search_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   virt_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->ggtt, &virt_addr, cmp_ggtt_entry);

   if (!node)
      return NULL;

   return rb_node_data(struct ggtt_entry, node, node);
}

static inline int
cmp_phys_mem(const struct rb_node *node, const void *addr)
{
   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
}

static void
check_mmap_result(const void *res)
{
   if (res != MAP_FAILED)
      return;

   if (errno == ENOMEM) {
      fprintf(stderr,
            "Not enough memory available or maximum number of mappings reached. "
            "Consider increasing sysctl vm.max_map_count.\n");
   } else {
      perror("mmap");
   }

   abort();
}

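/* Return the record for a (page-aligned) physical address, allocating it on
 * demand: the page contents are backed by the anonymous file mem->mem_fd,
 * which is grown by 4KiB and mmap()ed read/write.
 */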
static struct phys_mem *
ensure_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->mem, &phys_addr, cmp_phys_mem);
   int cmp = 0;
   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
      struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
      new_mem->phys_addr = phys_addr;
      new_mem->fd_offset = mem->mem_fd_len;

      ASSERTED int ftruncate_res = ftruncate(mem->mem_fd, mem->mem_fd_len += 4096);
      assert(ftruncate_res == 0);

      new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                           mem->mem_fd, new_mem->fd_offset);
      check_mmap_result(new_mem->data);

      rb_tree_insert_at(&mem->mem, node, &new_mem->node, cmp < 0);
      node = &new_mem->node;
   }

   return rb_node_data(struct phys_mem, node, node);
}

static struct phys_mem *
search_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   phys_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->mem, &phys_addr, cmp_phys_mem);

   if (!node)
      return NULL;

   return rb_node_data(struct phys_mem, node, node);
}

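/* Register a block of data that is already mapped in our address space
 * (presumably a pointer into the parsed AUB data) as a GGTT BO, without
 * copying it; it is never munmap()ed by aub_mem_clear_bo_maps().
 */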
void
aub_mem_local_write(void *_mem, uint64_t address,
                    const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   struct intel_batch_decode_bo bo = {
      .map = data,
      .addr = address,
      .size = size,
   };
   add_gtt_bo_map(mem, bo, false, false);
}

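/* Handle a write to the global GTT itself: "address" is a byte offset into
 * the GGTT and the payload is an array of 64-bit PTEs, each covering one
 * 4KiB page, so the first page mapped is at (address / 8) << 12.
 */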
void
aub_mem_ggtt_entry_write(void *_mem, uint64_t address,
                         const void *_data, uint32_t _size)
{
   struct aub_mem *mem = _mem;
   uint64_t virt_addr = (address / sizeof(uint64_t)) << 12;
   const uint64_t *data = _data;
   size_t size = _size / sizeof(*data);
   for (const uint64_t *entry = data;
        entry < data + size;
        entry++, virt_addr += 4096) {
      struct ggtt_entry *pt = ensure_ggtt_entry(mem, virt_addr);
      pt->phys_addr = *entry;
   }
}

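/* Copy data into the physical memory store, splitting the write at 4KiB page
 * boundaries.  Each page also remembers where its bytes start in the
 * incoming (AUB) data (aub_data) so they can be exposed later without a
 * copy.
 */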
void
aub_mem_phys_write(void *_mem, uint64_t phys_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = phys_address & ~0xfff; page < phys_address + size; page += 4096) {
      struct phys_mem *pmem = ensure_phys_mem(mem, page);
      uint64_t offset = MAX2(page, phys_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;
      memcpy(pmem->data + offset, data, size_this_page);
      pmem->aub_data = data - offset;
      data = (const uint8_t *)data + size_this_page;
   }
}

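/* Write through the GGTT: each 4KiB chunk is translated via its GGTT entry
 * (which must be present and have the valid bit set) and forwarded to
 * aub_mem_phys_write().
 */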
void
aub_mem_ggtt_write(void *_mem, uint64_t virt_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = virt_address & ~0xfff; page < virt_address + size; page += 4096) {
      struct ggtt_entry *entry = search_ggtt_entry(mem, page);
      assert(entry && entry->phys_addr & 0x1);

      uint64_t offset = MAX2(page, virt_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;

      uint64_t phys_page = entry->phys_addr & ~0xfff; /* Clear the validity bits. */
      aub_mem_phys_write(mem, phys_page + offset, data, size_this_page);
      data = (const uint8_t *)data + size_this_page;
   }
}

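/* Return a CPU mapping for a GGTT address.  Existing mappings are reused;
 * otherwise the run of contiguous GGTT entries starting at the page-aligned
 * address is collected, an anonymous read-only range of that size is
 * reserved, and every page with known backing storage is mapped over it
 * with MAP_FIXED from the physical memory file.
 */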
struct intel_batch_decode_bo
aub_mem_get_ggtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct intel_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (!i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   struct ggtt_entry *start =
      (struct ggtt_entry *)rb_tree_search_sloppy(&mem->ggtt, &address,
                                                 cmp_ggtt_entry);
   if (start && start->virt_addr < address)
      start = ggtt_entry_next(start);
   if (!start)
      return bo;

   struct ggtt_entry *last = start;
   for (struct ggtt_entry *i = ggtt_entry_next(last);
        i && last->virt_addr + 4096 == i->virt_addr;
        last = i, i = ggtt_entry_next(last))
      ;

   bo.addr = MIN2(address, start->virt_addr);
   bo.size = last->virt_addr - bo.addr + 4096;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   check_mmap_result(bo.map);

   for (struct ggtt_entry *i = start;
        i;
        i = i == last ? NULL : ggtt_entry_next(i)) {
      uint64_t phys_addr = i->phys_addr & ~0xfff;
      struct phys_mem *phys_mem = search_phys_mem(mem, phys_addr);

      if (!phys_mem)
         continue;

      uint32_t map_offset = i->virt_addr - address;
      void *res = mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ,
                  MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      check_mmap_result(res);
   }

   add_gtt_bo_map(mem, bo, false, true);

   return bo;
}

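/* Walk the four-level PPGTT starting from the PML4 physical address,
 * consuming 9 bits of the virtual address per level from bit 39 down.
 * Returns the backing page of the final translation, or NULL if a table is
 * missing or an entry lacks the present bit (bit 0).
 */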
static struct phys_mem *
ppgtt_walk(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   uint64_t shift = 39;
   uint64_t addr = pml4;
   for (int level = 4; level > 0; level--) {
      struct phys_mem *table = search_phys_mem(mem, addr);
      if (!table)
         return NULL;
      int index = (address >> shift) & 0x1ff;
      uint64_t entry = ((uint64_t *)table->data)[index];
      if (!(entry & 1))
         return NULL;
      addr = entry & ~0xfff;
      shift -= 9;
   }
   return search_phys_mem(mem, addr);
}

static bool
ppgtt_mapped(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   return ppgtt_walk(mem, pml4, address) != NULL;
}

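/* PPGTT counterpart of aub_mem_get_ggtt_bo(): reuse an existing mapping if
 * one covers the address, otherwise build a read-only mapping of all
 * consecutively mapped pages starting at the page-aligned address.
 */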
struct intel_batch_decode_bo
aub_mem_get_ppgtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct intel_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (i->ppgtt && i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   if (!ppgtt_mapped(mem, mem->pml4, address))
      return bo;

   /* Map everything until the first gap since we don't know how much the
    * decoder actually needs.
    */
   uint64_t end = address;
   while (ppgtt_mapped(mem, mem->pml4, end))
      end += 4096;

   bo.addr = address;
   bo.size = end - address;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   assert(bo.map != MAP_FAILED);

   for (uint64_t page = address; page < end; page += 4096) {
      struct phys_mem *phys_mem = ppgtt_walk(mem, mem->pml4, page);

      void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
                  MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      check_mmap_result(res);
   }

   add_gtt_bo_map(mem, bo, true, true);

   return bo;
}

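/* Initialize the tracker; returns false if the anonymous file backing
 * physical memory cannot be created.  A rough usage sketch (hypothetical
 * caller, not taken from this file):
 *
 *    struct aub_mem mem;
 *    if (!aub_mem_init(&mem))
 *       return;
 *    // feed aub_mem_*_write() from an AUB stream parser, look up buffers
 *    // with aub_mem_get_ggtt_bo() / aub_mem_get_ppgtt_bo() while decoding
 *    aub_mem_clear_bo_maps(&mem);   // e.g. between batches
 *    aub_mem_fini(&mem);
 */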
bool
aub_mem_init(struct aub_mem *mem)
{
   memset(mem, 0, sizeof(*mem));

   list_inithead(&mem->maps);

   mem->mem_fd = os_create_anonymous_file(0, "phys memory");

   return mem->mem_fd != -1;
}

void
aub_mem_fini(struct aub_mem *mem)
{
   if (mem->mem_fd == -1)
      return;

   aub_mem_clear_bo_maps(mem);

   rb_tree_foreach_safe(struct ggtt_entry, entry, &mem->ggtt, node) {
      rb_tree_remove(&mem->ggtt, &entry->node);
      free(entry);
   }
   rb_tree_foreach_safe(struct phys_mem, entry, &mem->mem, node) {
      rb_tree_remove(&mem->mem, &entry->node);
      free(entry);
   }

   close(mem->mem_fd);
   mem->mem_fd = -1;
}

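/* The accessors below return a single 4KiB page (or an empty BO when the
 * address is unknown), looked up either by physical address or by walking
 * the PPGTT; aub_mem_get_ppgtt_addr_aub_data() returns the page's original
 * AUB bytes rather than the reconstructed contents.
 */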
struct intel_batch_decode_bo
aub_mem_get_phys_addr_data(struct aub_mem *mem, uint64_t phys_addr)
{
   struct phys_mem *page = search_phys_mem(mem, phys_addr);
   return page ?
      (struct intel_batch_decode_bo) { .map = page->data, .addr = page->phys_addr, .size = 4096 } :
      (struct intel_batch_decode_bo) {};
}

struct intel_batch_decode_bo
aub_mem_get_ppgtt_addr_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct intel_batch_decode_bo) { .map = page->data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct intel_batch_decode_bo) {};
}

struct intel_batch_decode_bo
aub_mem_get_ppgtt_addr_aub_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct intel_batch_decode_bo) { .map = page->aub_data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct intel_batch_decode_bo) {};
}