• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014-2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "ac_rtld.h"
25 
26 #include "ac_binary.h"
27 #include "ac_gpu_info.h"
28 #include "util/compiler.h"
29 #include "util/u_dynarray.h"
30 #include "util/u_math.h"
31 
32 #include <gelf.h>
33 #include <libelf.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
/* ELF machine type checked against e_machine of every shader part.
 * Old distributions may not have this enum constant. */
#ifndef EM_AMDGPU
#define EM_AMDGPU 224
#endif

/* Symbol type of old-style LDS symbols. This is deprecated -- remove. */
#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13
#endif

/* Section index carried by LDS symbols. */
#ifndef SHN_AMDGPU_LDS
#define SHN_AMDGPU_LDS 0xff00
#endif

/* Relocation types, for system headers that do not define them. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE          0
#define R_AMDGPU_ABS32_LO      1
#define R_AMDGPU_ABS32_HI      2
#define R_AMDGPU_ABS64         3
#define R_AMDGPU_REL32         4
#define R_AMDGPU_REL64         5
#define R_AMDGPU_ABS32         6
#define R_AMDGPU_GOTPCREL      7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO      10
#define R_AMDGPU_REL32_HI      11
#define R_AMDGPU_RELATIVE64    13
#endif

/* For the UMR disassembler: the dword written DEBUGGER_NUM_MARKERS times
 * after the pasted text. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS        5
71 
/* Per-section bookkeeping for one ELF section of a shader part. */
struct ac_rtld_section {
   /* Section is part of the uploaded read/execute image. */
   bool is_rx : 1;

   /* Section is an executable ".text" section; such sections are laid out
    * back to back at the start of the image. */
   bool is_pasted_text : 1;

   /* Byte offset of the section inside the read/execute image. */
   uint64_t offset;

   /* Section name as returned by elf_strptr(). */
   const char *name;
};
78 
79 struct ac_rtld_part {
80    Elf *elf;
81    struct ac_rtld_section *sections;
82    unsigned num_sections;
83 };
84 
/**
 * Write one error line to stderr: the "ac_rtld error: " prefix, the formatted
 * message, and a terminating newline.
 *
 * \param fmt printf-style format string
 * \param va  arguments consumed by \p fmt
 */
static void report_errorvf(const char *fmt, va_list va)
{
   fputs("ac_rtld error: ", stderr);
   vfprintf(stderr, fmt, va);
   fputc('\n', stderr);
}
93 
94 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
95 
report_errorf(const char * fmt,...)96 static void report_errorf(const char *fmt, ...)
97 {
98    va_list va;
99    va_start(va, fmt);
100    report_errorvf(fmt, va);
101    va_end(va);
102 }
103 
104 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
105 
report_elf_errorf(const char * fmt,...)106 static void report_elf_errorf(const char *fmt, ...)
107 {
108    va_list va;
109    va_start(va, fmt);
110    report_errorvf(fmt, va);
111    va_end(va);
112 
113    fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
114 }
115 
116 /**
117  * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
118  * \p part_idx.
119  */
find_symbol(const struct util_dynarray * symbols,const char * name,unsigned part_idx)120 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
121                                                 const char *name, unsigned part_idx)
122 {
123    util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
124       if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
125          return symbol;
126    }
127    return NULL;
128 }
129 
compare_symbol_by_align(const void * lhsp,const void * rhsp)130 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
131 {
132    const struct ac_rtld_symbol *lhs = lhsp;
133    const struct ac_rtld_symbol *rhs = rhsp;
134    if (rhs->align > lhs->align)
135       return 1;
136    if (rhs->align < lhs->align)
137       return -1;
138    return 0;
139 }
140 
141 /**
142  * Sort the given symbol list by decreasing alignment and assign offsets.
143  */
layout_symbols(struct ac_rtld_symbol * symbols,unsigned num_symbols,uint64_t * ptotal_size)144 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
145                            uint64_t *ptotal_size)
146 {
147    qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
148 
149    uint64_t total_size = *ptotal_size;
150 
151    for (unsigned i = 0; i < num_symbols; ++i) {
152       struct ac_rtld_symbol *s = &symbols[i];
153       assert(util_is_power_of_two_nonzero(s->align));
154 
155       total_size = align64(total_size, s->align);
156       s->offset = total_size;
157 
158       if (total_size + s->size < total_size) {
159          report_errorf("%s: size overflow", __FUNCTION__);
160          return false;
161       }
162 
163       total_size += s->size;
164    }
165 
166    *ptotal_size = total_size;
167    return true;
168 }
169 
170 /**
171  * Read LDS symbols from the given \p section of the ELF of \p part and append
172  * them to the LDS symbols list.
173  *
174  * Shared LDS symbols are filtered out.
175  */
read_private_lds_symbols(struct ac_rtld_binary * binary,unsigned part_idx,Elf_Scn * section,uint32_t * lds_end_align)176 static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
177                                      Elf_Scn *section, uint32_t *lds_end_align)
178 {
179 #define report_if(cond)                                                                            \
180    do {                                                                                            \
181       if ((cond)) {                                                                                \
182          report_errorf(#cond);                                                                     \
183          return false;                                                                             \
184       }                                                                                            \
185    } while (false)
186 #define report_elf_if(cond)                                                                        \
187    do {                                                                                            \
188       if ((cond)) {                                                                                \
189          report_elf_errorf(#cond);                                                                 \
190          return false;                                                                             \
191       }                                                                                            \
192    } while (false)
193 
194    struct ac_rtld_part *part = &binary->parts[part_idx];
195    Elf64_Shdr *shdr = elf64_getshdr(section);
196    uint32_t strtabidx = shdr->sh_link;
197    Elf_Data *symbols_data = elf_getdata(section, NULL);
198    report_elf_if(!symbols_data);
199 
200    const Elf64_Sym *symbol = symbols_data->d_buf;
201    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
202 
203    for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
204       struct ac_rtld_symbol s = {0};
205 
206       if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
207          /* old-style LDS symbols from initial prototype -- remove eventually */
208          s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
209       } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
210          s.align = MIN2(symbol->st_value, 1u << 16);
211          report_if(!util_is_power_of_two_nonzero(s.align));
212       } else
213          continue;
214 
215       report_if(symbol->st_size > 1u << 29);
216 
217       s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
218       s.size = symbol->st_size;
219       s.part_idx = part_idx;
220 
221       if (!strcmp(s.name, "__lds_end")) {
222          report_elf_if(s.size != 0);
223          *lds_end_align = MAX2(*lds_end_align, s.align);
224          continue;
225       }
226 
227       const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
228       if (shared) {
229          report_elf_if(s.align > shared->align);
230          report_elf_if(s.size > shared->size);
231          continue;
232       }
233 
234       util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
235    }
236 
237    return true;
238 
239 #undef report_if
240 #undef report_elf_if
241 }
242 
243 /**
244  * Open a binary consisting of one or more shader parts.
245  *
246  * \param binary the uninitialized struct
247  * \param i binary opening parameters
248  */
ac_rtld_open(struct ac_rtld_binary * binary,struct ac_rtld_open_info i)249 bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
250 {
251    /* One of the libelf implementations
252     * (http://www.mr511.de/software/english.htm) requires calling
253     * elf_version() before elf_memory().
254     */
255    elf_version(EV_CURRENT);
256 
257    memset(binary, 0, sizeof(*binary));
258    memcpy(&binary->options, &i.options, sizeof(binary->options));
259    binary->wave_size = i.wave_size;
260    binary->gfx_level = i.info->gfx_level;
261    binary->num_parts = i.num_parts;
262    binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
263    if (!binary->parts)
264       return false;
265 
266    uint64_t pasted_text_size = 0;
267    uint64_t rx_align = 1;
268    uint64_t rx_size = 0;
269    uint64_t exec_size = 0;
270 
271 #define report_if(cond)                                                                            \
272    do {                                                                                            \
273       if ((cond)) {                                                                                \
274          report_errorf(#cond);                                                                     \
275          goto fail;                                                                                \
276       }                                                                                            \
277    } while (false)
278 #define report_elf_if(cond)                                                                        \
279    do {                                                                                            \
280       if ((cond)) {                                                                                \
281          report_elf_errorf(#cond);                                                                 \
282          goto fail;                                                                                \
283       }                                                                                            \
284    } while (false)
285 
286    /* Copy and layout shared LDS symbols. */
287    if (i.num_shared_lds_symbols) {
288       if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
289                                 i.num_shared_lds_symbols))
290          goto fail;
291 
292       memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
293    }
294 
295    util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
296       symbol->part_idx = ~0u;
297 
298    unsigned max_lds_size = 64 * 1024;
299 
300    if (i.info->gfx_level == GFX6 ||
301        (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
302       max_lds_size = 32 * 1024;
303 
304    uint64_t shared_lds_size = 0;
305    if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
306       goto fail;
307 
308    if (shared_lds_size > max_lds_size) {
309       fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
310               (unsigned)shared_lds_size, max_lds_size);
311       goto fail;
312    }
313    binary->lds_size = shared_lds_size;
314 
315    /* First pass over all parts: open ELFs, pre-determine the placement of
316     * sections in the memory image, and collect and layout private LDS symbols. */
317    uint32_t lds_end_align = 0;
318 
319    if (binary->options.halt_at_entry)
320       pasted_text_size += 4;
321 
322    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
323       struct ac_rtld_part *part = &binary->parts[part_idx];
324       unsigned part_lds_symbols_begin =
325          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
326 
327       part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
328       report_elf_if(!part->elf);
329 
330       const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
331       report_elf_if(!ehdr);
332       report_if(ehdr->e_machine != EM_AMDGPU);
333 
334       size_t section_str_index;
335       size_t num_shdrs;
336       report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
337       report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
338 
339       part->num_sections = num_shdrs;
340       part->sections = calloc(sizeof(*part->sections), num_shdrs);
341       report_if(!part->sections);
342 
343       Elf_Scn *section = NULL;
344       while ((section = elf_nextscn(part->elf, section))) {
345          Elf64_Shdr *shdr = elf64_getshdr(section);
346          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
347          s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
348          report_elf_if(!s->name);
349 
350          /* Cannot actually handle linked objects yet */
351          report_elf_if(shdr->sh_addr != 0);
352 
353          /* Alignment must be 0 or a power of two */
354          report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
355          uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
356 
357          if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
358             report_if(shdr->sh_flags & SHF_WRITE);
359 
360             s->is_rx = true;
361 
362             if (shdr->sh_flags & SHF_EXECINSTR) {
363                report_elf_if(shdr->sh_size & 3);
364 
365                if (!strcmp(s->name, ".text"))
366                   s->is_pasted_text = true;
367 
368                exec_size += shdr->sh_size;
369             }
370 
371             if (s->is_pasted_text) {
372                s->offset = pasted_text_size;
373                pasted_text_size += shdr->sh_size;
374             } else {
375                rx_align = align(rx_align, sh_align);
376                rx_size = align(rx_size, sh_align);
377                s->offset = rx_size;
378                rx_size += shdr->sh_size;
379             }
380          } else if (shdr->sh_type == SHT_SYMTAB) {
381             if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
382                goto fail;
383          }
384       }
385 
386       uint64_t part_lds_size = shared_lds_size;
387       if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
388                                                 part_lds_symbols_begin),
389                           util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
390                              part_lds_symbols_begin,
391                           &part_lds_size))
392          goto fail;
393       binary->lds_size = MAX2(binary->lds_size, part_lds_size);
394    }
395 
396    binary->rx_end_markers = pasted_text_size;
397    pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
398 
399    /* __lds_end is a special symbol that points at the end of the memory
400     * occupied by other LDS symbols. Its alignment is taken as the
401     * maximum of its alignment over all shader parts where it occurs.
402     */
403    if (lds_end_align) {
404       binary->lds_size = align(binary->lds_size, lds_end_align);
405 
406       struct ac_rtld_symbol *lds_end =
407          util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
408       lds_end->name = "__lds_end";
409       lds_end->size = 0;
410       lds_end->align = lds_end_align;
411       lds_end->offset = binary->lds_size;
412       lds_end->part_idx = ~0u;
413    }
414 
415    if (binary->lds_size > max_lds_size) {
416       fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
417               (unsigned)binary->lds_size, max_lds_size);
418       goto fail;
419    }
420 
421    /* Second pass: Adjust offsets of non-pasted text sections. */
422    binary->rx_size = pasted_text_size;
423    binary->rx_size = align(binary->rx_size, rx_align);
424 
425    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
426       struct ac_rtld_part *part = &binary->parts[part_idx];
427       size_t num_shdrs;
428       elf_getshdrnum(part->elf, &num_shdrs);
429 
430       for (unsigned j = 0; j < num_shdrs; ++j) {
431          struct ac_rtld_section *s = &part->sections[j];
432          if (s->is_rx && !s->is_pasted_text)
433             s->offset += binary->rx_size;
434       }
435    }
436 
437    binary->rx_size += rx_size;
438    binary->exec_size = exec_size;
439 
440    /* The SQ fetches up to N cache lines of 16 dwords
441     * ahead of the PC, configurable by SH_MEM_CONFIG and
442     * S_INST_PREFETCH. This can cause two issues:
443     *
444     * (1) Crossing a page boundary to an unmapped page. The logic
445     *     does not distinguish between a required fetch and a "mere"
446     *     prefetch and will fault.
447     *
448     * (2) Prefetching instructions that will be changed for a
449     *     different shader.
450     *
451     * (2) is not currently an issue because we flush the I$ at IB
452     * boundaries, but (1) needs to be addressed. Due to buffer
453     * suballocation, we just play it safe.
454     */
455    unsigned prefetch_distance = 0;
456 
457    if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN)
458       prefetch_distance = 16;
459    else if (i.info->gfx_level >= GFX10)
460       prefetch_distance = 3;
461 
462    if (prefetch_distance) {
463       if (i.info->gfx_level >= GFX11)
464          binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 128);
465       else
466          binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64);
467    }
468 
469    return true;
470 
471 #undef report_if
472 #undef report_elf_if
473 
474 fail:
475    ac_rtld_close(binary);
476    return false;
477 }
478 
ac_rtld_close(struct ac_rtld_binary * binary)479 void ac_rtld_close(struct ac_rtld_binary *binary)
480 {
481    for (unsigned i = 0; i < binary->num_parts; ++i) {
482       struct ac_rtld_part *part = &binary->parts[i];
483       free(part->sections);
484       elf_end(part->elf);
485    }
486 
487    util_dynarray_fini(&binary->lds_symbols);
488    free(binary->parts);
489    binary->parts = NULL;
490    binary->num_parts = 0;
491 }
492 
get_section_by_name(struct ac_rtld_part * part,const char * name,const char ** data,size_t * nbytes)493 static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
494                                 size_t *nbytes)
495 {
496    for (unsigned i = 0; i < part->num_sections; ++i) {
497       struct ac_rtld_section *s = &part->sections[i];
498       if (s->name && !strcmp(name, s->name)) {
499          Elf_Scn *target_scn = elf_getscn(part->elf, i);
500          Elf_Data *target_data = elf_getdata(target_scn, NULL);
501          if (!target_data) {
502             report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
503             return false;
504          }
505 
506          *data = target_data->d_buf;
507          *nbytes = target_data->d_size;
508          return true;
509       }
510    }
511    return false;
512 }
513 
ac_rtld_get_section_by_name(struct ac_rtld_binary * binary,const char * name,const char ** data,size_t * nbytes)514 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
515                                  size_t *nbytes)
516 {
517    assert(binary->num_parts == 1);
518    return get_section_by_name(&binary->parts[0], name, data, nbytes);
519 }
520 
ac_rtld_read_config(const struct radeon_info * info,struct ac_rtld_binary * binary,struct ac_shader_config * config)521 bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
522                          struct ac_shader_config *config)
523 {
524    for (unsigned i = 0; i < binary->num_parts; ++i) {
525       struct ac_rtld_part *part = &binary->parts[i];
526       const char *config_data;
527       size_t config_nbytes;
528 
529       if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
530          return false;
531 
532       /* TODO: be precise about scratch use? */
533       struct ac_shader_config c = {0};
534       ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, info, &c);
535 
536       config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
537       config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
538       config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
539       config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
540       config->scratch_bytes_per_wave =
541          MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
542 
543       assert(i == 0 || config->float_mode == c.float_mode);
544       config->float_mode = c.float_mode;
545 
546       /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
547        * the main shader part is used. */
548       assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
549       config->spi_ps_input_ena = c.spi_ps_input_ena;
550       config->spi_ps_input_addr = c.spi_ps_input_addr;
551 
552       /* TODO: consistently use LDS symbols for this */
553       config->lds_size = MAX2(config->lds_size, c.lds_size);
554 
555       /* TODO: Should we combine these somehow? It's currently only
556        * used for radeonsi's compute, where multiple parts aren't used. */
557       assert(config->rsrc1 == 0 && config->rsrc2 == 0);
558       config->rsrc1 = c.rsrc1;
559       config->rsrc2 = c.rsrc2;
560    }
561 
562    return true;
563 }
564 
resolve_symbol(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Sym * sym,const char * name,uint64_t * value)565 static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
566                            const Elf64_Sym *sym, const char *name, uint64_t *value)
567 {
568    /* TODO: properly disentangle the undef and the LDS cases once
569     * STT_AMDGPU_LDS is retired. */
570    if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
571       const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
572 
573       if (lds_sym) {
574          *value = lds_sym->offset;
575          return true;
576       }
577 
578       /* TODO: resolve from other parts */
579 
580       if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value))
581          return true;
582 
583       report_errorf("symbol %s: unknown", name);
584       return false;
585    }
586 
587    struct ac_rtld_part *part = &u->binary->parts[part_idx];
588    if (sym->st_shndx >= part->num_sections) {
589       report_errorf("symbol %s: section out of bounds", name);
590       return false;
591    }
592 
593    struct ac_rtld_section *s = &part->sections[sym->st_shndx];
594    if (!s->is_rx) {
595       report_errorf("symbol %s: bad section", name);
596       return false;
597    }
598 
599    uint64_t section_base = u->rx_va + s->offset;
600 
601    *value = section_base + sym->st_value;
602    return true;
603 }
604 
apply_relocs(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Shdr * reloc_shdr,const Elf_Data * reloc_data)605 static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
606                          const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
607 {
608 #define report_if(cond)                                                                            \
609    do {                                                                                            \
610       if ((cond)) {                                                                                \
611          report_errorf(#cond);                                                                     \
612          return false;                                                                             \
613       }                                                                                            \
614    } while (false)
615 #define report_elf_if(cond)                                                                        \
616    do {                                                                                            \
617       if ((cond)) {                                                                                \
618          report_elf_errorf(#cond);                                                                 \
619          return false;                                                                             \
620       }                                                                                            \
621    } while (false)
622 
623    struct ac_rtld_part *part = &u->binary->parts[part_idx];
624    Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
625    report_elf_if(!target_scn);
626 
627    Elf_Data *target_data = elf_getdata(target_scn, NULL);
628    report_elf_if(!target_data);
629 
630    Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
631    report_elf_if(!symbols_scn);
632 
633    Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
634    report_elf_if(!symbols_shdr);
635    uint32_t strtabidx = symbols_shdr->sh_link;
636 
637    Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
638    report_elf_if(!symbols_data);
639 
640    const Elf64_Sym *symbols = symbols_data->d_buf;
641    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
642 
643    struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
644    report_if(!s->is_rx);
645 
646    const char *orig_base = target_data->d_buf;
647    char *dst_base = u->rx_ptr + s->offset;
648    uint64_t va_base = u->rx_va + s->offset;
649 
650    Elf64_Rel *rel = reloc_data->d_buf;
651    size_t num_relocs = reloc_data->d_size / sizeof(*rel);
652    for (size_t i = 0; i < num_relocs; ++i, ++rel) {
653       size_t r_sym = ELF64_R_SYM(rel->r_info);
654       unsigned r_type = ELF64_R_TYPE(rel->r_info);
655 
656       const char *orig_ptr = orig_base + rel->r_offset;
657       char *dst_ptr = dst_base + rel->r_offset;
658       uint64_t va = va_base + rel->r_offset;
659 
660       uint64_t symbol;
661       uint64_t addend;
662 
663       if (r_sym == STN_UNDEF) {
664          symbol = 0;
665       } else {
666          report_elf_if(r_sym >= num_symbols);
667 
668          const Elf64_Sym *sym = &symbols[r_sym];
669          const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
670          report_elf_if(!symbol_name);
671 
672          if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
673             return false;
674       }
675 
676       /* TODO: Should we also support .rela sections, where the
677        * addend is part of the relocation record? */
678 
679       /* Load the addend from the ELF instead of the destination,
680        * because the destination may be in VRAM. */
681       switch (r_type) {
682       case R_AMDGPU_ABS32:
683       case R_AMDGPU_ABS32_LO:
684       case R_AMDGPU_ABS32_HI:
685       case R_AMDGPU_REL32:
686       case R_AMDGPU_REL32_LO:
687       case R_AMDGPU_REL32_HI:
688          addend = *(const uint32_t *)orig_ptr;
689          break;
690       case R_AMDGPU_ABS64:
691       case R_AMDGPU_REL64:
692          addend = *(const uint64_t *)orig_ptr;
693          break;
694       default:
695          report_errorf("unsupported r_type == %u", r_type);
696          return false;
697       }
698 
699       uint64_t abs = symbol + addend;
700 
701       switch (r_type) {
702       case R_AMDGPU_ABS32:
703          assert((uint32_t)abs == abs);
704          FALLTHROUGH;
705       case R_AMDGPU_ABS32_LO:
706          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
707          break;
708       case R_AMDGPU_ABS32_HI:
709          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
710          break;
711       case R_AMDGPU_ABS64:
712          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
713          break;
714       case R_AMDGPU_REL32:
715          assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
716          FALLTHROUGH;
717       case R_AMDGPU_REL32_LO:
718          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
719          break;
720       case R_AMDGPU_REL32_HI:
721          *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
722          break;
723       case R_AMDGPU_REL64:
724          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
725          break;
726       default:
727          unreachable("bad r_type");
728       }
729    }
730 
731    return true;
732 
733 #undef report_if
734 #undef report_elf_if
735 }
736 
737 /**
738  * Upload the binary or binaries to the provided GPU buffers, including
739  * relocations.
740  */
ac_rtld_upload(struct ac_rtld_upload_info * u)741 int ac_rtld_upload(struct ac_rtld_upload_info *u)
742 {
743 #define report_if(cond)                                                                            \
744    do {                                                                                            \
745       if ((cond)) {                                                                                \
746          report_errorf(#cond);                                                                     \
747          return -1;                                                                             \
748       }                                                                                            \
749    } while (false)
750 #define report_elf_if(cond)                                                                        \
751    do {                                                                                            \
752       if ((cond)) {                                                                                \
753          report_errorf(#cond);                                                                     \
754          return -1;                                                                             \
755       }                                                                                            \
756    } while (false)
757 
758    int size = 0;
759    if (u->binary->options.halt_at_entry) {
760       /* s_sethalt 1 */
761       *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
762    }
763 
764    /* First pass: upload raw section data and lay out private LDS symbols. */
765    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
766       struct ac_rtld_part *part = &u->binary->parts[i];
767 
768       Elf_Scn *section = NULL;
769       while ((section = elf_nextscn(part->elf, section))) {
770          Elf64_Shdr *shdr = elf64_getshdr(section);
771          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
772 
773          if (!s->is_rx)
774             continue;
775 
776          report_if(shdr->sh_type != SHT_PROGBITS);
777 
778          Elf_Data *data = elf_getdata(section, NULL);
779          report_elf_if(!data || data->d_size != shdr->sh_size);
780          memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
781 
782          size = MAX2(size, s->offset + shdr->sh_size);
783       }
784    }
785 
786    if (u->binary->rx_end_markers) {
787       uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
788       for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
789          *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
790       size += 4 * DEBUGGER_NUM_MARKERS;
791    }
792 
793    /* Second pass: handle relocations, overwriting uploaded data where
794     * appropriate. */
795    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
796       struct ac_rtld_part *part = &u->binary->parts[i];
797       Elf_Scn *section = NULL;
798       while ((section = elf_nextscn(part->elf, section))) {
799          Elf64_Shdr *shdr = elf64_getshdr(section);
800          if (shdr->sh_type == SHT_REL) {
801             Elf_Data *relocs = elf_getdata(section, NULL);
802             report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
803             if (!apply_relocs(u, i, shdr, relocs))
804                return -1;
805          } else if (shdr->sh_type == SHT_RELA) {
806             report_errorf("SHT_RELA not supported");
807             return -1;
808          }
809       }
810    }
811 
812    return size;
813 
814 #undef report_if
815 #undef report_elf_if
816 }
817