• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014-2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "ac_rtld.h"
25 
26 #include "ac_binary.h"
27 #include "ac_gpu_info.h"
28 #include "util/u_dynarray.h"
29 #include "util/u_math.h"
30 
31 #include <gelf.h>
32 #include <libelf.h>
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 
/* Old distributions may not have this enum constant, so the value is defined
 * locally. It is compared against the ELF header's e_machine field. */
#define MY_EM_AMDGPU 224

#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13 /* this is deprecated -- remove */
#endif

#ifndef SHN_AMDGPU_LDS
#define SHN_AMDGPU_LDS 0xff00
#endif

/* Relocation type numbers, supplied only when no earlier header defined them. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE          0
#define R_AMDGPU_ABS32_LO      1
#define R_AMDGPU_ABS32_HI      2
#define R_AMDGPU_ABS64         3
#define R_AMDGPU_REL32         4
#define R_AMDGPU_REL64         5
#define R_AMDGPU_ABS32         6
#define R_AMDGPU_GOTPCREL      7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO      10
#define R_AMDGPU_REL32_HI      11
#define R_AMDGPU_RELATIVE64    13
#endif

/* For the UMR disassembler: DEBUGGER_NUM_MARKERS copies of the marker dword
 * are written after the pasted text when the binary is uploaded. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS        5
68 
/* Per-section bookkeeping for one ELF section of a shader part. */
struct ac_rtld_section {
   /* Set for SHF_ALLOC sections other than notes; only these are uploaded. */
   bool is_rx : 1;
   /* Set for executable sections named ".text"; these are laid out back to
    * back ahead of all other rx sections. */
   bool is_pasted_text : 1;
   /* Byte offset of the section inside the rx memory image. */
   uint64_t offset;
   /* Section name as returned by elf_strptr(). */
   const char *name;
};
75 
76 struct ac_rtld_part {
77    Elf *elf;
78    struct ac_rtld_section *sections;
79    unsigned num_sections;
80 };
81 
/**
 * Format a message from \p fmt and \p va and print it to stderr, prefixed
 * with "ac_rtld error: ". If formatting fails, a fixed placeholder is
 * printed instead.
 */
static void report_erroraf(const char *fmt, va_list va)
{
   char *formatted;
   const bool formatted_ok = vasprintf(&formatted, fmt, va) >= 0;

   fprintf(stderr, "ac_rtld error: %s\n", formatted_ok ? formatted : "(vasprintf failed)");

   /* vasprintf only hands back a buffer to release when it succeeded. */
   if (formatted_ok)
      free(formatted);
}
94 
95 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
96 
report_errorf(const char * fmt,...)97 static void report_errorf(const char *fmt, ...)
98 {
99    va_list va;
100    va_start(va, fmt);
101    report_erroraf(fmt, va);
102    va_end(va);
103 }
104 
105 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
106 
report_elf_errorf(const char * fmt,...)107 static void report_elf_errorf(const char *fmt, ...)
108 {
109    va_list va;
110    va_start(va, fmt);
111    report_erroraf(fmt, va);
112    va_end(va);
113 
114    fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
115 }
116 
117 /**
118  * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
119  * \p part_idx.
120  */
find_symbol(const struct util_dynarray * symbols,const char * name,unsigned part_idx)121 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
122                                                 const char *name, unsigned part_idx)
123 {
124    util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
125       if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
126          return symbol;
127    }
128    return 0;
129 }
130 
compare_symbol_by_align(const void * lhsp,const void * rhsp)131 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
132 {
133    const struct ac_rtld_symbol *lhs = lhsp;
134    const struct ac_rtld_symbol *rhs = rhsp;
135    if (rhs->align > lhs->align)
136       return 1;
137    if (rhs->align < lhs->align)
138       return -1;
139    return 0;
140 }
141 
142 /**
143  * Sort the given symbol list by decreasing alignment and assign offsets.
144  */
layout_symbols(struct ac_rtld_symbol * symbols,unsigned num_symbols,uint64_t * ptotal_size)145 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
146                            uint64_t *ptotal_size)
147 {
148    qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
149 
150    uint64_t total_size = *ptotal_size;
151 
152    for (unsigned i = 0; i < num_symbols; ++i) {
153       struct ac_rtld_symbol *s = &symbols[i];
154       assert(util_is_power_of_two_nonzero(s->align));
155 
156       total_size = align64(total_size, s->align);
157       s->offset = total_size;
158 
159       if (total_size + s->size < total_size) {
160          report_errorf("%s: size overflow", __FUNCTION__);
161          return false;
162       }
163 
164       total_size += s->size;
165    }
166 
167    *ptotal_size = total_size;
168    return true;
169 }
170 
171 /**
172  * Read LDS symbols from the given \p section of the ELF of \p part and append
173  * them to the LDS symbols list.
174  *
175  * Shared LDS symbols are filtered out.
176  */
read_private_lds_symbols(struct ac_rtld_binary * binary,unsigned part_idx,Elf_Scn * section,uint32_t * lds_end_align)177 static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
178                                      Elf_Scn *section, uint32_t *lds_end_align)
179 {
180 #define report_if(cond)                                                                            \
181    do {                                                                                            \
182       if ((cond)) {                                                                                \
183          report_errorf(#cond);                                                                     \
184          return false;                                                                             \
185       }                                                                                            \
186    } while (false)
187 #define report_elf_if(cond)                                                                        \
188    do {                                                                                            \
189       if ((cond)) {                                                                                \
190          report_elf_errorf(#cond);                                                                 \
191          return false;                                                                             \
192       }                                                                                            \
193    } while (false)
194 
195    struct ac_rtld_part *part = &binary->parts[part_idx];
196    Elf64_Shdr *shdr = elf64_getshdr(section);
197    uint32_t strtabidx = shdr->sh_link;
198    Elf_Data *symbols_data = elf_getdata(section, NULL);
199    report_elf_if(!symbols_data);
200 
201    const Elf64_Sym *symbol = symbols_data->d_buf;
202    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
203 
204    for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
205       struct ac_rtld_symbol s = {0};
206 
207       if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
208          /* old-style LDS symbols from initial prototype -- remove eventually */
209          s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
210       } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
211          s.align = MIN2(symbol->st_value, 1u << 16);
212          report_if(!util_is_power_of_two_nonzero(s.align));
213       } else
214          continue;
215 
216       report_if(symbol->st_size > 1u << 29);
217 
218       s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
219       s.size = symbol->st_size;
220       s.part_idx = part_idx;
221 
222       if (!strcmp(s.name, "__lds_end")) {
223          report_elf_if(s.size != 0);
224          *lds_end_align = MAX2(*lds_end_align, s.align);
225          continue;
226       }
227 
228       const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
229       if (shared) {
230          report_elf_if(s.align > shared->align);
231          report_elf_if(s.size > shared->size);
232          continue;
233       }
234 
235       util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
236    }
237 
238    return true;
239 
240 #undef report_if
241 #undef report_elf_if
242 }
243 
244 /**
245  * Open a binary consisting of one or more shader parts.
246  *
247  * \param binary the uninitialized struct
248  * \param i binary opening parameters
249  */
ac_rtld_open(struct ac_rtld_binary * binary,struct ac_rtld_open_info i)250 bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
251 {
252    /* One of the libelf implementations
253     * (http://www.mr511.de/software/english.htm) requires calling
254     * elf_version() before elf_memory().
255     */
256    elf_version(EV_CURRENT);
257 
258    memset(binary, 0, sizeof(*binary));
259    memcpy(&binary->options, &i.options, sizeof(binary->options));
260    binary->wave_size = i.wave_size;
261    binary->num_parts = i.num_parts;
262    binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
263    if (!binary->parts)
264       return false;
265 
266    uint64_t pasted_text_size = 0;
267    uint64_t rx_align = 1;
268    uint64_t rx_size = 0;
269    uint64_t exec_size = 0;
270 
271 #define report_if(cond)                                                                            \
272    do {                                                                                            \
273       if ((cond)) {                                                                                \
274          report_errorf(#cond);                                                                     \
275          goto fail;                                                                                \
276       }                                                                                            \
277    } while (false)
278 #define report_elf_if(cond)                                                                        \
279    do {                                                                                            \
280       if ((cond)) {                                                                                \
281          report_elf_errorf(#cond);                                                                 \
282          goto fail;                                                                                \
283       }                                                                                            \
284    } while (false)
285 
286    /* Copy and layout shared LDS symbols. */
287    if (i.num_shared_lds_symbols) {
288       if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
289                                 i.num_shared_lds_symbols))
290          goto fail;
291 
292       memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
293    }
294 
295    util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
296       symbol->part_idx = ~0u;
297 
298    unsigned max_lds_size = 64 * 1024;
299 
300    if (i.info->chip_class == GFX6 ||
301        (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
302       max_lds_size = 32 * 1024;
303 
304    uint64_t shared_lds_size = 0;
305    if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
306       goto fail;
307 
308    if (shared_lds_size > max_lds_size) {
309       fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
310               (unsigned)shared_lds_size, max_lds_size);
311       goto fail;
312    }
313    binary->lds_size = shared_lds_size;
314 
315    /* First pass over all parts: open ELFs, pre-determine the placement of
316     * sections in the memory image, and collect and layout private LDS symbols. */
317    uint32_t lds_end_align = 0;
318 
319    if (binary->options.halt_at_entry)
320       pasted_text_size += 4;
321 
322    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
323       struct ac_rtld_part *part = &binary->parts[part_idx];
324       unsigned part_lds_symbols_begin =
325          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
326 
327       part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
328       report_elf_if(!part->elf);
329 
330       const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
331       report_elf_if(!ehdr);
332       report_if(ehdr->e_machine != MY_EM_AMDGPU);
333 
334       size_t section_str_index;
335       size_t num_shdrs;
336       report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
337       report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
338 
339       part->num_sections = num_shdrs;
340       part->sections = calloc(sizeof(*part->sections), num_shdrs);
341       report_if(!part->sections);
342 
343       Elf_Scn *section = NULL;
344       while ((section = elf_nextscn(part->elf, section))) {
345          Elf64_Shdr *shdr = elf64_getshdr(section);
346          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
347          s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
348          report_elf_if(!s->name);
349 
350          /* Cannot actually handle linked objects yet */
351          report_elf_if(shdr->sh_addr != 0);
352 
353          /* Alignment must be 0 or a power of two */
354          report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
355          uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
356 
357          if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
358             report_if(shdr->sh_flags & SHF_WRITE);
359 
360             s->is_rx = true;
361 
362             if (shdr->sh_flags & SHF_EXECINSTR) {
363                report_elf_if(shdr->sh_size & 3);
364 
365                if (!strcmp(s->name, ".text"))
366                   s->is_pasted_text = true;
367 
368                exec_size += shdr->sh_size;
369             }
370 
371             if (s->is_pasted_text) {
372                s->offset = pasted_text_size;
373                pasted_text_size += shdr->sh_size;
374             } else {
375                rx_align = align(rx_align, sh_align);
376                rx_size = align(rx_size, sh_align);
377                s->offset = rx_size;
378                rx_size += shdr->sh_size;
379             }
380          } else if (shdr->sh_type == SHT_SYMTAB) {
381             if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
382                goto fail;
383          }
384       }
385 
386       uint64_t part_lds_size = shared_lds_size;
387       if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
388                                                 part_lds_symbols_begin),
389                           util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
390                              part_lds_symbols_begin,
391                           &part_lds_size))
392          goto fail;
393       binary->lds_size = MAX2(binary->lds_size, part_lds_size);
394    }
395 
396    binary->rx_end_markers = pasted_text_size;
397    pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
398 
399    /* __lds_end is a special symbol that points at the end of the memory
400     * occupied by other LDS symbols. Its alignment is taken as the
401     * maximum of its alignment over all shader parts where it occurs.
402     */
403    if (lds_end_align) {
404       binary->lds_size = align(binary->lds_size, lds_end_align);
405 
406       struct ac_rtld_symbol *lds_end =
407          util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
408       lds_end->name = "__lds_end";
409       lds_end->size = 0;
410       lds_end->align = lds_end_align;
411       lds_end->offset = binary->lds_size;
412       lds_end->part_idx = ~0u;
413    }
414 
415    if (binary->lds_size > max_lds_size) {
416       fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
417               (unsigned)binary->lds_size, max_lds_size);
418       goto fail;
419    }
420 
421    /* Second pass: Adjust offsets of non-pasted text sections. */
422    binary->rx_size = pasted_text_size;
423    binary->rx_size = align(binary->rx_size, rx_align);
424 
425    for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
426       struct ac_rtld_part *part = &binary->parts[part_idx];
427       size_t num_shdrs;
428       elf_getshdrnum(part->elf, &num_shdrs);
429 
430       for (unsigned j = 0; j < num_shdrs; ++j) {
431          struct ac_rtld_section *s = &part->sections[j];
432          if (s->is_rx && !s->is_pasted_text)
433             s->offset += binary->rx_size;
434       }
435    }
436 
437    binary->rx_size += rx_size;
438    binary->exec_size = exec_size;
439 
440    if (i.info->chip_class >= GFX10) {
441       /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
442        * ahead of the PC, configurable by SH_MEM_CONFIG and
443        * S_INST_PREFETCH. This can cause two issues:
444        *
445        * (1) Crossing a page boundary to an unmapped page. The logic
446        *     does not distinguish between a required fetch and a "mere"
447        *     prefetch and will fault.
448        *
449        * (2) Prefetching instructions that will be changed for a
450        *     different shader.
451        *
452        * (2) is not currently an issue because we flush the I$ at IB
453        * boundaries, but (1) needs to be addressed. Due to buffer
454        * suballocation, we just play it safe.
455        */
456       binary->rx_size = align(binary->rx_size + 3 * 64, 64);
457    }
458 
459    return true;
460 
461 #undef report_if
462 #undef report_elf_if
463 
464 fail:
465    ac_rtld_close(binary);
466    return false;
467 }
468 
ac_rtld_close(struct ac_rtld_binary * binary)469 void ac_rtld_close(struct ac_rtld_binary *binary)
470 {
471    for (unsigned i = 0; i < binary->num_parts; ++i) {
472       struct ac_rtld_part *part = &binary->parts[i];
473       free(part->sections);
474       elf_end(part->elf);
475    }
476 
477    util_dynarray_fini(&binary->lds_symbols);
478    free(binary->parts);
479    binary->parts = NULL;
480    binary->num_parts = 0;
481 }
482 
get_section_by_name(struct ac_rtld_part * part,const char * name,const char ** data,size_t * nbytes)483 static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
484                                 size_t *nbytes)
485 {
486    for (unsigned i = 0; i < part->num_sections; ++i) {
487       struct ac_rtld_section *s = &part->sections[i];
488       if (s->name && !strcmp(name, s->name)) {
489          Elf_Scn *target_scn = elf_getscn(part->elf, i);
490          Elf_Data *target_data = elf_getdata(target_scn, NULL);
491          if (!target_data) {
492             report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
493             return false;
494          }
495 
496          *data = target_data->d_buf;
497          *nbytes = target_data->d_size;
498          return true;
499       }
500    }
501    return false;
502 }
503 
ac_rtld_get_section_by_name(struct ac_rtld_binary * binary,const char * name,const char ** data,size_t * nbytes)504 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
505                                  size_t *nbytes)
506 {
507    assert(binary->num_parts == 1);
508    return get_section_by_name(&binary->parts[0], name, data, nbytes);
509 }
510 
ac_rtld_read_config(const struct radeon_info * info,struct ac_rtld_binary * binary,struct ac_shader_config * config)511 bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
512                          struct ac_shader_config *config)
513 {
514    for (unsigned i = 0; i < binary->num_parts; ++i) {
515       struct ac_rtld_part *part = &binary->parts[i];
516       const char *config_data;
517       size_t config_nbytes;
518 
519       if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
520          return false;
521 
522       /* TODO: be precise about scratch use? */
523       struct ac_shader_config c = {0};
524       ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, true, info, &c);
525 
526       config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
527       config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
528       config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
529       config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
530       config->scratch_bytes_per_wave =
531          MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
532 
533       assert(i == 0 || config->float_mode == c.float_mode);
534       config->float_mode = c.float_mode;
535 
536       /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
537        * the main shader part is used. */
538       assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
539       config->spi_ps_input_ena = c.spi_ps_input_ena;
540       config->spi_ps_input_addr = c.spi_ps_input_addr;
541 
542       /* TODO: consistently use LDS symbols for this */
543       config->lds_size = MAX2(config->lds_size, c.lds_size);
544 
545       /* TODO: Should we combine these somehow? It's currently only
546        * used for radeonsi's compute, where multiple parts aren't used. */
547       assert(config->rsrc1 == 0 && config->rsrc2 == 0);
548       config->rsrc1 = c.rsrc1;
549       config->rsrc2 = c.rsrc2;
550    }
551 
552    return true;
553 }
554 
resolve_symbol(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Sym * sym,const char * name,uint64_t * value)555 static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
556                            const Elf64_Sym *sym, const char *name, uint64_t *value)
557 {
558    /* TODO: properly disentangle the undef and the LDS cases once
559     * STT_AMDGPU_LDS is retired. */
560    if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
561       const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
562 
563       if (lds_sym) {
564          *value = lds_sym->offset;
565          return true;
566       }
567 
568       /* TODO: resolve from other parts */
569 
570       if (u->get_external_symbol(u->cb_data, name, value))
571          return true;
572 
573       report_errorf("symbol %s: unknown", name);
574       return false;
575    }
576 
577    struct ac_rtld_part *part = &u->binary->parts[part_idx];
578    if (sym->st_shndx >= part->num_sections) {
579       report_errorf("symbol %s: section out of bounds", name);
580       return false;
581    }
582 
583    struct ac_rtld_section *s = &part->sections[sym->st_shndx];
584    if (!s->is_rx) {
585       report_errorf("symbol %s: bad section", name);
586       return false;
587    }
588 
589    uint64_t section_base = u->rx_va + s->offset;
590 
591    *value = section_base + sym->st_value;
592    return true;
593 }
594 
apply_relocs(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Shdr * reloc_shdr,const Elf_Data * reloc_data)595 static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
596                          const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
597 {
598 #define report_if(cond)                                                                            \
599    do {                                                                                            \
600       if ((cond)) {                                                                                \
601          report_errorf(#cond);                                                                     \
602          return false;                                                                             \
603       }                                                                                            \
604    } while (false)
605 #define report_elf_if(cond)                                                                        \
606    do {                                                                                            \
607       if ((cond)) {                                                                                \
608          report_elf_errorf(#cond);                                                                 \
609          return false;                                                                             \
610       }                                                                                            \
611    } while (false)
612 
613    struct ac_rtld_part *part = &u->binary->parts[part_idx];
614    Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
615    report_elf_if(!target_scn);
616 
617    Elf_Data *target_data = elf_getdata(target_scn, NULL);
618    report_elf_if(!target_data);
619 
620    Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
621    report_elf_if(!symbols_scn);
622 
623    Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
624    report_elf_if(!symbols_shdr);
625    uint32_t strtabidx = symbols_shdr->sh_link;
626 
627    Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
628    report_elf_if(!symbols_data);
629 
630    const Elf64_Sym *symbols = symbols_data->d_buf;
631    size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
632 
633    struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
634    report_if(!s->is_rx);
635 
636    const char *orig_base = target_data->d_buf;
637    char *dst_base = u->rx_ptr + s->offset;
638    uint64_t va_base = u->rx_va + s->offset;
639 
640    Elf64_Rel *rel = reloc_data->d_buf;
641    size_t num_relocs = reloc_data->d_size / sizeof(*rel);
642    for (size_t i = 0; i < num_relocs; ++i, ++rel) {
643       size_t r_sym = ELF64_R_SYM(rel->r_info);
644       unsigned r_type = ELF64_R_TYPE(rel->r_info);
645 
646       const char *orig_ptr = orig_base + rel->r_offset;
647       char *dst_ptr = dst_base + rel->r_offset;
648       uint64_t va = va_base + rel->r_offset;
649 
650       uint64_t symbol;
651       uint64_t addend;
652 
653       if (r_sym == STN_UNDEF) {
654          symbol = 0;
655       } else {
656          report_elf_if(r_sym >= num_symbols);
657 
658          const Elf64_Sym *sym = &symbols[r_sym];
659          const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
660          report_elf_if(!symbol_name);
661 
662          if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
663             return false;
664       }
665 
666       /* TODO: Should we also support .rela sections, where the
667        * addend is part of the relocation record? */
668 
669       /* Load the addend from the ELF instead of the destination,
670        * because the destination may be in VRAM. */
671       switch (r_type) {
672       case R_AMDGPU_ABS32:
673       case R_AMDGPU_ABS32_LO:
674       case R_AMDGPU_ABS32_HI:
675       case R_AMDGPU_REL32:
676       case R_AMDGPU_REL32_LO:
677       case R_AMDGPU_REL32_HI:
678          addend = *(const uint32_t *)orig_ptr;
679          break;
680       case R_AMDGPU_ABS64:
681       case R_AMDGPU_REL64:
682          addend = *(const uint64_t *)orig_ptr;
683          break;
684       default:
685          report_errorf("unsupported r_type == %u", r_type);
686          return false;
687       }
688 
689       uint64_t abs = symbol + addend;
690 
691       switch (r_type) {
692       case R_AMDGPU_ABS32:
693          assert((uint32_t)abs == abs);
694       case R_AMDGPU_ABS32_LO:
695          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
696          break;
697       case R_AMDGPU_ABS32_HI:
698          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
699          break;
700       case R_AMDGPU_ABS64:
701          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
702          break;
703       case R_AMDGPU_REL32:
704          assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
705       case R_AMDGPU_REL32_LO:
706          *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
707          break;
708       case R_AMDGPU_REL32_HI:
709          *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
710          break;
711       case R_AMDGPU_REL64:
712          *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
713          break;
714       default:
715          unreachable("bad r_type");
716       }
717    }
718 
719    return true;
720 
721 #undef report_if
722 #undef report_elf_if
723 }
724 
725 /**
726  * Upload the binary or binaries to the provided GPU buffers, including
727  * relocations.
728  */
ac_rtld_upload(struct ac_rtld_upload_info * u)729 bool ac_rtld_upload(struct ac_rtld_upload_info *u)
730 {
731 #define report_if(cond)                                                                            \
732    do {                                                                                            \
733       if ((cond)) {                                                                                \
734          report_errorf(#cond);                                                                     \
735          return false;                                                                             \
736       }                                                                                            \
737    } while (false)
738 #define report_elf_if(cond)                                                                        \
739    do {                                                                                            \
740       if ((cond)) {                                                                                \
741          report_errorf(#cond);                                                                     \
742          return false;                                                                             \
743       }                                                                                            \
744    } while (false)
745 
746    if (u->binary->options.halt_at_entry) {
747       /* s_sethalt 1 */
748       *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
749    }
750 
751    /* First pass: upload raw section data and lay out private LDS symbols. */
752    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
753       struct ac_rtld_part *part = &u->binary->parts[i];
754 
755       Elf_Scn *section = NULL;
756       while ((section = elf_nextscn(part->elf, section))) {
757          Elf64_Shdr *shdr = elf64_getshdr(section);
758          struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
759 
760          if (!s->is_rx)
761             continue;
762 
763          report_if(shdr->sh_type != SHT_PROGBITS);
764 
765          Elf_Data *data = elf_getdata(section, NULL);
766          report_elf_if(!data || data->d_size != shdr->sh_size);
767          memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
768       }
769    }
770 
771    if (u->binary->rx_end_markers) {
772       uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
773       for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
774          *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
775    }
776 
777    /* Second pass: handle relocations, overwriting uploaded data where
778     * appropriate. */
779    for (unsigned i = 0; i < u->binary->num_parts; ++i) {
780       struct ac_rtld_part *part = &u->binary->parts[i];
781       Elf_Scn *section = NULL;
782       while ((section = elf_nextscn(part->elf, section))) {
783          Elf64_Shdr *shdr = elf64_getshdr(section);
784          if (shdr->sh_type == SHT_REL) {
785             Elf_Data *relocs = elf_getdata(section, NULL);
786             report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
787             if (!apply_relocs(u, i, shdr, relocs))
788                return false;
789          } else if (shdr->sh_type == SHT_RELA) {
790             report_errorf("SHT_RELA not supported");
791             return false;
792          }
793       }
794    }
795 
796    return true;
797 
798 #undef report_if
799 #undef report_elf_if
800 }
801