1 /*
2 * Copyright 2014-2019 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "ac_rtld.h"
8
9 #include "ac_binary.h"
10 #include "ac_gpu_info.h"
11 #include "util/compiler.h"
12 #include "util/u_dynarray.h"
13 #include "util/u_math.h"
14
15 #include <gelf.h>
16 #include <libelf.h>
17 #include <stdarg.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21
#ifndef EM_AMDGPU
/* ELF machine number compared against e_machine when a part is opened.
 * Old distributions may not have this enum constant. */
#define EM_AMDGPU 224
#endif

#ifndef STT_AMDGPU_LDS
/* Symbol type of old-style LDS symbols. This is deprecated -- remove. */
#define STT_AMDGPU_LDS 13
#endif

#ifndef SHN_AMDGPU_LDS
/* Section index (st_shndx) that marks a symbol as an LDS symbol. */
#define SHN_AMDGPU_LDS 0xff00
#endif

#ifndef R_AMDGPU_NONE
/* Relocation type numbers, defined here when the system headers lack them.
 * apply_relocs() handles the ABS32*, ABS64, REL32* and REL64 types only. */
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1
#define R_AMDGPU_ABS32_HI 2
#define R_AMDGPU_ABS64 3
#define R_AMDGPU_REL32 4
#define R_AMDGPU_REL64 5
#define R_AMDGPU_ABS32 6
#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO 10
#define R_AMDGPU_REL32_HI 11
#define R_AMDGPU_RELATIVE64 13
#endif

/* For the UMR disassembler: DEBUGGER_NUM_MARKERS copies of the marker word
 * are written after the pasted text by ac_rtld_upload(). */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS 5
54
/* Per-section bookkeeping computed by ac_rtld_open() for one ELF section. */
struct ac_rtld_section {
   /* Set for SHF_ALLOC sections other than SHT_NOTE; only these are copied
    * into the uploaded image. */
   bool is_rx : 1;
   /* Set for executable sections named ".text"; these are placed back to
    * back at the start of the image. */
   bool is_pasted_text : 1;
   /* Byte offset of the section inside the uploaded image. */
   uint64_t offset;
   /* Section name as returned by elf_strptr(). */
   const char *name;
};
61
/* One shader part: its opened ELF handle and the per-section table. */
struct ac_rtld_part {
   /* Handle returned by elf_memory(); released with elf_end(). */
   Elf *elf;
   /* Array of num_sections entries indexed by ELF section index; freed in
    * ac_rtld_close(). */
   struct ac_rtld_section *sections;
   unsigned num_sections;
};
67
/**
 * Write one error line to stderr: the "ac_rtld error: " prefix, the message
 * formatted from \p fmt and \p va, and a terminating newline.
 */
static void report_errorvf(const char *fmt, va_list va)
{
   fputs("ac_rtld error: ", stderr);
   vfprintf(stderr, fmt, va);
   fputc('\n', stderr);
}
76
77 static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
78
report_errorf(const char * fmt,...)79 static void report_errorf(const char *fmt, ...)
80 {
81 va_list va;
82 va_start(va, fmt);
83 report_errorvf(fmt, va);
84 va_end(va);
85 }
86
87 static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
88
report_elf_errorf(const char * fmt,...)89 static void report_elf_errorf(const char *fmt, ...)
90 {
91 va_list va;
92 va_start(va, fmt);
93 report_errorvf(fmt, va);
94 va_end(va);
95
96 fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
97 }
98
99 /**
100 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
101 * \p part_idx.
102 */
find_symbol(const struct util_dynarray * symbols,const char * name,unsigned part_idx)103 static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
104 const char *name, unsigned part_idx)
105 {
106 util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
107 if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
108 return symbol;
109 }
110 return NULL;
111 }
112
compare_symbol_by_align(const void * lhsp,const void * rhsp)113 static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
114 {
115 const struct ac_rtld_symbol *lhs = lhsp;
116 const struct ac_rtld_symbol *rhs = rhsp;
117 if (rhs->align > lhs->align)
118 return 1;
119 if (rhs->align < lhs->align)
120 return -1;
121 return 0;
122 }
123
124 /**
125 * Sort the given symbol list by decreasing alignment and assign offsets.
126 */
layout_symbols(struct ac_rtld_symbol * symbols,unsigned num_symbols,uint64_t * ptotal_size)127 static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
128 uint64_t *ptotal_size)
129 {
130 qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
131
132 uint64_t total_size = *ptotal_size;
133
134 for (unsigned i = 0; i < num_symbols; ++i) {
135 struct ac_rtld_symbol *s = &symbols[i];
136 assert(util_is_power_of_two_nonzero(s->align));
137
138 total_size = align64(total_size, s->align);
139 s->offset = total_size;
140
141 if (total_size + s->size < total_size) {
142 report_errorf("%s: size overflow", __func__);
143 return false;
144 }
145
146 total_size += s->size;
147 }
148
149 *ptotal_size = total_size;
150 return true;
151 }
152
153 /**
154 * Read LDS symbols from the given \p section of the ELF of \p part and append
155 * them to the LDS symbols list.
156 *
157 * Shared LDS symbols are filtered out.
158 */
read_private_lds_symbols(struct ac_rtld_binary * binary,unsigned part_idx,Elf_Scn * section,uint32_t * lds_end_align)159 static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
160 Elf_Scn *section, uint32_t *lds_end_align)
161 {
162 #define report_if(cond) \
163 do { \
164 if ((cond)) { \
165 report_errorf(#cond); \
166 return false; \
167 } \
168 } while (false)
169 #define report_elf_if(cond) \
170 do { \
171 if ((cond)) { \
172 report_elf_errorf(#cond); \
173 return false; \
174 } \
175 } while (false)
176
177 struct ac_rtld_part *part = &binary->parts[part_idx];
178 Elf64_Shdr *shdr = elf64_getshdr(section);
179 uint32_t strtabidx = shdr->sh_link;
180 Elf_Data *symbols_data = elf_getdata(section, NULL);
181 report_elf_if(!symbols_data);
182
183 const Elf64_Sym *symbol = symbols_data->d_buf;
184 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
185
186 for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
187 struct ac_rtld_symbol s = {0};
188
189 if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
190 /* old-style LDS symbols from initial prototype -- remove eventually */
191 s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
192 } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
193 s.align = MIN2(symbol->st_value, 1u << 16);
194 report_if(!util_is_power_of_two_nonzero(s.align));
195 } else
196 continue;
197
198 report_if(symbol->st_size > 1u << 29);
199
200 s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
201 s.size = symbol->st_size;
202 s.part_idx = part_idx;
203
204 if (!strcmp(s.name, "__lds_end")) {
205 report_elf_if(s.size != 0);
206 *lds_end_align = MAX2(*lds_end_align, s.align);
207 continue;
208 }
209
210 const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
211 if (shared) {
212 report_elf_if(s.align > shared->align);
213 report_elf_if(s.size > shared->size);
214 continue;
215 }
216
217 util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
218 }
219
220 return true;
221
222 #undef report_if
223 #undef report_elf_if
224 }
225
226 /**
227 * Open a binary consisting of one or more shader parts.
228 *
229 * \param binary the uninitialized struct
230 * \param i binary opening parameters
231 */
ac_rtld_open(struct ac_rtld_binary * binary,struct ac_rtld_open_info i)232 bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
233 {
234 /* One of the libelf implementations
235 * (http://www.mr511.de/software/english.htm) requires calling
236 * elf_version() before elf_memory().
237 */
238 elf_version(EV_CURRENT);
239
240 memset(binary, 0, sizeof(*binary));
241 memcpy(&binary->options, &i.options, sizeof(binary->options));
242 binary->wave_size = i.wave_size;
243 binary->gfx_level = i.info->gfx_level;
244 binary->num_parts = i.num_parts;
245 binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
246 if (!binary->parts)
247 return false;
248
249 uint64_t pasted_text_size = 0;
250 uint64_t rx_align = 1;
251 uint64_t rx_size = 0;
252 uint64_t exec_size = 0;
253
254 #define report_if(cond) \
255 do { \
256 if ((cond)) { \
257 report_errorf(#cond); \
258 goto fail; \
259 } \
260 } while (false)
261 #define report_elf_if(cond) \
262 do { \
263 if ((cond)) { \
264 report_elf_errorf(#cond); \
265 goto fail; \
266 } \
267 } while (false)
268
269 /* Copy and layout shared LDS symbols. */
270 if (i.num_shared_lds_symbols) {
271 if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
272 i.num_shared_lds_symbols))
273 goto fail;
274
275 memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
276 }
277
278 util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
279 symbol->part_idx = ~0u;
280
281 unsigned max_lds_size = i.info->gfx_level == GFX6 ? 32 * 1024 : 64 * 1024;
282
283 uint64_t shared_lds_size = 0;
284 if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
285 goto fail;
286
287 if (shared_lds_size > max_lds_size) {
288 fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
289 (unsigned)shared_lds_size, max_lds_size);
290 goto fail;
291 }
292 binary->lds_size = shared_lds_size;
293
294 /* First pass over all parts: open ELFs, pre-determine the placement of
295 * sections in the memory image, and collect and layout private LDS symbols. */
296 uint32_t lds_end_align = 0;
297
298 if (binary->options.halt_at_entry)
299 pasted_text_size += 4;
300
301 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
302 struct ac_rtld_part *part = &binary->parts[part_idx];
303 unsigned part_lds_symbols_begin =
304 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
305
306 part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
307 report_elf_if(!part->elf);
308
309 const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
310 report_elf_if(!ehdr);
311 report_if(ehdr->e_machine != EM_AMDGPU);
312
313 size_t section_str_index;
314 size_t num_shdrs;
315 report_elf_if(elf_getshdrstrndx(part->elf, §ion_str_index) < 0);
316 report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
317
318 part->num_sections = num_shdrs;
319 part->sections = calloc(sizeof(*part->sections), num_shdrs);
320 report_if(!part->sections);
321
322 Elf_Scn *section = NULL;
323 while ((section = elf_nextscn(part->elf, section))) {
324 Elf64_Shdr *shdr = elf64_getshdr(section);
325 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
326 s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
327 report_elf_if(!s->name);
328
329 /* Cannot actually handle linked objects yet */
330 report_elf_if(shdr->sh_addr != 0);
331
332 /* Alignment must be 0 or a power of two */
333 report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
334 uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
335
336 if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
337 report_if(shdr->sh_flags & SHF_WRITE);
338
339 s->is_rx = true;
340
341 if (shdr->sh_flags & SHF_EXECINSTR) {
342 report_elf_if(shdr->sh_size & 3);
343
344 if (!strcmp(s->name, ".text"))
345 s->is_pasted_text = true;
346
347 exec_size += shdr->sh_size;
348 }
349
350 if (s->is_pasted_text) {
351 s->offset = pasted_text_size;
352 pasted_text_size += shdr->sh_size;
353 } else {
354 rx_align = align(rx_align, sh_align);
355 rx_size = align(rx_size, sh_align);
356 s->offset = rx_size;
357 rx_size += shdr->sh_size;
358 }
359 } else if (shdr->sh_type == SHT_SYMTAB) {
360 if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
361 goto fail;
362 }
363 }
364
365 uint64_t part_lds_size = shared_lds_size;
366 if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
367 part_lds_symbols_begin),
368 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
369 part_lds_symbols_begin,
370 &part_lds_size))
371 goto fail;
372 binary->lds_size = MAX2(binary->lds_size, part_lds_size);
373 }
374
375 binary->rx_end_markers = pasted_text_size;
376 pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
377
378 /* __lds_end is a special symbol that points at the end of the memory
379 * occupied by other LDS symbols. Its alignment is taken as the
380 * maximum of its alignment over all shader parts where it occurs.
381 */
382 if (lds_end_align) {
383 binary->lds_size = align(binary->lds_size, lds_end_align);
384
385 struct ac_rtld_symbol *lds_end =
386 util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
387 lds_end->name = "__lds_end";
388 lds_end->size = 0;
389 lds_end->align = lds_end_align;
390 lds_end->offset = binary->lds_size;
391 lds_end->part_idx = ~0u;
392 }
393
394 if (binary->lds_size > max_lds_size) {
395 fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
396 (unsigned)binary->lds_size, max_lds_size);
397 goto fail;
398 }
399
400 /* Second pass: Adjust offsets of non-pasted text sections. */
401 binary->rx_size = pasted_text_size;
402 binary->rx_size = align(binary->rx_size, rx_align);
403
404 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
405 struct ac_rtld_part *part = &binary->parts[part_idx];
406 size_t num_shdrs;
407 elf_getshdrnum(part->elf, &num_shdrs);
408
409 for (unsigned j = 0; j < num_shdrs; ++j) {
410 struct ac_rtld_section *s = &part->sections[j];
411 if (s->is_rx && !s->is_pasted_text)
412 s->offset += binary->rx_size;
413 }
414 }
415
416 binary->rx_size += rx_size;
417 binary->exec_size = exec_size;
418
419 return true;
420
421 #undef report_if
422 #undef report_elf_if
423
424 fail:
425 ac_rtld_close(binary);
426 return false;
427 }
428
ac_rtld_close(struct ac_rtld_binary * binary)429 void ac_rtld_close(struct ac_rtld_binary *binary)
430 {
431 for (unsigned i = 0; i < binary->num_parts; ++i) {
432 struct ac_rtld_part *part = &binary->parts[i];
433 free(part->sections);
434 elf_end(part->elf);
435 }
436
437 util_dynarray_fini(&binary->lds_symbols);
438 free(binary->parts);
439 binary->parts = NULL;
440 binary->num_parts = 0;
441 }
442
get_section_by_name(struct ac_rtld_part * part,const char * name,const char ** data,size_t * nbytes)443 static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
444 size_t *nbytes)
445 {
446 for (unsigned i = 0; i < part->num_sections; ++i) {
447 struct ac_rtld_section *s = &part->sections[i];
448 if (s->name && !strcmp(name, s->name)) {
449 Elf_Scn *target_scn = elf_getscn(part->elf, i);
450 Elf_Data *target_data = elf_getdata(target_scn, NULL);
451 if (!target_data) {
452 report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
453 return false;
454 }
455
456 *data = target_data->d_buf;
457 *nbytes = target_data->d_size;
458 return true;
459 }
460 }
461 return false;
462 }
463
ac_rtld_get_section_by_name(struct ac_rtld_binary * binary,const char * name,const char ** data,size_t * nbytes)464 bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
465 size_t *nbytes)
466 {
467 assert(binary->num_parts == 1);
468 return get_section_by_name(&binary->parts[0], name, data, nbytes);
469 }
470
ac_rtld_read_config(const struct radeon_info * info,struct ac_rtld_binary * binary,struct ac_shader_config * config)471 bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
472 struct ac_shader_config *config)
473 {
474 for (unsigned i = 0; i < binary->num_parts; ++i) {
475 struct ac_rtld_part *part = &binary->parts[i];
476 const char *config_data;
477 size_t config_nbytes;
478
479 if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
480 return false;
481
482 /* TODO: be precise about scratch use? */
483 struct ac_shader_config c = {0};
484 ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, info, &c);
485
486 config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
487 config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
488 config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
489 config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
490 config->scratch_bytes_per_wave =
491 MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
492
493 assert(i == 0 || config->float_mode == c.float_mode);
494 config->float_mode = c.float_mode;
495
496 /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
497 * the main shader part is used. */
498 assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
499 config->spi_ps_input_ena = c.spi_ps_input_ena;
500 config->spi_ps_input_addr = c.spi_ps_input_addr;
501
502 /* TODO: consistently use LDS symbols for this */
503 config->lds_size = MAX2(config->lds_size, c.lds_size);
504
505 /* TODO: Should we combine these somehow? It's currently only
506 * used for radeonsi's compute, where multiple parts aren't used. */
507 assert(config->rsrc1 == 0 && config->rsrc2 == 0);
508 config->rsrc1 = c.rsrc1;
509 config->rsrc2 = c.rsrc2;
510 }
511
512 return true;
513 }
514
resolve_symbol(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Sym * sym,const char * name,uint64_t * value)515 static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
516 const Elf64_Sym *sym, const char *name, uint64_t *value)
517 {
518 /* TODO: properly disentangle the undef and the LDS cases once
519 * STT_AMDGPU_LDS is retired. */
520 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
521 const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
522
523 if (lds_sym) {
524 *value = lds_sym->offset;
525 return true;
526 }
527
528 /* TODO: resolve from other parts */
529
530 if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value))
531 return true;
532
533 report_errorf("symbol %s: unknown", name);
534 return false;
535 }
536
537 struct ac_rtld_part *part = &u->binary->parts[part_idx];
538 if (sym->st_shndx >= part->num_sections) {
539 report_errorf("symbol %s: section out of bounds", name);
540 return false;
541 }
542
543 struct ac_rtld_section *s = &part->sections[sym->st_shndx];
544 if (!s->is_rx) {
545 report_errorf("symbol %s: bad section", name);
546 return false;
547 }
548
549 uint64_t section_base = u->rx_va + s->offset;
550
551 *value = section_base + sym->st_value;
552 return true;
553 }
554
apply_relocs(const struct ac_rtld_upload_info * u,unsigned part_idx,const Elf64_Shdr * reloc_shdr,const Elf_Data * reloc_data)555 static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
556 const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
557 {
558 #define report_if(cond) \
559 do { \
560 if ((cond)) { \
561 report_errorf(#cond); \
562 return false; \
563 } \
564 } while (false)
565 #define report_elf_if(cond) \
566 do { \
567 if ((cond)) { \
568 report_elf_errorf(#cond); \
569 return false; \
570 } \
571 } while (false)
572
573 struct ac_rtld_part *part = &u->binary->parts[part_idx];
574 Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
575 report_elf_if(!target_scn);
576
577 Elf_Data *target_data = elf_getdata(target_scn, NULL);
578 report_elf_if(!target_data);
579
580 Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
581 report_elf_if(!symbols_scn);
582
583 Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
584 report_elf_if(!symbols_shdr);
585 uint32_t strtabidx = symbols_shdr->sh_link;
586
587 Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
588 report_elf_if(!symbols_data);
589
590 const Elf64_Sym *symbols = symbols_data->d_buf;
591 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
592
593 struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
594 report_if(!s->is_rx);
595
596 const char *orig_base = target_data->d_buf;
597 char *dst_base = u->rx_ptr + s->offset;
598 uint64_t va_base = u->rx_va + s->offset;
599
600 Elf64_Rel *rel = reloc_data->d_buf;
601 size_t num_relocs = reloc_data->d_size / sizeof(*rel);
602 for (size_t i = 0; i < num_relocs; ++i, ++rel) {
603 size_t r_sym = ELF64_R_SYM(rel->r_info);
604 unsigned r_type = ELF64_R_TYPE(rel->r_info);
605
606 const char *orig_ptr = orig_base + rel->r_offset;
607 char *dst_ptr = dst_base + rel->r_offset;
608 uint64_t va = va_base + rel->r_offset;
609
610 uint64_t symbol;
611 uint64_t addend;
612
613 if (r_sym == STN_UNDEF) {
614 symbol = 0;
615 } else {
616 report_elf_if(r_sym >= num_symbols);
617
618 const Elf64_Sym *sym = &symbols[r_sym];
619 const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
620 report_elf_if(!symbol_name);
621
622 if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
623 return false;
624 }
625
626 /* TODO: Should we also support .rela sections, where the
627 * addend is part of the relocation record? */
628
629 /* Load the addend from the ELF instead of the destination,
630 * because the destination may be in VRAM. */
631 switch (r_type) {
632 case R_AMDGPU_ABS32:
633 case R_AMDGPU_ABS32_LO:
634 case R_AMDGPU_ABS32_HI:
635 case R_AMDGPU_REL32:
636 case R_AMDGPU_REL32_LO:
637 case R_AMDGPU_REL32_HI:
638 addend = *(const uint32_t *)orig_ptr;
639 break;
640 case R_AMDGPU_ABS64:
641 case R_AMDGPU_REL64:
642 addend = *(const uint64_t *)orig_ptr;
643 break;
644 default:
645 report_errorf("unsupported r_type == %u", r_type);
646 return false;
647 }
648
649 uint64_t abs = symbol + addend;
650
651 switch (r_type) {
652 case R_AMDGPU_ABS32:
653 assert((uint32_t)abs == abs);
654 FALLTHROUGH;
655 case R_AMDGPU_ABS32_LO:
656 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
657 break;
658 case R_AMDGPU_ABS32_HI:
659 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
660 break;
661 case R_AMDGPU_ABS64:
662 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
663 break;
664 case R_AMDGPU_REL32:
665 assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
666 FALLTHROUGH;
667 case R_AMDGPU_REL32_LO:
668 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
669 break;
670 case R_AMDGPU_REL32_HI:
671 *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
672 break;
673 case R_AMDGPU_REL64:
674 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
675 break;
676 default:
677 unreachable("bad r_type");
678 }
679 }
680
681 return true;
682
683 #undef report_if
684 #undef report_elf_if
685 }
686
687 /**
688 * Upload the binary or binaries to the provided GPU buffers, including
689 * relocations.
690 */
ac_rtld_upload(struct ac_rtld_upload_info * u)691 int ac_rtld_upload(struct ac_rtld_upload_info *u)
692 {
693 #define report_if(cond) \
694 do { \
695 if ((cond)) { \
696 report_errorf(#cond); \
697 return -1; \
698 } \
699 } while (false)
700 #define report_elf_if(cond) \
701 do { \
702 if ((cond)) { \
703 report_errorf(#cond); \
704 return -1; \
705 } \
706 } while (false)
707
708 int size = 0;
709 if (u->binary->options.halt_at_entry) {
710 /* s_sethalt 1 */
711 *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
712 }
713
714 /* First pass: upload raw section data and lay out private LDS symbols. */
715 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
716 struct ac_rtld_part *part = &u->binary->parts[i];
717
718 Elf_Scn *section = NULL;
719 while ((section = elf_nextscn(part->elf, section))) {
720 Elf64_Shdr *shdr = elf64_getshdr(section);
721 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
722
723 if (!s->is_rx)
724 continue;
725
726 report_if(shdr->sh_type != SHT_PROGBITS);
727
728 Elf_Data *data = elf_getdata(section, NULL);
729 report_elf_if(!data || data->d_size != shdr->sh_size);
730 memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
731
732 size = MAX2(size, s->offset + shdr->sh_size);
733 }
734 }
735
736 if (u->binary->rx_end_markers) {
737 uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
738 for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
739 *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
740 size += 4 * DEBUGGER_NUM_MARKERS;
741 }
742
743 /* Second pass: handle relocations, overwriting uploaded data where
744 * appropriate. */
745 for (unsigned i = 0; i < u->binary->num_parts; ++i) {
746 struct ac_rtld_part *part = &u->binary->parts[i];
747 Elf_Scn *section = NULL;
748 while ((section = elf_nextscn(part->elf, section))) {
749 Elf64_Shdr *shdr = elf64_getshdr(section);
750 if (shdr->sh_type == SHT_REL) {
751 Elf_Data *relocs = elf_getdata(section, NULL);
752 report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
753 if (!apply_relocs(u, i, shdr, relocs))
754 return -1;
755 } else if (shdr->sh_type == SHT_RELA) {
756 report_errorf("SHT_RELA not supported");
757 return -1;
758 }
759 }
760 }
761
762 return size;
763
764 #undef report_if
765 #undef report_elf_if
766 }
767