• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2024 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radv_printf.h"
8 #include "radv_device.h"
9 #include "radv_physical_device.h"
10 
11 #include "util/hash_table.h"
12 #include "util/strndup.h"
13 #include "util/u_printf.h"
14 
15 #include "nir.h"
16 #include "nir_builder.h"
17 
18 static struct hash_table *device_ht = NULL;
19 
20 VkResult
radv_printf_data_init(struct radv_device * device)21 radv_printf_data_init(struct radv_device *device)
22 {
23    const struct radv_physical_device *pdev = radv_device_physical(device);
24 
25    util_dynarray_init(&device->printf.formats, NULL);
26 
27    device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
28    if (device->printf.buffer_size < sizeof(struct radv_printf_buffer_header))
29       return VK_SUCCESS;
30 
31    VkBufferCreateInfo buffer_create_info = {
32       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
33       .pNext =
34          &(VkBufferUsageFlags2CreateInfo){
35             .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO,
36             .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT,
37          },
38       .size = device->printf.buffer_size,
39    };
40 
41    VkDevice _device = radv_device_to_handle(device);
42    VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->printf.buffer);
43    if (result != VK_SUCCESS)
44       return result;
45 
46    VkMemoryRequirements requirements;
47    device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->printf.buffer, &requirements);
48 
49    VkMemoryAllocateInfo alloc_info = {
50       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
51       .allocationSize = requirements.size,
52       .memoryTypeIndex =
53          radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
54                                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
55    };
56 
57    result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);
58    if (result != VK_SUCCESS)
59       return result;
60 
61    result = device->vk.dispatch_table.MapMemory(_device, device->printf.memory, 0, VK_WHOLE_SIZE, 0,
62                                                 (void **)&device->printf.data);
63    if (result != VK_SUCCESS)
64       return result;
65 
66    result = device->vk.dispatch_table.BindBufferMemory(_device, device->printf.buffer, device->printf.memory, 0);
67    if (result != VK_SUCCESS)
68       return result;
69 
70    struct radv_printf_buffer_header *header = device->printf.data;
71    header->offset = sizeof(struct radv_printf_buffer_header);
72    header->size = device->printf.buffer_size;
73 
74    VkBufferDeviceAddressInfo addr_info = {
75       .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
76       .buffer = device->printf.buffer,
77    };
78    device->printf.buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
79 
80    return VK_SUCCESS;
81 }
82 
83 void
radv_printf_data_finish(struct radv_device * device)84 radv_printf_data_finish(struct radv_device *device)
85 {
86    VkDevice _device = radv_device_to_handle(device);
87 
88    device->vk.dispatch_table.DestroyBuffer(_device, device->printf.buffer, NULL);
89    if (device->printf.memory)
90       device->vk.dispatch_table.UnmapMemory(_device, device->printf.memory);
91    device->vk.dispatch_table.FreeMemory(_device, device->printf.memory, NULL);
92 
93    util_dynarray_foreach (&device->printf.formats, struct radv_printf_format, format)
94       free(format->string);
95 
96    util_dynarray_fini(&device->printf.formats);
97 }
98 
99 void
radv_build_printf(nir_builder * b,nir_def * cond,const char * format_string,...)100 radv_build_printf(nir_builder *b, nir_def *cond, const char *format_string, ...)
101 {
102    if (!device_ht)
103       return;
104 
105    struct radv_device *device = _mesa_hash_table_search(device_ht, b->shader)->data;
106    if (!device->printf.buffer_addr)
107       return;
108 
109    struct radv_printf_format format = {0};
110    format.string = strdup(format_string);
111    if (!format.string)
112       return;
113 
114    uint32_t format_index = util_dynarray_num_elements(&device->printf.formats, struct radv_printf_format);
115 
116    if (cond)
117       nir_push_if(b, cond);
118 
119    if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
120       nir_push_if(b, nir_inot(b, nir_is_helper_invocation(b, 1)));
121 
122    nir_def *size = nir_imm_int(b, 4);
123 
124    va_list arg_list;
125    va_start(arg_list, format_string);
126 
127    uint32_t num_args = 0;
128    for (uint32_t i = 0; i < strlen(format_string); i++)
129       if (format_string[i] == '%')
130          num_args++;
131 
132    nir_def **args = malloc(num_args * sizeof(nir_def *));
133    nir_def **strides = malloc(num_args * sizeof(nir_def *));
134 
135    nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
136    nir_def *active_invocation_count = nir_bit_count(b, ballot);
137 
138    for (uint32_t i = 0; i < num_args; i++) {
139       nir_def *arg = va_arg(arg_list, nir_def *);
140       bool divergent = arg->divergent;
141 
142       if (arg->bit_size == 1)
143          arg = nir_b2i32(b, arg);
144 
145       args[i] = arg;
146 
147       uint32_t arg_size = arg->bit_size == 1 ? 32 : arg->bit_size / 8;
148       format.element_sizes[i] = arg_size;
149 
150       if (divergent) {
151          strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
152          format.divergence_mask |= BITFIELD_BIT(i);
153       } else {
154          strides[i] = nir_imm_int(b, arg_size);
155       }
156 
157       size = nir_iadd(b, size, strides[i]);
158    }
159 
160    va_end(arg_list);
161 
162    nir_def *offset;
163    nir_def *undef;
164 
165    nir_push_if(b, nir_elect(b, 1));
166    {
167       offset = nir_global_atomic(
168          b, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
169          .atomic_op = nir_atomic_op_iadd);
170    }
171    nir_push_else(b, NULL);
172    {
173       undef = nir_undef(b, 1, 32);
174    }
175    nir_pop_if(b, NULL);
176 
177    offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));
178 
179    nir_def *buffer_size = nir_load_global(
180       b, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)), 4, 1, 32);
181 
182    nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
183    {
184       nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), device->printf.buffer_addr);
185 
186       /* header */
187       nir_store_global(b, addr, 4, nir_ior_imm(b, active_invocation_count, format_index << 16), 1);
188       addr = nir_iadd_imm(b, addr, 4);
189 
190       for (uint32_t i = 0; i < num_args; i++) {
191          nir_def *arg = args[i];
192 
193          if (arg->divergent) {
194             nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
195             nir_store_global(
196                b, nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))), 4, arg,
197                1);
198          } else {
199             nir_store_global(b, addr, 4, arg, 1);
200          }
201 
202          addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
203       }
204    }
205    nir_pop_if(b, NULL);
206 
207    if (cond)
208       nir_pop_if(b, NULL);
209 
210    if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
211       nir_pop_if(b, NULL);
212 
213    free(args);
214    free(strides);
215 
216    util_dynarray_append(&device->printf.formats, struct radv_printf_format, format);
217 }
218 
219 void
radv_dump_printf_data(struct radv_device * device,FILE * out)220 radv_dump_printf_data(struct radv_device *device, FILE *out)
221 {
222    if (!device->printf.data)
223       return;
224 
225    device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));
226 
227    struct radv_printf_buffer_header *header = device->printf.data;
228    uint8_t *data = device->printf.data;
229 
230    for (uint32_t offset = sizeof(struct radv_printf_buffer_header); offset < header->offset;) {
231       uint32_t printf_header = *(uint32_t *)&data[offset];
232       offset += sizeof(uint32_t);
233 
234       uint32_t format_index = printf_header >> 16;
235       struct radv_printf_format *printf_format =
236          util_dynarray_element(&device->printf.formats, struct radv_printf_format, format_index);
237 
238       uint32_t invocation_count = printf_header & 0xFFFF;
239 
240       uint32_t num_args = 0;
241       for (uint32_t i = 0; i < strlen(printf_format->string); i++)
242          if (printf_format->string[i] == '%')
243             num_args++;
244 
245       char *format = printf_format->string;
246 
247       for (uint32_t i = 0; i <= num_args; i++) {
248          size_t spec_pos = util_printf_next_spec_pos(format, 0);
249 
250          if (spec_pos == -1) {
251             fprintf(out, "%s", format);
252             continue;
253          }
254 
255          const char *token = util_printf_prev_tok(&format[spec_pos]);
256          char *next_format = &format[spec_pos + 1];
257 
258          /* print the part before the format token */
259          if (token != format)
260             fwrite(format, token - format, 1, out);
261 
262          char *print_str = strndup(token, next_format - token);
263          /* rebase spec_pos so we can use it with print_str */
264          spec_pos += format - token;
265 
266          size_t element_size = printf_format->element_sizes[i];
267          bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;
268 
269          uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
270          for (uint32_t lane = 0; lane < lane_count; lane++) {
271             switch (element_size) {
272             case 1: {
273                uint8_t v;
274                memcpy(&v, &data[offset], element_size);
275                fprintf(out, print_str, v);
276                break;
277             }
278             case 2: {
279                uint16_t v;
280                memcpy(&v, &data[offset], element_size);
281                fprintf(out, print_str, v);
282                break;
283             }
284             case 4: {
285                if (is_float) {
286                   float v;
287                   memcpy(&v, &data[offset], element_size);
288                   fprintf(out, print_str, v);
289                } else {
290                   uint32_t v;
291                   memcpy(&v, &data[offset], element_size);
292                   fprintf(out, print_str, v);
293                }
294                break;
295             }
296             case 8: {
297                if (is_float) {
298                   double v;
299                   memcpy(&v, &data[offset], element_size);
300                   fprintf(out, print_str, v);
301                } else {
302                   uint64_t v;
303                   memcpy(&v, &data[offset], element_size);
304                   fprintf(out, print_str, v);
305                }
306                break;
307             }
308             default:
309                unreachable("Unsupported data type");
310             }
311 
312             if (lane != lane_count - 1)
313                fprintf(out, " ");
314 
315             offset += element_size;
316          }
317 
318          /* rebase format */
319          format = next_format;
320          free(print_str);
321       }
322    }
323 
324    fflush(out);
325 
326    header->offset = sizeof(struct radv_printf_buffer_header);
327 }
328 
329 void
radv_device_associate_nir(struct radv_device * device,nir_shader * nir)330 radv_device_associate_nir(struct radv_device *device, nir_shader *nir)
331 {
332    if (!device->printf.buffer_addr)
333       return;
334 
335    if (!device_ht)
336       device_ht = _mesa_pointer_hash_table_create(NULL);
337 
338    _mesa_hash_table_insert(device_ht, nir, device);
339 }
340