1 /*
2 * Copyright © 2024 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radv_private.h"
8
9 #include "util/hash_table.h"
10 #include "util/strndup.h"
11 #include "util/u_printf.h"
12
13 #include "nir.h"
14 #include "nir_builder.h"
15
/* Maps every nir_shader registered through radv_device_associate_nir() to the
 * radv_device it was registered with, so that radv_build_printf() can locate
 * the printf buffer from the builder's shader alone. The table is created
 * lazily on the first association.
 *
 * NOTE(review): this is file-scope state that is accessed without any locking
 * in this file, and no teardown is visible here — confirm that callers
 * serialize access.
 */
static struct hash_table *device_ht = NULL;
17
18 VkResult
radv_printf_data_init(struct radv_device * device)19 radv_printf_data_init(struct radv_device *device)
20 {
21 util_dynarray_init(&device->printf.formats, NULL);
22
23 device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
24 if (device->printf.buffer_size < sizeof(struct radv_printf_buffer_header))
25 return VK_SUCCESS;
26
27 VkBufferCreateInfo buffer_create_info = {
28 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
29 .pNext =
30 &(VkBufferUsageFlags2CreateInfoKHR){
31 .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
32 .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR,
33 },
34 .size = device->printf.buffer_size,
35 };
36
37 VkDevice _device = radv_device_to_handle(device);
38 VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->printf.buffer);
39 if (result != VK_SUCCESS)
40 return result;
41
42 VkMemoryRequirements requirements;
43 device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->printf.buffer, &requirements);
44
45 VkMemoryAllocateInfo alloc_info = {
46 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
47 .allocationSize = requirements.size,
48 .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
49 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
50 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
51 };
52
53 result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);
54 if (result != VK_SUCCESS)
55 return result;
56
57 result = device->vk.dispatch_table.MapMemory(_device, device->printf.memory, 0, VK_WHOLE_SIZE, 0,
58 (void **)&device->printf.data);
59 if (result != VK_SUCCESS)
60 return result;
61
62 result = device->vk.dispatch_table.BindBufferMemory(_device, device->printf.buffer, device->printf.memory, 0);
63 if (result != VK_SUCCESS)
64 return result;
65
66 struct radv_printf_buffer_header *header = device->printf.data;
67 header->offset = sizeof(struct radv_printf_buffer_header);
68 header->size = device->printf.buffer_size;
69
70 VkBufferDeviceAddressInfo addr_info = {
71 .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
72 .buffer = device->printf.buffer,
73 };
74 device->printf.buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
75
76 return VK_SUCCESS;
77 }
78
79 void
radv_printf_data_finish(struct radv_device * device)80 radv_printf_data_finish(struct radv_device *device)
81 {
82 VkDevice _device = radv_device_to_handle(device);
83
84 device->vk.dispatch_table.DestroyBuffer(_device, device->printf.buffer, NULL);
85 if (device->printf.memory)
86 device->vk.dispatch_table.UnmapMemory(_device, device->printf.memory);
87 device->vk.dispatch_table.FreeMemory(_device, device->printf.memory, NULL);
88
89 util_dynarray_foreach (&device->printf.formats, struct radv_printf_format, format)
90 free(format->string);
91
92 util_dynarray_fini(&device->printf.formats);
93 }
94
95 void
radv_build_printf(nir_builder * b,nir_def * cond,const char * format_string,...)96 radv_build_printf(nir_builder *b, nir_def *cond, const char *format_string, ...)
97 {
98 if (!device_ht)
99 return;
100
101 struct radv_device *device = _mesa_hash_table_search(device_ht, b->shader)->data;
102 if (!device->printf.buffer_addr)
103 return;
104
105 struct radv_printf_format format = {0};
106 format.string = strdup(format_string);
107 if (!format.string)
108 return;
109
110 uint32_t format_index = util_dynarray_num_elements(&device->printf.formats, struct radv_printf_format);
111
112 if (cond)
113 nir_push_if(b, cond);
114
115 nir_def *size = nir_imm_int(b, 4);
116
117 va_list arg_list;
118 va_start(arg_list, format_string);
119
120 uint32_t num_args = 0;
121 for (uint32_t i = 0; i < strlen(format_string); i++)
122 if (format_string[i] == '%')
123 num_args++;
124
125 nir_def **args = malloc(num_args * sizeof(nir_def *));
126 nir_def **strides = malloc(num_args * sizeof(nir_def *));
127
128 nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
129 nir_def *active_invocation_count = nir_bit_count(b, ballot);
130
131 for (uint32_t i = 0; i < num_args; i++) {
132 nir_def *arg = va_arg(arg_list, nir_def *);
133
134 if (arg->bit_size == 1)
135 arg = nir_b2i32(b, arg);
136
137 args[i] = arg;
138
139 uint32_t arg_size = arg->bit_size == 1 ? 32 : arg->bit_size / 8;
140 format.element_sizes[i] = arg_size;
141
142 nir_update_instr_divergence(b->shader, arg->parent_instr);
143
144 if (arg->divergent) {
145 strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
146 format.divergence_mask |= BITFIELD_BIT(i);
147 } else {
148 strides[i] = nir_imm_int(b, arg_size);
149 }
150
151 size = nir_iadd(b, size, strides[i]);
152 }
153
154 va_end(arg_list);
155
156 nir_def *offset;
157 nir_def *undef;
158
159 nir_push_if(b, nir_elect(b, 1));
160 {
161 offset = nir_global_atomic(
162 b, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
163 .atomic_op = nir_atomic_op_iadd);
164 }
165 nir_push_else(b, NULL);
166 {
167 undef = nir_undef(b, 1, 32);
168 }
169 nir_pop_if(b, NULL);
170
171 offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));
172
173 nir_def *buffer_size = nir_load_global(
174 b, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)), 4, 1, 32);
175
176 nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
177 {
178 nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), device->printf.buffer_addr);
179
180 /* header */
181 nir_store_global(b, addr, 4, nir_ior_imm(b, active_invocation_count, format_index << 16), 1);
182 addr = nir_iadd_imm(b, addr, 4);
183
184 for (uint32_t i = 0; i < num_args; i++) {
185 nir_def *arg = args[i];
186
187 if (arg->divergent) {
188 nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
189 nir_store_global(
190 b, nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))), 4, arg,
191 1);
192 } else {
193 nir_store_global(b, addr, 4, arg, 1);
194 }
195
196 addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
197 }
198 }
199 nir_pop_if(b, NULL);
200
201 if (cond)
202 nir_pop_if(b, NULL);
203
204 free(args);
205 free(strides);
206
207 util_dynarray_append(&device->printf.formats, struct radv_printf_format, format);
208 }
209
210 void
radv_dump_printf_data(struct radv_device * device)211 radv_dump_printf_data(struct radv_device *device)
212 {
213 if (!device->printf.data)
214 return;
215
216 device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));
217
218 struct radv_printf_buffer_header *header = device->printf.data;
219 uint8_t *data = device->printf.data;
220
221 for (uint32_t offset = sizeof(struct radv_printf_buffer_header); offset < header->offset;) {
222 uint32_t printf_header = *(uint32_t *)&data[offset];
223 offset += sizeof(uint32_t);
224
225 uint32_t format_index = printf_header >> 16;
226 struct radv_printf_format *printf_format =
227 util_dynarray_element(&device->printf.formats, struct radv_printf_format, format_index);
228
229 uint32_t invocation_count = printf_header & 0xFFFF;
230
231 uint32_t num_args = 0;
232 for (uint32_t i = 0; i < strlen(printf_format->string); i++)
233 if (printf_format->string[i] == '%')
234 num_args++;
235
236 char *format = printf_format->string;
237
238 for (uint32_t i = 0; i <= num_args; i++) {
239 size_t spec_pos = util_printf_next_spec_pos(format, 0);
240
241 if (spec_pos == -1) {
242 printf("%s", format);
243 continue;
244 }
245
246 const char *token = util_printf_prev_tok(&format[spec_pos]);
247 char *next_format = &format[spec_pos + 1];
248
249 /* print the part before the format token */
250 if (token != format) {
251 fwrite(format, token - format, 1, stdout);
252 fflush(stdout);
253 }
254
255 char *print_str = strndup(token, next_format - token);
256 /* rebase spec_pos so we can use it with print_str */
257 spec_pos += format - token;
258
259 size_t element_size = printf_format->element_sizes[i];
260 bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;
261
262 uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
263 for (uint32_t lane = 0; lane < lane_count; lane++) {
264 switch (element_size) {
265 case 1: {
266 uint8_t v;
267 memcpy(&v, &data[offset], element_size);
268 printf(print_str, v);
269 break;
270 }
271 case 2: {
272 uint16_t v;
273 memcpy(&v, &data[offset], element_size);
274 printf(print_str, v);
275 break;
276 }
277 case 4: {
278 if (is_float) {
279 float v;
280 memcpy(&v, &data[offset], element_size);
281 printf(print_str, v);
282 } else {
283 uint32_t v;
284 memcpy(&v, &data[offset], element_size);
285 printf(print_str, v);
286 }
287 break;
288 }
289 case 8: {
290 if (is_float) {
291 double v;
292 memcpy(&v, &data[offset], element_size);
293 printf(print_str, v);
294 } else {
295 uint64_t v;
296 memcpy(&v, &data[offset], element_size);
297 printf(print_str, v);
298 }
299 break;
300 }
301 default:
302 unreachable("Unsupported data type");
303 }
304
305 if (lane != lane_count - 1)
306 printf(" ");
307
308 offset += element_size;
309 }
310
311 /* rebase format */
312 format = next_format;
313 free(print_str);
314 }
315 }
316
317 header->offset = sizeof(struct radv_printf_buffer_header);
318 }
319
320 void
radv_device_associate_nir(struct radv_device * device,nir_shader * nir)321 radv_device_associate_nir(struct radv_device *device, nir_shader *nir)
322 {
323 if (!device->printf.buffer_addr)
324 return;
325
326 if (!device_ht)
327 device_ht = _mesa_pointer_hash_table_create(NULL);
328
329 _mesa_hash_table_insert(device_ht, nir, device);
330 }
331