/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "anv_private.h"
#include "vk_util.h"

#include "perf/intel_perf.h"
#include "perf/intel_perf_mdapi.h"

#include "util/mesa-sha1.h"

void
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->perf = NULL;

   /* We need self modifying batches. The i915 parser prevents it on
    * Gfx7.5 :( maybe one day.
    */
   if (devinfo->ver < 8)
      return;

   struct intel_perf_config *perf = intel_perf_new(NULL);

   intel_perf_init_metrics(perf, &device->info, fd,
                           false /* pipeline statistics */,
                           true /* register snapshots */);

   if (!perf->n_queries)
      goto err;

   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
    * perf revision 2.
    */
   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (!intel_perf_has_hold_preemption(perf))
         goto err;
   }

   device->perf = perf;

   /* Compute the number of commands we need to implement a performance
    * query.
    */
   const struct intel_perf_query_field_layout *layout = &perf->query_layout;
   device->n_perf_query_commands = 0;
   for (uint32_t f = 0; f < layout->n_fields; f++) {
      struct intel_perf_query_field *field = &layout->fields[f];

      switch (field->type) {
      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
         device->n_perf_query_commands++;
         break;
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
         device->n_perf_query_commands += field->size / 4;
         break;
      default:
         unreachable("Unhandled register type");
      }
   }
   device->n_perf_query_commands *= 2; /* Begin & End */
   device->n_perf_query_commands += 1; /* availability */

   return;

 err:
   ralloc_free(perf);
}

void
anv_device_perf_init(struct anv_device *device)
{
   device->perf_fd = -1;
}

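/* Open an i915 perf stream for the given OA metric set. properties[] is a
 * flat list of (key, value) pairs, which is why num_properties ends up as
 * p / 2. Callers store the returned fd (or a negative value on failure) in
 * device->perf_fd.
 */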
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
   struct drm_i915_perf_open_param param;
   int p = 0, stream_fd;

   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
   properties[p++] = true;

   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
   properties[p++] = metric_id;

   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
   properties[p++] = device->info->ver >= 8 ?
      I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
      I915_OA_FORMAT_A45_B8_C8;

   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
   properties[p++] = 31; /* slowest sampling period */

   properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
   properties[p++] = device->context_id;

   properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
   properties[p++] = true;

   /* If global SSEU is available, pin it to the default. This will ensure
    * that on Gfx11, for instance, we use the full EU array. Initially when
    * perf was enabled we would use only half on Gfx11 because of functional
    * requirements.
    *
    * Temporarily disable this option on Gfx12.5+; the kernel doesn't appear
    * to support it.
    */
   if (intel_perf_has_global_sseu(device->physical->perf)) {
      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
      properties[p++] = (uintptr_t) &device->physical->perf->sseu;
   }

   memset(&param, 0, sizeof(param));
   param.flags = 0;
   param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
   param.properties_ptr = (uintptr_t)properties;
   param.num_properties = p / 2;

   stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
   return stream_fd;
}

/* VK_INTEL_performance_query */
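/* Rough application-side call order for this extension, as implied by the
 * entrypoints below (a sketch, not normative):
 *
 *    vkInitializePerformanceApiINTEL()
 *    vkAcquirePerformanceConfigurationINTEL()
 *    vkQueueSetPerformanceConfigurationINTEL()
 *    ... record & submit command buffers, optionally with
 *        vkCmdSetPerformanceMarkerINTEL() ...
 *    vkReleasePerformanceConfigurationINTEL()
 *    vkUninitializePerformanceApiINTEL()
 */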
VkResult anv_InitializePerformanceApiINTEL(
    VkDevice                                    _device,
    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   /* Not much to do here */
   return VK_SUCCESS;
}

VkResult anv_GetPerformanceParameterINTEL(
    VkDevice                                    _device,
    VkPerformanceParameterTypeINTEL             parameter,
    VkPerformanceValueINTEL*                    pValue)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkResult result = VK_SUCCESS;
   switch (parameter) {
   case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
      pValue->data.valueBool = VK_TRUE;
      break;

   case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
      pValue->data.value32 = 25;
      break;

   default:
      result = VK_ERROR_FEATURE_NOT_PRESENT;
      break;
   }

   return result;
}

VkResult anv_CmdSetPerformanceMarkerINTEL(
    VkCommandBuffer                             commandBuffer,
    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;

   return VK_SUCCESS;
}

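/* Acquiring a configuration loads the MDAPI register config from the perf
 * library and registers it with the kernel via i915 perf; the metric set id
 * handed back by the kernel is kept in config->config_id so it can be
 * selected on the stream later and removed again on release.
 */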
VkResult anv_AcquirePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
    VkPerformanceConfigurationINTEL*            pConfiguration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_performance_configuration_intel *config;

   config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
                            VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
   if (!config)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      config->register_config =
         intel_perf_load_configuration(device->physical->perf, device->fd,
                                       INTEL_PERF_QUERY_GUID_MDAPI);
      if (!config->register_config) {
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      int ret =
         intel_perf_store_configuration(device->physical->perf, device->fd,
                                        config->register_config, NULL /* guid */);
      if (ret < 0) {
         ralloc_free(config->register_config);
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      config->config_id = ret;
   }

   *pConfiguration = anv_performance_configuration_intel_to_handle(config);

   return VK_SUCCESS;
}

VkResult anv_ReleasePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
      intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);

   ralloc_free(config->register_config);

   vk_object_free(&device->vk, NULL, config);

   return VK_SUCCESS;
}

VkResult anv_QueueSetPerformanceConfigurationINTEL(
    VkQueue                                     _queue,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
   struct anv_device *device = queue->device;

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (device->perf_fd < 0) {
         device->perf_fd = anv_device_perf_open(device, config->config_id);
         if (device->perf_fd < 0)
            return VK_ERROR_INITIALIZATION_FAILED;
      } else {
         int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                               (void *)(uintptr_t) config->config_id);
         if (ret < 0)
            return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
      }
   }

   return VK_SUCCESS;
}

void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->perf_fd >= 0) {
      close(device->perf_fd);
      device->perf_fd = -1;
   }
}

/* VK_KHR_performance_query */
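/* Mapping tables from intel_perf counter metadata to the Vulkan enums.
 * Units without a direct Vulkan equivalent are reported as GENERIC;
 * microsecond counters are advertised as NANOSECONDS and converted when
 * results are written out (see anv_perf_write_pass_results below).
 */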
static const VkPerformanceCounterUnitKHR
intel_perf_counter_unit_to_vk_unit[] = {
   [INTEL_PERF_COUNTER_UNITS_BYTES]    = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
   [INTEL_PERF_COUNTER_UNITS_HZ]       = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
   [INTEL_PERF_COUNTER_UNITS_NS]       = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
   [INTEL_PERF_COUNTER_UNITS_US]       = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
   [INTEL_PERF_COUNTER_UNITS_PIXELS]   = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_TEXELS]   = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_THREADS]  = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_PERCENT]  = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
   [INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_NUMBER]   = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_CYCLES]   = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EVENTS]   = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};

static const VkPerformanceCounterStorageKHR
intel_perf_counter_data_type_to_vk_storage[] = {
   [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};

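/* Writes out both the counters and their descriptions in one pass. Each
 * counter's UUID is the (truncated) SHA1 of its symbol name, so it is
 * deterministic for a given counter.
 */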
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    uint32_t*                                   pCounterCount,
    VkPerformanceCounterKHR*                    pCounters,
    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   uint32_t desc_count = *pCounterCount;

   VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
   VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
                          pCounterDescriptions, &desc_count);

   /* We cannot support performance queries on anything other than RCS,
    * because the MI_REPORT_PERF_COUNT command is not available on other
    * engines.
    */
   struct anv_queue_family *queue_family =
      &pdevice->queue.families[queueFamilyIndex];
   if (queue_family->engine_class != INTEL_ENGINE_CLASS_RENDER)
      return vk_outarray_status(&out);

   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
      const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;

      vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
         counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
         counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
         counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];

         unsigned char sha1_result[20];
         _mesa_sha1_compute(intel_counter->symbol_name,
                            strlen(intel_counter->symbol_name),
                            sha1_result);
         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
      }

      vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
         desc->flags = 0; /* None so far. */
         snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
         snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
         snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
      }
   }

   return vk_outarray_status(&out);
}

void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   if (!perf) {
      *pNumPasses = 0;
      return;
   }

   *pNumPasses = intel_perf_get_n_passes(perf,
                                         pPerformanceQueryCreateInfo->pCounterIndices,
                                         pPerformanceQueryCreateInfo->counterIndexCount,
                                         NULL);
}

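/* For VK_KHR_performance_query the "profiling lock" maps onto owning the
 * i915 perf stream: acquiring it opens the stream on the first metric set
 * (returning VK_TIMEOUT if that fails), and releasing it closes the stream.
 */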
VkResult anv_AcquireProfilingLockKHR(
    VkDevice                                    _device,
    const VkAcquireProfilingLockInfoKHR*        pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct intel_perf_config *perf = device->physical->perf;
   struct intel_perf_query_info *first_metric_set = &perf->queries[0];
   int fd = -1;

   assert(device->perf_fd == -1);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
      if (fd < 0)
         return VK_TIMEOUT;
   }

   device->perf_fd = fd;
   return VK_SUCCESS;
}

void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }
   device->perf_fd = -1;
}

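/* Converts the accumulated HW results of a given pass into the
 * VkPerformanceCounterResultKHR array handed back to the application. Only
 * counters belonging to this pass's query are written; microsecond counters
 * are scaled to nanoseconds to match the advertised unit.
 */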
void
anv_perf_write_pass_results(struct intel_perf_config *perf,
                            struct anv_query_pool *pool, uint32_t pass,
                            const struct intel_perf_query_result *accumulated_results,
                            union VkPerformanceCounterResultKHR *results)
{
   const struct intel_perf_query_info *query = pool->pass_query[pass];

   for (uint32_t c = 0; c < pool->n_counters; c++) {
      const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];

      if (counter_pass->query != query)
         continue;

      switch (pool->pass_query[pass]->kind) {
      case INTEL_PERF_QUERY_TYPE_PIPELINE: {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
         results[c].uint64 = accumulated_results->accumulator[accu_offset];
         break;
      }

      case INTEL_PERF_QUERY_TYPE_OA:
      case INTEL_PERF_QUERY_TYPE_RAW:
         switch (counter_pass->counter->data_type) {
         case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
            results[c].uint64 =
               counter_pass->counter->oa_counter_read_uint64(perf,
                                                             counter_pass->query,
                                                             accumulated_results);
            break;
         case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
            results[c].float32 =
               counter_pass->counter->oa_counter_read_float(perf,
                                                            counter_pass->query,
                                                            accumulated_results);
            break;
         default:
            /* So far we aren't using uint32, double or bool32... */
            unreachable("unexpected counter data type");
         }
         break;

      default:
         unreachable("invalid query type");
      }

      /* The Vulkan extension only has nanoseconds as a unit */
      if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         results[c].uint64 *= 1000;
      }
   }
}