/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "anv_private.h"
#include "vk_util.h"

#include "perf/intel_perf.h"
#include "perf/intel_perf_mdapi.h"

#include "util/mesa-sha1.h"

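/* Probe the kernel's OA metrics support and, when everything we need is
 * available, publish the intel_perf configuration on the physical device.
 * We also precompute the number of GPU commands required to implement a
 * single performance query so later allocations can be sized up front. On
 * any failure, device->perf is left NULL, which presumably signals to the
 * rest of the driver that performance queries are unsupported.
 */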
void
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
{
   device->perf = NULL;

   struct intel_perf_config *perf = intel_perf_new(NULL);

   intel_perf_init_metrics(perf, &device->info, fd,
                           false /* pipeline statistics */,
                           true /* register snapshots */);

   if (!perf->n_queries)
      goto err;

   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, which is only
    * available in perf revision 2.
    */
   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (!intel_perf_has_hold_preemption(perf))
         goto err;
   }

   device->perf = perf;

   /* Compute the number of commands we need to implement a performance
    * query.
    */
   const struct intel_perf_query_field_layout *layout = &perf->query_layout;
   device->n_perf_query_commands = 0;
   for (uint32_t f = 0; f < layout->n_fields; f++) {
      struct intel_perf_query_field *field = &layout->fields[f];

      switch (field->type) {
      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
         device->n_perf_query_commands++;
         break;
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
         device->n_perf_query_commands += field->size / 4;
         break;
      default:
         unreachable("Unhandled register type");
      }
   }
   device->n_perf_query_commands *= 2; /* Begin & End */
   device->n_perf_query_commands += 1; /* availability */

   return;

 err:
   ralloc_free(perf);
}

void
anv_device_perf_init(struct anv_device *device)
{
   device->perf_fd = -1;
}

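/* Open an i915 perf stream for the given OA metric set. The properties
 * array is a flat list of (key, value) pairs consumed by
 * DRM_IOCTL_I915_PERF_OPEN: sample OA reports, select the metric set and
 * the platform's OA report format, use the slowest sampling exponent, bind
 * the stream to our hardware context, and request that preemption be held
 * on that context. Returns the stream fd, or a negative value on failure.
 */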
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
   struct drm_i915_perf_open_param param;
   int p = 0, stream_fd;

   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
   properties[p++] = true;

   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
   properties[p++] = metric_id;

   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
   properties[p++] =
      device->info->verx10 >= 125 ?
      I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
      I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
   properties[p++] = 31; /* slowest sampling period */

   properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
   properties[p++] = device->context_id;

   properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
   properties[p++] = true;

   /* If global SSEU is available, pin it to the default. This ensures that
    * on Gfx11, for instance, we use the full EU array. Initially, when perf
    * was enabled, we would only use half of it on Gfx11 because of
    * functional requirements.
    *
    * Temporarily disable this option on Gfx12.5+; the kernel doesn't appear
    * to support it there.
    */
   if (intel_perf_has_global_sseu(device->physical->perf) &&
       device->info->verx10 < 125) {
      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
      properties[p++] = (uintptr_t) &device->physical->perf->sseu;
   }

   memset(&param, 0, sizeof(param));
   param.flags = 0;
   param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
   param.properties_ptr = (uintptr_t)properties;
   param.num_properties = p / 2;

   stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
   return stream_fd;
}

/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
    VkDevice                                    _device,
    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   /* Not much to do here */
   return VK_SUCCESS;
}

VkResult anv_GetPerformanceParameterINTEL(
    VkDevice                                    _device,
    VkPerformanceParameterTypeINTEL             parameter,
    VkPerformanceValueINTEL*                    pValue)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkResult result = VK_SUCCESS;
   switch (parameter) {
   case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
      pValue->data.valueBool = VK_TRUE;
      break;

   case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
      pValue->data.value32 = 25;
      break;

   default:
      result = VK_ERROR_FEATURE_NOT_PRESENT;
      break;
   }

   return result;
}

VkResult anv_CmdSetPerformanceMarkerINTEL(
    VkCommandBuffer                             commandBuffer,
    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;

   return VK_SUCCESS;
}

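/* Load the MDAPI register configuration and register it with i915 so it can
 * later be selected on the perf stream. The kernel returns a config id that
 * we store in the configuration object. When INTEL_DEBUG(DEBUG_NO_OACONFIG)
 * is set, the kernel interaction is skipped entirely.
 */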
VkResult anv_AcquirePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
    VkPerformanceConfigurationINTEL*            pConfiguration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_performance_configuration_intel *config;

   config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
                            VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
   if (!config)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      config->register_config =
         intel_perf_load_configuration(device->physical->perf, device->fd,
                                       INTEL_PERF_QUERY_GUID_MDAPI);
      if (!config->register_config) {
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      int ret =
         intel_perf_store_configuration(device->physical->perf, device->fd,
                                        config->register_config, NULL /* guid */);
      if (ret < 0) {
         ralloc_free(config->register_config);
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      config->config_id = ret;
   }

   *pConfiguration = anv_performance_configuration_intel_to_handle(config);

   return VK_SUCCESS;
}

VkResult anv_ReleasePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
      intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);

   ralloc_free(config->register_config);

   vk_object_free(&device->vk, NULL, config);

   return VK_SUCCESS;
}

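/* Switch the perf stream to the given configuration. The first call opens
 * the stream lazily with that config; later calls reuse the stream and swap
 * the metric configuration through I915_PERF_IOCTL_CONFIG. A failure at that
 * point is reported as device loss (vk_device_set_lost).
 */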
VkResult anv_QueueSetPerformanceConfigurationINTEL(
    VkQueue                                     _queue,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
   struct anv_device *device = queue->device;

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (device->perf_fd < 0) {
         device->perf_fd = anv_device_perf_open(device, config->config_id);
         if (device->perf_fd < 0)
            return VK_ERROR_INITIALIZATION_FAILED;
      } else {
         int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                               (void *)(uintptr_t) config->config_id);
         if (ret < 0)
            return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
      }
   }

   return VK_SUCCESS;
}

void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->perf_fd >= 0) {
      close(device->perf_fd);
      device->perf_fd = -1;
   }
}

/* VK_KHR_performance_query */
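/* Translation tables from intel_perf counter units / data types to their
 * Vulkan equivalents. Units with no direct Vulkan counterpart fall back to
 * VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR; microseconds are advertised as
 * nanoseconds and scaled when results are written out (see
 * anv_perf_write_pass_results below).
 */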
static const VkPerformanceCounterUnitKHR
intel_perf_counter_unit_to_vk_unit[] = {
   [INTEL_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
   [INTEL_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
   [INTEL_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
   [INTEL_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
   [INTEL_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
   [INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};

static const VkPerformanceCounterStorageKHR
intel_perf_counter_data_type_to_vk_storage[] = {
   [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};

VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    uint32_t*                                   pCounterCount,
    VkPerformanceCounterKHR*                    pCounters,
    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   uint32_t desc_count = *pCounterCount;

   VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
   VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
                          pCounterDescriptions, &desc_count);

   /* We cannot support performance queries on anything other than RCS,
    * because the MI_REPORT_PERF_COUNT command is not available on other
    * engines.
    */
   struct anv_queue_family *queue_family =
      &pdevice->queue.families[queueFamilyIndex];
   if (queue_family->engine_class != INTEL_ENGINE_CLASS_RENDER)
      return vk_outarray_status(&out);

   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
      const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;

      vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
         counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
         counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
         counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];

         unsigned char sha1_result[20];
         _mesa_sha1_compute(intel_counter->symbol_name,
                            strlen(intel_counter->symbol_name),
                            sha1_result);
         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
      }

      vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
         desc->flags = 0; /* None so far. */
         snprintf(desc->name, sizeof(desc->name), "%s",
                  INTEL_DEBUG(DEBUG_PERF_SYMBOL_NAMES) ?
                  intel_counter->symbol_name :
                  intel_counter->name);
         snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
         snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
      }
   }

   return vk_outarray_status(&out);
}

void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   if (!perf) {
      *pNumPasses = 0;
      return;
   }

   *pNumPasses = intel_perf_get_n_passes(perf,
                                         pPerformanceQueryCreateInfo->pCounterIndices,
                                         pPerformanceQueryCreateInfo->counterIndexCount,
                                         NULL);
}

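/* The profiling lock maps onto ownership of the device's single i915 perf
 * stream: acquiring it opens the stream with the first metric set as the
 * initial configuration, and releasing it closes the stream. Failure to open
 * the stream is reported as VK_TIMEOUT, which appears to be the extension's
 * way of saying the lock could not be acquired.
 */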
VkResult anv_AcquireProfilingLockKHR(
    VkDevice                                    _device,
    const VkAcquireProfilingLockInfoKHR*        pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct intel_perf_config *perf = device->physical->perf;
   struct intel_perf_query_info *first_metric_set = &perf->queries[0];
   int fd = -1;

   assert(device->perf_fd == -1);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
      if (fd < 0)
         return VK_TIMEOUT;
   }

   device->perf_fd = fd;
   return VK_SUCCESS;
}

void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }
   device->perf_fd = -1;
}

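/* Convert the accumulated HW counter values for one pass into the
 * VkPerformanceCounterResultKHR layout expected by the application.
 * Counters not scheduled on this pass are skipped; pipeline statistics are
 * copied straight from the accumulator, while OA counters go through the
 * per-counter read callbacks provided by intel_perf.
 */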
void
anv_perf_write_pass_results(struct intel_perf_config *perf,
                            struct anv_query_pool *pool, uint32_t pass,
                            const struct intel_perf_query_result *accumulated_results,
                            union VkPerformanceCounterResultKHR *results)
{
   const struct intel_perf_query_info *query = pool->pass_query[pass];

   for (uint32_t c = 0; c < pool->n_counters; c++) {
      const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];

      if (counter_pass->query != query)
         continue;

      switch (pool->pass_query[pass]->kind) {
      case INTEL_PERF_QUERY_TYPE_PIPELINE: {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
         results[c].uint64 = accumulated_results->accumulator[accu_offset];
         break;
      }

      case INTEL_PERF_QUERY_TYPE_OA:
      case INTEL_PERF_QUERY_TYPE_RAW:
         switch (counter_pass->counter->data_type) {
         case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
            results[c].uint64 =
               counter_pass->counter->oa_counter_read_uint64(perf,
                                                             counter_pass->query,
                                                             accumulated_results);
            break;
         case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
            results[c].float32 =
               counter_pass->counter->oa_counter_read_float(perf,
                                                            counter_pass->query,
                                                            accumulated_results);
            break;
         default:
            /* So far we aren't using uint32, double or bool32... */
            unreachable("unexpected counter data type");
         }
         break;

      default:
         unreachable("invalid query type");
      }

      /* The Vulkan extension only exposes nanoseconds as a time unit, so
       * convert the microsecond counters.
       */
      if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         results[c].uint64 *= 1000;
      }
   }
}