1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "intel_perf.h"
25 #include "intel_perf_mdapi.h"
26 #include "intel_perf_private.h"
27 #include "intel_perf_regs.h"
28
29 #include "dev/intel_device_info.h"
30
31 #include <drm-uapi/i915_drm.h>
32
33
34 int
intel_perf_query_result_write_mdapi(void * data,uint32_t data_size,const struct intel_device_info * devinfo,const struct intel_perf_query_info * query,const struct intel_perf_query_result * result)35 intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,
36 const struct intel_device_info *devinfo,
37 const struct intel_perf_query_info *query,
38 const struct intel_perf_query_result *result)
39 {
40 switch (devinfo->ver) {
41 case 7: {
42 struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data;
43
44 if (data_size < sizeof(*mdapi_data))
45 return 0;
46
47 assert(devinfo->is_haswell);
48
49 for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
50 mdapi_data->ACounters[i] = result->accumulator[1 + i];
51
52 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
53 mdapi_data->NOACounters[i] =
54 result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
55 }
56
57 mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
58 mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
59
60 mdapi_data->ReportsCount = result->reports_accumulated;
61 mdapi_data->TotalTime =
62 intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
63 mdapi_data->CoreFrequency = result->gt_frequency[1];
64 mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
65 mdapi_data->SplitOccured = result->query_disjoint;
66 return sizeof(*mdapi_data);
67 }
68 case 8: {
69 struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data;
70
71 if (data_size < sizeof(*mdapi_data))
72 return 0;
73
74 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
75 mdapi_data->OaCntr[i] = result->accumulator[2 + i];
76 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
77 mdapi_data->NoaCntr[i] =
78 result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
79 }
80
81 mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
82 mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
83
84 mdapi_data->ReportId = result->hw_id;
85 mdapi_data->ReportsCount = result->reports_accumulated;
86 mdapi_data->TotalTime =
87 intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
88 mdapi_data->BeginTimestamp =
89 intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
90 mdapi_data->GPUTicks = result->accumulator[1];
91 mdapi_data->CoreFrequency = result->gt_frequency[1];
92 mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
93 mdapi_data->SliceFrequency =
94 (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
95 mdapi_data->UnsliceFrequency =
96 (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
97 mdapi_data->SplitOccured = result->query_disjoint;
98 return sizeof(*mdapi_data);
99 }
100 case 9:
101 case 11:
102 case 12:{
103 struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data;
104
105 if (data_size < sizeof(*mdapi_data))
106 return 0;
107
108 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
109 mdapi_data->OaCntr[i] = result->accumulator[2 + i];
110 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
111 mdapi_data->NoaCntr[i] =
112 result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
113 }
114
115 mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
116 mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
117
118 mdapi_data->ReportId = result->hw_id;
119 mdapi_data->ReportsCount = result->reports_accumulated;
120 mdapi_data->TotalTime =
121 intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
122 mdapi_data->BeginTimestamp =
123 intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
124 mdapi_data->GPUTicks = result->accumulator[1];
125 mdapi_data->CoreFrequency = result->gt_frequency[1];
126 mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
127 mdapi_data->SliceFrequency =
128 (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
129 mdapi_data->UnsliceFrequency =
130 (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
131 mdapi_data->SplitOccured = result->query_disjoint;
132 return sizeof(*mdapi_data);
133 }
134 default:
135 unreachable("unexpected gen");
136 }
137 }
138
139 void
intel_perf_register_mdapi_statistic_query(struct intel_perf_config * perf_cfg,const struct intel_device_info * devinfo)140 intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,
141 const struct intel_device_info *devinfo)
142 {
143 if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
144 return;
145
146 struct intel_perf_query_info *query =
147 intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
148
149 query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;
150 query->name = "Intel_Raw_Pipeline_Statistics_Query";
151
152 /* The order has to match mdapi_pipeline_metrics. */
153 intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
154 "N vertices submitted");
155 intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
156 "N primitives submitted");
157 intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
158 "N vertex shader invocations");
159 intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
160 "N geometry shader invocations");
161 intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
162 "N geometry shader primitives emitted");
163 intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
164 "N primitives entering clipping");
165 intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
166 "N primitives leaving clipping");
167 if (devinfo->is_haswell || devinfo->ver == 8) {
168 intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
169 "N fragment shader invocations",
170 "N fragment shader invocations");
171 } else {
172 intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
173 "N fragment shader invocations");
174 }
175 intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
176 "N TCS shader invocations");
177 intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
178 "N TES shader invocations");
179 if (devinfo->ver >= 7) {
180 intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
181 "N compute shader invocations");
182 }
183
184 if (devinfo->ver >= 10) {
185 /* Reuse existing CS invocation register until we can expose this new
186 * one.
187 */
188 intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
189 "Reserved1");
190 }
191
192 query->data_size = sizeof(uint64_t) * query->n_counters;
193 }
194
195 static void
fill_mdapi_perf_query_counter(struct intel_perf_query_info * query,const char * name,uint32_t data_offset,uint32_t data_size,enum intel_perf_counter_data_type data_type)196 fill_mdapi_perf_query_counter(struct intel_perf_query_info *query,
197 const char *name,
198 uint32_t data_offset,
199 uint32_t data_size,
200 enum intel_perf_counter_data_type data_type)
201 {
202 struct intel_perf_query_counter *counter = &query->counters[query->n_counters];
203
204 assert(query->n_counters <= query->max_counters);
205
206 counter->name = name;
207 counter->desc = "Raw counter value";
208 counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
209 counter->data_type = data_type;
210 counter->offset = data_offset;
211
212 query->n_counters++;
213
214 assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size);
215 }
216
/* Register the member `field` of the metrics struct variable `metrics` as a
 * raw counter of query `q`. The counter is named after the member, its
 * offset is the member's byte distance from the start of `metrics`, and
 * `type` is the suffix appended to INTEL_PERF_COUNTER_DATA_TYPE_.
 */
#define MDAPI_QUERY_ADD_COUNTER(q, metrics, field, type)                     \
   fill_mdapi_perf_query_counter((q), #field,                                \
                                 (uint8_t *) &(metrics).field -              \
                                 (uint8_t *) &(metrics),                     \
                                 sizeof((metrics).field),                    \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type)

/* Same as MDAPI_QUERY_ADD_COUNTER for element `elem` of the array member
 * `field`. The counter name is the member name followed by the element
 * index, allocated with ralloc_asprintf() on `mem_ctx`. `elem` is formatted
 * with "%i", so it must be an int, and it is evaluated more than once.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(mem_ctx, q, metrics, field, elem, type) \
   fill_mdapi_perf_query_counter((q),                                        \
                                 ralloc_asprintf((mem_ctx), "%s%i",          \
                                                 #field, (elem)),            \
                                 (uint8_t *) &(metrics).field[(elem)] -      \
                                 (uint8_t *) &(metrics),                     \
                                 sizeof((metrics).field[0]),                 \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type)
230
231 void
intel_perf_register_mdapi_oa_query(struct intel_perf_config * perf,const struct intel_device_info * devinfo)232 intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
233 const struct intel_device_info *devinfo)
234 {
235 struct intel_perf_query_info *query = NULL;
236
237 /* MDAPI requires different structures for pretty much every generation
238 * (right now we have definitions for gen 7 to 12).
239 */
240 if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
241 return;
242
243 switch (devinfo->ver) {
244 case 7: {
245 query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);
246 query->oa_format = I915_OA_FORMAT_A45_B8_C8;
247
248 struct gfx7_mdapi_metrics metric_data;
249 query->data_size = sizeof(metric_data);
250
251 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
252 for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
253 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
254 metric_data, ACounters, i, UINT64);
255 }
256 for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
257 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
258 metric_data, NOACounters, i, UINT64);
259 }
260 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
261 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
262 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
263 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
264 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
265 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
266 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
267 break;
268 }
269 case 8: {
270 query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);
271 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
272
273 struct gfx8_mdapi_metrics metric_data;
274 query->data_size = sizeof(metric_data);
275
276 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
277 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
278 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
279 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
280 metric_data, OaCntr, i, UINT64);
281 }
282 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
283 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
284 metric_data, NoaCntr, i, UINT64);
285 }
286 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
287 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
288 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
289 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
290 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
291 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
292 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
293 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
294 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
295 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
296 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
297 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
298 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
299 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
300 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
301 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
302 break;
303 }
304 case 9:
305 case 11:
306 case 12: {
307 query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
308 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
309
310 struct gfx9_mdapi_metrics metric_data;
311 query->data_size = sizeof(metric_data);
312
313 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
314 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
315 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
316 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
317 metric_data, OaCntr, i, UINT64);
318 }
319 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
320 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
321 metric_data, NoaCntr, i, UINT64);
322 }
323 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
324 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
325 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
326 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
327 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
328 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
329 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
330 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
331 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
332 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
333 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
334 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
335 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
336 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
337 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
338 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
339 for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
340 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
341 metric_data, UserCntr, i, UINT64);
342 }
343 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
344 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
345 break;
346 }
347 default:
348 unreachable("Unsupported gen");
349 break;
350 }
351
352 query->kind = INTEL_PERF_QUERY_TYPE_RAW;
353 query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
354 query->guid = INTEL_PERF_QUERY_GUID_MDAPI;
355
356 {
357 /* Accumulation buffer offsets copied from an actual query... */
358 const struct intel_perf_query_info *copy_query =
359 &perf->queries[0];
360
361 query->gpu_time_offset = copy_query->gpu_time_offset;
362 query->gpu_clock_offset = copy_query->gpu_clock_offset;
363 query->a_offset = copy_query->a_offset;
364 query->b_offset = copy_query->b_offset;
365 query->c_offset = copy_query->c_offset;
366 query->perfcnt_offset = copy_query->perfcnt_offset;
367 }
368 }
369