1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "gen_perf.h"
25 #include "gen_perf_mdapi.h"
26 #include "gen_perf_private.h"
27 #include "gen_perf_regs.h"
28
29 #include "dev/gen_device_info.h"
30
31 #include <drm-uapi/i915_drm.h>
32
33
34 int
gen_perf_query_result_write_mdapi(void * data,uint32_t data_size,const struct gen_device_info * devinfo,const struct gen_perf_query_result * result,uint64_t freq_start,uint64_t freq_end)35 gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
36 const struct gen_device_info *devinfo,
37 const struct gen_perf_query_result *result,
38 uint64_t freq_start, uint64_t freq_end)
39 {
40 switch (devinfo->gen) {
41 case 7: {
42 struct gen7_mdapi_metrics *mdapi_data = (struct gen7_mdapi_metrics *) data;
43
44 if (data_size < sizeof(*mdapi_data))
45 return 0;
46
47 assert(devinfo->is_haswell);
48
49 for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
50 mdapi_data->ACounters[i] = result->accumulator[1 + i];
51
52 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
53 mdapi_data->NOACounters[i] =
54 result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
55 }
56
57 mdapi_data->ReportsCount = result->reports_accumulated;
58 mdapi_data->TotalTime =
59 gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
60 mdapi_data->CoreFrequency = freq_end;
61 mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
62 mdapi_data->SplitOccured = result->query_disjoint;
63 return sizeof(*mdapi_data);
64 }
65 case 8: {
66 struct gen8_mdapi_metrics *mdapi_data = (struct gen8_mdapi_metrics *) data;
67
68 if (data_size < sizeof(*mdapi_data))
69 return 0;
70
71 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
72 mdapi_data->OaCntr[i] = result->accumulator[2 + i];
73 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
74 mdapi_data->NoaCntr[i] =
75 result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
76 }
77
78 mdapi_data->ReportId = result->hw_id;
79 mdapi_data->ReportsCount = result->reports_accumulated;
80 mdapi_data->TotalTime =
81 gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
82 mdapi_data->BeginTimestamp =
83 gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
84 mdapi_data->GPUTicks = result->accumulator[1];
85 mdapi_data->CoreFrequency = freq_end;
86 mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
87 mdapi_data->SliceFrequency =
88 (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
89 mdapi_data->UnsliceFrequency =
90 (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
91 mdapi_data->SplitOccured = result->query_disjoint;
92 return sizeof(*mdapi_data);
93 }
94 case 9:
95 case 11:
96 case 12:{
97 struct gen9_mdapi_metrics *mdapi_data = (struct gen9_mdapi_metrics *) data;
98
99 if (data_size < sizeof(*mdapi_data))
100 return 0;
101
102 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
103 mdapi_data->OaCntr[i] = result->accumulator[2 + i];
104 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
105 mdapi_data->NoaCntr[i] =
106 result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
107 }
108
109 mdapi_data->ReportId = result->hw_id;
110 mdapi_data->ReportsCount = result->reports_accumulated;
111 mdapi_data->TotalTime =
112 gen_device_info_timebase_scale(devinfo, result->accumulator[0]);
113 mdapi_data->BeginTimestamp =
114 gen_device_info_timebase_scale(devinfo, result->begin_timestamp);
115 mdapi_data->GPUTicks = result->accumulator[1];
116 mdapi_data->CoreFrequency = freq_end;
117 mdapi_data->CoreFrequencyChanged = freq_end != freq_start;
118 mdapi_data->SliceFrequency =
119 (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
120 mdapi_data->UnsliceFrequency =
121 (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
122 mdapi_data->SplitOccured = result->query_disjoint;
123 return sizeof(*mdapi_data);
124 }
125 default:
126 unreachable("unexpected gen");
127 }
128 }
129
130 void
gen_perf_register_mdapi_statistic_query(struct gen_perf_config * perf_cfg,const struct gen_device_info * devinfo)131 gen_perf_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
132 const struct gen_device_info *devinfo)
133 {
134 if (!(devinfo->gen >= 7 && devinfo->gen <= 12))
135 return;
136
137 struct gen_perf_query_info *query =
138 gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
139
140 query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
141 query->name = "Intel_Raw_Pipeline_Statistics_Query";
142
143 /* The order has to match mdapi_pipeline_metrics. */
144 gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
145 "N vertices submitted");
146 gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
147 "N primitives submitted");
148 gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
149 "N vertex shader invocations");
150 gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
151 "N geometry shader invocations");
152 gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
153 "N geometry shader primitives emitted");
154 gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
155 "N primitives entering clipping");
156 gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
157 "N primitives leaving clipping");
158 if (devinfo->is_haswell || devinfo->gen == 8) {
159 gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
160 "N fragment shader invocations",
161 "N fragment shader invocations");
162 } else {
163 gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
164 "N fragment shader invocations");
165 }
166 gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
167 "N TCS shader invocations");
168 gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
169 "N TES shader invocations");
170 if (devinfo->gen >= 7) {
171 gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
172 "N compute shader invocations");
173 }
174
175 if (devinfo->gen >= 10) {
176 /* Reuse existing CS invocation register until we can expose this new
177 * one.
178 */
179 gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
180 "Reserved1");
181 }
182
183 query->data_size = sizeof(uint64_t) * query->n_counters;
184 }
185
186 static void
fill_mdapi_perf_query_counter(struct gen_perf_query_info * query,const char * name,uint32_t data_offset,uint32_t data_size,enum gen_perf_counter_data_type data_type)187 fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
188 const char *name,
189 uint32_t data_offset,
190 uint32_t data_size,
191 enum gen_perf_counter_data_type data_type)
192 {
193 struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
194
195 assert(query->n_counters <= query->max_counters);
196
197 counter->name = name;
198 counter->desc = "Raw counter value";
199 counter->type = GEN_PERF_COUNTER_TYPE_RAW;
200 counter->data_type = data_type;
201 counter->offset = data_offset;
202
203 query->n_counters++;
204
205 assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
206 }
207
/* Register the scalar member `field` of the metrics object `metrics` as a
 * raw counter of `query`. The counter is named after the member, its offset
 * is the member's byte distance from the start of `metrics`, and `type_name`
 * selects the GEN_PERF_COUNTER_DATA_TYPE_* enumerator.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, metrics, field, type_name)   \
   fill_mdapi_perf_query_counter(query, #field,                     \
                                 (uint8_t *) &(metrics).field -     \
                                 (uint8_t *) &(metrics),            \
                                 sizeof((metrics).field),           \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)

/* Same as MDAPI_QUERY_ADD_COUNTER but for element `index` of the array
 * member `field`. The counter name is the member name followed by the index,
 * formatted with ralloc_asprintf() using `ctx` as the ralloc context.
 * `index` is printed with "%i", so it must be an int.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, metrics, field, index, type_name) \
   fill_mdapi_perf_query_counter(query,                                            \
                                 ralloc_asprintf(ctx, "%s%i", #field, index),      \
                                 (uint8_t *) &(metrics).field[index] -             \
                                 (uint8_t *) &(metrics),                           \
                                 sizeof((metrics).field[0]),                       \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
221
222 void
gen_perf_register_mdapi_oa_query(struct gen_perf_config * perf,const struct gen_device_info * devinfo)223 gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
224 const struct gen_device_info *devinfo)
225 {
226 struct gen_perf_query_info *query = NULL;
227
228 /* MDAPI requires different structures for pretty much every generation
229 * (right now we have definitions for gen 7 to 12).
230 */
231 if (!(devinfo->gen >= 7 && devinfo->gen <= 12))
232 return;
233
234 switch (devinfo->gen) {
235 case 7: {
236 query = gen_perf_append_query_info(perf, 1 + 45 + 16 + 7);
237 query->oa_format = I915_OA_FORMAT_A45_B8_C8;
238
239 struct gen7_mdapi_metrics metric_data;
240 query->data_size = sizeof(metric_data);
241
242 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
243 for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
244 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
245 metric_data, ACounters, i, UINT64);
246 }
247 for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
248 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
249 metric_data, NOACounters, i, UINT64);
250 }
251 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
252 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
253 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
254 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
255 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
256 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
257 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
258 break;
259 }
260 case 8: {
261 query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16);
262 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
263
264 struct gen8_mdapi_metrics metric_data;
265 query->data_size = sizeof(metric_data);
266
267 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
268 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
269 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
270 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
271 metric_data, OaCntr, i, UINT64);
272 }
273 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
274 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
275 metric_data, NoaCntr, i, UINT64);
276 }
277 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
278 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
279 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
280 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
281 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
282 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
283 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
284 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
285 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
286 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
287 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
288 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
289 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
290 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
291 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
292 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
293 break;
294 }
295 case 9:
296 case 11:
297 case 12: {
298 query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
299 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
300
301 struct gen9_mdapi_metrics metric_data;
302 query->data_size = sizeof(metric_data);
303
304 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
305 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
306 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
307 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
308 metric_data, OaCntr, i, UINT64);
309 }
310 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
311 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
312 metric_data, NoaCntr, i, UINT64);
313 }
314 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
315 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
316 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
317 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
318 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
319 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
320 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
321 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
322 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
323 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
324 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
325 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
326 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
327 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
328 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
329 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
330 for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
331 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
332 metric_data, UserCntr, i, UINT64);
333 }
334 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
335 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
336 break;
337 }
338 default:
339 unreachable("Unsupported gen");
340 break;
341 }
342
343 query->kind = GEN_PERF_QUERY_TYPE_RAW;
344 query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
345 query->guid = GEN_PERF_QUERY_GUID_MDAPI;
346
347 {
348 /* Accumulation buffer offsets copied from an actual query... */
349 const struct gen_perf_query_info *copy_query =
350 &perf->queries[0];
351
352 query->gpu_time_offset = copy_query->gpu_time_offset;
353 query->gpu_clock_offset = copy_query->gpu_clock_offset;
354 query->a_offset = copy_query->a_offset;
355 query->b_offset = copy_query->b_offset;
356 query->c_offset = copy_query->c_offset;
357 }
358 }
359