/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Expose the V3D HW perf counters.
 *
 * We also have code to fake support for occlusion queries.
 * Since we expose support for GL 2.0, we have to expose occlusion queries,
 * but the spec allows you to expose 0 query counter bits, so we just return 0
 * as the result of all our queries.
 */
#include "vc4_context.h"

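/* A query object either wraps a HW perfmon (for the driver-specific
 * performance counter queries) or, for the faked occlusion queries, carries
 * no extra state at all.
 */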
struct vc4_query
{
        unsigned num_queries;
        struct vc4_hwperfmon *hwperfmon;
};

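/* Human-readable names for the V3D performance counters.  The index into this
 * array is used directly as the kernel counter ID (the driver-specific query
 * type is PIPE_QUERY_DRIVER_SPECIFIC plus this index), so the order is
 * expected to match the kernel's counter numbering.
 */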
static const char *v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discarded-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};

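/* Enumerates the driver-specific query groups: a single "V3D counters" group
 * when the kernel has the perfmon ioctls, none otherwise.  Called with
 * info == NULL to get the group count.
 */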
int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
                                    unsigned index,
                                    struct pipe_driver_query_group_info *info)
{
        struct vc4_screen *screen = vc4_screen(pscreen);

        if (!screen->has_perfmon_ioctl)
                return 0;

        if (!info)
                return 1;

        if (index > 0)
                return 0;

        info->name = "V3D counters";
        info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
        info->num_queries = ARRAY_SIZE(v3d_counter_names);
        return 1;
}

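/* Enumerates the driver-specific queries, one per V3D performance counter.
 * Called with info == NULL to get the query count.
 */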
int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                              struct pipe_driver_query_info *info)
{
        struct vc4_screen *screen = vc4_screen(pscreen);

        if (!screen->has_perfmon_ioctl)
                return 0;

        if (!info)
                return ARRAY_SIZE(v3d_counter_names);

        if (index >= ARRAY_SIZE(v3d_counter_names))
                return 0;

        info->group_id = 0;
        info->name = v3d_counter_names[index];
        info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
        info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
        return 1;
}

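/* Creates a query object for a batch of query types.  Driver-specific
 * (performance counter) queries are gathered into a single HW perfmon;
 * mixing them with core Gallium queries in one batch is rejected.
 */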
static struct pipe_query *
vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
                       unsigned *query_types)
{
        struct vc4_query *query = calloc(1, sizeof(*query));
        struct vc4_hwperfmon *hwperfmon;
        unsigned i, nhwqueries = 0;

        if (!query)
                return NULL;

        for (i = 0; i < num_queries; i++) {
                if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
                        nhwqueries++;
        }

        /* We can't mix HW and non-HW queries. */
        if (nhwqueries && nhwqueries != num_queries)
                goto err_free_query;

        if (!nhwqueries)
                return (struct pipe_query *)query;

        hwperfmon = calloc(1, sizeof(*hwperfmon));
        if (!hwperfmon)
                goto err_free_query;

        for (i = 0; i < num_queries; i++)
                hwperfmon->events[i] = query_types[i] -
                                       PIPE_QUERY_DRIVER_SPECIFIC;

        query->hwperfmon = hwperfmon;
        query->num_queries = num_queries;

        /* Note that struct pipe_query isn't actually defined anywhere. */
        return (struct pipe_query *)query;

err_free_query:
        free(query);

        return NULL;
}

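/* A single query is just a batch query of one entry; this also covers the
 * faked occlusion queries.
 */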
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        return vc4_create_batch_query(ctx, 1, &query_type);
}

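/* Frees the query object and, for HW queries, destroys the kernel perfmon if
 * one was created.
 */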
static void
vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
        struct vc4_context *ctx = vc4_context(pctx);
        struct vc4_query *query = (struct vc4_query *)pquery;

        if (query->hwperfmon) {
                if (query->hwperfmon->id) {
                        struct drm_vc4_perfmon_destroy req = { };

                        req.id = query->hwperfmon->id;
                        vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY,
                                  &req);
                }

                free(query->hwperfmon);
        }

        free(query);
}

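/* For HW queries, (re)creates a kernel perfmon, flushes pending jobs and
 * makes the perfmon active on the context.  Only one perfmon can be active at
 * a time, and recreating the perfmon resets any previously recorded values.
 */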
static bool
vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
        struct vc4_query *query = (struct vc4_query *)pquery;
        struct vc4_context *ctx = vc4_context(pctx);
        struct drm_vc4_perfmon_create req = { };
        unsigned i;
        int ret;

        if (!query->hwperfmon)
                return true;

        /* Only one perfmon can be activated per context. */
        if (ctx->perfmon)
                return false;

        /* Reset the counters by destroying the previously allocated perfmon */
        if (query->hwperfmon->id) {
                struct drm_vc4_perfmon_destroy destroyreq = { };

                destroyreq.id = query->hwperfmon->id;
                vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq);
        }

        for (i = 0; i < query->num_queries; i++)
                req.events[i] = query->hwperfmon->events[i];

        req.ncounters = query->num_queries;
        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
        if (ret)
                return false;

        query->hwperfmon->id = req.id;

        /* Make sure all pending jobs are flushed before activating the
         * perfmon.
         */
        vc4_flush(pctx);
        ctx->perfmon = query->hwperfmon;
        return true;
}

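/* For HW queries, flushes pending jobs and deactivates the context's perfmon.
 * The perfmon itself is kept around so its values can still be read back.
 */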
static bool
vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
        struct vc4_query *query = (struct vc4_query *)pquery;
        struct vc4_context *ctx = vc4_context(pctx);

        if (!query->hwperfmon)
                return true;

        if (ctx->perfmon != query->hwperfmon)
                return false;

        /* Make sure all pending jobs are flushed before deactivating the
         * perfmon.
         */
        vc4_flush(pctx);
        ctx->perfmon = NULL;
        return true;
}

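/* Reads back query results.  Faked occlusion queries always report 0; for HW
 * queries we wait (if requested) for the last job that used the perfmon and
 * then fetch the accumulated counter values from the kernel.
 */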
static bool
vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
                     bool wait, union pipe_query_result *vresult)
{
        struct vc4_context *ctx = vc4_context(pctx);
        struct vc4_query *query = (struct vc4_query *)pquery;
        struct drm_vc4_perfmon_get_values req;
        unsigned i;
        int ret;

        if (!query->hwperfmon) {
                vresult->u64 = 0;
                return true;
        }

        if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno,
                            wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon"))
                return false;

        req.id = query->hwperfmon->id;
        req.values_ptr = (uintptr_t)query->hwperfmon->counters;
        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
        if (ret)
                return false;

        for (i = 0; i < query->num_queries; i++)
                vresult->batch[i].u64 = query->hwperfmon->counters[i];

        return true;
}

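/* Nothing to do here: occlusion queries are faked and the perfmon counters
 * are not affected by the active-query state.
 */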
static void
vc4_set_active_query_state(struct pipe_context *pctx, bool enable)
{
}

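/* Hooks the query entry points up on the pipe_context. */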
void
vc4_query_init(struct pipe_context *pctx)
{
        pctx->create_query = vc4_create_query;
        pctx->create_batch_query = vc4_create_batch_query;
        pctx->destroy_query = vc4_destroy_query;
        pctx->begin_query = vc4_begin_query;
        pctx->end_query = vc4_end_query;
        pctx->get_query_result = vc4_get_query_result;
        pctx->set_active_query_state = vc4_set_active_query_state;
}