/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3dx_simulator.c
 *
 * Implements the actual HW interaction between the GL driver's V3D simulator
 * and the simulator.
 *
 * The register headers between V3D versions have conflicting defines, so all
 * register interactions appear in this file, which is compiled once per V3D
 * version we support.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#include "v3d_simulator.h"
#include "v3d_simulator_wrapper.h"

#include "common/v3d_performance_counters.h"

#include "util/macros.h"
#include "util/bitscan.h"
#include "drm-uapi/v3d_drm.h"

#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
#if V3D_VERSION == 71
#include "libs/core/v3d/registers/7.1.7.0/v3d.h"
#elif V3D_VERSION == 42
#include "libs/core/v3d/registers/4.2.14.0/v3d.h"
#endif

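/* Convenience wrappers for register access through the simulator wrapper;
 * "v3d" is expected to be in scope at every use site.
 */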
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)

/* Invalidates the L2C cache. This is a read-only cache for uniforms and
 * instructions.
 */
static void
v3d_invalidate_l2c(struct v3d_hw *v3d)
{
        if (V3D_VERSION >= 33)
                return;

        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}

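/* Flush modes for the L2T cache. The mode is written into the L2TFLM field
 * of L2TCACTL together with L2TFLS (see v3d_invalidate_l2t() and
 * v3d_flush_l2t() below).
 */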
enum v3d_l2t_cache_flush_mode {
        V3D_CACHE_FLUSH_MODE_FLUSH,
        V3D_CACHE_FLUSH_MODE_CLEAR,
        V3D_CACHE_FLUSH_MODE_CLEAN,
};

/* Invalidates texture L2 cachelines */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}

/*
 * Waits on L2TCACTL, used for flushes.
 *
 * FIXME: for a multicore scenario we should pass the core here. The whole
 * wrapper assumes just one core, so it would be better to handle that there.
 */
static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
                                          uint32_t ctrl)
{
        assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET |
                          V3D_CTL_0_L2TCACTL_L2TFLS_SET)));

        while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
                v3d_hw_tick(v3d);
        }
}

/* Flushes dirty texture cachelines from the L1 write combiner */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        /* Note: the kernel (and previous versions of the simulator wrapper)
         * waits on V3D_CTL_0_L2TCACTL_L2TFLS_SET here, as for the L2T. We
         * believe waiting on TMUWCF makes more sense, but we still need to
         * confirm which one is correct. So far things work fine on the
         * simulator this way.
         */
        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
}

/* Flushes dirty texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));

        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
}

/* Invalidates the slice caches. These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}

static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}

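/* Offset of the GMP table in simulator memory. It is set from the gmp_ofs
 * argument of the submit ioctls and reloaded into the GMP on each submit
 * (and on binner OOM spills).
 */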
static uint32_t g_gmp_ofs;
static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_CFG,
                  V3D_GMP_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
        while (V3D_READ(V3D_GMP_STATUS) &
               V3D_GMP_STATUS_CFG_BUSY_SET) {
                ;
        }
}

static UNUSED void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}

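/* Starting with V3D 7.1 the TFU registers live under the V3D_IFC_ prefix in
 * the register headers, so TFU_REG() selects the right name for the version
 * being compiled.
 */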
#if V3D_VERSION < 71
#define TFU_REG(NAME) V3D_TFU_ ## NAME
#else
#define TFU_REG(NAME) V3D_IFC_ ## NAME
#endif

int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        int last_vtct = V3D_READ(TFU_REG(CS)) & TFU_REG(CS_CVTCT_SET);

        V3D_WRITE(TFU_REG(IIA), args->iia);
        V3D_WRITE(TFU_REG(IIS), args->iis);
        V3D_WRITE(TFU_REG(ICA), args->ica);
        V3D_WRITE(TFU_REG(IUA), args->iua);
        V3D_WRITE(TFU_REG(IOA), args->ioa);
#if V3D_VERSION >= 71
        V3D_WRITE(TFU_REG(IOC), args->v71.ioc);
#endif
        V3D_WRITE(TFU_REG(IOS), args->ios);
        V3D_WRITE(TFU_REG(COEF0), args->coef[0]);
        V3D_WRITE(TFU_REG(COEF1), args->coef[1]);
        V3D_WRITE(TFU_REG(COEF2), args->coef[2]);
        V3D_WRITE(TFU_REG(COEF3), args->coef[3]);

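        /* ICFG is written last, as it is what kicks off the conversion job
         * (mirroring what the kernel driver does).
         */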
        V3D_WRITE(TFU_REG(ICFG), args->icfg);

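        /* Wait for the conversion count (CS_CVTCT) to change, which signals
         * that the submitted conversion has completed.
         */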
        while ((V3D_READ(TFU_REG(CS)) & TFU_REG(CS_CVTCT_SET)) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}

int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
#if V3D_VERSION >= 42
        int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
                                   V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
#if V3D_VERSION >= 71
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0);
#endif
        /* CFG0 kicks off the job */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        /* Now we wait for the dispatch to finish. The safest way is to
         * check if NUM_COMPLETED_JOBS has increased. Note that despite its
         * name, that register field counts completed dispatches.
         */
        while ((V3D_READ(V3D_CSD_0_STATUS) &
                V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
#else
        return -1;
#endif
}

int
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                uint32_t perfcnt_total,
                                struct drm_v3d_get_param *args)
{
        static const uint32_t reg_map[] = {
#if V3D_VERSION >= 71
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_IDENT0,
#else
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
#endif
                [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
        };

        switch (args->param) {
        case DRM_V3D_PARAM_SUPPORTS_TFU:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CSD:
                args->value = V3D_VERSION >= 42;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_PERFMON:
                args->value = V3D_VERSION >= 42;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_MAX_PERF_COUNTERS:
                args->value = perfcnt_total;
                return 0;
        }

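        /* Any param not listed in reg_map is left as 0 by the designated
         * initializer, which we rely on below to mean "no register mapped".
         */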
        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                args->value = V3D_READ(reg_map[args->param]);
                return 0;
        }

        fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
                (long long)args->param);
        abort();
}

int
v3dX(simulator_perfmon_get_counter_ioctl)(uint32_t perfcnt_total,
                                          struct drm_v3d_perfmon_get_counter *args)
{
        const char **counter = NULL;

        /* Make sure that the counter ID is valid */
        if (args->counter >= perfcnt_total)
                return -1;

        counter = v3d_performance_counters[args->counter];

        strncpy((char *)args->name, counter[V3D_PERFCNT_NAME],
                DRM_V3D_PERFCNT_MAX_NAME);

        strncpy((char *)args->category, counter[V3D_PERFCNT_CATEGORY],
                DRM_V3D_PERFCNT_MAX_CATEGORY);

        strncpy((char *)args->description, counter[V3D_PERFCNT_DESCRIPTION],
                DRM_V3D_PERFCNT_MAX_DESCRIPTION);

        return 0;
}

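/* The ISR callback from the wrapper doesn't take a v3d_hw handle, so we
 * stash it here when registering the ISR.
 */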
static struct v3d_hw *v3d_isr_hw;

static void
v3d_isr_core(struct v3d_hw *v3d,
             unsigned core)
{
        /* FIXME: so far we are assuming just one core, and using only the
         * _0_ registers. If we add multiple-core support to the simulator,
         * we would need to use the core parameter to choose the proper
         * registers.
         */
        assert(core == 0);
        uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
        V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

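        /* Binner out-of-memory: get more binning memory from the simulator
         * and hand it to the PTB as overflow memory, then return without
         * aborting.
         */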
        if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                uint32_t size = 256 * 1024;
                uint32_t offset = v3d_simulator_get_spill(size);

                v3d_reload_gmp(v3d);

                V3D_WRITE(V3D_PTB_0_BPOA, offset);
                V3D_WRITE(V3D_PTB_0_BPOS, size);
                return;
        }

#if V3D_VERSION <= 42
        if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
        } else {
                fprintf(stderr,
                        "Unexpected ISR with core status 0x%08x\n",
                        core_status);
        }
        abort();
#endif
}

static void
handle_mmu_interruptions(struct v3d_hw *v3d,
                         uint32_t hub_status)
{
        bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
        bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
        bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;

        if (!(pti || cap || wrv))
                return;

        const char *client = "?";
        uint32_t axi_id = V3D_READ(V3D_MMU0_VIO_ID);
        uint32_t va_width = 30;

        static const char *const v3d42_axi_ids[] = {
                "L2T",
                "PTB",
                "PSE",
                "TLB",
                "CLE",
                "TFU",
                "MMU",
                "GMP",
        };

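        /* The client is encoded in the upper bits of the AXI ID (the low 5
         * bits are presumably a per-client sub-ID), so shift them away
         * before indexing the table.
         */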
        axi_id = axi_id >> 5;
        if (axi_id < ARRAY_SIZE(v3d42_axi_ids))
                client = v3d42_axi_ids[axi_id];

        uint32_t mmu_debug = V3D_READ(V3D_MMU0_DEBUG_INFO);

        va_width += ((mmu_debug & V3D_MMU0_DEBUG_INFO_VA_WIDTH_SET)
                     >> V3D_MMU0_DEBUG_INFO_VA_WIDTH_LSB);

        /* Only the top bits (the exact number depends on the gen) of the
         * virtual address are reported in the MMU VIO_ADDR register.
         */
        uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU0_VIO_ADDR) <<
                             (va_width - 32));

        /* Difference with the kernel: here we are going to abort after
         * logging, so we don't bother with some of the things the kernel
         * does, like restoring the MMU ctrl bits.
         */

        fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
                client, axi_id, (long long)vio_addr,
                wrv ? ", write violation" : "",
                pti ? ", pte invalid" : "",
                cap ? ", cap exceeded" : "");

        abort();
}

static void
v3d_isr_hub(struct v3d_hw *v3d)
{
        uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);

        /* Acknowledge the interrupts we're handling here */
        V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);

        if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
                /* FIXME: we have not been able to trigger this interrupt.
                 * We leave the unreachable here so we notice if it ever
                 * starts being raised in the future. In any case, for this
                 * interrupt we would only be logging debug info.
                 */
                unreachable("TFU Conversion Complete interrupt not handled");
        }

        handle_mmu_interruptions(v3d, hub_status);

#if V3D_VERSION == 71
        if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
        } else {
                fprintf(stderr,
                        "Unexpected ISR with status 0x%08x\n",
                        hub_status);
        }
        abort();
#endif
}

static void
v3d_isr(uint32_t hub_status)
{
        struct v3d_hw *v3d = v3d_isr_hw;
        uint32_t mask = hub_status;

        /* Check the hub_status bits: each set bit identifies either the hub
         * or a core with a pending interrupt.
         */
        while (mask) {
                unsigned core = u_bit_scan(&mask);

                if (core == v3d_hw_get_hub_core())
                        v3d_isr_hub(v3d);
                else
                        v3d_isr_core(v3d, core);
        }
}

void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
        /* FIXME: the kernel captures some additional core interrupts here,
         * for tracing. Perhaps we should evaluate doing the same here and
         * adding some debug options.
         */
        uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET;
#if V3D_VERSION <= 42
        core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET;
#endif

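        /* MSK_SET masks (disables) interrupts and MSK_CLR unmasks them, so
         * this leaves only the interrupts we handle enabled.
         */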
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        uint32_t hub_interrupts =
                (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET | /* write violation */
                 V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET | /* page table invalid */
                 V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */
                 V3D_HUB_CTL_INT_STS_INT_TFUC_SET);    /* TFU conversion */

#if V3D_VERSION == 71
        hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET;
#endif
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);

        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}

void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
                        V3D_CLE_0_BFC_BMFCT_SET);

        int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
                        V3D_CLE_0_RFC_RMFCT_SET);

        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
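        /* CT0 is the binner thread: set the start and end addresses of the
         * binner control list; writing the end address is what queues the
         * job.
         */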
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render. The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing. (We don't use HW semaphores).
         */
        while ((V3D_READ(V3D_CLE_0_BFC) &
                V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
                v3d_hw_tick(v3d);
        }

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        while ((V3D_READ(V3D_CLE_0_RFC) &
                V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
                v3d_hw_tick(v3d);
        }
}

#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
                                                 V3D_PCTR_0_SRC_N_SHIFT(x) + \
                                                 V3D_PCTR_0_SRC_0_3_PCTRS0_MSB))

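/* Each SRC register configures four counters, one event selector per byte.
 * As a worked example (with made-up event IDs): events {5, 9, 13, 2} for
 * counters 0-3 pack into V3D_PCTR_0_SRC_N(0) as
 * (5 << 0) | (9 << 8) | (13 << 16) | (2 << 24) = 0x020d0905.
 */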
void
v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
                              uint32_t ncounters,
                              uint8_t *events)
{
        int i, j;
        uint32_t source;
        uint32_t mask = BITFIELD_RANGE(0, ncounters);

        for (i = 0; i < ncounters; i += 4) {
                source = i / 4;
                uint32_t channels = 0;
                for (j = 0; j < 4 && (i + j) < ncounters; j++)
                        channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
                V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
        }
        V3D_WRITE(V3D_PCTR_0_CLR, mask);
        V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
        V3D_WRITE(V3D_PCTR_0_EN, mask);
}

void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
                                  uint32_t ncounters,
                                  uint64_t *values)
{
        int i;

        for (i = 0; i < ncounters; i++)
                values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));

        V3D_WRITE(V3D_PCTR_0_EN, 0);
}

void v3dX(simulator_get_perfcnt_total)(uint32_t *count)
{
        *count = ARRAY_SIZE(v3d_performance_counters);
}
595