/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3dx_simulator.c
 *
 * Implements the actual HW interaction between the GL driver's V3D
 * simulator layer and the simulator.
 *
 * The register headers between V3D versions will have conflicting defines,
 * so all register interactions appear in this file and are compiled once per
 * V3D version we support.
 */

#ifdef USE_V3D_SIMULATOR

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#include "v3d_simulator.h"
#include "v3d_simulator_wrapper.h"

#include "util/macros.h"
#include "util/bitscan.h"
#include "drm-uapi/v3d_drm.h"

#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
#if V3D_VERSION >= 41
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
#else
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
#endif

#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
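/* Note that these macros assume a `struct v3d_hw *v3d` local is in scope at
 * the call site; all the helpers below take it as their first argument.
 */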

static void
v3d_invalidate_l3(struct v3d_hw *v3d)
{
#if V3D_VERSION < 40
        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);

        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
#endif
}

/* Invalidates the L2C cache.  This is a read-only cache for uniforms and
 * instructions.
 */
static void
v3d_invalidate_l2c(struct v3d_hw *v3d)
{
        if (V3D_VERSION >= 33)
                return;

        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}

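/* Flush mode encodings for the L2TCACTL L2TFLM field, going by how they are
 * used below: FLUSH invalidates cachelines, CLEAN writes dirty lines back to
 * memory, and CLEAR presumably does both.
 */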
enum v3d_l2t_cache_flush_mode {
        V3D_CACHE_FLUSH_MODE_FLUSH,
        V3D_CACHE_FLUSH_MODE_CLEAR,
        V3D_CACHE_FLUSH_MODE_CLEAN,
};

/* Invalidates texture L2 cachelines */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}

/*
 * Waits for L2TCACTL to settle; used for flushes.
 *
 * FIXME: for a multicore scenario we should pass the core here.  The whole
 * wrapper assumes just one core, so it would be better to handle that there.
 */
static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
                                          uint32_t ctrl)
{
        assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET | V3D_CTL_0_L2TCACTL_L2TFLS_SET)));

        while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
                v3d_hw_tick(v3d);
        }
}

/* Flushes dirty texture cachelines from the L1 write combiner */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        /* Note: the kernel (and previous versions of the simulator wrapper)
         * waits on V3D_CTL_0_L2TCACTL_L2TFLS_SET here, as with l2t.  Waiting
         * on TMUWCF seems to make more sense, but we still need to confirm
         * which one is correct.  So far things work fine on the simulator
         * this way.
         */
        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
}

/* Flushes dirty texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));

        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}

static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}

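/* Offset of the GMP (graphics memory protection) table for the current
 * client, programmed into the HW on every job submission.
 */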
static uint32_t g_gmp_ofs;
static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_CFG,
                  V3D_GMP_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
        while (V3D_READ(V3D_GMP_STATUS) &
               V3D_GMP_STATUS_CFG_BUSY_SET) {
                ;
        }
}

static UNUSED void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}

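/* Submits a TFU (texture formatting unit) job.  Completion is detected by
 * snapshotting the CVTCT counter field of V3D_TFU_CS and ticking the
 * simulator until it changes.
 */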
int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;

        V3D_WRITE(V3D_TFU_IIA, args->iia);
        V3D_WRITE(V3D_TFU_IIS, args->iis);
        V3D_WRITE(V3D_TFU_ICA, args->ica);
        V3D_WRITE(V3D_TFU_IUA, args->iua);
        V3D_WRITE(V3D_TFU_IOA, args->ioa);
        V3D_WRITE(V3D_TFU_IOS, args->ios);
        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);

        V3D_WRITE(V3D_TFU_ICFG, args->icfg);

        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}

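/* Submits a CSD (compute shader dispatch) job, which is only present on V3D
 * 4.1+.  The CFG0 write is what actually queues the dispatch, so it is done
 * last.
 */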
#if V3D_VERSION >= 41
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
        int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
                                   V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
        /* CFG0 kicks off the job */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        /* Now we wait for the dispatch to finish.  The safest way is to
         * check if NUM_COMPLETED_JOBS has increased.  Note that in spite of
         * that name, the register field is about the number of completed
         * dispatches.
         */
        while ((V3D_READ(V3D_CSD_0_STATUS) &
                V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
}
#endif

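/* Implements a subset of the GET_PARAM ioctl: feature queries are answered
 * inline, while ident/config params are serviced by reading the register
 * named in reg_map.
 */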
int
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_get_param *args)
{
        static const uint32_t reg_map[] = {
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
                [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
        };

        switch (args->param) {
        case DRM_V3D_PARAM_SUPPORTS_TFU:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CSD:
                args->value = V3D_VERSION >= 41;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_PERFMON:
                args->value = V3D_VERSION >= 41;
                return 0;
        }

        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                args->value = V3D_READ(reg_map[args->param]);
                return 0;
        }

        fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
                (long long)args->param);
        abort();
}

static struct v3d_hw *v3d_isr_hw;

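/* Core interrupt handler.  Binner out-of-memory is serviced by pointing the
 * PTB at a freshly allocated 256KB spill BO; a GMP violation (or any other
 * interrupt) is fatal.
 */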
static void
v3d_isr_core(struct v3d_hw *v3d,
             unsigned core)
{
        /* FIXME: so far we are assuming just one core, and using only the
         * _0_ registers.  If we add multiple cores to the simulator, we
         * would need to pass core as a parameter and choose the proper
         * registers.
         */
        assert(core == 0);
        uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
        V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

        if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                uint32_t size = 256 * 1024;
                uint32_t offset = v3d_simulator_get_spill(size);

                v3d_reload_gmp(v3d);

                V3D_WRITE(V3D_PTB_0_BPOA, offset);
                V3D_WRITE(V3D_PTB_0_BPOS, size);
                return;
        }

        if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
                abort();
        } else {
                fprintf(stderr,
                        "Unexpected ISR with core status 0x%08x\n",
                        core_status);
        }
        abort();
}

static void
handle_mmu_interruptions(struct v3d_hw *v3d,
                         uint32_t hub_status)
{
        bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
        bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
        bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;

        if (!(pti || cap || wrv))
                return;

        const char *client = "?";
        uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
        uint32_t va_width = 30;

#if V3D_VERSION >= 41
        static const char *const v3d41_axi_ids[] = {
                "L2T",
                "PTB",
                "PSE",
                "TLB",
                "CLE",
                "TFU",
                "MMU",
                "GMP",
        };

        axi_id = axi_id >> 5;
        if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
                client = v3d41_axi_ids[axi_id];

        uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);

        va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
                     >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
#endif
        /* Only the top bits (the final number depends on the gen) of the
         * virtual address are reported in the MMU VIO_ADDR register.
         */
        uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU_VIO_ADDR) <<
                             (va_width - 32));

        /* Difference with the kernel: here we are going to abort after
         * logging, so we don't bother with some stuff that the kernel does,
         * like restoring the MMU ctrl bits.
         */

        fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
                client, axi_id, (long long)vio_addr,
                wrv ? ", write violation" : "",
                pti ? ", pte invalid" : "",
                cap ? ", cap exceeded" : "");

        abort();
}

static void
v3d_isr_hub(struct v3d_hw *v3d)
{
        uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);

        /* Acknowledge the interrupts we're handling here */
        V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);

        if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
                /* FIXME: we have not been able to raise this interrupt yet.
                 * We leave the unreachable here so we notice if it is ever
                 * raised in the future.  In any case, note that for this
                 * interrupt we would only be doing debug logging.
                 */
                unreachable("TFU Conversion Complete interrupt not handled");
        }

        handle_mmu_interruptions(v3d, hub_status);
}

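/* Top-level ISR: the wrapper hands us a bitmask of cores with pending
 * interrupts, where one bit (v3d_hw_get_hub_core()) stands for the hub
 * itself.
 */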
static void
v3d_isr(uint32_t hub_status)
{
        struct v3d_hw *v3d = v3d_isr_hw;
        uint32_t mask = hub_status;

        /* Check the hub_status bits */
        while (mask) {
                unsigned core = u_bit_scan(&mask);

                if (core == v3d_hw_get_hub_core())
                        v3d_isr_hub(v3d);
                else
                        v3d_isr_core(v3d, core);
        }

        return;
}

void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
#if V3D_VERSION == 33
        /* Set OVRTMUOUT to match kernel behavior.
         *
         * This means that the texture sampler uniform configuration's tmu
         * output type field is used, instead of using the hardware default
         * behavior based on the texture type.  If you want the default
         * behavior, you can still put "2" in the indirect texture state's
         * output_type field.
         */
        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif

        /* FIXME: the kernel captures some additional core interrupts here,
         * for tracing.  Perhaps we should evaluate doing the same here and
         * adding some debug options.
         */
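        /* Writing 1s to MSK_SET masks (disables) interrupt sources and
         * writing 1s to MSK_CLR unmasks them, so this leaves only the
         * interrupts our ISR handles enabled.
         */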
        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        uint32_t hub_interrupts =
                (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET |  /* write violation */
                 V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET |  /* page table invalid */
                 V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET |  /* CAP exceeded */
                 V3D_HUB_CTL_INT_STS_INT_TFUC_SET);     /* TFU conversion */

        V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);

        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}

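/* Submits a CL (bin + render command list) job.  BFC/RFC are free-running
 * "frames completed" counters for the bin and render phases, so each phase's
 * completion is detected by waiting for the snapshotted count to change.
 */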
void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
                        V3D_CLE_0_BFC_BMFCT_SET);

        int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
                        V3D_CLE_0_RFC_RMFCT_SET);

        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
#endif
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render.  The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing.  (We don't use HW semaphores).
         */
        while ((V3D_READ(V3D_CLE_0_BFC) &
                V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
                v3d_hw_tick(v3d);
        }

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        while ((V3D_READ(V3D_CLE_0_RFC) &
                V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
                v3d_hw_tick(v3d);
        }
}

#if V3D_VERSION >= 41
#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
                                                 V3D_PCTR_0_SRC_N_SHIFT(x) + 6))
#endif

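/* Each 32-bit SRC register packs the event selectors for four consecutive
 * counters, 8 bits apiece.  For example, starting counters 0-3 on events
 * {13, 14, 15, 16} writes (13 | 14 << 8 | 15 << 16 | 16 << 24) to
 * V3D_PCTR_0_SRC_N(0) and then 0xf to the CLR, OVERFLOW, and EN registers.
 */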
void
v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
                              uint32_t ncounters,
                              uint8_t *events)
{
#if V3D_VERSION >= 41
        int i, j;
        uint32_t source;
        uint32_t mask = BITFIELD_RANGE(0, ncounters);

        for (i = 0; i < ncounters; i += 4) {
                source = i / 4;
                uint32_t channels = 0;
                for (j = 0; j < 4 && (i + j) < ncounters; j++)
                        channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
                V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
        }
        V3D_WRITE(V3D_PCTR_0_CLR, mask);
        V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
        V3D_WRITE(V3D_PCTR_0_EN, mask);
#endif
}

void
v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
                             uint32_t ncounters,
                             uint64_t *values)
{
#if V3D_VERSION >= 41
        int i;

        for (i = 0; i < ncounters; i++)
                values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));

        V3D_WRITE(V3D_PCTR_0_EN, 0);
#endif
}

#endif /* USE_V3D_SIMULATOR */