• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file v3dx_simulator.c
26  *
 * Implements the actual HW interaction between the GL driver and the V3D
 * simulator.
28  *
29  * The register headers between V3D versions will have conflicting defines, so
30  * all register interactions appear in this file and are compiled per V3D version
31  * we support.
32  */
33 
34 #ifdef USE_V3D_SIMULATOR
35 
36 #include <assert.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 
40 #include "v3d_simulator.h"
41 #include "v3d_simulator_wrapper.h"
42 
43 #include "util/macros.h"
44 #include "util/bitscan.h"
45 #include "drm-uapi/v3d_drm.h"
46 
47 #define HW_REGISTER_RO(x) (x)
48 #define HW_REGISTER_RW(x) (x)
49 #if V3D_VERSION >= 41
50 #include "libs/core/v3d/registers/4.1.35.0/v3d.h"
51 #else
52 #include "libs/core/v3d/registers/3.3.0.0/v3d.h"
53 #endif
54 
55 #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
56 #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
57 
58 static void
v3d_invalidate_l3(struct v3d_hw * v3d)59 v3d_invalidate_l3(struct v3d_hw *v3d)
60 {
61 #if V3D_VERSION < 40
62         uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
63 
64         V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
65         V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
66 #endif
67 }
68 
69 /* Invalidates the L2C cache.  This is a read-only cache for uniforms and instructions. */
70 static void
v3d_invalidate_l2c(struct v3d_hw * v3d)71 v3d_invalidate_l2c(struct v3d_hw *v3d)
72 {
73         if (V3D_VERSION >= 33)
74                 return;
75 
76         V3D_WRITE(V3D_CTL_0_L2CACTL,
77                   V3D_CTL_0_L2CACTL_L2CCLR_SET |
78                   V3D_CTL_0_L2CACTL_L2CENA_SET);
79 }
80 
/* Values for the L2TFLM field of V3D_CTL_0_L2TCACTL, selecting what an L2T
 * cache operation does.  The numeric order matches the hardware encoding, so
 * do not reorder.
 */
enum v3d_l2t_cache_flush_mode {
        V3D_CACHE_FLUSH_MODE_FLUSH,     /* used by v3d_invalidate_l2t() */
        V3D_CACHE_FLUSH_MODE_CLEAR,
        V3D_CACHE_FLUSH_MODE_CLEAN,     /* used by v3d_flush_l2t() */
};
86 
87 /* Invalidates texture L2 cachelines */
88 static void
v3d_invalidate_l2t(struct v3d_hw * v3d)89 v3d_invalidate_l2t(struct v3d_hw *v3d)
90 {
91         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
92         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
93         V3D_WRITE(V3D_CTL_0_L2TCACTL,
94                   V3D_CTL_0_L2TCACTL_L2TFLS_SET |
95                   (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
96 }
97 
98 /*
99  * Wait for l2tcactl, used for flushes.
100  *
101  * FIXME: for a multicore scenario we should pass here the core. All wrapper
102  * assumes just one core, so would be better to handle that on that case.
103  */
v3d_core_wait_l2tcactl(struct v3d_hw * v3d,uint32_t ctrl)104 static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
105                                           uint32_t ctrl)
106 {
107    assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET | V3D_CTL_0_L2TCACTL_L2TFLS_SET)));
108 
109    while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
110            v3d_hw_tick(v3d);
111    }
112 }
113 
/* Flushes dirty texture cachelines from the L1 write combiner, then waits
 * for the flush to complete.
 */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        /* Kick the TMU write-combiner flush. */
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        /* Note: here the kernel (and previous versions of the simulator
         * wrapper) is using V3D_CTL_0_L2TCACTL_L2TFLS_SET, as with l2t. We
         * understand that it makes more sense to do like this. We need to
         * confirm which one is doing it correctly. So far things work fine on
         * the simulator this way.
         */
        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
}
129 
130 /* Flushes dirty texture L2 cachelines */
131 static void
v3d_flush_l2t(struct v3d_hw * v3d)132 v3d_flush_l2t(struct v3d_hw *v3d)
133 {
134         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
135         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
136         V3D_WRITE(V3D_CTL_0_L2TCACTL,
137                   V3D_CTL_0_L2TCACTL_L2TFLS_SET |
138                   (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
139 
140         v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
141 }
142 
143 /* Invalidates the slice caches.  These are read-only caches. */
144 static void
v3d_invalidate_slices(struct v3d_hw * v3d)145 v3d_invalidate_slices(struct v3d_hw *v3d)
146 {
147         V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
148 }
149 
/* Invalidates every read cache before kicking a job: L3, L2C, L2T, and the
 * slice caches, in that (top-down) order.
 */
static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}
158 
159 static uint32_t g_gmp_ofs;
160 static void
v3d_reload_gmp(struct v3d_hw * v3d)161 v3d_reload_gmp(struct v3d_hw *v3d)
162 {
163         /* Completely reset the GMP. */
164         V3D_WRITE(V3D_GMP_CFG,
165                   V3D_GMP_CFG_PROTENABLE_SET);
166         V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
167         V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
168         while (V3D_READ(V3D_GMP_STATUS) &
169                V3D_GMP_STATUS_CFG_BUSY_SET) {
170                 ;
171         }
172 }
173 
/* Writes back all dirty cachelines after a job: first the L1 TMU write
 * combiner, then the texture L2.
 */
static UNUSED void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}
180 
/* Implements DRM_IOCTL_V3D_SUBMIT_TFU on the simulator: programs the TFU
 * registers from the ioctl args, kicks the conversion, and busy-waits for
 * completion.  Always returns 0.
 */
int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        /* Snapshot the CVTCT field so we can tell when it changes
         * (presumably a completed-conversion counter -- TODO confirm).
         */
        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;

        V3D_WRITE(V3D_TFU_IIA, args->iia);
        V3D_WRITE(V3D_TFU_IIS, args->iis);
        V3D_WRITE(V3D_TFU_ICA, args->ica);
        V3D_WRITE(V3D_TFU_IUA, args->iua);
        V3D_WRITE(V3D_TFU_IOA, args->ioa);
        V3D_WRITE(V3D_TFU_IOS, args->ios);
        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);

        /* ICFG is written last: it kicks off the conversion. */
        V3D_WRITE(V3D_TFU_ICFG, args->icfg);

        /* Tick the simulator until CVTCT moves, i.e. our job finished. */
        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}
206 
#if V3D_VERSION >= 41
/* Implements DRM_IOCTL_V3D_SUBMIT_CSD on the simulator: reloads the GMP for
 * this client, invalidates caches, programs the compute dispatch config and
 * busy-waits for it to complete.  Always returns 0.
 */
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
        /* Snapshot the completed-dispatch count so we can detect ours. */
        int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
                                   V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
        /* CFG0 kicks off the job, so it must be written last. */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        /* Now we wait for the dispatch to finish. The safest way is to check
         * if NUM_COMPLETED_JOBS has increased. Note that in spite of that
         * name that register field is about the number of completed
         * dispatches.
         */
        while ((V3D_READ(V3D_CSD_0_STATUS) &
                V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
                v3d_hw_tick(v3d);
        }

        /* Write back dirty cachelines now that the dispatch is done. */
        v3d_flush_caches(v3d);

        return 0;
}
#endif
244 
245 int
v3dX(simulator_get_param_ioctl)246 v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
247                                 struct drm_v3d_get_param *args)
248 {
249         static const uint32_t reg_map[] = {
250                 [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
251                 [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
252                 [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
253                 [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
254                 [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
255                 [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
256                 [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
257         };
258 
259         switch (args->param) {
260         case DRM_V3D_PARAM_SUPPORTS_TFU:
261                 args->value = 1;
262                 return 0;
263         case DRM_V3D_PARAM_SUPPORTS_CSD:
264                 args->value = V3D_VERSION >= 41;
265                 return 0;
266         case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
267                 args->value = 1;
268                 return 0;
269         case DRM_V3D_PARAM_SUPPORTS_PERFMON:
270                 args->value = V3D_VERSION >= 41;
271                 return 0;
272         case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
273                 args->value = 1;
274                 return 0;
275         }
276 
277         if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
278                 args->value = V3D_READ(reg_map[args->param]);
279                 return 0;
280         }
281 
282         fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
283                 (long long)args->value);
284         abort();
285 }
286 
287 static struct v3d_hw *v3d_isr_hw;
288 
289 
/* Per-core interrupt handler: services binner out-of-memory by handing the
 * hardware a fresh spill buffer; aborts on GMP violations or any other
 * unexpected interrupt.
 */
static void
v3d_isr_core(struct v3d_hw *v3d,
             unsigned core)
{
        /* FIXME: so far we are assuming just one core, and using only the _0_
         * registers. If we add multiple-core on the simulator, we would need
         * to pass core as a parameter, and chose the proper registers.
         */
        assert(core == 0);
        uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
        /* Ack everything we just read before handling it. */
        V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

        if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                /* Binner ran out of memory: grab a 256KB spill chunk and
                 * point the pool-overflow registers at it.
                 */
                uint32_t size = 256 * 1024;
                uint32_t offset = v3d_simulator_get_spill(size);

                v3d_reload_gmp(v3d);

                V3D_WRITE(V3D_PTB_0_BPOA, offset);
                V3D_WRITE(V3D_PTB_0_BPOS, size);
                return;
        }

        if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
                abort();
        } else {
                fprintf(stderr,
                        "Unexpected ISR with core status 0x%08x\n",
                        core_status);
        }
        abort();
}
324 
325 static void
handle_mmu_interruptions(struct v3d_hw * v3d,uint32_t hub_status)326 handle_mmu_interruptions(struct v3d_hw *v3d,
327                          uint32_t hub_status)
328 {
329         bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
330         bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
331         bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;
332 
333         if (!(pti || cap || wrv))
334                 return;
335 
336         const char *client = "?";
337         uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
338         uint32_t va_width = 30;
339 
340 #if V3D_VERSION >= 41
341         static const char *const v3d41_axi_ids[] = {
342                 "L2T",
343                 "PTB",
344                 "PSE",
345                 "TLB",
346                 "CLE",
347                 "TFU",
348                 "MMU",
349                 "GMP",
350         };
351 
352         axi_id = axi_id >> 5;
353         if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
354                 client = v3d41_axi_ids[axi_id];
355 
356         uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);
357 
358         va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
359                      >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
360 #endif
361         /* Only the top bits (final number depends on the gen) of the virtual
362          * address are reported in the MMU VIO_ADDR register.
363          */
364         uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU_VIO_ADDR) <<
365                              (va_width - 32));
366 
367         /* Difference with the kernal: here were are going to abort after
368          * logging, so we don't bother with some stuff that the kernel does,
369          * like restoring the MMU ctrl bits
370          */
371 
372         fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
373                 client, axi_id, (long long) vio_addr,
374                 wrv ? ", write violation" : "",
375                 pti ? ", pte invalid" : "",
376                 cap ? ", cap exceeded" : "");
377 
378         abort();
379 }
380 
381 static void
v3d_isr_hub(struct v3d_hw * v3d)382 v3d_isr_hub(struct v3d_hw *v3d)
383 {
384         uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);
385 
386         /* Acknowledge the interrupts we're handling here */
387         V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);
388 
389         if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
390                 /* FIXME: we were not able to raise this exception. We let the
391                  * unreachable here, so we could get one if it is raised on
392                  * the future. In any case, note that for this case we would
393                  * only be doing debugging log.
394                  */
395                 unreachable("TFU Conversion Complete interrupt not handled");
396         }
397 
398         handle_mmu_interruptions(v3d, hub_status);
399 }
400 
401 static void
v3d_isr(uint32_t hub_status)402 v3d_isr(uint32_t hub_status)
403 {
404         struct v3d_hw *v3d = v3d_isr_hw;
405         uint32_t mask = hub_status;
406 
407         /* Check the hub_status bits */
408         while (mask) {
409                 unsigned core = u_bit_scan(&mask);
410 
411                 if (core == v3d_hw_get_hub_core())
412                         v3d_isr_hub(v3d);
413                 else
414                         v3d_isr_core(v3d, core);
415         }
416 
417         return;
418 }
419 
/* One-time register setup for a fresh simulator instance: TMU output config
 * (3.3 only), interrupt unmasking for the bits our ISR handles, and ISR
 * installation.
 */
void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
#if V3D_VERSION == 33
        /* Set OVRTMUOUT to match kernel behavior.
         *
         * This means that the texture sampler uniform configuration's tmu
         * output type field is used, instead of using the hardware default
         * behavior based on the texture type.  If you want the default
         * behavior, you can still put "2" in the indirect texture state's
         * output_type field.
         *
         * NOTE(review): this writes the _1_ field define into the _0_
         * register -- presumably the bit layout is identical across cores;
         * confirm against the 3.3 register header.
         */
        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif

        /* FIXME: the kernel captures some additional core interrupts here,
         * for tracing. Perhaps we should evaluate to do the same here and add
         * some debug options.
         */
        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
        /* Mask everything except the bits we handle (presumably MSK_SET
         * disables and MSK_CLR enables, mirroring the kernel -- confirm).
         */
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        uint32_t hub_interrupts =
           (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET |  /* write violation */
            V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET |  /* page table invalid */
            V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET |  /* CAP exceeded */
            V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */

        V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);

        /* Stash the handle for the ISR, which only receives a status word. */
        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}
456 
/* Implements DRM_IOCTL_V3D_SUBMIT_CL on the simulator: runs the binner
 * command list, waits for it, then runs the render command list and waits
 * for that, invalidating caches before each phase.
 */
void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        /* Snapshot the bin/render frame-completion counters so we can tell
         * when our own frame finishes.
         */
        int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
                        V3D_CLE_0_BFC_BMFCT_SET);

        int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
                        V3D_CLE_0_RFC_RMFCT_SET);

        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        /* Program qma/qms if provided (presumably the tile alloc memory
         * address/size -- confirm against the CLE register docs).
         */
        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
#endif
        /* Program the bin CL bounds; this kicks the binner. */
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render.  The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing.  (We don't use HW semaphores).
         */
        while ((V3D_READ(V3D_CLE_0_BFC) &
                V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
                v3d_hw_tick(v3d);
        }

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        /* Tick until the render frame count moves past our snapshot. */
        while ((V3D_READ(V3D_CLE_0_RFC) &
                V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
                v3d_hw_tick(v3d);
        }
}
506 
#if V3D_VERSION >= 41
/* Address of the x-th performance counter value register. */
#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
/* Address of the x-th source-select register (each one packs 4 counters). */
#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
/* Bit offset of counter x's 8-bit lane within its source register. */
#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
/* 7-bit event-select mask for counter x within its source register. */
#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
                                                 V3D_PCTR_0_SRC_N_SHIFT(x) + 6))
#endif
514 
515 void
v3dX(simulator_perfmon_start)516 v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
517                               uint32_t ncounters,
518                               uint8_t *events)
519 {
520 #if V3D_VERSION >= 41
521         int i, j;
522         uint32_t source;
523         uint32_t mask = BITFIELD_RANGE(0, ncounters);
524 
525         for (i = 0; i < ncounters; i+=4) {
526                 source = i / 4;
527                 uint32_t channels = 0;
528                 for (j = 0; j < 4 && (i + j) < ncounters; j++)
529                         channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
530                 V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
531         }
532         V3D_WRITE(V3D_PCTR_0_CLR, mask);
533         V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
534         V3D_WRITE(V3D_PCTR_0_EN, mask);
535 #endif
536 }
537 
v3dX(simulator_perfmon_stop)538 void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
539                                   uint32_t ncounters,
540                                   uint64_t *values)
541 {
542 #if V3D_VERSION >= 41
543         int i;
544 
545         for (i = 0; i < ncounters; i++)
546                 values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));
547 
548         V3D_WRITE(V3D_PCTR_0_EN, 0);
549 #endif
550 }
551 
552 #endif /* USE_V3D_SIMULATOR */
553