• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file v3dx_simulator.c
26  *
 * Implements the actual HW interaction between the GL driver's V3D simulator and the simulator.
28  *
29  * The register headers between V3D versions will have conflicting defines, so
30  * all register interactions appear in this file and are compiled per V3D version
31  * we support.
32  */
33 
34 #include <assert.h>
35 #include <stdbool.h>
36 #include <stdio.h>
37 
38 #include "v3d_simulator.h"
39 #include "v3d_simulator_wrapper.h"
40 
41 #include "common/v3d_performance_counters.h"
42 
43 #include "util/macros.h"
44 #include "util/bitscan.h"
45 #include "drm-uapi/v3d_drm.h"
46 
47 #define HW_REGISTER_RO(x) (x)
48 #define HW_REGISTER_RW(x) (x)
49 #if V3D_VERSION == 71
50 #include "libs/core/v3d/registers/7.1.7.0/v3d.h"
51 #else
52 #if V3D_VERSION == 42
53 #include "libs/core/v3d/registers/4.2.14.0/v3d.h"
54 #endif
55 #endif
56 
57 #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
58 #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
59 
60 /* Invalidates the L2C cache.  This is a read-only cache for uniforms and instructions. */
61 static void
v3d_invalidate_l2c(struct v3d_hw * v3d)62 v3d_invalidate_l2c(struct v3d_hw *v3d)
63 {
64         if (V3D_VERSION >= 33)
65                 return;
66 
67         V3D_WRITE(V3D_CTL_0_L2CACTL,
68                   V3D_CTL_0_L2CACTL_L2CCLR_SET |
69                   V3D_CTL_0_L2CACTL_L2CENA_SET);
70 }
71 
/* Values written into the L2TFLM field of L2TCACTL to select what an L2T
 * cache operation does (see v3d_invalidate_l2t() and v3d_flush_l2t()).
 * The numeric values are hardware-defined; do not reorder.
 */
enum v3d_l2t_cache_flush_mode {
        V3D_CACHE_FLUSH_MODE_FLUSH,
        V3D_CACHE_FLUSH_MODE_CLEAR,
        V3D_CACHE_FLUSH_MODE_CLEAN,
};
77 
78 /* Invalidates texture L2 cachelines */
79 static void
v3d_invalidate_l2t(struct v3d_hw * v3d)80 v3d_invalidate_l2t(struct v3d_hw *v3d)
81 {
82         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
83         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
84         V3D_WRITE(V3D_CTL_0_L2TCACTL,
85                   V3D_CTL_0_L2TCACTL_L2TFLS_SET |
86                   (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
87 }
88 
/*
 * Wait for l2tcactl, used for flushes.
 *
 * FIXME: for a multicore scenario we should pass here the core. All wrapper
 * assumes just one core, so would be better to handle that on that case.
 */
static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
                                          uint32_t ctrl)
{
   /* Only the TMU write-combiner-flush and L2T-flush busy bits may be
    * polled through this helper.
    */
   assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET | V3D_CTL_0_L2TCACTL_L2TFLS_SET)));

   /* The simulator only makes progress when ticked, so step it until the
    * requested busy bits clear.
    */
   while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
           v3d_hw_tick(v3d);
   }
}
104 
/* Flushes dirty texture cachelines from the L1 write combiner */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        /* Kick the TMU write-combiner flush, then wait for it below. */
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        /* Note: here the kernel (and previous versions of the simulator
         * wrapper) is using V3D_CTL_0_L2TCACTL_L2TFLS_SET, as with l2t. We
         * understand that it makes more sense to do like this. We need to
         * confirm which one is doing it correctly. So far things work fine on
         * the simulator this way.
         */
        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
}
120 
121 /* Flushes dirty texture L2 cachelines */
122 static void
v3d_flush_l2t(struct v3d_hw * v3d)123 v3d_flush_l2t(struct v3d_hw *v3d)
124 {
125         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
126         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
127         V3D_WRITE(V3D_CTL_0_L2TCACTL,
128                   V3D_CTL_0_L2TCACTL_L2TFLS_SET |
129                   (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
130 
131         v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
132 }
133 
/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        /* Setting every bit of SLCACTL requests invalidation of all the
         * per-slice caches at once.
         */
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}
140 
/* Invalidates all read-only caches (L2C, texture L2, slice caches) so a new
 * job doesn't consume stale data.  Called before kicking CL and CSD jobs.
 */
static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}
148 
/* Offset of the GMP table in simulator memory.  Stashed at submit time so
 * that v3d_reload_gmp() can also be invoked from the out-of-memory ISR path
 * without re-plumbing the offset.
 */
static uint32_t g_gmp_ofs;
/* Resets the GMP with the current table offset and busy-waits until the new
 * configuration has loaded.
 */
static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_CFG,
                  V3D_GMP_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
        /* Spin until the GMP reports its configuration load finished. */
        while (V3D_READ(V3D_GMP_STATUS) &
               V3D_GMP_STATUS_CFG_BUSY_SET) {
                ;
        }
}
163 
/* Writes dirty data back out: drain the L1 TMU write combiner first, then
 * clean the texture L2.  Called after CSD dispatch completion.
 */
static UNUSED void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}
170 
/* From V3D 7.1 on, the TFU registers are named with a V3D_IFC_ prefix
 * instead of V3D_TFU_; TFU_REG() selects the right prefix for the version
 * this file is being compiled for.
 */
#if V3D_VERSION < 71
#define TFU_REG(NAME) V3D_TFU_ ## NAME
#else
#define TFU_REG(NAME) V3D_IFC_ ## NAME
#endif
176 
177 
/* Emulates DRM_IOCTL_V3D_SUBMIT_TFU: programs the TFU registers from the
 * ioctl args and ticks the simulator until the job completes.  Always
 * returns 0.
 */
int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        /* Snapshot the conversion count so completion can be detected as a
         * change in this field.
         */
        int last_vtct = V3D_READ(TFU_REG(CS)) & TFU_REG(CS_CVTCT_SET);

        V3D_WRITE(TFU_REG(IIA), args->iia);
        V3D_WRITE(TFU_REG(IIS), args->iis);
        V3D_WRITE(TFU_REG(ICA), args->ica);
        V3D_WRITE(TFU_REG(IUA), args->iua);
        V3D_WRITE(TFU_REG(IOA), args->ioa);
#if V3D_VERSION >= 71
        /* IOC only exists from 7.1 on. */
        V3D_WRITE(TFU_REG(IOC), args->v71.ioc);
#endif
        V3D_WRITE(TFU_REG(IOS), args->ios);
        V3D_WRITE(TFU_REG(COEF0), args->coef[0]);
        V3D_WRITE(TFU_REG(COEF1), args->coef[1]);
        V3D_WRITE(TFU_REG(COEF2), args->coef[2]);
        V3D_WRITE(TFU_REG(COEF3), args->coef[3]);

        /* ICFG is written last; the wait below detects completion. */
        V3D_WRITE(TFU_REG(ICFG), args->icfg);

        while ((V3D_READ(TFU_REG(CS)) & TFU_REG(CS_CVTCT_SET)) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}
206 
/* Emulates DRM_IOCTL_V3D_SUBMIT_CSD: reloads the GMP, invalidates caches,
 * queues the compute dispatch, and ticks the simulator to completion.
 * Returns 0 on success, -1 on versions without CSD hardware (< 4.2).
 */
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
#if V3D_VERSION >= 42
        /* Snapshot the completed-dispatch count so completion can be
         * detected as a change in this field.
         */
        int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
                                   V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
#if V3D_VERSION >= 71
        /* CFG7 only exists from 7.1 on; the submit interface doesn't carry
         * a value for it, so write zero.
         */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0);
#endif
        /* CFG0 kicks off the job */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        /* Now we wait for the dispatch to finish. The safest way is to check
         * if NUM_COMPLETED_JOBS has increased. Note that in spite of that
         * name that register field is about the number of completed
         * dispatches.
         */
        while ((V3D_READ(V3D_CSD_0_STATUS) &
                V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
#else
        /* No CSD hardware before 4.2. */
        return -1;
#endif
}
249 
250 int
v3dX(simulator_get_param_ioctl)251 v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
252                                 uint32_t perfcnt_total,
253                                 struct drm_v3d_get_param *args)
254 {
255         static const uint32_t reg_map[] = {
256 #if V3D_VERSION >= 71
257                 [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_IDENT0,
258 #else
259                 [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
260 #endif
261                 [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
262                 [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
263                 [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
264                 [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
265                 [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
266                 [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
267         };
268 
269         switch (args->param) {
270         case DRM_V3D_PARAM_SUPPORTS_TFU:
271                 args->value = 1;
272                 return 0;
273         case DRM_V3D_PARAM_SUPPORTS_CSD:
274                 args->value = V3D_VERSION >= 42;
275                 return 0;
276         case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
277                 args->value = 1;
278                 return 0;
279         case DRM_V3D_PARAM_SUPPORTS_PERFMON:
280                 args->value = V3D_VERSION >= 42;
281                 return 0;
282         case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
283                 args->value = 1;
284                 return 0;
285 	case DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE:
286 		args->value = 1;
287 		return 0;
288 	case DRM_V3D_PARAM_MAX_PERF_COUNTERS:
289 		args->value = perfcnt_total;
290 		return 0;
291         }
292 
293         if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
294                 args->value = V3D_READ(reg_map[args->param]);
295                 return 0;
296         }
297 
298         fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
299                 (long long)args->value);
300         abort();
301 }
302 
/* Emulates DRM_IOCTL_V3D_PERFMON_GET_COUNTER: copies the name, category and
 * description strings for the requested counter into the fixed-size UAPI
 * fields.  Returns 0 on success, -1 for an out-of-range counter ID.
 */
int
v3dX(simulator_perfmon_get_counter_ioctl)(uint32_t perfcnt_total,
                                          struct drm_v3d_perfmon_get_counter *args)
{
        const char **counter = NULL;

        /* Make sure that the counter ID is valid */
        if (args->counter >= perfcnt_total)
                return -1;

        counter = v3d_performance_counters[args->counter];

        /* NOTE(review): strncpy zero-pads but does NOT NUL-terminate when
         * the source is as long as the limit — confirm that consumers of
         * these fixed-size fields don't assume termination.
         */
        strncpy((char *)args->name, counter[V3D_PERFCNT_NAME],
                DRM_V3D_PERFCNT_MAX_NAME);

        strncpy((char *)args->category, counter[V3D_PERFCNT_CATEGORY],
                DRM_V3D_PERFCNT_MAX_CATEGORY);

        strncpy((char *)args->description, counter[V3D_PERFCNT_DESCRIPTION],
                DRM_V3D_PERFCNT_MAX_DESCRIPTION);

        return 0;
}
326 
/* The v3d_hw instance the ISR operates on.  The ISR callback registered via
 * v3d_hw_set_isr() receives only a status word, so the instance is stashed
 * in this global.
 */
static struct v3d_hw *v3d_isr_hw;
328 
329 
/* Per-core interrupt handler: services binner out-of-memory by handing the
 * PTB a fresh spill buffer; on V3D <= 4.2 any other core interrupt (GMP
 * violation or unknown) is logged and fatal.
 */
static void
v3d_isr_core(struct v3d_hw *v3d,
             unsigned core)
{
        /* FIXME: so far we are assuming just one core, and using only the _0_
         * registers. If we add multiple-core on the simulator, we would need
         * to pass core as a parameter, and chose the proper registers.
         */
        assert(core == 0);
        uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
        /* Ack everything we read so the interrupt doesn't re-fire. */
        V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

        if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                /* Binner ran out of memory: allocate a 256KB spill chunk and
                 * point the PTB at it.
                 */
                uint32_t size = 256 * 1024;
                uint32_t offset = v3d_simulator_get_spill(size);

                /* NOTE(review): presumably the GMP table is reloaded so it
                 * covers the new spill memory — confirm.
                 */
                v3d_reload_gmp(v3d);

                V3D_WRITE(V3D_PTB_0_BPOA, offset);
                V3D_WRITE(V3D_PTB_0_BPOS, size);
                return;
        }

#if V3D_VERSION <= 42
        /* On <= 4.2 the GMP violation is a core interrupt (on 7.1 it is
         * reported through the hub instead).  Either way it is fatal here.
         */
        if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
        } else {
                fprintf(stderr,
                        "Unexpected ISR with core status 0x%08x\n",
                        core_status);
        }
        abort();
#endif
}
365 
366 static void
handle_mmu_interruptions(struct v3d_hw * v3d,uint32_t hub_status)367 handle_mmu_interruptions(struct v3d_hw *v3d,
368                          uint32_t hub_status)
369 {
370         bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
371         bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
372         bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;
373 
374         if (!(pti || cap || wrv))
375                 return;
376 
377         const char *client = "?";
378         uint32_t axi_id = V3D_READ(V3D_MMU0_VIO_ID);
379         uint32_t va_width = 30;
380 
381         static const char *const v3d42_axi_ids[] = {
382                 "L2T",
383                 "PTB",
384                 "PSE",
385                 "TLB",
386                 "CLE",
387                 "TFU",
388                 "MMU",
389                 "GMP",
390         };
391 
392         axi_id = axi_id >> 5;
393         if (axi_id < ARRAY_SIZE(v3d42_axi_ids))
394                 client = v3d42_axi_ids[axi_id];
395 
396         uint32_t mmu_debug = V3D_READ(V3D_MMU0_DEBUG_INFO);
397 
398         va_width += ((mmu_debug & V3D_MMU0_DEBUG_INFO_VA_WIDTH_SET)
399                      >> V3D_MMU0_DEBUG_INFO_VA_WIDTH_LSB);
400 
401         /* Only the top bits (final number depends on the gen) of the virtual
402          * address are reported in the MMU VIO_ADDR register.
403          */
404         uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU0_VIO_ADDR) <<
405                              (va_width - 32));
406 
407         /* Difference with the kernel: here were are going to abort after
408          * logging, so we don't bother with some stuff that the kernel does,
409          * like restoring the MMU ctrl bits
410          */
411 
412         fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
413                 client, axi_id, (long long) vio_addr,
414                 wrv ? ", write violation" : "",
415                 pti ? ", pte invalid" : "",
416                 cap ? ", cap exceeded" : "");
417 
418         abort();
419 }
420 
/* Hub interrupt handler: acknowledges the status, dispatches MMU fault
 * reporting, and on 7.1 handles GMP violations (fatal).
 */
static void
v3d_isr_hub(struct v3d_hw *v3d)
{
        uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);

        /* Acknowledge the interrupts we're handling here */
        V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);

        if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
                /* FIXME: we were not able to raise this exception. We let the
                 * unreachable here, so we could get one if it is raised on
                 * the future. In any case, note that for this case we would
                 * only be doing debugging log.
                 */
                unreachable("TFU Conversion Complete interrupt not handled");
        }

        /* Aborts internally if any MMU fault bit is set. */
        handle_mmu_interruptions(v3d, hub_status);

#if V3D_VERSION == 71
        /* On 7.1 the GMP violation is reported through the hub (it is a
         * per-core interrupt on <= 4.2).  Any hub interrupt reaching this
         * point is fatal.
         */
        if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
        } else {
                fprintf(stderr,
                        "Unexpected ISR with status 0x%08x\n",
                        hub_status);
        }
        abort();
#endif
}
452 
453 static void
v3d_isr(uint32_t hub_status)454 v3d_isr(uint32_t hub_status)
455 {
456         struct v3d_hw *v3d = v3d_isr_hw;
457         uint32_t mask = hub_status;
458 
459         /* Check the hub_status bits */
460         while (mask) {
461                 unsigned core = u_bit_scan(&mask);
462 
463                 if (core == v3d_hw_get_hub_core())
464                         v3d_isr_hub(v3d);
465                 else
466                         v3d_isr_core(v3d, core);
467         }
468 
469         return;
470 }
471 
/* One-time register setup: unmasks the core and hub interrupts we handle
 * and installs the ISR callback on the simulator.
 */
void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
        /* FIXME: the kernel captures some additional core interrupts here,
         * for tracing. Perhaps we should evaluate to do the same here and add
         * some debug options.
         */
        uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET;
#if V3D_VERSION <= 42
        /* GMP violation is a per-core interrupt up to 4.2; from 7.1 it is a
         * hub interrupt (see below).
         */
        core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET;
#endif

        /* Mask everything except the interrupts we handle, then unmask
         * those.
         */
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        uint32_t hub_interrupts =
           (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET |  /* write violation */
            V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET |  /* page table invalid */
            V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET |  /* CAP exceeded */
            V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */

#if V3D_VERSION == 71
        hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET;
#endif
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);

        /* The ISR callback carries no user pointer, so stash the hw
         * instance in a global for v3d_isr().
         */
        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}
502 
/* Emulates DRM_IOCTL_V3D_SUBMIT_CL: reloads the GMP, invalidates caches,
 * runs the bin CL to completion, then the render CL to completion, ticking
 * the simulator while waiting on each stage's frame-done counter.
 */
void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        /* Snapshot bin/render frame-done counters so completion of each
         * stage can be detected as a change.
         */
        int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
                        V3D_CLE_0_BFC_BMFCT_SET);

        int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
                        V3D_CLE_0_RFC_RMFCT_SET);

        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        /* NOTE(review): qma/qms/qts appear to be tile alloc memory
         * address/size and tile state address — confirm against the CLE
         * register docs.
         */
        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render.  The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing.  (We don't use HW semaphores).
         */
        while ((V3D_READ(V3D_CLE_0_BFC) &
                V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
                v3d_hw_tick(v3d);
        }

        /* Re-invalidate so the render pass sees the binner's output. */
        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        while ((V3D_READ(V3D_CLE_0_RFC) &
                V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
                v3d_hw_tick(v3d);
        }
}
550 
/* Address of the Nth performance-counter value register. */
#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
/* Address of the source-select register covering counters 4x..4x+3; each
 * such register packs four 8-bit source-select fields (hence the *8 shift).
 */
#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
                                                 V3D_PCTR_0_SRC_N_SHIFT(x) + \
                                                 V3D_PCTR_0_SRC_0_3_PCTRS0_MSB))
557 
558 void
v3dX(simulator_perfmon_start)559 v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
560                               uint32_t ncounters,
561                               uint8_t *events)
562 {
563         int i, j;
564         uint32_t source;
565         uint32_t mask = BITFIELD_RANGE(0, ncounters);
566 
567         for (i = 0; i < ncounters; i+=4) {
568                 source = i / 4;
569                 uint32_t channels = 0;
570                 for (j = 0; j < 4 && (i + j) < ncounters; j++)
571                         channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
572                 V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
573         }
574         V3D_WRITE(V3D_PCTR_0_CLR, mask);
575         V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
576         V3D_WRITE(V3D_PCTR_0_EN, mask);
577 }
578 
v3dX(simulator_perfmon_stop)579 void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
580                                   uint32_t ncounters,
581                                   uint64_t *values)
582 {
583         int i;
584 
585         for (i = 0; i < ncounters; i++)
586                 values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));
587 
588         V3D_WRITE(V3D_PCTR_0_EN, 0);
589 }
590 
/* Reports how many performance counters this V3D version's table defines. */
void v3dX(simulator_get_perfcnt_total)(uint32_t *count)
{
        *count = ARRAY_SIZE(v3d_performance_counters);
}
595