/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3dx_simulator.c
 *
 * Implements the actual HW interaction between the GL driver's VC5 simulator
 * layer and the simulator.
 *
 * The register headers between V3D versions have conflicting defines, so all
 * register interactions appear in this file and are compiled once per V3D
 * version we support.
 */

#ifdef USE_V3D_SIMULATOR

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#include "v3d_simulator.h"
#include "v3d_simulator_wrapper.h"

#include "util/macros.h"
#include "drm-uapi/v3d_drm.h"

#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
#if V3D_VERSION >= 41
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
#else
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
#endif

#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)

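/* Invalidates the L3 cache through the GCA, on hardware that has one. */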
static void
v3d_invalidate_l3(struct v3d_hw *v3d)
{
        if (!v3d_hw_has_gca(v3d))
                return;

#if V3D_VERSION < 40
        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);

        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
#endif
}

/* Invalidates the L2C cache.  This is a read-only cache for uniforms and
 * instructions.
 */
static void
v3d_invalidate_l2c(struct v3d_hw *v3d)
{
        if (V3D_VERSION >= 33)
                return;

        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}

/* Invalidates texture L2 cachelines */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}

/* Flushes dirty texture cachelines from the L1 write combiner */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
}

/* Flushes dirty texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (2 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));

        assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}

static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}

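/* Offset of the active GMP table, stashed globally so v3d_isr() can
 * reprogram the GMP after allocating binner spill memory.
 */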
static uint32_t g_gmp_ofs;
static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_CFG,
                  V3D_GMP_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
        while (V3D_READ(V3D_GMP_STATUS) &
               V3D_GMP_STATUS_CFG_BUSY_SET) {
                ;
        }
}

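/* Flushes the write-back caches: the TMU L1 write combiner and the L2T. */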
static UNUSED void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}

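/* Implements the TFU submit ioctl: programs the job descriptor registers,
 * kicks the job off with the ICFG write, then ticks the simulator until
 * the conversion count in V3D_TFU_CS changes.
 */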
int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;

        V3D_WRITE(V3D_TFU_IIA, args->iia);
        V3D_WRITE(V3D_TFU_IIS, args->iis);
        V3D_WRITE(V3D_TFU_ICA, args->ica);
        V3D_WRITE(V3D_TFU_IUA, args->iua);
        V3D_WRITE(V3D_TFU_IOA, args->ioa);
        V3D_WRITE(V3D_TFU_IOS, args->ios);
        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);

        V3D_WRITE(V3D_TFU_ICFG, args->icfg);

        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}

#if V3D_VERSION >= 41
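/* Implements the CSD (compute dispatch) submit ioctl.  The CFG0 write
 * queues the dispatch, after which we tick the simulator until no dispatch
 * is current or queued.
 */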
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
        /* CFG0 kicks off the job */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        while (V3D_READ(V3D_CSD_0_STATUS) &
               (V3D_CSD_0_STATUS_HAVE_CURRENT_DISPATCH_SET |
                V3D_CSD_0_STATUS_HAVE_QUEUED_DISPATCH_SET)) {
                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
}
#endif

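/* Implements the GET_PARAM ioctl: capability queries are answered directly,
 * and ident/config params are serviced by reading the corresponding
 * register.
 */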
int
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_get_param *args)
{
        static const uint32_t reg_map[] = {
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
                [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
        };

        switch (args->param) {
        case DRM_V3D_PARAM_SUPPORTS_TFU:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CSD:
                args->value = V3D_VERSION >= 41;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
                args->value = 1;
                return 0;
        }

        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                args->value = V3D_READ(reg_map[args->param]);
                return 0;
        }

        fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
                (long long)args->param);
        abort();
}

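/* The simulator's ISR callback doesn't take a context pointer, so the
 * v3d_hw handle is stashed here for v3d_isr().
 */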
static struct v3d_hw *v3d_isr_hw;

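/* Handles a simulator interrupt: services binner out-of-memory by
 * allocating spill memory, and treats a GMP violation or any other
 * interrupt as fatal.
 */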
static void
v3d_isr(uint32_t hub_status)
{
        struct v3d_hw *v3d = v3d_isr_hw;

        /* Check the per-core bits */
        if (hub_status & (1 << 0)) {
                uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
                V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

                if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                        uint32_t size = 256 * 1024;
                        uint32_t offset = v3d_simulator_get_spill(size);

                        v3d_reload_gmp(v3d);

                        V3D_WRITE(V3D_PTB_0_BPOA, offset);
                        V3D_WRITE(V3D_PTB_0_BPOS, size);
                        return;
                }

                if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                        fprintf(stderr, "GMP violation at 0x%08x\n",
                                V3D_READ(V3D_GMP_VIO_ADDR));
                        abort();
                } else {
                        fprintf(stderr,
                                "Unexpected ISR with core status 0x%08x\n",
                                core_status);
                }
                abort();
        }

        return;
}

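/* One-time register setup: unmasks the core interrupts we service and
 * installs the simulator ISR.
 */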
void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
#if V3D_VERSION == 33
        /* Set OVRTMUOUT to match kernel behavior.
         *
         * This means that the texture sampler uniform configuration's tmu
         * output type field is used, instead of using the hardware default
         * behavior based on the texture type.  If you want the default
         * behavior, you can still put "2" in the indirect texture state's
         * output_type field.
         */
        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif

        /* Mask all interrupts except the ones our ISR handles. */
        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}

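/* Implements the CL submit ioctl: kicks the bin list on CT0, waits for
 * binning to complete, then kicks the render list on CT1 and ticks until
 * the CLEs reach the end addresses.
 */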
void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
#endif
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render.  The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing.  (We don't use HW semaphores).
         */
        while (V3D_READ(V3D_CLE_0_CT0CA) !=
               V3D_READ(V3D_CLE_0_CT0EA)) {
                v3d_hw_tick(v3d);
        }

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        while (V3D_READ(V3D_CLE_0_CT1CA) !=
               V3D_READ(V3D_CLE_0_CT1EA) ||
               V3D_READ(V3D_CLE_1_CT1CA) !=
               V3D_READ(V3D_CLE_1_CT1EA)) {
                v3d_hw_tick(v3d);
        }
}

#endif /* USE_V3D_SIMULATOR */