1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3 *
4 * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 /* NOTES:
23 * - A default GPU can be compiled in during the build, by defining
24 * CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that
25 * insmod'ing mali_kbase.ko with no arguments after a build with "scons
26 * gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be
27 * overridden by passing the 'no_mali_gpu' argument to insmod.
28 *
29 * - if CONFIG_MALI_BIFROST_ERROR_INJECT is defined the error injection system is
30 * activated.
31 */
32
33 /* Implementation of failure injection system:
34 *
35 * Error conditions are generated by gpu_generate_error().
36 * According to CONFIG_MALI_BIFROST_ERROR_INJECT definition gpu_generate_error() either
37 * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or
38 * checks if there is (in error_track_list) an error configuration to be set for
39 * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined).
40 * Each error condition will trigger a specific "state" for a certain set of
41 * registers as per Midgard Architecture Specifications doc.
42 *
43 * According to Midgard Architecture Specifications doc the following registers
44 * are always affected by error conditions:
45 *
46 * JOB Exception:
47 * JOB_IRQ_RAWSTAT
48 * JOB<n> STATUS AREA
49 *
50 * MMU Exception:
51 * MMU_IRQ_RAWSTAT
52 * AS<n>_FAULTSTATUS
53 * AS<n>_FAULTADDRESS
54 *
55 * GPU Exception:
56 * GPU_IRQ_RAWSTAT
57 * GPU_FAULTSTATUS
58 * GPU_FAULTADDRESS
59 *
60 * For further clarification on the model behaviour upon specific error
61 * conditions the user may refer to the Midgard Architecture Specification
62 * document
63 */
64 #include <mali_kbase.h>
65 #include <gpu/mali_kbase_gpu_regmap.h>
66 #include <backend/gpu/mali_kbase_model_dummy.h>
67 #include <mali_kbase_mem_linux.h>
68
69 #if MALI_USE_CSF
70 #include <csf/mali_kbase_csf_firmware.h>
71
72 /* Index of the last value register for each type of core, with the 1st value
73 * register being at index 0.
74 */
75 #define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1)
76
77 /* Array for storing the value of SELECT register for each type of core */
78 static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM];
79 static bool ipa_control_timer_enabled;
80 #endif
81
82 #define LO_MASK(M) ((M) & 0xFFFFFFFF)
83
/* Return the value emulated for the read-only "core present" register @reg.
 * Only the low words of the present registers are non-zero in the dummy
 * implementation; every high word — and any unrecognized register — reads 0.
 */
static u32 get_implementation_register(u32 reg)
{
	u32 val;

	if (reg == GPU_CONTROL_REG(SHADER_PRESENT_LO))
		val = LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
	else if (reg == GPU_CONTROL_REG(TILER_PRESENT_LO))
		val = LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
	else if (reg == GPU_CONTROL_REG(L2_PRESENT_LO))
		val = LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
	else if (reg == GPU_CONTROL_REG(STACK_PRESENT_LO))
		val = LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);
	else
		val = 0; /* *_PRESENT_HI and unknown registers read as zero */

	return val;
}
105
/* File-scope model of the performance counter hardware: the dump buffer the
 * counters are written to, and a running sample of each block's counters.
 */
struct {
	unsigned long prfcnt_base;	/* dump buffer address as programmed by
					 * the driver — presumably a GPU VA;
					 * TODO confirm against the writer
					 */
	u32 *prfcnt_base_cpu;		/* CPU-side mapping used for dumping */
	struct kbase_device *kbdev;	/* device used for DMA cache syncs */
	struct tagged_addr *pages;	/* physical pages of the dump buffer */
	size_t page_count;		/* number of entries in @pages */

	u32 time;			/* fake timestamp written in each
					 * dumped block header
					 */

	struct {			/* per-block-type counter enable masks */
		u32 jm;
		u32 tiler;
		u32 l2;
		u32 shader;
	} prfcnt_en;

	u64 l2_present;			/* bitmap of present L2/memsys blocks */
	u64 shader_present;		/* bitmap of present shader cores */

#if !MALI_USE_CSF
	u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
#else
	u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
#endif /* !MALI_USE_CSF */
	u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
	u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
			KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
	u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
			    KBASE_DUMMY_MODEL_COUNTER_PER_CORE];

} performance_counters = {
	.l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
	.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
};
140
/**
 * struct job_slot - Emulated state of one job slot.
 * @job_active:                Non-zero while a job chain occupies the slot.
 * @job_queued:                Non-zero while a follow-up job chain is queued.
 * @job_complete_irq_asserted: 1 once the completion IRQ line is asserted.
 * @job_irq_mask:              Per-slot IRQ mask bit.
 * @job_disabled:              Non-zero if the slot was disabled; such jobs
 *                             update status registers without asserting the
 *                             completion IRQ.
 */
struct job_slot {
	int job_active;
	int job_queued;
	int job_complete_irq_asserted;
	int job_irq_mask;
	int job_disabled;
};
148
/**
 * struct control_reg_values_t - control register values specific to the GPU being 'emulated'
 * @name: GPU name (matched against the 'no_mali_gpu' module parameter)
 * @gpu_id: GPU ID to report
 * @as_present: Bitmap of address spaces present
 * @thread_max_threads: Maximum number of threads per core
 * @thread_max_workgroup_size: Maximum number of threads per workgroup
 * @thread_max_barrier_size: Maximum number of threads per barrier
 * @thread_features: Thread features, NOT INCLUDING the 2
 *                   most-significant bits, which are always set to
 *                   IMPLEMENTATION_MODEL.
 * @core_features: Core features (zero when unused by the GPU)
 * @tiler_features: Tiler features
 * @mmu_features: MMU features
 * @gpu_features_lo: GPU features (low)
 * @gpu_features_hi: GPU features (high)
 */
struct control_reg_values_t {
	const char *name;
	u32 gpu_id;
	u32 as_present;
	u32 thread_max_threads;
	u32 thread_max_workgroup_size;
	u32 thread_max_barrier_size;
	u32 thread_features;
	u32 core_features;
	u32 tiler_features;
	u32 mmu_features;
	u32 gpu_features_lo;
	u32 gpu_features_hi;
};
180
/**
 * struct dummy_model_t - Internal state of one dummy GPU model instance.
 * @reset_completed:        Latched RESET_COMPLETED IRQ state.
 * @reset_completed_mask:   IRQ mask bit for RESET_COMPLETED.
 * @prfcnt_sample_completed: Latched PRFCNT_SAMPLE_COMPLETED IRQ state.
 * @power_changed_mask:     IRQ mask bits for power transitions.
 * @power_changed:          Latched POWER_CHANGED IRQ state.
 * @clean_caches_completed: Latched CLEAN_CACHES_COMPLETED IRQ state.
 * @clean_caches_completed_irq_enabled: IRQ enable for the above.
 * @power_on:               Power state of the core groups.
 * @stack_power_on_lo:      Power state of the core stacks (low word).
 * @coherency_enable:       Value last written to the coherency enable reg.
 * @job_irq_js_state:       Snapshot of the JOB_IRQ_JS_STATE register.
 * @slots:                  Per-slot job state.
 * @control_reg_values:     Read-only register values for the emulated GPU.
 * @l2_config:              Value last written to L2_CONFIG.
 * @data:                   Backend-private pointer (see gpu_device_set_data()).
 */
struct dummy_model_t {
	int reset_completed;
	int reset_completed_mask;
	int prfcnt_sample_completed;
	int power_changed_mask; /* 2bits: _ALL,_SINGLE */
	int power_changed; /* 1bit */
	bool clean_caches_completed;
	bool clean_caches_completed_irq_enabled;
	int power_on; /* 6bits: SHADER[4],TILER,L2 */
	u32 stack_power_on_lo;
	u32 coherency_enable;
	unsigned int job_irq_js_state;
	struct job_slot slots[NUM_SLOTS];
	const struct control_reg_values_t *control_reg_values;
	u32 l2_config;
	void *data;
};
198
gpu_device_set_data(void * model,void * data)199 void gpu_device_set_data(void *model, void *data)
200 {
201 struct dummy_model_t *dummy = (struct dummy_model_t *)model;
202
203 dummy->data = data;
204 }
205
gpu_device_get_data(void * model)206 void *gpu_device_get_data(void *model)
207 {
208 struct dummy_model_t *dummy = (struct dummy_model_t *)model;
209
210 return dummy->data;
211 }
212
/* Assert the "job complete" IRQ line of slot @s on model @m. */
#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1

/* SCons should pass in a default GPU, but other ways of building (e.g.
 * in-tree) won't, so define one here in case.
 */
#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
#endif

/* Name of the GPU the dummy model identifies as; settable at insmod time
 * via the 'no_mali_gpu' parameter (perms 0000: not visible in sysfs).
 */
static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
module_param(no_mali_gpu, charp, 0000);
MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");

/* Construct a value for the THREAD_FEATURES register, *except* the two most
 * significant bits, which are set to IMPLEMENTATION_MODEL in
 * midgard_model_read_reg().
 */
#if MALI_USE_CSF
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
	((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
#else
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
	((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
#endif
237
/* Array associating GPU names with control register values. The first
 * one is used in the case of no match (see find_control_reg_values()).
 * Fields not listed for an entry are zero-initialized; see
 * struct control_reg_values_t for field meanings.
 */
static const struct control_reg_values_t all_control_reg_values[] = {
	{
		.name = "tMIx",
		.gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tHEx",
		.gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tSIx",
		.gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x300,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x209,
		.mmu_features = 0x2821,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tDVx",
		.gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x300,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x209,
		.mmu_features = 0x2821,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tNOx",
		.gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGOx_r0p0",
		.gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGOx_r1p0",
		.gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.core_features = 0x2,
		.tiler_features = 0x209,
		.mmu_features = 0x2823,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tTRx",
		.gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tNAx",
		.gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tBEx",
		.gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tBAx",
		.gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tDUx",
		.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tODx",
		.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGRx",
		.gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.core_features = 0x0, /* core_1e16fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tVAx",
		.gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.core_features = 0x0, /* core_1e16fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tTUx",
		.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x800,
		.thread_max_workgroup_size = 0x400,
		.thread_max_barrier_size = 0x400,
		.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
		.core_features = 0x0, /* core_1e32fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0xf,
		.gpu_features_hi = 0,
	},
};
455
/* Global error-injection/IRQ bookkeeping shared across the model. */
struct error_status_t hw_error_status;
457
458 #if MALI_USE_CSF
/**
 * gpu_model_get_prfcnt_value() - Read one IPA control counter value.
 * @core_type:   Type of core block (CSHW, memsys, tiler or shader).
 * @cnt_idx:     Index of the VALUE register being read.
 * @is_low_word: true to return the low 32 bits of the accumulated value,
 *               false to return the high 32 bits.
 *
 * The event to read is taken from the byte of the saved SELECT register
 * configuration corresponding to @cnt_idx; the values of all present cores
 * of that type are summed.
 *
 * Return: The requested half of the 64-bit accumulated counter, or 0 for
 *         any invalid selection.
 */
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
				      u32 cnt_idx, bool is_low_word)
{
	u64 *counters_data;
	u32 core_count = 0;
	u32 event_index;
	u64 value = 0;
	u32 core;

	if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM))
		return 0;

	if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))
		return 0;

	event_index =
		(ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF;

	/* Currently only primary counter blocks are supported */
	if (WARN_ON(event_index >= 64))
		return 0;

	/* The actual events start index 4 onwards. Spec also says PRFCNT_EN,
	 * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for
	 * IPA counters. If selected, the value returned for them will be zero.
	 */
	if (WARN_ON(event_index <= 3))
		return 0;

	event_index -= 4;

	switch (core_type) {
	case KBASE_IPA_CORE_TYPE_CSHW:
		core_count = 1;
		counters_data = performance_counters.cshw_counters;
		break;
	case KBASE_IPA_CORE_TYPE_MEMSYS:
		core_count = hweight64(performance_counters.l2_present);
		counters_data = performance_counters.l2_counters;
		break;
	case KBASE_IPA_CORE_TYPE_TILER:
		core_count = 1;
		counters_data = performance_counters.tiler_counters;
		break;
	case KBASE_IPA_CORE_TYPE_SHADER:
		core_count = hweight64(performance_counters.shader_present);
		counters_data = performance_counters.shader_counters;
		break;
	default:
		/* Return instead of falling through with counters_data
		 * uninitialized; the old code only avoided reading it
		 * because core_count happened to stay 0.
		 */
		WARN(1, "Invalid core_type %d\n", core_type);
		return 0;
	}

	for (core = 0; core < core_count; core++) {
		value += counters_data[event_index];
		event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
	}

	if (is_low_word)
		return (value & U32_MAX);
	else
		return (value >> 32);
}
522
gpu_model_clear_prfcnt_values(void)523 void gpu_model_clear_prfcnt_values(void)
524 {
525 memset(performance_counters.cshw_counters, 0,
526 sizeof(performance_counters.cshw_counters));
527
528 memset(performance_counters.tiler_counters, 0,
529 sizeof(performance_counters.tiler_counters));
530
531 memset(performance_counters.l2_counters, 0,
532 sizeof(performance_counters.l2_counters));
533
534 memset(performance_counters.shader_counters, 0,
535 sizeof(performance_counters.shader_counters));
536 }
537 KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
538 #endif
539
540 /**
541 * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer
542 *
543 * @values: Array of values to be written out
544 * @out_index: Index into performance counter buffer
545 * @block_count: Number of blocks to dump
546 * @prfcnt_enable_mask: Counter enable mask
547 * @blocks_present: Available blocks bit mask
548 */
gpu_model_dump_prfcnt_blocks(u64 * values,u32 * out_index,u32 block_count,u32 prfcnt_enable_mask,u64 blocks_present)549 static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
550 u32 block_count,
551 u32 prfcnt_enable_mask,
552 u64 blocks_present)
553 {
554 u32 block_idx, counter;
555 u32 counter_value = 0;
556 u32 *prfcnt_base;
557 u32 index = 0;
558
559 prfcnt_base = performance_counters.prfcnt_base_cpu;
560
561 for (block_idx = 0; block_idx < block_count; block_idx++) {
562 /* only dump values if core is present */
563 if (!(blocks_present & (1 << block_idx))) {
564 #if MALI_USE_CSF
565 /* if CSF dump zeroed out block */
566 memset(&prfcnt_base[*out_index], 0,
567 KBASE_DUMMY_MODEL_BLOCK_SIZE);
568 *out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK;
569 #endif /* MALI_USE_CSF */
570 continue;
571 }
572
573 /* write the header */
574 prfcnt_base[*out_index] = performance_counters.time++;
575 prfcnt_base[*out_index+2] = prfcnt_enable_mask;
576 *out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS;
577
578 /* write the counters */
579 for (counter = 0;
580 counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
581 counter++) {
582 /* HW counter values retrieved through
583 * PRFCNT_SAMPLE request are of 32 bits only.
584 */
585 counter_value = (u32)values[index++];
586 if (KBASE_DUMMY_MODEL_COUNTER_ENABLED(
587 prfcnt_enable_mask, (counter +
588 KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) {
589 prfcnt_base[*out_index + counter] =
590 counter_value;
591 }
592 }
593 *out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
594 }
595 }
596
597 /**
598 * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values
599 *
600 * Used to ensure counter values are not lost if cache invalidation is performed
601 * prior to reading.
602 */
gpu_model_sync_dummy_prfcnt(void)603 static void gpu_model_sync_dummy_prfcnt(void)
604 {
605 int i;
606 struct page *pg;
607
608 for (i = 0; i < performance_counters.page_count; i++) {
609 pg = as_page(performance_counters.pages[i]);
610 kbase_sync_single_for_device(performance_counters.kbdev,
611 kbase_dma_addr(pg), PAGE_SIZE,
612 DMA_BIDIRECTIONAL);
613 }
614 }
615
/* Dump one full sample (JM or CSHW, tiler, memsys, shader blocks) into the
 * performance counter buffer, flush it for the CPU, and advance the fake
 * timestamp so consecutive samples look spaced apart.
 */
static void midgard_model_dump_prfcnt(void)
{
	u32 index = 0;

#if !MALI_USE_CSF
	/* The single JM block is always dumped with all counters enabled */
	gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index,
				     1, 0xffffffff, 0x1);
#else
	gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index,
				     1, 0xffffffff, 0x1);
#endif /* !MALI_USE_CSF */
	gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters,
				     &index, 1,
				     performance_counters.prfcnt_en.tiler,
				     DUMMY_IMPLEMENTATION_TILER_PRESENT);
	gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index,
				     KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
				     performance_counters.prfcnt_en.l2,
				     performance_counters.l2_present);
	gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters,
				     &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES,
				     performance_counters.prfcnt_en.shader,
				     performance_counters.shader_present);

	gpu_model_sync_dummy_prfcnt();

	/* simulate a 'long' time between samples */
	performance_counters.time += 10;
}
645
init_register_statuses(struct dummy_model_t * dummy)646 static void init_register_statuses(struct dummy_model_t *dummy)
647 {
648 int i;
649
650 hw_error_status.errors_mask = 0;
651 hw_error_status.gpu_error_irq = 0;
652 hw_error_status.gpu_fault_status = 0;
653 hw_error_status.job_irq_rawstat = 0;
654 hw_error_status.job_irq_status = 0;
655 hw_error_status.mmu_irq_rawstat = 0;
656 hw_error_status.mmu_irq_mask = 0;
657
658 for (i = 0; i < NUM_SLOTS; i++) {
659 hw_error_status.js_status[i] = 0;
660 hw_error_status.job_irq_rawstat |=
661 (dummy->slots[i].job_complete_irq_asserted) << i;
662 hw_error_status.job_irq_status |=
663 (dummy->slots[i].job_complete_irq_asserted) << i;
664 }
665 for (i = 0; i < NUM_MMU_AS; i++) {
666 hw_error_status.as_command[i] = 0;
667 hw_error_status.as_faultstatus[i] = 0;
668 hw_error_status.mmu_irq_mask |= 1 << i;
669 }
670
671 performance_counters.time = 0;
672 }
673
/**
 * update_register_statuses() - Reflect a job completion, and any injected
 *                              error, in the emulated status registers.
 * @dummy:    Dummy model instance.
 * @job_slot: Job slot whose job has just completed.
 *
 * Sets the JOB_IRQ_RAWSTAT/JOB_IRQ_STATUS bits for @job_slot (FAIL bits
 * when a job error is injected on the current slot, plain completion bits
 * otherwise), then applies any requested MMU and GPU error conditions to
 * the corresponding fault registers, per the Midgard Architecture
 * Specification. hw_error_status.errors_mask is consumed (cleared) on exit.
 */
static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
{
	if (hw_error_status.errors_mask & IS_A_JOB_ERROR) {
		if (job_slot == hw_error_status.current_job_slot) {
#if !MALI_USE_CSF
			if (hw_error_status.js_status[job_slot] == 0) {
				/* status reg is clean; it can be written */

				switch (hw_error_status.errors_mask &
							IS_A_JOB_ERROR) {
				case KBASE_JOB_INTERRUPTED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INTERRUPTED;
					break;

				case KBASE_JOB_STOPPED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_STOPPED;
					break;

				case KBASE_JOB_TERMINATED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_TERMINATED;
					break;

				case KBASE_JOB_CONFIG_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_CONFIG_FAULT;
					break;

				case KBASE_JOB_POWER_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_POWER_FAULT;
					break;

				case KBASE_JOB_READ_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_READ_FAULT;
					break;

				case KBASE_JOB_WRITE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_WRITE_FAULT;
					break;

				case KBASE_JOB_AFFINITY_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_AFFINITY_FAULT;
					break;

				case KBASE_JOB_BUS_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_BUS_FAULT;
					break;

				case KBASE_INSTR_INVALID_PC:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_INVALID_PC;
					break;

				case KBASE_INSTR_INVALID_ENC:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_INVALID_ENC;
					break;

				case KBASE_INSTR_TYPE_MISMATCH:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_TYPE_MISMATCH;
					break;

				case KBASE_INSTR_OPERAND_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_OPERAND_FAULT;
					break;

				case KBASE_INSTR_TLS_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_TLS_FAULT;
					break;

				case KBASE_INSTR_BARRIER_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_BARRIER_FAULT;
					break;

				case KBASE_INSTR_ALIGN_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_ALIGN_FAULT;
					break;

				case KBASE_DATA_INVALID_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_DATA_INVALID_FAULT;
					break;

				case KBASE_TILE_RANGE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_TILE_RANGE_FAULT;
					break;

				case KBASE_ADDR_RANGE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_ADDRESS_RANGE_FAULT;
					break;

				case KBASE_OUT_OF_MEMORY:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_OUT_OF_MEMORY;
					break;

				case KBASE_UNKNOWN:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_UNKNOWN;
					break;

				default:
					model_error_log(KBASE_CORE,
						"\nAtom Chain 0x%llx: Invalid Error Mask!",
						hw_error_status.current_jc);
					break;
				}
			}
#endif /* !MALI_USE_CSF */

			/* we set JOB_FAIL_<n> */
			hw_error_status.job_irq_rawstat |=
				(dummy->slots[job_slot].job_complete_irq_asserted) <<
				(job_slot + 16);
			hw_error_status.job_irq_status |=
				(((dummy->slots[job_slot].job_complete_irq_asserted) <<
				(job_slot)) &
				(dummy->slots[job_slot].job_irq_mask <<
				job_slot)) << 16;
		} else {
			/* error targets a different slot: plain completion */
			hw_error_status.job_irq_rawstat |=
				(dummy->slots[job_slot].job_complete_irq_asserted) <<
				job_slot;
			hw_error_status.job_irq_status |=
				((dummy->slots[job_slot].job_complete_irq_asserted) <<
				(job_slot)) &
				(dummy->slots[job_slot].job_irq_mask <<
				job_slot);
		}
	} else {
		/* no job error injected: plain completion */
		hw_error_status.job_irq_rawstat |=
			(dummy->slots[job_slot].job_complete_irq_asserted) <<
			job_slot;
		hw_error_status.job_irq_status |=
			((dummy->slots[job_slot].job_complete_irq_asserted) <<
			(job_slot)) &
			(dummy->slots[job_slot].job_irq_mask << job_slot);
	} /* end of job register statuses */

	if (hw_error_status.errors_mask & IS_A_MMU_ERROR) {
		int i;

		for (i = 0; i < NUM_MMU_AS; i++) {
			if (i == hw_error_status.faulty_mmu_as) {
				if (hw_error_status.as_faultstatus[i] == 0) {
					u32 status =
						hw_error_status.as_faultstatus[i];
					/* status reg is clean; it can be
					 * written
					 */
					switch (hw_error_status.errors_mask &
							IS_A_MMU_ERROR) {
					case KBASE_TRANSLATION_FAULT:
						/* 0xCm means TRANSLATION FAULT
						 * (m is mmu_table_level)
						 */
						status =
							((1 << 7) | (1 << 6) |
							hw_error_status.mmu_table_level
							);
						break;

					case KBASE_PERMISSION_FAULT:
						/*0xC8 means PERMISSION FAULT */
						status = ((1 << 7) | (1 << 6) |
							(1 << 3));
						break;

					case KBASE_TRANSTAB_BUS_FAULT:
						/* 0xDm means TRANSLATION TABLE
						 * BUS FAULT (m is
						 * mmu_table_level)
						 */
						status = ((1 << 7) | (1 << 6) |
							(1 << 4) |
							hw_error_status.mmu_table_level
							);
						break;

					case KBASE_ACCESS_FLAG:
						/* 0xD8 means ACCESS FLAG */
						status = ((1 << 7) | (1 << 6) |
							(1 << 4) | (1 << 3));
						break;

					default:
						model_error_log(KBASE_CORE,
							"\nAtom Chain 0x%llx: Invalid Error Mask!",
							hw_error_status.current_jc);
						break;
					}
					hw_error_status.as_faultstatus[i] =
						status;
				}

				if (hw_error_status.errors_mask &
						KBASE_TRANSTAB_BUS_FAULT)
					hw_error_status.mmu_irq_rawstat |=
						1 << (16 + i); /* bus error */
				else
					hw_error_status.mmu_irq_rawstat |=
						1 << i; /* page fault */
			}
		}
	} /*end of mmu register statuses */
	if (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
		if (hw_error_status.gpu_fault_status) {
			/* not the first GPU error reported */
			hw_error_status.gpu_error_irq |= (1 << 7);
		} else {
			hw_error_status.gpu_error_irq |= 1;
			switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
			case KBASE_DELAYED_BUS_FAULT:
				hw_error_status.gpu_fault_status = (1 << 7);
				break;

			case KBASE_SHAREABILITY_FAULT:
				hw_error_status.gpu_fault_status = (1 << 7) |
					(1 << 3);
				break;

			default:
				model_error_log(KBASE_CORE,
					"\nAtom Chain 0x%llx: Invalid Error Mask!",
					hw_error_status.current_jc);
				break;
			}
		}
	}
	hw_error_status.errors_mask = 0; /*clear error mask */
}
919
920 #if !MALI_USE_CSF
/* Recompute the JOB_IRQ_JS_STATE snapshot after the IRQ bits in @mask are
 * acknowledged: bit i = slot i has an active job, bit (i + 16) = slot i has
 * a queued (next) job. A slot whose status register holds a fault only
 * resumes its job chain once its FAIL bit (i + 16) is acknowledged.
 */
static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask)
{
	int i;

	pr_debug("%s", "Updating the JS_ACTIVE register");

	for (i = 0; i < NUM_SLOTS; i++) {
		int slot_active = dummy->slots[i].job_active;
		int next_busy = dummy->slots[i].job_queued;

		if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) {
			/* clear the bits we're updating */
			dummy->job_irq_js_state &= ~((1 << (16 + i)) |
						     (1 << i));
			if (hw_error_status.js_status[i]) {
				/* faulted slot: only the "next" bit survives */
				dummy->job_irq_js_state |= next_busy <<
							   (i + 16);
				if (mask & (1 << (i + 16))) {
					/* clear job slot status */
					hw_error_status.js_status[i] = 0;
					/* continue execution of jobchain */
					dummy->slots[i].job_active =
						dummy->slots[i].job_queued;
				}
			} else {
				/* set bits if needed */
				dummy->job_irq_js_state |= ((slot_active << i) |
							    (next_busy << (i + 16)));
			}
		}
	}
	pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state);
}
954 #endif /* !MALI_USE_CSF */
955
956 /**
957 * find_control_reg_values() - Look up constant control register values.
958 * @gpu: GPU name
959 *
960 * Look up the GPU name to find the correct set of control register values for
961 * that GPU. If not found, warn and use the first values in the array.
962 *
963 * Return: Pointer to control register values for that GPU.
964 */
find_control_reg_values(const char * gpu)965 static const struct control_reg_values_t *find_control_reg_values(const char *gpu)
966 {
967 size_t i;
968 const struct control_reg_values_t *ret = NULL;
969
970 for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
971 const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];
972
973 if (!strcmp(fcrv->name, gpu)) {
974 ret = fcrv;
975 pr_debug("Found control register values for %s\n", gpu);
976 break;
977 }
978 }
979
980 if (!ret) {
981 ret = &all_control_reg_values[0];
982 pr_warn("Couldn't find control register values for GPU %s; using default %s\n",
983 gpu, ret->name);
984 }
985
986 return ret;
987 }
988
midgard_model_create(const void * config)989 void *midgard_model_create(const void *config)
990 {
991 struct dummy_model_t *dummy = NULL;
992
993 dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
994
995 if (dummy) {
996 dummy->job_irq_js_state = 0;
997 init_register_statuses(dummy);
998 dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
999 }
1000 return dummy;
1001 }
1002
/* Free a model instance previously allocated by midgard_model_create(). */
void midgard_model_destroy(void *h)
{
	/* kfree() takes a const void * — the old (void *) cast was redundant */
	kfree(h);
}
1007
midgard_model_get_outputs(void * h)1008 static void midgard_model_get_outputs(void *h)
1009 {
1010 struct dummy_model_t *dummy = (struct dummy_model_t *)h;
1011
1012 if (hw_error_status.job_irq_status)
1013 gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
1014
1015 if ((dummy->power_changed && dummy->power_changed_mask) ||
1016 (dummy->reset_completed & dummy->reset_completed_mask) ||
1017 hw_error_status.gpu_error_irq ||
1018 (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ||
1019 dummy->prfcnt_sample_completed)
1020 gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
1021
1022 if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
1023 gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
1024 }
1025
/* Advance every job slot by one step: assert the completion IRQ for active
 * jobs whose previous IRQs have been acknowledged, apply injected errors,
 * and promote the queued job chain into the active position.
 */
static void midgard_model_update(void *h)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
	int i;

	for (i = 0; i < NUM_SLOTS; i++) {
		if (!dummy->slots[i].job_active)
			continue;

		if (dummy->slots[i].job_disabled) {
			/* disabled slot: update statuses but raise no IRQ */
			update_register_statuses(dummy, i);
			continue;
		}

		/* If there are any pending interrupts that have not
		 * been cleared we cannot run the job in the next register
		 * as we will overwrite the register status of the job in
		 * the head registers - which has not yet been read
		 */
		if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) ||
				(hw_error_status.job_irq_rawstat & (1 << i))) {
			continue;
		}

		/*this job is done assert IRQ lines */
		signal_int(dummy, i);
#ifdef CONFIG_MALI_BIFROST_ERROR_INJECT
		midgard_set_error(i);
#endif /* CONFIG_MALI_BIFROST_ERROR_INJECT */
		update_register_statuses(dummy, i);
		/*if this job slot returned failures we cannot use it */
		if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) {
			dummy->slots[i].job_active = 0;
			continue;
		}
		/*process next job */
		dummy->slots[i].job_active = dummy->slots[i].job_queued;
		dummy->slots[i].job_queued = 0;
		if (dummy->slots[i].job_active) {
			if (hw_error_status.job_irq_rawstat & (1 << (i + 16)))
				model_error_log(KBASE_CORE,
					"\natom %lld running a job on a dirty slot",
					hw_error_status.current_jc);
		}
	}
}
1072
invalidate_active_jobs(struct dummy_model_t * dummy)1073 static void invalidate_active_jobs(struct dummy_model_t *dummy)
1074 {
1075 int i;
1076
1077 for (i = 0; i < NUM_SLOTS; i++) {
1078 if (dummy->slots[i].job_active) {
1079 hw_error_status.job_irq_rawstat |= (1 << (16 + i));
1080
1081 hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/
1082 }
1083 }
1084 }
1085
/**
 * midgard_model_write_reg - Handle a register write to the dummy GPU model.
 * @h:     model handle (struct dummy_model_t *)
 * @addr:  register address being written
 * @value: value being written
 *
 * Decodes @addr into the job-slot, job/GPU/MMU IRQ, GPU command, address
 * space, performance counter or power register groups and updates model
 * state accordingly. Afterwards the job state machine is stepped and the
 * IRQ output lines are re-asserted.
 *
 * Return: always 1 (every write is accepted).
 */
u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
#if !MALI_USE_CSF
	if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
	    (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
		/* Job-slot registers are spaced 0x80 apart; the slot index
		 * lives in address bits 7..10.
		 */
		int slot_idx = (addr >> 7) & 0xf;

		KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
		/* Track the job chain address being programmed, 32 bits at
		 * a time.
		 */
		if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
			hw_error_status.current_jc &=
				~((u64) (0xFFFFFFFF));
			hw_error_status.current_jc |= (u64) value;
		}
		if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) {
			hw_error_status.current_jc &= (u64) 0xFFFFFFFF;
			hw_error_status.current_jc |=
				((u64) value) << 32;
		}
		if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) &&
		    value == 1) {
			pr_debug("%s", "start detected");
			KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active ||
					   !dummy->slots[slot_idx].job_queued);
			/* If the slot is busy or its IRQ is still pending the
			 * new job is queued, otherwise it becomes active now.
			 */
			if ((dummy->slots[slot_idx].job_active) ||
			    (hw_error_status.job_irq_rawstat &
			     (1 << (slot_idx + 16)))) {
				pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending  ~~~~~~~~~"
				);
				dummy->slots[slot_idx].job_queued = 1;
			} else {
				dummy->slots[slot_idx].job_active = 1;
			}
		}

		if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value ==
		    0)
			dummy->slots[slot_idx].job_queued = 0;

		if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) &&
		    (value == JS_COMMAND_SOFT_STOP ||
		     value == JS_COMMAND_HARD_STOP)) {
			/*dummy->slots[slot_idx].job_active = 0; */
			hw_error_status.current_job_slot = slot_idx;
			if (value == JS_COMMAND_SOFT_STOP) {
				hw_error_status.errors_mask = KBASE_JOB_STOPPED;
			} else {	/*value == 3 */

				/* A HARD_STOP also re-enables a slot that was
				 * disabled via gpu_model_control().
				 */
				if (dummy->slots[slot_idx].job_disabled != 0) {
					pr_debug("enabling slot after HARD_STOP"
					);
					dummy->slots[slot_idx].job_disabled = 0;
				}
				hw_error_status.errors_mask =
					KBASE_JOB_TERMINATED;
			}
		}
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
		int i;

		/* Each slot owns bit i (done) and bit i+16 (failed). */
		for (i = 0; i < NUM_SLOTS; i++) {
			if (value & ((1 << i) | (1 << (i + 16))))
				dummy->slots[i].job_complete_irq_asserted = 0;
			/* hw_error_status.js_status[i] is cleared in
			 * update_job_irq_js_state
			 */
		}
		pr_debug("%s", "job irq cleared");
		update_job_irq_js_state(dummy, value);
		/*remove error condition for JOB */
		hw_error_status.job_irq_rawstat &= ~(value);
		hw_error_status.job_irq_status &= ~(value);
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		int i;

		for (i = 0; i < NUM_SLOTS; i++)
			dummy->slots[i].job_irq_mask = (value >> i) & 0x01;
		pr_debug("job irq mask to value %x", value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
#else /* !MALI_USE_CSF */
	if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
		pr_debug("%s", "job irq cleared");

		hw_error_status.job_irq_rawstat &= ~(value);
		hw_error_status.job_irq_status &= ~(value);
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
#endif /* !MALI_USE_CSF */
		pr_debug("GPU_IRQ_MASK set to 0x%x", value);
		/* bit 8: RESET_COMPLETED, bits 9-10: POWER_CHANGED{,_ALL},
		 * bit 17: CLEAN_CACHES_COMPLETED.
		 */
		dummy->reset_completed_mask = (value >> 8) & 0x01;
		dummy->power_changed_mask = (value >> 9) & 0x03;
		dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
	} else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
		dummy->coherency_enable = value;
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
		/* Clear each latched GPU IRQ cause named by @value. */
		if (value & (1 << 8)) {
			pr_debug("%s", "gpu RESET_COMPLETED irq cleared");
			dummy->reset_completed = 0;
		}
		if (value & (3 << 9))
			dummy->power_changed = 0;

		if (value & (1 << 17))
			dummy->clean_caches_completed = false;
		if (value & (1 << 16))
			dummy->prfcnt_sample_completed = 0;

		/*update error status */
		hw_error_status.gpu_error_irq &= ~(value);
	} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
		switch (value) {
		case GPU_COMMAND_SOFT_RESET:
		case GPU_COMMAND_HARD_RESET:
			pr_debug("gpu reset (%d) requested", value);
			/* no more fault status */
			hw_error_status.gpu_fault_status = 0;
			/* completed reset instantly */
			dummy->reset_completed = 1;
			break;
#if MALI_USE_CSF
		case GPU_COMMAND_CACHE_CLN_INV_L2:
		case GPU_COMMAND_CACHE_CLN_INV_L2_LSC:
		case GPU_COMMAND_CACHE_CLN_INV_FULL:
#else
		case GPU_COMMAND_CLEAN_CACHES:
		case GPU_COMMAND_CLEAN_INV_CACHES:
#endif
			pr_debug("clean caches requested");
			dummy->clean_caches_completed = true;
			break;
		case GPU_COMMAND_PRFCNT_SAMPLE:
			midgard_model_dump_prfcnt();
			dummy->prfcnt_sample_completed = 1;
			/* fall through - the default case is a no-op */
		default:
			break;
		}
	} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
		dummy->l2_config = value;
	}
#if MALI_USE_CSF
	else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) &&
		 addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET +
					(CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
		/* Ringing doorbell 0 asserts the global interface IRQ. */
		if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
			hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
	} else if (addr == IPA_CONTROL_REG(COMMAND)) {
		pr_debug("Received IPA_CONTROL command");
	} else if (addr == IPA_CONTROL_REG(TIMER)) {
		ipa_control_timer_enabled = value ? true : false;
	} else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) &&
		   (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) {
		/* SELECT registers are 64-bit, 8 bytes apart per core type;
		 * bit 2 of the offset distinguishes LO from HI word.
		 */
		enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)(
			(addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3);
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7);

		if (is_low_word) {
			ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX;
			ipa_ctl_select_config[core_type] |= value;
		} else {
			ipa_ctl_select_config[core_type] &= U32_MAX;
			ipa_ctl_select_config[core_type] |= ((u64)value << 32);
		}
	}
#endif
	else if (addr == MMU_REG(MMU_IRQ_MASK)) {
		hw_error_status.mmu_irq_mask = value;
	} else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
		hw_error_status.mmu_irq_rawstat &= (~value);
	} else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
		   (addr <= MMU_AS_REG(15, AS_STATUS))) {
		/* Address space registers are 0x40 apart per AS. */
		int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
				     >> 6;

		switch (addr & 0x3F) {
		case AS_COMMAND:
			switch (value) {
			case AS_COMMAND_NOP:
				hw_error_status.as_command[mem_addr_space] =
					value;
				break;

			case AS_COMMAND_UPDATE:
				hw_error_status.as_command[mem_addr_space] =
					value;
				/* UPDATE on a faulted AS is only legal once
				 * AS_TRANSTAB has been set to unmapped.
				 */
				if ((hw_error_status.as_faultstatus[
					     mem_addr_space])
				    && ((hw_error_status.as_transtab[
						 mem_addr_space] & 0x3) != 0)) {
					model_error_log(KBASE_CORE,
						"\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n"
					);
				} else if ((hw_error_status.as_faultstatus[
						    mem_addr_space])
					   && ((hw_error_status.as_transtab[
						       mem_addr_space] & 0x3) == 0)) {

					/*invalidate all active jobs */
					invalidate_active_jobs(dummy);
					/* error handled */
					hw_error_status.as_faultstatus[
						mem_addr_space] = 0;
				}
				break;

			case AS_COMMAND_LOCK:
			case AS_COMMAND_UNLOCK:
				hw_error_status.as_command[mem_addr_space] =
					value;
				break;

			case AS_COMMAND_FLUSH_PT:
			case AS_COMMAND_FLUSH_MEM:
				/* FLUSH requires a preceding LOCK command. */
				if (hw_error_status.as_command[mem_addr_space]
				    != AS_COMMAND_LOCK)
					model_error_log(KBASE_CORE,
						"\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n"
					);
				else	/* error handled if any */
					hw_error_status.as_faultstatus[
						mem_addr_space] = 0;
				hw_error_status.as_command[mem_addr_space] =
					value;
				break;

			default:
				model_error_log(KBASE_CORE,
					"\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n",
					value);
				break;
			}
			break;

		case AS_TRANSTAB_LO:
			hw_error_status.as_transtab[mem_addr_space] &=
				~((u64) (0xffffffff));
			hw_error_status.as_transtab[mem_addr_space] |=
				(u64) value;
			break;

		case AS_TRANSTAB_HI:
			hw_error_status.as_transtab[mem_addr_space] &=
				(u64) 0xffffffff;
			hw_error_status.as_transtab[mem_addr_space] |=
				((u64) value) << 32;
			break;

		case AS_LOCKADDR_LO:
		case AS_LOCKADDR_HI:
		case AS_MEMATTR_LO:
		case AS_MEMATTR_HI:
		case AS_TRANSCFG_LO:
		case AS_TRANSCFG_HI:
			/* Writes ignored */
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n",
				mem_addr_space, addr, value);
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) &&
		   addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) {
		/* Performance counter configuration registers. */
		switch (addr) {
		case PRFCNT_BASE_LO:
			performance_counters.prfcnt_base |= value;
			break;
		case PRFCNT_BASE_HI:
			performance_counters.prfcnt_base |= ((u64) value) << 32;
			break;
#if !MALI_USE_CSF
		case PRFCNT_JM_EN:
			performance_counters.prfcnt_en.jm = value;
			break;
#endif /* !MALI_USE_CSF */
		case PRFCNT_SHADER_EN:
			performance_counters.prfcnt_en.shader = value;
			break;
		case PRFCNT_TILER_EN:
			performance_counters.prfcnt_en.tiler = value;
			break;
		case PRFCNT_MMU_L2_EN:
			performance_counters.prfcnt_en.l2 = value;
			break;
		}
	} else {
		/* Power on/off registers. power_on layout: bit 0 = L2,
		 * bit 1 = tiler, bits 2..5 = shader cores.
		 */
		switch (addr) {
		case TILER_PWRON_LO:
			dummy->power_on |= (value & 1) << 1;
			/* Also ensure L2 is powered on */
			dummy->power_on |= value & 1;
			dummy->power_changed = 1;
			break;
		case SHADER_PWRON_LO:
			dummy->power_on |= (value & 0xF) << 2;
			dummy->power_changed = 1;
			break;
		case L2_PWRON_LO:
			dummy->power_on |= value & 1;
			dummy->power_changed = 1;
			break;
		case STACK_PWRON_LO:
			dummy->stack_power_on_lo |= value;
			dummy->power_changed = 1;
			break;
		case TILER_PWROFF_LO:
			dummy->power_on &= ~((value & 1) << 1);
			dummy->power_changed = 1;
			break;
		case SHADER_PWROFF_LO:
			dummy->power_on &= ~((value & 0xF) << 2);
			dummy->power_changed = 1;
			break;
		case L2_PWROFF_LO:
			dummy->power_on &= ~(value & 1);
			/* Also ensure tiler is powered off */
			dummy->power_on &= ~((value & 1) << 1);
			dummy->power_changed = 1;
			break;
		case STACK_PWROFF_LO:
			dummy->stack_power_on_lo &= ~value;
			dummy->power_changed = 1;
			break;

		case TILER_PWROFF_HI:
		case SHADER_PWROFF_HI:
		case L2_PWROFF_HI:
		case PWR_KEY:
		case PWR_OVERRIDE0:
#if !MALI_USE_CSF
		case JM_CONFIG:
#else /* !MALI_USE_CSF */
		case CSF_CONFIG:
#endif /* !MALI_USE_CSF */
		case SHADER_CONFIG:
		case TILER_CONFIG:
		case L2_MMU_CONFIG:
			/* Writes ignored */
			break;
		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Writing unsupported register 0x%x value 0x%x\n",
				addr, value);
			break;
		}
	}

	/* Step the job state machine and re-assert IRQ lines. */
	midgard_model_update(dummy);
	midgard_model_get_outputs(dummy);

	return 1;
}
1440
/**
 * midgard_model_read_reg - Handle a register read from the dummy GPU model.
 * @h:     model handle (struct dummy_model_t *)
 * @addr:  register address being read
 * @value: out parameter receiving the register value (0 by default)
 *
 * Synthesises register contents from the model state: IRQ status, GPU
 * identity/feature registers, job-slot registers, MMU address space state,
 * IPA control counters (CSF only) and power status.
 *
 * Return: always 1 (every read is accepted).
 */
u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
	*value = 0;		/* 0 by default */
#if !MALI_USE_CSF
	if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) {
		pr_debug("%s", "JS_ACTIVE being read");

		*value = dummy->job_irq_js_state;
	} else if (addr == GPU_CONTROL_REG(GPU_ID)) {
#else /* !MALI_USE_CSF */
	if (addr == GPU_CONTROL_REG(GPU_ID)) {
#endif /* !MALI_USE_CSF */

		*value = dummy->control_reg_values->gpu_id;
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) {
		*value = hw_error_status.job_irq_rawstat;
		pr_debug("%s", "JS_IRQ_RAWSTAT being read");
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) {
		*value = hw_error_status.job_irq_status;
		pr_debug("JS_IRQ_STATUS being read %x", *value);
	}
#if !MALI_USE_CSF
	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		int i;

		/* Reassemble the mask from the per-slot bits. */
		*value = 0;
		for (i = 0; i < NUM_SLOTS; i++)
			*value |= dummy->slots[i].job_irq_mask << i;
		pr_debug("JS_IRQ_MASK being read %x", *value);
	}
#else /* !MALI_USE_CSF */
	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
	}
#endif /* !MALI_USE_CSF */
	else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
		/* bit 7 and bit 0 are reported as always enabled. */
		*value = (dummy->reset_completed_mask << 8) |
			 (dummy->power_changed_mask << 9) | (1 << 7) | 1;
		pr_debug("GPU_IRQ_MASK read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
		/* Raw (unmasked) latched causes. */
		*value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |
			 (dummy->reset_completed << 8) |
			 ((dummy->clean_caches_completed ? 1u : 0u) << 17) |
			 (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
		pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
		/* Same causes as RAWSTAT but gated by the IRQ masks. */
		*value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) |
			 ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) |
			 ((dummy->reset_completed & dummy->reset_completed_mask) << 8) |
			 (((dummy->clean_caches_completed &&
			    dummy->clean_caches_completed_irq_enabled) ?
				   1u :
				   0u)
			  << 17) |
			 (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
		pr_debug("GPU_IRQ_STAT read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
		*value = 0;
#if !MALI_USE_CSF
	} else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) {
		*value = 0;
#endif
	} else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) {
		*value = hw_error_status.gpu_fault_status;
	} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
		*value = dummy->l2_config;
	} else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
		   (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
		switch (addr) {
		case GPU_CONTROL_REG(SHADER_PRESENT_LO):
		case GPU_CONTROL_REG(SHADER_PRESENT_HI):
		case GPU_CONTROL_REG(TILER_PRESENT_LO):
		case GPU_CONTROL_REG(TILER_PRESENT_HI):
		case GPU_CONTROL_REG(L2_PRESENT_LO):
		case GPU_CONTROL_REG(L2_PRESENT_HI):
		case GPU_CONTROL_REG(STACK_PRESENT_LO):
		case GPU_CONTROL_REG(STACK_PRESENT_HI):
			*value = get_implementation_register(addr);
			break;
		/* READY registers: intersect the powered-on bits (see the
		 * power_on layout in midgard_model_write_reg) with the
		 * PRESENT masks.
		 */
		case GPU_CONTROL_REG(SHADER_READY_LO):
			*value = (dummy->power_on >> 0x02) &
				 get_implementation_register(
					 GPU_CONTROL_REG(SHADER_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(TILER_READY_LO):
			*value = (dummy->power_on >> 0x01) &
				 get_implementation_register(
					 GPU_CONTROL_REG(TILER_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(L2_READY_LO):
			*value = dummy->power_on &
				 get_implementation_register(
					 GPU_CONTROL_REG(L2_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(STACK_READY_LO):
			*value = dummy->stack_power_on_lo &
				 get_implementation_register(
					 GPU_CONTROL_REG(STACK_PRESENT_LO));
			break;

		case GPU_CONTROL_REG(SHADER_READY_HI):
		case GPU_CONTROL_REG(TILER_READY_HI):
		case GPU_CONTROL_REG(L2_READY_HI):
		case GPU_CONTROL_REG(STACK_READY_HI):
			*value = 0;
			break;

		/* No power transitions are modelled: PWRTRANS always 0. */
		case GPU_CONTROL_REG(SHADER_PWRTRANS_LO):
		case GPU_CONTROL_REG(SHADER_PWRTRANS_HI):
		case GPU_CONTROL_REG(TILER_PWRTRANS_LO):
		case GPU_CONTROL_REG(TILER_PWRTRANS_HI):
		case GPU_CONTROL_REG(L2_PWRTRANS_LO):
		case GPU_CONTROL_REG(L2_PWRTRANS_HI):
		case GPU_CONTROL_REG(STACK_PWRTRANS_LO):
		case GPU_CONTROL_REG(STACK_PWRTRANS_HI):
			*value = 0;
			break;

		case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO):
		case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI):
		case GPU_CONTROL_REG(TILER_PWRACTIVE_LO):
		case GPU_CONTROL_REG(TILER_PWRACTIVE_HI):
		case GPU_CONTROL_REG(L2_PWRACTIVE_LO):
		case GPU_CONTROL_REG(L2_PWRACTIVE_HI):
			*value = 0;
			break;

#if !MALI_USE_CSF
		case GPU_CONTROL_REG(JM_CONFIG):
#else /* !MALI_USE_CSF */
		case GPU_CONTROL_REG(CSF_CONFIG):
#endif /* !MALI_USE_CSF */

		case GPU_CONTROL_REG(SHADER_CONFIG):
		case GPU_CONTROL_REG(TILER_CONFIG):
		case GPU_CONTROL_REG(L2_MMU_CONFIG):
			*value = 0;
			break;

		case GPU_CONTROL_REG(COHERENCY_FEATURES):
			*value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */
			break;
		case GPU_CONTROL_REG(COHERENCY_ENABLE):
			*value = dummy->coherency_enable;
			break;

		case GPU_CONTROL_REG(THREAD_TLS_ALLOC):
			*value = 0;
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Reading unknown control reg 0x%x\n",
				addr);
			break;
		}
#if !MALI_USE_CSF
	} else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
		   (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
		/* Per-slot registers: slot index in bits 7..10, register
		 * offset within the slot in bits 0..6.
		 */
		int slot_idx = (addr >> 7) & 0xf;
		int sub_reg = addr & 0x7F;

		KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
		switch (sub_reg) {
		case JS_HEAD_NEXT_LO:
			*value = (u32) ((hw_error_status.current_jc) &
					0xFFFFFFFF);
			break;
		case JS_HEAD_NEXT_HI:
			*value = (u32) (hw_error_status.current_jc >> 32);
			break;
		case JS_STATUS:
			if (hw_error_status.js_status[slot_idx])
				*value = hw_error_status.js_status[slot_idx];
			else	/* 0x08 means active, 0x00 idle */
				*value = (dummy->slots[slot_idx].job_active)
					 << 3;
			break;
		case JS_COMMAND_NEXT:
			*value = dummy->slots[slot_idx].job_queued;
			break;

		/* The dummy model does not implement these registers
		 * avoid printing error messages
		 */
		case JS_HEAD_HI:
		case JS_HEAD_LO:
		case JS_TAIL_HI:
		case JS_TAIL_LO:
		case JS_FLUSH_ID_NEXT:
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: unknown job slot reg 0x%02X being read\n",
				sub_reg);
			break;
		}
#endif /* !MALI_USE_CSF */
	} else if (addr == GPU_CONTROL_REG(AS_PRESENT)) {
		*value = dummy->control_reg_values->as_present;
#if !MALI_USE_CSF
	} else if (addr == GPU_CONTROL_REG(JS_PRESENT)) {
		*value = 0x7;
#endif /* !MALI_USE_CSF */
	} else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) &&
		   addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) {
		/* Fixed texture feature words for the dummy model. */
		switch (addr) {
		case GPU_CONTROL_REG(TEXTURE_FEATURES_0):
			*value = 0xfffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_1):
			*value = 0xffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_2):
			*value = 0x9f81ffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_3):
			*value = 0;
			break;
		}
#if !MALI_USE_CSF
	} else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) &&
		   addr <= GPU_CONTROL_REG(JS15_FEATURES)) {
		/* Fixed job-slot feature words; slots above 2 report 0. */
		switch (addr) {
		case GPU_CONTROL_REG(JS0_FEATURES):
			*value = 0x20e;
			break;

		case GPU_CONTROL_REG(JS1_FEATURES):
			*value = 0x1fe;
			break;

		case GPU_CONTROL_REG(JS2_FEATURES):
			*value = 0x7e;
			break;

		default:
			*value = 0;
			break;
		}
#endif /* !MALI_USE_CSF */
	} else if (addr >= GPU_CONTROL_REG(L2_FEATURES)
		   && addr <= GPU_CONTROL_REG(MMU_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(L2_FEATURES):
			*value = 0x6100206;
			break;

		case GPU_CONTROL_REG(CORE_FEATURES):
			*value = dummy->control_reg_values->core_features;
			break;

		case GPU_CONTROL_REG(TILER_FEATURES):
			*value = dummy->control_reg_values->tiler_features;
			break;

		case GPU_CONTROL_REG(MEM_FEATURES):
			/* Bit 0: Core group is coherent */
			*value = 0x01;
			/* Bits 11:8: L2 slice count - 1 */
			*value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8;
			break;

		case GPU_CONTROL_REG(MMU_FEATURES):
			*value = dummy->control_reg_values->mmu_features;
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS)
		   && addr <= GPU_CONTROL_REG(THREAD_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(THREAD_FEATURES):
			*value = dummy->control_reg_values->thread_features
				 | (IMPLEMENTATION_MODEL << 30);
			break;
		case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE):
			*value = dummy->control_reg_values->thread_max_barrier_size;
			break;
		case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE):
			*value = dummy->control_reg_values->thread_max_workgroup_size;
			break;
		case GPU_CONTROL_REG(THREAD_MAX_THREADS):
			*value = dummy->control_reg_values->thread_max_threads;
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO)
		   && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) {
		/* Cycle count and timestamp are not modelled. */
		*value = 0;
	} else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)
		   && addr <= MMU_AS_REG(15, AS_STATUS)) {
		/* Address space registers are 0x40 apart per AS. */
		int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
				     >> 6;

		switch (addr & 0x3F) {
		case AS_TRANSTAB_LO:
			*value = (u32)
				(hw_error_status.as_transtab[mem_addr_space] &
				 0xffffffff);
			break;

		case AS_TRANSTAB_HI:
			*value = (u32)
				(hw_error_status.as_transtab[mem_addr_space] >>
				 32);
			break;

		case AS_STATUS:
			*value = 0;
			break;

		case AS_FAULTSTATUS:
			/* Only the currently faulty AS reports a status. */
			if (mem_addr_space == hw_error_status.faulty_mmu_as)
				*value = hw_error_status.as_faultstatus[
					hw_error_status.faulty_mmu_as];
			else
				*value = 0;
			break;

		case AS_LOCKADDR_LO:
		case AS_LOCKADDR_HI:
		case AS_MEMATTR_LO:
		case AS_MEMATTR_HI:
		case AS_TRANSCFG_LO:
		case AS_TRANSCFG_HI:
			/* Read ignored */
			*value = 0;
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n",
				mem_addr_space, addr);
			*value = 0;
			break;
		}
	} else if (addr == MMU_REG(MMU_IRQ_MASK)) {
		*value = hw_error_status.mmu_irq_mask;
	} else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) {
		*value = hw_error_status.mmu_irq_rawstat;
	} else if (addr == MMU_REG(MMU_IRQ_STATUS)) {
		*value = hw_error_status.mmu_irq_mask &
			 hw_error_status.mmu_irq_rawstat;
	}
#if MALI_USE_CSF
	else if (addr == IPA_CONTROL_REG(STATUS)) {
		*value = (ipa_control_timer_enabled << 31);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(
			   IPA_CTL_MAX_VAL_CNT_IDX)))) {
		/* 64-bit counter values, 8 bytes apart; bit 2 of the offset
		 * distinguishes LO from HI word. Same decoding below for
		 * MEMSYS, TILER and SHADER.
		 */
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(
			   IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(
			   IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(
			   IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
						    counter_index, is_low_word);
	}
#endif
	else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
		*value = dummy->control_reg_values->gpu_features_lo;
	} else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) {
		*value = dummy->control_reg_values->gpu_features_hi;
	} else {
		model_error_log(KBASE_CORE,
			"Dummy model register access: Reading unsupported register 0x%x. Returning 0\n",
			addr);
		*value = 0;
	}

	CSTD_UNUSED(dummy);

	return 1;
}
1848
1849 static u32 set_user_sample_core_type(u64 *counters,
1850 u32 *usr_data_start, u32 usr_data_offset,
1851 u32 usr_data_size, u32 core_count)
1852 {
1853 u32 sample_size;
1854 u32 *usr_data = NULL;
1855
1856 sample_size =
1857 core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32);
1858
1859 if ((usr_data_size >= usr_data_offset) &&
1860 (sample_size <= usr_data_size - usr_data_offset))
1861 usr_data = usr_data_start + (usr_data_offset / sizeof(u32));
1862
1863 if (!usr_data)
1864 model_error_log(KBASE_CORE, "Unable to set counter sample 1");
1865 else {
1866 u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
1867 u32 i;
1868
1869 for (i = 0; i < loop_cnt; i++) {
1870 if (copy_from_user(&counters[i], &usr_data[i],
1871 sizeof(u32))) {
1872 model_error_log(KBASE_CORE, "Unable to set counter sample 2");
1873 break;
1874 }
1875 }
1876 }
1877
1878 return usr_data_offset + sample_size;
1879 }
1880
1881 static u32 set_kernel_sample_core_type(u64 *counters,
1882 u64 *usr_data_start, u32 usr_data_offset,
1883 u32 usr_data_size, u32 core_count)
1884 {
1885 u32 sample_size;
1886 u64 *usr_data = NULL;
1887
1888 sample_size =
1889 core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64);
1890
1891 if ((usr_data_size >= usr_data_offset) &&
1892 (sample_size <= usr_data_size - usr_data_offset))
1893 usr_data = usr_data_start + (usr_data_offset / sizeof(u64));
1894
1895 if (!usr_data)
1896 model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1");
1897 else
1898 memcpy(counters, usr_data, sample_size);
1899
1900 return usr_data_offset + sample_size;
1901 }
1902
/* Counter values injected through ioctl are of 32 bits */
/* Populate the dummy performance counters from a user-space sample buffer.
 * The buffer is consumed in a fixed order - JM (or CSHW on CSF GPUs),
 * tiler, memsys/L2, shader - with each call advancing the running offset.
 */
void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size)
{
	u32 offset = 0;

#if !MALI_USE_CSF
	offset = set_user_sample_core_type(performance_counters.jm_counters,
					   usr_data, offset, usr_data_size, 1);
#else
	offset = set_user_sample_core_type(performance_counters.cshw_counters,
					   usr_data, offset, usr_data_size, 1);
#endif /* !MALI_USE_CSF */
	offset = set_user_sample_core_type(performance_counters.tiler_counters,
					   usr_data, offset, usr_data_size,
					   hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
	/* NOTE(review): the L2 block is sized with the compile-time maximum
	 * here, whereas the kernel-sample variant uses the configured
	 * l2_present mask - presumably intentional for the ioctl path.
	 */
	offset = set_user_sample_core_type(performance_counters.l2_counters,
					   usr_data, offset, usr_data_size,
					   KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
	offset = set_user_sample_core_type(performance_counters.shader_counters,
					   usr_data, offset, usr_data_size,
					   KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
}
1925
/* Counter values injected through kutf are of 64 bits */
/* Populate the dummy performance counters from a kernel-space sample buffer,
 * in the same fixed block order as gpu_model_set_dummy_prfcnt_sample(), but
 * sized by the currently configured l2_present/shader_present masks.
 */
void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size)
{
	u32 offset = 0;

#if !MALI_USE_CSF
	offset = set_kernel_sample_core_type(performance_counters.jm_counters,
					     usr_data, offset, usr_data_size, 1);
#else
	offset = set_kernel_sample_core_type(performance_counters.cshw_counters,
					     usr_data, offset, usr_data_size, 1);
#endif /* !MALI_USE_CSF */
	offset = set_kernel_sample_core_type(performance_counters.tiler_counters,
					     usr_data, offset, usr_data_size,
					     hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
	offset = set_kernel_sample_core_type(performance_counters.l2_counters,
					     usr_data, offset, usr_data_size,
					     hweight64(performance_counters.l2_present));
	offset = set_kernel_sample_core_type(performance_counters.shader_counters,
					     usr_data, offset, usr_data_size,
					     hweight64(performance_counters.shader_present));
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample);
1949
1950 void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
1951 u64 *l2_present, u64 *shader_present)
1952 {
1953 if (shader_present)
1954 *shader_present = performance_counters.shader_present;
1955 if (l2_present)
1956 *l2_present = performance_counters.l2_present;
1957 }
1958 KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores);
1959
1960 void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
1961 u64 l2_present, u64 shader_present)
1962 {
1963 if (WARN_ON(!l2_present || !shader_present
1964 || hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS
1965 || hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES))
1966 return;
1967
1968 performance_counters.l2_present = l2_present;
1969 performance_counters.shader_present = shader_present;
1970
1971 /* Update the GPU properties used by vinstr to calculate the counter
1972 * dump buffer size.
1973 */
1974 kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present);
1975 kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present;
1976 kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present);
1977 kbdev->gpu_props.curr_config.shader_present = shader_present;
1978 }
1979 KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores);
1980
1981 void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
1982 struct tagged_addr *pages,
1983 size_t page_count)
1984 {
1985 performance_counters.prfcnt_base_cpu = base;
1986 performance_counters.kbdev = kbdev;
1987 performance_counters.pages = pages;
1988 performance_counters.page_count = page_count;
1989 }
1990
1991 int gpu_model_control(void *model,
1992 struct kbase_model_control_params *params)
1993 {
1994 struct dummy_model_t *dummy = (struct dummy_model_t *)model;
1995 int i;
1996
1997 if (params->command == KBASE_MC_DISABLE_JOBS) {
1998 for (i = 0; i < NUM_SLOTS; i++)
1999 dummy->slots[i].job_disabled = params->value;
2000 } else {
2001 return -EINVAL;
2002 }
2003
2004 midgard_model_update(dummy);
2005 midgard_model_get_outputs(dummy);
2006
2007 return 0;
2008 }
2009