• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 /* NOTES:
23  * - A default GPU can be compiled in during the build, by defining
24  *   CONFIG_MALI_NO_MALI_DEFAULT_GPU. SCons sets this, which means that
25  *   insmod'ing mali_kbase.ko with no arguments after a build with "scons
26  *   gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be
27  *   overridden by passing the 'no_mali_gpu' argument to insmod.
28  *
29  * - if CONFIG_MALI_BIFROST_ERROR_INJECT is defined the error injection system is
30  *   activated.
31  */
32 
33 /* Implementation of failure injection system:
34  *
35  * Error conditions are generated by gpu_generate_error().
36  * According to CONFIG_MALI_BIFROST_ERROR_INJECT definition gpu_generate_error() either
37  * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or
38  * checks if there is (in error_track_list) an error configuration to be set for
39  * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined).
40  * Each error condition will trigger a specific "state" for a certain set of
41  * registers as per Midgard Architecture Specifications doc.
42  *
43  * According to Midgard Architecture Specifications doc the following registers
44  * are always affected by error conditions:
45  *
46  * JOB Exception:
47  *				JOB_IRQ_RAWSTAT
48  *				JOB<n> STATUS AREA
49  *
50  * MMU Exception:
51  *				MMU_IRQ_RAWSTAT
52  *				AS<n>_FAULTSTATUS
53  *				AS<n>_FAULTADDRESS
54  *
55  * GPU Exception:
56  *				GPU_IRQ_RAWSTAT
57  *				GPU_FAULTSTATUS
58  *				GPU_FAULTADDRESS
59  *
60  *	For further clarification on the model behaviour upon specific error
61  *      conditions the user may refer to the Midgard Architecture Specification
62  *      document
63  */
64 #include <mali_kbase.h>
65 #include <gpu/mali_kbase_gpu_regmap.h>
66 #include <backend/gpu/mali_kbase_model_dummy.h>
67 #include <mali_kbase_mem_linux.h>
68 
69 #if MALI_USE_CSF
70 #include <csf/mali_kbase_csf_firmware.h>
71 
72 /* Index of the last value register for each type of core, with the 1st value
73  * register being at index 0.
74  */
#define IPA_CTL_MAX_VAL_CNT_IDX (KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS - 1)

/* Array for storing the value of SELECT register for each type of core */
static u64 ipa_ctl_select_config[KBASE_IPA_CORE_TYPE_NUM];
/* Whether the IPA control timer is currently enabled (CSF builds only). */
static bool ipa_control_timer_enabled;
80 #endif
81 
/* Extract the low 32 bits of a (possibly 64-bit) register value. */
#define LO_MASK(M) ((M) & 0xFFFFFFFF)
83 
/* Return the dummy implementation's read-only value for register @reg.
 *
 * Only the low words of the *_PRESENT registers carry non-zero values;
 * the corresponding high words — and every other register — read as 0.
 */
static u32 get_implementation_register(u32 reg)
{
	u32 value;

	switch (reg) {
	case GPU_CONTROL_REG(SHADER_PRESENT_LO):
		value = LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
		break;
	case GPU_CONTROL_REG(TILER_PRESENT_LO):
		value = LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
		break;
	case GPU_CONTROL_REG(L2_PRESENT_LO):
		value = LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
		break;
	case GPU_CONTROL_REG(STACK_PRESENT_LO):
		value = LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);
		break;
	case GPU_CONTROL_REG(SHADER_PRESENT_HI):
	case GPU_CONTROL_REG(TILER_PRESENT_HI):
	case GPU_CONTROL_REG(L2_PRESENT_HI):
	case GPU_CONTROL_REG(STACK_PRESENT_HI):
		/* *** FALLTHROUGH *** */
	default:
		value = 0;
		break;
	}

	return value;
}
105 
/* Global state of the dummy model's performance counter machinery.
 *
 * Holds the dump buffer mapping plus one saved 64-bit value per hardware
 * counter for each block type. l2_present/shader_present are seeded from
 * the DUMMY_IMPLEMENTATION_* masks.
 */
struct {
	unsigned long prfcnt_base;	/* dump buffer base address — presumably GPU VA; confirm */
	u32 *prfcnt_base_cpu;		/* CPU mapping of the dump buffer (written by dumps) */
	struct kbase_device *kbdev;	/* device used for DMA sync of the dump pages */
	struct tagged_addr *pages;	/* physical pages backing the dump buffer */
	size_t page_count;		/* number of entries in @pages */

	u32 time;			/* pseudo-timestamp written into each block header */

	/* Per-block-type counter enable masks used when dumping. */
	struct {
		u32 jm;
		u32 tiler;
		u32 l2;
		u32 shader;
	} prfcnt_en;

	u64 l2_present;			/* bitmap of L2/memsys blocks present */
	u64 shader_present;		/* bitmap of shader cores present */

#if !MALI_USE_CSF
	u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
#else
	u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
#endif /* !MALI_USE_CSF */
	u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
	u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
			KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
	u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
			    KBASE_DUMMY_MODEL_COUNTER_PER_CORE];

} performance_counters = {
	.l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
	.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
};
140 
/* Per-job-slot state of the dummy GPU; fields are boolean-like flags. */
struct job_slot {
	int job_active;			/* a job chain is currently active on the slot */
	int job_queued;			/* a follow-up job chain is queued on the slot */
	int job_complete_irq_asserted;	/* completion IRQ line is raised for the slot */
	int job_irq_mask;		/* per-slot IRQ enable bit */
	int job_disabled;		/* slot disabled — presumably by error injection; confirm */
};
148 
/**
 * struct control_reg_values_t - control register values specific to the GPU being 'emulated'
 * @name:			GPU name
 * @gpu_id:			GPU ID to report
 * @as_present:			Bitmap of address spaces present
 * @thread_max_threads:		Maximum number of threads per core
 * @thread_max_workgroup_size:	Maximum number of threads per workgroup
 * @thread_max_barrier_size:	Maximum number of threads per barrier
 * @thread_features:		Thread features, NOT INCLUDING the 2
 *				most-significant bits, which are always set to
 *				IMPLEMENTATION_MODEL.
 * @core_features:		Core features
 * @tiler_features:		Tiler features
 * @mmu_features:		MMU features
 * @gpu_features_lo:		GPU features (low)
 * @gpu_features_hi:		GPU features (high)
 *
 * Fields left unspecified in an all_control_reg_values[] entry default
 * to zero.
 */
struct control_reg_values_t {
	const char *name;
	u32 gpu_id;
	u32 as_present;
	u32 thread_max_threads;
	u32 thread_max_workgroup_size;
	u32 thread_max_barrier_size;
	u32 thread_features;
	u32 core_features;
	u32 tiler_features;
	u32 mmu_features;
	u32 gpu_features_lo;
	u32 gpu_features_hi;
};
180 
/* Complete state of one dummy GPU model instance. */
struct dummy_model_t {
	int reset_completed;			/* reset-done IRQ status */
	int reset_completed_mask;		/* reset-done IRQ enable */
	int prfcnt_sample_completed;		/* PRFCNT_SAMPLE-done IRQ status */
	int power_changed_mask;	/* 2bits: _ALL,_SINGLE */
	int power_changed;	/* 1bit */
	bool clean_caches_completed;		/* cache-clean-done IRQ status */
	bool clean_caches_completed_irq_enabled; /* cache-clean-done IRQ enable */
	int power_on;		/* 6bits: SHADER[4],TILER,L2 */
	u32 stack_power_on_lo;			/* core-stack power state, low word */
	u32 coherency_enable;			/* last value written to COHERENCY_ENABLE */
	unsigned int job_irq_js_state;		/* emulated JOB_IRQ_JS_STATE snapshot */
	struct job_slot slots[NUM_SLOTS];	/* per-slot job state */
	const struct control_reg_values_t *control_reg_values; /* per-GPU constants */
	u32 l2_config;				/* last value written to L2_CONFIG */
	void *data;				/* opaque caller context, see gpu_device_set_data() */
};
198 
gpu_device_set_data(void * model,void * data)199 void gpu_device_set_data(void *model, void *data)
200 {
201 	struct dummy_model_t *dummy = (struct dummy_model_t *)model;
202 
203 	dummy->data = data;
204 }
205 
gpu_device_get_data(void * model)206 void *gpu_device_get_data(void *model)
207 {
208 	struct dummy_model_t *dummy = (struct dummy_model_t *)model;
209 
210 	return dummy->data;
211 }
212 
213 #define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1
214 
215 /* SCons should pass in a default GPU, but other ways of building (e.g.
216  * in-tree) won't, so define one here in case.
217  */
218 #ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
219 #define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
220 #endif
221 
222 static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
223 module_param(no_mali_gpu, charp, 0000);
224 MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
225 
/* Construct a value for the THREAD_FEATURES register, *except* the two most
 * significant bits, which are set to IMPLEMENTATION_MODEL in
 * midgard_model_read_reg().
 *
 * The field layout differs between CSF and JM GPUs: CSF has no thread-group
 * split field and places the task queue depth at bit 24 instead of bit 16.
 */
#if MALI_USE_CSF
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
	((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
#else
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
	((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
#endif
237 
/* Array associating GPU names with control register values. The first
 * one is used in the case of no match.
 *
 * Fields not listed for an entry (e.g. @core_features on older GPUs)
 * default to zero per C aggregate-initialization rules.
 */
static const struct control_reg_values_t all_control_reg_values[] = {
	{
		.name = "tMIx",
		.gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tHEx",
		.gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tSIx",
		.gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x300,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x209,
		.mmu_features = 0x2821,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tDVx",
		.gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x300,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x209,
		.mmu_features = 0x2821,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tNOx",
		.gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGOx_r0p0",
		.gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGOx_r1p0",
		.gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
		.core_features = 0x2,
		.tiler_features = 0x209,
		.mmu_features = 0x2823,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tTRx",
		.gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tNAx",
		.gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tBEx",
		.gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tBAx",
		.gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tDUx",
		.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tODx",
		.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tGRx",
		.gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.core_features = 0x0, /* core_1e16fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tVAx",
		.gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x180,
		.thread_max_workgroup_size = 0x180,
		.thread_max_barrier_size = 0x180,
		.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
		.core_features = 0x0, /* core_1e16fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0,
		.gpu_features_hi = 0,
	},
	{
		.name = "tTUx",
		.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
		.as_present = 0xFF,
		.thread_max_threads = 0x800,
		.thread_max_workgroup_size = 0x400,
		.thread_max_barrier_size = 0x400,
		.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
		.core_features = 0x0, /* core_1e32fma2tex */
		.tiler_features = 0x809,
		.mmu_features = 0x2830,
		.gpu_features_lo = 0xf,
		.gpu_features_hi = 0,
	},
};
455 
/* Global error-injection state shared by the model's register emulation. */
struct error_status_t hw_error_status;
457 
458 #if MALI_USE_CSF
/* Return one 32-bit half of the IPA counter selected for @core_type at
 * counter slot @cnt_idx.
 *
 * The event to read is decoded from the byte of the SELECT register
 * (ipa_ctl_select_config) corresponding to @cnt_idx; the saved per-core
 * values for that event are summed across all present cores of the type.
 *
 * @core_type:   Which block type to read (CSHW/MEMSYS/TILER/SHADER).
 * @cnt_idx:     Counter slot index within the SELECT register.
 * @is_low_word: True to return bits [31:0] of the sum, false for [63:32].
 *
 * Return: the requested word, or 0 for any invalid selection (with a WARN).
 */
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
				      u32 cnt_idx, bool is_low_word)
{
	u64 *counters_data;
	u32 core_count = 0;
	u32 event_index;
	u64 value = 0;
	u32 core;

	if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM))
		return 0;

	if (WARN_ON(cnt_idx >= KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))
		return 0;

	event_index =
		(ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF;

	/* Currently only primary counter blocks are supported */
	if (WARN_ON(event_index >= 64))
		return 0;

	/* The actual events start index 4 onwards. Spec also says PRFCNT_EN,
	 * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for
	 * IPA counters. If selected, the value returned for them will be zero.
	 */
	if (WARN_ON(event_index <= 3))
		return 0;

	event_index -= 4;

	switch (core_type) {
	case KBASE_IPA_CORE_TYPE_CSHW:
		core_count = 1;
		counters_data = performance_counters.cshw_counters;
		break;
	case KBASE_IPA_CORE_TYPE_MEMSYS:
		core_count = hweight64(performance_counters.l2_present);
		counters_data = performance_counters.l2_counters;
		break;
	case KBASE_IPA_CORE_TYPE_TILER:
		core_count = 1;
		counters_data = performance_counters.tiler_counters;
		break;
	case KBASE_IPA_CORE_TYPE_SHADER:
		core_count = hweight64(performance_counters.shader_present);
		counters_data = performance_counters.shader_counters;
		break;
	default:
		/* Unreachable after the range check above; return early so
		 * counters_data can never be read uninitialized.
		 */
		WARN(1, "Invalid core_type %d\n", core_type);
		return 0;
	}

	/* Sum the selected event across all present cores of this type;
	 * each core's values are KBASE_DUMMY_MODEL_COUNTER_PER_CORE apart.
	 */
	for (core = 0; core < core_count; core++) {
		value += counters_data[event_index];
		event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
	}

	if (is_low_word)
		return (value & U32_MAX);
	else
		return (value >> 32);
}
522 
/* Zero every saved performance counter value (all block types). */
void gpu_model_clear_prfcnt_values(void)
{
	memset(performance_counters.cshw_counters, 0,
	       sizeof(performance_counters.cshw_counters));

	memset(performance_counters.tiler_counters, 0,
	       sizeof(performance_counters.tiler_counters));

	memset(performance_counters.l2_counters, 0,
	       sizeof(performance_counters.l2_counters));

	memset(performance_counters.shader_counters, 0,
	       sizeof(performance_counters.shader_counters));
}
KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
538 #endif
539 
540 /**
541  * gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer
542  *
543  * @values:             Array of values to be written out
544  * @out_index:          Index into performance counter buffer
545  * @block_count:        Number of blocks to dump
546  * @prfcnt_enable_mask: Counter enable mask
547  * @blocks_present:     Available blocks bit mask
548  */
static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
					 u32 block_count,
					 u32 prfcnt_enable_mask,
					 u64 blocks_present)
{
	u32 block_idx, counter;
	u32 counter_value = 0;
	u32 *prfcnt_base;
	u32 index = 0;

	prfcnt_base = performance_counters.prfcnt_base_cpu;

	for (block_idx = 0; block_idx < block_count; block_idx++) {
		/* Only dump values if core is present. Use a 64-bit shift:
		 * blocks_present is u64 and block_idx can reach 32 or more,
		 * where a plain "1 <<" (int) shift would be undefined.
		 */
		if (!(blocks_present & (1ULL << block_idx))) {
#if MALI_USE_CSF
			/* if CSF dump zeroed out block */
			memset(&prfcnt_base[*out_index], 0,
			       KBASE_DUMMY_MODEL_BLOCK_SIZE);
			*out_index += KBASE_DUMMY_MODEL_VALUES_PER_BLOCK;
#endif /* MALI_USE_CSF */
			continue;
		}

		/* write the header: timestamp at +0, enable mask at +2 */
		prfcnt_base[*out_index] = performance_counters.time++;
		prfcnt_base[*out_index + 2] = prfcnt_enable_mask;
		*out_index += KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS;

		/* write the counters */
		for (counter = 0;
		     counter < KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
		     counter++) {
			/* HW counter values retrieved through
			 * PRFCNT_SAMPLE request are of 32 bits only.
			 */
			counter_value = (u32)values[index++];
			if (KBASE_DUMMY_MODEL_COUNTER_ENABLED(
				    prfcnt_enable_mask,
				    (counter +
				     KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))) {
				prfcnt_base[*out_index + counter] =
					counter_value;
			}
		}
		*out_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
	}
}
596 
597 /**
598  * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values
599  *
600  * Used to ensure counter values are not lost if cache invalidation is performed
601  * prior to reading.
602  */
gpu_model_sync_dummy_prfcnt(void)603 static void gpu_model_sync_dummy_prfcnt(void)
604 {
605 	int i;
606 	struct page *pg;
607 
608 	for (i = 0; i < performance_counters.page_count; i++) {
609 		pg = as_page(performance_counters.pages[i]);
610 		kbase_sync_single_for_device(performance_counters.kbdev,
611 					     kbase_dma_addr(pg), PAGE_SIZE,
612 					     DMA_BIDIRECTIONAL);
613 	}
614 }
615 
/* Produce one full performance counter sample into the dump buffer.
 *
 * Blocks are emitted in a fixed order (front-end, tiler, memsys, shader);
 * @index threads the write position through the successive dump calls.
 */
static void midgard_model_dump_prfcnt(void)
{
	u32 index = 0;

#if !MALI_USE_CSF
	gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index,
				     1, 0xffffffff, 0x1);
#else
	gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index,
				     1, 0xffffffff, 0x1);
#endif /* !MALI_USE_CSF */
	gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters,
				     &index, 1,
				     performance_counters.prfcnt_en.tiler,
				     DUMMY_IMPLEMENTATION_TILER_PRESENT);
	gpu_model_dump_prfcnt_blocks(performance_counters.l2_counters, &index,
				     KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
				     performance_counters.prfcnt_en.l2,
				     performance_counters.l2_present);
	gpu_model_dump_prfcnt_blocks(performance_counters.shader_counters,
				     &index, KBASE_DUMMY_MODEL_MAX_SHADER_CORES,
				     performance_counters.prfcnt_en.shader,
				     performance_counters.shader_present);

	/* Push the dump to the device-visible copy of the buffer. */
	gpu_model_sync_dummy_prfcnt();

	/* simulate a 'long' time between samples */
	performance_counters.time += 10;
}
645 
init_register_statuses(struct dummy_model_t * dummy)646 static void init_register_statuses(struct dummy_model_t *dummy)
647 {
648 	int i;
649 
650 	hw_error_status.errors_mask = 0;
651 	hw_error_status.gpu_error_irq = 0;
652 	hw_error_status.gpu_fault_status = 0;
653 	hw_error_status.job_irq_rawstat = 0;
654 	hw_error_status.job_irq_status = 0;
655 	hw_error_status.mmu_irq_rawstat = 0;
656 	hw_error_status.mmu_irq_mask = 0;
657 
658 	for (i = 0; i < NUM_SLOTS; i++) {
659 		hw_error_status.js_status[i] = 0;
660 		hw_error_status.job_irq_rawstat |=
661 			(dummy->slots[i].job_complete_irq_asserted) << i;
662 		hw_error_status.job_irq_status |=
663 			(dummy->slots[i].job_complete_irq_asserted) << i;
664 	}
665 	for (i = 0; i < NUM_MMU_AS; i++) {
666 		hw_error_status.as_command[i] = 0;
667 		hw_error_status.as_faultstatus[i] = 0;
668 		hw_error_status.mmu_irq_mask |= 1 << i;
669 	}
670 
671 	performance_counters.time = 0;
672 }
673 
/* Apply any pending injected errors (hw_error_status.errors_mask) to the
 * emulated JOB, MMU and GPU IRQ/fault registers for @job_slot, then clear
 * the mask. Register layouts follow the Midgard Architecture Specification
 * (see file header).
 */
static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
{
	if (hw_error_status.errors_mask & IS_A_JOB_ERROR) {
		if (job_slot == hw_error_status.current_job_slot) {
#if !MALI_USE_CSF
			if (hw_error_status.js_status[job_slot] == 0) {
				/* status reg is clean; it can be written */

				switch (hw_error_status.errors_mask &
							IS_A_JOB_ERROR) {
				case KBASE_JOB_INTERRUPTED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INTERRUPTED;
					break;

				case KBASE_JOB_STOPPED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_STOPPED;
					break;

				case KBASE_JOB_TERMINATED:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_TERMINATED;
					break;

				case KBASE_JOB_CONFIG_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_CONFIG_FAULT;
					break;

				case KBASE_JOB_POWER_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_POWER_FAULT;
					break;

				case KBASE_JOB_READ_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_READ_FAULT;
					break;

				case KBASE_JOB_WRITE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_WRITE_FAULT;
					break;

				case KBASE_JOB_AFFINITY_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_AFFINITY_FAULT;
					break;

				case KBASE_JOB_BUS_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_BUS_FAULT;
					break;

				case KBASE_INSTR_INVALID_PC:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_INVALID_PC;
					break;

				case KBASE_INSTR_INVALID_ENC:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_INVALID_ENC;
					break;

				case KBASE_INSTR_TYPE_MISMATCH:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_TYPE_MISMATCH;
					break;

				case KBASE_INSTR_OPERAND_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_OPERAND_FAULT;
					break;

				case KBASE_INSTR_TLS_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_TLS_FAULT;
					break;

				case KBASE_INSTR_BARRIER_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_BARRIER_FAULT;
					break;

				case KBASE_INSTR_ALIGN_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_INSTR_ALIGN_FAULT;
					break;

				case KBASE_DATA_INVALID_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_DATA_INVALID_FAULT;
					break;

				case KBASE_TILE_RANGE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_TILE_RANGE_FAULT;
					break;

				case KBASE_ADDR_RANGE_FAULT:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_ADDRESS_RANGE_FAULT;
					break;

				case KBASE_OUT_OF_MEMORY:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_OUT_OF_MEMORY;
					break;

				case KBASE_UNKNOWN:
					hw_error_status.js_status[job_slot] =
						JS_STATUS_UNKNOWN;
					break;

				default:
					model_error_log(KBASE_CORE,
					"\nAtom Chain 0x%llx: Invalid Error Mask!",
						hw_error_status.current_jc);
					break;
				}
			}
#endif /* !MALI_USE_CSF */

			/* we set JOB_FAIL_<n> */
			hw_error_status.job_irq_rawstat |=
			(dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot + 16);
			hw_error_status.job_irq_status |=
			(((dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot)) &
					(dummy->slots[job_slot].job_irq_mask <<
							job_slot)) << 16;
		} else {
			/* Error targets a different slot: report plain
			 * JOB_DONE for this one.
			 */
			hw_error_status.job_irq_rawstat |=
			(dummy->slots[job_slot].job_complete_irq_asserted) <<
								job_slot;
			hw_error_status.job_irq_status |=
			((dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot)) &
					(dummy->slots[job_slot].job_irq_mask <<
								job_slot);
		}
	} else {
		/* No job error pending: report plain JOB_DONE. */
		hw_error_status.job_irq_rawstat |=
			(dummy->slots[job_slot].job_complete_irq_asserted) <<
								job_slot;
		hw_error_status.job_irq_status |=
			((dummy->slots[job_slot].job_complete_irq_asserted) <<
								(job_slot)) &
			(dummy->slots[job_slot].job_irq_mask << job_slot);
	}			/* end of job register statuses */

	if (hw_error_status.errors_mask & IS_A_MMU_ERROR) {
		int i;

		for (i = 0; i < NUM_MMU_AS; i++) {
			if (i == hw_error_status.faulty_mmu_as) {
				if (hw_error_status.as_faultstatus[i] == 0) {
					u32 status =
					hw_error_status.as_faultstatus[i];
					/* status reg is clean; it can be
					 * written
					 */
					switch (hw_error_status.errors_mask &
							IS_A_MMU_ERROR) {
					case KBASE_TRANSLATION_FAULT:
						/* 0xCm means TRANSLATION FAULT
						 * (m is mmu_table_level)
						 */
						status =
							((1 << 7) | (1 << 6) |
						hw_error_status.mmu_table_level
									);
						break;

					case KBASE_PERMISSION_FAULT:
						/*0xC8 means PERMISSION FAULT */
						status = ((1 << 7) | (1 << 6) |
								(1 << 3));
						break;

					case KBASE_TRANSTAB_BUS_FAULT:
						/* 0xDm means TRANSITION TABLE
						 * BUS FAULT (m is
						 * mmu_table_level)
						 */
						status = ((1 << 7) | (1 << 6) |
								(1 << 4) |
						hw_error_status.mmu_table_level
									);
						break;

					case KBASE_ACCESS_FLAG:
						/* 0xD8 means ACCESS FLAG */
						status = ((1 << 7) | (1 << 6) |
							(1 << 4) | (1 << 3));
						break;

					default:
						model_error_log(KBASE_CORE,
						"\nAtom Chain 0x%llx: Invalid Error Mask!",
						hw_error_status.current_jc);
						break;
					}
					hw_error_status.as_faultstatus[i] =
									status;
				}

				if (hw_error_status.errors_mask &
						KBASE_TRANSTAB_BUS_FAULT)
					hw_error_status.mmu_irq_rawstat |=
						1 << (16 + i); /* bus error */
				else
					hw_error_status.mmu_irq_rawstat |=
						1 << i; /* page fault */
			}
		}
	}			/*end of mmu register statuses */
	if (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
		if (hw_error_status.gpu_fault_status) {
			/* not the first GPU error reported */
			hw_error_status.gpu_error_irq |= (1 << 7);
		} else {
			hw_error_status.gpu_error_irq |= 1;
			switch (hw_error_status.errors_mask & IS_A_GPU_ERROR) {
			case KBASE_DELAYED_BUS_FAULT:
				hw_error_status.gpu_fault_status = (1 << 7);
				break;

			case KBASE_SHAREABILITY_FAULT:
				hw_error_status.gpu_fault_status = (1 << 7) |
								(1 << 3);
				break;

			default:
				model_error_log(KBASE_CORE,
				"\nAtom Chain 0x%llx: Invalid Error Mask!",
						hw_error_status.current_jc);
				break;
			}
		}
	}
	hw_error_status.errors_mask = 0;	/*clear error mask */
}
919 
920 #if !MALI_USE_CSF
/* Recompute the emulated JOB_IRQ_JS_STATE snapshot for every slot named in
 * @mask (bit i = slot i DONE, bit i+16 = slot i FAIL). A slot with a pending
 * error status only advances to its queued job once the FAIL bit is acked.
 */
static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask)
{
	int i;

	pr_debug("%s", "Updating the JS_ACTIVE register");

	for (i = 0; i < NUM_SLOTS; i++) {
		int slot_active = dummy->slots[i].job_active;
		int next_busy = dummy->slots[i].job_queued;

		if ((mask & (1 << i)) || (mask & (1 << (i + 16)))) {
			/* clear the bits we're updating */
			dummy->job_irq_js_state &= ~((1 << (16 + i)) |
								(1 << i));
			if (hw_error_status.js_status[i]) {
				dummy->job_irq_js_state |= next_busy <<
								(i + 16);
				if (mask & (1 << (i + 16))) {
					/* clear job slot status */
					hw_error_status.js_status[i] = 0;
					/* continue execution of jobchain */
					dummy->slots[i].job_active =
						dummy->slots[i].job_queued;
				}
			} else {
				/* set bits if needed */
				dummy->job_irq_js_state |= ((slot_active << i) |
						(next_busy << (i + 16)));
			}
		}
	}
	pr_debug("The new snapshot is 0x%08X\n", dummy->job_irq_js_state);
}
954 #endif /* !MALI_USE_CSF */
955 
956 /**
957  * find_control_reg_values() - Look up constant control register values.
958  * @gpu:	GPU name
959  *
960  * Look up the GPU name to find the correct set of control register values for
961  * that GPU. If not found, warn and use the first values in the array.
962  *
963  * Return: Pointer to control register values for that GPU.
964  */
find_control_reg_values(const char * gpu)965 static const struct control_reg_values_t *find_control_reg_values(const char *gpu)
966 {
967 	size_t i;
968 	const struct control_reg_values_t *ret = NULL;
969 
970 	for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
971 		const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];
972 
973 		if (!strcmp(fcrv->name, gpu)) {
974 			ret = fcrv;
975 			pr_debug("Found control register values for %s\n", gpu);
976 			break;
977 		}
978 	}
979 
980 	if (!ret) {
981 		ret = &all_control_reg_values[0];
982 		pr_warn("Couldn't find control register values for GPU %s; using default %s\n",
983 			gpu, ret->name);
984 	}
985 
986 	return ret;
987 }
988 
midgard_model_create(const void * config)989 void *midgard_model_create(const void *config)
990 {
991 	struct dummy_model_t *dummy = NULL;
992 
993 	dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
994 
995 	if (dummy) {
996 		dummy->job_irq_js_state = 0;
997 		init_register_statuses(dummy);
998 		dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
999 	}
1000 	return dummy;
1001 }
1002 
/* Release a model instance created by midgard_model_create().
 * kfree() tolerates NULL, so no guard is needed.
 */
void midgard_model_destroy(void *h)
{
	kfree(h);
}
1007 
midgard_model_get_outputs(void * h)1008 static void midgard_model_get_outputs(void *h)
1009 {
1010 	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
1011 
1012 	if (hw_error_status.job_irq_status)
1013 		gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
1014 
1015 	if ((dummy->power_changed && dummy->power_changed_mask) ||
1016 	    (dummy->reset_completed & dummy->reset_completed_mask) ||
1017 	    hw_error_status.gpu_error_irq ||
1018 	    (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ||
1019 	    dummy->prfcnt_sample_completed)
1020 		gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
1021 
1022 	if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
1023 		gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
1024 }
1025 
midgard_model_update(void * h)1026 static void midgard_model_update(void *h)
1027 {
1028 	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
1029 	int i;
1030 
1031 	for (i = 0; i < NUM_SLOTS; i++) {
1032 		if (!dummy->slots[i].job_active)
1033 			continue;
1034 
1035 		if (dummy->slots[i].job_disabled) {
1036 			update_register_statuses(dummy, i);
1037 			continue;
1038 		}
1039 
1040 		/* If there are any pending interrupts that have not
1041 		 * been cleared we cannot run the job in the next register
1042 		 * as we will overwrite the register status of the job in
1043 		 * the head registers - which has not yet been read
1044 		 */
1045 		if ((hw_error_status.job_irq_rawstat & (1 << (i + 16))) ||
1046 		   (hw_error_status.job_irq_rawstat & (1 << i))) {
1047 			continue;
1048 		}
1049 
1050 		/*this job is done assert IRQ lines */
1051 		signal_int(dummy, i);
1052 #ifdef CONFIG_MALI_BIFROST_ERROR_INJECT
1053 		midgard_set_error(i);
1054 #endif				/* CONFIG_MALI_BIFROST_ERROR_INJECT */
1055 		update_register_statuses(dummy, i);
1056 		/*if this job slot returned failures we cannot use it */
1057 		if (hw_error_status.job_irq_rawstat & (1 << (i + 16))) {
1058 			dummy->slots[i].job_active = 0;
1059 			continue;
1060 		}
1061 		/*process next job */
1062 		dummy->slots[i].job_active = dummy->slots[i].job_queued;
1063 		dummy->slots[i].job_queued = 0;
1064 		if (dummy->slots[i].job_active) {
1065 			if (hw_error_status.job_irq_rawstat & (1 << (i + 16)))
1066 				model_error_log(KBASE_CORE,
1067 				"\natom %lld running a job on a dirty slot",
1068 						hw_error_status.current_jc);
1069 		}
1070 	}
1071 }
1072 
invalidate_active_jobs(struct dummy_model_t * dummy)1073 static void invalidate_active_jobs(struct dummy_model_t *dummy)
1074 {
1075 	int i;
1076 
1077 	for (i = 0; i < NUM_SLOTS; i++) {
1078 		if (dummy->slots[i].job_active) {
1079 			hw_error_status.job_irq_rawstat |= (1 << (16 + i));
1080 
1081 		hw_error_status.js_status[i] = 0x7f; /*UNKNOWN*/
1082 		}
1083 	}
1084 }
1085 
/**
 * midgard_model_write_reg() - Handle a register write against the dummy model.
 * @h:     Model handle (a struct dummy_model_t pointer).
 * @addr:  Register address being written.
 * @value: Value to write.
 *
 * Decodes @addr into one of the emulated register banks (job slots, job/GPU
 * IRQ control, GPU commands, MMU/address-space registers, performance-counter
 * setup, power control) and updates model state. Afterwards it advances the
 * model via midgard_model_update() and raises any now-pending IRQ lines via
 * midgard_model_get_outputs().
 *
 * Note: the else-if chain is interleaved with MALI_USE_CSF preprocessor
 * branches; several "} else if" arms only exist in one configuration.
 *
 * Return: always 1.
 */
u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
#if !MALI_USE_CSF
	/* Job-slot registers are only decoded on Job Manager (non-CSF) GPUs */
	if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
			(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
		int slot_idx = (addr >> 7) & 0xf;

		KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
		if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
			/* Latch the low 32 bits of the next job chain address */
			hw_error_status.current_jc &=
						~((u64) (0xFFFFFFFF));
			hw_error_status.current_jc |= (u64) value;
		}
		if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_HI)) {
			/* Latch the high 32 bits of the next job chain address */
			hw_error_status.current_jc &= (u64) 0xFFFFFFFF;
			hw_error_status.current_jc |=
						((u64) value) << 32;
		}
		if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) &&
								value == 1) {
			pr_debug("%s", "start detected");
			KBASE_DEBUG_ASSERT(!dummy->slots[slot_idx].job_active ||
					!dummy->slots[slot_idx].job_queued);
			if ((dummy->slots[slot_idx].job_active) ||
					(hw_error_status.job_irq_rawstat &
						(1 << (slot_idx + 16)))) {
				pr_debug("~~~~~~~~~~~ Start: job slot is already active or there are IRQ pending  ~~~~~~~~~"
									);
				dummy->slots[slot_idx].job_queued = 1;
			} else {
				dummy->slots[slot_idx].job_active = 1;
			}
		}

		if (addr == JOB_SLOT_REG(slot_idx, JS_COMMAND_NEXT) && value ==
									0)
			dummy->slots[slot_idx].job_queued = 0;

		if ((addr == JOB_SLOT_REG(slot_idx, JS_COMMAND)) &&
				(value == JS_COMMAND_SOFT_STOP ||
					value == JS_COMMAND_HARD_STOP)) {
			/*dummy->slots[slot_idx].job_active = 0; */
			hw_error_status.current_job_slot = slot_idx;
			if (value == JS_COMMAND_SOFT_STOP) {
				hw_error_status.errors_mask = KBASE_JOB_STOPPED;
			} else {	/*value == 3 */

				if (dummy->slots[slot_idx].job_disabled != 0) {
					pr_debug("enabling slot after HARD_STOP"
									);
					dummy->slots[slot_idx].job_disabled = 0;
				}
				hw_error_status.errors_mask =
							KBASE_JOB_TERMINATED;
			}
		}
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
		int i;

		for (i = 0; i < NUM_SLOTS; i++) {
			if (value & ((1 << i) | (1 << (i + 16))))
				dummy->slots[i].job_complete_irq_asserted = 0;
			/* hw_error_status.js_status[i] is cleared in
			 * update_job_irq_js_state
			 */
		}
		pr_debug("%s", "job irq cleared");
		update_job_irq_js_state(dummy, value);
		/*remove error condition for JOB */
		hw_error_status.job_irq_rawstat &= ~(value);
		hw_error_status.job_irq_status &= ~(value);
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		int i;

		for (i = 0; i < NUM_SLOTS; i++)
			dummy->slots[i].job_irq_mask = (value >> i) & 0x01;
		pr_debug("job irq mask to value %x", value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
#else /* !MALI_USE_CSF */
	if (addr == JOB_CONTROL_REG(JOB_IRQ_CLEAR)) {
		pr_debug("%s", "job irq cleared");

		hw_error_status.job_irq_rawstat &= ~(value);
		hw_error_status.job_irq_status &= ~(value);
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
#endif /* !MALI_USE_CSF */
		pr_debug("GPU_IRQ_MASK set to 0x%x", value);
		/* Only the RESET_COMPLETED, POWER_CHANGED{,_ALL} and
		 * CLEAN_CACHES_COMPLETED mask bits are modelled here.
		 */
		dummy->reset_completed_mask = (value >> 8) & 0x01;
		dummy->power_changed_mask = (value >> 9) & 0x03;
		dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
	} else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
		dummy->coherency_enable = value;
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
		if (value & (1 << 8)) {
			pr_debug("%s", "gpu RESET_COMPLETED irq cleared");
			dummy->reset_completed = 0;
		}
		if (value & (3 << 9))
			dummy->power_changed = 0;

		if (value & (1 << 17))
			dummy->clean_caches_completed = false;
		if (value & (1 << 16))
			dummy->prfcnt_sample_completed = 0;

		/*update error status */
		hw_error_status.gpu_error_irq &= ~(value);
	} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
		switch (value) {
		case GPU_COMMAND_SOFT_RESET:
		case GPU_COMMAND_HARD_RESET:
			pr_debug("gpu reset (%d) requested", value);
			/* no more fault status */
			hw_error_status.gpu_fault_status = 0;
			/* completed reset instantly */
			dummy->reset_completed = 1;
			break;
#if MALI_USE_CSF
		case GPU_COMMAND_CACHE_CLN_INV_L2:
		case GPU_COMMAND_CACHE_CLN_INV_L2_LSC:
		case GPU_COMMAND_CACHE_CLN_INV_FULL:
#else
		case GPU_COMMAND_CLEAN_CACHES:
		case GPU_COMMAND_CLEAN_INV_CACHES:
#endif
			pr_debug("clean caches requested");
			dummy->clean_caches_completed = true;
			break;
		case GPU_COMMAND_PRFCNT_SAMPLE:
			midgard_model_dump_prfcnt();
			dummy->prfcnt_sample_completed = 1;
			/* no break: falls through to default, which is a no-op */
		default:
			break;
		}
	} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
		dummy->l2_config = value;
	}
#if MALI_USE_CSF
	else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) &&
			 addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET +
						(CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
		/* Ringing doorbell 0 flags the global interface IRQ */
		if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
			hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
	} else if (addr == IPA_CONTROL_REG(COMMAND)) {
		pr_debug("Received IPA_CONTROL command");
	} else if (addr == IPA_CONTROL_REG(TIMER)) {
		ipa_control_timer_enabled = value ? true : false;
	} else if ((addr >= IPA_CONTROL_REG(SELECT_CSHW_LO)) &&
		   (addr <= IPA_CONTROL_REG(SELECT_SHADER_HI))) {
		/* Each core type has an 8-byte (LO/HI pair) select register */
		enum kbase_ipa_core_type core_type = (enum kbase_ipa_core_type)(
			(addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) >> 3);
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(SELECT_CSHW_LO)) & 7);

		if (is_low_word) {
			ipa_ctl_select_config[core_type] &= ~(u64)U32_MAX;
			ipa_ctl_select_config[core_type] |= value;
		} else {
			ipa_ctl_select_config[core_type] &= U32_MAX;
			ipa_ctl_select_config[core_type] |= ((u64)value << 32);
		}
	}
#endif
	else if (addr == MMU_REG(MMU_IRQ_MASK)) {
		hw_error_status.mmu_irq_mask = value;
	} else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
		hw_error_status.mmu_irq_rawstat &= (~value);
	} else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
			(addr <= MMU_AS_REG(15, AS_STATUS))) {
		/* Each address space occupies a 0x40-byte register window */
		int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
									>> 6;

		switch (addr & 0x3F) {
		case AS_COMMAND:
			switch (value) {
			case AS_COMMAND_NOP:
				hw_error_status.as_command[mem_addr_space] =
									value;
				break;

			case AS_COMMAND_UPDATE:
				hw_error_status.as_command[mem_addr_space] =
									value;
				if ((hw_error_status.as_faultstatus[
								mem_addr_space])
					&& ((hw_error_status.as_transtab[
						mem_addr_space] & 0x3) != 0)) {
					model_error_log(KBASE_CORE,
					"\n ERROR: AS_COMMAND issued UPDATE on error condition before AS_TRANSTAB been set to unmapped\n"
									);
				} else if ((hw_error_status.as_faultstatus[
								mem_addr_space])
					&& ((hw_error_status.as_transtab[
						mem_addr_space] & 0x3) == 0)) {

					/*invalidate all active jobs */
					invalidate_active_jobs(dummy);
					/* error handled */
					hw_error_status.as_faultstatus[
							mem_addr_space] = 0;
				}
				break;

			case AS_COMMAND_LOCK:
			case AS_COMMAND_UNLOCK:
				hw_error_status.as_command[mem_addr_space] =
									value;
				break;

			case AS_COMMAND_FLUSH_PT:
			case AS_COMMAND_FLUSH_MEM:
				if (hw_error_status.as_command[mem_addr_space]
							!= AS_COMMAND_LOCK)
					model_error_log(KBASE_CORE,
						"\n ERROR: AS_COMMAND issued FLUSH without LOCKING before\n"
									);
				else /* error handled if any */
					hw_error_status.as_faultstatus[
							mem_addr_space] = 0;
				hw_error_status.as_command[mem_addr_space] =
									value;
				break;

			default:
				model_error_log(KBASE_CORE,
				"\n WARNING: UNRECOGNIZED AS_COMMAND 0x%x\n",
									value);
				break;
			}
			break;

		case AS_TRANSTAB_LO:
			hw_error_status.as_transtab[mem_addr_space] &=
						~((u64) (0xffffffff));
			hw_error_status.as_transtab[mem_addr_space] |=
						(u64) value;
			break;

		case AS_TRANSTAB_HI:
			hw_error_status.as_transtab[mem_addr_space] &=
						(u64) 0xffffffff;
			hw_error_status.as_transtab[mem_addr_space] |=
						((u64) value) << 32;
			break;

		case AS_LOCKADDR_LO:
		case AS_LOCKADDR_HI:
		case AS_MEMATTR_LO:
		case AS_MEMATTR_HI:
		case AS_TRANSCFG_LO:
		case AS_TRANSCFG_HI:
			/* Writes ignored */
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Writing unsupported MMU #%d register 0x%x value 0x%x\n",
						mem_addr_space, addr, value);
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) &&
			   addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) {
		/* Performance-counter configuration registers */
		switch (addr) {
		case PRFCNT_BASE_LO:
			performance_counters.prfcnt_base |= value;
			break;
		case PRFCNT_BASE_HI:
			performance_counters.prfcnt_base |= ((u64) value) << 32;
			break;
#if !MALI_USE_CSF
		case PRFCNT_JM_EN:
			performance_counters.prfcnt_en.jm = value;
			break;
#endif /* !MALI_USE_CSF */
		case PRFCNT_SHADER_EN:
			performance_counters.prfcnt_en.shader = value;
			break;
		case PRFCNT_TILER_EN:
			performance_counters.prfcnt_en.tiler = value;
			break;
		case PRFCNT_MMU_L2_EN:
			performance_counters.prfcnt_en.l2 = value;
			break;
		}
	} else {
		/* Power control and (ignored) config registers */
		switch (addr) {
		case TILER_PWRON_LO:
			dummy->power_on |= (value & 1) << 1;
			/* Also ensure L2 is powered on */
			dummy->power_on |= value & 1;
			dummy->power_changed = 1;
			break;
		case SHADER_PWRON_LO:
			dummy->power_on |= (value & 0xF) << 2;
			dummy->power_changed = 1;
			break;
		case L2_PWRON_LO:
			dummy->power_on |= value & 1;
			dummy->power_changed = 1;
			break;
		case STACK_PWRON_LO:
			dummy->stack_power_on_lo |= value;
			dummy->power_changed = 1;
			break;
		case TILER_PWROFF_LO:
			dummy->power_on &= ~((value & 1) << 1);
			dummy->power_changed = 1;
			break;
		case SHADER_PWROFF_LO:
			dummy->power_on &= ~((value & 0xF) << 2);
			dummy->power_changed = 1;
			break;
		case L2_PWROFF_LO:
			dummy->power_on &= ~(value & 1);
			/* Also ensure tiler is powered off */
			dummy->power_on &= ~((value & 1) << 1);
			dummy->power_changed = 1;
			break;
		case STACK_PWROFF_LO:
			dummy->stack_power_on_lo &= ~value;
			dummy->power_changed = 1;
			break;

		case TILER_PWROFF_HI:
		case SHADER_PWROFF_HI:
		case L2_PWROFF_HI:
		case PWR_KEY:
		case PWR_OVERRIDE0:
#if !MALI_USE_CSF
		case JM_CONFIG:
#else /* !MALI_USE_CSF */
		case CSF_CONFIG:
#endif /* !MALI_USE_CSF */
		case SHADER_CONFIG:
		case TILER_CONFIG:
		case L2_MMU_CONFIG:
			/* Writes ignored */
			break;
		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: Writing unsupported register 0x%x value 0x%x\n",
								addr, value);
			break;
		}
	}

	/* Every write steps the model and re-evaluates pending IRQ lines */
	midgard_model_update(dummy);
	midgard_model_get_outputs(dummy);

	return 1;
}
1440 
/**
 * midgard_model_read_reg() - Handle a register read against the dummy model.
 * @h:     Model handle (a struct dummy_model_t pointer).
 * @addr:  Register address being read.
 * @value: Output; receives the emulated register value (0 by default for
 *         any register the model does not track).
 *
 * Decodes @addr and returns the corresponding piece of model state: GPU id
 * and feature registers, IRQ raw/status/mask registers, core present/ready
 * bitmaps derived from the tracked power state, job-slot registers (JM GPUs
 * only), MMU/address-space registers, and IPA counter values (CSF GPUs only).
 * As in midgard_model_write_reg(), the else-if chain is interleaved with
 * MALI_USE_CSF preprocessor branches.
 *
 * Return: always 1.
 */
u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
{
	struct dummy_model_t *dummy = (struct dummy_model_t *)h;
	*value = 0;		/* 0 by default */
#if !MALI_USE_CSF
	if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) {
		pr_debug("%s", "JS_ACTIVE being read");

		*value = dummy->job_irq_js_state;
	} else if (addr == GPU_CONTROL_REG(GPU_ID)) {
#else /* !MALI_USE_CSF */
	if (addr == GPU_CONTROL_REG(GPU_ID)) {
#endif /* !MALI_USE_CSF */

		*value = dummy->control_reg_values->gpu_id;
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) {
		*value = hw_error_status.job_irq_rawstat;
		pr_debug("%s", "JS_IRQ_RAWSTAT being read");
	} else if (addr == JOB_CONTROL_REG(JOB_IRQ_STATUS)) {
		*value = hw_error_status.job_irq_status;
		pr_debug("JS_IRQ_STATUS being read %x", *value);
	}
#if !MALI_USE_CSF
	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		int i;

		/* Reassemble the mask from the per-slot bits */
		*value = 0;
		for (i = 0; i < NUM_SLOTS; i++)
			*value |= dummy->slots[i].job_irq_mask << i;
		pr_debug("JS_IRQ_MASK being read %x", *value);
	}
#else /* !MALI_USE_CSF */
	else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
		/* ignore JOB_IRQ_MASK as it is handled by CSFFW */
	}
#endif /* !MALI_USE_CSF */
	else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
		*value = (dummy->reset_completed_mask << 8) |
				(dummy->power_changed_mask << 9) | (1 << 7) | 1;
		pr_debug("GPU_IRQ_MASK read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
		*value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |
			 (dummy->reset_completed << 8) |
			 ((dummy->clean_caches_completed ? 1u : 0u) << 17) |
			 (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
		pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
		/* Like RAWSTAT, but each bit gated by its enable/mask */
		*value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) |
			 ((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) |
			 ((dummy->reset_completed & dummy->reset_completed_mask) << 8) |
			 (((dummy->clean_caches_completed &&
			    dummy->clean_caches_completed_irq_enabled) ?
				   1u :
				   0u)
			  << 17) |
			 (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
		pr_debug("GPU_IRQ_STAT read %x", *value);
	} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
		*value = 0;
#if !MALI_USE_CSF
	} else if (addr == GPU_CONTROL_REG(LATEST_FLUSH)) {
		*value = 0;
#endif
	} else if (addr == GPU_CONTROL_REG(GPU_FAULTSTATUS)) {
		*value = hw_error_status.gpu_fault_status;
	} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
		*value = dummy->l2_config;
	} else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
				(addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
		switch (addr) {
		case GPU_CONTROL_REG(SHADER_PRESENT_LO):
		case GPU_CONTROL_REG(SHADER_PRESENT_HI):
		case GPU_CONTROL_REG(TILER_PRESENT_LO):
		case GPU_CONTROL_REG(TILER_PRESENT_HI):
		case GPU_CONTROL_REG(L2_PRESENT_LO):
		case GPU_CONTROL_REG(L2_PRESENT_HI):
		case GPU_CONTROL_REG(STACK_PRESENT_LO):
		case GPU_CONTROL_REG(STACK_PRESENT_HI):
			*value = get_implementation_register(addr);
			break;
		/* READY registers are derived from the tracked power_on bits:
		 * bit 0 = L2, bit 1 = tiler, bits 2+ = shader cores
		 */
		case GPU_CONTROL_REG(SHADER_READY_LO):
			*value = (dummy->power_on >> 0x02) &
			get_implementation_register(
				GPU_CONTROL_REG(SHADER_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(TILER_READY_LO):
			*value = (dummy->power_on >> 0x01) &
				 get_implementation_register(
				GPU_CONTROL_REG(TILER_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(L2_READY_LO):
			*value = dummy->power_on &
				 get_implementation_register(
				GPU_CONTROL_REG(L2_PRESENT_LO));
			break;
		case GPU_CONTROL_REG(STACK_READY_LO):
			*value = dummy->stack_power_on_lo &
				 get_implementation_register(
				GPU_CONTROL_REG(STACK_PRESENT_LO));
			break;

		case GPU_CONTROL_REG(SHADER_READY_HI):
		case GPU_CONTROL_REG(TILER_READY_HI):
		case GPU_CONTROL_REG(L2_READY_HI):
		case GPU_CONTROL_REG(STACK_READY_HI):
			*value = 0;
			break;

		case GPU_CONTROL_REG(SHADER_PWRTRANS_LO):
		case GPU_CONTROL_REG(SHADER_PWRTRANS_HI):
		case GPU_CONTROL_REG(TILER_PWRTRANS_LO):
		case GPU_CONTROL_REG(TILER_PWRTRANS_HI):
		case GPU_CONTROL_REG(L2_PWRTRANS_LO):
		case GPU_CONTROL_REG(L2_PWRTRANS_HI):
		case GPU_CONTROL_REG(STACK_PWRTRANS_LO):
		case GPU_CONTROL_REG(STACK_PWRTRANS_HI):
			*value = 0;
			break;

		case GPU_CONTROL_REG(SHADER_PWRACTIVE_LO):
		case GPU_CONTROL_REG(SHADER_PWRACTIVE_HI):
		case GPU_CONTROL_REG(TILER_PWRACTIVE_LO):
		case GPU_CONTROL_REG(TILER_PWRACTIVE_HI):
		case GPU_CONTROL_REG(L2_PWRACTIVE_LO):
		case GPU_CONTROL_REG(L2_PWRACTIVE_HI):
			*value = 0;
			break;

#if !MALI_USE_CSF
		case GPU_CONTROL_REG(JM_CONFIG):
#else /* !MALI_USE_CSF */
		case GPU_CONTROL_REG(CSF_CONFIG):
#endif /* !MALI_USE_CSF */

		case GPU_CONTROL_REG(SHADER_CONFIG):
		case GPU_CONTROL_REG(TILER_CONFIG):
		case GPU_CONTROL_REG(L2_MMU_CONFIG):
			*value = 0;
			break;

		case GPU_CONTROL_REG(COHERENCY_FEATURES):
			*value = BIT(0) | BIT(1); /* ace_lite and ace, respectively. */
			break;
		case GPU_CONTROL_REG(COHERENCY_ENABLE):
			*value = dummy->coherency_enable;
			break;

		case GPU_CONTROL_REG(THREAD_TLS_ALLOC):
			*value = 0;
			break;

		default:
			model_error_log(KBASE_CORE,
					"Dummy model register access: Reading unknown control reg 0x%x\n",
									addr);
			break;
		}
#if !MALI_USE_CSF
	} else if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
			(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
		int slot_idx = (addr >> 7) & 0xf;
		int sub_reg = addr & 0x7F;

		KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
		switch (sub_reg) {
		case JS_HEAD_NEXT_LO:
			*value = (u32) ((hw_error_status.current_jc) &
								0xFFFFFFFF);
			break;
		case JS_HEAD_NEXT_HI:
			*value = (u32) (hw_error_status.current_jc >> 32);
			break;
		case JS_STATUS:
			if (hw_error_status.js_status[slot_idx])
				*value = hw_error_status.js_status[slot_idx];
			else /* 0x08 means active, 0x00 idle */
				*value = (dummy->slots[slot_idx].job_active)
									<< 3;
			break;
		case JS_COMMAND_NEXT:
			*value = dummy->slots[slot_idx].job_queued;
			break;

		/* The dummy model does not implement these registers
		 * avoid printing error messages
		 */
		case JS_HEAD_HI:
		case JS_HEAD_LO:
		case JS_TAIL_HI:
		case JS_TAIL_LO:
		case JS_FLUSH_ID_NEXT:
			break;

		default:
			model_error_log(KBASE_CORE,
				"Dummy model register access: unknown job slot reg 0x%02X being read\n",
								sub_reg);
			break;
		}
#endif /* !MALI_USE_CSF */
	} else if (addr == GPU_CONTROL_REG(AS_PRESENT)) {
		*value = dummy->control_reg_values->as_present;
#if !MALI_USE_CSF
	} else if (addr == GPU_CONTROL_REG(JS_PRESENT)) {
		*value = 0x7;
#endif /* !MALI_USE_CSF */
	} else if (addr >= GPU_CONTROL_REG(TEXTURE_FEATURES_0) &&
				addr <= GPU_CONTROL_REG(TEXTURE_FEATURES_3)) {
		/* Fixed texture-format support bitmaps */
		switch (addr) {
		case GPU_CONTROL_REG(TEXTURE_FEATURES_0):
			*value = 0xfffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_1):
			*value = 0xffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_2):
			*value = 0x9f81ffff;
			break;

		case GPU_CONTROL_REG(TEXTURE_FEATURES_3):
			*value = 0;
			break;
		}
#if !MALI_USE_CSF
	} else if (addr >= GPU_CONTROL_REG(JS0_FEATURES) &&
				addr <= GPU_CONTROL_REG(JS15_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(JS0_FEATURES):
			*value = 0x20e;
			break;

		case GPU_CONTROL_REG(JS1_FEATURES):
			*value = 0x1fe;
			break;

		case GPU_CONTROL_REG(JS2_FEATURES):
			*value = 0x7e;
			break;

		default:
			*value = 0;
			break;
		}
#endif /* !MALI_USE_CSF */
	} else if (addr >= GPU_CONTROL_REG(L2_FEATURES)
				&& addr <= GPU_CONTROL_REG(MMU_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(L2_FEATURES):
			*value = 0x6100206;
			break;

		case GPU_CONTROL_REG(CORE_FEATURES):
			*value = dummy->control_reg_values->core_features;
			break;

		case GPU_CONTROL_REG(TILER_FEATURES):
			*value = dummy->control_reg_values->tiler_features;
			break;

		case GPU_CONTROL_REG(MEM_FEATURES):
			/* Bit 0: Core group is coherent */
			*value = 0x01;
			/* Bits 11:8: L2 slice count - 1 */
			*value |= (hweight64(DUMMY_IMPLEMENTATION_L2_PRESENT) - 1) << 8;
			break;

		case GPU_CONTROL_REG(MMU_FEATURES):
			*value = dummy->control_reg_values->mmu_features;
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(THREAD_MAX_THREADS)
				&& addr <= GPU_CONTROL_REG(THREAD_FEATURES)) {
		switch (addr) {
		case GPU_CONTROL_REG(THREAD_FEATURES):
			*value = dummy->control_reg_values->thread_features
					| (IMPLEMENTATION_MODEL << 30);
			break;
		case GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE):
			*value = dummy->control_reg_values->thread_max_barrier_size;
			break;
		case GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE):
			*value = dummy->control_reg_values->thread_max_workgroup_size;
			break;
		case GPU_CONTROL_REG(THREAD_MAX_THREADS):
			*value = dummy->control_reg_values->thread_max_threads;
			break;
		}
	} else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO)
				&& addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) {
		*value = 0;
	} else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)
				&& addr <= MMU_AS_REG(15, AS_STATUS)) {
		/* Each address space occupies a 0x40-byte register window */
		int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
									>> 6;

		switch (addr & 0x3F) {
		case AS_TRANSTAB_LO:
			*value = (u32)
				(hw_error_status.as_transtab[mem_addr_space] &
								0xffffffff);
			break;

		case AS_TRANSTAB_HI:
			*value = (u32)
				(hw_error_status.as_transtab[mem_addr_space] >>
									32);
			break;

		case AS_STATUS:
			*value = 0;
			break;

		case AS_FAULTSTATUS:
			if (mem_addr_space == hw_error_status.faulty_mmu_as)
				*value = hw_error_status.as_faultstatus[
						hw_error_status.faulty_mmu_as];
			else
				*value = 0;
			break;

		case AS_LOCKADDR_LO:
		case AS_LOCKADDR_HI:
		case AS_MEMATTR_LO:
		case AS_MEMATTR_HI:
		case AS_TRANSCFG_LO:
		case AS_TRANSCFG_HI:
			/* Read ignored */
			*value = 0;
			break;

		default:
			model_error_log(KBASE_CORE,
					"Dummy model register access: Reading unsupported MMU #%d register 0x%x. Returning 0\n",
							mem_addr_space, addr);
			*value = 0;
			break;
		}
	} else if (addr == MMU_REG(MMU_IRQ_MASK)) {
		*value = hw_error_status.mmu_irq_mask;
	} else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) {
		*value = hw_error_status.mmu_irq_rawstat;
	} else if (addr == MMU_REG(MMU_IRQ_STATUS)) {
		*value = hw_error_status.mmu_irq_mask &
						hw_error_status.mmu_irq_rawstat;
	}
#if MALI_USE_CSF
	else if (addr == IPA_CONTROL_REG(STATUS)) {
		*value = (ipa_control_timer_enabled << 31);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		/* 8 bytes (LO/HI pair) per counter; bit 2..0 picks the word */
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER,
						    counter_index, is_low_word);
	} else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) &&
		   (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(
				    IPA_CTL_MAX_VAL_CNT_IDX)))) {
		u32 counter_index =
			(addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3;
		bool is_low_word =
			!((addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) & 7);

		*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
						    counter_index, is_low_word);
	}
#endif
	else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
		*value = dummy->control_reg_values->gpu_features_lo;
	} else if (addr == GPU_CONTROL_REG(GPU_FEATURES_HI)) {
		*value = dummy->control_reg_values->gpu_features_hi;
	} else {
		model_error_log(KBASE_CORE,
			"Dummy model register access: Reading unsupported register 0x%x. Returning 0\n",
									addr);
		*value = 0;
	}

	CSTD_UNUSED(dummy);

	return 1;
}
1848 
1849 static u32 set_user_sample_core_type(u64 *counters,
1850 	u32 *usr_data_start, u32 usr_data_offset,
1851 	u32 usr_data_size, u32 core_count)
1852 {
1853 	u32 sample_size;
1854 	u32 *usr_data = NULL;
1855 
1856 	sample_size =
1857 		core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32);
1858 
1859 	if ((usr_data_size >= usr_data_offset) &&
1860 	    (sample_size <= usr_data_size - usr_data_offset))
1861 		usr_data = usr_data_start + (usr_data_offset / sizeof(u32));
1862 
1863 	if (!usr_data)
1864 		model_error_log(KBASE_CORE, "Unable to set counter sample 1");
1865 	else {
1866 		u32 loop_cnt = core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
1867 		u32 i;
1868 
1869 		for (i = 0; i < loop_cnt; i++) {
1870 			if (copy_from_user(&counters[i], &usr_data[i],
1871 					   sizeof(u32))) {
1872 				model_error_log(KBASE_CORE, "Unable to set counter sample 2");
1873 				break;
1874 			}
1875 		}
1876 	}
1877 
1878 	return usr_data_offset + sample_size;
1879 }
1880 
1881 static u32 set_kernel_sample_core_type(u64 *counters,
1882 	u64 *usr_data_start, u32 usr_data_offset,
1883 	u32 usr_data_size, u32 core_count)
1884 {
1885 	u32 sample_size;
1886 	u64 *usr_data = NULL;
1887 
1888 	sample_size =
1889 		core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64);
1890 
1891 	if ((usr_data_size >= usr_data_offset) &&
1892 	    (sample_size <= usr_data_size - usr_data_offset))
1893 		usr_data = usr_data_start + (usr_data_offset / sizeof(u64));
1894 
1895 	if (!usr_data)
1896 		model_error_log(KBASE_CORE, "Unable to set kernel counter sample 1");
1897 	else
1898 		memcpy(counters, usr_data, sample_size);
1899 
1900 	return usr_data_offset + sample_size;
1901 }
1902 
1903 /* Counter values injected through ioctl are of 32 bits */
1904 void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size)
1905 {
1906 	u32 offset = 0;
1907 
1908 #if !MALI_USE_CSF
1909 	offset = set_user_sample_core_type(performance_counters.jm_counters,
1910 		usr_data, offset, usr_data_size, 1);
1911 #else
1912 	offset = set_user_sample_core_type(performance_counters.cshw_counters,
1913 		usr_data, offset, usr_data_size, 1);
1914 #endif /* !MALI_USE_CSF */
1915 	offset = set_user_sample_core_type(performance_counters.tiler_counters,
1916 		usr_data, offset, usr_data_size,
1917 		hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
1918 	offset = set_user_sample_core_type(performance_counters.l2_counters,
1919 		usr_data, offset, usr_data_size,
1920 		KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
1921 	offset = set_user_sample_core_type(performance_counters.shader_counters,
1922 		usr_data, offset, usr_data_size,
1923 		KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
1924 }
1925 
1926 /* Counter values injected through kutf are of 64 bits */
1927 void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size)
1928 {
1929 	u32 offset = 0;
1930 
1931 #if !MALI_USE_CSF
1932 	offset = set_kernel_sample_core_type(performance_counters.jm_counters,
1933 		usr_data, offset, usr_data_size, 1);
1934 #else
1935 	offset = set_kernel_sample_core_type(performance_counters.cshw_counters,
1936 		usr_data, offset, usr_data_size, 1);
1937 #endif /* !MALI_USE_CSF */
1938 	offset = set_kernel_sample_core_type(performance_counters.tiler_counters,
1939 		usr_data, offset, usr_data_size,
1940 		hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
1941 	offset = set_kernel_sample_core_type(performance_counters.l2_counters,
1942 		usr_data, offset, usr_data_size,
1943 		hweight64(performance_counters.l2_present));
1944 	offset = set_kernel_sample_core_type(performance_counters.shader_counters,
1945 		usr_data, offset, usr_data_size,
1946 		hweight64(performance_counters.shader_present));
1947 }
1948 KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample);
1949 
1950 void gpu_model_get_dummy_prfcnt_cores(struct kbase_device *kbdev,
1951 		u64 *l2_present, u64 *shader_present)
1952 {
1953 	if (shader_present)
1954 		*shader_present = performance_counters.shader_present;
1955 	if (l2_present)
1956 		*l2_present = performance_counters.l2_present;
1957 }
1958 KBASE_EXPORT_TEST_API(gpu_model_get_dummy_prfcnt_cores);
1959 
1960 void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
1961 		u64 l2_present, u64 shader_present)
1962 {
1963 	if (WARN_ON(!l2_present || !shader_present
1964 			|| hweight64(l2_present) > KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS
1965 			|| hweight64(shader_present) > KBASE_DUMMY_MODEL_MAX_SHADER_CORES))
1966 		return;
1967 
1968 	performance_counters.l2_present = l2_present;
1969 	performance_counters.shader_present = shader_present;
1970 
1971 	/* Update the GPU properties used by vinstr to calculate the counter
1972 	 * dump buffer size.
1973 	 */
1974 	kbdev->gpu_props.props.l2_props.num_l2_slices = hweight64(l2_present);
1975 	kbdev->gpu_props.props.coherency_info.group[0].core_mask = shader_present;
1976 	kbdev->gpu_props.curr_config.l2_slices = hweight64(l2_present);
1977 	kbdev->gpu_props.curr_config.shader_present = shader_present;
1978 }
1979 KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores);
1980 
1981 void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
1982 					 struct tagged_addr *pages,
1983 					 size_t page_count)
1984 {
1985 	performance_counters.prfcnt_base_cpu = base;
1986 	performance_counters.kbdev = kbdev;
1987 	performance_counters.pages = pages;
1988 	performance_counters.page_count = page_count;
1989 }
1990 
1991 int gpu_model_control(void *model,
1992 				struct kbase_model_control_params *params)
1993 {
1994 	struct dummy_model_t *dummy = (struct dummy_model_t *)model;
1995 	int i;
1996 
1997 	if (params->command == KBASE_MC_DISABLE_JOBS) {
1998 		for (i = 0; i < NUM_SLOTS; i++)
1999 			dummy->slots[i].job_disabled = params->value;
2000 	} else {
2001 		return -EINVAL;
2002 	}
2003 
2004 	midgard_model_update(dummy);
2005 	midgard_model_get_outputs(dummy);
2006 
2007 	return 0;
2008 }
2009