• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 /*
23  * Base kernel property query APIs
24  */
25 
26 #include <mali_kbase.h>
27 #include <gpu/mali_kbase_gpu_regmap.h>
28 #include <mali_kbase_gpuprops.h>
29 #include <mali_kbase_hwaccess_gpuprops.h>
30 #include <mali_kbase_config_defaults.h>
31 #include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
32 #include <linux/clk.h>
33 #include <backend/gpu/mali_kbase_pm_internal.h>
34 #include <linux/of_platform.h>
35 #include <linux/moduleparam.h>
36 
37 
/**
 * kbase_gpuprops_construct_coherent_groups - Build the coherency group info
 * @props: The &struct base_gpu_props structure to update
 *
 * Derives the coherent group core masks from the raw L2/shader present
 * bitmaps already stored in @props->raw_props and writes the result into
 * @props->coherency_info. At most BASE_MAX_COHERENT_GROUPS groups are
 * recorded; any excess groups are dropped with a warning.
 */
static void kbase_gpuprops_construct_coherent_groups(
	struct base_gpu_props * const props)
{
	struct mali_base_gpu_coherent_group *current_group;
	u64 group_present;
	u64 group_mask;
	u64 first_set, first_set_prev;
	u32 num_groups = 0;

	KBASE_DEBUG_ASSERT(props != NULL);

	props->coherency_info.coherency = props->raw_props.mem_features;
	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);

	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
		/* Group is l2 coherent */
		group_present = props->raw_props.l2_present;
	} else {
		/* Group is l1 coherent */
		group_present = props->raw_props.shader_present;
	}

	/*
	 * The coherent group mask can be computed from the l2 present
	 * register.
	 *
	 * For the coherent group n:
	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
	 * where first_set is group_present with only its nth set-bit kept
	 * (i.e. the position from where a new group starts).
	 *
	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
	 * The first mask is:
	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
	 *               =  0x0..00f
	 * The second mask is:
	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
	 *               =  0x0..0f0
	 * And so on until all the bits from group_present have been cleared
	 * (i.e. there is no group left).
	 */

	current_group = props->coherency_info.group;
	/* x & ~(x - 1) isolates the lowest set bit of x (unsigned wrap is
	 * well-defined; yields 0 when x == 0).
	 */
	first_set = group_present & ~(group_present - 1);

	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
		group_present -= first_set;	/* Clear the current group bit */
		first_set_prev = first_set;

		/* When group_present is exhausted, first_set becomes 0 and
		 * group_mask covers all remaining high bits, closing the last
		 * group.
		 */
		first_set = group_present & ~(group_present - 1);
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		/* Populate the coherent_group structure for each group */
		current_group->core_mask = group_mask & props->raw_props.shader_present;
		current_group->num_cores = hweight64(current_group->core_mask);

		num_groups++;
		current_group++;
	}

	if (group_present != 0)
		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);

	props->coherency_info.num_groups = num_groups;
}
105 
106 /**
107  * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
108  * @kbdev:       The &struct kbase_device structure for the device
109  * @curr_config: The &struct curr_config_props structure to receive the result
110  *
111  * Fill the &struct curr_config_props structure with values from the GPU
112  * configuration registers.
113  *
114  * Return: Zero on success, Linux error code on failure
115  */
kbase_gpuprops_get_curr_config_props(struct kbase_device * kbdev,struct curr_config_props * const curr_config)116 int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
117 	struct curr_config_props * const curr_config)
118 {
119 	struct kbase_current_config_regdump curr_config_regdump;
120 	int err;
121 
122 	if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
123 		return -EINVAL;
124 
125 	/* If update not needed just return. */
126 	if (!curr_config->update_needed)
127 		return 0;
128 
129 	/* Dump relevant registers */
130 	err = kbase_backend_gpuprops_get_curr_config(kbdev,
131 						     &curr_config_regdump);
132 	if (err)
133 		return err;
134 
135 	curr_config->l2_slices =
136 		KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1;
137 
138 	curr_config->l2_present =
139 		((u64) curr_config_regdump.l2_present_hi << 32) +
140 		curr_config_regdump.l2_present_lo;
141 
142 	curr_config->shader_present =
143 		((u64) curr_config_regdump.shader_present_hi << 32) +
144 		curr_config_regdump.shader_present_lo;
145 
146 	curr_config->num_cores = hweight64(curr_config->shader_present);
147 
148 	curr_config->update_needed = false;
149 
150 	return 0;
151 }
152 
153 /**
154  * kbase_gpuprops_req_curr_config_update - Request Current Config Update
155  * @kbdev: The &struct kbase_device structure for the device
156  *
157  * Requests the current configuration to be updated next time the
158  * kbase_gpuprops_get_curr_config_props() is called.
159  *
160  * Return: Zero on success, Linux error code on failure
161  */
kbase_gpuprops_req_curr_config_update(struct kbase_device * kbdev)162 int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev)
163 {
164 	if (WARN_ON(!kbdev))
165 		return -EINVAL;
166 
167 	kbdev->gpu_props.curr_config.update_needed = true;
168 	return 0;
169 }
170 
171 /**
172  * kbase_gpuprops_get_props - Get the GPU configuration
173  * @gpu_props: The &struct base_gpu_props structure
174  * @kbdev: The &struct kbase_device structure for the device
175  *
176  * Fill the &struct base_gpu_props structure with values from the GPU
177  * configuration registers. Only the raw properties are filled in this function.
178  *
179  * Return: Zero on success, Linux error code on failure
180  */
kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,struct kbase_device * kbdev)181 static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
182 	struct kbase_device *kbdev)
183 {
184 	struct kbase_gpuprops_regdump regdump;
185 	int i;
186 	int err;
187 
188 	KBASE_DEBUG_ASSERT(kbdev != NULL);
189 	KBASE_DEBUG_ASSERT(gpu_props != NULL);
190 
191 	/* Dump relevant registers */
192 	err = kbase_backend_gpuprops_get(kbdev, &regdump);
193 	if (err)
194 		return err;
195 
196 	gpu_props->raw_props.gpu_id = regdump.gpu_id;
197 	gpu_props->raw_props.tiler_features = regdump.tiler_features;
198 	gpu_props->raw_props.mem_features = regdump.mem_features;
199 	gpu_props->raw_props.mmu_features = regdump.mmu_features;
200 	gpu_props->raw_props.l2_features = regdump.l2_features;
201 	gpu_props->raw_props.core_features = regdump.core_features;
202 
203 	gpu_props->raw_props.as_present = regdump.as_present;
204 	gpu_props->raw_props.js_present = regdump.js_present;
205 	gpu_props->raw_props.shader_present =
206 		((u64) regdump.shader_present_hi << 32) +
207 		regdump.shader_present_lo;
208 	gpu_props->raw_props.tiler_present =
209 		((u64) regdump.tiler_present_hi << 32) +
210 		regdump.tiler_present_lo;
211 	gpu_props->raw_props.l2_present =
212 		((u64) regdump.l2_present_hi << 32) +
213 		regdump.l2_present_lo;
214 	gpu_props->raw_props.stack_present =
215 		((u64) regdump.stack_present_hi << 32) +
216 		regdump.stack_present_lo;
217 
218 	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
219 		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
220 
221 	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
222 		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
223 
224 	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
225 	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
226 	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
227 	gpu_props->raw_props.thread_features = regdump.thread_features;
228 	gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
229 
230 	gpu_props->raw_props.gpu_features =
231 		((u64) regdump.gpu_features_hi << 32) +
232 		regdump.gpu_features_lo;
233 
234 	return 0;
235 }
236 
kbase_gpuprops_update_core_props_gpu_id(struct base_gpu_props * const gpu_props)237 void kbase_gpuprops_update_core_props_gpu_id(
238 	struct base_gpu_props * const gpu_props)
239 {
240 	gpu_props->core_props.version_status =
241 		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
242 	gpu_props->core_props.minor_revision =
243 		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
244 	gpu_props->core_props.major_revision =
245 		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
246 	gpu_props->core_props.product_id =
247 		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
248 }
249 
250 /**
251  * kbase_gpuprops_update_max_config_props - Updates the max config properties in
252  * the base_gpu_props.
253  * @base_props: The &struct base_gpu_props structure
254  * @kbdev:      The &struct kbase_device structure for the device
255  *
256  * Updates the &struct base_gpu_props structure with the max config properties.
257  */
kbase_gpuprops_update_max_config_props(struct base_gpu_props * const base_props,struct kbase_device * kbdev)258 static void kbase_gpuprops_update_max_config_props(
259 	struct base_gpu_props * const base_props, struct kbase_device *kbdev)
260 {
261 	int l2_n = 0;
262 
263 	if (WARN_ON(!kbdev) || WARN_ON(!base_props))
264 		return;
265 
266 	/* return if the max_config is not set during arbif initialization */
267 	if (kbdev->gpu_props.max_config.core_mask == 0)
268 		return;
269 
270 	/*
271 	 * Set the base_props with the maximum config values to ensure that the
272 	 * user space will always be based on the maximum resources available.
273 	 */
274 	base_props->l2_props.num_l2_slices =
275 		kbdev->gpu_props.max_config.l2_slices;
276 	base_props->raw_props.shader_present =
277 		kbdev->gpu_props.max_config.core_mask;
278 	/*
279 	 * Update l2_present in the raw data to be consistent with the
280 	 * max_config.l2_slices number.
281 	 */
282 	base_props->raw_props.l2_present = 0;
283 	for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) {
284 		base_props->raw_props.l2_present <<= 1;
285 		base_props->raw_props.l2_present |= 0x1;
286 	}
287 	/*
288 	 * Update the coherency_info data using just one core group. For
289 	 * architectures where the max_config is provided by the arbiter it is
290 	 * not necessary to split the shader core groups in different coherent
291 	 * groups.
292 	 */
293 	base_props->coherency_info.coherency =
294 		base_props->raw_props.mem_features;
295 	base_props->coherency_info.num_core_groups = 1;
296 	base_props->coherency_info.num_groups = 1;
297 	base_props->coherency_info.group[0].core_mask =
298 		kbdev->gpu_props.max_config.core_mask;
299 	base_props->coherency_info.group[0].num_cores =
300 		hweight32(kbdev->gpu_props.max_config.core_mask);
301 }
302 
/**
 * kbase_gpuprops_calculate_props - Calculate the derived properties
 * @gpu_props: The &struct base_gpu_props structure
 * @kbdev:     The &struct kbase_device structure for the device
 *
 * Fill the &struct base_gpu_props structure with values derived from the GPU
 * configuration registers. Raw properties must already have been populated
 * (see kbase_gpuprops_get_props()).
 */
static void kbase_gpuprops_calculate_props(
	struct base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
	int i;
	u32 gpu_id;

	/* Populate the base_gpu_props structure */
	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
	/* totalram_pages became a function in kernel 5.0 */
#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE
	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
#else
	gpu_props->core_props.gpu_available_memory_size =
		totalram_pages() << PAGE_SHIFT;
#endif

	gpu_props->core_props.num_exec_engines =
		KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);

	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];

	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);

	/* Field with number of l2 slices is added to MEM_FEATURES register
	 * since t76x. Below code assumes that for older GPU reserved bits will
	 * be read as zero.
	 */
	gpu_props->l2_props.num_l2_slices =
		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;

	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);

	/* A zero in a THREAD_* register means the hardware did not report a
	 * value; substitute the configured defaults.
	 */
	if (gpu_props->raw_props.thread_max_threads == 0)
		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
	else
		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;

	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
	else
		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;

	if (gpu_props->raw_props.thread_max_barrier_size == 0)
		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
	else
		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;

	if (gpu_props->raw_props.thread_tls_alloc == 0)
		gpu_props->thread_props.tls_alloc =
				gpu_props->thread_props.max_threads;
	else
		gpu_props->thread_props.tls_alloc =
				gpu_props->raw_props.thread_tls_alloc;

	/* MIDHARC-2364 was intended for tULx.
	 * Workaround for the incorrectly applied THREAD_FEATURES to tDUx.
	 */
	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;

#if MALI_USE_CSF
	/* CSF GPUs use the newer THREAD_FEATURES layout:
	 * [21:0] max_registers, [23:22] impl_tech, [31:24] max_task_queue.
	 */
	CSTD_UNUSED(gpu_id);
	gpu_props->thread_props.max_registers =
		KBASE_UBFX32(gpu_props->raw_props.thread_features,
			     0U, 22);
	gpu_props->thread_props.impl_tech =
		KBASE_UBFX32(gpu_props->raw_props.thread_features,
			     22U, 2);
	gpu_props->thread_props.max_task_queue =
		KBASE_UBFX32(gpu_props->raw_props.thread_features,
			     24U, 8);
	gpu_props->thread_props.max_thread_group_split = 0;
#else
	/* tDUx reports the newer layout as well (see workaround note above);
	 * all other Job-Manager GPUs use the legacy field layout.
	 */
	if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) {
		gpu_props->thread_props.max_registers =
			KBASE_UBFX32(gpu_props->raw_props.thread_features,
				     0U, 22);
		gpu_props->thread_props.impl_tech =
			KBASE_UBFX32(gpu_props->raw_props.thread_features,
				     22U, 2);
		gpu_props->thread_props.max_task_queue =
			KBASE_UBFX32(gpu_props->raw_props.thread_features,
				     24U, 8);
		gpu_props->thread_props.max_thread_group_split = 0;
	} else {
		gpu_props->thread_props.max_registers =
			KBASE_UBFX32(gpu_props->raw_props.thread_features,
				     0U, 16);
		gpu_props->thread_props.max_task_queue =
			KBASE_UBFX32(gpu_props->raw_props.thread_features,
				     16U, 8);
		gpu_props->thread_props.max_thread_group_split =
			KBASE_UBFX32(gpu_props->raw_props.thread_features,
				     24U, 6);
		gpu_props->thread_props.impl_tech =
			KBASE_UBFX32(gpu_props->raw_props.thread_features,
				     30U, 2);
	}
#endif

	/* If values are not specified, then use defaults */
	if (gpu_props->thread_props.max_registers == 0) {
		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
	}

	/*
	 * If the maximum resources allocated information is available it is
	 * necessary to update the base_gpu_props with the max_config info to
	 * the userspace. This is applicable to systems that receive this
	 * information from the arbiter.
	 */
	if (kbdev->gpu_props.max_config.core_mask)
		/* Update the max config properties in the base_gpu_props */
		kbase_gpuprops_update_max_config_props(gpu_props,
						       kbdev);
	else
		/* Initialize the coherent_group structure for each group */
		kbase_gpuprops_construct_coherent_groups(gpu_props);
}
434 
kbase_gpuprops_set_max_config(struct kbase_device * kbdev,const struct max_config_props * max_config)435 void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
436 	const struct max_config_props *max_config)
437 {
438 	if (WARN_ON(!kbdev) || WARN_ON(!max_config))
439 		return;
440 
441 	kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices;
442 	kbdev->gpu_props.max_config.core_mask = max_config->core_mask;
443 }
444 
kbase_gpuprops_set(struct kbase_device * kbdev)445 void kbase_gpuprops_set(struct kbase_device *kbdev)
446 {
447 	struct kbase_gpu_props *gpu_props;
448 	struct gpu_raw_gpu_props *raw;
449 
450 	if (WARN_ON(!kbdev))
451 		return;
452 	gpu_props = &kbdev->gpu_props;
453 	raw = &gpu_props->props.raw_props;
454 
455 	/* Initialize the base_gpu_props structure from the hardware */
456 	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
457 
458 	/* Populate the derived properties */
459 	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
460 
461 	/* Populate kbase-only fields */
462 	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
463 	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
464 
465 	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
466 
467 	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
468 	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
469 
470 	gpu_props->num_cores = hweight64(raw->shader_present);
471 	gpu_props->num_core_groups =
472 		gpu_props->props.coherency_info.num_core_groups;
473 	gpu_props->num_address_spaces = hweight32(raw->as_present);
474 	gpu_props->num_job_slots = hweight32(raw->js_present);
475 
476 	/*
477 	 * Current configuration is used on HW interactions so that the maximum
478 	 * config is just used for user space avoiding interactions with parts
479 	 * of the hardware that might not be allocated to the kbase instance at
480 	 * that moment.
481 	 */
482 	kbase_gpuprops_req_curr_config_update(kbdev);
483 	kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config);
484 }
485 
kbase_gpuprops_set_features(struct kbase_device * kbdev)486 int kbase_gpuprops_set_features(struct kbase_device *kbdev)
487 {
488 	struct base_gpu_props *gpu_props;
489 	struct kbase_gpuprops_regdump regdump;
490 	int err;
491 
492 	gpu_props = &kbdev->gpu_props.props;
493 
494 	/* Dump relevant registers */
495 	err = kbase_backend_gpuprops_get_features(kbdev, &regdump);
496 	if (err)
497 		return err;
498 
499 	/*
500 	 * Copy the raw value from the register, later this will get turned
501 	 * into the selected coherency mode.
502 	 * Additionally, add non-coherent mode, as this is always supported.
503 	 */
504 	gpu_props->raw_props.coherency_mode = regdump.coherency_features |
505 		COHERENCY_FEATURE_BIT(COHERENCY_NONE);
506 
507 	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
508 		gpu_props->thread_props.max_thread_group_split = 0;
509 
510 	return err;
511 }
512 
/*
 * Module parameters to allow the L2 size and hash configuration to be
 * overridden.
 *
 * These parameters must be set on insmod to take effect, and are not visible
 * in sysfs.
 */
static u8 override_l2_size;
module_param(override_l2_size, byte, 0000);
MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing");

static u8 override_l2_hash;
module_param(override_l2_hash, byte, 0000);
MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");

/* Per-slice ASN hash value overrides; num_override_l2_hash_values records
 * how many entries were supplied on the command line.
 */
static u32 l2_hash_values[ASN_HASH_COUNT] = {
	0,
};
static int num_override_l2_hash_values;
module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000);
MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing");

/* Definitions for range of supported user defined hash functions for GPUs
 * that support L2_CONFIG and not ASN_HASH features. Supported hash function
 * range from 0b1000-0b1111 inclusive. Selection of any other values will
 * lead to undefined behavior.
 */
#define USER_DEFINED_HASH_LO ((u8)0x08)
#define USER_DEFINED_HASH_HI ((u8)0x0F)

/* Outcome of checking for L2 configuration overrides. */
enum l2_config_override_result {
	L2_CONFIG_OVERRIDE_FAIL = -1,
	L2_CONFIG_OVERRIDE_NONE,
	L2_CONFIG_OVERRIDE_OK,
};
548 
/**
 * kbase_read_l2_config_from_dt - Read L2 configuration
 * @kbdev: The kbase device for which to get the L2 configuration.
 *
 * Check for L2 configuration overrides in module parameters and device tree.
 * Override values in module parameters take priority over override values in
 * device tree.
 *
 * Return: L2_CONFIG_OVERRIDE_OK if either size or hash, or both was properly
 *         overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided.
 *         L2_CONFIG_OVERRIDE_FAIL otherwise.
 */
static enum l2_config_override_result
kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
{
	struct device_node *np = kbdev->dev->of_node;

	/* Without a device tree node there is nothing to override from. */
	if (!np)
		return L2_CONFIG_OVERRIDE_NONE;

	/* Module parameter takes priority over the "l2-size" DT property. */
	if (override_l2_size)
		kbdev->l2_size_override = override_l2_size;
	else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override))
		kbdev->l2_size_override = 0;

	/* Check overriding value is supported, if not will result in
	 * undefined behavior.
	 */
	if (override_l2_hash >= USER_DEFINED_HASH_LO &&
	    override_l2_hash <= USER_DEFINED_HASH_HI)
		kbdev->l2_hash_override = override_l2_hash;
	else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override))
		kbdev->l2_hash_override = 0;

	/* Hash-values array: module parameter first, then "l2-hash-values"
	 * DT property.
	 */
	kbdev->l2_hash_values_override = false;
	if (num_override_l2_hash_values) {
		int i;

		kbdev->l2_hash_values_override = true;
		for (i = 0; i < num_override_l2_hash_values; i++)
			kbdev->l2_hash_values[i] = l2_hash_values[i];
	} else if (!of_property_read_u32_array(np, "l2-hash-values",
					       kbdev->l2_hash_values,
					       ASN_HASH_COUNT))
		kbdev->l2_hash_values_override = true;

	/* l2-hash is only valid on GPUs WITHOUT the ASN_HASH feature, and
	 * l2-hash-values only on GPUs WITH it; the two overrides are also
	 * mutually exclusive.
	 */
	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
	    (kbdev->l2_hash_override)) {
		dev_err(kbdev->dev, "l2-hash not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
	    (kbdev->l2_hash_values_override)) {
		dev_err(kbdev->dev, "l2-hash-values not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) {
		dev_err(kbdev->dev,
			"both l2-hash & l2-hash-values not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	if (kbdev->l2_size_override || kbdev->l2_hash_override ||
	    kbdev->l2_hash_values_override)
		return L2_CONFIG_OVERRIDE_OK;

	return L2_CONFIG_OVERRIDE_NONE;
}
619 
/**
 * kbase_gpuprops_update_l2_features - Apply L2 configuration overrides
 * @kbdev: The &struct kbase_device structure for the device
 *
 * If the GPU supports L2_CONFIG, check for size/hash overrides from module
 * parameters or device tree, power the L2 down and back up so the new
 * configuration takes effect, then re-read L2_FEATURES and refresh the
 * cached properties derived from it.
 *
 * Return: Zero on success, Linux error code on failure.
 */
int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
{
	int err = 0;

	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
		struct kbase_gpuprops_regdump regdump;
		struct base_gpu_props *gpu_props = &kbdev->gpu_props.props;

		/* Check for L2 cache size & hash overrides */
		switch (kbase_read_l2_config_from_dt(kbdev)) {
		case L2_CONFIG_OVERRIDE_FAIL:
			err = -EIO;
			goto exit;
		case L2_CONFIG_OVERRIDE_NONE:
			/* No overrides requested: no power cycle needed. */
			goto exit;
		default:
			break;
		}

		/* pm.active_count is expected to be 1 here, which is set in
		 * kbase_hwaccess_pm_powerup().
		 */
		WARN_ON(kbdev->pm.active_count != 1);
		/* The new settings for L2 cache can only be applied when it is
		 * off, so first do the power down.
		 */
		kbase_pm_context_idle(kbdev);
		kbase_pm_wait_for_desired_state(kbdev);

		/* Need L2 to get powered to reflect to L2_FEATURES */
		kbase_pm_context_active(kbdev);

		/* Wait for the completion of L2 power transition */
		kbase_pm_wait_for_l2_powered(kbdev);

		/* Dump L2_FEATURES register */
		err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump);
		if (err)
			goto exit;

		dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
			 regdump.l2_features);
		dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n",
			 regdump.l2_config);

		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) {
			int idx;
			/* Hardware sets this bit only when the requested ASN
			 * hash override was actually applied.
			 */
			const bool asn_he = regdump.l2_config &
					    L2_CONFIG_ASN_HASH_ENABLE_MASK;
			if (!asn_he && kbdev->l2_hash_values_override)
				dev_err(kbdev->dev,
					"Failed to use requested ASN_HASH, fallback to default");
			for (idx = 0; idx < ASN_HASH_COUNT; idx++)
				dev_info(kbdev->dev,
					 "%s ASN_HASH[%d] is [0x%08x]\n",
					 asn_he ? "Overridden" : "Default", idx,
					 regdump.l2_asn_hash[idx]);
		}

		/* Update gpuprops with reflected L2_FEATURES */
		gpu_props->raw_props.l2_features = regdump.l2_features;
		gpu_props->l2_props.log2_cache_size =
			KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
	}

exit:
	return err;
}
688 
/*
 * gpu_property_mapping - Maps each KBASE_GPUPROP_* property ID to the offset
 * and size of the corresponding field in struct base_gpu_props. Used by
 * kbase_gpuprops_populate_user_buffer() to serialise the properties into the
 * buffer handed to user space.
 */
static struct {
	u32 type;
	size_t offset;
	int size;
} gpu_property_mapping[] = {
#define PROP(name, member) \
	{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
		sizeof(((struct base_gpu_props *)0)->member)}
	PROP(PRODUCT_ID,                  core_props.product_id),
	PROP(VERSION_STATUS,              core_props.version_status),
	PROP(MINOR_REVISION,              core_props.minor_revision),
	PROP(MAJOR_REVISION,              core_props.major_revision),
	PROP(GPU_FREQ_KHZ_MAX,            core_props.gpu_freq_khz_max),
	PROP(LOG2_PROGRAM_COUNTER_SIZE,   core_props.log2_program_counter_size),
	PROP(TEXTURE_FEATURES_0,          core_props.texture_features[0]),
	PROP(TEXTURE_FEATURES_1,          core_props.texture_features[1]),
	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
	PROP(NUM_EXEC_ENGINES,            core_props.num_exec_engines),

	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
	PROP(L2_NUM_L2_SLICES,            l2_props.num_l2_slices),

	PROP(TILER_BIN_SIZE_BYTES,        tiler_props.bin_size_bytes),
	PROP(TILER_MAX_ACTIVE_LEVELS,     tiler_props.max_active_levels),

	PROP(MAX_THREADS,                 thread_props.max_threads),
	PROP(MAX_WORKGROUP_SIZE,          thread_props.max_workgroup_size),
	PROP(MAX_BARRIER_SIZE,            thread_props.max_barrier_size),
	PROP(MAX_REGISTERS,               thread_props.max_registers),
	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
	PROP(IMPL_TECH,                   thread_props.impl_tech),
	PROP(TLS_ALLOC,                   thread_props.tls_alloc),

	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
	PROP(RAW_CORE_FEATURES,           raw_props.core_features),
	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
	PROP(RAW_AS_PRESENT,              raw_props.as_present),
	PROP(RAW_JS_PRESENT,              raw_props.js_present),
	PROP(RAW_JS_FEATURES_0,           raw_props.js_features[0]),
	PROP(RAW_JS_FEATURES_1,           raw_props.js_features[1]),
	PROP(RAW_JS_FEATURES_2,           raw_props.js_features[2]),
	PROP(RAW_JS_FEATURES_3,           raw_props.js_features[3]),
	PROP(RAW_JS_FEATURES_4,           raw_props.js_features[4]),
	PROP(RAW_JS_FEATURES_5,           raw_props.js_features[5]),
	PROP(RAW_JS_FEATURES_6,           raw_props.js_features[6]),
	PROP(RAW_JS_FEATURES_7,           raw_props.js_features[7]),
	PROP(RAW_JS_FEATURES_8,           raw_props.js_features[8]),
	PROP(RAW_JS_FEATURES_9,           raw_props.js_features[9]),
	PROP(RAW_JS_FEATURES_10,          raw_props.js_features[10]),
	PROP(RAW_JS_FEATURES_11,          raw_props.js_features[11]),
	PROP(RAW_JS_FEATURES_12,          raw_props.js_features[12]),
	PROP(RAW_JS_FEATURES_13,          raw_props.js_features[13]),
	PROP(RAW_JS_FEATURES_14,          raw_props.js_features[14]),
	PROP(RAW_JS_FEATURES_15,          raw_props.js_features[15]),
	PROP(RAW_TILER_FEATURES,          raw_props.tiler_features),
	PROP(RAW_TEXTURE_FEATURES_0,      raw_props.texture_features[0]),
	PROP(RAW_TEXTURE_FEATURES_1,      raw_props.texture_features[1]),
	PROP(RAW_TEXTURE_FEATURES_2,      raw_props.texture_features[2]),
	PROP(RAW_TEXTURE_FEATURES_3,      raw_props.texture_features[3]),
	PROP(RAW_GPU_ID,                  raw_props.gpu_id),
	PROP(RAW_THREAD_MAX_THREADS,      raw_props.thread_max_threads),
	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
			raw_props.thread_max_workgroup_size),
	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
	PROP(RAW_GPU_FEATURES,            raw_props.gpu_features),
	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
	PROP(COHERENCY_GROUP_0,           coherency_info.group[0].core_mask),
	PROP(COHERENCY_GROUP_1,           coherency_info.group[1].core_mask),
	PROP(COHERENCY_GROUP_2,           coherency_info.group[2].core_mask),
	PROP(COHERENCY_GROUP_3,           coherency_info.group[3].core_mask),
	PROP(COHERENCY_GROUP_4,           coherency_info.group[4].core_mask),
	PROP(COHERENCY_GROUP_5,           coherency_info.group[5].core_mask),
	PROP(COHERENCY_GROUP_6,           coherency_info.group[6].core_mask),
	PROP(COHERENCY_GROUP_7,           coherency_info.group[7].core_mask),
	PROP(COHERENCY_GROUP_8,           coherency_info.group[8].core_mask),
	PROP(COHERENCY_GROUP_9,           coherency_info.group[9].core_mask),
	PROP(COHERENCY_GROUP_10,          coherency_info.group[10].core_mask),
	PROP(COHERENCY_GROUP_11,          coherency_info.group[11].core_mask),
	PROP(COHERENCY_GROUP_12,          coherency_info.group[12].core_mask),
	PROP(COHERENCY_GROUP_13,          coherency_info.group[13].core_mask),
	PROP(COHERENCY_GROUP_14,          coherency_info.group[14].core_mask),
	PROP(COHERENCY_GROUP_15,          coherency_info.group[15].core_mask),

#undef PROP
};
788 
kbase_gpuprops_populate_user_buffer(struct kbase_device * kbdev)789 int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
790 {
791 	struct kbase_gpu_props *kprops = &kbdev->gpu_props;
792 	struct base_gpu_props *props = &kprops->props;
793 	u32 count = ARRAY_SIZE(gpu_property_mapping);
794 	u32 i;
795 	u32 size = 0;
796 	u8 *p;
797 
798 	for (i = 0; i < count; i++) {
799 		/* 4 bytes for the ID, and the size of the property */
800 		size += 4 + gpu_property_mapping[i].size;
801 	}
802 
803 	kprops->prop_buffer_size = size;
804 	kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
805 
806 	if (!kprops->prop_buffer) {
807 		kprops->prop_buffer_size = 0;
808 		return -ENOMEM;
809 	}
810 
811 	p = kprops->prop_buffer;
812 
813 #define WRITE_U8(v) (*p++ = (v) & 0xFF)
814 #define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
815 #define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
816 #define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
817 
818 	for (i = 0; i < count; i++) {
819 		u32 type = gpu_property_mapping[i].type;
820 		u8 type_size;
821 		void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
822 
823 		switch (gpu_property_mapping[i].size) {
824 		case 1:
825 			type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
826 			break;
827 		case 2:
828 			type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
829 			break;
830 		case 4:
831 			type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
832 			break;
833 		case 8:
834 			type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
835 			break;
836 		default:
837 			dev_err(kbdev->dev,
838 				"Invalid gpu_property_mapping type=%d size=%d",
839 				type, gpu_property_mapping[i].size);
840 			return -EINVAL;
841 		}
842 
843 		WRITE_U32((type<<2) | type_size);
844 
845 		switch (type_size) {
846 		case KBASE_GPUPROP_VALUE_SIZE_U8:
847 			WRITE_U8(*((u8 *)field));
848 			break;
849 		case KBASE_GPUPROP_VALUE_SIZE_U16:
850 			WRITE_U16(*((u16 *)field));
851 			break;
852 		case KBASE_GPUPROP_VALUE_SIZE_U32:
853 			WRITE_U32(*((u32 *)field));
854 			break;
855 		case KBASE_GPUPROP_VALUE_SIZE_U64:
856 			WRITE_U64(*((u64 *)field));
857 			break;
858 		default: /* Cannot be reached */
859 			WARN_ON(1);
860 			return -EINVAL;
861 		}
862 	}
863 
864 	return 0;
865 }
866 
kbase_gpuprops_free_user_buffer(struct kbase_device * kbdev)867 void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev)
868 {
869 	kfree(kbdev->gpu_props.prop_buffer);
870 }
871 
kbase_device_populate_max_freq(struct kbase_device * kbdev)872 int kbase_device_populate_max_freq(struct kbase_device *kbdev)
873 {
874 	struct mali_base_gpu_core_props *core_props;
875 
876 	/* obtain max configured gpu frequency, if devfreq is enabled then
877 	 * this will be overridden by the highest operating point found
878 	 */
879 	core_props = &(kbdev->gpu_props.props.core_props);
880 #ifdef GPU_FREQ_KHZ_MAX
881 	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
882 #else
883 	core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX;
884 #endif
885 
886 	return 0;
887 }
888