1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /* This file is based on rgxdefs.h and should only contain function-like macros
25 * and inline functions. Any object-like macros should instead appear in
26 * rogue_hw_defs.h.
27 */
28
29 #ifndef ROGUE_HW_UTILS_H
30 #define ROGUE_HW_UTILS_H
31
32 #include <stdint.h>
33
34 #include "pvr_types.h"
35
36 #define __pvr_address_type pvr_dev_addr_t
37 #define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
38 /* clang-format off */
39 #define __pvr_make_address(addr_u64) PVR_DEV_ADDR(addr_u64)
40 /* clang-format on */
41
42 #include "csbgen/rogue_cdm.h"
43 #include "csbgen/rogue_lls.h"
44
45 #undef __pvr_make_address
46 #undef __pvr_get_address
47 #undef __pvr_address_type
48
49 #include "rogue_hw_defs.h"
50 #include "pvr_device_info.h"
51 #include "util/compiler.h"
52 #include "util/macros.h"
53
54 static inline void
rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info * dev_info,uint32_t samples,uint32_t * const x_out,uint32_t * const y_out)55 rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
56 uint32_t samples,
57 uint32_t *const x_out,
58 uint32_t *const y_out)
59 {
60 const uint32_t tile_size_x =
61 PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
62 const uint32_t tile_size_y =
63 PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
64 const uint32_t samples_per_pixel =
65 PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U);
66
67 #if !defined(NDEBUG)
68 switch (samples_per_pixel) {
69 case 1:
70 case 2:
71 case 4:
72 break;
73 default:
74 assert(!"Unsupported ISP samples per pixel");
75 }
76 #endif
77
78 *x_out = tile_size_x;
79 *y_out = tile_size_y;
80
81 switch (samples) {
82 case 1:
83 break;
84 case 2:
85 if (samples_per_pixel == 2 || samples_per_pixel == 4)
86 *y_out *= 2;
87
88 break;
89 case 4:
90 if (samples_per_pixel == 2 || samples_per_pixel == 4)
91 *x_out *= 2;
92
93 if (samples_per_pixel == 2)
94 *y_out *= 2;
95
96 break;
97 case 8:
98 *y_out *= 2;
99 break;
100 default:
101 assert(!"Unsupported number of samples");
102 }
103 }
104
105 static inline uint64_t
rogue_get_min_free_list_size(const struct pvr_device_info * dev_info)106 rogue_get_min_free_list_size(const struct pvr_device_info *dev_info)
107 {
108 uint64_t min_num_pages;
109
110 if (PVR_HAS_FEATURE(dev_info, roguexe)) {
111 if (PVR_HAS_QUIRK(dev_info, 66011))
112 min_num_pages = 40U;
113 else
114 min_num_pages = 25U;
115 } else {
116 min_num_pages = 50U;
117 }
118
119 return min_num_pages << ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
120 }
121
122 static inline uint32_t
rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info * dev_info)123 rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info)
124 {
125 /* Default value based on the minimum value found in all existing cores. */
126 uint32_t max_usc_tasks = PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U);
127
128 /* FIXME: Where does the 9 come from? */
129 return max_usc_tasks - 9;
130 }
131
132 static inline uint32_t
rogue_get_max_output_regs_per_pixel(const struct pvr_device_info * dev_info)133 rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
134 {
135 if (PVR_HAS_FEATURE(dev_info, eight_output_registers))
136 return 8U;
137
138 return 4U;
139 }
140
141 static inline void
rogue_get_num_macrotiles_xy(const struct pvr_device_info * dev_info,uint32_t * const x_out,uint32_t * const y_out)142 rogue_get_num_macrotiles_xy(const struct pvr_device_info *dev_info,
143 uint32_t *const x_out,
144 uint32_t *const y_out)
145 {
146 uint32_t version;
147
148 if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
149 version = 0;
150
151 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
152 version == 2) {
153 *x_out = 4;
154 *y_out = 4;
155 } else {
156 *x_out = 1;
157 *y_out = 1;
158 }
159 }
160
161 static inline uint32_t
rogue_get_macrotile_array_size(const struct pvr_device_info * dev_info)162 rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
163 {
164 uint32_t num_macrotiles_x;
165 uint32_t num_macrotiles_y;
166
167 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
168 return 0;
169
170 rogue_get_num_macrotiles_xy(dev_info, &num_macrotiles_x, &num_macrotiles_y);
171
172 return num_macrotiles_x * num_macrotiles_y * 8U;
173 }
174
175 /* Region header size in bytes. */
176 static inline uint32_t
rogue_get_region_header_size(const struct pvr_device_info * dev_info)177 rogue_get_region_header_size(const struct pvr_device_info *dev_info)
178 {
179 uint32_t version;
180
181 if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
182 version = 0;
183
184 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
185 version == 2) {
186 return 6;
187 }
188
189 return 5;
190 }
191
192 static inline uint32_t
rogue_get_render_size_max(const struct pvr_device_info * dev_info)193 rogue_get_render_size_max(const struct pvr_device_info *dev_info)
194 {
195 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
196 if (!PVR_HAS_FEATURE(dev_info, screen_size8K))
197 return 4096U;
198
199 return 8192U;
200 }
201
/* Maximum render size along X.
 *
 * Thin wrapper over rogue_get_render_size_max(), which returns a single limit
 * shared by both axes. Written as an inline function rather than a macro so
 * that the argument is type-checked, like the other helpers in this file.
 */
static inline uint32_t
rogue_get_render_size_max_x(const struct pvr_device_info *dev_info)
{
   return rogue_get_render_size_max(dev_info);
}

/* Maximum render size along Y.
 *
 * Thin wrapper over rogue_get_render_size_max(), which returns a single limit
 * shared by both axes.
 */
static inline uint32_t
rogue_get_render_size_max_y(const struct pvr_device_info *dev_info)
{
   return rogue_get_render_size_max(dev_info);
}
207
208 static inline uint32_t
rogue_get_slc_cache_line_size(const struct pvr_device_info * dev_info)209 rogue_get_slc_cache_line_size(const struct pvr_device_info *dev_info)
210 {
211 return PVR_GET_FEATURE_VALUE(dev_info, slc_cache_line_size_bits, 8U) / 8U;
212 }
213
pvr_get_max_user_vertex_output_components(const struct pvr_device_info * dev_info)214 static inline uint32_t pvr_get_max_user_vertex_output_components(
215 const struct pvr_device_info *dev_info)
216 {
217 const uint32_t uvs_pba_entries =
218 PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 0U);
219 const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 0U);
220
221 if (uvs_banks <= 8U && uvs_pba_entries == 160U)
222 return 64U;
223
224 return 128U;
225 }
226
227 static inline uint32_t
rogue_max_compute_shared_registers(const struct pvr_device_info * dev_info)228 rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
229 {
230 if (PVR_HAS_FEATURE(dev_info, compute))
231 return 2U * 1024U;
232
233 return 0U;
234 }
235
236 static inline uint32_t
rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info * dev_info)237 rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
238 {
239 if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
240 const uint32_t max_num_cores =
241 PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
242 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
243 const uint32_t cdm_context_resume_buffer_stride =
244 ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);
245
246 return cdm_context_resume_buffer_stride * max_num_cores;
247 }
248
249 return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE;
250 }
251
rogue_get_cdm_context_resume_buffer_alignment(const struct pvr_device_info * dev_info)252 static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment(
253 const struct pvr_device_info *dev_info)
254 {
255 if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
256 return rogue_get_slc_cache_line_size(dev_info);
257
258 return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT;
259 }
260
261 static inline uint32_t
rogue_get_compute_max_work_group_size(const struct pvr_device_info * dev_info)262 rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info)
263 {
264 /* The number of tasks which can be executed per USC - Limited to 16U by the
265 * CDM.
266 */
267 const uint32_t max_tasks_per_usc = 16U;
268
269 if (!PVR_HAS_ERN(dev_info, 35421)) {
270 /* Barriers on work-groups > 32 instances aren't supported. */
271 return ROGUE_MAX_INSTANCES_PER_TASK;
272 }
273
274 return ROGUE_MAX_INSTANCES_PER_TASK * max_tasks_per_usc;
275 }
276
277 #endif /* ROGUE_HW_UTILS_H */
278