1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file vc5_tiling.c
25 *
26 * Handles information about the VC5 tiling formats, and loading and storing
27 * from them.
28 */
29
30 #include <stdint.h>
31 #include "vc5_screen.h"
32 #include "vc5_context.h"
33 #include "vc5_tiling.h"
34
/**
 * Width, in pixels, of one 64-byte microtile (utile) for a format with
 * \p cpp bytes per pixel.
 *
 * Only 1, 2, 4, 8 and 16 bytes per pixel are handled; any other value ends
 * up in unreachable().
 */
uint32_t
vc5_utile_width(int cpp)
{
        if (cpp == 1 || cpp == 2)
                return 8;

        if (cpp == 4 || cpp == 8)
                return 4;

        if (cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
52
/**
 * Height, in pixels, of one 64-byte microtile (utile) for a format with
 * \p cpp bytes per pixel.
 *
 * Only 1, 2, 4, 8 and 16 bytes per pixel are handled; any other value ends
 * up in unreachable().
 */
uint32_t
vc5_utile_height(int cpp)
{
        if (cpp == 1)
                return 8;

        if (cpp == 2 || cpp == 4)
                return 4;

        if (cpp == 8 || cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
70
/**
 * Byte offset of pixel (\p x, \p y) from the start of its utile.
 *
 * Pixels inside a utile are laid out in raster order: rows follow one
 * another with no padding, so a row occupies utile-width * \p cpp bytes.
 * Both coordinates must already be relative to the utile; this is asserted.
 */
static inline uint32_t
vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        const uint32_t tile_w = vc5_utile_width(cpp);
        const uint32_t tile_h = vc5_utile_height(cpp);

        assert(x < tile_w && y < tile_h);

        /* Bytes taken by one full row of the utile. */
        const uint32_t row_pitch = tile_w * cpp;

        return y * row_pitch + x * cpp;
}
87
/**
 * Byte offset of pixel (\p x, \p y) in a LINEARTILE image.
 *
 * A LINEARTILE image is one line of utiles running along either X or Y, so
 * the pixel's utile index must be zero on at least one axis (asserted).
 * \p image_h is not used; it is part of the signature so this function can
 * serve as the get_pixel_offset callback of vc5_move_pixels_general().
 */
static inline uint32_t
vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        const uint32_t tile_w = vc5_utile_width(cpp);
        const uint32_t tile_h = vc5_utile_height(cpp);

        /* Index of the utile containing the pixel, per axis. */
        const uint32_t tile_col = x / tile_w;
        const uint32_t tile_row = y / tile_h;

        assert(tile_col == 0 || tile_row == 0);

        /* One of the two indices is zero, so their sum is the position of
         * the utile along the line; utiles are 64 bytes apart.
         */
        const uint32_t tile_base = 64 * (tile_col + tile_row);

        const uint32_t in_tile = vc5_get_utile_pixel_offset(cpp,
                                                            x & (tile_w - 1),
                                                            y & (tile_h - 1));

        return tile_base + in_tile;
}
108
/**
 * Byte offset of pixel (\p x, \p y) in a UBLINEAR image.
 *
 * The image is built from 256-byte UIF blocks, each 2x2 utiles.  Blocks are
 * stored in raster order with \p ublinear_number blocks per row (the callers
 * in this file pass 1 or 2).  Inside a block the right-hand utiles start 64
 * bytes in and the bottom utiles 128 bytes in.
 */
static inline uint32_t
vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                              int ublinear_number)
{
        const uint32_t tile_w = vc5_utile_width(cpp);
        const uint32_t tile_h = vc5_utile_height(cpp);

        /* Position of the UIF block that holds the pixel. */
        const uint32_t block_col = x / (tile_w * 2);
        const uint32_t block_row = y / (tile_h * 2);

        uint32_t offset = 256 * (block_row * ublinear_number + block_col);

        /* The tile_w / tile_h bit of a coordinate says whether the pixel is
         * in the second utile of the block on that axis.
         */
        if (x & tile_w)
                offset += 64;
        if (y & tile_h)
                offset += 128;

        offset += vc5_get_utile_pixel_offset(cpp,
                                             x & (tile_w - 1),
                                             y & (tile_h - 1));

        return offset;
}
134
/**
 * Byte offset of pixel (\p x, \p y) in a UBLINEAR image that is two UIF
 * blocks wide.
 *
 * \p image_h is ignored; the parameter exists so the function matches the
 * get_pixel_offset callback signature used by vc5_move_pixels_general().
 */
static inline uint32_t
vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int columns = 2;

        return vc5_get_ublinear_pixel_offset(cpp, x, y, columns);
}
141
/**
 * Byte offset of pixel (\p x, \p y) in a UBLINEAR image that is a single UIF
 * block wide.
 *
 * \p image_h is ignored; the parameter exists so the function matches the
 * get_pixel_offset callback signature used by vc5_move_pixels_general().
 */
static inline uint32_t
vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int columns = 1;

        return vc5_get_ublinear_pixel_offset(cpp, x, y, columns);
}
148
/**
 * Byte offset of pixel (\p x, \p y) in a UIF image (no XOR is applied here).
 *
 * Pixels live in 256-byte UIF blocks of 2x2 utiles.  The block index computed
 * below is equivalent to
 *
 *     (block_x / 4) * blocks_high * 4 + block_y * 4 + block_x % 4
 *
 * i.e. blocks are grouped into columns four blocks wide, each column covers
 * the whole image height (\p image_h rounded up to a block multiple), and
 * within a column the blocks are stored row by row.
 */
static inline uint32_t
vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        const uint32_t tile_w = vc5_utile_width(cpp);
        const uint32_t tile_h = vc5_utile_height(cpp);

        /* log2 of the UIF block dimensions (a block is 2x2 utiles). */
        const uint32_t block_w_log2 = ffs(tile_w * 2) - 1;
        const uint32_t block_h_log2 = ffs(tile_h * 2) - 1;

        /* Which block the pixel is in ... */
        const uint32_t block_x = x >> block_w_log2;
        const uint32_t block_y = y >> block_h_log2;
        /* ... and where it sits inside that block. */
        const uint32_t in_block_x = x - (block_x << block_w_log2);
        const uint32_t in_block_y = y - (block_y << block_h_log2);

        /* Image height in blocks, rounded up. */
        const uint32_t blocks_high =
                align(image_h, 1 << block_h_log2) >> block_h_log2;

        const uint32_t block_index = ((block_x / 4) * ((blocks_high - 1) * 4)) +
                                     block_x + block_y * 4;

        /* Byte address: start of the block, then the utile inside it
         * (bottom row +128, right column +64), then the pixel inside the
         * utile.
         */
        uint32_t offset = block_index * 256;

        if (in_block_y >= tile_h)
                offset += 128;
        if (in_block_x >= tile_w)
                offset += 64;

        offset += vc5_get_utile_pixel_offset(cpp,
                                             in_block_x & (tile_w - 1),
                                             in_block_y & (tile_h - 1));

        return offset;
}
195
196 static inline void
vc5_move_pixels_general_percpp(void * gpu,uint32_t gpu_stride,void * cpu,uint32_t cpu_stride,int cpp,uint32_t image_h,const struct pipe_box * box,uint32_t (* get_pixel_offset)(uint32_t cpp,uint32_t image_h,uint32_t x,uint32_t y),bool is_load)197 vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
198 void *cpu, uint32_t cpu_stride,
199 int cpp, uint32_t image_h,
200 const struct pipe_box *box,
201 uint32_t (*get_pixel_offset)(uint32_t cpp,
202 uint32_t image_h,
203 uint32_t x, uint32_t y),
204 bool is_load)
205 {
206 for (uint32_t y = 0; y < box->height; y++) {
207 void *cpu_row = cpu + y * cpu_stride;
208
209 for (int x = 0; x < box->width; x++) {
210 uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
211 box->x + x,
212 box->y + y);
213
214 if (false) {
215 fprintf(stderr, "%3d,%3d -> %d\n",
216 box->x + x, box->y + y,
217 pixel_offset);
218 }
219
220 if (is_load) {
221 memcpy(cpu_row + x * cpp,
222 gpu + pixel_offset,
223 cpp);
224 } else {
225 memcpy(gpu + pixel_offset,
226 cpu_row + x * cpp,
227 cpp);
228 }
229 }
230 }
231 }
232
/**
 * Dispatches to vc5_move_pixels_general_percpp() with the pixel size passed
 * as a literal constant for each supported \p cpp (1, 2, 4, 8 or 16).
 *
 * Presumably the literal is there so the inline helper can be specialized
 * per pixel size -- confirm before relying on it.  All other arguments are
 * forwarded unchanged.  A \p cpp outside the supported set copies nothing.
 */
static inline void
vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
                        void *cpu, uint32_t cpu_stride,
                        int cpp, uint32_t image_h,
                        const struct pipe_box *box,
                        uint32_t (*get_pixel_offset)(uint32_t cpp,
                                                     uint32_t image_h,
                                                     uint32_t x, uint32_t y),
                        bool is_load)
{
        if (cpp == 1) {
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               1, image_h, box,
                                               get_pixel_offset, is_load);
        } else if (cpp == 2) {
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               2, image_h, box,
                                               get_pixel_offset, is_load);
        } else if (cpp == 4) {
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               4, image_h, box,
                                               get_pixel_offset, is_load);
        } else if (cpp == 8) {
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               8, image_h, box,
                                               get_pixel_offset, is_load);
        } else if (cpp == 16) {
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               16, image_h, box,
                                               get_pixel_offset, is_load);
        }
        /* Any other cpp: nothing is copied. */
}
281
282 static inline void
vc5_move_tiled_image(void * gpu,uint32_t gpu_stride,void * cpu,uint32_t cpu_stride,enum vc5_tiling_mode tiling_format,int cpp,uint32_t image_h,const struct pipe_box * box,bool is_load)283 vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
284 void *cpu, uint32_t cpu_stride,
285 enum vc5_tiling_mode tiling_format,
286 int cpp,
287 uint32_t image_h,
288 const struct pipe_box *box,
289 bool is_load)
290 {
291 switch (tiling_format) {
292 case VC5_TILING_UIF_NO_XOR:
293 vc5_move_pixels_general(gpu, gpu_stride,
294 cpu, cpu_stride,
295 cpp, image_h, box,
296 vc5_get_uif_pixel_offset,
297 is_load);
298 break;
299 case VC5_TILING_UBLINEAR_2_COLUMN:
300 vc5_move_pixels_general(gpu, gpu_stride,
301 cpu, cpu_stride,
302 cpp, image_h, box,
303 vc5_get_ublinear_2_column_pixel_offset,
304 is_load);
305 break;
306 case VC5_TILING_UBLINEAR_1_COLUMN:
307 vc5_move_pixels_general(gpu, gpu_stride,
308 cpu, cpu_stride,
309 cpp, image_h, box,
310 vc5_get_ublinear_1_column_pixel_offset,
311 is_load);
312 break;
313 case VC5_TILING_LINEARTILE:
314 vc5_move_pixels_general(gpu, gpu_stride,
315 cpu, cpu_stride,
316 cpp, image_h, box,
317 vc5_get_lt_pixel_offset,
318 is_load);
319 break;
320 default:
321 unreachable("Unsupported tiling format");
322 break;
323 }
324 }
325
326 /**
327 * Loads pixel data from the start (microtile-aligned) box in \p src to the
328 * start of \p dst according to the given tiling format.
329 */
330 void
vc5_load_tiled_image(void * dst,uint32_t dst_stride,void * src,uint32_t src_stride,enum vc5_tiling_mode tiling_format,int cpp,uint32_t image_h,const struct pipe_box * box)331 vc5_load_tiled_image(void *dst, uint32_t dst_stride,
332 void *src, uint32_t src_stride,
333 enum vc5_tiling_mode tiling_format, int cpp,
334 uint32_t image_h,
335 const struct pipe_box *box)
336 {
337 vc5_move_tiled_image(src, src_stride,
338 dst, dst_stride,
339 tiling_format,
340 cpp,
341 image_h,
342 box,
343 true);
344 }
345
346 /**
347 * Stores pixel data from the start of \p src into a (microtile-aligned) box in
348 * \p dst according to the given tiling format.
349 */
350 void
vc5_store_tiled_image(void * dst,uint32_t dst_stride,void * src,uint32_t src_stride,enum vc5_tiling_mode tiling_format,int cpp,uint32_t image_h,const struct pipe_box * box)351 vc5_store_tiled_image(void *dst, uint32_t dst_stride,
352 void *src, uint32_t src_stride,
353 enum vc5_tiling_mode tiling_format, int cpp,
354 uint32_t image_h,
355 const struct pipe_box *box)
356 {
357 vc5_move_tiled_image(dst, dst_stride,
358 src, src_stride,
359 tiling_format,
360 cpp,
361 image_h,
362 box,
363 false);
364 }
365