1 /*
2 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_screen.h"
29 #include "radeon_tile.h"
30
31 #include <stdint.h>
32 #include <string.h>
33
34 #include "main/macros.h"
35 #include "radeon_debug.h"
36
37 #define MICRO_TILE_SIZE 32
38
micro_tile_8_x_4_8bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)39 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
40 void * const dst, unsigned dst_pitch,
41 unsigned width, unsigned height)
42 {
43 unsigned row; /* current source row */
44 unsigned col; /* current source column */
45 unsigned k; /* number of processed tiles */
46 const unsigned tile_width = 8, tile_height = 4;
47 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
48
49 k = 0;
50 for (row = 0; row < height; row += tile_height)
51 {
52 for (col = 0; col < width; col += tile_width, ++k)
53 {
54 uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
55 uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
56 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
57 unsigned j;
58
59 for (j = 0; j < MIN2(tile_height, height - row); ++j)
60 {
61 unsigned columns = MIN2(tile_width, width - col);
62 memcpy(dst2, src2, columns * sizeof(uint8_t));
63 dst2 += tile_width;
64 src2 += src_pitch;
65 }
66 }
67 }
68 }
69
micro_tile_4_x_4_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)70 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
71 void * const dst, unsigned dst_pitch,
72 unsigned width, unsigned height)
73 {
74 unsigned row; /* current source row */
75 unsigned col; /* current source column */
76 unsigned k; /* number of processed tiles */
77 const unsigned tile_width = 4, tile_height = 4;
78 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
79
80 k = 0;
81 for (row = 0; row < height; row += tile_height)
82 {
83 for (col = 0; col < width; col += tile_width, ++k)
84 {
85 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
86 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
87 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
88 unsigned j;
89
90 for (j = 0; j < MIN2(tile_height, height - row); ++j)
91 {
92 unsigned columns = MIN2(tile_width, width - col);
93 memcpy(dst2, src2, columns * sizeof(uint16_t));
94 dst2 += tile_width;
95 src2 += src_pitch;
96 }
97 }
98 }
99 }
100
micro_tile_8_x_2_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)101 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
102 void * const dst, unsigned dst_pitch,
103 unsigned width, unsigned height)
104 {
105 unsigned row; /* current source row */
106 unsigned col; /* current source column */
107 unsigned k; /* number of processed tiles */
108 const unsigned tile_width = 8, tile_height = 2;
109 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
110
111 k = 0;
112 for (row = 0; row < height; row += tile_height)
113 {
114 for (col = 0; col < width; col += tile_width, ++k)
115 {
116 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
117 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
118 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
119 unsigned j;
120
121 for (j = 0; j < MIN2(tile_height, height - row); ++j)
122 {
123 unsigned columns = MIN2(tile_width, width - col);
124 memcpy(dst2, src2, columns * sizeof(uint16_t));
125 dst2 += tile_width;
126 src2 += src_pitch;
127 }
128 }
129 }
130 }
131
micro_tile_4_x_2_32bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)132 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
133 void * const dst, unsigned dst_pitch,
134 unsigned width, unsigned height)
135 {
136 unsigned row; /* current source row */
137 unsigned col; /* current source column */
138 unsigned k; /* number of processed tiles */
139 const unsigned tile_width = 4, tile_height = 2;
140 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
141
142 k = 0;
143 for (row = 0; row < height; row += tile_height)
144 {
145 for (col = 0; col < width; col += tile_width, ++k)
146 {
147 uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
148 uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
149 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
150 unsigned j;
151
152 for (j = 0; j < MIN2(tile_height, height - row); ++j)
153 {
154 unsigned columns = MIN2(tile_width, width - col);
155 memcpy(dst2, src2, columns * sizeof(uint32_t));
156 dst2 += tile_width;
157 src2 += src_pitch;
158 }
159 }
160 }
161 }
162
micro_tile_2_x_2_64bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)163 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
164 void * const dst, unsigned dst_pitch,
165 unsigned width, unsigned height)
166 {
167 unsigned row; /* current source row */
168 unsigned col; /* current source column */
169 unsigned k; /* number of processed tiles */
170 const unsigned tile_width = 2, tile_height = 2;
171 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
172
173 k = 0;
174 for (row = 0; row < height; row += tile_height)
175 {
176 for (col = 0; col < width; col += tile_width, ++k)
177 {
178 uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
179 uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
180 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
181 unsigned j;
182
183 for (j = 0; j < MIN2(tile_height, height - row); ++j)
184 {
185 unsigned columns = MIN2(tile_width, width - col);
186 memcpy(dst2, src2, columns * sizeof(uint64_t));
187 dst2 += tile_width;
188 src2 += src_pitch;
189 }
190 }
191 }
192 }
193
/**
 * Micro-tile a linear image of 128-bit (16-byte) elements.
 *
 * This variant performs no rearrangement inside a row: each source row is
 * copied as one contiguous run of width elements to the matching
 * destination row, honouring the two pitches.
 *
 * Each row is copied exactly once, so no more than height rows of either
 * buffer are ever touched.  All address arithmetic is done on byte pointers
 * because arithmetic on void pointers is not standard C.
 *
 * \param src       linear source image
 * \param src_pitch distance between two source rows, in 16-byte elements
 * \param dst       destination image
 * \param dst_pitch distance between two destination rows, in 16-byte elements
 * \param width     image width, in elements
 * \param height    image height, in rows
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const size_t elem_size = 16; /* sizeof(uint128_t) */
    const size_t row_bytes = width * elem_size;
    const uint8_t *src_bytes = src;
    uint8_t *dst_bytes = dst;
    unsigned j;

    for (j = 0; j < height; ++j)
    {
        /* Row offsets are computed in size_t to avoid 32-bit wrap-around. */
        memcpy(dst_bytes + (size_t)j * dst_pitch * elem_size,
               src_bytes + (size_t)j * src_pitch * elem_size,
               row_bytes);
    }
}
211
/**
 * Convert a linear image into the micro-tiled layout.
 *
 * The conversion routine is picked from the number of bytes per element of
 * \p format; 2-byte formats additionally distinguish formats that report
 * depth bits from all others.  Both pitches must be at least \p width.
 * An unsupported element size trips an assertion and, when assertions are
 * compiled out, leaves \p dst untouched.
 *
 * \param src       linear source image
 * \param src_pitch source pitch, forwarded to the conversion routine
 * \param dst       tiled destination image
 * \param dst_pitch destination pitch, forwarded to the conversion routine
 * \param format    element format of the image
 * \param width     image width
 * \param height    image height
 */
void tile_image(const void * src, unsigned src_pitch,
                void *dst, unsigned dst_pitch,
                mesa_format format, unsigned width, unsigned height)
{
    void (*convert)(const void *, unsigned, void *, unsigned,
                    unsigned, unsigned) = NULL;

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));

    switch (_mesa_get_format_bytes(format)) {
    case 1:
        convert = micro_tile_8_x_4_8bit;
        break;
    case 2:
        /* 16-bit depth formats use square tiles, everything else 8x2. */
        if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
            convert = micro_tile_4_x_4_16bit;
        else
            convert = micro_tile_8_x_2_16bit;
        break;
    case 4:
        convert = micro_tile_4_x_2_32bit;
        break;
    case 8:
        convert = micro_tile_2_x_2_64bit;
        break;
    case 16:
        convert = micro_tile_1_x_1_128bit;
        break;
    default:
        assert(0);
        break;
    }

    if (convert != NULL)
        convert(src, src_pitch, dst, dst_pitch, width, height);
}
252
micro_untile_8_x_4_8bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)253 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
254 void * const dst, unsigned dst_pitch,
255 unsigned width, unsigned height)
256 {
257 unsigned row; /* current destination row */
258 unsigned col; /* current destination column */
259 unsigned k; /* current tile number */
260 const unsigned tile_width = 8, tile_height = 4;
261 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
262
263 assert(src_pitch % tile_width == 0);
264
265 k = 0;
266 for (row = 0; row < height; row += tile_height)
267 {
268 for (col = 0; col < width; col += tile_width, ++k)
269 {
270 uint8_t *src2 = (uint8_t *)src + row * src_pitch +
271 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
272 uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
273 unsigned j;
274
275 for (j = 0; j < MIN2(tile_height, height - row); ++j)
276 {
277 unsigned columns = MIN2(tile_width, width - col);
278 memcpy(dst2, src2, columns * sizeof(uint8_t));
279 dst2 += dst_pitch;
280 src2 += tile_width;
281 }
282 }
283 }
284 }
285
micro_untile_8_x_2_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)286 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
287 void * const dst, unsigned dst_pitch,
288 unsigned width, unsigned height)
289 {
290 unsigned row; /* current destination row */
291 unsigned col; /* current destination column */
292 unsigned k; /* current tile number */
293 const unsigned tile_width = 8, tile_height = 2;
294 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
295
296 assert(src_pitch % tile_width == 0);
297
298 k = 0;
299 for (row = 0; row < height; row += tile_height)
300 {
301 for (col = 0; col < width; col += tile_width, ++k)
302 {
303 uint16_t *src2 = (uint16_t *)src + row * src_pitch +
304 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
305 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
306 unsigned j;
307
308 for (j = 0; j < MIN2(tile_height, height - row); ++j)
309 {
310 unsigned columns = MIN2(tile_width, width - col);
311 memcpy(dst2, src2, columns * sizeof(uint16_t));
312 dst2 += dst_pitch;
313 src2 += tile_width;
314 }
315 }
316 }
317 }
318
micro_untile_4_x_4_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)319 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
320 void * const dst, unsigned dst_pitch,
321 unsigned width, unsigned height)
322 {
323 unsigned row; /* current destination row */
324 unsigned col; /* current destination column */
325 unsigned k; /* current tile number */
326 const unsigned tile_width = 4, tile_height = 4;
327 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
328
329 assert(src_pitch % tile_width == 0);
330
331 k = 0;
332 for (row = 0; row < height; row += tile_height)
333 {
334 for (col = 0; col < width; col += tile_width, ++k)
335 {
336 uint16_t *src2 = (uint16_t *)src + row * src_pitch +
337 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
338 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
339 unsigned j;
340
341 for (j = 0; j < MIN2(tile_height, height - row); ++j)
342 {
343 unsigned columns = MIN2(tile_width, width - col);
344 memcpy(dst2, src2, columns * sizeof(uint16_t));
345 dst2 += dst_pitch;
346 src2 += tile_width;
347 }
348 }
349 }
350 }
351
micro_untile_4_x_2_32bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)352 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
353 void * const dst, unsigned dst_pitch,
354 unsigned width, unsigned height)
355 {
356 unsigned row; /* current destination row */
357 unsigned col; /* current destination column */
358 unsigned k; /* current tile number */
359 const unsigned tile_width = 4, tile_height = 2;
360 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
361
362 assert(src_pitch % tile_width == 0);
363
364 k = 0;
365 for (row = 0; row < height; row += tile_height)
366 {
367 for (col = 0; col < width; col += tile_width, ++k)
368 {
369 uint32_t *src2 = (uint32_t *)src + row * src_pitch +
370 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
371 uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
372 unsigned j;
373
374 for (j = 0; j < MIN2(tile_height, height - row); ++j)
375 {
376 unsigned columns = MIN2(tile_width, width - col);
377 memcpy(dst2, src2, columns * sizeof(uint32_t));
378 dst2 += dst_pitch;
379 src2 += tile_width;
380 }
381 }
382 }
383 }
384
micro_untile_2_x_2_64bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)385 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
386 void * const dst, unsigned dst_pitch,
387 unsigned width, unsigned height)
388 {
389 unsigned row; /* current destination row */
390 unsigned col; /* current destination column */
391 unsigned k; /* current tile number */
392 const unsigned tile_width = 2, tile_height = 2;
393 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
394
395 assert(src_pitch % tile_width == 0);
396
397 k = 0;
398 for (row = 0; row < height; row += tile_height)
399 {
400 for (col = 0; col < width; col += tile_width, ++k)
401 {
402 uint64_t *src2 = (uint64_t *)src + row * src_pitch +
403 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
404 uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
405 unsigned j;
406
407 for (j = 0; j < MIN2(tile_height, height - row); ++j)
408 {
409 unsigned columns = MIN2(tile_width, width - col);
410 memcpy(dst2, src2, columns * sizeof(uint64_t));
411 dst2 += dst_pitch;
412 src2 += tile_width;
413 }
414 }
415 }
416 }
417
/**
 * Convert an image of 128-bit (16-byte) elements from the micro-tiled to
 * the linear layout.
 *
 * This variant performs no rearrangement inside a row: each source row is
 * copied as one contiguous run of width elements to the matching
 * destination row, honouring the two pitches.
 *
 * Each row is copied exactly once, so no more than height rows of either
 * buffer are ever touched.  All address arithmetic is done on byte pointers
 * because arithmetic on void pointers is not standard C.
 *
 * \param src       source image
 * \param src_pitch distance between two source rows, in 16-byte elements
 * \param dst       linear destination image
 * \param dst_pitch distance between two destination rows, in 16-byte elements
 * \param width     image width, in elements
 * \param height    image height, in rows
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const size_t elem_size = 16; /* sizeof(uint128_t) */
    const size_t row_bytes = width * elem_size;
    const uint8_t *src_bytes = src;
    uint8_t *dst_bytes = dst;
    unsigned j;

    for (j = 0; j < height; ++j)
    {
        /* Row offsets are computed in size_t to avoid 32-bit wrap-around. */
        memcpy(dst_bytes + (size_t)j * dst_pitch * elem_size,
               src_bytes + (size_t)j * src_pitch * elem_size,
               row_bytes);
    }
}
435
/**
 * Convert a micro-tiled image into the linear layout.
 *
 * The conversion routine is picked from the number of bytes per element of
 * \p format; 2-byte formats additionally distinguish formats that report
 * depth bits from all others.  Both pitches must be at least \p width.
 * An unsupported element size trips an assertion and, when assertions are
 * compiled out, leaves \p dst untouched.
 *
 * \param src       tiled source image
 * \param src_pitch source pitch, forwarded to the conversion routine
 * \param dst       linear destination image
 * \param dst_pitch destination pitch, forwarded to the conversion routine
 * \param format    element format of the image
 * \param width     image width
 * \param height    image height
 */
void untile_image(const void * src, unsigned src_pitch,
                  void *dst, unsigned dst_pitch,
                  mesa_format format, unsigned width, unsigned height)
{
    void (*convert)(const void *, unsigned, void *, unsigned,
                    unsigned, unsigned) = NULL;

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));

    switch (_mesa_get_format_bytes(format)) {
    case 1:
        convert = micro_untile_8_x_4_8bit;
        break;
    case 2:
        /* 16-bit depth formats use square tiles, everything else 8x2. */
        if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
            convert = micro_untile_4_x_4_16bit;
        else
            convert = micro_untile_8_x_2_16bit;
        break;
    case 4:
        convert = micro_untile_4_x_2_32bit;
        break;
    case 8:
        convert = micro_untile_2_x_2_64bit;
        break;
    case 16:
        convert = micro_untile_1_x_1_128bit;
        break;
    default:
        assert(0);
        break;
    }

    if (convert != NULL)
        convert(src, src_pitch, dst, dst_pitch, width, height);
}
476
/**
 * Report the micro tile dimensions used for a format.
 *
 * The dimensions depend on the number of bytes per element of \p format:
 * 1 -> 8x4, 2 -> 4x4 for formats with depth bits and 8x2 otherwise,
 * 4 -> 4x2, 8 -> 2x2, 16 -> 1x1.  Any other element size trips an
 * assertion and, when assertions are compiled out, leaves both outputs
 * unmodified.
 *
 * \param format       element format to query
 * \param block_width  receives the tile width, in elements
 * \param block_height receives the tile height, in rows
 */
void get_tile_size(mesa_format format, unsigned *block_width, unsigned *block_height)
{
    unsigned tile_w, tile_h;

    switch (_mesa_get_format_bytes(format)) {
    case 1:
        tile_w = 8;
        tile_h = 4;
        break;
    case 2:
        if (_mesa_get_format_bits(format, GL_DEPTH_BITS)) {
            tile_w = 4;
            tile_h = 4;
        } else {
            tile_w = 8;
            tile_h = 2;
        }
        break;
    case 4:
        tile_w = 4;
        tile_h = 2;
        break;
    case 8:
        tile_w = 2;
        tile_h = 2;
        break;
    case 16:
        tile_w = 1;
        tile_h = 1;
        break;
    default:
        assert(0);
        /* Unknown element size: do not touch the caller's variables. */
        return;
    }

    *block_width = tile_w;
    *block_height = tile_h;
}
514