1 /*
2 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_tile.h"
29
30 #include <stdint.h>
31 #include <string.h>
32
33 #include "main/macros.h"
34 #include "radeon_debug.h"
35
36 #define MICRO_TILE_SIZE 32
37
micro_tile_8_x_4_8bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)38 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
39 void * const dst, unsigned dst_pitch,
40 unsigned width, unsigned height)
41 {
42 unsigned row; /* current source row */
43 unsigned col; /* current source column */
44 unsigned k; /* number of processed tiles */
45 const unsigned tile_width = 8, tile_height = 4;
46 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
47
48 k = 0;
49 for (row = 0; row < height; row += tile_height)
50 {
51 for (col = 0; col < width; col += tile_width, ++k)
52 {
53 uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
54 uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
55 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
56 unsigned j;
57
58 for (j = 0; j < MIN2(tile_height, height - row); ++j)
59 {
60 unsigned columns = MIN2(tile_width, width - col);
61 memcpy(dst2, src2, columns * sizeof(uint8_t));
62 dst2 += tile_width;
63 src2 += src_pitch;
64 }
65 }
66 }
67 }
68
micro_tile_4_x_4_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)69 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
70 void * const dst, unsigned dst_pitch,
71 unsigned width, unsigned height)
72 {
73 unsigned row; /* current source row */
74 unsigned col; /* current source column */
75 unsigned k; /* number of processed tiles */
76 const unsigned tile_width = 4, tile_height = 4;
77 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
78
79 k = 0;
80 for (row = 0; row < height; row += tile_height)
81 {
82 for (col = 0; col < width; col += tile_width, ++k)
83 {
84 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
85 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
86 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
87 unsigned j;
88
89 for (j = 0; j < MIN2(tile_height, height - row); ++j)
90 {
91 unsigned columns = MIN2(tile_width, width - col);
92 memcpy(dst2, src2, columns * sizeof(uint16_t));
93 dst2 += tile_width;
94 src2 += src_pitch;
95 }
96 }
97 }
98 }
99
micro_tile_8_x_2_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)100 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
101 void * const dst, unsigned dst_pitch,
102 unsigned width, unsigned height)
103 {
104 unsigned row; /* current source row */
105 unsigned col; /* current source column */
106 unsigned k; /* number of processed tiles */
107 const unsigned tile_width = 8, tile_height = 2;
108 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
109
110 k = 0;
111 for (row = 0; row < height; row += tile_height)
112 {
113 for (col = 0; col < width; col += tile_width, ++k)
114 {
115 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
116 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
117 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
118 unsigned j;
119
120 for (j = 0; j < MIN2(tile_height, height - row); ++j)
121 {
122 unsigned columns = MIN2(tile_width, width - col);
123 memcpy(dst2, src2, columns * sizeof(uint16_t));
124 dst2 += tile_width;
125 src2 += src_pitch;
126 }
127 }
128 }
129 }
130
micro_tile_4_x_2_32bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)131 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
132 void * const dst, unsigned dst_pitch,
133 unsigned width, unsigned height)
134 {
135 unsigned row; /* current source row */
136 unsigned col; /* current source column */
137 unsigned k; /* number of processed tiles */
138 const unsigned tile_width = 4, tile_height = 2;
139 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
140
141 k = 0;
142 for (row = 0; row < height; row += tile_height)
143 {
144 for (col = 0; col < width; col += tile_width, ++k)
145 {
146 uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
147 uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
148 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
149 unsigned j;
150
151 for (j = 0; j < MIN2(tile_height, height - row); ++j)
152 {
153 unsigned columns = MIN2(tile_width, width - col);
154 memcpy(dst2, src2, columns * sizeof(uint32_t));
155 dst2 += tile_width;
156 src2 += src_pitch;
157 }
158 }
159 }
160 }
161
micro_tile_2_x_2_64bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)162 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
163 void * const dst, unsigned dst_pitch,
164 unsigned width, unsigned height)
165 {
166 unsigned row; /* current source row */
167 unsigned col; /* current source column */
168 unsigned k; /* number of processed tiles */
169 const unsigned tile_width = 2, tile_height = 2;
170 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
171
172 k = 0;
173 for (row = 0; row < height; row += tile_height)
174 {
175 for (col = 0; col < width; col += tile_width, ++k)
176 {
177 uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
178 uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
179 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
180 unsigned j;
181
182 for (j = 0; j < MIN2(tile_height, height - row); ++j)
183 {
184 unsigned columns = MIN2(tile_width, width - col);
185 memcpy(dst2, src2, columns * sizeof(uint64_t));
186 dst2 += tile_width;
187 src2 += src_pitch;
188 }
189 }
190 }
191 }
192
/**
 * Copy an image of 128-bit elements into 1x1 "tiled" layout.
 *
 * With a 1x1 tile every element is its own tile, so the operation is a
 * plain row-by-row copy that honours the two pitches.  Exactly \p height
 * rows of \p width elements are copied; bytes between width and pitch in
 * the destination are left untouched.
 *
 * \param src        source image
 * \param src_pitch  source row stride, in 16-byte elements
 * \param dst        destination image
 * \param dst_pitch  destination row stride, in 16-byte elements
 * \param width      image width, in elements
 * \param height     image height, in rows
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const size_t elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    for (j = 0; j < height; ++j)
    {
        /* one memcpy moves a complete row, so no per-column loop is needed */
        memcpy(dst_row, src_row, width * elem_size);
        dst_row += dst_pitch * elem_size;
        src_row += src_pitch * elem_size;
    }
}
210
/**
 * Convert a linear image into micro-tiled layout.
 *
 * The worker routine is picked from the number of bytes per element of
 * \p format; 2-byte formats that have depth bits use the 4x4 variant, other
 * 2-byte formats the 8x2 variant.  An unsupported element size trips an
 * assertion and nothing is copied.
 *
 * \param src        linear source image
 * \param src_pitch  source row stride, in elements (must be >= width)
 * \param dst        tiled destination image
 * \param dst_pitch  destination row stride, in elements (must be >= width)
 * \param format     format of the image elements
 * \param width      image width, in elements
 * \param height     image height, in rows
 */
void tile_image(const void * src, unsigned src_pitch,
                void *dst, unsigned dst_pitch,
                gl_format format, unsigned width, unsigned height)
{
    /* worker selected below; stays NULL for unsupported element sizes */
    void (*tile_func)(const void *, unsigned, void *, unsigned,
                      unsigned, unsigned) = NULL;

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));

    switch (_mesa_get_format_bytes(format))
    {
    case 16:
        tile_func = micro_tile_1_x_1_128bit;
        break;
    case 8:
        tile_func = micro_tile_2_x_2_64bit;
        break;
    case 4:
        tile_func = micro_tile_4_x_2_32bit;
        break;
    case 2:
        /* depth formats use a different micro tile shape */
        tile_func = _mesa_get_format_bits(format, GL_DEPTH_BITS)
                        ? micro_tile_4_x_4_16bit
                        : micro_tile_8_x_2_16bit;
        break;
    case 1:
        tile_func = micro_tile_8_x_4_8bit;
        break;
    default:
        assert(0);
        break;
    }

    if (tile_func)
    {
        tile_func(src, src_pitch, dst, dst_pitch, width, height);
    }
}
251
micro_untile_8_x_4_8bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)252 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
253 void * const dst, unsigned dst_pitch,
254 unsigned width, unsigned height)
255 {
256 unsigned row; /* current destination row */
257 unsigned col; /* current destination column */
258 unsigned k; /* current tile number */
259 const unsigned tile_width = 8, tile_height = 4;
260 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
261
262 assert(src_pitch % tile_width == 0);
263
264 k = 0;
265 for (row = 0; row < height; row += tile_height)
266 {
267 for (col = 0; col < width; col += tile_width, ++k)
268 {
269 uint8_t *src2 = (uint8_t *)src + row * src_pitch +
270 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
271 uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
272 unsigned j;
273
274 for (j = 0; j < MIN2(tile_height, height - row); ++j)
275 {
276 unsigned columns = MIN2(tile_width, width - col);
277 memcpy(dst2, src2, columns * sizeof(uint8_t));
278 dst2 += dst_pitch;
279 src2 += tile_width;
280 }
281 }
282 }
283 }
284
micro_untile_8_x_2_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)285 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
286 void * const dst, unsigned dst_pitch,
287 unsigned width, unsigned height)
288 {
289 unsigned row; /* current destination row */
290 unsigned col; /* current destination column */
291 unsigned k; /* current tile number */
292 const unsigned tile_width = 8, tile_height = 2;
293 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
294
295 assert(src_pitch % tile_width == 0);
296
297 k = 0;
298 for (row = 0; row < height; row += tile_height)
299 {
300 for (col = 0; col < width; col += tile_width, ++k)
301 {
302 uint16_t *src2 = (uint16_t *)src + row * src_pitch +
303 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
304 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
305 unsigned j;
306
307 for (j = 0; j < MIN2(tile_height, height - row); ++j)
308 {
309 unsigned columns = MIN2(tile_width, width - col);
310 memcpy(dst2, src2, columns * sizeof(uint16_t));
311 dst2 += dst_pitch;
312 src2 += tile_width;
313 }
314 }
315 }
316 }
317
micro_untile_4_x_4_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)318 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
319 void * const dst, unsigned dst_pitch,
320 unsigned width, unsigned height)
321 {
322 unsigned row; /* current destination row */
323 unsigned col; /* current destination column */
324 unsigned k; /* current tile number */
325 const unsigned tile_width = 4, tile_height = 4;
326 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
327
328 assert(src_pitch % tile_width == 0);
329
330 k = 0;
331 for (row = 0; row < height; row += tile_height)
332 {
333 for (col = 0; col < width; col += tile_width, ++k)
334 {
335 uint16_t *src2 = (uint16_t *)src + row * src_pitch +
336 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
337 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
338 unsigned j;
339
340 for (j = 0; j < MIN2(tile_height, height - row); ++j)
341 {
342 unsigned columns = MIN2(tile_width, width - col);
343 memcpy(dst2, src2, columns * sizeof(uint16_t));
344 dst2 += dst_pitch;
345 src2 += tile_width;
346 }
347 }
348 }
349 }
350
micro_untile_4_x_2_32bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)351 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
352 void * const dst, unsigned dst_pitch,
353 unsigned width, unsigned height)
354 {
355 unsigned row; /* current destination row */
356 unsigned col; /* current destination column */
357 unsigned k; /* current tile number */
358 const unsigned tile_width = 4, tile_height = 2;
359 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
360
361 assert(src_pitch % tile_width == 0);
362
363 k = 0;
364 for (row = 0; row < height; row += tile_height)
365 {
366 for (col = 0; col < width; col += tile_width, ++k)
367 {
368 uint32_t *src2 = (uint32_t *)src + row * src_pitch +
369 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
370 uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
371 unsigned j;
372
373 for (j = 0; j < MIN2(tile_height, height - row); ++j)
374 {
375 unsigned columns = MIN2(tile_width, width - col);
376 memcpy(dst2, src2, columns * sizeof(uint32_t));
377 dst2 += dst_pitch;
378 src2 += tile_width;
379 }
380 }
381 }
382 }
383
micro_untile_2_x_2_64bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)384 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
385 void * const dst, unsigned dst_pitch,
386 unsigned width, unsigned height)
387 {
388 unsigned row; /* current destination row */
389 unsigned col; /* current destination column */
390 unsigned k; /* current tile number */
391 const unsigned tile_width = 2, tile_height = 2;
392 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
393
394 assert(src_pitch % tile_width == 0);
395
396 k = 0;
397 for (row = 0; row < height; row += tile_height)
398 {
399 for (col = 0; col < width; col += tile_width, ++k)
400 {
401 uint64_t *src2 = (uint64_t *)src + row * src_pitch +
402 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
403 uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
404 unsigned j;
405
406 for (j = 0; j < MIN2(tile_height, height - row); ++j)
407 {
408 unsigned columns = MIN2(tile_width, width - col);
409 memcpy(dst2, src2, columns * sizeof(uint64_t));
410 dst2 += dst_pitch;
411 src2 += tile_width;
412 }
413 }
414 }
415 }
416
/**
 * Copy an image of 128-bit elements out of 1x1 "tiled" layout.
 *
 * With a 1x1 tile every element is its own tile, so the operation is a
 * plain row-by-row copy that honours the two pitches.  Exactly \p height
 * rows of \p width elements are copied; bytes between width and pitch in
 * the destination are left untouched.
 *
 * \param src        source image
 * \param src_pitch  source row stride, in 16-byte elements
 * \param dst        destination image
 * \param dst_pitch  destination row stride, in 16-byte elements
 * \param width      image width, in elements
 * \param height     image height, in rows
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const size_t elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    for (j = 0; j < height; ++j)
    {
        /* one memcpy moves a complete row, so no per-column loop is needed */
        memcpy(dst_row, src_row, width * elem_size);
        dst_row += dst_pitch * elem_size;
        src_row += src_pitch * elem_size;
    }
}
434
/**
 * Convert a micro-tiled image into linear layout.
 *
 * The worker routine is picked from the number of bytes per element of
 * \p format; 2-byte formats that have depth bits use the 4x4 variant, other
 * 2-byte formats the 8x2 variant.  An unsupported element size trips an
 * assertion and nothing is copied.
 *
 * \param src        tiled source image
 * \param src_pitch  source row stride, in elements (must be >= width)
 * \param dst        linear destination image
 * \param dst_pitch  destination row stride, in elements (must be >= width)
 * \param format     format of the image elements
 * \param width      image width, in elements
 * \param height     image height, in rows
 */
void untile_image(const void * src, unsigned src_pitch,
                  void *dst, unsigned dst_pitch,
                  gl_format format, unsigned width, unsigned height)
{
    /* worker selected below; stays NULL for unsupported element sizes */
    void (*untile_func)(const void *, unsigned, void *, unsigned,
                        unsigned, unsigned) = NULL;

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));

    switch (_mesa_get_format_bytes(format))
    {
    case 16:
        untile_func = micro_untile_1_x_1_128bit;
        break;
    case 8:
        untile_func = micro_untile_2_x_2_64bit;
        break;
    case 4:
        untile_func = micro_untile_4_x_2_32bit;
        break;
    case 2:
        /* depth formats use a different micro tile shape */
        untile_func = _mesa_get_format_bits(format, GL_DEPTH_BITS)
                          ? micro_untile_4_x_4_16bit
                          : micro_untile_8_x_2_16bit;
        break;
    case 1:
        untile_func = micro_untile_8_x_4_8bit;
        break;
    default:
        assert(0);
        break;
    }

    if (untile_func)
    {
        untile_func(src, src_pitch, dst, dst_pitch, width, height);
    }
}
475
/**
 * Report the micro tile dimensions that correspond to a format.
 *
 * The dimensions depend on the number of bytes per element of \p format:
 * 16 -> 1x1, 8 -> 2x2, 4 -> 4x2, 1 -> 8x4; 2-byte formats give 4x4 when the
 * format has depth bits and 8x2 otherwise.  For any other element size an
 * assertion fires and neither output is written.
 *
 * \param format        format to query
 * \param block_width   receives the tile width, in elements
 * \param block_height  receives the tile height, in rows
 */
void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
{
    unsigned tile_w, tile_h;

    switch (_mesa_get_format_bytes(format))
    {
    case 16:
        tile_w = 1;
        tile_h = 1;
        break;
    case 8:
        tile_w = 2;
        tile_h = 2;
        break;
    case 4:
        tile_w = 4;
        tile_h = 2;
        break;
    case 2:
        if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
        {
            tile_w = 4;
            tile_h = 4;
        }
        else
        {
            tile_w = 8;
            tile_h = 2;
        }
        break;
    case 1:
        tile_w = 8;
        tile_h = 4;
        break;
    default:
        /* unsupported element size: leave the outputs alone */
        assert(0);
        return;
    }

    *block_width = tile_w;
    *block_height = tile_h;
}
513