• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27 
28 #include "radeon_screen.h"
29 #include "radeon_tile.h"
30 
31 #include <stdint.h>
32 #include <string.h>
33 
34 #include "main/macros.h"
35 #include "radeon_debug.h"
36 
/* Size of one micro tile in bytes. The tiling routines divide it by the
 * element size to get the distance, in elements, between the starts of two
 * neighbouring tiles. */
#define MICRO_TILE_SIZE 32
38 
micro_tile_8_x_4_8bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)39 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
40                                   void * const dst, unsigned dst_pitch,
41                                   unsigned width, unsigned height)
42 {
43     unsigned row; /* current source row */
44     unsigned col; /* current source column */
45     unsigned k; /* number of processed tiles */
46     const unsigned tile_width = 8, tile_height = 4;
47     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
48 
49     k = 0;
50     for (row = 0; row < height; row += tile_height)
51     {
52         for (col = 0; col < width; col += tile_width, ++k)
53         {
54             uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
55             uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
56                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
57             unsigned j;
58 
59             for (j = 0; j < MIN2(tile_height, height - row); ++j)
60             {
61                 unsigned columns = MIN2(tile_width, width - col);
62                 memcpy(dst2, src2, columns * sizeof(uint8_t));
63                 dst2 += tile_width;
64                 src2 += src_pitch;
65             }
66         }
67     }
68 }
69 
micro_tile_4_x_4_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)70 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
71                                    void * const dst, unsigned dst_pitch,
72                                    unsigned width, unsigned height)
73 {
74     unsigned row; /* current source row */
75     unsigned col; /* current source column */
76     unsigned k; /* number of processed tiles */
77     const unsigned tile_width = 4, tile_height = 4;
78     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
79 
80     k = 0;
81     for (row = 0; row < height; row += tile_height)
82     {
83         for (col = 0; col < width; col += tile_width, ++k)
84         {
85             uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
86             uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
87                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
88             unsigned j;
89 
90             for (j = 0; j < MIN2(tile_height, height - row); ++j)
91             {
92                 unsigned columns = MIN2(tile_width, width - col);
93                 memcpy(dst2, src2, columns * sizeof(uint16_t));
94                 dst2 += tile_width;
95                 src2 += src_pitch;
96             }
97         }
98     }
99 }
100 
micro_tile_8_x_2_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)101 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
102                                    void * const dst, unsigned dst_pitch,
103                                    unsigned width, unsigned height)
104 {
105     unsigned row; /* current source row */
106     unsigned col; /* current source column */
107     unsigned k; /* number of processed tiles */
108     const unsigned tile_width = 8, tile_height = 2;
109     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
110 
111     k = 0;
112     for (row = 0; row < height; row += tile_height)
113     {
114         for (col = 0; col < width; col += tile_width, ++k)
115         {
116             uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
117             uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
118                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
119             unsigned j;
120 
121             for (j = 0; j < MIN2(tile_height, height - row); ++j)
122             {
123                 unsigned columns = MIN2(tile_width, width - col);
124                 memcpy(dst2, src2, columns * sizeof(uint16_t));
125                 dst2 += tile_width;
126                 src2 += src_pitch;
127             }
128         }
129     }
130 }
131 
micro_tile_4_x_2_32bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)132 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
133                                    void * const dst, unsigned dst_pitch,
134                                    unsigned width, unsigned height)
135 {
136     unsigned row; /* current source row */
137     unsigned col; /* current source column */
138     unsigned k; /* number of processed tiles */
139     const unsigned tile_width = 4, tile_height = 2;
140     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
141 
142     k = 0;
143     for (row = 0; row < height; row += tile_height)
144     {
145         for (col = 0; col < width; col += tile_width, ++k)
146         {
147             uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
148             uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
149                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
150             unsigned j;
151 
152             for (j = 0; j < MIN2(tile_height, height - row); ++j)
153             {
154                 unsigned columns = MIN2(tile_width, width - col);
155                 memcpy(dst2, src2, columns * sizeof(uint32_t));
156                 dst2 += tile_width;
157                 src2 += src_pitch;
158             }
159         }
160     }
161 }
162 
micro_tile_2_x_2_64bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)163 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
164                                    void * const dst, unsigned dst_pitch,
165                                    unsigned width, unsigned height)
166 {
167     unsigned row; /* current source row */
168     unsigned col; /* current source column */
169     unsigned k; /* number of processed tiles */
170     const unsigned tile_width = 2, tile_height = 2;
171     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
172 
173     k = 0;
174     for (row = 0; row < height; row += tile_height)
175     {
176         for (col = 0; col < width; col += tile_width, ++k)
177         {
178             uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
179             uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
180                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
181             unsigned j;
182 
183             for (j = 0; j < MIN2(tile_height, height - row); ++j)
184             {
185                 unsigned columns = MIN2(tile_width, width - col);
186                 memcpy(dst2, src2, columns * sizeof(uint64_t));
187                 dst2 += tile_width;
188                 src2 += src_pitch;
189             }
190         }
191     }
192 }
193 
/**
 * Copy an image of 128-bit (16-byte) elements from src to dst using 1 x 1
 * tiles, which amounts to a plain row-by-row copy.
 *
 * src_pitch and dst_pitch are the row strides of the source and of the
 * destination, width and height the size of the region to copy; all four
 * are counted in 16-byte elements. Exactly `height` rows of `width`
 * elements are copied; destination elements between width and dst_pitch
 * are left untouched.
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /* One memcpy per row. Copying once per element, as an extra inner loop
     * over the columns would, walks width * height rows and runs past the
     * end of both buffers. */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
211 
/**
 * Convert a linear image to micro-tiled layout in software.
 *
 * The tiling routine is chosen from the byte size of one element of
 * `format`; for 2-byte formats the 4 x 4 variant is used when the format
 * has depth bits and the 8 x 2 variant otherwise. Both pitches must be at
 * least `width`. An unsupported element size trips an assertion and, if
 * assertions are compiled out, nothing is copied.
 */
void tile_image(const void * src, unsigned src_pitch,
                void *dst, unsigned dst_pitch,
                mesa_format format, unsigned width, unsigned height)
{
    /* routine selected for this element size */
    void (*tile_fn)(const void *, unsigned, void *, unsigned,
                    unsigned, unsigned);

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));

    switch (_mesa_get_format_bytes(format))
    {
        case 16:
            tile_fn = micro_tile_1_x_1_128bit;
            break;
        case 8:
            tile_fn = micro_tile_2_x_2_64bit;
            break;
        case 4:
            tile_fn = micro_tile_4_x_2_32bit;
            break;
        case 2:
            tile_fn = _mesa_get_format_bits(format, GL_DEPTH_BITS)
                          ? micro_tile_4_x_4_16bit
                          : micro_tile_8_x_2_16bit;
            break;
        case 1:
            tile_fn = micro_tile_8_x_4_8bit;
            break;
        default:
            assert(0);
            return;
    }

    tile_fn(src, src_pitch, dst, dst_pitch, width, height);
}
252 
micro_untile_8_x_4_8bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)253 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
254                                     void * const dst, unsigned dst_pitch,
255                                     unsigned width, unsigned height)
256 {
257     unsigned row; /* current destination row */
258     unsigned col; /* current destination column */
259     unsigned k; /* current tile number */
260     const unsigned tile_width = 8, tile_height = 4;
261     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
262 
263     assert(src_pitch % tile_width == 0);
264 
265     k = 0;
266     for (row = 0; row < height; row += tile_height)
267     {
268         for (col = 0; col < width; col += tile_width, ++k)
269         {
270             uint8_t *src2 = (uint8_t *)src + row * src_pitch +
271                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
272             uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
273             unsigned j;
274 
275             for (j = 0; j < MIN2(tile_height, height - row); ++j)
276             {
277                 unsigned columns = MIN2(tile_width, width - col);
278                 memcpy(dst2, src2, columns * sizeof(uint8_t));
279                 dst2 += dst_pitch;
280                 src2 += tile_width;
281             }
282         }
283     }
284 }
285 
micro_untile_8_x_2_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)286 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
287                                      void * const dst, unsigned dst_pitch,
288                                      unsigned width, unsigned height)
289 {
290     unsigned row; /* current destination row */
291     unsigned col; /* current destination column */
292     unsigned k; /* current tile number */
293     const unsigned tile_width = 8, tile_height = 2;
294     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
295 
296     assert(src_pitch % tile_width == 0);
297 
298     k = 0;
299     for (row = 0; row < height; row += tile_height)
300     {
301         for (col = 0; col < width; col += tile_width, ++k)
302         {
303             uint16_t *src2 = (uint16_t *)src + row * src_pitch +
304                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
305             uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
306             unsigned j;
307 
308             for (j = 0; j < MIN2(tile_height, height - row); ++j)
309             {
310                 unsigned columns = MIN2(tile_width, width - col);
311                 memcpy(dst2, src2, columns * sizeof(uint16_t));
312                 dst2 += dst_pitch;
313                 src2 += tile_width;
314             }
315         }
316     }
317 }
318 
micro_untile_4_x_4_16bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)319 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
320                                      void * const dst, unsigned dst_pitch,
321                                      unsigned width, unsigned height)
322 {
323     unsigned row; /* current destination row */
324     unsigned col; /* current destination column */
325     unsigned k; /* current tile number */
326     const unsigned tile_width = 4, tile_height = 4;
327     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
328 
329     assert(src_pitch % tile_width == 0);
330 
331     k = 0;
332     for (row = 0; row < height; row += tile_height)
333     {
334         for (col = 0; col < width; col += tile_width, ++k)
335         {
336             uint16_t *src2 = (uint16_t *)src + row * src_pitch +
337                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
338             uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
339             unsigned j;
340 
341             for (j = 0; j < MIN2(tile_height, height - row); ++j)
342             {
343                 unsigned columns = MIN2(tile_width, width - col);
344                 memcpy(dst2, src2, columns * sizeof(uint16_t));
345                 dst2 += dst_pitch;
346                 src2 += tile_width;
347             }
348         }
349     }
350 }
351 
micro_untile_4_x_2_32bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)352 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
353                                      void * const dst, unsigned dst_pitch,
354                                      unsigned width, unsigned height)
355 {
356     unsigned row; /* current destination row */
357     unsigned col; /* current destination column */
358     unsigned k; /* current tile number */
359     const unsigned tile_width = 4, tile_height = 2;
360     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
361 
362     assert(src_pitch % tile_width == 0);
363 
364     k = 0;
365     for (row = 0; row < height; row += tile_height)
366     {
367         for (col = 0; col < width; col += tile_width, ++k)
368         {
369             uint32_t *src2 = (uint32_t *)src + row * src_pitch +
370                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
371             uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
372             unsigned j;
373 
374             for (j = 0; j < MIN2(tile_height, height - row); ++j)
375             {
376                 unsigned columns = MIN2(tile_width, width - col);
377                 memcpy(dst2, src2, columns * sizeof(uint32_t));
378                 dst2 += dst_pitch;
379                 src2 += tile_width;
380             }
381         }
382     }
383 }
384 
micro_untile_2_x_2_64bit(const void * const src,unsigned src_pitch,void * const dst,unsigned dst_pitch,unsigned width,unsigned height)385 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
386                                      void * const dst, unsigned dst_pitch,
387                                      unsigned width, unsigned height)
388 {
389     unsigned row; /* current destination row */
390     unsigned col; /* current destination column */
391     unsigned k; /* current tile number */
392     const unsigned tile_width = 2, tile_height = 2;
393     const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
394 
395     assert(src_pitch % tile_width == 0);
396 
397     k = 0;
398     for (row = 0; row < height; row += tile_height)
399     {
400         for (col = 0; col < width; col += tile_width, ++k)
401         {
402             uint64_t *src2 = (uint64_t *)src + row * src_pitch +
403                              (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
404             uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
405             unsigned j;
406 
407             for (j = 0; j < MIN2(tile_height, height - row); ++j)
408             {
409                 unsigned columns = MIN2(tile_width, width - col);
410                 memcpy(dst2, src2, columns * sizeof(uint64_t));
411                 dst2 += dst_pitch;
412                 src2 += tile_width;
413             }
414         }
415     }
416 }
417 
/**
 * Copy an image of 128-bit (16-byte) elements from src to dst using 1 x 1
 * tiles, which amounts to a plain row-by-row copy.
 *
 * src_pitch and dst_pitch are the row strides of the source and of the
 * destination, width and height the size of the region to copy; all four
 * are counted in 16-byte elements. Exactly `height` rows of `width`
 * elements are copied; destination elements between width and dst_pitch
 * are left untouched.
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /* One memcpy per row. Copying once per element, as an extra inner loop
     * over the columns would, walks width * height rows and runs past the
     * end of both buffers. */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
435 
/**
 * Convert a micro-tiled image to linear layout in software.
 *
 * The untiling routine is chosen from the byte size of one element of
 * `format`; for 2-byte formats the 4 x 4 variant is used when the format
 * has depth bits and the 8 x 2 variant otherwise. Both pitches must be at
 * least `width`. An unsupported element size trips an assertion and, if
 * assertions are compiled out, nothing is copied.
 */
void untile_image(const void * src, unsigned src_pitch,
                  void *dst, unsigned dst_pitch,
                  mesa_format format, unsigned width, unsigned height)
{
    /* routine selected for this element size */
    void (*untile_fn)(const void *, unsigned, void *, unsigned,
                      unsigned, unsigned);

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));

    switch (_mesa_get_format_bytes(format))
    {
        case 16:
            untile_fn = micro_untile_1_x_1_128bit;
            break;
        case 8:
            untile_fn = micro_untile_2_x_2_64bit;
            break;
        case 4:
            untile_fn = micro_untile_4_x_2_32bit;
            break;
        case 2:
            untile_fn = _mesa_get_format_bits(format, GL_DEPTH_BITS)
                            ? micro_untile_4_x_4_16bit
                            : micro_untile_8_x_2_16bit;
            break;
        case 1:
            untile_fn = micro_untile_8_x_4_8bit;
            break;
        default:
            assert(0);
            return;
    }

    untile_fn(src, src_pitch, dst, dst_pitch, width, height);
}
476 
/**
 * Report the micro tile dimensions used for `format`.
 *
 * The tile size is chosen from the byte size of one element of the format:
 * 16 bytes -> 1 x 1, 8 bytes -> 2 x 2, 4 bytes -> 4 x 2, 1 byte -> 8 x 4;
 * 2-byte formats use 4 x 4 when the format has depth bits and 8 x 2
 * otherwise.
 *
 * \param format        format to look up
 * \param block_width   receives the tile width
 * \param block_height  receives the tile height
 *
 * An unsupported element size trips an assertion; if assertions are
 * compiled out, neither output is written.
 */
void get_tile_size(mesa_format format, unsigned *block_width, unsigned *block_height)
{
    unsigned tile_w, tile_h;

    switch (_mesa_get_format_bytes(format))
    {
        case 16:
            tile_w = 1;
            tile_h = 1;
            break;
        case 8:
            tile_w = 2;
            tile_h = 2;
            break;
        case 4:
            tile_w = 4;
            tile_h = 2;
            break;
        case 2:
        {
            const int has_depth = (_mesa_get_format_bits(format, GL_DEPTH_BITS) != 0);

            tile_w = has_depth ? 4 : 8;
            tile_h = has_depth ? 4 : 2;
            break;
        }
        case 1:
            tile_w = 8;
            tile_h = 4;
            break;
        default:
            assert(0);
            return;
    }

    *block_width = tile_w;
    *block_height = tile_h;
}
514