/*
 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "radeon_tile.h"

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "main/macros.h"
#include "radeon_debug.h"

/*
 * Size of one micro tile in bytes. Every tile shape handled in this file
 * (tile_width x tile_height elements of the matching element size) adds up
 * to exactly this many bytes.
 */
#define MICRO_TILE_SIZE 32

/*
 * Copy a linear image of 8-bit elements into micro-tiled layout, using
 * tiles of 8 x 4 elements.
 *
 * A tile is stored as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are stored back to back, starting at element
 * offset row * dst_pitch, where row is the first image row the tile row
 * covers. Tiles cut by the right or bottom image edge are only partially
 * written; the rest of their destination storage is left untouched.
 *
 * src        linear source image
 * src_pitch  source row stride, in elements
 * dst        tiled destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 *
 * NOTE(review): the untile counterpart asserts that the tiled pitch is a
 * multiple of the tile width; the same is presumably expected of dst_pitch
 * here but it is not enforced.
 */
static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
                                  void * const dst, unsigned dst_pitch,
                                  unsigned width, unsigned height)
{
    const unsigned tile_width = 8, tile_height = 4;
    /* Elements per tile: 8 x 4 one-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint8_t * const linear = src;
    uint8_t * const tiled = dst;
    unsigned row; /* first source row of the current tile row */

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first source column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint8_t *src_line = linear + (size_t)row * src_pitch + col;
            uint8_t *dst_line = tiled + (size_t)row * dst_pitch +
                                (size_t)tile * tile_elems;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += tile_width;
                src_line += src_pitch;
            }
        }
    }
}

/*
 * Copy a linear image of 16-bit elements into micro-tiled layout, using
 * tiles of 4 x 4 elements.
 *
 * A tile is stored as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are stored back to back, starting at element
 * offset row * dst_pitch, where row is the first image row the tile row
 * covers. Tiles cut by the right or bottom image edge are only partially
 * written; the rest of their destination storage is left untouched.
 *
 * src        linear source image
 * src_pitch  source row stride, in elements
 * dst        tiled destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 *
 * NOTE(review): the untile counterpart asserts that the tiled pitch is a
 * multiple of the tile width; the same is presumably expected of dst_pitch
 * here but it is not enforced.
 */
static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
                                   void * const dst, unsigned dst_pitch,
                                   unsigned width, unsigned height)
{
    const unsigned tile_width = 4, tile_height = 4;
    /* Elements per tile: 4 x 4 two-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint16_t * const linear = src;
    uint16_t * const tiled = dst;
    unsigned row; /* first source row of the current tile row */

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first source column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint16_t *src_line = linear + (size_t)row * src_pitch + col;
            uint16_t *dst_line = tiled + (size_t)row * dst_pitch +
                                 (size_t)tile * tile_elems;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += tile_width;
                src_line += src_pitch;
            }
        }
    }
}

/*
 * Copy a linear image of 16-bit elements into micro-tiled layout, using
 * tiles of 8 x 2 elements.
 *
 * A tile is stored as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are stored back to back, starting at element
 * offset row * dst_pitch, where row is the first image row the tile row
 * covers. Tiles cut by the right or bottom image edge are only partially
 * written; the rest of their destination storage is left untouched.
 *
 * src        linear source image
 * src_pitch  source row stride, in elements
 * dst        tiled destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 *
 * NOTE(review): the untile counterpart asserts that the tiled pitch is a
 * multiple of the tile width; the same is presumably expected of dst_pitch
 * here but it is not enforced.
 */
static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
                                   void * const dst, unsigned dst_pitch,
                                   unsigned width, unsigned height)
{
    const unsigned tile_width = 8, tile_height = 2;
    /* Elements per tile: 8 x 2 two-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint16_t * const linear = src;
    uint16_t * const tiled = dst;
    unsigned row; /* first source row of the current tile row */

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first source column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint16_t *src_line = linear + (size_t)row * src_pitch + col;
            uint16_t *dst_line = tiled + (size_t)row * dst_pitch +
                                 (size_t)tile * tile_elems;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += tile_width;
                src_line += src_pitch;
            }
        }
    }
}

/*
 * Copy a linear image of 32-bit elements into micro-tiled layout, using
 * tiles of 4 x 2 elements.
 *
 * A tile is stored as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are stored back to back, starting at element
 * offset row * dst_pitch, where row is the first image row the tile row
 * covers. Tiles cut by the right or bottom image edge are only partially
 * written; the rest of their destination storage is left untouched.
 *
 * src        linear source image
 * src_pitch  source row stride, in elements
 * dst        tiled destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 *
 * NOTE(review): the untile counterpart asserts that the tiled pitch is a
 * multiple of the tile width; the same is presumably expected of dst_pitch
 * here but it is not enforced.
 */
static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
                                   void * const dst, unsigned dst_pitch,
                                   unsigned width, unsigned height)
{
    const unsigned tile_width = 4, tile_height = 2;
    /* Elements per tile: 4 x 2 four-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint32_t * const linear = src;
    uint32_t * const tiled = dst;
    unsigned row; /* first source row of the current tile row */

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first source column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint32_t *src_line = linear + (size_t)row * src_pitch + col;
            uint32_t *dst_line = tiled + (size_t)row * dst_pitch +
                                 (size_t)tile * tile_elems;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += tile_width;
                src_line += src_pitch;
            }
        }
    }
}

/*
 * Copy a linear image of 64-bit elements into micro-tiled layout, using
 * tiles of 2 x 2 elements.
 *
 * A tile is stored as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are stored back to back, starting at element
 * offset row * dst_pitch, where row is the first image row the tile row
 * covers. Tiles cut by the right or bottom image edge are only partially
 * written; the rest of their destination storage is left untouched.
 *
 * src        linear source image
 * src_pitch  source row stride, in elements
 * dst        tiled destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 *
 * NOTE(review): the untile counterpart asserts that the tiled pitch is a
 * multiple of the tile width; the same is presumably expected of dst_pitch
 * here but it is not enforced.
 */
static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
                                   void * const dst, unsigned dst_pitch,
                                   unsigned width, unsigned height)
{
    const unsigned tile_width = 2, tile_height = 2;
    /* Elements per tile: 2 x 2 eight-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint64_t * const linear = src;
    uint64_t * const tiled = dst;
    unsigned row; /* first source row of the current tile row */

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first source column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint64_t *src_line = linear + (size_t)row * src_pitch + col;
            uint64_t *dst_line = tiled + (size_t)row * dst_pitch +
                                 (size_t)tile * tile_elems;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += tile_width;
                src_line += src_pitch;
            }
        }
    }
}

/*
 * Copy a linear image of 128-bit elements into micro-tiled layout.
 *
 * With 1 x 1 tiles the element order within a row is the same in both
 * layouts, so this is a row-by-row copy that only accounts for the two
 * (possibly different) pitches.
 *
 * src        linear source image
 * src_pitch  source row stride, in 16-byte elements
 * dst        tiled destination image
 * dst_pitch  destination row stride, in 16-byte elements
 * width      image width, in elements
 * height     image height, in elements
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void pointers is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /*
     * Exactly one copy per image row. Copying a whole row once per element
     * would step width * height rows through both buffers and run past
     * their ends.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}

/*
 * Convert a linear image to micro-tiled layout in software.
 *
 * The tile shape is selected from the number of bytes per element of
 * the format:
 *   16 bytes -> 1 x 1 tiles
 *    8 bytes -> 2 x 2 tiles
 *    4 bytes -> 4 x 2 tiles
 *    2 bytes -> 4 x 4 tiles if the format has depth bits, 8 x 2 otherwise
 *    1 byte  -> 8 x 4 tiles
 * Any other element size trips an assertion.
 *
 * src        linear source image
 * src_pitch  source row stride, in elements; must be >= width
 * dst        tiled destination image
 * dst_pitch  destination row stride, in elements; must be >= width
 * format     format of both images
 * width      image width, in elements
 * height     image height, in elements
 */
void tile_image(const void * src, unsigned src_pitch,
                void *dst, unsigned dst_pitch,
                gl_format format, unsigned width, unsigned height)
{
    /* Queried once; used for both the trace message and the dispatch. */
    const unsigned bytes_per_elem = _mesa_get_format_bytes(format);

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    /* All arguments are unsigned, hence %u. */
    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software tiling: src_pitch %u, dst_pitch %u, width %u, height %u, bpp %u\n",
                 src_pitch, dst_pitch, width, height, bytes_per_elem);

    switch (bytes_per_elem)
    {
        case 16:
            micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        case 8:
            micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        case 4:
            micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        case 2:
            /* 16-bit depth formats use a different tile shape than color. */
            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
            {
                micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
            }
            else
            {
                micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
            }
            break;
        case 1:
            micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        default:
            assert(0);
            break;
    }
}

/*
 * Copy a micro-tiled image of 8-bit elements, stored in tiles of 8 x 4
 * elements, into linear layout.
 *
 * A tile is read as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are read back to back, starting at element
 * offset row * src_pitch, where row is the first image row the tile row
 * covers. Only elements inside the width x height image are written to
 * the destination.
 *
 * src        tiled source image
 * src_pitch  source row stride, in elements; must be a multiple of the
 *            tile width
 * dst        linear destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 */
static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
                                    void * const dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned tile_width = 8, tile_height = 4;
    /* Elements per tile: 8 x 4 one-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint8_t * const tiled = src;
    uint8_t * const linear = dst;
    unsigned row; /* first destination row of the current tile row */

    assert(src_pitch % tile_width == 0);

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first destination column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint8_t *src_line = tiled + (size_t)row * src_pitch +
                                      (size_t)tile * tile_elems;
            uint8_t *dst_line = linear + (size_t)row * dst_pitch + col;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += dst_pitch;
                src_line += tile_width;
            }
        }
    }
}

/*
 * Copy a micro-tiled image of 16-bit elements, stored in tiles of 8 x 2
 * elements, into linear layout.
 *
 * A tile is read as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are read back to back, starting at element
 * offset row * src_pitch, where row is the first image row the tile row
 * covers. Only elements inside the width x height image are written to
 * the destination.
 *
 * src        tiled source image
 * src_pitch  source row stride, in elements; must be a multiple of the
 *            tile width
 * dst        linear destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 */
static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
                                     void * const dst, unsigned dst_pitch,
                                     unsigned width, unsigned height)
{
    const unsigned tile_width = 8, tile_height = 2;
    /* Elements per tile: 8 x 2 two-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint16_t * const tiled = src;
    uint16_t * const linear = dst;
    unsigned row; /* first destination row of the current tile row */

    assert(src_pitch % tile_width == 0);

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first destination column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint16_t *src_line = tiled + (size_t)row * src_pitch +
                                       (size_t)tile * tile_elems;
            uint16_t *dst_line = linear + (size_t)row * dst_pitch + col;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += dst_pitch;
                src_line += tile_width;
            }
        }
    }
}

/*
 * Copy a micro-tiled image of 16-bit elements, stored in tiles of 4 x 4
 * elements, into linear layout.
 *
 * A tile is read as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are read back to back, starting at element
 * offset row * src_pitch, where row is the first image row the tile row
 * covers. Only elements inside the width x height image are written to
 * the destination.
 *
 * src        tiled source image
 * src_pitch  source row stride, in elements; must be a multiple of the
 *            tile width
 * dst        linear destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 */
static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
                                     void * const dst, unsigned dst_pitch,
                                     unsigned width, unsigned height)
{
    const unsigned tile_width = 4, tile_height = 4;
    /* Elements per tile: 4 x 4 two-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint16_t * const tiled = src;
    uint16_t * const linear = dst;
    unsigned row; /* first destination row of the current tile row */

    assert(src_pitch % tile_width == 0);

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first destination column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint16_t *src_line = tiled + (size_t)row * src_pitch +
                                       (size_t)tile * tile_elems;
            uint16_t *dst_line = linear + (size_t)row * dst_pitch + col;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += dst_pitch;
                src_line += tile_width;
            }
        }
    }
}

/*
 * Copy a micro-tiled image of 32-bit elements, stored in tiles of 4 x 2
 * elements, into linear layout.
 *
 * A tile is read as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are read back to back, starting at element
 * offset row * src_pitch, where row is the first image row the tile row
 * covers. Only elements inside the width x height image are written to
 * the destination.
 *
 * src        tiled source image
 * src_pitch  source row stride, in elements; must be a multiple of the
 *            tile width
 * dst        linear destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 */
static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
                                     void * const dst, unsigned dst_pitch,
                                     unsigned width, unsigned height)
{
    const unsigned tile_width = 4, tile_height = 2;
    /* Elements per tile: 4 x 2 four-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint32_t * const tiled = src;
    uint32_t * const linear = dst;
    unsigned row; /* first destination row of the current tile row */

    assert(src_pitch % tile_width == 0);

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first destination column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint32_t *src_line = tiled + (size_t)row * src_pitch +
                                       (size_t)tile * tile_elems;
            uint32_t *dst_line = linear + (size_t)row * dst_pitch + col;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += dst_pitch;
                src_line += tile_width;
            }
        }
    }
}

/*
 * Copy a micro-tiled image of 64-bit elements, stored in tiles of 2 x 2
 * elements, into linear layout.
 *
 * A tile is read as tile_height consecutive runs of tile_width elements.
 * The tiles of one tile row are read back to back, starting at element
 * offset row * src_pitch, where row is the first image row the tile row
 * covers. Only elements inside the width x height image are written to
 * the destination.
 *
 * src        tiled source image
 * src_pitch  source row stride, in elements; must be a multiple of the
 *            tile width
 * dst        linear destination image
 * dst_pitch  destination row stride, in elements
 * width      image width, in elements
 * height     image height, in elements
 */
static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
                                     void * const dst, unsigned dst_pitch,
                                     unsigned width, unsigned height)
{
    const unsigned tile_width = 2, tile_height = 2;
    /* Elements per tile: 2 x 2 eight-byte elements (MICRO_TILE_SIZE bytes). */
    const unsigned tile_elems = tile_width * tile_height;
    const uint64_t * const tiled = src;
    uint64_t * const linear = dst;
    unsigned row; /* first destination row of the current tile row */

    assert(src_pitch % tile_width == 0);

    for (row = 0; row < height; row += tile_height)
    {
        /* The bottom tile row may be cut short by the image edge. */
        const unsigned rows_left = height - row;
        const unsigned rows = rows_left < tile_height ? rows_left : tile_height;
        unsigned col;  /* first destination column of the current tile */
        unsigned tile; /* index of the current tile within its tile row */

        for (col = 0, tile = 0; col < width; col += tile_width, ++tile)
        {
            /* The rightmost tile may be cut short as well. */
            const unsigned cols_left = width - col;
            const unsigned columns = cols_left < tile_width ? cols_left : tile_width;
            const uint64_t *src_line = tiled + (size_t)row * src_pitch +
                                       (size_t)tile * tile_elems;
            uint64_t *dst_line = linear + (size_t)row * dst_pitch + col;
            unsigned j;

            for (j = 0; j < rows; ++j)
            {
                memcpy(dst_line, src_line, columns * sizeof(*dst_line));
                dst_line += dst_pitch;
                src_line += tile_width;
            }
        }
    }
}

/*
 * Copy a micro-tiled image of 128-bit elements into linear layout.
 *
 * With 1 x 1 tiles the element order within a row is the same in both
 * layouts, so this is a row-by-row copy that only accounts for the two
 * (possibly different) pitches.
 *
 * src        tiled source image
 * src_pitch  source row stride, in 16-byte elements
 * dst        linear destination image
 * dst_pitch  destination row stride, in 16-byte elements
 * width      image width, in elements
 * height     image height, in elements
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void pointers is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /*
     * Exactly one copy per image row. Copying a whole row once per element
     * would step width * height rows through both buffers and run past
     * their ends.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}

/*
 * Convert a micro-tiled image to linear layout in software.
 *
 * The tile shape is selected from the number of bytes per element of
 * the format:
 *   16 bytes -> 1 x 1 tiles
 *    8 bytes -> 2 x 2 tiles
 *    4 bytes -> 4 x 2 tiles
 *    2 bytes -> 4 x 4 tiles if the format has depth bits, 8 x 2 otherwise
 *    1 byte  -> 8 x 4 tiles
 * Any other element size trips an assertion.
 *
 * src        tiled source image
 * src_pitch  source row stride, in elements; must be >= width
 * dst        linear destination image
 * dst_pitch  destination row stride, in elements; must be >= width
 * format     format of both images
 * width      image width, in elements
 * height     image height, in elements
 */
void untile_image(const void * src, unsigned src_pitch,
                  void *dst, unsigned dst_pitch,
                  gl_format format, unsigned width, unsigned height)
{
    /* Queried once; used for both the trace message and the dispatch. */
    const unsigned bytes_per_elem = _mesa_get_format_bytes(format);

    assert(src_pitch >= width);
    assert(dst_pitch >= width);

    /* All arguments are unsigned, hence %u. */
    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
                 "Software untiling: src_pitch %u, dst_pitch %u, width %u, height %u, bpp %u\n",
                 src_pitch, dst_pitch, width, height, bytes_per_elem);

    switch (bytes_per_elem)
    {
        case 16:
            micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        case 8:
            micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        case 4:
            micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        case 2:
            /* 16-bit depth formats use a different tile shape than color. */
            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
            {
                micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
            }
            else
            {
                micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
            }
            break;
        case 1:
            micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
            break;
        default:
            assert(0);
            break;
    }
}

/*
 * Report the micro tile dimensions, in elements, used for a format.
 *
 * The result depends on the number of bytes per element of the format and
 * matches the shapes used by tile_image() and untile_image():
 *   16 bytes -> 1 x 1
 *    8 bytes -> 2 x 2
 *    4 bytes -> 4 x 2
 *    2 bytes -> 4 x 4 if the format has depth bits, 8 x 2 otherwise
 *    1 byte  -> 8 x 4
 *
 * format        format to query
 * block_width   receives the tile width
 * block_height  receives the tile height
 *
 * For any other element size an assertion fires; when assertions are
 * compiled out, *block_width and *block_height are left unmodified.
 */
void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
{
    switch (_mesa_get_format_bytes(format))
    {
        case 16:
            *block_width = 1;
            *block_height = 1;
            break;
        case 8:
            *block_width = 2;
            *block_height = 2;
            break;
        case 4:
            *block_width = 4;
            *block_height = 2;
            break;
        case 2:
            /* 16-bit depth formats use a different tile shape than color. */
            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
            {
                *block_width = 4;
                *block_height = 4;
            }
            else
            {
                *block_width = 8;
                *block_height = 2;
            }
            break;
        case 1:
            *block_width = 8;
            *block_height = 4;
            break;
        default:
            assert(0);
            break;
    }
}