• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018-2019 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <robclark@freedesktop.org>
8  */
9 
10 #include <stdio.h>
11 
12 #include "freedreno_layout.h"
13 
14 #include "adreno_pm4.xml.h"
15 #include "adreno_common.xml.h"
16 #include "a6xx.xml.h"
17 
18 static bool
is_r8g8(const struct fdl_layout * layout)19 is_r8g8(const struct fdl_layout *layout)
20 {
21    return layout->cpp == 2 &&
22           util_format_get_nr_components(layout->format) == 2 &&
23           !layout->is_mutable;
24 }
25 
26 void
fdl6_get_ubwc_blockwidth(const struct fdl_layout * layout,uint32_t * blockwidth,uint32_t * blockheight)27 fdl6_get_ubwc_blockwidth(const struct fdl_layout *layout,
28                          uint32_t *blockwidth, uint32_t *blockheight)
29 {
30    /* UBWC compression for cpp above 32 isn't supported,
31     * and using zero blocksize will effectively disable it.
32     */
33    static const struct {
34       uint8_t width;
35       uint8_t height;
36    } blocksize[] = {
37       { 16, 4 }, /* cpp = 1 */
38       { 16, 4 }, /* cpp = 2 */
39       { 16, 4 }, /* cpp = 4 */
40       {  8, 4 }, /* cpp = 8 */
41       {  4, 4 }, /* cpp = 16 */
42       {  4, 2 }, /* cpp = 32 */
43       {  0, 0 }, /* cpp = 64 */
44       {  0, 0 }, /* cpp = 128 */
45    };
46 
47    /* special case for r8g8: */
48    if (is_r8g8(layout)) {
49       *blockwidth = 16;
50       *blockheight = 8;
51       return;
52    }
53 
54    if (layout->format == PIPE_FORMAT_Y8_UNORM) {
55       *blockwidth = 32;
56       *blockheight = 8;
57       return;
58    }
59 
60    /* special case for 1bpp/2bpp + MSAA (note layout->cpp is already
61     * pre-multiplied by nr_samples):
62     */
63    if ((layout->cpp / layout->nr_samples <= 2) && (layout->nr_samples > 1)) {
64       if (layout->nr_samples == 2) {
65          *blockwidth = 8;
66          *blockheight = 4;
67       } else if (layout->nr_samples == 4) {
68          *blockwidth = 4;
69          *blockheight = 4;
70       } else if (layout->nr_samples == 8) {
71          *blockwidth = 4;
72          *blockheight = 2;
73       } else {
74          unreachable("bad nr_samples");
75       }
76       return;
77    }
78 
79    uint32_t cpp = fdl_cpp_shift(layout);
80    assert(cpp < ARRAY_SIZE(blocksize));
81    *blockwidth = blocksize[cpp].width;
82    *blockheight = blocksize[cpp].height;
83 }
84 
85 static void
fdl6_tile_alignment(struct fdl_layout * layout,uint32_t * heightalign)86 fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
87 {
88    layout->pitchalign = fdl_cpp_shift(layout);
89    *heightalign = 16;
90 
91    if (is_r8g8(layout) || layout->cpp == 1) {
92       layout->pitchalign = 1;
93       *heightalign = 32;
94    } else if (layout->cpp == 2) {
95       layout->pitchalign = 2;
96    }
97 
98    /* Empirical evidence suggests that images with UBWC could have much
99     * looser alignment requirements, however the validity of alignment is
100     * heavily undertested and the "officially" supported alignment is 4096b.
101     */
102    if (layout->ubwc || util_format_is_depth_or_stencil(layout->format) ||
103        is_r8g8(layout))
104       layout->base_align = 4096;
105    else if (layout->cpp == 1)
106       layout->base_align = 64;
107    else if (layout->cpp == 2)
108       layout->base_align = 128;
109    else
110       layout->base_align = 256;
111 }
112 
113 /* NOTE: good way to test this is:  (for example)
114  *  piglit/bin/texelFetch fs sampler3D 100x100x8
115  */
116 bool
fdl6_layout(struct fdl_layout * layout,const struct fd_dev_info * info,enum pipe_format format,uint32_t nr_samples,uint32_t width0,uint32_t height0,uint32_t depth0,uint32_t mip_levels,uint32_t array_size,bool is_3d,bool is_mutable,struct fdl_explicit_layout * explicit_layout)117 fdl6_layout(struct fdl_layout *layout, const struct fd_dev_info *info,
118             enum pipe_format format, uint32_t nr_samples, uint32_t width0,
119             uint32_t height0, uint32_t depth0, uint32_t mip_levels,
120             uint32_t array_size, bool is_3d, bool is_mutable,
121             struct fdl_explicit_layout *explicit_layout)
122 {
123    uint32_t offset = 0, heightalign;
124    uint32_t ubwc_blockwidth, ubwc_blockheight;
125 
126    assert(nr_samples > 0);
127    layout->width0 = width0;
128    layout->height0 = height0;
129    layout->depth0 = depth0;
130    layout->mip_levels = mip_levels;
131 
132    layout->cpp = util_format_get_blocksize(format);
133    layout->cpp *= nr_samples;
134    layout->cpp_shift = ffs(layout->cpp) - 1;
135 
136    layout->format = format;
137    layout->nr_samples = nr_samples;
138    layout->layer_first = !is_3d;
139    layout->is_mutable = is_mutable;
140 
141    fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
142 
143    /* For simplicity support UBWC only for 3D images without mipmaps,
144     * most d3d11 games don't use mipmaps for 3D images.
145     */
146    if (depth0 > 1 && mip_levels > 1)
147       layout->ubwc = false;
148 
149    if (ubwc_blockwidth == 0)
150       layout->ubwc = false;
151 
152    if (width0 < FDL_MIN_UBWC_WIDTH) {
153       layout->ubwc = false;
154       /* Linear D/S is not supported by HW. */
155       if (!util_format_is_depth_or_stencil(format))
156          layout->tile_mode = TILE6_LINEAR;
157    }
158 
159    /* Linear D/S is not supported by HW. */
160    if (util_format_is_depth_or_stencil(format))
161       layout->tile_all = true;
162 
163    if (layout->ubwc && !info->a6xx.has_ubwc_linear_mipmap_fallback)
164       layout->tile_all = true;
165 
166    /* in layer_first layout, the level (slice) contains just one
167     * layer (since in fact the layer contains the slices)
168     */
169    uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
170 
171    /* note: for tiled+noubwc layouts, we can use a lower pitchalign
172     * which will affect the linear levels only, (the hardware will still
173     * expect the tiled alignment on the tiled levels)
174     */
175    if (layout->tile_mode) {
176       fdl6_tile_alignment(layout, &heightalign);
177    } else {
178       layout->base_align = 64;
179       layout->pitchalign = 0;
180       /* align pitch to at least 16 pixels:
181        * both turnip and galium assume there is enough alignment for 16x4
182        * aligned gmem store. turnip can use CP_BLIT to work without this
183        * extra alignment, but gallium driver doesn't implement it yet
184        */
185       if (layout->cpp > 4)
186          layout->pitchalign = fdl_cpp_shift(layout) - 2;
187 
188       /* when possible, use a bit more alignment than necessary
189        * presumably this is better for performance?
190        */
191       if (!explicit_layout)
192          layout->pitchalign = fdl_cpp_shift(layout);
193 
194       /* not used, avoid "may be used uninitialized" warning */
195       heightalign = 1;
196    }
197 
198    fdl_set_pitchalign(layout, layout->pitchalign + 6);
199 
200    if (explicit_layout) {
201       offset = explicit_layout->offset;
202       layout->pitch0 = explicit_layout->pitch;
203       if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
204          return false;
205    }
206 
207    uint32_t ubwc_width0 = width0;
208    uint32_t ubwc_height0 = height0;
209    uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
210    if (mip_levels > 1) {
211       /* With mipmapping enabled, UBWC layout is power-of-two sized,
212        * specified in log2 width/height in the descriptors.  The height
213        * alignment is 64 for mipmapping, but for buffer sharing (always
214        * single level) other participants expect 16.
215        */
216       ubwc_width0 = util_next_power_of_two(width0);
217       ubwc_height0 = util_next_power_of_two(height0);
218       ubwc_tile_height_alignment = 64;
219    }
220    layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
221                                RGB_TILE_WIDTH_ALIGNMENT);
222    ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
223                         ubwc_tile_height_alignment);
224 
225    uint32_t min_3d_layer_size = 0;
226 
227    for (uint32_t level = 0; level < mip_levels; level++) {
228       uint32_t depth = u_minify(depth0, level);
229       struct fdl_slice *slice = &layout->slices[level];
230       struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
231       enum a6xx_tile_mode tile_mode = fdl_tile_mode(layout, level);
232       uint32_t pitch = fdl_pitch(layout, level);
233       uint32_t height = u_minify(height0, level);
234 
235       uint32_t nblocksy = util_format_get_nblocksy(format, height);
236       if (tile_mode)
237          nblocksy = align(nblocksy, heightalign);
238 
239       /* The blits used for mem<->gmem work at a granularity of
240        * 16x4, which can cause faults due to over-fetch on the
241        * last level.  The simple solution is to over-allocate a
242        * bit the last level to ensure any over-fetch is harmless.
243        * The pitch is already sufficiently aligned, but height
244        * may not be. note this only matters if last level is linear
245        */
246       if (level == mip_levels - 1)
247          nblocksy = align(nblocksy, 4);
248 
249       slice->offset = offset + layout->size;
250 
251       /* 1d array and 2d array textures must all have the same layer size for
252        * each miplevel on a6xx.  For 3D, the layer size automatically reduces
253        * until the value we specify in TEX_CONST_3_MIN_LAYERSZ, which is used to
254        * make sure that we follow alignment requirements after minification.
255        */
256       if (is_3d) {
257          if (level == 0) {
258             slice->size0 = align(nblocksy * pitch, 4096);
259          } else if (min_3d_layer_size) {
260             slice->size0 = min_3d_layer_size;
261          } else {
262             /* Note: level * 2 for minifying in both X and Y. */
263             slice->size0 = u_minify(layout->slices[0].size0, level * 2);
264 
265             /* If this level didn't reduce the pitch by half, then fix it up,
266              * and this is the end of layer size reduction.
267              */
268             uint32_t pitch = fdl_pitch(layout, level);
269             if (pitch != fdl_pitch(layout, level - 1) / 2)
270                min_3d_layer_size = slice->size0 = nblocksy * pitch;
271 
272             /* If the height wouldn't be aligned, stay aligned instead */
273             if (slice->size0 < nblocksy * pitch)
274                min_3d_layer_size = slice->size0 = nblocksy * pitch;
275 
276             /* If the size would become un-page-aligned, stay aligned instead. */
277             if (align(slice->size0, 4096) != slice->size0)
278                min_3d_layer_size = slice->size0 = align(slice->size0, 4096);
279          }
280       } else {
281          slice->size0 = nblocksy * pitch;
282       }
283 
284       layout->size += slice->size0 * depth * layers_in_level;
285 
286       if (layout->ubwc && tile_mode != TILE6_LINEAR) {
287          /* with UBWC every level is aligned to 4K */
288          layout->size = align64(layout->size, 4096);
289 
290          uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
291          uint32_t meta_height =
292             align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment);
293 
294          ubwc_slice->size0 =
295             align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
296          ubwc_slice->offset = offset + layout->ubwc_layer_size;
297          layout->ubwc_layer_size += ubwc_slice->size0;
298       }
299    }
300 
301    if (layout->layer_first) {
302       layout->layer_size = align64(layout->size, 4096);
303       layout->size = layout->layer_size * array_size;
304    }
305 
306    /* Place the UBWC slices before the uncompressed slices, because the
307     * kernel expects UBWC to be at the start of the buffer.  In the HW, we
308     * get to program the UBWC and non-UBWC offset/strides
309     * independently.
310     */
311    if (layout->ubwc) {
312       assert(!(depth0 > 1 && mip_levels > 1));
313       for (uint32_t level = 0; level < mip_levels; level++)
314          layout->slices[level].offset += layout->ubwc_layer_size * array_size * depth0;
315       layout->size += layout->ubwc_layer_size * array_size * depth0;
316    }
317 
318    /* include explicit offset in size */
319    layout->size += offset;
320 
321    return true;
322 }
323