• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018-2019 Google, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Rob Clark <robclark@freedesktop.org>
26  */
27 
28 #include <stdio.h>
29 
30 #include "freedreno_layout.h"
31 
32 static bool
is_r8g8(const struct fdl_layout * layout)33 is_r8g8(const struct fdl_layout *layout)
34 {
35    return layout->cpp == 2 &&
36           util_format_get_nr_components(layout->format) == 2;
37 }
38 
39 void
fdl6_get_ubwc_blockwidth(const struct fdl_layout * layout,uint32_t * blockwidth,uint32_t * blockheight)40 fdl6_get_ubwc_blockwidth(const struct fdl_layout *layout,
41                          uint32_t *blockwidth, uint32_t *blockheight)
42 {
43    static const struct {
44       uint8_t width;
45       uint8_t height;
46    } blocksize[] = {
47       { 16, 4 }, /* cpp = 1 */
48       { 16, 4 }, /* cpp = 2 */
49       { 16, 4 }, /* cpp = 4 */
50       {  8, 4 }, /* cpp = 8 */
51       {  4, 4 }, /* cpp = 16 */
52       {  4, 2 }, /* cpp = 32 */
53       {  0, 0 }, /* cpp = 64 (TODO) */
54    };
55 
56    /* special case for r8g8: */
57    if (is_r8g8(layout)) {
58       *blockwidth = 16;
59       *blockheight = 8;
60       return;
61    }
62 
63    if (layout->format == PIPE_FORMAT_Y8_UNORM) {
64       *blockwidth = 32;
65       *blockheight = 8;
66       return;
67    }
68 
69    /* special case for 2bpp + MSAA (not layout->cpp is already
70     * pre-multiplied by nr_samples):
71     */
72    if ((layout->cpp / layout->nr_samples == 2) && (layout->nr_samples > 1)) {
73       if (layout->nr_samples == 2) {
74          *blockwidth = 8;
75          *blockheight = 4;
76       } else if (layout->nr_samples == 4) {
77          *blockwidth = 4;
78          *blockheight = 4;
79       } else {
80          unreachable("bad nr_samples");
81       }
82       return;
83    }
84 
85    uint32_t cpp = fdl_cpp_shift(layout);
86    assert(cpp < ARRAY_SIZE(blocksize));
87    *blockwidth = blocksize[cpp].width;
88    *blockheight = blocksize[cpp].height;
89 }
90 
91 static void
fdl6_tile_alignment(struct fdl_layout * layout,uint32_t * heightalign)92 fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
93 {
94    layout->pitchalign = fdl_cpp_shift(layout);
95    *heightalign = 16;
96 
97    if (is_r8g8(layout) || layout->cpp == 1) {
98       layout->pitchalign = 1;
99       *heightalign = 32;
100    } else if (layout->cpp == 2) {
101       layout->pitchalign = 2;
102    }
103 
104    /* Empirical evidence suggests that images with UBWC could have much
105     * looser alignment requirements, however the validity of alignment is
106     * heavily undertested and the "officially" supported alignment is 4096b.
107     */
108    if (layout->ubwc || util_format_is_depth_or_stencil(layout->format))
109       layout->base_align = 4096;
110    else if (layout->cpp == 1)
111       layout->base_align = 64;
112    else if (layout->cpp == 2)
113       layout->base_align = 128;
114    else
115       layout->base_align = 256;
116 }
117 
118 /* NOTE: good way to test this is:  (for example)
119  *  piglit/bin/texelFetch fs sampler3D 100x100x8
120  */
121 bool
fdl6_layout(struct fdl_layout * layout,enum pipe_format format,uint32_t nr_samples,uint32_t width0,uint32_t height0,uint32_t depth0,uint32_t mip_levels,uint32_t array_size,bool is_3d,struct fdl_explicit_layout * explicit_layout)122 fdl6_layout(struct fdl_layout *layout, enum pipe_format format,
123             uint32_t nr_samples, uint32_t width0, uint32_t height0,
124             uint32_t depth0, uint32_t mip_levels, uint32_t array_size,
125             bool is_3d, struct fdl_explicit_layout *explicit_layout)
126 {
127    uint32_t offset = 0, heightalign;
128    uint32_t ubwc_blockwidth, ubwc_blockheight;
129 
130    assert(nr_samples > 0);
131    layout->width0 = width0;
132    layout->height0 = height0;
133    layout->depth0 = depth0;
134    layout->mip_levels = mip_levels;
135 
136    layout->cpp = util_format_get_blocksize(format);
137    layout->cpp *= nr_samples;
138    layout->cpp_shift = ffs(layout->cpp) - 1;
139 
140    layout->format = format;
141    layout->nr_samples = nr_samples;
142    layout->layer_first = !is_3d;
143 
144    fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
145 
146    if (depth0 > 1 || ubwc_blockwidth == 0)
147       layout->ubwc = false;
148 
149    if (layout->ubwc || util_format_is_depth_or_stencil(format))
150       layout->tile_all = true;
151 
152    /* in layer_first layout, the level (slice) contains just one
153     * layer (since in fact the layer contains the slices)
154     */
155    uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
156 
157    /* note: for tiled+noubwc layouts, we can use a lower pitchalign
158     * which will affect the linear levels only, (the hardware will still
159     * expect the tiled alignment on the tiled levels)
160     */
161    if (layout->tile_mode) {
162       fdl6_tile_alignment(layout, &heightalign);
163    } else {
164       layout->base_align = 64;
165       layout->pitchalign = 0;
166       /* align pitch to at least 16 pixels:
167        * both turnip and galium assume there is enough alignment for 16x4
168        * aligned gmem store. turnip can use CP_BLIT to work without this
169        * extra alignment, but gallium driver doesn't implement it yet
170        */
171       if (layout->cpp > 4)
172          layout->pitchalign = fdl_cpp_shift(layout) - 2;
173 
174       /* when possible, use a bit more alignment than necessary
175        * presumably this is better for performance?
176        */
177       if (!explicit_layout)
178          layout->pitchalign = fdl_cpp_shift(layout);
179 
180       /* not used, avoid "may be used uninitialized" warning */
181       heightalign = 1;
182    }
183 
184    fdl_set_pitchalign(layout, layout->pitchalign + 6);
185 
186    if (explicit_layout) {
187       offset = explicit_layout->offset;
188       layout->pitch0 = explicit_layout->pitch;
189       if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
190          return false;
191    }
192 
193    uint32_t ubwc_width0 = width0;
194    uint32_t ubwc_height0 = height0;
195    uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
196    if (mip_levels > 1) {
197       /* With mipmapping enabled, UBWC layout is power-of-two sized,
198        * specified in log2 width/height in the descriptors.  The height
199        * alignment is 64 for mipmapping, but for buffer sharing (always
200        * single level) other participants expect 16.
201        */
202       ubwc_width0 = util_next_power_of_two(width0);
203       ubwc_height0 = util_next_power_of_two(height0);
204       ubwc_tile_height_alignment = 64;
205    }
206    layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
207                                RGB_TILE_WIDTH_ALIGNMENT);
208    ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
209                         ubwc_tile_height_alignment);
210 
211    uint32_t min_3d_layer_size = 0;
212 
213    for (uint32_t level = 0; level < mip_levels; level++) {
214       uint32_t depth = u_minify(depth0, level);
215       struct fdl_slice *slice = &layout->slices[level];
216       struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
217       uint32_t tile_mode = fdl_tile_mode(layout, level);
218       uint32_t pitch = fdl_pitch(layout, level);
219       uint32_t height = u_minify(height0, level);
220 
221       uint32_t nblocksy = util_format_get_nblocksy(format, height);
222       if (tile_mode)
223          nblocksy = align(nblocksy, heightalign);
224 
225       /* The blits used for mem<->gmem work at a granularity of
226        * 16x4, which can cause faults due to over-fetch on the
227        * last level.  The simple solution is to over-allocate a
228        * bit the last level to ensure any over-fetch is harmless.
229        * The pitch is already sufficiently aligned, but height
230        * may not be. note this only matters if last level is linear
231        */
232       if (level == mip_levels - 1)
233          nblocksy = align(nblocksy, 4);
234 
235       slice->offset = offset + layout->size;
236 
237       /* 1d array and 2d array textures must all have the same layer size for
238        * each miplevel on a6xx.  For 3D, the layer size automatically reduces
239        * until the value we specify in TEX_CONST_3_MIN_LAYERSZ, which is used to
240        * make sure that we follow alignment requirements after minification.
241        */
242       if (is_3d) {
243          if (level == 0) {
244             slice->size0 = align(nblocksy * pitch, 4096);
245          } else if (min_3d_layer_size) {
246             slice->size0 = min_3d_layer_size;
247          } else {
248             /* Note: level * 2 for minifying in both X and Y. */
249             slice->size0 = u_minify(layout->slices[0].size0, level * 2);
250 
251             /* If this level didn't reduce the pitch by half, then fix it up,
252              * and this is the end of layer size reduction.
253              */
254             uint32_t pitch = fdl_pitch(layout, level);
255             if (pitch != fdl_pitch(layout, level - 1) / 2)
256                min_3d_layer_size = slice->size0 = nblocksy * pitch;
257 
258             /* If the height is now less than the alignment requirement, then
259              * scale it up and let this be the minimum layer size.
260              */
261             if (tile_mode && util_format_get_nblocksy(format, height) < heightalign)
262                min_3d_layer_size = slice->size0 = nblocksy * pitch;
263 
264             /* If the size would become un-page-aligned, stay aligned instead. */
265             if (align(slice->size0, 4096) != slice->size0)
266                min_3d_layer_size = slice->size0 = align(slice->size0, 4096);
267          }
268       } else {
269          slice->size0 = nblocksy * pitch;
270       }
271 
272       layout->size += slice->size0 * depth * layers_in_level;
273 
274       if (layout->ubwc) {
275          /* with UBWC every level is aligned to 4K */
276          layout->size = align(layout->size, 4096);
277 
278          uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
279          uint32_t meta_height =
280             align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment);
281 
282          ubwc_slice->size0 =
283             align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
284          ubwc_slice->offset = offset + layout->ubwc_layer_size;
285          layout->ubwc_layer_size += ubwc_slice->size0;
286       }
287    }
288 
289    if (layout->layer_first) {
290       layout->layer_size = align(layout->size, 4096);
291       layout->size = layout->layer_size * array_size;
292    }
293 
294    /* Place the UBWC slices before the uncompressed slices, because the
295     * kernel expects UBWC to be at the start of the buffer.  In the HW, we
296     * get to program the UBWC and non-UBWC offset/strides
297     * independently.
298     */
299    if (layout->ubwc) {
300       for (uint32_t level = 0; level < mip_levels; level++)
301          layout->slices[level].offset += layout->ubwc_layer_size * array_size;
302       layout->size += layout->ubwc_layer_size * array_size;
303    }
304 
305    /* include explicit offset in size */
306    layout->size += offset;
307 
308    return true;
309 }
310