1 /*
2 * Copyright © 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018-2019 Google, Inc.
4 * SPDX-License-Identifier: MIT
5 *
6 * Authors:
7 * Rob Clark <robclark@freedesktop.org>
8 */
9
10 #include <stdio.h>
11
12 #include "freedreno_layout.h"
13
14 #include "adreno_pm4.xml.h"
15 #include "adreno_common.xml.h"
16 #include "a6xx.xml.h"
17
18 static bool
is_r8g8(const struct fdl_layout * layout)19 is_r8g8(const struct fdl_layout *layout)
20 {
21 return layout->cpp == 2 &&
22 util_format_get_nr_components(layout->format) == 2 &&
23 !layout->is_mutable;
24 }
25
26 void
fdl6_get_ubwc_blockwidth(const struct fdl_layout * layout,uint32_t * blockwidth,uint32_t * blockheight)27 fdl6_get_ubwc_blockwidth(const struct fdl_layout *layout,
28 uint32_t *blockwidth, uint32_t *blockheight)
29 {
30 /* UBWC compression for cpp above 32 isn't supported,
31 * and using zero blocksize will effectively disable it.
32 */
33 static const struct {
34 uint8_t width;
35 uint8_t height;
36 } blocksize[] = {
37 { 16, 4 }, /* cpp = 1 */
38 { 16, 4 }, /* cpp = 2 */
39 { 16, 4 }, /* cpp = 4 */
40 { 8, 4 }, /* cpp = 8 */
41 { 4, 4 }, /* cpp = 16 */
42 { 4, 2 }, /* cpp = 32 */
43 { 0, 0 }, /* cpp = 64 */
44 { 0, 0 }, /* cpp = 128 */
45 };
46
47 /* special case for r8g8: */
48 if (is_r8g8(layout)) {
49 *blockwidth = 16;
50 *blockheight = 8;
51 return;
52 }
53
54 if (layout->format == PIPE_FORMAT_Y8_UNORM) {
55 *blockwidth = 32;
56 *blockheight = 8;
57 return;
58 }
59
60 /* special case for 1bpp/2bpp + MSAA (note layout->cpp is already
61 * pre-multiplied by nr_samples):
62 */
63 if ((layout->cpp / layout->nr_samples <= 2) && (layout->nr_samples > 1)) {
64 if (layout->nr_samples == 2) {
65 *blockwidth = 8;
66 *blockheight = 4;
67 } else if (layout->nr_samples == 4) {
68 *blockwidth = 4;
69 *blockheight = 4;
70 } else if (layout->nr_samples == 8) {
71 *blockwidth = 4;
72 *blockheight = 2;
73 } else {
74 unreachable("bad nr_samples");
75 }
76 return;
77 }
78
79 uint32_t cpp = fdl_cpp_shift(layout);
80 assert(cpp < ARRAY_SIZE(blocksize));
81 *blockwidth = blocksize[cpp].width;
82 *blockheight = blocksize[cpp].height;
83 }
84
85 static void
fdl6_tile_alignment(struct fdl_layout * layout,uint32_t * heightalign)86 fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
87 {
88 layout->pitchalign = fdl_cpp_shift(layout);
89 *heightalign = 16;
90
91 if (is_r8g8(layout) || layout->cpp == 1) {
92 layout->pitchalign = 1;
93 *heightalign = 32;
94 } else if (layout->cpp == 2) {
95 layout->pitchalign = 2;
96 }
97
98 /* Empirical evidence suggests that images with UBWC could have much
99 * looser alignment requirements, however the validity of alignment is
100 * heavily undertested and the "officially" supported alignment is 4096b.
101 */
102 if (layout->ubwc || util_format_is_depth_or_stencil(layout->format) ||
103 is_r8g8(layout))
104 layout->base_align = 4096;
105 else if (layout->cpp == 1)
106 layout->base_align = 64;
107 else if (layout->cpp == 2)
108 layout->base_align = 128;
109 else
110 layout->base_align = 256;
111 }
112
113 /* NOTE: good way to test this is: (for example)
114 * piglit/bin/texelFetch fs sampler3D 100x100x8
115 */
116 bool
fdl6_layout(struct fdl_layout * layout,const struct fd_dev_info * info,enum pipe_format format,uint32_t nr_samples,uint32_t width0,uint32_t height0,uint32_t depth0,uint32_t mip_levels,uint32_t array_size,bool is_3d,bool is_mutable,struct fdl_explicit_layout * explicit_layout)117 fdl6_layout(struct fdl_layout *layout, const struct fd_dev_info *info,
118 enum pipe_format format, uint32_t nr_samples, uint32_t width0,
119 uint32_t height0, uint32_t depth0, uint32_t mip_levels,
120 uint32_t array_size, bool is_3d, bool is_mutable,
121 struct fdl_explicit_layout *explicit_layout)
122 {
123 uint32_t offset = 0, heightalign;
124 uint32_t ubwc_blockwidth, ubwc_blockheight;
125
126 assert(nr_samples > 0);
127 layout->width0 = width0;
128 layout->height0 = height0;
129 layout->depth0 = depth0;
130 layout->mip_levels = mip_levels;
131
132 layout->cpp = util_format_get_blocksize(format);
133 layout->cpp *= nr_samples;
134 layout->cpp_shift = ffs(layout->cpp) - 1;
135
136 layout->format = format;
137 layout->nr_samples = nr_samples;
138 layout->layer_first = !is_3d;
139 layout->is_mutable = is_mutable;
140
141 fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
142
143 /* For simplicity support UBWC only for 3D images without mipmaps,
144 * most d3d11 games don't use mipmaps for 3D images.
145 */
146 if (depth0 > 1 && mip_levels > 1)
147 layout->ubwc = false;
148
149 if (ubwc_blockwidth == 0)
150 layout->ubwc = false;
151
152 if (width0 < FDL_MIN_UBWC_WIDTH) {
153 layout->ubwc = false;
154 /* Linear D/S is not supported by HW. */
155 if (!util_format_is_depth_or_stencil(format))
156 layout->tile_mode = TILE6_LINEAR;
157 }
158
159 /* Linear D/S is not supported by HW. */
160 if (util_format_is_depth_or_stencil(format))
161 layout->tile_all = true;
162
163 if (layout->ubwc && !info->a6xx.has_ubwc_linear_mipmap_fallback)
164 layout->tile_all = true;
165
166 /* in layer_first layout, the level (slice) contains just one
167 * layer (since in fact the layer contains the slices)
168 */
169 uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
170
171 /* note: for tiled+noubwc layouts, we can use a lower pitchalign
172 * which will affect the linear levels only, (the hardware will still
173 * expect the tiled alignment on the tiled levels)
174 */
175 if (layout->tile_mode) {
176 fdl6_tile_alignment(layout, &heightalign);
177 } else {
178 layout->base_align = 64;
179 layout->pitchalign = 0;
180 /* align pitch to at least 16 pixels:
181 * both turnip and galium assume there is enough alignment for 16x4
182 * aligned gmem store. turnip can use CP_BLIT to work without this
183 * extra alignment, but gallium driver doesn't implement it yet
184 */
185 if (layout->cpp > 4)
186 layout->pitchalign = fdl_cpp_shift(layout) - 2;
187
188 /* when possible, use a bit more alignment than necessary
189 * presumably this is better for performance?
190 */
191 if (!explicit_layout)
192 layout->pitchalign = fdl_cpp_shift(layout);
193
194 /* not used, avoid "may be used uninitialized" warning */
195 heightalign = 1;
196 }
197
198 fdl_set_pitchalign(layout, layout->pitchalign + 6);
199
200 if (explicit_layout) {
201 offset = explicit_layout->offset;
202 layout->pitch0 = explicit_layout->pitch;
203 if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
204 return false;
205 }
206
207 uint32_t ubwc_width0 = width0;
208 uint32_t ubwc_height0 = height0;
209 uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
210 if (mip_levels > 1) {
211 /* With mipmapping enabled, UBWC layout is power-of-two sized,
212 * specified in log2 width/height in the descriptors. The height
213 * alignment is 64 for mipmapping, but for buffer sharing (always
214 * single level) other participants expect 16.
215 */
216 ubwc_width0 = util_next_power_of_two(width0);
217 ubwc_height0 = util_next_power_of_two(height0);
218 ubwc_tile_height_alignment = 64;
219 }
220 layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
221 RGB_TILE_WIDTH_ALIGNMENT);
222 ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
223 ubwc_tile_height_alignment);
224
225 uint32_t min_3d_layer_size = 0;
226
227 for (uint32_t level = 0; level < mip_levels; level++) {
228 uint32_t depth = u_minify(depth0, level);
229 struct fdl_slice *slice = &layout->slices[level];
230 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
231 enum a6xx_tile_mode tile_mode = fdl_tile_mode(layout, level);
232 uint32_t pitch = fdl_pitch(layout, level);
233 uint32_t height = u_minify(height0, level);
234
235 uint32_t nblocksy = util_format_get_nblocksy(format, height);
236 if (tile_mode)
237 nblocksy = align(nblocksy, heightalign);
238
239 /* The blits used for mem<->gmem work at a granularity of
240 * 16x4, which can cause faults due to over-fetch on the
241 * last level. The simple solution is to over-allocate a
242 * bit the last level to ensure any over-fetch is harmless.
243 * The pitch is already sufficiently aligned, but height
244 * may not be. note this only matters if last level is linear
245 */
246 if (level == mip_levels - 1)
247 nblocksy = align(nblocksy, 4);
248
249 slice->offset = offset + layout->size;
250
251 /* 1d array and 2d array textures must all have the same layer size for
252 * each miplevel on a6xx. For 3D, the layer size automatically reduces
253 * until the value we specify in TEX_CONST_3_MIN_LAYERSZ, which is used to
254 * make sure that we follow alignment requirements after minification.
255 */
256 if (is_3d) {
257 if (level == 0) {
258 slice->size0 = align(nblocksy * pitch, 4096);
259 } else if (min_3d_layer_size) {
260 slice->size0 = min_3d_layer_size;
261 } else {
262 /* Note: level * 2 for minifying in both X and Y. */
263 slice->size0 = u_minify(layout->slices[0].size0, level * 2);
264
265 /* If this level didn't reduce the pitch by half, then fix it up,
266 * and this is the end of layer size reduction.
267 */
268 uint32_t pitch = fdl_pitch(layout, level);
269 if (pitch != fdl_pitch(layout, level - 1) / 2)
270 min_3d_layer_size = slice->size0 = nblocksy * pitch;
271
272 /* If the height wouldn't be aligned, stay aligned instead */
273 if (slice->size0 < nblocksy * pitch)
274 min_3d_layer_size = slice->size0 = nblocksy * pitch;
275
276 /* If the size would become un-page-aligned, stay aligned instead. */
277 if (align(slice->size0, 4096) != slice->size0)
278 min_3d_layer_size = slice->size0 = align(slice->size0, 4096);
279 }
280 } else {
281 slice->size0 = nblocksy * pitch;
282 }
283
284 layout->size += slice->size0 * depth * layers_in_level;
285
286 if (layout->ubwc && tile_mode != TILE6_LINEAR) {
287 /* with UBWC every level is aligned to 4K */
288 layout->size = align64(layout->size, 4096);
289
290 uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
291 uint32_t meta_height =
292 align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment);
293
294 ubwc_slice->size0 =
295 align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
296 ubwc_slice->offset = offset + layout->ubwc_layer_size;
297 layout->ubwc_layer_size += ubwc_slice->size0;
298 }
299 }
300
301 if (layout->layer_first) {
302 layout->layer_size = align64(layout->size, 4096);
303 layout->size = layout->layer_size * array_size;
304 }
305
306 /* Place the UBWC slices before the uncompressed slices, because the
307 * kernel expects UBWC to be at the start of the buffer. In the HW, we
308 * get to program the UBWC and non-UBWC offset/strides
309 * independently.
310 */
311 if (layout->ubwc) {
312 assert(!(depth0 > 1 && mip_levels > 1));
313 for (uint32_t level = 0; level < mip_levels; level++)
314 layout->slices[level].offset += layout->ubwc_layer_size * array_size * depth0;
315 layout->size += layout->ubwc_layer_size * array_size * depth0;
316 }
317
318 /* include explicit offset in size */
319 layout->size += offset;
320
321 return true;
322 }
323