• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018-2019 Google, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Rob Clark <robclark@freedesktop.org>
26  */
27 
28 #include <stdio.h>
29 
30 #include "freedreno_layout.h"
31 
32 static bool
is_r8g8(struct fdl_layout * layout)33 is_r8g8(struct fdl_layout *layout)
34 {
35 	return layout->cpp == 2 &&
36 		   util_format_get_nr_components(layout->format) == 2;
37 }
38 
/*
 * Look up the UBWC block dimensions for a layout.
 *
 * layout:      layout whose cpp/format select the block size
 * blockwidth:  out, receives the block width
 * blockheight: out, receives the block height
 *
 * Both outputs are 0 for the cpp = 64 table entry, which is not filled in
 * yet (TODO).
 */
void
fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
		uint32_t *blockwidth, uint32_t *blockheight)
{
	/* Indexed by the value returned from fdl_cpp_shift(layout). */
	static const struct {
		uint8_t width;
		uint8_t height;
	} ubwc_block[] = {
		[0] = { .width = 16, .height = 4 }, /* cpp = 1 */
		[1] = { .width = 16, .height = 4 }, /* cpp = 2 */
		[2] = { .width = 16, .height = 4 }, /* cpp = 4 */
		[3] = { .width = 8,  .height = 4 }, /* cpp = 8 */
		[4] = { .width = 4,  .height = 4 }, /* cpp = 16 */
		[5] = { .width = 4,  .height = 2 }, /* cpp = 32 */
		[6] = { .width = 0,  .height = 0 }, /* cpp = 64 (TODO) */
	};
	uint32_t w, h;

	if (is_r8g8(layout)) {
		/* r8g8 does not use the table: it gets a 16x8 block */
		w = 16;
		h = 8;
	} else {
		uint32_t idx = fdl_cpp_shift(layout);

		assert(idx < ARRAY_SIZE(ubwc_block));
		w = ubwc_block[idx].width;
		h = ubwc_block[idx].height;
	}

	*blockwidth = w;
	*blockheight = h;
}
68 
69 static void
fdl6_tile_alignment(struct fdl_layout * layout,uint32_t * heightalign)70 fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
71 {
72 	layout->pitchalign = fdl_cpp_shift(layout);
73 	*heightalign = 16;
74 
75 	if (is_r8g8(layout) || layout->cpp == 1) {
76 		layout->pitchalign = 1;
77 		*heightalign = 32;
78 	} else if (layout->cpp == 2) {
79 		layout->pitchalign = 2;
80 	}
81 
82 	/* note: this base_align is *probably* not always right,
83 	 * it doesn't really get tested. for example with UBWC we might
84 	 * want 4k alignment, since we align UBWC levels to 4k
85 	 */
86 	if (layout->cpp == 1)
87 		layout->base_align = 64;
88 	else if (layout->cpp == 2)
89 		layout->base_align = 128;
90 	else
91 		layout->base_align = 256;
92 }
93 
94 /* NOTE: good way to test this is:  (for example)
95  *  piglit/bin/texelFetch fs sampler3D 100x100x8
96  */
97 bool
fdl6_layout(struct fdl_layout * layout,enum pipe_format format,uint32_t nr_samples,uint32_t width0,uint32_t height0,uint32_t depth0,uint32_t mip_levels,uint32_t array_size,bool is_3d,struct fdl_explicit_layout * explicit_layout)98 fdl6_layout(struct fdl_layout *layout,
99 		enum pipe_format format, uint32_t nr_samples,
100 		uint32_t width0, uint32_t height0, uint32_t depth0,
101 		uint32_t mip_levels, uint32_t array_size, bool is_3d,
102 		struct fdl_explicit_layout *explicit_layout)
103 {
104 	uint32_t offset = 0, heightalign;
105 	uint32_t ubwc_blockwidth, ubwc_blockheight;
106 
107 	assert(nr_samples > 0);
108 	layout->width0 = width0;
109 	layout->height0 = height0;
110 	layout->depth0 = depth0;
111 
112 	layout->cpp = util_format_get_blocksize(format);
113 	layout->cpp *= nr_samples;
114 	layout->cpp_shift = ffs(layout->cpp) - 1;
115 
116 	layout->format = format;
117 	layout->nr_samples = nr_samples;
118 	layout->layer_first = !is_3d;
119 
120 	fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
121 
122 	if (depth0 > 1 || ubwc_blockwidth == 0)
123 		layout->ubwc = false;
124 
125 	if (layout->ubwc || util_format_is_depth_or_stencil(format))
126 		layout->tile_all = true;
127 
128 	/* in layer_first layout, the level (slice) contains just one
129 	 * layer (since in fact the layer contains the slices)
130 	 */
131 	uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
132 
133 	/* note: for tiled+noubwc layouts, we can use a lower pitchalign
134 	 * which will affect the linear levels only, (the hardware will still
135 	 * expect the tiled alignment on the tiled levels)
136 	 */
137 	if (layout->tile_mode) {
138 		fdl6_tile_alignment(layout, &heightalign);
139 	} else {
140 		layout->base_align = 64;
141 		layout->pitchalign = 0;
142 		/* align pitch to at least 16 pixels:
143 		 * both turnip and galium assume there is enough alignment for 16x4
144 		 * aligned gmem store. turnip can use CP_BLIT to work without this
145 		 * extra alignment, but gallium driver doesn't implement it yet
146 		 */
147 		if (layout->cpp > 4)
148 			layout->pitchalign = fdl_cpp_shift(layout) - 2;
149 
150 		/* when possible, use a bit more alignment than necessary
151 		 * presumably this is better for performance?
152 		 */
153 		if (!explicit_layout)
154 			layout->pitchalign = fdl_cpp_shift(layout);
155 
156 		/* not used, avoid "may be used uninitialized" warning */
157 		heightalign = 1;
158 	}
159 
160 	fdl_set_pitchalign(layout, layout->pitchalign + 6);
161 
162 	if (explicit_layout) {
163 		offset = explicit_layout->offset;
164 		layout->pitch0 = explicit_layout->pitch;
165 		if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
166 			return false;
167 	}
168 
169 	uint32_t ubwc_width0 = width0;
170 	uint32_t ubwc_height0 = height0;
171 	uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
172 	if (mip_levels > 1) {
173 		/* With mipmapping enabled, UBWC layout is power-of-two sized,
174 		 * specified in log2 width/height in the descriptors.  The height
175 		 * alignment is 64 for mipmapping, but for buffer sharing (always
176 		 * single level) other participants expect 16.
177 		 */
178 		ubwc_width0 = util_next_power_of_two(width0);
179 		ubwc_height0 = util_next_power_of_two(height0);
180 		ubwc_tile_height_alignment = 64;
181 	}
182 	layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
183 								RGB_TILE_WIDTH_ALIGNMENT);
184 	ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
185 			ubwc_tile_height_alignment);
186 
187 	for (uint32_t level = 0; level < mip_levels; level++) {
188 		uint32_t depth = u_minify(depth0, level);
189 		struct fdl_slice *slice = &layout->slices[level];
190 		struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
191 		uint32_t tile_mode = fdl_tile_mode(layout, level);
192 		uint32_t pitch = fdl_pitch(layout, level);
193 		uint32_t height;
194 
195 		/* tiled levels of 3D textures are rounded up to PoT dimensions: */
196 		if (is_3d && tile_mode) {
197 			height = u_minify(util_next_power_of_two(height0), level);
198 		} else {
199 			height = u_minify(height0, level);
200 		}
201 
202 		uint32_t nblocksy = util_format_get_nblocksy(format, height);
203 		if (tile_mode)
204 			nblocksy = align(nblocksy, heightalign);
205 
206 		/* The blits used for mem<->gmem work at a granularity of
207 		 * 16x4, which can cause faults due to over-fetch on the
208 		 * last level.  The simple solution is to over-allocate a
209 		 * bit the last level to ensure any over-fetch is harmless.
210 		 * The pitch is already sufficiently aligned, but height
211 		 * may not be. note this only matters if last level is linear
212 		 */
213 		if (level == mip_levels - 1)
214 			nblocksy = align(nblocksy, 4);
215 
216 		slice->offset = offset + layout->size;
217 
218 		/* 1d array and 2d array textures must all have the same layer size
219 		 * for each miplevel on a6xx. 3d textures can have different layer
220 		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
221 		 * different than what this code does), so as soon as the layer size
222 		 * range gets into range, we stop reducing it.
223 		 */
224 		if (is_3d) {
225 			if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
226 				slice->size0 = align(nblocksy * pitch, 4096);
227 			} else {
228 				slice->size0 = layout->slices[level - 1].size0;
229 			}
230 		} else {
231 			slice->size0 = nblocksy * pitch;
232 		}
233 
234 		layout->size += slice->size0 * depth * layers_in_level;
235 
236 		if (layout->ubwc) {
237 			/* with UBWC every level is aligned to 4K */
238 			layout->size = align(layout->size, 4096);
239 
240 			uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
241 			uint32_t meta_height = align(u_minify(ubwc_height0, level),
242 					ubwc_tile_height_alignment);
243 
244 			ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
245 			ubwc_slice->offset = offset + layout->ubwc_layer_size;
246 			layout->ubwc_layer_size += ubwc_slice->size0;
247 		}
248 	}
249 
250 	if (layout->layer_first) {
251 		layout->layer_size = align(layout->size, 4096);
252 		layout->size = layout->layer_size * array_size;
253 	}
254 
255 	/* Place the UBWC slices before the uncompressed slices, because the
256 	 * kernel expects UBWC to be at the start of the buffer.  In the HW, we
257 	 * get to program the UBWC and non-UBWC offset/strides
258 	 * independently.
259 	 */
260 	if (layout->ubwc) {
261 		for (uint32_t level = 0; level < mip_levels; level++)
262 			layout->slices[level].offset += layout->ubwc_layer_size * array_size;
263 		layout->size += layout->ubwc_layer_size * array_size;
264 	}
265 
266 	/* include explicit offset in size */
267 	layout->size += offset;
268 
269 	return true;
270 }
271