1 /*
2 * Copyright 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27
28 #include "genxml/genX_bits.h"
29
30 #include "isl.h"
31 #include "isl_gfx4.h"
32 #include "isl_gfx6.h"
33 #include "isl_gfx7.h"
34 #include "isl_gfx8.h"
35 #include "isl_gfx9.h"
36 #include "isl_gfx12.h"
37 #include "isl_priv.h"
38
39 void
isl_memcpy_linear_to_tiled(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,uint32_t dst_pitch,int32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)40 isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
41 uint32_t yt1, uint32_t yt2,
42 char *dst, const char *src,
43 uint32_t dst_pitch, int32_t src_pitch,
44 bool has_swizzling,
45 enum isl_tiling tiling,
46 isl_memcpy_type copy_type)
47 {
48 #ifdef USE_SSE41
49 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
50 _isl_memcpy_linear_to_tiled_sse41(
51 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
52 tiling, copy_type);
53 return;
54 }
55 #endif
56
57 _isl_memcpy_linear_to_tiled(
58 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
59 tiling, copy_type);
60 }
61
62 void
isl_memcpy_tiled_to_linear(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,int32_t dst_pitch,uint32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)63 isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
64 uint32_t yt1, uint32_t yt2,
65 char *dst, const char *src,
66 int32_t dst_pitch, uint32_t src_pitch,
67 bool has_swizzling,
68 enum isl_tiling tiling,
69 isl_memcpy_type copy_type)
70 {
71 #ifdef USE_SSE41
72 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
73 _isl_memcpy_tiled_to_linear_sse41(
74 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
75 tiling, copy_type);
76 return;
77 }
78 #endif
79
80 _isl_memcpy_tiled_to_linear(
81 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
82 tiling, copy_type);
83 }
84
85 void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char * file,int line,const char * fmt,...)86 __isl_finishme(const char *file, int line, const char *fmt, ...)
87 {
88 va_list ap;
89 char buf[512];
90
91 va_start(ap, fmt);
92 vsnprintf(buf, sizeof(buf), fmt, ap);
93 va_end(ap);
94
95 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
96 }
97
98 static void
isl_device_setup_mocs(struct isl_device * dev)99 isl_device_setup_mocs(struct isl_device *dev)
100 {
101 if (dev->info->ver >= 12) {
102 if (dev->info->is_dg2) {
103 /* L3CC=WB; BSpec: 45101 */
104 dev->mocs.internal = 3 << 1;
105 dev->mocs.external = 3 << 1;
106 } else if (dev->info->is_dg1) {
107 /* L3CC=WB */
108 dev->mocs.internal = 5 << 1;
109 /* Displayables on DG1 are free to cache in L3 since L3 is transient
110 * and flushed at bottom of each submission.
111 */
112 dev->mocs.external = 5 << 1;
113 } else {
114 /* TC=1/LLC Only, LeCC=1/UC, LRUM=0, L3CC=3/WB */
115 dev->mocs.external = 61 << 1;
116 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
117 dev->mocs.internal = 2 << 1;
118
119 /* L1 - HDC:L1 + L3 + LLC */
120 dev->mocs.l1_hdc_l3_llc = 48 << 1;
121 }
122 } else if (dev->info->ver >= 9) {
123 /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
124 dev->mocs.external = 1 << 1;
125 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
126 dev->mocs.internal = 2 << 1;
127 } else if (dev->info->ver >= 8) {
128 /* MEMORY_OBJECT_CONTROL_STATE:
129 * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
130 * .TargetCache = L3DefertoPATforLLCeLLCselection,
131 * .AgeforQUADLRU = 0
132 */
133 dev->mocs.external = 0x18;
134 /* MEMORY_OBJECT_CONTROL_STATE:
135 * .MemoryTypeLLCeLLCCacheabilityControl = WB,
136 * .TargetCache = L3DefertoPATforLLCeLLCselection,
137 * .AgeforQUADLRU = 0
138 */
139 dev->mocs.internal = 0x78;
140 } else if (dev->info->ver >= 7) {
141 if (dev->info->is_haswell) {
142 /* MEMORY_OBJECT_CONTROL_STATE:
143 * .LLCeLLCCacheabilityControlLLCCC = 0,
144 * .L3CacheabilityControlL3CC = 1,
145 */
146 dev->mocs.internal = 1;
147 dev->mocs.external = 1;
148 } else {
149 /* MEMORY_OBJECT_CONTROL_STATE:
150 * .GraphicsDataTypeGFDT = 0,
151 * .LLCCacheabilityControlLLCCC = 0,
152 * .L3CacheabilityControlL3CC = 1,
153 */
154 dev->mocs.internal = 1;
155 dev->mocs.external = 1;
156 }
157 } else {
158 dev->mocs.internal = 0;
159 dev->mocs.external = 0;
160 }
161 }
162
163 /**
164 * Return an appropriate MOCS entry for the given usage flags.
165 */
166 uint32_t
isl_mocs(const struct isl_device * dev,isl_surf_usage_flags_t usage,bool external)167 isl_mocs(const struct isl_device *dev, isl_surf_usage_flags_t usage,
168 bool external)
169 {
170 if (external)
171 return dev->mocs.external;
172
173 if (dev->info->ver >= 12 && !dev->info->is_dg1) {
174 if (usage & ISL_SURF_USAGE_STAGING_BIT)
175 return dev->mocs.internal;
176
177 /* Using L1:HDC for storage buffers breaks Vulkan memory model
178 * tests that use shader atomics. This isn't likely to work out,
179 * and we can't know a priori whether they'll be used. So just
180 * continue with ordinary internal MOCS for now.
181 */
182 if (usage & ISL_SURF_USAGE_STORAGE_BIT)
183 return dev->mocs.internal;
184
185 if (usage & (ISL_SURF_USAGE_CONSTANT_BUFFER_BIT |
186 ISL_SURF_USAGE_RENDER_TARGET_BIT |
187 ISL_SURF_USAGE_TEXTURE_BIT))
188 return dev->mocs.l1_hdc_l3_llc;
189 }
190
191 return dev->mocs.internal;
192 }
193
194 void
isl_device_init(struct isl_device * dev,const struct intel_device_info * info,bool has_bit6_swizzling)195 isl_device_init(struct isl_device *dev,
196 const struct intel_device_info *info,
197 bool has_bit6_swizzling)
198 {
199 /* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */
200 assert(!(has_bit6_swizzling && info->ver >= 8));
201
202 dev->info = info;
203 dev->use_separate_stencil = ISL_GFX_VER(dev) >= 6;
204 dev->has_bit6_swizzling = has_bit6_swizzling;
205
206 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
207 * device properties at buildtime. Verify that the macros with the device
208 * properties chosen during runtime.
209 */
210 ISL_GFX_VER_SANITIZE(dev);
211 ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
212
213 /* Did we break hiz or stencil? */
214 if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
215 assert(info->has_hiz_and_separate_stencil);
216 if (info->must_use_separate_stencil)
217 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
218
219 dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
220 dev->ss.align = isl_align(dev->ss.size, 32);
221
222 dev->ss.clear_color_state_size =
223 isl_align(CLEAR_COLOR_length(info) * 4, 64);
224 dev->ss.clear_color_state_offset =
225 RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
226
227 dev->ss.clear_value_size =
228 isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
229 RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
230 RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
231 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
232
233 dev->ss.clear_value_offset =
234 RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
235
236 assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
237 dev->ss.addr_offset =
238 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
239
240 /* The "Auxiliary Surface Base Address" field starts a bit higher up
241 * because the bottom 12 bits are used for other things. Round down to
242 * the nearest dword before.
243 */
244 dev->ss.aux_addr_offset =
245 (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
246
247 dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
248 assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
249 dev->ds.depth_offset =
250 _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
251
252 if (dev->use_separate_stencil) {
253 dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
254 _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
255 _3DSTATE_CLEAR_PARAMS_length(info) * 4;
256
257 assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
258 dev->ds.stencil_offset =
259 _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
260 _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
261
262 assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
263 dev->ds.hiz_offset =
264 _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
265 _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
266 _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
267 } else {
268 dev->ds.stencil_offset = 0;
269 dev->ds.hiz_offset = 0;
270 }
271
272 if (ISL_GFX_VER(dev) >= 7) {
273 /* From the IVB PRM, SURFACE_STATE::Height,
274 *
275 * For typed buffer and structured buffer surfaces, the number
276 * of entries in the buffer ranges from 1 to 2^27. For raw buffer
277 * surfaces, the number of entries in the buffer is the number of bytes
278 * which can range from 1 to 2^30.
279 *
280 * This limit is only concerned with raw buffers.
281 */
282 dev->max_buffer_size = 1ull << 30;
283 } else {
284 dev->max_buffer_size = 1ull << 27;
285 }
286
287 isl_device_setup_mocs(dev);
288 }
289
290 /**
291 * @brief Query the set of multisamples supported by the device.
292 *
293 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
294 * supported.
295 */
296 isl_sample_count_mask_t ATTRIBUTE_CONST
isl_device_get_sample_counts(struct isl_device * dev)297 isl_device_get_sample_counts(struct isl_device *dev)
298 {
299 if (ISL_GFX_VER(dev) >= 9) {
300 return ISL_SAMPLE_COUNT_1_BIT |
301 ISL_SAMPLE_COUNT_2_BIT |
302 ISL_SAMPLE_COUNT_4_BIT |
303 ISL_SAMPLE_COUNT_8_BIT |
304 ISL_SAMPLE_COUNT_16_BIT;
305 } else if (ISL_GFX_VER(dev) >= 8) {
306 return ISL_SAMPLE_COUNT_1_BIT |
307 ISL_SAMPLE_COUNT_2_BIT |
308 ISL_SAMPLE_COUNT_4_BIT |
309 ISL_SAMPLE_COUNT_8_BIT;
310 } else if (ISL_GFX_VER(dev) >= 7) {
311 return ISL_SAMPLE_COUNT_1_BIT |
312 ISL_SAMPLE_COUNT_4_BIT |
313 ISL_SAMPLE_COUNT_8_BIT;
314 } else if (ISL_GFX_VER(dev) >= 6) {
315 return ISL_SAMPLE_COUNT_1_BIT |
316 ISL_SAMPLE_COUNT_4_BIT;
317 } else {
318 return ISL_SAMPLE_COUNT_1_BIT;
319 }
320 }
321
322 /**
323 * Returns an isl_tile_info representation of the given isl_tiling when
324 * combined when used in the given configuration.
325 *
326 * @param[in] tiling The tiling format to introspect
327 * @param[in] dim The dimensionality of the surface being tiled
328 * @param[in] msaa_layout The layout of samples in the surface being tiled
329 * @param[in] format_bpb The number of bits per surface element (block) for
330 * the surface being tiled
331 * @param[in] samples The samples in the surface being tiled
332 * @param[out] tile_info Return parameter for the tiling information
333 */
334 void
isl_tiling_get_info(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t format_bpb,uint32_t samples,struct isl_tile_info * tile_info)335 isl_tiling_get_info(enum isl_tiling tiling,
336 enum isl_surf_dim dim,
337 enum isl_msaa_layout msaa_layout,
338 uint32_t format_bpb,
339 uint32_t samples,
340 struct isl_tile_info *tile_info)
341 {
342 const uint32_t bs = format_bpb / 8;
343 struct isl_extent4d logical_el;
344 struct isl_extent2d phys_B;
345
346 if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
347 /* It is possible to have non-power-of-two formats in a tiled buffer.
348 * The easiest way to handle this is to treat the tile as if it is three
349 * times as wide. This way no pixel will ever cross a tile boundary.
350 * This really only works on a subset of tiling formats.
351 */
352 assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0 ||
353 tiling == ISL_TILING_4);
354 assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
355 isl_tiling_get_info(tiling, dim, msaa_layout, format_bpb / 3, samples,
356 tile_info);
357 return;
358 }
359
360 switch (tiling) {
361 case ISL_TILING_LINEAR:
362 assert(bs > 0);
363 logical_el = isl_extent4d(1, 1, 1, 1);
364 phys_B = isl_extent2d(bs, 1);
365 break;
366
367 case ISL_TILING_X:
368 assert(bs > 0);
369 logical_el = isl_extent4d(512 / bs, 8, 1, 1);
370 phys_B = isl_extent2d(512, 8);
371 break;
372
373 case ISL_TILING_Y0:
374 case ISL_TILING_4:
375 assert(bs > 0);
376 logical_el = isl_extent4d(128 / bs, 32, 1, 1);
377 phys_B = isl_extent2d(128, 32);
378 break;
379
380 case ISL_TILING_W:
381 assert(bs == 1);
382 logical_el = isl_extent4d(64, 64, 1, 1);
383 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
384 *
385 * "If the surface is a stencil buffer (and thus has Tile Mode set
386 * to TILEMODE_WMAJOR), the pitch must be set to 2x the value
387 * computed based on width, as the stencil buffer is stored with two
388 * rows interleaved."
389 *
390 * This, together with the fact that stencil buffers are referred to as
391 * being Y-tiled in the PRMs for older hardware implies that the
392 * physical size of a W-tile is actually the same as for a Y-tile.
393 */
394 phys_B = isl_extent2d(128, 32);
395 break;
396
397 case ISL_TILING_Yf:
398 case ISL_TILING_Ys: {
399 bool is_Ys = tiling == ISL_TILING_Ys;
400
401 assert(bs > 0);
402 unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
403 unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
404
405 logical_el = isl_extent4d(width / bs, height, 1, 1);
406 phys_B = isl_extent2d(width, height);
407 break;
408 }
409 case ISL_TILING_64:
410 /* The tables below are taken from the "2D Surfaces" page in the Bspec
411 * which are formulated in terms of the Cv and Cu constants. This is
412 * different from the tables in the "Tile64 Format" page which should be
413 * equivalent but are usually in terms of pixels. Also note that Cv and
414 * Cu are HxW order to match the Bspec table, not WxH order like you
415 * might expect.
416 *
417 * From the Bspec's "Tile64 Format" page:
418 *
419 * MSAA Depth/Stencil surface use IMS (Interleaved Multi Samples)
420 * which means:
421 *
422 * - Use the 1X MSAA (non-MSRT) version of the Tile64 equations and
423 * let the client unit do the swizzling internally
424 *
425 * Surfaces using the IMS layout will use the mapping for 1x MSAA.
426 */
427 #define tile_extent(bs, cv, cu, a) \
428 isl_extent4d((1 << cu) / bs, 1 << cv, 1, a)
429
430 /* Only 2D surfaces are handled. */
431 assert(dim == ISL_SURF_DIM_2D);
432
433 if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
434 switch (format_bpb) {
435 case 128: logical_el = tile_extent(bs, 6, 10, 1); break;
436 case 64: logical_el = tile_extent(bs, 6, 10, 1); break;
437 case 32: logical_el = tile_extent(bs, 7, 9, 1); break;
438 case 16: logical_el = tile_extent(bs, 7, 9, 1); break;
439 case 8: logical_el = tile_extent(bs, 8, 8, 1); break;
440 default: unreachable("Unsupported format size.");
441 }
442 } else if (samples == 2) {
443 switch (format_bpb) {
444 case 128: logical_el = tile_extent(bs, 6, 9, 2); break;
445 case 64: logical_el = tile_extent(bs, 6, 9, 2); break;
446 case 32: logical_el = tile_extent(bs, 7, 8, 2); break;
447 case 16: logical_el = tile_extent(bs, 7, 8, 2); break;
448 case 8: logical_el = tile_extent(bs, 8, 7, 2); break;
449 default: unreachable("Unsupported format size.");
450 }
451 } else {
452 switch (format_bpb) {
453 case 128: logical_el = tile_extent(bs, 5, 9, 4); break;
454 case 64: logical_el = tile_extent(bs, 5, 9, 4); break;
455 case 32: logical_el = tile_extent(bs, 6, 8, 4); break;
456 case 16: logical_el = tile_extent(bs, 6, 8, 4); break;
457 case 8: logical_el = tile_extent(bs, 7, 7, 4); break;
458 default: unreachable("Unsupported format size.");
459 }
460 }
461
462 #undef tile_extent
463
464 phys_B.w = logical_el.w * bs;
465 phys_B.h = 64 * 1024 / phys_B.w;
466 break;
467
468 case ISL_TILING_HIZ:
469 /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
470 * 128bpb format. The tiling has the same physical dimensions as
471 * Y-tiling but actually has two HiZ columns per Y-tiled column.
472 */
473 assert(bs == 16);
474 logical_el = isl_extent4d(16, 16, 1, 1);
475 phys_B = isl_extent2d(128, 32);
476 break;
477
478 case ISL_TILING_CCS:
479 /* CCS surfaces are required to have one of the GENX_CCS_* formats which
480 * have a block size of 1 or 2 bits per block and each CCS element
481 * corresponds to one cache-line pair in the main surface. From the Sky
482 * Lake PRM Vol. 12 in the section on planes:
483 *
484 * "The Color Control Surface (CCS) contains the compression status
485 * of the cache-line pairs. The compression state of the cache-line
486 * pair is specified by 2 bits in the CCS. Each CCS cache-line
487 * represents an area on the main surface of 16x16 sets of 128 byte
488 * Y-tiled cache-line-pairs. CCS is always Y tiled."
489 *
490 * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
491 * Since each cache line corresponds to a 16x16 set of cache-line pairs,
492 * that yields total tile area of 128x128 cache-line pairs or CCS
493 * elements. On older hardware, each CCS element is 1 bit and the tile
494 * is 128x256 elements.
495 */
496 assert(format_bpb == 1 || format_bpb == 2);
497 logical_el = isl_extent4d(128, 256 / format_bpb, 1, 1);
498 phys_B = isl_extent2d(128, 32);
499 break;
500
501 case ISL_TILING_GFX12_CCS:
502 /* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory
503 * Compression > Memory Compression - Gfx12:
504 *
505 * 4 bits of auxiliary plane data are required for 2 cachelines of
506 * main surface data. This results in a single cacheline of auxiliary
507 * plane data mapping to 4 4K pages of main surface data for the 4K
508 * pages (tile Y ) and 1 64K Tile Ys page.
509 *
510 * The Y-tiled pairing bit of 9 shown in the table below that Bspec
511 * section expresses that the 2 cachelines of main surface data are
512 * horizontally adjacent.
513 *
514 * TODO: Handle Ys, Yf and their pairing bits.
515 *
516 * Therefore, each CCS cacheline represents a 512Bx32 row area and each
517 * element represents a 32Bx4 row area.
518 */
519 assert(format_bpb == 4);
520 logical_el = isl_extent4d(16, 8, 1, 1);
521 phys_B = isl_extent2d(64, 1);
522 break;
523
524 default:
525 unreachable("not reached");
526 } /* end switch */
527
528 *tile_info = (struct isl_tile_info) {
529 .tiling = tiling,
530 .format_bpb = format_bpb,
531 .logical_extent_el = logical_el,
532 .phys_extent_B = phys_B,
533 };
534 }
535
536 bool
isl_color_value_is_zero(union isl_color_value value,enum isl_format format)537 isl_color_value_is_zero(union isl_color_value value,
538 enum isl_format format)
539 {
540 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
541
542 #define RETURN_FALSE_IF_NOT_0(c, i) \
543 if (fmtl->channels.c.bits && value.u32[i] != 0) \
544 return false
545
546 RETURN_FALSE_IF_NOT_0(r, 0);
547 RETURN_FALSE_IF_NOT_0(g, 1);
548 RETURN_FALSE_IF_NOT_0(b, 2);
549 RETURN_FALSE_IF_NOT_0(a, 3);
550
551 #undef RETURN_FALSE_IF_NOT_0
552
553 return true;
554 }
555
556 bool
isl_color_value_is_zero_one(union isl_color_value value,enum isl_format format)557 isl_color_value_is_zero_one(union isl_color_value value,
558 enum isl_format format)
559 {
560 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
561
562 #define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
563 if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
564 return false
565
566 if (isl_format_has_int_channel(format)) {
567 RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
568 RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
569 RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
570 RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
571 } else {
572 RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
573 RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
574 RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
575 RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
576 }
577
578 #undef RETURN_FALSE_IF_NOT_0_1
579
580 return true;
581 }
582
583 /**
584 * @param[out] tiling is set only on success
585 */
586 static bool
isl_surf_choose_tiling(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling * tiling)587 isl_surf_choose_tiling(const struct isl_device *dev,
588 const struct isl_surf_init_info *restrict info,
589 enum isl_tiling *tiling)
590 {
591 isl_tiling_flags_t tiling_flags = info->tiling_flags;
592
593 /* HiZ surfaces always use the HiZ tiling */
594 if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
595 assert(info->format == ISL_FORMAT_HIZ);
596 assert(tiling_flags == ISL_TILING_HIZ_BIT);
597 *tiling = isl_tiling_flag_to_enum(tiling_flags);
598 return true;
599 }
600
601 /* CCS surfaces always use the CCS tiling */
602 if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
603 assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
604 UNUSED bool ivb_ccs = ISL_GFX_VER(dev) < 12 &&
605 tiling_flags == ISL_TILING_CCS_BIT;
606 UNUSED bool tgl_ccs = ISL_GFX_VER(dev) >= 12 &&
607 tiling_flags == ISL_TILING_GFX12_CCS_BIT;
608 assert(ivb_ccs != tgl_ccs);
609 *tiling = isl_tiling_flag_to_enum(tiling_flags);
610 return true;
611 }
612
613 if (ISL_GFX_VERX10(dev) >= 125) {
614 isl_gfx125_filter_tiling(dev, info, &tiling_flags);
615 } else if (ISL_GFX_VER(dev) >= 6) {
616 isl_gfx6_filter_tiling(dev, info, &tiling_flags);
617 } else {
618 isl_gfx4_filter_tiling(dev, info, &tiling_flags);
619 }
620
621 #define CHOOSE(__tiling) \
622 do { \
623 if (tiling_flags & (1u << (__tiling))) { \
624 *tiling = (__tiling); \
625 return true; \
626 } \
627 } while (0)
628
629 /* Of the tiling modes remaining, choose the one that offers the best
630 * performance.
631 */
632
633 if (info->dim == ISL_SURF_DIM_1D) {
634 /* Prefer linear for 1D surfaces because they do not benefit from
635 * tiling. To the contrary, tiling leads to wasted memory and poor
636 * memory locality due to the swizzling and alignment restrictions
637 * required in tiled surfaces.
638 */
639 CHOOSE(ISL_TILING_LINEAR);
640 }
641
642 CHOOSE(ISL_TILING_4);
643 CHOOSE(ISL_TILING_64);
644 CHOOSE(ISL_TILING_Ys);
645 CHOOSE(ISL_TILING_Yf);
646 CHOOSE(ISL_TILING_Y0);
647 CHOOSE(ISL_TILING_X);
648 CHOOSE(ISL_TILING_W);
649 CHOOSE(ISL_TILING_LINEAR);
650
651 #undef CHOOSE
652
653 /* No tiling mode accomodates the inputs. */
654 return false;
655 }
656
657 static bool
isl_choose_msaa_layout(const struct isl_device * dev,const struct isl_surf_init_info * info,enum isl_tiling tiling,enum isl_msaa_layout * msaa_layout)658 isl_choose_msaa_layout(const struct isl_device *dev,
659 const struct isl_surf_init_info *info,
660 enum isl_tiling tiling,
661 enum isl_msaa_layout *msaa_layout)
662 {
663 if (ISL_GFX_VER(dev) >= 8) {
664 return isl_gfx8_choose_msaa_layout(dev, info, tiling, msaa_layout);
665 } else if (ISL_GFX_VER(dev) >= 7) {
666 return isl_gfx7_choose_msaa_layout(dev, info, tiling, msaa_layout);
667 } else if (ISL_GFX_VER(dev) >= 6) {
668 return isl_gfx6_choose_msaa_layout(dev, info, tiling, msaa_layout);
669 } else {
670 return isl_gfx4_choose_msaa_layout(dev, info, tiling, msaa_layout);
671 }
672 }
673
674 struct isl_extent2d
isl_get_interleaved_msaa_px_size_sa(uint32_t samples)675 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
676 {
677 assert(isl_is_pow2(samples));
678
679 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
680 * Sizes (p133):
681 *
682 * If the surface is multisampled and it is a depth or stencil surface
683 * or Multisampled Surface StorageFormat in SURFACE_STATE is
684 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
685 * proceeding: [...]
686 */
687 return (struct isl_extent2d) {
688 .width = 1 << ((ffs(samples) - 0) / 2),
689 .height = 1 << ((ffs(samples) - 1) / 2),
690 };
691 }
692
693 static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,uint32_t * width,uint32_t * height)694 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
695 uint32_t *width, uint32_t *height)
696 {
697 const struct isl_extent2d px_size_sa =
698 isl_get_interleaved_msaa_px_size_sa(samples);
699
700 if (width)
701 *width = isl_align(*width, 2) * px_size_sa.width;
702 if (height)
703 *height = isl_align(*height, 2) * px_size_sa.height;
704 }
705
706 static enum isl_array_pitch_span
isl_choose_array_pitch_span(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_level0_sa)707 isl_choose_array_pitch_span(const struct isl_device *dev,
708 const struct isl_surf_init_info *restrict info,
709 enum isl_dim_layout dim_layout,
710 const struct isl_extent4d *phys_level0_sa)
711 {
712 switch (dim_layout) {
713 case ISL_DIM_LAYOUT_GFX9_1D:
714 case ISL_DIM_LAYOUT_GFX4_2D:
715 if (ISL_GFX_VER(dev) >= 8) {
716 /* QPitch becomes programmable in Broadwell. So choose the
717 * most compact QPitch possible in order to conserve memory.
718 *
719 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
720 * >> RENDER_SURFACE_STATE Surface QPitch (p325):
721 *
722 * - Software must ensure that this field is set to a value
723 * sufficiently large such that the array slices in the surface
724 * do not overlap. Refer to the Memory Data Formats section for
725 * information on how surfaces are stored in memory.
726 *
727 * - This field specifies the distance in rows between array
728 * slices. It is used only in the following cases:
729 *
730 * - Surface Array is enabled OR
731 * - Number of Mulitsamples is not NUMSAMPLES_1 and
732 * Multisampled Surface Storage Format set to MSFMT_MSS OR
733 * - Surface Type is SURFTYPE_CUBE
734 */
735 return ISL_ARRAY_PITCH_SPAN_COMPACT;
736 } else if (ISL_GFX_VER(dev) >= 7) {
737 /* Note that Ivybridge introduces
738 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
739 * driver more control over the QPitch.
740 */
741
742 if (phys_level0_sa->array_len == 1) {
743 /* The hardware will never use the QPitch. So choose the most
744 * compact QPitch possible in order to conserve memory.
745 */
746 return ISL_ARRAY_PITCH_SPAN_COMPACT;
747 }
748
749 if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
750 (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
751 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
752 * Section 6.18.4.7: Surface Arrays (p112):
753 *
754 * If Surface Array Spacing is set to ARYSPC_FULL (note that
755 * the depth buffer and stencil buffer have an implied value of
756 * ARYSPC_FULL):
757 */
758 return ISL_ARRAY_PITCH_SPAN_FULL;
759 }
760
761 if (info->levels == 1) {
762 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
763 * to ARYSPC_LOD0.
764 */
765 return ISL_ARRAY_PITCH_SPAN_COMPACT;
766 }
767
768 return ISL_ARRAY_PITCH_SPAN_FULL;
769 } else if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
770 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
771 isl_surf_usage_is_stencil(info->usage)) {
772 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
773 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
774 *
775 * The separate stencil buffer does not support mip mapping, thus
776 * the storage for LODs other than LOD 0 is not needed.
777 */
778 assert(info->levels == 1);
779 return ISL_ARRAY_PITCH_SPAN_COMPACT;
780 } else {
781 if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
782 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
783 isl_surf_usage_is_stencil(info->usage)) {
784 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
785 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
786 *
787 * The separate stencil buffer does not support mip mapping,
788 * thus the storage for LODs other than LOD 0 is not needed.
789 */
790 assert(info->levels == 1);
791 assert(phys_level0_sa->array_len == 1);
792 return ISL_ARRAY_PITCH_SPAN_COMPACT;
793 }
794
795 if (phys_level0_sa->array_len == 1) {
796 /* The hardware will never use the QPitch. So choose the most
797 * compact QPitch possible in order to conserve memory.
798 */
799 return ISL_ARRAY_PITCH_SPAN_COMPACT;
800 }
801
802 return ISL_ARRAY_PITCH_SPAN_FULL;
803 }
804
805 case ISL_DIM_LAYOUT_GFX4_3D:
806 /* The hardware will never use the QPitch. So choose the most
807 * compact QPitch possible in order to conserve memory.
808 */
809 return ISL_ARRAY_PITCH_SPAN_COMPACT;
810
811 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
812 /* Each array image in the gfx6 stencil of HiZ surface is compact in the
813 * sense that every LOD is a compact array of the same size as LOD0.
814 */
815 return ISL_ARRAY_PITCH_SPAN_COMPACT;
816 }
817
818 unreachable("bad isl_dim_layout");
819 return ISL_ARRAY_PITCH_SPAN_FULL;
820 }
821
822 static void
isl_choose_image_alignment_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling tiling,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,struct isl_extent3d * image_align_el)823 isl_choose_image_alignment_el(const struct isl_device *dev,
824 const struct isl_surf_init_info *restrict info,
825 enum isl_tiling tiling,
826 enum isl_dim_layout dim_layout,
827 enum isl_msaa_layout msaa_layout,
828 struct isl_extent3d *image_align_el)
829 {
830 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
831 if (fmtl->txc == ISL_TXC_MCS) {
832 assert(tiling == ISL_TILING_Y0);
833
834 /*
835 * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
836 *
837 * Height, width, and layout of MCS buffer in this case must match with
838 * Render Target height, width, and layout. MCS buffer is tiledY.
839 *
840 * To avoid wasting memory, choose the smallest alignment possible:
841 * HALIGN_4 and VALIGN_4.
842 */
843 *image_align_el = isl_extent3d(4, 4, 1);
844 return;
845 } else if (info->format == ISL_FORMAT_HIZ) {
846 assert(ISL_GFX_VER(dev) >= 6);
847 if (ISL_GFX_VER(dev) == 6) {
848 /* HiZ surfaces on Sandy Bridge are packed tightly. */
849 *image_align_el = isl_extent3d(1, 1, 1);
850 } else if (ISL_GFX_VER(dev) < 12) {
851 /* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the
852 * primary surface which works out to 2x2 HiZ elments.
853 */
854 *image_align_el = isl_extent3d(2, 2, 1);
855 } else {
856 /* On gfx12+, HiZ surfaces are always aligned to 16x16 pixels in the
857 * primary surface which works out to 2x4 HiZ elments.
858 * TODO: Verify
859 */
860 *image_align_el = isl_extent3d(2, 4, 1);
861 }
862 return;
863 }
864
865 if (ISL_GFX_VERX10(dev) >= 125) {
866 isl_gfx125_choose_image_alignment_el(dev, info, tiling, dim_layout,
867 msaa_layout, image_align_el);
868 } else if (ISL_GFX_VER(dev) >= 12) {
869 isl_gfx12_choose_image_alignment_el(dev, info, tiling, dim_layout,
870 msaa_layout, image_align_el);
871 } else if (ISL_GFX_VER(dev) >= 9) {
872 isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout,
873 msaa_layout, image_align_el);
874 } else if (ISL_GFX_VER(dev) >= 8) {
875 isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout,
876 msaa_layout, image_align_el);
877 } else if (ISL_GFX_VER(dev) >= 7) {
878 isl_gfx7_choose_image_alignment_el(dev, info, tiling, dim_layout,
879 msaa_layout, image_align_el);
880 } else if (ISL_GFX_VER(dev) >= 6) {
881 isl_gfx6_choose_image_alignment_el(dev, info, tiling, dim_layout,
882 msaa_layout, image_align_el);
883 } else {
884 isl_gfx4_choose_image_alignment_el(dev, info, tiling, dim_layout,
885 msaa_layout, image_align_el);
886 }
887 }
888
889 static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device * dev,enum isl_surf_dim logical_dim,enum isl_tiling tiling,isl_surf_usage_flags_t usage)890 isl_surf_choose_dim_layout(const struct isl_device *dev,
891 enum isl_surf_dim logical_dim,
892 enum isl_tiling tiling,
893 isl_surf_usage_flags_t usage)
894 {
895 /* Sandy bridge needs a special layout for HiZ and stencil. */
896 if (ISL_GFX_VER(dev) == 6 &&
897 (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
898 return ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ;
899
900 if (ISL_GFX_VER(dev) >= 9) {
901 switch (logical_dim) {
902 case ISL_SURF_DIM_1D:
903 /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
904 *
905 * One-dimensional surfaces use a tiling mode of linear.
906 * Technically, they are not tiled resources, but the Tiled
907 * Resource Mode field in RENDER_SURFACE_STATE is still used to
908 * indicate the alignment requirements for this linear surface
909 * (See 1D Alignment requirements for how 4K and 64KB Tiled
910 * Resource Modes impact alignment). Alternatively, a 1D surface
911 * can be defined as a 2D tiled surface (e.g. TileY or TileX) with
912 * a height of 0.
913 *
914 * In other words, ISL_DIM_LAYOUT_GFX9_1D is only used for linear
915 * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GFX4_2D is used.
916 */
917 if (tiling == ISL_TILING_LINEAR)
918 return ISL_DIM_LAYOUT_GFX9_1D;
919 else
920 return ISL_DIM_LAYOUT_GFX4_2D;
921 case ISL_SURF_DIM_2D:
922 case ISL_SURF_DIM_3D:
923 return ISL_DIM_LAYOUT_GFX4_2D;
924 }
925 } else {
926 switch (logical_dim) {
927 case ISL_SURF_DIM_1D:
928 case ISL_SURF_DIM_2D:
929 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
930 *
931 * The cube face textures are stored in the same way as 3D surfaces
932 * are stored (see section 6.17.5 for details). For cube surfaces,
933 * however, the depth is equal to the number of faces (always 6) and
934 * is not reduced for each MIP.
935 */
936 if (ISL_GFX_VER(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
937 return ISL_DIM_LAYOUT_GFX4_3D;
938
939 return ISL_DIM_LAYOUT_GFX4_2D;
940 case ISL_SURF_DIM_3D:
941 return ISL_DIM_LAYOUT_GFX4_3D;
942 }
943 }
944
945 unreachable("bad isl_surf_dim");
946 return ISL_DIM_LAYOUT_GFX4_2D;
947 }
948
949 /**
950 * Calculate the physical extent of the surface's first level, in units of
951 * surface samples.
952 */
953 static void
isl_calc_phys_level0_extent_sa(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,enum isl_tiling tiling,enum isl_msaa_layout msaa_layout,struct isl_extent4d * phys_level0_sa)954 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
955 const struct isl_surf_init_info *restrict info,
956 enum isl_dim_layout dim_layout,
957 enum isl_tiling tiling,
958 enum isl_msaa_layout msaa_layout,
959 struct isl_extent4d *phys_level0_sa)
960 {
961 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
962
963 if (isl_format_is_planar(info->format))
964 unreachable("Planar formats unsupported");
965
966 switch (info->dim) {
967 case ISL_SURF_DIM_1D:
968 assert(info->height == 1);
969 assert(info->depth == 1);
970 assert(info->samples == 1);
971
972 switch (dim_layout) {
973 case ISL_DIM_LAYOUT_GFX4_3D:
974 unreachable("bad isl_dim_layout");
975
976 case ISL_DIM_LAYOUT_GFX9_1D:
977 case ISL_DIM_LAYOUT_GFX4_2D:
978 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
979 *phys_level0_sa = (struct isl_extent4d) {
980 .w = info->width,
981 .h = 1,
982 .d = 1,
983 .a = info->array_len,
984 };
985 break;
986 }
987 break;
988
989 case ISL_SURF_DIM_2D:
990 if (ISL_GFX_VER(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
991 assert(dim_layout == ISL_DIM_LAYOUT_GFX4_3D);
992 else
993 assert(dim_layout == ISL_DIM_LAYOUT_GFX4_2D ||
994 dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);
995
996 if (tiling == ISL_TILING_Ys && info->samples > 1)
997 isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
998
999 switch (msaa_layout) {
1000 case ISL_MSAA_LAYOUT_NONE:
1001 assert(info->depth == 1);
1002 assert(info->samples == 1);
1003
1004 *phys_level0_sa = (struct isl_extent4d) {
1005 .w = info->width,
1006 .h = info->height,
1007 .d = 1,
1008 .a = info->array_len,
1009 };
1010 break;
1011
1012 case ISL_MSAA_LAYOUT_ARRAY:
1013 assert(info->depth == 1);
1014 assert(info->levels == 1);
1015 assert(isl_format_supports_multisampling(dev->info, info->format));
1016 assert(fmtl->bw == 1 && fmtl->bh == 1);
1017
1018 *phys_level0_sa = (struct isl_extent4d) {
1019 .w = info->width,
1020 .h = info->height,
1021 .d = 1,
1022 .a = info->array_len * info->samples,
1023 };
1024 break;
1025
1026 case ISL_MSAA_LAYOUT_INTERLEAVED:
1027 assert(info->depth == 1);
1028 assert(info->levels == 1);
1029 assert(isl_format_supports_multisampling(dev->info, info->format));
1030
1031 *phys_level0_sa = (struct isl_extent4d) {
1032 .w = info->width,
1033 .h = info->height,
1034 .d = 1,
1035 .a = info->array_len,
1036 };
1037
1038 isl_msaa_interleaved_scale_px_to_sa(info->samples,
1039 &phys_level0_sa->w,
1040 &phys_level0_sa->h);
1041 break;
1042 }
1043 break;
1044
1045 case ISL_SURF_DIM_3D:
1046 assert(info->array_len == 1);
1047 assert(info->samples == 1);
1048
1049 if (fmtl->bd > 1) {
1050 isl_finishme("%s:%s: compression block with depth > 1",
1051 __FILE__, __func__);
1052 }
1053
1054 switch (dim_layout) {
1055 case ISL_DIM_LAYOUT_GFX9_1D:
1056 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1057 unreachable("bad isl_dim_layout");
1058
1059 case ISL_DIM_LAYOUT_GFX4_2D:
1060 assert(ISL_GFX_VER(dev) >= 9);
1061
1062 *phys_level0_sa = (struct isl_extent4d) {
1063 .w = info->width,
1064 .h = info->height,
1065 .d = 1,
1066 .a = info->depth,
1067 };
1068 break;
1069
1070 case ISL_DIM_LAYOUT_GFX4_3D:
1071 assert(ISL_GFX_VER(dev) < 9);
1072 *phys_level0_sa = (struct isl_extent4d) {
1073 .w = info->width,
1074 .h = info->height,
1075 .d = info->depth,
1076 .a = 1,
1077 };
1078 break;
1079 }
1080 break;
1081 }
1082 }
1083
1084 /**
1085 * Calculate the pitch between physical array slices, in units of rows of
1086 * surface elements.
1087 */
1088 static uint32_t
isl_calc_array_pitch_el_rows_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,const struct isl_extent2d * phys_slice0_sa)1089 isl_calc_array_pitch_el_rows_gfx4_2d(
1090 const struct isl_device *dev,
1091 const struct isl_surf_init_info *restrict info,
1092 const struct isl_tile_info *tile_info,
1093 const struct isl_extent3d *image_align_sa,
1094 const struct isl_extent4d *phys_level0_sa,
1095 enum isl_array_pitch_span array_pitch_span,
1096 const struct isl_extent2d *phys_slice0_sa)
1097 {
1098 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1099 uint32_t pitch_sa_rows = 0;
1100
1101 switch (array_pitch_span) {
1102 case ISL_ARRAY_PITCH_SPAN_COMPACT:
1103 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
1104 break;
1105 case ISL_ARRAY_PITCH_SPAN_FULL: {
1106 /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
1107 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
1108 * Surfaces >> Surface Arrays.
1109 */
1110 uint32_t H0_sa = phys_level0_sa->h;
1111 uint32_t H1_sa = isl_minify(H0_sa, 1);
1112
1113 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
1114 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
1115
1116 uint32_t m;
1117 if (ISL_GFX_VER(dev) >= 7) {
1118 /* The QPitch equation changed slightly in Ivybridge. */
1119 m = 12;
1120 } else {
1121 m = 11;
1122 }
1123
1124 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
1125
1126 if (ISL_GFX_VER(dev) == 6 && info->samples > 1 &&
1127 (info->height % 4 == 1)) {
1128 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1129 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1130 *
1131 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
1132 * the value calculated in the equation above , for every
1133 * other odd Surface Height starting from 1 i.e. 1,5,9,13.
1134 *
1135 * XXX(chadv): Is the errata natural corollary of the physical
1136 * layout of interleaved samples?
1137 */
1138 pitch_sa_rows += 4;
1139 }
1140
1141 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
1142 } /* end case */
1143 break;
1144 }
1145
1146 assert(pitch_sa_rows % fmtl->bh == 0);
1147 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
1148
1149 if (ISL_GFX_VER(dev) >= 9 && ISL_GFX_VER(dev) <= 11 &&
1150 fmtl->txc == ISL_TXC_CCS) {
1151 /*
1152 * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
1153 *
1154 * "Mip-mapped and arrayed surfaces are supported with MCS buffer
1155 * layout with these alignments in the RT space: Horizontal
1156 * Alignment = 128 and Vertical Alignment = 64."
1157 *
1158 * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
1159 *
1160 * "For non-multisampled render target's CCS auxiliary surface,
1161 * QPitch must be computed with Horizontal Alignment = 128 and
1162 * Surface Vertical Alignment = 256. These alignments are only for
1163 * CCS buffer and not for associated render target."
1164 *
1165 * The first restriction is already handled by isl_choose_image_alignment_el
1166 * but the second restriction, which is an extension of the first, only
1167 * applies to qpitch and must be applied here.
1168 *
1169 * The second restriction disappears on Gfx12.
1170 */
1171 assert(fmtl->bh == 4);
1172 pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
1173 }
1174
1175 if (ISL_GFX_VER(dev) >= 9 &&
1176 info->dim == ISL_SURF_DIM_3D &&
1177 tile_info->tiling != ISL_TILING_LINEAR) {
1178 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
1179 *
1180 * Tile Mode != Linear: This field must be set to an integer multiple
1181 * of the tile height
1182 */
1183 pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
1184 }
1185
1186 return pitch_el_rows;
1187 }
1188
1189 /**
1190 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1191 * ISL_DIM_LAYOUT_GFX4_2D.
1192 */
1193 static void
isl_calc_phys_slice0_extent_sa_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,struct isl_extent2d * phys_slice0_sa)1194 isl_calc_phys_slice0_extent_sa_gfx4_2d(
1195 const struct isl_device *dev,
1196 const struct isl_surf_init_info *restrict info,
1197 enum isl_msaa_layout msaa_layout,
1198 const struct isl_extent3d *image_align_sa,
1199 const struct isl_extent4d *phys_level0_sa,
1200 struct isl_extent2d *phys_slice0_sa)
1201 {
1202 assert(phys_level0_sa->depth == 1);
1203
1204 if (info->levels == 1) {
1205 /* Do not pad the surface to the image alignment.
1206 *
1207 * For tiled surfaces, using a reduced alignment here avoids wasting CPU
1208 * cycles on the below mipmap layout caluclations. Reducing the
1209 * alignment here is safe because we later align the row pitch and array
1210 * pitch to the tile boundary. It is safe even for
1211 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
1212 * to accomodate the interleaved samples.
1213 *
1214 * For linear surfaces, reducing the alignment here permits us to later
1215 * choose an arbitrary, non-aligned row pitch. If the surface backs
1216 * a VkBuffer, then an arbitrary pitch may be needed to accomodate
1217 * VkBufferImageCopy::bufferRowLength.
1218 */
1219 *phys_slice0_sa = (struct isl_extent2d) {
1220 .w = phys_level0_sa->w,
1221 .h = phys_level0_sa->h,
1222 };
1223 return;
1224 }
1225
1226 uint32_t slice_top_w = 0;
1227 uint32_t slice_bottom_w = 0;
1228 uint32_t slice_left_h = 0;
1229 uint32_t slice_right_h = 0;
1230
1231 uint32_t W0 = phys_level0_sa->w;
1232 uint32_t H0 = phys_level0_sa->h;
1233
1234 for (uint32_t l = 0; l < info->levels; ++l) {
1235 uint32_t W = isl_minify(W0, l);
1236 uint32_t H = isl_minify(H0, l);
1237
1238 uint32_t w = isl_align_npot(W, image_align_sa->w);
1239 uint32_t h = isl_align_npot(H, image_align_sa->h);
1240
1241 if (l == 0) {
1242 slice_top_w = w;
1243 slice_left_h = h;
1244 slice_right_h = h;
1245 } else if (l == 1) {
1246 slice_bottom_w = w;
1247 slice_left_h += h;
1248 } else if (l == 2) {
1249 slice_bottom_w += w;
1250 slice_right_h += h;
1251 } else {
1252 slice_right_h += h;
1253 }
1254 }
1255
1256 *phys_slice0_sa = (struct isl_extent2d) {
1257 .w = MAX(slice_top_w, slice_bottom_w),
1258 .h = MAX(slice_left_h, slice_right_h),
1259 };
1260 }
1261
1262 static void
isl_calc_phys_total_extent_el_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1263 isl_calc_phys_total_extent_el_gfx4_2d(
1264 const struct isl_device *dev,
1265 const struct isl_surf_init_info *restrict info,
1266 const struct isl_tile_info *tile_info,
1267 enum isl_msaa_layout msaa_layout,
1268 const struct isl_extent3d *image_align_sa,
1269 const struct isl_extent4d *phys_level0_sa,
1270 enum isl_array_pitch_span array_pitch_span,
1271 uint32_t *array_pitch_el_rows,
1272 struct isl_extent4d *phys_total_el)
1273 {
1274 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1275
1276 struct isl_extent2d phys_slice0_sa;
1277 isl_calc_phys_slice0_extent_sa_gfx4_2d(dev, info, msaa_layout,
1278 image_align_sa, phys_level0_sa,
1279 &phys_slice0_sa);
1280 *array_pitch_el_rows =
1281 isl_calc_array_pitch_el_rows_gfx4_2d(dev, info, tile_info,
1282 image_align_sa, phys_level0_sa,
1283 array_pitch_span,
1284 &phys_slice0_sa);
1285
1286 if (tile_info->tiling == ISL_TILING_64) {
1287 *phys_total_el = (struct isl_extent4d) {
1288 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1289 .h = isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1290 .d = isl_align_div_npot(phys_level0_sa->d, fmtl->bd),
1291 .a = phys_level0_sa->array_len,
1292 };
1293 } else {
1294 *phys_total_el = (struct isl_extent4d) {
1295 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1296 .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1297 isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1298 .d = 1,
1299 .a = 1,
1300 };
1301 }
1302 }
1303
1304 /**
1305 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1306 * ISL_DIM_LAYOUT_GFX4_3D.
1307 */
1308 static void
isl_calc_phys_total_extent_el_gfx4_3d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1309 isl_calc_phys_total_extent_el_gfx4_3d(
1310 const struct isl_device *dev,
1311 const struct isl_surf_init_info *restrict info,
1312 const struct isl_extent3d *image_align_sa,
1313 const struct isl_extent4d *phys_level0_sa,
1314 uint32_t *array_pitch_el_rows,
1315 struct isl_extent4d *phys_total_el)
1316 {
1317 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1318
1319 assert(info->samples == 1);
1320
1321 if (info->dim != ISL_SURF_DIM_3D) {
1322 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1323 *
1324 * The cube face textures are stored in the same way as 3D surfaces
1325 * are stored (see section 6.17.5 for details). For cube surfaces,
1326 * however, the depth is equal to the number of faces (always 6) and
1327 * is not reduced for each MIP.
1328 */
1329 assert(ISL_GFX_VER(dev) == 4);
1330 assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1331 assert(phys_level0_sa->array_len == 6);
1332 } else {
1333 assert(phys_level0_sa->array_len == 1);
1334 }
1335
1336 uint32_t total_w = 0;
1337 uint32_t total_h = 0;
1338
1339 uint32_t W0 = phys_level0_sa->w;
1340 uint32_t H0 = phys_level0_sa->h;
1341 uint32_t D0 = phys_level0_sa->d;
1342 uint32_t A0 = phys_level0_sa->a;
1343
1344 for (uint32_t l = 0; l < info->levels; ++l) {
1345 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1346 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1347 uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1348
1349 uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1350 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1351
1352 total_w = MAX(total_w, level_w * max_layers_horiz);
1353 total_h += level_h * max_layers_vert;
1354 }
1355
1356 /* GFX4_3D layouts don't really have an array pitch since each LOD has a
1357 * different number of horizontal and vertical layers. We have to set it
1358 * to something, so at least make it true for LOD0.
1359 */
1360 *array_pitch_el_rows =
1361 isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1362 *phys_total_el = (struct isl_extent4d) {
1363 .w = isl_assert_div(total_w, fmtl->bw),
1364 .h = isl_assert_div(total_h, fmtl->bh),
1365 .d = 1,
1366 .a = 1,
1367 };
1368 }
1369
1370 /**
1371 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1372 * ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ.
1373 */
1374 static void
isl_calc_phys_total_extent_el_gfx6_stencil_hiz(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1375 isl_calc_phys_total_extent_el_gfx6_stencil_hiz(
1376 const struct isl_device *dev,
1377 const struct isl_surf_init_info *restrict info,
1378 const struct isl_tile_info *tile_info,
1379 const struct isl_extent3d *image_align_sa,
1380 const struct isl_extent4d *phys_level0_sa,
1381 uint32_t *array_pitch_el_rows,
1382 struct isl_extent4d *phys_total_el)
1383 {
1384 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1385
1386 const struct isl_extent2d tile_extent_sa = {
1387 .w = tile_info->logical_extent_el.w * fmtl->bw,
1388 .h = tile_info->logical_extent_el.h * fmtl->bh,
1389 };
1390 /* Tile size is a multiple of image alignment */
1391 assert(tile_extent_sa.w % image_align_sa->w == 0);
1392 assert(tile_extent_sa.h % image_align_sa->h == 0);
1393
1394 const uint32_t W0 = phys_level0_sa->w;
1395 const uint32_t H0 = phys_level0_sa->h;
1396
1397 /* Each image has the same height as LOD0 because the hardware thinks
1398 * everything is LOD0
1399 */
1400 const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1401
1402 uint32_t total_top_w = 0;
1403 uint32_t total_bottom_w = 0;
1404 uint32_t total_h = 0;
1405
1406 for (uint32_t l = 0; l < info->levels; ++l) {
1407 const uint32_t W = isl_minify(W0, l);
1408
1409 const uint32_t w = isl_align(W, tile_extent_sa.w);
1410 const uint32_t h = isl_align(H, tile_extent_sa.h);
1411
1412 if (l == 0) {
1413 total_top_w = w;
1414 total_h = h;
1415 } else if (l == 1) {
1416 total_bottom_w = w;
1417 total_h += h;
1418 } else {
1419 total_bottom_w += w;
1420 }
1421 }
1422
1423 *array_pitch_el_rows =
1424 isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1425 *phys_total_el = (struct isl_extent4d) {
1426 .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1427 .h = isl_assert_div(total_h, fmtl->bh),
1428 .d = 1,
1429 .a = 1,
1430 };
1431 }
1432
1433 /**
1434 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1435 * ISL_DIM_LAYOUT_GFX9_1D.
1436 */
1437 static void
isl_calc_phys_total_extent_el_gfx9_1d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1438 isl_calc_phys_total_extent_el_gfx9_1d(
1439 const struct isl_device *dev,
1440 const struct isl_surf_init_info *restrict info,
1441 const struct isl_extent3d *image_align_sa,
1442 const struct isl_extent4d *phys_level0_sa,
1443 uint32_t *array_pitch_el_rows,
1444 struct isl_extent4d *phys_total_el)
1445 {
1446 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1447
1448 assert(phys_level0_sa->height == 1);
1449 assert(phys_level0_sa->depth == 1);
1450 assert(info->samples == 1);
1451 assert(image_align_sa->w >= fmtl->bw);
1452
1453 uint32_t slice_w = 0;
1454 const uint32_t W0 = phys_level0_sa->w;
1455
1456 for (uint32_t l = 0; l < info->levels; ++l) {
1457 uint32_t W = isl_minify(W0, l);
1458 uint32_t w = isl_align_npot(W, image_align_sa->w);
1459
1460 slice_w += w;
1461 }
1462
1463 *array_pitch_el_rows = 1;
1464 *phys_total_el = (struct isl_extent4d) {
1465 .w = isl_assert_div(slice_w, fmtl->bw),
1466 .h = phys_level0_sa->array_len,
1467 .d = 1,
1468 .a = 1,
1469 };
1470 }
1471
1472 /**
1473 * Calculate the two-dimensional total physical extent of the surface, in
1474 * units of surface elements.
1475 */
1476 static void
isl_calc_phys_total_extent_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1477 isl_calc_phys_total_extent_el(const struct isl_device *dev,
1478 const struct isl_surf_init_info *restrict info,
1479 const struct isl_tile_info *tile_info,
1480 enum isl_dim_layout dim_layout,
1481 enum isl_msaa_layout msaa_layout,
1482 const struct isl_extent3d *image_align_sa,
1483 const struct isl_extent4d *phys_level0_sa,
1484 enum isl_array_pitch_span array_pitch_span,
1485 uint32_t *array_pitch_el_rows,
1486 struct isl_extent4d *phys_total_el)
1487 {
1488 switch (dim_layout) {
1489 case ISL_DIM_LAYOUT_GFX9_1D:
1490 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1491 isl_calc_phys_total_extent_el_gfx9_1d(dev, info,
1492 image_align_sa, phys_level0_sa,
1493 array_pitch_el_rows,
1494 phys_total_el);
1495 return;
1496 case ISL_DIM_LAYOUT_GFX4_2D:
1497 isl_calc_phys_total_extent_el_gfx4_2d(dev, info, tile_info, msaa_layout,
1498 image_align_sa, phys_level0_sa,
1499 array_pitch_span,
1500 array_pitch_el_rows,
1501 phys_total_el);
1502 return;
1503 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1504 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1505 isl_calc_phys_total_extent_el_gfx6_stencil_hiz(dev, info, tile_info,
1506 image_align_sa,
1507 phys_level0_sa,
1508 array_pitch_el_rows,
1509 phys_total_el);
1510 return;
1511 case ISL_DIM_LAYOUT_GFX4_3D:
1512 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1513 isl_calc_phys_total_extent_el_gfx4_3d(dev, info,
1514 image_align_sa, phys_level0_sa,
1515 array_pitch_el_rows,
1516 phys_total_el);
1517 return;
1518 }
1519
1520 unreachable("invalid value for dim_layout");
1521 }
1522
1523 static uint32_t
isl_calc_row_pitch_alignment(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info)1524 isl_calc_row_pitch_alignment(const struct isl_device *dev,
1525 const struct isl_surf_init_info *surf_info,
1526 const struct isl_tile_info *tile_info)
1527 {
1528 if (tile_info->tiling != ISL_TILING_LINEAR) {
1529 /* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must
1530 * be 512B-aligned. CCS is only support on Y tilings.
1531 *
1532 * Only consider 512B alignment when :
1533 * - AUX is not explicitly disabled
1534 * - the caller has specified no pitch
1535 *
1536 * isl_surf_get_ccs_surf() will check that the main surface alignment
1537 * matches CCS expectations.
1538 */
1539 if (ISL_GFX_VER(dev) >= 12 &&
1540 isl_format_supports_ccs_e(dev->info, surf_info->format) &&
1541 tile_info->tiling != ISL_TILING_X &&
1542 !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) &&
1543 surf_info->row_pitch_B == 0) {
1544 return isl_align(tile_info->phys_extent_B.width, 512);
1545 }
1546
1547 return tile_info->phys_extent_B.width;
1548 }
1549
1550 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1551 * RENDER_SURFACE_STATE Surface Pitch (p349):
1552 *
1553 * - For linear render target surfaces and surfaces accessed with the
1554 * typed data port messages, the pitch must be a multiple of the
1555 * element size for non-YUV surface formats. Pitch must be
1556 * a multiple of 2 * element size for YUV surface formats.
1557 *
1558 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1559 * ignore because isl doesn't do buffers.]
1560 *
1561 * - For other linear surfaces, the pitch can be any multiple of
1562 * bytes.
1563 */
1564 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1565 const uint32_t bs = fmtl->bpb / 8;
1566 uint32_t alignment;
1567
1568 if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1569 if (isl_format_is_yuv(surf_info->format)) {
1570 alignment = 2 * bs;
1571 } else {
1572 alignment = bs;
1573 }
1574 } else {
1575 alignment = 1;
1576 }
1577
1578 /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >>
1579 * PRI_STRIDE Stride (p1254):
1580 *
1581 * "When using linear memory, this must be at least 64 byte aligned."
1582 *
1583 * However, when displaying on NVIDIA and recent AMD GPUs via PRIME,
1584 * we need a larger pitch of 256 bytes.
1585 *
1586 * If the ISL caller didn't specify a row_pitch_B, then we should assume
1587 * the NVIDIA/AMD requirements. Otherwise, if we have a specified
1588 * row_pitch_B, this is probably because the caller is trying to import a
1589 * buffer. In that case we limit the minimum row pitch to the Intel HW
1590 * requirement.
1591 */
1592 if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {
1593 if (surf_info->row_pitch_B == 0)
1594 alignment = isl_align(alignment, 256);
1595 else
1596 alignment = isl_align(alignment, 64);
1597 }
1598
1599 return alignment;
1600 }
1601
1602 static uint32_t
isl_calc_linear_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)1603 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1604 const struct isl_surf_init_info *info,
1605 const struct isl_extent4d *phys_total_el,
1606 uint32_t alignment_B)
1607 {
1608 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1609 const uint32_t bs = fmtl->bpb / 8;
1610
1611 return isl_align_npot(bs * phys_total_el->w, alignment_B);
1612 }
1613
1614 static uint32_t
isl_calc_tiled_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)1615 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1616 const struct isl_surf_init_info *surf_info,
1617 const struct isl_tile_info *tile_info,
1618 const struct isl_extent4d *phys_total_el,
1619 uint32_t alignment_B)
1620 {
1621 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1622
1623 assert(fmtl->bpb % tile_info->format_bpb == 0);
1624
1625 const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1626 const uint32_t total_w_tl =
1627 isl_align_div(phys_total_el->w * tile_el_scale,
1628 tile_info->logical_extent_el.width);
1629
1630 /* In some cases the alignment of the pitch might be > to the tile size
1631 * (for example Gfx12 CCS requires 512B alignment while the tile's width
1632 * can be 128B), so align the row pitch to the alignment.
1633 */
1634 assert(alignment_B >= tile_info->phys_extent_B.width);
1635 return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
1636 }
1637
1638 static uint32_t
isl_calc_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)1639 isl_calc_min_row_pitch(const struct isl_device *dev,
1640 const struct isl_surf_init_info *surf_info,
1641 const struct isl_tile_info *tile_info,
1642 const struct isl_extent4d *phys_total_el,
1643 uint32_t alignment_B)
1644 {
1645 if (tile_info->tiling == ISL_TILING_LINEAR) {
1646 return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1647 alignment_B);
1648 } else {
1649 return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1650 phys_total_el, alignment_B);
1651 }
1652 }
1653
/**
 * Is the pitch `n` in the valid range for a hardware bitfield, if the
 * bitfield's size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1.  For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * A `bits` of 0 rejects every pitch.  `n` must be non-zero.
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   /* A field of 32 or more bits can hold every non-zero 32-bit pitch.
    * Testing that first also keeps the shift count strictly below the width
    * of the shifted type; shifting by the type's width or more, or shifting
    * a signed 1 into the sign bit, is undefined behavior.
    */
   const bool fits = bits >= 32 || n <= ((uint32_t)1 << bits);

   return likely(bits != 0 && 1 <= n && fits);
}
1669
1670 static bool
isl_calc_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_total_el,uint32_t * out_row_pitch_B)1671 isl_calc_row_pitch(const struct isl_device *dev,
1672 const struct isl_surf_init_info *surf_info,
1673 const struct isl_tile_info *tile_info,
1674 enum isl_dim_layout dim_layout,
1675 const struct isl_extent4d *phys_total_el,
1676 uint32_t *out_row_pitch_B)
1677 {
1678 uint32_t alignment_B =
1679 isl_calc_row_pitch_alignment(dev, surf_info, tile_info);
1680
1681 const uint32_t min_row_pitch_B =
1682 isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1683 alignment_B);
1684
1685 if (surf_info->row_pitch_B != 0) {
1686 if (surf_info->row_pitch_B < min_row_pitch_B)
1687 return false;
1688
1689 if (surf_info->row_pitch_B % alignment_B != 0)
1690 return false;
1691 }
1692
1693 const uint32_t row_pitch_B =
1694 surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B;
1695
1696 const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1697
1698 if (row_pitch_B == 0)
1699 return false;
1700
1701 if (dim_layout == ISL_DIM_LAYOUT_GFX9_1D) {
1702 /* SurfacePitch is ignored for this layout. */
1703 goto done;
1704 }
1705
1706 if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1707 ISL_SURF_USAGE_TEXTURE_BIT |
1708 ISL_SURF_USAGE_STORAGE_BIT)) &&
1709 !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1710 return false;
1711
1712 if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1713 ISL_SURF_USAGE_MCS_BIT)) &&
1714 !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1715 return false;
1716
1717 if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1718 !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1719 return false;
1720
1721 if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1722 !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1723 return false;
1724
1725 const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1726 _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1727 _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1728
1729 if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1730 !pitch_in_range(row_pitch_B, stencil_pitch_bits))
1731 return false;
1732
1733 done:
1734 *out_row_pitch_B = row_pitch_B;
1735 return true;
1736 }
1737
/**
 * Computes the complete layout of the surface described by @info and, on
 * success, stores it in @surf.
 *
 * The tiling, dimension layout, MSAA layout, image alignment, physical
 * extents, array pitch span and row pitch are each produced by a dedicated
 * helper.  This function then derives the total size in bytes and the base
 * address alignment from those results.
 *
 * Returns false without writing to @surf when no tiling or MSAA layout can
 * be chosen, when the row pitch calculation fails, or when the resulting
 * size exceeds the per-generation maximum checked below.  Returns true
 * otherwise.
 */
bool
isl_surf_init_s(const struct isl_device *dev,
                struct isl_surf *surf,
                const struct isl_surf_init_info *restrict info)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

   /* The logical extent is taken verbatim from the caller's request. */
   const struct isl_extent4d logical_level0_px = {
      .w = info->width,
      .h = info->height,
      .d = info->depth,
      .a = info->array_len,
   };

   enum isl_tiling tiling;
   if (!isl_surf_choose_tiling(dev, info, &tiling))
      return false;

   const enum isl_dim_layout dim_layout =
      isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);

   enum isl_msaa_layout msaa_layout;
   if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
       return false;

   struct isl_tile_info tile_info;
   isl_tiling_get_info(tiling, info->dim, msaa_layout, fmtl->bpb,
                       info->samples, &tile_info);

   struct isl_extent3d image_align_el;
   isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
                                 &image_align_el);

   struct isl_extent3d image_align_sa =
      isl_extent3d_el_to_sa(info->format, image_align_el);

   struct isl_extent4d phys_level0_sa;
   isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
                                  &phys_level0_sa);

   enum isl_array_pitch_span array_pitch_span =
      isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);

   uint32_t array_pitch_el_rows;
   struct isl_extent4d phys_total_el;
   isl_calc_phys_total_extent_el(dev, info, &tile_info,
                                 dim_layout, msaa_layout,
                                 &image_align_sa, &phys_level0_sa,
                                 array_pitch_span, &array_pitch_el_rows,
                                 &phys_total_el);

   uint32_t row_pitch_B;
   if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
                           &phys_total_el, &row_pitch_B))
      return false;

   uint32_t base_alignment_B;
   uint64_t size_B;
   if (tiling == ISL_TILING_LINEAR) {
      /* LINEAR tiling has no concept of intra-tile arrays */
      assert(phys_total_el.d == 1 && phys_total_el.a == 1);

      /* Widen before multiplying so the product is computed in 64 bits. */
      size_B = (uint64_t) row_pitch_B * phys_total_el.h;

      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
       *
       *    "The Base Address for linear render target surfaces and surfaces
       *    accessed with the typed surface read/write data port messages must
       *    be element-size aligned, for non-YUV surface formats, or a
       *    multiple of 2 element-sizes for YUV surface formats. Other linear
       *    surfaces have no alignment requirements (byte alignment is
       *    sufficient.)"
       */
      base_alignment_B = MAX(1, info->min_alignment_B);
      if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
         if (isl_format_is_yuv(info->format)) {
            /* bpb / 4 is twice the element size in bytes (bpb / 8). */
            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4);
         } else {
            base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8);
         }
      }
      base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);

      /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
       *
       *     "For Linear memory, this field specifies the stride in chunks of
       *     64 bytes (1 cache line)."
       */
      if (isl_surf_usage_is_display(info->usage))
         base_alignment_B = MAX(base_alignment_B, 64);
   } else {
      /* Pitches must make sense with the tiling */
      assert(row_pitch_B % tile_info.phys_extent_B.width == 0);

      /* Work out how many slices are stacked vertically and how many tile
       * rows separate consecutive slices.  Only one of depth and array
       * length may exceed one.
       */
      uint32_t array_slices, array_pitch_tl_rows;
      if (phys_total_el.d > 1) {
         assert(phys_total_el.a == 1);
         array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
                                              tile_info.logical_extent_el.h);
         array_slices = isl_align_div(phys_total_el.d,
                                      tile_info.logical_extent_el.d);
      } else if (phys_total_el.a > 1) {
         assert(phys_total_el.d == 1);
         array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
                                              tile_info.logical_extent_el.h);
         array_slices = isl_align_div(phys_total_el.a,
                                      tile_info.logical_extent_el.a);
      } else {
         assert(phys_total_el.d == 1 && phys_total_el.a == 1);
         array_pitch_tl_rows = 0;
         array_slices = 1;
      }

      /* Total height in tiles: a full array pitch for every slice but the
       * last, plus the tile rows needed by the final slice itself.
       */
      const uint32_t total_h_tl =
         (array_slices - 1) * array_pitch_tl_rows +
         isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);

      size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B;

      const uint32_t tile_size_B = tile_info.phys_extent_B.width *
                                   tile_info.phys_extent_B.height;
      assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
      base_alignment_B = MAX(info->min_alignment_B, tile_size_B);

      /* The diagram in the Bspec section Memory Compression - Gfx12, shows
       * that the CCS is indexed in 256B chunks. However, the
       * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
       * pages. We currently don't assign the usage field like we do for main
       * surfaces, so just use 4K for now.
       */
      if (tiling == ISL_TILING_GFX12_CCS)
         base_alignment_B = MAX(base_alignment_B, 4096);

      /* Gfx12+ requires that images be 64K-aligned if they're going to be
       * used with CCS.  This is because the Aux translation table maps main
       * surface addresses to aux addresses at a 64K (in the main surface)
       * granularity.  Because we don't know for sure in ISL if a surface will
       * use CCS, we have to guess based on the DISABLE_AUX usage bit.  The
       * one thing we do know is that we haven't enabled CCS on linear images
       * yet so we can avoid the extra alignment there.
       */
      if (ISL_GFX_VER(dev) >= 12 &&
          !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
         base_alignment_B = MAX(base_alignment_B, 64 * 1024);
      }
   }

   /* Reject surfaces that exceed the maximum size for this generation. */
   if (ISL_GFX_VER(dev) < 9) {
      /* From the Broadwell PRM Vol 5, Surface Layout:
       *
       *    "In addition to restrictions on maximum height, width, and depth,
       *     surfaces are also restricted to a maximum size in bytes. This
       *     maximum is 2 GB for all products and all surface types."
       *
       * This comment is applicable to all Pre-gfx9 platforms.
       */
      if (size_B > (uint64_t) 1 << 31)
         return false;
   } else if (ISL_GFX_VER(dev) < 11) {
      /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
       *    "In addition to restrictions on maximum height, width, and depth,
       *     surfaces are also restricted to a maximum size of 2^38 bytes.
       *     All pixels within the surface must be contained within 2^38 bytes
       *     of the base address."
       */
      if (size_B > (uint64_t) 1 << 38)
         return false;
   } else {
      /* gfx11+ platforms raised this limit to 2^44 bytes. */
      if (size_B > (uint64_t) 1 << 44)
         return false;
   }

   /* Everything validated: publish the result in one assignment. */
   *surf = (struct isl_surf) {
      .dim = info->dim,
      .dim_layout = dim_layout,
      .msaa_layout = msaa_layout,
      .tiling = tiling,
      .format = info->format,

      .levels = info->levels,
      .samples = info->samples,

      .image_alignment_el = image_align_el,
      .logical_level0_px = logical_level0_px,
      .phys_level0_sa = phys_level0_sa,

      .size_B = size_B,
      .alignment_B = base_alignment_B,
      .row_pitch_B = row_pitch_B,
      .array_pitch_el_rows = array_pitch_el_rows,
      .array_pitch_span = array_pitch_span,

      .usage = info->usage,
   };

   return true;
}
1936
1937 void
isl_surf_get_tile_info(const struct isl_surf * surf,struct isl_tile_info * tile_info)1938 isl_surf_get_tile_info(const struct isl_surf *surf,
1939 struct isl_tile_info *tile_info)
1940 {
1941 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1942 isl_tiling_get_info(surf->tiling, surf->dim, surf->msaa_layout, fmtl->bpb,
1943 surf->samples, tile_info);
1944 }
1945
/**
 * Initializes @hiz_surf as the HiZ surface that accompanies the depth
 * surface @surf.
 *
 * Returns false when @surf has no depth usage, is not Y-tiled, or has a
 * format the switch below rejects.  Otherwise returns the result of
 * isl_surf_init() for a surface of format ISL_FORMAT_HIZ with the same
 * logical dimensions and miplevel count as @surf.
 *
 * The combined depth/stencil cases on gfx5 are marked unreachable because
 * they are untested.
 */
bool
isl_surf_get_hiz_surf(const struct isl_device *dev,
                      const struct isl_surf *surf,
                      struct isl_surf *hiz_surf)
{
   assert(ISL_GFX_VER(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));

   if (!isl_surf_usage_is_depth(surf->usage))
      return false;

   /* HiZ only works with Y-tiled depth buffers */
   if (!isl_tiling_is_any_y(surf->tiling))
      return false;

   /* On SNB+, compressed depth buffers cannot be interleaved with stencil. */
   switch (surf->format) {
   case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
      if (isl_surf_usage_is_depth_and_stencil(surf->usage)) {
         assert(ISL_GFX_VER(dev) == 5);
         unreachable("This should work, but is untested");
      }
      FALLTHROUGH;
   case ISL_FORMAT_R16_UNORM:
   case ISL_FORMAT_R32_FLOAT:
      /* Supported depth formats: continue on to build the HiZ surface. */
      break;
   case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
      if (ISL_GFX_VER(dev) == 5) {
         assert(isl_surf_usage_is_depth_and_stencil(surf->usage));
         unreachable("This should work, but is untested");
      }
      FALLTHROUGH;
   default:
      return false;
   }

   /* Multisampled depth is always interleaved */
   assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
          surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);

   /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
    *
    *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
    *    Target View Extent, and Depth Coordinate Offset X/Y of the
    *    hierarchical depth buffer are inherited from the depth buffer. The
    *    height and width of the hierarchical depth buffer that must be
    *    allocated are computed by the following formulas, where HZ is the
    *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
    *    Z_Width, and Z_Depth values given in these formulas are those present
    *    in 3DSTATE_DEPTH_BUFFER incremented by one.
    *
    *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
    *    being applied to the table below if Number of Multisamples is set to
    *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
    *    Z_Width must be multiplied by 4 before being applied to the table
    *    below if Number of Multisamples is set to NUMSAMPLES_8."
    *
    * In the Sky Lake PRM, the second paragraph is replaced with this:
    *
    *    "The Z_Height and Z_Width values must equal those present in
    *    3DSTATE_DEPTH_BUFFER incremented by one."
    *
    * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
    * block corresponds to a region of 8x4 samples in the primary depth
    * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
    * a region of 8x4 pixels in the primary depth surface regardless of the
    * number of samples.  The dimensions of a HiZ block in both pixels and
    * samples are given in the table below:
    *
    *           | SNB - BDW |     SKL+
    *     ------+-----------+-------------
    *       1x  |  8 x 4 sa |   8 x 4 sa
    *      MSAA |  8 x 4 px |   8 x 4 px
    *     ------+-----------+-------------
    *       2x  |  8 x 4 sa |  16 x 4 sa
    *      MSAA |  4 x 4 px |   8 x 4 px
    *     ------+-----------+-------------
    *       4x  |  8 x 4 sa |  16 x 8 sa
    *      MSAA |  4 x 2 px |   8 x 4 px
    *     ------+-----------+-------------
    *       8x  |  8 x 4 sa |  32 x 8 sa
    *      MSAA |  2 x 2 px |   8 x 4 px
    *     ------+-----------+-------------
    *      16x  |    N/A    | 32 x 16 sa
    *      MSAA |    N/A    |  8 x  4 px
    *     ------+-----------+-------------
    *
    * There are a number of different ways that this discrepancy could be
    * handled.  The way we have chosen is to simply make MSAA HiZ have the
    * same number of samples as the parent surface pre-Sky Lake and always be
    * single-sampled on Sky Lake and above.  Since the block sizes of
    * compressed formats are given in samples, this neatly handles everything
    * without the need for additional HiZ formats with different block sizes
    * on SKL+.
    */
   const unsigned samples = ISL_GFX_VER(dev) >= 9 ? 1 : surf->samples;

   return isl_surf_init(dev, hiz_surf,
                        .dim = surf->dim,
                        .format = ISL_FORMAT_HIZ,
                        .width = surf->logical_level0_px.width,
                        .height = surf->logical_level0_px.height,
                        .depth = surf->logical_level0_px.depth,
                        .levels = surf->levels,
                        .array_len = surf->logical_level0_px.array_len,
                        .samples = samples,
                        .usage = ISL_SURF_USAGE_HIZ_BIT,
                        .tiling_flags = ISL_TILING_HIZ_BIT);
}
2054
2055 bool
isl_surf_get_mcs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * mcs_surf)2056 isl_surf_get_mcs_surf(const struct isl_device *dev,
2057 const struct isl_surf *surf,
2058 struct isl_surf *mcs_surf)
2059 {
2060 /* It must be multisampled with an array layout */
2061 if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY)
2062 return false;
2063
2064 if (mcs_surf->size_B > 0)
2065 return false;
2066
2067 /* The following are true of all multisampled surfaces */
2068 assert(surf->samples > 1);
2069 assert(surf->dim == ISL_SURF_DIM_2D);
2070 assert(surf->levels == 1);
2071 assert(surf->logical_level0_px.depth == 1);
2072
2073 /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
2074 *
2075 * This field must be set to 0 for all SINT MSRTs when all RT channels
2076 * are not written
2077 *
2078 * In practice this means that we have to disable MCS for all signed
2079 * integer MSAA buffers. The alternative, to disable MCS only when one
2080 * of the render target channels is disabled, is impractical because it
2081 * would require converting between CMS and UMS MSAA layouts on the fly,
2082 * which is expensive.
2083 */
2084 if (ISL_GFX_VER(dev) == 7 && isl_format_has_sint_channel(surf->format))
2085 return false;
2086
2087 /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
2088 * bits which means the maximum pitch of a compression surface is 512
2089 * tiles or 64KB (since MCS is always Y-tiled). Since a 16x MCS buffer is
2090 * 64bpp, this gives us a maximum width of 8192 pixels. We can create
2091 * larger multisampled surfaces, we just can't compress them. For 2x, 4x,
2092 * and 8x, we have enough room for the full 16k supported by the hardware.
2093 */
2094 if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
2095 return false;
2096
2097 enum isl_format mcs_format;
2098 switch (surf->samples) {
2099 case 2: mcs_format = ISL_FORMAT_MCS_2X; break;
2100 case 4: mcs_format = ISL_FORMAT_MCS_4X; break;
2101 case 8: mcs_format = ISL_FORMAT_MCS_8X; break;
2102 case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
2103 default:
2104 unreachable("Invalid sample count");
2105 }
2106
2107 return isl_surf_init(dev, mcs_surf,
2108 .dim = ISL_SURF_DIM_2D,
2109 .format = mcs_format,
2110 .width = surf->logical_level0_px.width,
2111 .height = surf->logical_level0_px.height,
2112 .depth = 1,
2113 .levels = 1,
2114 .array_len = surf->logical_level0_px.array_len,
2115 .samples = 1, /* MCS surfaces are really single-sampled */
2116 .usage = ISL_SURF_USAGE_MCS_BIT,
2117 .tiling_flags = ISL_TILING_Y0_BIT);
2118 }
2119
/**
 * Reports whether a CCS auxiliary surface may be used with @surf on @dev.
 *
 * @hiz_or_mcs_surf is the HiZ or MCS surface that accompanies @surf, if
 * any; it may be NULL.  On gfx12+ it must be present with a non-zero size
 * for depth surfaces (HiZ) and for multisampled color surfaces (MCS).
 *
 * Returns true only when every device, usage, format, tiling and layout
 * restriction checked below is satisfied.
 */
bool
isl_surf_supports_ccs(const struct isl_device *dev,
                      const struct isl_surf *surf,
                      const struct isl_surf *hiz_or_mcs_surf)
{
   /* CCS support does not exist prior to Gfx7 */
   if (ISL_GFX_VER(dev) <= 6)
      return false;

   /* Wa_22011186057: Disable compression on ADL-P A0 */
   if (dev->info->is_alderlake && dev->info->gt == 2 &&
       dev->info->revision == 0)
      return false;

   /* The surface's own usage flags asked for no auxiliary surface. */
   if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
      return false;

   if (isl_format_is_compressed(surf->format))
      return false;

   /* Only power-of-two bpb values are accepted. */
   if (!isl_is_pow2(isl_format_get_layout(surf->format)->bpb))
      return false;

   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
    *
    *     - Support is limited to tiled render targets.
    *
    * From the Skylake documentation, it is made clear that X-tiling is no
    * longer supported:
    *
    *     - MCS and Lossless compression is supported for
    *       TiledY/TileYs/TileYf non-MSRTs only.
    *
    * From the BSpec (44930) for Gfx12:
    *
    *    Linear CCS is only allowed for Untyped Buffers but only via HDC
    *    Data-Port messages.
    *
    * We never use untyped messages on surfaces created by ISL on Gfx9+ so
    * this means linear is out on Gfx12+ as well.
    */
   if (surf->tiling == ISL_TILING_LINEAR)
      return false;

   if (ISL_GFX_VER(dev) >= 12) {
      if (isl_surf_usage_is_stencil(surf->usage)) {
         /* HiZ and MCS aren't allowed with stencil */
         assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);

         /* Multi-sampled stencil cannot have CCS */
         if (surf->samples > 1)
            return false;
      } else if (isl_surf_usage_is_depth(surf->usage)) {
         const struct isl_surf *hiz_surf = hiz_or_mcs_surf;

         /* With depth surfaces, HIZ is required for CCS. */
         if (hiz_surf == NULL || hiz_surf->size_B == 0)
            return false;

         assert(hiz_surf->usage & ISL_SURF_USAGE_HIZ_BIT);
         assert(hiz_surf->tiling == ISL_TILING_HIZ);
         assert(hiz_surf->format == ISL_FORMAT_HIZ);
      } else if (surf->samples > 1) {
         const struct isl_surf *mcs_surf = hiz_or_mcs_surf;

         /* With multisampled color, CCS requires MCS */
         if (mcs_surf == NULL || mcs_surf->size_B == 0)
            return false;

         assert(mcs_surf->usage & ISL_SURF_USAGE_MCS_BIT);
         assert(isl_tiling_is_any_y(mcs_surf->tiling));
         assert(isl_format_is_mcs(mcs_surf->format));
      } else {
         /* Single-sampled color can't have MCS or HiZ */
         assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
      }

      /* On Gfx12, all CCS-compressed surface pitches must be multiples of
       * 512B.
       */
      if (surf->row_pitch_B % 512 != 0)
         return false;

      /* According to Wa_1406738321, 3D textures need a blit to a new
       * surface in order to perform a resolve. For now, just disable CCS.
       */
      if (surf->dim == ISL_SURF_DIM_3D) {
         isl_finishme("%s:%s: CCS for 3D textures is disabled, but a workaround"
                      " is available.", __FILE__, __func__);
         return false;
      }

      /* Wa_1207137018
       *
       * TODO: implement the following workaround, currently covered by the
       * restriction above. If the following conditions are met:
       *
       *    - RENDER_SURFACE_STATE.Surface Type == 3D
       *    - RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE
       *    - RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS
       *
       * Set the value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip
       * that is larger than those present in the surface (i.e. 15)
       */

      /* TODO: Handle the other tiling formats */
      if (surf->tiling != ISL_TILING_Y0)
         return false;
   } else {
      /* ISL_GFX_VER(dev) < 12 */
      if (surf->samples > 1)
         return false;

      /* CCS is only for color images on Gfx7-11 */
      if (isl_surf_usage_is_depth_or_stencil(surf->usage))
         return false;

      /* We're single-sampled color so having HiZ or MCS makes no sense */
      assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);

      /* The PRM doesn't say this explicitly, but fast-clears don't appear to
       * work for 3D textures until gfx9 where the layout of 3D textures
       * changes to match 2D array textures.
       */
      if (ISL_GFX_VER(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
         return false;

      /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
       * Non-MultiSampler Render Target Restrictions):
       *
       *    "Support is for non-mip-mapped and non-array surface types only."
       *
       * This restriction is lifted on gfx8+.  Technically, it may be possible
       * to create a CCS for an arrayed or mipmapped image and only enable
       * CCS_D when rendering to the base slice.  However, there is no
       * documentation telling us what the hardware would do in that case or
       * what it does if you walk off the base slice.  (Does it ignore CCS or
       * does it start scribbling over random memory?)  We play it safe and
       * just follow the docs and don't allow CCS_D for arrayed or mip-mapped
       * surfaces.
       */
      if (ISL_GFX_VER(dev) <= 7 &&
          (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
         return false;

      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "Fast Color Clear" bullet (p326):
       *
       *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
       *       64bpp, and 128bpp.
       */
      if (isl_format_get_layout(surf->format)->bpb < 32)
         return false;

      /* From the Skylake documentation, it is made clear that X-tiling is no
       * longer supported:
       *
       *     - MCS and Lossless compression is supported for
       *       TiledY/TileYs/TileYf non-MSRTs only.
       */
      if (ISL_GFX_VER(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling))
         return false;
   }

   return true;
}
2287
2288 bool
isl_surf_get_ccs_surf(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * hiz_or_mcs_surf,struct isl_surf * ccs_surf,uint32_t row_pitch_B)2289 isl_surf_get_ccs_surf(const struct isl_device *dev,
2290 const struct isl_surf *surf,
2291 const struct isl_surf *hiz_or_mcs_surf,
2292 struct isl_surf *ccs_surf,
2293 uint32_t row_pitch_B)
2294 {
2295 if (!isl_surf_supports_ccs(dev, surf, hiz_or_mcs_surf))
2296 return false;
2297
2298 if (ISL_GFX_VER(dev) >= 12) {
2299 enum isl_format ccs_format;
2300 switch (isl_format_get_layout(surf->format)->bpb) {
2301 case 8: ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0; break;
2302 case 16: ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0; break;
2303 case 32: ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0; break;
2304 case 64: ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0; break;
2305 case 128: ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0; break;
2306 default:
2307 return false;
2308 }
2309
2310 /* On Gfx12, the CCS is a scaled-down version of the main surface. We
2311 * model this as the CCS compressing a 2D-view of the entire surface.
2312 */
2313 const bool ok =
2314 isl_surf_init(dev, ccs_surf,
2315 .dim = ISL_SURF_DIM_2D,
2316 .format = ccs_format,
2317 .width = isl_surf_get_row_pitch_el(surf),
2318 .height = surf->size_B / surf->row_pitch_B,
2319 .depth = 1,
2320 .levels = 1,
2321 .array_len = 1,
2322 .samples = 1,
2323 .row_pitch_B = row_pitch_B,
2324 .usage = ISL_SURF_USAGE_CCS_BIT,
2325 .tiling_flags = ISL_TILING_GFX12_CCS_BIT);
2326 assert(!ok || ccs_surf->size_B == surf->size_B / 256);
2327 return ok;
2328 } else {
2329 enum isl_format ccs_format;
2330 if (ISL_GFX_VER(dev) >= 9) {
2331 switch (isl_format_get_layout(surf->format)->bpb) {
2332 case 32: ccs_format = ISL_FORMAT_GFX9_CCS_32BPP; break;
2333 case 64: ccs_format = ISL_FORMAT_GFX9_CCS_64BPP; break;
2334 case 128: ccs_format = ISL_FORMAT_GFX9_CCS_128BPP; break;
2335 default: unreachable("Unsupported CCS format");
2336 return false;
2337 }
2338 } else if (surf->tiling == ISL_TILING_Y0) {
2339 switch (isl_format_get_layout(surf->format)->bpb) {
2340 case 32: ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_Y; break;
2341 case 64: ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_Y; break;
2342 case 128: ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_Y; break;
2343 default: unreachable("Unsupported CCS format");
2344 }
2345 } else if (surf->tiling == ISL_TILING_X) {
2346 switch (isl_format_get_layout(surf->format)->bpb) {
2347 case 32: ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_X; break;
2348 case 64: ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_X; break;
2349 case 128: ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_X; break;
2350 default: unreachable("Unsupported CCS format");
2351 }
2352 } else {
2353 unreachable("Invalid tiling format");
2354 }
2355
2356 return isl_surf_init(dev, ccs_surf,
2357 .dim = surf->dim,
2358 .format = ccs_format,
2359 .width = surf->logical_level0_px.width,
2360 .height = surf->logical_level0_px.height,
2361 .depth = surf->logical_level0_px.depth,
2362 .levels = surf->levels,
2363 .array_len = surf->logical_level0_px.array_len,
2364 .samples = 1,
2365 .row_pitch_B = row_pitch_B,
2366 .usage = ISL_SURF_USAGE_CCS_BIT,
2367 .tiling_flags = ISL_TILING_CCS_BIT);
2368 }
2369 }
2370
/* Dispatches to the generation-specific function isl_gfx<N>_<func>()
 * selected by ISL_GFX_VERX10(dev), forwarding the remaining arguments to it.
 * An unrecognized generation trips an assert.
 *
 * The expansion is wrapped in do { ... } while (0) so that it is a single
 * statement: "if (x) isl_genX_call(...); else ..." parses as intended.
 * Invocations must be terminated with a semicolon.
 */
#define isl_genX_call(dev, func, ...)              \
   do {                                            \
      switch (ISL_GFX_VERX10(dev)) {               \
      case 40:                                     \
         isl_gfx4_##func(__VA_ARGS__);             \
         break;                                    \
      case 45:                                     \
         /* G45 surface state is the same as gfx5 */ \
      case 50:                                     \
         isl_gfx5_##func(__VA_ARGS__);             \
         break;                                    \
      case 60:                                     \
         isl_gfx6_##func(__VA_ARGS__);             \
         break;                                    \
      case 70:                                     \
         isl_gfx7_##func(__VA_ARGS__);             \
         break;                                    \
      case 75:                                     \
         isl_gfx75_##func(__VA_ARGS__);            \
         break;                                    \
      case 80:                                     \
         isl_gfx8_##func(__VA_ARGS__);             \
         break;                                    \
      case 90:                                     \
         isl_gfx9_##func(__VA_ARGS__);             \
         break;                                    \
      case 110:                                    \
         isl_gfx11_##func(__VA_ARGS__);            \
         break;                                    \
      case 120:                                    \
         isl_gfx12_##func(__VA_ARGS__);            \
         break;                                    \
      case 125:                                    \
         isl_gfx125_##func(__VA_ARGS__);           \
         break;                                    \
      default:                                     \
         assert(!"Unknown hardware generation");   \
      }                                            \
   } while (0)
2408
2409 void
isl_surf_fill_state_s(const struct isl_device * dev,void * state,const struct isl_surf_fill_state_info * restrict info)2410 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
2411 const struct isl_surf_fill_state_info *restrict info)
2412 {
2413 #ifndef NDEBUG
2414 isl_surf_usage_flags_t _base_usage =
2415 info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
2416 ISL_SURF_USAGE_TEXTURE_BIT |
2417 ISL_SURF_USAGE_STORAGE_BIT);
2418 /* They may only specify one of the above bits at a time */
2419 assert(__builtin_popcount(_base_usage) == 1);
2420 /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
2421 assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
2422 #endif
2423
2424 if (info->surf->dim == ISL_SURF_DIM_3D) {
2425 assert(info->view->base_array_layer + info->view->array_len <=
2426 info->surf->logical_level0_px.depth);
2427 } else {
2428 assert(info->view->base_array_layer + info->view->array_len <=
2429 info->surf->logical_level0_px.array_len);
2430 }
2431
2432 isl_genX_call(dev, surf_fill_state_s, dev, state, info);
2433 }
2434
/**
 * Writes buffer surface state into @state by forwarding @dev, @state and
 * @info to the buffer_fill_state_s implementation for the device's hardware
 * generation.
 */
void
isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
                        const struct isl_buffer_fill_state_info *restrict info)
{
   isl_genX_call(dev, buffer_fill_state_s, dev, state, info);
}
2441
/**
 * Writes null surface state into @state by forwarding @state and @info to
 * the null_fill_state implementation for the device's hardware generation.
 * Unlike the other fill-state wrappers, @dev is used only to select the
 * implementation and is not passed on.
 */
void
isl_null_fill_state_s(const struct isl_device *dev, void *state,
                      const struct isl_null_fill_state_info *restrict info)
{
   isl_genX_call(dev, null_fill_state, state, info);
}
2448
2449 void
isl_emit_depth_stencil_hiz_s(const struct isl_device * dev,void * batch,const struct isl_depth_stencil_hiz_emit_info * restrict info)2450 isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
2451 const struct isl_depth_stencil_hiz_emit_info *restrict info)
2452 {
2453 if (info->depth_surf && info->stencil_surf) {
2454 if (!dev->info->has_hiz_and_separate_stencil) {
2455 assert(info->depth_surf == info->stencil_surf);
2456 assert(info->depth_address == info->stencil_address);
2457 }
2458 assert(info->depth_surf->dim == info->stencil_surf->dim);
2459 }
2460
2461 if (info->depth_surf) {
2462 assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
2463 if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
2464 assert(info->view->base_array_layer + info->view->array_len <=
2465 info->depth_surf->logical_level0_px.depth);
2466 } else {
2467 assert(info->view->base_array_layer + info->view->array_len <=
2468 info->depth_surf->logical_level0_px.array_len);
2469 }
2470 }
2471
2472 if (info->stencil_surf) {
2473 assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
2474 if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
2475 assert(info->view->base_array_layer + info->view->array_len <=
2476 info->stencil_surf->logical_level0_px.depth);
2477 } else {
2478 assert(info->view->base_array_layer + info->view->array_len <=
2479 info->stencil_surf->logical_level0_px.array_len);
2480 }
2481 }
2482
2483 isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
2484 }
2485
2486 /**
2487 * A variant of isl_surf_get_image_offset_sa() specific to
2488 * ISL_DIM_LAYOUT_GFX4_2D.
2489 */
2490 static void
get_image_offset_sa_gfx4_2d(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2491 get_image_offset_sa_gfx4_2d(const struct isl_surf *surf,
2492 uint32_t level, uint32_t logical_array_layer,
2493 uint32_t *x_offset_sa,
2494 uint32_t *y_offset_sa)
2495 {
2496 assert(level < surf->levels);
2497 if (surf->dim == ISL_SURF_DIM_3D)
2498 assert(logical_array_layer < surf->logical_level0_px.depth);
2499 else
2500 assert(logical_array_layer < surf->logical_level0_px.array_len);
2501
2502 const struct isl_extent3d image_align_sa =
2503 isl_surf_get_image_alignment_sa(surf);
2504
2505 const uint32_t W0 = surf->phys_level0_sa.width;
2506 const uint32_t H0 = surf->phys_level0_sa.height;
2507
2508 const uint32_t phys_layer = logical_array_layer *
2509 (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
2510
2511 uint32_t x = 0;
2512 uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
2513
2514 for (uint32_t l = 0; l < level; ++l) {
2515 if (l == 1) {
2516 uint32_t W = isl_minify(W0, l);
2517 x += isl_align_npot(W, image_align_sa.w);
2518 } else {
2519 uint32_t H = isl_minify(H0, l);
2520 y += isl_align_npot(H, image_align_sa.h);
2521 }
2522 }
2523
2524 *x_offset_sa = x;
2525 *y_offset_sa = y;
2526 }
2527
2528 /**
2529 * A variant of isl_surf_get_image_offset_sa() specific to
2530 * ISL_DIM_LAYOUT_GFX4_3D.
2531 */
2532 static void
get_image_offset_sa_gfx4_3d(const struct isl_surf * surf,uint32_t level,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2533 get_image_offset_sa_gfx4_3d(const struct isl_surf *surf,
2534 uint32_t level, uint32_t logical_z_offset_px,
2535 uint32_t *x_offset_sa,
2536 uint32_t *y_offset_sa)
2537 {
2538 assert(level < surf->levels);
2539 if (surf->dim == ISL_SURF_DIM_3D) {
2540 assert(surf->phys_level0_sa.array_len == 1);
2541 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
2542 } else {
2543 assert(surf->dim == ISL_SURF_DIM_2D);
2544 assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
2545 assert(surf->phys_level0_sa.array_len == 6);
2546 assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
2547 }
2548
2549 const struct isl_extent3d image_align_sa =
2550 isl_surf_get_image_alignment_sa(surf);
2551
2552 const uint32_t W0 = surf->phys_level0_sa.width;
2553 const uint32_t H0 = surf->phys_level0_sa.height;
2554 const uint32_t D0 = surf->phys_level0_sa.depth;
2555 const uint32_t AL = surf->phys_level0_sa.array_len;
2556
2557 uint32_t x = 0;
2558 uint32_t y = 0;
2559
2560 for (uint32_t l = 0; l < level; ++l) {
2561 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
2562 const uint32_t level_d =
2563 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
2564 image_align_sa.d);
2565 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2566
2567 y += level_h * max_layers_vert;
2568 }
2569
2570 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
2571 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
2572 const uint32_t level_d =
2573 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
2574 image_align_sa.d);
2575
2576 const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
2577
2578 x += level_w * (logical_z_offset_px % max_layers_horiz);
2579 y += level_h * (logical_z_offset_px / max_layers_horiz);
2580
2581 *x_offset_sa = x;
2582 *y_offset_sa = y;
2583 }
2584
2585 static void
get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2586 get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf *surf,
2587 uint32_t level,
2588 uint32_t logical_array_layer,
2589 uint32_t *x_offset_sa,
2590 uint32_t *y_offset_sa)
2591 {
2592 assert(level < surf->levels);
2593 assert(surf->logical_level0_px.depth == 1);
2594 assert(logical_array_layer < surf->logical_level0_px.array_len);
2595
2596 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2597
2598 const struct isl_extent3d image_align_sa =
2599 isl_surf_get_image_alignment_sa(surf);
2600
2601 struct isl_tile_info tile_info;
2602 isl_surf_get_tile_info(surf, &tile_info);
2603 const struct isl_extent2d tile_extent_sa = {
2604 .w = tile_info.logical_extent_el.w * fmtl->bw,
2605 .h = tile_info.logical_extent_el.h * fmtl->bh,
2606 };
2607 /* Tile size is a multiple of image alignment */
2608 assert(tile_extent_sa.w % image_align_sa.w == 0);
2609 assert(tile_extent_sa.h % image_align_sa.h == 0);
2610
2611 const uint32_t W0 = surf->phys_level0_sa.w;
2612 const uint32_t H0 = surf->phys_level0_sa.h;
2613
2614 /* Each image has the same height as LOD0 because the hardware thinks
2615 * everything is LOD0
2616 */
2617 const uint32_t H = isl_align(H0, image_align_sa.h);
2618
2619 /* Quick sanity check for consistency */
2620 if (surf->phys_level0_sa.array_len > 1)
2621 assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2622
2623 uint32_t x = 0, y = 0;
2624 for (uint32_t l = 0; l < level; ++l) {
2625 const uint32_t W = isl_minify(W0, l);
2626
2627 const uint32_t w = isl_align(W, tile_extent_sa.w);
2628 const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2629 tile_extent_sa.h);
2630
2631 if (l == 0) {
2632 y += h;
2633 } else {
2634 x += w;
2635 }
2636 }
2637
2638 y += H * logical_array_layer;
2639
2640 *x_offset_sa = x;
2641 *y_offset_sa = y;
2642 }
2643
2644 /**
2645 * A variant of isl_surf_get_image_offset_sa() specific to
2646 * ISL_DIM_LAYOUT_GFX9_1D.
2647 */
2648 static void
get_image_offset_sa_gfx9_1d(const struct isl_surf * surf,uint32_t level,uint32_t layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2649 get_image_offset_sa_gfx9_1d(const struct isl_surf *surf,
2650 uint32_t level, uint32_t layer,
2651 uint32_t *x_offset_sa,
2652 uint32_t *y_offset_sa)
2653 {
2654 assert(level < surf->levels);
2655 assert(layer < surf->phys_level0_sa.array_len);
2656 assert(surf->phys_level0_sa.height == 1);
2657 assert(surf->phys_level0_sa.depth == 1);
2658 assert(surf->samples == 1);
2659
2660 const uint32_t W0 = surf->phys_level0_sa.width;
2661 const struct isl_extent3d image_align_sa =
2662 isl_surf_get_image_alignment_sa(surf);
2663
2664 uint32_t x = 0;
2665
2666 for (uint32_t l = 0; l < level; ++l) {
2667 uint32_t W = isl_minify(W0, l);
2668 uint32_t w = isl_align_npot(W, image_align_sa.w);
2669
2670 x += w;
2671 }
2672
2673 *x_offset_sa = x;
2674 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2675 }
2676
2677 /**
2678 * Calculate the offset, in units of surface samples, to a subimage in the
2679 * surface.
2680 *
2681 * @invariant level < surface levels
2682 * @invariant logical_array_layer < logical array length of surface
2683 * @invariant logical_z_offset_px < logical depth of surface at level
2684 */
2685 void
isl_surf_get_image_offset_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa,uint32_t * z_offset_sa,uint32_t * array_offset)2686 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2687 uint32_t level,
2688 uint32_t logical_array_layer,
2689 uint32_t logical_z_offset_px,
2690 uint32_t *x_offset_sa,
2691 uint32_t *y_offset_sa,
2692 uint32_t *z_offset_sa,
2693 uint32_t *array_offset)
2694 {
2695 assert(level < surf->levels);
2696 assert(logical_array_layer < surf->logical_level0_px.array_len);
2697 assert(logical_z_offset_px
2698 < isl_minify(surf->logical_level0_px.depth, level));
2699
2700 switch (surf->dim_layout) {
2701 case ISL_DIM_LAYOUT_GFX9_1D:
2702 get_image_offset_sa_gfx9_1d(surf, level, logical_array_layer,
2703 x_offset_sa, y_offset_sa);
2704 *z_offset_sa = 0;
2705 *array_offset = 0;
2706 break;
2707 case ISL_DIM_LAYOUT_GFX4_2D:
2708 get_image_offset_sa_gfx4_2d(surf, level, logical_array_layer
2709 + logical_z_offset_px,
2710 x_offset_sa, y_offset_sa);
2711 *z_offset_sa = 0;
2712 *array_offset = 0;
2713 break;
2714 case ISL_DIM_LAYOUT_GFX4_3D:
2715 get_image_offset_sa_gfx4_3d(surf, level, logical_array_layer +
2716 logical_z_offset_px,
2717 x_offset_sa, y_offset_sa);
2718 *z_offset_sa = 0;
2719 *array_offset = 0;
2720 break;
2721 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
2722 get_image_offset_sa_gfx6_stencil_hiz(surf, level, logical_array_layer +
2723 logical_z_offset_px,
2724 x_offset_sa, y_offset_sa);
2725 *z_offset_sa = 0;
2726 *array_offset = 0;
2727 break;
2728
2729 default:
2730 unreachable("not reached");
2731 }
2732 }
2733
2734 void
isl_surf_get_image_offset_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)2735 isl_surf_get_image_offset_el(const struct isl_surf *surf,
2736 uint32_t level,
2737 uint32_t logical_array_layer,
2738 uint32_t logical_z_offset_px,
2739 uint32_t *x_offset_el,
2740 uint32_t *y_offset_el,
2741 uint32_t *z_offset_el,
2742 uint32_t *array_offset)
2743 {
2744 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2745
2746 assert(level < surf->levels);
2747 assert(logical_array_layer < surf->logical_level0_px.array_len);
2748 assert(logical_z_offset_px
2749 < isl_minify(surf->logical_level0_px.depth, level));
2750
2751 uint32_t x_offset_sa, y_offset_sa, z_offset_sa;
2752 isl_surf_get_image_offset_sa(surf, level,
2753 logical_array_layer,
2754 logical_z_offset_px,
2755 &x_offset_sa,
2756 &y_offset_sa,
2757 &z_offset_sa,
2758 array_offset);
2759
2760 *x_offset_el = x_offset_sa / fmtl->bw;
2761 *y_offset_el = y_offset_sa / fmtl->bh;
2762 *z_offset_el = z_offset_sa / fmtl->bd;
2763 }
2764
2765 void
isl_surf_get_image_offset_B_tile_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2766 isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2767 uint32_t level,
2768 uint32_t logical_array_layer,
2769 uint32_t logical_z_offset_px,
2770 uint64_t *offset_B,
2771 uint32_t *x_offset_sa,
2772 uint32_t *y_offset_sa)
2773 {
2774 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2775
2776 uint32_t x_offset_el, y_offset_el;
2777 isl_surf_get_image_offset_B_tile_el(surf, level,
2778 logical_array_layer,
2779 logical_z_offset_px,
2780 offset_B,
2781 &x_offset_el,
2782 &y_offset_el);
2783
2784 if (x_offset_sa) {
2785 *x_offset_sa = x_offset_el * fmtl->bw;
2786 } else {
2787 assert(x_offset_el == 0);
2788 }
2789
2790 if (y_offset_sa) {
2791 *y_offset_sa = y_offset_el * fmtl->bh;
2792 } else {
2793 assert(y_offset_el == 0);
2794 }
2795 }
2796
2797 void
isl_surf_get_image_offset_B_tile_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el)2798 isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
2799 uint32_t level,
2800 uint32_t logical_array_layer,
2801 uint32_t logical_z_offset_px,
2802 uint64_t *offset_B,
2803 uint32_t *x_offset_el,
2804 uint32_t *y_offset_el)
2805 {
2806 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2807
2808 uint32_t total_x_offset_el, total_y_offset_el;
2809 uint32_t total_z_offset_el, total_array_offset;
2810 isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2811 logical_z_offset_px,
2812 &total_x_offset_el,
2813 &total_y_offset_el,
2814 &total_z_offset_el,
2815 &total_array_offset);
2816
2817 uint32_t z_offset_el, array_offset;
2818 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2819 surf->msaa_layout, fmtl->bpb,
2820 surf->samples,
2821 surf->row_pitch_B,
2822 surf->array_pitch_el_rows,
2823 total_x_offset_el,
2824 total_y_offset_el,
2825 total_z_offset_el,
2826 total_array_offset,
2827 offset_B,
2828 x_offset_el,
2829 y_offset_el,
2830 &z_offset_el,
2831 &array_offset);
2832 assert(z_offset_el == 0);
2833 assert(array_offset == 0);
2834 }
2835
2836 void
isl_surf_get_image_range_B_tile(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * start_tile_B,uint64_t * end_tile_B)2837 isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
2838 uint32_t level,
2839 uint32_t logical_array_layer,
2840 uint32_t logical_z_offset_px,
2841 uint64_t *start_tile_B,
2842 uint64_t *end_tile_B)
2843 {
2844 uint32_t start_x_offset_el, start_y_offset_el;
2845 uint32_t start_z_offset_el, start_array_slice;
2846 isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2847 logical_z_offset_px,
2848 &start_x_offset_el,
2849 &start_y_offset_el,
2850 &start_z_offset_el,
2851 &start_array_slice);
2852
2853 /* Compute the size of the subimage in surface elements */
2854 const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level);
2855 const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level);
2856 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2857 const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw);
2858 const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh);
2859
2860 /* Find the last pixel */
2861 uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1;
2862 uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1;
2863
2864 /* We only consider one Z or array slice */
2865 const uint32_t end_z_offset_el = start_z_offset_el;
2866 const uint32_t end_array_slice = start_array_slice;
2867
2868 UNUSED uint32_t x_offset_el, y_offset_el, z_offset_el, array_slice;
2869 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2870 surf->msaa_layout, fmtl->bpb,
2871 surf->samples,
2872 surf->row_pitch_B,
2873 surf->array_pitch_el_rows,
2874 start_x_offset_el,
2875 start_y_offset_el,
2876 start_z_offset_el,
2877 start_array_slice,
2878 start_tile_B,
2879 &x_offset_el,
2880 &y_offset_el,
2881 &z_offset_el,
2882 &array_slice);
2883
2884 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2885 surf->msaa_layout, fmtl->bpb,
2886 surf->samples,
2887 surf->row_pitch_B,
2888 surf->array_pitch_el_rows,
2889 end_x_offset_el,
2890 end_y_offset_el,
2891 end_z_offset_el,
2892 end_array_slice,
2893 end_tile_B,
2894 &x_offset_el,
2895 &y_offset_el,
2896 &z_offset_el,
2897 &array_slice);
2898
2899 /* We want the range we return to be exclusive but the tile containing the
2900 * last pixel (what we just calculated) is inclusive. Add one.
2901 */
2902 (*end_tile_B)++;
2903
2904 assert(*end_tile_B <= surf->size_B);
2905 }
2906
2907 void
isl_surf_get_image_surf(const struct isl_device * dev,const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,struct isl_surf * image_surf,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2908 isl_surf_get_image_surf(const struct isl_device *dev,
2909 const struct isl_surf *surf,
2910 uint32_t level,
2911 uint32_t logical_array_layer,
2912 uint32_t logical_z_offset_px,
2913 struct isl_surf *image_surf,
2914 uint64_t *offset_B,
2915 uint32_t *x_offset_sa,
2916 uint32_t *y_offset_sa)
2917 {
2918 isl_surf_get_image_offset_B_tile_sa(surf,
2919 level,
2920 logical_array_layer,
2921 logical_z_offset_px,
2922 offset_B,
2923 x_offset_sa,
2924 y_offset_sa);
2925
2926 /* Even for cube maps there will be only single face, therefore drop the
2927 * corresponding flag if present.
2928 */
2929 const isl_surf_usage_flags_t usage =
2930 surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2931
2932 bool ok UNUSED;
2933 ok = isl_surf_init(dev, image_surf,
2934 .dim = ISL_SURF_DIM_2D,
2935 .format = surf->format,
2936 .width = isl_minify(surf->logical_level0_px.w, level),
2937 .height = isl_minify(surf->logical_level0_px.h, level),
2938 .depth = 1,
2939 .levels = 1,
2940 .array_len = 1,
2941 .samples = surf->samples,
2942 .row_pitch_B = surf->row_pitch_B,
2943 .usage = usage,
2944 .tiling_flags = (1 << surf->tiling));
2945 assert(ok);
2946 }
2947
2948 bool
isl_surf_get_uncompressed_surf(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_view * view,struct isl_surf * ucompr_surf,struct isl_view * ucompr_view,uint64_t * offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el)2949 isl_surf_get_uncompressed_surf(const struct isl_device *dev,
2950 const struct isl_surf *surf,
2951 const struct isl_view *view,
2952 struct isl_surf *ucompr_surf,
2953 struct isl_view *ucompr_view,
2954 uint64_t *offset_B,
2955 uint32_t *x_offset_el,
2956 uint32_t *y_offset_el)
2957 {
2958 const struct isl_format_layout *fmtl =
2959 isl_format_get_layout(surf->format);
2960 const enum isl_format view_format = view->format;
2961
2962 assert(fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1);
2963 assert(isl_format_is_compressed(surf->format));
2964 assert(!isl_format_is_compressed(view->format));
2965 assert(isl_format_get_layout(view->format)->bpb == fmtl->bpb);
2966 assert(view->levels == 1);
2967
2968 const uint32_t view_width_px =
2969 isl_minify(surf->logical_level0_px.width, view->base_level);
2970 const uint32_t view_height_px =
2971 isl_minify(surf->logical_level0_px.height, view->base_level);
2972
2973 assert(surf->samples == 1);
2974 const uint32_t view_width_el = isl_align_div_npot(view_width_px, fmtl->bw);
2975 const uint32_t view_height_el = isl_align_div_npot(view_height_px, fmtl->bh);
2976
2977 /* If we ever enable 3D block formats, we'll need to re-think this */
2978 assert(fmtl->bd == 1);
2979
2980 if (view->array_len > 1) {
2981 /* The Skylake PRM Vol. 2d, "RENDER_SURFACE_STATE::X Offset" says:
2982 *
2983 * "If Surface Array is enabled, this field must be zero."
2984 *
2985 * The PRMs for other hardware have similar text. This is also tricky
2986 * to handle with things like BLORP's SW offsetting because the
2987 * increased surface size required for the offset may result in an image
2988 * height greater than qpitch.
2989 */
2990 if (view->base_level > 0)
2991 return false;
2992
2993 /* On Haswell and earlier, RENDER_SURFACE_STATE doesn't have a QPitch
2994 * field; it only has "array pitch span" which means the QPitch is
2995 * automatically calculated. Since we're smashing the surface format
2996 * (block formats are subtly different) and the number of miplevels,
2997 * that calculation will get thrown off. This means we can't do arrays
2998 * even at LOD0
2999 *
3000 * On Broadwell, we do have a QPitch field which we can control.
3001 * However, HALIGN and VALIGN are specified in pixels and are
3002 * hard-coded to align to exactly the block size of the compressed
3003 * texture. This means that, when reinterpreted as a non-compressed
3004 * the QPitch may be anything but the HW requires it to be properly
3005 * aligned.
3006 */
3007 if (ISL_GFX_VER(dev) < 9)
3008 return false;
3009
3010 *ucompr_surf = *surf;
3011 ucompr_surf->levels = 1;
3012 ucompr_surf->format = view_format;
3013
3014 /* We're making an uncompressed view here. The image dimensions
3015 * need to be scaled down by the block size.
3016 */
3017 assert(ucompr_surf->logical_level0_px.width == view_width_px);
3018 assert(ucompr_surf->logical_level0_px.height == view_height_px);
3019 ucompr_surf->logical_level0_px.width = view_width_el;
3020 ucompr_surf->logical_level0_px.height = view_height_el;
3021 ucompr_surf->phys_level0_sa = isl_surf_get_phys_level0_el(surf);
3022
3023 /* The surface mostly stays as-is; there is no offset */
3024 *offset_B = 0;
3025 *x_offset_el = 0;
3026 *y_offset_el = 0;
3027
3028 /* The view remains the same */
3029 *ucompr_view = *view;
3030 } else {
3031 /* If only one array slice is requested, directly offset to that slice.
3032 * We could, in theory, still use arrays in some cases but BLORP isn't
3033 * prepared for this and everyone who calls this function should be
3034 * prepared to handle an X/Y offset.
3035 */
3036 isl_surf_get_image_offset_B_tile_el(surf,
3037 view->base_level,
3038 surf->dim == ISL_SURF_DIM_3D ?
3039 0 : view->base_array_layer,
3040 surf->dim == ISL_SURF_DIM_3D ?
3041 view->base_array_layer : 0,
3042 offset_B,
3043 x_offset_el,
3044 y_offset_el);
3045
3046 /* Even for cube maps there will be only single face, therefore drop the
3047 * corresponding flag if present.
3048 */
3049 const isl_surf_usage_flags_t usage =
3050 surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
3051
3052 bool ok UNUSED;
3053 ok = isl_surf_init(dev, ucompr_surf,
3054 .dim = ISL_SURF_DIM_2D,
3055 .format = view_format,
3056 .width = view_width_el,
3057 .height = view_height_el,
3058 .depth = 1,
3059 .levels = 1,
3060 .array_len = 1,
3061 .samples = 1,
3062 .row_pitch_B = surf->row_pitch_B,
3063 .usage = usage,
3064 .tiling_flags = (1 << surf->tiling));
3065 assert(ok);
3066
3067 /* The newly created image represents the one subimage we're
3068 * referencing with this view so it only has one array slice and
3069 * miplevel.
3070 */
3071 *ucompr_view = *view;
3072 ucompr_view->base_array_layer = 0;
3073 ucompr_view->base_level = 0;
3074 }
3075
3076 return true;
3077 }
3078
3079 void
isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t bpb,uint32_t samples,uint32_t row_pitch_B,uint32_t array_pitch_el_rows,uint32_t total_x_offset_el,uint32_t total_y_offset_el,uint32_t total_z_offset_el,uint32_t total_array_offset,uint64_t * tile_offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)3080 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
3081 enum isl_surf_dim dim,
3082 enum isl_msaa_layout msaa_layout,
3083 uint32_t bpb,
3084 uint32_t samples,
3085 uint32_t row_pitch_B,
3086 uint32_t array_pitch_el_rows,
3087 uint32_t total_x_offset_el,
3088 uint32_t total_y_offset_el,
3089 uint32_t total_z_offset_el,
3090 uint32_t total_array_offset,
3091 uint64_t *tile_offset_B,
3092 uint32_t *x_offset_el,
3093 uint32_t *y_offset_el,
3094 uint32_t *z_offset_el,
3095 uint32_t *array_offset)
3096 {
3097 if (tiling == ISL_TILING_LINEAR) {
3098 assert(bpb % 8 == 0);
3099 assert(samples == 1);
3100 assert(total_z_offset_el == 0 && total_array_offset == 0);
3101 *tile_offset_B = (uint64_t)total_y_offset_el * row_pitch_B +
3102 (uint64_t)total_x_offset_el * (bpb / 8);
3103 *x_offset_el = 0;
3104 *y_offset_el = 0;
3105 *z_offset_el = 0;
3106 *array_offset = 0;
3107 return;
3108 }
3109
3110 struct isl_tile_info tile_info;
3111 isl_tiling_get_info(tiling, dim, msaa_layout, bpb, samples, &tile_info);
3112
3113 /* Pitches must make sense with the tiling */
3114 assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
3115 if (tile_info.logical_extent_el.d > 1 || tile_info.logical_extent_el.a > 1)
3116 assert(array_pitch_el_rows % tile_info.logical_extent_el.h == 0);
3117
3118 /* For non-power-of-two formats, we need the address to be both tile and
3119 * element-aligned. The easiest way to achieve this is to work with a tile
3120 * that is three times as wide as the regular tile.
3121 *
3122 * The tile info returned by get_tile_info has a logical size that is an
3123 * integer number of tile_info.format_bpb size elements. To scale the
3124 * tile, we scale up the physical width and then treat the logical tile
3125 * size as if it has bpb size elements.
3126 */
3127 const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
3128 tile_info.phys_extent_B.width *= tile_el_scale;
3129
3130 /* Compute the offset into the tile */
3131 *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
3132 *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
3133 *z_offset_el = total_z_offset_el % tile_info.logical_extent_el.d;
3134 *array_offset = total_array_offset % tile_info.logical_extent_el.a;
3135
3136 /* Compute the offset of the tile in units of whole tiles */
3137 uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
3138 uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
3139 uint32_t z_offset_tl = total_z_offset_el / tile_info.logical_extent_el.d;
3140 uint32_t a_offset_tl = total_array_offset / tile_info.logical_extent_el.a;
3141
3142 /* Compute an array pitch in number of tiles */
3143 uint32_t array_pitch_tl_rows =
3144 array_pitch_el_rows / tile_info.logical_extent_el.h;
3145
3146 /* Add the Z and array offset to the Y offset to get a 2D offset */
3147 y_offset_tl += (z_offset_tl + a_offset_tl) * array_pitch_tl_rows;
3148
3149 *tile_offset_B =
3150 (uint64_t)y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
3151 (uint64_t)x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
3152 }
3153
3154 uint32_t
isl_surf_get_depth_format(const struct isl_device * dev,const struct isl_surf * surf)3155 isl_surf_get_depth_format(const struct isl_device *dev,
3156 const struct isl_surf *surf)
3157 {
3158 /* Support for separate stencil buffers began in gfx5. Support for
3159 * interleaved depthstencil buffers ceased in gfx7. The intermediate gens,
3160 * those that supported separate and interleaved stencil, were gfx5 and
3161 * gfx6.
3162 *
3163 * For a list of all available formats, see the Sandybridge PRM >> Volume
3164 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
3165 * Format (p321).
3166 */
3167
3168 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
3169
3170 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
3171
3172 if (has_stencil)
3173 assert(ISL_GFX_VER(dev) < 7);
3174
3175 switch (surf->format) {
3176 default:
3177 unreachable("bad isl depth format");
3178 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
3179 assert(ISL_GFX_VER(dev) < 7);
3180 return 0; /* D32_FLOAT_S8X24_UINT */
3181 case ISL_FORMAT_R32_FLOAT:
3182 assert(!has_stencil);
3183 return 1; /* D32_FLOAT */
3184 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
3185 if (has_stencil) {
3186 assert(ISL_GFX_VER(dev) < 7);
3187 return 2; /* D24_UNORM_S8_UINT */
3188 } else {
3189 assert(ISL_GFX_VER(dev) >= 5);
3190 return 3; /* D24_UNORM_X8_UINT */
3191 }
3192 case ISL_FORMAT_R16_UNORM:
3193 assert(!has_stencil);
3194 return 5; /* D16_UNORM */
3195 }
3196 }
3197
3198 bool
isl_swizzle_supports_rendering(const struct intel_device_info * devinfo,struct isl_swizzle swizzle)3199 isl_swizzle_supports_rendering(const struct intel_device_info *devinfo,
3200 struct isl_swizzle swizzle)
3201 {
3202 if (devinfo->is_haswell) {
3203 /* From the Haswell PRM,
3204 * RENDER_SURFACE_STATE::Shader Channel Select Red
3205 *
3206 * "The Shader channel selects also define which shader channels are
3207 * written to which surface channel. If the Shader channel select is
3208 * SCS_ZERO or SCS_ONE then it is not written to the surface. If the
3209 * shader channel select is SCS_RED it is written to the surface red
3210 * channel and so on. If more than one shader channel select is set
3211 * to the same surface channel only the first shader channel in RGBA
3212 * order will be written."
3213 */
3214 return true;
3215 } else if (devinfo->ver <= 7) {
3216 /* Ivy Bridge and early doesn't have any swizzling */
3217 return isl_swizzle_is_identity(swizzle);
3218 } else {
3219 /* From the Sky Lake PRM Vol. 2d,
3220 * RENDER_SURFACE_STATE::Shader Channel Select Red
3221 *
3222 * "For Render Target, Red, Green and Blue Shader Channel Selects
3223 * MUST be such that only valid components can be swapped i.e. only
3224 * change the order of components in the pixel. Any other values for
3225 * these Shader Channel Select fields are not valid for Render
3226 * Targets. This also means that there MUST not be multiple shader
3227 * channels mapped to the same RT channel."
3228 *
3229 * From the Sky Lake PRM Vol. 2d,
3230 * RENDER_SURFACE_STATE::Shader Channel Select Alpha
3231 *
3232 * "For Render Target, this field MUST be programmed to
3233 * value = SCS_ALPHA."
3234 */
3235 return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
3236 swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
3237 swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
3238 (swizzle.g == ISL_CHANNEL_SELECT_RED ||
3239 swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
3240 swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
3241 (swizzle.b == ISL_CHANNEL_SELECT_RED ||
3242 swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
3243 swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
3244 swizzle.r != swizzle.g &&
3245 swizzle.r != swizzle.b &&
3246 swizzle.g != swizzle.b &&
3247 swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
3248 }
3249 }
3250
3251 static enum isl_channel_select
swizzle_select(enum isl_channel_select chan,struct isl_swizzle swizzle)3252 swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
3253 {
3254 switch (chan) {
3255 case ISL_CHANNEL_SELECT_ZERO:
3256 case ISL_CHANNEL_SELECT_ONE:
3257 return chan;
3258 case ISL_CHANNEL_SELECT_RED:
3259 return swizzle.r;
3260 case ISL_CHANNEL_SELECT_GREEN:
3261 return swizzle.g;
3262 case ISL_CHANNEL_SELECT_BLUE:
3263 return swizzle.b;
3264 case ISL_CHANNEL_SELECT_ALPHA:
3265 return swizzle.a;
3266 default:
3267 unreachable("Invalid swizzle component");
3268 }
3269 }
3270
3271 /**
3272 * Returns the single swizzle that is equivalent to applying the two given
3273 * swizzles in sequence.
3274 */
3275 struct isl_swizzle
isl_swizzle_compose(struct isl_swizzle first,struct isl_swizzle second)3276 isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
3277 {
3278 return (struct isl_swizzle) {
3279 .r = swizzle_select(first.r, second),
3280 .g = swizzle_select(first.g, second),
3281 .b = swizzle_select(first.b, second),
3282 .a = swizzle_select(first.a, second),
3283 };
3284 }
3285
3286 /**
3287 * Returns a swizzle that is the pseudo-inverse of this swizzle.
3288 */
3289 struct isl_swizzle
isl_swizzle_invert(struct isl_swizzle swizzle)3290 isl_swizzle_invert(struct isl_swizzle swizzle)
3291 {
3292 /* Default to zero for channels which do not show up in the swizzle */
3293 enum isl_channel_select chans[4] = {
3294 ISL_CHANNEL_SELECT_ZERO,
3295 ISL_CHANNEL_SELECT_ZERO,
3296 ISL_CHANNEL_SELECT_ZERO,
3297 ISL_CHANNEL_SELECT_ZERO,
3298 };
3299
3300 /* We go in ABGR order so that, if there are any duplicates, the first one
3301 * is taken if you look at it in RGBA order. This is what Haswell hardware
3302 * does for render target swizzles.
3303 */
3304 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
3305 chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
3306 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
3307 chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
3308 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
3309 chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
3310 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
3311 chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
3312
3313 return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
3314 }
3315
3316 /** Applies an inverse swizzle to a color value */
3317 union isl_color_value
isl_color_value_swizzle_inv(union isl_color_value src,struct isl_swizzle swizzle)3318 isl_color_value_swizzle_inv(union isl_color_value src,
3319 struct isl_swizzle swizzle)
3320 {
3321 union isl_color_value dst = { .u32 = { 0, } };
3322
3323 /* We assign colors in ABGR order so that the first one will be taken in
3324 * RGBA precedence order. According to the PRM docs for shader channel
3325 * select, this matches Haswell hardware behavior.
3326 */
3327 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
3328 dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3];
3329 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
3330 dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2];
3331 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
3332 dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1];
3333 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
3334 dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0];
3335
3336 return dst;
3337 }
3338
/**
 * Look up the 8-bit encoding value associated with an ISL format.
 *
 * The mapping is the literal table below; several formats share one
 * encoding (for example many of the UNORM formats map to 0xA).  A format
 * without an entry is a programming error and reaches unreachable().
 *
 * NOTE(review): going by the function name, the value is presumably the
 * format field written into aux-map entries -- confirm against the callers
 * and the hardware documentation.
 */
uint8_t
isl_format_get_aux_map_encoding(enum isl_format format)
{
   switch(format) {
   case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11;
   case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11;
   case ISL_FORMAT_R32G32B32A32_SINT: return 0x12;
   case ISL_FORMAT_R32G32B32A32_UINT: return 0x13;
   case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14;
   case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15;
   case ISL_FORMAT_R16G16B16A16_SINT: return 0x16;
   case ISL_FORMAT_R16G16B16A16_UINT: return 0x17;
   case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10;
   case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10;
   case ISL_FORMAT_R32G32_FLOAT: return 0x11;
   case ISL_FORMAT_R32G32_SINT: return 0x12;
   case ISL_FORMAT_R32G32_UINT: return 0x13;
   case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA;
   case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA;
   case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA;
   case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA;
   case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18;
   case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18;
   case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19;
   case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A;
   case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA;
   case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA;
   case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B;
   case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C;
   case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D;
   case ISL_FORMAT_R16G16_UNORM: return 0x14;
   case ISL_FORMAT_R16G16_SNORM: return 0x15;
   case ISL_FORMAT_R16G16_SINT: return 0x16;
   case ISL_FORMAT_R16G16_UINT: return 0x17;
   case ISL_FORMAT_R16G16_FLOAT: return 0x10;
   case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18;
   case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18;
   case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E;
   case ISL_FORMAT_R32_SINT: return 0x12;
   case ISL_FORMAT_R32_UINT: return 0x13;
   case ISL_FORMAT_R32_FLOAT: return 0x11;
   case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13;
   case ISL_FORMAT_B5G6R5_UNORM: return 0xA;
   case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA;
   case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA;
   case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA;
   case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA;
   case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA;
   case ISL_FORMAT_R8G8_UNORM: return 0xA;
   case ISL_FORMAT_R8G8_SNORM: return 0x1B;
   case ISL_FORMAT_R8G8_SINT: return 0x1C;
   case ISL_FORMAT_R8G8_UINT: return 0x1D;
   case ISL_FORMAT_R16_UNORM: return 0x14;
   case ISL_FORMAT_R16_SNORM: return 0x15;
   case ISL_FORMAT_R16_SINT: return 0x16;
   case ISL_FORMAT_R16_UINT: return 0x17;
   case ISL_FORMAT_R16_FLOAT: return 0x10;
   case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA;
   case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA;
   case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA;
   case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA;
   case ISL_FORMAT_R8_UNORM: return 0xA;
   case ISL_FORMAT_R8_SNORM: return 0x1B;
   case ISL_FORMAT_R8_SINT: return 0x1C;
   case ISL_FORMAT_R8_UINT: return 0x1D;
   case ISL_FORMAT_A8_UNORM: return 0xA;
   case ISL_FORMAT_PLANAR_420_8: return 0xF;
   case ISL_FORMAT_PLANAR_420_10: return 0x7;
   case ISL_FORMAT_PLANAR_420_12: return 0x8;
   case ISL_FORMAT_PLANAR_420_16: return 0x8;
   case ISL_FORMAT_YCRCB_NORMAL: return 0x3;
   case ISL_FORMAT_YCRCB_SWAPY: return 0xB;
   default:
      /* No encoding is listed for this format. */
      unreachable("Unsupported aux-map format!");
      /* Follows unreachable(); presumably kept so that every path visibly
       * returns a value.
       */
      return 0;
   }
}
3416
3417 /*
3418 * Returns compression format encoding for Unified Lossless Compression
3419 */
3420 uint8_t
isl_get_render_compression_format(enum isl_format format)3421 isl_get_render_compression_format(enum isl_format format)
3422 {
3423 /* From the Bspec, Enumeration_RenderCompressionFormat section (53726): */
3424 switch(format) {
3425 case ISL_FORMAT_R32G32B32A32_FLOAT:
3426 case ISL_FORMAT_R32G32B32X32_FLOAT:
3427 case ISL_FORMAT_R32G32B32A32_SINT:
3428 return 0x0;
3429 case ISL_FORMAT_R32G32B32A32_UINT:
3430 return 0x1;
3431 case ISL_FORMAT_R32G32_FLOAT:
3432 case ISL_FORMAT_R32G32_SINT:
3433 return 0x2;
3434 case ISL_FORMAT_R32G32_UINT:
3435 return 0x3;
3436 case ISL_FORMAT_R16G16B16A16_UNORM:
3437 case ISL_FORMAT_R16G16B16X16_UNORM:
3438 case ISL_FORMAT_R16G16B16A16_UINT:
3439 return 0x4;
3440 case ISL_FORMAT_R16G16B16A16_SNORM:
3441 case ISL_FORMAT_R16G16B16A16_SINT:
3442 case ISL_FORMAT_R16G16B16A16_FLOAT:
3443 case ISL_FORMAT_R16G16B16X16_FLOAT:
3444 return 0x5;
3445 case ISL_FORMAT_R16G16_UNORM:
3446 case ISL_FORMAT_R16G16_UINT:
3447 return 0x6;
3448 case ISL_FORMAT_R16G16_SNORM:
3449 case ISL_FORMAT_R16G16_SINT:
3450 case ISL_FORMAT_R16G16_FLOAT:
3451 return 0x7;
3452 case ISL_FORMAT_B8G8R8A8_UNORM:
3453 case ISL_FORMAT_B8G8R8X8_UNORM:
3454 case ISL_FORMAT_B8G8R8A8_UNORM_SRGB:
3455 case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
3456 case ISL_FORMAT_R8G8B8A8_UNORM:
3457 case ISL_FORMAT_R8G8B8X8_UNORM:
3458 case ISL_FORMAT_R8G8B8A8_UNORM_SRGB:
3459 case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
3460 case ISL_FORMAT_R8G8B8A8_UINT:
3461 return 0x8;
3462 case ISL_FORMAT_R8G8B8A8_SNORM:
3463 case ISL_FORMAT_R8G8B8A8_SINT:
3464 return 0x9;
3465 case ISL_FORMAT_B5G6R5_UNORM:
3466 case ISL_FORMAT_B5G6R5_UNORM_SRGB:
3467 case ISL_FORMAT_B5G5R5A1_UNORM:
3468 case ISL_FORMAT_B5G5R5A1_UNORM_SRGB:
3469 case ISL_FORMAT_B4G4R4A4_UNORM:
3470 case ISL_FORMAT_B4G4R4A4_UNORM_SRGB:
3471 case ISL_FORMAT_B5G5R5X1_UNORM:
3472 case ISL_FORMAT_B5G5R5X1_UNORM_SRGB:
3473 case ISL_FORMAT_A1B5G5R5_UNORM:
3474 case ISL_FORMAT_A4B4G4R4_UNORM:
3475 case ISL_FORMAT_R8G8_UNORM:
3476 case ISL_FORMAT_R8G8_UINT:
3477 return 0xA;
3478 case ISL_FORMAT_R8G8_SNORM:
3479 case ISL_FORMAT_R8G8_SINT:
3480 return 0xB;
3481 case ISL_FORMAT_R10G10B10A2_UNORM:
3482 case ISL_FORMAT_R10G10B10A2_UNORM_SRGB:
3483 case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM:
3484 case ISL_FORMAT_R10G10B10A2_UINT:
3485 case ISL_FORMAT_B10G10R10A2_UNORM:
3486 case ISL_FORMAT_B10G10R10X2_UNORM:
3487 case ISL_FORMAT_B10G10R10A2_UNORM_SRGB:
3488 return 0xC;
3489 case ISL_FORMAT_R11G11B10_FLOAT:
3490 return 0xD;
3491 case ISL_FORMAT_R32_SINT:
3492 case ISL_FORMAT_R32_FLOAT:
3493 return 0x10;
3494 case ISL_FORMAT_R32_UINT:
3495 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
3496 return 0x11;
3497 case ISL_FORMAT_R16_UNORM:
3498 case ISL_FORMAT_R16_UINT:
3499 return 0x14;
3500 case ISL_FORMAT_R16_SNORM:
3501 case ISL_FORMAT_R16_SINT:
3502 case ISL_FORMAT_R16_FLOAT:
3503 return 0x15;
3504 case ISL_FORMAT_R8_UNORM:
3505 case ISL_FORMAT_R8_UINT:
3506 case ISL_FORMAT_A8_UNORM:
3507 return 0x18;
3508 case ISL_FORMAT_R8_SNORM:
3509 case ISL_FORMAT_R8_SINT:
3510 return 0x19;
3511 default:
3512 unreachable("Unsupported render compression format!");
3513 return 0;
3514 }
3515 }
3516