1 /*
2 * Copyright 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <inttypes.h>
28
29 #include "dev/intel_debug.h"
30 #include "genxml/genX_bits.h"
31 #include "util/log.h"
32
33 #include "isl.h"
34 #include "isl_gfx4.h"
35 #include "isl_gfx6.h"
36 #include "isl_gfx7.h"
37 #include "isl_gfx8.h"
38 #include "isl_gfx9.h"
39 #include "isl_gfx12.h"
40 #include "isl_priv.h"
41
42 isl_genX_declare_get_func(surf_fill_state_s)
isl_genX_declare_get_func(buffer_fill_state_s)43 isl_genX_declare_get_func(buffer_fill_state_s)
44 isl_genX_declare_get_func(emit_depth_stencil_hiz_s)
45 isl_genX_declare_get_func(null_fill_state_s)
46 isl_genX_declare_get_func(emit_cpb_control_s)
47
48 void
49 isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
50 uint32_t yt1, uint32_t yt2,
51 char *dst, const char *src,
52 uint32_t dst_pitch, int32_t src_pitch,
53 bool has_swizzling,
54 enum isl_tiling tiling,
55 isl_memcpy_type copy_type)
56 {
57 #ifdef USE_SSE41
58 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
59 _isl_memcpy_linear_to_tiled_sse41(
60 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
61 tiling, copy_type);
62 return;
63 }
64 #endif
65
66 _isl_memcpy_linear_to_tiled(
67 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
68 tiling, copy_type);
69 }
70
71 void
isl_memcpy_tiled_to_linear(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,int32_t dst_pitch,uint32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)72 isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
73 uint32_t yt1, uint32_t yt2,
74 char *dst, const char *src,
75 int32_t dst_pitch, uint32_t src_pitch,
76 bool has_swizzling,
77 enum isl_tiling tiling,
78 isl_memcpy_type copy_type)
79 {
80 #ifdef USE_SSE41
81 if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
82 _isl_memcpy_tiled_to_linear_sse41(
83 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
84 tiling, copy_type);
85 return;
86 }
87 #endif
88
89 _isl_memcpy_tiled_to_linear(
90 xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
91 tiling, copy_type);
92 }
93
94 void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char * file,int line,const char * fmt,...)95 __isl_finishme(const char *file, int line, const char *fmt, ...)
96 {
97 va_list ap;
98 char buf[512];
99
100 va_start(ap, fmt);
101 vsnprintf(buf, sizeof(buf), fmt, ap);
102 va_end(ap);
103
104 fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
105 }
106
107 static void
isl_device_setup_mocs(struct isl_device * dev)108 isl_device_setup_mocs(struct isl_device *dev)
109 {
110 dev->mocs.protected_mask = 0;
111
112 if (dev->info->ver >= 20) {
113 /* L3+L4=WB; BSpec: 71582 */
114 dev->mocs.internal = 1 << 1;
115 dev->mocs.external = 1 << 1;
116 dev->mocs.protected_mask = 3 << 0;
117 /* TODO: Setting to uncached
118 * WA 14018443005:
119 * Ensure that any compression-enabled resource from gfx memory subject
120 * to app recycling (e.g. OGL sparse resource backing memory or
121 * Vulkan heaps) is never PAT/MOCS'ed as L3:UC.
122 */
123 dev->mocs.blitter_dst = 1 << 1;
124 dev->mocs.blitter_src = 1 << 1;
125 } else if (dev->info->ver >= 12) {
126 if (intel_device_info_is_mtl_or_arl(dev->info)) {
127 /* Cached L3+L4; BSpec: 45101 */
128 dev->mocs.internal = 1 << 1;
129 /* Displayables cached to L3+L4:WT */
130 dev->mocs.external = 14 << 1;
131 /* Uncached - GO:Mem */
132 dev->mocs.uncached = 5 << 1;
133 /* TODO: XY_BLOCK_COPY_BLT don't mention what should be the L4 cache
134 * mode so for now it is setting L4 as uncached following what is
135 * asked for L3
136 */
137 dev->mocs.blitter_dst = 9 << 1;
138 dev->mocs.blitter_src = 9 << 1;
139 } else if (intel_device_info_is_dg2(dev->info)) {
140 /* L3CC=WB; BSpec: 45101 */
141 dev->mocs.internal = 3 << 1;
142 dev->mocs.external = 3 << 1;
143 /* UC - Coherent; GO:Memory */
144 dev->mocs.uncached = 1 << 1;
145
146 /* XY_BLOCK_COPY_BLT MOCS fields have programming notes which say:
147 *
148 * "Destination MOCS value, which is used to program MOCS index
149 * for writing to memory, should select a MOCS register having
150 * "L3 Cacheability Control" programmed as uncacheable(UC) and
151 * "Global GO" parameter set as GOMemory (pushes GO point to
152 * memory). The MOCS Register may have L3 Lookup programmed as
153 * UCL3LKDIS for better efficiency."
154 *
155 * The GO:Memory setting requires us to use MOCS 1 or 2. MOCS 2
156 * has LKUP set to 0 and is marked "Non-Coherent", which we assume
157 * is probably the "better efficiency" they mention...
158 *
159 * "Source MOCS value, which is used to program MOCS index for
160 * reading from memory, should select a MOCS register having
161 * "L3 Cacheability Control" programmed as uncacheable(UC).
162 * The MOCS Register may have L3 Lookup programmed as UCL3LKDIS
163 * for better efficiency."
164 *
165 * Any MOCS except 3 should work. We use MOCS 2...
166 */
167 dev->mocs.blitter_dst = 2 << 1;
168 dev->mocs.blitter_src = 2 << 1;
169 } else if (dev->info->platform == INTEL_PLATFORM_DG1) {
170 /* L3CC=WB */
171 dev->mocs.internal = 5 << 1;
172 /* Displayables on DG1 are free to cache in L3 since L3 is transient
173 * and flushed at bottom of each submission.
174 */
175 dev->mocs.external = 5 << 1;
176 /* UC */
177 dev->mocs.uncached = 1 << 1;
178 } else {
179 /* TC=1/LLC Only, LeCC=1/UC, LRUM=0, L3CC=3/WB */
180 dev->mocs.external = 61 << 1;
181 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
182 dev->mocs.internal = 2 << 1;
183 /* Uncached */
184 dev->mocs.uncached = 3 << 1;
185
186 /* L1 - HDC:L1 + L3 + LLC */
187 dev->mocs.l1_hdc_l3_llc = 48 << 1;
188 }
189 /* Protected is just an additional flag. */
190 dev->mocs.protected_mask = 1 << 0;
191 } else if (dev->info->ver >= 9) {
192 /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
193 dev->mocs.external = 1 << 1;
194 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
195 dev->mocs.internal = 2 << 1;
196 /* Uncached */
197 dev->mocs.uncached = (dev->info->ver >= 11 ? 3 : 0) << 1;
198 } else if (dev->info->ver >= 8) {
199 /* MEMORY_OBJECT_CONTROL_STATE:
200 * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
201 * .TargetCache = L3DefertoPATforLLCeLLCselection,
202 * .AgeforQUADLRU = 0
203 */
204 dev->mocs.external = 0x18;
205 /* MEMORY_OBJECT_CONTROL_STATE:
206 * .MemoryTypeLLCeLLCCacheabilityControl = WB,
207 * .TargetCache = L3DefertoPATforLLCeLLCselection,
208 * .AgeforQUADLRU = 0
209 */
210 dev->mocs.internal = 0x78;
211 if (dev->info->platform == INTEL_PLATFORM_CHV) {
212 /* MEMORY_OBJECT_CONTROL_STATE:
213 * .MemoryType = UC,
214 * .TargetCache = NoCaching,
215 */
216 dev->mocs.uncached = 0;
217 } else {
218 /* MEMORY_OBJECT_CONTROL_STATE:
219 * .MemoryTypeLLCeLLCCacheabilityControl = UCUncacheable,
220 * .TargetCache = eLLCOnlywheneDRAMispresentelsegetsallocatedinLLC,
221 * .AgeforQUADLRU = 0
222 */
223 dev->mocs.uncached = 0x20;
224 }
225 } else if (dev->info->ver >= 7) {
226 if (dev->info->platform == INTEL_PLATFORM_HSW) {
227 /* MEMORY_OBJECT_CONTROL_STATE:
228 * .LLCeLLCCacheabilityControlLLCCC = 0,
229 * .L3CacheabilityControlL3CC = 1,
230 */
231 dev->mocs.internal = 1;
232 dev->mocs.external = 1;
233 /* MEMORY_OBJECT_CONTROL_STATE:
234 * .LLCeLLCCacheabilityControlLLCCC = 1,
235 * .L3CacheabilityControlL3CC = 0,
236 */
237 dev->mocs.uncached = 2;
238 } else {
239 /* MEMORY_OBJECT_CONTROL_STATE:
240 * .GraphicsDataTypeGFDT = 0,
241 * .LLCCacheabilityControlLLCCC = 0,
242 * .L3CacheabilityControlL3CC = 1,
243 */
244 dev->mocs.internal = 1;
245 dev->mocs.external = 1;
246 /* MEMORY_OBJECT_CONTROL_STATE:
247 * .GraphicsDataTypeGFDT = 0,
248 * .LLCCacheabilityControlLLCCC = 0,
249 * .L3CacheabilityControlL3CC = 0,
250 */
251 dev->mocs.uncached = 0;
252 }
253 } else {
254 dev->mocs.internal = 0;
255 dev->mocs.external = 0;
256 dev->mocs.uncached = 0;
257 }
258 }
259
260 /**
261 * Return an appropriate MOCS entry for the given usage flags.
262 */
263 uint32_t
isl_mocs(const struct isl_device * dev,isl_surf_usage_flags_t usage,bool external)264 isl_mocs(const struct isl_device *dev, isl_surf_usage_flags_t usage,
265 bool external)
266 {
267 uint32_t mask = (usage & ISL_SURF_USAGE_PROTECTED_BIT) ?
268 dev->mocs.protected_mask : 0;
269
270 if (external)
271 return dev->mocs.external | mask;
272
273 if (intel_device_info_is_mtl_or_arl(dev->info) &&
274 (usage & ISL_SURF_USAGE_STREAM_OUT_BIT))
275 return dev->mocs.uncached | mask;
276
277 if (dev->info->verx10 == 120 && dev->info->platform != INTEL_PLATFORM_DG1) {
278 if (usage & ISL_SURF_USAGE_STAGING_BIT)
279 return dev->mocs.internal | mask;
280
281 if (usage & ISL_SURF_USAGE_CPB_BIT)
282 return dev->mocs.internal;
283
284 /* Using L1:HDC for storage buffers breaks Vulkan memory model
285 * tests that use shader atomics. This isn't likely to work out,
286 * and we can't know a priori whether they'll be used. So just
287 * continue with ordinary internal MOCS for now.
288 */
289 if (usage & ISL_SURF_USAGE_STORAGE_BIT)
290 return dev->mocs.internal | mask;
291
292 if (usage & (ISL_SURF_USAGE_CONSTANT_BUFFER_BIT |
293 ISL_SURF_USAGE_RENDER_TARGET_BIT |
294 ISL_SURF_USAGE_TEXTURE_BIT))
295 return dev->mocs.l1_hdc_l3_llc | mask;
296 }
297
298 return dev->mocs.internal | mask;
299 }
300
301 void
isl_device_init(struct isl_device * dev,const struct intel_device_info * info)302 isl_device_init(struct isl_device *dev,
303 const struct intel_device_info *info)
304 {
305 /* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */
306 assert(!(info->has_bit6_swizzle && info->ver >= 8));
307
308 dev->info = info;
309 dev->use_separate_stencil = ISL_GFX_VER(dev) >= 6;
310 dev->has_bit6_swizzling = info->has_bit6_swizzle;
311 dev->buffer_length_in_aux_addr = false;
312
313 /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
314 * device properties at buildtime. Verify that the macros with the device
315 * properties chosen during runtime.
316 */
317 ISL_GFX_VER_SANITIZE(dev);
318 ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
319
320 /* Did we break hiz or stencil? */
321 if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
322 assert(info->has_hiz_and_separate_stencil);
323 if (info->must_use_separate_stencil)
324 assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
325
326 dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
327 dev->ss.align = isl_align(dev->ss.size, 32);
328
329 dev->ss.clear_color_state_size = CLEAR_COLOR_length(info) * 4;
330 dev->ss.clear_color_state_offset =
331 RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
332
333 dev->ss.clear_value_size =
334 isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
335 RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
336 RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
337 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
338
339 dev->ss.clear_value_offset =
340 RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
341
342 assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
343 dev->ss.addr_offset =
344 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
345
346 /* The "Auxiliary Surface Base Address" field starts a bit higher up
347 * because the bottom 12 bits are used for other things. Round down to
348 * the nearest dword before.
349 */
350 dev->ss.aux_addr_offset =
351 (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
352
353 dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
354 assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
355 dev->ds.depth_offset =
356 _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
357
358 if (dev->use_separate_stencil) {
359 dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
360 _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
361 _3DSTATE_CLEAR_PARAMS_length(info) * 4;
362
363 assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
364 dev->ds.stencil_offset =
365 _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
366 _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
367
368 assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
369 dev->ds.hiz_offset =
370 _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
371 _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
372 _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
373 } else {
374 dev->ds.stencil_offset = 0;
375 dev->ds.hiz_offset = 0;
376 }
377
378 /* From the IVB PRM, SURFACE_STATE::Height,
379 *
380 * For typed buffer and structured buffer surfaces, the number
381 * of entries in the buffer ranges from 1 to 2^27. For raw buffer
382 * surfaces, the number of entries in the buffer is the number of bytes
383 * which can range from 1 to 2^30.
384 *
385 * From the SKL PRM, SURFACE_STATE::Width/Height/Depth for RAW buffers,
386 *
387 * Width : bits [6:0]
388 * Height : bits [20:7]
389 * Depth : bits [31:21]
390 *
391 * So we can address 4Gb
392 *
393 * This limit is only concerned with raw buffers.
394 */
395 if (ISL_GFX_VER(dev) >= 9) {
396 dev->max_buffer_size = 1ull << 32;
397 } else if (ISL_GFX_VER(dev) >= 7) {
398 dev->max_buffer_size = 1ull << 30;
399 } else {
400 dev->max_buffer_size = 1ull << 27;
401 }
402
403 dev->cpb.size = _3DSTATE_CPSIZE_CONTROL_BUFFER_length(info) * 4;
404 dev->cpb.offset =
405 _3DSTATE_CPSIZE_CONTROL_BUFFER_SurfaceBaseAddress_start(info) / 8;
406
407 isl_device_setup_mocs(dev);
408
409 dev->surf_fill_state_s = isl_surf_fill_state_s_get_func(dev);
410 dev->buffer_fill_state_s = isl_buffer_fill_state_s_get_func(dev);
411 dev->emit_depth_stencil_hiz_s = isl_emit_depth_stencil_hiz_s_get_func(dev);
412 dev->null_fill_state_s = isl_null_fill_state_s_get_func(dev);
413 dev->emit_cpb_control_s = isl_emit_cpb_control_s_get_func(dev);
414 }
415
416 /**
417 * @brief Query the set of multisamples supported by the device.
418 *
419 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
420 * supported.
421 */
422 isl_sample_count_mask_t ATTRIBUTE_CONST
isl_device_get_sample_counts(const struct isl_device * dev)423 isl_device_get_sample_counts(const struct isl_device *dev)
424 {
425 if (ISL_GFX_VER(dev) >= 9) {
426 return ISL_SAMPLE_COUNT_1_BIT |
427 ISL_SAMPLE_COUNT_2_BIT |
428 ISL_SAMPLE_COUNT_4_BIT |
429 ISL_SAMPLE_COUNT_8_BIT |
430 ISL_SAMPLE_COUNT_16_BIT;
431 } else if (ISL_GFX_VER(dev) >= 8) {
432 return ISL_SAMPLE_COUNT_1_BIT |
433 ISL_SAMPLE_COUNT_2_BIT |
434 ISL_SAMPLE_COUNT_4_BIT |
435 ISL_SAMPLE_COUNT_8_BIT;
436 } else if (ISL_GFX_VER(dev) >= 7) {
437 return ISL_SAMPLE_COUNT_1_BIT |
438 ISL_SAMPLE_COUNT_4_BIT |
439 ISL_SAMPLE_COUNT_8_BIT;
440 } else if (ISL_GFX_VER(dev) >= 6) {
441 return ISL_SAMPLE_COUNT_1_BIT |
442 ISL_SAMPLE_COUNT_4_BIT;
443 } else {
444 return ISL_SAMPLE_COUNT_1_BIT;
445 }
446 }
447
448 static uint32_t
isl_get_miptail_base_row(enum isl_tiling tiling)449 isl_get_miptail_base_row(enum isl_tiling tiling)
450 {
451 /* Miptails base levels can depend on the number of samples, but since we
452 * don't support levels > 1 with multisampling, the base miptail level is
453 * really simple :
454 */
455 if (tiling == ISL_TILING_SKL_Yf ||
456 tiling == ISL_TILING_ICL_Yf)
457 return 4;
458 else
459 return 0;
460 }
461
/* 2D miptail offsets, in surface elements, for the SKL Yf/Ys tilings.
 *
 * Indexing is [row][bpb column][axis].  The columns go from 128 bpb down to
 * 8 bpb; each entry is presumably {x, y} -- confirm against the SKL PRM
 * "Tiling and Mip Tail for 2D Surfaces" table.  The number of rows is what
 * tiling_max_mip_tail() uses to bound the miptail length.
 */
static const uint8_t skl_std_y_2d_miptail_offset_el[][5][2] = {
   /*        128 bpb    64 bpb     32 bpb     16 bpb      8 bpb   */
   /*  0 */ { { 32,  0}, { 64,  0}, { 64,  0}, {128,  0}, {128,   0} },
   /*  1 */ { {  0, 32}, {  0, 32}, {  0, 64}, {  0, 64}, {  0, 128} },
   /*  2 */ { { 16,  0}, { 32,  0}, { 32,  0}, { 64,  0}, { 64,   0} },
   /*  3 */ { {  0, 16}, {  0, 16}, {  0, 32}, {  0, 32}, {  0,  64} },
   /*  4 */ { {  8,  0}, { 16,  0}, { 16,  0}, { 32,  0}, { 32,   0} },
   /*  5 */ { {  4,  8}, {  8,  8}, {  8, 16}, { 16, 16}, { 16,  32} },
   /*  6 */ { {  0, 12}, {  0, 12}, {  0, 24}, {  0, 24}, {  0,  48} },
   /*  7 */ { {  0,  8}, {  0,  8}, {  0, 16}, {  0, 16}, {  0,  32} },
   /*  8 */ { {  4,  4}, {  8,  4}, {  8,  8}, { 16,  8}, { 16,  16} },
   /*  9 */ { {  4,  0}, {  8,  0}, {  8,  0}, { 16,  0}, { 16,   0} },
   /* 10 */ { {  0,  4}, {  0,  4}, {  0,  8}, {  0,  8}, {  0,  16} },
   /* 11 */ { {  3,  0}, {  6,  0}, {  4,  4}, {  8,  4}, {  0,  12} },
   /* 12 */ { {  2,  0}, {  4,  0}, {  4,  0}, {  8,  0}, {  0,   8} },
   /* 13 */ { {  1,  0}, {  2,  0}, {  0,  4}, {  0,  4}, {  0,   4} },
   /* 14 */ { {  0,  0}, {  0,  0}, {  0,  0}, {  0,  0}, {  0,   0} },
};
480
/* 2D miptail offsets, in surface elements, for the ICL Yf/Ys tilings.
 * tiling_max_mip_tail() also takes its 2D row count from this table for the
 * Tile64 tilings.
 *
 * Indexing is [row][bpb column][axis].  The columns go from 128 bpb down to
 * 8 bpb; each entry is presumably {x, y} -- confirm against the ICL PRM
 * "Tiling and Mip Tail for 2D Surfaces" table.
 */
static const uint8_t icl_std_y_2d_miptail_offset_el[][5][2] = {
   /*        128 bpb    64 bpb     32 bpb     16 bpb      8 bpb   */
   /*  0 */ { { 32,  0}, { 64,  0}, { 64,  0}, {128,  0}, {128,   0} },
   /*  1 */ { {  0, 32}, {  0, 32}, {  0, 64}, {  0, 64}, {  0, 128} },
   /*  2 */ { { 16,  0}, { 32,  0}, { 32,  0}, { 64,  0}, { 64,   0} },
   /*  3 */ { {  0, 16}, {  0, 16}, {  0, 32}, {  0, 32}, {  0,  64} },
   /*  4 */ { {  8,  0}, { 16,  0}, { 16,  0}, { 32,  0}, { 32,   0} },
   /*  5 */ { {  4,  8}, {  8,  8}, {  8, 16}, { 16, 16}, { 16,  32} },
   /*  6 */ { {  0, 12}, {  0, 12}, {  0, 24}, {  0, 24}, {  0,  48} },
   /*  7 */ { {  0,  8}, {  0,  8}, {  0, 16}, {  0, 16}, {  0,  32} },
   /*  8 */ { {  4,  4}, {  8,  4}, {  8,  8}, { 16,  8}, { 16,  16} },
   /*  9 */ { {  4,  0}, {  8,  0}, {  8,  0}, { 16,  0}, { 16,   0} },
   /* 10 */ { {  0,  4}, {  0,  4}, {  0,  8}, {  0,  8}, {  0,  16} },
   /* 11 */ { {  0,  0}, {  0,  0}, {  0,  0}, {  0,  0}, {  0,   0} },
   /* 12 */ { {  1,  0}, {  2,  0}, {  0,  4}, {  0,  4}, {  0,   4} },
   /* 13 */ { {  2,  0}, {  4,  0}, {  4,  0}, {  8,  0}, {  0,   8} },
   /* 14 */ { {  3,  0}, {  6,  0}, {  4,  4}, {  8,  4}, {  0,  12} },
};
499
/* 3D miptail offsets, in surface elements, for the SKL Yf/Ys tilings.
 *
 * Indexing is [row][bpb column][axis].  The columns go from 128 bpb down to
 * 8 bpb; each entry is presumably {x, y, z} -- confirm against the SKL PRM
 * "Tiling and Mip Tail for 3D Surfaces" table.  The number of rows is what
 * tiling_max_mip_tail() uses to bound the miptail length.
 */
static const uint8_t skl_std_y_3d_miptail_offset_el[][5][3] = {
   /*         128 bpb       64 bpb        32 bpb        16 bpb         8 bpb    */
   /*  0 */ { { 8, 0,  0}, {16, 0,  0}, {16,  0,  0}, {16,  0,  0}, {32,  0,  0} },
   /*  1 */ { { 0, 8,  0}, { 0, 8,  0}, { 0, 16,  0}, { 0, 16,  0}, { 0, 16,  0} },
   /*  2 */ { { 0, 0,  8}, { 0, 0,  8}, { 0,  0,  8}, { 0,  0, 16}, { 0,  0, 16} },
   /*  3 */ { { 4, 0,  0}, { 8, 0,  0}, { 8,  0,  0}, { 8,  0,  0}, {16,  0,  0} },
   /*  4 */ { { 0, 4,  0}, { 0, 4,  0}, { 0,  8,  0}, { 0,  8,  0}, { 0,  8,  0} },
   /*  5 */ { { 0, 0,  4}, { 0, 0,  4}, { 0,  0,  4}, { 0,  0,  8}, { 0,  0,  8} },
   /*  6 */ { { 3, 0,  0}, { 6, 0,  0}, { 4,  4,  0}, { 0,  4,  4}, { 0,  4,  4} },
   /*  7 */ { { 2, 0,  0}, { 4, 0,  0}, { 0,  4,  0}, { 0,  4,  0}, { 0,  4,  0} },
   /*  8 */ { { 1, 0,  3}, { 2, 0,  3}, { 4,  0,  3}, { 0,  0,  7}, { 0,  0,  7} },
   /*  9 */ { { 1, 0,  2}, { 2, 0,  2}, { 4,  0,  2}, { 0,  0,  6}, { 0,  0,  6} },
   /* 10 */ { { 1, 0,  1}, { 2, 0,  1}, { 4,  0,  1}, { 0,  0,  5}, { 0,  0,  5} },
   /* 11 */ { { 1, 0,  0}, { 2, 0,  0}, { 4,  0,  0}, { 0,  0,  4}, { 0,  0,  4} },
   /* 12 */ { { 0, 0,  3}, { 0, 0,  3}, { 0,  0,  3}, { 0,  0,  3}, { 0,  0,  3} },
   /* 13 */ { { 0, 0,  2}, { 0, 0,  2}, { 0,  0,  2}, { 0,  0,  2}, { 0,  0,  2} },
   /* 14 */ { { 0, 0,  1}, { 0, 0,  1}, { 0,  0,  1}, { 0,  0,  1}, { 0,  0,  1} },
   /* 15 */ { { 0, 0,  0}, { 0, 0,  0}, { 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0} },
};
519
/* 3D miptail offsets, in surface elements, for the ICL Yf/Ys tilings.
 *
 * Indexing is [row][bpb column][axis].  The columns go from 128 bpb down to
 * 8 bpb; each entry is presumably {x, y, z} -- confirm against the ICL PRM
 * "Tiling and Mip Tail for 3D Surfaces" table.  The number of rows is what
 * tiling_max_mip_tail() uses to bound the miptail length.
 */
static const uint8_t icl_std_y_3d_miptail_offset_el[][5][3] = {
   /*         128 bpb       64 bpb        32 bpb        16 bpb         8 bpb    */
   /*  0 */ { { 8, 0,  0}, {16, 0,  0}, {16,  0,  0}, {16,  0,  0}, {32,  0,  0} },
   /*  1 */ { { 0, 8,  0}, { 0, 8,  0}, { 0, 16,  0}, { 0, 16,  0}, { 0, 16,  0} },
   /*  2 */ { { 0, 0,  8}, { 0, 0,  8}, { 0,  0,  8}, { 0,  0, 16}, { 0,  0, 16} },
   /*  3 */ { { 4, 0,  0}, { 8, 0,  0}, { 8,  0,  0}, { 8,  0,  0}, {16,  0,  0} },
   /*  4 */ { { 0, 4,  0}, { 0, 4,  0}, { 0,  8,  0}, { 0,  8,  0}, { 0,  8,  0} },
   /*  5 */ { { 2, 0,  4}, { 4, 0,  4}, { 4,  0,  4}, { 4,  0,  8}, { 8,  0,  8} },
   /*  6 */ { { 0, 2,  4}, { 0, 2,  4}, { 0,  4,  4}, { 0,  4,  8}, { 0,  4,  8} },
   /*  7 */ { { 0, 0,  4}, { 0, 0,  4}, { 0,  0,  4}, { 0,  0,  8}, { 0,  0,  8} },
   /*  8 */ { { 2, 2,  0}, { 4, 2,  0}, { 4,  4,  0}, { 4,  4,  0}, { 8,  4,  0} },
   /*  9 */ { { 2, 0,  0}, { 4, 0,  0}, { 4,  0,  0}, { 4,  0,  0}, { 8,  0,  0} },
   /* 10 */ { { 0, 2,  0}, { 0, 2,  0}, { 0,  4,  0}, { 0,  4,  0}, { 0,  4,  0} },
   /* 11 */ { { 1, 0,  2}, { 2, 0,  2}, { 2,  0,  2}, { 2,  0,  4}, { 4,  0,  4} },
   /* 12 */ { { 0, 0,  2}, { 0, 0,  2}, { 0,  0,  2}, { 0,  0,  4}, { 0,  0,  4} },
   /* 13 */ { { 1, 0,  0}, { 2, 0,  0}, { 2,  0,  0}, { 2,  0,  0}, { 4,  0,  0} },
   /* 14 */ { { 0, 0,  0}, { 0, 0,  0}, { 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0} },
};
538
/* 3D miptail offsets, in surface elements, for the Tile64 tilings.
 *
 * Indexing is [row][bpb column][axis].  The columns go from 128 bpb down to
 * 8 bpb; each entry is presumably {x, y, z} -- confirm against the ATS-M PRM
 * "Tiling and Mip Tail for 3D Surfaces" table.  The number of rows is what
 * tiling_max_mip_tail() uses to bound the miptail length.
 */
static const uint8_t acm_tile64_3d_miptail_offset_el[][5][3] = {
   /*         128 bpb       64 bpb        32 bpb        16 bpb         8 bpb    */
   /*  0 */ { { 8, 0,  0}, {16, 0,  0}, {16,  0,  0}, {16,  0,  0}, {32,  0,  0} },
   /*  1 */ { { 0, 8,  0}, { 0, 8,  0}, { 0, 16,  0}, { 0, 16,  0}, { 0, 16,  0} },
   /*  2 */ { { 0, 0,  8}, { 0, 0,  8}, { 0,  0,  8}, { 0,  0, 16}, { 0,  0, 16} },
   /*  3 */ { { 4, 0,  0}, { 8, 0,  0}, { 8,  0,  0}, { 8,  0,  0}, {16,  0,  0} },
   /*  4 */ { { 0, 4,  0}, { 0, 4,  0}, { 0,  8,  0}, { 0,  8,  0}, { 0,  8,  0} },
   /*  5 */ { { 2, 0,  4}, { 4, 0,  4}, { 4,  0,  4}, { 0,  4,  8}, { 0,  4,  8} },
   /*  6 */ { { 1, 0,  4}, { 2, 0,  4}, { 0,  4,  4}, { 0,  0, 12}, { 0,  0, 12} },
   /*  7 */ { { 0, 0,  4}, { 0, 0,  4}, { 0,  0,  4}, { 0,  0,  8}, { 0,  0,  8} },
   /*  8 */ { { 3, 0,  0}, { 6, 0,  0}, { 4,  4,  0}, { 0,  4,  4}, { 0,  4,  4} },
   /*  9 */ { { 2, 0,  0}, { 4, 0,  0}, { 4,  0,  0}, { 0,  4,  0}, { 0,  4,  0} },
   /* 10 */ { { 1, 0,  0}, { 2, 0,  0}, { 0,  4,  0}, { 0,  0,  4}, { 0,  0,  4} },
   /* 11 */ { { 0, 0,  0}, { 0, 0,  0}, { 0,  0,  0}, { 0,  0,  0}, { 0,  0,  0} },
   /* 12 */ { { 0, 0,  1}, { 0, 0,  1}, { 0,  0,  1}, { 0,  0,  1}, { 0,  0,  1} },
   /* 13 */ { { 0, 0,  2}, { 0, 0,  2}, { 0,  0,  2}, { 0,  0,  2}, { 0,  0,  2} },
   /* 14 */ { { 0, 0,  3}, { 0, 0,  3}, { 0,  0,  3}, { 0,  0,  3}, { 0,  0,  3} },
};
557
558 static uint32_t
tiling_max_mip_tail(enum isl_tiling tiling,enum isl_surf_dim dim,uint32_t samples)559 tiling_max_mip_tail(enum isl_tiling tiling,
560 enum isl_surf_dim dim,
561 uint32_t samples)
562 {
563 /* In theory, miptails work for multisampled images, but we don't support
564 * mipmapped multisampling.
565 *
566 * BSpec 58770: Xe2 does not support miptails on multisampled images.
567 */
568 if (samples > 1)
569 return 0;
570
571 int num_2d_table_rows;
572 int num_3d_table_rows;
573
574 switch (tiling) {
575 case ISL_TILING_LINEAR:
576 case ISL_TILING_X:
577 case ISL_TILING_Y0:
578 case ISL_TILING_4:
579 case ISL_TILING_W:
580 case ISL_TILING_HIZ:
581 case ISL_TILING_CCS:
582 case ISL_TILING_GFX12_CCS:
583 /* There is no miptail for those tilings */
584 return 0;
585
586 case ISL_TILING_SKL_Yf:
587 case ISL_TILING_SKL_Ys:
588 /* SKL PRMs, Volume 5: Memory Views :
589 *
590 * Given by the last row of the table in the following sections:
591 *
592 * - Tiling and Mip Tail for 1D Surfaces
593 * - Tiling and Mip Tail for 2D Surfaces
594 * - Tiling and Mip Tail for 3D Surfaces
595 */
596 num_2d_table_rows = ARRAY_SIZE(skl_std_y_2d_miptail_offset_el);
597 num_3d_table_rows = ARRAY_SIZE(skl_std_y_3d_miptail_offset_el);
598 break;
599
600 case ISL_TILING_ICL_Yf:
601 case ISL_TILING_ICL_Ys:
602 /* ICL PRMs, Volume 5: Memory Views :
603 *
604 * - Tiling and Mip Tail for 1D Surfaces :
605 * "There is no MIP Tail allowed for 1D surfaces because they are
606 * not allowed to be tiled. They must be declared as linear."
607 * - Tiling and Mip Tail for 2D Surfaces
608 * - Tiling and Mip Tail for 3D Surfaces
609 */
610 num_2d_table_rows = ARRAY_SIZE(icl_std_y_2d_miptail_offset_el);
611 num_3d_table_rows = ARRAY_SIZE(icl_std_y_3d_miptail_offset_el);
612 break;
613
614 case ISL_TILING_64:
615 case ISL_TILING_64_XE2:
616 /* ATS-M PRMS, Volume 5: Memory Data Formats :
617 *
618 * - Tiling and Mip Tail for 1D Surfaces :
619 * "There is no MIP Tail allowed for 1D surfaces because they are
620 * not allowed to be tiled. They must be declared as linear."
621 * - Tiling and Mip Tail for 2D Surfaces
622 * - Tiling and Mip Tail for 3D Surfaces
623 */
624 num_2d_table_rows = ARRAY_SIZE(icl_std_y_2d_miptail_offset_el);
625 num_3d_table_rows = ARRAY_SIZE(acm_tile64_3d_miptail_offset_el);
626 break;
627
628 default:
629 unreachable("Invalid tiling");
630 }
631
632 assert(dim != ISL_SURF_DIM_1D);
633 const int num_rows = dim == ISL_SURF_DIM_2D ? num_2d_table_rows :
634 num_3d_table_rows;
635 return num_rows - isl_get_miptail_base_row(tiling);
636 }
637
638 /**
639 * Returns an isl_tile_info representation of the given isl_tiling when
640 * used in the given configuration.
641 *
642 * :param tiling: |in| The tiling format to introspect
643 * :param dim: |in| The dimensionality of the surface being tiled
644 * :param msaa_layout: |in| The layout of samples in the surface being tiled
645 * :param format_bpb: |in| The number of bits per surface element (block) for
646 * the surface being tiled
647 * :param samples: |in| The samples in the surface being tiled
648 * :param tile_info: |out| Return parameter for the tiling information
649 */
650 void
isl_tiling_get_info(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t format_bpb,uint32_t samples,struct isl_tile_info * tile_info)651 isl_tiling_get_info(enum isl_tiling tiling,
652 enum isl_surf_dim dim,
653 enum isl_msaa_layout msaa_layout,
654 uint32_t format_bpb,
655 uint32_t samples,
656 struct isl_tile_info *tile_info)
657 {
658 const uint32_t bs = format_bpb / 8;
659 struct isl_extent4d logical_el;
660 struct isl_extent2d phys_B;
661
662 if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
663 /* It is possible to have non-power-of-two formats in a tiled buffer.
664 * The easiest way to handle this is to treat the tile as if it is three
665 * times as wide. This way no pixel will ever cross a tile boundary.
666 * This really only works on a subset of tiling formats.
667 */
668 assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0 ||
669 tiling == ISL_TILING_4);
670 assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
671 isl_tiling_get_info(tiling, dim, msaa_layout, format_bpb / 3, samples,
672 tile_info);
673 return;
674 }
675
676 switch (tiling) {
677 case ISL_TILING_LINEAR:
678 assert(bs > 0);
679 logical_el = isl_extent4d(1, 1, 1, 1);
680 phys_B = isl_extent2d(bs, 1);
681 break;
682
683 case ISL_TILING_X:
684 assert(bs > 0);
685 logical_el = isl_extent4d(512 / bs, 8, 1, 1);
686 phys_B = isl_extent2d(512, 8);
687 break;
688
689 case ISL_TILING_Y0:
690 case ISL_TILING_4:
691 assert(bs > 0);
692 logical_el = isl_extent4d(128 / bs, 32, 1, 1);
693 phys_B = isl_extent2d(128, 32);
694 break;
695
696 case ISL_TILING_W:
697 assert(bs == 1);
698 logical_el = isl_extent4d(64, 64, 1, 1);
699 /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
700 *
701 * "If the surface is a stencil buffer (and thus has Tile Mode set
702 * to TILEMODE_WMAJOR), the pitch must be set to 2x the value
703 * computed based on width, as the stencil buffer is stored with two
704 * rows interleaved."
705 *
706 * This, together with the fact that stencil buffers are referred to as
707 * being Y-tiled in the PRMs for older hardware implies that the
708 * physical size of a W-tile is actually the same as for a Y-tile.
709 */
710 phys_B = isl_extent2d(128, 32);
711 break;
712
713 case ISL_TILING_SKL_Yf:
714 case ISL_TILING_SKL_Ys:
715 case ISL_TILING_ICL_Yf:
716 case ISL_TILING_ICL_Ys: {
717 bool is_Ys = tiling == ISL_TILING_SKL_Ys ||
718 tiling == ISL_TILING_ICL_Ys;
719 assert(format_bpb >= 8);
720
721 switch (dim) {
722 case ISL_SURF_DIM_2D:
723 /* See the BSpec Memory Data Formats » Common Surface Formats »
724 * Surface Layout and Tiling [SKL+] » 2D Surfaces SKL+ » 2D/CUBE
725 * Alignment Requirement [SKL+]
726 *
727 * Or, look in the SKL PRM under Memory Views > Common Surface
728 * Formats > Surface Layout and Tiling > 2D Surfaces > 2D/CUBE
729 * Alignment Requirements.
730 */
731 logical_el = (struct isl_extent4d) {
732 .w = 1 << (6 - ((ffs(format_bpb) - 4) / 2) + (2 * is_Ys)),
733 .h = 1 << (6 - ((ffs(format_bpb) - 3) / 2) + (2 * is_Ys)),
734 .d = 1,
735 .a = 1,
736 };
737
738 if (samples > 1 && tiling != ISL_TILING_SKL_Yf) {
739 /* SKL PRMs, Volume 5: Memory Views, 2D/CUBE Alignment
740 * Requirement:
741 *
742 * "For MSFMT_MSS type multi-sampled TileYS surfaces, the
743 * alignments given above must be divided by the appropriate
744 * value from the table below."
745 *
746 * The formulas below reproduce those values.
747 */
748 if (msaa_layout == ISL_MSAA_LAYOUT_ARRAY) {
749 logical_el.w >>= (ffs(samples) - 0) / 2;
750 logical_el.h >>= (ffs(samples) - 1) / 2;
751 logical_el.a = samples;
752 }
753 }
754 break;
755
756 case ISL_SURF_DIM_3D:
757 /* See the BSpec Memory Data Formats » Common Surface Formats »
758 * Surface Layout and Tiling [SKL+] » 3D Surfaces SKL+ » 3D Alignment
759 * Requirements [SKL+]
760 *
761 * Or, look in the SKL PRM under Memory Views > Common Surface
762 * Formats > Surface Layout and Tiling > 3D Surfaces > 3D Alignment
763 * Requirements.
764 */
765 logical_el = (struct isl_extent4d) {
766 .w = 1 << (4 - ((ffs(format_bpb) - 2) / 3) + (2 * is_Ys)),
767 .h = 1 << (4 - ((ffs(format_bpb) - 4) / 3) + (1 * is_Ys)),
768 .d = 1 << (4 - ((ffs(format_bpb) - 3) / 3) + (1 * is_Ys)),
769 .a = 1,
770 };
771 break;
772 default:
773 unreachable("Invalid dimension");
774 }
775
776 uint32_t tile_size_B = is_Ys ? (1 << 16) : (1 << 12);
777
778 phys_B.w = logical_el.width * bs;
779 phys_B.h = tile_size_B / phys_B.w;
780 break;
781 }
782 case ISL_TILING_64:
783 /* The tables below are taken from the "2D Surfaces" & "3D Surfaces"
784 * pages in the Bspec which are formulated in terms of the Cv and Cu
785 * constants. This is different from the tables in the "Tile64 Format"
786 * page which should be equivalent but are usually in terms of pixels.
787 * Also note that Cv and Cu are HxW order to match the Bspec table, not
788 * WxH order like you might expect.
789 *
790 * From the Bspec's or ATS-M PRMs Volume 5: Memory Data Formats, "Tile64
791 * Format" :
792 *
793 * MSAA Depth/Stencil surface use IMS (Interleaved Multi Samples)
794 * which means:
795 *
796 * - Use the 1X MSAA (non-MSRT) version of the Tile64 equations and
797 * let the client unit do the swizzling internally
798 *
799 * Surfaces using the IMS layout will use the mapping for 1x MSAA.
800 */
801 #define tile_extent2d(bs, cv, cu, a) \
802 isl_extent4d((1 << cu) / bs, 1 << cv, 1, a)
803 #define tile_extent3d(bs, cr, cv, cu) \
804 isl_extent4d((1 << cu) / bs, 1 << cv, 1 << cr, 1)
805
806 if (dim == ISL_SURF_DIM_3D) {
807 switch (format_bpb) {
808 case 128: logical_el = tile_extent3d(bs, 4, 4, 8); break;
809 case 64: logical_el = tile_extent3d(bs, 4, 4, 8); break;
810 case 32: logical_el = tile_extent3d(bs, 4, 5, 7); break;
811 case 16: logical_el = tile_extent3d(bs, 5, 5, 6); break;
812 case 8: logical_el = tile_extent3d(bs, 5, 5, 6); break;
813 default: unreachable("Unsupported format size for 3D");
814 }
815 } else {
816 if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
817 switch (format_bpb) {
818 case 128: logical_el = tile_extent2d(bs, 6, 10, 1); break;
819 case 64: logical_el = tile_extent2d(bs, 6, 10, 1); break;
820 case 32: logical_el = tile_extent2d(bs, 7, 9, 1); break;
821 case 16: logical_el = tile_extent2d(bs, 7, 9, 1); break;
822 case 8: logical_el = tile_extent2d(bs, 8, 8, 1); break;
823 default: unreachable("Unsupported format size.");
824 }
825 } else if (samples == 2) {
826 switch (format_bpb) {
827 case 128: logical_el = tile_extent2d(bs, 6, 9, 2); break;
828 case 64: logical_el = tile_extent2d(bs, 6, 9, 2); break;
829 case 32: logical_el = tile_extent2d(bs, 7, 8, 2); break;
830 case 16: logical_el = tile_extent2d(bs, 7, 8, 2); break;
831 case 8: logical_el = tile_extent2d(bs, 8, 7, 2); break;
832 default: unreachable("Unsupported format size.");
833 }
834 } else {
835 switch (format_bpb) {
836 case 128: logical_el = tile_extent2d(bs, 5, 9, 4); break;
837 case 64: logical_el = tile_extent2d(bs, 5, 9, 4); break;
838 case 32: logical_el = tile_extent2d(bs, 6, 8, 4); break;
839 case 16: logical_el = tile_extent2d(bs, 6, 8, 4); break;
840 case 8: logical_el = tile_extent2d(bs, 7, 7, 4); break;
841 default: unreachable("Unsupported format size.");
842 }
843 }
844 }
845
846 #undef tile_extent2d
847 #undef tile_extent3d
848
849 phys_B.w = logical_el.w * bs;
850 phys_B.h = 64 * 1024 / phys_B.w;
851 break;
852
853 case ISL_TILING_64_XE2:
854 /* The tables below are taken from BSpec 58767 which are formulated in
855 * terms of the Cv and Cu constants. This is different from the tables in
856 * the "Tile64 Format" page which should be equivalent but are usually in
857 * terms of pixels.
858 *
859 * Also note that Cv and Cu are HxW order to match the Bspec table, not
860 * WxH order like you might expect.
861 */
862 #define tile_extent2d(bs, cv, cu, a) \
863 isl_extent4d((1 << cu) / bs, 1 << cv, 1, a)
864 #define tile_extent3d(bs, cr, cv, cu) \
865 isl_extent4d((1 << cu) / bs, 1 << cv, 1 << cr, 1)
866
867 if (dim == ISL_SURF_DIM_3D) {
868 switch (format_bpb) {
869 case 128: logical_el = tile_extent3d(bs, 4, 4, 8); break;
870 case 64: logical_el = tile_extent3d(bs, 4, 4, 8); break;
871 case 32: logical_el = tile_extent3d(bs, 4, 5, 7); break;
872 case 16: logical_el = tile_extent3d(bs, 5, 5, 6); break;
873 case 8: logical_el = tile_extent3d(bs, 5, 5, 6); break;
874 default: unreachable("Unsupported format size for 3D");
875 }
876 } else {
877 if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
878 switch (format_bpb) {
879 case 128: logical_el = tile_extent2d(bs, 6, 10, 1); break;
880 case 64: logical_el = tile_extent2d(bs, 6, 10, 1); break;
881 case 32: logical_el = tile_extent2d(bs, 7, 9, 1); break;
882 case 16: logical_el = tile_extent2d(bs, 7, 9, 1); break;
883 case 8: logical_el = tile_extent2d(bs, 8, 8, 1); break;
884 default: unreachable("Unsupported format size.");
885 }
886 } else if (samples == 2) {
887 switch (format_bpb) {
888 case 128: logical_el = tile_extent2d(bs, 5, 10, 2); break;
889 case 64: logical_el = tile_extent2d(bs, 6, 9, 2); break;
890 case 32: logical_el = tile_extent2d(bs, 7, 8, 2); break;
891 case 16: logical_el = tile_extent2d(bs, 7, 8, 2); break;
892 case 8: logical_el = tile_extent2d(bs, 8, 7, 2); break;
893 default: unreachable("Unsupported format size.");
894 }
895 } else if (samples == 4) {
896 switch (format_bpb) {
897 case 128: logical_el = tile_extent2d(bs, 5, 9, 4); break;
898 case 64: logical_el = tile_extent2d(bs, 5, 9, 4); break;
899 case 32: logical_el = tile_extent2d(bs, 6, 8, 4); break;
900 case 16: logical_el = tile_extent2d(bs, 6, 8, 4); break;
901 case 8: logical_el = tile_extent2d(bs, 7, 7, 4); break;
902 default: unreachable("Unsupported format size.");
903 }
904 } else if (samples == 8) {
905 switch (format_bpb) {
906 case 128: logical_el = tile_extent2d(bs, 5, 8, 8); break;
907 case 64: logical_el = tile_extent2d(bs, 5, 8, 8); break;
908 case 32: logical_el = tile_extent2d(bs, 5, 8, 8); break;
909 case 16: logical_el = tile_extent2d(bs, 6, 7, 8); break;
910 case 8: logical_el = tile_extent2d(bs, 6, 7, 8); break;
911 default: unreachable("Unsupported format size.");
912 }
913 } else if (samples == 16) {
914 switch (format_bpb) {
915 case 128: logical_el = tile_extent2d(bs, 4, 8, 16); break;
916 case 64: logical_el = tile_extent2d(bs, 5, 7, 16); break;
917 case 32: logical_el = tile_extent2d(bs, 5, 7, 16); break;
918 case 16: logical_el = tile_extent2d(bs, 5, 7, 16); break;
919 case 8: logical_el = tile_extent2d(bs, 6, 6, 16); break;
920 default: unreachable("Unsupported format size.");
921 }
922 }
923 }
924
925 #undef tile_extent2d
926 #undef tile_extent3d
927
928 phys_B.w = logical_el.w * bs;
929 phys_B.h = 64 * 1024 / phys_B.w;
930 break;
931
932 case ISL_TILING_HIZ:
933 /* HiZ buffers are required to have a 128bpb HiZ format. The tiling has
934 * the same physical dimensions as Y-tiling but actually has two HiZ
935 * columns per Y-tiled column.
936 */
937 assert(bs == 16);
938 logical_el = isl_extent4d(16, 16, 1, 1);
939 phys_B = isl_extent2d(128, 32);
940 break;
941
942 case ISL_TILING_CCS:
943 /* CCS surfaces are required to have one of the GENX_CCS_* formats which
944 * have a block size of 1 or 2 bits per block and each CCS element
945 * corresponds to one cache-line pair in the main surface. From the Sky
946 * Lake PRM Vol. 12 in the section on planes:
947 *
948 * "The Color Control Surface (CCS) contains the compression status
949 * of the cache-line pairs. The compression state of the cache-line
950 * pair is specified by 2 bits in the CCS. Each CCS cache-line
951 * represents an area on the main surface of 16x16 sets of 128 byte
952 * Y-tiled cache-line-pairs. CCS is always Y tiled."
953 *
954 * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
955 * Since each cache line corresponds to a 16x16 set of cache-line pairs,
956 * that yields total tile area of 128x128 cache-line pairs or CCS
957 * elements. On older hardware, each CCS element is 1 bit and the tile
958 * is 128x256 elements.
959 */
960 assert(format_bpb == 1 || format_bpb == 2);
961 logical_el = isl_extent4d(128, 256 / format_bpb, 1, 1);
962 phys_B = isl_extent2d(128, 32);
963 break;
964
965 case ISL_TILING_GFX12_CCS:
966 /* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory
967 * Compression > Memory Compression - Gfx12:
968 *
969 * 4 bits of auxiliary plane data are required for 2 cachelines of
970 * main surface data. This results in a single cacheline of auxiliary
971 * plane data mapping to 4 4K pages of main surface data for the 4K
972 * pages (tile Y ) and 1 64K Tile Ys page.
973 *
974 * The Y-tiled pairing bit of 9 shown in the table below that Bspec
975 * section expresses that the 2 cachelines of main surface data are
976 * horizontally adjacent.
977 *
978 * TODO: Handle Ys, Yf and their pairing bits.
979 *
980 * Therefore, each CCS cacheline represents a 512Bx32 row area and each
981 * element represents a 32Bx4 row area.
982 */
983 assert(format_bpb == 4);
984 logical_el = isl_extent4d(16, 8, 1, 1);
985 phys_B = isl_extent2d(64, 1);
986 break;
987
988 default:
989 unreachable("not reached");
990 } /* end switch */
991
992 *tile_info = (struct isl_tile_info) {
993 .tiling = tiling,
994 .format_bpb = format_bpb,
995 .logical_extent_el = logical_el,
996 .phys_extent_B = phys_B,
997 .max_miptail_levels = tiling_max_mip_tail(tiling, dim, samples),
998 };
999 }
1000
1001 bool
isl_color_value_is_zero(union isl_color_value value,enum isl_format format)1002 isl_color_value_is_zero(union isl_color_value value,
1003 enum isl_format format)
1004 {
1005 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
1006
1007 #define RETURN_FALSE_IF_NOT_0(c, i) \
1008 if (fmtl->channels.c.bits && value.u32[i] != 0) \
1009 return false
1010
1011 RETURN_FALSE_IF_NOT_0(r, 0);
1012 RETURN_FALSE_IF_NOT_0(g, 1);
1013 RETURN_FALSE_IF_NOT_0(b, 2);
1014 RETURN_FALSE_IF_NOT_0(a, 3);
1015
1016 #undef RETURN_FALSE_IF_NOT_0
1017
1018 return true;
1019 }
1020
1021 bool
isl_color_value_is_zero_one(union isl_color_value value,enum isl_format format)1022 isl_color_value_is_zero_one(union isl_color_value value,
1023 enum isl_format format)
1024 {
1025 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
1026
1027 #define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
1028 if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
1029 return false
1030
1031 if (isl_format_has_int_channel(format)) {
1032 RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
1033 RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
1034 RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
1035 RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
1036 } else {
1037 RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
1038 RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
1039 RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
1040 RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
1041 }
1042
1043 #undef RETURN_FALSE_IF_NOT_0_1
1044
1045 return true;
1046 }
1047
1048 /**
1049 * @param[out] tiling is set only on success
1050 */
1051 static bool
isl_surf_choose_tiling(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling * tiling)1052 isl_surf_choose_tiling(const struct isl_device *dev,
1053 const struct isl_surf_init_info *restrict info,
1054 enum isl_tiling *tiling)
1055 {
1056 isl_tiling_flags_t tiling_flags = info->tiling_flags;
1057
1058 /* HiZ surfaces always use the HiZ tiling */
1059 if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
1060 assert(isl_format_is_hiz(info->format));
1061 assert(tiling_flags == ISL_TILING_HIZ_BIT);
1062 *tiling = isl_tiling_flag_to_enum(tiling_flags);
1063 return true;
1064 }
1065
1066 /* CCS surfaces always use the CCS tiling */
1067 if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
1068 assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
1069 UNUSED bool ivb_ccs = ISL_GFX_VER(dev) < 12 &&
1070 tiling_flags == ISL_TILING_CCS_BIT;
1071 UNUSED bool tgl_ccs = ISL_GFX_VER(dev) >= 12 &&
1072 tiling_flags == ISL_TILING_GFX12_CCS_BIT;
1073 assert(ivb_ccs != tgl_ccs);
1074 *tiling = isl_tiling_flag_to_enum(tiling_flags);
1075 return true;
1076 }
1077
1078 if (ISL_GFX_VERX10(dev) >= 125) {
1079 isl_gfx125_filter_tiling(dev, info, &tiling_flags);
1080 } else if (ISL_GFX_VER(dev) >= 6) {
1081 isl_gfx6_filter_tiling(dev, info, &tiling_flags);
1082 } else {
1083 isl_gfx4_filter_tiling(dev, info, &tiling_flags);
1084 }
1085
1086 #define CHOOSE(__tiling) \
1087 do { \
1088 if (tiling_flags & (1u << (__tiling))) { \
1089 *tiling = (__tiling); \
1090 return true; \
1091 } \
1092 } while (0)
1093
1094 /* Of the tiling modes remaining, choose the one that offers the best
1095 * performance.
1096 */
1097
1098 if (info->dim == ISL_SURF_DIM_1D) {
1099 /* Prefer linear for 1D surfaces because they do not benefit from
1100 * tiling. To the contrary, tiling leads to wasted memory and poor
1101 * memory locality due to the swizzling and alignment restrictions
1102 * required in tiled surfaces.
1103 */
1104 CHOOSE(ISL_TILING_LINEAR);
1105 }
1106
1107 /* For sparse images, prefer the formats that use the standard block
1108 * shapes.
1109 */
1110 if (info->usage & ISL_SURF_USAGE_SPARSE_BIT) {
1111 CHOOSE(ISL_GFX_VER(dev) >= 20 ? ISL_TILING_64_XE2 : ISL_TILING_64);
1112 CHOOSE(ISL_TILING_ICL_Ys);
1113 CHOOSE(ISL_TILING_SKL_Ys);
1114 }
1115
1116 /* Choose suggested 4K tilings first, then 64K tilings:
1117 *
1118 * Then following quotes can be found in the SKL PRMs,
1119 * Volume 5: Memory Views, Address Tiling Function Introduction
1120 * and from the ATS-M PRMs,
1121 * Volume 5: Memory Data Formats, Address Tiling Function Introduction
1122 *
1123 * "TileY: Used for most tiled surfaces when TR_MODE=TR_NONE."
1124 * "Tile4: 4KB tiling mode based on previously-supported TileY"
1125 * "TileYF: 4KB tiling mode based on TileY"
1126 * "TileYS: 64KB tiling mode based on TileY"
1127 * "Tile64: 64KB tiling mode which support standard-tiling including
1128 * Mip Tails"
1129 *
1130 * When TileYF and TileYS are used TR_MODE != TR_NONE.
1131 */
1132 CHOOSE(ISL_TILING_Y0);
1133 CHOOSE(ISL_TILING_4);
1134 CHOOSE(ISL_TILING_SKL_Yf);
1135 CHOOSE(ISL_TILING_ICL_Yf);
1136 CHOOSE(ISL_TILING_SKL_Ys);
1137 CHOOSE(ISL_TILING_ICL_Ys);
1138 CHOOSE(ISL_GFX_VER(dev) >= 20 ? ISL_TILING_64_XE2 : ISL_TILING_64);
1139
1140 CHOOSE(ISL_TILING_X);
1141 CHOOSE(ISL_TILING_W);
1142 CHOOSE(ISL_TILING_LINEAR);
1143
1144 #undef CHOOSE
1145
1146 /* No tiling mode accommodates the inputs. */
1147 assert(tiling_flags == 0);
1148 return notify_failure(info, "no supported tiling");
1149 }
1150
1151 static bool
isl_choose_msaa_layout(const struct isl_device * dev,const struct isl_surf_init_info * info,enum isl_tiling tiling,enum isl_msaa_layout * msaa_layout)1152 isl_choose_msaa_layout(const struct isl_device *dev,
1153 const struct isl_surf_init_info *info,
1154 enum isl_tiling tiling,
1155 enum isl_msaa_layout *msaa_layout)
1156 {
1157 if (ISL_GFX_VER(dev) >= 8) {
1158 return isl_gfx8_choose_msaa_layout(dev, info, tiling, msaa_layout);
1159 } else if (ISL_GFX_VER(dev) >= 7) {
1160 return isl_gfx7_choose_msaa_layout(dev, info, tiling, msaa_layout);
1161 } else if (ISL_GFX_VER(dev) >= 6) {
1162 return isl_gfx6_choose_msaa_layout(dev, info, tiling, msaa_layout);
1163 } else {
1164 return isl_gfx4_choose_msaa_layout(dev, info, tiling, msaa_layout);
1165 }
1166 }
1167
1168 struct isl_extent2d
isl_get_interleaved_msaa_px_size_sa(uint32_t samples)1169 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
1170 {
1171 assert(isl_is_pow2(samples));
1172
1173 /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
1174 * Sizes (p133):
1175 *
1176 * If the surface is multisampled and it is a depth or stencil surface
1177 * or Multisampled Surface StorageFormat in SURFACE_STATE is
1178 * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
1179 * proceeding: [...]
1180 */
1181 return (struct isl_extent2d) {
1182 .width = 1 << ((ffs(samples) - 0) / 2),
1183 .height = 1 << ((ffs(samples) - 1) / 2),
1184 };
1185 }
1186
1187 static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,uint32_t * width,uint32_t * height)1188 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
1189 uint32_t *width, uint32_t *height)
1190 {
1191 const struct isl_extent2d px_size_sa =
1192 isl_get_interleaved_msaa_px_size_sa(samples);
1193
1194 if (width)
1195 *width = isl_align(*width, 2) * px_size_sa.width;
1196 if (height)
1197 *height = isl_align(*height, 2) * px_size_sa.height;
1198 }
1199
1200 static enum isl_array_pitch_span
isl_choose_array_pitch_span(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_level0_sa)1201 isl_choose_array_pitch_span(const struct isl_device *dev,
1202 const struct isl_surf_init_info *restrict info,
1203 enum isl_dim_layout dim_layout,
1204 const struct isl_extent4d *phys_level0_sa)
1205 {
1206 switch (dim_layout) {
1207 case ISL_DIM_LAYOUT_GFX9_1D:
1208 case ISL_DIM_LAYOUT_GFX4_2D:
1209 if (ISL_GFX_VER(dev) >= 8) {
1210 /* QPitch becomes programmable in Broadwell. So choose the
1211 * most compact QPitch possible in order to conserve memory.
1212 *
1213 * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
1214 * >> RENDER_SURFACE_STATE Surface QPitch (p325):
1215 *
1216 * - Software must ensure that this field is set to a value
1217 * sufficiently large such that the array slices in the surface
1218 * do not overlap. Refer to the Memory Data Formats section for
1219 * information on how surfaces are stored in memory.
1220 *
1221 * - This field specifies the distance in rows between array
1222 * slices. It is used only in the following cases:
1223 *
1224 * - Surface Array is enabled OR
1225 * - Number of Mulitsamples is not NUMSAMPLES_1 and
1226 * Multisampled Surface Storage Format set to MSFMT_MSS OR
1227 * - Surface Type is SURFTYPE_CUBE
1228 */
1229 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1230 } else if (ISL_GFX_VER(dev) >= 7) {
1231 /* Note that Ivybridge introduces
1232 * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
1233 * driver more control over the QPitch.
1234 */
1235
1236 if (phys_level0_sa->array_len == 1) {
1237 /* The hardware will never use the QPitch. So choose the most
1238 * compact QPitch possible in order to conserve memory.
1239 */
1240 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1241 }
1242
1243 if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
1244 (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
1245 /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
1246 * Section 6.18.4.7: Surface Arrays (p112):
1247 *
1248 * If Surface Array Spacing is set to ARYSPC_FULL (note that
1249 * the depth buffer and stencil buffer have an implied value of
1250 * ARYSPC_FULL):
1251 */
1252 return ISL_ARRAY_PITCH_SPAN_FULL;
1253 }
1254
1255 if (info->levels == 1) {
1256 /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
1257 * to ARYSPC_LOD0.
1258 */
1259 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1260 }
1261
1262 return ISL_ARRAY_PITCH_SPAN_FULL;
1263 } else if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
1264 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
1265 isl_surf_usage_is_stencil(info->usage)) {
1266 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1267 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1268 *
1269 * The separate stencil buffer does not support mip mapping, thus
1270 * the storage for LODs other than LOD 0 is not needed.
1271 */
1272 assert(info->levels == 1);
1273 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1274 } else {
1275 if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
1276 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
1277 isl_surf_usage_is_stencil(info->usage)) {
1278 /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1279 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1280 *
1281 * The separate stencil buffer does not support mip mapping,
1282 * thus the storage for LODs other than LOD 0 is not needed.
1283 */
1284 assert(info->levels == 1);
1285 assert(phys_level0_sa->array_len == 1);
1286 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1287 }
1288
1289 if (phys_level0_sa->array_len == 1) {
1290 /* The hardware will never use the QPitch. So choose the most
1291 * compact QPitch possible in order to conserve memory.
1292 */
1293 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1294 }
1295
1296 return ISL_ARRAY_PITCH_SPAN_FULL;
1297 }
1298
1299 case ISL_DIM_LAYOUT_GFX4_3D:
1300 /* The hardware will never use the QPitch. So choose the most
1301 * compact QPitch possible in order to conserve memory.
1302 */
1303 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1304
1305 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1306 /* Each array image in the gfx6 stencil of HiZ surface is compact in the
1307 * sense that every LOD is a compact array of the same size as LOD0.
1308 */
1309 return ISL_ARRAY_PITCH_SPAN_COMPACT;
1310 }
1311
1312 unreachable("bad isl_dim_layout");
1313 return ISL_ARRAY_PITCH_SPAN_FULL;
1314 }
1315
1316 static void
isl_choose_image_alignment_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling tiling,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,struct isl_extent3d * image_align_el)1317 isl_choose_image_alignment_el(const struct isl_device *dev,
1318 const struct isl_surf_init_info *restrict info,
1319 enum isl_tiling tiling,
1320 enum isl_dim_layout dim_layout,
1321 enum isl_msaa_layout msaa_layout,
1322 struct isl_extent3d *image_align_el)
1323 {
1324 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1325 if (fmtl->txc == ISL_TXC_MCS) {
1326 /*
1327 * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
1328 *
1329 * Height, width, and layout of MCS buffer in this case must match with
1330 * Render Target height, width, and layout. MCS buffer is tiledY.
1331 *
1332 * Pick a vertical and horizontal alignment that matches the main render
1333 * target. Vertical alignment is important for properly spacing an array
1334 * of MCS images. Horizontal alignment is not expected to matter because
1335 * MCS is not mipmapped. Regardless, we pick a valid value here.
1336 */
1337 if (ISL_GFX_VERX10(dev) >= 125) {
1338 *image_align_el = isl_extent3d(128 * 8 / fmtl->bpb, 4, 1);
1339 } else if (ISL_GFX_VER(dev) >= 8) {
1340 *image_align_el = isl_extent3d(16, 4, 1);
1341 } else {
1342 *image_align_el = isl_extent3d(4, 4, 1);
1343 }
1344 return;
1345 } else if (fmtl->txc == ISL_TXC_HIZ) {
1346 assert(ISL_GFX_VER(dev) >= 6);
1347 if (ISL_GFX_VER(dev) == 6) {
1348 /* HiZ surfaces on Sandy Bridge are packed tightly. */
1349 *image_align_el = isl_extent3d(1, 1, 1);
1350 } else if (ISL_GFX_VER(dev) < 12) {
1351 /* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the
1352 * primary surface which works out to 2x2 HiZ elements.
1353 */
1354 *image_align_el = isl_extent3d(2, 2, 1);
1355 } else {
1356 /* We choose the alignments based on the docs and what we've seen on
1357 * prior platforms. From the TGL PRM Vol. 9, "Hierarchical Depth
1358 * Buffer":
1359 *
1360 * The height and width of the hierarchical depth buffer that must
1361 * be allocated are computed by the following formulas, where HZ
1362 * is the hierarchical depth buffer and Z is the depth buffer. The
1363 * Z_Height, Z_Width, and Z_Depth values given in these formulas
1364 * are those present in 3DSTATE_DEPTH_BUFFER incremented by one.
1365 *
1366 * The note about 3DSTATE_DEPTH_BUFFER tells us that the dimensions
1367 * in the following formula refers to the base level. The key formula
1368 * for the horizontal alignment is:
1369 *
1370 * HZ_Width (bytes) [=]
1371 * ceiling(Z_Width / 16) * 16
1372 *
1373 * This type of formula is used when sizing compression blocks. So,
1374 * the docs seem to say that the HiZ format has a block width of 16,
1375 * and thus, the surface has a minimum horizontal alignment of 16
1376 * pixels. This formula hasn't changed from prior platforms (where
1377 * we've chosen a horizontal alignment of 16), so we should be on the
1378 * right track. As for the vertical alignment, we're told:
1379 *
1380 * To compute the minimum QPitch for the HZ surface, the height of
1381 * each LOD in pixels is determined using the equations for hL in
1382 * the GPU Overview volume, using a vertical alignment j=16.
1383 *
1384 * We're not calculating the QPitch right now, but the vertical
1385 * alignment is plainly given as 16 rows in the depth buffer.
1386 *
1387 * As a result, we believe that HiZ surfaces are aligned to 16x16
1388 * pixels in the primary surface. We divide this area by the HiZ
1389 * block dimensions to get the alignment in terms of HiZ blocks.
1390 */
1391 *image_align_el = isl_extent3d(16 / fmtl->bw, 16 / fmtl->bh, 1);
1392 }
1393 return;
1394 }
1395
1396 if (ISL_GFX_VERX10(dev) >= 125) {
1397 isl_gfx125_choose_image_alignment_el(dev, info, tiling, dim_layout,
1398 msaa_layout, image_align_el);
1399 } else if (ISL_GFX_VER(dev) >= 12) {
1400 isl_gfx12_choose_image_alignment_el(dev, info, tiling, dim_layout,
1401 msaa_layout, image_align_el);
1402 } else if (ISL_GFX_VER(dev) >= 9) {
1403 isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout,
1404 msaa_layout, image_align_el);
1405 } else if (ISL_GFX_VER(dev) >= 8) {
1406 isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout,
1407 msaa_layout, image_align_el);
1408 } else if (ISL_GFX_VER(dev) >= 7) {
1409 isl_gfx7_choose_image_alignment_el(dev, info, tiling, dim_layout,
1410 msaa_layout, image_align_el);
1411 } else if (ISL_GFX_VER(dev) >= 6) {
1412 isl_gfx6_choose_image_alignment_el(dev, info, tiling, dim_layout,
1413 msaa_layout, image_align_el);
1414 } else {
1415 isl_gfx4_choose_image_alignment_el(dev, info, tiling, dim_layout,
1416 msaa_layout, image_align_el);
1417 }
1418 }
1419
1420 static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device * dev,enum isl_surf_dim logical_dim,enum isl_tiling tiling,isl_surf_usage_flags_t usage)1421 isl_surf_choose_dim_layout(const struct isl_device *dev,
1422 enum isl_surf_dim logical_dim,
1423 enum isl_tiling tiling,
1424 isl_surf_usage_flags_t usage)
1425 {
1426 /* Sandy bridge needs a special layout for HiZ and stencil. */
1427 if (ISL_GFX_VER(dev) == 6 &&
1428 (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
1429 return ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ;
1430
1431 if (ISL_GFX_VER(dev) >= 9) {
1432 switch (logical_dim) {
1433 case ISL_SURF_DIM_1D:
1434 /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
1435 *
1436 * One-dimensional surfaces use a tiling mode of linear.
1437 * Technically, they are not tiled resources, but the Tiled
1438 * Resource Mode field in RENDER_SURFACE_STATE is still used to
1439 * indicate the alignment requirements for this linear surface
1440 * (See 1D Alignment requirements for how 4K and 64KB Tiled
1441 * Resource Modes impact alignment). Alternatively, a 1D surface
1442 * can be defined as a 2D tiled surface (e.g. TileY or TileX) with
1443 * a height of 0.
1444 *
1445 * In other words, ISL_DIM_LAYOUT_GFX9_1D is only used for linear
1446 * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GFX4_2D is used.
1447 */
1448 if (tiling == ISL_TILING_LINEAR)
1449 return ISL_DIM_LAYOUT_GFX9_1D;
1450 else
1451 return ISL_DIM_LAYOUT_GFX4_2D;
1452 case ISL_SURF_DIM_2D:
1453 case ISL_SURF_DIM_3D:
1454 return ISL_DIM_LAYOUT_GFX4_2D;
1455 }
1456 } else {
1457 switch (logical_dim) {
1458 case ISL_SURF_DIM_1D:
1459 case ISL_SURF_DIM_2D:
1460 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1461 *
1462 * The cube face textures are stored in the same way as 3D surfaces
1463 * are stored (see section 6.17.5 for details). For cube surfaces,
1464 * however, the depth is equal to the number of faces (always 6) and
1465 * is not reduced for each MIP.
1466 */
1467 if (ISL_GFX_VER(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
1468 return ISL_DIM_LAYOUT_GFX4_3D;
1469
1470 return ISL_DIM_LAYOUT_GFX4_2D;
1471 case ISL_SURF_DIM_3D:
1472 return ISL_DIM_LAYOUT_GFX4_3D;
1473 }
1474 }
1475
1476 unreachable("bad isl_surf_dim");
1477 return ISL_DIM_LAYOUT_GFX4_2D;
1478 }
1479
1480 /**
1481 * Calculate the physical extent of the surface's first level, in units of
1482 * surface samples.
1483 */
1484 static void
isl_calc_phys_level0_extent_sa(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,enum isl_tiling tiling,enum isl_msaa_layout msaa_layout,struct isl_extent4d * phys_level0_sa)1485 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
1486 const struct isl_surf_init_info *restrict info,
1487 enum isl_dim_layout dim_layout,
1488 enum isl_tiling tiling,
1489 enum isl_msaa_layout msaa_layout,
1490 struct isl_extent4d *phys_level0_sa)
1491 {
1492 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1493
1494 if (isl_format_is_planar(info->format))
1495 unreachable("Planar formats unsupported");
1496
1497 switch (info->dim) {
1498 case ISL_SURF_DIM_1D:
1499 assert(info->height == 1);
1500 assert(info->depth == 1);
1501 assert(info->samples == 1);
1502
1503 switch (dim_layout) {
1504 case ISL_DIM_LAYOUT_GFX4_3D:
1505 unreachable("bad isl_dim_layout");
1506
1507 case ISL_DIM_LAYOUT_GFX9_1D:
1508 case ISL_DIM_LAYOUT_GFX4_2D:
1509 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1510 *phys_level0_sa = (struct isl_extent4d) {
1511 .w = info->width,
1512 .h = 1,
1513 .d = 1,
1514 .a = info->array_len,
1515 };
1516 break;
1517 }
1518 break;
1519
1520 case ISL_SURF_DIM_2D:
1521 if (ISL_GFX_VER(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
1522 assert(dim_layout == ISL_DIM_LAYOUT_GFX4_3D);
1523 else
1524 assert(dim_layout == ISL_DIM_LAYOUT_GFX4_2D ||
1525 dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);
1526
1527 switch (msaa_layout) {
1528 case ISL_MSAA_LAYOUT_NONE:
1529 assert(info->depth == 1);
1530 assert(info->samples == 1);
1531
1532 *phys_level0_sa = (struct isl_extent4d) {
1533 .w = info->width,
1534 .h = info->height,
1535 .d = 1,
1536 .a = info->array_len,
1537 };
1538 break;
1539
1540 case ISL_MSAA_LAYOUT_ARRAY:
1541 assert(info->depth == 1);
1542 assert(info->levels == 1);
1543 assert(isl_format_supports_multisampling(dev->info, info->format));
1544 assert(fmtl->bw == 1 && fmtl->bh == 1);
1545
1546 *phys_level0_sa = (struct isl_extent4d) {
1547 .w = info->width,
1548 .h = info->height,
1549 .d = 1,
1550 .a = info->array_len * info->samples,
1551 };
1552 break;
1553
1554 case ISL_MSAA_LAYOUT_INTERLEAVED:
1555 assert(info->depth == 1);
1556 assert(info->levels == 1);
1557 assert(isl_format_supports_multisampling(dev->info, info->format));
1558
1559 *phys_level0_sa = (struct isl_extent4d) {
1560 .w = info->width,
1561 .h = info->height,
1562 .d = 1,
1563 .a = info->array_len,
1564 };
1565
1566 isl_msaa_interleaved_scale_px_to_sa(info->samples,
1567 &phys_level0_sa->w,
1568 &phys_level0_sa->h);
1569 break;
1570 }
1571 break;
1572
1573 case ISL_SURF_DIM_3D:
1574 assert(info->array_len == 1);
1575 assert(info->samples == 1);
1576
1577 if (fmtl->bd > 1) {
1578 isl_finishme("%s:%s: compression block with depth > 1",
1579 __FILE__, __func__);
1580 }
1581
1582 switch (dim_layout) {
1583 case ISL_DIM_LAYOUT_GFX9_1D:
1584 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1585 unreachable("bad isl_dim_layout");
1586
1587 case ISL_DIM_LAYOUT_GFX4_2D:
1588 case ISL_DIM_LAYOUT_GFX4_3D:
1589 *phys_level0_sa = (struct isl_extent4d) {
1590 .w = info->width,
1591 .h = info->height,
1592 .d = info->depth,
1593 .a = 1,
1594 };
1595 break;
1596 }
1597 break;
1598 }
1599 }
1600
1601 static void
isl_get_miptail_level_offset_el(enum isl_tiling tiling,enum isl_surf_dim dim,uint32_t format_bpb,uint32_t level,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el)1602 isl_get_miptail_level_offset_el(enum isl_tiling tiling,
1603 enum isl_surf_dim dim,
1604 uint32_t format_bpb,
1605 uint32_t level,
1606 uint32_t *x_offset_el,
1607 uint32_t *y_offset_el,
1608 uint32_t *z_offset_el)
1609 {
1610 uint32_t row = isl_get_miptail_base_row(tiling) + level;
1611 uint32_t col = 8 - ffs(format_bpb);
1612
1613 switch (dim) {
1614 case ISL_SURF_DIM_2D:
1615 switch (tiling) {
1616 case ISL_TILING_64:
1617 case ISL_TILING_64_XE2:
1618 case ISL_TILING_ICL_Yf:
1619 case ISL_TILING_ICL_Ys:
1620 assert(row < ARRAY_SIZE(icl_std_y_2d_miptail_offset_el));
1621 assert(col < ARRAY_SIZE(icl_std_y_2d_miptail_offset_el[0]));
1622 *x_offset_el = icl_std_y_2d_miptail_offset_el[row][col][0];
1623 *y_offset_el = icl_std_y_2d_miptail_offset_el[row][col][1];
1624 break;
1625 case ISL_TILING_SKL_Yf:
1626 case ISL_TILING_SKL_Ys:
1627 assert(row < ARRAY_SIZE(skl_std_y_2d_miptail_offset_el));
1628 assert(col < ARRAY_SIZE(skl_std_y_2d_miptail_offset_el[0]));
1629 *x_offset_el = skl_std_y_2d_miptail_offset_el[row][col][0];
1630 *y_offset_el = skl_std_y_2d_miptail_offset_el[row][col][1];
1631 break;
1632 default:
1633 unreachable("invalid tiling");
1634 }
1635 *z_offset_el = 0;
1636 break;
1637
1638 case ISL_SURF_DIM_3D:
1639 switch (tiling) {
1640 case ISL_TILING_64:
1641 case ISL_TILING_64_XE2:
1642 assert(row < ARRAY_SIZE(acm_tile64_3d_miptail_offset_el));
1643 assert(col < ARRAY_SIZE(acm_tile64_3d_miptail_offset_el[0]));
1644 *x_offset_el = acm_tile64_3d_miptail_offset_el[row][col][0];
1645 *y_offset_el = acm_tile64_3d_miptail_offset_el[row][col][1];
1646 *z_offset_el = acm_tile64_3d_miptail_offset_el[row][col][2];
1647 break;
1648 case ISL_TILING_ICL_Yf:
1649 case ISL_TILING_ICL_Ys:
1650 assert(row < ARRAY_SIZE(icl_std_y_3d_miptail_offset_el));
1651 assert(col < ARRAY_SIZE(icl_std_y_3d_miptail_offset_el[0]));
1652 *x_offset_el = icl_std_y_3d_miptail_offset_el[row][col][0];
1653 *y_offset_el = icl_std_y_3d_miptail_offset_el[row][col][1];
1654 *z_offset_el = icl_std_y_3d_miptail_offset_el[row][col][2];
1655 break;
1656 case ISL_TILING_SKL_Yf:
1657 case ISL_TILING_SKL_Ys:
1658 assert(row < ARRAY_SIZE(skl_std_y_3d_miptail_offset_el));
1659 assert(col < ARRAY_SIZE(skl_std_y_3d_miptail_offset_el[0]));
1660 *x_offset_el = skl_std_y_3d_miptail_offset_el[row][col][0];
1661 *y_offset_el = skl_std_y_3d_miptail_offset_el[row][col][1];
1662 *z_offset_el = skl_std_y_3d_miptail_offset_el[row][col][2];
1663 break;
1664 default:
1665 unreachable("invalid tiling");
1666 }
1667 break;
1668
1669 case ISL_SURF_DIM_1D:
1670 unreachable("invalid dimension");
1671 }
1672 }
1673
1674 static uint32_t
isl_choose_miptail_start_level(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info)1675 isl_choose_miptail_start_level(const struct isl_device *dev,
1676 const struct isl_surf_init_info *restrict info,
1677 const struct isl_tile_info *tile_info)
1678 {
1679 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1680
1681 if (tile_info->max_miptail_levels == 0)
1682 return info->levels;
1683
1684 /* SKL PRMs, Volume 5: Memory Views, YUV 4:2:0 Format Memory Organization :
1685 *
1686 * "Planar YUV does not support MIP Tails as part of Standard Tiling.
1687 * The MIP Tail Start field in RENDER_SURFACE_STATE must be programmed
1688 * to 15."
1689 */
1690 if (isl_format_is_planar(info->format))
1691 return 15;
1692
1693 /* TODO: figure out why having YUV formats in the miptail on Gfx12 does not
1694 * work.
1695 */
1696 if (ISL_GFX_VER(dev) == 12 && isl_format_is_yuv(info->format))
1697 return 15;
1698
1699 assert(isl_tiling_is_64(tile_info->tiling) ||
1700 isl_tiling_is_std_y(tile_info->tiling));
1701 assert(info->samples == 1);
1702
1703 uint32_t max_miptail_levels = tile_info->max_miptail_levels;
1704
1705 /* Start with the minimum number of levels that will fit in the tile */
1706 uint32_t min_miptail_start =
1707 info->levels > max_miptail_levels ? info->levels - max_miptail_levels : 0;
1708
1709 /* Account for the specified minimum */
1710 min_miptail_start = MAX(min_miptail_start, info->min_miptail_start_level);
1711
1712 struct isl_extent3d level0_extent_el = {
1713 .w = isl_align_div_npot(info->width, fmtl->bw),
1714 .h = isl_align_div_npot(info->height, fmtl->bh),
1715 .d = isl_align_div_npot(info->depth, fmtl->bd),
1716 };
1717
1718 /* The first miptail slot takes up the entire right side of the tile. So,
1719 * the extent is just the distance from the offset of the first level to
1720 * the corner of the tile.
1721 */
1722 uint32_t level0_x_offset_el, level0_y_offset_el, level0_z_offset_el;
1723 isl_get_miptail_level_offset_el(tile_info->tiling, info->dim,
1724 fmtl->bpb, 0, /* level */
1725 &level0_x_offset_el,
1726 &level0_y_offset_el,
1727 &level0_z_offset_el);
1728 struct isl_extent3d miptail_level0_extent_el = {
1729 .w = tile_info->logical_extent_el.w - level0_x_offset_el,
1730 .h = tile_info->logical_extent_el.h - level0_y_offset_el,
1731 .d = tile_info->logical_extent_el.d - level0_z_offset_el,
1732 };
1733
1734 /* Now find the first level that fits the maximum miptail size requirement.
1735 */
1736 for (uint32_t s = min_miptail_start; s < info->levels; s++) {
1737 if (isl_minify(level0_extent_el.w, s) <= miptail_level0_extent_el.w &&
1738 isl_minify(level0_extent_el.h, s) <= miptail_level0_extent_el.h &&
1739 isl_minify(level0_extent_el.d, s) <= miptail_level0_extent_el.d)
1740 return s;
1741 }
1742
1743 return info->levels;
1744 }
1745
1746 /**
1747 * Calculate the pitch between physical array slices, in units of rows of
1748 * surface elements.
1749 */
1750 static uint32_t
isl_calc_array_pitch_el_rows_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,const struct isl_extent2d * phys_slice0_sa)1751 isl_calc_array_pitch_el_rows_gfx4_2d(
1752 const struct isl_device *dev,
1753 const struct isl_surf_init_info *restrict info,
1754 const struct isl_tile_info *tile_info,
1755 const struct isl_extent3d *image_align_sa,
1756 const struct isl_extent4d *phys_level0_sa,
1757 enum isl_array_pitch_span array_pitch_span,
1758 const struct isl_extent2d *phys_slice0_sa)
1759 {
1760 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1761 uint32_t pitch_sa_rows = 0;
1762
1763 switch (array_pitch_span) {
1764 case ISL_ARRAY_PITCH_SPAN_COMPACT:
1765 pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
1766 break;
1767 case ISL_ARRAY_PITCH_SPAN_FULL: {
1768 /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
1769 * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
1770 * Surfaces >> Surface Arrays.
1771 */
1772 uint32_t H0_sa = phys_level0_sa->h;
1773 uint32_t H1_sa = isl_minify(H0_sa, 1);
1774
1775 uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
1776 uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
1777
1778 uint32_t m;
1779 if (ISL_GFX_VER(dev) >= 7) {
1780 /* The QPitch equation changed slightly in Ivybridge. */
1781 m = 12;
1782 } else {
1783 m = 11;
1784 }
1785
1786 pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
1787
1788 if (ISL_GFX_VER(dev) == 6 && info->samples > 1 &&
1789 (info->height % 4 == 1)) {
1790 /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1791 * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1792 *
1793 * [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
1794 * the value calculated in the equation above , for every
1795 * other odd Surface Height starting from 1 i.e. 1,5,9,13.
1796 *
1797 * XXX(chadv): Is the errata natural corollary of the physical
1798 * layout of interleaved samples?
1799 */
1800 pitch_sa_rows += 4;
1801 }
1802
1803 pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
1804 } /* end case */
1805 break;
1806 }
1807
1808 assert(pitch_sa_rows % fmtl->bh == 0);
1809 uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
1810
1811 if (ISL_GFX_VER(dev) >= 9 && ISL_GFX_VER(dev) <= 11 &&
1812 fmtl->txc == ISL_TXC_CCS) {
1813 /*
1814 * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
1815 *
1816 * "Mip-mapped and arrayed surfaces are supported with MCS buffer
1817 * layout with these alignments in the RT space: Horizontal
1818 * Alignment = 128 and Vertical Alignment = 64."
1819 *
1820 * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
1821 *
1822 * "For non-multisampled render target's CCS auxiliary surface,
1823 * QPitch must be computed with Horizontal Alignment = 128 and
1824 * Surface Vertical Alignment = 256. These alignments are only for
1825 * CCS buffer and not for associated render target."
1826 *
1827 * The first restriction is already handled by isl_choose_image_alignment_el
1828 * but the second restriction, which is an extension of the first, only
1829 * applies to qpitch and must be applied here.
1830 *
1831 * The second restriction disappears on Gfx12.
1832 */
1833 assert(fmtl->bh == 4);
1834 pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
1835 }
1836
1837 if (ISL_GFX_VER(dev) >= 9 &&
1838 info->dim == ISL_SURF_DIM_3D &&
1839 tile_info->tiling != ISL_TILING_LINEAR) {
1840 /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
1841 *
1842 * Tile Mode != Linear: This field must be set to an integer multiple
1843 * of the tile height
1844 */
1845 pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
1846 }
1847
1848 return pitch_el_rows;
1849 }
1850
1851 /**
1852 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1853 * ISL_DIM_LAYOUT_GFX4_2D.
1854 */
1855 static void
isl_calc_phys_slice0_extent_sa_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t miptail_start_level,struct isl_extent2d * phys_slice0_sa)1856 isl_calc_phys_slice0_extent_sa_gfx4_2d(
1857 const struct isl_device *dev,
1858 const struct isl_surf_init_info *restrict info,
1859 const struct isl_tile_info *tile_info,
1860 enum isl_msaa_layout msaa_layout,
1861 const struct isl_extent3d *image_align_sa,
1862 const struct isl_extent4d *phys_level0_sa,
1863 uint32_t miptail_start_level,
1864 struct isl_extent2d *phys_slice0_sa)
1865 {
1866 ASSERTED const struct isl_format_layout *fmtl =
1867 isl_format_get_layout(info->format);
1868
1869 if (info->levels == 1 && miptail_start_level > 0) {
1870 /* Do not pad the surface to the image alignment.
1871 *
1872 * For tiled surfaces, using a reduced alignment here avoids wasting CPU
1873 * cycles on the below mipmap layout caluclations. Reducing the
1874 * alignment here is safe because we later align the row pitch and array
1875 * pitch to the tile boundary. It is safe even for
1876 * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
1877 * to accommodate the interleaved samples.
1878 *
1879 * For linear surfaces, reducing the alignment here permits us to later
1880 * choose an arbitrary, non-aligned row pitch. If the surface backs
1881 * a VkBuffer, then an arbitrary pitch may be needed to accommodate
1882 * VkBufferImageCopy::bufferRowLength.
1883 */
1884 *phys_slice0_sa = (struct isl_extent2d) {
1885 .w = phys_level0_sa->w,
1886 .h = phys_level0_sa->h,
1887 };
1888 return;
1889 }
1890
1891 uint32_t slice_top_w = 0;
1892 uint32_t slice_bottom_w = 0;
1893 uint32_t slice_left_h = 0;
1894 uint32_t slice_right_h = 0;
1895
1896 uint32_t W0 = phys_level0_sa->w;
1897 uint32_t H0 = phys_level0_sa->h;
1898
1899 for (uint32_t l = 0; l < info->levels; ++l) {
1900 uint32_t W = isl_minify(W0, l);
1901 uint32_t H = isl_minify(H0, l);
1902
1903 uint32_t w = isl_align_npot(W, image_align_sa->w);
1904 uint32_t h = isl_align_npot(H, image_align_sa->h);
1905
1906 if (l == 0) {
1907 slice_top_w = w;
1908 slice_left_h = h;
1909 slice_right_h = h;
1910 } else if (l == 1) {
1911 slice_bottom_w = w;
1912 slice_left_h += h;
1913 } else if (l == 2) {
1914 slice_bottom_w += w;
1915 slice_right_h += h;
1916 } else {
1917 slice_right_h += h;
1918 }
1919
1920 if (l >= miptail_start_level) {
1921 assert(l == miptail_start_level);
1922 assert(isl_tiling_is_64(tile_info->tiling) ||
1923 isl_tiling_is_std_y(tile_info->tiling));
1924 assert(w == tile_info->logical_extent_el.w * fmtl->bw);
1925 assert(h == tile_info->logical_extent_el.h * fmtl->bh);
1926 /* If we've gone into the miptail, we're done. All higher miplevels
1927 * will be tucked into the same tile as this one.
1928 */
1929 break;
1930 }
1931 }
1932
1933 *phys_slice0_sa = (struct isl_extent2d) {
1934 .w = MAX(slice_top_w, slice_bottom_w),
1935 .h = MAX(slice_left_h, slice_right_h),
1936 };
1937 }
1938
1939 static void
isl_calc_phys_total_extent_el_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t miptail_start_level,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1940 isl_calc_phys_total_extent_el_gfx4_2d(
1941 const struct isl_device *dev,
1942 const struct isl_surf_init_info *restrict info,
1943 const struct isl_tile_info *tile_info,
1944 enum isl_msaa_layout msaa_layout,
1945 const struct isl_extent3d *image_align_sa,
1946 const struct isl_extent4d *phys_level0_sa,
1947 enum isl_array_pitch_span array_pitch_span,
1948 uint32_t miptail_start_level,
1949 uint32_t *array_pitch_el_rows,
1950 struct isl_extent4d *phys_total_el)
1951 {
1952 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1953
1954 struct isl_extent2d phys_slice0_sa;
1955 isl_calc_phys_slice0_extent_sa_gfx4_2d(dev, info, tile_info, msaa_layout,
1956 image_align_sa, phys_level0_sa,
1957 miptail_start_level,
1958 &phys_slice0_sa);
1959 *array_pitch_el_rows =
1960 isl_calc_array_pitch_el_rows_gfx4_2d(dev, info, tile_info,
1961 image_align_sa, phys_level0_sa,
1962 array_pitch_span,
1963 &phys_slice0_sa);
1964
1965 if (isl_tiling_is_64(tile_info->tiling) ||
1966 isl_tiling_is_std_y(tile_info->tiling)) {
1967 *phys_total_el = (struct isl_extent4d) {
1968 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1969 .h = isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1970 .d = isl_align_div_npot(phys_level0_sa->d, fmtl->bd),
1971 .a = phys_level0_sa->array_len,
1972 };
1973 } else {
1974 uint32_t array_len = MAX(phys_level0_sa->d, phys_level0_sa->a);
1975 *phys_total_el = (struct isl_extent4d) {
1976 .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1977 .h = *array_pitch_el_rows * (array_len - 1) +
1978 isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1979 .d = 1,
1980 .a = 1,
1981 };
1982 }
1983 }
1984
1985 /**
1986 * A variant of isl_calc_phys_slice0_extent_sa() specific to
1987 * ISL_DIM_LAYOUT_GFX4_3D.
1988 */
1989 static void
isl_calc_phys_total_extent_el_gfx4_3d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1990 isl_calc_phys_total_extent_el_gfx4_3d(
1991 const struct isl_device *dev,
1992 const struct isl_surf_init_info *restrict info,
1993 const struct isl_extent3d *image_align_sa,
1994 const struct isl_extent4d *phys_level0_sa,
1995 uint32_t *array_pitch_el_rows,
1996 struct isl_extent4d *phys_total_el)
1997 {
1998 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1999
2000 assert(info->samples == 1);
2001
2002 if (info->dim != ISL_SURF_DIM_3D) {
2003 /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
2004 *
2005 * The cube face textures are stored in the same way as 3D surfaces
2006 * are stored (see section 6.17.5 for details). For cube surfaces,
2007 * however, the depth is equal to the number of faces (always 6) and
2008 * is not reduced for each MIP.
2009 */
2010 assert(ISL_GFX_VER(dev) == 4);
2011 assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
2012 assert(phys_level0_sa->array_len == 6);
2013 } else {
2014 assert(phys_level0_sa->array_len == 1);
2015 }
2016
2017 uint32_t total_w = 0;
2018 uint32_t total_h = 0;
2019
2020 uint32_t W0 = phys_level0_sa->w;
2021 uint32_t H0 = phys_level0_sa->h;
2022 uint32_t D0 = phys_level0_sa->d;
2023 uint32_t A0 = phys_level0_sa->a;
2024
2025 for (uint32_t l = 0; l < info->levels; ++l) {
2026 uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
2027 uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
2028 uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
2029
2030 uint32_t max_layers_horiz = MIN(level_d, 1u << l);
2031 uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2032
2033 total_w = MAX(total_w, level_w * max_layers_horiz);
2034 total_h += level_h * max_layers_vert;
2035 }
2036
2037 /* GFX4_3D layouts don't really have an array pitch since each LOD has a
2038 * different number of horizontal and vertical layers. We have to set it
2039 * to something, so at least make it true for LOD0.
2040 */
2041 *array_pitch_el_rows =
2042 isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
2043 *phys_total_el = (struct isl_extent4d) {
2044 .w = isl_assert_div(total_w, fmtl->bw),
2045 .h = isl_assert_div(total_h, fmtl->bh),
2046 .d = 1,
2047 .a = 1,
2048 };
2049 }
2050
2051 /**
2052 * A variant of isl_calc_phys_slice0_extent_sa() specific to
2053 * ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ.
2054 */
2055 static void
isl_calc_phys_total_extent_el_gfx6_stencil_hiz(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)2056 isl_calc_phys_total_extent_el_gfx6_stencil_hiz(
2057 const struct isl_device *dev,
2058 const struct isl_surf_init_info *restrict info,
2059 const struct isl_tile_info *tile_info,
2060 const struct isl_extent3d *image_align_sa,
2061 const struct isl_extent4d *phys_level0_sa,
2062 uint32_t *array_pitch_el_rows,
2063 struct isl_extent4d *phys_total_el)
2064 {
2065 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2066
2067 const struct isl_extent2d tile_extent_sa = {
2068 .w = tile_info->logical_extent_el.w * fmtl->bw,
2069 .h = tile_info->logical_extent_el.h * fmtl->bh,
2070 };
2071 /* Tile size is a multiple of image alignment */
2072 assert(tile_extent_sa.w % image_align_sa->w == 0);
2073 assert(tile_extent_sa.h % image_align_sa->h == 0);
2074
2075 const uint32_t W0 = phys_level0_sa->w;
2076 const uint32_t H0 = phys_level0_sa->h;
2077
2078 /* Each image has the same height as LOD0 because the hardware thinks
2079 * everything is LOD0
2080 */
2081 const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
2082
2083 uint32_t total_top_w = 0;
2084 uint32_t total_bottom_w = 0;
2085 uint32_t total_h = 0;
2086
2087 for (uint32_t l = 0; l < info->levels; ++l) {
2088 const uint32_t W = isl_minify(W0, l);
2089
2090 const uint32_t w = isl_align(W, tile_extent_sa.w);
2091 const uint32_t h = isl_align(H, tile_extent_sa.h);
2092
2093 if (l == 0) {
2094 total_top_w = w;
2095 total_h = h;
2096 } else if (l == 1) {
2097 total_bottom_w = w;
2098 total_h += h;
2099 } else {
2100 total_bottom_w += w;
2101 }
2102 }
2103
2104 *array_pitch_el_rows =
2105 isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
2106 *phys_total_el = (struct isl_extent4d) {
2107 .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
2108 .h = isl_assert_div(total_h, fmtl->bh),
2109 .d = 1,
2110 .a = 1,
2111 };
2112 }
2113
2114 /**
2115 * A variant of isl_calc_phys_slice0_extent_sa() specific to
2116 * ISL_DIM_LAYOUT_GFX9_1D.
2117 */
2118 static void
isl_calc_phys_total_extent_el_gfx9_1d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)2119 isl_calc_phys_total_extent_el_gfx9_1d(
2120 const struct isl_device *dev,
2121 const struct isl_surf_init_info *restrict info,
2122 const struct isl_extent3d *image_align_sa,
2123 const struct isl_extent4d *phys_level0_sa,
2124 uint32_t *array_pitch_el_rows,
2125 struct isl_extent4d *phys_total_el)
2126 {
2127 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2128
2129 assert(phys_level0_sa->height == 1);
2130 assert(phys_level0_sa->depth == 1);
2131 assert(info->samples == 1);
2132 assert(image_align_sa->w >= fmtl->bw);
2133
2134 uint32_t slice_w = 0;
2135 const uint32_t W0 = phys_level0_sa->w;
2136
2137 for (uint32_t l = 0; l < info->levels; ++l) {
2138 uint32_t W = isl_minify(W0, l);
2139 uint32_t w = isl_align_npot(W, image_align_sa->w);
2140
2141 slice_w += w;
2142 }
2143
2144 *array_pitch_el_rows = 1;
2145 *phys_total_el = (struct isl_extent4d) {
2146 .w = isl_assert_div(slice_w, fmtl->bw),
2147 .h = phys_level0_sa->array_len,
2148 .d = 1,
2149 .a = 1,
2150 };
2151 }
2152
2153 /**
2154 * Calculate the two-dimensional total physical extent of the surface, in
2155 * units of surface elements.
2156 */
2157 static void
isl_calc_phys_total_extent_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t miptail_start_level,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)2158 isl_calc_phys_total_extent_el(const struct isl_device *dev,
2159 const struct isl_surf_init_info *restrict info,
2160 const struct isl_tile_info *tile_info,
2161 enum isl_dim_layout dim_layout,
2162 enum isl_msaa_layout msaa_layout,
2163 const struct isl_extent3d *image_align_sa,
2164 const struct isl_extent4d *phys_level0_sa,
2165 enum isl_array_pitch_span array_pitch_span,
2166 uint32_t miptail_start_level,
2167 uint32_t *array_pitch_el_rows,
2168 struct isl_extent4d *phys_total_el)
2169 {
2170 switch (dim_layout) {
2171 case ISL_DIM_LAYOUT_GFX9_1D:
2172 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
2173 isl_calc_phys_total_extent_el_gfx9_1d(dev, info,
2174 image_align_sa, phys_level0_sa,
2175 array_pitch_el_rows,
2176 phys_total_el);
2177 return;
2178 case ISL_DIM_LAYOUT_GFX4_2D:
2179 isl_calc_phys_total_extent_el_gfx4_2d(dev, info, tile_info, msaa_layout,
2180 image_align_sa, phys_level0_sa,
2181 array_pitch_span,
2182 miptail_start_level,
2183 array_pitch_el_rows,
2184 phys_total_el);
2185 return;
2186 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
2187 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
2188 isl_calc_phys_total_extent_el_gfx6_stencil_hiz(dev, info, tile_info,
2189 image_align_sa,
2190 phys_level0_sa,
2191 array_pitch_el_rows,
2192 phys_total_el);
2193 return;
2194 case ISL_DIM_LAYOUT_GFX4_3D:
2195 assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
2196 isl_calc_phys_total_extent_el_gfx4_3d(dev, info,
2197 image_align_sa, phys_level0_sa,
2198 array_pitch_el_rows,
2199 phys_total_el);
2200 return;
2201 }
2202
2203 unreachable("invalid value for dim_layout");
2204 }
2205
2206 static uint32_t
isl_calc_row_pitch_alignment(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info)2207 isl_calc_row_pitch_alignment(const struct isl_device *dev,
2208 const struct isl_surf_init_info *surf_info,
2209 const struct isl_tile_info *tile_info)
2210 {
2211 if (tile_info->tiling != ISL_TILING_LINEAR) {
2212 /* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must
2213 * be 512B-aligned. CCS is only support on Y tilings.
2214 *
2215 * Only consider 512B alignment when :
2216 * - AUX is not explicitly disabled
2217 * - the caller has specified no pitch
2218 *
2219 * isl_surf_get_ccs_surf() will check that the main surface alignment
2220 * matches CCS expectations.
2221 */
2222 if (ISL_GFX_VER(dev) >= 12 &&
2223 isl_format_supports_ccs_e(dev->info, surf_info->format) &&
2224 tile_info->tiling != ISL_TILING_X &&
2225 !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) &&
2226 surf_info->row_pitch_B == 0) {
2227 return isl_align(tile_info->phys_extent_B.width, 512);
2228 }
2229
2230 return tile_info->phys_extent_B.width;
2231 }
2232
2233 /* We only support tiled fragment shading rate buffers. */
2234 assert((surf_info->usage & ISL_SURF_USAGE_CPB_BIT) == 0);
2235
2236 /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
2237 * RENDER_SURFACE_STATE Surface Pitch (p349):
2238 *
2239 * - For linear render target surfaces and surfaces accessed with the
2240 * typed data port messages, the pitch must be a multiple of the
2241 * element size for non-YUV surface formats. Pitch must be
2242 * a multiple of 2 * element size for YUV surface formats.
2243 *
2244 * - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
2245 * ignore because isl doesn't do buffers.]
2246 *
2247 * - For other linear surfaces, the pitch can be any multiple of
2248 * bytes.
2249 */
2250 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
2251 const uint32_t bs = fmtl->bpb / 8;
2252 uint32_t alignment;
2253
2254 if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
2255 if (isl_format_is_yuv(surf_info->format)) {
2256 alignment = 2 * bs;
2257 } else {
2258 alignment = bs;
2259 }
2260 } else {
2261 alignment = 1;
2262 }
2263
2264 /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >>
2265 * PRI_STRIDE Stride (p1254):
2266 *
2267 * "When using linear memory, this must be at least 64 byte aligned."
2268 *
2269 * However, when displaying on NVIDIA and recent AMD GPUs via PRIME,
2270 * we need a larger pitch of 256 bytes.
2271 *
2272 * If the ISL caller didn't specify a row_pitch_B, then we should assume
2273 * the NVIDIA/AMD requirements. Otherwise, if we have a specified
2274 * row_pitch_B, this is probably because the caller is trying to import a
2275 * buffer. In that case we limit the minimum row pitch to the Intel HW
2276 * requirement.
2277 */
2278 if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {
2279 if (surf_info->row_pitch_B == 0)
2280 alignment = isl_align(alignment, 256);
2281 else
2282 alignment = isl_align(alignment, 64);
2283 }
2284
2285 return alignment;
2286 }
2287
2288 static uint32_t
isl_calc_linear_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)2289 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
2290 const struct isl_surf_init_info *info,
2291 const struct isl_extent4d *phys_total_el,
2292 uint32_t alignment_B)
2293 {
2294 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2295 const uint32_t bs = fmtl->bpb / 8;
2296
2297 return isl_align_npot(bs * phys_total_el->w, alignment_B);
2298 }
2299
2300 static uint32_t
isl_calc_tiled_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)2301 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
2302 const struct isl_surf_init_info *surf_info,
2303 const struct isl_tile_info *tile_info,
2304 const struct isl_extent4d *phys_total_el,
2305 uint32_t alignment_B)
2306 {
2307 const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
2308
2309 assert(fmtl->bpb % tile_info->format_bpb == 0);
2310
2311 const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
2312 const uint32_t total_w_tl =
2313 isl_align_div(phys_total_el->w * tile_el_scale,
2314 tile_info->logical_extent_el.width);
2315
2316 /* In some cases the alignment of the pitch might be > to the tile size
2317 * (for example Gfx12 CCS requires 512B alignment while the tile's width
2318 * can be 128B), so align the row pitch to the alignment.
2319 */
2320 assert(alignment_B >= tile_info->phys_extent_B.width);
2321 return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
2322 }
2323
2324 static uint32_t
isl_calc_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)2325 isl_calc_min_row_pitch(const struct isl_device *dev,
2326 const struct isl_surf_init_info *surf_info,
2327 const struct isl_tile_info *tile_info,
2328 const struct isl_extent4d *phys_total_el,
2329 uint32_t alignment_B)
2330 {
2331 if (tile_info->tiling == ISL_TILING_LINEAR) {
2332 return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
2333 alignment_B);
2334 } else {
2335 return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
2336 phys_total_el, alignment_B);
2337 }
2338 }
2339
2340 /**
2341 * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's
2342 * size is `bits` bits?
2343 *
2344 * Hardware pitch fields are offset by 1. For example, if the size of
2345 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
2346 * pitches is [1, 2^b] inclusive. If the surface pitch is N, then
2347 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
2348 */
2349 static bool
pitch_in_range(uint32_t n,uint32_t bits)2350 pitch_in_range(uint32_t n, uint32_t bits)
2351 {
2352 assert(n != 0);
2353 return likely(bits != 0 && 1 <= n && n <= (1 << bits));
2354 }
2355
2356 void PRINTFLIKE(4, 5)
_isl_notify_failure(const struct isl_surf_init_info * surf_info,const char * file,int line,const char * fmt,...)2357 _isl_notify_failure(const struct isl_surf_init_info *surf_info,
2358 const char *file, int line, const char *fmt, ...)
2359 {
2360 if (!INTEL_DEBUG(DEBUG_ISL))
2361 return;
2362
2363 char msg[512];
2364 va_list ap;
2365 va_start(ap, fmt);
2366 int ret = vsnprintf(msg, sizeof(msg), fmt, ap);
2367 assert(ret < sizeof(msg));
2368 va_end(ap);
2369
2370 #define PRINT_USAGE(bit, str) \
2371 (surf_info->usage & ISL_SURF_USAGE_##bit##_BIT) ? ("+"str) : ""
2372 #define PRINT_TILING(bit, str) \
2373 (surf_info->tiling_flags & ISL_TILING_##bit##_BIT) ? ("+"str) : ""
2374
2375 snprintf(msg + ret, sizeof(msg) - ret,
2376 " extent=%ux%ux%u dim=%s msaa=%ux levels=%u rpitch=%u fmt=%s "
2377 "usages=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s "
2378 "tiling_flags=%s%s%s%s%s%s%s%s%s%s%s%s%s",
2379 surf_info->width, surf_info->height,
2380 surf_info->dim == ISL_SURF_DIM_3D ?
2381 surf_info->depth : surf_info->array_len,
2382 surf_info->dim == ISL_SURF_DIM_1D ? "1d" :
2383 surf_info->dim == ISL_SURF_DIM_2D ? "2d" : "3d",
2384 surf_info->samples, surf_info->levels,
2385 surf_info->row_pitch_B,
2386 isl_format_get_name(surf_info->format) + strlen("ISL_FORMAT_"),
2387
2388 PRINT_USAGE(RENDER_TARGET, "rt"),
2389 PRINT_USAGE(DEPTH, "depth"),
2390 PRINT_USAGE(STENCIL, "stenc"),
2391 PRINT_USAGE(TEXTURE, "tex"),
2392 PRINT_USAGE(CUBE, "cube"),
2393 PRINT_USAGE(DISABLE_AUX, "noaux"),
2394 PRINT_USAGE(DISPLAY, "disp"),
2395 PRINT_USAGE(HIZ, "hiz"),
2396 PRINT_USAGE(MCS, "mcs"),
2397 PRINT_USAGE(CCS, "ccs"),
2398 PRINT_USAGE(VERTEX_BUFFER, "vb"),
2399 PRINT_USAGE(INDEX_BUFFER, "ib"),
2400 PRINT_USAGE(CONSTANT_BUFFER, "const"),
2401 PRINT_USAGE(STAGING, "stage"),
2402 PRINT_USAGE(SPARSE, "sparse"),
2403 PRINT_USAGE(NO_AUX_TT_ALIGNMENT, "no-aux-align"),
2404
2405 PRINT_TILING(LINEAR, "linear"),
2406 PRINT_TILING(W, "W"),
2407 PRINT_TILING(X, "X"),
2408 PRINT_TILING(Y0, "Y0"),
2409 PRINT_TILING(SKL_Yf, "skl-Yf"),
2410 PRINT_TILING(SKL_Ys, "skl-Ys"),
2411 PRINT_TILING(ICL_Yf, "icl-Yf"),
2412 PRINT_TILING(ICL_Ys, "icl-Ys"),
2413 PRINT_TILING(4, "4"),
2414 PRINT_TILING(64, "64"),
2415 PRINT_TILING(HIZ, "hiz"),
2416 PRINT_TILING(CCS, "ccs"),
2417 PRINT_TILING(GFX12_CCS, "ccs12"));
2418
2419 #undef PRINT_USAGE
2420 #undef PRINT_TILING
2421
2422 mesa_logd("%s:%i: %s", file, line, msg);
2423 }
2424
2425 static bool
isl_calc_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_total_el,uint32_t * out_row_pitch_B)2426 isl_calc_row_pitch(const struct isl_device *dev,
2427 const struct isl_surf_init_info *surf_info,
2428 const struct isl_tile_info *tile_info,
2429 enum isl_dim_layout dim_layout,
2430 const struct isl_extent4d *phys_total_el,
2431 uint32_t *out_row_pitch_B)
2432 {
2433 uint32_t alignment_B =
2434 isl_calc_row_pitch_alignment(dev, surf_info, tile_info);
2435
2436 const uint32_t min_row_pitch_B =
2437 isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
2438 alignment_B);
2439
2440 if (surf_info->row_pitch_B != 0) {
2441 if (surf_info->row_pitch_B < min_row_pitch_B) {
2442 return notify_failure(surf_info,
2443 "requested row pitch (%uB) less than minimum "
2444 "allowed (%uB)",
2445 surf_info->row_pitch_B, min_row_pitch_B);
2446 }
2447
2448 if (surf_info->row_pitch_B % alignment_B != 0) {
2449 return notify_failure(surf_info,
2450 "requested row pitch (%uB) doesn't satisfy the "
2451 "minimum alignment requirement (%uB)",
2452 surf_info->row_pitch_B, alignment_B);
2453 }
2454 }
2455
2456 const uint32_t row_pitch_B =
2457 surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B;
2458
2459 const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
2460
2461 if (row_pitch_B == 0)
2462 return notify_failure(surf_info, "calculated row pitch is zero");
2463
2464 if (dim_layout == ISL_DIM_LAYOUT_GFX9_1D) {
2465 /* SurfacePitch is ignored for this layout. */
2466 goto done;
2467 }
2468
2469 if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
2470 ISL_SURF_USAGE_TEXTURE_BIT |
2471 ISL_SURF_USAGE_STORAGE_BIT)) &&
2472 !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) {
2473 return notify_failure(surf_info,
2474 "row pitch (%uB) not in range of "
2475 "RENDER_SURFACE_STATE::SurfacePitch",
2476 row_pitch_B);
2477 }
2478
2479 if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
2480 ISL_SURF_USAGE_MCS_BIT)) &&
2481 !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) {
2482 return notify_failure(surf_info,
2483 "row_pitch_tl=%u not in range of "
2484 "RENDER_SURFACE_STATE::AuxiliarySurfacePitch",
2485 row_pitch_tl);
2486 }
2487
2488 if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
2489 !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) {
2490 return notify_failure(surf_info,
2491 "row pitch (%uB) not in range of "
2492 "3DSTATE_DEPTH_BUFFER::SurfacePitch",
2493 row_pitch_B);
2494 }
2495
2496 if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
2497 !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) {
2498 return notify_failure(surf_info,
2499 "row pitch (%uB) not in range of "
2500 "3DSTATE_HIER_DEPTH_BUFFER::SurfacePitch",
2501 row_pitch_B);
2502 }
2503
2504 const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
2505 _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
2506 _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
2507
2508 if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
2509 !pitch_in_range(row_pitch_B, stencil_pitch_bits)) {
2510 return notify_failure(surf_info,
2511 "row pitch (%uB) not in range of "
2512 "3DSTATE_STENCIL_BUFFER/3DSTATE_DEPTH_BUFFER::SurfacePitch",
2513 row_pitch_B);
2514 }
2515
2516 if ((surf_info->usage & ISL_SURF_USAGE_CPB_BIT) &&
2517 !pitch_in_range(row_pitch_B, _3DSTATE_CPSIZE_CONTROL_BUFFER_SurfacePitch_bits(dev->info)))
2518 return false;
2519
2520 done:
2521 *out_row_pitch_B = row_pitch_B;
2522 return true;
2523 }
2524
2525 static bool
isl_calc_size(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t array_pitch_el_rows,uint32_t row_pitch_B,uint64_t * out_size_B)2526 isl_calc_size(const struct isl_device *dev,
2527 const struct isl_surf_init_info *info,
2528 const struct isl_tile_info *tile_info,
2529 const struct isl_extent4d *phys_total_el,
2530 uint32_t array_pitch_el_rows,
2531 uint32_t row_pitch_B,
2532 uint64_t *out_size_B)
2533 {
2534 uint64_t size_B;
2535 if (tile_info->tiling == ISL_TILING_LINEAR) {
2536 /* LINEAR tiling has no concept of intra-tile arrays */
2537 assert(phys_total_el->d == 1 && phys_total_el->a == 1);
2538
2539 size_B = (uint64_t) row_pitch_B * phys_total_el->h;
2540
2541 } else {
2542 /* Pitches must make sense with the tiling */
2543 assert(row_pitch_B % tile_info->phys_extent_B.width == 0);
2544
2545 uint32_t array_slices, array_pitch_tl_rows;
2546 if (phys_total_el->d > 1) {
2547 assert(phys_total_el->a == 1);
2548 array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
2549 tile_info->logical_extent_el.h);
2550 array_slices = isl_align_div(phys_total_el->d,
2551 tile_info->logical_extent_el.d);
2552 } else if (phys_total_el->a > 1) {
2553 assert(phys_total_el->d == 1);
2554 array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
2555 tile_info->logical_extent_el.h);
2556 array_slices = isl_align_div(phys_total_el->a,
2557 tile_info->logical_extent_el.a);
2558 } else {
2559 assert(phys_total_el->d == 1 && phys_total_el->a == 1);
2560 array_pitch_tl_rows = 0;
2561 array_slices = 1;
2562 }
2563
2564 const uint32_t total_h_tl =
2565 (array_slices - 1) * array_pitch_tl_rows +
2566 isl_align_div(phys_total_el->h, tile_info->logical_extent_el.height);
2567
2568 size_B = (uint64_t) total_h_tl * tile_info->phys_extent_B.height *
2569 row_pitch_B;
2570 }
2571
2572 /* If for some reason we can't support the appropriate tiling format and
2573 * end up falling to linear or some other format, make sure the image size
2574 * and alignment are aligned to the expected block size so we can at least
2575 * do opaque binds.
2576 */
2577 if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
2578 size_B = isl_align(size_B, 64 * 1024);
2579
2580 /* Pre-gfx9: from the Broadwell PRM Vol 5, Surface Layout:
2581 * "In addition to restrictions on maximum height, width, and depth,
2582 * surfaces are also restricted to a maximum size in bytes. This
2583 * maximum is 2 GB for all products and all surface types."
2584 *
2585 * gfx9-10: from the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
2586 * "In addition to restrictions on maximum height, width, and depth,
2587 * surfaces are also restricted to a maximum size of 2^38 bytes.
2588 * All pixels within the surface must be contained within 2^38 bytes
2589 * of the base address."
2590 *
2591 * gfx11+ platforms raised this limit to 2^44 bytes.
2592 */
2593 uint64_t max_surface_B = 1ull << (ISL_GFX_VER(dev) >= 11 ? 44 :
2594 ISL_GFX_VER(dev) >= 9 ? 38 : 31);
2595 if (size_B > max_surface_B) {
2596 return notify_failure(
2597 info,
2598 "calculated size (%"PRIu64"B) exceeds platform limit of %"PRIu64"B",
2599 size_B, max_surface_B);
2600 }
2601
2602 *out_size_B = size_B;
2603 return true;
2604 }
2605
2606 static uint32_t
isl_calc_base_alignment(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_tile_info * tile_info)2607 isl_calc_base_alignment(const struct isl_device *dev,
2608 const struct isl_surf_init_info *info,
2609 const struct isl_tile_info *tile_info)
2610 {
2611 uint32_t base_alignment_B;
2612 if (tile_info->tiling == ISL_TILING_LINEAR) {
2613 /* From the Broadwell PRM Vol 2d,
2614 * RENDER_SURFACE_STATE::SurfaceBaseAddress:
2615 *
2616 * "The Base Address for linear render target surfaces and surfaces
2617 * accessed with the typed surface read/write data port messages must
2618 * be element-size aligned, for non-YUV surface formats, or a
2619 * multiple of 2 element-sizes for YUV surface formats. Other linear
2620 * surfaces have no alignment requirements (byte alignment is
2621 * sufficient.)"
2622 */
2623 base_alignment_B = MAX(1, info->min_alignment_B);
2624 if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
2625 if (isl_format_is_yuv(info->format)) {
2626 base_alignment_B =
2627 MAX(base_alignment_B, tile_info->format_bpb / 4);
2628 } else {
2629 base_alignment_B =
2630 MAX(base_alignment_B, tile_info->format_bpb / 8);
2631 }
2632 }
2633 base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
2634
2635 /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
2636 *
2637 * "For Linear memory, this field specifies the stride in chunks of
2638 * 64 bytes (1 cache line)."
2639 */
2640 if (isl_surf_usage_is_display(info->usage))
2641 base_alignment_B = MAX(base_alignment_B, 64);
2642 } else {
2643 const uint32_t tile_size_B = tile_info->phys_extent_B.width *
2644 tile_info->phys_extent_B.height;
2645 assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
2646 base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
2647
2648 /* The diagram in the Bspec section Memory Compression - Gfx12, shows
2649 * that the CCS is indexed in 256B chunks. However, the
2650 * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
2651 * pages. We currently don't assign the usage field like we do for main
2652 * surfaces, so just use 4K for now.
2653 */
2654 if (tile_info->tiling == ISL_TILING_GFX12_CCS)
2655 base_alignment_B = MAX(base_alignment_B, 4096);
2656
2657 if (dev->info->has_aux_map &&
2658 (isl_format_supports_ccs_d(dev->info, info->format) ||
2659 isl_format_supports_ccs_e(dev->info, info->format)) &&
2660 !INTEL_DEBUG(DEBUG_NO_CCS) &&
2661 !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
2662 /* Wa_22015614752:
2663 *
2664 * Due to L3 cache being tagged with (engineID, vaID) and the CCS
2665 * block/cacheline being 256 bytes, 2 engines accessing a 64Kb range
2666 * with compression will generate 2 different CCS cacheline entries
2667 * in L3, this will lead to corruptions. To avoid this, we need to
2668 * ensure 2 images do not share a 256 bytes CCS cacheline. With a
2669 * ratio of compression of 1/256, this is 64Kb alignment (even for
2670 * Tile4...)
2671 *
2672 * ATS-M PRMS, Vol 2a: Command Reference: Instructions,
2673 * XY_CTRL_SURF_COPY_BLT, "Size of Control Surface Copy" field, the
2674 * CCS blocks are 256 bytes :
2675 *
2676 * "This field indicates size of the Control Surface or CCS copy.
2677 * It is expressed in terms of number of 256B block of CCS, where
2678 * each 256B block of CCS corresponds to 64KB of main surface."
2679 */
2680 if (intel_needs_workaround(dev->info, 22015614752)) {
2681 base_alignment_B = MAX(base_alignment_B,
2682 256 /* cacheline */ * 256 /* AUX ratio */);
2683 }
2684
2685 /* Platforms using an aux map require that images be
2686 * granularity-aligned if they're going to used with CCS. This is
2687 * because the Aux translation table maps main surface addresses to
2688 * aux addresses at a granularity in the main surface. Because we
2689 * don't know for sure in ISL if a surface will use CCS, we have to
2690 * guess based on the DISABLE_AUX usage bit. The one thing we do know
2691 * is that we haven't enable CCS on linear images yet so we can avoid
2692 * the extra alignment there.
2693 */
2694 if (!(info->usage & ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT)) {
2695 base_alignment_B = MAX(base_alignment_B, dev->info->verx10 >= 125 ?
2696 1024 * 1024 : 64 * 1024);
2697 }
2698 }
2699 }
2700
2701 /* If for some reason we can't support the appropriate tiling format and
2702 * end up falling to linear or some other format, make sure the image size
2703 * and alignment are aligned to the expected block size so we can at least
2704 * do opaque binds.
2705 */
2706 if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
2707 base_alignment_B = MAX(base_alignment_B, 64 * 1024);
2708
2709 return base_alignment_B;
2710 }
2711
2712 bool
isl_surf_init_s(const struct isl_device * dev,struct isl_surf * surf,const struct isl_surf_init_info * restrict info)2713 isl_surf_init_s(const struct isl_device *dev,
2714 struct isl_surf *surf,
2715 const struct isl_surf_init_info *restrict info)
2716 {
2717 /* Some sanity checks */
2718 assert(!(info->usage & ISL_SURF_USAGE_CPB_BIT) ||
2719 dev->info->has_coarse_pixel_primitive_and_cb);
2720
2721 const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2722
2723 const struct isl_extent4d logical_level0_px = {
2724 .w = info->width,
2725 .h = info->height,
2726 .d = info->depth,
2727 .a = info->array_len,
2728 };
2729
2730 enum isl_tiling tiling;
2731 if (!isl_surf_choose_tiling(dev, info, &tiling))
2732 return false;
2733
2734 const enum isl_dim_layout dim_layout =
2735 isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
2736
2737 enum isl_msaa_layout msaa_layout;
2738 if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
2739 return false;
2740
2741 struct isl_tile_info tile_info;
2742 isl_tiling_get_info(tiling, info->dim, msaa_layout, fmtl->bpb,
2743 info->samples, &tile_info);
2744
2745 struct isl_extent3d image_align_el;
2746 isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
2747 &image_align_el);
2748
2749 struct isl_extent3d image_align_sa =
2750 isl_extent3d_el_to_sa(info->format, image_align_el);
2751
2752 struct isl_extent4d phys_level0_sa;
2753 isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
2754 &phys_level0_sa);
2755
2756 enum isl_array_pitch_span array_pitch_span =
2757 isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
2758
2759 uint32_t miptail_start_level =
2760 isl_choose_miptail_start_level(dev, info, &tile_info);
2761
2762 uint32_t array_pitch_el_rows;
2763 struct isl_extent4d phys_total_el;
2764 isl_calc_phys_total_extent_el(dev, info, &tile_info,
2765 dim_layout, msaa_layout,
2766 &image_align_sa, &phys_level0_sa,
2767 array_pitch_span, miptail_start_level,
2768 &array_pitch_el_rows,
2769 &phys_total_el);
2770
2771 uint32_t row_pitch_B;
2772 if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
2773 &phys_total_el, &row_pitch_B))
2774 return false;
2775
2776 uint64_t size_B;
2777 if (!isl_calc_size(dev, info, &tile_info, &phys_total_el,
2778 array_pitch_el_rows, row_pitch_B, &size_B))
2779 return false;
2780
2781 const uint32_t base_alignment_B =
2782 isl_calc_base_alignment(dev, info, &tile_info);
2783
2784 *surf = (struct isl_surf) {
2785 .dim = info->dim,
2786 .dim_layout = dim_layout,
2787 .msaa_layout = msaa_layout,
2788 .tiling = tiling,
2789 .format = info->format,
2790
2791 .levels = info->levels,
2792 .samples = info->samples,
2793
2794 .image_alignment_el = image_align_el,
2795 .logical_level0_px = logical_level0_px,
2796 .phys_level0_sa = phys_level0_sa,
2797
2798 .size_B = size_B,
2799 .alignment_B = base_alignment_B,
2800 .row_pitch_B = row_pitch_B,
2801 .array_pitch_el_rows = array_pitch_el_rows,
2802 .array_pitch_span = array_pitch_span,
2803 .miptail_start_level = miptail_start_level,
2804
2805 .usage = info->usage,
2806 };
2807
2808 return true;
2809 }
2810
2811 void
isl_surf_get_tile_info(const struct isl_surf * surf,struct isl_tile_info * tile_info)2812 isl_surf_get_tile_info(const struct isl_surf *surf,
2813 struct isl_tile_info *tile_info)
2814 {
2815 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2816 isl_tiling_get_info(surf->tiling, surf->dim, surf->msaa_layout, fmtl->bpb,
2817 surf->samples, tile_info);
2818 }
2819
2820 bool
isl_surf_get_hiz_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * hiz_surf)2821 isl_surf_get_hiz_surf(const struct isl_device *dev,
2822 const struct isl_surf *surf,
2823 struct isl_surf *hiz_surf)
2824 {
2825 if (INTEL_DEBUG(DEBUG_NO_HIZ))
2826 return false;
2827
2828 /* HiZ support does not exist prior to Gfx5 */
2829 if (ISL_GFX_VER(dev) < 5)
2830 return false;
2831
2832 if (!isl_surf_usage_is_depth(surf->usage))
2833 return false;
2834
2835 /* From the Sandy Bridge PRM, Vol 2 Part 1,
2836 * 3DSTATE_DEPTH_BUFFER::Hierarchical Depth Buffer Enable,
2837 *
2838 * If this field is enabled, the Surface Format of the depth buffer
2839 * cannot be D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2840 * requires the separate stencil buffer.
2841 *
2842 * On SNB+, HiZ can't be used with combined depth-stencil buffers.
2843 */
2844 if (isl_surf_usage_is_stencil(surf->usage))
2845 return false;
2846
2847 /* Multisampled depth is always interleaved */
2848 assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
2849 surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
2850
2851 /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
2852 *
2853 * "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
2854 * Target View Extent, and Depth Coordinate Offset X/Y of the
2855 * hierarchical depth buffer are inherited from the depth buffer. The
2856 * height and width of the hierarchical depth buffer that must be
2857 * allocated are computed by the following formulas, where HZ is the
2858 * hierarchical depth buffer and Z is the depth buffer. The Z_Height,
2859 * Z_Width, and Z_Depth values given in these formulas are those present
2860 * in 3DSTATE_DEPTH_BUFFER incremented by one.
2861 *
2862 * "The value of Z_Height and Z_Width must each be multiplied by 2 before
2863 * being applied to the table below if Number of Multisamples is set to
2864 * NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
2865 * Z_Width must be multiplied by 4 before being applied to the table
2866 * below if Number of Multisamples is set to NUMSAMPLES_8."
2867 *
2868 * In the Sky Lake PRM, the second paragraph is gone. This means that,
2869 * from Sandy Bridge through Broadwell, HiZ compresses samples in the
2870 * primary depth surface. On Sky Lake and onward, HiZ compresses pixels.
2871 *
2872 * There are a number of different ways that this discrepancy could be
2873 * handled. The way we have chosen is to simply make MSAA HiZ have the
2874 * same number of samples as the parent surface pre-Sky Lake and always be
2875 * single-sampled on Sky Lake and above. Since the block sizes of
2876 * compressed formats are given in samples, this neatly handles everything
2877 * without the need for additional HiZ formats with different block sizes
2878 * on SKL+.
2879 */
2880 const unsigned samples = ISL_GFX_VER(dev) >= 9 ? 1 : surf->samples;
2881
2882 const enum isl_format format =
2883 ISL_GFX_VERX10(dev) >= 125 ? ISL_FORMAT_GFX125_HIZ : ISL_FORMAT_HIZ;
2884
2885 return isl_surf_init(dev, hiz_surf,
2886 .dim = surf->dim,
2887 .format = format,
2888 .width = surf->logical_level0_px.width,
2889 .height = surf->logical_level0_px.height,
2890 .depth = surf->logical_level0_px.depth,
2891 .levels = surf->levels,
2892 .array_len = surf->logical_level0_px.array_len,
2893 .samples = samples,
2894 .usage = ISL_SURF_USAGE_HIZ_BIT,
2895 .tiling_flags = ISL_TILING_HIZ_BIT);
2896 }
2897
2898 bool
isl_surf_get_mcs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * mcs_surf)2899 isl_surf_get_mcs_surf(const struct isl_device *dev,
2900 const struct isl_surf *surf,
2901 struct isl_surf *mcs_surf)
2902 {
2903 /* It must be multisampled with an array layout */
2904 if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY)
2905 return false;
2906
2907 /* On Gfx12+ this format is not listed in TGL PRMs, Volume 2b: Command
2908 * Reference: Enumerations, RenderCompressionFormat
2909 */
2910 if (ISL_GFX_VER(dev) >= 12 &&
2911 surf->format == ISL_FORMAT_R9G9B9E5_SHAREDEXP)
2912 return false;
2913
2914 /* The following are true of all multisampled surfaces */
2915 assert(surf->samples > 1);
2916 assert(surf->dim == ISL_SURF_DIM_2D);
2917 assert(surf->levels == 1);
2918 assert(surf->logical_level0_px.depth == 1);
2919 assert(isl_format_supports_multisampling(dev->info, surf->format));
2920
2921 enum isl_format mcs_format;
2922 switch (surf->samples) {
2923 case 2: mcs_format = ISL_FORMAT_MCS_2X; break;
2924 case 4: mcs_format = ISL_FORMAT_MCS_4X; break;
2925 case 8: mcs_format = ISL_FORMAT_MCS_8X; break;
2926 case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
2927 default:
2928 unreachable("Invalid sample count");
2929 }
2930
2931 return isl_surf_init(dev, mcs_surf,
2932 .dim = ISL_SURF_DIM_2D,
2933 .format = mcs_format,
2934 .width = surf->logical_level0_px.width,
2935 .height = surf->logical_level0_px.height,
2936 .depth = 1,
2937 .levels = 1,
2938 .array_len = surf->logical_level0_px.array_len,
2939 .samples = 1, /* MCS surfaces are really single-sampled */
2940 .usage = ISL_SURF_USAGE_MCS_BIT,
2941 .tiling_flags = ISL_TILING_ANY_MASK);
2942 }
2943
2944 bool
isl_surf_supports_ccs(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * hiz_or_mcs_surf)2945 isl_surf_supports_ccs(const struct isl_device *dev,
2946 const struct isl_surf *surf,
2947 const struct isl_surf *hiz_or_mcs_surf)
2948 {
2949 if (INTEL_DEBUG(DEBUG_NO_CCS))
2950 return false;
2951
2952 if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
2953 return false;
2954
2955 if (!isl_format_supports_ccs_d(dev->info, surf->format) &&
2956 !isl_format_supports_ccs_e(dev->info, surf->format))
2957 return false;
2958
2959 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2960 * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2961 *
2962 * - Support is limited to tiled render targets.
2963 *
2964 * From the Skylake documentation, it is made clear that X-tiling is no
2965 * longer supported:
2966 *
2967 * - MCS and Lossless compression is supported for
2968 * TiledY/TileYs/TileYf non-MSRTs only.
2969 *
2970 * From the BSpec (44930) for Gfx12:
2971 *
2972 * Linear CCS is only allowed for Untyped Buffers but only via HDC
2973 * Data-Port messages.
2974 *
2975 * We never use untyped messages on surfaces created by ISL on Gfx9+ so
2976 * this means linear is out on Gfx12+ as well.
2977 */
2978 if (surf->tiling == ISL_TILING_LINEAR)
2979 return false;
2980
2981 /* TODO: Disable for now, as we're not sure about the meaning of
2982 * 3DSTATE_CPSIZE_CONTROL_BUFFER::CPCBCompressionEnable
2983 */
2984 if (isl_surf_usage_is_cpb(surf->usage))
2985 return false;
2986
2987 /* SKL PRMs, Volume 5: Memory Views, Tiling and Mip Tails for 2D Surfaces:
2988 *
2989 * "Lossless compression must not be used on surfaces which have MIP
2990 * Tail which contains MIPs for Slots greater than 11."
2991 */
2992 if (surf->miptail_start_level < surf->levels) {
2993 const uint32_t miptail_levels = surf->levels - surf->miptail_start_level;
2994 if (miptail_levels + isl_get_miptail_base_row(surf->tiling) > 11) {
2995 assert(isl_tiling_is_64(surf->tiling) ||
2996 isl_tiling_is_std_y(surf->tiling));
2997 return false;
2998 }
2999 }
3000
3001 /* From the workarounds section in the SKL PRM:
3002 *
3003 * "RCC cacheline is composed of X-adjacent 64B fragments instead of
3004 * memory adjacent. This causes a single 128B cacheline to straddle
3005 * multiple LODs inside the TYF MIPtail for 3D surfaces (beyond a
3006 * certain slot number), leading to corruption when CCS is enabled
3007 * for these LODs and RT is later bound as texture. WA: If
3008 * RENDER_SURFACE_STATE.Surface Type = 3D and
3009 * RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE and
3010 * RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS, Set the
3011 * value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip that
3012 * larger than those present in the surface (i.e. 15)"
3013 *
3014 * We simply disallow CCS on 3D surfaces with miptails.
3015 *
3016 * Referred to as Wa_1207137018 on ICL+
3017 */
3018 if (ISL_GFX_VERX10(dev) <= 120 &&
3019 surf->dim == ISL_SURF_DIM_3D &&
3020 surf->miptail_start_level < surf->levels) {
3021 assert(isl_tiling_is_std_y(surf->tiling));
3022 return false;
3023 }
3024
3025 /* TODO: add CCS support for Ys/Yf */
3026 if (isl_tiling_is_std_y(surf->tiling))
3027 return false;
3028
3029 if (ISL_GFX_VER(dev) >= 12) {
3030 if (isl_surf_usage_is_stencil(surf->usage)) {
3031 /* HiZ and MCS aren't allowed with stencil */
3032 assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
3033
3034 /* Multi-sampled stencil cannot have CCS */
3035 if (surf->samples > 1)
3036 return false;
3037 } else if (isl_surf_usage_is_depth(surf->usage)) {
3038 const struct isl_surf *hiz_surf = hiz_or_mcs_surf;
3039
3040 /* With depth surfaces, HIZ is required for CCS. */
3041 if (hiz_surf == NULL || hiz_surf->size_B == 0)
3042 return false;
3043
3044 assert(hiz_surf->usage & ISL_SURF_USAGE_HIZ_BIT);
3045 assert(hiz_surf->tiling == ISL_TILING_HIZ);
3046 assert(isl_format_is_hiz(hiz_surf->format));
3047 } else if (surf->samples > 1) {
3048 const struct isl_surf *mcs_surf = hiz_or_mcs_surf;
3049
3050 /* With multisampled color, CCS requires MCS */
3051 if (mcs_surf == NULL || mcs_surf->size_B == 0)
3052 return false;
3053
3054 assert(mcs_surf->usage & ISL_SURF_USAGE_MCS_BIT);
3055 assert(isl_format_is_mcs(mcs_surf->format));
3056 } else {
3057 /* Single-sampled color can't have MCS or HiZ */
3058 assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
3059 }
3060
3061 /* On Gfx12, all CCS-compressed surface pitches must be multiples of
3062 * 512B.
3063 */
3064 if (surf->row_pitch_B % 512 != 0)
3065 return false;
3066
3067 /* TODO: According to Wa_1406738321, 3D textures need a blit to a new
3068 * surface in order to perform a resolve. For now, just disable CCS.
3069 */
3070 if (surf->dim == ISL_SURF_DIM_3D)
3071 return false;
3072
3073 /* BSpec 44930: (Gfx12, Gfx12.5)
3074 *
3075 * "Compression of 3D Ys surfaces with 64 or 128 bpp is not supported
3076 * in Gen12. Moreover, "Render Target Fast-clear Enable" command is
3077 * not supported for any 3D Ys surfaces. except when Surface is a
3078 * Procdural Texture."
3079 *
3080 * Since the note applies to MTL, we apply this to TILE64 too.
3081 */
3082 uint32_t format_bpb = isl_format_get_layout(surf->format)->bpb;
3083 if (ISL_GFX_VER(dev) == 12 &&
3084 surf->dim == ISL_SURF_DIM_3D &&
3085 (surf->tiling == ISL_TILING_ICL_Ys ||
3086 isl_tiling_is_64(surf->tiling)) &&
3087 (format_bpb == 64 || format_bpb == 128))
3088 return false;
3089
3090 /* TODO: Handle the other tiling formats */
3091 if (surf->tiling != ISL_TILING_Y0 &&
3092 surf->tiling != ISL_TILING_4 &&
3093 !isl_tiling_is_64(surf->tiling))
3094 return false;
3095
3096 /* TODO: Handle single-sampled Tile64. */
3097 if (surf->samples == 1 && isl_tiling_is_64(surf->tiling))
3098 return false;
3099 } else {
3100 /* ISL_GFX_VER(dev) < 12 */
3101 if (surf->samples > 1)
3102 return false;
3103
3104 /* CCS is only for color images on Gfx7-11 */
3105 if (isl_surf_usage_is_depth_or_stencil(surf->usage))
3106 return false;
3107
3108 /* We're single-sampled color so having HiZ or MCS makes no sense */
3109 assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
3110
3111 /* The PRM doesn't say this explicitly, but fast-clears don't appear to
3112 * work for 3D textures until gfx9 where the layout of 3D textures
3113 * changes to match 2D array textures.
3114 */
3115 if (ISL_GFX_VER(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
3116 return false;
3117
3118 /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
3119 * Non-MultiSampler Render Target Restrictions):
3120 *
3121 * "Support is for non-mip-mapped and non-array surface types only."
3122 *
3123 * This restriction is lifted on gfx8+. Technically, it may be possible
3124 * to create a CCS for an arrayed or mipmapped image and only enable
3125 * CCS_D when rendering to the base slice. However, there is no
3126 * documentation tell us what the hardware would do in that case or what
3127 * it does if you walk off the bases slice. (Does it ignore CCS or does
3128 * it start scribbling over random memory?) We play it safe and just
3129 * follow the docs and don't allow CCS_D for arrayed or mip-mapped
3130 * surfaces.
3131 */
3132 if (ISL_GFX_VER(dev) <= 7 &&
3133 (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
3134 return false;
3135
3136 /* From the Skylake documentation, it is made clear that X-tiling is no
3137 * longer supported:
3138 *
3139 * - MCS and Lossless compression is supported for
3140 * TiledY/TileYs/TileYf non-MSRTs only.
3141 */
3142 if (ISL_GFX_VER(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling))
3143 return false;
3144 }
3145
3146 return true;
3147 }
3148
3149 bool
isl_surf_get_ccs_surf(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * hiz_or_mcs_surf,struct isl_surf * ccs_surf,uint32_t row_pitch_B)3150 isl_surf_get_ccs_surf(const struct isl_device *dev,
3151 const struct isl_surf *surf,
3152 const struct isl_surf *hiz_or_mcs_surf,
3153 struct isl_surf *ccs_surf,
3154 uint32_t row_pitch_B)
3155 {
3156 if (!isl_surf_supports_ccs(dev, surf, hiz_or_mcs_surf))
3157 return false;
3158
3159 if (ISL_GFX_VER(dev) >= 12) {
3160 enum isl_format ccs_format;
3161 switch (isl_format_get_layout(surf->format)->bpb) {
3162 case 8: ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0; break;
3163 case 16: ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0; break;
3164 case 32: ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0; break;
3165 case 64: ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0; break;
3166 case 128: ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0; break;
3167 default:
3168 return false;
3169 }
3170
3171 /* On Gfx12, the CCS is a scaled-down version of the main surface. We
3172 * model this as the CCS compressing a 2D-view of the entire surface.
3173 */
3174 const bool ok =
3175 isl_surf_init(dev, ccs_surf,
3176 .dim = ISL_SURF_DIM_2D,
3177 .format = ccs_format,
3178 .width = isl_surf_get_row_pitch_el(surf),
3179 .height = surf->size_B / surf->row_pitch_B,
3180 .depth = 1,
3181 .levels = 1,
3182 .array_len = 1,
3183 .samples = 1,
3184 .row_pitch_B = row_pitch_B,
3185 .usage = ISL_SURF_USAGE_CCS_BIT,
3186 .tiling_flags = ISL_TILING_GFX12_CCS_BIT);
3187 assert(!ok || ccs_surf->size_B == surf->size_B / 256);
3188 return ok;
3189 } else {
3190 enum isl_format ccs_format;
3191 if (ISL_GFX_VER(dev) >= 9) {
3192 switch (isl_format_get_layout(surf->format)->bpb) {
3193 case 32: ccs_format = ISL_FORMAT_GFX9_CCS_32BPP; break;
3194 case 64: ccs_format = ISL_FORMAT_GFX9_CCS_64BPP; break;
3195 case 128: ccs_format = ISL_FORMAT_GFX9_CCS_128BPP; break;
3196 default: unreachable("Unsupported CCS format");
3197 return false;
3198 }
3199 } else if (surf->tiling == ISL_TILING_Y0) {
3200 switch (isl_format_get_layout(surf->format)->bpb) {
3201 case 32: ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_Y; break;
3202 case 64: ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_Y; break;
3203 case 128: ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_Y; break;
3204 default: unreachable("Unsupported CCS format");
3205 }
3206 } else if (surf->tiling == ISL_TILING_X) {
3207 switch (isl_format_get_layout(surf->format)->bpb) {
3208 case 32: ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_X; break;
3209 case 64: ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_X; break;
3210 case 128: ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_X; break;
3211 default: unreachable("Unsupported CCS format");
3212 }
3213 } else {
3214 unreachable("Invalid tiling format");
3215 }
3216
3217 return isl_surf_init(dev, ccs_surf,
3218 .dim = surf->dim,
3219 .format = ccs_format,
3220 .width = surf->logical_level0_px.width,
3221 .height = surf->logical_level0_px.height,
3222 .depth = surf->logical_level0_px.depth,
3223 .levels = surf->levels,
3224 .array_len = surf->logical_level0_px.array_len,
3225 .samples = 1,
3226 .row_pitch_B = row_pitch_B,
3227 .usage = ISL_SURF_USAGE_CCS_BIT,
3228 .tiling_flags = ISL_TILING_CCS_BIT);
3229 }
3230 }
3231
/* Call the per-generation variant isl_gfx<N>_<func>(...) selected by
 * ISL_GFX_VERX10(dev).  G45 (45) falls through to the gfx5 variant.  An
 * unknown generation trips an assert.  The macro expands to a bare switch
 * statement.
 */
#define isl_genX_call(dev, func, ...)                           \
   switch (ISL_GFX_VERX10(dev)) {                               \
   case 40:  isl_gfx4_##func(__VA_ARGS__);   break;             \
   case 45:  /* G45 surface state is the same as gfx5 */        \
   case 50:  isl_gfx5_##func(__VA_ARGS__);   break;             \
   case 60:  isl_gfx6_##func(__VA_ARGS__);   break;             \
   case 70:  isl_gfx7_##func(__VA_ARGS__);   break;             \
   case 75:  isl_gfx75_##func(__VA_ARGS__);  break;             \
   case 80:  isl_gfx8_##func(__VA_ARGS__);   break;             \
   case 90:  isl_gfx9_##func(__VA_ARGS__);   break;             \
   case 110: isl_gfx11_##func(__VA_ARGS__);  break;             \
   case 120: isl_gfx12_##func(__VA_ARGS__);  break;             \
   case 125: isl_gfx125_##func(__VA_ARGS__); break;             \
   case 200: isl_gfx20_##func(__VA_ARGS__);  break;             \
   default:  assert(!"Unknown hardware generation");            \
   }
3272
3273 /**
3274 * A variant of isl_surf_get_image_offset_sa() specific to
3275 * ISL_DIM_LAYOUT_GFX4_2D.
3276 */
3277 static void
get_image_offset_sa_gfx4_2d(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa,uint32_t * z_offset_sa,uint32_t * array_offset)3278 get_image_offset_sa_gfx4_2d(const struct isl_surf *surf,
3279 uint32_t level, uint32_t logical_array_layer,
3280 uint32_t *x_offset_sa,
3281 uint32_t *y_offset_sa,
3282 uint32_t *z_offset_sa,
3283 uint32_t *array_offset)
3284 {
3285 assert(level < surf->levels);
3286 if (surf->dim == ISL_SURF_DIM_3D)
3287 assert(logical_array_layer < surf->logical_level0_px.depth);
3288 else
3289 assert(logical_array_layer < surf->logical_level0_px.array_len);
3290
3291 const struct isl_extent3d image_align_sa =
3292 isl_surf_get_image_alignment_sa(surf);
3293
3294 const uint32_t W0 = surf->phys_level0_sa.width;
3295 const uint32_t H0 = surf->phys_level0_sa.height;
3296
3297 const uint32_t phys_layer = logical_array_layer *
3298 (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
3299
3300 uint32_t x = 0, y;
3301 if (isl_tiling_is_std_y(surf->tiling) ||
3302 isl_tiling_is_64(surf->tiling)) {
3303 y = 0;
3304 if (surf->dim == ISL_SURF_DIM_3D) {
3305 *z_offset_sa = logical_array_layer;
3306 *array_offset = 0;
3307 } else {
3308 *z_offset_sa = 0;
3309 *array_offset = phys_layer;
3310 }
3311 } else {
3312 y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
3313 *z_offset_sa = 0;
3314 *array_offset = 0;
3315 }
3316
3317 for (uint32_t l = 0; l < MIN(level, surf->miptail_start_level); ++l) {
3318 if (l == 1) {
3319 uint32_t W = isl_minify(W0, l);
3320 x += isl_align_npot(W, image_align_sa.w);
3321 } else {
3322 uint32_t H = isl_minify(H0, l);
3323 y += isl_align_npot(H, image_align_sa.h);
3324 }
3325 }
3326
3327 *x_offset_sa = x;
3328 *y_offset_sa = y;
3329
3330 if (level >= surf->miptail_start_level) {
3331 const struct isl_format_layout *fmtl =
3332 isl_format_get_layout(surf->format);
3333
3334 uint32_t tail_offset_x_el, tail_offset_y_el, tail_offset_z_el;
3335 isl_get_miptail_level_offset_el(surf->tiling, surf->dim,
3336 fmtl->bpb,
3337 level - surf->miptail_start_level,
3338 &tail_offset_x_el,
3339 &tail_offset_y_el,
3340 &tail_offset_z_el);
3341 *x_offset_sa += tail_offset_x_el * fmtl->bw;
3342 *y_offset_sa += tail_offset_y_el * fmtl->bh;
3343 *z_offset_sa += tail_offset_z_el * fmtl->bd;
3344 }
3345 }
3346
3347 /**
3348 * A variant of isl_surf_get_image_offset_sa() specific to
3349 * ISL_DIM_LAYOUT_GFX4_3D.
3350 */
3351 static void
get_image_offset_sa_gfx4_3d(const struct isl_surf * surf,uint32_t level,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3352 get_image_offset_sa_gfx4_3d(const struct isl_surf *surf,
3353 uint32_t level, uint32_t logical_z_offset_px,
3354 uint32_t *x_offset_sa,
3355 uint32_t *y_offset_sa)
3356 {
3357 assert(level < surf->levels);
3358 if (surf->dim == ISL_SURF_DIM_3D) {
3359 assert(surf->phys_level0_sa.array_len == 1);
3360 assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
3361 } else {
3362 assert(surf->dim == ISL_SURF_DIM_2D);
3363 assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
3364 assert(surf->phys_level0_sa.array_len == 6);
3365 assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
3366 }
3367
3368 const struct isl_extent3d image_align_sa =
3369 isl_surf_get_image_alignment_sa(surf);
3370
3371 const uint32_t W0 = surf->phys_level0_sa.width;
3372 const uint32_t H0 = surf->phys_level0_sa.height;
3373 const uint32_t D0 = surf->phys_level0_sa.depth;
3374 const uint32_t AL = surf->phys_level0_sa.array_len;
3375
3376 uint32_t x = 0;
3377 uint32_t y = 0;
3378
3379 for (uint32_t l = 0; l < level; ++l) {
3380 const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
3381 const uint32_t level_d =
3382 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
3383 image_align_sa.d);
3384 const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
3385
3386 y += level_h * max_layers_vert;
3387 }
3388
3389 const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
3390 const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
3391 const uint32_t level_d =
3392 isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
3393 image_align_sa.d);
3394
3395 const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
3396
3397 x += level_w * (logical_z_offset_px % max_layers_horiz);
3398 y += level_h * (logical_z_offset_px / max_layers_horiz);
3399
3400 *x_offset_sa = x;
3401 *y_offset_sa = y;
3402 }
3403
3404 static void
get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3405 get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf *surf,
3406 uint32_t level,
3407 uint32_t logical_array_layer,
3408 uint32_t *x_offset_sa,
3409 uint32_t *y_offset_sa)
3410 {
3411 assert(level < surf->levels);
3412 assert(surf->logical_level0_px.depth == 1);
3413 assert(logical_array_layer < surf->logical_level0_px.array_len);
3414
3415 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3416
3417 const struct isl_extent3d image_align_sa =
3418 isl_surf_get_image_alignment_sa(surf);
3419
3420 struct isl_tile_info tile_info;
3421 isl_surf_get_tile_info(surf, &tile_info);
3422 const struct isl_extent2d tile_extent_sa = {
3423 .w = tile_info.logical_extent_el.w * fmtl->bw,
3424 .h = tile_info.logical_extent_el.h * fmtl->bh,
3425 };
3426 /* Tile size is a multiple of image alignment */
3427 assert(tile_extent_sa.w % image_align_sa.w == 0);
3428 assert(tile_extent_sa.h % image_align_sa.h == 0);
3429
3430 const uint32_t W0 = surf->phys_level0_sa.w;
3431 const uint32_t H0 = surf->phys_level0_sa.h;
3432
3433 /* Each image has the same height as LOD0 because the hardware thinks
3434 * everything is LOD0
3435 */
3436 const uint32_t H = isl_align(H0, image_align_sa.h);
3437
3438 /* Quick sanity check for consistency */
3439 if (surf->phys_level0_sa.array_len > 1)
3440 assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
3441
3442 uint32_t x = 0, y = 0;
3443 for (uint32_t l = 0; l < level; ++l) {
3444 const uint32_t W = isl_minify(W0, l);
3445
3446 const uint32_t w = isl_align(W, tile_extent_sa.w);
3447 const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
3448 tile_extent_sa.h);
3449
3450 if (l == 0) {
3451 y += h;
3452 } else {
3453 x += w;
3454 }
3455 }
3456
3457 y += H * logical_array_layer;
3458
3459 *x_offset_sa = x;
3460 *y_offset_sa = y;
3461 }
3462
3463 /**
3464 * A variant of isl_surf_get_image_offset_sa() specific to
3465 * ISL_DIM_LAYOUT_GFX9_1D.
3466 */
3467 static void
get_image_offset_sa_gfx9_1d(const struct isl_surf * surf,uint32_t level,uint32_t layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3468 get_image_offset_sa_gfx9_1d(const struct isl_surf *surf,
3469 uint32_t level, uint32_t layer,
3470 uint32_t *x_offset_sa,
3471 uint32_t *y_offset_sa)
3472 {
3473 assert(level < surf->levels);
3474 assert(layer < surf->phys_level0_sa.array_len);
3475 assert(surf->phys_level0_sa.height == 1);
3476 assert(surf->phys_level0_sa.depth == 1);
3477 assert(surf->samples == 1);
3478
3479 const uint32_t W0 = surf->phys_level0_sa.width;
3480 const struct isl_extent3d image_align_sa =
3481 isl_surf_get_image_alignment_sa(surf);
3482
3483 uint32_t x = 0;
3484
3485 for (uint32_t l = 0; l < level; ++l) {
3486 uint32_t W = isl_minify(W0, l);
3487 uint32_t w = isl_align_npot(W, image_align_sa.w);
3488
3489 x += w;
3490 }
3491
3492 *x_offset_sa = x;
3493 *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
3494 }
3495
3496 /**
3497 * Calculate the offset, in units of surface samples, to a subimage in the
3498 * surface.
3499 *
3500 * @invariant level < surface levels
3501 * @invariant logical_array_layer < logical array length of surface
3502 * @invariant logical_z_offset_px < logical depth of surface at level
3503 */
3504 void
isl_surf_get_image_offset_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa,uint32_t * z_offset_sa,uint32_t * array_offset)3505 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
3506 uint32_t level,
3507 uint32_t logical_array_layer,
3508 uint32_t logical_z_offset_px,
3509 uint32_t *x_offset_sa,
3510 uint32_t *y_offset_sa,
3511 uint32_t *z_offset_sa,
3512 uint32_t *array_offset)
3513 {
3514 assert(level < surf->levels);
3515 assert(logical_array_layer < surf->logical_level0_px.array_len);
3516 assert(logical_z_offset_px
3517 < isl_minify(surf->logical_level0_px.depth, level));
3518
3519 switch (surf->dim_layout) {
3520 case ISL_DIM_LAYOUT_GFX9_1D:
3521 get_image_offset_sa_gfx9_1d(surf, level, logical_array_layer,
3522 x_offset_sa, y_offset_sa);
3523 *z_offset_sa = 0;
3524 *array_offset = 0;
3525 break;
3526 case ISL_DIM_LAYOUT_GFX4_2D:
3527 get_image_offset_sa_gfx4_2d(surf, level, logical_array_layer
3528 + logical_z_offset_px,
3529 x_offset_sa, y_offset_sa,
3530 z_offset_sa, array_offset);
3531 break;
3532 case ISL_DIM_LAYOUT_GFX4_3D:
3533 get_image_offset_sa_gfx4_3d(surf, level, logical_array_layer +
3534 logical_z_offset_px,
3535 x_offset_sa, y_offset_sa);
3536 *z_offset_sa = 0;
3537 *array_offset = 0;
3538 break;
3539 case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
3540 get_image_offset_sa_gfx6_stencil_hiz(surf, level, logical_array_layer +
3541 logical_z_offset_px,
3542 x_offset_sa, y_offset_sa);
3543 *z_offset_sa = 0;
3544 *array_offset = 0;
3545 break;
3546
3547 default:
3548 unreachable("not reached");
3549 }
3550 }
3551
3552 void
isl_surf_get_image_offset_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)3553 isl_surf_get_image_offset_el(const struct isl_surf *surf,
3554 uint32_t level,
3555 uint32_t logical_array_layer,
3556 uint32_t logical_z_offset_px,
3557 uint32_t *x_offset_el,
3558 uint32_t *y_offset_el,
3559 uint32_t *z_offset_el,
3560 uint32_t *array_offset)
3561 {
3562 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3563
3564 assert(level < surf->levels);
3565 assert(logical_array_layer < surf->logical_level0_px.array_len);
3566 assert(logical_z_offset_px
3567 < isl_minify(surf->logical_level0_px.depth, level));
3568
3569 uint32_t x_offset_sa, y_offset_sa, z_offset_sa;
3570 isl_surf_get_image_offset_sa(surf, level,
3571 logical_array_layer,
3572 logical_z_offset_px,
3573 &x_offset_sa,
3574 &y_offset_sa,
3575 &z_offset_sa,
3576 array_offset);
3577
3578 *x_offset_el = x_offset_sa / fmtl->bw;
3579 *y_offset_el = y_offset_sa / fmtl->bh;
3580 *z_offset_el = z_offset_sa / fmtl->bd;
3581 }
3582
3583 void
isl_surf_get_image_offset_B_tile_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3584 isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
3585 uint32_t level,
3586 uint32_t logical_array_layer,
3587 uint32_t logical_z_offset_px,
3588 uint64_t *offset_B,
3589 uint32_t *x_offset_sa,
3590 uint32_t *y_offset_sa)
3591 {
3592 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3593
3594 uint32_t x_offset_el, y_offset_el;
3595 isl_surf_get_image_offset_B_tile_el(surf, level,
3596 logical_array_layer,
3597 logical_z_offset_px,
3598 offset_B,
3599 &x_offset_el,
3600 &y_offset_el);
3601
3602 if (x_offset_sa) {
3603 *x_offset_sa = x_offset_el * fmtl->bw;
3604 } else {
3605 assert(x_offset_el == 0);
3606 }
3607
3608 if (y_offset_sa) {
3609 *y_offset_sa = y_offset_el * fmtl->bh;
3610 } else {
3611 assert(y_offset_el == 0);
3612 }
3613 }
3614
3615 void
isl_surf_get_image_offset_B_tile_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el)3616 isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
3617 uint32_t level,
3618 uint32_t logical_array_layer,
3619 uint32_t logical_z_offset_px,
3620 uint64_t *offset_B,
3621 uint32_t *x_offset_el,
3622 uint32_t *y_offset_el)
3623 {
3624 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3625
3626 uint32_t total_x_offset_el, total_y_offset_el;
3627 uint32_t total_z_offset_el, total_array_offset;
3628 isl_surf_get_image_offset_el(surf, level, logical_array_layer,
3629 logical_z_offset_px,
3630 &total_x_offset_el,
3631 &total_y_offset_el,
3632 &total_z_offset_el,
3633 &total_array_offset);
3634
3635 uint32_t z_offset_el, array_offset;
3636 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
3637 surf->msaa_layout, fmtl->bpb,
3638 surf->samples,
3639 surf->row_pitch_B,
3640 surf->array_pitch_el_rows,
3641 total_x_offset_el,
3642 total_y_offset_el,
3643 total_z_offset_el,
3644 total_array_offset,
3645 offset_B,
3646 x_offset_el,
3647 y_offset_el,
3648 &z_offset_el,
3649 &array_offset);
3650 if (level >= surf->miptail_start_level) {
3651 /* We can do a byte offset to the first level of a miptail but we cannot
3652 * offset into a miptail.
3653 */
3654 assert(level == surf->miptail_start_level);
3655
3656 /* The byte offset will get us to the miptail page. The other offsets
3657 * are to the actual level within the miptail. It is assumed that the
3658 * caller will set up a texture with a miptail and use the hardware to
3659 * handle offseting inside the miptail.
3660 */
3661 *x_offset_el = 0;
3662 *y_offset_el = 0;
3663 } else {
3664 assert(z_offset_el == 0);
3665 assert(array_offset == 0);
3666 }
3667 }
3668
3669 void
isl_surf_get_image_range_B_tile(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * start_tile_B,uint64_t * end_tile_B)3670 isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
3671 uint32_t level,
3672 uint32_t logical_array_layer,
3673 uint32_t logical_z_offset_px,
3674 uint64_t *start_tile_B,
3675 uint64_t *end_tile_B)
3676 {
3677 uint32_t start_x_offset_el, start_y_offset_el;
3678 uint32_t start_z_offset_el, start_array_slice;
3679 isl_surf_get_image_offset_el(surf, level, logical_array_layer,
3680 logical_z_offset_px,
3681 &start_x_offset_el,
3682 &start_y_offset_el,
3683 &start_z_offset_el,
3684 &start_array_slice);
3685
3686 /* Compute the size of the subimage in surface elements */
3687 const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level);
3688 const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level);
3689 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3690 const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw);
3691 const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh);
3692
3693 /* Find the last pixel */
3694 uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1;
3695 uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1;
3696
3697 /* We only consider one Z or array slice */
3698 const uint32_t end_z_offset_el = start_z_offset_el;
3699 const uint32_t end_array_slice = start_array_slice;
3700
3701 UNUSED uint32_t x_offset_el, y_offset_el, z_offset_el, array_slice;
3702 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
3703 surf->msaa_layout, fmtl->bpb,
3704 surf->samples,
3705 surf->row_pitch_B,
3706 surf->array_pitch_el_rows,
3707 start_x_offset_el,
3708 start_y_offset_el,
3709 start_z_offset_el,
3710 start_array_slice,
3711 start_tile_B,
3712 &x_offset_el,
3713 &y_offset_el,
3714 &z_offset_el,
3715 &array_slice);
3716
3717 isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
3718 surf->msaa_layout, fmtl->bpb,
3719 surf->samples,
3720 surf->row_pitch_B,
3721 surf->array_pitch_el_rows,
3722 end_x_offset_el,
3723 end_y_offset_el,
3724 end_z_offset_el,
3725 end_array_slice,
3726 end_tile_B,
3727 &x_offset_el,
3728 &y_offset_el,
3729 &z_offset_el,
3730 &array_slice);
3731
3732 /* We want the range we return to be exclusive but the tile containing the
3733 * last pixel (what we just calculated) is inclusive. Add one.
3734 */
3735 (*end_tile_B)++;
3736
3737 assert(*end_tile_B <= surf->size_B);
3738 }
3739
3740 void
isl_surf_get_image_surf(const struct isl_device * dev,const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,struct isl_surf * image_surf,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3741 isl_surf_get_image_surf(const struct isl_device *dev,
3742 const struct isl_surf *surf,
3743 uint32_t level,
3744 uint32_t logical_array_layer,
3745 uint32_t logical_z_offset_px,
3746 struct isl_surf *image_surf,
3747 uint64_t *offset_B,
3748 uint32_t *x_offset_sa,
3749 uint32_t *y_offset_sa)
3750 {
3751 isl_surf_get_image_offset_B_tile_sa(surf,
3752 level,
3753 logical_array_layer,
3754 logical_z_offset_px,
3755 offset_B,
3756 x_offset_sa,
3757 y_offset_sa);
3758
3759 /* Even for cube maps there will be only single face, therefore drop the
3760 * corresponding flag if present.
3761 */
3762 const isl_surf_usage_flags_t usage =
3763 surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
3764
3765 bool ok UNUSED;
3766 ok = isl_surf_init(dev, image_surf,
3767 .dim = ISL_SURF_DIM_2D,
3768 .format = surf->format,
3769 .width = isl_minify(surf->logical_level0_px.w, level),
3770 .height = isl_minify(surf->logical_level0_px.h, level),
3771 .depth = 1,
3772 .levels = 1,
3773 .array_len = 1,
3774 .samples = surf->samples,
3775 .row_pitch_B = surf->row_pitch_B,
3776 .usage = usage,
3777 .tiling_flags = (1 << surf->tiling));
3778 assert(ok);
3779 }
3780
/**
 * Build an uncompressed surface and view that alias the storage of a
 * block-compressed surface, so that each compressed block can be accessed as
 * one element of view->format (asserted to have the same bits per block).
 *
 * Three strategies are used, depending on the tiling and on the view:
 *
 *  - Standard-Y and Tile64 tilings: offset to the requested miplevel (or to
 *    the base of the miptail) and create a surface that keeps the original
 *    array pitch.
 *  - Other tilings with view->array_len > 1: reuse the whole surface with
 *    its level-0 size expressed in elements.  This is only done for base
 *    level 0 on gfx9 and later; otherwise the function fails.
 *  - Other tilings with a single slice: offset directly to that slice and
 *    describe it as a one-level, one-layer 2D surface.
 *
 * @param dev          device the surfaces belong to
 * @param _surf        compressed, single-sampled source surface; may point
 *                     to the same object as ucompr_surf
 * @param _view        view with an uncompressed format and exactly one
 *                     level; may point to the same object as ucompr_view
 * @param ucompr_surf  receives the uncompressed surface
 * @param ucompr_view  receives the view adjusted to ucompr_surf
 * @param offset_B     receives the byte offset to apply to the surface
 *                     address
 * @param x_offset_el  receives the intratile X offset in elements
 * @param y_offset_el  receives the intratile Y offset in elements
 *
 * @return true on success.  false if the view spans several array layers on
 *         a tiling other than Standard-Y/Tile64 and either starts above
 *         level 0 or the hardware is older than gfx9; no output is written
 *         in that case.
 */
bool
isl_surf_get_uncompressed_surf(const struct isl_device *dev,
                               const struct isl_surf *_surf,
                               const struct isl_view *_view,
                               struct isl_surf *ucompr_surf,
                               struct isl_view *ucompr_view,
                               uint64_t *offset_B,
                               uint32_t *x_offset_el,
                               uint32_t *y_offset_el)
{
   /* Input and output pointers may be the same, save the input contents now. */
   const struct isl_surf __surf = *_surf, *surf = &__surf;
   const struct isl_view __view = *_view, *view = &__view;
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(surf->format);
   const enum isl_format view_format = view->format;

   /* The source must be a block format and the view must be an uncompressed
    * format whose element is exactly as large as one block.
    */
   assert(fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1);
   assert(isl_format_is_compressed(surf->format));
   assert(!isl_format_is_compressed(view->format));
   assert(isl_format_get_layout(view->format)->bpb == fmtl->bpb);
   assert(view->levels == 1);

   /* Size of the viewed miplevel in pixels ... */
   const uint32_t view_width_px =
      isl_minify(surf->logical_level0_px.width, view->base_level);
   const uint32_t view_height_px =
      isl_minify(surf->logical_level0_px.height, view->base_level);

   /* ... and in elements (blocks), rounding partial blocks up. */
   assert(surf->samples == 1);
   const uint32_t view_width_el = isl_align_div_npot(view_width_px, fmtl->bw);
   const uint32_t view_height_el = isl_align_div_npot(view_height_px, fmtl->bh);

   /* If we ever enable 3D block formats, we'll need to re-think this */
   assert(fmtl->bd == 1);

   if (isl_tiling_is_std_y(surf->tiling) ||
       isl_tiling_is_64(surf->tiling)) {
      /* If the requested level is not part of the miptail, we just offset to
       * the requested level.  Because we're using standard tilings and aren't
       * in the miptail, arrays and 3D textures should just work so long as we
       * have the right array stride in the end.
       *
       * If the requested level is in the miptail, we instead offset to the
       * base of the miptail.  Because offsets into the miptail are fixed by
       * the tiling and don't depend on the actual size of the image, we can
       * set the level in the view to offset into the miptail regardless of
       * the fact minification yields different results for the compressed and
       * uncompressed surface.
       */
      const uint32_t base_level =
         MIN(view->base_level, surf->miptail_start_level);

      isl_surf_get_image_offset_B_tile_el(surf, base_level, 0, 0,
                                          offset_B, x_offset_el, y_offset_el);
      /* Tile64, Ys and Yf should have no intratile X or Y offset */
      assert(*x_offset_el == 0 && *y_offset_el == 0);

      /* Save off the array pitch */
      const uint32_t array_pitch_el_rows = surf->array_pitch_el_rows;

      const uint32_t view_depth_px =
         isl_minify(surf->logical_level0_px.depth, view->base_level);
      const uint32_t view_depth_el =
         isl_align_div_npot(view_depth_px, fmtl->bd);

      /* We need to compute the size of the uncompressed surface we will
       * create. If we're not in the miptail, it is just the view size in
       * surface elements. If we are in a miptail, we need a size that will
       * minify to the view size in surface elements. This may not be the same
       * as the size of base_level, but that's not a problem. Slot offsets are
       * fixed in HW (see the tables used in isl_get_miptail_level_offset_el).
       */
      const uint32_t ucompr_level = view->base_level - base_level;

      /* The > 1 check is here to prevent a change in the surface's overall
       * dimension (e.g. 2D->3D).
       *
       * Also having a base_level dimension = 1 doesn't mean the HW will
       * ignore higher mip level. Once the dimension has reached 1, it'll stay
       * at 1 in the higher mip levels.
       */
      struct isl_extent3d ucompr_surf_extent_el = {
         .w = view_width_el > 1 ? view_width_el << ucompr_level : 1,
         .h = view_height_el > 1 ? view_height_el << ucompr_level : 1,
         .d = view_depth_el > 1 ? view_depth_el << ucompr_level : 1,
      };

      bool ok UNUSED;
      ok = isl_surf_init(dev, ucompr_surf,
                         .dim = surf->dim,
                         .format = view->format,
                         .width = ucompr_surf_extent_el.width,
                         .height = ucompr_surf_extent_el.height,
                         .depth = ucompr_surf_extent_el.depth,
                         .levels = ucompr_level + 1,
                         .array_len = surf->logical_level0_px.array_len,
                         .samples = surf->samples,
                         .min_miptail_start_level =
                            (int) (view->base_level < surf->miptail_start_level),
                         .row_pitch_B = surf->row_pitch_B,
                         .usage = surf->usage,
                         .tiling_flags = (1u << surf->tiling));
      assert(ok);

      /* Use the array pitch from the original surface.  This way 2D arrays
       * and 3D textures should work properly, just with one LOD.
       */
      assert(ucompr_surf->array_pitch_el_rows <= array_pitch_el_rows);
      ucompr_surf->array_pitch_el_rows = array_pitch_el_rows;

      /* The newly created image represents only the one miplevel so we
       * need to adjust the view accordingly.  Because we offset it to
       * miplevel but used a Z and array slice of 0, the array range can be
       * left alone.
       */
      *ucompr_view = *view;
      ucompr_view->base_level -= base_level;
   } else {
      if (view->array_len > 1) {
         /* The Skylake PRM Vol. 2d, "RENDER_SURFACE_STATE::X Offset" says:
          *
          *    "If Surface Array is enabled, this field must be zero."
          *
          * The PRMs for other hardware have similar text.  This is also tricky
          * to handle with things like BLORP's SW offsetting because the
          * increased surface size required for the offset may result in an
          * image height greater than qpitch.
          */
         if (view->base_level > 0)
            return false;

         /* On Haswell and earlier, RENDER_SURFACE_STATE doesn't have a QPitch
          * field; it only has "array pitch span" which means the QPitch is
          * automatically calculated.  Since we're smashing the surface format
          * (block formats are subtly different) and the number of miplevels,
          * that calculation will get thrown off.  This means we can't do
          * arrays even at LOD0
          *
          * On Broadwell, we do have a QPitch field which we can control.
          * However, HALIGN and VALIGN are specified in pixels and are
          * hard-coded to align to exactly the block size of the compressed
          * texture.  This means that, when reinterpreted as a non-compressed
          * the QPitch may be anything but the HW requires it to be properly
          * aligned.
          */
         if (ISL_GFX_VER(dev) < 9)
            return false;

         /* Start from a copy of the compressed surface and patch it up */
         *ucompr_surf = *surf;
         ucompr_surf->levels = 1;
         ucompr_surf->format = view_format;

         /* We're making an uncompressed view here.  The image dimensions need
          * to be scaled down by the block size.
          */
         assert(ucompr_surf->logical_level0_px.width == view_width_px);
         assert(ucompr_surf->logical_level0_px.height == view_height_px);
         ucompr_surf->logical_level0_px.width = view_width_el;
         ucompr_surf->logical_level0_px.height = view_height_el;
         ucompr_surf->phys_level0_sa = isl_surf_get_phys_level0_el(surf);

         /* The surface mostly stays as-is; there is no offset */
         *offset_B = 0;
         *x_offset_el = 0;
         *y_offset_el = 0;

         /* The view remains the same */
         *ucompr_view = *view;
      } else {
         /* If only one array slice is requested, directly offset to that
          * slice.  We could, in theory, still use arrays in some cases but
          * BLORP isn't prepared for this and everyone who calls this function
          * should be prepared to handle an X/Y offset.
          *
          * For 3D surfaces the view's base_array_layer is passed as the Z
          * offset; for everything else it is passed as the array layer.
          */
         isl_surf_get_image_offset_B_tile_el(surf,
                                             view->base_level,
                                             surf->dim == ISL_SURF_DIM_3D ?
                                                0 : view->base_array_layer,
                                             surf->dim == ISL_SURF_DIM_3D ?
                                                view->base_array_layer : 0,
                                             offset_B,
                                             x_offset_el,
                                             y_offset_el);

         /* Even for cube maps there will be only single face, therefore drop
          * the corresponding flag if present.
          */
         const isl_surf_usage_flags_t usage =
            surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);

         bool ok UNUSED;
         ok = isl_surf_init(dev, ucompr_surf,
                            .dim = ISL_SURF_DIM_2D,
                            .format = view_format,
                            .width = view_width_el,
                            .height = view_height_el,
                            .depth = 1,
                            .levels = 1,
                            .array_len = 1,
                            .samples = 1,
                            .row_pitch_B = surf->row_pitch_B,
                            .usage = usage,
                            .tiling_flags = (1 << surf->tiling));
         assert(ok);

         /* The newly created image represents the one subimage we're
          * referencing with this view so it only has one array slice and
          * miplevel.
          */
         *ucompr_view = *view;
         ucompr_view->base_array_layer = 0;
         ucompr_view->base_level = 0;
      }
   }

   return true;
}
3998
3999 void
isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t bpb,uint32_t samples,uint32_t row_pitch_B,uint32_t array_pitch_el_rows,uint32_t total_x_offset_el,uint32_t total_y_offset_el,uint32_t total_z_offset_el,uint32_t total_array_offset,uint64_t * tile_offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)4000 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
4001 enum isl_surf_dim dim,
4002 enum isl_msaa_layout msaa_layout,
4003 uint32_t bpb,
4004 uint32_t samples,
4005 uint32_t row_pitch_B,
4006 uint32_t array_pitch_el_rows,
4007 uint32_t total_x_offset_el,
4008 uint32_t total_y_offset_el,
4009 uint32_t total_z_offset_el,
4010 uint32_t total_array_offset,
4011 uint64_t *tile_offset_B,
4012 uint32_t *x_offset_el,
4013 uint32_t *y_offset_el,
4014 uint32_t *z_offset_el,
4015 uint32_t *array_offset)
4016 {
4017 if (tiling == ISL_TILING_LINEAR) {
4018 assert(bpb % 8 == 0);
4019 assert(samples == 1);
4020 assert(total_z_offset_el == 0 && total_array_offset == 0);
4021 *tile_offset_B = (uint64_t)total_y_offset_el * row_pitch_B +
4022 (uint64_t)total_x_offset_el * (bpb / 8);
4023 *x_offset_el = 0;
4024 *y_offset_el = 0;
4025 *z_offset_el = 0;
4026 *array_offset = 0;
4027 return;
4028 }
4029
4030 struct isl_tile_info tile_info;
4031 isl_tiling_get_info(tiling, dim, msaa_layout, bpb, samples, &tile_info);
4032
4033 /* Pitches must make sense with the tiling */
4034 assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
4035 if (tile_info.logical_extent_el.d > 1 || tile_info.logical_extent_el.a > 1)
4036 assert(array_pitch_el_rows % tile_info.logical_extent_el.h == 0);
4037
4038 /* For non-power-of-two formats, we need the address to be both tile and
4039 * element-aligned. The easiest way to achieve this is to work with a tile
4040 * that is three times as wide as the regular tile.
4041 *
4042 * The tile info returned by get_tile_info has a logical size that is an
4043 * integer number of tile_info.format_bpb size elements. To scale the
4044 * tile, we scale up the physical width and then treat the logical tile
4045 * size as if it has bpb size elements.
4046 */
4047 const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
4048 tile_info.phys_extent_B.width *= tile_el_scale;
4049
4050 /* Compute the offset into the tile */
4051 *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
4052 *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
4053 *z_offset_el = total_z_offset_el % tile_info.logical_extent_el.d;
4054 *array_offset = total_array_offset % tile_info.logical_extent_el.a;
4055
4056 /* Compute the offset of the tile in units of whole tiles */
4057 uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
4058 uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
4059 uint32_t z_offset_tl = total_z_offset_el / tile_info.logical_extent_el.d;
4060 uint32_t a_offset_tl = total_array_offset / tile_info.logical_extent_el.a;
4061
4062 /* Compute an array pitch in number of tiles */
4063 uint32_t array_pitch_tl_rows =
4064 array_pitch_el_rows / tile_info.logical_extent_el.h;
4065
4066 /* Add the Z and array offset to the Y offset to get a 2D offset */
4067 y_offset_tl += (z_offset_tl + a_offset_tl) * array_pitch_tl_rows;
4068
4069 *tile_offset_B =
4070 (uint64_t)y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
4071 (uint64_t)x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
4072 }
4073
4074 uint32_t
isl_surf_get_depth_format(const struct isl_device * dev,const struct isl_surf * surf)4075 isl_surf_get_depth_format(const struct isl_device *dev,
4076 const struct isl_surf *surf)
4077 {
4078 /* Support for separate stencil buffers began in gfx5. Support for
4079 * interleaved depthstencil buffers ceased in gfx7. The intermediate gens,
4080 * those that supported separate and interleaved stencil, were gfx5 and
4081 * gfx6.
4082 *
4083 * For a list of all available formats, see the Sandybridge PRM >> Volume
4084 * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
4085 * Format (p321).
4086 */
4087
4088 bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
4089
4090 assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
4091
4092 if (has_stencil)
4093 assert(ISL_GFX_VER(dev) < 7);
4094
4095 switch (surf->format) {
4096 default:
4097 unreachable("bad isl depth format");
4098 case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
4099 assert(ISL_GFX_VER(dev) < 7);
4100 return 0; /* D32_FLOAT_S8X24_UINT */
4101 case ISL_FORMAT_R32_FLOAT:
4102 assert(!has_stencil);
4103 return 1; /* D32_FLOAT */
4104 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
4105 if (has_stencil) {
4106 assert(ISL_GFX_VER(dev) < 7);
4107 return 2; /* D24_UNORM_S8_UINT */
4108 } else {
4109 assert(ISL_GFX_VER(dev) >= 5);
4110 return 3; /* D24_UNORM_X8_UINT */
4111 }
4112 case ISL_FORMAT_R16_UNORM:
4113 assert(!has_stencil);
4114 return 5; /* D16_UNORM */
4115 }
4116 }
4117
4118 bool
isl_swizzle_supports_rendering(const struct intel_device_info * devinfo,struct isl_swizzle swizzle)4119 isl_swizzle_supports_rendering(const struct intel_device_info *devinfo,
4120 struct isl_swizzle swizzle)
4121 {
4122 if (devinfo->platform == INTEL_PLATFORM_HSW) {
4123 /* From the Haswell PRM,
4124 * RENDER_SURFACE_STATE::Shader Channel Select Red
4125 *
4126 * "The Shader channel selects also define which shader channels are
4127 * written to which surface channel. If the Shader channel select is
4128 * SCS_ZERO or SCS_ONE then it is not written to the surface. If the
4129 * shader channel select is SCS_RED it is written to the surface red
4130 * channel and so on. If more than one shader channel select is set
4131 * to the same surface channel only the first shader channel in RGBA
4132 * order will be written."
4133 */
4134 return true;
4135 } else if (devinfo->ver <= 7) {
4136 /* Ivy Bridge and early doesn't have any swizzling */
4137 return isl_swizzle_is_identity(swizzle);
4138 } else {
4139 /* From the Sky Lake PRM Vol. 2d,
4140 * RENDER_SURFACE_STATE::Shader Channel Select Red
4141 *
4142 * "For Render Target, Red, Green and Blue Shader Channel Selects
4143 * MUST be such that only valid components can be swapped i.e. only
4144 * change the order of components in the pixel. Any other values for
4145 * these Shader Channel Select fields are not valid for Render
4146 * Targets. This also means that there MUST not be multiple shader
4147 * channels mapped to the same RT channel."
4148 *
4149 * From the Sky Lake PRM Vol. 2d,
4150 * RENDER_SURFACE_STATE::Shader Channel Select Alpha
4151 *
4152 * "For Render Target, this field MUST be programmed to
4153 * value = SCS_ALPHA."
4154 */
4155 return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
4156 swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
4157 swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
4158 (swizzle.g == ISL_CHANNEL_SELECT_RED ||
4159 swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
4160 swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
4161 (swizzle.b == ISL_CHANNEL_SELECT_RED ||
4162 swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
4163 swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
4164 swizzle.r != swizzle.g &&
4165 swizzle.r != swizzle.b &&
4166 swizzle.g != swizzle.b &&
4167 swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
4168 }
4169 }
4170
4171 static enum isl_channel_select
swizzle_select(enum isl_channel_select chan,struct isl_swizzle swizzle)4172 swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
4173 {
4174 switch (chan) {
4175 case ISL_CHANNEL_SELECT_ZERO:
4176 case ISL_CHANNEL_SELECT_ONE:
4177 return chan;
4178 case ISL_CHANNEL_SELECT_RED:
4179 return swizzle.r;
4180 case ISL_CHANNEL_SELECT_GREEN:
4181 return swizzle.g;
4182 case ISL_CHANNEL_SELECT_BLUE:
4183 return swizzle.b;
4184 case ISL_CHANNEL_SELECT_ALPHA:
4185 return swizzle.a;
4186 default:
4187 unreachable("Invalid swizzle component");
4188 }
4189 }
4190
4191 /**
4192 * Returns the single swizzle that is equivalent to applying the two given
4193 * swizzles in sequence.
4194 */
4195 struct isl_swizzle
isl_swizzle_compose(struct isl_swizzle first,struct isl_swizzle second)4196 isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
4197 {
4198 return (struct isl_swizzle) {
4199 .r = swizzle_select(first.r, second),
4200 .g = swizzle_select(first.g, second),
4201 .b = swizzle_select(first.b, second),
4202 .a = swizzle_select(first.a, second),
4203 };
4204 }
4205
4206 /**
4207 * Returns a swizzle that is the pseudo-inverse of this swizzle.
4208 */
4209 struct isl_swizzle
isl_swizzle_invert(struct isl_swizzle swizzle)4210 isl_swizzle_invert(struct isl_swizzle swizzle)
4211 {
4212 /* Default to zero for channels which do not show up in the swizzle */
4213 enum isl_channel_select chans[4] = {
4214 ISL_CHANNEL_SELECT_ZERO,
4215 ISL_CHANNEL_SELECT_ZERO,
4216 ISL_CHANNEL_SELECT_ZERO,
4217 ISL_CHANNEL_SELECT_ZERO,
4218 };
4219
4220 /* We go in ABGR order so that, if there are any duplicates, the first one
4221 * is taken if you look at it in RGBA order. This is what Haswell hardware
4222 * does for render target swizzles.
4223 */
4224 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
4225 chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
4226 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
4227 chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
4228 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
4229 chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
4230 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
4231 chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
4232
4233 return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
4234 }
4235
4236 static uint32_t
isl_color_value_channel(union isl_color_value src,enum isl_channel_select chan,uint32_t one)4237 isl_color_value_channel(union isl_color_value src,
4238 enum isl_channel_select chan,
4239 uint32_t one)
4240 {
4241 if (chan == ISL_CHANNEL_SELECT_ZERO)
4242 return 0;
4243 if (chan == ISL_CHANNEL_SELECT_ONE)
4244 return one;
4245
4246 assert(chan >= ISL_CHANNEL_SELECT_RED);
4247 assert(chan < ISL_CHANNEL_SELECT_RED + 4);
4248
4249 return src.u32[chan - ISL_CHANNEL_SELECT_RED];
4250 }
4251
4252 /** Applies an inverse swizzle to a color value */
4253 union isl_color_value
isl_color_value_swizzle(union isl_color_value src,struct isl_swizzle swizzle,bool is_float)4254 isl_color_value_swizzle(union isl_color_value src,
4255 struct isl_swizzle swizzle,
4256 bool is_float)
4257 {
4258 uint32_t one = is_float ? 0x3f800000 : 1;
4259
4260 return (union isl_color_value) { .u32 = {
4261 isl_color_value_channel(src, swizzle.r, one),
4262 isl_color_value_channel(src, swizzle.g, one),
4263 isl_color_value_channel(src, swizzle.b, one),
4264 isl_color_value_channel(src, swizzle.a, one),
4265 } };
4266 }
4267
4268 /** Applies an inverse swizzle to a color value */
4269 union isl_color_value
isl_color_value_swizzle_inv(union isl_color_value src,struct isl_swizzle swizzle)4270 isl_color_value_swizzle_inv(union isl_color_value src,
4271 struct isl_swizzle swizzle)
4272 {
4273 union isl_color_value dst = { .u32 = { 0, } };
4274
4275 /* We assign colors in ABGR order so that the first one will be taken in
4276 * RGBA precedence order. According to the PRM docs for shader channel
4277 * select, this matches Haswell hardware behavior.
4278 */
4279 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
4280 dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3];
4281 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
4282 dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2];
4283 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
4284 dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1];
4285 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
4286 dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0];
4287
4288 return dst;
4289 }
4290
4291 uint8_t
isl_format_get_aux_map_encoding(enum isl_format format)4292 isl_format_get_aux_map_encoding(enum isl_format format)
4293 {
4294 switch(format) {
4295 case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11;
4296 case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11;
4297 case ISL_FORMAT_R32G32B32A32_SINT: return 0x12;
4298 case ISL_FORMAT_R32G32B32A32_UINT: return 0x13;
4299 case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14;
4300 case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15;
4301 case ISL_FORMAT_R16G16B16A16_SINT: return 0x16;
4302 case ISL_FORMAT_R16G16B16A16_UINT: return 0x17;
4303 case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10;
4304 case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10;
4305 case ISL_FORMAT_R32G32_FLOAT: return 0x11;
4306 case ISL_FORMAT_R32G32_SINT: return 0x12;
4307 case ISL_FORMAT_R32G32_UINT: return 0x13;
4308 case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA;
4309 case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA;
4310 case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA;
4311 case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA;
4312 case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18;
4313 case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18;
4314 case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19;
4315 case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A;
4316 case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA;
4317 case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA;
4318 case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B;
4319 case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C;
4320 case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D;
4321 case ISL_FORMAT_R16G16_UNORM: return 0x14;
4322 case ISL_FORMAT_R16G16_SNORM: return 0x15;
4323 case ISL_FORMAT_R16G16_SINT: return 0x16;
4324 case ISL_FORMAT_R16G16_UINT: return 0x17;
4325 case ISL_FORMAT_R16G16_FLOAT: return 0x10;
4326 case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18;
4327 case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18;
4328 case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E;
4329 case ISL_FORMAT_R32_SINT: return 0x12;
4330 case ISL_FORMAT_R32_UINT: return 0x13;
4331 case ISL_FORMAT_R32_FLOAT: return 0x11;
4332 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13;
4333 case ISL_FORMAT_B5G6R5_UNORM: return 0xA;
4334 case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA;
4335 case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA;
4336 case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA;
4337 case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA;
4338 case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA;
4339 case ISL_FORMAT_R8G8_UNORM: return 0xA;
4340 case ISL_FORMAT_R8G8_SNORM: return 0x1B;
4341 case ISL_FORMAT_R8G8_SINT: return 0x1C;
4342 case ISL_FORMAT_R8G8_UINT: return 0x1D;
4343 case ISL_FORMAT_R16_UNORM: return 0x14;
4344 case ISL_FORMAT_R16_SNORM: return 0x15;
4345 case ISL_FORMAT_R16_SINT: return 0x16;
4346 case ISL_FORMAT_R16_UINT: return 0x17;
4347 case ISL_FORMAT_R16_FLOAT: return 0x10;
4348 case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA;
4349 case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA;
4350 case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA;
4351 case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA;
4352 case ISL_FORMAT_R8_UNORM: return 0xA;
4353 case ISL_FORMAT_R8_SNORM: return 0x1B;
4354 case ISL_FORMAT_R8_SINT: return 0x1C;
4355 case ISL_FORMAT_R8_UINT: return 0x1D;
4356 case ISL_FORMAT_A8_UNORM: return 0xA;
4357 case ISL_FORMAT_PLANAR_420_8: return 0xF;
4358 case ISL_FORMAT_PLANAR_420_10: return 0x7;
4359 case ISL_FORMAT_PLANAR_420_12: return 0x8;
4360 case ISL_FORMAT_PLANAR_420_16: return 0x8;
4361 case ISL_FORMAT_YCRCB_NORMAL: return 0x3;
4362 case ISL_FORMAT_YCRCB_SWAPY: return 0xB;
4363 default:
4364 unreachable("Unsupported aux-map format!");
4365 return 0;
4366 }
4367 }
4368
4369 /*
4370 * Returns compression format encoding for Unified Lossless Compression
4371 */
4372 uint8_t
isl_get_render_compression_format(enum isl_format format)4373 isl_get_render_compression_format(enum isl_format format)
4374 {
4375 /* From the Bspec, Enumeration_RenderCompressionFormat section (53726): */
4376 switch(format) {
4377 case ISL_FORMAT_R32G32B32A32_FLOAT:
4378 case ISL_FORMAT_R32G32B32X32_FLOAT:
4379 case ISL_FORMAT_R32G32B32A32_SINT:
4380 return 0x0;
4381 case ISL_FORMAT_R32G32B32A32_UINT:
4382 return 0x1;
4383 case ISL_FORMAT_R32G32_FLOAT:
4384 case ISL_FORMAT_R32G32_SINT:
4385 return 0x2;
4386 case ISL_FORMAT_R32G32_UINT:
4387 return 0x3;
4388 case ISL_FORMAT_R16G16B16A16_UNORM:
4389 case ISL_FORMAT_R16G16B16X16_UNORM:
4390 case ISL_FORMAT_R16G16B16A16_UINT:
4391 return 0x4;
4392 case ISL_FORMAT_R16G16B16A16_SNORM:
4393 case ISL_FORMAT_R16G16B16A16_SINT:
4394 case ISL_FORMAT_R16G16B16A16_FLOAT:
4395 case ISL_FORMAT_R16G16B16X16_FLOAT:
4396 return 0x5;
4397 case ISL_FORMAT_R16G16_UNORM:
4398 case ISL_FORMAT_R16G16_UINT:
4399 return 0x6;
4400 case ISL_FORMAT_R16G16_SNORM:
4401 case ISL_FORMAT_R16G16_SINT:
4402 case ISL_FORMAT_R16G16_FLOAT:
4403 return 0x7;
4404 case ISL_FORMAT_B8G8R8A8_UNORM:
4405 case ISL_FORMAT_B8G8R8X8_UNORM:
4406 case ISL_FORMAT_B8G8R8A8_UNORM_SRGB:
4407 case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
4408 case ISL_FORMAT_R8G8B8A8_UNORM:
4409 case ISL_FORMAT_R8G8B8X8_UNORM:
4410 case ISL_FORMAT_R8G8B8A8_UNORM_SRGB:
4411 case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
4412 case ISL_FORMAT_R8G8B8A8_UINT:
4413 return 0x8;
4414 case ISL_FORMAT_R8G8B8A8_SNORM:
4415 case ISL_FORMAT_R8G8B8A8_SINT:
4416 return 0x9;
4417 case ISL_FORMAT_B5G6R5_UNORM:
4418 case ISL_FORMAT_B5G6R5_UNORM_SRGB:
4419 case ISL_FORMAT_B5G5R5A1_UNORM:
4420 case ISL_FORMAT_B5G5R5A1_UNORM_SRGB:
4421 case ISL_FORMAT_B4G4R4A4_UNORM:
4422 case ISL_FORMAT_B4G4R4A4_UNORM_SRGB:
4423 case ISL_FORMAT_B5G5R5X1_UNORM:
4424 case ISL_FORMAT_B5G5R5X1_UNORM_SRGB:
4425 case ISL_FORMAT_A1B5G5R5_UNORM:
4426 case ISL_FORMAT_A4B4G4R4_UNORM:
4427 case ISL_FORMAT_R8G8_UNORM:
4428 case ISL_FORMAT_R8G8_UINT:
4429 return 0xA;
4430 case ISL_FORMAT_R8G8_SNORM:
4431 case ISL_FORMAT_R8G8_SINT:
4432 return 0xB;
4433 case ISL_FORMAT_R10G10B10A2_UNORM:
4434 case ISL_FORMAT_R10G10B10A2_UNORM_SRGB:
4435 case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM:
4436 case ISL_FORMAT_R10G10B10A2_UINT:
4437 case ISL_FORMAT_B10G10R10A2_UNORM:
4438 case ISL_FORMAT_B10G10R10X2_UNORM:
4439 case ISL_FORMAT_B10G10R10A2_UNORM_SRGB:
4440 return 0xC;
4441 case ISL_FORMAT_R11G11B10_FLOAT:
4442 return 0xD;
4443 case ISL_FORMAT_R32_SINT:
4444 case ISL_FORMAT_R32_FLOAT:
4445 return 0x10;
4446 case ISL_FORMAT_R32_UINT:
4447 case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
4448 return 0x11;
4449 case ISL_FORMAT_R16_UNORM:
4450 case ISL_FORMAT_R16_UINT:
4451 return 0x14;
4452 case ISL_FORMAT_R16_SNORM:
4453 case ISL_FORMAT_R16_SINT:
4454 case ISL_FORMAT_R16_FLOAT:
4455 return 0x15;
4456 case ISL_FORMAT_R8_UNORM:
4457 case ISL_FORMAT_R8_UINT:
4458 case ISL_FORMAT_A8_UNORM:
4459 return 0x18;
4460 case ISL_FORMAT_R8_SNORM:
4461 case ISL_FORMAT_R8_SINT:
4462 return 0x19;
4463 default:
4464 unreachable("Unsupported render compression format!");
4465 return 0;
4466 }
4467 }
4468
4469 const char *
isl_aux_op_to_name(enum isl_aux_op op)4470 isl_aux_op_to_name(enum isl_aux_op op)
4471 {
4472 static const char *names[] = {
4473 [ISL_AUX_OP_NONE] = "none",
4474 [ISL_AUX_OP_FAST_CLEAR] = "fast-clear",
4475 [ISL_AUX_OP_FULL_RESOLVE] = "full-resolve",
4476 [ISL_AUX_OP_PARTIAL_RESOLVE] = "partial-resolve",
4477 [ISL_AUX_OP_AMBIGUATE] = "ambiguate",
4478 };
4479 assert(op < ARRAY_SIZE(names));
4480 return names[op];
4481 }
4482
4483 const char *
isl_tiling_to_name(enum isl_tiling tiling)4484 isl_tiling_to_name(enum isl_tiling tiling)
4485 {
4486 static const char *names[] = {
4487 [ISL_TILING_LINEAR] = "linear",
4488 [ISL_TILING_W] = "W",
4489 [ISL_TILING_X] = "X",
4490 [ISL_TILING_Y0] = "Y0",
4491 [ISL_TILING_SKL_Yf] = "SKL-Yf",
4492 [ISL_TILING_SKL_Ys] = "SKL-Ys",
4493 [ISL_TILING_ICL_Yf] = "ICL-Yf",
4494 [ISL_TILING_ICL_Ys] = "ICL-Ys",
4495 [ISL_TILING_4] = "4",
4496 [ISL_TILING_64] = "64",
4497 [ISL_TILING_HIZ] = "hiz",
4498 [ISL_TILING_CCS] = "ccs",
4499 [ISL_TILING_GFX12_CCS] = "gfx12-ccs",
4500 };
4501 assert(tiling < ARRAY_SIZE(names));
4502 return names[tiling];
4503 }
4504
4505 const char *
isl_aux_usage_to_name(enum isl_aux_usage usage)4506 isl_aux_usage_to_name(enum isl_aux_usage usage)
4507 {
4508 static const char *names[] = {
4509 [ISL_AUX_USAGE_NONE] = "none",
4510 [ISL_AUX_USAGE_HIZ] = "hiz",
4511 [ISL_AUX_USAGE_MCS] = "mcs",
4512 [ISL_AUX_USAGE_CCS_D] = "ccs-d",
4513 [ISL_AUX_USAGE_CCS_E] = "ccs-e",
4514 [ISL_AUX_USAGE_FCV_CCS_E] = "fcv-ccs-e",
4515 [ISL_AUX_USAGE_MC] = "mc",
4516 [ISL_AUX_USAGE_HIZ_CCS_WT] = "hiz-ccs-wt",
4517 [ISL_AUX_USAGE_HIZ_CCS] = "hiz-ccs",
4518 [ISL_AUX_USAGE_MCS_CCS] = "mcs-ccs",
4519 [ISL_AUX_USAGE_STC_CCS] = "stc-ccs",
4520 };
4521 assert(usage < ARRAY_SIZE(names));
4522 return names[usage];
4523 }
4524
4525 const char *
isl_aux_state_to_name(enum isl_aux_state state)4526 isl_aux_state_to_name(enum isl_aux_state state)
4527 {
4528 static const char *names[] = {
4529 [ISL_AUX_STATE_CLEAR] = "clear",
4530 [ISL_AUX_STATE_PARTIAL_CLEAR] = "partial_clear",
4531 [ISL_AUX_STATE_COMPRESSED_CLEAR] = "compressed_clear",
4532 [ISL_AUX_STATE_COMPRESSED_NO_CLEAR] = "compressed_no_clear",
4533 [ISL_AUX_STATE_RESOLVED] = "resolved",
4534 [ISL_AUX_STATE_PASS_THROUGH] = "pass-through",
4535 [ISL_AUX_STATE_AUX_INVALID] = "invalid",
4536 };
4537 assert(state < ARRAY_SIZE(names));
4538 return names[state];
4539 }
4540