• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2015 Intel Corporation
3  *
4  *  Permission is hereby granted, free of charge, to any person obtaining a
5  *  copy of this software and associated documentation files (the "Software"),
6  *  to deal in the Software without restriction, including without limitation
7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  *  and/or sell copies of the Software, and to permit persons to whom the
9  *  Software is furnished to do so, subject to the following conditions:
10  *
11  *  The above copyright notice and this permission notice (including the next
12  *  paragraph) shall be included in all copies or substantial portions of the
13  *  Software.
14  *
15  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  *  IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <inttypes.h>
28 
29 #include "dev/intel_debug.h"
30 #include "genxml/genX_bits.h"
31 #include "util/log.h"
32 
33 #include "isl.h"
34 #include "isl_gfx4.h"
35 #include "isl_gfx6.h"
36 #include "isl_gfx7.h"
37 #include "isl_gfx8.h"
38 #include "isl_gfx9.h"
39 #include "isl_gfx12.h"
40 #include "isl_priv.h"
41 
/* Declare the isl_<name>_get_func() getters that isl_device_init() calls to
 * fill in the matching function pointers of struct isl_device.
 * NOTE(review): the macro itself is defined outside this file; presumably it
 * dispatches to the per-generation implementation -- confirm in isl_priv.h.
 */
isl_genX_declare_get_func(surf_fill_state_s)
isl_genX_declare_get_func(buffer_fill_state_s)
isl_genX_declare_get_func(emit_depth_stencil_hiz_s)
isl_genX_declare_get_func(null_fill_state_s)
isl_genX_declare_get_func(emit_cpb_control_s)
47 
48 void
49 isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
50                            uint32_t yt1, uint32_t yt2,
51                            char *dst, const char *src,
52                            uint32_t dst_pitch, int32_t src_pitch,
53                            bool has_swizzling,
54                            enum isl_tiling tiling,
55                            isl_memcpy_type copy_type)
56 {
57 #ifdef USE_SSE41
58    if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
59       _isl_memcpy_linear_to_tiled_sse41(
60          xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
61          tiling, copy_type);
62       return;
63    }
64 #endif
65 
66    _isl_memcpy_linear_to_tiled(
67       xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
68       tiling, copy_type);
69 }
70 
71 void
isl_memcpy_tiled_to_linear(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,int32_t dst_pitch,uint32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)72 isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
73                            uint32_t yt1, uint32_t yt2,
74                            char *dst, const char *src,
75                            int32_t dst_pitch, uint32_t src_pitch,
76                            bool has_swizzling,
77                            enum isl_tiling tiling,
78                            isl_memcpy_type copy_type)
79 {
80 #ifdef USE_SSE41
81    if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
82       _isl_memcpy_tiled_to_linear_sse41(
83          xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
84          tiling, copy_type);
85       return;
86    }
87 #endif
88 
89    _isl_memcpy_tiled_to_linear(
90       xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
91       tiling, copy_type);
92 }
93 
94 void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char * file,int line,const char * fmt,...)95 __isl_finishme(const char *file, int line, const char *fmt, ...)
96 {
97    va_list ap;
98    char buf[512];
99 
100    va_start(ap, fmt);
101    vsnprintf(buf, sizeof(buf), fmt, ap);
102    va_end(ap);
103 
104    fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
105 }
106 
107 static void
isl_device_setup_mocs(struct isl_device * dev)108 isl_device_setup_mocs(struct isl_device *dev)
109 {
110    dev->mocs.protected_mask = 0;
111 
112    if (dev->info->ver >= 20) {
113       /* L3+L4=WB; BSpec: 71582 */
114       dev->mocs.internal = 1 << 1;
115       dev->mocs.external = 1 << 1;
116       dev->mocs.protected_mask = 3 << 0;
117       /* TODO: Setting to uncached
118        * WA 14018443005:
119        *  Ensure that any compression-enabled resource from gfx memory subject
120        *  to app recycling (e.g. OGL sparse resource backing memory or
121        *  Vulkan heaps) is never PAT/MOCS'ed as L3:UC.
122        */
123       dev->mocs.blitter_dst = 1 << 1;
124       dev->mocs.blitter_src = 1 << 1;
125    } else if (dev->info->ver >= 12) {
126       if (intel_device_info_is_mtl_or_arl(dev->info)) {
127          /* Cached L3+L4; BSpec: 45101 */
128          dev->mocs.internal = 1 << 1;
129          /* Displayables cached to L3+L4:WT */
130          dev->mocs.external = 14 << 1;
131          /* Uncached - GO:Mem */
132          dev->mocs.uncached = 5 << 1;
133          /* TODO: XY_BLOCK_COPY_BLT don't mention what should be the L4 cache
134           * mode so for now it is setting L4 as uncached following what is
135           * asked for L3
136           */
137          dev->mocs.blitter_dst = 9 << 1;
138          dev->mocs.blitter_src = 9 << 1;
139       } else if (intel_device_info_is_dg2(dev->info)) {
140          /* L3CC=WB; BSpec: 45101 */
141          dev->mocs.internal = 3 << 1;
142          dev->mocs.external = 3 << 1;
143          /* UC - Coherent; GO:Memory */
144          dev->mocs.uncached = 1 << 1;
145 
146          /* XY_BLOCK_COPY_BLT MOCS fields have programming notes which say:
147           *
148           *    "Destination MOCS value, which is used to program MOCS index
149           *     for writing to memory, should select a MOCS register having
150           *     "L3 Cacheability Control" programmed as uncacheable(UC) and
151           *     "Global GO" parameter set as GOMemory (pushes GO point to
152           *     memory). The MOCS Register may have L3 Lookup programmed as
153           *     UCL3LKDIS for better efficiency."
154           *
155           * The GO:Memory setting requires us to use MOCS 1 or 2.  MOCS 2
156           * has LKUP set to 0 and is marked "Non-Coherent", which we assume
157           * is probably the "better efficiency" they mention...
158           *
159           *   "Source MOCS value, which is used to program MOCS index for
160           *    reading from memory, should select a MOCS register having
161           *    "L3 Cacheability Control" programmed as uncacheable(UC).
162           *    The MOCS Register may have L3 Lookup programmed as UCL3LKDIS
163           *    for better efficiency."
164           *
165           * Any MOCS except 3 should work.  We use MOCS 2...
166           */
167          dev->mocs.blitter_dst = 2 << 1;
168          dev->mocs.blitter_src = 2 << 1;
169       } else if (dev->info->platform == INTEL_PLATFORM_DG1) {
170          /* L3CC=WB */
171          dev->mocs.internal = 5 << 1;
172          /* Displayables on DG1 are free to cache in L3 since L3 is transient
173           * and flushed at bottom of each submission.
174           */
175          dev->mocs.external = 5 << 1;
176          /* UC */
177          dev->mocs.uncached = 1 << 1;
178       } else {
179          /* TC=1/LLC Only, LeCC=1/UC, LRUM=0, L3CC=3/WB */
180          dev->mocs.external = 61 << 1;
181          /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
182          dev->mocs.internal = 2 << 1;
183          /* Uncached */
184          dev->mocs.uncached = 3 << 1;
185 
186          /* L1 - HDC:L1 + L3 + LLC */
187          dev->mocs.l1_hdc_l3_llc = 48 << 1;
188       }
189       /* Protected is just an additional flag. */
190       dev->mocs.protected_mask = 1 << 0;
191    } else if (dev->info->ver >= 9) {
192       /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
193       dev->mocs.external = 1 << 1;
194       /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
195       dev->mocs.internal = 2 << 1;
196       /* Uncached */
197       dev->mocs.uncached = (dev->info->ver >= 11 ? 3 : 0) << 1;
198    } else if (dev->info->ver >= 8) {
199       /* MEMORY_OBJECT_CONTROL_STATE:
200        * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
201        * .TargetCache = L3DefertoPATforLLCeLLCselection,
202        * .AgeforQUADLRU = 0
203        */
204       dev->mocs.external = 0x18;
205       /* MEMORY_OBJECT_CONTROL_STATE:
206        * .MemoryTypeLLCeLLCCacheabilityControl = WB,
207        * .TargetCache = L3DefertoPATforLLCeLLCselection,
208        * .AgeforQUADLRU = 0
209        */
210       dev->mocs.internal = 0x78;
211       if (dev->info->platform == INTEL_PLATFORM_CHV) {
212          /* MEMORY_OBJECT_CONTROL_STATE:
213           * .MemoryType = UC,
214           * .TargetCache = NoCaching,
215           */
216          dev->mocs.uncached = 0;
217       } else {
218          /* MEMORY_OBJECT_CONTROL_STATE:
219           * .MemoryTypeLLCeLLCCacheabilityControl = UCUncacheable,
220           * .TargetCache = eLLCOnlywheneDRAMispresentelsegetsallocatedinLLC,
221           * .AgeforQUADLRU = 0
222           */
223          dev->mocs.uncached = 0x20;
224       }
225    } else if (dev->info->ver >= 7) {
226       if (dev->info->platform == INTEL_PLATFORM_HSW) {
227          /* MEMORY_OBJECT_CONTROL_STATE:
228           * .LLCeLLCCacheabilityControlLLCCC             = 0,
229           * .L3CacheabilityControlL3CC                   = 1,
230           */
231          dev->mocs.internal = 1;
232          dev->mocs.external = 1;
233          /* MEMORY_OBJECT_CONTROL_STATE:
234           * .LLCeLLCCacheabilityControlLLCCC             = 1,
235           * .L3CacheabilityControlL3CC                   = 0,
236           */
237          dev->mocs.uncached = 2;
238       } else {
239          /* MEMORY_OBJECT_CONTROL_STATE:
240           * .GraphicsDataTypeGFDT                        = 0,
241           * .LLCCacheabilityControlLLCCC                 = 0,
242           * .L3CacheabilityControlL3CC                   = 1,
243           */
244          dev->mocs.internal = 1;
245          dev->mocs.external = 1;
246          /* MEMORY_OBJECT_CONTROL_STATE:
247           * .GraphicsDataTypeGFDT                        = 0,
248           * .LLCCacheabilityControlLLCCC                 = 0,
249           * .L3CacheabilityControlL3CC                   = 0,
250           */
251          dev->mocs.uncached = 0;
252       }
253    } else {
254       dev->mocs.internal = 0;
255       dev->mocs.external = 0;
256       dev->mocs.uncached = 0;
257    }
258 }
259 
260 /**
261  * Return an appropriate MOCS entry for the given usage flags.
262  */
263 uint32_t
isl_mocs(const struct isl_device * dev,isl_surf_usage_flags_t usage,bool external)264 isl_mocs(const struct isl_device *dev, isl_surf_usage_flags_t usage,
265          bool external)
266 {
267    uint32_t mask = (usage & ISL_SURF_USAGE_PROTECTED_BIT) ?
268       dev->mocs.protected_mask : 0;
269 
270    if (external)
271       return dev->mocs.external | mask;
272 
273    if (intel_device_info_is_mtl_or_arl(dev->info) &&
274        (usage & ISL_SURF_USAGE_STREAM_OUT_BIT))
275       return dev->mocs.uncached | mask;
276 
277    if (dev->info->verx10 == 120 && dev->info->platform != INTEL_PLATFORM_DG1) {
278       if (usage & ISL_SURF_USAGE_STAGING_BIT)
279          return dev->mocs.internal | mask;
280 
281       if (usage & ISL_SURF_USAGE_CPB_BIT)
282          return dev->mocs.internal;
283 
284       /* Using L1:HDC for storage buffers breaks Vulkan memory model
285        * tests that use shader atomics.  This isn't likely to work out,
286        * and we can't know a priori whether they'll be used.  So just
287        * continue with ordinary internal MOCS for now.
288        */
289       if (usage & ISL_SURF_USAGE_STORAGE_BIT)
290          return dev->mocs.internal | mask;
291 
292       if (usage & (ISL_SURF_USAGE_CONSTANT_BUFFER_BIT |
293                    ISL_SURF_USAGE_RENDER_TARGET_BIT |
294                    ISL_SURF_USAGE_TEXTURE_BIT))
295          return dev->mocs.l1_hdc_l3_llc | mask;
296    }
297 
298    return dev->mocs.internal | mask;
299 }
300 
301 void
isl_device_init(struct isl_device * dev,const struct intel_device_info * info)302 isl_device_init(struct isl_device *dev,
303                 const struct intel_device_info *info)
304 {
305    /* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */
306    assert(!(info->has_bit6_swizzle && info->ver >= 8));
307 
308    dev->info = info;
309    dev->use_separate_stencil = ISL_GFX_VER(dev) >= 6;
310    dev->has_bit6_swizzling = info->has_bit6_swizzle;
311    dev->buffer_length_in_aux_addr = false;
312 
313    /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
314     * device properties at buildtime. Verify that the macros with the device
315     * properties chosen during runtime.
316     */
317    ISL_GFX_VER_SANITIZE(dev);
318    ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
319 
320    /* Did we break hiz or stencil? */
321    if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
322       assert(info->has_hiz_and_separate_stencil);
323    if (info->must_use_separate_stencil)
324       assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
325 
326    dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
327    dev->ss.align = isl_align(dev->ss.size, 32);
328 
329    dev->ss.clear_color_state_size = CLEAR_COLOR_length(info) * 4;
330    dev->ss.clear_color_state_offset =
331       RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
332 
333    dev->ss.clear_value_size =
334       isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
335                 RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
336                 RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
337                 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
338 
339    dev->ss.clear_value_offset =
340       RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
341 
342    assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
343    dev->ss.addr_offset =
344       RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
345 
346    /* The "Auxiliary Surface Base Address" field starts a bit higher up
347     * because the bottom 12 bits are used for other things.  Round down to
348     * the nearest dword before.
349     */
350    dev->ss.aux_addr_offset =
351       (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
352 
353    dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
354    assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
355    dev->ds.depth_offset =
356       _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
357 
358    if (dev->use_separate_stencil) {
359       dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
360                       _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
361                       _3DSTATE_CLEAR_PARAMS_length(info) * 4;
362 
363       assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
364       dev->ds.stencil_offset =
365          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
366          _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
367 
368       assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
369       dev->ds.hiz_offset =
370          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
371          _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
372          _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
373    } else {
374       dev->ds.stencil_offset = 0;
375       dev->ds.hiz_offset = 0;
376    }
377 
378    /* From the IVB PRM, SURFACE_STATE::Height,
379     *
380     *    For typed buffer and structured buffer surfaces, the number
381     *    of entries in the buffer ranges from 1 to 2^27. For raw buffer
382     *    surfaces, the number of entries in the buffer is the number of bytes
383     *    which can range from 1 to 2^30.
384     *
385     * From the SKL PRM, SURFACE_STATE::Width/Height/Depth for RAW buffers,
386     *
387     *    Width  : bits [6:0]
388     *    Height : bits [20:7]
389     *    Depth  : bits [31:21]
390     *
391     *    So we can address 4Gb
392     *
393     * This limit is only concerned with raw buffers.
394     */
395    if (ISL_GFX_VER(dev) >= 9) {
396       dev->max_buffer_size = 1ull << 32;
397    } else if (ISL_GFX_VER(dev) >= 7) {
398       dev->max_buffer_size = 1ull << 30;
399    } else {
400       dev->max_buffer_size = 1ull << 27;
401    }
402 
403    dev->cpb.size = _3DSTATE_CPSIZE_CONTROL_BUFFER_length(info) * 4;
404    dev->cpb.offset =
405       _3DSTATE_CPSIZE_CONTROL_BUFFER_SurfaceBaseAddress_start(info) / 8;
406 
407    isl_device_setup_mocs(dev);
408 
409    dev->surf_fill_state_s = isl_surf_fill_state_s_get_func(dev);
410    dev->buffer_fill_state_s = isl_buffer_fill_state_s_get_func(dev);
411    dev->emit_depth_stencil_hiz_s = isl_emit_depth_stencil_hiz_s_get_func(dev);
412    dev->null_fill_state_s = isl_null_fill_state_s_get_func(dev);
413    dev->emit_cpb_control_s = isl_emit_cpb_control_s_get_func(dev);
414 }
415 
416 /**
417  * @brief Query the set of multisamples supported by the device.
418  *
419  * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
420  * supported.
421  */
422 isl_sample_count_mask_t ATTRIBUTE_CONST
isl_device_get_sample_counts(const struct isl_device * dev)423 isl_device_get_sample_counts(const struct isl_device *dev)
424 {
425    if (ISL_GFX_VER(dev) >= 9) {
426       return ISL_SAMPLE_COUNT_1_BIT |
427              ISL_SAMPLE_COUNT_2_BIT |
428              ISL_SAMPLE_COUNT_4_BIT |
429              ISL_SAMPLE_COUNT_8_BIT |
430              ISL_SAMPLE_COUNT_16_BIT;
431    } else if (ISL_GFX_VER(dev) >= 8) {
432       return ISL_SAMPLE_COUNT_1_BIT |
433              ISL_SAMPLE_COUNT_2_BIT |
434              ISL_SAMPLE_COUNT_4_BIT |
435              ISL_SAMPLE_COUNT_8_BIT;
436    } else if (ISL_GFX_VER(dev) >= 7) {
437       return ISL_SAMPLE_COUNT_1_BIT |
438              ISL_SAMPLE_COUNT_4_BIT |
439              ISL_SAMPLE_COUNT_8_BIT;
440    } else if (ISL_GFX_VER(dev) >= 6) {
441       return ISL_SAMPLE_COUNT_1_BIT |
442              ISL_SAMPLE_COUNT_4_BIT;
443    } else {
444       return ISL_SAMPLE_COUNT_1_BIT;
445    }
446 }
447 
448 static uint32_t
isl_get_miptail_base_row(enum isl_tiling tiling)449 isl_get_miptail_base_row(enum isl_tiling tiling)
450 {
451    /* Miptails base levels can depend on the number of samples, but since we
452     * don't support levels > 1 with multisampling, the base miptail level is
453     * really simple :
454     */
455    if (tiling == ISL_TILING_SKL_Yf ||
456        tiling == ISL_TILING_ICL_Yf)
457       return 4;
458    else
459       return 0;
460 }
461 
/* Miptail offset table for 2D surfaces with the SKL Yf/Ys tilings.
 *
 * One row per miptail slot, one column per element size (see the bpb header
 * below).  tiling_max_mip_tail() uses the row count of this table for
 * ISL_TILING_SKL_Yf and ISL_TILING_SKL_Ys.
 * NOTE(review): each entry is presumably an {x, y} offset in surface
 * elements, per the "_el" suffix -- confirm against the table's users.
 */
static const uint8_t skl_std_y_2d_miptail_offset_el[][5][2] = {
/*   128 bpb    64 bpb    32 bpb    16 bpb      8 bpb     */
   { {32,  0}, {64,  0}, {64,  0}, {128,  0}, {128,  0} },
   { { 0, 32}, { 0, 32}, { 0, 64}, {  0, 64}, {  0,128} },
   { {16,  0}, {32,  0}, {32,  0}, { 64,  0}, { 64,  0} },
   { { 0, 16}, { 0, 16}, { 0, 32}, {  0, 32}, {  0, 64} },
   { { 8,  0}, {16,  0}, {16,  0}, { 32,  0}, { 32,  0} },
   { { 4,  8}, { 8,  8}, { 8, 16}, { 16, 16}, { 16, 32} },
   { { 0, 12}, { 0, 12}, { 0, 24}, {  0, 24}, {  0, 48} },
   { { 0,  8}, { 0,  8}, { 0, 16}, {  0, 16}, {  0, 32} },
   { { 4,  4}, { 8,  4}, { 8,  8}, { 16,  8}, { 16, 16} },
   { { 4,  0}, { 8,  0}, { 8,  0}, { 16,  0}, { 16,  0} },
   { { 0,  4}, { 0,  4}, { 0,  8}, {  0,  8}, {  0, 16} },
   { { 3,  0}, { 6,  0}, { 4,  4}, {  8,  4}, {  0, 12} },
   { { 2,  0}, { 4,  0}, { 4,  0}, {  8,  0}, {  0,  8} },
   { { 1,  0}, { 2,  0}, { 0,  4}, {  0,  4}, {  0,  4} },
   { { 0,  0}, { 0,  0}, { 0,  0}, {  0,  0}, {  0,  0} },
};
480 
/* Miptail offset table for 2D surfaces with the ICL Yf/Ys tilings.
 *
 * One row per miptail slot, one column per element size (see the bpb header
 * below).  tiling_max_mip_tail() uses the row count of this table for
 * ISL_TILING_ICL_Yf and ISL_TILING_ICL_Ys, and also for the 2D case of
 * ISL_TILING_64 and ISL_TILING_64_XE2.
 * NOTE(review): each entry is presumably an {x, y} offset in surface
 * elements, per the "_el" suffix -- confirm against the table's users.
 */
static const uint8_t icl_std_y_2d_miptail_offset_el[][5][2] = {
/*   128 bpb    64 bpb    32 bpb    16 bpb      8 bpb     */
   { {32,  0}, {64,  0}, {64,  0}, {128,  0}, {128,   0} },
   { { 0, 32}, { 0, 32}, { 0, 64}, {  0, 64}, {  0, 128} },
   { {16,  0}, {32,  0}, {32,  0}, { 64,  0}, { 64,   0} },
   { { 0, 16}, { 0, 16}, { 0, 32}, {  0, 32}, {  0,  64} },
   { { 8,  0}, {16,  0}, {16,  0}, { 32,  0}, { 32,   0} },
   { { 4,  8}, { 8,  8}, { 8, 16}, { 16, 16}, { 16,  32} },
   { { 0, 12}, { 0, 12}, { 0, 24}, {  0, 24}, {  0,  48} },
   { { 0,  8}, { 0,  8}, { 0, 16}, {  0, 16}, {  0,  32} },
   { { 4,  4}, { 8,  4}, { 8,  8}, { 16,  8}, { 16,  16} },
   { { 4,  0}, { 8,  0}, { 8,  0}, { 16,  0}, { 16,   0} },
   { { 0,  4}, { 0,  4}, { 0,  8}, {  0,  8}, {  0,  16} },
   { { 0,  0}, { 0,  0}, { 0,  0}, {  0,  0}, {  0,   0} },
   { { 1,  0}, { 2,  0}, { 0,  4}, {  0,  4}, {  0,   4} },
   { { 2,  0}, { 4,  0}, { 4,  0}, {  8,  0}, {  0,   8} },
   { { 3,  0}, { 6,  0}, { 4,  4}, {  8,  4}, {  0,  12} },
};
499 
/* Miptail offset table for 3D surfaces with the SKL Yf/Ys tilings.
 *
 * One row per miptail slot, one column per element size (see the bpb header
 * below).  tiling_max_mip_tail() uses the row count of this table for
 * ISL_TILING_SKL_Yf and ISL_TILING_SKL_Ys.
 * NOTE(review): each entry is presumably an {x, y, z} offset in surface
 * elements, per the "_el" suffix -- confirm against the table's users.
 */
static const uint8_t skl_std_y_3d_miptail_offset_el[][5][3] = {
/*    128 bpb     64 bpb      32 bpb        16 bpb        8 bpb      */
   { {8, 0, 0}, {16, 0, 0}, {16,  0, 0}, {16,  0,  0}, {32,  0,  0} },
   { {0, 8, 0}, { 0, 8, 0}, { 0, 16, 0}, { 0, 16,  0}, { 0, 16,  0} },
   { {0, 0, 8}, { 0, 0, 8}, { 0,  0, 8}, { 0,  0, 16}, { 0,  0, 16} },
   { {4, 0, 0}, { 8, 0, 0}, { 8,  0, 0}, { 8,  0,  0}, {16,  0,  0} },
   { {0, 4, 0}, { 0, 4, 0}, { 0,  8, 0}, { 0,  8,  0}, { 0,  8,  0} },
   { {0, 0, 4}, { 0, 0, 4}, { 0,  0, 4}, { 0,  0,  8}, { 0,  0,  8} },
   { {3, 0, 0}, { 6, 0, 0}, { 4,  4, 0}, { 0,  4,  4}, { 0,  4,  4} },
   { {2, 0, 0}, { 4, 0, 0}, { 0,  4, 0}, { 0,  4,  0}, { 0,  4,  0} },
   { {1, 0, 3}, { 2, 0, 3}, { 4,  0, 3}, { 0,  0,  7}, { 0,  0,  7} },
   { {1, 0, 2}, { 2, 0, 2}, { 4,  0, 2}, { 0,  0,  6}, { 0,  0,  6} },
   { {1, 0, 1}, { 2, 0, 1}, { 4,  0, 1}, { 0,  0,  5}, { 0,  0,  5} },
   { {1, 0, 0}, { 2, 0, 0}, { 4,  0, 0}, { 0,  0,  4}, { 0,  0,  4} },
   { {0, 0, 3}, { 0, 0, 3}, { 0,  0, 3}, { 0,  0,  3}, { 0,  0,  3} },
   { {0, 0, 2}, { 0, 0, 2}, { 0,  0, 2}, { 0,  0,  2}, { 0,  0,  2} },
   { {0, 0, 1}, { 0, 0, 1}, { 0,  0, 1}, { 0,  0,  1}, { 0,  0,  1} },
   { {0, 0, 0}, { 0, 0, 0}, { 0,  0, 0}, { 0,  0,  0}, { 0,  0,  0} },
};
519 
/* Miptail offset table for 3D surfaces with the ICL Yf/Ys tilings.
 *
 * One row per miptail slot, one column per element size (see the bpb header
 * below).  tiling_max_mip_tail() uses the row count of this table for
 * ISL_TILING_ICL_Yf and ISL_TILING_ICL_Ys.
 * NOTE(review): each entry is presumably an {x, y, z} offset in surface
 * elements, per the "_el" suffix -- confirm against the table's users.
 */
static const uint8_t icl_std_y_3d_miptail_offset_el[][5][3] = {
/*    128 bpb     64 bpb      32 bpb        16 bpb        8 bpb      */
   { {8, 0, 0}, {16, 0, 0}, {16,  0, 0}, {16,  0,  0}, {32,  0,  0} },
   { {0, 8, 0}, { 0, 8, 0}, { 0, 16, 0}, { 0, 16,  0}, { 0, 16,  0} },
   { {0, 0, 8}, { 0, 0, 8}, { 0,  0, 8}, { 0,  0, 16}, { 0,  0, 16} },
   { {4, 0, 0}, { 8, 0, 0}, { 8,  0, 0}, { 8,  0,  0}, {16,  0,  0} },
   { {0, 4, 0}, { 0, 4, 0}, { 0,  8, 0}, { 0,  8,  0}, { 0,  8,  0} },
   { {2, 0, 4}, { 4, 0, 4}, { 4,  0, 4}, { 4,  0,  8}, { 8,  0,  8} },
   { {0, 2, 4}, { 0, 2, 4}, { 0,  4, 4}, { 0,  4,  8}, { 0,  4,  8} },
   { {0, 0, 4}, { 0, 0, 4}, { 0,  0, 4}, { 0,  0,  8}, { 0,  0,  8} },
   { {2, 2, 0}, { 4, 2, 0}, { 4,  4, 0}, { 4,  4,  0}, { 8,  4,  0} },
   { {2, 0, 0}, { 4, 0, 0}, { 4,  0, 0}, { 4,  0,  0}, { 8,  0,  0} },
   { {0, 2, 0}, { 0, 2, 0}, { 0,  4, 0}, { 0,  4,  0}, { 0,  4,  0} },
   { {1, 0, 2}, { 2, 0, 2}, { 2,  0, 2}, { 2,  0,  4}, { 4,  0,  4} },
   { {0, 0, 2}, { 0, 0, 2}, { 0,  0, 2}, { 0,  0,  4}, { 0,  0,  4} },
   { {1, 0, 0}, { 2, 0, 0}, { 2,  0, 0}, { 2,  0,  0}, { 4,  0,  0} },
   { {0, 0, 0}, { 0, 0, 0}, { 0,  0, 0}, { 0,  0,  0}, { 0,  0,  0} },
};
538 
/* Miptail offset table for 3D surfaces with Tile64.
 *
 * One row per miptail slot, one column per element size (see the bpb header
 * below).  tiling_max_mip_tail() uses the row count of this table for the
 * 3D case of ISL_TILING_64 and ISL_TILING_64_XE2.
 * NOTE(review): each entry is presumably an {x, y, z} offset in surface
 * elements, per the "_el" suffix -- confirm against the table's users.
 */
static const uint8_t acm_tile64_3d_miptail_offset_el[][5][3] = {
/*    128 bpb     64 bpb      32 bpb        16 bpb        8 bpb      */
   { {8, 0, 0}, {16, 0, 0}, {16,  0, 0}, {16,  0,  0}, {32,  0,  0}, },
   { {0, 8, 0}, { 0, 8, 0}, { 0, 16, 0}, { 0, 16,  0}, { 0, 16,  0}, },
   { {0, 0, 8}, { 0, 0, 8}, { 0,  0, 8}, { 0,  0, 16}, { 0,  0, 16}, },
   { {4, 0, 0}, { 8, 0, 0}, { 8,  0, 0}, { 8,  0,  0}, {16,  0,  0}, },
   { {0, 4, 0}, { 0, 4, 0}, { 0,  8, 0}, { 0,  8,  0}, { 0,  8,  0}, },
   { {2, 0, 4}, { 4, 0, 4}, { 4,  0, 4}, { 0,  4,  8}, { 0,  4,  8}, },
   { {1, 0, 4}, { 2, 0, 4}, { 0,  4, 4}, { 0,  0, 12}, { 0,  0, 12}, },
   { {0, 0, 4}, { 0, 0, 4}, { 0,  0, 4}, { 0,  0,  8}, { 0,  0,  8}, },
   { {3, 0, 0}, { 6, 0, 0}, { 4,  4, 0}, { 0,  4,  4}, { 0,  4,  4}, },
   { {2, 0, 0}, { 4, 0, 0}, { 4,  0, 0}, { 0,  4,  0}, { 0,  4,  0}, },
   { {1, 0, 0}, { 2, 0, 0}, { 0,  4, 0}, { 0,  0,  4}, { 0,  0,  4}, },
   { {0, 0, 0}, { 0, 0, 0}, { 0,  0, 0}, { 0,  0,  0}, { 0,  0,  0}, },
   { {0, 0, 1}, { 0, 0, 1}, { 0,  0, 1}, { 0,  0,  1}, { 0,  0,  1}, },
   { {0, 0, 2}, { 0, 0, 2}, { 0,  0, 2}, { 0,  0,  2}, { 0,  0,  2}, },
   { {0, 0, 3}, { 0, 0, 3}, { 0,  0, 3}, { 0,  0,  3}, { 0,  0,  3}, },
};
557 
558 static uint32_t
tiling_max_mip_tail(enum isl_tiling tiling,enum isl_surf_dim dim,uint32_t samples)559 tiling_max_mip_tail(enum isl_tiling tiling,
560                     enum isl_surf_dim dim,
561                     uint32_t samples)
562 {
563    /* In theory, miptails work for multisampled images, but we don't support
564     * mipmapped multisampling.
565     *
566     * BSpec 58770: Xe2 does not support miptails on multisampled images.
567     */
568    if (samples > 1)
569       return 0;
570 
571    int num_2d_table_rows;
572    int num_3d_table_rows;
573 
574    switch (tiling) {
575    case ISL_TILING_LINEAR:
576    case ISL_TILING_X:
577    case ISL_TILING_Y0:
578    case ISL_TILING_4:
579    case ISL_TILING_W:
580    case ISL_TILING_HIZ:
581    case ISL_TILING_CCS:
582    case ISL_TILING_GFX12_CCS:
583       /* There is no miptail for those tilings */
584       return 0;
585 
586    case ISL_TILING_SKL_Yf:
587    case ISL_TILING_SKL_Ys:
588       /* SKL PRMs, Volume 5: Memory Views :
589        *
590        * Given by the last row of the table in the following sections:
591        *
592        *    - Tiling and Mip Tail for 1D Surfaces
593        *    - Tiling and Mip Tail for 2D Surfaces
594        *    - Tiling and Mip Tail for 3D Surfaces
595        */
596       num_2d_table_rows = ARRAY_SIZE(skl_std_y_2d_miptail_offset_el);
597       num_3d_table_rows = ARRAY_SIZE(skl_std_y_3d_miptail_offset_el);
598       break;
599 
600    case ISL_TILING_ICL_Yf:
601    case ISL_TILING_ICL_Ys:
602       /* ICL PRMs, Volume 5: Memory Views :
603        *
604        *    - Tiling and Mip Tail for 1D Surfaces :
605        *        "There is no MIP Tail allowed for 1D surfaces because they are
606        *         not allowed to be tiled. They must be declared as linear."
607        *    - Tiling and Mip Tail for 2D Surfaces
608        *    - Tiling and Mip Tail for 3D Surfaces
609        */
610       num_2d_table_rows = ARRAY_SIZE(icl_std_y_2d_miptail_offset_el);
611       num_3d_table_rows = ARRAY_SIZE(icl_std_y_3d_miptail_offset_el);
612       break;
613 
614    case ISL_TILING_64:
615    case ISL_TILING_64_XE2:
616       /* ATS-M PRMS, Volume 5: Memory Data Formats :
617        *
618        *    - Tiling and Mip Tail for 1D Surfaces :
619        *       "There is no MIP Tail allowed for 1D surfaces because they are
620        *        not allowed to be tiled. They must be declared as linear."
621        *    - Tiling and Mip Tail for 2D Surfaces
622        *    - Tiling and Mip Tail for 3D Surfaces
623        */
624       num_2d_table_rows = ARRAY_SIZE(icl_std_y_2d_miptail_offset_el);
625       num_3d_table_rows = ARRAY_SIZE(acm_tile64_3d_miptail_offset_el);
626       break;
627 
628    default:
629       unreachable("Invalid tiling");
630    }
631 
632    assert(dim != ISL_SURF_DIM_1D);
633    const int num_rows = dim == ISL_SURF_DIM_2D ? num_2d_table_rows :
634                                                  num_3d_table_rows;
635    return num_rows - isl_get_miptail_base_row(tiling);
636 }
637 
638 /**
 * Returns an isl_tile_info representation of the given isl_tiling when
 * used in the given configuration.
641  *
642  * :param tiling:       |in|  The tiling format to introspect
643  * :param dim:          |in|  The dimensionality of the surface being tiled
644  * :param msaa_layout:  |in|  The layout of samples in the surface being tiled
645  * :param format_bpb:   |in|  The number of bits per surface element (block) for
646  *                            the surface being tiled
647  * :param samples:      |in|  The samples in the surface being tiled
648  * :param tile_info:    |out| Return parameter for the tiling information
649  */
650 void
isl_tiling_get_info(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t format_bpb,uint32_t samples,struct isl_tile_info * tile_info)651 isl_tiling_get_info(enum isl_tiling tiling,
652                     enum isl_surf_dim dim,
653                     enum isl_msaa_layout msaa_layout,
654                     uint32_t format_bpb,
655                     uint32_t samples,
656                     struct isl_tile_info *tile_info)
657 {
658    const uint32_t bs = format_bpb / 8;
659    struct isl_extent4d logical_el;
660    struct isl_extent2d phys_B;
661 
662    if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
663       /* It is possible to have non-power-of-two formats in a tiled buffer.
664        * The easiest way to handle this is to treat the tile as if it is three
665        * times as wide.  This way no pixel will ever cross a tile boundary.
666        * This really only works on a subset of tiling formats.
667        */
668       assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0 ||
669              tiling == ISL_TILING_4);
670       assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
671       isl_tiling_get_info(tiling, dim, msaa_layout, format_bpb / 3, samples,
672                           tile_info);
673       return;
674    }
675 
676    switch (tiling) {
677    case ISL_TILING_LINEAR:
678       assert(bs > 0);
679       logical_el = isl_extent4d(1, 1, 1, 1);
680       phys_B = isl_extent2d(bs, 1);
681       break;
682 
683    case ISL_TILING_X:
684       assert(bs > 0);
685       logical_el = isl_extent4d(512 / bs, 8, 1, 1);
686       phys_B = isl_extent2d(512, 8);
687       break;
688 
689    case ISL_TILING_Y0:
690    case ISL_TILING_4:
691       assert(bs > 0);
692       logical_el = isl_extent4d(128 / bs, 32, 1, 1);
693       phys_B = isl_extent2d(128, 32);
694       break;
695 
696    case ISL_TILING_W:
697       assert(bs == 1);
698       logical_el = isl_extent4d(64, 64, 1, 1);
699       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
700        *
701        *    "If the surface is a stencil buffer (and thus has Tile Mode set
702        *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
703        *    computed based on width, as the stencil buffer is stored with two
704        *    rows interleaved."
705        *
706        * This, together with the fact that stencil buffers are referred to as
707        * being Y-tiled in the PRMs for older hardware implies that the
708        * physical size of a W-tile is actually the same as for a Y-tile.
709        */
710       phys_B = isl_extent2d(128, 32);
711       break;
712 
713    case ISL_TILING_SKL_Yf:
714    case ISL_TILING_SKL_Ys:
715    case ISL_TILING_ICL_Yf:
716    case ISL_TILING_ICL_Ys: {
717       bool is_Ys = tiling == ISL_TILING_SKL_Ys ||
718                    tiling == ISL_TILING_ICL_Ys;
719       assert(format_bpb >= 8);
720 
721       switch (dim) {
722       case ISL_SURF_DIM_2D:
723          /* See the BSpec Memory Data Formats » Common Surface Formats »
724           * Surface Layout and Tiling [SKL+] » 2D Surfaces SKL+ » 2D/CUBE
725           * Alignment Requirement [SKL+]
726           *
727           * Or, look in the SKL PRM under Memory Views > Common Surface
728           * Formats > Surface Layout and Tiling > 2D Surfaces > 2D/CUBE
729           * Alignment Requirements.
730           */
731          logical_el = (struct isl_extent4d) {
732             .w = 1 << (6 - ((ffs(format_bpb) - 4) / 2) + (2 * is_Ys)),
733             .h = 1 << (6 - ((ffs(format_bpb) - 3) / 2) + (2 * is_Ys)),
734             .d = 1,
735             .a = 1,
736          };
737 
738          if (samples > 1 && tiling != ISL_TILING_SKL_Yf) {
739             /* SKL PRMs, Volume 5: Memory Views, 2D/CUBE Alignment
740              * Requirement:
741              *
742              *    "For MSFMT_MSS type multi-sampled TileYS surfaces, the
743              *     alignments given above must be divided by the appropriate
744              *     value from the table below."
745              *
746              * The formulas below reproduce those values.
747              */
748             if (msaa_layout == ISL_MSAA_LAYOUT_ARRAY) {
749                logical_el.w >>= (ffs(samples) - 0) / 2;
750                logical_el.h >>= (ffs(samples) - 1) / 2;
751                logical_el.a = samples;
752             }
753          }
754          break;
755 
756       case ISL_SURF_DIM_3D:
757          /* See the BSpec Memory Data Formats » Common Surface Formats »
758           * Surface Layout and Tiling [SKL+] » 3D Surfaces SKL+ » 3D Alignment
759           * Requirements [SKL+]
760           *
761           * Or, look in the SKL PRM under Memory Views > Common Surface
762           * Formats > Surface Layout and Tiling > 3D Surfaces > 3D Alignment
763           * Requirements.
764           */
765          logical_el = (struct isl_extent4d) {
766             .w = 1 << (4 - ((ffs(format_bpb) - 2) / 3) + (2 * is_Ys)),
767             .h = 1 << (4 - ((ffs(format_bpb) - 4) / 3) + (1 * is_Ys)),
768             .d = 1 << (4 - ((ffs(format_bpb) - 3) / 3) + (1 * is_Ys)),
769             .a = 1,
770          };
771          break;
772       default:
773          unreachable("Invalid dimension");
774       }
775 
776       uint32_t tile_size_B = is_Ys ? (1 << 16) : (1 << 12);
777 
778       phys_B.w = logical_el.width * bs;
779       phys_B.h = tile_size_B / phys_B.w;
780       break;
781    }
782    case ISL_TILING_64:
783       /* The tables below are taken from the "2D Surfaces" & "3D Surfaces"
784        * pages in the Bspec which are formulated in terms of the Cv and Cu
785        * constants. This is different from the tables in the "Tile64 Format"
786        * page which should be equivalent but are usually in terms of pixels.
787        * Also note that Cv and Cu are HxW order to match the Bspec table, not
788        * WxH order like you might expect.
789        *
790        * From the Bspec's or ATS-M PRMs Volume 5: Memory Data Formats, "Tile64
791        * Format" :
792        *
793        *    MSAA Depth/Stencil surface use IMS (Interleaved Multi Samples)
794        *    which means:
795        *
796        *    - Use the 1X MSAA (non-MSRT) version of the Tile64 equations and
797        *      let the client unit do the swizzling internally
798        *
799        * Surfaces using the IMS layout will use the mapping for 1x MSAA.
800        */
801 #define tile_extent2d(bs, cv, cu, a) \
802       isl_extent4d((1 << cu) / bs, 1 << cv, 1, a)
803 #define tile_extent3d(bs, cr, cv, cu) \
804       isl_extent4d((1 << cu) / bs, 1 << cv, 1 << cr, 1)
805 
806       if (dim == ISL_SURF_DIM_3D) {
807           switch (format_bpb) {
808           case 128: logical_el = tile_extent3d(bs, 4, 4, 8); break;
809           case  64: logical_el = tile_extent3d(bs, 4, 4, 8); break;
810           case  32: logical_el = tile_extent3d(bs, 4, 5, 7); break;
811           case  16: logical_el = tile_extent3d(bs, 5, 5, 6); break;
812           case   8: logical_el = tile_extent3d(bs, 5, 5, 6); break;
813           default: unreachable("Unsupported format size for 3D");
814           }
815       } else {
816           if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
817               switch (format_bpb) {
818               case 128: logical_el = tile_extent2d(bs, 6, 10, 1); break;
819               case  64: logical_el = tile_extent2d(bs, 6, 10, 1); break;
820               case  32: logical_el = tile_extent2d(bs, 7,  9, 1); break;
821               case  16: logical_el = tile_extent2d(bs, 7,  9, 1); break;
822               case   8: logical_el = tile_extent2d(bs, 8,  8, 1); break;
823               default: unreachable("Unsupported format size.");
824               }
825           } else if (samples == 2) {
826               switch (format_bpb) {
827               case 128: logical_el = tile_extent2d(bs, 6,  9, 2); break;
828               case  64: logical_el = tile_extent2d(bs, 6,  9, 2); break;
829               case  32: logical_el = tile_extent2d(bs, 7,  8, 2); break;
830               case  16: logical_el = tile_extent2d(bs, 7,  8, 2); break;
831               case   8: logical_el = tile_extent2d(bs, 8,  7, 2); break;
832               default: unreachable("Unsupported format size.");
833               }
834           } else {
835               switch (format_bpb) {
836               case 128: logical_el = tile_extent2d(bs, 5,  9, 4); break;
837               case  64: logical_el = tile_extent2d(bs, 5,  9, 4); break;
838               case  32: logical_el = tile_extent2d(bs, 6,  8, 4); break;
839               case  16: logical_el = tile_extent2d(bs, 6,  8, 4); break;
840               case   8: logical_el = tile_extent2d(bs, 7,  7, 4); break;
841               default: unreachable("Unsupported format size.");
842               }
843           }
844       }
845 
846 #undef tile_extent2d
847 #undef tile_extent3d
848 
849       phys_B.w = logical_el.w * bs;
850       phys_B.h = 64 * 1024 / phys_B.w;
851       break;
852 
853    case ISL_TILING_64_XE2:
854       /* The tables below are taken from BSpec 58767 which are formulated in
855        * terms of the Cv and Cu constants. This is different from the tables in
856        * the "Tile64 Format" page which should be equivalent but are usually in
857        * terms of pixels.
858        *
859        * Also note that Cv and Cu are HxW order to match the Bspec table, not
860        * WxH order like you might expect.
861        */
862 #define tile_extent2d(bs, cv, cu, a) \
863       isl_extent4d((1 << cu) / bs, 1 << cv, 1, a)
864 #define tile_extent3d(bs, cr, cv, cu) \
865       isl_extent4d((1 << cu) / bs, 1 << cv, 1 << cr, 1)
866 
867       if (dim == ISL_SURF_DIM_3D) {
868           switch (format_bpb) {
869           case 128: logical_el = tile_extent3d(bs, 4, 4, 8); break;
870           case  64: logical_el = tile_extent3d(bs, 4, 4, 8); break;
871           case  32: logical_el = tile_extent3d(bs, 4, 5, 7); break;
872           case  16: logical_el = tile_extent3d(bs, 5, 5, 6); break;
873           case   8: logical_el = tile_extent3d(bs, 5, 5, 6); break;
874           default: unreachable("Unsupported format size for 3D");
875           }
876       } else {
877           if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
878               switch (format_bpb) {
879               case 128: logical_el = tile_extent2d(bs, 6, 10, 1); break;
880               case  64: logical_el = tile_extent2d(bs, 6, 10, 1); break;
881               case  32: logical_el = tile_extent2d(bs, 7,  9, 1); break;
882               case  16: logical_el = tile_extent2d(bs, 7,  9, 1); break;
883               case   8: logical_el = tile_extent2d(bs, 8,  8, 1); break;
884               default: unreachable("Unsupported format size.");
885               }
886           } else if (samples == 2) {
887               switch (format_bpb) {
888               case 128: logical_el = tile_extent2d(bs, 5, 10, 2); break;
889               case  64: logical_el = tile_extent2d(bs, 6,  9, 2); break;
890               case  32: logical_el = tile_extent2d(bs, 7,  8, 2); break;
891               case  16: logical_el = tile_extent2d(bs, 7,  8, 2); break;
892               case   8: logical_el = tile_extent2d(bs, 8,  7, 2); break;
893               default: unreachable("Unsupported format size.");
894               }
895           } else if (samples == 4) {
896               switch (format_bpb) {
897               case 128: logical_el = tile_extent2d(bs, 5,  9, 4); break;
898               case  64: logical_el = tile_extent2d(bs, 5,  9, 4); break;
899               case  32: logical_el = tile_extent2d(bs, 6,  8, 4); break;
900               case  16: logical_el = tile_extent2d(bs, 6,  8, 4); break;
901               case   8: logical_el = tile_extent2d(bs, 7,  7, 4); break;
902               default: unreachable("Unsupported format size.");
903               }
904           } else if (samples == 8) {
905               switch (format_bpb) {
906               case 128: logical_el = tile_extent2d(bs, 5,  8, 8); break;
907               case  64: logical_el = tile_extent2d(bs, 5,  8, 8); break;
908               case  32: logical_el = tile_extent2d(bs, 5,  8, 8); break;
909               case  16: logical_el = tile_extent2d(bs, 6,  7, 8); break;
910               case   8: logical_el = tile_extent2d(bs, 6,  7, 8); break;
911               default: unreachable("Unsupported format size.");
912               }
913           } else if (samples == 16) {
914               switch (format_bpb) {
915               case 128: logical_el = tile_extent2d(bs, 4,  8, 16); break;
916               case  64: logical_el = tile_extent2d(bs, 5,  7, 16); break;
917               case  32: logical_el = tile_extent2d(bs, 5,  7, 16); break;
918               case  16: logical_el = tile_extent2d(bs, 5,  7, 16); break;
919               case   8: logical_el = tile_extent2d(bs, 6,  6, 16); break;
920               default: unreachable("Unsupported format size.");
921               }
922           }
923       }
924 
925 #undef tile_extent2d
926 #undef tile_extent3d
927 
928       phys_B.w = logical_el.w * bs;
929       phys_B.h = 64 * 1024 / phys_B.w;
930       break;
931 
932    case ISL_TILING_HIZ:
933       /* HiZ buffers are required to have a 128bpb HiZ format. The tiling has
934        * the same physical dimensions as Y-tiling but actually has two HiZ
935        * columns per Y-tiled column.
936        */
937       assert(bs == 16);
938       logical_el = isl_extent4d(16, 16, 1, 1);
939       phys_B = isl_extent2d(128, 32);
940       break;
941 
942    case ISL_TILING_CCS:
943       /* CCS surfaces are required to have one of the GENX_CCS_* formats which
944        * have a block size of 1 or 2 bits per block and each CCS element
945        * corresponds to one cache-line pair in the main surface.  From the Sky
946        * Lake PRM Vol. 12 in the section on planes:
947        *
948        *    "The Color Control Surface (CCS) contains the compression status
949        *    of the cache-line pairs. The compression state of the cache-line
950        *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
951        *    represents an area on the main surface of 16x16 sets of 128 byte
952        *    Y-tiled cache-line-pairs. CCS is always Y tiled."
953        *
954        * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
955        * Since each cache line corresponds to a 16x16 set of cache-line pairs,
956        * that yields total tile area of 128x128 cache-line pairs or CCS
957        * elements.  On older hardware, each CCS element is 1 bit and the tile
958        * is 128x256 elements.
959        */
960       assert(format_bpb == 1 || format_bpb == 2);
961       logical_el = isl_extent4d(128, 256 / format_bpb, 1, 1);
962       phys_B = isl_extent2d(128, 32);
963       break;
964 
965    case ISL_TILING_GFX12_CCS:
966       /* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory
967        * Compression > Memory Compression - Gfx12:
968        *
969        *    4 bits of auxiliary plane data are required for 2 cachelines of
970        *    main surface data. This results in a single cacheline of auxiliary
971        *    plane data mapping to 4 4K pages of main surface data for the 4K
972        *    pages (tile Y ) and 1 64K Tile Ys page.
973        *
974        * The Y-tiled pairing bit of 9 shown in the table below that Bspec
975        * section expresses that the 2 cachelines of main surface data are
976        * horizontally adjacent.
977        *
978        * TODO: Handle Ys, Yf and their pairing bits.
979        *
980        * Therefore, each CCS cacheline represents a 512Bx32 row area and each
981        * element represents a 32Bx4 row area.
982        */
983       assert(format_bpb == 4);
984       logical_el = isl_extent4d(16, 8, 1, 1);
985       phys_B = isl_extent2d(64, 1);
986       break;
987 
988    default:
989       unreachable("not reached");
990    } /* end switch */
991 
992    *tile_info = (struct isl_tile_info) {
993       .tiling = tiling,
994       .format_bpb = format_bpb,
995       .logical_extent_el = logical_el,
996       .phys_extent_B = phys_B,
997       .max_miptail_levels = tiling_max_mip_tail(tiling, dim, samples),
998    };
999 }
1000 
1001 bool
isl_color_value_is_zero(union isl_color_value value,enum isl_format format)1002 isl_color_value_is_zero(union isl_color_value value,
1003                         enum isl_format format)
1004 {
1005    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
1006 
1007 #define RETURN_FALSE_IF_NOT_0(c, i) \
1008    if (fmtl->channels.c.bits && value.u32[i] != 0) \
1009       return false
1010 
1011    RETURN_FALSE_IF_NOT_0(r, 0);
1012    RETURN_FALSE_IF_NOT_0(g, 1);
1013    RETURN_FALSE_IF_NOT_0(b, 2);
1014    RETURN_FALSE_IF_NOT_0(a, 3);
1015 
1016 #undef RETURN_FALSE_IF_NOT_0
1017 
1018    return true;
1019 }
1020 
1021 bool
isl_color_value_is_zero_one(union isl_color_value value,enum isl_format format)1022 isl_color_value_is_zero_one(union isl_color_value value,
1023                             enum isl_format format)
1024 {
1025    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
1026 
1027 #define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
1028    if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
1029       return false
1030 
1031    if (isl_format_has_int_channel(format)) {
1032       RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
1033       RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
1034       RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
1035       RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
1036    } else {
1037       RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
1038       RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
1039       RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
1040       RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
1041    }
1042 
1043 #undef RETURN_FALSE_IF_NOT_0_1
1044 
1045    return true;
1046 }
1047 
1048 /**
1049  * @param[out] tiling is set only on success
1050  */
1051 static bool
isl_surf_choose_tiling(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling * tiling)1052 isl_surf_choose_tiling(const struct isl_device *dev,
1053                        const struct isl_surf_init_info *restrict info,
1054                        enum isl_tiling *tiling)
1055 {
1056    isl_tiling_flags_t tiling_flags = info->tiling_flags;
1057 
1058    /* HiZ surfaces always use the HiZ tiling */
1059    if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
1060       assert(isl_format_is_hiz(info->format));
1061       assert(tiling_flags == ISL_TILING_HIZ_BIT);
1062       *tiling = isl_tiling_flag_to_enum(tiling_flags);
1063       return true;
1064    }
1065 
1066    /* CCS surfaces always use the CCS tiling */
1067    if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
1068       assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
1069       UNUSED bool ivb_ccs = ISL_GFX_VER(dev) < 12 &&
1070                             tiling_flags == ISL_TILING_CCS_BIT;
1071       UNUSED bool tgl_ccs = ISL_GFX_VER(dev) >= 12 &&
1072                             tiling_flags == ISL_TILING_GFX12_CCS_BIT;
1073       assert(ivb_ccs != tgl_ccs);
1074       *tiling = isl_tiling_flag_to_enum(tiling_flags);
1075       return true;
1076    }
1077 
1078    if (ISL_GFX_VERX10(dev) >= 125) {
1079       isl_gfx125_filter_tiling(dev, info, &tiling_flags);
1080    } else if (ISL_GFX_VER(dev) >= 6) {
1081       isl_gfx6_filter_tiling(dev, info, &tiling_flags);
1082    } else {
1083       isl_gfx4_filter_tiling(dev, info, &tiling_flags);
1084    }
1085 
1086    #define CHOOSE(__tiling) \
1087       do { \
1088          if (tiling_flags & (1u << (__tiling))) { \
1089             *tiling = (__tiling); \
1090             return true; \
1091           } \
1092       } while (0)
1093 
1094    /* Of the tiling modes remaining, choose the one that offers the best
1095     * performance.
1096     */
1097 
1098    if (info->dim == ISL_SURF_DIM_1D) {
1099       /* Prefer linear for 1D surfaces because they do not benefit from
1100        * tiling. To the contrary, tiling leads to wasted memory and poor
1101        * memory locality due to the swizzling and alignment restrictions
1102        * required in tiled surfaces.
1103        */
1104       CHOOSE(ISL_TILING_LINEAR);
1105    }
1106 
1107    /* For sparse images, prefer the formats that use the standard block
1108     * shapes.
1109     */
1110    if (info->usage & ISL_SURF_USAGE_SPARSE_BIT) {
1111       CHOOSE(ISL_GFX_VER(dev) >= 20 ? ISL_TILING_64_XE2 : ISL_TILING_64);
1112       CHOOSE(ISL_TILING_ICL_Ys);
1113       CHOOSE(ISL_TILING_SKL_Ys);
1114    }
1115 
1116    /* Choose suggested 4K tilings first, then 64K tilings:
1117     *
1118     * Then following quotes can be found in the SKL PRMs,
1119     *   Volume 5: Memory Views, Address Tiling Function Introduction
1120     * and from the ATS-M PRMs,
1121     *   Volume 5: Memory Data Formats, Address Tiling Function Introduction
1122     *
1123     *    "TileY: Used for most tiled surfaces when TR_MODE=TR_NONE."
1124     *    "Tile4: 4KB tiling mode based on previously-supported TileY"
1125     *    "TileYF: 4KB tiling mode based on TileY"
1126     *    "TileYS: 64KB tiling mode based on TileY"
1127     *    "Tile64: 64KB tiling mode which support standard-tiling including
1128     *     Mip Tails"
1129     *
1130     * When TileYF and TileYS are used TR_MODE != TR_NONE.
1131     */
1132    CHOOSE(ISL_TILING_Y0);
1133    CHOOSE(ISL_TILING_4);
1134    CHOOSE(ISL_TILING_SKL_Yf);
1135    CHOOSE(ISL_TILING_ICL_Yf);
1136    CHOOSE(ISL_TILING_SKL_Ys);
1137    CHOOSE(ISL_TILING_ICL_Ys);
1138    CHOOSE(ISL_GFX_VER(dev) >= 20 ? ISL_TILING_64_XE2 : ISL_TILING_64);
1139 
1140    CHOOSE(ISL_TILING_X);
1141    CHOOSE(ISL_TILING_W);
1142    CHOOSE(ISL_TILING_LINEAR);
1143 
1144    #undef CHOOSE
1145 
1146    /* No tiling mode accommodates the inputs. */
1147    assert(tiling_flags == 0);
1148    return notify_failure(info, "no supported tiling");
1149 }
1150 
1151 static bool
isl_choose_msaa_layout(const struct isl_device * dev,const struct isl_surf_init_info * info,enum isl_tiling tiling,enum isl_msaa_layout * msaa_layout)1152 isl_choose_msaa_layout(const struct isl_device *dev,
1153                  const struct isl_surf_init_info *info,
1154                  enum isl_tiling tiling,
1155                  enum isl_msaa_layout *msaa_layout)
1156 {
1157    if (ISL_GFX_VER(dev) >= 8) {
1158       return isl_gfx8_choose_msaa_layout(dev, info, tiling, msaa_layout);
1159    } else if (ISL_GFX_VER(dev) >= 7) {
1160       return isl_gfx7_choose_msaa_layout(dev, info, tiling, msaa_layout);
1161    } else if (ISL_GFX_VER(dev) >= 6) {
1162       return isl_gfx6_choose_msaa_layout(dev, info, tiling, msaa_layout);
1163    } else {
1164       return isl_gfx4_choose_msaa_layout(dev, info, tiling, msaa_layout);
1165    }
1166 }
1167 
1168 struct isl_extent2d
isl_get_interleaved_msaa_px_size_sa(uint32_t samples)1169 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
1170 {
1171    assert(isl_is_pow2(samples));
1172 
1173    /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
1174     * Sizes (p133):
1175     *
1176     *    If the surface is multisampled and it is a depth or stencil surface
1177     *    or Multisampled Surface StorageFormat in SURFACE_STATE is
1178     *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
1179     *    proceeding: [...]
1180     */
1181    return (struct isl_extent2d) {
1182       .width = 1 << ((ffs(samples) - 0) / 2),
1183       .height = 1 << ((ffs(samples) - 1) / 2),
1184    };
1185 }
1186 
1187 static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,uint32_t * width,uint32_t * height)1188 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
1189                                     uint32_t *width, uint32_t *height)
1190 {
1191    const struct isl_extent2d px_size_sa =
1192       isl_get_interleaved_msaa_px_size_sa(samples);
1193 
1194    if (width)
1195       *width = isl_align(*width, 2) * px_size_sa.width;
1196    if (height)
1197       *height = isl_align(*height, 2) * px_size_sa.height;
1198 }
1199 
1200 static enum isl_array_pitch_span
isl_choose_array_pitch_span(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_level0_sa)1201 isl_choose_array_pitch_span(const struct isl_device *dev,
1202                             const struct isl_surf_init_info *restrict info,
1203                             enum isl_dim_layout dim_layout,
1204                             const struct isl_extent4d *phys_level0_sa)
1205 {
1206    switch (dim_layout) {
1207    case ISL_DIM_LAYOUT_GFX9_1D:
1208    case ISL_DIM_LAYOUT_GFX4_2D:
1209       if (ISL_GFX_VER(dev) >= 8) {
1210          /* QPitch becomes programmable in Broadwell. So choose the
1211           * most compact QPitch possible in order to conserve memory.
1212           *
1213           * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
1214           * >> RENDER_SURFACE_STATE Surface QPitch (p325):
1215           *
1216           *    - Software must ensure that this field is set to a value
1217           *      sufficiently large such that the array slices in the surface
1218           *      do not overlap. Refer to the Memory Data Formats section for
1219           *      information on how surfaces are stored in memory.
1220           *
1221           *    - This field specifies the distance in rows between array
1222           *      slices.  It is used only in the following cases:
1223           *
1224           *          - Surface Array is enabled OR
1225           *          - Number of Mulitsamples is not NUMSAMPLES_1 and
1226           *            Multisampled Surface Storage Format set to MSFMT_MSS OR
1227           *          - Surface Type is SURFTYPE_CUBE
1228           */
1229          return ISL_ARRAY_PITCH_SPAN_COMPACT;
1230       } else if (ISL_GFX_VER(dev) >= 7) {
1231          /* Note that Ivybridge introduces
1232           * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
1233           * driver more control over the QPitch.
1234           */
1235 
1236          if (phys_level0_sa->array_len == 1) {
1237             /* The hardware will never use the QPitch. So choose the most
1238              * compact QPitch possible in order to conserve memory.
1239              */
1240             return ISL_ARRAY_PITCH_SPAN_COMPACT;
1241          }
1242 
1243          if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
1244              (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
1245             /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
1246              * Section 6.18.4.7: Surface Arrays (p112):
1247              *
1248              *    If Surface Array Spacing is set to ARYSPC_FULL (note that
1249              *    the depth buffer and stencil buffer have an implied value of
1250              *    ARYSPC_FULL):
1251              */
1252             return ISL_ARRAY_PITCH_SPAN_FULL;
1253          }
1254 
1255          if (info->levels == 1) {
1256             /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
1257              * to ARYSPC_LOD0.
1258              */
1259             return ISL_ARRAY_PITCH_SPAN_COMPACT;
1260          }
1261 
1262          return ISL_ARRAY_PITCH_SPAN_FULL;
1263       } else if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
1264                  ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
1265                  isl_surf_usage_is_stencil(info->usage)) {
1266          /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1267           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1268           *
1269           *    The separate stencil buffer does not support mip mapping, thus
1270           *    the storage for LODs other than LOD 0 is not needed.
1271           */
1272          assert(info->levels == 1);
1273          return ISL_ARRAY_PITCH_SPAN_COMPACT;
1274       } else {
1275          if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
1276              ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
1277              isl_surf_usage_is_stencil(info->usage)) {
1278             /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1279              * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1280              *
1281              *    The separate stencil buffer does not support mip mapping,
1282              *    thus the storage for LODs other than LOD 0 is not needed.
1283              */
1284             assert(info->levels == 1);
1285             assert(phys_level0_sa->array_len == 1);
1286             return ISL_ARRAY_PITCH_SPAN_COMPACT;
1287          }
1288 
1289          if (phys_level0_sa->array_len == 1) {
1290             /* The hardware will never use the QPitch. So choose the most
1291              * compact QPitch possible in order to conserve memory.
1292              */
1293             return ISL_ARRAY_PITCH_SPAN_COMPACT;
1294          }
1295 
1296          return ISL_ARRAY_PITCH_SPAN_FULL;
1297       }
1298 
1299    case ISL_DIM_LAYOUT_GFX4_3D:
1300       /* The hardware will never use the QPitch. So choose the most
1301        * compact QPitch possible in order to conserve memory.
1302        */
1303       return ISL_ARRAY_PITCH_SPAN_COMPACT;
1304 
1305    case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1306       /* Each array image in the gfx6 stencil of HiZ surface is compact in the
1307        * sense that every LOD is a compact array of the same size as LOD0.
1308        */
1309       return ISL_ARRAY_PITCH_SPAN_COMPACT;
1310    }
1311 
1312    unreachable("bad isl_dim_layout");
1313    return ISL_ARRAY_PITCH_SPAN_FULL;
1314 }
1315 
1316 static void
isl_choose_image_alignment_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling tiling,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,struct isl_extent3d * image_align_el)1317 isl_choose_image_alignment_el(const struct isl_device *dev,
1318                               const struct isl_surf_init_info *restrict info,
1319                               enum isl_tiling tiling,
1320                               enum isl_dim_layout dim_layout,
1321                               enum isl_msaa_layout msaa_layout,
1322                               struct isl_extent3d *image_align_el)
1323 {
1324    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1325    if (fmtl->txc == ISL_TXC_MCS) {
1326       /*
1327        * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
1328        *
1329        * Height, width, and layout of MCS buffer in this case must match with
1330        * Render Target height, width, and layout. MCS buffer is tiledY.
1331        *
1332        * Pick a vertical and horizontal alignment that matches the main render
1333        * target. Vertical alignment is important for properly spacing an array
1334        * of MCS images. Horizontal alignment is not expected to matter because
1335        * MCS is not mipmapped. Regardless, we pick a valid value here.
1336        */
1337       if (ISL_GFX_VERX10(dev) >= 125) {
1338          *image_align_el = isl_extent3d(128 * 8 / fmtl->bpb, 4, 1);
1339       } else if (ISL_GFX_VER(dev) >= 8) {
1340          *image_align_el = isl_extent3d(16, 4, 1);
1341       } else {
1342          *image_align_el = isl_extent3d(4, 4, 1);
1343       }
1344       return;
1345    } else if (fmtl->txc == ISL_TXC_HIZ) {
1346       assert(ISL_GFX_VER(dev) >= 6);
1347       if (ISL_GFX_VER(dev) == 6) {
1348          /* HiZ surfaces on Sandy Bridge are packed tightly. */
1349          *image_align_el = isl_extent3d(1, 1, 1);
1350       } else if (ISL_GFX_VER(dev) < 12) {
1351          /* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the
1352           * primary surface which works out to 2x2 HiZ elements.
1353           */
1354          *image_align_el = isl_extent3d(2, 2, 1);
1355       } else {
1356          /* We choose the alignments based on the docs and what we've seen on
1357           * prior platforms. From the TGL PRM Vol. 9, "Hierarchical Depth
1358           * Buffer":
1359           *
1360           *    The height and width of the hierarchical depth buffer that must
1361           *    be allocated are computed by the following formulas, where HZ
1362           *    is the hierarchical depth buffer and Z is the depth buffer. The
1363           *    Z_Height, Z_Width, and Z_Depth values given in these formulas
1364           *    are those present in 3DSTATE_DEPTH_BUFFER incremented by one.
1365           *
1366           * The note about 3DSTATE_DEPTH_BUFFER tells us that the dimensions
1367           * in the following formula refers to the base level. The key formula
1368           * for the horizontal alignment is:
1369           *
1370           *    HZ_Width (bytes) [=]
1371           *    ceiling(Z_Width / 16) * 16
1372           *
1373           * This type of formula is used when sizing compression blocks. So,
1374           * the docs seem to say that the HiZ format has a block width of 16,
1375           * and thus, the surface has a minimum horizontal alignment of 16
1376           * pixels. This formula hasn't changed from prior platforms (where
1377           * we've chosen a horizontal alignment of 16), so we should be on the
1378           * right track. As for the vertical alignment, we're told:
1379           *
1380           *    To compute the minimum QPitch for the HZ surface, the height of
1381           *    each LOD in pixels is determined using the equations for hL in
1382           *    the GPU Overview volume, using a vertical alignment j=16.
1383           *
1384           * We're not calculating the QPitch right now, but the vertical
1385           * alignment is plainly given as 16 rows in the depth buffer.
1386           *
1387           * As a result, we believe that HiZ surfaces are aligned to 16x16
1388           * pixels in the primary surface. We divide this area by the HiZ
1389           * block dimensions to get the alignment in terms of HiZ blocks.
1390           */
1391          *image_align_el = isl_extent3d(16 / fmtl->bw, 16 / fmtl->bh, 1);
1392       }
1393       return;
1394    }
1395 
1396    if (ISL_GFX_VERX10(dev) >= 125) {
1397       isl_gfx125_choose_image_alignment_el(dev, info, tiling, dim_layout,
1398                                            msaa_layout, image_align_el);
1399    } else if (ISL_GFX_VER(dev) >= 12) {
1400       isl_gfx12_choose_image_alignment_el(dev, info, tiling, dim_layout,
1401                                           msaa_layout, image_align_el);
1402    } else if (ISL_GFX_VER(dev) >= 9) {
1403       isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout,
1404                                          msaa_layout, image_align_el);
1405    } else if (ISL_GFX_VER(dev) >= 8) {
1406       isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout,
1407                                          msaa_layout, image_align_el);
1408    } else if (ISL_GFX_VER(dev) >= 7) {
1409       isl_gfx7_choose_image_alignment_el(dev, info, tiling, dim_layout,
1410                                           msaa_layout, image_align_el);
1411    } else if (ISL_GFX_VER(dev) >= 6) {
1412       isl_gfx6_choose_image_alignment_el(dev, info, tiling, dim_layout,
1413                                          msaa_layout, image_align_el);
1414    } else {
1415       isl_gfx4_choose_image_alignment_el(dev, info, tiling, dim_layout,
1416                                          msaa_layout, image_align_el);
1417    }
1418 }
1419 
1420 static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device * dev,enum isl_surf_dim logical_dim,enum isl_tiling tiling,isl_surf_usage_flags_t usage)1421 isl_surf_choose_dim_layout(const struct isl_device *dev,
1422                            enum isl_surf_dim logical_dim,
1423                            enum isl_tiling tiling,
1424                            isl_surf_usage_flags_t usage)
1425 {
1426    /* Sandy bridge needs a special layout for HiZ and stencil. */
1427    if (ISL_GFX_VER(dev) == 6 &&
1428        (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
1429       return ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ;
1430 
1431    if (ISL_GFX_VER(dev) >= 9) {
1432       switch (logical_dim) {
1433       case ISL_SURF_DIM_1D:
1434          /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
1435           *
1436           *    One-dimensional surfaces use a tiling mode of linear.
1437           *    Technically, they are not tiled resources, but the Tiled
1438           *    Resource Mode field in RENDER_SURFACE_STATE is still used to
1439           *    indicate the alignment requirements for this linear surface
1440           *    (See 1D Alignment requirements for how 4K and 64KB Tiled
1441           *    Resource Modes impact alignment). Alternatively, a 1D surface
1442           *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
1443           *    a height of 0.
1444           *
1445           * In other words, ISL_DIM_LAYOUT_GFX9_1D is only used for linear
1446           * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GFX4_2D is used.
1447           */
1448          if (tiling == ISL_TILING_LINEAR)
1449             return ISL_DIM_LAYOUT_GFX9_1D;
1450          else
1451             return ISL_DIM_LAYOUT_GFX4_2D;
1452       case ISL_SURF_DIM_2D:
1453       case ISL_SURF_DIM_3D:
1454          return ISL_DIM_LAYOUT_GFX4_2D;
1455       }
1456    } else {
1457       switch (logical_dim) {
1458       case ISL_SURF_DIM_1D:
1459       case ISL_SURF_DIM_2D:
1460          /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1461           *
1462           * The cube face textures are stored in the same way as 3D surfaces
1463           * are stored (see section 6.17.5 for details).  For cube surfaces,
1464           * however, the depth is equal to the number of faces (always 6) and
1465           * is not reduced for each MIP.
1466           */
1467          if (ISL_GFX_VER(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
1468             return ISL_DIM_LAYOUT_GFX4_3D;
1469 
1470          return ISL_DIM_LAYOUT_GFX4_2D;
1471       case ISL_SURF_DIM_3D:
1472          return ISL_DIM_LAYOUT_GFX4_3D;
1473       }
1474    }
1475 
1476    unreachable("bad isl_surf_dim");
1477    return ISL_DIM_LAYOUT_GFX4_2D;
1478 }
1479 
1480 /**
1481  * Calculate the physical extent of the surface's first level, in units of
1482  * surface samples.
1483  */
1484 static void
isl_calc_phys_level0_extent_sa(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,enum isl_tiling tiling,enum isl_msaa_layout msaa_layout,struct isl_extent4d * phys_level0_sa)1485 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
1486                                const struct isl_surf_init_info *restrict info,
1487                                enum isl_dim_layout dim_layout,
1488                                enum isl_tiling tiling,
1489                                enum isl_msaa_layout msaa_layout,
1490                                struct isl_extent4d *phys_level0_sa)
1491 {
1492    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1493 
1494    if (isl_format_is_planar(info->format))
1495       unreachable("Planar formats unsupported");
1496 
1497    switch (info->dim) {
1498    case ISL_SURF_DIM_1D:
1499       assert(info->height == 1);
1500       assert(info->depth == 1);
1501       assert(info->samples == 1);
1502 
1503       switch (dim_layout) {
1504       case ISL_DIM_LAYOUT_GFX4_3D:
1505          unreachable("bad isl_dim_layout");
1506 
1507       case ISL_DIM_LAYOUT_GFX9_1D:
1508       case ISL_DIM_LAYOUT_GFX4_2D:
1509       case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1510          *phys_level0_sa = (struct isl_extent4d) {
1511             .w = info->width,
1512             .h = 1,
1513             .d = 1,
1514             .a = info->array_len,
1515          };
1516          break;
1517       }
1518       break;
1519 
1520    case ISL_SURF_DIM_2D:
1521       if (ISL_GFX_VER(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
1522          assert(dim_layout == ISL_DIM_LAYOUT_GFX4_3D);
1523       else
1524          assert(dim_layout == ISL_DIM_LAYOUT_GFX4_2D ||
1525                 dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);
1526 
1527       switch (msaa_layout) {
1528       case ISL_MSAA_LAYOUT_NONE:
1529          assert(info->depth == 1);
1530          assert(info->samples == 1);
1531 
1532          *phys_level0_sa = (struct isl_extent4d) {
1533             .w = info->width,
1534             .h = info->height,
1535             .d = 1,
1536             .a = info->array_len,
1537          };
1538          break;
1539 
1540       case ISL_MSAA_LAYOUT_ARRAY:
1541          assert(info->depth == 1);
1542          assert(info->levels == 1);
1543          assert(isl_format_supports_multisampling(dev->info, info->format));
1544          assert(fmtl->bw == 1 && fmtl->bh == 1);
1545 
1546          *phys_level0_sa = (struct isl_extent4d) {
1547             .w = info->width,
1548             .h = info->height,
1549             .d = 1,
1550             .a = info->array_len * info->samples,
1551          };
1552          break;
1553 
1554       case ISL_MSAA_LAYOUT_INTERLEAVED:
1555          assert(info->depth == 1);
1556          assert(info->levels == 1);
1557          assert(isl_format_supports_multisampling(dev->info, info->format));
1558 
1559          *phys_level0_sa = (struct isl_extent4d) {
1560             .w = info->width,
1561             .h = info->height,
1562             .d = 1,
1563             .a = info->array_len,
1564          };
1565 
1566          isl_msaa_interleaved_scale_px_to_sa(info->samples,
1567                                              &phys_level0_sa->w,
1568                                              &phys_level0_sa->h);
1569          break;
1570       }
1571       break;
1572 
1573    case ISL_SURF_DIM_3D:
1574       assert(info->array_len == 1);
1575       assert(info->samples == 1);
1576 
1577       if (fmtl->bd > 1) {
1578          isl_finishme("%s:%s: compression block with depth > 1",
1579                       __FILE__, __func__);
1580       }
1581 
1582       switch (dim_layout) {
1583       case ISL_DIM_LAYOUT_GFX9_1D:
1584       case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1585          unreachable("bad isl_dim_layout");
1586 
1587       case ISL_DIM_LAYOUT_GFX4_2D:
1588       case ISL_DIM_LAYOUT_GFX4_3D:
1589          *phys_level0_sa = (struct isl_extent4d) {
1590             .w = info->width,
1591             .h = info->height,
1592             .d = info->depth,
1593             .a = 1,
1594          };
1595          break;
1596       }
1597       break;
1598    }
1599 }
1600 
1601 static void
isl_get_miptail_level_offset_el(enum isl_tiling tiling,enum isl_surf_dim dim,uint32_t format_bpb,uint32_t level,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el)1602 isl_get_miptail_level_offset_el(enum isl_tiling tiling,
1603                                 enum isl_surf_dim dim,
1604                                 uint32_t format_bpb,
1605                                 uint32_t level,
1606                                 uint32_t *x_offset_el,
1607                                 uint32_t *y_offset_el,
1608                                 uint32_t *z_offset_el)
1609 {
1610    uint32_t row = isl_get_miptail_base_row(tiling) + level;
1611    uint32_t col = 8 - ffs(format_bpb);
1612 
1613    switch (dim) {
1614    case ISL_SURF_DIM_2D:
1615       switch (tiling) {
1616       case ISL_TILING_64:
1617       case ISL_TILING_64_XE2:
1618       case ISL_TILING_ICL_Yf:
1619       case ISL_TILING_ICL_Ys:
1620          assert(row < ARRAY_SIZE(icl_std_y_2d_miptail_offset_el));
1621          assert(col < ARRAY_SIZE(icl_std_y_2d_miptail_offset_el[0]));
1622          *x_offset_el = icl_std_y_2d_miptail_offset_el[row][col][0];
1623          *y_offset_el = icl_std_y_2d_miptail_offset_el[row][col][1];
1624          break;
1625       case ISL_TILING_SKL_Yf:
1626       case ISL_TILING_SKL_Ys:
1627          assert(row < ARRAY_SIZE(skl_std_y_2d_miptail_offset_el));
1628          assert(col < ARRAY_SIZE(skl_std_y_2d_miptail_offset_el[0]));
1629          *x_offset_el = skl_std_y_2d_miptail_offset_el[row][col][0];
1630          *y_offset_el = skl_std_y_2d_miptail_offset_el[row][col][1];
1631          break;
1632       default:
1633          unreachable("invalid tiling");
1634       }
1635       *z_offset_el = 0;
1636       break;
1637 
1638    case ISL_SURF_DIM_3D:
1639       switch (tiling) {
1640       case ISL_TILING_64:
1641       case ISL_TILING_64_XE2:
1642          assert(row < ARRAY_SIZE(acm_tile64_3d_miptail_offset_el));
1643          assert(col < ARRAY_SIZE(acm_tile64_3d_miptail_offset_el[0]));
1644          *x_offset_el = acm_tile64_3d_miptail_offset_el[row][col][0];
1645          *y_offset_el = acm_tile64_3d_miptail_offset_el[row][col][1];
1646          *z_offset_el = acm_tile64_3d_miptail_offset_el[row][col][2];
1647          break;
1648       case ISL_TILING_ICL_Yf:
1649       case ISL_TILING_ICL_Ys:
1650          assert(row < ARRAY_SIZE(icl_std_y_3d_miptail_offset_el));
1651          assert(col < ARRAY_SIZE(icl_std_y_3d_miptail_offset_el[0]));
1652          *x_offset_el = icl_std_y_3d_miptail_offset_el[row][col][0];
1653          *y_offset_el = icl_std_y_3d_miptail_offset_el[row][col][1];
1654          *z_offset_el = icl_std_y_3d_miptail_offset_el[row][col][2];
1655          break;
1656       case ISL_TILING_SKL_Yf:
1657       case ISL_TILING_SKL_Ys:
1658          assert(row < ARRAY_SIZE(skl_std_y_3d_miptail_offset_el));
1659          assert(col < ARRAY_SIZE(skl_std_y_3d_miptail_offset_el[0]));
1660          *x_offset_el = skl_std_y_3d_miptail_offset_el[row][col][0];
1661          *y_offset_el = skl_std_y_3d_miptail_offset_el[row][col][1];
1662          *z_offset_el = skl_std_y_3d_miptail_offset_el[row][col][2];
1663          break;
1664       default:
1665          unreachable("invalid tiling");
1666       }
1667       break;
1668 
1669    case ISL_SURF_DIM_1D:
1670       unreachable("invalid dimension");
1671    }
1672 }
1673 
1674 static uint32_t
isl_choose_miptail_start_level(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info)1675 isl_choose_miptail_start_level(const struct isl_device *dev,
1676                                const struct isl_surf_init_info *restrict info,
1677                                const struct isl_tile_info *tile_info)
1678 {
1679    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1680 
1681    if (tile_info->max_miptail_levels == 0)
1682       return info->levels;
1683 
1684    /* SKL PRMs, Volume 5: Memory Views, YUV 4:2:0 Format Memory Organization :
1685     *
1686     *    "Planar YUV does not support MIP Tails as part of Standard Tiling.
1687     *     The MIP Tail Start field in RENDER_SURFACE_STATE must be programmed
1688     *     to 15."
1689     */
1690    if (isl_format_is_planar(info->format))
1691       return 15;
1692 
1693    /* TODO: figure out why having YUV formats in the miptail on Gfx12 does not
1694     *       work.
1695     */
1696    if (ISL_GFX_VER(dev) == 12 && isl_format_is_yuv(info->format))
1697       return 15;
1698 
1699    assert(isl_tiling_is_64(tile_info->tiling) ||
1700           isl_tiling_is_std_y(tile_info->tiling));
1701    assert(info->samples == 1);
1702 
1703    uint32_t max_miptail_levels = tile_info->max_miptail_levels;
1704 
1705    /* Start with the minimum number of levels that will fit in the tile */
1706    uint32_t min_miptail_start =
1707       info->levels > max_miptail_levels ? info->levels - max_miptail_levels : 0;
1708 
1709    /* Account for the specified minimum */
1710    min_miptail_start = MAX(min_miptail_start, info->min_miptail_start_level);
1711 
1712    struct isl_extent3d level0_extent_el = {
1713       .w = isl_align_div_npot(info->width, fmtl->bw),
1714       .h = isl_align_div_npot(info->height, fmtl->bh),
1715       .d = isl_align_div_npot(info->depth, fmtl->bd),
1716    };
1717 
1718    /* The first miptail slot takes up the entire right side of the tile. So,
1719     * the extent is just the distance from the offset of the first level to
1720     * the corner of the tile.
1721     */
1722    uint32_t level0_x_offset_el, level0_y_offset_el, level0_z_offset_el;
1723    isl_get_miptail_level_offset_el(tile_info->tiling, info->dim,
1724                                    fmtl->bpb, 0, /* level */
1725                                    &level0_x_offset_el,
1726                                    &level0_y_offset_el,
1727                                    &level0_z_offset_el);
1728    struct isl_extent3d miptail_level0_extent_el = {
1729       .w = tile_info->logical_extent_el.w - level0_x_offset_el,
1730       .h = tile_info->logical_extent_el.h - level0_y_offset_el,
1731       .d = tile_info->logical_extent_el.d - level0_z_offset_el,
1732    };
1733 
1734    /* Now find the first level that fits the maximum miptail size requirement.
1735     */
1736    for (uint32_t s = min_miptail_start; s < info->levels; s++) {
1737       if (isl_minify(level0_extent_el.w, s) <= miptail_level0_extent_el.w &&
1738           isl_minify(level0_extent_el.h, s) <= miptail_level0_extent_el.h &&
1739           isl_minify(level0_extent_el.d, s) <= miptail_level0_extent_el.d)
1740          return s;
1741    }
1742 
1743    return info->levels;
1744 }
1745 
1746 /**
1747  * Calculate the pitch between physical array slices, in units of rows of
1748  * surface elements.
1749  */
1750 static uint32_t
isl_calc_array_pitch_el_rows_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,const struct isl_extent2d * phys_slice0_sa)1751 isl_calc_array_pitch_el_rows_gfx4_2d(
1752       const struct isl_device *dev,
1753       const struct isl_surf_init_info *restrict info,
1754       const struct isl_tile_info *tile_info,
1755       const struct isl_extent3d *image_align_sa,
1756       const struct isl_extent4d *phys_level0_sa,
1757       enum isl_array_pitch_span array_pitch_span,
1758       const struct isl_extent2d *phys_slice0_sa)
1759 {
1760    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1761    uint32_t pitch_sa_rows = 0;
1762 
1763    switch (array_pitch_span) {
1764    case ISL_ARRAY_PITCH_SPAN_COMPACT:
1765       pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
1766       break;
1767    case ISL_ARRAY_PITCH_SPAN_FULL: {
1768       /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
1769        * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
1770        * Surfaces >> Surface Arrays.
1771        */
1772       uint32_t H0_sa = phys_level0_sa->h;
1773       uint32_t H1_sa = isl_minify(H0_sa, 1);
1774 
1775       uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
1776       uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
1777 
1778       uint32_t m;
1779       if (ISL_GFX_VER(dev) >= 7) {
1780          /* The QPitch equation changed slightly in Ivybridge. */
1781          m = 12;
1782       } else {
1783          m = 11;
1784       }
1785 
1786       pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
1787 
1788       if (ISL_GFX_VER(dev) == 6 && info->samples > 1 &&
1789           (info->height % 4 == 1)) {
1790          /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1791           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1792           *
1793           *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
1794           *    the value calculated in the equation above , for every
1795           *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
1796           *
1797           * XXX(chadv): Is the errata natural corollary of the physical
1798           * layout of interleaved samples?
1799           */
1800          pitch_sa_rows += 4;
1801       }
1802 
1803       pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
1804       } /* end case */
1805       break;
1806    }
1807 
1808    assert(pitch_sa_rows % fmtl->bh == 0);
1809    uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
1810 
1811    if (ISL_GFX_VER(dev) >= 9 && ISL_GFX_VER(dev) <= 11 &&
1812        fmtl->txc == ISL_TXC_CCS) {
1813       /*
1814        * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
1815        *
1816        *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
1817        *    layout with these alignments in the RT space: Horizontal
1818        *    Alignment = 128 and Vertical Alignment = 64."
1819        *
1820        * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
1821        *
1822        *    "For non-multisampled render target's CCS auxiliary surface,
1823        *    QPitch must be computed with Horizontal Alignment = 128 and
1824        *    Surface Vertical Alignment = 256. These alignments are only for
1825        *    CCS buffer and not for associated render target."
1826        *
1827        * The first restriction is already handled by isl_choose_image_alignment_el
1828        * but the second restriction, which is an extension of the first, only
1829        * applies to qpitch and must be applied here.
1830        *
1831        * The second restriction disappears on Gfx12.
1832        */
1833       assert(fmtl->bh == 4);
1834       pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
1835    }
1836 
1837    if (ISL_GFX_VER(dev) >= 9 &&
1838        info->dim == ISL_SURF_DIM_3D &&
1839        tile_info->tiling != ISL_TILING_LINEAR) {
1840       /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
1841        *
1842        *    Tile Mode != Linear: This field must be set to an integer multiple
1843        *    of the tile height
1844        */
1845       pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
1846    }
1847 
1848    return pitch_el_rows;
1849 }
1850 
1851 /**
1852  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1853  * ISL_DIM_LAYOUT_GFX4_2D.
1854  */
1855 static void
isl_calc_phys_slice0_extent_sa_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t miptail_start_level,struct isl_extent2d * phys_slice0_sa)1856 isl_calc_phys_slice0_extent_sa_gfx4_2d(
1857       const struct isl_device *dev,
1858       const struct isl_surf_init_info *restrict info,
1859       const struct isl_tile_info *tile_info,
1860       enum isl_msaa_layout msaa_layout,
1861       const struct isl_extent3d *image_align_sa,
1862       const struct isl_extent4d *phys_level0_sa,
1863       uint32_t miptail_start_level,
1864       struct isl_extent2d *phys_slice0_sa)
1865 {
1866    ASSERTED const struct isl_format_layout *fmtl =
1867       isl_format_get_layout(info->format);
1868 
1869    if (info->levels == 1 && miptail_start_level > 0) {
1870       /* Do not pad the surface to the image alignment.
1871        *
1872        * For tiled surfaces, using a reduced alignment here avoids wasting CPU
1873        * cycles on the below mipmap layout caluclations. Reducing the
1874        * alignment here is safe because we later align the row pitch and array
1875        * pitch to the tile boundary. It is safe even for
1876        * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
1877        * to accommodate the interleaved samples.
1878        *
1879        * For linear surfaces, reducing the alignment here permits us to later
1880        * choose an arbitrary, non-aligned row pitch. If the surface backs
1881        * a VkBuffer, then an arbitrary pitch may be needed to accommodate
1882        * VkBufferImageCopy::bufferRowLength.
1883        */
1884       *phys_slice0_sa = (struct isl_extent2d) {
1885          .w = phys_level0_sa->w,
1886          .h = phys_level0_sa->h,
1887       };
1888       return;
1889    }
1890 
1891    uint32_t slice_top_w = 0;
1892    uint32_t slice_bottom_w = 0;
1893    uint32_t slice_left_h = 0;
1894    uint32_t slice_right_h = 0;
1895 
1896    uint32_t W0 = phys_level0_sa->w;
1897    uint32_t H0 = phys_level0_sa->h;
1898 
1899    for (uint32_t l = 0; l < info->levels; ++l) {
1900       uint32_t W = isl_minify(W0, l);
1901       uint32_t H = isl_minify(H0, l);
1902 
1903       uint32_t w = isl_align_npot(W, image_align_sa->w);
1904       uint32_t h = isl_align_npot(H, image_align_sa->h);
1905 
1906       if (l == 0) {
1907          slice_top_w = w;
1908          slice_left_h = h;
1909          slice_right_h = h;
1910       } else if (l == 1) {
1911          slice_bottom_w = w;
1912          slice_left_h += h;
1913       } else if (l == 2) {
1914          slice_bottom_w += w;
1915          slice_right_h += h;
1916       } else {
1917          slice_right_h += h;
1918       }
1919 
1920       if (l >= miptail_start_level) {
1921          assert(l == miptail_start_level);
1922          assert(isl_tiling_is_64(tile_info->tiling) ||
1923                 isl_tiling_is_std_y(tile_info->tiling));
1924          assert(w == tile_info->logical_extent_el.w * fmtl->bw);
1925          assert(h == tile_info->logical_extent_el.h * fmtl->bh);
1926          /* If we've gone into the miptail, we're done.  All higher miplevels
1927           * will be tucked into the same tile as this one.
1928           */
1929          break;
1930       }
1931    }
1932 
1933    *phys_slice0_sa = (struct isl_extent2d) {
1934       .w = MAX(slice_top_w, slice_bottom_w),
1935       .h = MAX(slice_left_h, slice_right_h),
1936    };
1937 }
1938 
1939 static void
isl_calc_phys_total_extent_el_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t miptail_start_level,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1940 isl_calc_phys_total_extent_el_gfx4_2d(
1941       const struct isl_device *dev,
1942       const struct isl_surf_init_info *restrict info,
1943       const struct isl_tile_info *tile_info,
1944       enum isl_msaa_layout msaa_layout,
1945       const struct isl_extent3d *image_align_sa,
1946       const struct isl_extent4d *phys_level0_sa,
1947       enum isl_array_pitch_span array_pitch_span,
1948       uint32_t miptail_start_level,
1949       uint32_t *array_pitch_el_rows,
1950       struct isl_extent4d *phys_total_el)
1951 {
1952    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1953 
1954    struct isl_extent2d phys_slice0_sa;
1955    isl_calc_phys_slice0_extent_sa_gfx4_2d(dev, info, tile_info, msaa_layout,
1956                                           image_align_sa, phys_level0_sa,
1957                                           miptail_start_level,
1958                                           &phys_slice0_sa);
1959    *array_pitch_el_rows =
1960       isl_calc_array_pitch_el_rows_gfx4_2d(dev, info, tile_info,
1961                                            image_align_sa, phys_level0_sa,
1962                                            array_pitch_span,
1963                                            &phys_slice0_sa);
1964 
1965    if (isl_tiling_is_64(tile_info->tiling) ||
1966        isl_tiling_is_std_y(tile_info->tiling)) {
1967       *phys_total_el = (struct isl_extent4d) {
1968          .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1969          .h = isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1970          .d = isl_align_div_npot(phys_level0_sa->d, fmtl->bd),
1971          .a = phys_level0_sa->array_len,
1972       };
1973    } else {
1974       uint32_t array_len = MAX(phys_level0_sa->d, phys_level0_sa->a);
1975       *phys_total_el = (struct isl_extent4d) {
1976          .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1977          .h = *array_pitch_el_rows * (array_len - 1) +
1978               isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1979          .d = 1,
1980          .a = 1,
1981       };
1982    }
1983 }
1984 
1985 /**
1986  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1987  * ISL_DIM_LAYOUT_GFX4_3D.
1988  */
1989 static void
isl_calc_phys_total_extent_el_gfx4_3d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1990 isl_calc_phys_total_extent_el_gfx4_3d(
1991       const struct isl_device *dev,
1992       const struct isl_surf_init_info *restrict info,
1993       const struct isl_extent3d *image_align_sa,
1994       const struct isl_extent4d *phys_level0_sa,
1995       uint32_t *array_pitch_el_rows,
1996       struct isl_extent4d *phys_total_el)
1997 {
1998    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1999 
2000    assert(info->samples == 1);
2001 
2002    if (info->dim != ISL_SURF_DIM_3D) {
2003       /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
2004        *
2005        * The cube face textures are stored in the same way as 3D surfaces
2006        * are stored (see section 6.17.5 for details).  For cube surfaces,
2007        * however, the depth is equal to the number of faces (always 6) and
2008        * is not reduced for each MIP.
2009        */
2010       assert(ISL_GFX_VER(dev) == 4);
2011       assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
2012       assert(phys_level0_sa->array_len == 6);
2013    } else {
2014       assert(phys_level0_sa->array_len == 1);
2015    }
2016 
2017    uint32_t total_w = 0;
2018    uint32_t total_h = 0;
2019 
2020    uint32_t W0 = phys_level0_sa->w;
2021    uint32_t H0 = phys_level0_sa->h;
2022    uint32_t D0 = phys_level0_sa->d;
2023    uint32_t A0 = phys_level0_sa->a;
2024 
2025    for (uint32_t l = 0; l < info->levels; ++l) {
2026       uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
2027       uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
2028       uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
2029 
2030       uint32_t max_layers_horiz = MIN(level_d, 1u << l);
2031       uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2032 
2033       total_w = MAX(total_w, level_w * max_layers_horiz);
2034       total_h += level_h * max_layers_vert;
2035    }
2036 
2037    /* GFX4_3D layouts don't really have an array pitch since each LOD has a
2038     * different number of horizontal and vertical layers.  We have to set it
2039     * to something, so at least make it true for LOD0.
2040     */
2041    *array_pitch_el_rows =
2042       isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
2043    *phys_total_el = (struct isl_extent4d) {
2044       .w = isl_assert_div(total_w, fmtl->bw),
2045       .h = isl_assert_div(total_h, fmtl->bh),
2046       .d = 1,
2047       .a = 1,
2048    };
2049 }
2050 
2051 /**
2052  * A variant of isl_calc_phys_slice0_extent_sa() specific to
2053  * ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ.
2054  */
2055 static void
isl_calc_phys_total_extent_el_gfx6_stencil_hiz(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)2056 isl_calc_phys_total_extent_el_gfx6_stencil_hiz(
2057       const struct isl_device *dev,
2058       const struct isl_surf_init_info *restrict info,
2059       const struct isl_tile_info *tile_info,
2060       const struct isl_extent3d *image_align_sa,
2061       const struct isl_extent4d *phys_level0_sa,
2062       uint32_t *array_pitch_el_rows,
2063       struct isl_extent4d *phys_total_el)
2064 {
2065    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2066 
2067    const struct isl_extent2d tile_extent_sa = {
2068       .w = tile_info->logical_extent_el.w * fmtl->bw,
2069       .h = tile_info->logical_extent_el.h * fmtl->bh,
2070    };
2071    /* Tile size is a multiple of image alignment */
2072    assert(tile_extent_sa.w % image_align_sa->w == 0);
2073    assert(tile_extent_sa.h % image_align_sa->h == 0);
2074 
2075    const uint32_t W0 = phys_level0_sa->w;
2076    const uint32_t H0 = phys_level0_sa->h;
2077 
2078    /* Each image has the same height as LOD0 because the hardware thinks
2079     * everything is LOD0
2080     */
2081    const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
2082 
2083    uint32_t total_top_w = 0;
2084    uint32_t total_bottom_w = 0;
2085    uint32_t total_h = 0;
2086 
2087    for (uint32_t l = 0; l < info->levels; ++l) {
2088       const uint32_t W = isl_minify(W0, l);
2089 
2090       const uint32_t w = isl_align(W, tile_extent_sa.w);
2091       const uint32_t h = isl_align(H, tile_extent_sa.h);
2092 
2093       if (l == 0) {
2094          total_top_w = w;
2095          total_h = h;
2096       } else if (l == 1) {
2097          total_bottom_w = w;
2098          total_h += h;
2099       } else {
2100          total_bottom_w += w;
2101       }
2102    }
2103 
2104    *array_pitch_el_rows =
2105       isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
2106    *phys_total_el = (struct isl_extent4d) {
2107       .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
2108       .h = isl_assert_div(total_h, fmtl->bh),
2109       .d = 1,
2110       .a = 1,
2111    };
2112 }
2113 
2114 /**
2115  * A variant of isl_calc_phys_slice0_extent_sa() specific to
2116  * ISL_DIM_LAYOUT_GFX9_1D.
2117  */
2118 static void
isl_calc_phys_total_extent_el_gfx9_1d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)2119 isl_calc_phys_total_extent_el_gfx9_1d(
2120       const struct isl_device *dev,
2121       const struct isl_surf_init_info *restrict info,
2122       const struct isl_extent3d *image_align_sa,
2123       const struct isl_extent4d *phys_level0_sa,
2124       uint32_t *array_pitch_el_rows,
2125       struct isl_extent4d *phys_total_el)
2126 {
2127    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2128 
2129    assert(phys_level0_sa->height == 1);
2130    assert(phys_level0_sa->depth == 1);
2131    assert(info->samples == 1);
2132    assert(image_align_sa->w >= fmtl->bw);
2133 
2134    uint32_t slice_w = 0;
2135    const uint32_t W0 = phys_level0_sa->w;
2136 
2137    for (uint32_t l = 0; l < info->levels; ++l) {
2138       uint32_t W = isl_minify(W0, l);
2139       uint32_t w = isl_align_npot(W, image_align_sa->w);
2140 
2141       slice_w += w;
2142    }
2143 
2144    *array_pitch_el_rows = 1;
2145    *phys_total_el = (struct isl_extent4d) {
2146       .w = isl_assert_div(slice_w, fmtl->bw),
2147       .h = phys_level0_sa->array_len,
2148       .d = 1,
2149       .a = 1,
2150    };
2151 }
2152 
2153 /**
2154  * Calculate the two-dimensional total physical extent of the surface, in
2155  * units of surface elements.
2156  */
2157 static void
isl_calc_phys_total_extent_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t miptail_start_level,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)2158 isl_calc_phys_total_extent_el(const struct isl_device *dev,
2159                               const struct isl_surf_init_info *restrict info,
2160                               const struct isl_tile_info *tile_info,
2161                               enum isl_dim_layout dim_layout,
2162                               enum isl_msaa_layout msaa_layout,
2163                               const struct isl_extent3d *image_align_sa,
2164                               const struct isl_extent4d *phys_level0_sa,
2165                               enum isl_array_pitch_span array_pitch_span,
2166                               uint32_t miptail_start_level,
2167                               uint32_t *array_pitch_el_rows,
2168                               struct isl_extent4d *phys_total_el)
2169 {
2170    switch (dim_layout) {
2171    case ISL_DIM_LAYOUT_GFX9_1D:
2172       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
2173       isl_calc_phys_total_extent_el_gfx9_1d(dev, info,
2174                                             image_align_sa, phys_level0_sa,
2175                                             array_pitch_el_rows,
2176                                             phys_total_el);
2177       return;
2178    case ISL_DIM_LAYOUT_GFX4_2D:
2179       isl_calc_phys_total_extent_el_gfx4_2d(dev, info, tile_info, msaa_layout,
2180                                             image_align_sa, phys_level0_sa,
2181                                             array_pitch_span,
2182                                             miptail_start_level,
2183                                             array_pitch_el_rows,
2184                                             phys_total_el);
2185       return;
2186    case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
2187       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
2188       isl_calc_phys_total_extent_el_gfx6_stencil_hiz(dev, info, tile_info,
2189                                                      image_align_sa,
2190                                                      phys_level0_sa,
2191                                                      array_pitch_el_rows,
2192                                                      phys_total_el);
2193       return;
2194    case ISL_DIM_LAYOUT_GFX4_3D:
2195       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
2196       isl_calc_phys_total_extent_el_gfx4_3d(dev, info,
2197                                             image_align_sa, phys_level0_sa,
2198                                             array_pitch_el_rows,
2199                                             phys_total_el);
2200       return;
2201    }
2202 
2203    unreachable("invalid value for dim_layout");
2204 }
2205 
2206 static uint32_t
isl_calc_row_pitch_alignment(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info)2207 isl_calc_row_pitch_alignment(const struct isl_device *dev,
2208                              const struct isl_surf_init_info *surf_info,
2209                              const struct isl_tile_info *tile_info)
2210 {
2211    if (tile_info->tiling != ISL_TILING_LINEAR) {
2212       /* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must
2213        * be 512B-aligned. CCS is only support on Y tilings.
2214        *
2215        * Only consider 512B alignment when :
2216        *    - AUX is not explicitly disabled
2217        *    - the caller has specified no pitch
2218        *
2219        * isl_surf_get_ccs_surf() will check that the main surface alignment
2220        * matches CCS expectations.
2221        */
2222       if (ISL_GFX_VER(dev) >= 12 &&
2223           isl_format_supports_ccs_e(dev->info, surf_info->format) &&
2224           tile_info->tiling != ISL_TILING_X &&
2225           !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) &&
2226           surf_info->row_pitch_B == 0) {
2227          return isl_align(tile_info->phys_extent_B.width, 512);
2228       }
2229 
2230       return tile_info->phys_extent_B.width;
2231    }
2232 
2233    /* We only support tiled fragment shading rate buffers. */
2234    assert((surf_info->usage & ISL_SURF_USAGE_CPB_BIT) == 0);
2235 
2236    /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
2237     * RENDER_SURFACE_STATE Surface Pitch (p349):
2238     *
2239     *    - For linear render target surfaces and surfaces accessed with the
2240     *      typed data port messages, the pitch must be a multiple of the
2241     *      element size for non-YUV surface formats.  Pitch must be
2242     *      a multiple of 2 * element size for YUV surface formats.
2243     *
2244     *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
2245     *      ignore because isl doesn't do buffers.]
2246     *
2247     *    - For other linear surfaces, the pitch can be any multiple of
2248     *      bytes.
2249     */
2250    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
2251    const uint32_t bs = fmtl->bpb / 8;
2252    uint32_t alignment;
2253 
2254    if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
2255       if (isl_format_is_yuv(surf_info->format)) {
2256          alignment = 2 * bs;
2257       } else  {
2258          alignment = bs;
2259       }
2260    } else {
2261       alignment = 1;
2262    }
2263 
2264    /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >>
2265     * PRI_STRIDE Stride (p1254):
2266     *
2267     *    "When using linear memory, this must be at least 64 byte aligned."
2268     *
2269     * However, when displaying on NVIDIA and recent AMD GPUs via PRIME,
2270     * we need a larger pitch of 256 bytes.
2271     *
2272     * If the ISL caller didn't specify a row_pitch_B, then we should assume
2273     * the NVIDIA/AMD requirements. Otherwise, if we have a specified
2274     * row_pitch_B, this is probably because the caller is trying to import a
2275     * buffer. In that case we limit the minimum row pitch to the Intel HW
2276     * requirement.
2277     */
2278    if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {
2279       if (surf_info->row_pitch_B == 0)
2280          alignment = isl_align(alignment, 256);
2281       else
2282          alignment = isl_align(alignment, 64);
2283    }
2284 
2285    return alignment;
2286 }
2287 
2288 static uint32_t
isl_calc_linear_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)2289 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
2290                               const struct isl_surf_init_info *info,
2291                               const struct isl_extent4d *phys_total_el,
2292                               uint32_t alignment_B)
2293 {
2294    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2295    const uint32_t bs = fmtl->bpb / 8;
2296 
2297    return isl_align_npot(bs * phys_total_el->w, alignment_B);
2298 }
2299 
2300 static uint32_t
isl_calc_tiled_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)2301 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
2302                              const struct isl_surf_init_info *surf_info,
2303                              const struct isl_tile_info *tile_info,
2304                              const struct isl_extent4d *phys_total_el,
2305                              uint32_t alignment_B)
2306 {
2307    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
2308 
2309    assert(fmtl->bpb % tile_info->format_bpb == 0);
2310 
2311    const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
2312    const uint32_t total_w_tl =
2313       isl_align_div(phys_total_el->w * tile_el_scale,
2314                     tile_info->logical_extent_el.width);
2315 
2316    /* In some cases the alignment of the pitch might be > to the tile size
2317     * (for example Gfx12 CCS requires 512B alignment while the tile's width
2318     * can be 128B), so align the row pitch to the alignment.
2319     */
2320    assert(alignment_B >= tile_info->phys_extent_B.width);
2321    return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
2322 }
2323 
2324 static uint32_t
isl_calc_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)2325 isl_calc_min_row_pitch(const struct isl_device *dev,
2326                        const struct isl_surf_init_info *surf_info,
2327                        const struct isl_tile_info *tile_info,
2328                        const struct isl_extent4d *phys_total_el,
2329                        uint32_t alignment_B)
2330 {
2331    if (tile_info->tiling == ISL_TILING_LINEAR) {
2332       return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
2333                                            alignment_B);
2334    } else {
2335       return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
2336                                           phys_total_el, alignment_B);
2337    }
2338 }
2339 
/**
 * Is `n` in the valid range for a hardware pitch bitfield that is `bits`
 * bits wide?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * A width of 0 means the field does not exist, so nothing is in range.
 *
 * \param n     pitch to test; must be non-zero (asserted)
 * \param bits  width of the hardware field in bits
 * \return true if 1 <= n <= 2^bits
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   if (bits == 0 || n == 0)
      return false;

   /* 2^bits exceeds UINT32_MAX here, so every non-zero n fits.  Handling
    * this case separately also keeps the shift below strictly narrower than
    * the operand width.
    */
   if (bits >= 32)
      return true;

   /* Shift an unsigned 32-bit one: `1 << bits` on a plain int is undefined
    * once bits reaches 31.
    */
   return n <= (UINT32_C(1) << bits);
}
2355 
2356 void PRINTFLIKE(4, 5)
_isl_notify_failure(const struct isl_surf_init_info * surf_info,const char * file,int line,const char * fmt,...)2357 _isl_notify_failure(const struct isl_surf_init_info *surf_info,
2358                     const char *file, int line, const char *fmt, ...)
2359 {
2360    if (!INTEL_DEBUG(DEBUG_ISL))
2361       return;
2362 
2363    char msg[512];
2364    va_list ap;
2365    va_start(ap, fmt);
2366    int ret = vsnprintf(msg, sizeof(msg), fmt, ap);
2367    assert(ret < sizeof(msg));
2368    va_end(ap);
2369 
2370 #define PRINT_USAGE(bit, str) \
2371             (surf_info->usage & ISL_SURF_USAGE_##bit##_BIT) ? ("+"str) : ""
2372 #define PRINT_TILING(bit, str) \
2373             (surf_info->tiling_flags & ISL_TILING_##bit##_BIT) ? ("+"str) : ""
2374 
2375    snprintf(msg + ret, sizeof(msg) - ret,
2376             " extent=%ux%ux%u dim=%s msaa=%ux levels=%u rpitch=%u fmt=%s "
2377             "usages=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s "
2378             "tiling_flags=%s%s%s%s%s%s%s%s%s%s%s%s%s",
2379             surf_info->width, surf_info->height,
2380             surf_info->dim == ISL_SURF_DIM_3D ?
2381             surf_info->depth : surf_info->array_len,
2382             surf_info->dim == ISL_SURF_DIM_1D ? "1d" :
2383             surf_info->dim == ISL_SURF_DIM_2D ? "2d" : "3d",
2384             surf_info->samples, surf_info->levels,
2385             surf_info->row_pitch_B,
2386             isl_format_get_name(surf_info->format) + strlen("ISL_FORMAT_"),
2387 
2388             PRINT_USAGE(RENDER_TARGET,       "rt"),
2389             PRINT_USAGE(DEPTH,               "depth"),
2390             PRINT_USAGE(STENCIL,             "stenc"),
2391             PRINT_USAGE(TEXTURE,             "tex"),
2392             PRINT_USAGE(CUBE,                "cube"),
2393             PRINT_USAGE(DISABLE_AUX,         "noaux"),
2394             PRINT_USAGE(DISPLAY,             "disp"),
2395             PRINT_USAGE(HIZ,                 "hiz"),
2396             PRINT_USAGE(MCS,                 "mcs"),
2397             PRINT_USAGE(CCS,                 "ccs"),
2398             PRINT_USAGE(VERTEX_BUFFER,       "vb"),
2399             PRINT_USAGE(INDEX_BUFFER,        "ib"),
2400             PRINT_USAGE(CONSTANT_BUFFER,     "const"),
2401             PRINT_USAGE(STAGING,             "stage"),
2402             PRINT_USAGE(SPARSE,              "sparse"),
2403             PRINT_USAGE(NO_AUX_TT_ALIGNMENT, "no-aux-align"),
2404 
2405             PRINT_TILING(LINEAR,         "linear"),
2406             PRINT_TILING(W,              "W"),
2407             PRINT_TILING(X,              "X"),
2408             PRINT_TILING(Y0,             "Y0"),
2409             PRINT_TILING(SKL_Yf,         "skl-Yf"),
2410             PRINT_TILING(SKL_Ys,         "skl-Ys"),
2411             PRINT_TILING(ICL_Yf,         "icl-Yf"),
2412             PRINT_TILING(ICL_Ys,         "icl-Ys"),
2413             PRINT_TILING(4,              "4"),
2414             PRINT_TILING(64,             "64"),
2415             PRINT_TILING(HIZ,            "hiz"),
2416             PRINT_TILING(CCS,            "ccs"),
2417             PRINT_TILING(GFX12_CCS,      "ccs12"));
2418 
2419 #undef PRINT_USAGE
2420 #undef PRINT_TILING
2421 
2422    mesa_logd("%s:%i: %s", file, line, msg);
2423 }
2424 
2425 static bool
isl_calc_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_total_el,uint32_t * out_row_pitch_B)2426 isl_calc_row_pitch(const struct isl_device *dev,
2427                    const struct isl_surf_init_info *surf_info,
2428                    const struct isl_tile_info *tile_info,
2429                    enum isl_dim_layout dim_layout,
2430                    const struct isl_extent4d *phys_total_el,
2431                    uint32_t *out_row_pitch_B)
2432 {
2433    uint32_t alignment_B =
2434       isl_calc_row_pitch_alignment(dev, surf_info, tile_info);
2435 
2436    const uint32_t min_row_pitch_B =
2437       isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
2438                              alignment_B);
2439 
2440    if (surf_info->row_pitch_B != 0) {
2441       if (surf_info->row_pitch_B < min_row_pitch_B) {
2442          return notify_failure(surf_info,
2443                                "requested row pitch (%uB) less than minimum "
2444                                "allowed (%uB)",
2445                                surf_info->row_pitch_B, min_row_pitch_B);
2446       }
2447 
2448       if (surf_info->row_pitch_B % alignment_B != 0) {
2449          return notify_failure(surf_info,
2450                                "requested row pitch (%uB) doesn't satisfy the "
2451                                "minimum alignment requirement (%uB)",
2452                                surf_info->row_pitch_B, alignment_B);
2453       }
2454    }
2455 
2456    const uint32_t row_pitch_B =
2457       surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B;
2458 
2459    const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
2460 
2461    if (row_pitch_B == 0)
2462       return notify_failure(surf_info, "calculated row pitch is zero");
2463 
2464    if (dim_layout == ISL_DIM_LAYOUT_GFX9_1D) {
2465       /* SurfacePitch is ignored for this layout. */
2466       goto done;
2467    }
2468 
2469    if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
2470                             ISL_SURF_USAGE_TEXTURE_BIT |
2471                             ISL_SURF_USAGE_STORAGE_BIT)) &&
2472        !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) {
2473       return notify_failure(surf_info,
2474                             "row pitch (%uB) not in range of "
2475                             "RENDER_SURFACE_STATE::SurfacePitch",
2476                             row_pitch_B);
2477    }
2478 
2479    if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
2480                             ISL_SURF_USAGE_MCS_BIT)) &&
2481        !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) {
2482       return notify_failure(surf_info,
2483                             "row_pitch_tl=%u not in range of "
2484                             "RENDER_SURFACE_STATE::AuxiliarySurfacePitch",
2485                             row_pitch_tl);
2486    }
2487 
2488    if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
2489        !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) {
2490       return notify_failure(surf_info,
2491                             "row pitch (%uB) not in range of "
2492                             "3DSTATE_DEPTH_BUFFER::SurfacePitch",
2493                             row_pitch_B);
2494    }
2495 
2496    if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
2497        !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) {
2498       return notify_failure(surf_info,
2499                             "row pitch (%uB) not in range of "
2500                             "3DSTATE_HIER_DEPTH_BUFFER::SurfacePitch",
2501                             row_pitch_B);
2502    }
2503 
2504    const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
2505       _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
2506       _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
2507 
2508    if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
2509        !pitch_in_range(row_pitch_B, stencil_pitch_bits)) {
2510       return notify_failure(surf_info,
2511                             "row pitch (%uB) not in range of "
2512                             "3DSTATE_STENCIL_BUFFER/3DSTATE_DEPTH_BUFFER::SurfacePitch",
2513                             row_pitch_B);
2514    }
2515 
2516    if ((surf_info->usage & ISL_SURF_USAGE_CPB_BIT) &&
2517        !pitch_in_range(row_pitch_B, _3DSTATE_CPSIZE_CONTROL_BUFFER_SurfacePitch_bits(dev->info)))
2518       return false;
2519 
2520  done:
2521    *out_row_pitch_B = row_pitch_B;
2522    return true;
2523 }
2524 
2525 static bool
isl_calc_size(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t array_pitch_el_rows,uint32_t row_pitch_B,uint64_t * out_size_B)2526 isl_calc_size(const struct isl_device *dev,
2527               const struct isl_surf_init_info *info,
2528               const struct isl_tile_info *tile_info,
2529               const struct isl_extent4d *phys_total_el,
2530               uint32_t array_pitch_el_rows,
2531               uint32_t row_pitch_B,
2532               uint64_t *out_size_B)
2533 {
2534    uint64_t size_B;
2535    if (tile_info->tiling == ISL_TILING_LINEAR) {
2536       /* LINEAR tiling has no concept of intra-tile arrays */
2537       assert(phys_total_el->d == 1 && phys_total_el->a == 1);
2538 
2539       size_B = (uint64_t) row_pitch_B * phys_total_el->h;
2540 
2541    } else {
2542       /* Pitches must make sense with the tiling */
2543       assert(row_pitch_B % tile_info->phys_extent_B.width == 0);
2544 
2545       uint32_t array_slices, array_pitch_tl_rows;
2546       if (phys_total_el->d > 1) {
2547          assert(phys_total_el->a == 1);
2548          array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
2549                                               tile_info->logical_extent_el.h);
2550          array_slices = isl_align_div(phys_total_el->d,
2551                                       tile_info->logical_extent_el.d);
2552       } else if (phys_total_el->a > 1) {
2553          assert(phys_total_el->d == 1);
2554          array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
2555                                               tile_info->logical_extent_el.h);
2556          array_slices = isl_align_div(phys_total_el->a,
2557                                       tile_info->logical_extent_el.a);
2558       } else {
2559          assert(phys_total_el->d == 1 && phys_total_el->a == 1);
2560          array_pitch_tl_rows = 0;
2561          array_slices = 1;
2562       }
2563 
2564       const uint32_t total_h_tl =
2565          (array_slices - 1) * array_pitch_tl_rows +
2566          isl_align_div(phys_total_el->h, tile_info->logical_extent_el.height);
2567 
2568       size_B = (uint64_t) total_h_tl * tile_info->phys_extent_B.height *
2569                row_pitch_B;
2570    }
2571 
2572    /* If for some reason we can't support the appropriate tiling format and
2573     * end up falling to linear or some other format, make sure the image size
2574     * and alignment are aligned to the expected block size so we can at least
2575     * do opaque binds.
2576     */
2577    if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
2578       size_B = isl_align(size_B, 64 * 1024);
2579 
2580    /* Pre-gfx9: from the Broadwell PRM Vol 5, Surface Layout:
2581     *    "In addition to restrictions on maximum height, width, and depth,
2582     *     surfaces are also restricted to a maximum size in bytes. This
2583     *     maximum is 2 GB for all products and all surface types."
2584     *
2585     * gfx9-10: from the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
2586     *    "In addition to restrictions on maximum height, width, and depth,
2587     *     surfaces are also restricted to a maximum size of 2^38 bytes.
2588     *     All pixels within the surface must be contained within 2^38 bytes
2589     *     of the base address."
2590     *
2591     * gfx11+ platforms raised this limit to 2^44 bytes.
2592     */
2593    uint64_t max_surface_B = 1ull << (ISL_GFX_VER(dev) >= 11 ? 44 :
2594                                      ISL_GFX_VER(dev) >= 9 ? 38 : 31);
2595    if (size_B > max_surface_B) {
2596       return notify_failure(
2597          info,
2598          "calculated size (%"PRIu64"B) exceeds platform limit of %"PRIu64"B",
2599          size_B, max_surface_B);
2600    }
2601 
2602    *out_size_B = size_B;
2603    return true;
2604 }
2605 
2606 static uint32_t
isl_calc_base_alignment(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_tile_info * tile_info)2607 isl_calc_base_alignment(const struct isl_device *dev,
2608                         const struct isl_surf_init_info *info,
2609                         const struct isl_tile_info *tile_info)
2610 {
2611    uint32_t base_alignment_B;
2612    if (tile_info->tiling == ISL_TILING_LINEAR) {
2613       /* From the Broadwell PRM Vol 2d,
2614        * RENDER_SURFACE_STATE::SurfaceBaseAddress:
2615        *
2616        *    "The Base Address for linear render target surfaces and surfaces
2617        *    accessed with the typed surface read/write data port messages must
2618        *    be element-size aligned, for non-YUV surface formats, or a
2619        *    multiple of 2 element-sizes for YUV surface formats. Other linear
2620        *    surfaces have no alignment requirements (byte alignment is
2621        *    sufficient.)"
2622        */
2623       base_alignment_B = MAX(1, info->min_alignment_B);
2624       if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
2625          if (isl_format_is_yuv(info->format)) {
2626             base_alignment_B =
2627                MAX(base_alignment_B, tile_info->format_bpb / 4);
2628          } else {
2629             base_alignment_B =
2630                MAX(base_alignment_B, tile_info->format_bpb / 8);
2631          }
2632       }
2633       base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
2634 
2635       /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
2636        *
2637        *     "For Linear memory, this field specifies the stride in chunks of
2638        *     64 bytes (1 cache line)."
2639        */
2640       if (isl_surf_usage_is_display(info->usage))
2641          base_alignment_B = MAX(base_alignment_B, 64);
2642    } else {
2643       const uint32_t tile_size_B = tile_info->phys_extent_B.width *
2644                                    tile_info->phys_extent_B.height;
2645       assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
2646       base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
2647 
2648       /* The diagram in the Bspec section Memory Compression - Gfx12, shows
2649        * that the CCS is indexed in 256B chunks. However, the
2650        * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
2651        * pages. We currently don't assign the usage field like we do for main
2652        * surfaces, so just use 4K for now.
2653        */
2654       if (tile_info->tiling == ISL_TILING_GFX12_CCS)
2655          base_alignment_B = MAX(base_alignment_B, 4096);
2656 
2657       if (dev->info->has_aux_map &&
2658           (isl_format_supports_ccs_d(dev->info, info->format) ||
2659            isl_format_supports_ccs_e(dev->info, info->format)) &&
2660           !INTEL_DEBUG(DEBUG_NO_CCS) &&
2661           !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
2662          /* Wa_22015614752:
2663           *
2664           * Due to L3 cache being tagged with (engineID, vaID) and the CCS
2665           * block/cacheline being 256 bytes, 2 engines accessing a 64Kb range
2666           * with compression will generate 2 different CCS cacheline entries
2667           * in L3, this will lead to corruptions. To avoid this, we need to
2668           * ensure 2 images do not share a 256 bytes CCS cacheline. With a
2669           * ratio of compression of 1/256, this is 64Kb alignment (even for
2670           * Tile4...)
2671           *
2672           * ATS-M PRMS, Vol 2a: Command Reference: Instructions,
2673           * XY_CTRL_SURF_COPY_BLT, "Size of Control Surface Copy" field, the
2674           * CCS blocks are 256 bytes :
2675           *
2676           *    "This field indicates size of the Control Surface or CCS copy.
2677           *     It is expressed in terms of number of 256B block of CCS, where
2678           *     each 256B block of CCS corresponds to 64KB of main surface."
2679           */
2680          if (intel_needs_workaround(dev->info, 22015614752)) {
2681             base_alignment_B = MAX(base_alignment_B,
2682                                    256 /* cacheline */ * 256 /* AUX ratio */);
2683          }
2684 
2685          /* Platforms using an aux map require that images be
2686           * granularity-aligned if they're going to used with CCS. This is
2687           * because the Aux translation table maps main surface addresses to
2688           * aux addresses at a granularity in the main surface. Because we
2689           * don't know for sure in ISL if a surface will use CCS, we have to
2690           * guess based on the DISABLE_AUX usage bit. The one thing we do know
2691           * is that we haven't enable CCS on linear images yet so we can avoid
2692           * the extra alignment there.
2693           */
2694          if (!(info->usage & ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT)) {
2695             base_alignment_B = MAX(base_alignment_B, dev->info->verx10 >= 125 ?
2696                                    1024 * 1024 : 64 * 1024);
2697          }
2698       }
2699    }
2700 
2701    /* If for some reason we can't support the appropriate tiling format and
2702     * end up falling to linear or some other format, make sure the image size
2703     * and alignment are aligned to the expected block size so we can at least
2704     * do opaque binds.
2705     */
2706    if (info->usage & ISL_SURF_USAGE_SPARSE_BIT)
2707       base_alignment_B = MAX(base_alignment_B, 64 * 1024);
2708 
2709    return base_alignment_B;
2710 }
2711 
2712 bool
isl_surf_init_s(const struct isl_device * dev,struct isl_surf * surf,const struct isl_surf_init_info * restrict info)2713 isl_surf_init_s(const struct isl_device *dev,
2714                 struct isl_surf *surf,
2715                 const struct isl_surf_init_info *restrict info)
2716 {
2717    /* Some sanity checks */
2718    assert(!(info->usage & ISL_SURF_USAGE_CPB_BIT) ||
2719           dev->info->has_coarse_pixel_primitive_and_cb);
2720 
2721    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
2722 
2723    const struct isl_extent4d logical_level0_px = {
2724       .w = info->width,
2725       .h = info->height,
2726       .d = info->depth,
2727       .a = info->array_len,
2728    };
2729 
2730    enum isl_tiling tiling;
2731    if (!isl_surf_choose_tiling(dev, info, &tiling))
2732       return false;
2733 
2734    const enum isl_dim_layout dim_layout =
2735       isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
2736 
2737    enum isl_msaa_layout msaa_layout;
2738    if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
2739        return false;
2740 
2741    struct isl_tile_info tile_info;
2742    isl_tiling_get_info(tiling, info->dim, msaa_layout, fmtl->bpb,
2743                        info->samples, &tile_info);
2744 
2745    struct isl_extent3d image_align_el;
2746    isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
2747                                  &image_align_el);
2748 
2749    struct isl_extent3d image_align_sa =
2750       isl_extent3d_el_to_sa(info->format, image_align_el);
2751 
2752    struct isl_extent4d phys_level0_sa;
2753    isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
2754                                   &phys_level0_sa);
2755 
2756    enum isl_array_pitch_span array_pitch_span =
2757       isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
2758 
2759    uint32_t miptail_start_level =
2760       isl_choose_miptail_start_level(dev, info, &tile_info);
2761 
2762    uint32_t array_pitch_el_rows;
2763    struct isl_extent4d phys_total_el;
2764    isl_calc_phys_total_extent_el(dev, info, &tile_info,
2765                                  dim_layout, msaa_layout,
2766                                  &image_align_sa, &phys_level0_sa,
2767                                  array_pitch_span, miptail_start_level,
2768                                  &array_pitch_el_rows,
2769                                  &phys_total_el);
2770 
2771    uint32_t row_pitch_B;
2772    if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
2773                            &phys_total_el, &row_pitch_B))
2774       return false;
2775 
2776    uint64_t size_B;
2777    if (!isl_calc_size(dev, info, &tile_info, &phys_total_el,
2778                       array_pitch_el_rows, row_pitch_B, &size_B))
2779       return false;
2780 
2781    const uint32_t base_alignment_B =
2782       isl_calc_base_alignment(dev, info, &tile_info);
2783 
2784    *surf = (struct isl_surf) {
2785       .dim = info->dim,
2786       .dim_layout = dim_layout,
2787       .msaa_layout = msaa_layout,
2788       .tiling = tiling,
2789       .format = info->format,
2790 
2791       .levels = info->levels,
2792       .samples = info->samples,
2793 
2794       .image_alignment_el = image_align_el,
2795       .logical_level0_px = logical_level0_px,
2796       .phys_level0_sa = phys_level0_sa,
2797 
2798       .size_B = size_B,
2799       .alignment_B = base_alignment_B,
2800       .row_pitch_B = row_pitch_B,
2801       .array_pitch_el_rows = array_pitch_el_rows,
2802       .array_pitch_span = array_pitch_span,
2803       .miptail_start_level = miptail_start_level,
2804 
2805       .usage = info->usage,
2806    };
2807 
2808    return true;
2809 }
2810 
2811 void
isl_surf_get_tile_info(const struct isl_surf * surf,struct isl_tile_info * tile_info)2812 isl_surf_get_tile_info(const struct isl_surf *surf,
2813                        struct isl_tile_info *tile_info)
2814 {
2815    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2816    isl_tiling_get_info(surf->tiling, surf->dim, surf->msaa_layout, fmtl->bpb,
2817                        surf->samples, tile_info);
2818 }
2819 
2820 bool
isl_surf_get_hiz_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * hiz_surf)2821 isl_surf_get_hiz_surf(const struct isl_device *dev,
2822                       const struct isl_surf *surf,
2823                       struct isl_surf *hiz_surf)
2824 {
2825    if (INTEL_DEBUG(DEBUG_NO_HIZ))
2826       return false;
2827 
2828    /* HiZ support does not exist prior to Gfx5 */
2829    if (ISL_GFX_VER(dev) < 5)
2830       return false;
2831 
2832    if (!isl_surf_usage_is_depth(surf->usage))
2833       return false;
2834 
2835    /* From the Sandy Bridge PRM, Vol 2 Part 1,
2836     * 3DSTATE_DEPTH_BUFFER::Hierarchical Depth Buffer Enable,
2837     *
2838     *    If this field is enabled, the Surface Format of the depth buffer
2839     *    cannot be D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2840     *    requires the separate stencil buffer.
2841     *
2842     * On SNB+, HiZ can't be used with combined depth-stencil buffers.
2843     */
2844    if (isl_surf_usage_is_stencil(surf->usage))
2845       return false;
2846 
2847    /* Multisampled depth is always interleaved */
2848    assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
2849           surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
2850 
2851    /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
2852     *
2853     *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
2854     *    Target View Extent, and Depth Coordinate Offset X/Y of the
2855     *    hierarchical depth buffer are inherited from the depth buffer. The
2856     *    height and width of the hierarchical depth buffer that must be
2857     *    allocated are computed by the following formulas, where HZ is the
2858     *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
2859     *    Z_Width, and Z_Depth values given in these formulas are those present
2860     *    in 3DSTATE_DEPTH_BUFFER incremented by one.
2861     *
2862     *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
2863     *    being applied to the table below if Number of Multisamples is set to
2864     *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
2865     *    Z_Width must be multiplied by 4 before being applied to the table
2866     *    below if Number of Multisamples is set to NUMSAMPLES_8."
2867     *
2868     * In the Sky Lake PRM, the second paragraph is gone.  This means that,
2869     * from Sandy Bridge through Broadwell, HiZ compresses samples in the
2870     * primary depth surface.  On Sky Lake and onward, HiZ compresses pixels.
2871     *
2872     * There are a number of different ways that this discrepancy could be
2873     * handled.  The way we have chosen is to simply make MSAA HiZ have the
2874     * same number of samples as the parent surface pre-Sky Lake and always be
2875     * single-sampled on Sky Lake and above.  Since the block sizes of
2876     * compressed formats are given in samples, this neatly handles everything
2877     * without the need for additional HiZ formats with different block sizes
2878     * on SKL+.
2879     */
2880    const unsigned samples = ISL_GFX_VER(dev) >= 9 ? 1 : surf->samples;
2881 
2882    const enum isl_format format =
2883       ISL_GFX_VERX10(dev) >= 125 ? ISL_FORMAT_GFX125_HIZ : ISL_FORMAT_HIZ;
2884 
2885    return isl_surf_init(dev, hiz_surf,
2886                         .dim = surf->dim,
2887                         .format = format,
2888                         .width = surf->logical_level0_px.width,
2889                         .height = surf->logical_level0_px.height,
2890                         .depth = surf->logical_level0_px.depth,
2891                         .levels = surf->levels,
2892                         .array_len = surf->logical_level0_px.array_len,
2893                         .samples = samples,
2894                         .usage = ISL_SURF_USAGE_HIZ_BIT,
2895                         .tiling_flags = ISL_TILING_HIZ_BIT);
2896 }
2897 
2898 bool
isl_surf_get_mcs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * mcs_surf)2899 isl_surf_get_mcs_surf(const struct isl_device *dev,
2900                       const struct isl_surf *surf,
2901                       struct isl_surf *mcs_surf)
2902 {
2903    /* It must be multisampled with an array layout */
2904    if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY)
2905       return false;
2906 
2907    /* On Gfx12+ this format is not listed in TGL PRMs, Volume 2b: Command
2908     * Reference: Enumerations, RenderCompressionFormat
2909     */
2910    if (ISL_GFX_VER(dev) >= 12 &&
2911        surf->format == ISL_FORMAT_R9G9B9E5_SHAREDEXP)
2912       return false;
2913 
2914    /* The following are true of all multisampled surfaces */
2915    assert(surf->samples > 1);
2916    assert(surf->dim == ISL_SURF_DIM_2D);
2917    assert(surf->levels == 1);
2918    assert(surf->logical_level0_px.depth == 1);
2919    assert(isl_format_supports_multisampling(dev->info, surf->format));
2920 
2921    enum isl_format mcs_format;
2922    switch (surf->samples) {
2923    case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
2924    case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
2925    case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
2926    case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
2927    default:
2928       unreachable("Invalid sample count");
2929    }
2930 
2931    return isl_surf_init(dev, mcs_surf,
2932                         .dim = ISL_SURF_DIM_2D,
2933                         .format = mcs_format,
2934                         .width = surf->logical_level0_px.width,
2935                         .height = surf->logical_level0_px.height,
2936                         .depth = 1,
2937                         .levels = 1,
2938                         .array_len = surf->logical_level0_px.array_len,
2939                         .samples = 1, /* MCS surfaces are really single-sampled */
2940                         .usage = ISL_SURF_USAGE_MCS_BIT,
2941                         .tiling_flags = ISL_TILING_ANY_MASK);
2942 }
2943 
2944 bool
isl_surf_supports_ccs(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * hiz_or_mcs_surf)2945 isl_surf_supports_ccs(const struct isl_device *dev,
2946                       const struct isl_surf *surf,
2947                       const struct isl_surf *hiz_or_mcs_surf)
2948 {
2949    if (INTEL_DEBUG(DEBUG_NO_CCS))
2950       return false;
2951 
2952    if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
2953       return false;
2954 
2955    if (!isl_format_supports_ccs_d(dev->info, surf->format) &&
2956        !isl_format_supports_ccs_e(dev->info, surf->format))
2957       return false;
2958 
2959    /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2960     * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2961     *
2962     *     - Support is limited to tiled render targets.
2963     *
2964     * From the Skylake documentation, it is made clear that X-tiling is no
2965     * longer supported:
2966     *
2967     *     - MCS and Lossless compression is supported for
2968     *       TiledY/TileYs/TileYf non-MSRTs only.
2969     *
2970     * From the BSpec (44930) for Gfx12:
2971     *
2972     *    Linear CCS is only allowed for Untyped Buffers but only via HDC
2973     *    Data-Port messages.
2974     *
2975     * We never use untyped messages on surfaces created by ISL on Gfx9+ so
2976     * this means linear is out on Gfx12+ as well.
2977     */
2978    if (surf->tiling == ISL_TILING_LINEAR)
2979       return false;
2980 
2981    /* TODO: Disable for now, as we're not sure about the meaning of
2982     * 3DSTATE_CPSIZE_CONTROL_BUFFER::CPCBCompressionEnable
2983     */
2984    if (isl_surf_usage_is_cpb(surf->usage))
2985       return false;
2986 
2987    /* SKL PRMs, Volume 5: Memory Views, Tiling and Mip Tails for 2D Surfaces:
2988     *
2989     *    "Lossless compression must not be used on surfaces which have MIP
2990     *     Tail which contains MIPs for Slots greater than 11."
2991     */
2992    if (surf->miptail_start_level < surf->levels) {
2993       const uint32_t miptail_levels = surf->levels - surf->miptail_start_level;
2994       if (miptail_levels + isl_get_miptail_base_row(surf->tiling) > 11) {
2995          assert(isl_tiling_is_64(surf->tiling) ||
2996                 isl_tiling_is_std_y(surf->tiling));
2997          return false;
2998       }
2999    }
3000 
3001    /* From the workarounds section in the SKL PRM:
3002     *
3003     *    "RCC cacheline is composed of X-adjacent 64B fragments instead of
3004     *     memory adjacent. This causes a single 128B cacheline to straddle
3005     *     multiple LODs inside the TYF MIPtail for 3D surfaces (beyond a
3006     *     certain slot number), leading to corruption when CCS is enabled
3007     *     for these LODs and RT is later bound as texture. WA: If
3008     *     RENDER_SURFACE_STATE.Surface Type = 3D and
3009     *     RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE and
3010     *     RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS, Set the
3011     *     value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip that
3012     *     larger than those present in the surface (i.e. 15)"
3013     *
3014     * We simply disallow CCS on 3D surfaces with miptails.
3015     *
3016     * Referred to as Wa_1207137018 on ICL+
3017     */
3018    if (ISL_GFX_VERX10(dev) <= 120 &&
3019        surf->dim == ISL_SURF_DIM_3D &&
3020        surf->miptail_start_level < surf->levels) {
3021       assert(isl_tiling_is_std_y(surf->tiling));
3022       return false;
3023    }
3024 
3025    /* TODO: add CCS support for Ys/Yf */
3026    if (isl_tiling_is_std_y(surf->tiling))
3027       return false;
3028 
3029    if (ISL_GFX_VER(dev) >= 12) {
3030       if (isl_surf_usage_is_stencil(surf->usage)) {
3031          /* HiZ and MCS aren't allowed with stencil */
3032          assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
3033 
3034          /* Multi-sampled stencil cannot have CCS */
3035          if (surf->samples > 1)
3036             return false;
3037       } else if (isl_surf_usage_is_depth(surf->usage)) {
3038          const struct isl_surf *hiz_surf = hiz_or_mcs_surf;
3039 
3040          /* With depth surfaces, HIZ is required for CCS. */
3041          if (hiz_surf == NULL || hiz_surf->size_B == 0)
3042             return false;
3043 
3044          assert(hiz_surf->usage & ISL_SURF_USAGE_HIZ_BIT);
3045          assert(hiz_surf->tiling == ISL_TILING_HIZ);
3046          assert(isl_format_is_hiz(hiz_surf->format));
3047       } else if (surf->samples > 1) {
3048          const struct isl_surf *mcs_surf = hiz_or_mcs_surf;
3049 
3050          /* With multisampled color, CCS requires MCS */
3051          if (mcs_surf == NULL || mcs_surf->size_B == 0)
3052             return false;
3053 
3054          assert(mcs_surf->usage & ISL_SURF_USAGE_MCS_BIT);
3055          assert(isl_format_is_mcs(mcs_surf->format));
3056       } else {
3057          /* Single-sampled color can't have MCS or HiZ */
3058          assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
3059       }
3060 
3061       /* On Gfx12, all CCS-compressed surface pitches must be multiples of
3062        * 512B.
3063        */
3064       if (surf->row_pitch_B % 512 != 0)
3065          return false;
3066 
3067       /* TODO: According to Wa_1406738321, 3D textures need a blit to a new
3068        * surface in order to perform a resolve. For now, just disable CCS.
3069        */
3070       if (surf->dim == ISL_SURF_DIM_3D)
3071          return false;
3072 
3073       /* BSpec 44930: (Gfx12, Gfx12.5)
3074        *
3075        *    "Compression of 3D Ys surfaces with 64 or 128 bpp is not supported
3076        *     in Gen12. Moreover, "Render Target Fast-clear Enable" command is
3077        *     not supported for any 3D Ys surfaces. except when Surface is a
3078        *     Procdural Texture."
3079        *
3080        * Since the note applies to MTL, we apply this to TILE64 too.
3081        */
3082       uint32_t format_bpb = isl_format_get_layout(surf->format)->bpb;
3083       if (ISL_GFX_VER(dev) == 12 &&
3084           surf->dim == ISL_SURF_DIM_3D &&
3085           (surf->tiling == ISL_TILING_ICL_Ys ||
3086            isl_tiling_is_64(surf->tiling)) &&
3087           (format_bpb == 64 || format_bpb == 128))
3088          return false;
3089 
3090       /* TODO: Handle the other tiling formats */
3091       if (surf->tiling != ISL_TILING_Y0 &&
3092           surf->tiling != ISL_TILING_4 &&
3093           !isl_tiling_is_64(surf->tiling))
3094          return false;
3095 
3096       /* TODO: Handle single-sampled Tile64. */
3097       if (surf->samples == 1 && isl_tiling_is_64(surf->tiling))
3098          return false;
3099    } else {
3100       /* ISL_GFX_VER(dev) < 12 */
3101       if (surf->samples > 1)
3102          return false;
3103 
3104       /* CCS is only for color images on Gfx7-11 */
3105       if (isl_surf_usage_is_depth_or_stencil(surf->usage))
3106          return false;
3107 
3108       /* We're single-sampled color so having HiZ or MCS makes no sense */
3109       assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
3110 
3111       /* The PRM doesn't say this explicitly, but fast-clears don't appear to
3112        * work for 3D textures until gfx9 where the layout of 3D textures
3113        * changes to match 2D array textures.
3114        */
3115       if (ISL_GFX_VER(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
3116          return false;
3117 
3118       /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
3119        * Non-MultiSampler Render Target Restrictions):
3120        *
3121        *    "Support is for non-mip-mapped and non-array surface types only."
3122        *
3123        * This restriction is lifted on gfx8+.  Technically, it may be possible
3124        * to create a CCS for an arrayed or mipmapped image and only enable
3125        * CCS_D when rendering to the base slice.  However, there is no
3126        * documentation tell us what the hardware would do in that case or what
3127        * it does if you walk off the bases slice.  (Does it ignore CCS or does
3128        * it start scribbling over random memory?)  We play it safe and just
3129        * follow the docs and don't allow CCS_D for arrayed or mip-mapped
3130        * surfaces.
3131        */
3132       if (ISL_GFX_VER(dev) <= 7 &&
3133           (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
3134          return false;
3135 
3136       /* From the Skylake documentation, it is made clear that X-tiling is no
3137        * longer supported:
3138        *
3139        *     - MCS and Lossless compression is supported for
3140        *     TiledY/TileYs/TileYf non-MSRTs only.
3141        */
3142       if (ISL_GFX_VER(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling))
3143          return false;
3144    }
3145 
3146    return true;
3147 }
3148 
3149 bool
isl_surf_get_ccs_surf(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * hiz_or_mcs_surf,struct isl_surf * ccs_surf,uint32_t row_pitch_B)3150 isl_surf_get_ccs_surf(const struct isl_device *dev,
3151                       const struct isl_surf *surf,
3152                       const struct isl_surf *hiz_or_mcs_surf,
3153                       struct isl_surf *ccs_surf,
3154                       uint32_t row_pitch_B)
3155 {
3156    if (!isl_surf_supports_ccs(dev, surf, hiz_or_mcs_surf))
3157       return false;
3158 
3159    if (ISL_GFX_VER(dev) >= 12) {
3160       enum isl_format ccs_format;
3161       switch (isl_format_get_layout(surf->format)->bpb) {
3162       case 8:     ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0;    break;
3163       case 16:    ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0;   break;
3164       case 32:    ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0;   break;
3165       case 64:    ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0;   break;
3166       case 128:   ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0;  break;
3167       default:
3168          return false;
3169       }
3170 
3171       /* On Gfx12, the CCS is a scaled-down version of the main surface. We
3172        * model this as the CCS compressing a 2D-view of the entire surface.
3173        */
3174       const bool ok =
3175          isl_surf_init(dev, ccs_surf,
3176                        .dim = ISL_SURF_DIM_2D,
3177                        .format = ccs_format,
3178                        .width = isl_surf_get_row_pitch_el(surf),
3179                        .height = surf->size_B / surf->row_pitch_B,
3180                        .depth = 1,
3181                        .levels = 1,
3182                        .array_len = 1,
3183                        .samples = 1,
3184                        .row_pitch_B = row_pitch_B,
3185                        .usage = ISL_SURF_USAGE_CCS_BIT,
3186                        .tiling_flags = ISL_TILING_GFX12_CCS_BIT);
3187       assert(!ok || ccs_surf->size_B == surf->size_B / 256);
3188       return ok;
3189    } else {
3190       enum isl_format ccs_format;
3191       if (ISL_GFX_VER(dev) >= 9) {
3192          switch (isl_format_get_layout(surf->format)->bpb) {
3193          case 32:    ccs_format = ISL_FORMAT_GFX9_CCS_32BPP;   break;
3194          case 64:    ccs_format = ISL_FORMAT_GFX9_CCS_64BPP;   break;
3195          case 128:   ccs_format = ISL_FORMAT_GFX9_CCS_128BPP;  break;
3196          default:    unreachable("Unsupported CCS format");
3197             return false;
3198          }
3199       } else if (surf->tiling == ISL_TILING_Y0) {
3200          switch (isl_format_get_layout(surf->format)->bpb) {
3201          case 32:    ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_Y;    break;
3202          case 64:    ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_Y;    break;
3203          case 128:   ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_Y;   break;
3204          default:    unreachable("Unsupported CCS format");
3205          }
3206       } else if (surf->tiling == ISL_TILING_X) {
3207          switch (isl_format_get_layout(surf->format)->bpb) {
3208          case 32:    ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_X;    break;
3209          case 64:    ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_X;    break;
3210          case 128:   ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_X;   break;
3211          default:    unreachable("Unsupported CCS format");
3212          }
3213       } else {
3214          unreachable("Invalid tiling format");
3215       }
3216 
3217       return isl_surf_init(dev, ccs_surf,
3218                            .dim = surf->dim,
3219                            .format = ccs_format,
3220                            .width = surf->logical_level0_px.width,
3221                            .height = surf->logical_level0_px.height,
3222                            .depth = surf->logical_level0_px.depth,
3223                            .levels = surf->levels,
3224                            .array_len = surf->logical_level0_px.array_len,
3225                            .samples = 1,
3226                            .row_pitch_B = row_pitch_B,
3227                            .usage = ISL_SURF_USAGE_CCS_BIT,
3228                            .tiling_flags = ISL_TILING_CCS_BIT);
3229    }
3230 }
3231 
/* Call the per-generation variant isl_gfx<N>_<func>(...) selected by
 * ISL_GFX_VERX10(dev).  An unrecognized value trips an assert.
 */
#define isl_genX_call(dev, func, ...)                          \
   switch (ISL_GFX_VERX10(dev)) {                              \
   case 40:  isl_gfx4_##func(__VA_ARGS__);    break;           \
   case 45:  /* G45 surface state is the same as gfx5 */       \
   case 50:  isl_gfx5_##func(__VA_ARGS__);    break;           \
   case 60:  isl_gfx6_##func(__VA_ARGS__);    break;           \
   case 70:  isl_gfx7_##func(__VA_ARGS__);    break;           \
   case 75:  isl_gfx75_##func(__VA_ARGS__);   break;           \
   case 80:  isl_gfx8_##func(__VA_ARGS__);    break;           \
   case 90:  isl_gfx9_##func(__VA_ARGS__);    break;           \
   case 110: isl_gfx11_##func(__VA_ARGS__);   break;           \
   case 120: isl_gfx12_##func(__VA_ARGS__);   break;           \
   case 125: isl_gfx125_##func(__VA_ARGS__);  break;           \
   case 200: isl_gfx20_##func(__VA_ARGS__);   break;           \
   default:  assert(!"Unknown hardware generation");           \
   }
3272 
3273 /**
3274  * A variant of isl_surf_get_image_offset_sa() specific to
3275  * ISL_DIM_LAYOUT_GFX4_2D.
3276  */
3277 static void
get_image_offset_sa_gfx4_2d(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa,uint32_t * z_offset_sa,uint32_t * array_offset)3278 get_image_offset_sa_gfx4_2d(const struct isl_surf *surf,
3279                             uint32_t level, uint32_t logical_array_layer,
3280                             uint32_t *x_offset_sa,
3281                             uint32_t *y_offset_sa,
3282                             uint32_t *z_offset_sa,
3283                             uint32_t *array_offset)
3284 {
3285    assert(level < surf->levels);
3286    if (surf->dim == ISL_SURF_DIM_3D)
3287       assert(logical_array_layer < surf->logical_level0_px.depth);
3288    else
3289       assert(logical_array_layer < surf->logical_level0_px.array_len);
3290 
3291    const struct isl_extent3d image_align_sa =
3292       isl_surf_get_image_alignment_sa(surf);
3293 
3294    const uint32_t W0 = surf->phys_level0_sa.width;
3295    const uint32_t H0 = surf->phys_level0_sa.height;
3296 
3297    const uint32_t phys_layer = logical_array_layer *
3298       (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
3299 
3300    uint32_t x = 0, y;
3301    if (isl_tiling_is_std_y(surf->tiling) ||
3302        isl_tiling_is_64(surf->tiling)) {
3303       y = 0;
3304       if (surf->dim == ISL_SURF_DIM_3D) {
3305          *z_offset_sa = logical_array_layer;
3306          *array_offset = 0;
3307       } else {
3308          *z_offset_sa = 0;
3309          *array_offset = phys_layer;
3310       }
3311    } else {
3312       y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
3313       *z_offset_sa = 0;
3314       *array_offset = 0;
3315    }
3316 
3317    for (uint32_t l = 0; l < MIN(level, surf->miptail_start_level); ++l) {
3318       if (l == 1) {
3319          uint32_t W = isl_minify(W0, l);
3320          x += isl_align_npot(W, image_align_sa.w);
3321       } else {
3322          uint32_t H = isl_minify(H0, l);
3323          y += isl_align_npot(H, image_align_sa.h);
3324       }
3325    }
3326 
3327    *x_offset_sa = x;
3328    *y_offset_sa = y;
3329 
3330    if (level >= surf->miptail_start_level) {
3331       const struct isl_format_layout *fmtl =
3332          isl_format_get_layout(surf->format);
3333 
3334       uint32_t tail_offset_x_el, tail_offset_y_el, tail_offset_z_el;
3335       isl_get_miptail_level_offset_el(surf->tiling, surf->dim,
3336                                       fmtl->bpb,
3337                                       level - surf->miptail_start_level,
3338                                       &tail_offset_x_el,
3339                                       &tail_offset_y_el,
3340                                       &tail_offset_z_el);
3341       *x_offset_sa += tail_offset_x_el * fmtl->bw;
3342       *y_offset_sa += tail_offset_y_el * fmtl->bh;
3343       *z_offset_sa += tail_offset_z_el * fmtl->bd;
3344    }
3345 }
3346 
3347 /**
3348  * A variant of isl_surf_get_image_offset_sa() specific to
3349  * ISL_DIM_LAYOUT_GFX4_3D.
3350  */
3351 static void
get_image_offset_sa_gfx4_3d(const struct isl_surf * surf,uint32_t level,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3352 get_image_offset_sa_gfx4_3d(const struct isl_surf *surf,
3353                             uint32_t level, uint32_t logical_z_offset_px,
3354                             uint32_t *x_offset_sa,
3355                             uint32_t *y_offset_sa)
3356 {
3357    assert(level < surf->levels);
3358    if (surf->dim == ISL_SURF_DIM_3D) {
3359       assert(surf->phys_level0_sa.array_len == 1);
3360       assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
3361    } else {
3362       assert(surf->dim == ISL_SURF_DIM_2D);
3363       assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
3364       assert(surf->phys_level0_sa.array_len == 6);
3365       assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
3366    }
3367 
3368    const struct isl_extent3d image_align_sa =
3369       isl_surf_get_image_alignment_sa(surf);
3370 
3371    const uint32_t W0 = surf->phys_level0_sa.width;
3372    const uint32_t H0 = surf->phys_level0_sa.height;
3373    const uint32_t D0 = surf->phys_level0_sa.depth;
3374    const uint32_t AL = surf->phys_level0_sa.array_len;
3375 
3376    uint32_t x = 0;
3377    uint32_t y = 0;
3378 
3379    for (uint32_t l = 0; l < level; ++l) {
3380       const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
3381       const uint32_t level_d =
3382          isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
3383                         image_align_sa.d);
3384       const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
3385 
3386       y += level_h * max_layers_vert;
3387    }
3388 
3389    const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
3390    const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
3391    const uint32_t level_d =
3392       isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
3393                      image_align_sa.d);
3394 
3395    const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
3396 
3397    x += level_w * (logical_z_offset_px % max_layers_horiz);
3398    y += level_h * (logical_z_offset_px / max_layers_horiz);
3399 
3400    *x_offset_sa = x;
3401    *y_offset_sa = y;
3402 }
3403 
3404 static void
get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3405 get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf *surf,
3406                                      uint32_t level,
3407                                      uint32_t logical_array_layer,
3408                                      uint32_t *x_offset_sa,
3409                                      uint32_t *y_offset_sa)
3410 {
3411    assert(level < surf->levels);
3412    assert(surf->logical_level0_px.depth == 1);
3413    assert(logical_array_layer < surf->logical_level0_px.array_len);
3414 
3415    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3416 
3417    const struct isl_extent3d image_align_sa =
3418       isl_surf_get_image_alignment_sa(surf);
3419 
3420    struct isl_tile_info tile_info;
3421    isl_surf_get_tile_info(surf, &tile_info);
3422    const struct isl_extent2d tile_extent_sa = {
3423       .w = tile_info.logical_extent_el.w * fmtl->bw,
3424       .h = tile_info.logical_extent_el.h * fmtl->bh,
3425    };
3426    /* Tile size is a multiple of image alignment */
3427    assert(tile_extent_sa.w % image_align_sa.w == 0);
3428    assert(tile_extent_sa.h % image_align_sa.h == 0);
3429 
3430    const uint32_t W0 = surf->phys_level0_sa.w;
3431    const uint32_t H0 = surf->phys_level0_sa.h;
3432 
3433    /* Each image has the same height as LOD0 because the hardware thinks
3434     * everything is LOD0
3435     */
3436    const uint32_t H = isl_align(H0, image_align_sa.h);
3437 
3438    /* Quick sanity check for consistency */
3439    if (surf->phys_level0_sa.array_len > 1)
3440       assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
3441 
3442    uint32_t x = 0, y = 0;
3443    for (uint32_t l = 0; l < level; ++l) {
3444       const uint32_t W = isl_minify(W0, l);
3445 
3446       const uint32_t w = isl_align(W, tile_extent_sa.w);
3447       const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
3448                                    tile_extent_sa.h);
3449 
3450       if (l == 0) {
3451          y += h;
3452       } else {
3453          x += w;
3454       }
3455    }
3456 
3457    y += H * logical_array_layer;
3458 
3459    *x_offset_sa = x;
3460    *y_offset_sa = y;
3461 }
3462 
3463 /**
3464  * A variant of isl_surf_get_image_offset_sa() specific to
3465  * ISL_DIM_LAYOUT_GFX9_1D.
3466  */
3467 static void
get_image_offset_sa_gfx9_1d(const struct isl_surf * surf,uint32_t level,uint32_t layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3468 get_image_offset_sa_gfx9_1d(const struct isl_surf *surf,
3469                             uint32_t level, uint32_t layer,
3470                             uint32_t *x_offset_sa,
3471                             uint32_t *y_offset_sa)
3472 {
3473    assert(level < surf->levels);
3474    assert(layer < surf->phys_level0_sa.array_len);
3475    assert(surf->phys_level0_sa.height == 1);
3476    assert(surf->phys_level0_sa.depth == 1);
3477    assert(surf->samples == 1);
3478 
3479    const uint32_t W0 = surf->phys_level0_sa.width;
3480    const struct isl_extent3d image_align_sa =
3481       isl_surf_get_image_alignment_sa(surf);
3482 
3483    uint32_t x = 0;
3484 
3485    for (uint32_t l = 0; l < level; ++l) {
3486       uint32_t W = isl_minify(W0, l);
3487       uint32_t w = isl_align_npot(W, image_align_sa.w);
3488 
3489       x += w;
3490    }
3491 
3492    *x_offset_sa = x;
3493    *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
3494 }
3495 
3496 /**
3497  * Calculate the offset, in units of surface samples, to a subimage in the
3498  * surface.
3499  *
3500  * @invariant level < surface levels
3501  * @invariant logical_array_layer < logical array length of surface
3502  * @invariant logical_z_offset_px < logical depth of surface at level
3503  */
3504 void
isl_surf_get_image_offset_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa,uint32_t * z_offset_sa,uint32_t * array_offset)3505 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
3506                              uint32_t level,
3507                              uint32_t logical_array_layer,
3508                              uint32_t logical_z_offset_px,
3509                              uint32_t *x_offset_sa,
3510                              uint32_t *y_offset_sa,
3511                              uint32_t *z_offset_sa,
3512                              uint32_t *array_offset)
3513 {
3514    assert(level < surf->levels);
3515    assert(logical_array_layer < surf->logical_level0_px.array_len);
3516    assert(logical_z_offset_px
3517           < isl_minify(surf->logical_level0_px.depth, level));
3518 
3519    switch (surf->dim_layout) {
3520    case ISL_DIM_LAYOUT_GFX9_1D:
3521       get_image_offset_sa_gfx9_1d(surf, level, logical_array_layer,
3522                                   x_offset_sa, y_offset_sa);
3523       *z_offset_sa = 0;
3524       *array_offset = 0;
3525       break;
3526    case ISL_DIM_LAYOUT_GFX4_2D:
3527       get_image_offset_sa_gfx4_2d(surf, level, logical_array_layer
3528                                   + logical_z_offset_px,
3529                                   x_offset_sa, y_offset_sa,
3530                                   z_offset_sa, array_offset);
3531       break;
3532    case ISL_DIM_LAYOUT_GFX4_3D:
3533       get_image_offset_sa_gfx4_3d(surf, level, logical_array_layer +
3534                                   logical_z_offset_px,
3535                                   x_offset_sa, y_offset_sa);
3536       *z_offset_sa = 0;
3537       *array_offset = 0;
3538       break;
3539    case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
3540       get_image_offset_sa_gfx6_stencil_hiz(surf, level, logical_array_layer +
3541                                            logical_z_offset_px,
3542                                            x_offset_sa, y_offset_sa);
3543       *z_offset_sa = 0;
3544       *array_offset = 0;
3545       break;
3546 
3547    default:
3548       unreachable("not reached");
3549    }
3550 }
3551 
3552 void
isl_surf_get_image_offset_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)3553 isl_surf_get_image_offset_el(const struct isl_surf *surf,
3554                              uint32_t level,
3555                              uint32_t logical_array_layer,
3556                              uint32_t logical_z_offset_px,
3557                              uint32_t *x_offset_el,
3558                              uint32_t *y_offset_el,
3559                              uint32_t *z_offset_el,
3560                              uint32_t *array_offset)
3561 {
3562    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3563 
3564    assert(level < surf->levels);
3565    assert(logical_array_layer < surf->logical_level0_px.array_len);
3566    assert(logical_z_offset_px
3567           < isl_minify(surf->logical_level0_px.depth, level));
3568 
3569    uint32_t x_offset_sa, y_offset_sa, z_offset_sa;
3570    isl_surf_get_image_offset_sa(surf, level,
3571                                 logical_array_layer,
3572                                 logical_z_offset_px,
3573                                 &x_offset_sa,
3574                                 &y_offset_sa,
3575                                 &z_offset_sa,
3576                                 array_offset);
3577 
3578    *x_offset_el = x_offset_sa / fmtl->bw;
3579    *y_offset_el = y_offset_sa / fmtl->bh;
3580    *z_offset_el = z_offset_sa / fmtl->bd;
3581 }
3582 
3583 void
isl_surf_get_image_offset_B_tile_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3584 isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
3585                                     uint32_t level,
3586                                     uint32_t logical_array_layer,
3587                                     uint32_t logical_z_offset_px,
3588                                     uint64_t *offset_B,
3589                                     uint32_t *x_offset_sa,
3590                                     uint32_t *y_offset_sa)
3591 {
3592    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3593 
3594    uint32_t x_offset_el, y_offset_el;
3595    isl_surf_get_image_offset_B_tile_el(surf, level,
3596                                        logical_array_layer,
3597                                        logical_z_offset_px,
3598                                        offset_B,
3599                                        &x_offset_el,
3600                                        &y_offset_el);
3601 
3602    if (x_offset_sa) {
3603       *x_offset_sa = x_offset_el * fmtl->bw;
3604    } else {
3605       assert(x_offset_el == 0);
3606    }
3607 
3608    if (y_offset_sa) {
3609       *y_offset_sa = y_offset_el * fmtl->bh;
3610    } else {
3611       assert(y_offset_el == 0);
3612    }
3613 }
3614 
3615 void
isl_surf_get_image_offset_B_tile_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el)3616 isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
3617                                     uint32_t level,
3618                                     uint32_t logical_array_layer,
3619                                     uint32_t logical_z_offset_px,
3620                                     uint64_t *offset_B,
3621                                     uint32_t *x_offset_el,
3622                                     uint32_t *y_offset_el)
3623 {
3624    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3625 
3626    uint32_t total_x_offset_el, total_y_offset_el;
3627    uint32_t total_z_offset_el, total_array_offset;
3628    isl_surf_get_image_offset_el(surf, level, logical_array_layer,
3629                                 logical_z_offset_px,
3630                                 &total_x_offset_el,
3631                                 &total_y_offset_el,
3632                                 &total_z_offset_el,
3633                                 &total_array_offset);
3634 
3635    uint32_t z_offset_el, array_offset;
3636    isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
3637                                       surf->msaa_layout, fmtl->bpb,
3638                                       surf->samples,
3639                                       surf->row_pitch_B,
3640                                       surf->array_pitch_el_rows,
3641                                       total_x_offset_el,
3642                                       total_y_offset_el,
3643                                       total_z_offset_el,
3644                                       total_array_offset,
3645                                       offset_B,
3646                                       x_offset_el,
3647                                       y_offset_el,
3648                                       &z_offset_el,
3649                                       &array_offset);
3650    if (level >= surf->miptail_start_level) {
3651       /* We can do a byte offset to the first level of a miptail but we cannot
3652        * offset into a miptail.
3653        */
3654       assert(level == surf->miptail_start_level);
3655 
3656       /* The byte offset will get us to the miptail page.  The other offsets
3657        * are to the actual level within the miptail.  It is assumed that the
3658        * caller will set up a texture with a miptail and use the hardware to
3659        * handle offseting inside the miptail.
3660        */
3661       *x_offset_el = 0;
3662       *y_offset_el = 0;
3663    } else {
3664       assert(z_offset_el == 0);
3665       assert(array_offset == 0);
3666    }
3667 }
3668 
3669 void
isl_surf_get_image_range_B_tile(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * start_tile_B,uint64_t * end_tile_B)3670 isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
3671                                 uint32_t level,
3672                                 uint32_t logical_array_layer,
3673                                 uint32_t logical_z_offset_px,
3674                                 uint64_t *start_tile_B,
3675                                 uint64_t *end_tile_B)
3676 {
3677    uint32_t start_x_offset_el, start_y_offset_el;
3678    uint32_t start_z_offset_el, start_array_slice;
3679    isl_surf_get_image_offset_el(surf, level, logical_array_layer,
3680                                 logical_z_offset_px,
3681                                 &start_x_offset_el,
3682                                 &start_y_offset_el,
3683                                 &start_z_offset_el,
3684                                 &start_array_slice);
3685 
3686    /* Compute the size of the subimage in surface elements */
3687    const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level);
3688    const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level);
3689    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
3690    const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw);
3691    const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh);
3692 
3693    /* Find the last pixel */
3694    uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1;
3695    uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1;
3696 
3697    /* We only consider one Z or array slice */
3698    const uint32_t end_z_offset_el = start_z_offset_el;
3699    const uint32_t end_array_slice = start_array_slice;
3700 
3701    UNUSED uint32_t x_offset_el, y_offset_el, z_offset_el, array_slice;
3702    isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
3703                                       surf->msaa_layout, fmtl->bpb,
3704                                       surf->samples,
3705                                       surf->row_pitch_B,
3706                                       surf->array_pitch_el_rows,
3707                                       start_x_offset_el,
3708                                       start_y_offset_el,
3709                                       start_z_offset_el,
3710                                       start_array_slice,
3711                                       start_tile_B,
3712                                       &x_offset_el,
3713                                       &y_offset_el,
3714                                       &z_offset_el,
3715                                       &array_slice);
3716 
3717    isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
3718                                       surf->msaa_layout, fmtl->bpb,
3719                                       surf->samples,
3720                                       surf->row_pitch_B,
3721                                       surf->array_pitch_el_rows,
3722                                       end_x_offset_el,
3723                                       end_y_offset_el,
3724                                       end_z_offset_el,
3725                                       end_array_slice,
3726                                       end_tile_B,
3727                                       &x_offset_el,
3728                                       &y_offset_el,
3729                                       &z_offset_el,
3730                                       &array_slice);
3731 
3732    /* We want the range we return to be exclusive but the tile containing the
3733     * last pixel (what we just calculated) is inclusive.  Add one.
3734     */
3735    (*end_tile_B)++;
3736 
3737    assert(*end_tile_B <= surf->size_B);
3738 }
3739 
3740 void
isl_surf_get_image_surf(const struct isl_device * dev,const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,struct isl_surf * image_surf,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)3741 isl_surf_get_image_surf(const struct isl_device *dev,
3742                         const struct isl_surf *surf,
3743                         uint32_t level,
3744                         uint32_t logical_array_layer,
3745                         uint32_t logical_z_offset_px,
3746                         struct isl_surf *image_surf,
3747                         uint64_t *offset_B,
3748                         uint32_t *x_offset_sa,
3749                         uint32_t *y_offset_sa)
3750 {
3751    isl_surf_get_image_offset_B_tile_sa(surf,
3752                                        level,
3753                                        logical_array_layer,
3754                                        logical_z_offset_px,
3755                                        offset_B,
3756                                        x_offset_sa,
3757                                        y_offset_sa);
3758 
3759    /* Even for cube maps there will be only single face, therefore drop the
3760     * corresponding flag if present.
3761     */
3762    const isl_surf_usage_flags_t usage =
3763       surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
3764 
3765    bool ok UNUSED;
3766    ok = isl_surf_init(dev, image_surf,
3767                       .dim = ISL_SURF_DIM_2D,
3768                       .format = surf->format,
3769                       .width = isl_minify(surf->logical_level0_px.w, level),
3770                       .height = isl_minify(surf->logical_level0_px.h, level),
3771                       .depth = 1,
3772                       .levels = 1,
3773                       .array_len = 1,
3774                       .samples = surf->samples,
3775                       .row_pitch_B = surf->row_pitch_B,
3776                       .usage = usage,
3777                       .tiling_flags = (1 << surf->tiling));
3778    assert(ok);
3779 }
3780 
3781 bool
isl_surf_get_uncompressed_surf(const struct isl_device * dev,const struct isl_surf * _surf,const struct isl_view * _view,struct isl_surf * ucompr_surf,struct isl_view * ucompr_view,uint64_t * offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el)3782 isl_surf_get_uncompressed_surf(const struct isl_device *dev,
3783                                const struct isl_surf *_surf,
3784                                const struct isl_view *_view,
3785                                struct isl_surf *ucompr_surf,
3786                                struct isl_view *ucompr_view,
3787                                uint64_t *offset_B,
3788                                uint32_t *x_offset_el,
3789                                uint32_t *y_offset_el)
3790 {
3791    /* Input and output pointers may be the same, save the input contents now. */
3792    const struct isl_surf __surf = *_surf, *surf = &__surf;
3793    const struct isl_view __view = *_view, *view = &__view;
3794    const struct isl_format_layout *fmtl =
3795       isl_format_get_layout(surf->format);
3796    const enum isl_format view_format = view->format;
3797 
3798    assert(fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1);
3799    assert(isl_format_is_compressed(surf->format));
3800    assert(!isl_format_is_compressed(view->format));
3801    assert(isl_format_get_layout(view->format)->bpb == fmtl->bpb);
3802    assert(view->levels == 1);
3803 
3804    const uint32_t view_width_px =
3805       isl_minify(surf->logical_level0_px.width, view->base_level);
3806    const uint32_t view_height_px =
3807       isl_minify(surf->logical_level0_px.height, view->base_level);
3808 
3809    assert(surf->samples == 1);
3810    const uint32_t view_width_el = isl_align_div_npot(view_width_px, fmtl->bw);
3811    const uint32_t view_height_el = isl_align_div_npot(view_height_px, fmtl->bh);
3812 
3813    /* If we ever enable 3D block formats, we'll need to re-think this */
3814    assert(fmtl->bd == 1);
3815 
3816    if (isl_tiling_is_std_y(surf->tiling) ||
3817        isl_tiling_is_64(surf->tiling)) {
3818       /* If the requested level is not part of the miptail, we just offset to
3819        * the requested level. Because we're using standard tilings and aren't
3820        * in the miptail, arrays and 3D textures should just work so long as we
3821        * have the right array stride in the end.
3822        *
3823        * If the requested level is in the miptail, we instead offset to the
3824        * base of the miptail.  Because offsets into the miptail are fixed by
3825        * the tiling and don't depend on the actual size of the image, we can
3826        * set the level in the view to offset into the miptail regardless of
3827        * the fact minification yields different results for the compressed and
3828        * uncompressed surface.
3829        */
3830       const uint32_t base_level =
3831          MIN(view->base_level, surf->miptail_start_level);
3832 
3833       isl_surf_get_image_offset_B_tile_el(surf, base_level, 0, 0,
3834                                           offset_B, x_offset_el, y_offset_el);
3835       /* Tile64, Ys and Yf should have no intratile X or Y offset */
3836       assert(*x_offset_el == 0 && *y_offset_el == 0);
3837 
3838       /* Save off the array pitch */
3839       const uint32_t array_pitch_el_rows = surf->array_pitch_el_rows;
3840 
3841       const uint32_t view_depth_px =
3842          isl_minify(surf->logical_level0_px.depth, view->base_level);
3843       const uint32_t view_depth_el =
3844          isl_align_div_npot(view_depth_px, fmtl->bd);
3845 
3846       /* We need to compute the size of the uncompressed surface we will
3847        * create. If we're not in the miptail, it is just the view size in
3848        * surface elements. If we are in a miptail, we need a size that will
3849        * minify to the view size in surface elements. This may not be the same
3850        * as the size of base_level, but that's not a problem. Slot offsets are
3851        * fixed in HW (see the tables used in isl_get_miptail_level_offset_el).
3852        */
3853       const uint32_t ucompr_level = view->base_level - base_level;
3854 
3855       /* The > 1 check is here to prevent a change in the surface's overall
3856        * dimension (e.g. 2D->3D).
3857        *
3858        * Also having a base_level dimension = 1 doesn´t mean the HW will
3859        * ignore higher mip level. Once the dimension has reached 1, it'll stay
3860        * at 1 in the higher mip levels.
3861        */
3862       struct isl_extent3d ucompr_surf_extent_el = {
3863          .w = view_width_el  > 1 ? view_width_el  << ucompr_level : 1,
3864          .h = view_height_el > 1 ? view_height_el << ucompr_level : 1,
3865          .d = view_depth_el  > 1 ? view_depth_el  << ucompr_level : 1,
3866       };
3867 
3868       bool ok UNUSED;
3869       ok = isl_surf_init(dev, ucompr_surf,
3870                          .dim = surf->dim,
3871                          .format = view->format,
3872                          .width = ucompr_surf_extent_el.width,
3873                          .height = ucompr_surf_extent_el.height,
3874                          .depth = ucompr_surf_extent_el.depth,
3875                          .levels = ucompr_level + 1,
3876                          .array_len = surf->logical_level0_px.array_len,
3877                          .samples = surf->samples,
3878                          .min_miptail_start_level =
3879                             (int) (view->base_level < surf->miptail_start_level),
3880                          .row_pitch_B = surf->row_pitch_B,
3881                          .usage = surf->usage,
3882                          .tiling_flags = (1u << surf->tiling));
3883       assert(ok);
3884 
3885       /* Use the array pitch from the original surface.  This way 2D arrays
3886        * and 3D textures should work properly, just with one LOD.
3887        */
3888       assert(ucompr_surf->array_pitch_el_rows <= array_pitch_el_rows);
3889       ucompr_surf->array_pitch_el_rows = array_pitch_el_rows;
3890 
3891       /* The newly created image represents only the one miplevel so we
3892        * need to adjust the view accordingly.  Because we offset it to
3893        * miplevel but used a Z and array slice of 0, the array range can be
3894        * left alone.
3895        */
3896       *ucompr_view = *view;
3897       ucompr_view->base_level -= base_level;
3898    } else {
3899       if (view->array_len > 1) {
3900          /* The Skylake PRM Vol. 2d, "RENDER_SURFACE_STATE::X Offset" says:
3901           *
3902           *    "If Surface Array is enabled, this field must be zero."
3903           *
3904           * The PRMs for other hardware have similar text. This is also tricky
3905           * to handle with things like BLORP's SW offsetting because the
3906           * increased surface size required for the offset may result in an
3907           * image height greater than qpitch.
3908           */
3909          if (view->base_level > 0)
3910             return false;
3911 
3912          /* On Haswell and earlier, RENDER_SURFACE_STATE doesn't have a QPitch
3913           * field; it only has "array pitch span" which means the QPitch is
3914           * automatically calculated. Since we're smashing the surface format
3915           * (block formats are subtly different) and the number of miplevels,
3916           * that calculation will get thrown off. This means we can't do
3917           * arrays even at LOD0
3918           *
3919           * On Broadwell, we do have a QPitch field which we can control.
3920           * However, HALIGN and VALIGN are specified in pixels and are
3921           * hard-coded to align to exactly the block size of the compressed
3922           * texture. This means that, when reinterpreted as a non-compressed
3923           * the QPitch may be anything but the HW requires it to be properly
3924           * aligned.
3925           */
3926          if (ISL_GFX_VER(dev) < 9)
3927             return false;
3928 
3929          *ucompr_surf = *surf;
3930          ucompr_surf->levels = 1;
3931          ucompr_surf->format = view_format;
3932 
3933          /* We're making an uncompressed view here. The image dimensions need
3934           * to be scaled down by the block size.
3935           */
3936          assert(ucompr_surf->logical_level0_px.width == view_width_px);
3937          assert(ucompr_surf->logical_level0_px.height == view_height_px);
3938          ucompr_surf->logical_level0_px.width = view_width_el;
3939          ucompr_surf->logical_level0_px.height = view_height_el;
3940          ucompr_surf->phys_level0_sa = isl_surf_get_phys_level0_el(surf);
3941 
3942          /* The surface mostly stays as-is; there is no offset */
3943          *offset_B = 0;
3944          *x_offset_el = 0;
3945          *y_offset_el = 0;
3946 
3947          /* The view remains the same */
3948          *ucompr_view = *view;
3949       } else {
3950          /* If only one array slice is requested, directly offset to that
3951           * slice. We could, in theory, still use arrays in some cases but
3952           * BLORP isn't prepared for this and everyone who calls this function
3953           * should be prepared to handle an X/Y offset.
3954           */
3955          isl_surf_get_image_offset_B_tile_el(surf,
3956                                              view->base_level,
3957                                              surf->dim == ISL_SURF_DIM_3D ?
3958                                              0 : view->base_array_layer,
3959                                              surf->dim == ISL_SURF_DIM_3D ?
3960                                              view->base_array_layer : 0,
3961                                              offset_B,
3962                                              x_offset_el,
3963                                              y_offset_el);
3964 
3965          /* Even for cube maps there will be only single face, therefore drop
3966           * the corresponding flag if present.
3967           */
3968          const isl_surf_usage_flags_t usage =
3969             surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
3970 
3971          bool ok UNUSED;
3972          ok = isl_surf_init(dev, ucompr_surf,
3973                             .dim = ISL_SURF_DIM_2D,
3974                             .format = view_format,
3975                             .width = view_width_el,
3976                             .height = view_height_el,
3977                             .depth = 1,
3978                             .levels = 1,
3979                             .array_len = 1,
3980                             .samples = 1,
3981                             .row_pitch_B = surf->row_pitch_B,
3982                             .usage = usage,
3983                             .tiling_flags = (1 << surf->tiling));
3984          assert(ok);
3985 
3986          /* The newly created image represents the one subimage we're
3987           * referencing with this view so it only has one array slice and
3988           * miplevel.
3989           */
3990          *ucompr_view = *view;
3991          ucompr_view->base_array_layer = 0;
3992          ucompr_view->base_level = 0;
3993       }
3994    }
3995 
3996    return true;
3997 }
3998 
3999 void
isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t bpb,uint32_t samples,uint32_t row_pitch_B,uint32_t array_pitch_el_rows,uint32_t total_x_offset_el,uint32_t total_y_offset_el,uint32_t total_z_offset_el,uint32_t total_array_offset,uint64_t * tile_offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)4000 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
4001                                    enum isl_surf_dim dim,
4002                                    enum isl_msaa_layout msaa_layout,
4003                                    uint32_t bpb,
4004                                    uint32_t samples,
4005                                    uint32_t row_pitch_B,
4006                                    uint32_t array_pitch_el_rows,
4007                                    uint32_t total_x_offset_el,
4008                                    uint32_t total_y_offset_el,
4009                                    uint32_t total_z_offset_el,
4010                                    uint32_t total_array_offset,
4011                                    uint64_t *tile_offset_B,
4012                                    uint32_t *x_offset_el,
4013                                    uint32_t *y_offset_el,
4014                                    uint32_t *z_offset_el,
4015                                    uint32_t *array_offset)
4016 {
4017    if (tiling == ISL_TILING_LINEAR) {
4018       assert(bpb % 8 == 0);
4019       assert(samples == 1);
4020       assert(total_z_offset_el == 0 && total_array_offset == 0);
4021       *tile_offset_B = (uint64_t)total_y_offset_el * row_pitch_B +
4022                        (uint64_t)total_x_offset_el * (bpb / 8);
4023       *x_offset_el = 0;
4024       *y_offset_el = 0;
4025       *z_offset_el = 0;
4026       *array_offset = 0;
4027       return;
4028    }
4029 
4030    struct isl_tile_info tile_info;
4031    isl_tiling_get_info(tiling, dim, msaa_layout, bpb, samples, &tile_info);
4032 
4033    /* Pitches must make sense with the tiling */
4034    assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
4035    if (tile_info.logical_extent_el.d > 1 || tile_info.logical_extent_el.a > 1)
4036       assert(array_pitch_el_rows % tile_info.logical_extent_el.h == 0);
4037 
4038    /* For non-power-of-two formats, we need the address to be both tile and
4039     * element-aligned.  The easiest way to achieve this is to work with a tile
4040     * that is three times as wide as the regular tile.
4041     *
4042     * The tile info returned by get_tile_info has a logical size that is an
4043     * integer number of tile_info.format_bpb size elements.  To scale the
4044     * tile, we scale up the physical width and then treat the logical tile
4045     * size as if it has bpb size elements.
4046     */
4047    const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
4048    tile_info.phys_extent_B.width *= tile_el_scale;
4049 
4050    /* Compute the offset into the tile */
4051    *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
4052    *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
4053    *z_offset_el = total_z_offset_el % tile_info.logical_extent_el.d;
4054    *array_offset = total_array_offset % tile_info.logical_extent_el.a;
4055 
4056    /* Compute the offset of the tile in units of whole tiles */
4057    uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
4058    uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
4059    uint32_t z_offset_tl = total_z_offset_el / tile_info.logical_extent_el.d;
4060    uint32_t a_offset_tl = total_array_offset / tile_info.logical_extent_el.a;
4061 
4062    /* Compute an array pitch in number of tiles */
4063    uint32_t array_pitch_tl_rows =
4064       array_pitch_el_rows / tile_info.logical_extent_el.h;
4065 
4066    /* Add the Z and array offset to the Y offset to get a 2D offset */
4067    y_offset_tl += (z_offset_tl + a_offset_tl) * array_pitch_tl_rows;
4068 
4069    *tile_offset_B =
4070       (uint64_t)y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
4071       (uint64_t)x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
4072 }
4073 
4074 uint32_t
isl_surf_get_depth_format(const struct isl_device * dev,const struct isl_surf * surf)4075 isl_surf_get_depth_format(const struct isl_device *dev,
4076                           const struct isl_surf *surf)
4077 {
4078    /* Support for separate stencil buffers began in gfx5. Support for
4079     * interleaved depthstencil buffers ceased in gfx7. The intermediate gens,
4080     * those that supported separate and interleaved stencil, were gfx5 and
4081     * gfx6.
4082     *
4083     * For a list of all available formats, see the Sandybridge PRM >> Volume
4084     * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
4085     * Format (p321).
4086     */
4087 
4088    bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
4089 
4090    assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
4091 
4092    if (has_stencil)
4093       assert(ISL_GFX_VER(dev) < 7);
4094 
4095    switch (surf->format) {
4096    default:
4097       unreachable("bad isl depth format");
4098    case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
4099       assert(ISL_GFX_VER(dev) < 7);
4100       return 0; /* D32_FLOAT_S8X24_UINT */
4101    case ISL_FORMAT_R32_FLOAT:
4102       assert(!has_stencil);
4103       return 1; /* D32_FLOAT */
4104    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
4105       if (has_stencil) {
4106          assert(ISL_GFX_VER(dev) < 7);
4107          return 2; /* D24_UNORM_S8_UINT */
4108       } else {
4109          assert(ISL_GFX_VER(dev) >= 5);
4110          return 3; /* D24_UNORM_X8_UINT */
4111       }
4112    case ISL_FORMAT_R16_UNORM:
4113       assert(!has_stencil);
4114       return 5; /* D16_UNORM */
4115    }
4116 }
4117 
4118 bool
isl_swizzle_supports_rendering(const struct intel_device_info * devinfo,struct isl_swizzle swizzle)4119 isl_swizzle_supports_rendering(const struct intel_device_info *devinfo,
4120                                struct isl_swizzle swizzle)
4121 {
4122    if (devinfo->platform == INTEL_PLATFORM_HSW) {
4123       /* From the Haswell PRM,
4124        * RENDER_SURFACE_STATE::Shader Channel Select Red
4125        *
4126        *    "The Shader channel selects also define which shader channels are
4127        *    written to which surface channel. If the Shader channel select is
4128        *    SCS_ZERO or SCS_ONE then it is not written to the surface. If the
4129        *    shader channel select is SCS_RED it is written to the surface red
4130        *    channel and so on. If more than one shader channel select is set
4131        *    to the same surface channel only the first shader channel in RGBA
4132        *    order will be written."
4133        */
4134       return true;
4135    } else if (devinfo->ver <= 7) {
4136       /* Ivy Bridge and early doesn't have any swizzling */
4137       return isl_swizzle_is_identity(swizzle);
4138    } else {
4139       /* From the Sky Lake PRM Vol. 2d,
4140        * RENDER_SURFACE_STATE::Shader Channel Select Red
4141        *
4142        *    "For Render Target, Red, Green and Blue Shader Channel Selects
4143        *    MUST be such that only valid components can be swapped i.e. only
4144        *    change the order of components in the pixel. Any other values for
4145        *    these Shader Channel Select fields are not valid for Render
4146        *    Targets. This also means that there MUST not be multiple shader
4147        *    channels mapped to the same RT channel."
4148        *
4149        * From the Sky Lake PRM Vol. 2d,
4150        * RENDER_SURFACE_STATE::Shader Channel Select Alpha
4151        *
4152        *    "For Render Target, this field MUST be programmed to
4153        *    value = SCS_ALPHA."
4154        */
4155       return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
4156               swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
4157               swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
4158              (swizzle.g == ISL_CHANNEL_SELECT_RED ||
4159               swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
4160               swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
4161              (swizzle.b == ISL_CHANNEL_SELECT_RED ||
4162               swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
4163               swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
4164              swizzle.r != swizzle.g &&
4165              swizzle.r != swizzle.b &&
4166              swizzle.g != swizzle.b &&
4167              swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
4168    }
4169 }
4170 
4171 static enum isl_channel_select
swizzle_select(enum isl_channel_select chan,struct isl_swizzle swizzle)4172 swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
4173 {
4174    switch (chan) {
4175    case ISL_CHANNEL_SELECT_ZERO:
4176    case ISL_CHANNEL_SELECT_ONE:
4177       return chan;
4178    case ISL_CHANNEL_SELECT_RED:
4179       return swizzle.r;
4180    case ISL_CHANNEL_SELECT_GREEN:
4181       return swizzle.g;
4182    case ISL_CHANNEL_SELECT_BLUE:
4183       return swizzle.b;
4184    case ISL_CHANNEL_SELECT_ALPHA:
4185       return swizzle.a;
4186    default:
4187       unreachable("Invalid swizzle component");
4188    }
4189 }
4190 
4191 /**
4192  * Returns the single swizzle that is equivalent to applying the two given
4193  * swizzles in sequence.
4194  */
4195 struct isl_swizzle
isl_swizzle_compose(struct isl_swizzle first,struct isl_swizzle second)4196 isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
4197 {
4198    return (struct isl_swizzle) {
4199       .r = swizzle_select(first.r, second),
4200       .g = swizzle_select(first.g, second),
4201       .b = swizzle_select(first.b, second),
4202       .a = swizzle_select(first.a, second),
4203    };
4204 }
4205 
4206 /**
4207  * Returns a swizzle that is the pseudo-inverse of this swizzle.
4208  */
4209 struct isl_swizzle
isl_swizzle_invert(struct isl_swizzle swizzle)4210 isl_swizzle_invert(struct isl_swizzle swizzle)
4211 {
4212    /* Default to zero for channels which do not show up in the swizzle */
4213    enum isl_channel_select chans[4] = {
4214       ISL_CHANNEL_SELECT_ZERO,
4215       ISL_CHANNEL_SELECT_ZERO,
4216       ISL_CHANNEL_SELECT_ZERO,
4217       ISL_CHANNEL_SELECT_ZERO,
4218    };
4219 
4220    /* We go in ABGR order so that, if there are any duplicates, the first one
4221     * is taken if you look at it in RGBA order.  This is what Haswell hardware
4222     * does for render target swizzles.
4223     */
4224    if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
4225       chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
4226    if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
4227       chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
4228    if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
4229       chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
4230    if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
4231       chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
4232 
4233    return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
4234 }
4235 
4236 static uint32_t
isl_color_value_channel(union isl_color_value src,enum isl_channel_select chan,uint32_t one)4237 isl_color_value_channel(union isl_color_value src,
4238                         enum isl_channel_select chan,
4239                         uint32_t one)
4240 {
4241    if (chan == ISL_CHANNEL_SELECT_ZERO)
4242       return 0;
4243    if (chan == ISL_CHANNEL_SELECT_ONE)
4244       return one;
4245 
4246    assert(chan >= ISL_CHANNEL_SELECT_RED);
4247    assert(chan < ISL_CHANNEL_SELECT_RED + 4);
4248 
4249    return src.u32[chan - ISL_CHANNEL_SELECT_RED];
4250 }
4251 
4252 /** Applies an inverse swizzle to a color value */
4253 union isl_color_value
isl_color_value_swizzle(union isl_color_value src,struct isl_swizzle swizzle,bool is_float)4254 isl_color_value_swizzle(union isl_color_value src,
4255                         struct isl_swizzle swizzle,
4256                         bool is_float)
4257 {
4258    uint32_t one = is_float ? 0x3f800000 : 1;
4259 
4260    return (union isl_color_value) { .u32 = {
4261       isl_color_value_channel(src, swizzle.r, one),
4262       isl_color_value_channel(src, swizzle.g, one),
4263       isl_color_value_channel(src, swizzle.b, one),
4264       isl_color_value_channel(src, swizzle.a, one),
4265    } };
4266 }
4267 
4268 /** Applies an inverse swizzle to a color value */
4269 union isl_color_value
isl_color_value_swizzle_inv(union isl_color_value src,struct isl_swizzle swizzle)4270 isl_color_value_swizzle_inv(union isl_color_value src,
4271                             struct isl_swizzle swizzle)
4272 {
4273    union isl_color_value dst = { .u32 = { 0, } };
4274 
4275    /* We assign colors in ABGR order so that the first one will be taken in
4276     * RGBA precedence order.  According to the PRM docs for shader channel
4277     * select, this matches Haswell hardware behavior.
4278     */
4279    if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
4280       dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3];
4281    if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
4282       dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2];
4283    if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
4284       dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1];
4285    if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
4286       dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0];
4287 
4288    return dst;
4289 }
4290 
4291 uint8_t
isl_format_get_aux_map_encoding(enum isl_format format)4292 isl_format_get_aux_map_encoding(enum isl_format format)
4293 {
4294    switch(format) {
4295    case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11;
4296    case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11;
4297    case ISL_FORMAT_R32G32B32A32_SINT: return 0x12;
4298    case ISL_FORMAT_R32G32B32A32_UINT: return 0x13;
4299    case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14;
4300    case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15;
4301    case ISL_FORMAT_R16G16B16A16_SINT: return 0x16;
4302    case ISL_FORMAT_R16G16B16A16_UINT: return 0x17;
4303    case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10;
4304    case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10;
4305    case ISL_FORMAT_R32G32_FLOAT: return 0x11;
4306    case ISL_FORMAT_R32G32_SINT: return 0x12;
4307    case ISL_FORMAT_R32G32_UINT: return 0x13;
4308    case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA;
4309    case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA;
4310    case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA;
4311    case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA;
4312    case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18;
4313    case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18;
4314    case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19;
4315    case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A;
4316    case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA;
4317    case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA;
4318    case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B;
4319    case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C;
4320    case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D;
4321    case ISL_FORMAT_R16G16_UNORM: return 0x14;
4322    case ISL_FORMAT_R16G16_SNORM: return 0x15;
4323    case ISL_FORMAT_R16G16_SINT: return 0x16;
4324    case ISL_FORMAT_R16G16_UINT: return 0x17;
4325    case ISL_FORMAT_R16G16_FLOAT: return 0x10;
4326    case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18;
4327    case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18;
4328    case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E;
4329    case ISL_FORMAT_R32_SINT: return 0x12;
4330    case ISL_FORMAT_R32_UINT: return 0x13;
4331    case ISL_FORMAT_R32_FLOAT: return 0x11;
4332    case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13;
4333    case ISL_FORMAT_B5G6R5_UNORM: return 0xA;
4334    case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA;
4335    case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA;
4336    case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA;
4337    case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA;
4338    case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA;
4339    case ISL_FORMAT_R8G8_UNORM: return 0xA;
4340    case ISL_FORMAT_R8G8_SNORM: return 0x1B;
4341    case ISL_FORMAT_R8G8_SINT: return 0x1C;
4342    case ISL_FORMAT_R8G8_UINT: return 0x1D;
4343    case ISL_FORMAT_R16_UNORM: return 0x14;
4344    case ISL_FORMAT_R16_SNORM: return 0x15;
4345    case ISL_FORMAT_R16_SINT: return 0x16;
4346    case ISL_FORMAT_R16_UINT: return 0x17;
4347    case ISL_FORMAT_R16_FLOAT: return 0x10;
4348    case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA;
4349    case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA;
4350    case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA;
4351    case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA;
4352    case ISL_FORMAT_R8_UNORM: return 0xA;
4353    case ISL_FORMAT_R8_SNORM: return 0x1B;
4354    case ISL_FORMAT_R8_SINT: return 0x1C;
4355    case ISL_FORMAT_R8_UINT: return 0x1D;
4356    case ISL_FORMAT_A8_UNORM: return 0xA;
4357    case ISL_FORMAT_PLANAR_420_8: return 0xF;
4358    case ISL_FORMAT_PLANAR_420_10: return 0x7;
4359    case ISL_FORMAT_PLANAR_420_12: return 0x8;
4360    case ISL_FORMAT_PLANAR_420_16: return 0x8;
4361    case ISL_FORMAT_YCRCB_NORMAL: return 0x3;
4362    case ISL_FORMAT_YCRCB_SWAPY: return 0xB;
4363    default:
4364       unreachable("Unsupported aux-map format!");
4365       return 0;
4366    }
4367 }
4368 
4369 /*
4370  * Returns compression format encoding for Unified Lossless Compression
4371  */
4372 uint8_t
isl_get_render_compression_format(enum isl_format format)4373 isl_get_render_compression_format(enum isl_format format)
4374 {
4375    /* From the Bspec, Enumeration_RenderCompressionFormat section (53726): */
4376    switch(format) {
4377    case ISL_FORMAT_R32G32B32A32_FLOAT:
4378    case ISL_FORMAT_R32G32B32X32_FLOAT:
4379    case ISL_FORMAT_R32G32B32A32_SINT:
4380       return 0x0;
4381    case ISL_FORMAT_R32G32B32A32_UINT:
4382       return 0x1;
4383    case ISL_FORMAT_R32G32_FLOAT:
4384    case ISL_FORMAT_R32G32_SINT:
4385       return 0x2;
4386    case ISL_FORMAT_R32G32_UINT:
4387       return 0x3;
4388    case ISL_FORMAT_R16G16B16A16_UNORM:
4389    case ISL_FORMAT_R16G16B16X16_UNORM:
4390    case ISL_FORMAT_R16G16B16A16_UINT:
4391       return 0x4;
4392    case ISL_FORMAT_R16G16B16A16_SNORM:
4393    case ISL_FORMAT_R16G16B16A16_SINT:
4394    case ISL_FORMAT_R16G16B16A16_FLOAT:
4395    case ISL_FORMAT_R16G16B16X16_FLOAT:
4396       return 0x5;
4397    case ISL_FORMAT_R16G16_UNORM:
4398    case ISL_FORMAT_R16G16_UINT:
4399       return 0x6;
4400    case ISL_FORMAT_R16G16_SNORM:
4401    case ISL_FORMAT_R16G16_SINT:
4402    case ISL_FORMAT_R16G16_FLOAT:
4403       return 0x7;
4404    case ISL_FORMAT_B8G8R8A8_UNORM:
4405    case ISL_FORMAT_B8G8R8X8_UNORM:
4406    case ISL_FORMAT_B8G8R8A8_UNORM_SRGB:
4407    case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
4408    case ISL_FORMAT_R8G8B8A8_UNORM:
4409    case ISL_FORMAT_R8G8B8X8_UNORM:
4410    case ISL_FORMAT_R8G8B8A8_UNORM_SRGB:
4411    case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
4412    case ISL_FORMAT_R8G8B8A8_UINT:
4413       return 0x8;
4414    case ISL_FORMAT_R8G8B8A8_SNORM:
4415    case ISL_FORMAT_R8G8B8A8_SINT:
4416       return 0x9;
4417    case ISL_FORMAT_B5G6R5_UNORM:
4418    case ISL_FORMAT_B5G6R5_UNORM_SRGB:
4419    case ISL_FORMAT_B5G5R5A1_UNORM:
4420    case ISL_FORMAT_B5G5R5A1_UNORM_SRGB:
4421    case ISL_FORMAT_B4G4R4A4_UNORM:
4422    case ISL_FORMAT_B4G4R4A4_UNORM_SRGB:
4423    case ISL_FORMAT_B5G5R5X1_UNORM:
4424    case ISL_FORMAT_B5G5R5X1_UNORM_SRGB:
4425    case ISL_FORMAT_A1B5G5R5_UNORM:
4426    case ISL_FORMAT_A4B4G4R4_UNORM:
4427    case ISL_FORMAT_R8G8_UNORM:
4428    case ISL_FORMAT_R8G8_UINT:
4429       return 0xA;
4430    case ISL_FORMAT_R8G8_SNORM:
4431    case ISL_FORMAT_R8G8_SINT:
4432       return 0xB;
4433    case ISL_FORMAT_R10G10B10A2_UNORM:
4434    case ISL_FORMAT_R10G10B10A2_UNORM_SRGB:
4435    case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM:
4436    case ISL_FORMAT_R10G10B10A2_UINT:
4437    case ISL_FORMAT_B10G10R10A2_UNORM:
4438    case ISL_FORMAT_B10G10R10X2_UNORM:
4439    case ISL_FORMAT_B10G10R10A2_UNORM_SRGB:
4440       return 0xC;
4441    case ISL_FORMAT_R11G11B10_FLOAT:
4442       return 0xD;
4443    case ISL_FORMAT_R32_SINT:
4444    case ISL_FORMAT_R32_FLOAT:
4445       return 0x10;
4446    case ISL_FORMAT_R32_UINT:
4447    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
4448       return 0x11;
4449    case ISL_FORMAT_R16_UNORM:
4450    case ISL_FORMAT_R16_UINT:
4451       return 0x14;
4452    case ISL_FORMAT_R16_SNORM:
4453    case ISL_FORMAT_R16_SINT:
4454    case ISL_FORMAT_R16_FLOAT:
4455       return 0x15;
4456    case ISL_FORMAT_R8_UNORM:
4457    case ISL_FORMAT_R8_UINT:
4458    case ISL_FORMAT_A8_UNORM:
4459       return 0x18;
4460    case ISL_FORMAT_R8_SNORM:
4461    case ISL_FORMAT_R8_SINT:
4462       return 0x19;
4463    default:
4464       unreachable("Unsupported render compression format!");
4465       return 0;
4466    }
4467 }
4468 
4469 const char *
isl_aux_op_to_name(enum isl_aux_op op)4470 isl_aux_op_to_name(enum isl_aux_op op)
4471 {
4472    static const char *names[] = {
4473       [ISL_AUX_OP_NONE]            = "none",
4474       [ISL_AUX_OP_FAST_CLEAR]      = "fast-clear",
4475       [ISL_AUX_OP_FULL_RESOLVE]    = "full-resolve",
4476       [ISL_AUX_OP_PARTIAL_RESOLVE] = "partial-resolve",
4477       [ISL_AUX_OP_AMBIGUATE]       = "ambiguate",
4478    };
4479    assert(op < ARRAY_SIZE(names));
4480    return names[op];
4481 }
4482 
4483 const char *
isl_tiling_to_name(enum isl_tiling tiling)4484 isl_tiling_to_name(enum isl_tiling tiling)
4485 {
4486    static const char *names[] = {
4487       [ISL_TILING_LINEAR]    = "linear",
4488       [ISL_TILING_W]         = "W",
4489       [ISL_TILING_X]         = "X",
4490       [ISL_TILING_Y0]        = "Y0",
4491       [ISL_TILING_SKL_Yf]    = "SKL-Yf",
4492       [ISL_TILING_SKL_Ys]    = "SKL-Ys",
4493       [ISL_TILING_ICL_Yf]    = "ICL-Yf",
4494       [ISL_TILING_ICL_Ys]    = "ICL-Ys",
4495       [ISL_TILING_4]         = "4",
4496       [ISL_TILING_64]        = "64",
4497       [ISL_TILING_HIZ]       = "hiz",
4498       [ISL_TILING_CCS]       = "ccs",
4499       [ISL_TILING_GFX12_CCS] = "gfx12-ccs",
4500    };
4501    assert(tiling < ARRAY_SIZE(names));
4502    return names[tiling];
4503 }
4504 
4505 const char *
isl_aux_usage_to_name(enum isl_aux_usage usage)4506 isl_aux_usage_to_name(enum isl_aux_usage usage)
4507 {
4508    static const char *names[] = {
4509       [ISL_AUX_USAGE_NONE]       = "none",
4510       [ISL_AUX_USAGE_HIZ]        = "hiz",
4511       [ISL_AUX_USAGE_MCS]        = "mcs",
4512       [ISL_AUX_USAGE_CCS_D]      = "ccs-d",
4513       [ISL_AUX_USAGE_CCS_E]      = "ccs-e",
4514       [ISL_AUX_USAGE_FCV_CCS_E]  = "fcv-ccs-e",
4515       [ISL_AUX_USAGE_MC]         = "mc",
4516       [ISL_AUX_USAGE_HIZ_CCS_WT] = "hiz-ccs-wt",
4517       [ISL_AUX_USAGE_HIZ_CCS]    = "hiz-ccs",
4518       [ISL_AUX_USAGE_MCS_CCS]    = "mcs-ccs",
4519       [ISL_AUX_USAGE_STC_CCS]    = "stc-ccs",
4520    };
4521    assert(usage < ARRAY_SIZE(names));
4522    return names[usage];
4523 }
4524 
4525 const char *
isl_aux_state_to_name(enum isl_aux_state state)4526 isl_aux_state_to_name(enum isl_aux_state state)
4527 {
4528    static const char *names[] = {
4529       [ISL_AUX_STATE_CLEAR]               = "clear",
4530       [ISL_AUX_STATE_PARTIAL_CLEAR]       = "partial_clear",
4531       [ISL_AUX_STATE_COMPRESSED_CLEAR]    = "compressed_clear",
4532       [ISL_AUX_STATE_COMPRESSED_NO_CLEAR] = "compressed_no_clear",
4533       [ISL_AUX_STATE_RESOLVED]            = "resolved",
4534       [ISL_AUX_STATE_PASS_THROUGH]        = "pass-through",
4535       [ISL_AUX_STATE_AUX_INVALID]         = "invalid",
4536    };
4537    assert(state < ARRAY_SIZE(names));
4538    return names[state];
4539 }
4540