• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2015 Intel Corporation
3  *
4  *  Permission is hereby granted, free of charge, to any person obtaining a
5  *  copy of this software and associated documentation files (the "Software"),
6  *  to deal in the Software without restriction, including without limitation
7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  *  and/or sell copies of the Software, and to permit persons to whom the
9  *  Software is furnished to do so, subject to the following conditions:
10  *
11  *  The above copyright notice and this permission notice (including the next
12  *  paragraph) shall be included in all copies or substantial portions of the
13  *  Software.
14  *
15  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  *  IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 
28 #include "genxml/genX_bits.h"
29 
30 #include "isl.h"
31 #include "isl_gfx4.h"
32 #include "isl_gfx6.h"
33 #include "isl_gfx7.h"
34 #include "isl_gfx8.h"
35 #include "isl_gfx9.h"
36 #include "isl_gfx12.h"
37 #include "isl_priv.h"
38 
39 void
isl_memcpy_linear_to_tiled(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,uint32_t dst_pitch,int32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)40 isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
41                            uint32_t yt1, uint32_t yt2,
42                            char *dst, const char *src,
43                            uint32_t dst_pitch, int32_t src_pitch,
44                            bool has_swizzling,
45                            enum isl_tiling tiling,
46                            isl_memcpy_type copy_type)
47 {
48 #ifdef USE_SSE41
49    if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
50       _isl_memcpy_linear_to_tiled_sse41(
51          xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
52          tiling, copy_type);
53       return;
54    }
55 #endif
56 
57    _isl_memcpy_linear_to_tiled(
58       xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
59       tiling, copy_type);
60 }
61 
62 void
isl_memcpy_tiled_to_linear(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,int32_t dst_pitch,uint32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)63 isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
64                            uint32_t yt1, uint32_t yt2,
65                            char *dst, const char *src,
66                            int32_t dst_pitch, uint32_t src_pitch,
67                            bool has_swizzling,
68                            enum isl_tiling tiling,
69                            isl_memcpy_type copy_type)
70 {
71 #ifdef USE_SSE41
72    if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
73       _isl_memcpy_tiled_to_linear_sse41(
74          xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
75          tiling, copy_type);
76       return;
77    }
78 #endif
79 
80    _isl_memcpy_tiled_to_linear(
81       xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
82       tiling, copy_type);
83 }
84 
85 void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char * file,int line,const char * fmt,...)86 __isl_finishme(const char *file, int line, const char *fmt, ...)
87 {
88    va_list ap;
89    char buf[512];
90 
91    va_start(ap, fmt);
92    vsnprintf(buf, sizeof(buf), fmt, ap);
93    va_end(ap);
94 
95    fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
96 }
97 
98 static void
isl_device_setup_mocs(struct isl_device * dev)99 isl_device_setup_mocs(struct isl_device *dev)
100 {
101    if (dev->info->ver >= 12) {
102       if (dev->info->is_dg2) {
103          /* L3CC=WB; BSpec: 45101 */
104          dev->mocs.internal = 3 << 1;
105          dev->mocs.external = 3 << 1;
106       } else if (dev->info->is_dg1) {
107          /* L3CC=WB */
108          dev->mocs.internal = 5 << 1;
109          /* Displayables on DG1 are free to cache in L3 since L3 is transient
110           * and flushed at bottom of each submission.
111           */
112          dev->mocs.external = 5 << 1;
113       } else {
114          /* TC=1/LLC Only, LeCC=1/UC, LRUM=0, L3CC=3/WB */
115          dev->mocs.external = 61 << 1;
116          /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
117          dev->mocs.internal = 2 << 1;
118 
119          /* L1 - HDC:L1 + L3 + LLC */
120          dev->mocs.l1_hdc_l3_llc = 48 << 1;
121       }
122    } else if (dev->info->ver >= 9) {
123       /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
124       dev->mocs.external = 1 << 1;
125       /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
126       dev->mocs.internal = 2 << 1;
127    } else if (dev->info->ver >= 8) {
128       /* MEMORY_OBJECT_CONTROL_STATE:
129        * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
130        * .TargetCache = L3DefertoPATforLLCeLLCselection,
131        * .AgeforQUADLRU = 0
132        */
133       dev->mocs.external = 0x18;
134       /* MEMORY_OBJECT_CONTROL_STATE:
135        * .MemoryTypeLLCeLLCCacheabilityControl = WB,
136        * .TargetCache = L3DefertoPATforLLCeLLCselection,
137        * .AgeforQUADLRU = 0
138        */
139       dev->mocs.internal = 0x78;
140    } else if (dev->info->ver >= 7) {
141       if (dev->info->is_haswell) {
142          /* MEMORY_OBJECT_CONTROL_STATE:
143           * .LLCeLLCCacheabilityControlLLCCC             = 0,
144           * .L3CacheabilityControlL3CC                   = 1,
145           */
146          dev->mocs.internal = 1;
147          dev->mocs.external = 1;
148       } else {
149          /* MEMORY_OBJECT_CONTROL_STATE:
150           * .GraphicsDataTypeGFDT                        = 0,
151           * .LLCCacheabilityControlLLCCC                 = 0,
152           * .L3CacheabilityControlL3CC                   = 1,
153           */
154          dev->mocs.internal = 1;
155          dev->mocs.external = 1;
156       }
157    } else {
158       dev->mocs.internal = 0;
159       dev->mocs.external = 0;
160    }
161 }
162 
163 /**
164  * Return an appropriate MOCS entry for the given usage flags.
165  */
166 uint32_t
isl_mocs(const struct isl_device * dev,isl_surf_usage_flags_t usage,bool external)167 isl_mocs(const struct isl_device *dev, isl_surf_usage_flags_t usage,
168          bool external)
169 {
170    if (external)
171       return dev->mocs.external;
172 
173    if (dev->info->ver >= 12 && !dev->info->is_dg1) {
174       if (usage & ISL_SURF_USAGE_STAGING_BIT)
175          return dev->mocs.internal;
176 
177       /* Using L1:HDC for storage buffers breaks Vulkan memory model
178        * tests that use shader atomics.  This isn't likely to work out,
179        * and we can't know a priori whether they'll be used.  So just
180        * continue with ordinary internal MOCS for now.
181        */
182       if (usage & ISL_SURF_USAGE_STORAGE_BIT)
183          return dev->mocs.internal;
184 
185       if (usage & (ISL_SURF_USAGE_CONSTANT_BUFFER_BIT |
186                    ISL_SURF_USAGE_RENDER_TARGET_BIT |
187                    ISL_SURF_USAGE_TEXTURE_BIT))
188          return dev->mocs.l1_hdc_l3_llc;
189    }
190 
191    return dev->mocs.internal;
192 }
193 
194 void
isl_device_init(struct isl_device * dev,const struct intel_device_info * info,bool has_bit6_swizzling)195 isl_device_init(struct isl_device *dev,
196                 const struct intel_device_info *info,
197                 bool has_bit6_swizzling)
198 {
199    /* Gfx8+ don't have bit6 swizzling, ensure callsite is not confused. */
200    assert(!(has_bit6_swizzling && info->ver >= 8));
201 
202    dev->info = info;
203    dev->use_separate_stencil = ISL_GFX_VER(dev) >= 6;
204    dev->has_bit6_swizzling = has_bit6_swizzling;
205 
206    /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
207     * device properties at buildtime. Verify that the macros with the device
208     * properties chosen during runtime.
209     */
210    ISL_GFX_VER_SANITIZE(dev);
211    ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
212 
213    /* Did we break hiz or stencil? */
214    if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
215       assert(info->has_hiz_and_separate_stencil);
216    if (info->must_use_separate_stencil)
217       assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
218 
219    dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
220    dev->ss.align = isl_align(dev->ss.size, 32);
221 
222    dev->ss.clear_color_state_size =
223       isl_align(CLEAR_COLOR_length(info) * 4, 64);
224    dev->ss.clear_color_state_offset =
225       RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
226 
227    dev->ss.clear_value_size =
228       isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
229                 RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
230                 RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
231                 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
232 
233    dev->ss.clear_value_offset =
234       RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
235 
236    assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
237    dev->ss.addr_offset =
238       RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
239 
240    /* The "Auxiliary Surface Base Address" field starts a bit higher up
241     * because the bottom 12 bits are used for other things.  Round down to
242     * the nearest dword before.
243     */
244    dev->ss.aux_addr_offset =
245       (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
246 
247    dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
248    assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
249    dev->ds.depth_offset =
250       _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
251 
252    if (dev->use_separate_stencil) {
253       dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
254                       _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
255                       _3DSTATE_CLEAR_PARAMS_length(info) * 4;
256 
257       assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
258       dev->ds.stencil_offset =
259          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
260          _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
261 
262       assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
263       dev->ds.hiz_offset =
264          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
265          _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
266          _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
267    } else {
268       dev->ds.stencil_offset = 0;
269       dev->ds.hiz_offset = 0;
270    }
271 
272    if (ISL_GFX_VER(dev) >= 7) {
273       /* From the IVB PRM, SURFACE_STATE::Height,
274        *
275        *    For typed buffer and structured buffer surfaces, the number
276        *    of entries in the buffer ranges from 1 to 2^27. For raw buffer
277        *    surfaces, the number of entries in the buffer is the number of bytes
278        *    which can range from 1 to 2^30.
279        *
280        * This limit is only concerned with raw buffers.
281        */
282       dev->max_buffer_size = 1ull << 30;
283    } else {
284       dev->max_buffer_size = 1ull << 27;
285    }
286 
287    isl_device_setup_mocs(dev);
288 }
289 
290 /**
291  * @brief Query the set of multisamples supported by the device.
292  *
293  * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
294  * supported.
295  */
296 isl_sample_count_mask_t ATTRIBUTE_CONST
isl_device_get_sample_counts(struct isl_device * dev)297 isl_device_get_sample_counts(struct isl_device *dev)
298 {
299    if (ISL_GFX_VER(dev) >= 9) {
300       return ISL_SAMPLE_COUNT_1_BIT |
301              ISL_SAMPLE_COUNT_2_BIT |
302              ISL_SAMPLE_COUNT_4_BIT |
303              ISL_SAMPLE_COUNT_8_BIT |
304              ISL_SAMPLE_COUNT_16_BIT;
305    } else if (ISL_GFX_VER(dev) >= 8) {
306       return ISL_SAMPLE_COUNT_1_BIT |
307              ISL_SAMPLE_COUNT_2_BIT |
308              ISL_SAMPLE_COUNT_4_BIT |
309              ISL_SAMPLE_COUNT_8_BIT;
310    } else if (ISL_GFX_VER(dev) >= 7) {
311       return ISL_SAMPLE_COUNT_1_BIT |
312              ISL_SAMPLE_COUNT_4_BIT |
313              ISL_SAMPLE_COUNT_8_BIT;
314    } else if (ISL_GFX_VER(dev) >= 6) {
315       return ISL_SAMPLE_COUNT_1_BIT |
316              ISL_SAMPLE_COUNT_4_BIT;
317    } else {
318       return ISL_SAMPLE_COUNT_1_BIT;
319    }
320 }
321 
322 /**
323  * Returns an isl_tile_info representation of the given isl_tiling when
324  * combined when used in the given configuration.
325  *
326  * @param[in]  tiling      The tiling format to introspect
327  * @param[in]  dim         The dimensionality of the surface being tiled
328  * @param[in]  msaa_layout The layout of samples in the surface being tiled
329  * @param[in]  format_bpb  The number of bits per surface element (block) for
330  *                         the surface being tiled
331  * @param[in]  samples     The samples in the surface being tiled
332  * @param[out] tile_info   Return parameter for the tiling information
333  */
334 void
isl_tiling_get_info(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t format_bpb,uint32_t samples,struct isl_tile_info * tile_info)335 isl_tiling_get_info(enum isl_tiling tiling,
336                     enum isl_surf_dim dim,
337                     enum isl_msaa_layout msaa_layout,
338                     uint32_t format_bpb,
339                     uint32_t samples,
340                     struct isl_tile_info *tile_info)
341 {
342    const uint32_t bs = format_bpb / 8;
343    struct isl_extent4d logical_el;
344    struct isl_extent2d phys_B;
345 
346    if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
347       /* It is possible to have non-power-of-two formats in a tiled buffer.
348        * The easiest way to handle this is to treat the tile as if it is three
349        * times as wide.  This way no pixel will ever cross a tile boundary.
350        * This really only works on a subset of tiling formats.
351        */
352       assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0 ||
353              tiling == ISL_TILING_4);
354       assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
355       isl_tiling_get_info(tiling, dim, msaa_layout, format_bpb / 3, samples,
356                           tile_info);
357       return;
358    }
359 
360    switch (tiling) {
361    case ISL_TILING_LINEAR:
362       assert(bs > 0);
363       logical_el = isl_extent4d(1, 1, 1, 1);
364       phys_B = isl_extent2d(bs, 1);
365       break;
366 
367    case ISL_TILING_X:
368       assert(bs > 0);
369       logical_el = isl_extent4d(512 / bs, 8, 1, 1);
370       phys_B = isl_extent2d(512, 8);
371       break;
372 
373    case ISL_TILING_Y0:
374    case ISL_TILING_4:
375       assert(bs > 0);
376       logical_el = isl_extent4d(128 / bs, 32, 1, 1);
377       phys_B = isl_extent2d(128, 32);
378       break;
379 
380    case ISL_TILING_W:
381       assert(bs == 1);
382       logical_el = isl_extent4d(64, 64, 1, 1);
383       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
384        *
385        *    "If the surface is a stencil buffer (and thus has Tile Mode set
386        *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
387        *    computed based on width, as the stencil buffer is stored with two
388        *    rows interleaved."
389        *
390        * This, together with the fact that stencil buffers are referred to as
391        * being Y-tiled in the PRMs for older hardware implies that the
392        * physical size of a W-tile is actually the same as for a Y-tile.
393        */
394       phys_B = isl_extent2d(128, 32);
395       break;
396 
397    case ISL_TILING_Yf:
398    case ISL_TILING_Ys: {
399       bool is_Ys = tiling == ISL_TILING_Ys;
400 
401       assert(bs > 0);
402       unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
403       unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
404 
405       logical_el = isl_extent4d(width / bs, height, 1, 1);
406       phys_B = isl_extent2d(width, height);
407       break;
408    }
409    case ISL_TILING_64:
410       /* The tables below are taken from the "2D Surfaces" page in the Bspec
411        * which are formulated in terms of the Cv and Cu constants. This is
412        * different from the tables in the "Tile64 Format" page which should be
413        * equivalent but are usually in terms of pixels. Also note that Cv and
414        * Cu are HxW order to match the Bspec table, not WxH order like you
415        * might expect.
416        *
417        * From the Bspec's "Tile64 Format" page:
418        *
419        *    MSAA Depth/Stencil surface use IMS (Interleaved Multi Samples)
420        *    which means:
421        *
422        *    - Use the 1X MSAA (non-MSRT) version of the Tile64 equations and
423        *      let the client unit do the swizzling internally
424        *
425        * Surfaces using the IMS layout will use the mapping for 1x MSAA.
426        */
427 #define tile_extent(bs, cv, cu, a) \
428       isl_extent4d((1 << cu) / bs, 1 << cv, 1, a)
429 
430       /* Only 2D surfaces are handled. */
431       assert(dim == ISL_SURF_DIM_2D);
432 
433       if (samples == 1 || msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
434          switch (format_bpb) {
435          case 128: logical_el = tile_extent(bs, 6, 10, 1); break;
436          case  64: logical_el = tile_extent(bs, 6, 10, 1); break;
437          case  32: logical_el = tile_extent(bs, 7,  9, 1); break;
438          case  16: logical_el = tile_extent(bs, 7,  9, 1); break;
439          case   8: logical_el = tile_extent(bs, 8,  8, 1); break;
440          default: unreachable("Unsupported format size.");
441          }
442       } else if (samples == 2) {
443          switch (format_bpb) {
444          case 128: logical_el = tile_extent(bs, 6,  9, 2); break;
445          case  64: logical_el = tile_extent(bs, 6,  9, 2); break;
446          case  32: logical_el = tile_extent(bs, 7,  8, 2); break;
447          case  16: logical_el = tile_extent(bs, 7,  8, 2); break;
448          case   8: logical_el = tile_extent(bs, 8,  7, 2); break;
449          default: unreachable("Unsupported format size.");
450          }
451       } else {
452          switch (format_bpb) {
453          case 128: logical_el = tile_extent(bs, 5,  9, 4); break;
454          case  64: logical_el = tile_extent(bs, 5,  9, 4); break;
455          case  32: logical_el = tile_extent(bs, 6,  8, 4); break;
456          case  16: logical_el = tile_extent(bs, 6,  8, 4); break;
457          case   8: logical_el = tile_extent(bs, 7,  7, 4); break;
458          default: unreachable("Unsupported format size.");
459          }
460       }
461 
462 #undef tile_extent
463 
464       phys_B.w = logical_el.w * bs;
465       phys_B.h = 64 * 1024 / phys_B.w;
466       break;
467 
468    case ISL_TILING_HIZ:
469       /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
470        * 128bpb format.  The tiling has the same physical dimensions as
471        * Y-tiling but actually has two HiZ columns per Y-tiled column.
472        */
473       assert(bs == 16);
474       logical_el = isl_extent4d(16, 16, 1, 1);
475       phys_B = isl_extent2d(128, 32);
476       break;
477 
478    case ISL_TILING_CCS:
479       /* CCS surfaces are required to have one of the GENX_CCS_* formats which
480        * have a block size of 1 or 2 bits per block and each CCS element
481        * corresponds to one cache-line pair in the main surface.  From the Sky
482        * Lake PRM Vol. 12 in the section on planes:
483        *
484        *    "The Color Control Surface (CCS) contains the compression status
485        *    of the cache-line pairs. The compression state of the cache-line
486        *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
487        *    represents an area on the main surface of 16x16 sets of 128 byte
488        *    Y-tiled cache-line-pairs. CCS is always Y tiled."
489        *
490        * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
491        * Since each cache line corresponds to a 16x16 set of cache-line pairs,
492        * that yields total tile area of 128x128 cache-line pairs or CCS
493        * elements.  On older hardware, each CCS element is 1 bit and the tile
494        * is 128x256 elements.
495        */
496       assert(format_bpb == 1 || format_bpb == 2);
497       logical_el = isl_extent4d(128, 256 / format_bpb, 1, 1);
498       phys_B = isl_extent2d(128, 32);
499       break;
500 
501    case ISL_TILING_GFX12_CCS:
502       /* From the Bspec, Gen Graphics > Gfx12 > Memory Data Formats > Memory
503        * Compression > Memory Compression - Gfx12:
504        *
505        *    4 bits of auxiliary plane data are required for 2 cachelines of
506        *    main surface data. This results in a single cacheline of auxiliary
507        *    plane data mapping to 4 4K pages of main surface data for the 4K
508        *    pages (tile Y ) and 1 64K Tile Ys page.
509        *
510        * The Y-tiled pairing bit of 9 shown in the table below that Bspec
511        * section expresses that the 2 cachelines of main surface data are
512        * horizontally adjacent.
513        *
514        * TODO: Handle Ys, Yf and their pairing bits.
515        *
516        * Therefore, each CCS cacheline represents a 512Bx32 row area and each
517        * element represents a 32Bx4 row area.
518        */
519       assert(format_bpb == 4);
520       logical_el = isl_extent4d(16, 8, 1, 1);
521       phys_B = isl_extent2d(64, 1);
522       break;
523 
524    default:
525       unreachable("not reached");
526    } /* end switch */
527 
528    *tile_info = (struct isl_tile_info) {
529       .tiling = tiling,
530       .format_bpb = format_bpb,
531       .logical_extent_el = logical_el,
532       .phys_extent_B = phys_B,
533    };
534 }
535 
536 bool
isl_color_value_is_zero(union isl_color_value value,enum isl_format format)537 isl_color_value_is_zero(union isl_color_value value,
538                         enum isl_format format)
539 {
540    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
541 
542 #define RETURN_FALSE_IF_NOT_0(c, i) \
543    if (fmtl->channels.c.bits && value.u32[i] != 0) \
544       return false
545 
546    RETURN_FALSE_IF_NOT_0(r, 0);
547    RETURN_FALSE_IF_NOT_0(g, 1);
548    RETURN_FALSE_IF_NOT_0(b, 2);
549    RETURN_FALSE_IF_NOT_0(a, 3);
550 
551 #undef RETURN_FALSE_IF_NOT_0
552 
553    return true;
554 }
555 
556 bool
isl_color_value_is_zero_one(union isl_color_value value,enum isl_format format)557 isl_color_value_is_zero_one(union isl_color_value value,
558                             enum isl_format format)
559 {
560    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
561 
562 #define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
563    if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
564       return false
565 
566    if (isl_format_has_int_channel(format)) {
567       RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
568       RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
569       RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
570       RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
571    } else {
572       RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
573       RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
574       RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
575       RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
576    }
577 
578 #undef RETURN_FALSE_IF_NOT_0_1
579 
580    return true;
581 }
582 
583 /**
584  * @param[out] tiling is set only on success
585  */
586 static bool
isl_surf_choose_tiling(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling * tiling)587 isl_surf_choose_tiling(const struct isl_device *dev,
588                        const struct isl_surf_init_info *restrict info,
589                        enum isl_tiling *tiling)
590 {
591    isl_tiling_flags_t tiling_flags = info->tiling_flags;
592 
593    /* HiZ surfaces always use the HiZ tiling */
594    if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
595       assert(info->format == ISL_FORMAT_HIZ);
596       assert(tiling_flags == ISL_TILING_HIZ_BIT);
597       *tiling = isl_tiling_flag_to_enum(tiling_flags);
598       return true;
599    }
600 
601    /* CCS surfaces always use the CCS tiling */
602    if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
603       assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
604       UNUSED bool ivb_ccs = ISL_GFX_VER(dev) < 12 &&
605                             tiling_flags == ISL_TILING_CCS_BIT;
606       UNUSED bool tgl_ccs = ISL_GFX_VER(dev) >= 12 &&
607                             tiling_flags == ISL_TILING_GFX12_CCS_BIT;
608       assert(ivb_ccs != tgl_ccs);
609       *tiling = isl_tiling_flag_to_enum(tiling_flags);
610       return true;
611    }
612 
613    if (ISL_GFX_VERX10(dev) >= 125) {
614       isl_gfx125_filter_tiling(dev, info, &tiling_flags);
615    } else if (ISL_GFX_VER(dev) >= 6) {
616       isl_gfx6_filter_tiling(dev, info, &tiling_flags);
617    } else {
618       isl_gfx4_filter_tiling(dev, info, &tiling_flags);
619    }
620 
621    #define CHOOSE(__tiling) \
622       do { \
623          if (tiling_flags & (1u << (__tiling))) { \
624             *tiling = (__tiling); \
625             return true; \
626           } \
627       } while (0)
628 
629    /* Of the tiling modes remaining, choose the one that offers the best
630     * performance.
631     */
632 
633    if (info->dim == ISL_SURF_DIM_1D) {
634       /* Prefer linear for 1D surfaces because they do not benefit from
635        * tiling. To the contrary, tiling leads to wasted memory and poor
636        * memory locality due to the swizzling and alignment restrictions
637        * required in tiled surfaces.
638        */
639       CHOOSE(ISL_TILING_LINEAR);
640    }
641 
642    CHOOSE(ISL_TILING_4);
643    CHOOSE(ISL_TILING_64);
644    CHOOSE(ISL_TILING_Ys);
645    CHOOSE(ISL_TILING_Yf);
646    CHOOSE(ISL_TILING_Y0);
647    CHOOSE(ISL_TILING_X);
648    CHOOSE(ISL_TILING_W);
649    CHOOSE(ISL_TILING_LINEAR);
650 
651    #undef CHOOSE
652 
653    /* No tiling mode accomodates the inputs. */
654    return false;
655 }
656 
657 static bool
isl_choose_msaa_layout(const struct isl_device * dev,const struct isl_surf_init_info * info,enum isl_tiling tiling,enum isl_msaa_layout * msaa_layout)658 isl_choose_msaa_layout(const struct isl_device *dev,
659                  const struct isl_surf_init_info *info,
660                  enum isl_tiling tiling,
661                  enum isl_msaa_layout *msaa_layout)
662 {
663    if (ISL_GFX_VER(dev) >= 8) {
664       return isl_gfx8_choose_msaa_layout(dev, info, tiling, msaa_layout);
665    } else if (ISL_GFX_VER(dev) >= 7) {
666       return isl_gfx7_choose_msaa_layout(dev, info, tiling, msaa_layout);
667    } else if (ISL_GFX_VER(dev) >= 6) {
668       return isl_gfx6_choose_msaa_layout(dev, info, tiling, msaa_layout);
669    } else {
670       return isl_gfx4_choose_msaa_layout(dev, info, tiling, msaa_layout);
671    }
672 }
673 
674 struct isl_extent2d
isl_get_interleaved_msaa_px_size_sa(uint32_t samples)675 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
676 {
677    assert(isl_is_pow2(samples));
678 
679    /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
680     * Sizes (p133):
681     *
682     *    If the surface is multisampled and it is a depth or stencil surface
683     *    or Multisampled Surface StorageFormat in SURFACE_STATE is
684     *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
685     *    proceeding: [...]
686     */
687    return (struct isl_extent2d) {
688       .width = 1 << ((ffs(samples) - 0) / 2),
689       .height = 1 << ((ffs(samples) - 1) / 2),
690    };
691 }
692 
693 static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,uint32_t * width,uint32_t * height)694 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
695                                     uint32_t *width, uint32_t *height)
696 {
697    const struct isl_extent2d px_size_sa =
698       isl_get_interleaved_msaa_px_size_sa(samples);
699 
700    if (width)
701       *width = isl_align(*width, 2) * px_size_sa.width;
702    if (height)
703       *height = isl_align(*height, 2) * px_size_sa.height;
704 }
705 
706 static enum isl_array_pitch_span
isl_choose_array_pitch_span(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_level0_sa)707 isl_choose_array_pitch_span(const struct isl_device *dev,
708                             const struct isl_surf_init_info *restrict info,
709                             enum isl_dim_layout dim_layout,
710                             const struct isl_extent4d *phys_level0_sa)
711 {
712    switch (dim_layout) {
713    case ISL_DIM_LAYOUT_GFX9_1D:
714    case ISL_DIM_LAYOUT_GFX4_2D:
715       if (ISL_GFX_VER(dev) >= 8) {
716          /* QPitch becomes programmable in Broadwell. So choose the
717           * most compact QPitch possible in order to conserve memory.
718           *
719           * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
720           * >> RENDER_SURFACE_STATE Surface QPitch (p325):
721           *
722           *    - Software must ensure that this field is set to a value
723           *      sufficiently large such that the array slices in the surface
724           *      do not overlap. Refer to the Memory Data Formats section for
725           *      information on how surfaces are stored in memory.
726           *
727           *    - This field specifies the distance in rows between array
728           *      slices.  It is used only in the following cases:
729           *
730           *          - Surface Array is enabled OR
731           *          - Number of Mulitsamples is not NUMSAMPLES_1 and
732           *            Multisampled Surface Storage Format set to MSFMT_MSS OR
733           *          - Surface Type is SURFTYPE_CUBE
734           */
735          return ISL_ARRAY_PITCH_SPAN_COMPACT;
736       } else if (ISL_GFX_VER(dev) >= 7) {
737          /* Note that Ivybridge introduces
738           * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
739           * driver more control over the QPitch.
740           */
741 
742          if (phys_level0_sa->array_len == 1) {
743             /* The hardware will never use the QPitch. So choose the most
744              * compact QPitch possible in order to conserve memory.
745              */
746             return ISL_ARRAY_PITCH_SPAN_COMPACT;
747          }
748 
749          if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
750              (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
751             /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
752              * Section 6.18.4.7: Surface Arrays (p112):
753              *
754              *    If Surface Array Spacing is set to ARYSPC_FULL (note that
755              *    the depth buffer and stencil buffer have an implied value of
756              *    ARYSPC_FULL):
757              */
758             return ISL_ARRAY_PITCH_SPAN_FULL;
759          }
760 
761          if (info->levels == 1) {
762             /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
763              * to ARYSPC_LOD0.
764              */
765             return ISL_ARRAY_PITCH_SPAN_COMPACT;
766          }
767 
768          return ISL_ARRAY_PITCH_SPAN_FULL;
769       } else if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
770                  ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
771                  isl_surf_usage_is_stencil(info->usage)) {
772          /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
773           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
774           *
775           *    The separate stencil buffer does not support mip mapping, thus
776           *    the storage for LODs other than LOD 0 is not needed.
777           */
778          assert(info->levels == 1);
779          return ISL_ARRAY_PITCH_SPAN_COMPACT;
780       } else {
781          if ((ISL_GFX_VER(dev) == 5 || ISL_GFX_VER(dev) == 6) &&
782              ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
783              isl_surf_usage_is_stencil(info->usage)) {
784             /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
785              * Graphics Core >> Section 7.18.3.7: Surface Arrays:
786              *
787              *    The separate stencil buffer does not support mip mapping,
788              *    thus the storage for LODs other than LOD 0 is not needed.
789              */
790             assert(info->levels == 1);
791             assert(phys_level0_sa->array_len == 1);
792             return ISL_ARRAY_PITCH_SPAN_COMPACT;
793          }
794 
795          if (phys_level0_sa->array_len == 1) {
796             /* The hardware will never use the QPitch. So choose the most
797              * compact QPitch possible in order to conserve memory.
798              */
799             return ISL_ARRAY_PITCH_SPAN_COMPACT;
800          }
801 
802          return ISL_ARRAY_PITCH_SPAN_FULL;
803       }
804 
805    case ISL_DIM_LAYOUT_GFX4_3D:
806       /* The hardware will never use the QPitch. So choose the most
807        * compact QPitch possible in order to conserve memory.
808        */
809       return ISL_ARRAY_PITCH_SPAN_COMPACT;
810 
811    case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
812       /* Each array image in the gfx6 stencil of HiZ surface is compact in the
813        * sense that every LOD is a compact array of the same size as LOD0.
814        */
815       return ISL_ARRAY_PITCH_SPAN_COMPACT;
816    }
817 
818    unreachable("bad isl_dim_layout");
819    return ISL_ARRAY_PITCH_SPAN_FULL;
820 }
821 
822 static void
isl_choose_image_alignment_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling tiling,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,struct isl_extent3d * image_align_el)823 isl_choose_image_alignment_el(const struct isl_device *dev,
824                               const struct isl_surf_init_info *restrict info,
825                               enum isl_tiling tiling,
826                               enum isl_dim_layout dim_layout,
827                               enum isl_msaa_layout msaa_layout,
828                               struct isl_extent3d *image_align_el)
829 {
830    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
831    if (fmtl->txc == ISL_TXC_MCS) {
832       assert(tiling == ISL_TILING_Y0);
833 
834       /*
835        * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
836        *
837        * Height, width, and layout of MCS buffer in this case must match with
838        * Render Target height, width, and layout. MCS buffer is tiledY.
839        *
840        * To avoid wasting memory, choose the smallest alignment possible:
841        * HALIGN_4 and VALIGN_4.
842        */
843       *image_align_el = isl_extent3d(4, 4, 1);
844       return;
845    } else if (info->format == ISL_FORMAT_HIZ) {
846       assert(ISL_GFX_VER(dev) >= 6);
847       if (ISL_GFX_VER(dev) == 6) {
848          /* HiZ surfaces on Sandy Bridge are packed tightly. */
849          *image_align_el = isl_extent3d(1, 1, 1);
850       } else if (ISL_GFX_VER(dev) < 12) {
851          /* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the
852           * primary surface which works out to 2x2 HiZ elments.
853           */
854          *image_align_el = isl_extent3d(2, 2, 1);
855       } else {
856          /* On gfx12+, HiZ surfaces are always aligned to 16x16 pixels in the
857           * primary surface which works out to 2x4 HiZ elments.
858           * TODO: Verify
859           */
860          *image_align_el = isl_extent3d(2, 4, 1);
861       }
862       return;
863    }
864 
865    if (ISL_GFX_VERX10(dev) >= 125) {
866       isl_gfx125_choose_image_alignment_el(dev, info, tiling, dim_layout,
867                                            msaa_layout, image_align_el);
868    } else if (ISL_GFX_VER(dev) >= 12) {
869       isl_gfx12_choose_image_alignment_el(dev, info, tiling, dim_layout,
870                                           msaa_layout, image_align_el);
871    } else if (ISL_GFX_VER(dev) >= 9) {
872       isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout,
873                                          msaa_layout, image_align_el);
874    } else if (ISL_GFX_VER(dev) >= 8) {
875       isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout,
876                                          msaa_layout, image_align_el);
877    } else if (ISL_GFX_VER(dev) >= 7) {
878       isl_gfx7_choose_image_alignment_el(dev, info, tiling, dim_layout,
879                                           msaa_layout, image_align_el);
880    } else if (ISL_GFX_VER(dev) >= 6) {
881       isl_gfx6_choose_image_alignment_el(dev, info, tiling, dim_layout,
882                                          msaa_layout, image_align_el);
883    } else {
884       isl_gfx4_choose_image_alignment_el(dev, info, tiling, dim_layout,
885                                          msaa_layout, image_align_el);
886    }
887 }
888 
889 static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device * dev,enum isl_surf_dim logical_dim,enum isl_tiling tiling,isl_surf_usage_flags_t usage)890 isl_surf_choose_dim_layout(const struct isl_device *dev,
891                            enum isl_surf_dim logical_dim,
892                            enum isl_tiling tiling,
893                            isl_surf_usage_flags_t usage)
894 {
895    /* Sandy bridge needs a special layout for HiZ and stencil. */
896    if (ISL_GFX_VER(dev) == 6 &&
897        (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
898       return ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ;
899 
900    if (ISL_GFX_VER(dev) >= 9) {
901       switch (logical_dim) {
902       case ISL_SURF_DIM_1D:
903          /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
904           *
905           *    One-dimensional surfaces use a tiling mode of linear.
906           *    Technically, they are not tiled resources, but the Tiled
907           *    Resource Mode field in RENDER_SURFACE_STATE is still used to
908           *    indicate the alignment requirements for this linear surface
909           *    (See 1D Alignment requirements for how 4K and 64KB Tiled
910           *    Resource Modes impact alignment). Alternatively, a 1D surface
911           *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
912           *    a height of 0.
913           *
914           * In other words, ISL_DIM_LAYOUT_GFX9_1D is only used for linear
915           * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GFX4_2D is used.
916           */
917          if (tiling == ISL_TILING_LINEAR)
918             return ISL_DIM_LAYOUT_GFX9_1D;
919          else
920             return ISL_DIM_LAYOUT_GFX4_2D;
921       case ISL_SURF_DIM_2D:
922       case ISL_SURF_DIM_3D:
923          return ISL_DIM_LAYOUT_GFX4_2D;
924       }
925    } else {
926       switch (logical_dim) {
927       case ISL_SURF_DIM_1D:
928       case ISL_SURF_DIM_2D:
929          /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
930           *
931           * The cube face textures are stored in the same way as 3D surfaces
932           * are stored (see section 6.17.5 for details).  For cube surfaces,
933           * however, the depth is equal to the number of faces (always 6) and
934           * is not reduced for each MIP.
935           */
936          if (ISL_GFX_VER(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
937             return ISL_DIM_LAYOUT_GFX4_3D;
938 
939          return ISL_DIM_LAYOUT_GFX4_2D;
940       case ISL_SURF_DIM_3D:
941          return ISL_DIM_LAYOUT_GFX4_3D;
942       }
943    }
944 
945    unreachable("bad isl_surf_dim");
946    return ISL_DIM_LAYOUT_GFX4_2D;
947 }
948 
949 /**
950  * Calculate the physical extent of the surface's first level, in units of
951  * surface samples.
952  */
953 static void
isl_calc_phys_level0_extent_sa(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,enum isl_tiling tiling,enum isl_msaa_layout msaa_layout,struct isl_extent4d * phys_level0_sa)954 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
955                                const struct isl_surf_init_info *restrict info,
956                                enum isl_dim_layout dim_layout,
957                                enum isl_tiling tiling,
958                                enum isl_msaa_layout msaa_layout,
959                                struct isl_extent4d *phys_level0_sa)
960 {
961    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
962 
963    if (isl_format_is_planar(info->format))
964       unreachable("Planar formats unsupported");
965 
966    switch (info->dim) {
967    case ISL_SURF_DIM_1D:
968       assert(info->height == 1);
969       assert(info->depth == 1);
970       assert(info->samples == 1);
971 
972       switch (dim_layout) {
973       case ISL_DIM_LAYOUT_GFX4_3D:
974          unreachable("bad isl_dim_layout");
975 
976       case ISL_DIM_LAYOUT_GFX9_1D:
977       case ISL_DIM_LAYOUT_GFX4_2D:
978       case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
979          *phys_level0_sa = (struct isl_extent4d) {
980             .w = info->width,
981             .h = 1,
982             .d = 1,
983             .a = info->array_len,
984          };
985          break;
986       }
987       break;
988 
989    case ISL_SURF_DIM_2D:
990       if (ISL_GFX_VER(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
991          assert(dim_layout == ISL_DIM_LAYOUT_GFX4_3D);
992       else
993          assert(dim_layout == ISL_DIM_LAYOUT_GFX4_2D ||
994                 dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);
995 
996       if (tiling == ISL_TILING_Ys && info->samples > 1)
997          isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
998 
999       switch (msaa_layout) {
1000       case ISL_MSAA_LAYOUT_NONE:
1001          assert(info->depth == 1);
1002          assert(info->samples == 1);
1003 
1004          *phys_level0_sa = (struct isl_extent4d) {
1005             .w = info->width,
1006             .h = info->height,
1007             .d = 1,
1008             .a = info->array_len,
1009          };
1010          break;
1011 
1012       case ISL_MSAA_LAYOUT_ARRAY:
1013          assert(info->depth == 1);
1014          assert(info->levels == 1);
1015          assert(isl_format_supports_multisampling(dev->info, info->format));
1016          assert(fmtl->bw == 1 && fmtl->bh == 1);
1017 
1018          *phys_level0_sa = (struct isl_extent4d) {
1019             .w = info->width,
1020             .h = info->height,
1021             .d = 1,
1022             .a = info->array_len * info->samples,
1023          };
1024          break;
1025 
1026       case ISL_MSAA_LAYOUT_INTERLEAVED:
1027          assert(info->depth == 1);
1028          assert(info->levels == 1);
1029          assert(isl_format_supports_multisampling(dev->info, info->format));
1030 
1031          *phys_level0_sa = (struct isl_extent4d) {
1032             .w = info->width,
1033             .h = info->height,
1034             .d = 1,
1035             .a = info->array_len,
1036          };
1037 
1038          isl_msaa_interleaved_scale_px_to_sa(info->samples,
1039                                              &phys_level0_sa->w,
1040                                              &phys_level0_sa->h);
1041          break;
1042       }
1043       break;
1044 
1045    case ISL_SURF_DIM_3D:
1046       assert(info->array_len == 1);
1047       assert(info->samples == 1);
1048 
1049       if (fmtl->bd > 1) {
1050          isl_finishme("%s:%s: compression block with depth > 1",
1051                       __FILE__, __func__);
1052       }
1053 
1054       switch (dim_layout) {
1055       case ISL_DIM_LAYOUT_GFX9_1D:
1056       case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1057          unreachable("bad isl_dim_layout");
1058 
1059       case ISL_DIM_LAYOUT_GFX4_2D:
1060          assert(ISL_GFX_VER(dev) >= 9);
1061 
1062          *phys_level0_sa = (struct isl_extent4d) {
1063             .w = info->width,
1064             .h = info->height,
1065             .d = 1,
1066             .a = info->depth,
1067          };
1068          break;
1069 
1070       case ISL_DIM_LAYOUT_GFX4_3D:
1071          assert(ISL_GFX_VER(dev) < 9);
1072          *phys_level0_sa = (struct isl_extent4d) {
1073             .w = info->width,
1074             .h = info->height,
1075             .d = info->depth,
1076             .a = 1,
1077          };
1078          break;
1079       }
1080       break;
1081    }
1082 }
1083 
1084 /**
1085  * Calculate the pitch between physical array slices, in units of rows of
1086  * surface elements.
1087  */
1088 static uint32_t
isl_calc_array_pitch_el_rows_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,const struct isl_extent2d * phys_slice0_sa)1089 isl_calc_array_pitch_el_rows_gfx4_2d(
1090       const struct isl_device *dev,
1091       const struct isl_surf_init_info *restrict info,
1092       const struct isl_tile_info *tile_info,
1093       const struct isl_extent3d *image_align_sa,
1094       const struct isl_extent4d *phys_level0_sa,
1095       enum isl_array_pitch_span array_pitch_span,
1096       const struct isl_extent2d *phys_slice0_sa)
1097 {
1098    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1099    uint32_t pitch_sa_rows = 0;
1100 
1101    switch (array_pitch_span) {
1102    case ISL_ARRAY_PITCH_SPAN_COMPACT:
1103       pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
1104       break;
1105    case ISL_ARRAY_PITCH_SPAN_FULL: {
1106       /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
1107        * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
1108        * Surfaces >> Surface Arrays.
1109        */
1110       uint32_t H0_sa = phys_level0_sa->h;
1111       uint32_t H1_sa = isl_minify(H0_sa, 1);
1112 
1113       uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
1114       uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
1115 
1116       uint32_t m;
1117       if (ISL_GFX_VER(dev) >= 7) {
1118          /* The QPitch equation changed slightly in Ivybridge. */
1119          m = 12;
1120       } else {
1121          m = 11;
1122       }
1123 
1124       pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
1125 
1126       if (ISL_GFX_VER(dev) == 6 && info->samples > 1 &&
1127           (info->height % 4 == 1)) {
1128          /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1129           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1130           *
1131           *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
1132           *    the value calculated in the equation above , for every
1133           *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
1134           *
1135           * XXX(chadv): Is the errata natural corollary of the physical
1136           * layout of interleaved samples?
1137           */
1138          pitch_sa_rows += 4;
1139       }
1140 
1141       pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
1142       } /* end case */
1143       break;
1144    }
1145 
1146    assert(pitch_sa_rows % fmtl->bh == 0);
1147    uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
1148 
1149    if (ISL_GFX_VER(dev) >= 9 && ISL_GFX_VER(dev) <= 11 &&
1150        fmtl->txc == ISL_TXC_CCS) {
1151       /*
1152        * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
1153        *
1154        *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
1155        *    layout with these alignments in the RT space: Horizontal
1156        *    Alignment = 128 and Vertical Alignment = 64."
1157        *
1158        * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
1159        *
1160        *    "For non-multisampled render target's CCS auxiliary surface,
1161        *    QPitch must be computed with Horizontal Alignment = 128 and
1162        *    Surface Vertical Alignment = 256. These alignments are only for
1163        *    CCS buffer and not for associated render target."
1164        *
1165        * The first restriction is already handled by isl_choose_image_alignment_el
1166        * but the second restriction, which is an extension of the first, only
1167        * applies to qpitch and must be applied here.
1168        *
1169        * The second restriction disappears on Gfx12.
1170        */
1171       assert(fmtl->bh == 4);
1172       pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
1173    }
1174 
1175    if (ISL_GFX_VER(dev) >= 9 &&
1176        info->dim == ISL_SURF_DIM_3D &&
1177        tile_info->tiling != ISL_TILING_LINEAR) {
1178       /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
1179        *
1180        *    Tile Mode != Linear: This field must be set to an integer multiple
1181        *    of the tile height
1182        */
1183       pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
1184    }
1185 
1186    return pitch_el_rows;
1187 }
1188 
1189 /**
1190  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1191  * ISL_DIM_LAYOUT_GFX4_2D.
1192  */
1193 static void
isl_calc_phys_slice0_extent_sa_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,struct isl_extent2d * phys_slice0_sa)1194 isl_calc_phys_slice0_extent_sa_gfx4_2d(
1195       const struct isl_device *dev,
1196       const struct isl_surf_init_info *restrict info,
1197       enum isl_msaa_layout msaa_layout,
1198       const struct isl_extent3d *image_align_sa,
1199       const struct isl_extent4d *phys_level0_sa,
1200       struct isl_extent2d *phys_slice0_sa)
1201 {
1202    assert(phys_level0_sa->depth == 1);
1203 
1204    if (info->levels == 1) {
1205       /* Do not pad the surface to the image alignment.
1206        *
1207        * For tiled surfaces, using a reduced alignment here avoids wasting CPU
1208        * cycles on the below mipmap layout caluclations. Reducing the
1209        * alignment here is safe because we later align the row pitch and array
1210        * pitch to the tile boundary. It is safe even for
1211        * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
1212        * to accomodate the interleaved samples.
1213        *
1214        * For linear surfaces, reducing the alignment here permits us to later
1215        * choose an arbitrary, non-aligned row pitch. If the surface backs
1216        * a VkBuffer, then an arbitrary pitch may be needed to accomodate
1217        * VkBufferImageCopy::bufferRowLength.
1218        */
1219       *phys_slice0_sa = (struct isl_extent2d) {
1220          .w = phys_level0_sa->w,
1221          .h = phys_level0_sa->h,
1222       };
1223       return;
1224    }
1225 
1226    uint32_t slice_top_w = 0;
1227    uint32_t slice_bottom_w = 0;
1228    uint32_t slice_left_h = 0;
1229    uint32_t slice_right_h = 0;
1230 
1231    uint32_t W0 = phys_level0_sa->w;
1232    uint32_t H0 = phys_level0_sa->h;
1233 
1234    for (uint32_t l = 0; l < info->levels; ++l) {
1235       uint32_t W = isl_minify(W0, l);
1236       uint32_t H = isl_minify(H0, l);
1237 
1238       uint32_t w = isl_align_npot(W, image_align_sa->w);
1239       uint32_t h = isl_align_npot(H, image_align_sa->h);
1240 
1241       if (l == 0) {
1242          slice_top_w = w;
1243          slice_left_h = h;
1244          slice_right_h = h;
1245       } else if (l == 1) {
1246          slice_bottom_w = w;
1247          slice_left_h += h;
1248       } else if (l == 2) {
1249          slice_bottom_w += w;
1250          slice_right_h += h;
1251       } else {
1252          slice_right_h += h;
1253       }
1254    }
1255 
1256    *phys_slice0_sa = (struct isl_extent2d) {
1257       .w = MAX(slice_top_w, slice_bottom_w),
1258       .h = MAX(slice_left_h, slice_right_h),
1259    };
1260 }
1261 
1262 static void
isl_calc_phys_total_extent_el_gfx4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1263 isl_calc_phys_total_extent_el_gfx4_2d(
1264       const struct isl_device *dev,
1265       const struct isl_surf_init_info *restrict info,
1266       const struct isl_tile_info *tile_info,
1267       enum isl_msaa_layout msaa_layout,
1268       const struct isl_extent3d *image_align_sa,
1269       const struct isl_extent4d *phys_level0_sa,
1270       enum isl_array_pitch_span array_pitch_span,
1271       uint32_t *array_pitch_el_rows,
1272       struct isl_extent4d *phys_total_el)
1273 {
1274    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1275 
1276    struct isl_extent2d phys_slice0_sa;
1277    isl_calc_phys_slice0_extent_sa_gfx4_2d(dev, info, msaa_layout,
1278                                           image_align_sa, phys_level0_sa,
1279                                           &phys_slice0_sa);
1280    *array_pitch_el_rows =
1281       isl_calc_array_pitch_el_rows_gfx4_2d(dev, info, tile_info,
1282                                            image_align_sa, phys_level0_sa,
1283                                            array_pitch_span,
1284                                            &phys_slice0_sa);
1285 
1286    if (tile_info->tiling == ISL_TILING_64) {
1287       *phys_total_el = (struct isl_extent4d) {
1288          .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1289          .h = isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1290          .d = isl_align_div_npot(phys_level0_sa->d, fmtl->bd),
1291          .a = phys_level0_sa->array_len,
1292       };
1293    } else {
1294       *phys_total_el = (struct isl_extent4d) {
1295          .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1296          .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1297               isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1298          .d = 1,
1299          .a = 1,
1300       };
1301    }
1302 }
1303 
1304 /**
1305  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1306  * ISL_DIM_LAYOUT_GFX4_3D.
1307  */
1308 static void
isl_calc_phys_total_extent_el_gfx4_3d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1309 isl_calc_phys_total_extent_el_gfx4_3d(
1310       const struct isl_device *dev,
1311       const struct isl_surf_init_info *restrict info,
1312       const struct isl_extent3d *image_align_sa,
1313       const struct isl_extent4d *phys_level0_sa,
1314       uint32_t *array_pitch_el_rows,
1315       struct isl_extent4d *phys_total_el)
1316 {
1317    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1318 
1319    assert(info->samples == 1);
1320 
1321    if (info->dim != ISL_SURF_DIM_3D) {
1322       /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1323        *
1324        * The cube face textures are stored in the same way as 3D surfaces
1325        * are stored (see section 6.17.5 for details).  For cube surfaces,
1326        * however, the depth is equal to the number of faces (always 6) and
1327        * is not reduced for each MIP.
1328        */
1329       assert(ISL_GFX_VER(dev) == 4);
1330       assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1331       assert(phys_level0_sa->array_len == 6);
1332    } else {
1333       assert(phys_level0_sa->array_len == 1);
1334    }
1335 
1336    uint32_t total_w = 0;
1337    uint32_t total_h = 0;
1338 
1339    uint32_t W0 = phys_level0_sa->w;
1340    uint32_t H0 = phys_level0_sa->h;
1341    uint32_t D0 = phys_level0_sa->d;
1342    uint32_t A0 = phys_level0_sa->a;
1343 
1344    for (uint32_t l = 0; l < info->levels; ++l) {
1345       uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1346       uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1347       uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1348 
1349       uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1350       uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1351 
1352       total_w = MAX(total_w, level_w * max_layers_horiz);
1353       total_h += level_h * max_layers_vert;
1354    }
1355 
1356    /* GFX4_3D layouts don't really have an array pitch since each LOD has a
1357     * different number of horizontal and vertical layers.  We have to set it
1358     * to something, so at least make it true for LOD0.
1359     */
1360    *array_pitch_el_rows =
1361       isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1362    *phys_total_el = (struct isl_extent4d) {
1363       .w = isl_assert_div(total_w, fmtl->bw),
1364       .h = isl_assert_div(total_h, fmtl->bh),
1365       .d = 1,
1366       .a = 1,
1367    };
1368 }
1369 
1370 /**
1371  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1372  * ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ.
1373  */
1374 static void
isl_calc_phys_total_extent_el_gfx6_stencil_hiz(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1375 isl_calc_phys_total_extent_el_gfx6_stencil_hiz(
1376       const struct isl_device *dev,
1377       const struct isl_surf_init_info *restrict info,
1378       const struct isl_tile_info *tile_info,
1379       const struct isl_extent3d *image_align_sa,
1380       const struct isl_extent4d *phys_level0_sa,
1381       uint32_t *array_pitch_el_rows,
1382       struct isl_extent4d *phys_total_el)
1383 {
1384    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1385 
1386    const struct isl_extent2d tile_extent_sa = {
1387       .w = tile_info->logical_extent_el.w * fmtl->bw,
1388       .h = tile_info->logical_extent_el.h * fmtl->bh,
1389    };
1390    /* Tile size is a multiple of image alignment */
1391    assert(tile_extent_sa.w % image_align_sa->w == 0);
1392    assert(tile_extent_sa.h % image_align_sa->h == 0);
1393 
1394    const uint32_t W0 = phys_level0_sa->w;
1395    const uint32_t H0 = phys_level0_sa->h;
1396 
1397    /* Each image has the same height as LOD0 because the hardware thinks
1398     * everything is LOD0
1399     */
1400    const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1401 
1402    uint32_t total_top_w = 0;
1403    uint32_t total_bottom_w = 0;
1404    uint32_t total_h = 0;
1405 
1406    for (uint32_t l = 0; l < info->levels; ++l) {
1407       const uint32_t W = isl_minify(W0, l);
1408 
1409       const uint32_t w = isl_align(W, tile_extent_sa.w);
1410       const uint32_t h = isl_align(H, tile_extent_sa.h);
1411 
1412       if (l == 0) {
1413          total_top_w = w;
1414          total_h = h;
1415       } else if (l == 1) {
1416          total_bottom_w = w;
1417          total_h += h;
1418       } else {
1419          total_bottom_w += w;
1420       }
1421    }
1422 
1423    *array_pitch_el_rows =
1424       isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1425    *phys_total_el = (struct isl_extent4d) {
1426       .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1427       .h = isl_assert_div(total_h, fmtl->bh),
1428       .d = 1,
1429       .a = 1,
1430    };
1431 }
1432 
1433 /**
1434  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1435  * ISL_DIM_LAYOUT_GFX9_1D.
1436  */
1437 static void
isl_calc_phys_total_extent_el_gfx9_1d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1438 isl_calc_phys_total_extent_el_gfx9_1d(
1439       const struct isl_device *dev,
1440       const struct isl_surf_init_info *restrict info,
1441       const struct isl_extent3d *image_align_sa,
1442       const struct isl_extent4d *phys_level0_sa,
1443       uint32_t *array_pitch_el_rows,
1444       struct isl_extent4d *phys_total_el)
1445 {
1446    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1447 
1448    assert(phys_level0_sa->height == 1);
1449    assert(phys_level0_sa->depth == 1);
1450    assert(info->samples == 1);
1451    assert(image_align_sa->w >= fmtl->bw);
1452 
1453    uint32_t slice_w = 0;
1454    const uint32_t W0 = phys_level0_sa->w;
1455 
1456    for (uint32_t l = 0; l < info->levels; ++l) {
1457       uint32_t W = isl_minify(W0, l);
1458       uint32_t w = isl_align_npot(W, image_align_sa->w);
1459 
1460       slice_w += w;
1461    }
1462 
1463    *array_pitch_el_rows = 1;
1464    *phys_total_el = (struct isl_extent4d) {
1465       .w = isl_assert_div(slice_w, fmtl->bw),
1466       .h = phys_level0_sa->array_len,
1467       .d = 1,
1468       .a = 1,
1469    };
1470 }
1471 
1472 /**
1473  * Calculate the two-dimensional total physical extent of the surface, in
1474  * units of surface elements.
1475  */
1476 static void
isl_calc_phys_total_extent_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent4d * phys_total_el)1477 isl_calc_phys_total_extent_el(const struct isl_device *dev,
1478                               const struct isl_surf_init_info *restrict info,
1479                               const struct isl_tile_info *tile_info,
1480                               enum isl_dim_layout dim_layout,
1481                               enum isl_msaa_layout msaa_layout,
1482                               const struct isl_extent3d *image_align_sa,
1483                               const struct isl_extent4d *phys_level0_sa,
1484                               enum isl_array_pitch_span array_pitch_span,
1485                               uint32_t *array_pitch_el_rows,
1486                               struct isl_extent4d *phys_total_el)
1487 {
1488    switch (dim_layout) {
1489    case ISL_DIM_LAYOUT_GFX9_1D:
1490       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1491       isl_calc_phys_total_extent_el_gfx9_1d(dev, info,
1492                                             image_align_sa, phys_level0_sa,
1493                                             array_pitch_el_rows,
1494                                             phys_total_el);
1495       return;
1496    case ISL_DIM_LAYOUT_GFX4_2D:
1497       isl_calc_phys_total_extent_el_gfx4_2d(dev, info, tile_info, msaa_layout,
1498                                             image_align_sa, phys_level0_sa,
1499                                             array_pitch_span,
1500                                             array_pitch_el_rows,
1501                                             phys_total_el);
1502       return;
1503    case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
1504       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1505       isl_calc_phys_total_extent_el_gfx6_stencil_hiz(dev, info, tile_info,
1506                                                      image_align_sa,
1507                                                      phys_level0_sa,
1508                                                      array_pitch_el_rows,
1509                                                      phys_total_el);
1510       return;
1511    case ISL_DIM_LAYOUT_GFX4_3D:
1512       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1513       isl_calc_phys_total_extent_el_gfx4_3d(dev, info,
1514                                             image_align_sa, phys_level0_sa,
1515                                             array_pitch_el_rows,
1516                                             phys_total_el);
1517       return;
1518    }
1519 
1520    unreachable("invalid value for dim_layout");
1521 }
1522 
1523 static uint32_t
isl_calc_row_pitch_alignment(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info)1524 isl_calc_row_pitch_alignment(const struct isl_device *dev,
1525                              const struct isl_surf_init_info *surf_info,
1526                              const struct isl_tile_info *tile_info)
1527 {
1528    if (tile_info->tiling != ISL_TILING_LINEAR) {
1529       /* According to BSpec: 44930, Gfx12's CCS-compressed surface pitches must
1530        * be 512B-aligned. CCS is only support on Y tilings.
1531        *
1532        * Only consider 512B alignment when :
1533        *    - AUX is not explicitly disabled
1534        *    - the caller has specified no pitch
1535        *
1536        * isl_surf_get_ccs_surf() will check that the main surface alignment
1537        * matches CCS expectations.
1538        */
1539       if (ISL_GFX_VER(dev) >= 12 &&
1540           isl_format_supports_ccs_e(dev->info, surf_info->format) &&
1541           tile_info->tiling != ISL_TILING_X &&
1542           !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) &&
1543           surf_info->row_pitch_B == 0) {
1544          return isl_align(tile_info->phys_extent_B.width, 512);
1545       }
1546 
1547       return tile_info->phys_extent_B.width;
1548    }
1549 
1550    /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1551     * RENDER_SURFACE_STATE Surface Pitch (p349):
1552     *
1553     *    - For linear render target surfaces and surfaces accessed with the
1554     *      typed data port messages, the pitch must be a multiple of the
1555     *      element size for non-YUV surface formats.  Pitch must be
1556     *      a multiple of 2 * element size for YUV surface formats.
1557     *
1558     *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1559     *      ignore because isl doesn't do buffers.]
1560     *
1561     *    - For other linear surfaces, the pitch can be any multiple of
1562     *      bytes.
1563     */
1564    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1565    const uint32_t bs = fmtl->bpb / 8;
1566    uint32_t alignment;
1567 
1568    if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1569       if (isl_format_is_yuv(surf_info->format)) {
1570          alignment = 2 * bs;
1571       } else  {
1572          alignment = bs;
1573       }
1574    } else {
1575       alignment = 1;
1576    }
1577 
1578    /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >>
1579     * PRI_STRIDE Stride (p1254):
1580     *
1581     *    "When using linear memory, this must be at least 64 byte aligned."
1582     *
1583     * However, when displaying on NVIDIA and recent AMD GPUs via PRIME,
1584     * we need a larger pitch of 256 bytes.
1585     *
1586     * If the ISL caller didn't specify a row_pitch_B, then we should assume
1587     * the NVIDIA/AMD requirements. Otherwise, if we have a specified
1588     * row_pitch_B, this is probably because the caller is trying to import a
1589     * buffer. In that case we limit the minimum row pitch to the Intel HW
1590     * requirement.
1591     */
1592    if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {
1593       if (surf_info->row_pitch_B == 0)
1594          alignment = isl_align(alignment, 256);
1595       else
1596          alignment = isl_align(alignment, 64);
1597    }
1598 
1599    return alignment;
1600 }
1601 
1602 static uint32_t
isl_calc_linear_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)1603 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1604                               const struct isl_surf_init_info *info,
1605                               const struct isl_extent4d *phys_total_el,
1606                               uint32_t alignment_B)
1607 {
1608    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1609    const uint32_t bs = fmtl->bpb / 8;
1610 
1611    return isl_align_npot(bs * phys_total_el->w, alignment_B);
1612 }
1613 
1614 static uint32_t
isl_calc_tiled_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)1615 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1616                              const struct isl_surf_init_info *surf_info,
1617                              const struct isl_tile_info *tile_info,
1618                              const struct isl_extent4d *phys_total_el,
1619                              uint32_t alignment_B)
1620 {
1621    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1622 
1623    assert(fmtl->bpb % tile_info->format_bpb == 0);
1624 
1625    const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1626    const uint32_t total_w_tl =
1627       isl_align_div(phys_total_el->w * tile_el_scale,
1628                     tile_info->logical_extent_el.width);
1629 
1630    /* In some cases the alignment of the pitch might be > to the tile size
1631     * (for example Gfx12 CCS requires 512B alignment while the tile's width
1632     * can be 128B), so align the row pitch to the alignment.
1633     */
1634    assert(alignment_B >= tile_info->phys_extent_B.width);
1635    return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
1636 }
1637 
1638 static uint32_t
isl_calc_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent4d * phys_total_el,uint32_t alignment_B)1639 isl_calc_min_row_pitch(const struct isl_device *dev,
1640                        const struct isl_surf_init_info *surf_info,
1641                        const struct isl_tile_info *tile_info,
1642                        const struct isl_extent4d *phys_total_el,
1643                        uint32_t alignment_B)
1644 {
1645    if (tile_info->tiling == ISL_TILING_LINEAR) {
1646       return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1647                                            alignment_B);
1648    } else {
1649       return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1650                                           phys_total_el, alignment_B);
1651    }
1652 }
1653 
/**
 * Is `n` in the valid range for a hardware pitch bitfield, if the bitfield's
 * size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * \param n     pitch to test; must be non-zero (asserted)
 * \param bits  width of the hardware field in bits; 0 means the field does
 *              not exist, in which case no pitch is in range
 * \return true if 1 <= n <= 2^bits
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   if (bits == 0 || n < 1)
      return false;

   /* 2^bits does not fit in 32 bits once bits >= 32, and shifting by the
    * type width (or shifting a signed 1 into the sign bit) is undefined
    * behaviour.  Every non-zero uint32_t pitch is <= 2^32, so such a field
    * can hold any of them.
    */
   if (bits >= 32)
      return true;

   return n <= ((uint32_t)1 << bits);
}
1669 
1670 static bool
isl_calc_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_total_el,uint32_t * out_row_pitch_B)1671 isl_calc_row_pitch(const struct isl_device *dev,
1672                    const struct isl_surf_init_info *surf_info,
1673                    const struct isl_tile_info *tile_info,
1674                    enum isl_dim_layout dim_layout,
1675                    const struct isl_extent4d *phys_total_el,
1676                    uint32_t *out_row_pitch_B)
1677 {
1678    uint32_t alignment_B =
1679       isl_calc_row_pitch_alignment(dev, surf_info, tile_info);
1680 
1681    const uint32_t min_row_pitch_B =
1682       isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1683                              alignment_B);
1684 
1685    if (surf_info->row_pitch_B != 0) {
1686       if (surf_info->row_pitch_B < min_row_pitch_B)
1687          return false;
1688 
1689       if (surf_info->row_pitch_B % alignment_B != 0)
1690          return false;
1691    }
1692 
1693    const uint32_t row_pitch_B =
1694       surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B;
1695 
1696    const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1697 
1698    if (row_pitch_B == 0)
1699       return false;
1700 
1701    if (dim_layout == ISL_DIM_LAYOUT_GFX9_1D) {
1702       /* SurfacePitch is ignored for this layout. */
1703       goto done;
1704    }
1705 
1706    if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1707                             ISL_SURF_USAGE_TEXTURE_BIT |
1708                             ISL_SURF_USAGE_STORAGE_BIT)) &&
1709        !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1710       return false;
1711 
1712    if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1713                             ISL_SURF_USAGE_MCS_BIT)) &&
1714        !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1715       return false;
1716 
1717    if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1718        !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1719       return false;
1720 
1721    if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1722        !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1723       return false;
1724 
1725    const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1726       _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1727       _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1728 
1729    if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1730        !pitch_in_range(row_pitch_B, stencil_pitch_bits))
1731       return false;
1732 
1733  done:
1734    *out_row_pitch_B = row_pitch_B;
1735    return true;
1736 }
1737 
1738 bool
isl_surf_init_s(const struct isl_device * dev,struct isl_surf * surf,const struct isl_surf_init_info * restrict info)1739 isl_surf_init_s(const struct isl_device *dev,
1740                 struct isl_surf *surf,
1741                 const struct isl_surf_init_info *restrict info)
1742 {
1743    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1744 
1745    const struct isl_extent4d logical_level0_px = {
1746       .w = info->width,
1747       .h = info->height,
1748       .d = info->depth,
1749       .a = info->array_len,
1750    };
1751 
1752    enum isl_tiling tiling;
1753    if (!isl_surf_choose_tiling(dev, info, &tiling))
1754       return false;
1755 
1756    const enum isl_dim_layout dim_layout =
1757       isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
1758 
1759    enum isl_msaa_layout msaa_layout;
1760    if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1761        return false;
1762 
1763    struct isl_tile_info tile_info;
1764    isl_tiling_get_info(tiling, info->dim, msaa_layout, fmtl->bpb,
1765                        info->samples, &tile_info);
1766 
1767    struct isl_extent3d image_align_el;
1768    isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1769                                  &image_align_el);
1770 
1771    struct isl_extent3d image_align_sa =
1772       isl_extent3d_el_to_sa(info->format, image_align_el);
1773 
1774    struct isl_extent4d phys_level0_sa;
1775    isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1776                                   &phys_level0_sa);
1777 
1778    enum isl_array_pitch_span array_pitch_span =
1779       isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1780 
1781    uint32_t array_pitch_el_rows;
1782    struct isl_extent4d phys_total_el;
1783    isl_calc_phys_total_extent_el(dev, info, &tile_info,
1784                                  dim_layout, msaa_layout,
1785                                  &image_align_sa, &phys_level0_sa,
1786                                  array_pitch_span, &array_pitch_el_rows,
1787                                  &phys_total_el);
1788 
1789    uint32_t row_pitch_B;
1790    if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
1791                            &phys_total_el, &row_pitch_B))
1792       return false;
1793 
1794    uint32_t base_alignment_B;
1795    uint64_t size_B;
1796    if (tiling == ISL_TILING_LINEAR) {
1797       /* LINEAR tiling has no concept of intra-tile arrays */
1798       assert(phys_total_el.d == 1 && phys_total_el.a == 1);
1799 
1800       size_B = (uint64_t) row_pitch_B * phys_total_el.h;
1801 
1802       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1803        *
1804        *    "The Base Address for linear render target surfaces and surfaces
1805        *    accessed with the typed surface read/write data port messages must
1806        *    be element-size aligned, for non-YUV surface formats, or a
1807        *    multiple of 2 element-sizes for YUV surface formats. Other linear
1808        *    surfaces have no alignment requirements (byte alignment is
1809        *    sufficient.)"
1810        */
1811       base_alignment_B = MAX(1, info->min_alignment_B);
1812       if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1813          if (isl_format_is_yuv(info->format)) {
1814             base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4);
1815          } else {
1816             base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8);
1817          }
1818       }
1819       base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
1820 
1821       /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
1822        *
1823        *     "For Linear memory, this field specifies the stride in chunks of
1824        *     64 bytes (1 cache line)."
1825        */
1826       if (isl_surf_usage_is_display(info->usage))
1827          base_alignment_B = MAX(base_alignment_B, 64);
1828    } else {
1829       /* Pitches must make sense with the tiling */
1830       assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
1831 
1832       uint32_t array_slices, array_pitch_tl_rows;
1833       if (phys_total_el.d > 1) {
1834          assert(phys_total_el.a == 1);
1835          array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
1836                                               tile_info.logical_extent_el.h);
1837          array_slices = isl_align_div(phys_total_el.d,
1838                                       tile_info.logical_extent_el.d);
1839       } else if (phys_total_el.a > 1) {
1840          assert(phys_total_el.d == 1);
1841          array_pitch_tl_rows = isl_assert_div(array_pitch_el_rows,
1842                                               tile_info.logical_extent_el.h);
1843          array_slices = isl_align_div(phys_total_el.a,
1844                                       tile_info.logical_extent_el.a);
1845       } else {
1846          assert(phys_total_el.d == 1 && phys_total_el.a == 1);
1847          array_pitch_tl_rows = 0;
1848          array_slices = 1;
1849       }
1850 
1851       const uint32_t total_h_tl =
1852          (array_slices - 1) * array_pitch_tl_rows +
1853          isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
1854 
1855       size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B;
1856 
1857       const uint32_t tile_size_B = tile_info.phys_extent_B.width *
1858                                    tile_info.phys_extent_B.height;
1859       assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
1860       base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
1861 
1862       /* The diagram in the Bspec section Memory Compression - Gfx12, shows
1863        * that the CCS is indexed in 256B chunks. However, the
1864        * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
1865        * pages. We currently don't assign the usage field like we do for main
1866        * surfaces, so just use 4K for now.
1867        */
1868       if (tiling == ISL_TILING_GFX12_CCS)
1869          base_alignment_B = MAX(base_alignment_B, 4096);
1870 
1871       /* Gfx12+ requires that images be 64K-aligned if they're going to used
1872        * with CCS.  This is because the Aux translation table maps main
1873        * surface addresses to aux addresses at a 64K (in the main surface)
1874        * granularity.  Because we don't know for sure in ISL if a surface will
1875        * use CCS, we have to guess based on the DISABLE_AUX usage bit.  The
1876        * one thing we do know is that we haven't enable CCS on linear images
1877        * yet so we can avoid the extra alignment there.
1878        */
1879       if (ISL_GFX_VER(dev) >= 12 &&
1880           !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
1881          base_alignment_B = MAX(base_alignment_B, 64 * 1024);
1882       }
1883    }
1884 
1885    if (ISL_GFX_VER(dev) < 9) {
1886       /* From the Broadwell PRM Vol 5, Surface Layout:
1887        *
1888        *    "In addition to restrictions on maximum height, width, and depth,
1889        *     surfaces are also restricted to a maximum size in bytes. This
1890        *     maximum is 2 GB for all products and all surface types."
1891        *
1892        * This comment is applicable to all Pre-gfx9 platforms.
1893        */
1894       if (size_B > (uint64_t) 1 << 31)
1895          return false;
1896    } else if (ISL_GFX_VER(dev) < 11) {
1897       /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
1898        *    "In addition to restrictions on maximum height, width, and depth,
1899        *     surfaces are also restricted to a maximum size of 2^38 bytes.
1900        *     All pixels within the surface must be contained within 2^38 bytes
1901        *     of the base address."
1902        */
1903       if (size_B > (uint64_t) 1 << 38)
1904          return false;
1905    } else {
1906       /* gfx11+ platforms raised this limit to 2^44 bytes. */
1907       if (size_B > (uint64_t) 1 << 44)
1908          return false;
1909    }
1910 
1911    *surf = (struct isl_surf) {
1912       .dim = info->dim,
1913       .dim_layout = dim_layout,
1914       .msaa_layout = msaa_layout,
1915       .tiling = tiling,
1916       .format = info->format,
1917 
1918       .levels = info->levels,
1919       .samples = info->samples,
1920 
1921       .image_alignment_el = image_align_el,
1922       .logical_level0_px = logical_level0_px,
1923       .phys_level0_sa = phys_level0_sa,
1924 
1925       .size_B = size_B,
1926       .alignment_B = base_alignment_B,
1927       .row_pitch_B = row_pitch_B,
1928       .array_pitch_el_rows = array_pitch_el_rows,
1929       .array_pitch_span = array_pitch_span,
1930 
1931       .usage = info->usage,
1932    };
1933 
1934    return true;
1935 }
1936 
1937 void
isl_surf_get_tile_info(const struct isl_surf * surf,struct isl_tile_info * tile_info)1938 isl_surf_get_tile_info(const struct isl_surf *surf,
1939                        struct isl_tile_info *tile_info)
1940 {
1941    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1942    isl_tiling_get_info(surf->tiling, surf->dim, surf->msaa_layout, fmtl->bpb,
1943                        surf->samples, tile_info);
1944 }
1945 
1946 bool
isl_surf_get_hiz_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * hiz_surf)1947 isl_surf_get_hiz_surf(const struct isl_device *dev,
1948                       const struct isl_surf *surf,
1949                       struct isl_surf *hiz_surf)
1950 {
1951    assert(ISL_GFX_VER(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1952 
1953    if (!isl_surf_usage_is_depth(surf->usage))
1954       return false;
1955 
1956    /* HiZ only works with Y-tiled depth buffers */
1957    if (!isl_tiling_is_any_y(surf->tiling))
1958       return false;
1959 
1960    /* On SNB+, compressed depth buffers cannot be interleaved with stencil. */
1961    switch (surf->format) {
1962    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
1963       if (isl_surf_usage_is_depth_and_stencil(surf->usage)) {
1964          assert(ISL_GFX_VER(dev) == 5);
1965          unreachable("This should work, but is untested");
1966       }
1967       FALLTHROUGH;
1968    case ISL_FORMAT_R16_UNORM:
1969    case ISL_FORMAT_R32_FLOAT:
1970       break;
1971    case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
1972       if (ISL_GFX_VER(dev) == 5) {
1973          assert(isl_surf_usage_is_depth_and_stencil(surf->usage));
1974          unreachable("This should work, but is untested");
1975       }
1976       FALLTHROUGH;
1977    default:
1978       return false;
1979    }
1980 
1981    /* Multisampled depth is always interleaved */
1982    assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1983           surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1984 
1985    /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1986     *
1987     *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1988     *    Target View Extent, and Depth Coordinate Offset X/Y of the
1989     *    hierarchical depth buffer are inherited from the depth buffer. The
1990     *    height and width of the hierarchical depth buffer that must be
1991     *    allocated are computed by the following formulas, where HZ is the
1992     *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1993     *    Z_Width, and Z_Depth values given in these formulas are those present
1994     *    in 3DSTATE_DEPTH_BUFFER incremented by one.
1995     *
1996     *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
1997     *    being applied to the table below if Number of Multisamples is set to
1998     *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1999     *    Z_Width must be multiplied by 4 before being applied to the table
2000     *    below if Number of Multisamples is set to NUMSAMPLES_8."
2001     *
2002     * In the Sky Lake PRM, the second paragraph is replaced with this:
2003     *
2004     *    "The Z_Height and Z_Width values must equal those present in
2005     *    3DSTATE_DEPTH_BUFFER incremented by one."
2006     *
2007     * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
2008     * block corresponds to a region of 8x4 samples in the primary depth
2009     * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
2010     * a region of 8x4 pixels in the primary depth surface regardless of the
2011     * number of samples.  The dimensions of a HiZ block in both pixels and
2012     * samples are given in the table below:
2013     *
2014     *                    | SNB - BDW |     SKL+
2015     *              ------+-----------+-------------
2016     *                1x  |  8 x 4 sa |   8 x 4 sa
2017     *               MSAA |  8 x 4 px |   8 x 4 px
2018     *              ------+-----------+-------------
2019     *                2x  |  8 x 4 sa |  16 x 4 sa
2020     *               MSAA |  4 x 4 px |   8 x 4 px
2021     *              ------+-----------+-------------
2022     *                4x  |  8 x 4 sa |  16 x 8 sa
2023     *               MSAA |  4 x 2 px |   8 x 4 px
2024     *              ------+-----------+-------------
2025     *                8x  |  8 x 4 sa |  32 x 8 sa
2026     *               MSAA |  2 x 2 px |   8 x 4 px
2027     *              ------+-----------+-------------
2028     *               16x  |    N/A    | 32 x 16 sa
2029     *               MSAA |    N/A    |  8 x  4 px
2030     *              ------+-----------+-------------
2031     *
2032     * There are a number of different ways that this discrepency could be
2033     * handled.  The way we have chosen is to simply make MSAA HiZ have the
2034     * same number of samples as the parent surface pre-Sky Lake and always be
2035     * single-sampled on Sky Lake and above.  Since the block sizes of
2036     * compressed formats are given in samples, this neatly handles everything
2037     * without the need for additional HiZ formats with different block sizes
2038     * on SKL+.
2039     */
2040    const unsigned samples = ISL_GFX_VER(dev) >= 9 ? 1 : surf->samples;
2041 
2042    return isl_surf_init(dev, hiz_surf,
2043                         .dim = surf->dim,
2044                         .format = ISL_FORMAT_HIZ,
2045                         .width = surf->logical_level0_px.width,
2046                         .height = surf->logical_level0_px.height,
2047                         .depth = surf->logical_level0_px.depth,
2048                         .levels = surf->levels,
2049                         .array_len = surf->logical_level0_px.array_len,
2050                         .samples = samples,
2051                         .usage = ISL_SURF_USAGE_HIZ_BIT,
2052                         .tiling_flags = ISL_TILING_HIZ_BIT);
2053 }
2054 
2055 bool
isl_surf_get_mcs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * mcs_surf)2056 isl_surf_get_mcs_surf(const struct isl_device *dev,
2057                       const struct isl_surf *surf,
2058                       struct isl_surf *mcs_surf)
2059 {
2060    /* It must be multisampled with an array layout */
2061    if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY)
2062       return false;
2063 
2064    if (mcs_surf->size_B > 0)
2065       return false;
2066 
2067    /* The following are true of all multisampled surfaces */
2068    assert(surf->samples > 1);
2069    assert(surf->dim == ISL_SURF_DIM_2D);
2070    assert(surf->levels == 1);
2071    assert(surf->logical_level0_px.depth == 1);
2072 
2073    /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
2074     *
2075     *   This field must be set to 0 for all SINT MSRTs when all RT channels
2076     *   are not written
2077     *
2078     * In practice this means that we have to disable MCS for all signed
2079     * integer MSAA buffers.  The alternative, to disable MCS only when one
2080     * of the render target channels is disabled, is impractical because it
2081     * would require converting between CMS and UMS MSAA layouts on the fly,
2082     * which is expensive.
2083     */
2084    if (ISL_GFX_VER(dev) == 7 && isl_format_has_sint_channel(surf->format))
2085       return false;
2086 
2087    /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
2088     * bits which means the maximum pitch of a compression surface is 512
2089     * tiles or 64KB (since MCS is always Y-tiled).  Since a 16x MCS buffer is
2090     * 64bpp, this gives us a maximum width of 8192 pixels.  We can create
2091     * larger multisampled surfaces, we just can't compress them.   For 2x, 4x,
2092     * and 8x, we have enough room for the full 16k supported by the hardware.
2093     */
2094    if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
2095       return false;
2096 
2097    enum isl_format mcs_format;
2098    switch (surf->samples) {
2099    case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
2100    case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
2101    case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
2102    case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
2103    default:
2104       unreachable("Invalid sample count");
2105    }
2106 
2107    return isl_surf_init(dev, mcs_surf,
2108                         .dim = ISL_SURF_DIM_2D,
2109                         .format = mcs_format,
2110                         .width = surf->logical_level0_px.width,
2111                         .height = surf->logical_level0_px.height,
2112                         .depth = 1,
2113                         .levels = 1,
2114                         .array_len = surf->logical_level0_px.array_len,
2115                         .samples = 1, /* MCS surfaces are really single-sampled */
2116                         .usage = ISL_SURF_USAGE_MCS_BIT,
2117                         .tiling_flags = ISL_TILING_Y0_BIT);
2118 }
2119 
2120 bool
isl_surf_supports_ccs(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * hiz_or_mcs_surf)2121 isl_surf_supports_ccs(const struct isl_device *dev,
2122                       const struct isl_surf *surf,
2123                       const struct isl_surf *hiz_or_mcs_surf)
2124 {
2125    /* CCS support does not exist prior to Gfx7 */
2126    if (ISL_GFX_VER(dev) <= 6)
2127       return false;
2128 
2129    /* Wa_22011186057: Disable compression on ADL-P A0 */
2130    if (dev->info->is_alderlake && dev->info->gt == 2 &&
2131        dev->info->revision == 0)
2132       return false;
2133 
2134    if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
2135       return false;
2136 
2137    if (isl_format_is_compressed(surf->format))
2138       return false;
2139 
2140    if (!isl_is_pow2(isl_format_get_layout(surf->format)->bpb))
2141       return false;
2142 
2143    /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2144     * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2145     *
2146     *     - Support is limited to tiled render targets.
2147     *
2148     * From the Skylake documentation, it is made clear that X-tiling is no
2149     * longer supported:
2150     *
2151     *     - MCS and Lossless compression is supported for
2152     *       TiledY/TileYs/TileYf non-MSRTs only.
2153     *
2154     * From the BSpec (44930) for Gfx12:
2155     *
2156     *    Linear CCS is only allowed for Untyped Buffers but only via HDC
2157     *    Data-Port messages.
2158     *
2159     * We never use untyped messages on surfaces created by ISL on Gfx9+ so
2160     * this means linear is out on Gfx12+ as well.
2161     */
2162    if (surf->tiling == ISL_TILING_LINEAR)
2163       return false;
2164 
2165    if (ISL_GFX_VER(dev) >= 12) {
2166       if (isl_surf_usage_is_stencil(surf->usage)) {
2167          /* HiZ and MCS aren't allowed with stencil */
2168          assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
2169 
2170          /* Multi-sampled stencil cannot have CCS */
2171          if (surf->samples > 1)
2172             return false;
2173       } else if (isl_surf_usage_is_depth(surf->usage)) {
2174          const struct isl_surf *hiz_surf = hiz_or_mcs_surf;
2175 
2176          /* With depth surfaces, HIZ is required for CCS. */
2177          if (hiz_surf == NULL || hiz_surf->size_B == 0)
2178             return false;
2179 
2180          assert(hiz_surf->usage & ISL_SURF_USAGE_HIZ_BIT);
2181          assert(hiz_surf->tiling == ISL_TILING_HIZ);
2182          assert(hiz_surf->format == ISL_FORMAT_HIZ);
2183       } else if (surf->samples > 1) {
2184          const struct isl_surf *mcs_surf = hiz_or_mcs_surf;
2185 
2186          /* With multisampled color, CCS requires MCS */
2187          if (mcs_surf == NULL || mcs_surf->size_B == 0)
2188             return false;
2189 
2190          assert(mcs_surf->usage & ISL_SURF_USAGE_MCS_BIT);
2191          assert(isl_tiling_is_any_y(mcs_surf->tiling));
2192          assert(isl_format_is_mcs(mcs_surf->format));
2193       } else {
2194          /* Single-sampled color can't have MCS or HiZ */
2195          assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
2196       }
2197 
2198       /* On Gfx12, all CCS-compressed surface pitches must be multiples of
2199        * 512B.
2200        */
2201       if (surf->row_pitch_B % 512 != 0)
2202          return false;
2203 
2204       /* According to Wa_1406738321, 3D textures need a blit to a new
2205        * surface in order to perform a resolve. For now, just disable CCS.
2206        */
2207       if (surf->dim == ISL_SURF_DIM_3D) {
2208          isl_finishme("%s:%s: CCS for 3D textures is disabled, but a workaround"
2209                       " is available.", __FILE__, __func__);
2210          return false;
2211       }
2212 
2213       /* Wa_1207137018
2214        *
2215        * TODO: implement following workaround currently covered by the
2216        * restriction above. If following conditions are met:
2217        *
2218        *    - RENDER_SURFACE_STATE.Surface Type == 3D
2219        *    - RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE
2220        *    - RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS
2221        *
2222        * Set the value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip
2223        * that is larger than those present in the surface (i.e. 15)
2224        */
2225 
2226       /* TODO: Handle the other tiling formats */
2227       if (surf->tiling != ISL_TILING_Y0)
2228          return false;
2229    } else {
2230       /* ISL_GFX_VER(dev) < 12 */
2231       if (surf->samples > 1)
2232          return false;
2233 
2234       /* CCS is only for color images on Gfx7-11 */
2235       if (isl_surf_usage_is_depth_or_stencil(surf->usage))
2236          return false;
2237 
2238       /* We're single-sampled color so having HiZ or MCS makes no sense */
2239       assert(hiz_or_mcs_surf == NULL || hiz_or_mcs_surf->size_B == 0);
2240 
2241       /* The PRM doesn't say this explicitly, but fast-clears don't appear to
2242        * work for 3D textures until gfx9 where the layout of 3D textures
2243        * changes to match 2D array textures.
2244        */
2245       if (ISL_GFX_VER(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
2246          return false;
2247 
2248       /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
2249        * Non-MultiSampler Render Target Restrictions):
2250        *
2251        *    "Support is for non-mip-mapped and non-array surface types only."
2252        *
2253        * This restriction is lifted on gfx8+.  Technically, it may be possible
2254        * to create a CCS for an arrayed or mipmapped image and only enable
2255        * CCS_D when rendering to the base slice.  However, there is no
2256       * documentation telling us what the hardware would do in that case or
2257       * what it does if you walk off the base slice.  (Does it ignore CCS or
2258       * does it start scribbling over random memory?)  We play it safe and just
2259        * follow the docs and don't allow CCS_D for arrayed or mip-mapped
2260        * surfaces.
2261        */
2262       if (ISL_GFX_VER(dev) <= 7 &&
2263           (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
2264          return false;
2265 
2266       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2267        * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2268        *
2269        *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
2270        *       64bpp, and 128bpp.
2271        */
2272       if (isl_format_get_layout(surf->format)->bpb < 32)
2273          return false;
2274 
2275       /* From the Skylake documentation, it is made clear that X-tiling is no
2276        * longer supported:
2277        *
2278        *     - MCS and Lossless compression is supported for
2279        *     TiledY/TileYs/TileYf non-MSRTs only.
2280        */
2281       if (ISL_GFX_VER(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling))
2282          return false;
2283    }
2284 
2285    return true;
2286 }
2287 
2288 bool
isl_surf_get_ccs_surf(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * hiz_or_mcs_surf,struct isl_surf * ccs_surf,uint32_t row_pitch_B)2289 isl_surf_get_ccs_surf(const struct isl_device *dev,
2290                       const struct isl_surf *surf,
2291                       const struct isl_surf *hiz_or_mcs_surf,
2292                       struct isl_surf *ccs_surf,
2293                       uint32_t row_pitch_B)
2294 {
2295    if (!isl_surf_supports_ccs(dev, surf, hiz_or_mcs_surf))
2296       return false;
2297 
2298    if (ISL_GFX_VER(dev) >= 12) {
2299       enum isl_format ccs_format;
2300       switch (isl_format_get_layout(surf->format)->bpb) {
2301       case 8:     ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0;    break;
2302       case 16:    ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0;   break;
2303       case 32:    ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0;   break;
2304       case 64:    ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0;   break;
2305       case 128:   ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0;  break;
2306       default:
2307          return false;
2308       }
2309 
2310       /* On Gfx12, the CCS is a scaled-down version of the main surface. We
2311        * model this as the CCS compressing a 2D-view of the entire surface.
2312        */
2313       const bool ok =
2314          isl_surf_init(dev, ccs_surf,
2315                        .dim = ISL_SURF_DIM_2D,
2316                        .format = ccs_format,
2317                        .width = isl_surf_get_row_pitch_el(surf),
2318                        .height = surf->size_B / surf->row_pitch_B,
2319                        .depth = 1,
2320                        .levels = 1,
2321                        .array_len = 1,
2322                        .samples = 1,
2323                        .row_pitch_B = row_pitch_B,
2324                        .usage = ISL_SURF_USAGE_CCS_BIT,
2325                        .tiling_flags = ISL_TILING_GFX12_CCS_BIT);
2326       assert(!ok || ccs_surf->size_B == surf->size_B / 256);
2327       return ok;
2328    } else {
2329       enum isl_format ccs_format;
2330       if (ISL_GFX_VER(dev) >= 9) {
2331          switch (isl_format_get_layout(surf->format)->bpb) {
2332          case 32:    ccs_format = ISL_FORMAT_GFX9_CCS_32BPP;   break;
2333          case 64:    ccs_format = ISL_FORMAT_GFX9_CCS_64BPP;   break;
2334          case 128:   ccs_format = ISL_FORMAT_GFX9_CCS_128BPP;  break;
2335          default:    unreachable("Unsupported CCS format");
2336             return false;
2337          }
2338       } else if (surf->tiling == ISL_TILING_Y0) {
2339          switch (isl_format_get_layout(surf->format)->bpb) {
2340          case 32:    ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_Y;    break;
2341          case 64:    ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_Y;    break;
2342          case 128:   ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_Y;   break;
2343          default:    unreachable("Unsupported CCS format");
2344          }
2345       } else if (surf->tiling == ISL_TILING_X) {
2346          switch (isl_format_get_layout(surf->format)->bpb) {
2347          case 32:    ccs_format = ISL_FORMAT_GFX7_CCS_32BPP_X;    break;
2348          case 64:    ccs_format = ISL_FORMAT_GFX7_CCS_64BPP_X;    break;
2349          case 128:   ccs_format = ISL_FORMAT_GFX7_CCS_128BPP_X;   break;
2350          default:    unreachable("Unsupported CCS format");
2351          }
2352       } else {
2353          unreachable("Invalid tiling format");
2354       }
2355 
2356       return isl_surf_init(dev, ccs_surf,
2357                            .dim = surf->dim,
2358                            .format = ccs_format,
2359                            .width = surf->logical_level0_px.width,
2360                            .height = surf->logical_level0_px.height,
2361                            .depth = surf->logical_level0_px.depth,
2362                            .levels = surf->levels,
2363                            .array_len = surf->logical_level0_px.array_len,
2364                            .samples = 1,
2365                            .row_pitch_B = row_pitch_B,
2366                            .usage = ISL_SURF_USAGE_CCS_BIT,
2367                            .tiling_flags = ISL_TILING_CCS_BIT);
2368    }
2369 }
2370 
/*
 * Dispatch to the per-generation implementation of `func`, selected by
 * ISL_GFX_VERX10(dev).  The remaining arguments are forwarded verbatim to
 * isl_gfx<N>_<func>().  An unknown generation trips an assert.
 *
 * The switch is wrapped in do { } while (0) so the macro plus the caller's
 * trailing semicolon form exactly one statement; this keeps it safe inside
 * an unbraced if/else.
 */
#define isl_genX_call(dev, func, ...)                 \
   do {                                               \
      switch (ISL_GFX_VERX10(dev)) {                  \
      case 40:                                        \
         isl_gfx4_##func(__VA_ARGS__);                \
         break;                                       \
      case 45:                                        \
         /* G45 surface state is the same as gfx5 */  \
      case 50:                                        \
         isl_gfx5_##func(__VA_ARGS__);                \
         break;                                       \
      case 60:                                        \
         isl_gfx6_##func(__VA_ARGS__);                \
         break;                                       \
      case 70:                                        \
         isl_gfx7_##func(__VA_ARGS__);                \
         break;                                       \
      case 75:                                        \
         isl_gfx75_##func(__VA_ARGS__);               \
         break;                                       \
      case 80:                                        \
         isl_gfx8_##func(__VA_ARGS__);                \
         break;                                       \
      case 90:                                        \
         isl_gfx9_##func(__VA_ARGS__);                \
         break;                                       \
      case 110:                                       \
         isl_gfx11_##func(__VA_ARGS__);               \
         break;                                       \
      case 120:                                       \
         isl_gfx12_##func(__VA_ARGS__);               \
         break;                                       \
      case 125:                                       \
         isl_gfx125_##func(__VA_ARGS__);              \
         break;                                       \
      default:                                        \
         assert(!"Unknown hardware generation");      \
      }                                               \
   } while (0)
2408 
2409 void
isl_surf_fill_state_s(const struct isl_device * dev,void * state,const struct isl_surf_fill_state_info * restrict info)2410 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
2411                       const struct isl_surf_fill_state_info *restrict info)
2412 {
2413 #ifndef NDEBUG
2414    isl_surf_usage_flags_t _base_usage =
2415       info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
2416                            ISL_SURF_USAGE_TEXTURE_BIT |
2417                            ISL_SURF_USAGE_STORAGE_BIT);
2418    /* They may only specify one of the above bits at a time */
2419    assert(__builtin_popcount(_base_usage) == 1);
2420    /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
2421    assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
2422 #endif
2423 
2424    if (info->surf->dim == ISL_SURF_DIM_3D) {
2425       assert(info->view->base_array_layer + info->view->array_len <=
2426              info->surf->logical_level0_px.depth);
2427    } else {
2428       assert(info->view->base_array_layer + info->view->array_len <=
2429              info->surf->logical_level0_px.array_len);
2430    }
2431 
2432    isl_genX_call(dev, surf_fill_state_s, dev, state, info);
2433 }
2434 
2435 void
isl_buffer_fill_state_s(const struct isl_device * dev,void * state,const struct isl_buffer_fill_state_info * restrict info)2436 isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
2437                         const struct isl_buffer_fill_state_info *restrict info)
2438 {
2439    isl_genX_call(dev, buffer_fill_state_s, dev, state, info);
2440 }
2441 
2442 void
isl_null_fill_state_s(const struct isl_device * dev,void * state,const struct isl_null_fill_state_info * restrict info)2443 isl_null_fill_state_s(const struct isl_device *dev, void *state,
2444                       const struct isl_null_fill_state_info *restrict info)
2445 {
2446    isl_genX_call(dev, null_fill_state, state, info);
2447 }
2448 
2449 void
isl_emit_depth_stencil_hiz_s(const struct isl_device * dev,void * batch,const struct isl_depth_stencil_hiz_emit_info * restrict info)2450 isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
2451                              const struct isl_depth_stencil_hiz_emit_info *restrict info)
2452 {
2453    if (info->depth_surf && info->stencil_surf) {
2454       if (!dev->info->has_hiz_and_separate_stencil) {
2455          assert(info->depth_surf == info->stencil_surf);
2456          assert(info->depth_address == info->stencil_address);
2457       }
2458       assert(info->depth_surf->dim == info->stencil_surf->dim);
2459    }
2460 
2461    if (info->depth_surf) {
2462       assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
2463       if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
2464          assert(info->view->base_array_layer + info->view->array_len <=
2465                 info->depth_surf->logical_level0_px.depth);
2466       } else {
2467          assert(info->view->base_array_layer + info->view->array_len <=
2468                 info->depth_surf->logical_level0_px.array_len);
2469       }
2470    }
2471 
2472    if (info->stencil_surf) {
2473       assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
2474       if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
2475          assert(info->view->base_array_layer + info->view->array_len <=
2476                 info->stencil_surf->logical_level0_px.depth);
2477       } else {
2478          assert(info->view->base_array_layer + info->view->array_len <=
2479                 info->stencil_surf->logical_level0_px.array_len);
2480       }
2481    }
2482 
2483    isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
2484 }
2485 
2486 /**
2487  * A variant of isl_surf_get_image_offset_sa() specific to
2488  * ISL_DIM_LAYOUT_GFX4_2D.
2489  */
2490 static void
get_image_offset_sa_gfx4_2d(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2491 get_image_offset_sa_gfx4_2d(const struct isl_surf *surf,
2492                             uint32_t level, uint32_t logical_array_layer,
2493                             uint32_t *x_offset_sa,
2494                             uint32_t *y_offset_sa)
2495 {
2496    assert(level < surf->levels);
2497    if (surf->dim == ISL_SURF_DIM_3D)
2498       assert(logical_array_layer < surf->logical_level0_px.depth);
2499    else
2500       assert(logical_array_layer < surf->logical_level0_px.array_len);
2501 
2502    const struct isl_extent3d image_align_sa =
2503       isl_surf_get_image_alignment_sa(surf);
2504 
2505    const uint32_t W0 = surf->phys_level0_sa.width;
2506    const uint32_t H0 = surf->phys_level0_sa.height;
2507 
2508    const uint32_t phys_layer = logical_array_layer *
2509       (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
2510 
2511    uint32_t x = 0;
2512    uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
2513 
2514    for (uint32_t l = 0; l < level; ++l) {
2515       if (l == 1) {
2516          uint32_t W = isl_minify(W0, l);
2517          x += isl_align_npot(W, image_align_sa.w);
2518       } else {
2519          uint32_t H = isl_minify(H0, l);
2520          y += isl_align_npot(H, image_align_sa.h);
2521       }
2522    }
2523 
2524    *x_offset_sa = x;
2525    *y_offset_sa = y;
2526 }
2527 
2528 /**
2529  * A variant of isl_surf_get_image_offset_sa() specific to
2530  * ISL_DIM_LAYOUT_GFX4_3D.
2531  */
2532 static void
get_image_offset_sa_gfx4_3d(const struct isl_surf * surf,uint32_t level,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2533 get_image_offset_sa_gfx4_3d(const struct isl_surf *surf,
2534                             uint32_t level, uint32_t logical_z_offset_px,
2535                             uint32_t *x_offset_sa,
2536                             uint32_t *y_offset_sa)
2537 {
2538    assert(level < surf->levels);
2539    if (surf->dim == ISL_SURF_DIM_3D) {
2540       assert(surf->phys_level0_sa.array_len == 1);
2541       assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
2542    } else {
2543       assert(surf->dim == ISL_SURF_DIM_2D);
2544       assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
2545       assert(surf->phys_level0_sa.array_len == 6);
2546       assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
2547    }
2548 
2549    const struct isl_extent3d image_align_sa =
2550       isl_surf_get_image_alignment_sa(surf);
2551 
2552    const uint32_t W0 = surf->phys_level0_sa.width;
2553    const uint32_t H0 = surf->phys_level0_sa.height;
2554    const uint32_t D0 = surf->phys_level0_sa.depth;
2555    const uint32_t AL = surf->phys_level0_sa.array_len;
2556 
2557    uint32_t x = 0;
2558    uint32_t y = 0;
2559 
2560    for (uint32_t l = 0; l < level; ++l) {
2561       const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
2562       const uint32_t level_d =
2563          isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
2564                         image_align_sa.d);
2565       const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2566 
2567       y += level_h * max_layers_vert;
2568    }
2569 
2570    const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
2571    const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
2572    const uint32_t level_d =
2573       isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
2574                      image_align_sa.d);
2575 
2576    const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
2577 
2578    x += level_w * (logical_z_offset_px % max_layers_horiz);
2579    y += level_h * (logical_z_offset_px / max_layers_horiz);
2580 
2581    *x_offset_sa = x;
2582    *y_offset_sa = y;
2583 }
2584 
2585 static void
get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2586 get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf *surf,
2587                                      uint32_t level,
2588                                      uint32_t logical_array_layer,
2589                                      uint32_t *x_offset_sa,
2590                                      uint32_t *y_offset_sa)
2591 {
2592    assert(level < surf->levels);
2593    assert(surf->logical_level0_px.depth == 1);
2594    assert(logical_array_layer < surf->logical_level0_px.array_len);
2595 
2596    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2597 
2598    const struct isl_extent3d image_align_sa =
2599       isl_surf_get_image_alignment_sa(surf);
2600 
2601    struct isl_tile_info tile_info;
2602    isl_surf_get_tile_info(surf, &tile_info);
2603    const struct isl_extent2d tile_extent_sa = {
2604       .w = tile_info.logical_extent_el.w * fmtl->bw,
2605       .h = tile_info.logical_extent_el.h * fmtl->bh,
2606    };
2607    /* Tile size is a multiple of image alignment */
2608    assert(tile_extent_sa.w % image_align_sa.w == 0);
2609    assert(tile_extent_sa.h % image_align_sa.h == 0);
2610 
2611    const uint32_t W0 = surf->phys_level0_sa.w;
2612    const uint32_t H0 = surf->phys_level0_sa.h;
2613 
2614    /* Each image has the same height as LOD0 because the hardware thinks
2615     * everything is LOD0
2616     */
2617    const uint32_t H = isl_align(H0, image_align_sa.h);
2618 
2619    /* Quick sanity check for consistency */
2620    if (surf->phys_level0_sa.array_len > 1)
2621       assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2622 
2623    uint32_t x = 0, y = 0;
2624    for (uint32_t l = 0; l < level; ++l) {
2625       const uint32_t W = isl_minify(W0, l);
2626 
2627       const uint32_t w = isl_align(W, tile_extent_sa.w);
2628       const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2629                                    tile_extent_sa.h);
2630 
2631       if (l == 0) {
2632          y += h;
2633       } else {
2634          x += w;
2635       }
2636    }
2637 
2638    y += H * logical_array_layer;
2639 
2640    *x_offset_sa = x;
2641    *y_offset_sa = y;
2642 }
2643 
2644 /**
2645  * A variant of isl_surf_get_image_offset_sa() specific to
2646  * ISL_DIM_LAYOUT_GFX9_1D.
2647  */
2648 static void
get_image_offset_sa_gfx9_1d(const struct isl_surf * surf,uint32_t level,uint32_t layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2649 get_image_offset_sa_gfx9_1d(const struct isl_surf *surf,
2650                             uint32_t level, uint32_t layer,
2651                             uint32_t *x_offset_sa,
2652                             uint32_t *y_offset_sa)
2653 {
2654    assert(level < surf->levels);
2655    assert(layer < surf->phys_level0_sa.array_len);
2656    assert(surf->phys_level0_sa.height == 1);
2657    assert(surf->phys_level0_sa.depth == 1);
2658    assert(surf->samples == 1);
2659 
2660    const uint32_t W0 = surf->phys_level0_sa.width;
2661    const struct isl_extent3d image_align_sa =
2662       isl_surf_get_image_alignment_sa(surf);
2663 
2664    uint32_t x = 0;
2665 
2666    for (uint32_t l = 0; l < level; ++l) {
2667       uint32_t W = isl_minify(W0, l);
2668       uint32_t w = isl_align_npot(W, image_align_sa.w);
2669 
2670       x += w;
2671    }
2672 
2673    *x_offset_sa = x;
2674    *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2675 }
2676 
2677 /**
2678  * Calculate the offset, in units of surface samples, to a subimage in the
2679  * surface.
2680  *
2681  * @invariant level < surface levels
2682  * @invariant logical_array_layer < logical array length of surface
2683  * @invariant logical_z_offset_px < logical depth of surface at level
2684  */
2685 void
isl_surf_get_image_offset_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa,uint32_t * z_offset_sa,uint32_t * array_offset)2686 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2687                              uint32_t level,
2688                              uint32_t logical_array_layer,
2689                              uint32_t logical_z_offset_px,
2690                              uint32_t *x_offset_sa,
2691                              uint32_t *y_offset_sa,
2692                              uint32_t *z_offset_sa,
2693                              uint32_t *array_offset)
2694 {
2695    assert(level < surf->levels);
2696    assert(logical_array_layer < surf->logical_level0_px.array_len);
2697    assert(logical_z_offset_px
2698           < isl_minify(surf->logical_level0_px.depth, level));
2699 
2700    switch (surf->dim_layout) {
2701    case ISL_DIM_LAYOUT_GFX9_1D:
2702       get_image_offset_sa_gfx9_1d(surf, level, logical_array_layer,
2703                                   x_offset_sa, y_offset_sa);
2704       *z_offset_sa = 0;
2705       *array_offset = 0;
2706       break;
2707    case ISL_DIM_LAYOUT_GFX4_2D:
2708       get_image_offset_sa_gfx4_2d(surf, level, logical_array_layer
2709                                   + logical_z_offset_px,
2710                                   x_offset_sa, y_offset_sa);
2711       *z_offset_sa = 0;
2712       *array_offset = 0;
2713       break;
2714    case ISL_DIM_LAYOUT_GFX4_3D:
2715       get_image_offset_sa_gfx4_3d(surf, level, logical_array_layer +
2716                                   logical_z_offset_px,
2717                                   x_offset_sa, y_offset_sa);
2718       *z_offset_sa = 0;
2719       *array_offset = 0;
2720       break;
2721    case ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ:
2722       get_image_offset_sa_gfx6_stencil_hiz(surf, level, logical_array_layer +
2723                                            logical_z_offset_px,
2724                                            x_offset_sa, y_offset_sa);
2725       *z_offset_sa = 0;
2726       *array_offset = 0;
2727       break;
2728 
2729    default:
2730       unreachable("not reached");
2731    }
2732 }
2733 
2734 void
isl_surf_get_image_offset_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)2735 isl_surf_get_image_offset_el(const struct isl_surf *surf,
2736                              uint32_t level,
2737                              uint32_t logical_array_layer,
2738                              uint32_t logical_z_offset_px,
2739                              uint32_t *x_offset_el,
2740                              uint32_t *y_offset_el,
2741                              uint32_t *z_offset_el,
2742                              uint32_t *array_offset)
2743 {
2744    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2745 
2746    assert(level < surf->levels);
2747    assert(logical_array_layer < surf->logical_level0_px.array_len);
2748    assert(logical_z_offset_px
2749           < isl_minify(surf->logical_level0_px.depth, level));
2750 
2751    uint32_t x_offset_sa, y_offset_sa, z_offset_sa;
2752    isl_surf_get_image_offset_sa(surf, level,
2753                                 logical_array_layer,
2754                                 logical_z_offset_px,
2755                                 &x_offset_sa,
2756                                 &y_offset_sa,
2757                                 &z_offset_sa,
2758                                 array_offset);
2759 
2760    *x_offset_el = x_offset_sa / fmtl->bw;
2761    *y_offset_el = y_offset_sa / fmtl->bh;
2762    *z_offset_el = z_offset_sa / fmtl->bd;
2763 }
2764 
2765 void
isl_surf_get_image_offset_B_tile_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2766 isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2767                                     uint32_t level,
2768                                     uint32_t logical_array_layer,
2769                                     uint32_t logical_z_offset_px,
2770                                     uint64_t *offset_B,
2771                                     uint32_t *x_offset_sa,
2772                                     uint32_t *y_offset_sa)
2773 {
2774    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2775 
2776    uint32_t x_offset_el, y_offset_el;
2777    isl_surf_get_image_offset_B_tile_el(surf, level,
2778                                        logical_array_layer,
2779                                        logical_z_offset_px,
2780                                        offset_B,
2781                                        &x_offset_el,
2782                                        &y_offset_el);
2783 
2784    if (x_offset_sa) {
2785       *x_offset_sa = x_offset_el * fmtl->bw;
2786    } else {
2787       assert(x_offset_el == 0);
2788    }
2789 
2790    if (y_offset_sa) {
2791       *y_offset_sa = y_offset_el * fmtl->bh;
2792    } else {
2793       assert(y_offset_el == 0);
2794    }
2795 }
2796 
2797 void
isl_surf_get_image_offset_B_tile_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el)2798 isl_surf_get_image_offset_B_tile_el(const struct isl_surf *surf,
2799                                     uint32_t level,
2800                                     uint32_t logical_array_layer,
2801                                     uint32_t logical_z_offset_px,
2802                                     uint64_t *offset_B,
2803                                     uint32_t *x_offset_el,
2804                                     uint32_t *y_offset_el)
2805 {
2806    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2807 
2808    uint32_t total_x_offset_el, total_y_offset_el;
2809    uint32_t total_z_offset_el, total_array_offset;
2810    isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2811                                 logical_z_offset_px,
2812                                 &total_x_offset_el,
2813                                 &total_y_offset_el,
2814                                 &total_z_offset_el,
2815                                 &total_array_offset);
2816 
2817    uint32_t z_offset_el, array_offset;
2818    isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2819                                       surf->msaa_layout, fmtl->bpb,
2820                                       surf->samples,
2821                                       surf->row_pitch_B,
2822                                       surf->array_pitch_el_rows,
2823                                       total_x_offset_el,
2824                                       total_y_offset_el,
2825                                       total_z_offset_el,
2826                                       total_array_offset,
2827                                       offset_B,
2828                                       x_offset_el,
2829                                       y_offset_el,
2830                                       &z_offset_el,
2831                                       &array_offset);
2832    assert(z_offset_el == 0);
2833    assert(array_offset == 0);
2834 }
2835 
2836 void
isl_surf_get_image_range_B_tile(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint64_t * start_tile_B,uint64_t * end_tile_B)2837 isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
2838                                 uint32_t level,
2839                                 uint32_t logical_array_layer,
2840                                 uint32_t logical_z_offset_px,
2841                                 uint64_t *start_tile_B,
2842                                 uint64_t *end_tile_B)
2843 {
2844    uint32_t start_x_offset_el, start_y_offset_el;
2845    uint32_t start_z_offset_el, start_array_slice;
2846    isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2847                                 logical_z_offset_px,
2848                                 &start_x_offset_el,
2849                                 &start_y_offset_el,
2850                                 &start_z_offset_el,
2851                                 &start_array_slice);
2852 
2853    /* Compute the size of the subimage in surface elements */
2854    const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level);
2855    const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level);
2856    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2857    const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw);
2858    const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh);
2859 
2860    /* Find the last pixel */
2861    uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1;
2862    uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1;
2863 
2864    /* We only consider one Z or array slice */
2865    const uint32_t end_z_offset_el = start_z_offset_el;
2866    const uint32_t end_array_slice = start_array_slice;
2867 
2868    UNUSED uint32_t x_offset_el, y_offset_el, z_offset_el, array_slice;
2869    isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2870                                       surf->msaa_layout, fmtl->bpb,
2871                                       surf->samples,
2872                                       surf->row_pitch_B,
2873                                       surf->array_pitch_el_rows,
2874                                       start_x_offset_el,
2875                                       start_y_offset_el,
2876                                       start_z_offset_el,
2877                                       start_array_slice,
2878                                       start_tile_B,
2879                                       &x_offset_el,
2880                                       &y_offset_el,
2881                                       &z_offset_el,
2882                                       &array_slice);
2883 
2884    isl_tiling_get_intratile_offset_el(surf->tiling, surf->dim,
2885                                       surf->msaa_layout, fmtl->bpb,
2886                                       surf->samples,
2887                                       surf->row_pitch_B,
2888                                       surf->array_pitch_el_rows,
2889                                       end_x_offset_el,
2890                                       end_y_offset_el,
2891                                       end_z_offset_el,
2892                                       end_array_slice,
2893                                       end_tile_B,
2894                                       &x_offset_el,
2895                                       &y_offset_el,
2896                                       &z_offset_el,
2897                                       &array_slice);
2898 
2899    /* We want the range we return to be exclusive but the tile containing the
2900     * last pixel (what we just calculated) is inclusive.  Add one.
2901     */
2902    (*end_tile_B)++;
2903 
2904    assert(*end_tile_B <= surf->size_B);
2905 }
2906 
2907 void
isl_surf_get_image_surf(const struct isl_device * dev,const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,struct isl_surf * image_surf,uint64_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2908 isl_surf_get_image_surf(const struct isl_device *dev,
2909                         const struct isl_surf *surf,
2910                         uint32_t level,
2911                         uint32_t logical_array_layer,
2912                         uint32_t logical_z_offset_px,
2913                         struct isl_surf *image_surf,
2914                         uint64_t *offset_B,
2915                         uint32_t *x_offset_sa,
2916                         uint32_t *y_offset_sa)
2917 {
2918    isl_surf_get_image_offset_B_tile_sa(surf,
2919                                        level,
2920                                        logical_array_layer,
2921                                        logical_z_offset_px,
2922                                        offset_B,
2923                                        x_offset_sa,
2924                                        y_offset_sa);
2925 
2926    /* Even for cube maps there will be only single face, therefore drop the
2927     * corresponding flag if present.
2928     */
2929    const isl_surf_usage_flags_t usage =
2930       surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2931 
2932    bool ok UNUSED;
2933    ok = isl_surf_init(dev, image_surf,
2934                       .dim = ISL_SURF_DIM_2D,
2935                       .format = surf->format,
2936                       .width = isl_minify(surf->logical_level0_px.w, level),
2937                       .height = isl_minify(surf->logical_level0_px.h, level),
2938                       .depth = 1,
2939                       .levels = 1,
2940                       .array_len = 1,
2941                       .samples = surf->samples,
2942                       .row_pitch_B = surf->row_pitch_B,
2943                       .usage = usage,
2944                       .tiling_flags = (1 << surf->tiling));
2945    assert(ok);
2946 }
2947 
2948 bool
isl_surf_get_uncompressed_surf(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_view * view,struct isl_surf * ucompr_surf,struct isl_view * ucompr_view,uint64_t * offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el)2949 isl_surf_get_uncompressed_surf(const struct isl_device *dev,
2950                                const struct isl_surf *surf,
2951                                const struct isl_view *view,
2952                                struct isl_surf *ucompr_surf,
2953                                struct isl_view *ucompr_view,
2954                                uint64_t *offset_B,
2955                                uint32_t *x_offset_el,
2956                                uint32_t *y_offset_el)
2957 {
2958    const struct isl_format_layout *fmtl =
2959       isl_format_get_layout(surf->format);
2960    const enum isl_format view_format = view->format;
2961 
2962    assert(fmtl->bw > 1 || fmtl->bh > 1 || fmtl->bd > 1);
2963    assert(isl_format_is_compressed(surf->format));
2964    assert(!isl_format_is_compressed(view->format));
2965    assert(isl_format_get_layout(view->format)->bpb == fmtl->bpb);
2966    assert(view->levels == 1);
2967 
2968    const uint32_t view_width_px =
2969       isl_minify(surf->logical_level0_px.width, view->base_level);
2970    const uint32_t view_height_px =
2971       isl_minify(surf->logical_level0_px.height, view->base_level);
2972 
2973    assert(surf->samples == 1);
2974    const uint32_t view_width_el = isl_align_div_npot(view_width_px, fmtl->bw);
2975    const uint32_t view_height_el = isl_align_div_npot(view_height_px, fmtl->bh);
2976 
2977    /* If we ever enable 3D block formats, we'll need to re-think this */
2978    assert(fmtl->bd == 1);
2979 
2980    if (view->array_len > 1) {
2981       /* The Skylake PRM Vol. 2d, "RENDER_SURFACE_STATE::X Offset" says:
2982        *
2983        *    "If Surface Array is enabled, this field must be zero."
2984        *
2985        * The PRMs for other hardware have similar text.  This is also tricky
2986        * to handle with things like BLORP's SW offsetting because the
2987        * increased surface size required for the offset may result in an image
2988        * height greater than qpitch.
2989        */
2990       if (view->base_level > 0)
2991          return false;
2992 
2993       /* On Haswell and earlier, RENDER_SURFACE_STATE doesn't have a QPitch
2994        * field; it only has "array pitch span" which means the QPitch is
2995        * automatically calculated.  Since we're smashing the surface format
2996        * (block formats are subtly different) and the number of miplevels,
2997        * that calculation will get thrown off.  This means we can't do arrays
2998        * even at LOD0
2999        *
3000        * On Broadwell, we do have a QPitch field which we can control.
3001        * However, HALIGN and VALIGN are specified in pixels and are
3002        * hard-coded to align to exactly the block size of the compressed
3003        * texture.  This means that, when reinterpreted as a non-compressed
3004        * the QPitch may be anything but the HW requires it to be properly
3005        * aligned.
3006        */
3007       if (ISL_GFX_VER(dev) < 9)
3008          return false;
3009 
3010       *ucompr_surf = *surf;
3011       ucompr_surf->levels = 1;
3012       ucompr_surf->format = view_format;
3013 
3014       /* We're making an uncompressed view here.  The image dimensions
3015        * need to be scaled down by the block size.
3016        */
3017       assert(ucompr_surf->logical_level0_px.width == view_width_px);
3018       assert(ucompr_surf->logical_level0_px.height == view_height_px);
3019       ucompr_surf->logical_level0_px.width = view_width_el;
3020       ucompr_surf->logical_level0_px.height = view_height_el;
3021       ucompr_surf->phys_level0_sa = isl_surf_get_phys_level0_el(surf);
3022 
3023       /* The surface mostly stays as-is; there is no offset */
3024       *offset_B = 0;
3025       *x_offset_el = 0;
3026       *y_offset_el = 0;
3027 
3028       /* The view remains the same */
3029       *ucompr_view = *view;
3030    } else {
3031       /* If only one array slice is requested, directly offset to that slice.
3032        * We could, in theory, still use arrays in some cases but BLORP isn't
3033        * prepared for this and everyone who calls this function should be
3034        * prepared to handle an X/Y offset.
3035        */
3036       isl_surf_get_image_offset_B_tile_el(surf,
3037                                           view->base_level,
3038                                           surf->dim == ISL_SURF_DIM_3D ?
3039                                              0 : view->base_array_layer,
3040                                           surf->dim == ISL_SURF_DIM_3D ?
3041                                              view->base_array_layer : 0,
3042                                           offset_B,
3043                                           x_offset_el,
3044                                           y_offset_el);
3045 
3046       /* Even for cube maps there will be only single face, therefore drop the
3047        * corresponding flag if present.
3048        */
3049       const isl_surf_usage_flags_t usage =
3050          surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
3051 
3052       bool ok UNUSED;
3053       ok = isl_surf_init(dev, ucompr_surf,
3054                          .dim = ISL_SURF_DIM_2D,
3055                          .format = view_format,
3056                          .width = view_width_el,
3057                          .height = view_height_el,
3058                          .depth = 1,
3059                          .levels = 1,
3060                          .array_len = 1,
3061                          .samples = 1,
3062                          .row_pitch_B = surf->row_pitch_B,
3063                          .usage = usage,
3064                          .tiling_flags = (1 << surf->tiling));
3065       assert(ok);
3066 
3067       /* The newly created image represents the one subimage we're
3068        * referencing with this view so it only has one array slice and
3069        * miplevel.
3070        */
3071       *ucompr_view = *view;
3072       ucompr_view->base_array_layer = 0;
3073       ucompr_view->base_level = 0;
3074    }
3075 
3076    return true;
3077 }
3078 
3079 void
isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,enum isl_surf_dim dim,enum isl_msaa_layout msaa_layout,uint32_t bpb,uint32_t samples,uint32_t row_pitch_B,uint32_t array_pitch_el_rows,uint32_t total_x_offset_el,uint32_t total_y_offset_el,uint32_t total_z_offset_el,uint32_t total_array_offset,uint64_t * tile_offset_B,uint32_t * x_offset_el,uint32_t * y_offset_el,uint32_t * z_offset_el,uint32_t * array_offset)3080 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
3081                                    enum isl_surf_dim dim,
3082                                    enum isl_msaa_layout msaa_layout,
3083                                    uint32_t bpb,
3084                                    uint32_t samples,
3085                                    uint32_t row_pitch_B,
3086                                    uint32_t array_pitch_el_rows,
3087                                    uint32_t total_x_offset_el,
3088                                    uint32_t total_y_offset_el,
3089                                    uint32_t total_z_offset_el,
3090                                    uint32_t total_array_offset,
3091                                    uint64_t *tile_offset_B,
3092                                    uint32_t *x_offset_el,
3093                                    uint32_t *y_offset_el,
3094                                    uint32_t *z_offset_el,
3095                                    uint32_t *array_offset)
3096 {
3097    if (tiling == ISL_TILING_LINEAR) {
3098       assert(bpb % 8 == 0);
3099       assert(samples == 1);
3100       assert(total_z_offset_el == 0 && total_array_offset == 0);
3101       *tile_offset_B = (uint64_t)total_y_offset_el * row_pitch_B +
3102                        (uint64_t)total_x_offset_el * (bpb / 8);
3103       *x_offset_el = 0;
3104       *y_offset_el = 0;
3105       *z_offset_el = 0;
3106       *array_offset = 0;
3107       return;
3108    }
3109 
3110    struct isl_tile_info tile_info;
3111    isl_tiling_get_info(tiling, dim, msaa_layout, bpb, samples, &tile_info);
3112 
3113    /* Pitches must make sense with the tiling */
3114    assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
3115    if (tile_info.logical_extent_el.d > 1 || tile_info.logical_extent_el.a > 1)
3116       assert(array_pitch_el_rows % tile_info.logical_extent_el.h == 0);
3117 
3118    /* For non-power-of-two formats, we need the address to be both tile and
3119     * element-aligned.  The easiest way to achieve this is to work with a tile
3120     * that is three times as wide as the regular tile.
3121     *
3122     * The tile info returned by get_tile_info has a logical size that is an
3123     * integer number of tile_info.format_bpb size elements.  To scale the
3124     * tile, we scale up the physical width and then treat the logical tile
3125     * size as if it has bpb size elements.
3126     */
3127    const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
3128    tile_info.phys_extent_B.width *= tile_el_scale;
3129 
3130    /* Compute the offset into the tile */
3131    *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
3132    *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
3133    *z_offset_el = total_z_offset_el % tile_info.logical_extent_el.d;
3134    *array_offset = total_array_offset % tile_info.logical_extent_el.a;
3135 
3136    /* Compute the offset of the tile in units of whole tiles */
3137    uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
3138    uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
3139    uint32_t z_offset_tl = total_z_offset_el / tile_info.logical_extent_el.d;
3140    uint32_t a_offset_tl = total_array_offset / tile_info.logical_extent_el.a;
3141 
3142    /* Compute an array pitch in number of tiles */
3143    uint32_t array_pitch_tl_rows =
3144       array_pitch_el_rows / tile_info.logical_extent_el.h;
3145 
3146    /* Add the Z and array offset to the Y offset to get a 2D offset */
3147    y_offset_tl += (z_offset_tl + a_offset_tl) * array_pitch_tl_rows;
3148 
3149    *tile_offset_B =
3150       (uint64_t)y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
3151       (uint64_t)x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
3152 }
3153 
3154 uint32_t
isl_surf_get_depth_format(const struct isl_device * dev,const struct isl_surf * surf)3155 isl_surf_get_depth_format(const struct isl_device *dev,
3156                           const struct isl_surf *surf)
3157 {
3158    /* Support for separate stencil buffers began in gfx5. Support for
3159     * interleaved depthstencil buffers ceased in gfx7. The intermediate gens,
3160     * those that supported separate and interleaved stencil, were gfx5 and
3161     * gfx6.
3162     *
3163     * For a list of all available formats, see the Sandybridge PRM >> Volume
3164     * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
3165     * Format (p321).
3166     */
3167 
3168    bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
3169 
3170    assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
3171 
3172    if (has_stencil)
3173       assert(ISL_GFX_VER(dev) < 7);
3174 
3175    switch (surf->format) {
3176    default:
3177       unreachable("bad isl depth format");
3178    case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
3179       assert(ISL_GFX_VER(dev) < 7);
3180       return 0; /* D32_FLOAT_S8X24_UINT */
3181    case ISL_FORMAT_R32_FLOAT:
3182       assert(!has_stencil);
3183       return 1; /* D32_FLOAT */
3184    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
3185       if (has_stencil) {
3186          assert(ISL_GFX_VER(dev) < 7);
3187          return 2; /* D24_UNORM_S8_UINT */
3188       } else {
3189          assert(ISL_GFX_VER(dev) >= 5);
3190          return 3; /* D24_UNORM_X8_UINT */
3191       }
3192    case ISL_FORMAT_R16_UNORM:
3193       assert(!has_stencil);
3194       return 5; /* D16_UNORM */
3195    }
3196 }
3197 
3198 bool
isl_swizzle_supports_rendering(const struct intel_device_info * devinfo,struct isl_swizzle swizzle)3199 isl_swizzle_supports_rendering(const struct intel_device_info *devinfo,
3200                                struct isl_swizzle swizzle)
3201 {
3202    if (devinfo->is_haswell) {
3203       /* From the Haswell PRM,
3204        * RENDER_SURFACE_STATE::Shader Channel Select Red
3205        *
3206        *    "The Shader channel selects also define which shader channels are
3207        *    written to which surface channel. If the Shader channel select is
3208        *    SCS_ZERO or SCS_ONE then it is not written to the surface. If the
3209        *    shader channel select is SCS_RED it is written to the surface red
3210        *    channel and so on. If more than one shader channel select is set
3211        *    to the same surface channel only the first shader channel in RGBA
3212        *    order will be written."
3213        */
3214       return true;
3215    } else if (devinfo->ver <= 7) {
3216       /* Ivy Bridge and early doesn't have any swizzling */
3217       return isl_swizzle_is_identity(swizzle);
3218    } else {
3219       /* From the Sky Lake PRM Vol. 2d,
3220        * RENDER_SURFACE_STATE::Shader Channel Select Red
3221        *
3222        *    "For Render Target, Red, Green and Blue Shader Channel Selects
3223        *    MUST be such that only valid components can be swapped i.e. only
3224        *    change the order of components in the pixel. Any other values for
3225        *    these Shader Channel Select fields are not valid for Render
3226        *    Targets. This also means that there MUST not be multiple shader
3227        *    channels mapped to the same RT channel."
3228        *
3229        * From the Sky Lake PRM Vol. 2d,
3230        * RENDER_SURFACE_STATE::Shader Channel Select Alpha
3231        *
3232        *    "For Render Target, this field MUST be programmed to
3233        *    value = SCS_ALPHA."
3234        */
3235       return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
3236               swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
3237               swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
3238              (swizzle.g == ISL_CHANNEL_SELECT_RED ||
3239               swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
3240               swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
3241              (swizzle.b == ISL_CHANNEL_SELECT_RED ||
3242               swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
3243               swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
3244              swizzle.r != swizzle.g &&
3245              swizzle.r != swizzle.b &&
3246              swizzle.g != swizzle.b &&
3247              swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
3248    }
3249 }
3250 
3251 static enum isl_channel_select
swizzle_select(enum isl_channel_select chan,struct isl_swizzle swizzle)3252 swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
3253 {
3254    switch (chan) {
3255    case ISL_CHANNEL_SELECT_ZERO:
3256    case ISL_CHANNEL_SELECT_ONE:
3257       return chan;
3258    case ISL_CHANNEL_SELECT_RED:
3259       return swizzle.r;
3260    case ISL_CHANNEL_SELECT_GREEN:
3261       return swizzle.g;
3262    case ISL_CHANNEL_SELECT_BLUE:
3263       return swizzle.b;
3264    case ISL_CHANNEL_SELECT_ALPHA:
3265       return swizzle.a;
3266    default:
3267       unreachable("Invalid swizzle component");
3268    }
3269 }
3270 
3271 /**
3272  * Returns the single swizzle that is equivalent to applying the two given
3273  * swizzles in sequence.
3274  */
3275 struct isl_swizzle
isl_swizzle_compose(struct isl_swizzle first,struct isl_swizzle second)3276 isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
3277 {
3278    return (struct isl_swizzle) {
3279       .r = swizzle_select(first.r, second),
3280       .g = swizzle_select(first.g, second),
3281       .b = swizzle_select(first.b, second),
3282       .a = swizzle_select(first.a, second),
3283    };
3284 }
3285 
3286 /**
3287  * Returns a swizzle that is the pseudo-inverse of this swizzle.
3288  */
3289 struct isl_swizzle
isl_swizzle_invert(struct isl_swizzle swizzle)3290 isl_swizzle_invert(struct isl_swizzle swizzle)
3291 {
3292    /* Default to zero for channels which do not show up in the swizzle */
3293    enum isl_channel_select chans[4] = {
3294       ISL_CHANNEL_SELECT_ZERO,
3295       ISL_CHANNEL_SELECT_ZERO,
3296       ISL_CHANNEL_SELECT_ZERO,
3297       ISL_CHANNEL_SELECT_ZERO,
3298    };
3299 
3300    /* We go in ABGR order so that, if there are any duplicates, the first one
3301     * is taken if you look at it in RGBA order.  This is what Haswell hardware
3302     * does for render target swizzles.
3303     */
3304    if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
3305       chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
3306    if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
3307       chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
3308    if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
3309       chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
3310    if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
3311       chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
3312 
3313    return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
3314 }
3315 
3316 /** Applies an inverse swizzle to a color value */
3317 union isl_color_value
isl_color_value_swizzle_inv(union isl_color_value src,struct isl_swizzle swizzle)3318 isl_color_value_swizzle_inv(union isl_color_value src,
3319                             struct isl_swizzle swizzle)
3320 {
3321    union isl_color_value dst = { .u32 = { 0, } };
3322 
3323    /* We assign colors in ABGR order so that the first one will be taken in
3324     * RGBA precedence order.  According to the PRM docs for shader channel
3325     * select, this matches Haswell hardware behavior.
3326     */
3327    if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
3328       dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3];
3329    if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
3330       dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2];
3331    if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
3332       dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1];
3333    if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
3334       dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0];
3335 
3336    return dst;
3337 }
3338 
3339 uint8_t
isl_format_get_aux_map_encoding(enum isl_format format)3340 isl_format_get_aux_map_encoding(enum isl_format format)
3341 {
3342    switch(format) {
3343    case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11;
3344    case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11;
3345    case ISL_FORMAT_R32G32B32A32_SINT: return 0x12;
3346    case ISL_FORMAT_R32G32B32A32_UINT: return 0x13;
3347    case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14;
3348    case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15;
3349    case ISL_FORMAT_R16G16B16A16_SINT: return 0x16;
3350    case ISL_FORMAT_R16G16B16A16_UINT: return 0x17;
3351    case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10;
3352    case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10;
3353    case ISL_FORMAT_R32G32_FLOAT: return 0x11;
3354    case ISL_FORMAT_R32G32_SINT: return 0x12;
3355    case ISL_FORMAT_R32G32_UINT: return 0x13;
3356    case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA;
3357    case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA;
3358    case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA;
3359    case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA;
3360    case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18;
3361    case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18;
3362    case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19;
3363    case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A;
3364    case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA;
3365    case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA;
3366    case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B;
3367    case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C;
3368    case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D;
3369    case ISL_FORMAT_R16G16_UNORM: return 0x14;
3370    case ISL_FORMAT_R16G16_SNORM: return 0x15;
3371    case ISL_FORMAT_R16G16_SINT: return 0x16;
3372    case ISL_FORMAT_R16G16_UINT: return 0x17;
3373    case ISL_FORMAT_R16G16_FLOAT: return 0x10;
3374    case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18;
3375    case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18;
3376    case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E;
3377    case ISL_FORMAT_R32_SINT: return 0x12;
3378    case ISL_FORMAT_R32_UINT: return 0x13;
3379    case ISL_FORMAT_R32_FLOAT: return 0x11;
3380    case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13;
3381    case ISL_FORMAT_B5G6R5_UNORM: return 0xA;
3382    case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA;
3383    case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA;
3384    case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA;
3385    case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA;
3386    case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA;
3387    case ISL_FORMAT_R8G8_UNORM: return 0xA;
3388    case ISL_FORMAT_R8G8_SNORM: return 0x1B;
3389    case ISL_FORMAT_R8G8_SINT: return 0x1C;
3390    case ISL_FORMAT_R8G8_UINT: return 0x1D;
3391    case ISL_FORMAT_R16_UNORM: return 0x14;
3392    case ISL_FORMAT_R16_SNORM: return 0x15;
3393    case ISL_FORMAT_R16_SINT: return 0x16;
3394    case ISL_FORMAT_R16_UINT: return 0x17;
3395    case ISL_FORMAT_R16_FLOAT: return 0x10;
3396    case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA;
3397    case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA;
3398    case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA;
3399    case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA;
3400    case ISL_FORMAT_R8_UNORM: return 0xA;
3401    case ISL_FORMAT_R8_SNORM: return 0x1B;
3402    case ISL_FORMAT_R8_SINT: return 0x1C;
3403    case ISL_FORMAT_R8_UINT: return 0x1D;
3404    case ISL_FORMAT_A8_UNORM: return 0xA;
3405    case ISL_FORMAT_PLANAR_420_8: return 0xF;
3406    case ISL_FORMAT_PLANAR_420_10: return 0x7;
3407    case ISL_FORMAT_PLANAR_420_12: return 0x8;
3408    case ISL_FORMAT_PLANAR_420_16: return 0x8;
3409    case ISL_FORMAT_YCRCB_NORMAL: return 0x3;
3410    case ISL_FORMAT_YCRCB_SWAPY: return 0xB;
3411    default:
3412       unreachable("Unsupported aux-map format!");
3413       return 0;
3414    }
3415 }
3416 
3417 /*
3418  * Returns compression format encoding for Unified Lossless Compression
3419  */
3420 uint8_t
isl_get_render_compression_format(enum isl_format format)3421 isl_get_render_compression_format(enum isl_format format)
3422 {
3423    /* From the Bspec, Enumeration_RenderCompressionFormat section (53726): */
3424    switch(format) {
3425    case ISL_FORMAT_R32G32B32A32_FLOAT:
3426    case ISL_FORMAT_R32G32B32X32_FLOAT:
3427    case ISL_FORMAT_R32G32B32A32_SINT:
3428       return 0x0;
3429    case ISL_FORMAT_R32G32B32A32_UINT:
3430       return 0x1;
3431    case ISL_FORMAT_R32G32_FLOAT:
3432    case ISL_FORMAT_R32G32_SINT:
3433       return 0x2;
3434    case ISL_FORMAT_R32G32_UINT:
3435       return 0x3;
3436    case ISL_FORMAT_R16G16B16A16_UNORM:
3437    case ISL_FORMAT_R16G16B16X16_UNORM:
3438    case ISL_FORMAT_R16G16B16A16_UINT:
3439       return 0x4;
3440    case ISL_FORMAT_R16G16B16A16_SNORM:
3441    case ISL_FORMAT_R16G16B16A16_SINT:
3442    case ISL_FORMAT_R16G16B16A16_FLOAT:
3443    case ISL_FORMAT_R16G16B16X16_FLOAT:
3444       return 0x5;
3445    case ISL_FORMAT_R16G16_UNORM:
3446    case ISL_FORMAT_R16G16_UINT:
3447       return 0x6;
3448    case ISL_FORMAT_R16G16_SNORM:
3449    case ISL_FORMAT_R16G16_SINT:
3450    case ISL_FORMAT_R16G16_FLOAT:
3451       return 0x7;
3452    case ISL_FORMAT_B8G8R8A8_UNORM:
3453    case ISL_FORMAT_B8G8R8X8_UNORM:
3454    case ISL_FORMAT_B8G8R8A8_UNORM_SRGB:
3455    case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
3456    case ISL_FORMAT_R8G8B8A8_UNORM:
3457    case ISL_FORMAT_R8G8B8X8_UNORM:
3458    case ISL_FORMAT_R8G8B8A8_UNORM_SRGB:
3459    case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
3460    case ISL_FORMAT_R8G8B8A8_UINT:
3461       return 0x8;
3462    case ISL_FORMAT_R8G8B8A8_SNORM:
3463    case ISL_FORMAT_R8G8B8A8_SINT:
3464       return 0x9;
3465    case ISL_FORMAT_B5G6R5_UNORM:
3466    case ISL_FORMAT_B5G6R5_UNORM_SRGB:
3467    case ISL_FORMAT_B5G5R5A1_UNORM:
3468    case ISL_FORMAT_B5G5R5A1_UNORM_SRGB:
3469    case ISL_FORMAT_B4G4R4A4_UNORM:
3470    case ISL_FORMAT_B4G4R4A4_UNORM_SRGB:
3471    case ISL_FORMAT_B5G5R5X1_UNORM:
3472    case ISL_FORMAT_B5G5R5X1_UNORM_SRGB:
3473    case ISL_FORMAT_A1B5G5R5_UNORM:
3474    case ISL_FORMAT_A4B4G4R4_UNORM:
3475    case ISL_FORMAT_R8G8_UNORM:
3476    case ISL_FORMAT_R8G8_UINT:
3477       return 0xA;
3478    case ISL_FORMAT_R8G8_SNORM:
3479    case ISL_FORMAT_R8G8_SINT:
3480       return 0xB;
3481    case ISL_FORMAT_R10G10B10A2_UNORM:
3482    case ISL_FORMAT_R10G10B10A2_UNORM_SRGB:
3483    case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM:
3484    case ISL_FORMAT_R10G10B10A2_UINT:
3485    case ISL_FORMAT_B10G10R10A2_UNORM:
3486    case ISL_FORMAT_B10G10R10X2_UNORM:
3487    case ISL_FORMAT_B10G10R10A2_UNORM_SRGB:
3488       return 0xC;
3489    case ISL_FORMAT_R11G11B10_FLOAT:
3490       return 0xD;
3491    case ISL_FORMAT_R32_SINT:
3492    case ISL_FORMAT_R32_FLOAT:
3493       return 0x10;
3494    case ISL_FORMAT_R32_UINT:
3495    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
3496       return 0x11;
3497    case ISL_FORMAT_R16_UNORM:
3498    case ISL_FORMAT_R16_UINT:
3499       return 0x14;
3500    case ISL_FORMAT_R16_SNORM:
3501    case ISL_FORMAT_R16_SINT:
3502    case ISL_FORMAT_R16_FLOAT:
3503       return 0x15;
3504    case ISL_FORMAT_R8_UNORM:
3505    case ISL_FORMAT_R8_UINT:
3506    case ISL_FORMAT_A8_UNORM:
3507       return 0x18;
3508    case ISL_FORMAT_R8_SNORM:
3509    case ISL_FORMAT_R8_SINT:
3510       return 0x19;
3511    default:
3512       unreachable("Unsupported render compression format!");
3513       return 0;
3514    }
3515 }
3516