• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE
23 *
24 ***********************************************************************************************************************/
25 
26 /**
27 ************************************************************************************************************************
28 * @file  gfx11addrlib.cpp
29 * @brief Contains the implementation of the Gfx11Lib class.
30 ************************************************************************************************************************
31 */
32 
33 #include "gfx11addrlib.h"
34 #include "gfx11_gb_reg.h"
35 
36 #include "amdgpu_asic_addr.h"
37 
38 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 
41 namespace Addr
42 {
43 /**
44 ************************************************************************************************************************
45 *   Gfx11HwlInit
46 *
47 *   @brief
48 *       Creates an Gfx11Lib object.
49 *
50 *   @return
51 *       Returns an Gfx11Lib object pointer.
52 ************************************************************************************************************************
53 */
Gfx11HwlInit(const Client * pClient)54 Addr::Lib* Gfx11HwlInit(const Client* pClient)
55 {
56     return V2::Gfx11Lib::CreateObj(pClient);
57 }
58 
59 namespace V2
60 {
61 
62 ////////////////////////////////////////////////////////////////////////////////////////////////////
63 //                               Static Const Member
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65 
66 const SwizzleModeFlags Gfx11Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
67 {//Linear 256B  4KB  64KB  256KB   Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
68     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
71     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
72 
73     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
74     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
75     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
76     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
77 
78     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
79     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
80     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
81     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
82 
83     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
84     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
85     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
86     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
87 
88     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
89     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
90     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
91     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
92 
93     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
94     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
95     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
96     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
97 
98     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
99     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
100     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
101     {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X
102 
103     {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_Z_X
104     {{0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_S_X
105     {{0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_D_X
106     {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_256KB_R_X
107     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
108 };
109 
110 const Dim3d Gfx11Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
111 
112 const Dim3d Gfx11Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}};
113 const Dim3d Gfx11Lib::Block64K_Log2_3d[]  = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx11Lib::Block4K_Log2_3d[]   = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115 
116 /**
117 ************************************************************************************************************************
118 *   Gfx11Lib::Gfx11Lib
119 *
120 *   @brief
121 *       Constructor
122 *
123 ************************************************************************************************************************
124 */
Gfx11Lib(const Client * pClient)125 Gfx11Lib::Gfx11Lib(const Client* pClient)
126     :
127     Lib(pClient),
128     m_numPkrLog2(0),
129     m_numSaLog2(0),
130     m_colorBaseIndex(0),
131     m_htileBaseIndex(0),
132     m_dccBaseIndex(0)
133 {
134     memset(&m_settings, 0, sizeof(m_settings));
135     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
136 }
137 
138 /**
139 ************************************************************************************************************************
140 *   Gfx11Lib::~Gfx11Lib
141 *
142 *   @brief
143 *       Destructor
144 ************************************************************************************************************************
145 */
~Gfx11Lib()146 Gfx11Lib::~Gfx11Lib()
147 {
148 }
149 
150 /**
151 ************************************************************************************************************************
152 *   Gfx11Lib::HwlComputeHtileInfo
153 *
154 *   @brief
155 *       Interface function stub of AddrComputeHtilenfo
156 *
157 *   @return
158 *       ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileInfo(
162     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
163     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
164     ) const
165 {
166     ADDR_E_RETURNCODE ret = ADDR_OK;
167 
168     if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X)  &&
169         (pIn->swizzleMode != ADDR_SW_256KB_Z_X) &&
170         (pIn->hTileFlags.pipeAligned != TRUE))
171     {
172         ret = ADDR_INVALIDPARAMS;
173     }
174     else
175     {
176         Dim3d         metaBlk     = {};
177         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataDepthStencil,
178                                                    ADDR_RSRC_TEX_2D,
179                                                    pIn->swizzleMode,
180                                                    0,
181                                                    0,
182                                                    TRUE,
183                                                    &metaBlk);
184 
185         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
186         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
187         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
188         pOut->metaBlkWidth  = metaBlk.w;
189         pOut->metaBlkHeight = metaBlk.h;
190 
191         if (pIn->numMipLevels > 1)
192         {
193             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
194 
195             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
196 
197             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
198             {
199                 UINT_32 mipWidth, mipHeight;
200 
201                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
202 
203                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
204                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
205 
206                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
207                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
208                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
209 
210                 if (pOut->pMipInfo != NULL)
211                 {
212                     pOut->pMipInfo[i].inMiptail = FALSE;
213                     pOut->pMipInfo[i].offset    = offset;
214                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
215                 }
216 
217                 offset += mipSliceSize;
218             }
219 
220             pOut->sliceSize          = offset;
221             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
222             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
223 
224             if (pOut->pMipInfo != NULL)
225             {
226                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
227                 {
228                     pOut->pMipInfo[i].inMiptail = TRUE;
229                     pOut->pMipInfo[i].offset    = 0;
230                     pOut->pMipInfo[i].sliceSize = 0;
231                 }
232 
233                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
234                 {
235                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
236                 }
237             }
238         }
239         else
240         {
241             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
242             const UINT_32 heightInM = pOut->height / metaBlk.h;
243 
244             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
245             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
246             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
247 
248             if (pOut->pMipInfo != NULL)
249             {
250                 pOut->pMipInfo[0].inMiptail = FALSE;
251                 pOut->pMipInfo[0].offset    = 0;
252                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
253             }
254         }
255 
256         // Get the HTILE address equation (copied from HtileAddrFromCoord).
257         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
258         const UINT_32  index         = m_htileBaseIndex;
259         const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;
260 
261         ADDR_C_ASSERT(sizeof(GFX11_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
262         pOut->equation.gfx10_bits = (UINT_16 *)GFX11_HTILE_SW_PATTERN[patIdxTable[index]];
263     }
264 
265     return ret;
266 }
267 
268 /**
269 ************************************************************************************************************************
270 *   Gfx11Lib::HwlComputeDccInfo
271 *
272 *   @brief
273 *       Interface function to compute DCC key info
274 *
275 *   @return
276 *       ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const279 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo(
280     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
281     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
282     ) const
283 {
284     ADDR_E_RETURNCODE ret = ADDR_OK;
285 
286     if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
287     {
288         ret = ADDR_INVALIDPARAMS;
289     }
290     else
291     {
292         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
293 
294         const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
295 
296         pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
297         pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
298         pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
299 
300         if (ret == ADDR_OK)
301         {
302             Dim3d         metaBlk     = {};
303             const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
304             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataColor,
305                                                        pIn->resourceType,
306                                                        pIn->swizzleMode,
307                                                        elemLog2,
308                                                        numFragLog2,
309                                                        pIn->dccKeyFlags.pipeAligned,
310                                                        &metaBlk);
311 
312             pOut->dccRamBaseAlign   = metaBlkSize;
313             pOut->metaBlkWidth      = metaBlk.w;
314             pOut->metaBlkHeight     = metaBlk.h;
315             pOut->metaBlkDepth      = metaBlk.d;
316             pOut->metaBlkSize       = metaBlkSize;
317 
318             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
319             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
320             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
321 
322             if (pIn->numMipLevels > 1)
323             {
324                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
325 
326                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
327 
328                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
329                 {
330                     UINT_32 mipWidth, mipHeight;
331 
332                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
333 
334                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
335                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
336 
337                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
338                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
339                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
340 
341                     if (pOut->pMipInfo != NULL)
342                     {
343                         pOut->pMipInfo[i].inMiptail = FALSE;
344                         pOut->pMipInfo[i].offset    = offset;
345                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
346                     }
347 
348                     offset += mipSliceSize;
349                 }
350 
351                 pOut->dccRamSliceSize    = offset;
352                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
353                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
354 
355                 if (pOut->pMipInfo != NULL)
356                 {
357                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
358                     {
359                         pOut->pMipInfo[i].inMiptail = TRUE;
360                         pOut->pMipInfo[i].offset    = 0;
361                         pOut->pMipInfo[i].sliceSize = 0;
362                     }
363 
364                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
365                     {
366                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
367                     }
368                 }
369             }
370             else
371             {
372                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
373                 const UINT_32 heightInM = pOut->height / metaBlk.h;
374 
375                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
376                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
377                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
378 
379                 if (pOut->pMipInfo != NULL)
380                 {
381                     pOut->pMipInfo[0].inMiptail = FALSE;
382                     pOut->pMipInfo[0].offset    = 0;
383                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
384                 }
385             }
386 
387             // Get the DCC address equation (copied from DccAddrFromCoord)
388             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
389             const UINT_32 numPipeLog2 = m_pipesLog2;
390             UINT_32       index       = m_dccBaseIndex + elemLog2;
391             const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
392                                         GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
393 
394             if (pIn->dccKeyFlags.pipeAligned)
395             {
396                 index += MaxNumOfBpp;
397 
398                 if (m_numPkrLog2 < 2)
399                 {
400                     index += m_pipesLog2 * MaxNumOfBpp;
401                 }
402                 else
403                 {
404                     // 4 groups for "m_numPkrLog2 < 2" case
405                     index += 4 * MaxNumOfBpp;
406 
407                     const UINT_32 dccPipePerPkr = 3;
408 
409                     index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
410                              (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
411                 }
412             }
413 
414             ADDR_C_ASSERT(sizeof(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
415             pOut->equation.gfx10_bits = (UINT_16*)GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]];
416         }
417     }
418 
419     return ret;
420 }
421 
422 /**
423 ************************************************************************************************************************
424 *   Gfx11Lib::HwlComputeHtileAddrFromCoord
425 *
426 *   @brief
427 *       Interface function stub of AddrComputeHtileAddrFromCoord
428 *
429 *   @return
430 *       ADDR_E_RETURNCODE
431 ************************************************************************************************************************
432 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)433 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileAddrFromCoord(
434     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
435     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
436 {
437     ADDR_E_RETURNCODE returnCode = ADDR_OK;
438 
439     if (pIn->numMipLevels > 1)
440     {
441         returnCode = ADDR_NOTIMPLEMENTED;
442     }
443     else
444     {
445         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
446         input.size            = sizeof(input);
447         input.hTileFlags      = pIn->hTileFlags;
448         input.depthFlags      = pIn->depthflags;
449         input.swizzleMode     = pIn->swizzleMode;
450         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
451         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
452         input.numSlices       = Max(pIn->numSlices,       1u);
453         input.numMipLevels    = 1;
454 
455         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
456         output.size = sizeof(output);
457 
458         returnCode = ComputeHtileInfo(&input, &output);
459 
460         if (returnCode == ADDR_OK)
461         {
462             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
463             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
464             const UINT_32  index         = m_htileBaseIndex + numSampleLog2;
465             const UINT_8*  patIdxTable   = GFX11_HTILE_PATIDX;
466             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
467             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
468             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX11_HTILE_SW_PATTERN[patIdxTable[index]],
469                                                                            blkSizeLog2 + 1, // +1 for nibble offset
470                                                                            pIn->x,
471                                                                            pIn->y,
472                                                                            pIn->slice,
473                                                                            0);
474             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
475             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
476             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
477             const UINT_32 blkIndex = (yb * pb) + xb;
478             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
479 
480             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
481                          (blkIndex * (1 << blkSizeLog2)) +
482                          ((blkOffset >> 1) ^ pipeXor);
483         }
484     }
485 
486     return returnCode;
487 }
488 
489 /**
490 ************************************************************************************************************************
491 *   Gfx11Lib::HwlComputeHtileCoordFromAddr
492 *
493 *   @brief
494 *       Interface function stub of AddrComputeHtileCoordFromAddr
495 *
496 *   @return
497 *       ADDR_E_RETURNCODE
498 ************************************************************************************************************************
499 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)500 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileCoordFromAddr(
501     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
502     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
503 {
504     ADDR_NOT_IMPLEMENTED();
505 
506     return ADDR_OK;
507 }
508 
509 /**
510 ************************************************************************************************************************
511 *   Gfx11Lib::HwlSupportComputeDccAddrFromCoord
512 *
513 *   @brief
514 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
515 *
516 *   @return
517 *       ADDR_E_RETURNCODE
518 ************************************************************************************************************************
519 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)520 ADDR_E_RETURNCODE Gfx11Lib::HwlSupportComputeDccAddrFromCoord(
521     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
522 {
523     ADDR_E_RETURNCODE returnCode = ADDR_OK;
524 
525     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
526         ((pIn->swizzleMode != ADDR_SW_64KB_R_X) &&
527          (pIn->swizzleMode != ADDR_SW_256KB_R_X)) ||
528         (pIn->dccKeyFlags.linear == TRUE) ||
529         (pIn->numFrags > 1) ||
530         (pIn->numMipLevels > 1) ||
531         (pIn->mipId > 0))
532     {
533         returnCode = ADDR_NOTSUPPORTED;
534     }
535     else if ((pIn->pitch == 0)         ||
536              (pIn->metaBlkWidth == 0)  ||
537              (pIn->metaBlkHeight == 0) ||
538              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
539     {
540         returnCode = ADDR_NOTSUPPORTED;
541     }
542 
543     return returnCode;
544 }
545 
546 /**
547 ************************************************************************************************************************
548 *   Gfx11Lib::HwlComputeDccAddrFromCoord
549 *
550 *   @brief
551 *       Interface function stub of AddrComputeDccAddrFromCoord
552 *
553 *   @return
554 *       N/A
555 ************************************************************************************************************************
556 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)557 VOID Gfx11Lib::HwlComputeDccAddrFromCoord(
558     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
559     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
560 {
561     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
562     const UINT_32 numPipeLog2 = m_pipesLog2;
563     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
564     UINT_32       index       = m_dccBaseIndex + elemLog2;
565     const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
566                                 GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
567 
568     if (pIn->dccKeyFlags.pipeAligned)
569     {
570         index += MaxNumOfBpp;
571 
572         if (m_numPkrLog2 < 2)
573         {
574             index += m_pipesLog2 * MaxNumOfBpp;
575         }
576         else
577         {
578             // 4 groups for "m_numPkrLog2 < 2" case
579             index += 4 * MaxNumOfBpp;
580 
581             const UINT_32 dccPipePerPkr = 3;
582 
583             index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
584                      (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
585         }
586     }
587 
588     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
589     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
590     const UINT_32  blkOffset   = ComputeOffsetFromSwizzlePattern(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]],
591                                                                  blkSizeLog2 + 1, // +1 for nibble offset
592                                                                  pIn->x,
593                                                                  pIn->y,
594                                                                  pIn->slice,
595                                                                  0);
596     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
597     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
598     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
599     const UINT_32 blkIndex = (yb * pb) + xb;
600     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
601 
602     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
603                  (blkIndex * (1 << blkSizeLog2)) +
604                  ((blkOffset >> 1) ^ pipeXor);
605 }
606 
607 /**
608 ************************************************************************************************************************
609 *   Gfx11Lib::HwlInitGlobalParams
610 *
611 *   @brief
612 *       Initializes global parameters
613 *
614 *   @return
615 *       TRUE if all settings are valid
616 *
617 ************************************************************************************************************************
618 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)619 BOOL_32 Gfx11Lib::HwlInitGlobalParams(
620     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
621 {
622     BOOL_32              valid = TRUE;
623     GB_ADDR_CONFIG_GFX11 gbAddrConfig;
624 
625     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
626 
627     switch (gbAddrConfig.bits.NUM_PIPES)
628     {
629         case ADDR_CONFIG_1_PIPE:
630             m_pipes     = 1;
631             m_pipesLog2 = 0;
632             break;
633         case ADDR_CONFIG_2_PIPE:
634             m_pipes     = 2;
635             m_pipesLog2 = 1;
636             break;
637         case ADDR_CONFIG_4_PIPE:
638             m_pipes     = 4;
639             m_pipesLog2 = 2;
640             break;
641         case ADDR_CONFIG_8_PIPE:
642             m_pipes     = 8;
643             m_pipesLog2 = 3;
644             break;
645         case ADDR_CONFIG_16_PIPE:
646             m_pipes     = 16;
647             m_pipesLog2 = 4;
648             break;
649         case ADDR_CONFIG_32_PIPE:
650             m_pipes     = 32;
651             m_pipesLog2 = 5;
652             break;
653         case ADDR_CONFIG_64_PIPE:
654             m_pipes     = 64;
655             m_pipesLog2 = 6;
656             break;
657         default:
658             ADDR_ASSERT_ALWAYS();
659             valid = FALSE;
660             break;
661     }
662 
663     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
664     {
665         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
666             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
667             m_pipeInterleaveLog2  = 8;
668             break;
669         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
670             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
671             m_pipeInterleaveLog2  = 9;
672             break;
673         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
674             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
675             m_pipeInterleaveLog2  = 10;
676             break;
677         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
678             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
679             m_pipeInterleaveLog2  = 11;
680             break;
681         default:
682             ADDR_ASSERT_ALWAYS();
683             valid = FALSE;
684             break;
685     }
686 
687     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
688     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
689     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
690     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
691 
692     // These fields are deprecated on GFX11; they do nothing on HW.
693     m_maxCompFrag     = 1;
694     m_maxCompFragLog2 = 0;
695 
696     // Skip unaligned case
697     m_htileBaseIndex += MaxNumOfAA;
698 
699     m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
700     m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
701 
702     m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
703     m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
704 
705     ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
706 
707     if (m_numPkrLog2 >= 2)
708     {
709         m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
710         m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
711     }
712 
713     // There is no so-called VAR swizzle mode on GFX11 and instead there are 4 256KB swizzle modes. Here we treat 256KB
714     // swizzle mode as "VAR" swizzle mode for reusing existing facilities (e.g. GetBlockSizeLog2()) provided by base class
715     m_blockVarSizeLog2 = 18;
716 
717     if (valid)
718     {
719         InitEquationTable();
720     }
721 
722     return valid;
723 }
724 
725 /**
726 ************************************************************************************************************************
727 *   Gfx11Lib::HwlConvertChipFamily
728 *
729 *   @brief
730 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
731 *   @return
732 *       ChipFamily
733 ************************************************************************************************************************
734 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)735 ChipFamily Gfx11Lib::HwlConvertChipFamily(
736     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
737     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
738 {
739     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
740 
741     switch (chipFamily)
742     {
743         case FAMILY_GFX1100:
744             if (ASICREV_IS_GFX1100(chipRevision))
745             {
746             }
747             if (ASICREV_IS_GFX1101(chipRevision))
748             {
749             }
750             if (ASICREV_IS_GFX1102(chipRevision))
751             {
752             }
753             break;
754         case FAMILY_GFX1103:
755             if (ASICREV_IS_GFX1103(chipRevision))
756             {
757             }
758             break;
759         default:
760             ADDR_ASSERT(!"Unknown chip family");
761             break;
762     }
763 
764     m_configFlags.use32bppFor422Fmt = TRUE;
765 
766     return family;
767 }
768 
769 /**
770 ************************************************************************************************************************
771 *   Gfx11Lib::GetBlk256SizeLog2
772 *
773 *   @brief
774 *       Get block 256 size
775 *
776 *   @return
777 *       N/A
778 ************************************************************************************************************************
779 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const780 void Gfx11Lib::GetBlk256SizeLog2(
781     AddrResourceType resourceType,      ///< [in] Resource type
782     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
783     UINT_32          elemLog2,          ///< [in] element size log2
784     UINT_32          numSamplesLog2,    ///< [in] number of samples
785     Dim3d*           pBlock             ///< [out] block size
786     ) const
787 {
788     if (IsThin(resourceType, swizzleMode))
789     {
790         UINT_32 blockBits = 8 - elemLog2;
791 
792         // On GFX11, Z and R modes are the same thing.
793         if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
794         {
795             blockBits -= numSamplesLog2;
796         }
797 
798         pBlock->w = (blockBits >> 1) + (blockBits & 1);
799         pBlock->h = (blockBits >> 1);
800         pBlock->d = 0;
801     }
802     else
803     {
804         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
805 
806         UINT_32 blockBits = 8 - elemLog2;
807 
808         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
809         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
810         pBlock->h = (blockBits / 3);
811     }
812 }
813 
814 /**
815 ************************************************************************************************************************
816 *   Gfx11Lib::GetCompressedBlockSizeLog2
817 *
818 *   @brief
819 *       Get compress block size
820 *
821 *   @return
822 *       N/A
823 ************************************************************************************************************************
824 */
GetCompressedBlockSizeLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const825 void Gfx11Lib::GetCompressedBlockSizeLog2(
826     Gfx11DataType    dataType,          ///< [in] Data type
827     AddrResourceType resourceType,      ///< [in] Resource type
828     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
829     UINT_32          elemLog2,          ///< [in] element size log2
830     UINT_32          numSamplesLog2,    ///< [in] number of samples
831     Dim3d*           pBlock             ///< [out] block size
832     ) const
833 {
834     if (dataType == Gfx11DataColor)
835     {
836         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
837     }
838     else
839     {
840         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
841         pBlock->w = 3;
842         pBlock->h = 3;
843         pBlock->d = 0;
844     }
845 }
846 
847 /**
848 ************************************************************************************************************************
849 *   Gfx11Lib::GetMetaOverlapLog2
850 *
851 *   @brief
852 *       Get meta block overlap
853 *
854 *   @return
855 *       N/A
856 ************************************************************************************************************************
857 */
GetMetaOverlapLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const858 INT_32 Gfx11Lib::GetMetaOverlapLog2(
859     Gfx11DataType    dataType,          ///< [in] Data type
860     AddrResourceType resourceType,      ///< [in] Resource type
861     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
862     UINT_32          elemLog2,          ///< [in] element size log2
863     UINT_32          numSamplesLog2     ///< [in] number of samples
864     ) const
865 {
866     Dim3d compBlock;
867     Dim3d microBlock;
868 
869     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
870     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
871 
872     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
873     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
874     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
875     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
876     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
877 
878     if (numPipesLog2 > 1)
879     {
880         overlap++;
881     }
882 
883     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
884     if ((elemLog2 == 4) && (numSamplesLog2 == 3))
885     {
886         overlap--;
887     }
888     overlap = Max(overlap, 0);
889     return overlap;
890 }
891 
892 /**
893 ************************************************************************************************************************
894 *   Gfx11Lib::Get3DMetaOverlapLog2
895 *
896 *   @brief
897 *       Get 3d meta block overlap
898 *
899 *   @return
900 *       N/A
901 ************************************************************************************************************************
902 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const903 INT_32 Gfx11Lib::Get3DMetaOverlapLog2(
904     AddrResourceType resourceType,      ///< [in] Resource type
905     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
906     UINT_32          elemLog2           ///< [in] element size log2
907     ) const
908 {
909     Dim3d microBlock;
910     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
911 
912     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
913 
914     overlap++;
915 
916     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
917     {
918         overlap = 0;
919     }
920     return overlap;
921 }
922 
923 /**
924 ************************************************************************************************************************
925 *   Gfx11Lib::GetPipeRotateAmount
926 *
927 *   @brief
928 *       Get pipe rotate amount
929 *
930 *   @return
931 *       Pipe rotate amount
932 ************************************************************************************************************************
933 */
934 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const935 INT_32 Gfx11Lib::GetPipeRotateAmount(
936     AddrResourceType resourceType,      ///< [in] Resource type
937     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
938     ) const
939 {
940     INT_32 amount = 0;
941 
942     if ((m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
943     {
944         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
945                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
946     }
947 
948     return amount;
949 }
950 
951 /**
952 ************************************************************************************************************************
953 *   Gfx11Lib::GetMetaBlkSize
954 *
955 *   @brief
956 *       Get metadata block size
957 *
958 *   @return
959 *       Meta block size
960 ************************************************************************************************************************
961 */
GetMetaBlkSize(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const962 UINT_32 Gfx11Lib::GetMetaBlkSize(
963     Gfx11DataType    dataType,          ///< [in] Data type
964     AddrResourceType resourceType,      ///< [in] Resource type
965     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
966     UINT_32          elemLog2,          ///< [in] element size log2
967     UINT_32          numSamplesLog2,    ///< [in] number of samples
968     BOOL_32          pipeAlign,         ///< [in] pipe align
969     Dim3d*           pBlock             ///< [out] block size
970     ) const
971 {
972     INT_32 metablkSizeLog2;
973 
974     const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
975     const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
976     const INT_32 compBlkSizeLog2    = (dataType == Gfx11DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
977     const INT_32 metaBlkSamplesLog2 = numSamplesLog2;
978     const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
979     INT_32       numPipesLog2       = m_pipesLog2;
980 
981     if (IsThin(resourceType, swizzleMode))
982     {
983         if ((pipeAlign == FALSE) ||
984             (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
985             (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
986         {
987             if (pipeAlign)
988             {
989                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
990                 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
991             }
992             else
993             {
994                 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
995             }
996         }
997         else
998         {
999             if ((m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1000             {
1001                 numPipesLog2++;
1002             }
1003 
1004             INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1005 
1006             if (numPipesLog2 >= 4)
1007             {
1008                 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1009 
1010                 // In 16Bpe 8xaa, we have an extra overlap bit
1011                 if ((pipeRotateLog2 > 0)  &&
1012                     (elemLog2 == 4)       &&
1013                     (numSamplesLog2 == 3) &&
1014                     (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1015                 {
1016                     overlapLog2++;
1017                 }
1018 
1019                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1020                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1021             }
1022             else
1023             {
1024                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1025             }
1026 
1027             if (dataType == Gfx11DataDepthStencil)
1028             {
1029                 // For htile surfaces, pad meta block size to 2K * num_pipes
1030                 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1031             }
1032 
1033             const INT_32 compFragLog2 = numSamplesLog2;
1034 
1035             if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1036             {
1037                 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1038 
1039                 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1040             }
1041         }
1042 
1043         const INT_32 metablkBitsLog2 =
1044             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1045         pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1046         pBlock->h = 1 << (metablkBitsLog2 >> 1);
1047         pBlock->d = 1;
1048     }
1049     else
1050     {
1051         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1052 
1053         if (pipeAlign)
1054         {
1055             if ((m_pipesLog2 == m_numSaLog2 + 1) &&
1056                 (m_pipesLog2 > 1)                &&
1057                 IsRbAligned(resourceType, swizzleMode))
1058             {
1059                 numPipesLog2++;
1060             }
1061 
1062             const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1063 
1064             metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1065             metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1066             metablkSizeLog2 = Max(metablkSizeLog2, 12);
1067         }
1068         else
1069         {
1070             metablkSizeLog2 = 12;
1071         }
1072 
1073         const INT_32 metablkBitsLog2 =
1074             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1075         pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1076         pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1077         pBlock->d = 1 << (metablkBitsLog2 / 3);
1078     }
1079 
1080     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1081 }
1082 
1083 /**
1084 ************************************************************************************************************************
1085 *   Gfx11Lib::ConvertSwizzlePatternToEquation
1086 *
1087 *   @brief
1088 *       Convert swizzle pattern to equation.
1089 *
1090 *   @return
1091 *       N/A
1092 ************************************************************************************************************************
1093 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1094 VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
1095     UINT_32                elemLog2,  ///< [in] element bytes log2
1096     AddrResourceType       rsrcType,  ///< [in] resource type
1097     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1098     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern infor
1099     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1100     const
1101 {
1102     ADDR_BIT_SETTING fullSwizzlePattern[20];
1103     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1104 
1105     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1106     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1107 
1108     pEquation->numBits            = blockSizeLog2;
1109     pEquation->stackedDepthSlices = FALSE;
1110 
1111     for (UINT_32 i = 0; i < elemLog2; i++)
1112     {
1113         pEquation->addr[i].channel = 0;
1114         pEquation->addr[i].valid   = 1;
1115         pEquation->addr[i].index   = i;
1116     }
1117 
1118     if (IsXor(swMode) == FALSE)
1119     {
1120         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1121         {
1122             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1123 
1124             if (pSwizzle[i].x != 0)
1125             {
1126                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1127 
1128                 pEquation->addr[i].channel = 0;
1129                 pEquation->addr[i].valid   = 1;
1130                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1131             }
1132             else if (pSwizzle[i].y != 0)
1133             {
1134                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1135 
1136                 pEquation->addr[i].channel = 1;
1137                 pEquation->addr[i].valid   = 1;
1138                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1139             }
1140             else
1141             {
1142                 ADDR_ASSERT(pSwizzle[i].z != 0);
1143                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1144 
1145                 pEquation->addr[i].channel = 2;
1146                 pEquation->addr[i].valid   = 1;
1147                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1148             }
1149 
1150             pEquation->xor1[i].value = 0;
1151             pEquation->xor2[i].value = 0;
1152         }
1153     }
1154     else if (IsThin(rsrcType, swMode))
1155     {
1156         Dim3d dim;
1157         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1158 
1159         const UINT_32 blkXLog2 = Log2(dim.w);
1160         const UINT_32 blkYLog2 = Log2(dim.h);
1161         const UINT_32 blkXMask = dim.w - 1;
1162         const UINT_32 blkYMask = dim.h - 1;
1163 
1164         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1165         UINT_32          xMask = 0;
1166         UINT_32          yMask = 0;
1167         UINT_32          bMask = (1 << elemLog2) - 1;
1168 
1169         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1170         {
1171             if (IsPow2(pSwizzle[i].value))
1172             {
1173                 if (pSwizzle[i].x != 0)
1174                 {
1175                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1176                     xMask |= pSwizzle[i].x;
1177 
1178                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1179 
1180                     ADDR_ASSERT(xLog2 < blkXLog2);
1181 
1182                     pEquation->addr[i].channel = 0;
1183                     pEquation->addr[i].valid   = 1;
1184                     pEquation->addr[i].index   = xLog2 + elemLog2;
1185                 }
1186                 else
1187                 {
1188                     ADDR_ASSERT(pSwizzle[i].y != 0);
1189                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1190                     yMask |= pSwizzle[i].y;
1191 
1192                     pEquation->addr[i].channel = 1;
1193                     pEquation->addr[i].valid   = 1;
1194                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1195 
1196                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1197                 }
1198 
1199                 swizzle[i].value = 0;
1200                 bMask |= 1 << i;
1201             }
1202             else
1203             {
1204                 if (pSwizzle[i].z != 0)
1205                 {
1206                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1207 
1208                     pEquation->xor2[i].channel = 2;
1209                     pEquation->xor2[i].valid   = 1;
1210                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1211                 }
1212 
1213                 swizzle[i].x = pSwizzle[i].x;
1214                 swizzle[i].y = pSwizzle[i].y;
1215                 swizzle[i].z = swizzle[i].s = 0;
1216 
1217                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1218 
1219                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1220 
1221                 if (xHi != 0)
1222                 {
1223                     ADDR_ASSERT(IsPow2(xHi));
1224                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1225 
1226                     pEquation->xor1[i].channel = 0;
1227                     pEquation->xor1[i].valid   = 1;
1228                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1229 
1230                     swizzle[i].x &= blkXMask;
1231                 }
1232 
1233                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1234 
1235                 if (yHi != 0)
1236                 {
1237                     ADDR_ASSERT(IsPow2(yHi));
1238 
1239                     if (xHi == 0)
1240                     {
1241                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1242                         pEquation->xor1[i].channel = 1;
1243                         pEquation->xor1[i].valid   = 1;
1244                         pEquation->xor1[i].index   = Log2(yHi);
1245                     }
1246                     else
1247                     {
1248                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1249                         pEquation->xor2[i].channel = 1;
1250                         pEquation->xor2[i].valid   = 1;
1251                         pEquation->xor2[i].index   = Log2(yHi);
1252                     }
1253 
1254                     swizzle[i].y &= blkYMask;
1255                 }
1256 
1257                 if (swizzle[i].value == 0)
1258                 {
1259                     bMask |= 1 << i;
1260                 }
1261             }
1262         }
1263 
1264         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1265         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1266 
1267         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1268 
1269         while (bMask != blockMask)
1270         {
1271             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1272             {
1273                 if ((bMask & (1 << i)) == 0)
1274                 {
1275                     if (IsPow2(swizzle[i].value))
1276                     {
1277                         if (swizzle[i].x != 0)
1278                         {
1279                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1280                             xMask |= swizzle[i].x;
1281 
1282                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1283 
1284                             ADDR_ASSERT(xLog2 < blkXLog2);
1285 
1286                             pEquation->addr[i].channel = 0;
1287                             pEquation->addr[i].valid   = 1;
1288                             pEquation->addr[i].index   = xLog2 + elemLog2;
1289                         }
1290                         else
1291                         {
1292                             ADDR_ASSERT(swizzle[i].y != 0);
1293                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1294                             yMask |= swizzle[i].y;
1295 
1296                             pEquation->addr[i].channel = 1;
1297                             pEquation->addr[i].valid   = 1;
1298                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1299 
1300                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1301                         }
1302 
1303                         swizzle[i].value = 0;
1304                         bMask |= 1 << i;
1305                     }
1306                     else
1307                     {
1308                         const UINT_32 x = swizzle[i].x & xMask;
1309                         const UINT_32 y = swizzle[i].y & yMask;
1310 
1311                         if (x != 0)
1312                         {
1313                             ADDR_ASSERT(IsPow2(x));
1314 
1315                             if (pEquation->xor1[i].value == 0)
1316                             {
1317                                 pEquation->xor1[i].channel = 0;
1318                                 pEquation->xor1[i].valid   = 1;
1319                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1320                             }
1321                             else
1322                             {
1323                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1324                                 pEquation->xor2[i].channel = 0;
1325                                 pEquation->xor2[i].valid   = 1;
1326                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1327                             }
1328                         }
1329 
1330                         if (y != 0)
1331                         {
1332                             ADDR_ASSERT(IsPow2(y));
1333 
1334                             if (pEquation->xor1[i].value == 0)
1335                             {
1336                                 pEquation->xor1[i].channel = 1;
1337                                 pEquation->xor1[i].valid   = 1;
1338                                 pEquation->xor1[i].index   = Log2(y);
1339                             }
1340                             else
1341                             {
1342                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1343                                 pEquation->xor2[i].channel = 1;
1344                                 pEquation->xor2[i].valid   = 1;
1345                                 pEquation->xor2[i].index   = Log2(y);
1346                             }
1347                         }
1348 
1349                         swizzle[i].x &= ~x;
1350                         swizzle[i].y &= ~y;
1351                     }
1352                 }
1353             }
1354         }
1355 
1356         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1357     }
1358     else
1359     {
1360         const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ?
1361                               Block256K_Log2_3d[elemLog2] :
1362                               ((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]);
1363 
1364         const UINT_32 blkXLog2 = blkDim.w;
1365         const UINT_32 blkYLog2 = blkDim.h;
1366         const UINT_32 blkZLog2 = blkDim.d;
1367         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1368         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1369         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1370 
1371         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1372         UINT_32          xMask = 0;
1373         UINT_32          yMask = 0;
1374         UINT_32          zMask = 0;
1375         UINT_32          bMask = (1 << elemLog2) - 1;
1376 
1377         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1378         {
1379             if (IsPow2(pSwizzle[i].value))
1380             {
1381                 if (pSwizzle[i].x != 0)
1382                 {
1383                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1384                     xMask |= pSwizzle[i].x;
1385 
1386                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1387 
1388                     ADDR_ASSERT(xLog2 < blkXLog2);
1389 
1390                     pEquation->addr[i].channel = 0;
1391                     pEquation->addr[i].valid   = 1;
1392                     pEquation->addr[i].index   = xLog2 + elemLog2;
1393                 }
1394                 else if (pSwizzle[i].y != 0)
1395                 {
1396                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1397                     yMask |= pSwizzle[i].y;
1398 
1399                     pEquation->addr[i].channel = 1;
1400                     pEquation->addr[i].valid   = 1;
1401                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1402 
1403                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1404                 }
1405                 else
1406                 {
1407                     ADDR_ASSERT(pSwizzle[i].z != 0);
1408                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1409                     zMask |= pSwizzle[i].z;
1410 
1411                     pEquation->addr[i].channel = 2;
1412                     pEquation->addr[i].valid   = 1;
1413                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1414 
1415                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1416                 }
1417 
1418                 swizzle[i].value = 0;
1419                 bMask |= 1 << i;
1420             }
1421             else
1422             {
1423                 swizzle[i].x = pSwizzle[i].x;
1424                 swizzle[i].y = pSwizzle[i].y;
1425                 swizzle[i].z = pSwizzle[i].z;
1426                 swizzle[i].s = 0;
1427 
1428                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1429 
1430                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1431                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1432                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1433 
1434                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1435 
1436                 if (xHi != 0)
1437                 {
1438                     ADDR_ASSERT(IsPow2(xHi));
1439                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1440 
1441                     pEquation->xor1[i].channel = 0;
1442                     pEquation->xor1[i].valid   = 1;
1443                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1444 
1445                     swizzle[i].x &= blkXMask;
1446                 }
1447 
1448                 if (yHi != 0)
1449                 {
1450                     ADDR_ASSERT(IsPow2(yHi));
1451 
1452                     if (pEquation->xor1[i].value == 0)
1453                     {
1454                         pEquation->xor1[i].channel = 1;
1455                         pEquation->xor1[i].valid   = 1;
1456                         pEquation->xor1[i].index   = Log2(yHi);
1457                     }
1458                     else
1459                     {
1460                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1461                         pEquation->xor2[i].channel = 1;
1462                         pEquation->xor2[i].valid   = 1;
1463                         pEquation->xor2[i].index   = Log2(yHi);
1464                     }
1465 
1466                     swizzle[i].y &= blkYMask;
1467                 }
1468 
1469                 if (zHi != 0)
1470                 {
1471                     ADDR_ASSERT(IsPow2(zHi));
1472 
1473                     if (pEquation->xor1[i].value == 0)
1474                     {
1475                         pEquation->xor1[i].channel = 2;
1476                         pEquation->xor1[i].valid   = 1;
1477                         pEquation->xor1[i].index   = Log2(zHi);
1478                     }
1479                     else
1480                     {
1481                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1482                         pEquation->xor2[i].channel = 2;
1483                         pEquation->xor2[i].valid   = 1;
1484                         pEquation->xor2[i].index   = Log2(zHi);
1485                     }
1486 
1487                     swizzle[i].z &= blkZMask;
1488                 }
1489 
1490                 if (swizzle[i].value == 0)
1491                 {
1492                     bMask |= 1 << i;
1493                 }
1494             }
1495         }
1496 
1497         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1498         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1499 
1500         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1501 
1502         while (bMask != blockMask)
1503         {
1504             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1505             {
1506                 if ((bMask & (1 << i)) == 0)
1507                 {
1508                     if (IsPow2(swizzle[i].value))
1509                     {
1510                         if (swizzle[i].x != 0)
1511                         {
1512                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1513                             xMask |= swizzle[i].x;
1514 
1515                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1516 
1517                             ADDR_ASSERT(xLog2 < blkXLog2);
1518 
1519                             pEquation->addr[i].channel = 0;
1520                             pEquation->addr[i].valid   = 1;
1521                             pEquation->addr[i].index   = xLog2 + elemLog2;
1522                         }
1523                         else if (swizzle[i].y != 0)
1524                         {
1525                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1526                             yMask |= swizzle[i].y;
1527 
1528                             pEquation->addr[i].channel = 1;
1529                             pEquation->addr[i].valid   = 1;
1530                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1531 
1532                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1533                         }
1534                         else
1535                         {
1536                             ADDR_ASSERT(swizzle[i].z != 0);
1537                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1538                             zMask |= swizzle[i].z;
1539 
1540                             pEquation->addr[i].channel = 2;
1541                             pEquation->addr[i].valid   = 1;
1542                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1543 
1544                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1545                         }
1546 
1547                         swizzle[i].value = 0;
1548                         bMask |= 1 << i;
1549                     }
1550                     else
1551                     {
1552                         const UINT_32 x = swizzle[i].x & xMask;
1553                         const UINT_32 y = swizzle[i].y & yMask;
1554                         const UINT_32 z = swizzle[i].z & zMask;
1555 
1556                         if (x != 0)
1557                         {
1558                             ADDR_ASSERT(IsPow2(x));
1559 
1560                             if (pEquation->xor1[i].value == 0)
1561                             {
1562                                 pEquation->xor1[i].channel = 0;
1563                                 pEquation->xor1[i].valid   = 1;
1564                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1565                             }
1566                             else
1567                             {
1568                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1569                                 pEquation->xor2[i].channel = 0;
1570                                 pEquation->xor2[i].valid   = 1;
1571                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1572                             }
1573                         }
1574 
1575                         if (y != 0)
1576                         {
1577                             ADDR_ASSERT(IsPow2(y));
1578 
1579                             if (pEquation->xor1[i].value == 0)
1580                             {
1581                                 pEquation->xor1[i].channel = 1;
1582                                 pEquation->xor1[i].valid   = 1;
1583                                 pEquation->xor1[i].index   = Log2(y);
1584                             }
1585                             else
1586                             {
1587                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1588                                 pEquation->xor2[i].channel = 1;
1589                                 pEquation->xor2[i].valid   = 1;
1590                                 pEquation->xor2[i].index   = Log2(y);
1591                             }
1592                         }
1593 
1594                         if (z != 0)
1595                         {
1596                             ADDR_ASSERT(IsPow2(z));
1597 
1598                             if (pEquation->xor1[i].value == 0)
1599                             {
1600                                 pEquation->xor1[i].channel = 2;
1601                                 pEquation->xor1[i].valid   = 1;
1602                                 pEquation->xor1[i].index   = Log2(z);
1603                             }
1604                             else
1605                             {
1606                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1607                                 pEquation->xor2[i].channel = 2;
1608                                 pEquation->xor2[i].valid   = 1;
1609                                 pEquation->xor2[i].index   = Log2(z);
1610                             }
1611                         }
1612 
1613                         swizzle[i].x &= ~x;
1614                         swizzle[i].y &= ~y;
1615                         swizzle[i].z &= ~z;
1616                     }
1617                 }
1618             }
1619         }
1620 
1621         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1622     }
1623 }
1624 
1625 /**
1626 ************************************************************************************************************************
1627 *   Gfx11Lib::InitEquationTable
1628 *
1629 *   @brief
1630 *       Initialize Equation table.
1631 *
1632 *   @return
1633 *       N/A
1634 ************************************************************************************************************************
1635 */
InitEquationTable()1636 VOID Gfx11Lib::InitEquationTable()
1637 {
1638     memset(m_equationTable, 0, sizeof(m_equationTable));
1639 
1640     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1641     {
1642         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1643 
1644         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1645         {
1646             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1647 
1648             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1649             {
1650                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1651                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1652 
1653                 if (pPatInfo != NULL)
1654                 {
1655                     ADDR_ASSERT(IsValidSwMode(swMode));
1656 
1657                     if (pPatInfo->maxItemCount <= 3)
1658                     {
1659                         ADDR_EQUATION equation = {};
1660 
1661                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1662 
1663                         equationIndex = m_numEquations;
1664                         ADDR_ASSERT(equationIndex < EquationTableSize);
1665 
1666                         m_equationTable[equationIndex] = equation;
1667 
1668                         m_numEquations++;
1669                     }
1670                     else
1671                     {
1672                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X
1673                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1674                         ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D);
1675                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1676                     }
1677                 }
1678 
1679                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1680             }
1681         }
1682     }
1683 }
1684 
1685 /**
1686 ************************************************************************************************************************
1687 *   Gfx11Lib::HwlGetEquationIndex
1688 *
1689 *   @brief
1690 *       Interface function stub of GetEquationIndex
1691 *
1692 *   @return
1693 *       ADDR_E_RETURNCODE
1694 ************************************************************************************************************************
1695 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1696 UINT_32 Gfx11Lib::HwlGetEquationIndex(
1697     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
1698     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
1699     ) const
1700 {
1701     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1702 
1703     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1704         (pIn->resourceType == ADDR_RSRC_TEX_3D))
1705     {
1706         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1707         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
1708         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
1709 
1710         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1711     }
1712 
1713     if (pOut->pMipInfo != NULL)
1714     {
1715         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1716         {
1717             pOut->pMipInfo[i].equationIndex = equationIdx;
1718         }
1719     }
1720 
1721     return equationIdx;
1722 }
1723 
1724 /**
1725 ************************************************************************************************************************
1726 *   Gfx11Lib::GetValidDisplaySwizzleModes
1727 *
1728 *   @brief
1729 *       Get valid swizzle modes mask for displayable surface
1730 *
1731 *   @return
1732 *       Valid swizzle modes mask for displayable surface
1733 ************************************************************************************************************************
1734 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1735 UINT_32 Gfx11Lib::GetValidDisplaySwizzleModes(
1736     UINT_32 bpp
1737     ) const
1738 {
1739     UINT_32 swModeMask = 0;
1740 
1741     if (bpp <= 64)
1742     {
1743         swModeMask = Dcn32SwModeMask;
1744     }
1745 
1746     return swModeMask;
1747 }
1748 
1749 /**
1750 ************************************************************************************************************************
1751 *   Gfx11Lib::IsValidDisplaySwizzleMode
1752 *
1753 *   @brief
1754 *       Check if a swizzle mode is supported by display engine
1755 *
1756 *   @return
1757 *       TRUE is swizzle mode is supported by display engine
1758 ************************************************************************************************************************
1759 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const1760 BOOL_32 Gfx11Lib::IsValidDisplaySwizzleMode(
1761     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
1762     ) const
1763 {
1764     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1765 
1766     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
1767 }
1768 
1769 /**
1770 ************************************************************************************************************************
1771 *   Gfx11Lib::GetMaxNumMipsInTail
1772 *
1773 *   @brief
1774 *       Return max number of mips in tails
1775 *
1776 *   @return
1777 *       Max number of mips in tails
1778 ************************************************************************************************************************
1779 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const1780 UINT_32 Gfx11Lib::GetMaxNumMipsInTail(
1781     UINT_32 blockSizeLog2,     ///< block size log2
1782     BOOL_32 isThin             ///< is thin or thick
1783     ) const
1784 {
1785     UINT_32 effectiveLog2 = blockSizeLog2;
1786 
1787     if (isThin == FALSE)
1788     {
1789         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1790     }
1791 
1792     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1793 }
1794 
1795 /**
1796 ************************************************************************************************************************
1797 *   Gfx11Lib::HwlComputePipeBankXor
1798 *
1799 *   @brief
1800 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1801 *
1802 *   @return
1803 *       PipeBankXor value
1804 ************************************************************************************************************************
1805 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1806 ADDR_E_RETURNCODE Gfx11Lib::HwlComputePipeBankXor(
1807     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
1808     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
1809     ) const
1810 {
1811     if (IsNonPrtXor(pIn->swizzleMode))
1812     {
1813         pOut->pipeBankXor = 0;
1814     }
1815     else
1816     {
1817         pOut->pipeBankXor = 0;
1818     }
1819 
1820     return ADDR_OK;
1821 }
1822 
1823 /**
1824 ************************************************************************************************************************
1825 *   Gfx11Lib::HwlComputeSlicePipeBankXor
1826 *
1827 *   @brief
1828 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1829 *
1830 *   @return
1831 *       PipeBankXor value
1832 ************************************************************************************************************************
1833 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1834 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
1835     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
1836     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
1837     ) const
1838 {
1839     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1840 
1841     if (IsNonPrtXor(pIn->swizzleMode))
1842     {
1843         if (pIn->bpe == 0)
1844         {
1845             ADDR_ASSERT_ALWAYS();
1846 
1847             // Require a valid bytes-per-element value passed from client...
1848             returnCode = ADDR_INVALIDPARAMS;
1849         }
1850         else
1851         {
1852             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1853                                                                     pIn->resourceType,
1854                                                                     Log2(pIn->bpe >> 3),
1855                                                                     1);
1856 
1857             if (pPatInfo != NULL)
1858             {
1859                 ADDR_BIT_SETTING fullSwizzlePattern[20];
1860                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1861 
1862                 const UINT_32 pipeBankXorOffset =
1863                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
1864                                                     GetBlockSizeLog2(pIn->swizzleMode),
1865                                                     0,
1866                                                     0,
1867                                                     pIn->slice,
1868                                                     0);
1869 
1870                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1871 
1872                 // Should have no bit set under pipe interleave
1873                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1874 
1875                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1876             }
1877             else
1878             {
1879                 // Should never come here...
1880                 ADDR_NOT_IMPLEMENTED();
1881 
1882                 returnCode = ADDR_NOTSUPPORTED;
1883             }
1884         }
1885     }
1886     else
1887     {
1888         pOut->pipeBankXor = 0;
1889     }
1890 
1891     return returnCode;
1892 }
1893 
1894 /**
1895 ************************************************************************************************************************
1896 *   Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1897 *
1898 *   @brief
1899 *       Compute sub resource offset to support swizzle pattern
1900 *
1901 *   @return
1902 *       Offset
1903 ************************************************************************************************************************
1904 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1905 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1906     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
1907     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
1908     ) const
1909 {
1910     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
1911 
1912     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1913 
1914     return ADDR_OK;
1915 }
1916 
1917 /**
1918 ************************************************************************************************************************
1919 *   Gfx11Lib::HwlComputeNonBlockCompressedView
1920 *
1921 *   @brief
1922 *       Compute non-block-compressed view for a given mipmap level/slice.
1923 *
1924 *   @return
1925 *       ADDR_E_RETURNCODE
1926 ************************************************************************************************************************
1927 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1928 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView(
1929     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
1930     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
1931     ) const
1932 {
1933     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1934 
1935     if (pIn->resourceType != ADDR_RSRC_TEX_2D)
1936     {
1937         // Only 2D resource can have a NonBC view...
1938         returnCode = ADDR_INVALIDPARAMS;
1939     }
1940     else if ((pIn->format != ADDR_FMT_ASTC_8x8) &&
1941              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1942     {
1943         // Only support BC1~BC7 or ASTC_8x8 for now...
1944         returnCode = ADDR_NOTSUPPORTED;
1945     }
1946     else
1947     {
1948         UINT_32 bcWidth, bcHeight;
1949         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1950 
1951         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1952         infoIn.flags        = pIn->flags;
1953         infoIn.swizzleMode  = pIn->swizzleMode;
1954         infoIn.resourceType = pIn->resourceType;
1955         infoIn.bpp          = bpp;
1956         infoIn.width        = PowTwoAlign(pIn->width, bcWidth) / bcWidth;
1957         infoIn.height       = PowTwoAlign(pIn->height, bcHeight) / bcHeight;
1958         infoIn.numSlices    = pIn->numSlices;
1959         infoIn.numMipLevels = pIn->numMipLevels;
1960         infoIn.numSamples   = 1;
1961         infoIn.numFrags     = 1;
1962 
1963         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
1964 
1965         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1966         infoOut.pMipInfo = mipInfo;
1967 
1968         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
1969 
1970         if (tiled)
1971         {
1972             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
1973         }
1974         else
1975         {
1976             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
1977         }
1978 
1979         if (returnCode == ADDR_OK)
1980         {
1981             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1982             subOffIn.swizzleMode      = infoIn.swizzleMode;
1983             subOffIn.resourceType     = infoIn.resourceType;
1984             subOffIn.slice            = pIn->slice;
1985             subOffIn.sliceSize        = infoOut.sliceSize;
1986             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1987             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
1988 
1989             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1990 
1991             // For any mipmap level, move nonBc view base address by offset
1992             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1993             pOut->offset = subOffOut.offset;
1994 
1995             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1996             slicePbXorIn.bpe             = infoIn.bpp;
1997             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
1998             slicePbXorIn.resourceType    = infoIn.resourceType;
1999             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2000             slicePbXorIn.slice           = pIn->slice;
2001 
2002             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2003 
2004             // For any mipmap level, nonBc view should use computed pbXor
2005             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2006             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2007 
2008             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2009             const UINT_32 requestMipWidth  = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth;
2010             const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight;
2011 
2012             if (inTail)
2013             {
2014                 // For mipmap level that is in mip tail block, hack a lot of things...
2015                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2016                 // are fit in tail block:
2017 
2018                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2019                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2020 
2021                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2022                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2023 
2024                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2025                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2026 
2027                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2028                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2029             }
2030             // This check should cover at least mipId == 0
2031             else if (requestMipWidth << pIn->mipId == infoIn.width)
2032             {
2033                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2034                 // - only one mipmap level and mipId = 0
2035                 pOut->mipId        = 0;
2036                 pOut->numMipLevels = 1;
2037 
2038                 // (mip0) width = requestMipWidth
2039                 pOut->unalignedWidth = requestMipWidth;
2040 
2041                 // (mip0) height = requestMipHeight
2042                 pOut->unalignedHeight = requestMipHeight;
2043             }
2044             else
2045             {
2046                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2047                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2048                 // because single mip view may have different pitch value than original (multiple) mip view...
2049                 // A simple case would be:
2050                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2051                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2052                 //   mip0 width = 0x101/mip1 width = 0x80
2053                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2054                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2055 
2056                 // - 2 levels and mipId = 1
2057                 pOut->mipId        = 1;
2058                 pOut->numMipLevels = 2;
2059 
2060                 const UINT_32 upperMipWidth  =
2061                     PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth;
2062                 const UINT_32 upperMipHeight =
2063                     PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight;
2064 
2065                 const BOOL_32 needToAvoidInTail =
2066                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2067                     TRUE : FALSE;
2068 
2069                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2070                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2071 
2072                 const BOOL_32 needExtraWidth =
2073                     ((upperMipWidth < requestMipWidth * 2) ||
2074                      ((upperMipWidth == requestMipWidth * 2) &&
2075                       ((needToAvoidInTail == TRUE) ||
2076                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2077 
2078                 const BOOL_32 needExtraHeight =
2079                     ((upperMipHeight < requestMipHeight * 2) ||
2080                      ((upperMipHeight == requestMipHeight * 2) &&
2081                       ((needToAvoidInTail == TRUE) ||
2082                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2083 
2084                 // (mip0) width = requestLastMipLevelWidth
2085                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2086 
2087                 // (mip0) height = requestLastMipLevelHeight
2088                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2089             }
2090 
2091             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2092             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2093             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2094             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2095         }
2096     }
2097 
2098     return returnCode;
2099 }
2100 
2101 /**
2102 ************************************************************************************************************************
2103 *   Gfx11Lib::ValidateNonSwModeParams
2104 *
2105 *   @brief
2106 *       Validate compute surface info params except swizzle mode
2107 *
2108 *   @return
2109 *       TRUE if parameters are valid, FALSE otherwise
2110 ************************************************************************************************************************
2111 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2112 BOOL_32 Gfx11Lib::ValidateNonSwModeParams(
2113     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2114 {
2115     BOOL_32 valid = TRUE;
2116 
2117     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8))
2118     {
2119         ADDR_ASSERT_ALWAYS();
2120         valid = FALSE;
2121     }
2122     else if (pIn->flags.fmask == 1)
2123     {
2124         // There is no FMASK for GFX11 ASICs
2125         ADDR_ASSERT_ALWAYS();
2126         valid = FALSE;
2127     }
2128     else if (pIn->numSamples > 8)
2129     {
2130         // There is no EQAA support for GFX11 ASICs, so the max number of sample is 8
2131         ADDR_ASSERT_ALWAYS();
2132         valid = FALSE;
2133     }
2134     else if ((pIn->numFrags != 0) && (pIn->numSamples != pIn->numFrags))
2135     {
2136         // There is no EQAA support for GFX11 ASICs, so the number of sample has to be same as number of fragment
2137         ADDR_ASSERT_ALWAYS();
2138         valid = FALSE;
2139     }
2140 
2141     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2142     const AddrResourceType    rsrcType = pIn->resourceType;
2143     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2144     const BOOL_32             msaa     = (pIn->numSamples > 1);
2145     const BOOL_32             display  = flags.display;
2146     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2147     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2148     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2149     const BOOL_32             stereo   = flags.qbStereo;
2150 
2151     // Resource type check
2152     if (tex1d)
2153     {
2154         if (msaa || display || stereo)
2155         {
2156             ADDR_ASSERT_ALWAYS();
2157             valid = FALSE;
2158         }
2159     }
2160     else if (tex2d)
2161     {
2162         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2163         {
2164             ADDR_ASSERT_ALWAYS();
2165             valid = FALSE;
2166         }
2167     }
2168     else if (tex3d)
2169     {
2170         if (msaa || display || stereo)
2171         {
2172             ADDR_ASSERT_ALWAYS();
2173             valid = FALSE;
2174         }
2175     }
2176     else
2177     {
2178         ADDR_ASSERT_ALWAYS();
2179         valid = FALSE;
2180     }
2181 
2182     return valid;
2183 }
2184 
2185 /**
2186 ************************************************************************************************************************
2187 *   Gfx11Lib::ValidateSwModeParams
2188 *
2189 *   @brief
2190 *       Validate compute surface info related to swizzle mode
2191 *
2192 *   @return
2193 *       TRUE if parameters are valid, FALSE otherwise
2194 ************************************************************************************************************************
2195 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2196 BOOL_32 Gfx11Lib::ValidateSwModeParams(
2197     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2198 {
2199     BOOL_32 valid = TRUE;
2200 
2201     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2202     {
2203         ADDR_ASSERT_ALWAYS();
2204         valid = FALSE;
2205     }
2206     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2207     {
2208         ADDR_ASSERT_ALWAYS();
2209         valid = FALSE;
2210     }
2211 
2212     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2213     const AddrResourceType    rsrcType    = pIn->resourceType;
2214     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2215     const BOOL_32             msaa        = (pIn->numSamples > 1);
2216     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2217     const BOOL_32             color       = flags.color;
2218     const BOOL_32             display     = flags.display;
2219     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2220     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2221     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2222     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2223     const BOOL_32             linear      = IsLinear(swizzle);
2224     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2225     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2226     const BOOL_32             prt         = flags.prt;
2227 
2228     // Misc check
2229     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numSamples)))
2230     {
2231         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2232         ADDR_ASSERT_ALWAYS();
2233         valid = FALSE;
2234     }
2235 
2236     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2237     {
2238         ADDR_ASSERT_ALWAYS();
2239         valid = FALSE;
2240     }
2241 
2242     if ((pIn->bpp == 96) && (linear == FALSE))
2243     {
2244         ADDR_ASSERT_ALWAYS();
2245         valid = FALSE;
2246     }
2247 
2248     const UINT_32 swizzleMask = 1 << swizzle;
2249 
2250     // Resource type check
2251     if (tex1d)
2252     {
2253         if ((swizzleMask & Gfx11Rsrc1dSwModeMask) == 0)
2254         {
2255             ADDR_ASSERT_ALWAYS();
2256             valid = FALSE;
2257         }
2258     }
2259     else if (tex2d)
2260     {
2261         if ((swizzleMask & Gfx11Rsrc2dSwModeMask) == 0)
2262         {
2263             ADDR_ASSERT_ALWAYS();
2264             valid = FALSE;
2265         }
2266         else if (prt && ((swizzleMask & Gfx11Rsrc2dPrtSwModeMask) == 0))
2267         {
2268             ADDR_ASSERT_ALWAYS();
2269             valid = FALSE;
2270         }
2271     }
2272     else if (tex3d)
2273     {
2274         if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) ||
2275             (prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) ||
2276             (thin3d && ((swizzleMask & Gfx11Rsrc3dThinSwModeMask) == 0)))
2277         {
2278             ADDR_ASSERT_ALWAYS();
2279             valid = FALSE;
2280         }
2281     }
2282 
2283     // Swizzle type check
2284     if (linear)
2285     {
2286         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2287         {
2288             ADDR_ASSERT_ALWAYS();
2289             valid = FALSE;
2290         }
2291     }
2292     else if (IsZOrderSwizzle(swizzle))
2293     {
2294         if ((pIn->bpp > 64)                         ||
2295             (msaa && (color || (pIn->bpp > 32)))    ||
2296             ElemLib::IsBlockCompressed(pIn->format) ||
2297             ElemLib::IsMacroPixelPacked(pIn->format))
2298         {
2299             ADDR_ASSERT_ALWAYS();
2300             valid = FALSE;
2301         }
2302     }
2303     else if (IsStandardSwizzle(rsrcType, swizzle))
2304     {
2305         if (zbuffer || msaa)
2306         {
2307             ADDR_ASSERT_ALWAYS();
2308             valid = FALSE;
2309         }
2310     }
2311     else if (IsDisplaySwizzle(rsrcType, swizzle))
2312     {
2313         if (zbuffer || msaa)
2314         {
2315             ADDR_ASSERT_ALWAYS();
2316             valid = FALSE;
2317         }
2318     }
2319     else if (IsRtOptSwizzle(swizzle))
2320     {
2321         if (zbuffer)
2322         {
2323             ADDR_ASSERT_ALWAYS();
2324             valid = FALSE;
2325         }
2326     }
2327     else
2328     {
2329         ADDR_ASSERT_ALWAYS();
2330         valid = FALSE;
2331     }
2332 
2333     // Block type check
2334     if (blk256B)
2335     {
2336         if (zbuffer || tex3d || msaa)
2337         {
2338             ADDR_ASSERT_ALWAYS();
2339             valid = FALSE;
2340         }
2341     }
2342 
2343     return valid;
2344 }
2345 
2346 /**
2347 ************************************************************************************************************************
2348 *   Gfx11Lib::HwlComputeSurfaceInfoSanityCheck
2349 *
2350 *   @brief
2351 *       Compute surface info sanity check
2352 *
2353 *   @return
2354 *       Offset
2355 ************************************************************************************************************************
2356 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2357 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoSanityCheck(
2358     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2359     ) const
2360 {
2361     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2362 }
2363 
2364 /**
2365 ************************************************************************************************************************
2366 *   Gfx11Lib::HwlGetPreferredSurfaceSetting
2367 *
2368 *   @brief
2369 *       Internal function to get suggested surface information for cliet to use
2370 *
2371 *   @return
2372 *       ADDR_E_RETURNCODE
2373 ************************************************************************************************************************
2374 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2375 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
2376     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2377     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2378     ) const
2379 {
2380     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2381 
2382     if (pIn->flags.fmask)
2383     {
2384         // There is no FMASK for GFX11 ASICs.
2385         ADDR_ASSERT_ALWAYS();
2386 
2387         returnCode = ADDR_INVALIDPARAMS;
2388     }
2389     else
2390     {
2391         UINT_32 bpp    = pIn->bpp;
2392         UINT_32 width  = Max(pIn->width, 1u);
2393         UINT_32 height = Max(pIn->height, 1u);
2394 
2395         // Set format to INVALID will skip this conversion
2396         if (pIn->format != ADDR_FMT_INVALID)
2397         {
2398             ElemMode elemMode = ADDR_UNCOMPRESSED;
2399             UINT_32 expandX, expandY;
2400 
2401             // Get compression/expansion factors and element mode which indicates compression/expansion
2402             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2403                                                 &elemMode,
2404                                                 &expandX,
2405                                                 &expandY);
2406 
2407             UINT_32 basePitch = 0;
2408             GetElemLib()->AdjustSurfaceInfo(elemMode,
2409                                             expandX,
2410                                             expandY,
2411                                             &bpp,
2412                                             &basePitch,
2413                                             &width,
2414                                             &height);
2415         }
2416 
2417         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2418         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2419         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2420         const BOOL_32 msaa         = numSamples > 1;
2421 
2422         // Pre sanity check on non swizzle mode parameters
2423         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2424         localIn.flags        = pIn->flags;
2425         localIn.resourceType = pIn->resourceType;
2426         localIn.format       = pIn->format;
2427         localIn.bpp          = bpp;
2428         localIn.width        = width;
2429         localIn.height       = height;
2430         localIn.numSlices    = numSlices;
2431         localIn.numMipLevels = numMipLevels;
2432         localIn.numSamples   = numSamples;
2433         localIn.numFrags     = numSamples;
2434 
2435         if (ValidateNonSwModeParams(&localIn))
2436         {
2437             // Forbid swizzle mode(s) by client setting
2438             ADDR2_SWMODE_SET allowedSwModeSet = {};
2439             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx11LinearSwModeMask;
2440             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx11Blk256BSwModeMask;
2441             allowedSwModeSet.value |=
2442                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2443                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx11Blk4KBSwModeMask);
2444             allowedSwModeSet.value |=
2445                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2446                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick4KBSwModeMask : 0);
2447             allowedSwModeSet.value |=
2448                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2449                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask);
2450             allowedSwModeSet.value |=
2451                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2452                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick64KBSwModeMask : 0);
2453             allowedSwModeSet.value |=
2454                 pIn->forbiddenBlock.gfx11.thin256KB ? 0 :
2455                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask);
2456             allowedSwModeSet.value |=
2457                 pIn->forbiddenBlock.gfx11.thick256KB ? 0 :
2458                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick256KBSwModeMask : 0);
2459 
2460             if (pIn->preferredSwSet.value != 0)
2461             {
2462                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx11ZSwModeMask;
2463                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx11StandardSwModeMask;
2464                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx11DisplaySwModeMask;
2465                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx11RenderSwModeMask;
2466             }
2467 
2468             if (pIn->noXor)
2469             {
2470                 allowedSwModeSet.value &= ~Gfx11XorSwModeMask;
2471             }
2472 
2473             if (pIn->maxAlign > 0)
2474             {
2475                 if (pIn->maxAlign < Size256K)
2476                 {
2477                     allowedSwModeSet.value &= ~Gfx11Blk256KBSwModeMask;
2478                 }
2479 
2480                 if (pIn->maxAlign < Size64K)
2481                 {
2482                     allowedSwModeSet.value &= ~Gfx11Blk64KBSwModeMask;
2483                 }
2484 
2485                 if (pIn->maxAlign < Size4K)
2486                 {
2487                     allowedSwModeSet.value &= ~Gfx11Blk4KBSwModeMask;
2488                 }
2489 
2490                 if (pIn->maxAlign < Size256)
2491                 {
2492                     allowedSwModeSet.value &= ~Gfx11Blk256BSwModeMask;
2493                 }
2494             }
2495 
2496             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2497             switch (pIn->resourceType)
2498             {
2499                 case ADDR_RSRC_TEX_1D:
2500                     allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
2501                     break;
2502 
2503                 case ADDR_RSRC_TEX_2D:
2504                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
2505                     break;
2506 
2507                 case ADDR_RSRC_TEX_3D:
2508                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
2509 
2510                     if (pIn->flags.view3dAs2dArray)
2511                     {
2512                         allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask;
2513                     }
2514                     break;
2515 
2516                 default:
2517                     ADDR_ASSERT_ALWAYS();
2518                     allowedSwModeSet.value = 0;
2519                     break;
2520             }
2521 
2522             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2523                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2524                 (bpp > 64)                               ||
2525                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2526             {
2527                 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
2528             }
2529 
2530             if (pIn->format == ADDR_FMT_32_32_32)
2531             {
2532                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2533             }
2534 
2535             if (msaa)
2536             {
2537                 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
2538             }
2539 
2540             if (pIn->flags.depth || pIn->flags.stencil)
2541             {
2542                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2543             }
2544 
2545             if (pIn->flags.display)
2546             {
2547                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2548             }
2549 
2550             if (allowedSwModeSet.value != 0)
2551             {
2552 #if DEBUG
2553                 // Post sanity check, at least AddrLib should accept the output generated by its own
2554                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2555 
2556                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2557                 {
2558                     if (validateSwModeSet & 1)
2559                     {
2560                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2561                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
2562                     }
2563 
2564                     validateSwModeSet >>= 1;
2565                 }
2566 #endif
2567 
2568                 pOut->resourceType   = pIn->resourceType;
2569                 pOut->validSwModeSet = allowedSwModeSet;
2570                 pOut->canXor         = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
2571                 pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2572                 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2573 
2574                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2575 
2576                 if (pOut->clientPreferredSwSet.value == 0)
2577                 {
2578                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
2579                 }
2580 
2581                 // Apply optional restrictions
2582                 if (pIn->flags.needEquation)
2583                 {
2584                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2585                 }
2586 
2587                 if (allowedSwModeSet.value == Gfx11LinearSwModeMask)
2588                 {
2589                     pOut->swizzleMode = ADDR_SW_LINEAR;
2590                 }
2591                 else
2592                 {
2593                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
2594 
2595                     if ((height > 1) && (computeMinSize == FALSE))
2596                     {
2597                         // Always ignore linear swizzle mode if:
2598                         // 1. This is a (2D/3D) resource with height > 1
2599                         // 2. Client doesn't require computing minimize size
2600                         allowedSwModeSet.swLinear = 0;
2601                     }
2602 
2603                     ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2604 
2605                     // Determine block size if there are 2 or more block type candidates
2606                     if (IsPow2(allowedBlockSet.value) == FALSE)
2607                     {
2608                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
2609 
2610                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
2611 
2612                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2613                         {
2614                             swMode[AddrBlockThick4KB]   = ADDR_SW_4KB_S_X;
2615                             swMode[AddrBlockThin64KB]   = ADDR_SW_64KB_R_X;
2616                             swMode[AddrBlockThick64KB]  = ADDR_SW_64KB_S_X;
2617                             swMode[AddrBlockThin256KB]  = ADDR_SW_256KB_R_X;
2618                             swMode[AddrBlockThick256KB] = ADDR_SW_256KB_S_X;
2619                         }
2620                         else
2621                         {
2622                             swMode[AddrBlockMicro]     = ADDR_SW_256B_D;
2623                             swMode[AddrBlockThin4KB]   = ADDR_SW_4KB_D_X;
2624                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_D_X;
2625                             swMode[AddrBlockThin256KB] = ADDR_SW_256KB_D_X;
2626                         }
2627 
2628                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
2629 
2630                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
2631                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
2632                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2633                         UINT_32       minSizeBlk         = AddrBlockMicro;
2634                         UINT_64       minSize            = 0;
2635 
2636                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
2637 
2638                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
2639                         {
2640                             if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
2641                             {
2642                                 localIn.swizzleMode = swMode[i];
2643 
2644                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
2645                                 {
2646                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
2647                                 }
2648                                 else
2649                                 {
2650                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
2651                                 }
2652 
2653                                 if (returnCode == ADDR_OK)
2654                                 {
2655                                     padSize[i] = localOut.surfSize;
2656 
2657                                     if ((minSize == 0) ||
2658                                         BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
2659                                     {
2660                                         minSize    = padSize[i];
2661                                         minSizeBlk = i;
2662                                     }
2663                                 }
2664                                 else
2665                                 {
2666                                     ADDR_ASSERT_ALWAYS();
2667                                     break;
2668                                 }
2669                             }
2670                         }
2671 
2672                         if (pIn->memoryBudget > 1.0)
2673                         {
2674                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
2675                             // smaller-block type again in coming loop
2676                             switch (minSizeBlk)
2677                             {
2678                                 case AddrBlockThick256KB:
2679                                     allowedBlockSet.gfx11.thin256KB = 0;
2680                                 case AddrBlockThin256KB:
2681                                     allowedBlockSet.macroThick64KB = 0;
2682                                 case AddrBlockThick64KB:
2683                                     allowedBlockSet.macroThin64KB = 0;
2684                                 case AddrBlockThin64KB:
2685                                     allowedBlockSet.macroThick4KB = 0;
2686                                 case AddrBlockThick4KB:
2687                                     allowedBlockSet.macroThin4KB = 0;
2688                                 case AddrBlockThin4KB:
2689                                     allowedBlockSet.micro  = 0;
2690                                 case AddrBlockMicro:
2691                                     allowedBlockSet.linear = 0;
2692                                 case AddrBlockLinear:
2693                                     break;
2694 
2695                                 default:
2696                                     ADDR_ASSERT_ALWAYS();
2697                                     break;
2698                             }
2699 
2700                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2701                             {
2702                                 if ((i != minSizeBlk) &&
2703                                     IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
2704                                 {
2705                                     if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
2706                                     {
2707                                         // Clear the block type if the memory waste is unacceptable
2708                                         allowedBlockSet.value &= ~(1u << (i - 1));
2709                                     }
2710                                 }
2711                             }
2712 
2713                             // Remove linear block type if 2 or more block types are allowed
2714                             if (IsPow2(allowedBlockSet.value) == FALSE)
2715                             {
2716                                 allowedBlockSet.linear = 0;
2717                             }
2718 
2719                             // Select the biggest allowed block type
2720                             minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
2721 
2722                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
2723                             {
2724                                 minSizeBlk = AddrBlockLinear;
2725                             }
2726                         }
2727 
2728                         switch (minSizeBlk)
2729                         {
2730                             case AddrBlockLinear:
2731                                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2732                                 break;
2733 
2734                             case AddrBlockMicro:
2735                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2736                                 allowedSwModeSet.value &= Gfx11Blk256BSwModeMask;
2737                                 break;
2738 
2739                             case AddrBlockThin4KB:
2740                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2741                                 allowedSwModeSet.value &= Gfx11Blk4KBSwModeMask;
2742                                 break;
2743 
2744                             case AddrBlockThick4KB:
2745                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2746                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick4KBSwModeMask;
2747                                 break;
2748 
2749                             case AddrBlockThin64KB:
2750                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2751                                                           Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask;
2752                                 break;
2753 
2754                             case AddrBlockThick64KB:
2755                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2756                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick64KBSwModeMask;
2757                                 break;
2758 
2759                             case AddrBlockThin256KB:
2760                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2761                                                           Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask;
2762                                 break;
2763 
2764                             case AddrBlockThick256KB:
2765                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2766                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick256KBSwModeMask;
2767                                 break;
2768 
2769                             default:
2770                                 ADDR_ASSERT_ALWAYS();
2771                                 allowedSwModeSet.value = 0;
2772                                 break;
2773                         }
2774                     }
2775 
2776                     // Block type should be determined.
2777                     ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2778 
2779                     ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2780 
2781                     // Determine swizzle type if there are 2 or more swizzle type candidates
2782                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
2783                     {
2784                         if (ElemLib::IsBlockCompressed(pIn->format))
2785                         {
2786                             if (allowedSwSet.sw_D)
2787                             {
2788                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2789                             }
2790                             else if (allowedSwSet.sw_S)
2791                             {
2792                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2793                             }
2794                             else
2795                             {
2796                                 ADDR_ASSERT(allowedSwSet.sw_R);
2797                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2798                             }
2799                         }
2800                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
2801                         {
2802                             if (allowedSwSet.sw_S)
2803                             {
2804                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2805                             }
2806                             else if (allowedSwSet.sw_D)
2807                             {
2808                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2809                             }
2810                             else
2811                             {
2812                                 ADDR_ASSERT(allowedSwSet.sw_R);
2813                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2814                             }
2815                         }
2816                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2817                         {
2818                             if (allowedSwSet.sw_D)
2819                             {
2820                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2821                             }
2822                             else if (allowedSwSet.sw_S)
2823                             {
2824                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2825                             }
2826                             else if (allowedSwSet.sw_R)
2827                             {
2828                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2829                             }
2830                             else
2831                             {
2832                                 ADDR_ASSERT(allowedSwSet.sw_Z);
2833                                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2834                             }
2835                         }
2836                         else
2837                         {
2838                             if (allowedSwSet.sw_R)
2839                             {
2840                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2841                             }
2842                             else if (allowedSwSet.sw_D)
2843                             {
2844                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2845                             }
2846                             else if (allowedSwSet.sw_Z)
2847                             {
2848                                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2849                             }
2850                             else
2851                             {
2852                                 ADDR_ASSERT_ALWAYS();
2853                             }
2854                         }
2855 
2856                         // Swizzle type should be determined.
2857                         ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2858                     }
2859 
2860                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2861                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2862                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2863                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2864                 }
2865             }
2866             else
2867             {
2868                 // Invalid combination...
2869                 ADDR_ASSERT_ALWAYS();
2870                 returnCode = ADDR_INVALIDPARAMS;
2871             }
2872         }
2873         else
2874         {
2875             // Invalid combination...
2876             ADDR_ASSERT_ALWAYS();
2877             returnCode = ADDR_INVALIDPARAMS;
2878         }
2879     }
2880 
2881     return returnCode;
2882 }
2883 
2884 /**
2885 ************************************************************************************************************************
2886 *   Gfx11Lib::ComputeStereoInfo
2887 *
2888 *   @brief
2889 *       Compute height alignment and right eye pipeBankXor for stereo surface
2890 *
2891 *   @return
2892 *       Error code
2893 *
2894 ************************************************************************************************************************
2895 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const2896 ADDR_E_RETURNCODE Gfx11Lib::ComputeStereoInfo(
2897     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
2898     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
2899     UINT_32*                                pRightXor   ///< Right eye xor
2900     ) const
2901 {
2902     ADDR_E_RETURNCODE ret = ADDR_OK;
2903 
2904     *pRightXor = 0;
2905 
2906     if (IsNonPrtXor(pIn->swizzleMode))
2907     {
2908         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2909         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
2910         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
2911         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
2912         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
2913 
2914         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2915         {
2916             UINT_32 yMax     = 0;
2917             UINT_32 yPosMask = 0;
2918 
2919             // First get "max y bit"
2920             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2921             {
2922                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
2923 
2924                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2925                     (m_equationTable[eqIndex].addr[i].index > yMax))
2926                 {
2927                     yMax = m_equationTable[eqIndex].addr[i].index;
2928                 }
2929 
2930                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
2931                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
2932                     (m_equationTable[eqIndex].xor1[i].index > yMax))
2933                 {
2934                     yMax = m_equationTable[eqIndex].xor1[i].index;
2935                 }
2936 
2937                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
2938                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
2939                     (m_equationTable[eqIndex].xor2[i].index > yMax))
2940                 {
2941                     yMax = m_equationTable[eqIndex].xor2[i].index;
2942                 }
2943             }
2944 
2945             // Then loop again for populating a position mask of "max Y bit"
2946             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2947             {
2948                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2949                     (m_equationTable[eqIndex].addr[i].index == yMax))
2950                 {
2951                     yPosMask |= 1u << i;
2952                 }
2953                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
2954                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
2955                          (m_equationTable[eqIndex].xor1[i].index == yMax))
2956                 {
2957                     yPosMask |= 1u << i;
2958                 }
2959                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
2960                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
2961                          (m_equationTable[eqIndex].xor2[i].index == yMax))
2962                 {
2963                     yPosMask |= 1u << i;
2964                 }
2965             }
2966 
2967             const UINT_32 additionalAlign = 1 << yMax;
2968 
2969             if (additionalAlign >= *pAlignY)
2970             {
2971                 *pAlignY = additionalAlign;
2972 
2973                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2974 
2975                 if ((alignedHeight >> yMax) & 1)
2976                 {
2977                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
2978                 }
2979             }
2980         }
2981         else
2982         {
2983             ret = ADDR_INVALIDPARAMS;
2984         }
2985     }
2986 
2987     return ret;
2988 }
2989 
2990 /**
2991 ************************************************************************************************************************
2992 *   Gfx11Lib::HwlComputeSurfaceInfoTiled
2993 *
2994 *   @brief
2995 *       Internal function to calculate alignment for tiled surface
2996 *
2997 *   @return
2998 *       ADDR_E_RETURNCODE
2999 ************************************************************************************************************************
3000 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3001 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoTiled(
3002      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3003      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3004      ) const
3005 {
3006     ADDR_E_RETURNCODE ret;
3007 
3008     // Mip chain dimesion and epitch has no meaning in GFX11, set to default value
3009     pOut->mipChainPitch    = 0;
3010     pOut->mipChainHeight   = 0;
3011     pOut->mipChainSlice    = 0;
3012     pOut->epitchIsHeight   = FALSE;
3013 
3014     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3015     pOut->mipChainInTail   = FALSE;
3016     pOut->firstMipIdInTail = pIn->numMipLevels;
3017 
3018     if (IsBlock256b(pIn->swizzleMode))
3019     {
3020         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3021     }
3022     else
3023     {
3024         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3025     }
3026 
3027     return ret;
3028 }
3029 
3030 /**
3031 ************************************************************************************************************************
3032 *   Gfx11Lib::ComputeSurfaceInfoMicroTiled
3033 *
3034 *   @brief
3035 *       Internal function to calculate alignment for micro tiled surface
3036 *
3037 *   @return
3038 *       ADDR_E_RETURNCODE
3039 ************************************************************************************************************************
3040 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3041 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMicroTiled(
3042      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3043      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3044      ) const
3045 {
3046     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3047                                                          &pOut->blockHeight,
3048                                                          &pOut->blockSlices,
3049                                                          pIn->bpp,
3050                                                          pIn->numSamples,
3051                                                          pIn->resourceType,
3052                                                          pIn->swizzleMode);
3053 
3054     if (ret == ADDR_OK)
3055     {
3056         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3057 
3058         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3059         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3060         pOut->numSlices = pIn->numSlices;
3061         pOut->baseAlign = blockSize;
3062 
3063         if (pIn->numMipLevels > 1)
3064         {
3065             const UINT_32 mip0Width    = pIn->width;
3066             const UINT_32 mip0Height   = pIn->height;
3067             UINT_64       mipSliceSize = 0;
3068 
3069             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3070             {
3071                 UINT_32 mipWidth, mipHeight;
3072 
3073                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3074 
3075                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3076                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3077 
3078                 if (pOut->pMipInfo != NULL)
3079                 {
3080                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3081                     pOut->pMipInfo[i].height           = mipActualHeight;
3082                     pOut->pMipInfo[i].depth            = 1;
3083                     pOut->pMipInfo[i].offset           = mipSliceSize;
3084                     pOut->pMipInfo[i].mipTailOffset    = 0;
3085                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3086                 }
3087 
3088                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3089             }
3090 
3091             pOut->sliceSize = mipSliceSize;
3092             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3093         }
3094         else
3095         {
3096             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3097             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3098 
3099             if (pOut->pMipInfo != NULL)
3100             {
3101                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3102                 pOut->pMipInfo[0].height           = pOut->height;
3103                 pOut->pMipInfo[0].depth            = 1;
3104                 pOut->pMipInfo[0].offset           = 0;
3105                 pOut->pMipInfo[0].mipTailOffset    = 0;
3106                 pOut->pMipInfo[0].macroBlockOffset = 0;
3107             }
3108         }
3109 
3110     }
3111 
3112     return ret;
3113 }
3114 
3115 /**
3116 ************************************************************************************************************************
3117 *   Gfx11Lib::ComputeSurfaceInfoMacroTiled
3118 *
3119 *   @brief
3120 *       Internal function to calculate alignment for macro tiled surface
3121 *
3122 *   @return
3123 *       ADDR_E_RETURNCODE
3124 ************************************************************************************************************************
3125 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3126 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMacroTiled(
3127      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3128      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3129      ) const
3130 {
3131     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3132                                                                 &pOut->blockHeight,
3133                                                                 &pOut->blockSlices,
3134                                                                 pIn->bpp,
3135                                                                 pIn->numSamples,
3136                                                                 pIn->resourceType,
3137                                                                 pIn->swizzleMode);
3138 
3139     if (returnCode == ADDR_OK)
3140     {
3141         UINT_32 heightAlign = pOut->blockHeight;
3142 
3143         if (pIn->flags.qbStereo)
3144         {
3145             UINT_32 rightXor = 0;
3146 
3147             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3148 
3149             if (returnCode == ADDR_OK)
3150             {
3151                 pOut->pStereoInfo->rightSwizzle = rightXor;
3152             }
3153         }
3154 
3155         if (returnCode == ADDR_OK)
3156         {
3157             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3158             const UINT_32 blockSize     = 1 << blockSizeLog2;
3159 
3160             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3161             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3162             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3163             pOut->baseAlign = blockSize;
3164 
3165             if (pIn->numMipLevels > 1)
3166             {
3167                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3168                                                                 pIn->swizzleMode,
3169                                                                 pOut->blockWidth,
3170                                                                 pOut->blockHeight,
3171                                                                 pOut->blockSlices);
3172                 const UINT_32 mip0Width         = pIn->width;
3173                 const UINT_32 mip0Height        = pIn->height;
3174                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3175                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3176                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3177                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3178                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3179                 UINT_64       mipChainSliceSize = 0;
3180                 UINT_64       mipSize[MaxMipLevels];
3181                 UINT_64       mipSliceSize[MaxMipLevels];
3182 
3183                 Dim3d fixedTailMaxDim = tailMaxDim;
3184 
3185                 if ((IsZOrderSwizzle(pIn->swizzleMode) || IsRtOptSwizzle(pIn->swizzleMode)) && (index <= 1))
3186                 {
3187                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3188                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3189                 }
3190 
3191                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3192                 {
3193                     UINT_32 mipWidth, mipHeight, mipDepth;
3194 
3195                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3196 
3197                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3198                     {
3199                         firstMipInTail     = i;
3200                         mipChainSliceSize += blockSize / pOut->blockSlices;
3201                         break;
3202                     }
3203                     else
3204                     {
3205                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3206                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3207                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3208                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3209 
3210                         mipSize[i]         = sliceSize * depth;
3211                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3212                         mipChainSliceSize += sliceSize;
3213 
3214                         if (pOut->pMipInfo != NULL)
3215                         {
3216                             pOut->pMipInfo[i].pitch  = pitch;
3217                             pOut->pMipInfo[i].height = height;
3218                             pOut->pMipInfo[i].depth  = depth;
3219                         }
3220                     }
3221                 }
3222 
3223                 pOut->sliceSize        = mipChainSliceSize;
3224                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3225                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3226                 pOut->firstMipIdInTail = firstMipInTail;
3227 
3228                 if (pOut->pMipInfo != NULL)
3229                 {
3230                     UINT_64 offset         = 0;
3231                     UINT_64 macroBlkOffset = 0;
3232                     UINT_32 tailMaxDepth   = 0;
3233 
3234                     if (firstMipInTail != pIn->numMipLevels)
3235                     {
3236                         UINT_32 mipWidth, mipHeight;
3237 
3238                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3239                                    &mipWidth, &mipHeight, &tailMaxDepth);
3240 
3241                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3242                         macroBlkOffset = blockSize;
3243                     }
3244 
3245                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3246                     {
3247                         pOut->pMipInfo[i].offset           = offset;
3248                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3249                         pOut->pMipInfo[i].mipTailOffset    = 0;
3250 
3251                         offset         += mipSize[i];
3252                         macroBlkOffset += mipSliceSize[i];
3253                     }
3254 
3255                     UINT_32 pitch  = tailMaxDim.w;
3256                     UINT_32 height = tailMaxDim.h;
3257                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3258 
3259                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3260 
3261                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3262                     {
3263                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3264                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3265 
3266                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3267                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3268                         pOut->pMipInfo[i].macroBlockOffset = 0;
3269 
3270                         pOut->pMipInfo[i].pitch  = pitch;
3271                         pOut->pMipInfo[i].height = height;
3272                         pOut->pMipInfo[i].depth  = depth;
3273 
3274                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3275                                        ((mipOffset >> 10) & 2)  |
3276                                        ((mipOffset >> 11) & 4)  |
3277                                        ((mipOffset >> 12) & 8)  |
3278                                        ((mipOffset >> 13) & 16) |
3279                                        ((mipOffset >> 14) & 32);
3280                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3281                                        ((mipOffset >> 9)  & 2)  |
3282                                        ((mipOffset >> 10) & 4)  |
3283                                        ((mipOffset >> 11) & 8)  |
3284                                        ((mipOffset >> 12) & 16) |
3285                                        ((mipOffset >> 13) & 32);
3286 
3287                         if (blockSizeLog2 & 1)
3288                         {
3289                             const UINT_32 temp = mipX;
3290                             mipX = mipY;
3291                             mipY = temp;
3292 
3293                             if (index & 1)
3294                             {
3295                                 mipY = (mipY << 1) | (mipX & 1);
3296                                 mipX = mipX >> 1;
3297                             }
3298                         }
3299 
3300                         if (isThin)
3301                         {
3302                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3303                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3304                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3305 
3306                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3307                             height = Max(height >> 1, Block256_2d[index].h);
3308                             depth  = 1;
3309                         }
3310                         else
3311                         {
3312                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3313                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3314                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3315 
3316                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3317                             height = Max(height >> 1, Block256_3d[index].h);
3318                             depth  = PowTwoAlign(Max(depth  >> 1, 1u), Block256_3d[index].d);
3319                         }
3320                     }
3321                 }
3322             }
3323             else
3324             {
3325                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numSamples;
3326                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3327 
3328                 if (pOut->pMipInfo != NULL)
3329                 {
3330                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3331                     pOut->pMipInfo[0].height           = pOut->height;
3332                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3333                     pOut->pMipInfo[0].offset           = 0;
3334                     pOut->pMipInfo[0].mipTailOffset    = 0;
3335                     pOut->pMipInfo[0].macroBlockOffset = 0;
3336                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3337                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3338                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3339                 }
3340             }
3341         }
3342     }
3343 
3344     return returnCode;
3345 }
3346 
3347 /**
3348 ************************************************************************************************************************
3349 *   Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled
3350 *
3351 *   @brief
3352 *       Internal function to calculate address from coord for tiled swizzle surface
3353 *
3354 *   @return
3355 *       ADDR_E_RETURNCODE
3356 ************************************************************************************************************************
3357 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3358 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
3359      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3360      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3361      ) const
3362 {
3363     ADDR_E_RETURNCODE ret;
3364 
3365     if (IsBlock256b(pIn->swizzleMode))
3366     {
3367         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3368     }
3369     else
3370     {
3371         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3372     }
3373 
3374     return ret;
3375 }
3376 
3377 /**
3378 ************************************************************************************************************************
3379 *   Gfx11Lib::ComputeOffsetFromEquation
3380 *
3381 *   @brief
3382 *       Compute offset from equation
3383 *
3384 *   @return
3385 *       Offset
3386 ************************************************************************************************************************
3387 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3388 UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
3389     const ADDR_EQUATION* pEq,   ///< Equation
3390     UINT_32              x,     ///< x coord in bytes
3391     UINT_32              y,     ///< y coord in pixel
3392     UINT_32              z      ///< z coord in slice
3393     ) const
3394 {
3395     UINT_32 offset = 0;
3396 
3397     for (UINT_32 i = 0; i < pEq->numBits; i++)
3398     {
3399         UINT_32 v = 0;
3400 
3401         if (pEq->addr[i].valid)
3402         {
3403             if (pEq->addr[i].channel == 0)
3404             {
3405                 v ^= (x >> pEq->addr[i].index) & 1;
3406             }
3407             else if (pEq->addr[i].channel == 1)
3408             {
3409                 v ^= (y >> pEq->addr[i].index) & 1;
3410             }
3411             else
3412             {
3413                 ADDR_ASSERT(pEq->addr[i].channel == 2);
3414                 v ^= (z >> pEq->addr[i].index) & 1;
3415             }
3416         }
3417 
3418         if (pEq->xor1[i].valid)
3419         {
3420             if (pEq->xor1[i].channel == 0)
3421             {
3422                 v ^= (x >> pEq->xor1[i].index) & 1;
3423             }
3424             else if (pEq->xor1[i].channel == 1)
3425             {
3426                 v ^= (y >> pEq->xor1[i].index) & 1;
3427             }
3428             else
3429             {
3430                 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3431                 v ^= (z >> pEq->xor1[i].index) & 1;
3432             }
3433         }
3434 
3435         if (pEq->xor2[i].valid)
3436         {
3437             if (pEq->xor2[i].channel == 0)
3438             {
3439                 v ^= (x >> pEq->xor2[i].index) & 1;
3440             }
3441             else if (pEq->xor2[i].channel == 1)
3442             {
3443                 v ^= (y >> pEq->xor2[i].index) & 1;
3444             }
3445             else
3446             {
3447                 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3448                 v ^= (z >> pEq->xor2[i].index) & 1;
3449             }
3450         }
3451 
3452         offset |= (v << i);
3453     }
3454 
3455     return offset;
3456 }
3457 
3458 /**
3459 ************************************************************************************************************************
3460 *   Gfx11Lib::ComputeOffsetFromSwizzlePattern
3461 *
3462 *   @brief
3463 *       Compute offset from swizzle pattern
3464 *
3465 *   @return
3466 *       Offset
3467 ************************************************************************************************************************
3468 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3469 UINT_32 Gfx11Lib::ComputeOffsetFromSwizzlePattern(
3470     const UINT_64* pPattern,    ///< Swizzle pattern
3471     UINT_32        numBits,     ///< Number of bits in pattern
3472     UINT_32        x,           ///< x coord in pixel
3473     UINT_32        y,           ///< y coord in pixel
3474     UINT_32        z,           ///< z coord in slice
3475     UINT_32        s            ///< sample id
3476     ) const
3477 {
3478     UINT_32                 offset          = 0;
3479     const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3480 
3481     for (UINT_32 i = 0; i < numBits; i++)
3482     {
3483         UINT_32 v = 0;
3484 
3485         if (pSwizzlePattern[i].x != 0)
3486         {
3487             UINT_16 mask  = pSwizzlePattern[i].x;
3488             UINT_32 xBits = x;
3489 
3490             while (mask != 0)
3491             {
3492                 if (mask & 1)
3493                 {
3494                     v ^= xBits & 1;
3495                 }
3496 
3497                 xBits >>= 1;
3498                 mask  >>= 1;
3499             }
3500         }
3501 
3502         if (pSwizzlePattern[i].y != 0)
3503         {
3504             UINT_16 mask  = pSwizzlePattern[i].y;
3505             UINT_32 yBits = y;
3506 
3507             while (mask != 0)
3508             {
3509                 if (mask & 1)
3510                 {
3511                     v ^= yBits & 1;
3512                 }
3513 
3514                 yBits >>= 1;
3515                 mask  >>= 1;
3516             }
3517         }
3518 
3519         if (pSwizzlePattern[i].z != 0)
3520         {
3521             UINT_16 mask  = pSwizzlePattern[i].z;
3522             UINT_32 zBits = z;
3523 
3524             while (mask != 0)
3525             {
3526                 if (mask & 1)
3527                 {
3528                     v ^= zBits & 1;
3529                 }
3530 
3531                 zBits >>= 1;
3532                 mask  >>= 1;
3533             }
3534         }
3535 
3536         if (pSwizzlePattern[i].s != 0)
3537         {
3538             UINT_16 mask  = pSwizzlePattern[i].s;
3539             UINT_32 sBits = s;
3540 
3541             while (mask != 0)
3542             {
3543                 if (mask & 1)
3544                 {
3545                     v ^= sBits & 1;
3546                 }
3547 
3548                 sBits >>= 1;
3549                 mask  >>= 1;
3550             }
3551         }
3552 
3553         offset |= (v << i);
3554     }
3555 
3556     return offset;
3557 }
3558 
3559 /**
3560 ************************************************************************************************************************
3561 *   Gfx11Lib::GetSwizzlePatternInfo
3562 *
3563 *   @brief
3564 *       Get swizzle pattern
3565 *
3566 *   @return
3567 *       Swizzle pattern information
3568 ************************************************************************************************************************
3569 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3570 const ADDR_SW_PATINFO* Gfx11Lib::GetSwizzlePatternInfo(
3571     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
3572     AddrResourceType resourceType,      ///< Resource type
3573     UINT_32          elemLog2,          ///< Element size in bytes log2
3574     UINT_32          numFrag            ///< Number of fragment
3575     ) const
3576 {
3577     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3578     const ADDR_SW_PATINFO* patInfo     = NULL;
3579     const UINT_32          swizzleMask = 1 << swizzleMode;
3580     const BOOL_32          isBlock256k = IsBlock256kb(swizzleMode);
3581     const BOOL_32          isBlock64K  = IsBlock64kb(swizzleMode);
3582 
3583     if (IsLinear(swizzleMode) == FALSE)
3584     {
3585         if (resourceType == ADDR_RSRC_TEX_3D)
3586         {
3587             ADDR_ASSERT(numFrag == 1);
3588 
3589             if ((swizzleMask & Gfx11Rsrc3dSwModeMask) != 0)
3590             {
3591                 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3592                 {
3593                     if (isBlock256k)
3594                     {
3595                         ADDR_ASSERT((swizzleMode == ADDR_SW_256KB_Z_X) || (swizzleMode == ADDR_SW_256KB_R_X));
3596                         patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
3597                     }
3598                     else if (isBlock64K)
3599                     {
3600                         ADDR_ASSERT((swizzleMode == ADDR_SW_64KB_Z_X) || (swizzleMode == ADDR_SW_64KB_R_X));
3601                         patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
3602                     }
3603                     else
3604                     {
3605                         ADDR_ASSERT_ALWAYS();
3606                     }
3607                 }
3608                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3609                 {
3610                     if (isBlock256k)
3611                     {
3612                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
3613                         // patInfo = GFX11_SW_256K_D3_X_PATINFO;
3614                     }
3615                     else if (isBlock64K)
3616                     {
3617                         ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3618                         patInfo = GFX11_SW_64K_D3_X_PATINFO;
3619                     }
3620                     else
3621                     {
3622                         ADDR_ASSERT_ALWAYS();
3623                     }
3624                 }
3625                 else
3626                 {
3627                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3628 
3629                     if (isBlock256k)
3630                     {
3631                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_S_X);
3632                         patInfo = GFX11_SW_256K_S3_X_PATINFO;
3633                     }
3634                     else if (isBlock64K)
3635                     {
3636                         if (swizzleMode == ADDR_SW_64KB_S)
3637                         {
3638                             patInfo = GFX11_SW_64K_S3_PATINFO;
3639                         }
3640                         else if (swizzleMode == ADDR_SW_64KB_S_X)
3641                         {
3642                             patInfo = GFX11_SW_64K_S3_X_PATINFO;
3643                         }
3644                         else if (swizzleMode == ADDR_SW_64KB_S_T)
3645                         {
3646                             patInfo = GFX11_SW_64K_S3_T_PATINFO;
3647                         }
3648                         else
3649                         {
3650                             ADDR_ASSERT_ALWAYS();
3651                         }
3652                     }
3653                     else if (IsBlock4kb(swizzleMode))
3654                     {
3655                         if (swizzleMode == ADDR_SW_4KB_S)
3656                         {
3657                             patInfo = GFX11_SW_4K_S3_PATINFO;
3658                         }
3659                         else if (swizzleMode == ADDR_SW_4KB_S_X)
3660                         {
3661                             patInfo = GFX11_SW_4K_S3_X_PATINFO;
3662                         }
3663                         else
3664                         {
3665                             ADDR_ASSERT_ALWAYS();
3666                         }
3667                     }
3668                     else
3669                     {
3670                         ADDR_ASSERT_ALWAYS();
3671                     }
3672                 }
3673             }
3674         }
3675         else
3676         {
3677             if ((swizzleMask & Gfx11Rsrc2dSwModeMask) != 0)
3678             {
3679                 if (IsBlock256b(swizzleMode))
3680                 {
3681                     ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3682                     patInfo = GFX11_SW_256_D_PATINFO;
3683                 }
3684                 else if (IsBlock4kb(swizzleMode))
3685                 {
3686                     if (swizzleMode == ADDR_SW_4KB_D)
3687                     {
3688                         patInfo = GFX11_SW_4K_D_PATINFO;
3689                     }
3690                     else if (swizzleMode == ADDR_SW_4KB_D_X)
3691                     {
3692                         patInfo = GFX11_SW_4K_D_X_PATINFO;
3693                     }
3694                     else
3695                     {
3696                         ADDR_ASSERT_ALWAYS();
3697                     }
3698                 }
3699                 else if (isBlock64K)
3700                 {
3701                     if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3702                     {
3703                         if (numFrag == 1)
3704                         {
3705                             patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
3706                         }
3707                         else if (numFrag == 2)
3708                         {
3709                             patInfo = GFX11_SW_64K_ZR_X_2xaa_PATINFO;
3710                         }
3711                         else if (numFrag == 4)
3712                         {
3713                             patInfo = GFX11_SW_64K_ZR_X_4xaa_PATINFO;
3714                         }
3715                         else if (numFrag == 8)
3716                         {
3717                             patInfo = GFX11_SW_64K_ZR_X_8xaa_PATINFO;
3718                         }
3719                         else
3720                         {
3721                             ADDR_ASSERT_ALWAYS();
3722                         }
3723                     }
3724                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
3725                     {
3726                         if (swizzleMode == ADDR_SW_64KB_D)
3727                         {
3728                             patInfo = GFX11_SW_64K_D_PATINFO;
3729                         }
3730                         else if (swizzleMode == ADDR_SW_64KB_D_X)
3731                         {
3732                             patInfo = GFX11_SW_64K_D_X_PATINFO;
3733                         }
3734                         else if (swizzleMode == ADDR_SW_64KB_D_T)
3735                         {
3736                             patInfo = GFX11_SW_64K_D_T_PATINFO;
3737                         }
3738                         else
3739                         {
3740                             ADDR_ASSERT_ALWAYS();
3741                         }
3742                     }
3743                     else
3744                     {
3745                         ADDR_ASSERT_ALWAYS();
3746                     }
3747                 }
3748                 else if (isBlock256k)
3749                 {
3750                     if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3751                     {
3752                         if (numFrag == 1)
3753                         {
3754                             patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
3755                         }
3756                         else if (numFrag == 2)
3757                         {
3758                             patInfo = GFX11_SW_256K_ZR_X_2xaa_PATINFO;
3759                         }
3760                         else if (numFrag == 4)
3761                         {
3762                             patInfo = GFX11_SW_256K_ZR_X_4xaa_PATINFO;
3763                         }
3764                         else if (numFrag == 8)
3765                         {
3766                             patInfo = GFX11_SW_256K_ZR_X_8xaa_PATINFO;
3767                         }
3768                         else
3769                         {
3770                             ADDR_ASSERT_ALWAYS();
3771                         }
3772                     }
3773                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
3774                     {
3775                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
3776                         patInfo = GFX11_SW_256K_D_X_PATINFO;
3777                     }
3778                     else
3779                     {
3780                         ADDR_ASSERT_ALWAYS();
3781                     }
3782                 }
3783                 else
3784                 {
3785                     ADDR_ASSERT_ALWAYS();
3786                 }
3787             }
3788         }
3789     }
3790 
3791     return (patInfo != NULL) ? &patInfo[index] : NULL;
3792 }
3793 
3794 /**
3795 ************************************************************************************************************************
3796 *   Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled
3797 *
3798 *   @brief
3799 *       Internal function to calculate address from coord for micro tiled swizzle surface
3800 *
3801 *   @return
3802 *       ADDR_E_RETURNCODE
3803 ************************************************************************************************************************
3804 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3805 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3806      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3807      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3808      ) const
3809 {
3810     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
3811     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3812     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
3813 
3814     localIn.swizzleMode  = pIn->swizzleMode;
3815     localIn.flags        = pIn->flags;
3816     localIn.resourceType = pIn->resourceType;
3817     localIn.bpp          = pIn->bpp;
3818     localIn.width        = Max(pIn->unalignedWidth,  1u);
3819     localIn.height       = Max(pIn->unalignedHeight, 1u);
3820     localIn.numSlices    = Max(pIn->numSlices,       1u);
3821     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
3822     localIn.numSamples   = Max(pIn->numSamples,      1u);
3823     localIn.numFrags     = localIn.numSamples;
3824     localOut.pMipInfo    = mipInfo;
3825 
3826     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3827 
3828     if (ret == ADDR_OK)
3829     {
3830         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3831         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3832         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
3833         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
3834 
3835         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3836         {
3837             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3838             const UINT_32 yb           = pIn->y / localOut.blockHeight;
3839             const UINT_32 xb           = pIn->x / localOut.blockWidth;
3840             const UINT_32 blockIndex   = yb * pb + xb;
3841             const UINT_32 blockSize    = 256;
3842             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3843                                                                    pIn->x << elemLog2,
3844                                                                    pIn->y,
3845                                                                    0);
3846             pOut->addr = localOut.sliceSize * pIn->slice +
3847                          mipInfo[pIn->mipId].macroBlockOffset +
3848                          (blockIndex * blockSize) +
3849                          blk256Offset;
3850         }
3851         else
3852         {
3853             ret = ADDR_INVALIDPARAMS;
3854         }
3855     }
3856 
3857     return ret;
3858 }
3859 
3860 /**
3861 ************************************************************************************************************************
3862 *   Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled
3863 *
3864 *   @brief
3865 *       Internal function to calculate address from coord for macro tiled swizzle surface
3866 *
3867 *   @return
3868 *       ADDR_E_RETURNCODE
3869 ************************************************************************************************************************
3870 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3871 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3872      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3873      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3874      ) const
3875 {
3876     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
3877     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3878     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
3879 
3880     localIn.swizzleMode  = pIn->swizzleMode;
3881     localIn.flags        = pIn->flags;
3882     localIn.resourceType = pIn->resourceType;
3883     localIn.bpp          = pIn->bpp;
3884     localIn.width        = Max(pIn->unalignedWidth,  1u);
3885     localIn.height       = Max(pIn->unalignedHeight, 1u);
3886     localIn.numSlices    = Max(pIn->numSlices,       1u);
3887     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
3888     localIn.numSamples   = Max(pIn->numSamples,      1u);
3889     localIn.numFrags     = localIn.numSamples;
3890     localOut.pMipInfo    = mipInfo;
3891 
3892     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
3893 
3894     if (ret == ADDR_OK)
3895     {
3896         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3897         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3898         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
3899         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
3900         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
3901         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
3902                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
3903 
3904         if (localIn.numSamples > 1)
3905         {
3906             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3907                                                                     pIn->resourceType,
3908                                                                     elemLog2,
3909                                                                     localIn.numSamples);
3910 
3911             if (pPatInfo != NULL)
3912             {
3913                 const UINT_32 pb     = localOut.pitch / localOut.blockWidth;
3914                 const UINT_32 yb     = pIn->y / localOut.blockHeight;
3915                 const UINT_32 xb     = pIn->x / localOut.blockWidth;
3916                 const UINT_64 blkIdx = yb * pb + xb;
3917 
3918                 ADDR_BIT_SETTING fullSwizzlePattern[20];
3919                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3920 
3921                 const UINT_32 blkOffset =
3922                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
3923                                                     blkSizeLog2,
3924                                                     pIn->x,
3925                                                     pIn->y,
3926                                                     pIn->slice,
3927                                                     pIn->sample);
3928 
3929                 pOut->addr = (localOut.sliceSize * pIn->slice) +
3930                              (blkIdx << blkSizeLog2) +
3931                              (blkOffset ^ pipeBankXor);
3932             }
3933             else
3934             {
3935                 ret = ADDR_INVALIDPARAMS;
3936             }
3937         }
3938         else
3939         {
3940             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
3941             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
3942             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
3943 
3944             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3945             {
3946                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
3947                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
3948                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
3949                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
3950                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
3951                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
3952                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
3953                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3954                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
3955                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
3956                 const UINT_64 blkIdx    = yb * pb + xb;
3957                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3958                                                                     x << elemLog2,
3959                                                                     y,
3960                                                                     z);
3961                 pOut->addr = sliceSize * sliceId +
3962                              mipInfo[pIn->mipId].macroBlockOffset +
3963                              (blkIdx << blkSizeLog2) +
3964                              (blkOffset ^ pipeBankXor);
3965             }
3966             else
3967             {
3968                 ret = ADDR_INVALIDPARAMS;
3969             }
3970         }
3971     }
3972 
3973     return ret;
3974 }
3975 
3976 /**
3977 ************************************************************************************************************************
3978 *   Gfx11Lib::HwlComputeMaxBaseAlignments
3979 *
3980 *   @brief
3981 *       Gets maximum alignments
3982 *   @return
3983 *       maximum alignments
3984 ************************************************************************************************************************
3985 */
HwlComputeMaxBaseAlignments() const3986 UINT_32 Gfx11Lib::HwlComputeMaxBaseAlignments() const
3987 {
3988     return Size256K;
3989 }
3990 
3991 /**
3992 ************************************************************************************************************************
3993 *   Gfx11Lib::HwlComputeMaxMetaBaseAlignments
3994 *
3995 *   @brief
3996 *       Gets maximum alignments for metadata
3997 *   @return
3998 *       maximum alignments for metadata
3999 ************************************************************************************************************************
4000 */
HwlComputeMaxMetaBaseAlignments() const4001 UINT_32 Gfx11Lib::HwlComputeMaxMetaBaseAlignments() const
4002 {
4003     Dim3d metaBlk;
4004 
4005     // Max base alignment for Htile
4006     const AddrSwizzleMode ValidSwizzleModeForHtile[] =
4007     {
4008         ADDR_SW_64KB_Z_X,
4009         ADDR_SW_256KB_Z_X,
4010     };
4011 
4012     UINT_32 maxBaseAlignHtile = 0;
4013 
4014     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForHtile) / sizeof(ValidSwizzleModeForHtile[0]); swIdx++)
4015     {
4016         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4017         {
4018             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4019             {
4020                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx11DataDepthStencil,
4021                                                                 ADDR_RSRC_TEX_2D,
4022                                                                 ValidSwizzleModeForHtile[swIdx],
4023                                                                 bppLog2,
4024                                                                 numFragLog2,
4025                                                                 TRUE,
4026                                                                 &metaBlk);
4027 
4028                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4029             }
4030         }
4031     }
4032 
4033     // Max base alignment for 2D Dcc
4034     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4035     {
4036         ADDR_SW_64KB_R_X,
4037         ADDR_SW_256KB_R_X,
4038     };
4039 
4040     UINT_32 maxBaseAlignDcc2D = 0;
4041 
4042     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4043     {
4044         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4045         {
4046             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4047             {
4048                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx11DataColor,
4049                                                              ADDR_RSRC_TEX_2D,
4050                                                              ValidSwizzleModeForDcc2D[swIdx],
4051                                                              bppLog2,
4052                                                              numFragLog2,
4053                                                              TRUE,
4054                                                              &metaBlk);
4055 
4056                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4057             }
4058         }
4059     }
4060 
4061     // Max base alignment for 3D Dcc
4062     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4063     {
4064         ADDR_SW_64KB_S_X,
4065         ADDR_SW_64KB_D_X,
4066         ADDR_SW_64KB_R_X,
4067         ADDR_SW_256KB_S_X,
4068         ADDR_SW_256KB_D_X,
4069         ADDR_SW_256KB_R_X,
4070     };
4071 
4072     UINT_32 maxBaseAlignDcc3D = 0;
4073 
4074     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4075     {
4076         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4077         {
4078             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx11DataColor,
4079                                                          ADDR_RSRC_TEX_3D,
4080                                                          ValidSwizzleModeForDcc3D[swIdx],
4081                                                          bppLog2,
4082                                                          0,
4083                                                          TRUE,
4084                                                          &metaBlk);
4085 
4086             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4087         }
4088     }
4089 
4090     return Max(maxBaseAlignHtile, Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4091 }
4092 
4093 /**
4094 ************************************************************************************************************************
4095 *   Gfx11Lib::GetMetaElementSizeLog2
4096 *
4097 *   @brief
4098 *       Gets meta data element size log2
4099 *   @return
4100 *       Meta data element size log2
4101 ************************************************************************************************************************
4102 */
GetMetaElementSizeLog2(Gfx11DataType dataType)4103 INT_32 Gfx11Lib::GetMetaElementSizeLog2(
4104     Gfx11DataType dataType) ///< Data surface type
4105 {
4106     INT_32 elemSizeLog2 = 0;
4107 
4108     if (dataType == Gfx11DataColor)
4109     {
4110         elemSizeLog2 = 0;
4111     }
4112     else
4113     {
4114         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4115         elemSizeLog2 = 2;
4116     }
4117 
4118     return elemSizeLog2;
4119 }
4120 
4121 /**
4122 ************************************************************************************************************************
4123 *   Gfx11Lib::GetMetaCacheSizeLog2
4124 *
4125 *   @brief
4126 *       Gets meta data cache line size log2
4127 *   @return
4128 *       Meta data cache line size log2
4129 ************************************************************************************************************************
4130 */
GetMetaCacheSizeLog2(Gfx11DataType dataType)4131 INT_32 Gfx11Lib::GetMetaCacheSizeLog2(
4132     Gfx11DataType dataType) ///< Data surface type
4133 {
4134     INT_32 cacheSizeLog2 = 0;
4135 
4136     if (dataType == Gfx11DataColor)
4137     {
4138         cacheSizeLog2 = 6;
4139     }
4140     else
4141     {
4142         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4143         cacheSizeLog2 = 8;
4144     }
4145 
4146     return cacheSizeLog2;
4147 }
4148 
4149 /**
4150 ************************************************************************************************************************
4151 *   Gfx11Lib::HwlComputeSurfaceInfoLinear
4152 *
4153 *   @brief
4154 *       Internal function to calculate alignment for linear surface
4155 *
4156 *   @return
4157 *       ADDR_E_RETURNCODE
4158 ************************************************************************************************************************
4159 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4160 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoLinear(
4161      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4162      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4163      ) const
4164 {
4165     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4166 
4167     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4168     {
4169         returnCode = ADDR_INVALIDPARAMS;
4170     }
4171     else
4172     {
4173         const UINT_32 elementBytes = pIn->bpp >> 3;
4174         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4175         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4176         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4177         UINT_32       actualHeight = pIn->height;
4178         UINT_64       sliceSize    = 0;
4179 
4180         if (pIn->numMipLevels > 1)
4181         {
4182             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4183             {
4184                 UINT_32 mipWidth, mipHeight;
4185 
4186                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4187 
4188                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4189 
4190                 if (pOut->pMipInfo != NULL)
4191                 {
4192                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4193                     pOut->pMipInfo[i].height           = mipHeight;
4194                     pOut->pMipInfo[i].depth            = mipDepth;
4195                     pOut->pMipInfo[i].offset           = sliceSize;
4196                     pOut->pMipInfo[i].mipTailOffset    = 0;
4197                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4198                 }
4199 
4200                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4201             }
4202         }
4203         else
4204         {
4205             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4206 
4207             if (returnCode == ADDR_OK)
4208             {
4209                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4210 
4211                 if (pOut->pMipInfo != NULL)
4212                 {
4213                     pOut->pMipInfo[0].pitch            = pitch;
4214                     pOut->pMipInfo[0].height           = actualHeight;
4215                     pOut->pMipInfo[0].depth            = mipDepth;
4216                     pOut->pMipInfo[0].offset           = 0;
4217                     pOut->pMipInfo[0].mipTailOffset    = 0;
4218                     pOut->pMipInfo[0].macroBlockOffset = 0;
4219                 }
4220             }
4221         }
4222 
4223         if (returnCode == ADDR_OK)
4224         {
4225             pOut->pitch          = pitch;
4226             pOut->height         = actualHeight;
4227             pOut->numSlices      = pIn->numSlices;
4228             pOut->sliceSize      = sliceSize;
4229             pOut->surfSize       = sliceSize * pOut->numSlices;
4230             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4231             pOut->blockWidth     = pitchAlign;
4232             pOut->blockHeight    = 1;
4233             pOut->blockSlices    = 1;
4234 
4235             // Following members are useless on GFX11
4236             pOut->mipChainPitch  = 0;
4237             pOut->mipChainHeight = 0;
4238             pOut->mipChainSlice  = 0;
4239             pOut->epitchIsHeight = FALSE;
4240 
4241             // Post calculation validate
4242             ADDR_ASSERT(pOut->sliceSize > 0);
4243         }
4244     }
4245 
4246     return returnCode;
4247 }
4248 
4249 } // V2
4250 } // Addr
4251