• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx11addrlib.cpp
12 * @brief Contain the implementation for the Gfx11Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx11addrlib.h"
17 #include "gfx11_gb_reg.h"
18 #include "addrswizzler.h"
19 
20 #include "amdgpu_asic_addr.h"
21 
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24 
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 *   Gfx11HwlInit
30 *
31 *   @brief
32 *       Creates an Gfx11Lib object.
33 *
34 *   @return
35 *       Returns an Gfx11Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx11HwlInit(const Client * pClient)38 Addr::Lib* Gfx11HwlInit(const Client* pClient)
39 {
40     return V2::Gfx11Lib::CreateObj(pClient);
41 }
42 
43 namespace V2
44 {
45 
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 //                               Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 
50 const SwizzleModeFlags Gfx11Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
51 {//Linear 256B  4KB  64KB  256KB   Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
52     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
53     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
54     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
55     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
56 
57     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
58     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
59     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
60     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
61 
62     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
63     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
64     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
65     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
66 
67     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
68     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
71 
72     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
73     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
74     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
75     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
76 
77     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
78     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
79     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
80     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
81 
82     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
83     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
84     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
85     {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X
86 
87     {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_Z_X
88     {{0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_S_X
89     {{0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_D_X
90     {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_256KB_R_X
91     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
92 };
93 
94 const Dim3d Gfx11Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
95 
96 const Dim3d Gfx11Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}};
97 const Dim3d Gfx11Lib::Block64K_Log2_3d[]  = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
98 const Dim3d Gfx11Lib::Block4K_Log2_3d[]   = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
99 
100 /**
101 ************************************************************************************************************************
102 *   Gfx11Lib::Gfx11Lib
103 *
104 *   @brief
105 *       Constructor
106 *
107 ************************************************************************************************************************
108 */
Gfx11Lib(const Client * pClient)109 Gfx11Lib::Gfx11Lib(const Client* pClient)
110     :
111     Lib(pClient),
112     m_numPkrLog2(0),
113     m_numSaLog2(0),
114     m_colorBaseIndex(0),
115     m_htileBaseIndex(0),
116     m_dccBaseIndex(0)
117 {
118     memset(&m_settings, 0, sizeof(m_settings));
119     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
120 }
121 
122 /**
123 ************************************************************************************************************************
124 *   Gfx11Lib::~Gfx11Lib
125 *
126 *   @brief
127 *       Destructor
128 ************************************************************************************************************************
129 */
~Gfx11Lib()130 Gfx11Lib::~Gfx11Lib()
131 {
132 }
133 
134 /**
135 ************************************************************************************************************************
136 *   Gfx11Lib::HwlComputeHtileInfo
137 *
138 *   @brief
139 *       Interface function stub of AddrComputeHtilenfo
140 *
141 *   @return
142 *       ADDR_E_RETURNCODE
143 ************************************************************************************************************************
144 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const145 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileInfo(
146     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
147     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
148     ) const
149 {
150     ADDR_E_RETURNCODE ret = ADDR_OK;
151 
152     if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X)  &&
153         (pIn->swizzleMode != ADDR_SW_256KB_Z_X) &&
154         (pIn->hTileFlags.pipeAligned != TRUE))
155     {
156         ret = ADDR_INVALIDPARAMS;
157     }
158     else
159     {
160         Dim3d         metaBlk     = {};
161         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataDepthStencil,
162                                                    ADDR_RSRC_TEX_2D,
163                                                    pIn->swizzleMode,
164                                                    0,
165                                                    0,
166                                                    TRUE,
167                                                    &metaBlk);
168 
169         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
170         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
171         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
172         pOut->metaBlkWidth  = metaBlk.w;
173         pOut->metaBlkHeight = metaBlk.h;
174 
175         if (pIn->numMipLevels > 1)
176         {
177             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
178 
179             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
180 
181             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
182             {
183                 UINT_32 mipWidth, mipHeight;
184 
185                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
186 
187                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
188                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
189 
190                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
191                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
192                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
193 
194                 if (pOut->pMipInfo != NULL)
195                 {
196                     pOut->pMipInfo[i].inMiptail = FALSE;
197                     pOut->pMipInfo[i].offset    = offset;
198                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
199                 }
200 
201                 offset += mipSliceSize;
202             }
203 
204             pOut->sliceSize          = offset;
205             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
206             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
207 
208             if (pOut->pMipInfo != NULL)
209             {
210                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
211                 {
212                     pOut->pMipInfo[i].inMiptail = TRUE;
213                     pOut->pMipInfo[i].offset    = 0;
214                     pOut->pMipInfo[i].sliceSize = 0;
215                 }
216 
217                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
218                 {
219                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
220                 }
221             }
222         }
223         else
224         {
225             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
226             const UINT_32 heightInM = pOut->height / metaBlk.h;
227 
228             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
229             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
230             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
231 
232             if (pOut->pMipInfo != NULL)
233             {
234                 pOut->pMipInfo[0].inMiptail = FALSE;
235                 pOut->pMipInfo[0].offset    = 0;
236                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
237             }
238         }
239 
240         // Get the HTILE address equation (copied from HtileAddrFromCoord).
241         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
242         const UINT_32  index         = m_htileBaseIndex;
243         const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;
244 
245         ADDR_C_ASSERT(sizeof(GFX11_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
246         pOut->equation.gfx10_bits = (UINT_16 *)GFX11_HTILE_SW_PATTERN[patIdxTable[index]];
247     }
248 
249     return ret;
250 }
251 
252 /**
253 ************************************************************************************************************************
254 *   Gfx11Lib::HwlComputeDccInfo
255 *
256 *   @brief
257 *       Interface function to compute DCC key info
258 *
259 *   @return
260 *       ADDR_E_RETURNCODE
261 ************************************************************************************************************************
262 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const263 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo(
264     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
265     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
266     ) const
267 {
268     ADDR_E_RETURNCODE ret = ADDR_OK;
269 
270     if (IsLinear(pIn->swizzleMode))
271     {
272         ret = ADDR_INVALIDPARAMS;
273     }
274     else if (pIn->dccKeyFlags.pipeAligned &&
275              (IsStandardSwizzle(pIn->swizzleMode) ||
276               IsDisplaySwizzle(pIn->swizzleMode)))
277     {
278         ret = ADDR_INVALIDPARAMS;
279     }
280     else
281     {
282         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
283         const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
284         Dim3d         compBlock   = {};
285 
286         GetCompressedBlockSizeLog2(Gfx11DataColor,
287                                    pIn->resourceType,
288                                    pIn->swizzleMode,
289                                    elemLog2,
290                                    numFragLog2,
291                                    &compBlock);
292         pOut->compressBlkWidth  = 1 << compBlock.w;
293         pOut->compressBlkHeight = 1 << compBlock.h;
294         pOut->compressBlkDepth  = 1 << compBlock.d;
295 
296         if (ret == ADDR_OK)
297         {
298             Dim3d         metaBlk     = {};
299             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataColor,
300                                                        pIn->resourceType,
301                                                        pIn->swizzleMode,
302                                                        elemLog2,
303                                                        numFragLog2,
304                                                        pIn->dccKeyFlags.pipeAligned,
305                                                        &metaBlk);
306 
307             pOut->dccRamBaseAlign   = metaBlkSize;
308             pOut->metaBlkWidth      = metaBlk.w;
309             pOut->metaBlkHeight     = metaBlk.h;
310             pOut->metaBlkDepth      = metaBlk.d;
311             pOut->metaBlkSize       = metaBlkSize;
312 
313             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
314             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
315             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
316 
317             if (pIn->numMipLevels > 1)
318             {
319                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
320 
321                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
322 
323                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
324                 {
325                     UINT_32 mipWidth, mipHeight;
326 
327                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
328 
329                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
330                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
331 
332                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
333                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
334                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
335 
336                     if (pOut->pMipInfo != NULL)
337                     {
338                         pOut->pMipInfo[i].inMiptail = FALSE;
339                         pOut->pMipInfo[i].offset    = offset;
340                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
341                     }
342 
343                     offset += mipSliceSize;
344                 }
345 
346                 pOut->dccRamSliceSize    = offset;
347                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
348                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
349 
350                 if (pOut->pMipInfo != NULL)
351                 {
352                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
353                     {
354                         pOut->pMipInfo[i].inMiptail = TRUE;
355                         pOut->pMipInfo[i].offset    = 0;
356                         pOut->pMipInfo[i].sliceSize = 0;
357                     }
358 
359                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
360                     {
361                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
362                     }
363                 }
364             }
365             else
366             {
367                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
368                 const UINT_32 heightInM = pOut->height / metaBlk.h;
369 
370                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
371                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
372                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
373 
374                 if (pOut->pMipInfo != NULL)
375                 {
376                     pOut->pMipInfo[0].inMiptail = FALSE;
377                     pOut->pMipInfo[0].offset    = 0;
378                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
379                 }
380             }
381 
382             // Get the DCC address equation (copied from DccAddrFromCoord)
383             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
384             const UINT_32 numPipeLog2 = m_pipesLog2;
385             UINT_32       index       = m_dccBaseIndex + elemLog2;
386             const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
387                                         GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
388 
389             if (pIn->dccKeyFlags.pipeAligned)
390             {
391                 index += MaxNumOfBpp;
392 
393                 if (m_numPkrLog2 < 2)
394                 {
395                     index += m_pipesLog2 * MaxNumOfBpp;
396                 }
397                 else
398                 {
399                     // 4 groups for "m_numPkrLog2 < 2" case
400                     index += 4 * MaxNumOfBpp;
401 
402                     const UINT_32 dccPipePerPkr = 3;
403 
404                     index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
405                              (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
406                 }
407             }
408 
409             ADDR_C_ASSERT(sizeof(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
410             pOut->equation.gfx10_bits = (UINT_16*)GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]];
411         }
412     }
413 
414     return ret;
415 }
416 
417 /**
418 ************************************************************************************************************************
419 *   Gfx11Lib::HwlComputeHtileAddrFromCoord
420 *
421 *   @brief
422 *       Interface function stub of AddrComputeHtileAddrFromCoord
423 *
424 *   @return
425 *       ADDR_E_RETURNCODE
426 ************************************************************************************************************************
427 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)428 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileAddrFromCoord(
429     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
430     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
431 {
432     ADDR_E_RETURNCODE returnCode = ADDR_OK;
433 
434     if (pIn->numMipLevels > 1)
435     {
436         returnCode = ADDR_NOTIMPLEMENTED;
437     }
438     else
439     {
440         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
441         input.size            = sizeof(input);
442         input.hTileFlags      = pIn->hTileFlags;
443         input.depthFlags      = pIn->depthflags;
444         input.swizzleMode     = pIn->swizzleMode;
445         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
446         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
447         input.numSlices       = Max(pIn->numSlices,       1u);
448         input.numMipLevels    = 1;
449 
450         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
451         output.size = sizeof(output);
452 
453         returnCode = ComputeHtileInfo(&input, &output);
454 
455         if (returnCode == ADDR_OK)
456         {
457             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
458             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
459             const UINT_32  index         = m_htileBaseIndex + numSampleLog2;
460             const UINT_8*  patIdxTable   = GFX11_HTILE_PATIDX;
461             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
462             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
463             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX11_HTILE_SW_PATTERN[patIdxTable[index]],
464                                                                            blkSizeLog2 + 1, // +1 for nibble offset
465                                                                            pIn->x,
466                                                                            pIn->y,
467                                                                            pIn->slice,
468                                                                            0);
469             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
470             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
471             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
472             const UINT_32 blkIndex = (yb * pb) + xb;
473             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
474 
475             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
476                          (blkIndex * (1 << blkSizeLog2)) +
477                          ((blkOffset >> 1) ^ pipeXor);
478         }
479     }
480 
481     return returnCode;
482 }
483 
484 /**
485 ************************************************************************************************************************
486 *   Gfx11Lib::HwlComputeHtileCoordFromAddr
487 *
488 *   @brief
489 *       Interface function stub of AddrComputeHtileCoordFromAddr
490 *
491 *   @return
492 *       ADDR_E_RETURNCODE
493 ************************************************************************************************************************
494 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)495 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileCoordFromAddr(
496     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
497     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
498 {
499     ADDR_NOT_IMPLEMENTED();
500 
501     return ADDR_OK;
502 }
503 
504 /**
505 ************************************************************************************************************************
506 *   Gfx11Lib::HwlSupportComputeDccAddrFromCoord
507 *
508 *   @brief
509 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
510 *
511 *   @return
512 *       ADDR_E_RETURNCODE
513 ************************************************************************************************************************
514 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)515 ADDR_E_RETURNCODE Gfx11Lib::HwlSupportComputeDccAddrFromCoord(
516     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
517 {
518     ADDR_E_RETURNCODE returnCode = ADDR_OK;
519 
520     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
521         ((pIn->swizzleMode != ADDR_SW_64KB_R_X) &&
522          (pIn->swizzleMode != ADDR_SW_256KB_R_X)) ||
523         (pIn->dccKeyFlags.linear == TRUE) ||
524         (pIn->numFrags > 1) ||
525         (pIn->numMipLevels > 1) ||
526         (pIn->mipId > 0))
527     {
528         returnCode = ADDR_NOTSUPPORTED;
529     }
530     else if ((pIn->pitch == 0)         ||
531              (pIn->metaBlkWidth == 0)  ||
532              (pIn->metaBlkHeight == 0) ||
533              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
534     {
535         returnCode = ADDR_NOTSUPPORTED;
536     }
537 
538     return returnCode;
539 }
540 
541 /**
542 ************************************************************************************************************************
543 *   Gfx11Lib::HwlComputeDccAddrFromCoord
544 *
545 *   @brief
546 *       Interface function stub of AddrComputeDccAddrFromCoord
547 *
548 *   @return
549 *       N/A
550 ************************************************************************************************************************
551 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)552 VOID Gfx11Lib::HwlComputeDccAddrFromCoord(
553     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
554     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
555 {
556     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
557     const UINT_32 numPipeLog2 = m_pipesLog2;
558     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
559     UINT_32       index       = m_dccBaseIndex + elemLog2;
560     const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
561                                 GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
562 
563     if (pIn->dccKeyFlags.pipeAligned)
564     {
565         index += MaxNumOfBpp;
566 
567         if (m_numPkrLog2 < 2)
568         {
569             index += m_pipesLog2 * MaxNumOfBpp;
570         }
571         else
572         {
573             // 4 groups for "m_numPkrLog2 < 2" case
574             index += 4 * MaxNumOfBpp;
575 
576             const UINT_32 dccPipePerPkr = 3;
577 
578             index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
579                      (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
580         }
581     }
582 
583     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
584     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
585     const UINT_32  blkOffset   = ComputeOffsetFromSwizzlePattern(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]],
586                                                                  blkSizeLog2 + 1, // +1 for nibble offset
587                                                                  pIn->x,
588                                                                  pIn->y,
589                                                                  pIn->slice,
590                                                                  0);
591     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
592     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
593     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
594     const UINT_32 blkIndex = (yb * pb) + xb;
595     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
596 
597     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
598                  (blkIndex * (1 << blkSizeLog2)) +
599                  ((blkOffset >> 1) ^ pipeXor);
600 }
601 
602 /**
603 ************************************************************************************************************************
604 *   Gfx11Lib::HwlInitGlobalParams
605 *
606 *   @brief
607 *       Initializes global parameters
608 *
609 *   @return
610 *       TRUE if all settings are valid
611 *
612 ************************************************************************************************************************
613 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)614 BOOL_32 Gfx11Lib::HwlInitGlobalParams(
615     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
616 {
617     BOOL_32              valid = TRUE;
618     GB_ADDR_CONFIG_GFX11 gbAddrConfig;
619 
620     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
621 
622     switch (gbAddrConfig.bits.NUM_PIPES)
623     {
624         case ADDR_CONFIG_1_PIPE:
625             m_pipes     = 1;
626             m_pipesLog2 = 0;
627             break;
628         case ADDR_CONFIG_2_PIPE:
629             m_pipes     = 2;
630             m_pipesLog2 = 1;
631             break;
632         case ADDR_CONFIG_4_PIPE:
633             m_pipes     = 4;
634             m_pipesLog2 = 2;
635             break;
636         case ADDR_CONFIG_8_PIPE:
637             m_pipes     = 8;
638             m_pipesLog2 = 3;
639             break;
640         case ADDR_CONFIG_16_PIPE:
641             m_pipes     = 16;
642             m_pipesLog2 = 4;
643             break;
644         case ADDR_CONFIG_32_PIPE:
645             m_pipes     = 32;
646             m_pipesLog2 = 5;
647             break;
648         case ADDR_CONFIG_64_PIPE:
649             m_pipes     = 64;
650             m_pipesLog2 = 6;
651             break;
652         default:
653             ADDR_ASSERT_ALWAYS();
654             valid = FALSE;
655             break;
656     }
657 
658     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
659     {
660         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
661             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
662             m_pipeInterleaveLog2  = 8;
663             break;
664         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
665             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
666             m_pipeInterleaveLog2  = 9;
667             break;
668         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
669             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
670             m_pipeInterleaveLog2  = 10;
671             break;
672         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
673             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
674             m_pipeInterleaveLog2  = 11;
675             break;
676         default:
677             ADDR_ASSERT_ALWAYS();
678             valid = FALSE;
679             break;
680     }
681 
682     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
683     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
684     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
685     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
686 
687     // These fields are deprecated on GFX11; they do nothing on HW.
688     m_maxCompFrag     = 1;
689     m_maxCompFragLog2 = 0;
690 
691     // Skip unaligned case
692     m_htileBaseIndex += MaxNumOfAA;
693 
694     m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
695     m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
696 
697     m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
698     m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
699 
700     ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
701 
702     if (m_numPkrLog2 >= 2)
703     {
704         m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
705         m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
706     }
707 
708     // There is no so-called VAR swizzle mode on GFX11; instead there are four 256KB swizzle modes. Here we treat the
709     // 256KB swizzle mode as the "VAR" swizzle mode to reuse existing facilities (e.g. GetBlockSizeLog2()) of the base class.
710     m_blockVarSizeLog2 = 18;
711 
712     if (valid)
713     {
714         InitEquationTable();
715     }
716 
717     return valid;
718 }
719 
720 /**
721 ************************************************************************************************************************
722 *   Gfx11Lib::HwlConvertChipFamily
723 *
724 *   @brief
725 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
726 *   @return
727 *       ChipFamily
728 ************************************************************************************************************************
729 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)730 ChipFamily Gfx11Lib::HwlConvertChipFamily(
731     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
732     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
733 {
734     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
735 
736     switch (chipFamily)
737     {
738         case FAMILY_NV3:
739             if (ASICREV_IS_NAVI31_P(chipRevision))
740             {
741             }
742             if (ASICREV_IS_NAVI32_P(chipRevision))
743             {
744             }
745             if (ASICREV_IS_NAVI33_P(chipRevision))
746             {
747             }
748             break;
749 
750         case FAMILY_GFX1150:
751             {
752                 m_settings.isGfx1150 = 1;
753             }
754             break;
755         case FAMILY_PHX:
756             m_settings.isPhoenix = 1;
757             break;
758         default:
759             ADDR_ASSERT(!"Unknown chip family");
760             break;
761     }
762 
763     m_configFlags.use32bppFor422Fmt = TRUE;
764 
765     return family;
766 }
767 
768 /**
769 ************************************************************************************************************************
770 *   Gfx11Lib::GetBlk256SizeLog2
771 *
772 *   @brief
773 *       Get block 256 size
774 *
775 *   @return
776 *       N/A
777 ************************************************************************************************************************
778 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const779 void Gfx11Lib::GetBlk256SizeLog2(
780     AddrResourceType resourceType,      ///< [in] Resource type
781     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
782     UINT_32          elemLog2,          ///< [in] element size log2
783     UINT_32          numSamplesLog2,    ///< [in] number of samples
784     Dim3d*           pBlock             ///< [out] block size
785     ) const
786 {
787     if (IsThin(resourceType, swizzleMode))
788     {
789         UINT_32 blockBits = 8 - elemLog2;
790 
791         // On GFX11, Z and R modes are the same thing.
792         if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
793         {
794             blockBits -= numSamplesLog2;
795         }
796 
797         pBlock->w = (blockBits >> 1) + (blockBits & 1);
798         pBlock->h = (blockBits >> 1);
799         pBlock->d = 0;
800     }
801     else
802     {
803         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
804 
805         UINT_32 blockBits = 8 - elemLog2;
806 
807         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
808         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
809         pBlock->h = (blockBits / 3);
810     }
811 }
812 
813 /**
814 ************************************************************************************************************************
815 *   Gfx11Lib::GetCompressedBlockSizeLog2
816 *
817 *   @brief
818 *       Get compress block size
819 *
820 *   @return
821 *       N/A
822 ************************************************************************************************************************
823 */
GetCompressedBlockSizeLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const824 void Gfx11Lib::GetCompressedBlockSizeLog2(
825     Gfx11DataType    dataType,          ///< [in] Data type
826     AddrResourceType resourceType,      ///< [in] Resource type
827     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
828     UINT_32          elemLog2,          ///< [in] element size log2
829     UINT_32          numSamplesLog2,    ///< [in] number of samples
830     Dim3d*           pBlock             ///< [out] block size
831     ) const
832 {
833     if (dataType == Gfx11DataColor)
834     {
835         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
836     }
837     else
838     {
839         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
840         pBlock->w = 3;
841         pBlock->h = 3;
842         pBlock->d = 0;
843     }
844 }
845 
846 /**
847 ************************************************************************************************************************
848 *   Gfx11Lib::GetMetaOverlapLog2
849 *
850 *   @brief
851 *       Get meta block overlap
852 *
853 *   @return
854 *       N/A
855 ************************************************************************************************************************
856 */
GetMetaOverlapLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const857 INT_32 Gfx11Lib::GetMetaOverlapLog2(
858     Gfx11DataType    dataType,          ///< [in] Data type
859     AddrResourceType resourceType,      ///< [in] Resource type
860     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
861     UINT_32          elemLog2,          ///< [in] element size log2
862     UINT_32          numSamplesLog2     ///< [in] number of samples
863     ) const
864 {
865     Dim3d compBlock;
866     Dim3d microBlock;
867 
868     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
869     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
870 
871     const INT_32 blkSizeLog2    = GetBlockSizeLog2(swizzleMode);
872     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
873     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
874     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
875     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
876     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
877 
878     if (numPipesLog2 > 1)
879     {
880         overlap++;
881     }
882 
883     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
884     if ((elemLog2 == 4) && (numSamplesLog2 == 3) && (blkSizeLog2 == 16))
885     {
886         overlap--;
887     }
888     overlap += 16 - blkSizeLog2;
889     overlap = Max(overlap, 0);
890     return overlap;
891 }
892 
893 /**
894 ************************************************************************************************************************
895 *   Gfx11Lib::Get3DMetaOverlapLog2
896 *
897 *   @brief
898 *       Get 3d meta block overlap
899 *
900 *   @return
901 *       N/A
902 ************************************************************************************************************************
903 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const904 INT_32 Gfx11Lib::Get3DMetaOverlapLog2(
905     AddrResourceType resourceType,      ///< [in] Resource type
906     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
907     UINT_32          elemLog2           ///< [in] element size log2
908     ) const
909 {
910     Dim3d microBlock;
911     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
912 
913     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
914 
915     overlap++;
916 
917     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
918     {
919         overlap = 0;
920     }
921     return overlap;
922 }
923 
924 /**
925 ************************************************************************************************************************
926 *   Gfx11Lib::GetPipeRotateAmount
927 *
928 *   @brief
929 *       Get pipe rotate amount
930 *
931 *   @return
932 *       Pipe rotate amount
933 ************************************************************************************************************************
934 */
935 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const936 INT_32 Gfx11Lib::GetPipeRotateAmount(
937     AddrResourceType resourceType,      ///< [in] Resource type
938     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
939     ) const
940 {
941     INT_32 amount = 0;
942 
943     if ((m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
944     {
945         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
946                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
947     }
948 
949     return amount;
950 }
951 
952 /**
953 ************************************************************************************************************************
954 *   Gfx11Lib::GetMetaBlkSize
955 *
956 *   @brief
957 *       Get metadata block size
958 *
959 *   @return
960 *       Meta block size
961 ************************************************************************************************************************
962 */
GetMetaBlkSize(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const963 UINT_32 Gfx11Lib::GetMetaBlkSize(
964     Gfx11DataType    dataType,          ///< [in] Data type
965     AddrResourceType resourceType,      ///< [in] Resource type
966     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
967     UINT_32          elemLog2,          ///< [in] element size log2
968     UINT_32          numSamplesLog2,    ///< [in] number of samples
969     BOOL_32          pipeAlign,         ///< [in] pipe align
970     Dim3d*           pBlock             ///< [out] block size
971     ) const
972 {
973     INT_32 metablkSizeLog2;
974 
975     const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
976     const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
977     const INT_32 compBlkSizeLog2    = (dataType == Gfx11DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
978     const INT_32 metaBlkSamplesLog2 = numSamplesLog2;
979     const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
980     INT_32       numPipesLog2       = m_pipesLog2;
981 
982     if (IsThin(resourceType, swizzleMode))
983     {
984         if ((pipeAlign == FALSE) ||
985             (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
986             (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
987         {
988             if (pipeAlign)
989             {
990                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
991                 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
992             }
993             else
994             {
995                 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
996             }
997         }
998         else
999         {
1000             if ((m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1001             {
1002                 numPipesLog2++;
1003             }
1004 
1005             INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1006 
1007             if (numPipesLog2 >= 4)
1008             {
1009                 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1010 
1011                 // In 16Bpe 8xaa, we have an extra overlap bit
1012                 if ((pipeRotateLog2 > 0)  &&
1013                     (elemLog2 == 4)       &&
1014                     (numSamplesLog2 == 3) &&
1015                     (IsZOrderSwizzle(swizzleMode) ||
1016                      IsRtOptSwizzle(swizzleMode)  ||
1017                      (GetEffectiveNumPipes() > 3)))
1018                 {
1019                     overlapLog2++;
1020                 }
1021 
1022                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1023                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1024             }
1025             else
1026             {
1027                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1028             }
1029 
1030             if (dataType == Gfx11DataDepthStencil)
1031             {
1032                 // For htile surfaces, pad meta block size to 2K * num_pipes
1033                 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1034             }
1035 
1036             /* This chunk is not part of upstream addrlib. See !28268 */
1037             const INT_32 compFragLog2 = numSamplesLog2;
1038 
1039             if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1040             {
1041                 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1042 
1043                 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1044             }
1045             /* End of the non-upstream chunk. */
1046         }
1047 
1048         const INT_32 metablkBitsLog2 =
1049             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1050         pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1051         pBlock->h = 1 << (metablkBitsLog2 >> 1);
1052         pBlock->d = 1;
1053     }
1054     else
1055     {
1056         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1057 
1058         if (pipeAlign)
1059         {
1060             if ((m_pipesLog2 == m_numSaLog2 + 1) &&
1061                 (m_pipesLog2 > 1)                &&
1062                 IsRbAligned(resourceType, swizzleMode))
1063             {
1064                 numPipesLog2++;
1065             }
1066 
1067             const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1068 
1069             metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1070             metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1071             metablkSizeLog2 = Max(metablkSizeLog2, 12);
1072         }
1073         else
1074         {
1075             metablkSizeLog2 = 12;
1076         }
1077 
1078         const INT_32 metablkBitsLog2 =
1079             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1080         pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1081         pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1082         pBlock->d = 1 << (metablkBitsLog2 / 3);
1083     }
1084 
1085     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1086 }
1087 
1088 /**
1089 ************************************************************************************************************************
1090 *   Gfx11Lib::ConvertSwizzlePatternToEquation
1091 *
1092 *   @brief
1093 *       Convert swizzle pattern to equation.
1094 *
1095 *   @return
1096 *       N/A
1097 ************************************************************************************************************************
1098 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1099 VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
1100     UINT_32                elemLog2,  ///< [in] element bytes log2
1101     AddrResourceType       rsrcType,  ///< [in] resource type
1102     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1103     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern infor
1104     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1105     const
1106 {
1107     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1108     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1109 
1110     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1111     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1112     memset(pEquation, 0, sizeof(ADDR_EQUATION));
1113     pEquation->numBits            = blockSizeLog2;
1114     pEquation->numBitComponents   = pPatInfo->maxItemCount;
1115     pEquation->stackedDepthSlices = FALSE;
1116 
1117     for (UINT_32 i = 0; i < elemLog2; i++)
1118     {
1119         pEquation->addr[i].channel = 0;
1120         pEquation->addr[i].valid   = 1;
1121         pEquation->addr[i].index   = i;
1122     }
1123 
1124     if (IsXor(swMode) == FALSE)
1125     {
1126         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1127         {
1128             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1129 
1130             if (pSwizzle[i].x != 0)
1131             {
1132                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1133 
1134                 pEquation->addr[i].channel = 0;
1135                 pEquation->addr[i].valid   = 1;
1136                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1137             }
1138             else if (pSwizzle[i].y != 0)
1139             {
1140                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1141 
1142                 pEquation->addr[i].channel = 1;
1143                 pEquation->addr[i].valid   = 1;
1144                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1145             }
1146             else
1147             {
1148                 ADDR_ASSERT(pSwizzle[i].z != 0);
1149                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1150 
1151                 pEquation->addr[i].channel = 2;
1152                 pEquation->addr[i].valid   = 1;
1153                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1154             }
1155 
1156             pEquation->xor1[i].value = 0;
1157             pEquation->xor2[i].value = 0;
1158         }
1159     }
1160     else if (IsThin(rsrcType, swMode))
1161     {
1162         Dim3d dim;
1163         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1164 
1165         const UINT_32 blkXLog2 = Log2(dim.w);
1166         const UINT_32 blkYLog2 = Log2(dim.h);
1167         const UINT_32 blkXMask = dim.w - 1;
1168         const UINT_32 blkYMask = dim.h - 1;
1169 
1170         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1171         UINT_32          xMask = 0;
1172         UINT_32          yMask = 0;
1173         UINT_32          bMask = (1 << elemLog2) - 1;
1174 
1175         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1176         {
1177             if (IsPow2(pSwizzle[i].value))
1178             {
1179                 if (pSwizzle[i].x != 0)
1180                 {
1181                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1182                     xMask |= pSwizzle[i].x;
1183 
1184                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1185 
1186                     ADDR_ASSERT(xLog2 < blkXLog2);
1187 
1188                     pEquation->addr[i].channel = 0;
1189                     pEquation->addr[i].valid   = 1;
1190                     pEquation->addr[i].index   = xLog2 + elemLog2;
1191                 }
1192                 else
1193                 {
1194                     ADDR_ASSERT(pSwizzle[i].y != 0);
1195                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1196                     yMask |= pSwizzle[i].y;
1197 
1198                     pEquation->addr[i].channel = 1;
1199                     pEquation->addr[i].valid   = 1;
1200                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1201 
1202                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1203                 }
1204 
1205                 swizzle[i].value = 0;
1206                 bMask |= 1 << i;
1207             }
1208             else
1209             {
1210                 if (pSwizzle[i].z != 0)
1211                 {
1212                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1213 
1214                     pEquation->xor2[i].channel = 2;
1215                     pEquation->xor2[i].valid   = 1;
1216                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1217                 }
1218 
1219                 swizzle[i].x = pSwizzle[i].x;
1220                 swizzle[i].y = pSwizzle[i].y;
1221                 swizzle[i].z = swizzle[i].s = 0;
1222 
1223                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1224 
1225                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1226 
1227                 if (xHi != 0)
1228                 {
1229                     ADDR_ASSERT(IsPow2(xHi));
1230                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1231 
1232                     pEquation->xor1[i].channel = 0;
1233                     pEquation->xor1[i].valid   = 1;
1234                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1235 
1236                     swizzle[i].x &= blkXMask;
1237                 }
1238 
1239                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1240 
1241                 if (yHi != 0)
1242                 {
1243                     ADDR_ASSERT(IsPow2(yHi));
1244 
1245                     if (xHi == 0)
1246                     {
1247                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1248                         pEquation->xor1[i].channel = 1;
1249                         pEquation->xor1[i].valid   = 1;
1250                         pEquation->xor1[i].index   = Log2(yHi);
1251                     }
1252                     else
1253                     {
1254                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1255                         pEquation->xor2[i].channel = 1;
1256                         pEquation->xor2[i].valid   = 1;
1257                         pEquation->xor2[i].index   = Log2(yHi);
1258                     }
1259 
1260                     swizzle[i].y &= blkYMask;
1261                 }
1262 
1263                 if (swizzle[i].value == 0)
1264                 {
1265                     bMask |= 1 << i;
1266                 }
1267             }
1268         }
1269 
1270         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1271         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1272 
1273         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1274 
1275         while (bMask != blockMask)
1276         {
1277             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1278             {
1279                 if ((bMask & (1 << i)) == 0)
1280                 {
1281                     if (IsPow2(swizzle[i].value))
1282                     {
1283                         if (swizzle[i].x != 0)
1284                         {
1285                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1286                             xMask |= swizzle[i].x;
1287 
1288                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1289 
1290                             ADDR_ASSERT(xLog2 < blkXLog2);
1291 
1292                             pEquation->addr[i].channel = 0;
1293                             pEquation->addr[i].valid   = 1;
1294                             pEquation->addr[i].index   = xLog2 + elemLog2;
1295                         }
1296                         else
1297                         {
1298                             ADDR_ASSERT(swizzle[i].y != 0);
1299                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1300                             yMask |= swizzle[i].y;
1301 
1302                             pEquation->addr[i].channel = 1;
1303                             pEquation->addr[i].valid   = 1;
1304                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1305 
1306                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1307                         }
1308 
1309                         swizzle[i].value = 0;
1310                         bMask |= 1 << i;
1311                     }
1312                     else
1313                     {
1314                         const UINT_32 x = swizzle[i].x & xMask;
1315                         const UINT_32 y = swizzle[i].y & yMask;
1316 
1317                         if (x != 0)
1318                         {
1319                             ADDR_ASSERT(IsPow2(x));
1320 
1321                             if (pEquation->xor1[i].value == 0)
1322                             {
1323                                 pEquation->xor1[i].channel = 0;
1324                                 pEquation->xor1[i].valid   = 1;
1325                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1326                             }
1327                             else
1328                             {
1329                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1330                                 pEquation->xor2[i].channel = 0;
1331                                 pEquation->xor2[i].valid   = 1;
1332                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1333                             }
1334                         }
1335 
1336                         if (y != 0)
1337                         {
1338                             ADDR_ASSERT(IsPow2(y));
1339 
1340                             if (pEquation->xor1[i].value == 0)
1341                             {
1342                                 pEquation->xor1[i].channel = 1;
1343                                 pEquation->xor1[i].valid   = 1;
1344                                 pEquation->xor1[i].index   = Log2(y);
1345                             }
1346                             else
1347                             {
1348                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1349                                 pEquation->xor2[i].channel = 1;
1350                                 pEquation->xor2[i].valid   = 1;
1351                                 pEquation->xor2[i].index   = Log2(y);
1352                             }
1353                         }
1354 
1355                         swizzle[i].x &= ~x;
1356                         swizzle[i].y &= ~y;
1357                     }
1358                 }
1359             }
1360         }
1361 
1362         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1363     }
1364     else
1365     {
1366         const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ?
1367                               Block256K_Log2_3d[elemLog2] :
1368                               ((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]);
1369 
1370         const UINT_32 blkXLog2 = blkDim.w;
1371         const UINT_32 blkYLog2 = blkDim.h;
1372         const UINT_32 blkZLog2 = blkDim.d;
1373         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1374         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1375         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1376 
1377         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1378         UINT_32          xMask = 0;
1379         UINT_32          yMask = 0;
1380         UINT_32          zMask = 0;
1381         UINT_32          bMask = (1 << elemLog2) - 1;
1382 
1383         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1384         {
1385             if (IsPow2(pSwizzle[i].value))
1386             {
1387                 if (pSwizzle[i].x != 0)
1388                 {
1389                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1390                     xMask |= pSwizzle[i].x;
1391 
1392                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1393 
1394                     ADDR_ASSERT(xLog2 < blkXLog2);
1395 
1396                     pEquation->addr[i].channel = 0;
1397                     pEquation->addr[i].valid   = 1;
1398                     pEquation->addr[i].index   = xLog2 + elemLog2;
1399                 }
1400                 else if (pSwizzle[i].y != 0)
1401                 {
1402                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1403                     yMask |= pSwizzle[i].y;
1404 
1405                     pEquation->addr[i].channel = 1;
1406                     pEquation->addr[i].valid   = 1;
1407                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1408 
1409                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1410                 }
1411                 else
1412                 {
1413                     ADDR_ASSERT(pSwizzle[i].z != 0);
1414                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1415                     zMask |= pSwizzle[i].z;
1416 
1417                     pEquation->addr[i].channel = 2;
1418                     pEquation->addr[i].valid   = 1;
1419                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1420 
1421                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1422                 }
1423 
1424                 swizzle[i].value = 0;
1425                 bMask |= 1 << i;
1426             }
1427             else
1428             {
1429                 swizzle[i].x = pSwizzle[i].x;
1430                 swizzle[i].y = pSwizzle[i].y;
1431                 swizzle[i].z = pSwizzle[i].z;
1432                 swizzle[i].s = 0;
1433 
1434                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1435 
1436                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1437                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1438                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1439 
1440                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1441 
1442                 if (xHi != 0)
1443                 {
1444                     ADDR_ASSERT(IsPow2(xHi));
1445                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1446 
1447                     pEquation->xor1[i].channel = 0;
1448                     pEquation->xor1[i].valid   = 1;
1449                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1450 
1451                     swizzle[i].x &= blkXMask;
1452                 }
1453 
1454                 if (yHi != 0)
1455                 {
1456                     ADDR_ASSERT(IsPow2(yHi));
1457 
1458                     if (pEquation->xor1[i].value == 0)
1459                     {
1460                         pEquation->xor1[i].channel = 1;
1461                         pEquation->xor1[i].valid   = 1;
1462                         pEquation->xor1[i].index   = Log2(yHi);
1463                     }
1464                     else
1465                     {
1466                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1467                         pEquation->xor2[i].channel = 1;
1468                         pEquation->xor2[i].valid   = 1;
1469                         pEquation->xor2[i].index   = Log2(yHi);
1470                     }
1471 
1472                     swizzle[i].y &= blkYMask;
1473                 }
1474 
1475                 if (zHi != 0)
1476                 {
1477                     ADDR_ASSERT(IsPow2(zHi));
1478 
1479                     if (pEquation->xor1[i].value == 0)
1480                     {
1481                         pEquation->xor1[i].channel = 2;
1482                         pEquation->xor1[i].valid   = 1;
1483                         pEquation->xor1[i].index   = Log2(zHi);
1484                     }
1485                     else
1486                     {
1487                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1488                         pEquation->xor2[i].channel = 2;
1489                         pEquation->xor2[i].valid   = 1;
1490                         pEquation->xor2[i].index   = Log2(zHi);
1491                     }
1492 
1493                     swizzle[i].z &= blkZMask;
1494                 }
1495 
1496                 if (swizzle[i].value == 0)
1497                 {
1498                     bMask |= 1 << i;
1499                 }
1500             }
1501         }
1502 
1503         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1504         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1505 
1506         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1507 
1508         while (bMask != blockMask)
1509         {
1510             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1511             {
1512                 if ((bMask & (1 << i)) == 0)
1513                 {
1514                     if (IsPow2(swizzle[i].value))
1515                     {
1516                         if (swizzle[i].x != 0)
1517                         {
1518                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1519                             xMask |= swizzle[i].x;
1520 
1521                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1522 
1523                             ADDR_ASSERT(xLog2 < blkXLog2);
1524 
1525                             pEquation->addr[i].channel = 0;
1526                             pEquation->addr[i].valid   = 1;
1527                             pEquation->addr[i].index   = xLog2 + elemLog2;
1528                         }
1529                         else if (swizzle[i].y != 0)
1530                         {
1531                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1532                             yMask |= swizzle[i].y;
1533 
1534                             pEquation->addr[i].channel = 1;
1535                             pEquation->addr[i].valid   = 1;
1536                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1537 
1538                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1539                         }
1540                         else
1541                         {
1542                             ADDR_ASSERT(swizzle[i].z != 0);
1543                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1544                             zMask |= swizzle[i].z;
1545 
1546                             pEquation->addr[i].channel = 2;
1547                             pEquation->addr[i].valid   = 1;
1548                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1549 
1550                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1551                         }
1552 
1553                         swizzle[i].value = 0;
1554                         bMask |= 1 << i;
1555                     }
1556                     else
1557                     {
1558                         const UINT_32 x = swizzle[i].x & xMask;
1559                         const UINT_32 y = swizzle[i].y & yMask;
1560                         const UINT_32 z = swizzle[i].z & zMask;
1561 
1562                         if (x != 0)
1563                         {
1564                             ADDR_ASSERT(IsPow2(x));
1565 
1566                             if (pEquation->xor1[i].value == 0)
1567                             {
1568                                 pEquation->xor1[i].channel = 0;
1569                                 pEquation->xor1[i].valid   = 1;
1570                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1571                             }
1572                             else
1573                             {
1574                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1575                                 pEquation->xor2[i].channel = 0;
1576                                 pEquation->xor2[i].valid   = 1;
1577                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1578                             }
1579                         }
1580 
1581                         if (y != 0)
1582                         {
1583                             ADDR_ASSERT(IsPow2(y));
1584 
1585                             if (pEquation->xor1[i].value == 0)
1586                             {
1587                                 pEquation->xor1[i].channel = 1;
1588                                 pEquation->xor1[i].valid   = 1;
1589                                 pEquation->xor1[i].index   = Log2(y);
1590                             }
1591                             else
1592                             {
1593                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1594                                 pEquation->xor2[i].channel = 1;
1595                                 pEquation->xor2[i].valid   = 1;
1596                                 pEquation->xor2[i].index   = Log2(y);
1597                             }
1598                         }
1599 
1600                         if (z != 0)
1601                         {
1602                             ADDR_ASSERT(IsPow2(z));
1603 
1604                             if (pEquation->xor1[i].value == 0)
1605                             {
1606                                 pEquation->xor1[i].channel = 2;
1607                                 pEquation->xor1[i].valid   = 1;
1608                                 pEquation->xor1[i].index   = Log2(z);
1609                             }
1610                             else
1611                             {
1612                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1613                                 pEquation->xor2[i].channel = 2;
1614                                 pEquation->xor2[i].valid   = 1;
1615                                 pEquation->xor2[i].index   = Log2(z);
1616                             }
1617                         }
1618 
1619                         swizzle[i].x &= ~x;
1620                         swizzle[i].y &= ~y;
1621                         swizzle[i].z &= ~z;
1622                     }
1623                 }
1624             }
1625         }
1626 
1627         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1628     }
1629 }
1630 
1631 /**
1632 ************************************************************************************************************************
1633 *   Gfx11Lib::InitEquationTable
1634 *
1635 *   @brief
1636 *       Initialize Equation table.
1637 *
1638 *   @return
1639 *       N/A
1640 ************************************************************************************************************************
1641 */
InitEquationTable()1642 VOID Gfx11Lib::InitEquationTable()
1643 {
1644     memset(m_equationTable, 0, sizeof(m_equationTable));
1645 
1646     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1647     {
1648         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1649 
1650         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1651         {
1652             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1653 
1654             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1655             {
1656                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1657                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1658 
1659                 if (pPatInfo != NULL)
1660                 {
1661                     ADDR_ASSERT(IsValidSwMode(swMode));
1662 
1663                     if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
1664                     {
1665                         ADDR_EQUATION equation = {};
1666 
1667                         // Passing in pPatInfo to get the addr equation
1668                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1669 
1670                         equationIndex = m_numEquations;
1671                         ADDR_ASSERT(equationIndex < EquationTableSize);
1672                         // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
1673                         m_equationTable[equationIndex] = equation;
1674                         // Increment m_numEquations
1675                         m_numEquations++;
1676                     }
1677                     else // There is no equationIndex
1678                     {
1679                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X
1680                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1681                         ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D);
1682                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1683                     }
1684                 }
1685 
1686                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1687             }
1688         }
1689     }
1690 }
1691 
1692 /**
1693 ************************************************************************************************************************
1694 *   Gfx11Lib::HwlGetEquationIndex
1695 *
1696 *   @brief
1697 *       Interface function stub of GetEquationIndex
1698 *
1699 *   @return
1700 *       ADDR_E_RETURNCODE
1701 ************************************************************************************************************************
1702 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1703 UINT_32 Gfx11Lib::HwlGetEquationIndex(
1704     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
1705     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
1706     ) const
1707 {
1708     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1709 
1710     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1711         (pIn->resourceType == ADDR_RSRC_TEX_3D))
1712     {
1713         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1714         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
1715         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
1716 
1717         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1718     }
1719 
1720     if (pOut->pMipInfo != NULL)
1721     {
1722         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1723         {
1724             pOut->pMipInfo[i].equationIndex = equationIdx;
1725         }
1726     }
1727 
1728     return equationIdx;
1729 }
1730 
1731 /**
1732 ************************************************************************************************************************
1733 *   Gfx11Lib::GetValidDisplaySwizzleModes
1734 *
1735 *   @brief
1736 *       Get valid swizzle modes mask for displayable surface
1737 *
1738 *   @return
1739 *       Valid swizzle modes mask for displayable surface
1740 ************************************************************************************************************************
1741 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1742 UINT_32 Gfx11Lib::GetValidDisplaySwizzleModes(
1743     UINT_32 bpp
1744     ) const
1745 {
1746     UINT_32 swModeMask = 0;
1747 
1748     if (bpp <= 64)
1749     {
1750         const ChipFamily  family = GetChipFamily();
1751 
1752         swModeMask = Dcn32SwModeMask;
1753 
1754         if (false
1755             || (m_settings.isPhoenix)
1756             || (m_settings.isGfx1150)
1757            )
1758         {
1759             // Not all GPUs support displaying with 256kB swizzle modes.
1760             swModeMask &= ~((1u << ADDR_SW_256KB_D_X) |
1761                             (1u << ADDR_SW_256KB_R_X));
1762         }
1763     }
1764 
1765     return swModeMask;
1766 }
1767 
1768 /**
1769 ************************************************************************************************************************
1770 *   Gfx11Lib::IsValidDisplaySwizzleMode
1771 *
1772 *   @brief
1773 *       Check if a swizzle mode is supported by display engine
1774 *
1775 *   @return
1776 *       TRUE is swizzle mode is supported by display engine
1777 ************************************************************************************************************************
1778 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const1779 BOOL_32 Gfx11Lib::IsValidDisplaySwizzleMode(
1780     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
1781     ) const
1782 {
1783     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1784 
1785     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
1786 }
1787 
1788 /**
1789 ************************************************************************************************************************
1790 *   Gfx11Lib::GetMaxNumMipsInTail
1791 *
1792 *   @brief
1793 *       Return max number of mips in tails
1794 *
1795 *   @return
1796 *       Max number of mips in tails
1797 ************************************************************************************************************************
1798 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const1799 UINT_32 Gfx11Lib::GetMaxNumMipsInTail(
1800     UINT_32 blockSizeLog2,     ///< block size log2
1801     BOOL_32 isThin             ///< is thin or thick
1802     ) const
1803 {
1804     UINT_32 effectiveLog2 = blockSizeLog2;
1805 
1806     if (isThin == FALSE)
1807     {
1808         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1809     }
1810 
1811     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1812 }
1813 
1814 /**
1815 ************************************************************************************************************************
1816 *   Gfx11Lib::HwlComputePipeBankXor
1817 *
1818 *   @brief
1819 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1820 *
1821 *   @return
1822 *       PipeBankXor value
1823 ************************************************************************************************************************
1824 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1825 ADDR_E_RETURNCODE Gfx11Lib::HwlComputePipeBankXor(
1826     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
1827     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
1828     ) const
1829 {
1830     if (IsNonPrtXor(pIn->swizzleMode))
1831     {
1832         pOut->pipeBankXor = 0;
1833     }
1834     else
1835     {
1836         pOut->pipeBankXor = 0;
1837     }
1838 
1839     return ADDR_OK;
1840 }
1841 
1842 /**
1843 ************************************************************************************************************************
1844 *   Gfx11Lib::HwlComputeSlicePipeBankXor
1845 *
1846 *   @brief
1847 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1848 *
1849 *   @return
1850 *       PipeBankXor value
1851 ************************************************************************************************************************
1852 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1853 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
1854     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
1855     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
1856     ) const
1857 {
1858     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1859 
1860     if (IsNonPrtXor(pIn->swizzleMode))
1861     {
1862         if (pIn->bpe == 0)
1863         {
1864             ADDR_ASSERT_ALWAYS();
1865 
1866             // Require a valid bytes-per-element value passed from client...
1867             returnCode = ADDR_INVALIDPARAMS;
1868         }
1869         else
1870         {
1871             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1872                                                                     pIn->resourceType,
1873                                                                     Log2(pIn->bpe >> 3),
1874                                                                     1);
1875 
1876             if (pPatInfo != NULL)
1877             {
1878                 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1879                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1880 
1881                 const UINT_32 pipeBankXorOffset =
1882                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
1883                                                     GetBlockSizeLog2(pIn->swizzleMode),
1884                                                     0,
1885                                                     0,
1886                                                     pIn->slice,
1887                                                     0);
1888 
1889                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1890 
1891                 // Should have no bit set under pipe interleave
1892                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1893 
1894                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1895             }
1896             else
1897             {
1898                 // Should never come here...
1899                 ADDR_NOT_IMPLEMENTED();
1900 
1901                 returnCode = ADDR_NOTSUPPORTED;
1902             }
1903         }
1904     }
1905     else
1906     {
1907         pOut->pipeBankXor = 0;
1908     }
1909 
1910     return returnCode;
1911 }
1912 
1913 /**
1914 ************************************************************************************************************************
1915 *   Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1916 *
1917 *   @brief
1918 *       Compute sub resource offset to support swizzle pattern
1919 *
1920 *   @return
1921 *       Offset
1922 ************************************************************************************************************************
1923 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1924 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1925     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
1926     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
1927     ) const
1928 {
1929     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
1930 
1931     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1932 
1933     return ADDR_OK;
1934 }
1935 
1936 /**
1937 ************************************************************************************************************************
1938 *   Gfx11Lib::HwlComputeNonBlockCompressedView
1939 *
1940 *   @brief
1941 *       Compute non-block-compressed view for a given mipmap level/slice.
1942 *
1943 *   @return
1944 *       ADDR_E_RETURNCODE
1945 ************************************************************************************************************************
1946 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1947 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView(
1948     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
1949     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
1950     ) const
1951 {
1952     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1953 
1954     if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
1955     {
1956         // Only thin swizzle mode can have a NonBC view...
1957         returnCode = ADDR_INVALIDPARAMS;
1958     }
1959     else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1960              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1961     {
1962         // Only support BC1~BC7, ASTC, or ETC2 for now...
1963         returnCode = ADDR_NOTSUPPORTED;
1964     }
1965     else
1966     {
1967         UINT_32 bcWidth, bcHeight;
1968         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1969 
1970         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1971         infoIn.flags        = pIn->flags;
1972         infoIn.swizzleMode  = pIn->swizzleMode;
1973         infoIn.resourceType = pIn->resourceType;
1974         infoIn.bpp          = bpp;
1975         infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
1976         infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
1977         infoIn.numSlices    = pIn->numSlices;
1978         infoIn.numMipLevels = pIn->numMipLevels;
1979         infoIn.numSamples   = 1;
1980         infoIn.numFrags     = 1;
1981 
1982         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
1983 
1984         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1985         infoOut.pMipInfo = mipInfo;
1986 
1987         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
1988 
1989         if (tiled)
1990         {
1991             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
1992         }
1993         else
1994         {
1995             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
1996         }
1997 
1998         if (returnCode == ADDR_OK)
1999         {
2000             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2001             subOffIn.swizzleMode      = infoIn.swizzleMode;
2002             subOffIn.resourceType     = infoIn.resourceType;
2003             subOffIn.slice            = pIn->slice;
2004             subOffIn.sliceSize        = infoOut.sliceSize;
2005             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2006             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
2007 
2008             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2009 
2010             // For any mipmap level, move nonBc view base address by offset
2011             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2012             pOut->offset = subOffOut.offset;
2013 
2014             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2015             slicePbXorIn.bpe             = infoIn.bpp;
2016             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
2017             slicePbXorIn.resourceType    = infoIn.resourceType;
2018             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2019             slicePbXorIn.slice           = pIn->slice;
2020 
2021             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2022 
2023             // For any mipmap level, nonBc view should use computed pbXor
2024             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2025             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2026 
2027             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2028             const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2029             const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2030 
2031             if (inTail)
2032             {
2033                 // For mipmap level that is in mip tail block, hack a lot of things...
2034                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2035                 // are fit in tail block:
2036 
2037                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2038                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2039 
2040                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2041                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2042 
2043                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2044                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2045 
2046                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2047                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2048             }
2049             // This check should cover at least mipId == 0
2050             else if (requestMipWidth << pIn->mipId == infoIn.width)
2051             {
2052                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2053                 // - only one mipmap level and mipId = 0
2054                 pOut->mipId        = 0;
2055                 pOut->numMipLevels = 1;
2056 
2057                 // (mip0) width = requestMipWidth
2058                 pOut->unalignedWidth = requestMipWidth;
2059 
2060                 // (mip0) height = requestMipHeight
2061                 pOut->unalignedHeight = requestMipHeight;
2062             }
2063             else
2064             {
2065                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2066                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2067                 // because single mip view may have different pitch value than original (multiple) mip view...
2068                 // A simple case would be:
2069                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2070                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2071                 //   mip0 width = 0x101/mip1 width = 0x80
2072                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2073                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2074 
2075                 // - 2 levels and mipId = 1
2076                 pOut->mipId        = 1;
2077                 pOut->numMipLevels = 2;
2078 
2079                 const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2080                 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2081 
2082                 const BOOL_32 needToAvoidInTail =
2083                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2084                     TRUE : FALSE;
2085 
2086                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2087                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2088 
2089                 const BOOL_32 needExtraWidth =
2090                     ((upperMipWidth < requestMipWidth * 2) ||
2091                      ((upperMipWidth == requestMipWidth * 2) &&
2092                       ((needToAvoidInTail == TRUE) ||
2093                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2094 
2095                 const BOOL_32 needExtraHeight =
2096                     ((upperMipHeight < requestMipHeight * 2) ||
2097                      ((upperMipHeight == requestMipHeight * 2) &&
2098                       ((needToAvoidInTail == TRUE) ||
2099                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2100 
2101                 // (mip0) width = requestLastMipLevelWidth
2102                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2103 
2104                 // (mip0) height = requestLastMipLevelHeight
2105                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2106             }
2107 
2108             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2109             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2110             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2111             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2112         }
2113     }
2114 
2115     return returnCode;
2116 }
2117 
2118 /**
2119 ************************************************************************************************************************
2120 *   Gfx11Lib::ValidateNonSwModeParams
2121 *
2122 *   @brief
2123 *       Validate compute surface info params except swizzle mode
2124 *
2125 *   @return
2126 *       TRUE if parameters are valid, FALSE otherwise
2127 ************************************************************************************************************************
2128 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2129 BOOL_32 Gfx11Lib::ValidateNonSwModeParams(
2130     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2131 {
2132     BOOL_32 valid = TRUE;
2133 
2134     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8))
2135     {
2136         ADDR_ASSERT_ALWAYS();
2137         valid = FALSE;
2138     }
2139     else if (pIn->flags.fmask == 1)
2140     {
2141         // There is no FMASK for GFX11 ASICs
2142         ADDR_ASSERT_ALWAYS();
2143         valid = FALSE;
2144     }
2145     else if (pIn->numSamples > 8)
2146     {
2147         // There is no EQAA support for GFX11 ASICs, so the max number of sample is 8
2148         ADDR_ASSERT_ALWAYS();
2149         valid = FALSE;
2150     }
2151     else if ((pIn->numFrags != 0) && (pIn->numSamples != pIn->numFrags))
2152     {
2153         // There is no EQAA support for GFX11 ASICs, so the number of sample has to be same as number of fragment
2154         ADDR_ASSERT_ALWAYS();
2155         valid = FALSE;
2156     }
2157 
2158     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2159     const AddrResourceType    rsrcType = pIn->resourceType;
2160     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2161     const BOOL_32             msaa     = (pIn->numSamples > 1);
2162     const BOOL_32             display  = flags.display;
2163     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2164     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2165     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2166     const BOOL_32             stereo   = flags.qbStereo;
2167 
2168     // Resource type check
2169     if (tex1d)
2170     {
2171         if (msaa || display || stereo)
2172         {
2173             ADDR_ASSERT_ALWAYS();
2174             valid = FALSE;
2175         }
2176     }
2177     else if (tex2d)
2178     {
2179         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2180         {
2181             ADDR_ASSERT_ALWAYS();
2182             valid = FALSE;
2183         }
2184     }
2185     else if (tex3d)
2186     {
2187         if (msaa || display || stereo)
2188         {
2189             ADDR_ASSERT_ALWAYS();
2190             valid = FALSE;
2191         }
2192     }
2193     else
2194     {
2195         ADDR_ASSERT_ALWAYS();
2196         valid = FALSE;
2197     }
2198 
2199     return valid;
2200 }
2201 
2202 /**
2203 ************************************************************************************************************************
2204 *   Gfx11Lib::ValidateSwModeParams
2205 *
2206 *   @brief
2207 *       Validate compute surface info related to swizzle mode
2208 *
2209 *   @return
2210 *       TRUE if parameters are valid, FALSE otherwise
2211 ************************************************************************************************************************
2212 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2213 BOOL_32 Gfx11Lib::ValidateSwModeParams(
2214     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2215 {
2216     BOOL_32 valid = TRUE;
2217 
2218     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2219     {
2220         ADDR_ASSERT_ALWAYS();
2221         valid = FALSE;
2222     }
2223     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2224     {
2225         ADDR_ASSERT_ALWAYS();
2226         valid = FALSE;
2227     }
2228 
2229     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2230     const AddrResourceType    rsrcType    = pIn->resourceType;
2231     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2232     const BOOL_32             msaa        = (pIn->numSamples > 1);
2233     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2234     const BOOL_32             color       = flags.color;
2235     const BOOL_32             display     = flags.display;
2236     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2237     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2238     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2239     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2240     const BOOL_32             linear      = IsLinear(swizzle);
2241     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2242     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2243     const BOOL_32             prt         = flags.prt;
2244 
2245     // Misc check
2246     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numSamples)))
2247     {
2248         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2249         ADDR_ASSERT_ALWAYS();
2250         valid = FALSE;
2251     }
2252 
2253     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2254     {
2255         ADDR_ASSERT_ALWAYS();
2256         valid = FALSE;
2257     }
2258 
2259     if ((pIn->bpp == 96) && (linear == FALSE))
2260     {
2261         ADDR_ASSERT_ALWAYS();
2262         valid = FALSE;
2263     }
2264 
2265     const UINT_32 swizzleMask = 1 << swizzle;
2266 
2267     // Resource type check
2268     if (tex1d)
2269     {
2270         if ((swizzleMask & Gfx11Rsrc1dSwModeMask) == 0)
2271         {
2272             ADDR_ASSERT_ALWAYS();
2273             valid = FALSE;
2274         }
2275     }
2276     else if (tex2d)
2277     {
2278         if ((swizzleMask & Gfx11Rsrc2dSwModeMask) == 0)
2279         {
2280             ADDR_ASSERT_ALWAYS();
2281             valid = FALSE;
2282         }
2283         else if (prt && ((swizzleMask & Gfx11Rsrc2dPrtSwModeMask) == 0))
2284         {
2285             ADDR_ASSERT_ALWAYS();
2286             valid = FALSE;
2287         }
2288     }
2289     else if (tex3d)
2290     {
2291         if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) ||
2292             (prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) ||
2293             (thin3d && ((swizzleMask & Gfx11Rsrc3dViewAs2dSwModeMask) == 0)))
2294         {
2295             ADDR_ASSERT_ALWAYS();
2296             valid = FALSE;
2297         }
2298     }
2299 
2300     // Swizzle type check
2301     if (linear)
2302     {
2303         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2304         {
2305             ADDR_ASSERT_ALWAYS();
2306             valid = FALSE;
2307         }
2308     }
2309     else if (IsZOrderSwizzle(swizzle))
2310     {
2311         if ((pIn->bpp > 64)                         ||
2312             (msaa && (color || (pIn->bpp > 32)))    ||
2313             ElemLib::IsBlockCompressed(pIn->format) ||
2314             ElemLib::IsMacroPixelPacked(pIn->format))
2315         {
2316             ADDR_ASSERT_ALWAYS();
2317             valid = FALSE;
2318         }
2319     }
2320     else if (IsStandardSwizzle(rsrcType, swizzle))
2321     {
2322         if (zbuffer || msaa)
2323         {
2324             ADDR_ASSERT_ALWAYS();
2325             valid = FALSE;
2326         }
2327     }
2328     else if (IsDisplaySwizzle(rsrcType, swizzle))
2329     {
2330         if (zbuffer || msaa)
2331         {
2332             ADDR_ASSERT_ALWAYS();
2333             valid = FALSE;
2334         }
2335     }
2336     else if (IsRtOptSwizzle(swizzle))
2337     {
2338         if (zbuffer)
2339         {
2340             ADDR_ASSERT_ALWAYS();
2341             valid = FALSE;
2342         }
2343     }
2344     else
2345     {
2346         ADDR_ASSERT_ALWAYS();
2347         valid = FALSE;
2348     }
2349 
2350     // Block type check
2351     if (blk256B)
2352     {
2353         if (zbuffer || tex3d || msaa)
2354         {
2355             ADDR_ASSERT_ALWAYS();
2356             valid = FALSE;
2357         }
2358     }
2359 
2360     return valid;
2361 }
2362 
2363 /**
2364 ************************************************************************************************************************
2365 *   Gfx11Lib::HwlComputeSurfaceInfoSanityCheck
2366 *
2367 *   @brief
2368 *       Compute surface info sanity check
2369 *
2370 *   @return
2371 *       ADDR_OK if all parameters are valid, ADDR_INVALIDPARAMS otherwise
2372 ************************************************************************************************************************
2373 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2374 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoSanityCheck(
2375     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2376     ) const
2377 {
2378     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2379 }
2380 
2381 /**
2382 ************************************************************************************************************************
2383 *   Gfx11Lib::HwlGetPreferredSurfaceSetting
2384 *
2385 *   @brief
2386 *       Internal function to get suggested surface information for client to use
2387 *
2388 *   @return
2389 *       ADDR_E_RETURNCODE
2390 ************************************************************************************************************************
2391 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2392 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
2393     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2394     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2395     ) const
2396 {
2397     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2398 
2399     if (pIn->flags.fmask)
2400     {
2401         // There is no FMASK for GFX11 ASICs.
2402         ADDR_ASSERT_ALWAYS();
2403 
2404         returnCode = ADDR_INVALIDPARAMS;
2405     }
2406     else
2407     {
2408         UINT_32 bpp    = pIn->bpp;
2409         UINT_32 width  = Max(pIn->width, 1u);
2410         UINT_32 height = Max(pIn->height, 1u);
2411 
2412         // Set format to INVALID will skip this conversion
2413         if (pIn->format != ADDR_FMT_INVALID)
2414         {
2415             ElemMode elemMode = ADDR_UNCOMPRESSED;
2416             UINT_32 expandX, expandY;
2417 
2418             // Get compression/expansion factors and element mode which indicates compression/expansion
2419             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2420                                                 &elemMode,
2421                                                 &expandX,
2422                                                 &expandY);
2423 
2424             UINT_32 basePitch = 0;
2425             GetElemLib()->AdjustSurfaceInfo(elemMode,
2426                                             expandX,
2427                                             expandY,
2428                                             &bpp,
2429                                             &basePitch,
2430                                             &width,
2431                                             &height);
2432         }
2433 
2434         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2435         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2436         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2437         const BOOL_32 msaa         = numSamples > 1;
2438 
2439         // Pre sanity check on non swizzle mode parameters
2440         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2441         localIn.flags        = pIn->flags;
2442         localIn.resourceType = pIn->resourceType;
2443         localIn.format       = pIn->format;
2444         localIn.bpp          = bpp;
2445         localIn.width        = width;
2446         localIn.height       = height;
2447         localIn.numSlices    = numSlices;
2448         localIn.numMipLevels = numMipLevels;
2449         localIn.numSamples   = numSamples;
2450         localIn.numFrags     = numSamples;
2451 
2452         if (ValidateNonSwModeParams(&localIn))
2453         {
2454             // Forbid swizzle mode(s) by client setting
2455             ADDR2_SWMODE_SET allowedSwModeSet = {};
2456             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx11LinearSwModeMask;
2457             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx11Blk256BSwModeMask;
2458             allowedSwModeSet.value |=
2459                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2460                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx11Blk4KBSwModeMask);
2461             allowedSwModeSet.value |=
2462                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2463                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick4KBSwModeMask : 0);
2464             allowedSwModeSet.value |=
2465                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2466                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask);
2467             allowedSwModeSet.value |=
2468                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2469                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick64KBSwModeMask : 0);
2470             allowedSwModeSet.value |=
2471                 pIn->forbiddenBlock.gfx11.thin256KB ? 0 :
2472                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask);
2473             allowedSwModeSet.value |=
2474                 pIn->forbiddenBlock.gfx11.thick256KB ? 0 :
2475                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick256KBSwModeMask : 0);
2476 
2477             if (pIn->preferredSwSet.value != 0)
2478             {
2479                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx11ZSwModeMask;
2480                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx11StandardSwModeMask;
2481                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx11DisplaySwModeMask;
2482                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx11RenderSwModeMask;
2483             }
2484 
2485             if (pIn->noXor)
2486             {
2487                 allowedSwModeSet.value &= ~Gfx11XorSwModeMask;
2488             }
2489 
2490             if (pIn->maxAlign > 0)
2491             {
2492                 if (pIn->maxAlign < Size256K)
2493                 {
2494                     allowedSwModeSet.value &= ~Gfx11Blk256KBSwModeMask;
2495                 }
2496 
2497                 if (pIn->maxAlign < Size64K)
2498                 {
2499                     allowedSwModeSet.value &= ~Gfx11Blk64KBSwModeMask;
2500                 }
2501 
2502                 if (pIn->maxAlign < Size4K)
2503                 {
2504                     allowedSwModeSet.value &= ~Gfx11Blk4KBSwModeMask;
2505                 }
2506 
2507                 if (pIn->maxAlign < Size256)
2508                 {
2509                     allowedSwModeSet.value &= ~Gfx11Blk256BSwModeMask;
2510                 }
2511             }
2512 
2513             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2514             switch (pIn->resourceType)
2515             {
2516                 case ADDR_RSRC_TEX_1D:
2517                     allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
2518                     break;
2519 
2520                 case ADDR_RSRC_TEX_2D:
2521                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
2522                     break;
2523 
2524                 case ADDR_RSRC_TEX_3D:
2525                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
2526 
2527                     if (pIn->flags.view3dAs2dArray)
2528                     {
2529                         // SW_LINEAR can be used for 3D thin images, including BCn image format.
2530                         allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
2531                     }
2532                     break;
2533 
2534                 default:
2535                     ADDR_ASSERT_ALWAYS();
2536                     allowedSwModeSet.value = 0;
2537                     break;
2538             }
2539 
2540             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2541                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2542                 (bpp > 64)                               ||
2543                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2544             {
2545                 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
2546             }
2547 
2548             if (pIn->format == ADDR_FMT_32_32_32)
2549             {
2550                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2551             }
2552 
2553             if (msaa)
2554             {
2555                 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
2556             }
2557 
2558             if (pIn->flags.depth || pIn->flags.stencil)
2559             {
2560                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2561             }
2562 
2563             if (pIn->flags.requireMetadata)
2564             {
2565                 // Linear images can never be compressed
2566                 allowedSwModeSet.value &= ~Gfx11LinearSwModeMask;
2567                 if (pIn->flags.color)
2568                 {
2569                     // 256B formats must not be pipe-aligned (can't use in CB)
2570                     allowedSwModeSet.value &= ~(Gfx11Blk256BSwModeMask);
2571                     // D/S formats must not be pipe-aligned
2572                     allowedSwModeSet.value &= ~(Gfx11DisplaySwModeMask | Gfx11StandardSwModeMask);
2573                 }
2574             }
2575 
2576             if (pIn->flags.display)
2577             {
2578                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2579             }
2580 
2581             if (allowedSwModeSet.value != 0)
2582             {
2583 #if DEBUG
2584                 // Post sanity check, at least AddrLib should accept the output generated by its own
2585                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2586 
2587                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2588                 {
2589                     if (validateSwModeSet & 1)
2590                     {
2591                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2592                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
2593                     }
2594 
2595                     validateSwModeSet >>= 1;
2596                 }
2597 #endif
2598 
2599                 pOut->resourceType   = pIn->resourceType;
2600                 pOut->validSwModeSet = allowedSwModeSet;
2601                 pOut->canXor         = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
2602 
2603                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &(pOut->validBlockSet));
2604                 GetAllowedSwSet(allowedSwModeSet, &(pOut->validSwTypeSet));
2605 
2606                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2607 
2608                 if (pOut->clientPreferredSwSet.value == 0)
2609                 {
2610                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
2611                 }
2612 
2613                 // Apply optional restrictions
2614                 if (pIn->flags.needEquation)
2615                 {
2616                     UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
2617                                                                         ADDR_MAX_LEGACY_EQUATION_COMP;
2618                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
2619                 }
2620 
2621                 if (allowedSwModeSet.value == Gfx11LinearSwModeMask)
2622                 {
2623                     pOut->swizzleMode = ADDR_SW_LINEAR;
2624                 }
2625                 else
2626                 {
2627                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
2628 
2629                     if ((height > 1) && (computeMinSize == FALSE))
2630                     {
2631                         // Always ignore linear swizzle mode if:
2632                         // 1. This is a (2D/3D) resource with height > 1
2633                         // 2. Client doesn't require computing minimize size
2634                         allowedSwModeSet.swLinear = 0;
2635                     }
2636 
2637                     ADDR2_BLOCK_SET allowedBlockSet = {};
2638                     GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);
2639 
2640                     // Determine block size if there are 2 or more block type candidates
2641                     if (IsPow2(allowedBlockSet.value) == FALSE)
2642                     {
2643                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
2644 
2645                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
2646 
2647                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2648                         {
2649                             swMode[AddrBlockThick4KB]   = ADDR_SW_4KB_S_X;
2650                             swMode[AddrBlockThin64KB]   = ADDR_SW_64KB_R_X;
2651                             swMode[AddrBlockThick64KB]  = ADDR_SW_64KB_S_X;
2652                             swMode[AddrBlockThin256KB]  = ADDR_SW_256KB_R_X;
2653                             swMode[AddrBlockThick256KB] = ADDR_SW_256KB_S_X;
2654                         }
2655                         else
2656                         {
2657                             swMode[AddrBlockMicro]     = ADDR_SW_256B_D;
2658                             swMode[AddrBlockThin4KB]   = ADDR_SW_4KB_D_X;
2659                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_D_X;
2660                             swMode[AddrBlockThin256KB] = ADDR_SW_256KB_D_X;
2661                         }
2662 
2663                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
2664 
2665                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
2666                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
2667                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2668                         UINT_32       minSizeBlk         = AddrBlockMicro;
2669                         UINT_64       minSize            = 0;
2670 
2671                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
2672 
2673                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
2674                         {
2675                             if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2676                             {
2677                                 localIn.swizzleMode = swMode[i];
2678 
2679                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
2680                                 {
2681                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
2682                                 }
2683                                 else
2684                                 {
2685                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
2686                                 }
2687 
2688                                 if (returnCode == ADDR_OK)
2689                                 {
2690                                     padSize[i] = localOut.surfSize;
2691 
2692                                     if ((minSize == 0) ||
2693                                         Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
2694                                     {
2695                                         minSize    = padSize[i];
2696                                         minSizeBlk = i;
2697                                     }
2698                                 }
2699                                 else
2700                                 {
2701                                     ADDR_ASSERT_ALWAYS();
2702                                     break;
2703                                 }
2704                             }
2705                         }
2706 
2707                         if (pIn->memoryBudget > 1.0)
2708                         {
2709                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
2710                             // smaller-block type again in coming loop
2711                             switch (minSizeBlk)
2712                             {
2713                                 case AddrBlockThick256KB:
2714                                     allowedBlockSet.gfx11.thin256KB = 0;
2715                                 case AddrBlockThin256KB:
2716                                     allowedBlockSet.macroThick64KB = 0;
2717                                 case AddrBlockThick64KB:
2718                                     allowedBlockSet.macroThin64KB = 0;
2719                                 case AddrBlockThin64KB:
2720                                     allowedBlockSet.macroThick4KB = 0;
2721                                 case AddrBlockThick4KB:
2722                                     allowedBlockSet.macroThin4KB = 0;
2723                                 case AddrBlockThin4KB:
2724                                     allowedBlockSet.micro  = 0;
2725                                 case AddrBlockMicro:
2726                                     allowedBlockSet.linear = 0;
2727                                 case AddrBlockLinear:
2728                                     break;
2729 
2730                                 default:
2731                                     ADDR_ASSERT_ALWAYS();
2732                                     break;
2733                             }
2734 
2735                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2736                             {
2737                                 if ((i != minSizeBlk) &&
2738                                     Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2739                                 {
2740                                     if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
2741                                     {
2742                                         // Clear the block type if the memory waste is unacceptable
2743                                         allowedBlockSet.value &= ~(1u << (i - 1));
2744                                     }
2745                                 }
2746                             }
2747 
2748                             // Remove linear block type if 2 or more block types are allowed
2749                             if (IsPow2(allowedBlockSet.value) == FALSE)
2750                             {
2751                                 allowedBlockSet.linear = 0;
2752                             }
2753 
2754                             // Select the biggest allowed block type
2755                             minSizeBlk = Log2(allowedBlockSet.value) + 1;
2756 
2757                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
2758                             {
2759                                 minSizeBlk = AddrBlockLinear;
2760                             }
2761                         }
2762 
2763                         switch (minSizeBlk)
2764                         {
2765                             case AddrBlockLinear:
2766                                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2767                                 break;
2768 
2769                             case AddrBlockMicro:
2770                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2771                                 allowedSwModeSet.value &= Gfx11Blk256BSwModeMask;
2772                                 break;
2773 
2774                             case AddrBlockThin4KB:
2775                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2776                                 allowedSwModeSet.value &= Gfx11Blk4KBSwModeMask;
2777                                 break;
2778 
2779                             case AddrBlockThick4KB:
2780                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2781                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick4KBSwModeMask;
2782                                 break;
2783 
2784                             case AddrBlockThin64KB:
2785                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2786                                                           Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask;
2787                                 break;
2788 
2789                             case AddrBlockThick64KB:
2790                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2791                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick64KBSwModeMask;
2792                                 break;
2793 
2794                             case AddrBlockThin256KB:
2795                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2796                                                           Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask;
2797                                 break;
2798 
2799                             case AddrBlockThick256KB:
2800                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2801                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick256KBSwModeMask;
2802                                 break;
2803 
2804                             default:
2805                                 ADDR_ASSERT_ALWAYS();
2806                                 allowedSwModeSet.value = 0;
2807                                 break;
2808                         }
2809                     }
2810 
2811                     // Block type should be determined.
2812                     GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);
2813                     ADDR_ASSERT(IsPow2(allowedBlockSet.value));
2814 
2815                     ADDR2_SWTYPE_SET allowedSwSet = {};
2816                     GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);
2817 
2818                     // Determine swizzle type if there are 2 or more swizzle type candidates
2819                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
2820                     {
2821                         if (ElemLib::IsBlockCompressed(pIn->format))
2822                         {
2823                             if (allowedSwSet.sw_D)
2824                             {
2825                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2826                             }
2827                             else if (allowedSwSet.sw_S)
2828                             {
2829                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2830                             }
2831                             else
2832                             {
2833                                 ADDR_ASSERT(allowedSwSet.sw_R);
2834                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2835                             }
2836                         }
2837                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
2838                         {
2839                             if (allowedSwSet.sw_S)
2840                             {
2841                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2842                             }
2843                             else if (allowedSwSet.sw_D)
2844                             {
2845                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2846                             }
2847                             else
2848                             {
2849                                 ADDR_ASSERT(allowedSwSet.sw_R);
2850                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2851                             }
2852                         }
2853                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2854                         {
2855                             if (pIn->flags.color && allowedSwSet.sw_R)
2856                             {
2857                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2858                             }
2859                             else if (allowedSwSet.sw_S)
2860                             {
2861                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2862                             }
2863                             else if (allowedSwSet.sw_D)
2864                             {
2865                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2866                             }
2867                             else
2868                             {
2869                                 ADDR_ASSERT(allowedSwSet.sw_Z);
2870                                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2871                             }
2872                         }
2873                         else
2874                         {
2875                             if (allowedSwSet.sw_R)
2876                             {
2877                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2878                             }
2879                             else if (allowedSwSet.sw_D)
2880                             {
2881                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2882                             }
2883                             else if (allowedSwSet.sw_Z)
2884                             {
2885                                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2886                             }
2887                             else
2888                             {
2889                                 ADDR_ASSERT_ALWAYS();
2890                             }
2891                         }
2892 
2893                         // Swizzle type should be determined.
2894                         GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);
2895                         ADDR_ASSERT(IsPow2(allowedSwSet.value));
2896                     }
2897 
2898                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2899                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2900                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2901                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
2902                 }
2903             }
2904             else
2905             {
2906                 // Invalid combination...
2907                 ADDR_ASSERT_ALWAYS();
2908                 returnCode = ADDR_INVALIDPARAMS;
2909             }
2910         }
2911         else
2912         {
2913             // Invalid combination...
2914             ADDR_ASSERT_ALWAYS();
2915             returnCode = ADDR_INVALIDPARAMS;
2916         }
2917     }
2918 
2919     return returnCode;
2920 }
2921 
2922 /**
2923 ************************************************************************************************************************
2924 *   Gfx11Lib::HwlGetPossibleSwizzleModes
2925 *
2926 *   @brief
2927 *       Returns a list of swizzle modes that are valid from the hardware's perspective for the client to choose from
2928 *
2929 *   @return
2930 *       ADDR_E_RETURNCODE
2931 ************************************************************************************************************************
2932 */
HwlGetPossibleSwizzleModes(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2933 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPossibleSwizzleModes(
2934     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2935     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2936     ) const
2937 {
2938     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2939 
2940     if (pIn->flags.fmask)
2941     {
2942         // There is no FMASK for GFX11 ASICs.
2943         ADDR_ASSERT_ALWAYS();
2944 
2945         returnCode = ADDR_INVALIDPARAMS;
2946     }
2947     else
2948     {
2949         UINT_32 bpp    = pIn->bpp;
2950         UINT_32 width  = Max(pIn->width, 1u);
2951         UINT_32 height = Max(pIn->height, 1u);
2952 
2953         // Set format to INVALID will skip this conversion
2954         if (pIn->format != ADDR_FMT_INVALID)
2955         {
2956             ElemMode elemMode = ADDR_UNCOMPRESSED;
2957             UINT_32 expandX, expandY;
2958 
2959             // Get compression/expansion factors and element mode which indicates compression/expansion
2960             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2961                 &elemMode,
2962                 &expandX,
2963                 &expandY);
2964 
2965             UINT_32 basePitch = 0;
2966             GetElemLib()->AdjustSurfaceInfo(elemMode,
2967                 expandX,
2968                 expandY,
2969                 &bpp,
2970                 &basePitch,
2971                 &width,
2972                 &height);
2973         }
2974 
2975         const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
2976         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2977         const UINT_32 numSamples   = Max(pIn->numSamples, 1u);
2978         const BOOL_32 msaa         = numSamples > 1;
2979 
2980         // Pre sanity check on non swizzle mode parameters
2981         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2982         localIn.flags = pIn->flags;
2983         localIn.resourceType = pIn->resourceType;
2984         localIn.format = pIn->format;
2985         localIn.bpp = bpp;
2986         localIn.width = width;
2987         localIn.height = height;
2988         localIn.numSlices = numSlices;
2989         localIn.numMipLevels = numMipLevels;
2990         localIn.numSamples = numSamples;
2991         localIn.numFrags = numSamples;
2992 
2993         if (ValidateNonSwModeParams(&localIn))
2994         {
2995             // Allow appropriate swizzle modes by default
2996             ADDR2_SWMODE_SET allowedSwModeSet = {};
2997             allowedSwModeSet.value |= Gfx11LinearSwModeMask | Gfx11Blk256BSwModeMask;
2998             if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2999             {
3000                 allowedSwModeSet.value |= Gfx11Rsrc3dThick4KBSwModeMask  |
3001                                           Gfx11Rsrc3dThin64KBSwModeMask  |
3002                                           Gfx11Rsrc3dThick64KBSwModeMask |
3003                                           Gfx11Rsrc3dThin256KBSwModeMask |
3004                                           Gfx11Rsrc3dThick256KBSwModeMask;
3005             }
3006             else
3007             {
3008                 allowedSwModeSet.value |= Gfx11Blk4KBSwModeMask | Gfx11Blk64KBSwModeMask | Gfx11Blk256KBSwModeMask;
3009             }
3010 
3011             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3012             switch (pIn->resourceType)
3013             {
3014             case ADDR_RSRC_TEX_1D:
3015                 allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
3016                 break;
3017 
3018             case ADDR_RSRC_TEX_2D:
3019                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
3020                 break;
3021 
3022             case ADDR_RSRC_TEX_3D:
3023                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
3024 
3025                 if (pIn->flags.view3dAs2dArray)
3026                 {
3027                     // SW_LINEAR can be used for 3D thin images, including BCn image format.
3028                     allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
3029                 }
3030                 break;
3031 
3032             default:
3033                 ADDR_ASSERT_ALWAYS();
3034                 allowedSwModeSet.value = 0;
3035                 break;
3036             }
3037 
3038             // TODO: figure out if following restrictions are correct on GFX11...
3039             if (ElemLib::IsBlockCompressed(pIn->format) ||
3040                 ElemLib::IsMacroPixelPacked(pIn->format) ||
3041                 (bpp > 64) ||
3042                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3043             {
3044                 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
3045             }
3046 
3047             if (pIn->format == ADDR_FMT_32_32_32)
3048             {
3049                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
3050             }
3051 
3052             if (msaa)
3053             {
3054                 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
3055             }
3056 
3057             if (pIn->flags.depth || pIn->flags.stencil)
3058             {
3059                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
3060             }
3061 
3062             if (pIn->flags.requireMetadata)
3063             {
3064                 // Linear images can never be compressed
3065                 allowedSwModeSet.value &= ~Gfx11LinearSwModeMask;
3066                 if (pIn->flags.color)
3067                 {
3068                     // 256B formats must not be pipe-aligned (can't use in CB)
3069                     allowedSwModeSet.value &= ~(Gfx11Blk256BSwModeMask);
3070                     // D/S formats must not be pipe-aligned
3071                     allowedSwModeSet.value &= ~(Gfx11DisplaySwModeMask | Gfx11StandardSwModeMask);
3072                 }
3073             }
3074 
3075             if (pIn->flags.display)
3076             {
3077                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3078             }
3079 
3080             if (allowedSwModeSet.value != 0)
3081             {
3082 #if DEBUG
3083                 // Post sanity check, at least AddrLib should accept the output generated by its own
3084                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3085 
3086                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3087                 {
3088                     if (validateSwModeSet & 1)
3089                     {
3090                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3091                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
3092                     }
3093 
3094                     validateSwModeSet >>= 1;
3095                 }
3096 #endif
3097 
3098                 pOut->resourceType = pIn->resourceType;
3099                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3100 
3101                 if (pOut->clientPreferredSwSet.value == 0)
3102                 {
3103                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
3104                 }
3105 
3106                 if (pIn->flags.needEquation)
3107                 {
3108                     UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
3109                                                                         ADDR_MAX_LEGACY_EQUATION_COMP;
3110                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3111                 }
3112 
3113                 pOut->validSwModeSet = allowedSwModeSet;
3114                 pOut->canXor = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
3115             }
3116             else
3117             {
3118                 // Invalid combination...
3119                 ADDR_ASSERT_ALWAYS();
3120                 returnCode = ADDR_INVALIDPARAMS;
3121             }
3122         }
3123         else
3124         {
3125             // Invalid combination...
3126             ADDR_ASSERT_ALWAYS();
3127             returnCode = ADDR_INVALIDPARAMS;
3128         }
3129     }
3130 
3131     return returnCode;
3132 }
3133 
3134 /**
3135 ************************************************************************************************************************
3136 *   Gfx11Lib::HwlGetAllowedBlockSet
3137 *
3138 *   @brief
3139 *       Returns the set of allowed block sizes given the allowed swizzle modes and resource type
3140 *
3141 *   @return
3142 *       ADDR_E_RETURNCODE
3143 ************************************************************************************************************************
3144 */
HwlGetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet,AddrResourceType rsrcType,ADDR2_BLOCK_SET * pAllowedBlockSet) const3145 ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedBlockSet(
3146     ADDR2_SWMODE_SET allowedSwModeSet,  ///< [in] allowed swizzle modes
3147     AddrResourceType rsrcType,          ///< [in] resource type
3148     ADDR2_BLOCK_SET* pAllowedBlockSet   ///< [out] allowed block sizes
3149     ) const
3150 {
3151     ADDR2_BLOCK_SET allowedBlockSet = {};
3152 
3153     allowedBlockSet.micro  = (allowedSwModeSet.value & Gfx11Blk256BSwModeMask) ? TRUE : FALSE;
3154     allowedBlockSet.linear = (allowedSwModeSet.value & Gfx11LinearSwModeMask)  ? TRUE : FALSE;
3155 
3156     if (rsrcType == ADDR_RSRC_TEX_3D)
3157     {
3158         allowedBlockSet.macroThick4KB    = (allowedSwModeSet.value & Gfx11Rsrc3dThick4KBSwModeMask)   ? TRUE : FALSE;
3159         allowedBlockSet.macroThin64KB    = (allowedSwModeSet.value & Gfx11Rsrc3dThin64KBSwModeMask)   ? TRUE : FALSE;
3160         allowedBlockSet.macroThick64KB   = (allowedSwModeSet.value & Gfx11Rsrc3dThick64KBSwModeMask)  ? TRUE : FALSE;
3161         allowedBlockSet.gfx11.thin256KB  = (allowedSwModeSet.value & Gfx11Rsrc3dThin256KBSwModeMask)  ? TRUE : FALSE;
3162         allowedBlockSet.gfx11.thick256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick256KBSwModeMask) ? TRUE : FALSE;
3163     }
3164     else
3165     {
3166         allowedBlockSet.macroThin4KB    = (allowedSwModeSet.value & Gfx11Blk4KBSwModeMask)   ? TRUE : FALSE;
3167         allowedBlockSet.macroThin64KB   = (allowedSwModeSet.value & Gfx11Blk64KBSwModeMask)  ? TRUE : FALSE;
3168         allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Blk256KBSwModeMask) ? TRUE : FALSE;
3169     }
3170 
3171     *pAllowedBlockSet = allowedBlockSet;
3172     return ADDR_OK;
3173 }
3174 
3175 /**
3176 ************************************************************************************************************************
3177 *   Gfx11Lib::HwlGetAllowedSwSet
3178 *
3179 *   @brief
3180 *       Returns the set of allowed swizzle types given the allowed swizzle modes
3181 *   @return
3182 *       ADDR_E_RETURNCODE
3183 ************************************************************************************************************************
3184 */
HwlGetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet,ADDR2_SWTYPE_SET * pAllowedSwSet) const3185 ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedSwSet(
3186     ADDR2_SWMODE_SET  allowedSwModeSet, ///< [in] allowed swizzle modes
3187     ADDR2_SWTYPE_SET* pAllowedSwSet     ///< [out] allowed swizzle types
3188     ) const
3189 {
3190     ADDR2_SWTYPE_SET allowedSwSet = {};
3191 
3192     allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx11ZSwModeMask)        ? TRUE : FALSE;
3193     allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx11StandardSwModeMask) ? TRUE : FALSE;
3194     allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx11DisplaySwModeMask)  ? TRUE : FALSE;
3195     allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx11RenderSwModeMask)   ? TRUE : FALSE;
3196 
3197     *pAllowedSwSet = allowedSwSet;
3198     return ADDR_OK;
3199 }
3200 
3201 /**
3202 ************************************************************************************************************************
3203 *   Gfx11Lib::ComputeStereoInfo
3204 *
3205 *   @brief
3206 *       Compute height alignment and right eye pipeBankXor for stereo surface
3207 *
3208 *   @return
3209 *       Error code
3210 *
3211 ************************************************************************************************************************
3212 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3213 ADDR_E_RETURNCODE Gfx11Lib::ComputeStereoInfo(
3214     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3215     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3216     UINT_32*                                pRightXor   ///< Right eye xor
3217     ) const
3218 {
3219     ADDR_E_RETURNCODE ret = ADDR_OK;
3220 
3221     *pRightXor = 0;
3222 
3223     if (IsNonPrtXor(pIn->swizzleMode))
3224     {
3225         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3226         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3227         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3228         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3229         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3230 
3231         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3232         {
3233             UINT_32 yMax     = 0;
3234             UINT_32 yPosMask = 0;
3235 
3236             // First get "max y bit"
3237             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3238             {
3239                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3240 
3241                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3242                     (m_equationTable[eqIndex].addr[i].index > yMax))
3243                 {
3244                     yMax = m_equationTable[eqIndex].addr[i].index;
3245                 }
3246 
3247                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3248                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3249                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3250                 {
3251                     yMax = m_equationTable[eqIndex].xor1[i].index;
3252                 }
3253 
3254                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3255                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3256                     (m_equationTable[eqIndex].xor2[i].index > yMax))
3257                 {
3258                     yMax = m_equationTable[eqIndex].xor2[i].index;
3259                 }
3260             }
3261 
3262             // Then loop again for populating a position mask of "max Y bit"
3263             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3264             {
3265                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3266                     (m_equationTable[eqIndex].addr[i].index == yMax))
3267                 {
3268                     yPosMask |= 1u << i;
3269                 }
3270                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3271                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3272                          (m_equationTable[eqIndex].xor1[i].index == yMax))
3273                 {
3274                     yPosMask |= 1u << i;
3275                 }
3276                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3277                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3278                          (m_equationTable[eqIndex].xor2[i].index == yMax))
3279                 {
3280                     yPosMask |= 1u << i;
3281                 }
3282             }
3283 
3284             const UINT_32 additionalAlign = 1 << yMax;
3285 
3286             if (additionalAlign >= *pAlignY)
3287             {
3288                 *pAlignY = additionalAlign;
3289 
3290                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3291 
3292                 if ((alignedHeight >> yMax) & 1)
3293                 {
3294                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3295                 }
3296             }
3297         }
3298         else
3299         {
3300             ret = ADDR_INVALIDPARAMS;
3301         }
3302     }
3303 
3304     return ret;
3305 }
3306 
3307 /**
3308 ************************************************************************************************************************
3309 *   Gfx11Lib::HwlComputeSurfaceInfoTiled
3310 *
3311 *   @brief
3312 *       Internal function to calculate alignment for tiled surface
3313 *
3314 *   @return
3315 *       ADDR_E_RETURNCODE
3316 ************************************************************************************************************************
3317 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3318 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoTiled(
3319      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3320      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3321      ) const
3322 {
3323     ADDR_E_RETURNCODE ret;
3324 
3325     // Mip chain dimesion and epitch has no meaning in GFX11, set to default value
3326     pOut->mipChainPitch    = 0;
3327     pOut->mipChainHeight   = 0;
3328     pOut->mipChainSlice    = 0;
3329     pOut->epitchIsHeight   = FALSE;
3330 
3331     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3332     pOut->mipChainInTail   = FALSE;
3333     pOut->firstMipIdInTail = pIn->numMipLevels;
3334 
3335     if (IsBlock256b(pIn->swizzleMode))
3336     {
3337         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3338     }
3339     else
3340     {
3341         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3342     }
3343 
3344     return ret;
3345 }
3346 
3347 /**
3348 ************************************************************************************************************************
3349 *   Gfx11Lib::ComputeSurfaceInfoMicroTiled
3350 *
3351 *   @brief
3352 *       Internal function to calculate alignment for micro tiled surface
3353 *
3354 *   @return
3355 *       ADDR_E_RETURNCODE
3356 ************************************************************************************************************************
3357 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3358 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMicroTiled(
3359      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3360      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3361      ) const
3362 {
3363     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3364                                                          &pOut->blockHeight,
3365                                                          &pOut->blockSlices,
3366                                                          pIn->bpp,
3367                                                          pIn->numSamples,
3368                                                          pIn->resourceType,
3369                                                          pIn->swizzleMode);
3370 
3371     if (ret == ADDR_OK)
3372     {
3373         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3374 
3375         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3376         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3377         pOut->numSlices = pIn->numSlices;
3378         pOut->baseAlign = blockSize;
3379 
3380         if (pIn->numMipLevels > 1)
3381         {
3382             const UINT_32 mip0Width    = pIn->width;
3383             const UINT_32 mip0Height   = pIn->height;
3384             UINT_64       mipSliceSize = 0;
3385 
3386             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3387             {
3388                 UINT_32 mipWidth, mipHeight;
3389 
3390                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3391 
3392                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3393                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3394 
3395                 if (pOut->pMipInfo != NULL)
3396                 {
3397                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3398                     pOut->pMipInfo[i].height           = mipActualHeight;
3399                     pOut->pMipInfo[i].depth            = 1;
3400                     pOut->pMipInfo[i].offset           = mipSliceSize;
3401                     pOut->pMipInfo[i].mipTailOffset    = 0;
3402                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3403                 }
3404 
3405                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3406             }
3407 
3408             pOut->sliceSize = mipSliceSize;
3409             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3410         }
3411         else
3412         {
3413             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3414             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3415 
3416             if (pOut->pMipInfo != NULL)
3417             {
3418                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3419                 pOut->pMipInfo[0].height           = pOut->height;
3420                 pOut->pMipInfo[0].depth            = 1;
3421                 pOut->pMipInfo[0].offset           = 0;
3422                 pOut->pMipInfo[0].mipTailOffset    = 0;
3423                 pOut->pMipInfo[0].macroBlockOffset = 0;
3424             }
3425         }
3426 
3427     }
3428 
3429     return ret;
3430 }
3431 
3432 /**
3433 ************************************************************************************************************************
3434 *   Gfx11Lib::ComputeSurfaceInfoMacroTiled
3435 *
3436 *   @brief
3437 *       Internal function to calculate alignment for macro tiled surface
3438 *
3439 *   @return
3440 *       ADDR_E_RETURNCODE
3441 ************************************************************************************************************************
3442 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3443 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMacroTiled(
3444      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3445      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3446      ) const
3447 {
3448     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3449                                                                 &pOut->blockHeight,
3450                                                                 &pOut->blockSlices,
3451                                                                 pIn->bpp,
3452                                                                 pIn->numSamples,
3453                                                                 pIn->resourceType,
3454                                                                 pIn->swizzleMode);
3455 
3456     if (returnCode == ADDR_OK)
3457     {
3458         UINT_32 heightAlign = pOut->blockHeight;
3459 
3460         if (pIn->flags.qbStereo)
3461         {
3462             UINT_32 rightXor = 0;
3463 
3464             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3465 
3466             if (returnCode == ADDR_OK)
3467             {
3468                 pOut->pStereoInfo->rightSwizzle = rightXor;
3469             }
3470         }
3471 
3472         if (returnCode == ADDR_OK)
3473         {
3474             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3475             const UINT_32 blockSize     = 1 << blockSizeLog2;
3476 
3477             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3478             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3479             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3480             pOut->baseAlign = blockSize;
3481 
3482             if (pIn->numMipLevels > 1)
3483             {
3484                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3485                                                                 pIn->swizzleMode,
3486                                                                 pOut->blockWidth,
3487                                                                 pOut->blockHeight,
3488                                                                 pOut->blockSlices);
3489                 const UINT_32 mip0Width         = pIn->width;
3490                 const UINT_32 mip0Height        = pIn->height;
3491                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3492                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3493                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3494                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3495                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3496                 UINT_64       mipChainSliceSize = 0;
3497                 UINT_64       mipSize[MaxMipLevels];
3498                 UINT_64       mipSliceSize[MaxMipLevels];
3499 
3500                 // For htile, we need to make z16 and stencil enter the mip tail at the same time as z32 would
3501                 Dim3d fixedTailMaxDim = tailMaxDim;
3502                 if (IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3503                 {
3504                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3505                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3506                 }
3507 
3508                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3509                 {
3510                     UINT_32 mipWidth, mipHeight, mipDepth;
3511 
3512                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3513 
3514                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3515                     {
3516                         firstMipInTail     = i;
3517                         mipChainSliceSize += blockSize / pOut->blockSlices;
3518                         break;
3519                     }
3520                     else
3521                     {
3522                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3523                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3524                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3525                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3526 
3527                         mipSize[i]         = sliceSize * depth;
3528                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3529                         mipChainSliceSize += sliceSize;
3530 
3531                         if (pOut->pMipInfo != NULL)
3532                         {
3533                             pOut->pMipInfo[i].pitch  = pitch;
3534                             pOut->pMipInfo[i].height = height;
3535                             pOut->pMipInfo[i].depth  = depth;
3536                         }
3537                     }
3538                 }
3539 
3540                 pOut->sliceSize        = mipChainSliceSize;
3541                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3542                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3543                 pOut->firstMipIdInTail = firstMipInTail;
3544 
3545                 if (pOut->pMipInfo != NULL)
3546                 {
3547                     UINT_64 offset         = 0;
3548                     UINT_64 macroBlkOffset = 0;
3549                     UINT_32 tailMaxDepth   = 0;
3550 
3551                     if (firstMipInTail != pIn->numMipLevels)
3552                     {
3553                         UINT_32 mipWidth, mipHeight;
3554 
3555                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3556                                    &mipWidth, &mipHeight, &tailMaxDepth);
3557 
3558                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3559                         macroBlkOffset = blockSize;
3560                     }
3561 
3562                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3563                     {
3564                         pOut->pMipInfo[i].offset           = offset;
3565                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3566                         pOut->pMipInfo[i].mipTailOffset    = 0;
3567 
3568                         offset         += mipSize[i];
3569                         macroBlkOffset += mipSliceSize[i];
3570                     }
3571 
3572                     UINT_32 pitch  = tailMaxDim.w;
3573                     UINT_32 height = tailMaxDim.h;
3574                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3575 
3576                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3577 
3578                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3579                     {
3580                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3581                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3582 
3583                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3584                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3585                         pOut->pMipInfo[i].macroBlockOffset = 0;
3586 
3587                         pOut->pMipInfo[i].pitch  = pitch;
3588                         pOut->pMipInfo[i].height = height;
3589                         pOut->pMipInfo[i].depth  = depth;
3590 
3591                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3592                                        ((mipOffset >> 10) & 2)  |
3593                                        ((mipOffset >> 11) & 4)  |
3594                                        ((mipOffset >> 12) & 8)  |
3595                                        ((mipOffset >> 13) & 16) |
3596                                        ((mipOffset >> 14) & 32);
3597                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3598                                        ((mipOffset >> 9)  & 2)  |
3599                                        ((mipOffset >> 10) & 4)  |
3600                                        ((mipOffset >> 11) & 8)  |
3601                                        ((mipOffset >> 12) & 16) |
3602                                        ((mipOffset >> 13) & 32);
3603 
3604                         if (blockSizeLog2 & 1)
3605                         {
3606                             const UINT_32 temp = mipX;
3607                             mipX = mipY;
3608                             mipY = temp;
3609 
3610                             if (index & 1)
3611                             {
3612                                 mipY = (mipY << 1) | (mipX & 1);
3613                                 mipX = mipX >> 1;
3614                             }
3615                         }
3616 
3617                         if (isThin)
3618                         {
3619                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3620                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3621                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3622 
3623                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3624                             height = Max(height >> 1, Block256_2d[index].h);
3625                             depth  = 1;
3626                         }
3627                         else
3628                         {
3629                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3630                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3631                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3632 
3633                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3634                             height = Max(height >> 1, Block256_3d[index].h);
3635                             depth  = PowTwoAlign(Max(depth  >> 1, 1u), Block256_3d[index].d);
3636                         }
3637                     }
3638                 }
3639             }
3640             else
3641             {
3642                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numSamples;
3643                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3644 
3645                 if (pOut->pMipInfo != NULL)
3646                 {
3647                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3648                     pOut->pMipInfo[0].height           = pOut->height;
3649                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3650                     pOut->pMipInfo[0].offset           = 0;
3651                     pOut->pMipInfo[0].mipTailOffset    = 0;
3652                     pOut->pMipInfo[0].macroBlockOffset = 0;
3653                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3654                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3655                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3656                 }
3657             }
3658         }
3659     }
3660 
3661     return returnCode;
3662 }
3663 
3664 /**
3665 ************************************************************************************************************************
3666 *   Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled
3667 *
3668 *   @brief
3669 *       Internal function to calculate address from coord for tiled swizzle surface
3670 *
3671 *   @return
3672 *       ADDR_E_RETURNCODE
3673 ************************************************************************************************************************
3674 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3675 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
3676      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3677      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3678      ) const
3679 {
3680     ADDR_E_RETURNCODE ret;
3681 
3682     if (IsBlock256b(pIn->swizzleMode))
3683     {
3684         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3685     }
3686     else
3687     {
3688         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3689     }
3690 
3691     return ret;
3692 }
3693 
3694 /**
3695 ************************************************************************************************************************
3696 *   Gfx11Lib::HwlCopyMemToSurface
3697 *
3698 *   @brief
3699 *       Copy multiple regions from memory to a non-linear surface.
3700 *
3701 *   @return
3702 *       Error or success.
3703 ************************************************************************************************************************
3704 */
HwlCopyMemToSurface(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const3705 ADDR_E_RETURNCODE Gfx11Lib::HwlCopyMemToSurface(
3706     const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
3707     const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
3708     UINT_32                             regionCount
3709     ) const
3710 {
3711     // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
3712     // optimized for a particular micro-swizzle mode if available.
3713     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
3714     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3715     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
3716     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3717     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3718 
3719     if (pIn->numSamples > 1)
3720     {
3721         // TODO: MSAA
3722         returnCode = ADDR_NOTIMPLEMENTED;
3723     }
3724     if (IsBlockVariable(pIn->swizzleMode))
3725     {
3726         // TODO: larger LUTs for worst-case 256KB swizzle.
3727         returnCode = ADDR_NOTIMPLEMENTED;
3728     }
3729 
3730     localIn.size         = sizeof(localIn);
3731     localIn.flags        = pIn->flags;
3732     localIn.swizzleMode  = pIn->swizzleMode;
3733     localIn.resourceType = pIn->resourceType;
3734     localIn.format       = pIn->format;
3735     localIn.bpp          = pIn->bpp;
3736     localIn.width        = Max(pIn->unAlignedDims.width,  1u);
3737     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
3738     localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
3739     localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
3740     localIn.numSamples   = Max(pIn->numSamples,           1u);
3741 
3742     localOut.size     = sizeof(localOut);
3743     localOut.pMipInfo = mipInfo;
3744 
3745     if (returnCode == ADDR_OK)
3746     {
3747         returnCode = ComputeSurfaceInfo(&localIn, &localOut);
3748     }
3749     const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3750     const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3751                                                             pIn->resourceType,
3752                                                             Log2(pIn->bpp >> 3),
3753                                                             pIn->numSamples);
3754 
3755     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
3756     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3757     ADDR_EXTENT3D blockExtent = {
3758         localOut.blockWidth,
3759         localOut.blockHeight,
3760         localOut.blockSlices
3761     };
3762 
3763     LutAddresser addresser = LutAddresser();
3764     addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
3765     UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
3766     if (pfnCopyUnaligned == nullptr)
3767     {
3768         ADDR_ASSERT_ALWAYS();
3769         returnCode = ADDR_INVALIDPARAMS;
3770     }
3771 
3772     if (returnCode == ADDR_OK)
3773     {
3774         for (UINT_32  regionIdx = 0; regionIdx < regionCount; regionIdx++)
3775         {
3776             const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
3777             const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
3778             UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
3779             UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
3780 
3781             UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
3782             UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
3783             UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
3784 
3785             for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
3786             {
3787                 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
3788                 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
3789                 // for unaligned copies.
3790                 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
3791                 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
3792 
3793                 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
3794                 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
3795 
3796                 ADDR_COORD2D sliceOrigin = { xStart, yStart };
3797                 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
3798 
3799                 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
3800                                  VoidPtrInc(pCurRegion->pMem, memOffset),
3801                                  pCurRegion->memRowPitch,
3802                                  yBlks,
3803                                  sliceOrigin,
3804                                  sliceExtent,
3805                                  sliceXor,
3806                                  addresser);
3807             }
3808         }
3809     }
3810     return returnCode;
3811 }
3812 
3813 /**
3814 ************************************************************************************************************************
3815 *   Gfx11Lib::HwlCopySurfaceToMem
3816 *
3817 *   @brief
3818 *       Copy multiple regions from a non-linear surface to memory.
3819 *
3820 *   @return
3821 *       Error or success.
3822 ************************************************************************************************************************
3823 */
HwlCopySurfaceToMem(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const3824 ADDR_E_RETURNCODE Gfx11Lib::HwlCopySurfaceToMem(
3825     const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
3826     const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
3827     UINT_32                             regionCount
3828     ) const
3829 {
3830     // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
3831     // optimized for a particular micro-swizzle mode if available.
3832     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
3833     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3834     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
3835     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3836     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3837 
3838     if (pIn->numSamples > 1)
3839     {
3840         // TODO: MSAA
3841         returnCode = ADDR_NOTIMPLEMENTED;
3842     }
3843     if (IsBlockVariable(pIn->swizzleMode))
3844     {
3845         // TODO: larger LUTs for worst-case 256KB swizzle.
3846         returnCode = ADDR_NOTIMPLEMENTED;
3847     }
3848 
3849     localIn.size         = sizeof(localIn);
3850     localIn.flags        = pIn->flags;
3851     localIn.swizzleMode  = pIn->swizzleMode;
3852     localIn.resourceType = pIn->resourceType;
3853     localIn.format       = pIn->format;
3854     localIn.bpp          = pIn->bpp;
3855     localIn.width        = Max(pIn->unAlignedDims.width,  1u);
3856     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
3857     localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
3858     localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
3859     localIn.numSamples   = Max(pIn->numSamples,           1u);
3860 
3861     localOut.size     = sizeof(localOut);
3862     localOut.pMipInfo = mipInfo;
3863 
3864     if (returnCode == ADDR_OK)
3865     {
3866         returnCode = ComputeSurfaceInfo(&localIn, &localOut);
3867     }
3868     const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3869     const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3870                                                             pIn->resourceType,
3871                                                             Log2(pIn->bpp >> 3),
3872                                                             pIn->numSamples);
3873 
3874     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
3875     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3876     ADDR_EXTENT3D blockExtent = {
3877         localOut.blockWidth,
3878         localOut.blockHeight,
3879         localOut.blockSlices
3880     };
3881 
3882     LutAddresser addresser = LutAddresser();
3883     addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
3884     UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
3885     if (pfnCopyUnaligned == nullptr)
3886     {
3887         ADDR_ASSERT_ALWAYS();
3888         returnCode = ADDR_INVALIDPARAMS;
3889     }
3890 
3891     if (returnCode == ADDR_OK)
3892     {
3893         for (UINT_32  regionIdx = 0; regionIdx < regionCount; regionIdx++)
3894         {
3895             const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
3896             const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
3897             UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
3898             UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
3899 
3900             UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
3901             UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
3902             UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
3903 
3904             for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
3905             {
3906                 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
3907                 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
3908                 // for unaligned copies.
3909                 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
3910                 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
3911 
3912                 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
3913                 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
3914 
3915                 ADDR_COORD2D sliceOrigin = { xStart, yStart };
3916                 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
3917 
3918                 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
3919                                  VoidPtrInc(pCurRegion->pMem, memOffset),
3920                                  pCurRegion->memRowPitch,
3921                                  yBlks,
3922                                  sliceOrigin,
3923                                  sliceExtent,
3924                                  sliceXor,
3925                                  addresser);
3926             }
3927         }
3928     }
3929     return returnCode;
3930 }
3931 
3932 
3933 /**
3934 ************************************************************************************************************************
3935 *   Gfx11Lib::ComputeOffsetFromEquation
3936 *
3937 *   @brief
3938 *       Compute offset from equation
3939 *
3940 *   @return
3941 *       Offset
3942 ************************************************************************************************************************
3943 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3944 UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
3945     const ADDR_EQUATION* pEq,   ///< Equation
3946     UINT_32              x,     ///< x coord in bytes
3947     UINT_32              y,     ///< y coord in pixel
3948     UINT_32              z      ///< z coord in slice
3949     ) const
3950 {
3951     UINT_32 offset = 0;
3952 
3953     for (UINT_32 i = 0; i < pEq->numBits; i++)
3954     {
3955         UINT_32 v = 0;
3956 
3957         for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
3958         {
3959             if (pEq->comps[c][i].valid)
3960             {
3961                 if (pEq->comps[c][i].channel == 0)
3962                 {
3963                     v ^= (x >> pEq->comps[c][i].index) & 1;
3964                 }
3965                 else if (pEq->comps[c][i].channel == 1)
3966                 {
3967                     v ^= (y >> pEq->comps[c][i].index) & 1;
3968                 }
3969                 else
3970                 {
3971                     ADDR_ASSERT(pEq->comps[c][i].channel == 2);
3972                     v ^= (z >> pEq->comps[c][i].index) & 1;
3973                 }
3974             }
3975         }
3976 
3977         offset |= (v << i);
3978     }
3979 
3980     return offset;
3981 }
3982 
3983 /**
3984 ************************************************************************************************************************
3985 *   Gfx11Lib::GetSwizzlePatternInfo
3986 *
3987 *   @brief
3988 *       Get swizzle pattern
3989 *
3990 *   @return
3991 *       Swizzle pattern information
3992 ************************************************************************************************************************
3993 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3994 const ADDR_SW_PATINFO* Gfx11Lib::GetSwizzlePatternInfo(
3995     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
3996     AddrResourceType resourceType,      ///< Resource type
3997     UINT_32          elemLog2,          ///< Element size in bytes log2
3998     UINT_32          numFrag            ///< Number of fragment
3999     ) const
4000 {
4001     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4002     const ADDR_SW_PATINFO* patInfo     = NULL;
4003     const UINT_32          swizzleMask = 1 << swizzleMode;
4004     const BOOL_32          isBlock256k = IsBlock256kb(swizzleMode);
4005     const BOOL_32          isBlock64K  = IsBlock64kb(swizzleMode);
4006 
4007     if (IsLinear(swizzleMode) == FALSE)
4008     {
4009         if (resourceType == ADDR_RSRC_TEX_3D)
4010         {
4011             ADDR_ASSERT(numFrag == 1);
4012 
4013             if ((swizzleMask & Gfx11Rsrc3dSwModeMask) != 0)
4014             {
4015                 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
4016                 {
4017                     if (isBlock256k)
4018                     {
4019                         ADDR_ASSERT((swizzleMode == ADDR_SW_256KB_Z_X) || (swizzleMode == ADDR_SW_256KB_R_X));
4020                         patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
4021                     }
4022                     else if (isBlock64K)
4023                     {
4024                         ADDR_ASSERT((swizzleMode == ADDR_SW_64KB_Z_X) || (swizzleMode == ADDR_SW_64KB_R_X));
4025                         patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
4026                     }
4027                     else
4028                     {
4029                         ADDR_ASSERT_ALWAYS();
4030                     }
4031                 }
4032                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4033                 {
4034                     if (isBlock256k)
4035                     {
4036                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
4037                         // patInfo = GFX11_SW_256K_D3_X_PATINFO;
4038                     }
4039                     else if (isBlock64K)
4040                     {
4041                         ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4042                         patInfo = GFX11_SW_64K_D3_X_PATINFO;
4043                     }
4044                     else
4045                     {
4046                         ADDR_ASSERT_ALWAYS();
4047                     }
4048                 }
4049                 else
4050                 {
4051                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4052 
4053                     if (isBlock256k)
4054                     {
4055                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_S_X);
4056                         patInfo = GFX11_SW_256K_S3_X_PATINFO;
4057                     }
4058                     else if (isBlock64K)
4059                     {
4060                         if (swizzleMode == ADDR_SW_64KB_S)
4061                         {
4062                             patInfo = GFX11_SW_64K_S3_PATINFO;
4063                         }
4064                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4065                         {
4066                             patInfo = GFX11_SW_64K_S3_X_PATINFO;
4067                         }
4068                         else if (swizzleMode == ADDR_SW_64KB_S_T)
4069                         {
4070                             patInfo = GFX11_SW_64K_S3_T_PATINFO;
4071                         }
4072                         else
4073                         {
4074                             ADDR_ASSERT_ALWAYS();
4075                         }
4076                     }
4077                     else if (IsBlock4kb(swizzleMode))
4078                     {
4079                         if (swizzleMode == ADDR_SW_4KB_S)
4080                         {
4081                             patInfo = GFX11_SW_4K_S3_PATINFO;
4082                         }
4083                         else if (swizzleMode == ADDR_SW_4KB_S_X)
4084                         {
4085                             patInfo = GFX11_SW_4K_S3_X_PATINFO;
4086                         }
4087                         else
4088                         {
4089                             ADDR_ASSERT_ALWAYS();
4090                         }
4091                     }
4092                     else
4093                     {
4094                         ADDR_ASSERT_ALWAYS();
4095                     }
4096                 }
4097             }
4098         }
4099         else
4100         {
4101             if ((swizzleMask & Gfx11Rsrc2dSwModeMask) != 0)
4102             {
4103                 if (IsBlock256b(swizzleMode))
4104                 {
4105                     ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4106                     patInfo = GFX11_SW_256_D_PATINFO;
4107                 }
4108                 else if (IsBlock4kb(swizzleMode))
4109                 {
4110                     if (swizzleMode == ADDR_SW_4KB_D)
4111                     {
4112                         patInfo = GFX11_SW_4K_D_PATINFO;
4113                     }
4114                     else if (swizzleMode == ADDR_SW_4KB_D_X)
4115                     {
4116                         patInfo = GFX11_SW_4K_D_X_PATINFO;
4117                     }
4118                     else
4119                     {
4120                         ADDR_ASSERT_ALWAYS();
4121                     }
4122                 }
4123                 else if (isBlock64K)
4124                 {
4125                     if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
4126                     {
4127                         if (numFrag == 1)
4128                         {
4129                             patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
4130                         }
4131                         else if (numFrag == 2)
4132                         {
4133                             patInfo = GFX11_SW_64K_ZR_X_2xaa_PATINFO;
4134                         }
4135                         else if (numFrag == 4)
4136                         {
4137                             patInfo = GFX11_SW_64K_ZR_X_4xaa_PATINFO;
4138                         }
4139                         else if (numFrag == 8)
4140                         {
4141                             patInfo = GFX11_SW_64K_ZR_X_8xaa_PATINFO;
4142                         }
4143                         else
4144                         {
4145                             ADDR_ASSERT_ALWAYS();
4146                         }
4147                     }
4148                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4149                     {
4150                         if (swizzleMode == ADDR_SW_64KB_D)
4151                         {
4152                             patInfo = GFX11_SW_64K_D_PATINFO;
4153                         }
4154                         else if (swizzleMode == ADDR_SW_64KB_D_X)
4155                         {
4156                             patInfo = GFX11_SW_64K_D_X_PATINFO;
4157                         }
4158                         else if (swizzleMode == ADDR_SW_64KB_D_T)
4159                         {
4160                             patInfo = GFX11_SW_64K_D_T_PATINFO;
4161                         }
4162                         else
4163                         {
4164                             ADDR_ASSERT_ALWAYS();
4165                         }
4166                     }
4167                     else
4168                     {
4169                         ADDR_ASSERT_ALWAYS();
4170                     }
4171                 }
4172                 else if (isBlock256k)
4173                 {
4174                     if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
4175                     {
4176                         if (numFrag == 1)
4177                         {
4178                             patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
4179                         }
4180                         else if (numFrag == 2)
4181                         {
4182                             patInfo = GFX11_SW_256K_ZR_X_2xaa_PATINFO;
4183                         }
4184                         else if (numFrag == 4)
4185                         {
4186                             patInfo = GFX11_SW_256K_ZR_X_4xaa_PATINFO;
4187                         }
4188                         else if (numFrag == 8)
4189                         {
4190                             patInfo = GFX11_SW_256K_ZR_X_8xaa_PATINFO;
4191                         }
4192                         else
4193                         {
4194                             ADDR_ASSERT_ALWAYS();
4195                         }
4196                     }
4197                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4198                     {
4199                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
4200                         patInfo = GFX11_SW_256K_D_X_PATINFO;
4201                     }
4202                     else
4203                     {
4204                         ADDR_ASSERT_ALWAYS();
4205                     }
4206                 }
4207                 else
4208                 {
4209                     ADDR_ASSERT_ALWAYS();
4210                 }
4211             }
4212         }
4213     }
4214 
4215     return (patInfo != NULL) ? &patInfo[index] : NULL;
4216 }
4217 
4218 /**
4219 ************************************************************************************************************************
4220 *   Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled
4221 *
4222 *   @brief
4223 *       Internal function to calculate address from coord for micro tiled swizzle surface
4224 *
4225 *   @return
4226 *       ADDR_E_RETURNCODE
4227 ************************************************************************************************************************
4228 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4229 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4230      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4231      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4232      ) const
4233 {
4234     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4235     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4236     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4237 
4238     localIn.swizzleMode  = pIn->swizzleMode;
4239     localIn.flags        = pIn->flags;
4240     localIn.resourceType = pIn->resourceType;
4241     localIn.bpp          = pIn->bpp;
4242     localIn.width        = Max(pIn->unalignedWidth,  1u);
4243     localIn.height       = Max(pIn->unalignedHeight, 1u);
4244     localIn.numSlices    = Max(pIn->numSlices,       1u);
4245     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4246     localIn.numSamples   = Max(pIn->numSamples,      1u);
4247     localIn.numFrags     = localIn.numSamples;
4248     localOut.pMipInfo    = mipInfo;
4249 
4250     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4251 
4252     if (ret == ADDR_OK)
4253     {
4254         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4255         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4256         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
4257         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
4258 
4259         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4260         {
4261             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4262             const UINT_32 yb           = pIn->y / localOut.blockHeight;
4263             const UINT_32 xb           = pIn->x / localOut.blockWidth;
4264             const UINT_32 blockIndex   = yb * pb + xb;
4265             const UINT_32 blockSize    = 256;
4266             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4267                                                                    pIn->x << elemLog2,
4268                                                                    pIn->y,
4269                                                                    0);
4270             pOut->addr = localOut.sliceSize * pIn->slice +
4271                          mipInfo[pIn->mipId].macroBlockOffset +
4272                          (blockIndex * blockSize) +
4273                          blk256Offset;
4274         }
4275         else
4276         {
4277             ret = ADDR_INVALIDPARAMS;
4278         }
4279     }
4280 
4281     return ret;
4282 }
4283 
4284 /**
4285 ************************************************************************************************************************
4286 *   Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled
4287 *
4288 *   @brief
4289 *       Internal function to calculate address from coord for macro tiled swizzle surface
4290 *
4291 *   @return
4292 *       ADDR_E_RETURNCODE
4293 ************************************************************************************************************************
4294 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4295 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4296      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4297      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4298      ) const
4299 {
4300     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4301     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4302     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4303 
4304     localIn.swizzleMode  = pIn->swizzleMode;
4305     localIn.flags        = pIn->flags;
4306     localIn.resourceType = pIn->resourceType;
4307     localIn.bpp          = pIn->bpp;
4308     localIn.width        = Max(pIn->unalignedWidth,  1u);
4309     localIn.height       = Max(pIn->unalignedHeight, 1u);
4310     localIn.numSlices    = Max(pIn->numSlices,       1u);
4311     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4312     localIn.numSamples   = Max(pIn->numSamples,      1u);
4313     localIn.numFrags     = localIn.numSamples;
4314     localOut.pMipInfo    = mipInfo;
4315 
4316     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4317 
4318     if (ret == ADDR_OK)
4319     {
4320         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4321         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4322         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4323         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4324         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4325         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4326                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4327 
4328         if (localIn.numSamples > 1)
4329         {
4330             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4331                                                                     pIn->resourceType,
4332                                                                     elemLog2,
4333                                                                     localIn.numSamples);
4334 
4335             if (pPatInfo != NULL)
4336             {
4337                 const UINT_32 pb     = localOut.pitch / localOut.blockWidth;
4338                 const UINT_32 yb     = pIn->y / localOut.blockHeight;
4339                 const UINT_32 xb     = pIn->x / localOut.blockWidth;
4340                 const UINT_64 blkIdx = yb * pb + xb;
4341 
4342                 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
4343                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4344 
4345                 const UINT_32 blkOffset =
4346                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4347                                                     blkSizeLog2,
4348                                                     pIn->x,
4349                                                     pIn->y,
4350                                                     pIn->slice,
4351                                                     pIn->sample);
4352 
4353                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4354                              (blkIdx << blkSizeLog2) +
4355                              (blkOffset ^ pipeBankXor);
4356             }
4357             else
4358             {
4359                 ret = ADDR_INVALIDPARAMS;
4360             }
4361         }
4362         else
4363         {
4364             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4365             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4366             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4367 
4368             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4369             {
4370                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4371                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4372                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4373                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4374                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4375                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4376                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4377                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4378                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4379                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4380                 const UINT_64 blkIdx    = yb * pb + xb;
4381                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4382                                                                     x << elemLog2,
4383                                                                     y,
4384                                                                     z);
4385                 pOut->addr = sliceSize * sliceId +
4386                              mipInfo[pIn->mipId].macroBlockOffset +
4387                              (blkIdx << blkSizeLog2) +
4388                              (blkOffset ^ pipeBankXor);
4389             }
4390             else
4391             {
4392                 ret = ADDR_INVALIDPARAMS;
4393             }
4394         }
4395     }
4396 
4397     return ret;
4398 }
4399 
4400 /**
4401 ************************************************************************************************************************
4402 *   Gfx11Lib::HwlComputeMaxBaseAlignments
4403 *
4404 *   @brief
4405 *       Gets maximum alignments
4406 *   @return
4407 *       maximum alignments
4408 ************************************************************************************************************************
4409 */
HwlComputeMaxBaseAlignments() const4410 UINT_32 Gfx11Lib::HwlComputeMaxBaseAlignments() const
4411 {
4412     return Size256K;
4413 }
4414 
4415 /**
4416 ************************************************************************************************************************
4417 *   Gfx11Lib::HwlComputeMaxMetaBaseAlignments
4418 *
4419 *   @brief
4420 *       Gets maximum alignments for metadata
4421 *   @return
4422 *       maximum alignments for metadata
4423 ************************************************************************************************************************
4424 */
HwlComputeMaxMetaBaseAlignments() const4425 UINT_32 Gfx11Lib::HwlComputeMaxMetaBaseAlignments() const
4426 {
4427     Dim3d metaBlk;
4428 
4429     // Max base alignment for Htile
4430     const AddrSwizzleMode ValidSwizzleModeForHtile[] =
4431     {
4432         ADDR_SW_64KB_Z_X,
4433         ADDR_SW_256KB_Z_X,
4434     };
4435 
4436     UINT_32 maxBaseAlignHtile = 0;
4437 
4438     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForHtile) / sizeof(ValidSwizzleModeForHtile[0]); swIdx++)
4439     {
4440         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4441         {
4442             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4443             {
4444                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx11DataDepthStencil,
4445                                                                 ADDR_RSRC_TEX_2D,
4446                                                                 ValidSwizzleModeForHtile[swIdx],
4447                                                                 bppLog2,
4448                                                                 numFragLog2,
4449                                                                 TRUE,
4450                                                                 &metaBlk);
4451 
4452                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4453             }
4454         }
4455     }
4456 
4457     // Max base alignment for 2D Dcc
4458     // swizzle mode support DCC...
4459     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4460     {
4461         ADDR_SW_64KB_R_X,
4462         ADDR_SW_256KB_R_X,
4463     };
4464 
4465     UINT_32 maxBaseAlignDcc2D = 0;
4466 
4467     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4468     {
4469         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4470         {
4471             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4472             {
4473                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx11DataColor,
4474                                                              ADDR_RSRC_TEX_2D,
4475                                                              ValidSwizzleModeForDcc2D[swIdx],
4476                                                              bppLog2,
4477                                                              numFragLog2,
4478                                                              TRUE,
4479                                                              &metaBlk);
4480 
4481                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4482             }
4483         }
4484     }
4485 
4486     // Max base alignment for 3D Dcc
4487     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4488     {
4489         ADDR_SW_64KB_S_X,
4490         ADDR_SW_64KB_D_X,
4491         ADDR_SW_64KB_R_X,
4492         ADDR_SW_256KB_S_X,
4493         ADDR_SW_256KB_D_X,
4494         ADDR_SW_256KB_R_X,
4495     };
4496 
4497     UINT_32 maxBaseAlignDcc3D = 0;
4498 
4499     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4500     {
4501         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4502         {
4503             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx11DataColor,
4504                                                          ADDR_RSRC_TEX_3D,
4505                                                          ValidSwizzleModeForDcc3D[swIdx],
4506                                                          bppLog2,
4507                                                          0,
4508                                                          TRUE,
4509                                                          &metaBlk);
4510 
4511             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4512         }
4513     }
4514 
4515     return Max(maxBaseAlignHtile, Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4516 }
4517 
4518 /**
4519 ************************************************************************************************************************
4520 *   Gfx11Lib::GetMetaElementSizeLog2
4521 *
4522 *   @brief
4523 *       Gets meta data element size log2
4524 *   @return
4525 *       Meta data element size log2
4526 ************************************************************************************************************************
4527 */
GetMetaElementSizeLog2(Gfx11DataType dataType)4528 INT_32 Gfx11Lib::GetMetaElementSizeLog2(
4529     Gfx11DataType dataType) ///< Data surface type
4530 {
4531     INT_32 elemSizeLog2 = 0;
4532 
4533     if (dataType == Gfx11DataColor)
4534     {
4535         elemSizeLog2 = 0;
4536     }
4537     else
4538     {
4539         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4540         elemSizeLog2 = 2;
4541     }
4542 
4543     return elemSizeLog2;
4544 }
4545 
4546 /**
4547 ************************************************************************************************************************
4548 *   Gfx11Lib::GetMetaCacheSizeLog2
4549 *
4550 *   @brief
4551 *       Gets meta data cache line size log2
4552 *   @return
4553 *       Meta data cache line size log2
4554 ************************************************************************************************************************
4555 */
GetMetaCacheSizeLog2(Gfx11DataType dataType)4556 INT_32 Gfx11Lib::GetMetaCacheSizeLog2(
4557     Gfx11DataType dataType) ///< Data surface type
4558 {
4559     INT_32 cacheSizeLog2 = 0;
4560 
4561     if (dataType == Gfx11DataColor)
4562     {
4563         cacheSizeLog2 = 6;
4564     }
4565     else
4566     {
4567         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4568         cacheSizeLog2 = 8;
4569     }
4570 
4571     return cacheSizeLog2;
4572 }
4573 
4574 /**
4575 ************************************************************************************************************************
4576 *   Gfx11Lib::HwlComputeSurfaceInfoLinear
4577 *
4578 *   @brief
4579 *       Internal function to calculate alignment for linear surface
4580 *
4581 *   @return
4582 *       ADDR_E_RETURNCODE
4583 ************************************************************************************************************************
4584 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4585 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoLinear(
4586      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4587      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4588      ) const
4589 {
4590     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4591 
4592     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4593     {
4594         returnCode = ADDR_INVALIDPARAMS;
4595     }
4596     else
4597     {
4598         const UINT_32 elementBytes = pIn->bpp >> 3;
4599         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4600         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4601         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4602         UINT_32       actualHeight = pIn->height;
4603         UINT_64       sliceSize    = 0;
4604 
4605         if (pIn->numMipLevels > 1)
4606         {
4607             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4608             {
4609                 UINT_32 mipWidth, mipHeight;
4610 
4611                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4612 
4613                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4614 
4615                 if (pOut->pMipInfo != NULL)
4616                 {
4617                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4618                     pOut->pMipInfo[i].height           = mipHeight;
4619                     pOut->pMipInfo[i].depth            = mipDepth;
4620                     pOut->pMipInfo[i].offset           = sliceSize;
4621                     pOut->pMipInfo[i].mipTailOffset    = 0;
4622                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4623                 }
4624 
4625                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4626             }
4627         }
4628         else
4629         {
4630             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4631 
4632             if (returnCode == ADDR_OK)
4633             {
4634                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4635 
4636                 if (pOut->pMipInfo != NULL)
4637                 {
4638                     pOut->pMipInfo[0].pitch            = pitch;
4639                     pOut->pMipInfo[0].height           = actualHeight;
4640                     pOut->pMipInfo[0].depth            = mipDepth;
4641                     pOut->pMipInfo[0].offset           = 0;
4642                     pOut->pMipInfo[0].mipTailOffset    = 0;
4643                     pOut->pMipInfo[0].macroBlockOffset = 0;
4644                 }
4645             }
4646         }
4647 
4648         if (returnCode == ADDR_OK)
4649         {
4650             pOut->pitch          = pitch;
4651             pOut->height         = actualHeight;
4652             pOut->numSlices      = pIn->numSlices;
4653             pOut->sliceSize      = sliceSize;
4654             pOut->surfSize       = sliceSize * pOut->numSlices;
4655             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4656             pOut->blockWidth     = pitchAlign;
4657             pOut->blockHeight    = 1;
4658             pOut->blockSlices    = 1;
4659 
4660             // Following members are useless on GFX11
4661             pOut->mipChainPitch  = 0;
4662             pOut->mipChainHeight = 0;
4663             pOut->mipChainSlice  = 0;
4664             pOut->epitchIsHeight = FALSE;
4665 
4666             // Post calculation validate
4667             ADDR_ASSERT(pOut->sliceSize > 0);
4668         }
4669     }
4670 
4671     return returnCode;
4672 }
4673 
4674 } // V2
4675 } // Addr
4676