• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx10addrlib.cpp
12 * @brief Contains the implementation for the Gfx10Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx10addrlib.h"
17 #include "addrcommon.h"
18 #include "addrswizzler.h"
19 #include "gfx10_gb_reg.h"
20 
21 #include "amdgpu_asic_addr.h"
22 
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
25 
26 namespace Addr
27 {
28 /**
29 ************************************************************************************************************************
30 *   Gfx10HwlInit
31 *
32 *   @brief
33 *       Creates an Gfx10Lib object.
34 *
35 *   @return
36 *       Returns an Gfx10Lib object pointer.
37 ************************************************************************************************************************
38 */
Gfx10HwlInit(const Client * pClient)39 Addr::Lib* Gfx10HwlInit(const Client* pClient)
40 {
41     return V2::Gfx10Lib::CreateObj(pClient);
42 }
43 
44 namespace V2
45 {
46 
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 //                               Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50 
51 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
52 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
53     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
54     {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
55     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
56     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
57 
58     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
59     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
60     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
61     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
62 
63     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
64     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
65     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
66     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
67 
68     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
71     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
72 
73     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
74     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
75     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
76     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
77 
78     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
79     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
80     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
81     {{0,    0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_4KB_R_X
82 
83     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
84     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
85     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
86     {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X
87 
88     {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_VAR_Z_X
89     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
90     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
91     {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_VAR_R_X
92     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
93 };
94 
95 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
96 
97 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
98 const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
99 
100 /**
101 ************************************************************************************************************************
102 *   Gfx10Lib::Gfx10Lib
103 *
104 *   @brief
105 *       Constructor
106 *
107 ************************************************************************************************************************
108 */
Gfx10Lib(const Client * pClient)109 Gfx10Lib::Gfx10Lib(const Client* pClient)
110     :
111     Lib(pClient),
112     m_numPkrLog2(0),
113     m_numSaLog2(0),
114     m_colorBaseIndex(0),
115     m_xmaskBaseIndex(0),
116     m_htileBaseIndex(0),
117     m_dccBaseIndex(0)
118 {
119     memset(&m_settings, 0, sizeof(m_settings));
120     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
121 }
122 
123 /**
124 ************************************************************************************************************************
125 *   Gfx10Lib::~Gfx10Lib
126 *
127 *   @brief
128 *       Destructor
129 ************************************************************************************************************************
130 */
~Gfx10Lib()131 Gfx10Lib::~Gfx10Lib()
132 {
133 }
134 
135 /**
136 ************************************************************************************************************************
137 *   Gfx10Lib::HwlComputeHtileInfo
138 *
139 *   @brief
140 *       Interface function stub of AddrComputeHtilenfo
141 *
142 *   @return
143 *       ADDR_E_RETURNCODE
144 ************************************************************************************************************************
145 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const146 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
147     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
148     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
149     ) const
150 {
151     ADDR_E_RETURNCODE ret = ADDR_OK;
152 
153     if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
154          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
155         (pIn->hTileFlags.pipeAligned != TRUE))
156     {
157         ret = ADDR_INVALIDPARAMS;
158     }
159     else
160     {
161         Dim3d         metaBlk     = {};
162         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
163                                                    ADDR_RSRC_TEX_2D,
164                                                    pIn->swizzleMode,
165                                                    0,
166                                                    0,
167                                                    TRUE,
168                                                    &metaBlk);
169 
170         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
171         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
172         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
173         pOut->metaBlkWidth  = metaBlk.w;
174         pOut->metaBlkHeight = metaBlk.h;
175 
176         if (pIn->numMipLevels > 1)
177         {
178             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
179 
180             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
181 
182             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
183             {
184                 UINT_32 mipWidth, mipHeight;
185 
186                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
187 
188                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
189                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
190 
191                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
192                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
193                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
194 
195                 if (pOut->pMipInfo != NULL)
196                 {
197                     pOut->pMipInfo[i].inMiptail = FALSE;
198                     pOut->pMipInfo[i].offset    = offset;
199                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
200                 }
201 
202                 offset += mipSliceSize;
203             }
204 
205             pOut->sliceSize          = offset;
206             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
207             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
208 
209             if (pOut->pMipInfo != NULL)
210             {
211                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
212                 {
213                     pOut->pMipInfo[i].inMiptail = TRUE;
214                     pOut->pMipInfo[i].offset    = 0;
215                     pOut->pMipInfo[i].sliceSize = 0;
216                 }
217 
218                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
219                 {
220                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
221                 }
222             }
223         }
224         else
225         {
226             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
227             const UINT_32 heightInM = pOut->height / metaBlk.h;
228 
229             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
230             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
231             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
232 
233             if (pOut->pMipInfo != NULL)
234             {
235                 pOut->pMipInfo[0].inMiptail = FALSE;
236                 pOut->pMipInfo[0].offset    = 0;
237                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
238             }
239         }
240 
241         // Get the HTILE address equation (copied from HtileAddrFromCoord).
242         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
243         const UINT_32 index = m_xmaskBaseIndex;
244         const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
245 
246         ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
247         pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
248     }
249 
250     return ret;
251 }
252 
253 /**
254 ************************************************************************************************************************
255 *   Gfx10Lib::HwlComputeCmaskInfo
256 *
257 *   @brief
258 *       Interface function stub of AddrComputeCmaskInfo
259 *
260 *   @return
261 *       ADDR_E_RETURNCODE
262 ************************************************************************************************************************
263 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const264 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
265     const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
266     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
267     ) const
268 {
269     ADDR_E_RETURNCODE ret = ADDR_OK;
270 
271     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
272         (pIn->cMaskFlags.pipeAligned != TRUE)   ||
273         ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
274          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
275     {
276         ret = ADDR_INVALIDPARAMS;
277     }
278     else
279     {
280         Dim3d         metaBlk     = {};
281         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
282                                                    ADDR_RSRC_TEX_2D,
283                                                    pIn->swizzleMode,
284                                                    0,
285                                                    0,
286                                                    TRUE,
287                                                    &metaBlk);
288 
289         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
290         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
291         pOut->baseAlign     = metaBlkSize;
292         pOut->metaBlkWidth  = metaBlk.w;
293         pOut->metaBlkHeight = metaBlk.h;
294 
295         if (pIn->numMipLevels > 1)
296         {
297             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
298 
299             UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
300 
301             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
302             {
303                 UINT_32 mipWidth, mipHeight;
304 
305                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
306 
307                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
308                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
309 
310                 const UINT_32 pitchInM  = mipWidth  / metaBlk.w;
311                 const UINT_32 heightInM = mipHeight / metaBlk.h;
312 
313                 if (pOut->pMipInfo != NULL)
314                 {
315                     pOut->pMipInfo[i].inMiptail = FALSE;
316                     pOut->pMipInfo[i].offset    = metaBlkPerSlice * metaBlkSize;
317                     pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
318                 }
319 
320                 metaBlkPerSlice += pitchInM * heightInM;
321             }
322 
323             pOut->metaBlkNumPerSlice = metaBlkPerSlice;
324 
325             if (pOut->pMipInfo != NULL)
326             {
327                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
328                 {
329                     pOut->pMipInfo[i].inMiptail = TRUE;
330                     pOut->pMipInfo[i].offset    = 0;
331                     pOut->pMipInfo[i].sliceSize = 0;
332                 }
333 
334                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
335                 {
336                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
337                 }
338             }
339         }
340         else
341         {
342             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
343             const UINT_32 heightInM = pOut->height / metaBlk.h;
344 
345             pOut->metaBlkNumPerSlice = pitchInM * heightInM;
346 
347             if (pOut->pMipInfo != NULL)
348             {
349                 pOut->pMipInfo[0].inMiptail = FALSE;
350                 pOut->pMipInfo[0].offset    = 0;
351                 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
352             }
353         }
354 
355         pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlkSize;
356         pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
357 
358         // Get the CMASK address equation (copied from CmaskAddrFromCoord)
359         const UINT_32  fmaskBpp      = GetFmaskBpp(1, 1);
360         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
361         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
362         const UINT_8*  patIdxTable   =
363             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
364             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
365 
366         ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
367         pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
368     }
369 
370     return ret;
371 }
372 
373 /**
374 ************************************************************************************************************************
375 *   Gfx10Lib::HwlComputeDccInfo
376 *
377 *   @brief
378 *       Interface function to compute DCC key info
379 *
380 *   @return
381 *       ADDR_E_RETURNCODE
382 ************************************************************************************************************************
383 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const384 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
385     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
386     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
387     ) const
388 {
389     ADDR_E_RETURNCODE ret = ADDR_OK;
390 
391     if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
392     {
393         // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
394         // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
395         ret = ADDR_INVALIDPARAMS;
396     }
397     else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
398     {
399         // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
400         ret = ADDR_INVALIDPARAMS;
401     }
402     else
403     {
404         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
405         // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
406         ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
407 
408         const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
409 
410         pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
411         pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
412         pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
413 
414         if (ret == ADDR_OK)
415         {
416             Dim3d         metaBlk     = {};
417             const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
418             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
419                                                        pIn->resourceType,
420                                                        pIn->swizzleMode,
421                                                        elemLog2,
422                                                        numFragLog2,
423                                                        pIn->dccKeyFlags.pipeAligned,
424                                                        &metaBlk);
425 
426             pOut->dccRamBaseAlign   = metaBlkSize;
427             pOut->metaBlkWidth      = metaBlk.w;
428             pOut->metaBlkHeight     = metaBlk.h;
429             pOut->metaBlkDepth      = metaBlk.d;
430             pOut->metaBlkSize       = metaBlkSize;
431 
432             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
433             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
434             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
435 
436             if (pIn->numMipLevels > 1)
437             {
438                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
439 
440                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
441 
442                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
443                 {
444                     UINT_32 mipWidth, mipHeight;
445 
446                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
447 
448                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
449                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
450 
451                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
452                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
453                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
454 
455                     if (pOut->pMipInfo != NULL)
456                     {
457                         pOut->pMipInfo[i].inMiptail = FALSE;
458                         pOut->pMipInfo[i].offset    = offset;
459                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
460                     }
461 
462                     offset += mipSliceSize;
463                 }
464 
465                 pOut->dccRamSliceSize    = offset;
466                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
467                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
468 
469                 if (pOut->pMipInfo != NULL)
470                 {
471                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
472                     {
473                         pOut->pMipInfo[i].inMiptail = TRUE;
474                         pOut->pMipInfo[i].offset    = 0;
475                         pOut->pMipInfo[i].sliceSize = 0;
476                     }
477 
478                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
479                     {
480                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
481                     }
482                 }
483             }
484             else
485             {
486                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
487                 const UINT_32 heightInM = pOut->height / metaBlk.h;
488 
489                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
490                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
491                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
492 
493                 if (pOut->pMipInfo != NULL)
494                 {
495                     pOut->pMipInfo[0].inMiptail = FALSE;
496                     pOut->pMipInfo[0].offset    = 0;
497                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
498                 }
499             }
500 
501             // Get the DCC address equation (copied from DccAddrFromCoord)
502             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
503             const UINT_32 numPipeLog2 = m_pipesLog2;
504             UINT_32       index       = m_dccBaseIndex + elemLog2;
505             const UINT_8* patIdxTable;
506 
507             if (m_settings.supportRbPlus)
508             {
509                 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
510 
511                 if (pIn->dccKeyFlags.pipeAligned)
512                 {
513                     index += MaxNumOfBpp;
514 
515                     if (m_numPkrLog2 < 2)
516                     {
517                         index += m_pipesLog2 * MaxNumOfBpp;
518                     }
519                     else
520                     {
521                         // 4 groups for "m_numPkrLog2 < 2" case
522                         index += 4 * MaxNumOfBpp;
523 
524                         const UINT_32 dccPipePerPkr = 3;
525 
526                         index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
527                                  (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
528                     }
529                 }
530             }
531             else
532             {
533                 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
534 
535                 if (pIn->dccKeyFlags.pipeAligned)
536                 {
537                     index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
538                 }
539                 else
540                 {
541                     index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
542                 }
543             }
544 
545             ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
546             pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
547         }
548     }
549 
550     return ret;
551 }
552 
553 /**
554 ************************************************************************************************************************
555 *   Gfx10Lib::HwlComputeCmaskAddrFromCoord
556 *
557 *   @brief
558 *       Interface function stub of AddrComputeCmaskAddrFromCoord
559 *
560 *   @return
561 *       ADDR_E_RETURNCODE
562 ************************************************************************************************************************
563 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)564 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
565     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
566     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
567 {
568     // Only support pipe aligned CMask
569     ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
570 
571     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
572     input.size            = sizeof(input);
573     input.cMaskFlags      = pIn->cMaskFlags;
574     input.colorFlags      = pIn->colorFlags;
575     input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
576     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
577     input.numSlices       = Max(pIn->numSlices,       1u);
578     input.swizzleMode     = pIn->swizzleMode;
579     input.resourceType    = pIn->resourceType;
580 
581     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
582     output.size = sizeof(output);
583 
584     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
585 
586     if (returnCode == ADDR_OK)
587     {
588         const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
589         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
590         const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
591         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
592         const UINT_8*  patIdxTable   =
593             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
594             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
595 
596         const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
597         const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
598         const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
599                                                                       blkSizeLog2 + 1, // +1 for nibble offset
600                                                                       pIn->x,
601                                                                       pIn->y,
602                                                                       pIn->slice,
603                                                                       0);
604         const UINT_32 xb       = pIn->x / output.metaBlkWidth;
605         const UINT_32 yb       = pIn->y / output.metaBlkHeight;
606         const UINT_32 pb       = output.pitch / output.metaBlkWidth;
607         const UINT_32 blkIndex = (yb * pb) + xb;
608         const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
609 
610         pOut->addr = (output.sliceSize * pIn->slice) +
611                      (blkIndex * (1 << blkSizeLog2)) +
612                      ((blkOffset >> 1) ^ pipeXor);
613         pOut->bitPosition = (blkOffset & 1) << 2;
614     }
615 
616     return returnCode;
617 }
618 
619 /**
620 ************************************************************************************************************************
621 *   Gfx10Lib::HwlComputeHtileAddrFromCoord
622 *
623 *   @brief
624 *       Interface function stub of AddrComputeHtileAddrFromCoord
625 *
626 *   @return
627 *       ADDR_E_RETURNCODE
628 ************************************************************************************************************************
629 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)630 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
631     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
632     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
633 {
634     ADDR_E_RETURNCODE returnCode = ADDR_OK;
635 
636     if (pIn->numMipLevels > 1)
637     {
638         returnCode = ADDR_NOTIMPLEMENTED;
639     }
640     else
641     {
642         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
643         input.size            = sizeof(input);
644         input.hTileFlags      = pIn->hTileFlags;
645         input.depthFlags      = pIn->depthflags;
646         input.swizzleMode     = pIn->swizzleMode;
647         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
648         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
649         input.numSlices       = Max(pIn->numSlices,       1u);
650         input.numMipLevels    = 1;
651 
652         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
653         output.size = sizeof(output);
654 
655         returnCode = ComputeHtileInfo(&input, &output);
656 
657         if (returnCode == ADDR_OK)
658         {
659             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
660             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
661             const UINT_32  index         = m_htileBaseIndex + numSampleLog2;
662             const UINT_8*  patIdxTable   = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
663 
664             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
665             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
666             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
667                                                                            blkSizeLog2 + 1, // +1 for nibble offset
668                                                                            pIn->x,
669                                                                            pIn->y,
670                                                                            pIn->slice,
671                                                                            0);
672             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
673             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
674             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
675             const UINT_32 blkIndex = (yb * pb) + xb;
676             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
677 
678             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
679                          (blkIndex * (1 << blkSizeLog2)) +
680                          ((blkOffset >> 1) ^ pipeXor);
681         }
682     }
683 
684     return returnCode;
685 }
686 
687 /**
688 ************************************************************************************************************************
689 *   Gfx10Lib::HwlComputeHtileCoordFromAddr
690 *
691 *   @brief
692 *       Interface function stub of AddrComputeHtileCoordFromAddr
693 *
694 *   @return
695 *       ADDR_E_RETURNCODE
696 ************************************************************************************************************************
697 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)698 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
699     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
700     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
701 {
702     ADDR_NOT_IMPLEMENTED();
703 
704     return ADDR_OK;
705 }
706 
707 /**
708 ************************************************************************************************************************
709 *   Gfx10Lib::HwlSupportComputeDccAddrFromCoord
710 *
711 *   @brief
712 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
713 *
714 *   @return
715 *       ADDR_E_RETURNCODE
716 ************************************************************************************************************************
717 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)718 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
719     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
720 {
721     ADDR_E_RETURNCODE returnCode = ADDR_OK;
722 
723     if ((pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
724         (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
725         (pIn->dccKeyFlags.linear == TRUE)             ||
726         (pIn->numFrags           >  1)                ||
727         (pIn->numMipLevels       >  1)                ||
728         (pIn->mipId              >  0))
729     {
730         returnCode = ADDR_NOTSUPPORTED;
731     }
732     else if ((pIn->pitch == 0)         ||
733              (pIn->metaBlkWidth == 0)  ||
734              (pIn->metaBlkHeight == 0) ||
735              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
736     {
737         returnCode = ADDR_NOTSUPPORTED;
738     }
739 
740     return returnCode;
741 }
742 
743 /**
744 ************************************************************************************************************************
745 *   Gfx10Lib::HwlComputeDccAddrFromCoord
746 *
747 *   @brief
748 *       Interface function stub of AddrComputeDccAddrFromCoord
749 *
750 *   @return
751 *       N/A
752 ************************************************************************************************************************
753 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)754 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
755     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
756     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
757 {
758     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
759     const UINT_32 numPipeLog2 = m_pipesLog2;
760     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
761     UINT_32       index       = m_dccBaseIndex + elemLog2;
762     const UINT_8* patIdxTable;
763 
764     if (m_settings.supportRbPlus)
765     {
766         patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
767 
768         if (pIn->dccKeyFlags.pipeAligned)
769         {
770             index += MaxNumOfBpp;
771 
772             if (m_numPkrLog2 < 2)
773             {
774                 index += m_pipesLog2 * MaxNumOfBpp;
775             }
776             else
777             {
778                 // 4 groups for "m_numPkrLog2 < 2" case
779                 index += 4 * MaxNumOfBpp;
780 
781                 const UINT_32 dccPipePerPkr = 3;
782 
783                 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
784                          (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
785             }
786         }
787     }
788     else
789     {
790         patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
791 
792         if (pIn->dccKeyFlags.pipeAligned)
793         {
794             index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
795         }
796         else
797         {
798             index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
799         }
800     }
801 
802     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
803     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
804     const UINT_32  blkOffset   =
805         ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
806                                         blkSizeLog2 + 1, // +1 for nibble offset
807                                         pIn->x,
808                                         pIn->y,
809                                         pIn->slice,
810                                         0);
811     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
812     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
813     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
814     const UINT_32 blkIndex = (yb * pb) + xb;
815     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
816 
817     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
818                  (blkIndex * (1 << blkSizeLog2)) +
819                  ((blkOffset >> 1) ^ pipeXor);
820 }
821 
822 /**
823 ************************************************************************************************************************
824 *   Gfx10Lib::HwlInitGlobalParams
825 *
826 *   @brief
827 *       Initializes global parameters
828 *
829 *   @return
830 *       TRUE if all settings are valid
831 *
832 ************************************************************************************************************************
833 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)834 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
835     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
836 {
837     BOOL_32              valid = TRUE;
838     GB_ADDR_CONFIG_GFX10 gbAddrConfig;
839 
840     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
841 
842     // These values are copied from CModel code
843     switch (gbAddrConfig.bits.NUM_PIPES)
844     {
845         case ADDR_CONFIG_1_PIPE:
846             m_pipes     = 1;
847             m_pipesLog2 = 0;
848             break;
849         case ADDR_CONFIG_2_PIPE:
850             m_pipes     = 2;
851             m_pipesLog2 = 1;
852             break;
853         case ADDR_CONFIG_4_PIPE:
854             m_pipes     = 4;
855             m_pipesLog2 = 2;
856             break;
857         case ADDR_CONFIG_8_PIPE:
858             m_pipes     = 8;
859             m_pipesLog2 = 3;
860             break;
861         case ADDR_CONFIG_16_PIPE:
862             m_pipes     = 16;
863             m_pipesLog2 = 4;
864             break;
865         case ADDR_CONFIG_32_PIPE:
866             m_pipes     = 32;
867             m_pipesLog2 = 5;
868             break;
869         case ADDR_CONFIG_64_PIPE:
870             m_pipes     = 64;
871             m_pipesLog2 = 6;
872             break;
873         default:
874             ADDR_ASSERT_ALWAYS();
875             valid = FALSE;
876             break;
877     }
878 
879     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
880     {
881         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
882             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
883             m_pipeInterleaveLog2  = 8;
884             break;
885         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
886             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
887             m_pipeInterleaveLog2  = 9;
888             break;
889         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
890             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
891             m_pipeInterleaveLog2  = 10;
892             break;
893         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
894             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
895             m_pipeInterleaveLog2  = 11;
896             break;
897         default:
898             ADDR_ASSERT_ALWAYS();
899             valid = FALSE;
900             break;
901     }
902 
903     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
904     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
905     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
906     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
907 
908     switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
909     {
910         case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
911             m_maxCompFrag     = 1;
912             m_maxCompFragLog2 = 0;
913             break;
914         case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
915             m_maxCompFrag     = 2;
916             m_maxCompFragLog2 = 1;
917             break;
918         case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
919             m_maxCompFrag     = 4;
920             m_maxCompFragLog2 = 2;
921             break;
922         case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
923             m_maxCompFrag     = 8;
924             m_maxCompFragLog2 = 3;
925             break;
926         default:
927             ADDR_ASSERT_ALWAYS();
928             valid = FALSE;
929             break;
930     }
931 
932     // Skip unaligned case
933     m_xmaskBaseIndex += MaxNumOfBppCMask;
934     m_htileBaseIndex += MaxNumOfAA;
935 
936     m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask;
937     m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
938     m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
939 
940     if (m_settings.supportRbPlus)
941     {
942         m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
943         m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
944 
945         ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
946 
947         ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
948                         sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
949 
950         if (m_numPkrLog2 >= 2)
951         {
952             m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
953             m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask;
954             m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
955         }
956     }
957     else
958     {
959         const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
960                                     static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
961                                     1;
962 
963         ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
964         ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) ==
965                         (numPipeType + 1) * MaxNumOfBppCMask);
966     }
967 
968     if (m_settings.supportRbPlus)
969     {
970         // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
971         // corresponding SW_64KB_* mode
972         m_blockVarSizeLog2 = m_pipesLog2 + 14;
973     }
974 
975     if (valid)
976     {
977         InitEquationTable();
978     }
979 
980     return valid;
981 }
982 
983 /**
984 ************************************************************************************************************************
985 *   Gfx10Lib::HwlConvertChipFamily
986 *
987 *   @brief
988 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
989 *   @return
990 *       ChipFamily
991 ************************************************************************************************************************
992 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)993 ChipFamily Gfx10Lib::HwlConvertChipFamily(
994     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
995     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
996 {
997     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
998 
999     m_settings.dccUnsup3DSwDis  = 1;
1000     m_settings.dsMipmapHtileFix = 1;
1001 
1002     switch (chipFamily)
1003     {
1004         case FAMILY_NV:
1005             if (ASICREV_IS_NAVI10_P(chipRevision))
1006             {
1007                 m_settings.dsMipmapHtileFix = 0;
1008                 m_settings.isDcn20          = 1;
1009             }
1010 
1011             if (ASICREV_IS_NAVI12_P(chipRevision))
1012             {
1013                 m_settings.isDcn20 = 1;
1014             }
1015 
1016             if (ASICREV_IS_NAVI14_M(chipRevision))
1017             {
1018                 m_settings.isDcn20 = 1;
1019             }
1020 
1021             if (ASICREV_IS_NAVI21_M(chipRevision))
1022             {
1023                 m_settings.supportRbPlus   = 1;
1024                 m_settings.dccUnsup3DSwDis = 0;
1025             }
1026 
1027             if (ASICREV_IS_NAVI22_P(chipRevision))
1028             {
1029                 m_settings.supportRbPlus   = 1;
1030                 m_settings.dccUnsup3DSwDis = 0;
1031             }
1032 
1033             if (ASICREV_IS_NAVI23_P(chipRevision))
1034             {
1035                 m_settings.supportRbPlus   = 1;
1036                 m_settings.dccUnsup3DSwDis = 0;
1037             }
1038 
1039             if (ASICREV_IS_NAVI24_P(chipRevision))
1040             {
1041                 m_settings.supportRbPlus   = 1;
1042                 m_settings.dccUnsup3DSwDis = 0;
1043             }
1044             break;
1045 
1046         case FAMILY_VGH:
1047             if (ASICREV_IS_VANGOGH(chipRevision))
1048             {
1049                 m_settings.supportRbPlus   = 1;
1050                 m_settings.dccUnsup3DSwDis = 0;
1051             }
1052             else
1053             {
1054                 ADDR_ASSERT(!"Unknown chip revision");
1055             }
1056 
1057             break;
1058         case FAMILY_RMB:
1059             if (ASICREV_IS_REMBRANDT(chipRevision))
1060             {
1061                 m_settings.supportRbPlus   = 1;
1062                 m_settings.dccUnsup3DSwDis = 0;
1063             }
1064             else
1065             {
1066                 ADDR_ASSERT(!"Unknown chip revision");
1067             }
1068 
1069             break;
1070         case FAMILY_RPL:
1071             if (ASICREV_IS_RAPHAEL(chipRevision))
1072             {
1073                 m_settings.supportRbPlus   = 1;
1074                 m_settings.dccUnsup3DSwDis = 0;
1075             }
1076             break;
1077         case FAMILY_MDN:
1078             if (ASICREV_IS_MENDOCINO(chipRevision))
1079             {
1080                 m_settings.supportRbPlus   = 1;
1081                 m_settings.dccUnsup3DSwDis = 0;
1082             }
1083             else
1084             {
1085                 ADDR_ASSERT(!"Unknown chip revision");
1086             }
1087             break;
1088         default:
1089             ADDR_ASSERT(!"Unknown chip family");
1090             break;
1091     }
1092 
1093     m_configFlags.use32bppFor422Fmt = TRUE;
1094 
1095     return family;
1096 }
1097 
1098 /**
1099 ************************************************************************************************************************
1100 *   Gfx10Lib::GetBlk256SizeLog2
1101 *
1102 *   @brief
1103 *       Get block 256 size
1104 *
1105 *   @return
1106 *       N/A
1107 ************************************************************************************************************************
1108 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1109 void Gfx10Lib::GetBlk256SizeLog2(
1110     AddrResourceType resourceType,      ///< [in] Resource type
1111     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1112     UINT_32          elemLog2,          ///< [in] element size log2
1113     UINT_32          numSamplesLog2,    ///< [in] number of samples
1114     Dim3d*           pBlock             ///< [out] block size
1115     ) const
1116 {
1117     if (IsThin(resourceType, swizzleMode))
1118     {
1119         UINT_32 blockBits = 8 - elemLog2;
1120 
1121         if (IsZOrderSwizzle(swizzleMode))
1122         {
1123             blockBits -= numSamplesLog2;
1124         }
1125 
1126         pBlock->w = (blockBits >> 1) + (blockBits & 1);
1127         pBlock->h = (blockBits >> 1);
1128         pBlock->d = 0;
1129     }
1130     else
1131     {
1132         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1133 
1134         UINT_32 blockBits = 8 - elemLog2;
1135 
1136         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1137         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1138         pBlock->h = (blockBits / 3);
1139     }
1140 }
1141 
1142 /**
1143 ************************************************************************************************************************
1144 *   Gfx10Lib::GetCompressedBlockSizeLog2
1145 *
1146 *   @brief
1147 *       Get compress block size
1148 *
1149 *   @return
1150 *       N/A
1151 ************************************************************************************************************************
1152 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1153 void Gfx10Lib::GetCompressedBlockSizeLog2(
1154     Gfx10DataType    dataType,          ///< [in] Data type
1155     AddrResourceType resourceType,      ///< [in] Resource type
1156     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1157     UINT_32          elemLog2,          ///< [in] element size log2
1158     UINT_32          numSamplesLog2,    ///< [in] number of samples
1159     Dim3d*           pBlock             ///< [out] block size
1160     ) const
1161 {
1162     if (dataType == Gfx10DataColor)
1163     {
1164         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1165     }
1166     else
1167     {
1168         ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1169         pBlock->w = 3;
1170         pBlock->h = 3;
1171         pBlock->d = 0;
1172     }
1173 }
1174 
1175 /**
1176 ************************************************************************************************************************
1177 *   Gfx10Lib::GetMetaOverlapLog2
1178 *
1179 *   @brief
1180 *       Get meta block overlap
1181 *
1182 *   @return
1183 *       N/A
1184 ************************************************************************************************************************
1185 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1186 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1187     Gfx10DataType    dataType,          ///< [in] Data type
1188     AddrResourceType resourceType,      ///< [in] Resource type
1189     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1190     UINT_32          elemLog2,          ///< [in] element size log2
1191     UINT_32          numSamplesLog2     ///< [in] number of samples
1192     ) const
1193 {
1194     Dim3d compBlock;
1195     Dim3d microBlock;
1196 
1197     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1198     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1199 
1200     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
1201     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1202     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
1203     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
1204     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
1205 
1206     if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1207     {
1208         overlap++;
1209     }
1210 
1211     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1212     if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1213     {
1214         overlap--;
1215     }
1216     overlap = Max(overlap, 0);
1217     return overlap;
1218 }
1219 
1220 /**
1221 ************************************************************************************************************************
1222 *   Gfx10Lib::Get3DMetaOverlapLog2
1223 *
1224 *   @brief
1225 *       Get 3d meta block overlap
1226 *
1227 *   @return
1228 *       N/A
1229 ************************************************************************************************************************
1230 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1231 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1232     AddrResourceType resourceType,      ///< [in] Resource type
1233     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1234     UINT_32          elemLog2           ///< [in] element size log2
1235     ) const
1236 {
1237     Dim3d microBlock;
1238     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1239 
1240     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1241 
1242     if (m_settings.supportRbPlus)
1243     {
1244         overlap++;
1245     }
1246 
1247     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1248     {
1249         overlap = 0;
1250     }
1251     return overlap;
1252 }
1253 
1254 /**
1255 ************************************************************************************************************************
1256 *   Gfx10Lib::GetPipeRotateAmount
1257 *
1258 *   @brief
1259 *       Get pipe rotate amount
1260 *
1261 *   @return
1262 *       Pipe rotate amount
1263 ************************************************************************************************************************
1264 */
1265 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1266 INT_32 Gfx10Lib::GetPipeRotateAmount(
1267     AddrResourceType resourceType,      ///< [in] Resource type
1268     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
1269     ) const
1270 {
1271     INT_32 amount = 0;
1272 
1273     if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1274     {
1275         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1276                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
1277     }
1278 
1279     return amount;
1280 }
1281 
1282 /**
1283 ************************************************************************************************************************
1284 *   Gfx10Lib::GetMetaBlkSize
1285 *
1286 *   @brief
1287 *       Get metadata block size
1288 *
1289 *   @return
1290 *       Meta block size
1291 ************************************************************************************************************************
1292 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1293 UINT_32 Gfx10Lib::GetMetaBlkSize(
1294     Gfx10DataType    dataType,          ///< [in] Data type
1295     AddrResourceType resourceType,      ///< [in] Resource type
1296     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1297     UINT_32          elemLog2,          ///< [in] element size log2
1298     UINT_32          numSamplesLog2,    ///< [in] number of samples
1299     BOOL_32          pipeAlign,         ///< [in] pipe align
1300     Dim3d*           pBlock             ///< [out] block size
1301     ) const
1302 {
1303     INT_32 metablkSizeLog2;
1304 
1305     const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
1306     const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
1307     const INT_32 compBlkSizeLog2    = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1308     const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1309                                         numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1310     const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
1311     INT_32       numPipesLog2       = m_pipesLog2;
1312 
1313     if (IsThin(resourceType, swizzleMode))
1314     {
1315         if ((pipeAlign == FALSE) ||
1316             (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1317             (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
1318         {
1319             if (pipeAlign)
1320             {
1321                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1322                 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1323             }
1324             else
1325             {
1326                 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1327             }
1328         }
1329         else
1330         {
1331             if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1332             {
1333                 numPipesLog2++;
1334             }
1335 
1336             INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1337 
1338             if (numPipesLog2 >= 4)
1339             {
1340                 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1341 
1342                 // In 16Bpe 8xaa, we have an extra overlap bit
1343                 if ((pipeRotateLog2 > 0)  &&
1344                     (elemLog2 == 4)       &&
1345                     (numSamplesLog2 == 3) &&
1346                     (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1347                 {
1348                     overlapLog2++;
1349                 }
1350 
1351                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1352                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1353 
1354                 if (m_settings.supportRbPlus    &&
1355                     IsRtOptSwizzle(swizzleMode) &&
1356                     (numPipesLog2 == 6)         &&
1357                     (numSamplesLog2 == 3)       &&
1358                     (m_maxCompFragLog2 == 3)    &&
1359                     (metablkSizeLog2 < 15))
1360                 {
1361                     metablkSizeLog2 = 15;
1362                 }
1363             }
1364             else
1365             {
1366                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1367             }
1368 
1369             if (dataType == Gfx10DataDepthStencil)
1370             {
1371                 // For htile surfaces, pad meta block size to 2K * num_pipes
1372                 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1373             }
1374 
1375             const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1376 
1377             if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1378             {
1379                 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1380 
1381                 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1382             }
1383         }
1384 
1385         const INT_32 metablkBitsLog2 =
1386             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1387         pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1388         pBlock->h = 1 << (metablkBitsLog2 >> 1);
1389         pBlock->d = 1;
1390     }
1391     else
1392     {
1393         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1394 
1395         if (pipeAlign)
1396         {
1397             if (m_settings.supportRbPlus         &&
1398                 (m_pipesLog2 == m_numSaLog2 + 1) &&
1399                 (m_pipesLog2 > 1)                &&
1400                 IsRbAligned(resourceType, swizzleMode))
1401             {
1402                 numPipesLog2++;
1403             }
1404 
1405             const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1406 
1407             metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1408             metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1409             metablkSizeLog2 = Max(metablkSizeLog2, 12);
1410         }
1411         else
1412         {
1413             metablkSizeLog2 = 12;
1414         }
1415 
1416         const INT_32 metablkBitsLog2 =
1417             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1418         pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1419         pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1420         pBlock->d = 1 << (metablkBitsLog2 / 3);
1421     }
1422 
1423     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1424 }
1425 
1426 /**
1427 ************************************************************************************************************************
1428 *   Gfx10Lib::ConvertSwizzlePatternToEquation
1429 *
1430 *   @brief
1431 *       Convert swizzle pattern to equation.
     *
     *       The full per-address-bit pattern is expanded from pPatInfo and every address bit in
     *       [0, block size log2) is described by up to three terms: addr[i], xor1[i] and xor2[i]. Each term names a
     *       channel (0 = x, 1 = y, 2 = z as written by this function) and a bit index inside that channel.
     *       Three cases are handled: non-XOR swizzle modes, XOR modes on thin resources (x/y only, z goes to xor2)
     *       and XOR modes on the remaining (thick) resources (x/y/z).
1432 *
1433 *   @return
1434 *       N/A
1435 ************************************************************************************************************************
1436 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1437 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1438     UINT_32                elemLog2,  ///< [in] element bytes log2
1439     AddrResourceType       rsrcType,  ///< [in] resource type
1440     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1441     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
1442     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1443     const
1444 {
1445     // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
1446     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1447     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1448 
1449     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1450     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
         // Start from an all-zero equation; only the fields below and the per-bit terms are filled in.
1451     memset(pEquation, 0, sizeof(ADDR_EQUATION));
1452     pEquation->numBits            = blockSizeLog2;
1453     pEquation->numBitComponents   = pPatInfo->maxItemCount;
1454     pEquation->stackedDepthSlices = FALSE;
1455 
         // The lowest elemLog2 address bits map straight to x-channel indices 0..elemLog2-1.
         // This is why every x index written below is offset by elemLog2.
1456     for (UINT_32 i = 0; i < elemLog2; i++)
1457     {
1458         pEquation->addr[i].channel = 0;
1459         pEquation->addr[i].valid   = 1;
1460         pEquation->addr[i].index   = i;
1461     }
1462 
1463     if (IsXor(swMode) == FALSE)
1464     {
             // Non-XOR modes: each address bit is expected (asserted) to come from exactly one coordinate bit,
             // so only addr[i] is populated and both XOR terms are cleared.
1465         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1466         {
1467             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1468 
1469             if (pSwizzle[i].x != 0)
1470             {
1471                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1472 
1473                 pEquation->addr[i].channel = 0;
1474                 pEquation->addr[i].valid   = 1;
1475                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1476             }
1477             else if (pSwizzle[i].y != 0)
1478             {
1479                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1480 
1481                 pEquation->addr[i].channel = 1;
1482                 pEquation->addr[i].valid   = 1;
1483                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1484             }
1485             else
1486             {
1487                 ADDR_ASSERT(pSwizzle[i].z != 0);
1488                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1489 
1490                 pEquation->addr[i].channel = 2;
1491                 pEquation->addr[i].valid   = 1;
1492                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1493             }
1494 
1495             pEquation->xor1[i].value = 0;
1496             pEquation->xor2[i].value = 0;
1497         }
1498     }
1499     else if (IsThin(rsrcType, swMode))
1500     {
             // XOR mode, thin resource: block footprint is 2D (w x h); any z term is recorded in xor2.
1501         Dim3d dim;
1502         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1503 
1504         const UINT_32 blkXLog2 = Log2(dim.w);
1505         const UINT_32 blkYLog2 = Log2(dim.h);
1506         const UINT_32 blkXMask = dim.w - 1;
1507         const UINT_32 blkYMask = dim.h - 1;
1508 
             // swizzle[] holds the not-yet-assigned x/y components of each address bit.
             // xMask/yMask collect the in-block coordinate bits already used as an addr[] term.
             // bMask has bit i set once address bit i is fully resolved.
1509         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1510         UINT_32          xMask = 0;
1511         UINT_32          yMask = 0;
1512         UINT_32          bMask = (1 << elemLog2) - 1;
1513 
             // Pass 1: resolve every bit that is a single coordinate bit; for the others peel off the z term and
             // the x/y bits that lie outside the block (xHi/yHi) into the XOR slots.
1514         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1515         {
                 // NOTE(review): .value presumably aliases the x/y/z/s fields, so IsPow2(value) means
                 // "exactly one coordinate bit contributes" -- confirm against the ADDR_BIT_SETTING definition.
1516             if (IsPow2(pSwizzle[i].value))
1517             {
1518                 if (pSwizzle[i].x != 0)
1519                 {
1520                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1521                     xMask |= pSwizzle[i].x;
1522 
1523                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1524 
1525                     ADDR_ASSERT(xLog2 < blkXLog2);
1526 
1527                     pEquation->addr[i].channel = 0;
1528                     pEquation->addr[i].valid   = 1;
1529                     pEquation->addr[i].index   = xLog2 + elemLog2;
1530                 }
1531                 else
1532                 {
1533                     ADDR_ASSERT(pSwizzle[i].y != 0);
1534                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1535                     yMask |= pSwizzle[i].y;
1536 
1537                     pEquation->addr[i].channel = 1;
1538                     pEquation->addr[i].valid   = 1;
1539                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1540 
1541                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1542                 }
1543 
1544                 swizzle[i].value = 0;
1545                 bMask |= 1 << i;
1546             }
1547             else
1548             {
1549                 if (pSwizzle[i].z != 0)
1550                 {
1551                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1552 
1553                     pEquation->xor2[i].channel = 2;
1554                     pEquation->xor2[i].valid   = 1;
1555                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1556                 }
1557 
                     // Keep only x and y as pending work; z (handled above) and s are dropped.
1558                 swizzle[i].x = pSwizzle[i].x;
1559                 swizzle[i].y = pSwizzle[i].y;
1560                 swizzle[i].z = swizzle[i].s = 0;
1561 
1562                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1563 
                     // x bits at or above the block width go to xor1.
1564                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1565 
1566                 if (xHi != 0)
1567                 {
1568                     ADDR_ASSERT(IsPow2(xHi));
1569                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1570 
1571                     pEquation->xor1[i].channel = 0;
1572                     pEquation->xor1[i].valid   = 1;
1573                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1574 
1575                     swizzle[i].x &= blkXMask;
1576                 }
1577 
                     // y bits at or above the block height go to xor1 if xHi did not take it, otherwise to xor2.
1578                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1579 
1580                 if (yHi != 0)
1581                 {
1582                     ADDR_ASSERT(IsPow2(yHi));
1583 
1584                     if (xHi == 0)
1585                     {
1586                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1587                         pEquation->xor1[i].channel = 1;
1588                         pEquation->xor1[i].valid   = 1;
1589                         pEquation->xor1[i].index   = Log2(yHi);
1590                     }
1591                     else
1592                     {
1593                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1594                         pEquation->xor2[i].channel = 1;
1595                         pEquation->xor2[i].valid   = 1;
1596                         pEquation->xor2[i].index   = Log2(yHi);
1597                     }
1598 
1599                     swizzle[i].y &= blkYMask;
1600                 }
1601 
                     // Nothing left inside the block: this address bit is done.
1602                 if (swizzle[i].value == 0)
1603                 {
1604                     bMask |= 1 << i;
1605                 }
1606             }
1607         }
1608 
1609         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1610         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1611 
             // All bits below the pipe interleave must already be resolved; pass 2 only scans bits above it.
1612         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1613 
             // Pass 2: repeat until every address bit is resolved. A pending bit whose remainder is a single
             // coordinate bit becomes its addr[] term; otherwise coordinate bits already claimed by some addr[]
             // term (present in xMask/yMask) are moved into xor1/xor2 and removed from the remainder.
             // NOTE(review): there is no explicit guard here against a pattern that stops making progress.
1614         while (bMask != blockMask)
1615         {
1616             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1617             {
1618                 if ((bMask & (1 << i)) == 0)
1619                 {
1620                     if (IsPow2(swizzle[i].value))
1621                     {
1622                         if (swizzle[i].x != 0)
1623                         {
1624                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1625                             xMask |= swizzle[i].x;
1626 
1627                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1628 
1629                             ADDR_ASSERT(xLog2 < blkXLog2);
1630 
1631                             pEquation->addr[i].channel = 0;
1632                             pEquation->addr[i].valid   = 1;
1633                             pEquation->addr[i].index   = xLog2 + elemLog2;
1634                         }
1635                         else
1636                         {
1637                             ADDR_ASSERT(swizzle[i].y != 0);
1638                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1639                             yMask |= swizzle[i].y;
1640 
1641                             pEquation->addr[i].channel = 1;
1642                             pEquation->addr[i].valid   = 1;
1643                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1644 
1645                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1646                         }
1647 
1648                         swizzle[i].value = 0;
1649                         bMask |= 1 << i;
1650                     }
1651                     else
1652                     {
                             // Coordinate bits of this address bit that are already used as an addr[] term elsewhere.
1653                         const UINT_32 x = swizzle[i].x & xMask;
1654                         const UINT_32 y = swizzle[i].y & yMask;
1655 
1656                         if (x != 0)
1657                         {
1658                             ADDR_ASSERT(IsPow2(x));
1659 
                                 // Use the first free XOR slot: xor1, then xor2.
1660                             if (pEquation->xor1[i].value == 0)
1661                             {
1662                                 pEquation->xor1[i].channel = 0;
1663                                 pEquation->xor1[i].valid   = 1;
1664                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1665                             }
1666                             else
1667                             {
1668                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1669                                 pEquation->xor2[i].channel = 0;
1670                                 pEquation->xor2[i].valid   = 1;
1671                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1672                             }
1673                         }
1674 
1675                         if (y != 0)
1676                         {
1677                             ADDR_ASSERT(IsPow2(y));
1678 
1679                             if (pEquation->xor1[i].value == 0)
1680                             {
1681                                 pEquation->xor1[i].channel = 1;
1682                                 pEquation->xor1[i].valid   = 1;
1683                                 pEquation->xor1[i].index   = Log2(y);
1684                             }
1685                             else
1686                             {
1687                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1688                                 pEquation->xor2[i].channel = 1;
1689                                 pEquation->xor2[i].valid   = 1;
1690                                 pEquation->xor2[i].index   = Log2(y);
1691                             }
1692                         }
1693 
                             // Remove what was just recorded so the remainder can shrink to a single bit.
1694                         swizzle[i].x &= ~x;
1695                         swizzle[i].y &= ~y;
1696                     }
1697                 }
1698             }
1699         }
1700 
             // Every in-block x and y bit must have been used as exactly one addr[] term.
1701         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1702     }
1703     else
1704     {
             // XOR mode, non-thin resource: same two-pass scheme as above with z treated like x and y.
             // Block dimensions come from the 4K table when the block size log2 is 12, else from the 64K table.
1705         const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1706         const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1707         const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1708         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1709         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1710         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1711 
1712         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1713         UINT_32          xMask = 0;
1714         UINT_32          yMask = 0;
1715         UINT_32          zMask = 0;
1716         UINT_32          bMask = (1 << elemLog2) - 1;
1717 
             // Pass 1: resolve single-coordinate bits; peel out-of-block x/y/z bits into xor1/xor2.
1718         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1719         {
1720             if (IsPow2(pSwizzle[i].value))
1721             {
1722                 if (pSwizzle[i].x != 0)
1723                 {
1724                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1725                     xMask |= pSwizzle[i].x;
1726 
1727                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1728 
1729                     ADDR_ASSERT(xLog2 < blkXLog2);
1730 
1731                     pEquation->addr[i].channel = 0;
1732                     pEquation->addr[i].valid   = 1;
1733                     pEquation->addr[i].index   = xLog2 + elemLog2;
1734                 }
1735                 else if (pSwizzle[i].y != 0)
1736                 {
1737                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1738                     yMask |= pSwizzle[i].y;
1739 
1740                     pEquation->addr[i].channel = 1;
1741                     pEquation->addr[i].valid   = 1;
1742                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1743 
1744                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1745                 }
1746                 else
1747                 {
1748                     ADDR_ASSERT(pSwizzle[i].z != 0);
1749                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1750                     zMask |= pSwizzle[i].z;
1751 
1752                     pEquation->addr[i].channel = 2;
1753                     pEquation->addr[i].valid   = 1;
1754                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1755 
1756                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1757                 }
1758 
1759                 swizzle[i].value = 0;
1760                 bMask |= 1 << i;
1761             }
1762             else
1763             {
1764                 swizzle[i].x = pSwizzle[i].x;
1765                 swizzle[i].y = pSwizzle[i].y;
1766                 swizzle[i].z = pSwizzle[i].z;
1767                 swizzle[i].s = 0;
1768 
1769                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1770 
1771                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1772                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1773                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1774 
                     // Only two XOR slots exist, so at most two channels may have out-of-block bits.
1775                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1776 
1777                 if (xHi != 0)
1778                 {
1779                     ADDR_ASSERT(IsPow2(xHi));
1780                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1781 
1782                     pEquation->xor1[i].channel = 0;
1783                     pEquation->xor1[i].valid   = 1;
1784                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1785 
1786                     swizzle[i].x &= blkXMask;
1787                 }
1788 
1789                 if (yHi != 0)
1790                 {
1791                     ADDR_ASSERT(IsPow2(yHi));
1792 
1793                     if (pEquation->xor1[i].value == 0)
1794                     {
1795                         pEquation->xor1[i].channel = 1;
1796                         pEquation->xor1[i].valid   = 1;
1797                         pEquation->xor1[i].index   = Log2(yHi);
1798                     }
1799                     else
1800                     {
1801                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1802                         pEquation->xor2[i].channel = 1;
1803                         pEquation->xor2[i].valid   = 1;
1804                         pEquation->xor2[i].index   = Log2(yHi);
1805                     }
1806 
1807                     swizzle[i].y &= blkYMask;
1808                 }
1809 
1810                 if (zHi != 0)
1811                 {
1812                     ADDR_ASSERT(IsPow2(zHi));
1813 
1814                     if (pEquation->xor1[i].value == 0)
1815                     {
1816                         pEquation->xor1[i].channel = 2;
1817                         pEquation->xor1[i].valid   = 1;
1818                         pEquation->xor1[i].index   = Log2(zHi);
1819                     }
1820                     else
1821                     {
1822                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1823                         pEquation->xor2[i].channel = 2;
1824                         pEquation->xor2[i].valid   = 1;
1825                         pEquation->xor2[i].index   = Log2(zHi);
1826                     }
1827 
1828                     swizzle[i].z &= blkZMask;
1829                 }
1830 
1831                 if (swizzle[i].value == 0)
1832                 {
1833                     bMask |= 1 << i;
1834                 }
1835             }
1836         }
1837 
1838         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1839         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1840 
1841         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1842 
             // Pass 2: iterate until all address bits are resolved (see the thin branch for the scheme).
             // NOTE(review): there is no explicit guard here against a pattern that stops making progress.
1843         while (bMask != blockMask)
1844         {
1845             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1846             {
1847                 if ((bMask & (1 << i)) == 0)
1848                 {
1849                     if (IsPow2(swizzle[i].value))
1850                     {
1851                         if (swizzle[i].x != 0)
1852                         {
1853                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1854                             xMask |= swizzle[i].x;
1855 
1856                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1857 
1858                             ADDR_ASSERT(xLog2 < blkXLog2);
1859 
1860                             pEquation->addr[i].channel = 0;
1861                             pEquation->addr[i].valid   = 1;
1862                             pEquation->addr[i].index   = xLog2 + elemLog2;
1863                         }
1864                         else if (swizzle[i].y != 0)
1865                         {
1866                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1867                             yMask |= swizzle[i].y;
1868 
1869                             pEquation->addr[i].channel = 1;
1870                             pEquation->addr[i].valid   = 1;
1871                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1872 
1873                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1874                         }
1875                         else
1876                         {
1877                             ADDR_ASSERT(swizzle[i].z != 0);
1878                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1879                             zMask |= swizzle[i].z;
1880 
1881                             pEquation->addr[i].channel = 2;
1882                             pEquation->addr[i].valid   = 1;
1883                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1884 
1885                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1886                         }
1887 
1888                         swizzle[i].value = 0;
1889                         bMask |= 1 << i;
1890                     }
1891                     else
1892                     {
                             // Coordinate bits of this address bit that are already used as an addr[] term elsewhere.
1893                         const UINT_32 x = swizzle[i].x & xMask;
1894                         const UINT_32 y = swizzle[i].y & yMask;
1895                         const UINT_32 z = swizzle[i].z & zMask;
1896 
1897                         if (x != 0)
1898                         {
1899                             ADDR_ASSERT(IsPow2(x));
1900 
1901                             if (pEquation->xor1[i].value == 0)
1902                             {
1903                                 pEquation->xor1[i].channel = 0;
1904                                 pEquation->xor1[i].valid   = 1;
1905                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1906                             }
1907                             else
1908                             {
1909                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1910                                 pEquation->xor2[i].channel = 0;
1911                                 pEquation->xor2[i].valid   = 1;
1912                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1913                             }
1914                         }
1915 
1916                         if (y != 0)
1917                         {
1918                             ADDR_ASSERT(IsPow2(y));
1919 
1920                             if (pEquation->xor1[i].value == 0)
1921                             {
1922                                 pEquation->xor1[i].channel = 1;
1923                                 pEquation->xor1[i].valid   = 1;
1924                                 pEquation->xor1[i].index   = Log2(y);
1925                             }
1926                             else
1927                             {
1928                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1929                                 pEquation->xor2[i].channel = 1;
1930                                 pEquation->xor2[i].valid   = 1;
1931                                 pEquation->xor2[i].index   = Log2(y);
1932                             }
1933                         }
1934 
1935                         if (z != 0)
1936                         {
1937                             ADDR_ASSERT(IsPow2(z));
1938 
1939                             if (pEquation->xor1[i].value == 0)
1940                             {
1941                                 pEquation->xor1[i].channel = 2;
1942                                 pEquation->xor1[i].valid   = 1;
1943                                 pEquation->xor1[i].index   = Log2(z);
1944                             }
1945                             else
1946                             {
1947                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1948                                 pEquation->xor2[i].channel = 2;
1949                                 pEquation->xor2[i].valid   = 1;
1950                                 pEquation->xor2[i].index   = Log2(z);
1951                             }
1952                         }
1953 
1954                         swizzle[i].x &= ~x;
1955                         swizzle[i].y &= ~y;
1956                         swizzle[i].z &= ~z;
1957                     }
1958                 }
1959             }
1960         }
1961 
             // Every in-block x, y and z bit must have been used as exactly one addr[] term.
1962         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1963     }
1964 }
1965 
1966 /**
1967 ************************************************************************************************************************
1968 *   Gfx10Lib::InitEquationTable
1969 *
1970 *   @brief
1971 *       Initialize Equation table.
1972 *
1973 *   @return
1974 *       N/A
1975 ************************************************************************************************************************
1976 */
InitEquationTable()1977 VOID Gfx10Lib::InitEquationTable()
1978 {
1979     memset(m_equationTable, 0, sizeof(m_equationTable));
1980 
1981     // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1982     // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1983     // computing 2D resources.
1984     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1985     {
1986         // Add offset. Start iterating from ADDR_RSRC_TEX_2D
1987         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1988 
1989         // Iterate through the maximum number of swizzlemodes a type can hold
1990         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1991         {
1992             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1993 
1994             // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
1995             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1996             {
1997                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1998                 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
1999                 // overwriting the choice.
2000                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
2001 
2002                 if (pPatInfo != NULL)
2003                 {
2004                     ADDR_ASSERT(IsValidSwMode(swMode));
2005                     if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
2006                     {
2007                         ADDR_EQUATION equation = {};
2008 
2009                         // Passing in pPatInfo to get the addr equation
2010                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2011 
2012                         equationIndex = m_numEquations;
2013                         ADDR_ASSERT(equationIndex < EquationTableSize);
2014                         // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
2015                         m_equationTable[equationIndex] = equation;
2016                         // Increment m_numEquations
2017                         m_numEquations++;
2018                     }
2019                     else // There is no equationIndex
2020                     {
2021                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2022                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2023                         ADDR_ASSERT(rsrcTypeIdx == 1);
2024                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2025                         ADDR_ASSERT(m_settings.supportRbPlus == 1);
2026                     }
2027                 }
2028                 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
2029                 // iteration in this nested for-loop
2030                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2031             }
2032         }
2033     }
2034 }
2035 
2036 /**
2037 ************************************************************************************************************************
2038 *   Gfx10Lib::HwlGetEquationIndex
2039 *
2040 *   @brief
2041 *       Interface function stub of GetEquationIndex
2042 *
2043 *   @return
2044 *       ADDR_E_RETURNCODE
2045 ************************************************************************************************************************
2046 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2047 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2048     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
2049     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
2050     ) const
2051 {
2052     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2053 
2054     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2055         (pIn->resourceType == ADDR_RSRC_TEX_3D))
2056     {
2057         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2058         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
2059         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
2060 
2061         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2062     }
2063 
2064     if (pOut->pMipInfo != NULL)
2065     {
2066         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2067         {
2068             pOut->pMipInfo[i].equationIndex = equationIdx;
2069         }
2070     }
2071 
2072     return equationIdx;
2073 }
2074 
2075 /**
2076 ************************************************************************************************************************
2077 *   Gfx10Lib::GetValidDisplaySwizzleModes
2078 *
2079 *   @brief
2080 *       Get valid swizzle modes mask for displayable surface
2081 *
2082 *   @return
2083 *       Valid swizzle modes mask for displayable surface
2084 ************************************************************************************************************************
2085 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2086 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2087     UINT_32 bpp
2088     ) const
2089 {
2090     UINT_32 swModeMask = 0;
2091 
2092     if (bpp <= 64)
2093     {
2094         if (m_settings.isDcn20)
2095         {
2096             swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2097         }
2098         else
2099         {
2100             swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2101         }
2102     }
2103 
2104     return swModeMask;
2105 }
2106 
2107 /**
2108 ************************************************************************************************************************
2109 *   Gfx10Lib::IsValidDisplaySwizzleMode
2110 *
2111 *   @brief
2112 *       Check if a swizzle mode is supported by display engine
2113 *
2114 *   @return
2115 *       TRUE is swizzle mode is supported by display engine
2116 ************************************************************************************************************************
2117 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2118 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2119     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2120     ) const
2121 {
2122     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2123 
2124     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2125 }
2126 
2127 /**
2128 ************************************************************************************************************************
2129 *   Gfx10Lib::GetMaxNumMipsInTail
2130 *
2131 *   @brief
2132 *       Return max number of mips in tails
2133 *
2134 *   @return
2135 *       Max number of mips in tails
2136 ************************************************************************************************************************
2137 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2138 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2139     UINT_32 blockSizeLog2,     ///< block size log2
2140     BOOL_32 isThin             ///< is thin or thick
2141     ) const
2142 {
2143     UINT_32 effectiveLog2 = blockSizeLog2;
2144 
2145     if (isThin == FALSE)
2146     {
2147         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2148     }
2149 
2150     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2151 }
2152 
2153 /**
2154 ************************************************************************************************************************
2155 *   Gfx10Lib::HwlComputePipeBankXor
2156 *
2157 *   @brief
2158 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2159 *
2160 *   @return
2161 *       PipeBankXor value
2162 ************************************************************************************************************************
2163 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2164 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2165     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
2166     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
2167     ) const
2168 {
2169     if (IsNonPrtXor(pIn->swizzleMode))
2170     {
2171         const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2172 
2173         // No pipe xor...
2174         const UINT_32 pipeXor = 0;
2175         UINT_32       bankXor = 0;
2176 
2177         const UINT_32         XorPatternLen = 8;
2178         static const UINT_32  XorBankRot1b[XorPatternLen] = {0,  1,  0,  1,  0,  1,  0,  1};
2179         static const UINT_32  XorBankRot2b[XorPatternLen] = {0,  2,  1,  3,  2,  0,  3,  1};
2180         static const UINT_32  XorBankRot3b[XorPatternLen] = {0,  4,  2,  6,  1,  5,  3,  7};
2181         static const UINT_32  XorBankRot4b[XorPatternLen] = {0,  8,  4, 12,  2, 10,  6, 14};
2182         static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2183 
2184         switch (bankBits)
2185         {
2186             case 1:
2187             case 2:
2188             case 3:
2189             case 4:
2190                 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2191                 break;
2192             default:
2193                 // valid bank bits should be 0~4
2194                 ADDR_ASSERT_ALWAYS();
2195             case 0:
2196                 break;
2197         }
2198 
2199         pOut->pipeBankXor = bankXor | pipeXor;
2200     }
2201     else
2202     {
2203         pOut->pipeBankXor = 0;
2204     }
2205 
2206     return ADDR_OK;
2207 }
2208 
2209 /**
2210 ************************************************************************************************************************
2211 *   Gfx10Lib::HwlComputeSlicePipeBankXor
2212 *
2213 *   @brief
2214 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2215 *
2216 *   @return
2217 *       PipeBankXor value
2218 ************************************************************************************************************************
2219 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2220 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2221     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
2222     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
2223     ) const
2224 {
2225     if (IsNonPrtXor(pIn->swizzleMode))
2226     {
2227         const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2228         const UINT_32 pipeBits  = GetPipeXorBits(blockBits);
2229         const UINT_32 pipeXor   = ReverseBitVector(pIn->slice, pipeBits);
2230 
2231         pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2232 
2233         if (pIn->bpe != 0)
2234         {
2235             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2236                                                                     pIn->resourceType,
2237                                                                     Log2(pIn->bpe >> 3),
2238                                                                     1);
2239 
2240             if (pPatInfo != NULL)
2241             {
2242                 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
2243                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2244 
2245                 const UINT_32 pipeBankXorOffset =
2246                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2247                                                     blockBits,
2248                                                     0,
2249                                                     0,
2250                                                     pIn->slice,
2251                                                     0);
2252 
2253                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2254 
2255                 // Should have no bit set under pipe interleave
2256                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2257 
2258                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2259             }
2260         }
2261     }
2262     else
2263     {
2264         pOut->pipeBankXor = 0;
2265     }
2266 
2267     return ADDR_OK;
2268 }
2269 
2270 /**
2271 ************************************************************************************************************************
2272 *   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2273 *
2274 *   @brief
2275 *       Compute sub resource offset to support swizzle pattern
2276 *
2277 *   @return
2278 *       Offset
2279 ************************************************************************************************************************
2280 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2281 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2282     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
2283     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
2284     ) const
2285 {
2286     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2287 
2288     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2289 
2290     return ADDR_OK;
2291 }
2292 
2293 /**
2294 ************************************************************************************************************************
2295 *   Gfx10Lib::HwlComputeNonBlockCompressedView
2296 *
2297 *   @brief
2298 *       Compute non-block-compressed view for a given mipmap level/slice.
2299 *
2300 *   @return
2301 *       ADDR_E_RETURNCODE
2302 ************************************************************************************************************************
2303 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const2304 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2305     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
2306     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
2307     ) const
2308 {
2309     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2310 
2311     if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
2312     {
2313         // Only thin swizzle mode can have a NonBC view...
2314         returnCode = ADDR_INVALIDPARAMS;
2315     }
2316     else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
2317              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2318     {
2319         // Only support BC1~BC7, ASTC, or ETC2 for now...
2320         returnCode = ADDR_NOTSUPPORTED;
2321     }
2322     else
2323     {
2324         UINT_32 bcWidth, bcHeight;
2325         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2326 
2327         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2328         infoIn.flags        = pIn->flags;
2329         infoIn.swizzleMode  = pIn->swizzleMode;
2330         infoIn.resourceType = pIn->resourceType;
2331         infoIn.bpp          = bpp;
2332         infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
2333         infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
2334         infoIn.numSlices    = pIn->numSlices;
2335         infoIn.numMipLevels = pIn->numMipLevels;
2336         infoIn.numSamples   = 1;
2337         infoIn.numFrags     = 1;
2338 
2339         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2340         ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
2341 
2342         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2343         infoOut.pMipInfo = mipInfo;
2344 
2345         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2346 
2347         if (tiled)
2348         {
2349             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2350         }
2351         else
2352         {
2353             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2354         }
2355 
2356         if (returnCode == ADDR_OK)
2357         {
2358             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2359             subOffIn.swizzleMode      = infoIn.swizzleMode;
2360             subOffIn.resourceType     = infoIn.resourceType;
2361             subOffIn.slice            = pIn->slice;
2362             subOffIn.sliceSize        = infoOut.sliceSize;
2363             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2364             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
2365 
2366             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2367 
2368             // For any mipmap level, move nonBc view base address by offset
2369             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2370             pOut->offset = subOffOut.offset;
2371 
2372             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2373             slicePbXorIn.bpe             = infoIn.bpp;
2374             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
2375             slicePbXorIn.resourceType    = infoIn.resourceType;
2376             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2377             slicePbXorIn.slice           = pIn->slice;
2378 
2379             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2380 
2381             // For any mipmap level, nonBc view should use computed pbXor
2382             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2383             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2384 
2385             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2386             const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2387             const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2388 
2389             if (inTail)
2390             {
2391                 // For mipmap level that is in mip tail block, hack a lot of things...
2392                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2393                 // are fit in tail block:
2394 
2395                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2396                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2397 
2398                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2399                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2400 
2401                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2402                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2403 
2404                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2405                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2406             }
2407             // This check should cover at least mipId == 0
2408             else if (requestMipWidth << pIn->mipId == infoIn.width)
2409             {
2410                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2411                 // - only one mipmap level and mipId = 0
2412                 pOut->mipId        = 0;
2413                 pOut->numMipLevels = 1;
2414 
2415                 // (mip0) width = requestMipWidth
2416                 pOut->unalignedWidth = requestMipWidth;
2417 
2418                 // (mip0) height = requestMipHeight
2419                 pOut->unalignedHeight = requestMipHeight;
2420             }
2421             else
2422             {
2423                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2424                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2425                 // because single mip view may have different pitch value than original (multiple) mip view...
2426                 // A simple case would be:
2427                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2428                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2429                 //   mip0 width = 0x101/mip1 width = 0x80
2430                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2431                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2432 
2433                 // - 2 levels and mipId = 1
2434                 pOut->mipId        = 1;
2435                 pOut->numMipLevels = 2;
2436 
2437                 const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2438                 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2439 
2440                 const BOOL_32 needToAvoidInTail =
2441                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2442                     TRUE : FALSE;
2443 
2444                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2445                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2446 
2447                 const BOOL_32 needExtraWidth =
2448                     ((upperMipWidth < requestMipWidth * 2) ||
2449                      ((upperMipWidth == requestMipWidth * 2) &&
2450                       ((needToAvoidInTail == TRUE) ||
2451                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2452 
2453                 const BOOL_32 needExtraHeight =
2454                     ((upperMipHeight < requestMipHeight * 2) ||
2455                      ((upperMipHeight == requestMipHeight * 2) &&
2456                       ((needToAvoidInTail == TRUE) ||
2457                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2458 
2459                 // (mip0) width = requestLastMipLevelWidth
2460                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2461 
2462                 // (mip0) height = requestLastMipLevelHeight
2463                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2464             }
2465 
2466             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2467             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2468             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2469             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2470         }
2471     }
2472 
2473     return returnCode;
2474 }
2475 
2476 /**
2477 ************************************************************************************************************************
2478 *   Gfx10Lib::ValidateNonSwModeParams
2479 *
2480 *   @brief
2481 *       Validate compute surface info params except swizzle mode
2482 *
2483 *   @return
2484 *       TRUE if parameters are valid, FALSE otherwise
2485 ************************************************************************************************************************
2486 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2487 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2488     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2489 {
2490     BOOL_32 valid = TRUE;
2491 
2492     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2493     {
2494         ADDR_ASSERT_ALWAYS();
2495         valid = FALSE;
2496     }
2497 
2498     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2499     {
2500         ADDR_ASSERT_ALWAYS();
2501         valid = FALSE;
2502     }
2503 
2504     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2505     const AddrResourceType    rsrcType = pIn->resourceType;
2506     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2507     const BOOL_32             msaa     = (pIn->numFrags > 1);
2508     const BOOL_32             display  = flags.display;
2509     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2510     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2511     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2512     const BOOL_32             stereo   = flags.qbStereo;
2513 
2514     // Resource type check
2515     if (tex1d)
2516     {
2517         if (msaa || display || stereo)
2518         {
2519             ADDR_ASSERT_ALWAYS();
2520             valid = FALSE;
2521         }
2522     }
2523     else if (tex2d)
2524     {
2525         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2526         {
2527             ADDR_ASSERT_ALWAYS();
2528             valid = FALSE;
2529         }
2530     }
2531     else if (tex3d)
2532     {
2533         if (msaa || display || stereo)
2534         {
2535             ADDR_ASSERT_ALWAYS();
2536             valid = FALSE;
2537         }
2538     }
2539     else
2540     {
2541         ADDR_ASSERT_ALWAYS();
2542         valid = FALSE;
2543     }
2544 
2545     return valid;
2546 }
2547 
2548 /**
2549 ************************************************************************************************************************
2550 *   Gfx10Lib::ValidateSwModeParams
2551 *
2552 *   @brief
2553 *       Validate compute surface info related to swizzle mode
2554 *
2555 *   @return
2556 *       TRUE if parameters are valid, FALSE otherwise
2557 ************************************************************************************************************************
2558 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2559 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2560     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2561 {
2562     BOOL_32 valid = TRUE;
2563 
2564     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2565     {
2566         ADDR_ASSERT_ALWAYS();
2567         valid = FALSE;
2568     }
2569     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2570     {
2571         ADDR_ASSERT_ALWAYS();
2572         valid = FALSE;
2573     }
2574 
2575     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2576     const AddrResourceType    rsrcType    = pIn->resourceType;
2577     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2578     const BOOL_32             msaa        = (pIn->numFrags > 1);
2579     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2580     const BOOL_32             color       = flags.color;
2581     const BOOL_32             display     = flags.display;
2582     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2583     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2584     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2585     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2586     const BOOL_32             linear      = IsLinear(swizzle);
2587     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2588     const BOOL_32             blkVar      = IsBlockVariable(swizzle);
2589     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2590     const BOOL_32             prt         = flags.prt;
2591     const BOOL_32             fmask       = flags.fmask;
2592 
2593     // Misc check
2594     if ((pIn->numFrags > 1) &&
2595         (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2596     {
2597         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2598         ADDR_ASSERT_ALWAYS();
2599         valid = FALSE;
2600     }
2601 
2602     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2603     {
2604         ADDR_ASSERT_ALWAYS();
2605         valid = FALSE;
2606     }
2607 
2608     if ((pIn->bpp == 96) && (linear == FALSE))
2609     {
2610         ADDR_ASSERT_ALWAYS();
2611         valid = FALSE;
2612     }
2613 
2614     const UINT_32 swizzleMask = 1 << swizzle;
2615 
2616     // Resource type check
2617     if (tex1d)
2618     {
2619         if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2620         {
2621             ADDR_ASSERT_ALWAYS();
2622             valid = FALSE;
2623         }
2624     }
2625     else if (tex2d)
2626     {
2627         if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2628         {
2629             ADDR_ASSERT_ALWAYS();
2630             valid = FALSE;
2631         }
2632         else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2633                  (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2634         {
2635             ADDR_ASSERT_ALWAYS();
2636             valid = FALSE;
2637         }
2638     }
2639     else if (tex3d)
2640     {
2641         if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2642             (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2643             (thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0)))
2644         {
2645             ADDR_ASSERT_ALWAYS();
2646             valid = FALSE;
2647         }
2648     }
2649 
2650     // Swizzle type check
2651     if (linear)
2652     {
2653         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2654         {
2655             ADDR_ASSERT_ALWAYS();
2656             valid = FALSE;
2657         }
2658     }
2659     else if (IsZOrderSwizzle(swizzle))
2660     {
2661         if ((pIn->bpp > 64)                         ||
2662             (msaa && (color || (pIn->bpp > 32)))    ||
2663             ElemLib::IsBlockCompressed(pIn->format) ||
2664             ElemLib::IsMacroPixelPacked(pIn->format))
2665         {
2666             ADDR_ASSERT_ALWAYS();
2667             valid = FALSE;
2668         }
2669     }
2670     else if (IsStandardSwizzle(rsrcType, swizzle))
2671     {
2672         if (zbuffer || msaa)
2673         {
2674             ADDR_ASSERT_ALWAYS();
2675             valid = FALSE;
2676         }
2677     }
2678     else if (IsDisplaySwizzle(rsrcType, swizzle))
2679     {
2680         if (zbuffer || msaa)
2681         {
2682             ADDR_ASSERT_ALWAYS();
2683             valid = FALSE;
2684         }
2685     }
2686     else if (IsRtOptSwizzle(swizzle))
2687     {
2688         if (zbuffer)
2689         {
2690             ADDR_ASSERT_ALWAYS();
2691             valid = FALSE;
2692         }
2693     }
2694     else
2695     {
2696         ADDR_ASSERT_ALWAYS();
2697         valid = FALSE;
2698     }
2699 
2700     // Block type check
2701     if (blk256B)
2702     {
2703         if (zbuffer || tex3d || msaa)
2704         {
2705             ADDR_ASSERT_ALWAYS();
2706             valid = FALSE;
2707         }
2708     }
2709     else if (blkVar)
2710     {
2711         if (m_blockVarSizeLog2 == 0)
2712         {
2713             ADDR_ASSERT_ALWAYS();
2714             valid = FALSE;
2715         }
2716     }
2717 
2718     return valid;
2719 }
2720 
2721 /**
2722 ************************************************************************************************************************
2723 *   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2724 *
2725 *   @brief
2726 *       Compute surface info sanity check
2727 *
2728 *   @return
2729 *       Offset
2730 ************************************************************************************************************************
2731 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2732 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2733     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2734     ) const
2735 {
2736     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2737 }
2738 
2739 /**
2740 ************************************************************************************************************************
2741 *   Gfx10Lib::HwlGetPreferredSurfaceSetting
2742 *
2743 *   @brief
2744 *       Internal function to get suggested surface information for client to use
2745 *
2746 *   @return
2747 *       ADDR_E_RETURNCODE
2748 ************************************************************************************************************************
2749 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2750 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2751     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2752     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2753     ) const
2754 {
2755     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2756 
2757     if (pIn->flags.fmask)
2758     {
2759         const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2760         const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2761 
2762         if (forbid64KbBlockType && forbidVarBlockType)
2763         {
2764             // Invalid combination...
2765             ADDR_ASSERT_ALWAYS();
2766             returnCode = ADDR_INVALIDPARAMS;
2767         }
2768         else
2769         {
2770             pOut->resourceType                   = ADDR_RSRC_TEX_2D;
2771             pOut->validBlockSet.value            = 0;
2772             pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
2773             pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
2774             pOut->validSwModeSet.value           = 0;
2775             pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
2776             pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
2777             pOut->canXor                         = TRUE;
2778             pOut->validSwTypeSet.value           = AddrSwSetZ;
2779             pOut->clientPreferredSwSet           = pOut->validSwTypeSet;
2780 
2781             BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2782 
2783             if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2784             {
2785                 const UINT_8  maxFmaskSwizzleModeType = 2;
2786                 const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2787                 const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2788                 const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2789                 const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
2790                 const UINT_32 width                   = Max(pIn->width, 1u);
2791                 const UINT_32 height                  = Max(pIn->height, 1u);
2792                 const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2793 
2794                 AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2795                 Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
2796                 Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
2797                 UINT_64         padSize[maxFmaskSwizzleModeType] = {};
2798 
2799                 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2800                 {
2801                     ComputeBlockDimensionForSurf(&blkDim[i].w,
2802                                                  &blkDim[i].h,
2803                                                  &blkDim[i].d,
2804                                                  fmaskBpp,
2805                                                  1,
2806                                                  pOut->resourceType,
2807                                                  swMode[i]);
2808 
2809                     padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2810                     padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2811                 }
2812 
2813                 if (Addr2BlockTypeWithinMemoryBudget(padSize[0],
2814                                                 padSize[1],
2815                                                 ratioLow,
2816                                                 ratioHi,
2817                                                 pIn->memoryBudget,
2818                                                 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2819                 {
2820                     use64KbBlockType = FALSE;
2821                 }
2822             }
2823             else if (forbidVarBlockType)
2824             {
2825                 use64KbBlockType = TRUE;
2826             }
2827 
2828             if (use64KbBlockType)
2829             {
2830                 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2831             }
2832             else
2833             {
2834                 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2835             }
2836         }
2837     }
2838     else
2839     {
2840         UINT_32 bpp    = pIn->bpp;
2841         UINT_32 width  = Max(pIn->width, 1u);
2842         UINT_32 height = Max(pIn->height, 1u);
2843 
2844         // Set format to INVALID will skip this conversion
2845         if (pIn->format != ADDR_FMT_INVALID)
2846         {
2847             ElemMode elemMode = ADDR_UNCOMPRESSED;
2848             UINT_32 expandX, expandY;
2849 
2850             // Get compression/expansion factors and element mode which indicates compression/expansion
2851             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2852                                                 &elemMode,
2853                                                 &expandX,
2854                                                 &expandY);
2855 
2856             UINT_32 basePitch = 0;
2857             GetElemLib()->AdjustSurfaceInfo(elemMode,
2858                                             expandX,
2859                                             expandY,
2860                                             &bpp,
2861                                             &basePitch,
2862                                             &width,
2863                                             &height);
2864         }
2865 
2866         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2867         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2868         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2869         const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2870         const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
2871 
2872         // Pre sanity check on non swizzle mode parameters
2873         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2874         localIn.flags        = pIn->flags;
2875         localIn.resourceType = pIn->resourceType;
2876         localIn.format       = pIn->format;
2877         localIn.bpp          = bpp;
2878         localIn.width        = width;
2879         localIn.height       = height;
2880         localIn.numSlices    = numSlices;
2881         localIn.numMipLevels = numMipLevels;
2882         localIn.numSamples   = numSamples;
2883         localIn.numFrags     = numFrags;
2884 
2885         if (ValidateNonSwModeParams(&localIn))
2886         {
2887             // Forbid swizzle mode(s) by client setting
2888             ADDR2_SWMODE_SET allowedSwModeSet = {};
2889             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2890             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
2891             allowedSwModeSet.value |=
2892                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2893                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2894             allowedSwModeSet.value |=
2895                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2896                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2897             allowedSwModeSet.value |=
2898                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2899                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2900             allowedSwModeSet.value |=
2901                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2902                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2903             allowedSwModeSet.value |=
2904                 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2905 
2906             if (pIn->preferredSwSet.value != 0)
2907             {
2908                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2909                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2910                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2911                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2912             }
2913 
2914             if (pIn->noXor)
2915             {
2916                 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2917             }
2918 
2919             if (pIn->maxAlign > 0)
2920             {
2921                 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2922                 {
2923                     allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2924                 }
2925 
2926                 if (pIn->maxAlign < Size64K)
2927                 {
2928                     allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2929                 }
2930 
2931                 if (pIn->maxAlign < Size4K)
2932                 {
2933                     allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2934                 }
2935 
2936                 if (pIn->maxAlign < Size256)
2937                 {
2938                     allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2939                 }
2940             }
2941 
2942             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2943             switch (pIn->resourceType)
2944             {
2945                 case ADDR_RSRC_TEX_1D:
2946                     allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2947                     break;
2948 
2949                 case ADDR_RSRC_TEX_2D:
2950                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2951                     break;
2952 
2953                 case ADDR_RSRC_TEX_3D:
2954                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2955 
2956                     if (pIn->flags.view3dAs2dArray)
2957                     {
2958                         // SW_LINEAR can be used for 3D thin images, including BCn image format.
2959                         allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
2960                     }
2961                     break;
2962 
2963                 default:
2964                     ADDR_ASSERT_ALWAYS();
2965                     allowedSwModeSet.value = 0;
2966                     break;
2967             }
2968 
2969             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2970                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2971                 (bpp > 64)                               ||
2972                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2973             {
2974                 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2975             }
2976 
2977             if (pIn->format == ADDR_FMT_32_32_32)
2978             {
2979                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2980             }
2981 
2982             if (msaa)
2983             {
2984                 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2985             }
2986 
2987             if (pIn->flags.depth || pIn->flags.stencil)
2988             {
2989                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2990             }
2991 
2992             if (pIn->flags.display)
2993             {
2994                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2995             }
2996 
2997             if (allowedSwModeSet.value != 0)
2998             {
2999 #if DEBUG
3000                 // Post sanity check, at least AddrLib should accept the output generated by its own
3001                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3002 
3003                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3004                 {
3005                     if (validateSwModeSet & 1)
3006                     {
3007                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3008                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
3009                     }
3010 
3011                     validateSwModeSet >>= 1;
3012                 }
3013 #endif
3014 
3015                 pOut->resourceType   = pIn->resourceType;
3016                 pOut->validSwModeSet = allowedSwModeSet;
3017                 pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3018                 pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3019                 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3020 
3021                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3022 
3023                 if (pOut->clientPreferredSwSet.value == 0)
3024                 {
3025                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
3026                 }
3027 
3028                 // Apply optional restrictions
3029                 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3030                 {
3031                     if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3032                     {
3033                         // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3034                         // the GL2 in VAR mode, so it should be avoided.
3035                         allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3036                     }
3037                     else
3038                     {
3039                         // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3040                         // But we have to suffer from low performance because there is no other choice...
3041                         ADDR_ASSERT_ALWAYS();
3042                     }
3043                 }
3044 
3045                 if (pIn->flags.needEquation)
3046                 {
3047                     UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
3048                                                                         ADDR_MAX_LEGACY_EQUATION_COMP;
3049                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3050                 }
3051 
3052                 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3053                 {
3054                     pOut->swizzleMode = ADDR_SW_LINEAR;
3055                 }
3056                 else
3057                 {
3058                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3059 
3060                     if ((height > 1) && (computeMinSize == FALSE))
3061                     {
3062                         // Always ignore linear swizzle mode if:
3063                         // 1. This is a (2D/3D) resource with height > 1
3064                         // 2. Client doesn't require computing minimize size
3065                         allowedSwModeSet.swLinear = 0;
3066                     }
3067 
3068                     // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
3069                     ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3070 
3071                     // Determine block size if there are 2 or more block type candidates
3072                     if (IsPow2(allowedBlockSet.value) == FALSE)
3073                     {
3074                         // Tracks a valid SwizzleMode for each valid block type
3075                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3076 
3077                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3078 
3079                         if (m_blockVarSizeLog2 != 0)
3080                         {
3081                             swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3082                         }
3083 
3084                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3085                         {
3086                             swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3087                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
3088                             swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3089                         }
3090                         else
3091                         {
3092                             swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
3093                             swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
3094                             swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3095                         }
3096 
3097                         // Tracks the size of each valid swizzle mode's surface in bytes
3098                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
3099 
3100                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3101                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3102                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3103                         UINT_32       minSizeBlk         = AddrBlockMicro; // Tracks the most optimal block to use
3104                         UINT_64       minSize            = 0;              // Tracks the minimum acceptable block type
3105 
3106                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3107 
3108                         // Iterate through all block types
3109                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3110                         {
3111                             if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3112                             {
3113                                 localIn.swizzleMode = swMode[i];
3114 
3115                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3116                                 {
3117                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3118                                 }
3119                                 else
3120                                 {
3121                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3122                                 }
3123 
3124                                 if (returnCode == ADDR_OK)
3125                                 {
3126                                     padSize[i] = localOut.surfSize;
3127 
3128                                     if (minSize == 0)
3129                                     {
3130                                         minSize    = padSize[i];
3131                                         minSizeBlk = i;
3132                                     }
3133                                     else
3134                                     {
3135                                         // Checks if the block type is within the memory budget but favors larger blocks
3136                                         if (Addr2BlockTypeWithinMemoryBudget(
3137                                                 minSize,
3138                                                 padSize[i],
3139                                                 ratioLow,
3140                                                 ratioHi,
3141                                                 0.0,
3142                                                 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3143                                         {
3144                                             minSize    = padSize[i];
3145                                             minSizeBlk = i;
3146                                         }
3147                                     }
3148                                 }
3149                                 else
3150                                 {
3151                                     ADDR_ASSERT_ALWAYS();
3152                                     break;
3153                                 }
3154                             }
3155                         }
3156 
3157                         if (pIn->memoryBudget > 1.0)
3158                         {
3159                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3160                             // smaller-block type again in coming loop
3161                             switch (minSizeBlk)
3162                             {
3163                                 case AddrBlockThick64KB:
3164                                     allowedBlockSet.macroThin64KB = 0;
3165                                 case AddrBlockThinVar:
3166                                 case AddrBlockThin64KB:
3167                                     allowedBlockSet.macroThick4KB = 0;
3168                                 case AddrBlockThick4KB:
3169                                     allowedBlockSet.macroThin4KB = 0;
3170                                 case AddrBlockThin4KB:
3171                                     allowedBlockSet.micro  = 0;
3172                                 case AddrBlockMicro:
3173                                     allowedBlockSet.linear = 0;
3174                                 case AddrBlockLinear:
3175                                     break;
3176 
3177                                 default:
3178                                     ADDR_ASSERT_ALWAYS();
3179                                     break;
3180                             }
3181 
3182                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3183                             {
3184                                 if ((i != minSizeBlk) &&
3185                                     Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3186                                 {
3187                                     if (Addr2BlockTypeWithinMemoryBudget(
3188                                             minSize,
3189                                             padSize[i],
3190                                             0,
3191                                             0,
3192                                             pIn->memoryBudget,
3193                                             GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3194                                     {
3195                                         // Clear the block type if the memory waste is unacceptable
3196                                         allowedBlockSet.value &= ~(1u << (i - 1));
3197                                     }
3198                                 }
3199                             }
3200 
3201                             // Remove VAR block type if bigger block type is allowed
3202                             if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3203                             {
3204                                 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3205                                 {
3206                                     allowedBlockSet.var = 0;
3207                                 }
3208                             }
3209 
3210                             // Remove linear block type if 2 or more block types are allowed
3211                             if (IsPow2(allowedBlockSet.value) == FALSE)
3212                             {
3213                                 allowedBlockSet.linear = 0;
3214                             }
3215 
3216                             // Select the biggest allowed block type
3217                             minSizeBlk = Log2(allowedBlockSet.value) + 1;
3218 
3219                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3220                             {
3221                                 minSizeBlk = AddrBlockLinear;
3222                             }
3223                         }
3224 
3225                         switch (minSizeBlk)
3226                         {
3227                             case AddrBlockLinear:
3228                                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3229                                 break;
3230 
3231                             case AddrBlockMicro:
3232                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3233                                 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3234                                 break;
3235 
3236                             case AddrBlockThin4KB:
3237                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3238                                 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3239                                 break;
3240 
3241                             case AddrBlockThick4KB:
3242                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3243                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3244                                 break;
3245 
3246                             case AddrBlockThin64KB:
3247                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3248                                                           Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3249                                 break;
3250 
3251                             case AddrBlockThick64KB:
3252                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3253                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3254                                 break;
3255 
3256                             case AddrBlockThinVar:
3257                                 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3258                                 break;
3259 
3260                             default:
3261                                 ADDR_ASSERT_ALWAYS();
3262                                 allowedSwModeSet.value = 0;
3263                                 break;
3264                         }
3265                     }
3266 
3267                     // Block type should be determined.
3268                     ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3269 
3270                     ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3271 
3272                     // Determine swizzle type if there are 2 or more swizzle type candidates
3273                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3274                     {
3275                         if (ElemLib::IsBlockCompressed(pIn->format))
3276                         {
3277                             if (allowedSwSet.sw_D)
3278                             {
3279                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3280                             }
3281                             else if (allowedSwSet.sw_S)
3282                             {
3283                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3284                             }
3285                             else
3286                             {
3287                                 ADDR_ASSERT(allowedSwSet.sw_R);
3288                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3289                             }
3290                         }
3291                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
3292                         {
3293                             if (allowedSwSet.sw_S)
3294                             {
3295                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3296                             }
3297                             else if (allowedSwSet.sw_D)
3298                             {
3299                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3300                             }
3301                             else
3302                             {
3303                                 ADDR_ASSERT(allowedSwSet.sw_R);
3304                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3305                             }
3306                         }
3307                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3308                         {
3309                             if (pIn->flags.color &&
3310                                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3311                                 allowedSwSet.sw_D)
3312                             {
3313                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3314                             }
3315                             else if (allowedSwSet.sw_S)
3316                             {
3317                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3318                             }
3319                             else if (allowedSwSet.sw_R)
3320                             {
3321                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3322                             }
3323                             else
3324                             {
3325                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3326                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3327                             }
3328                         }
3329                         else
3330                         {
3331                             if (allowedSwSet.sw_R)
3332                             {
3333                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3334                             }
3335                             else if (allowedSwSet.sw_D)
3336                             {
3337                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3338                             }
3339                             else if (allowedSwSet.sw_S)
3340                             {
3341                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3342                             }
3343                             else
3344                             {
3345                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3346                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3347                             }
3348                         }
3349 
3350                         // Swizzle type should be determined.
3351                         ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3352                     }
3353 
3354                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3355                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3356                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3357                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
3358                 }
3359             }
3360             else
3361             {
3362                 // Invalid combination...
3363                 ADDR_ASSERT_ALWAYS();
3364                 returnCode = ADDR_INVALIDPARAMS;
3365             }
3366         }
3367         else
3368         {
3369             // Invalid combination...
3370             ADDR_ASSERT_ALWAYS();
3371             returnCode = ADDR_INVALIDPARAMS;
3372         }
3373     }
3374 
3375     return returnCode;
3376 }
3377 
3378 /**
3379 ************************************************************************************************************************
3380 *   Gfx10Lib::HwlGetPossibleSwizzleModes
3381 *
3382 *   @brief
3383 *       Fills pOut->validSwModeSet with the swizzle modes the hardware considers valid for the client to choose from
3384 *
3385 *   @return
3386 *       ADDR_OK on success; ADDR_INVALIDPARAMS if the inputs are invalid or no swizzle mode remains
3387 ************************************************************************************************************************
3388 */
HwlGetPossibleSwizzleModes(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3389 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPossibleSwizzleModes(
3390     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
3391     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
3392     ) const
3393 {
3394     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3395     UINT_32 bpp    = pIn->bpp;
3396     UINT_32 width  = Max(pIn->width, 1u);
3397     UINT_32 height = Max(pIn->height, 1u);
3398 
3399     // Set format to INVALID will skip this conversion
3400     if (pIn->format != ADDR_FMT_INVALID)
3401     {
3402         ElemMode elemMode = ADDR_UNCOMPRESSED;
3403         UINT_32 expandX, expandY;
3404 
3405         // Get compression/expansion factors and element mode which indicates compression/expansion
3406         bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
3407             &elemMode,
3408             &expandX,
3409             &expandY);
3410 
3411         UINT_32 basePitch = 0;
3412         GetElemLib()->AdjustSurfaceInfo(elemMode,
3413             expandX,
3414             expandY,
3415             &bpp,
3416             &basePitch,
3417             &width,
3418             &height);
3419     }
3420 
3421     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3422     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3423     const UINT_32 numSamples   = Max(pIn->numSamples, 1u);
3424     const BOOL_32 msaa         = numSamples > 1;
3425 
3426     // Pre sanity check on non swizzle mode parameters
3427     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3428     localIn.flags = pIn->flags;
3429     localIn.resourceType = pIn->resourceType;
3430     localIn.format = pIn->format;
3431     localIn.bpp = bpp;
3432     localIn.width = width;
3433     localIn.height = height;
3434     localIn.numSlices = numSlices;
3435     localIn.numMipLevels = numMipLevels;
3436     localIn.numSamples = numSamples;
3437     localIn.numFrags = numSamples;
3438 
3439     if (ValidateNonSwModeParams(&localIn))
3440     {
3441         // Forbid swizzle mode(s) by client setting
3442         ADDR2_SWMODE_SET allowedSwModeSet = {};
3443         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
3444         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
3445         allowedSwModeSet.value |=
3446             pIn->forbiddenBlock.macroThin4KB ? 0 :
3447             ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
3448         allowedSwModeSet.value |=
3449             pIn->forbiddenBlock.macroThick4KB ? 0 :
3450             ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
3451         allowedSwModeSet.value |=
3452             pIn->forbiddenBlock.macroThin64KB ? 0 :
3453             ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
3454         allowedSwModeSet.value |=
3455             pIn->forbiddenBlock.macroThick64KB ? 0 :
3456             ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
3457         allowedSwModeSet.value |=
3458             pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
3459 
3460         if (pIn->preferredSwSet.value != 0)
3461         {
3462             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
3463             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
3464             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
3465             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
3466         }
3467 
3468         if (pIn->noXor)
3469         {
3470             allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
3471         }
3472 
3473         if (pIn->maxAlign > 0)
3474         {
3475             if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
3476             {
3477                 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3478             }
3479 
3480             if (pIn->maxAlign < Size64K)
3481             {
3482                 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
3483             }
3484 
3485             if (pIn->maxAlign < Size4K)
3486             {
3487                 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
3488             }
3489 
3490             if (pIn->maxAlign < Size256)
3491             {
3492                 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
3493             }
3494         }
3495 
3496         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3497         switch (pIn->resourceType)
3498         {
3499             case ADDR_RSRC_TEX_1D:
3500                 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
3501                 break;
3502 
3503             case ADDR_RSRC_TEX_2D:
3504                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
3505                 break;
3506 
3507             case ADDR_RSRC_TEX_3D:
3508                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
3509 
3510                 if (pIn->flags.view3dAs2dArray)
3511                 {
3512                     // SW_LINEAR can be used for 3D thin images, including BCn image format.
3513                     allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
3514                 }
3515                 break;
3516 
3517             default:
3518                 ADDR_ASSERT_ALWAYS();
3519                 allowedSwModeSet.value = 0;
3520                 break;
3521         }
3522 
3523         if (ElemLib::IsBlockCompressed(pIn->format)  ||
3524             ElemLib::IsMacroPixelPacked(pIn->format) ||
3525             (bpp > 64)                               ||
3526             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3527         {
3528             allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
3529         }
3530 
3531         if (pIn->format == ADDR_FMT_32_32_32)
3532         {
3533             allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3534         }
3535 
3536         if (msaa)
3537         {
3538             allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
3539         }
3540 
3541         if (pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask)
3542         {
3543             allowedSwModeSet.value &= Gfx10ZSwModeMask;
3544         }
3545 
3546         if (pIn->flags.display)
3547         {
3548             allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3549         }
3550 
3551         if (pIn->flags.needEquation)
3552         {
3553             UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
3554                                                                 ADDR_MAX_LEGACY_EQUATION_COMP;
3555             FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3556         }
3557 
3558         if (pIn->flags.requireMetadata)
3559         {
3560             // Linear images can never be compressed
3561             allowedSwModeSet.value &= ~Gfx10LinearSwModeMask;
3562             if (pIn->flags.color)
3563             {
3564                 // 256B formats must not be pipe-aligned (can't use in CB)
3565                 allowedSwModeSet.value &= ~(Gfx10Blk256BSwModeMask);
3566                 // D/S formats must not be pipe-aligned
3567                 allowedSwModeSet.value &= ~(Gfx10DisplaySwModeMask | Gfx10StandardSwModeMask);
3568             }
3569         }
3570 
3571         if (allowedSwModeSet.value != 0)
3572         {
3573 #if DEBUG
3574             // Post sanity check, at least AddrLib should accept the output generated by its own
3575             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3576 
3577             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3578             {
3579                 if (validateSwModeSet & 1)
3580                 {
3581                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3582                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3583                 }
3584 
3585                 validateSwModeSet >>= 1;
3586             }
3587 #endif
3588 
3589             pOut->resourceType = pIn->resourceType;
3590             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3591 
3592             if (pOut->clientPreferredSwSet.value == 0)
3593             {
3594                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3595             }
3596 
3597             pOut->validSwModeSet = allowedSwModeSet;
3598             pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3599         }
3600         else
3601         {
3602             // Invalid combination...
3603             ADDR_ASSERT_ALWAYS();
3604             returnCode = ADDR_INVALIDPARAMS;
3605         }
3606     }
3607     else
3608     {
3609         // Invalid combination...
3610         ADDR_ASSERT_ALWAYS();
3611         returnCode = ADDR_INVALIDPARAMS;
3612     }
3613 
3614     return returnCode;
3615 }
3616 
3617 /**
3618 ************************************************************************************************************************
3619 *   Gfx10Lib::ComputeStereoInfo
3620 *
3621 *   @brief
3622 *       Compute height alignment and right eye pipeBankXor for stereo surface
3623 *
3624 *   @return
3625 *       Error code
3626 *
3627 ************************************************************************************************************************
3628 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3629 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3630     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3631     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3632     UINT_32*                                pRightXor   ///< Right eye xor
3633     ) const
3634 {
3635     ADDR_E_RETURNCODE ret = ADDR_OK;
3636 
3637     *pRightXor = 0;
3638 
3639     if (IsNonPrtXor(pIn->swizzleMode))
3640     {
3641         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3642         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3643         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3644         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3645         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3646 
3647         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3648         {
3649             UINT_32 yMax     = 0;
3650             UINT_32 yPosMask = 0;
3651 
3652             // First get "max y bit"
3653             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3654             {
3655                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3656 
3657                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3658                     (m_equationTable[eqIndex].addr[i].index > yMax))
3659                 {
3660                     yMax = m_equationTable[eqIndex].addr[i].index;
3661                 }
3662 
3663                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3664                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3665                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3666                 {
3667                     yMax = m_equationTable[eqIndex].xor1[i].index;
3668                 }
3669 
3670                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3671                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3672                     (m_equationTable[eqIndex].xor2[i].index > yMax))
3673                 {
3674                     yMax = m_equationTable[eqIndex].xor2[i].index;
3675                 }
3676             }
3677 
3678             // Then loop again for populating a position mask of "max Y bit"
3679             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3680             {
3681                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3682                     (m_equationTable[eqIndex].addr[i].index == yMax))
3683                 {
3684                     yPosMask |= 1u << i;
3685                 }
3686                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3687                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3688                          (m_equationTable[eqIndex].xor1[i].index == yMax))
3689                 {
3690                     yPosMask |= 1u << i;
3691                 }
3692                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3693                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3694                          (m_equationTable[eqIndex].xor2[i].index == yMax))
3695                 {
3696                     yPosMask |= 1u << i;
3697                 }
3698             }
3699 
3700             const UINT_32 additionalAlign = 1 << yMax;
3701 
3702             if (additionalAlign >= *pAlignY)
3703             {
3704                 *pAlignY = additionalAlign;
3705 
3706                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3707 
3708                 if ((alignedHeight >> yMax) & 1)
3709                 {
3710                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3711                 }
3712             }
3713         }
3714         else
3715         {
3716             ret = ADDR_INVALIDPARAMS;
3717         }
3718     }
3719 
3720     return ret;
3721 }
3722 
3723 /**
3724 ************************************************************************************************************************
3725 *   Gfx10Lib::HwlComputeSurfaceInfoTiled
3726 *
3727 *   @brief
3728 *       Internal function to calculate alignment for tiled surface
3729 *
3730 *   @return
3731 *       ADDR_E_RETURNCODE
3732 ************************************************************************************************************************
3733 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3734 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3735      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3736      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3737      ) const
3738 {
3739     ADDR_E_RETURNCODE ret;
3740 
3741     // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3742     pOut->mipChainPitch    = 0;
3743     pOut->mipChainHeight   = 0;
3744     pOut->mipChainSlice    = 0;
3745     pOut->epitchIsHeight   = FALSE;
3746 
3747     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3748     pOut->mipChainInTail   = FALSE;
3749     pOut->firstMipIdInTail = pIn->numMipLevels;
3750 
3751     if (IsBlock256b(pIn->swizzleMode))
3752     {
3753         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3754     }
3755     else
3756     {
3757         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3758     }
3759 
3760     return ret;
3761 }
3762 
3763 /**
3764 ************************************************************************************************************************
3765 *   Gfx10Lib::ComputeSurfaceInfoMicroTiled
3766 *
3767 *   @brief
3768 *       Internal function to calculate alignment for micro tiled surface
3769 *
3770 *   @return
3771 *       ADDR_E_RETURNCODE
3772 ************************************************************************************************************************
3773 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3774 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3775      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3776      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3777      ) const
3778 {
3779     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3780                                                          &pOut->blockHeight,
3781                                                          &pOut->blockSlices,
3782                                                          pIn->bpp,
3783                                                          pIn->numFrags,
3784                                                          pIn->resourceType,
3785                                                          pIn->swizzleMode);
3786 
3787     if (ret == ADDR_OK)
3788     {
3789         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3790 
3791         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3792         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3793         pOut->numSlices = pIn->numSlices;
3794         pOut->baseAlign = blockSize;
3795 
3796         if (pIn->numMipLevels > 1)
3797         {
3798             const UINT_32 mip0Width    = pIn->width;
3799             const UINT_32 mip0Height   = pIn->height;
3800             UINT_64       mipSliceSize = 0;
3801 
3802             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3803             {
3804                 UINT_32 mipWidth, mipHeight;
3805 
3806                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3807 
3808                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3809                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3810 
3811                 if (pOut->pMipInfo != NULL)
3812                 {
3813                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3814                     pOut->pMipInfo[i].height           = mipActualHeight;
3815                     pOut->pMipInfo[i].depth            = 1;
3816                     pOut->pMipInfo[i].offset           = mipSliceSize;
3817                     pOut->pMipInfo[i].mipTailOffset    = 0;
3818                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3819                 }
3820 
3821                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3822             }
3823 
3824             pOut->sliceSize = mipSliceSize;
3825             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3826         }
3827         else
3828         {
3829             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3830             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3831 
3832             if (pOut->pMipInfo != NULL)
3833             {
3834                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3835                 pOut->pMipInfo[0].height           = pOut->height;
3836                 pOut->pMipInfo[0].depth            = 1;
3837                 pOut->pMipInfo[0].offset           = 0;
3838                 pOut->pMipInfo[0].mipTailOffset    = 0;
3839                 pOut->pMipInfo[0].macroBlockOffset = 0;
3840             }
3841         }
3842 
3843     }
3844 
3845     return ret;
3846 }
3847 
3848 /**
3849 ************************************************************************************************************************
3850 *   Gfx10Lib::ComputeSurfaceInfoMacroTiled
3851 *
3852 *   @brief
3853 *       Internal function to calculate alignment for macro tiled surface
3854 *
3855 *   @return
3856 *       ADDR_E_RETURNCODE
3857 ************************************************************************************************************************
3858 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3859 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3860      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3861      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3862      ) const
3863 {
3864     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3865                                                                 &pOut->blockHeight,
3866                                                                 &pOut->blockSlices,
3867                                                                 pIn->bpp,
3868                                                                 pIn->numFrags,
3869                                                                 pIn->resourceType,
3870                                                                 pIn->swizzleMode);
3871 
3872     if (returnCode == ADDR_OK)
3873     {
3874         UINT_32 heightAlign = pOut->blockHeight;
3875 
3876         if (pIn->flags.qbStereo)
3877         {
3878             UINT_32 rightXor = 0;
3879 
3880             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3881 
3882             if (returnCode == ADDR_OK)
3883             {
3884                 pOut->pStereoInfo->rightSwizzle = rightXor;
3885             }
3886         }
3887 
3888         if (returnCode == ADDR_OK)
3889         {
3890             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3891             const UINT_32 blockSize     = 1 << blockSizeLog2;
3892 
3893             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3894             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3895             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3896             pOut->baseAlign = blockSize;
3897 
3898             if (pIn->numMipLevels > 1)
3899             {
3900                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3901                                                                 pIn->swizzleMode,
3902                                                                 pOut->blockWidth,
3903                                                                 pOut->blockHeight,
3904                                                                 pOut->blockSlices);
3905                 const UINT_32 mip0Width         = pIn->width;
3906                 const UINT_32 mip0Height        = pIn->height;
3907                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3908                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3909                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3910                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3911                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3912                 UINT_64       mipChainSliceSize = 0;
3913                 UINT_64       mipSize[MaxMipLevels];
3914                 UINT_64       mipSliceSize[MaxMipLevels];
3915 
3916                 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3917                 Dim3d fixedTailMaxDim = tailMaxDim;
3918 
3919                 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3920                 {
3921                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3922                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3923                 }
3924 
3925                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3926                 {
3927                     UINT_32 mipWidth, mipHeight, mipDepth;
3928 
3929                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3930 
3931                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3932                     {
3933                         firstMipInTail     = i;
3934                         mipChainSliceSize += blockSize / pOut->blockSlices;
3935                         break;
3936                     }
3937                     else
3938                     {
3939                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3940                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3941                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3942                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3943 
3944                         mipSize[i]         = sliceSize * depth;
3945                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3946                         mipChainSliceSize += sliceSize;
3947 
3948                         if (pOut->pMipInfo != NULL)
3949                         {
3950                             pOut->pMipInfo[i].pitch  = pitch;
3951                             pOut->pMipInfo[i].height = height;
3952                             pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3953                         }
3954                     }
3955                 }
3956 
3957                 pOut->sliceSize        = mipChainSliceSize;
3958                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3959                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3960                 pOut->firstMipIdInTail = firstMipInTail;
3961 
3962                 if (pOut->pMipInfo != NULL)
3963                 {
3964                     UINT_64 offset         = 0;
3965                     UINT_64 macroBlkOffset = 0;
3966                     UINT_32 tailMaxDepth   = 0;
3967 
3968                     if (firstMipInTail != pIn->numMipLevels)
3969                     {
3970                         UINT_32 mipWidth, mipHeight;
3971 
3972                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3973                                    &mipWidth, &mipHeight, &tailMaxDepth);
3974 
3975                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3976                         macroBlkOffset = blockSize;
3977                     }
3978 
3979                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3980                     {
3981                         pOut->pMipInfo[i].offset           = offset;
3982                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3983                         pOut->pMipInfo[i].mipTailOffset    = 0;
3984 
3985                         offset         += mipSize[i];
3986                         macroBlkOffset += mipSliceSize[i];
3987                     }
3988 
3989                     UINT_32 pitch  = tailMaxDim.w;
3990                     UINT_32 height = tailMaxDim.h;
3991                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3992 
3993                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3994 
3995                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3996                     {
3997                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3998                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3999 
4000                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
4001                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
4002                         pOut->pMipInfo[i].macroBlockOffset = 0;
4003 
4004                         pOut->pMipInfo[i].pitch  = pitch;
4005                         pOut->pMipInfo[i].height = height;
4006                         pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
4007 
4008                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
4009                                        ((mipOffset >> 10) & 2)  |
4010                                        ((mipOffset >> 11) & 4)  |
4011                                        ((mipOffset >> 12) & 8)  |
4012                                        ((mipOffset >> 13) & 16) |
4013                                        ((mipOffset >> 14) & 32);
4014                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
4015                                        ((mipOffset >> 9)  & 2)  |
4016                                        ((mipOffset >> 10) & 4)  |
4017                                        ((mipOffset >> 11) & 8)  |
4018                                        ((mipOffset >> 12) & 16) |
4019                                        ((mipOffset >> 13) & 32);
4020 
4021                         if (blockSizeLog2 & 1)
4022                         {
4023                             const UINT_32 temp = mipX;
4024                             mipX = mipY;
4025                             mipY = temp;
4026 
4027                             if (index & 1)
4028                             {
4029                                 mipY = (mipY << 1) | (mipX & 1);
4030                                 mipX = mipX >> 1;
4031                             }
4032                         }
4033 
4034                         if (isThin)
4035                         {
4036                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
4037                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
4038                             pOut->pMipInfo[i].mipTailCoordZ = 0;
4039 
4040                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
4041                             height = Max(height >> 1, Block256_2d[index].h);
4042                         }
4043                         else
4044                         {
4045                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
4046                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
4047                             pOut->pMipInfo[i].mipTailCoordZ = 0;
4048 
4049                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
4050                             height = Max(height >> 1, Block256_3d[index].h);
4051                         }
4052                     }
4053                 }
4054             }
4055             else
4056             {
4057                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
4058                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
4059 
4060                 if (pOut->pMipInfo != NULL)
4061                 {
4062                     pOut->pMipInfo[0].pitch            = pOut->pitch;
4063                     pOut->pMipInfo[0].height           = pOut->height;
4064                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
4065                     pOut->pMipInfo[0].offset           = 0;
4066                     pOut->pMipInfo[0].mipTailOffset    = 0;
4067                     pOut->pMipInfo[0].macroBlockOffset = 0;
4068                     pOut->pMipInfo[0].mipTailCoordX    = 0;
4069                     pOut->pMipInfo[0].mipTailCoordY    = 0;
4070                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
4071                 }
4072             }
4073         }
4074     }
4075 
4076     return returnCode;
4077 }
4078 
4079 /**
4080 ************************************************************************************************************************
4081 *   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
4082 *
4083 *   @brief
4084 *       Internal function to calculate address from coord for tiled swizzle surface
4085 *
4086 *   @return
4087 *       ADDR_E_RETURNCODE
4088 ************************************************************************************************************************
4089 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4090 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
4091      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4092      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4093      ) const
4094 {
4095     ADDR_E_RETURNCODE ret;
4096 
4097     if (IsBlock256b(pIn->swizzleMode))
4098     {
4099         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
4100     }
4101     else
4102     {
4103         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
4104     }
4105 
4106     return ret;
4107 }
4108 
4109 /**
4110 ************************************************************************************************************************
4111 *   Gfx10Lib::HwlCopyMemToSurface
4112 *
4113 *   @brief
4114 *       Copy multiple regions from memory to a non-linear surface.
4115 *
4116 *   @return
4117 *       Error or success.
4118 ************************************************************************************************************************
4119 */
HwlCopyMemToSurface(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const4120 ADDR_E_RETURNCODE Gfx10Lib::HwlCopyMemToSurface(
4121     const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
4122     const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
4123     UINT_32                             regionCount
4124     ) const
4125 {
4126     // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
4127     // optimized for a particular micro-swizzle mode if available.
4128     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
4129     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4130     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
4131     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4132     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4133 
4134     if (pIn->numSamples > 1)
4135     {
4136         // TODO: MSAA
4137         returnCode = ADDR_NOTIMPLEMENTED;
4138     }
4139     if (IsBlockVariable(pIn->swizzleMode))
4140     {
4141         // TODO: larger LUTs for worst-case var swizzle.
4142         returnCode = ADDR_NOTIMPLEMENTED;
4143     }
4144 
4145     localIn.size         = sizeof(localIn);
4146     localIn.flags        = pIn->flags;
4147     localIn.swizzleMode  = pIn->swizzleMode;
4148     localIn.resourceType = pIn->resourceType;
4149     localIn.format       = pIn->format;
4150     localIn.bpp          = pIn->bpp;
4151     localIn.width        = Max(pIn->unAlignedDims.width,  1u);
4152     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
4153     localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
4154     localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
4155     localIn.numSamples   = Max(pIn->numSamples,           1u);
4156 
4157     localOut.size     = sizeof(localOut);
4158     localOut.pMipInfo = mipInfo;
4159 
4160     if (returnCode == ADDR_OK)
4161     {
4162         returnCode = ComputeSurfaceInfo(&localIn, &localOut);
4163     }
4164     const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4165     const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4166                                                             pIn->resourceType,
4167                                                             Log2(pIn->bpp >> 3),
4168                                                             pIn->numSamples);
4169 
4170     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
4171     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4172     ADDR_EXTENT3D blockExtent = {
4173         localOut.blockWidth,
4174         localOut.blockHeight,
4175         localOut.blockSlices
4176     };
4177 
4178     LutAddresser addresser = LutAddresser();
4179     addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
4180     UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
4181     if (pfnCopyUnaligned == nullptr)
4182     {
4183         ADDR_ASSERT_ALWAYS();
4184         returnCode = ADDR_INVALIDPARAMS;
4185     }
4186 
4187     if (returnCode == ADDR_OK)
4188     {
4189         for (UINT_32  regionIdx = 0; regionIdx < regionCount; regionIdx++)
4190         {
4191             const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
4192             const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
4193             UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
4194             UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
4195 
4196             UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
4197             UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
4198             UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
4199 
4200             for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
4201             {
4202                 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
4203                 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
4204                 // for unaligned copies.
4205                 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
4206                 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
4207 
4208                 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
4209                 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
4210 
4211                 ADDR_COORD2D sliceOrigin = { xStart, yStart };
4212                 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
4213 
4214                 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
4215                                  VoidPtrInc(pCurRegion->pMem, memOffset),
4216                                  pCurRegion->memRowPitch,
4217                                  yBlks,
4218                                  sliceOrigin,
4219                                  sliceExtent,
4220                                  sliceXor,
4221                                  addresser);
4222             }
4223         }
4224     }
4225     return returnCode;
4226 }
4227 
4228 /**
4229 ************************************************************************************************************************
4230 *   Gfx10Lib::HwlCopySurfaceToMem
4231 *
4232 *   @brief
4233 *       Copy multiple regions from a non-linear surface to memory.
4234 *
4235 *   @return
4236 *       Error or success.
4237 ************************************************************************************************************************
4238 */
HwlCopySurfaceToMem(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const4239 ADDR_E_RETURNCODE Gfx10Lib::HwlCopySurfaceToMem(
4240     const ADDR2_COPY_MEMSURFACE_INPUT*  pIn,
4241     const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
4242     UINT_32                             regionCount
4243     ) const
4244 {
4245     // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
4246     // optimized for a particular micro-swizzle mode if available.
4247     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
4248     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4249     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
4250     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4251     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4252 
4253     if (pIn->numSamples > 1)
4254     {
4255         // TODO: MSAA
4256         returnCode = ADDR_NOTIMPLEMENTED;
4257     }
4258     if (IsBlockVariable(pIn->swizzleMode))
4259     {
4260         // TODO: larger LUTs for worst-case var swizzle.
4261         returnCode = ADDR_NOTIMPLEMENTED;
4262     }
4263 
4264     localIn.size         = sizeof(localIn);
4265     localIn.flags        = pIn->flags;
4266     localIn.swizzleMode  = pIn->swizzleMode;
4267     localIn.resourceType = pIn->resourceType;
4268     localIn.format       = pIn->format;
4269     localIn.bpp          = pIn->bpp;
4270     localIn.width        = Max(pIn->unAlignedDims.width,  1u);
4271     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
4272     localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
4273     localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
4274     localIn.numSamples   = Max(pIn->numSamples,           1u);
4275 
4276     localOut.size     = sizeof(localOut);
4277     localOut.pMipInfo = mipInfo;
4278 
4279     if (returnCode == ADDR_OK)
4280     {
4281         returnCode = ComputeSurfaceInfo(&localIn, &localOut);
4282     }
4283     const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4284     const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4285                                                             pIn->resourceType,
4286                                                             Log2(pIn->bpp >> 3),
4287                                                             pIn->numSamples);
4288 
4289     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
4290     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4291     ADDR_EXTENT3D blockExtent = {
4292         localOut.blockWidth,
4293         localOut.blockHeight,
4294         localOut.blockSlices
4295     };
4296 
4297     LutAddresser addresser = LutAddresser();
4298     addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
4299     UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
4300     if (pfnCopyUnaligned == nullptr)
4301     {
4302         ADDR_ASSERT_ALWAYS();
4303         returnCode = ADDR_INVALIDPARAMS;
4304     }
4305 
4306     if (returnCode == ADDR_OK)
4307     {
4308         for (UINT_32  regionIdx = 0; regionIdx < regionCount; regionIdx++)
4309         {
4310             const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
4311             const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
4312             UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
4313             UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
4314 
4315             UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
4316             UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
4317             UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
4318 
4319             for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
4320             {
4321                 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
4322                 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
4323                 // for unaligned copies.
4324                 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
4325                 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
4326 
4327                 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
4328                 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
4329 
4330                 ADDR_COORD2D sliceOrigin = { xStart, yStart };
4331                 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
4332 
4333                 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
4334                                  VoidPtrInc(pCurRegion->pMem, memOffset),
4335                                  pCurRegion->memRowPitch,
4336                                  yBlks,
4337                                  sliceOrigin,
4338                                  sliceExtent,
4339                                  sliceXor,
4340                                  addresser);
4341             }
4342         }
4343     }
4344     return returnCode;
4345 }
4346 
4347 /**
4348 ************************************************************************************************************************
4349 *   Gfx10Lib::ComputeOffsetFromEquation
4350 *
4351 *   @brief
4352 *       Compute offset from equation
4353 *
4354 *   @return
4355 *       Offset
4356 ************************************************************************************************************************
4357 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const4358 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
4359     const ADDR_EQUATION* pEq,   ///< Equation
4360     UINT_32              x,     ///< x coord in bytes
4361     UINT_32              y,     ///< y coord in pixel
4362     UINT_32              z      ///< z coord in slice
4363     ) const
4364 {
4365     UINT_32 offset = 0;
4366 
4367     for (UINT_32 i = 0; i < pEq->numBits; i++)
4368     {
4369         UINT_32 v = 0;
4370 
4371         for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
4372         {
4373             if (pEq->comps[c][i].valid)
4374             {
4375                 if (pEq->comps[c][i].channel == 0)
4376                 {
4377                     v ^= (x >> pEq->comps[c][i].index) & 1;
4378                 }
4379                 else if (pEq->comps[c][i].channel == 1)
4380                 {
4381                     v ^= (y >> pEq->comps[c][i].index) & 1;
4382                 }
4383                 else
4384                 {
4385                     ADDR_ASSERT(pEq->comps[c][i].channel == 2);
4386                     v ^= (z >> pEq->comps[c][i].index) & 1;
4387                 }
4388             }
4389         }
4390 
4391         offset |= (v << i);
4392     }
4393 
4394     return offset;
4395 }
4396 
4397 /**
4398 ************************************************************************************************************************
4399 *   Gfx10Lib::GetSwizzlePatternInfo
4400 *
4401 *   @brief
4402 *       Get swizzle pattern
4403 *
4404 *   @return
4405 *       Swizzle pattern information
4406 ************************************************************************************************************************
4407 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4408 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4409     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
4410     AddrResourceType resourceType,      ///< Resource type
4411     UINT_32          elemLog2,          ///< Element size in bytes log2
4412     UINT_32          numFrag            ///< Number of fragment
4413     ) const
4414 {
4415     // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from
4416     // the right location
4417     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4418     const ADDR_SW_PATINFO* patInfo     = NULL;
4419     const UINT_32          swizzleMask = 1 << swizzleMode;
4420 
4421     if (IsBlockVariable(swizzleMode))
4422     {
4423         if (m_blockVarSizeLog2 != 0)
4424         {
4425             ADDR_ASSERT(m_settings.supportRbPlus);
4426 
4427             if (IsRtOptSwizzle(swizzleMode))
4428             {
4429                 if (numFrag == 1)
4430                 {
4431                     patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4432                 }
4433                 else if (numFrag == 2)
4434                 {
4435                     patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4436                 }
4437                 else if (numFrag == 4)
4438                 {
4439                     patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4440                 }
4441                 else
4442                 {
4443                     ADDR_ASSERT(numFrag == 8);
4444                     patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4445                 }
4446             }
4447             else if (IsZOrderSwizzle(swizzleMode))
4448             {
4449                 if (numFrag == 1)
4450                 {
4451                     patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4452                 }
4453                 else if (numFrag == 2)
4454                 {
4455                     patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4456                 }
4457                 else if (numFrag == 4)
4458                 {
4459                     patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4460                 }
4461                 else
4462                 {
4463                     ADDR_ASSERT(numFrag == 8);
4464                     patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4465                 }
4466             }
4467         }
4468     }
4469     else if (IsLinear(swizzleMode) == FALSE)
4470     {
4471         if (resourceType == ADDR_RSRC_TEX_3D)
4472         {
4473             ADDR_ASSERT(numFrag == 1);
4474 
4475             if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4476             {
4477                 if (IsRtOptSwizzle(swizzleMode))
4478                 {
4479                     if (swizzleMode == ADDR_SW_4KB_R_X)
4480                     {
4481                         patInfo = NULL;
4482                     }
4483                     else
4484                     {
4485                         patInfo = m_settings.supportRbPlus ?
4486                                   GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4487                     }
4488                 }
4489                 else if (IsZOrderSwizzle(swizzleMode))
4490                 {
4491                     patInfo = m_settings.supportRbPlus ?
4492                               GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4493                 }
4494                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4495                 {
4496                     ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4497                     patInfo = m_settings.supportRbPlus ?
4498                               GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4499                 }
4500                 else
4501                 {
4502                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4503 
4504                     if (IsBlock4kb(swizzleMode))
4505                     {
4506                         if (swizzleMode == ADDR_SW_4KB_S)
4507                         {
4508                             patInfo = m_settings.supportRbPlus ?
4509                                       GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4510                         }
4511                         else
4512                         {
4513                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4514                             patInfo = m_settings.supportRbPlus ?
4515                                       GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4516                         }
4517                     }
4518                     else
4519                     {
4520                         if (swizzleMode == ADDR_SW_64KB_S)
4521                         {
4522                             patInfo = m_settings.supportRbPlus ?
4523                                       GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4524                         }
4525                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4526                         {
4527                             patInfo = m_settings.supportRbPlus ?
4528                                       GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4529                         }
4530                         else
4531                         {
4532                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4533                             patInfo = m_settings.supportRbPlus ?
4534                                       GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4535                         }
4536                     }
4537                 }
4538             }
4539         }
4540         else
4541         {
4542             if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4543             {
4544                 if (IsBlock256b(swizzleMode))
4545                 {
4546                     if (swizzleMode == ADDR_SW_256B_S)
4547                     {
4548                         patInfo = m_settings.supportRbPlus ?
4549                                   GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4550                     }
4551                     else
4552                     {
4553                         ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4554                         patInfo = m_settings.supportRbPlus ?
4555                                   GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4556                     }
4557                 }
4558                 else if (IsBlock4kb(swizzleMode))
4559                 {
4560                     if (IsStandardSwizzle(resourceType, swizzleMode))
4561                     {
4562                         if (swizzleMode == ADDR_SW_4KB_S)
4563                         {
4564                             patInfo = m_settings.supportRbPlus ?
4565                                       GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4566                         }
4567                         else
4568                         {
4569                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4570                             patInfo = m_settings.supportRbPlus ?
4571                                       GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4572                         }
4573                     }
4574                     else
4575                     {
4576                         if (swizzleMode == ADDR_SW_4KB_D)
4577                         {
4578                             patInfo = m_settings.supportRbPlus ?
4579                                       GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4580                         }
4581                         else if (swizzleMode == ADDR_SW_4KB_R_X)
4582                         {
4583                             patInfo = NULL;
4584                         }
4585                         else
4586                         {
4587                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4588                             patInfo = m_settings.supportRbPlus ?
4589                                       GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4590                         }
4591                     }
4592                 }
4593                 else
4594                 {
4595                     if (IsRtOptSwizzle(swizzleMode))
4596                     {
4597                         if (numFrag == 1)
4598                         {
4599                             patInfo = m_settings.supportRbPlus ?
4600                                       GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4601                         }
4602                         else if (numFrag == 2)
4603                         {
4604                             patInfo = m_settings.supportRbPlus ?
4605                                       GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4606                         }
4607                         else if (numFrag == 4)
4608                         {
4609                             patInfo = m_settings.supportRbPlus ?
4610                                       GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4611                         }
4612                         else
4613                         {
4614                             ADDR_ASSERT(numFrag == 8);
4615                             patInfo = m_settings.supportRbPlus ?
4616                                       GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4617                         }
4618                     }
4619                     else if (IsZOrderSwizzle(swizzleMode))
4620                     {
4621                         if (numFrag == 1)
4622                         {
4623                             patInfo = m_settings.supportRbPlus ?
4624                                       GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4625                         }
4626                         else if (numFrag == 2)
4627                         {
4628                             patInfo = m_settings.supportRbPlus ?
4629                                       GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4630                         }
4631                         else if (numFrag == 4)
4632                         {
4633                             patInfo = m_settings.supportRbPlus ?
4634                                       GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4635                         }
4636                         else
4637                         {
4638                             ADDR_ASSERT(numFrag == 8);
4639                             patInfo = m_settings.supportRbPlus ?
4640                                       GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4641                         }
4642                     }
4643                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4644                     {
4645                         if (swizzleMode == ADDR_SW_64KB_D)
4646                         {
4647                             patInfo = m_settings.supportRbPlus ?
4648                                       GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4649                         }
4650                         else if (swizzleMode == ADDR_SW_64KB_D_X)
4651                         {
4652                             patInfo = m_settings.supportRbPlus ?
4653                                       GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4654                         }
4655                         else
4656                         {
4657                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4658                             patInfo = m_settings.supportRbPlus ?
4659                                       GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4660                         }
4661                     }
4662                     else
4663                     {
4664                         if (swizzleMode == ADDR_SW_64KB_S)
4665                         {
4666                             patInfo = m_settings.supportRbPlus ?
4667                                       GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4668                         }
4669                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4670                         {
4671                             patInfo = m_settings.supportRbPlus ?
4672                                       GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4673                         }
4674                         else
4675                         {
4676                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4677                             patInfo = m_settings.supportRbPlus ?
4678                                       GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4679                         }
4680                     }
4681                 }
4682             }
4683         }
4684     }
4685 
4686     return (patInfo != NULL) ? &patInfo[index] : NULL;
4687 }
4688 
4689 /**
4690 ************************************************************************************************************************
4691 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4692 *
4693 *   @brief
4694 *       Internal function to calculate address from coord for micro tiled swizzle surface
4695 *
4696 *   @return
4697 *       ADDR_E_RETURNCODE
4698 ************************************************************************************************************************
4699 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4700 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4701      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4702      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4703      ) const
4704 {
4705     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4706     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4707     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4708     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4709 
4710     localIn.swizzleMode  = pIn->swizzleMode;
4711     localIn.flags        = pIn->flags;
4712     localIn.resourceType = pIn->resourceType;
4713     localIn.bpp          = pIn->bpp;
4714     localIn.width        = Max(pIn->unalignedWidth,  1u);
4715     localIn.height       = Max(pIn->unalignedHeight, 1u);
4716     localIn.numSlices    = Max(pIn->numSlices,       1u);
4717     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4718     localIn.numSamples   = Max(pIn->numSamples,      1u);
4719     localIn.numFrags     = Max(pIn->numFrags,        1u);
4720     localOut.pMipInfo    = mipInfo;
4721 
4722     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4723 
4724     if (ret == ADDR_OK)
4725     {
4726         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4727         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4728         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
4729         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
4730 
4731         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4732         {
4733             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4734             const UINT_32 yb           = pIn->y / localOut.blockHeight;
4735             const UINT_32 xb           = pIn->x / localOut.blockWidth;
4736             const UINT_32 blockIndex   = yb * pb + xb;
4737             const UINT_32 blockSize    = 256;
4738             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4739                                                                    pIn->x << elemLog2,
4740                                                                    pIn->y,
4741                                                                    0);
4742             pOut->addr = localOut.sliceSize * pIn->slice +
4743                          mipInfo[pIn->mipId].macroBlockOffset +
4744                          (blockIndex * blockSize) +
4745                          blk256Offset;
4746         }
4747         else
4748         {
4749             ret = ADDR_INVALIDPARAMS;
4750         }
4751     }
4752 
4753     return ret;
4754 }
4755 
4756 /**
4757 ************************************************************************************************************************
4758 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4759 *
4760 *   @brief
4761 *       Internal function to calculate address from coord for macro tiled swizzle surface
4762 *
4763 *   @return
4764 *       ADDR_E_RETURNCODE
4765 ************************************************************************************************************************
4766 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4767 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4768      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4769      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4770      ) const
4771 {
4772     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4773     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4774     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4775     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4776 
4777     localIn.swizzleMode  = pIn->swizzleMode;
4778     localIn.flags        = pIn->flags;
4779     localIn.resourceType = pIn->resourceType;
4780     localIn.bpp          = pIn->bpp;
4781     localIn.width        = Max(pIn->unalignedWidth,  1u);
4782     localIn.height       = Max(pIn->unalignedHeight, 1u);
4783     localIn.numSlices    = Max(pIn->numSlices,       1u);
4784     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4785     localIn.numSamples   = Max(pIn->numSamples,      1u);
4786     localIn.numFrags     = Max(pIn->numFrags,        1u);
4787     localOut.pMipInfo    = mipInfo;
4788 
4789     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4790 
4791     if (ret == ADDR_OK)
4792     {
4793         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4794         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4795         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4796         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4797         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4798         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4799                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4800 
4801         if (localIn.numFrags > 1)
4802         {
4803             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4804                                                                     pIn->resourceType,
4805                                                                     elemLog2,
4806                                                                     localIn.numFrags);
4807 
4808             if (pPatInfo != NULL)
4809             {
4810                 const UINT_32 pb        = localOut.pitch / localOut.blockWidth;
4811                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4812                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4813                 const UINT_64 blkIdx    = yb * pb + xb;
4814 
4815                 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
4816                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4817 
4818                 const UINT_32 blkOffset =
4819                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4820                                                     blkSizeLog2,
4821                                                     pIn->x,
4822                                                     pIn->y,
4823                                                     pIn->slice,
4824                                                     pIn->sample);
4825 
4826                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4827                              (blkIdx << blkSizeLog2) +
4828                              (blkOffset ^ pipeBankXor);
4829             }
4830             else
4831             {
4832                 ret = ADDR_INVALIDPARAMS;
4833             }
4834         }
4835         else
4836         {
4837             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4838             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4839             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4840 
4841             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4842             {
4843                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4844                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4845                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4846                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4847                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4848                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4849                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4850                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4851                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4852                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4853                 const UINT_64 blkIdx    = yb * pb + xb;
4854                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4855                                                                     x << elemLog2,
4856                                                                     y,
4857                                                                     z);
4858                 pOut->addr = sliceSize * sliceId +
4859                              mipInfo[pIn->mipId].macroBlockOffset +
4860                              (blkIdx << blkSizeLog2) +
4861                              (blkOffset ^ pipeBankXor);
4862             }
4863             else
4864             {
4865                 ret = ADDR_INVALIDPARAMS;
4866             }
4867         }
4868     }
4869 
4870     return ret;
4871 }
4872 
4873 /**
4874 ************************************************************************************************************************
4875 *   Gfx10Lib::HwlComputeMaxBaseAlignments
4876 *
4877 *   @brief
4878 *       Gets maximum alignments
4879 *   @return
4880 *       maximum alignments
4881 ************************************************************************************************************************
4882 */
HwlComputeMaxBaseAlignments() const4883 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4884 {
4885     return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4886 }
4887 
4888 /**
4889 ************************************************************************************************************************
4890 *   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4891 *
4892 *   @brief
4893 *       Gets maximum alignments for metadata
4894 *   @return
4895 *       maximum alignments for metadata
4896 ************************************************************************************************************************
4897 */
HwlComputeMaxMetaBaseAlignments() const4898 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4899 {
4900     Dim3d metaBlk;
4901 
4902     const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4903     {
4904         ADDR_SW_64KB_Z_X,
4905         m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4906     };
4907 
4908     UINT_32 maxBaseAlignHtile = 0;
4909     UINT_32 maxBaseAlignCmask = 0;
4910 
4911     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4912     {
4913         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4914         {
4915             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4916             {
4917                 // Max base alignment for Htile
4918                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4919                                                                 ADDR_RSRC_TEX_2D,
4920                                                                 ValidSwizzleModeForXmask[swIdx],
4921                                                                 bppLog2,
4922                                                                 numFragLog2,
4923                                                                 TRUE,
4924                                                                 &metaBlk);
4925 
4926                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4927             }
4928         }
4929 
4930         // Max base alignment for Cmask
4931         const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4932                                                         ADDR_RSRC_TEX_2D,
4933                                                         ValidSwizzleModeForXmask[swIdx],
4934                                                         0,
4935                                                         0,
4936                                                         TRUE,
4937                                                         &metaBlk);
4938 
4939         maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4940     }
4941 
4942     // Max base alignment for 2D Dcc
4943     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4944     {
4945         ADDR_SW_64KB_S_X,
4946         ADDR_SW_64KB_D_X,
4947         ADDR_SW_64KB_R_X,
4948         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4949     };
4950 
4951     UINT_32 maxBaseAlignDcc2D = 0;
4952 
4953     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4954     {
4955         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4956         {
4957             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4958             {
4959                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4960                                                              ADDR_RSRC_TEX_2D,
4961                                                              ValidSwizzleModeForDcc2D[swIdx],
4962                                                              bppLog2,
4963                                                              numFragLog2,
4964                                                              TRUE,
4965                                                              &metaBlk);
4966 
4967                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4968             }
4969         }
4970     }
4971 
4972     // Max base alignment for 3D Dcc
4973     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4974     {
4975         ADDR_SW_64KB_Z_X,
4976         ADDR_SW_64KB_S_X,
4977         ADDR_SW_64KB_D_X,
4978         ADDR_SW_64KB_R_X,
4979         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4980     };
4981 
4982     UINT_32 maxBaseAlignDcc3D = 0;
4983 
4984     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4985     {
4986         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4987         {
4988             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4989                                                          ADDR_RSRC_TEX_3D,
4990                                                          ValidSwizzleModeForDcc3D[swIdx],
4991                                                          bppLog2,
4992                                                          0,
4993                                                          TRUE,
4994                                                          &metaBlk);
4995 
4996             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4997         }
4998     }
4999 
5000     return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
5001 }
5002 
5003 /**
5004 ************************************************************************************************************************
5005 *   Gfx10Lib::GetMetaElementSizeLog2
5006 *
5007 *   @brief
5008 *       Gets meta data element size log2
5009 *   @return
5010 *       Meta data element size log2
5011 ************************************************************************************************************************
5012 */
GetMetaElementSizeLog2(Gfx10DataType dataType)5013 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
5014     Gfx10DataType dataType) ///< Data surface type
5015 {
5016     INT_32 elemSizeLog2 = 0;
5017 
5018     if (dataType == Gfx10DataColor)
5019     {
5020         elemSizeLog2 = 0;
5021     }
5022     else if (dataType == Gfx10DataDepthStencil)
5023     {
5024         elemSizeLog2 = 2;
5025     }
5026     else
5027     {
5028         ADDR_ASSERT(dataType == Gfx10DataFmask);
5029         elemSizeLog2 = -1;
5030     }
5031 
5032     return elemSizeLog2;
5033 }
5034 
5035 /**
5036 ************************************************************************************************************************
5037 *   Gfx10Lib::GetMetaCacheSizeLog2
5038 *
5039 *   @brief
5040 *       Gets meta data cache line size log2
5041 *   @return
5042 *       Meta data cache line size log2
5043 ************************************************************************************************************************
5044 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)5045 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
5046     Gfx10DataType dataType) ///< Data surface type
5047 {
5048     INT_32 cacheSizeLog2 = 0;
5049 
5050     if (dataType == Gfx10DataColor)
5051     {
5052         cacheSizeLog2 = 6;
5053     }
5054     else if (dataType == Gfx10DataDepthStencil)
5055     {
5056         cacheSizeLog2 = 8;
5057     }
5058     else
5059     {
5060         ADDR_ASSERT(dataType == Gfx10DataFmask);
5061         cacheSizeLog2 = 8;
5062     }
5063     return cacheSizeLog2;
5064 }
5065 
5066 /**
5067 ************************************************************************************************************************
5068 *   Gfx10Lib::HwlComputeSurfaceInfoLinear
5069 *
5070 *   @brief
5071 *       Internal function to calculate alignment for linear surface
5072 *
5073 *   @return
5074 *       ADDR_E_RETURNCODE
5075 ************************************************************************************************************************
5076 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const5077 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
5078      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
5079      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
5080      ) const
5081 {
5082     ADDR_E_RETURNCODE returnCode = ADDR_OK;
5083 
5084     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
5085     {
5086         returnCode = ADDR_INVALIDPARAMS;
5087     }
5088     else
5089     {
5090         const UINT_32 elementBytes = pIn->bpp >> 3;
5091         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
5092         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5093         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
5094         UINT_32       actualHeight = pIn->height;
5095         UINT_64       sliceSize    = 0;
5096 
5097         if (pIn->numMipLevels > 1)
5098         {
5099             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
5100             {
5101                 UINT_32 mipWidth, mipHeight;
5102 
5103                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
5104 
5105                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
5106 
5107                 if (pOut->pMipInfo != NULL)
5108                 {
5109                     pOut->pMipInfo[i].pitch            = mipActualWidth;
5110                     pOut->pMipInfo[i].height           = mipHeight;
5111                     pOut->pMipInfo[i].depth            = mipDepth;
5112                     pOut->pMipInfo[i].offset           = sliceSize;
5113                     pOut->pMipInfo[i].mipTailOffset    = 0;
5114                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
5115                 }
5116 
5117                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
5118             }
5119         }
5120         else
5121         {
5122             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
5123 
5124             if (returnCode == ADDR_OK)
5125             {
5126                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
5127 
5128                 if (pOut->pMipInfo != NULL)
5129                 {
5130                     pOut->pMipInfo[0].pitch            = pitch;
5131                     pOut->pMipInfo[0].height           = actualHeight;
5132                     pOut->pMipInfo[0].depth            = mipDepth;
5133                     pOut->pMipInfo[0].offset           = 0;
5134                     pOut->pMipInfo[0].mipTailOffset    = 0;
5135                     pOut->pMipInfo[0].macroBlockOffset = 0;
5136                 }
5137             }
5138         }
5139 
5140         if (returnCode == ADDR_OK)
5141         {
5142             pOut->pitch          = pitch;
5143             pOut->height         = actualHeight;
5144             pOut->numSlices      = pIn->numSlices;
5145             pOut->sliceSize      = sliceSize;
5146             pOut->surfSize       = sliceSize * pOut->numSlices;
5147             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
5148             pOut->blockWidth     = pitchAlign;
5149             pOut->blockHeight    = 1;
5150             pOut->blockSlices    = 1;
5151 
5152             // Following members are useless on GFX10
5153             pOut->mipChainPitch  = 0;
5154             pOut->mipChainHeight = 0;
5155             pOut->mipChainSlice  = 0;
5156             pOut->epitchIsHeight = FALSE;
5157 
5158             // Post calculation validate
5159             ADDR_ASSERT(pOut->sliceSize > 0);
5160         }
5161     }
5162 
5163     return returnCode;
5164 }
5165 
5166 } // V2
5167 } // Addr
5168