• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx10addrlib.cpp
12 * @brief Contain the implementation for the Gfx10Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx10addrlib.h"
17 #include "addrcommon.h"
18 #include "gfx10_gb_reg.h"
19 
20 #include "amdgpu_asic_addr.h"
21 
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24 
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 *   Gfx10HwlInit
30 *
31 *   @brief
32 *       Creates an Gfx10Lib object.
33 *
34 *   @return
35 *       Returns an Gfx10Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx10HwlInit(const Client * pClient)38 Addr::Lib* Gfx10HwlInit(const Client* pClient)
39 {
40     return V2::Gfx10Lib::CreateObj(pClient);
41 }
42 
43 namespace V2
44 {
45 
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 //                               Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 
50 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
51 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
52     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
53     {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
54     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
55     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
56 
57     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
58     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
59     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
60     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
61 
62     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
63     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
64     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
65     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
66 
67     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
68     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
71 
72     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
73     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
74     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
75     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
76 
77     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
78     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
79     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
80     {{0,    0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_4KB_R_X
81 
82     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
83     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
84     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
85     {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X
86 
87     {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_VAR_Z_X
88     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
89     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
90     {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_VAR_R_X
91     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
92 };
93 
94 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
95 
96 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
97 const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
98 
99 /**
100 ************************************************************************************************************************
101 *   Gfx10Lib::Gfx10Lib
102 *
103 *   @brief
104 *       Constructor
105 *
106 ************************************************************************************************************************
107 */
Gfx10Lib(const Client * pClient)108 Gfx10Lib::Gfx10Lib(const Client* pClient)
109     :
110     Lib(pClient),
111     m_numPkrLog2(0),
112     m_numSaLog2(0),
113     m_colorBaseIndex(0),
114     m_xmaskBaseIndex(0),
115     m_htileBaseIndex(0),
116     m_dccBaseIndex(0)
117 {
118     memset(&m_settings, 0, sizeof(m_settings));
119     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
120 }
121 
122 /**
123 ************************************************************************************************************************
124 *   Gfx10Lib::~Gfx10Lib
125 *
126 *   @brief
127 *       Destructor
128 ************************************************************************************************************************
129 */
~Gfx10Lib()130 Gfx10Lib::~Gfx10Lib()
131 {
132 }
133 
134 /**
135 ************************************************************************************************************************
136 *   Gfx10Lib::HwlComputeHtileInfo
137 *
138 *   @brief
139 *       Interface function stub of AddrComputeHtilenfo
140 *
141 *   @return
142 *       ADDR_E_RETURNCODE
143 ************************************************************************************************************************
144 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const145 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
146     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
147     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
148     ) const
149 {
150     ADDR_E_RETURNCODE ret = ADDR_OK;
151 
152     if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
153          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
154         (pIn->hTileFlags.pipeAligned != TRUE))
155     {
156         ret = ADDR_INVALIDPARAMS;
157     }
158     else
159     {
160         Dim3d         metaBlk     = {};
161         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
162                                                    ADDR_RSRC_TEX_2D,
163                                                    pIn->swizzleMode,
164                                                    0,
165                                                    0,
166                                                    TRUE,
167                                                    &metaBlk);
168 
169         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
170         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
171         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
172         pOut->metaBlkWidth  = metaBlk.w;
173         pOut->metaBlkHeight = metaBlk.h;
174 
175         if (pIn->numMipLevels > 1)
176         {
177             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
178 
179             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
180 
181             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
182             {
183                 UINT_32 mipWidth, mipHeight;
184 
185                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
186 
187                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
188                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
189 
190                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
191                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
192                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
193 
194                 if (pOut->pMipInfo != NULL)
195                 {
196                     pOut->pMipInfo[i].inMiptail = FALSE;
197                     pOut->pMipInfo[i].offset    = offset;
198                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
199                 }
200 
201                 offset += mipSliceSize;
202             }
203 
204             pOut->sliceSize          = offset;
205             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
206             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
207 
208             if (pOut->pMipInfo != NULL)
209             {
210                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
211                 {
212                     pOut->pMipInfo[i].inMiptail = TRUE;
213                     pOut->pMipInfo[i].offset    = 0;
214                     pOut->pMipInfo[i].sliceSize = 0;
215                 }
216 
217                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
218                 {
219                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
220                 }
221             }
222         }
223         else
224         {
225             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
226             const UINT_32 heightInM = pOut->height / metaBlk.h;
227 
228             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
229             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
230             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
231 
232             if (pOut->pMipInfo != NULL)
233             {
234                 pOut->pMipInfo[0].inMiptail = FALSE;
235                 pOut->pMipInfo[0].offset    = 0;
236                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
237             }
238         }
239 
240         // Get the HTILE address equation (copied from HtileAddrFromCoord).
241         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
242         const UINT_32 index = m_xmaskBaseIndex;
243         const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
244 
245         ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
246         pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
247     }
248 
249     return ret;
250 }
251 
252 /**
253 ************************************************************************************************************************
254 *   Gfx10Lib::HwlComputeCmaskInfo
255 *
256 *   @brief
257 *       Interface function stub of AddrComputeCmaskInfo
258 *
259 *   @return
260 *       ADDR_E_RETURNCODE
261 ************************************************************************************************************************
262 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const263 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
264     const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
265     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
266     ) const
267 {
268     ADDR_E_RETURNCODE ret = ADDR_OK;
269 
270     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
271         (pIn->cMaskFlags.pipeAligned != TRUE)   ||
272         ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
273          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
274     {
275         ret = ADDR_INVALIDPARAMS;
276     }
277     else
278     {
279         Dim3d         metaBlk     = {};
280         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
281                                                    ADDR_RSRC_TEX_2D,
282                                                    pIn->swizzleMode,
283                                                    0,
284                                                    0,
285                                                    TRUE,
286                                                    &metaBlk);
287 
288         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
289         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
290         pOut->baseAlign     = metaBlkSize;
291         pOut->metaBlkWidth  = metaBlk.w;
292         pOut->metaBlkHeight = metaBlk.h;
293 
294         if (pIn->numMipLevels > 1)
295         {
296             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
297 
298             UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
299 
300             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
301             {
302                 UINT_32 mipWidth, mipHeight;
303 
304                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
305 
306                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
307                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
308 
309                 const UINT_32 pitchInM  = mipWidth  / metaBlk.w;
310                 const UINT_32 heightInM = mipHeight / metaBlk.h;
311 
312                 if (pOut->pMipInfo != NULL)
313                 {
314                     pOut->pMipInfo[i].inMiptail = FALSE;
315                     pOut->pMipInfo[i].offset    = metaBlkPerSlice * metaBlkSize;
316                     pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
317                 }
318 
319                 metaBlkPerSlice += pitchInM * heightInM;
320             }
321 
322             pOut->metaBlkNumPerSlice = metaBlkPerSlice;
323 
324             if (pOut->pMipInfo != NULL)
325             {
326                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
327                 {
328                     pOut->pMipInfo[i].inMiptail = TRUE;
329                     pOut->pMipInfo[i].offset    = 0;
330                     pOut->pMipInfo[i].sliceSize = 0;
331                 }
332 
333                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
334                 {
335                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
336                 }
337             }
338         }
339         else
340         {
341             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
342             const UINT_32 heightInM = pOut->height / metaBlk.h;
343 
344             pOut->metaBlkNumPerSlice = pitchInM * heightInM;
345 
346             if (pOut->pMipInfo != NULL)
347             {
348                 pOut->pMipInfo[0].inMiptail = FALSE;
349                 pOut->pMipInfo[0].offset    = 0;
350                 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
351             }
352         }
353 
354         pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlkSize;
355         pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
356 
357         // Get the CMASK address equation (copied from CmaskAddrFromCoord)
358         const UINT_32  fmaskBpp      = GetFmaskBpp(1, 1);
359         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
360         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
361         const UINT_8*  patIdxTable   =
362             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
363             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
364 
365         ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
366         pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
367     }
368 
369     return ret;
370 }
371 
372 /**
373 ************************************************************************************************************************
374 *   Gfx10Lib::HwlComputeDccInfo
375 *
376 *   @brief
377 *       Interface function to compute DCC key info
378 *
379 *   @return
380 *       ADDR_E_RETURNCODE
381 ************************************************************************************************************************
382 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const383 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
384     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
385     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
386     ) const
387 {
388     ADDR_E_RETURNCODE ret = ADDR_OK;
389 
390     if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
391     {
392         // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
393         // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
394         ret = ADDR_INVALIDPARAMS;
395     }
396     else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
397     {
398         // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
399         ret = ADDR_INVALIDPARAMS;
400     }
401     else
402     {
403         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
404 
405         {
406             // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
407             ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
408 
409             const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
410 
411             pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
412             pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
413             pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
414         }
415 
416         if (ret == ADDR_OK)
417         {
418             Dim3d         metaBlk     = {};
419             const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
420             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
421                                                        pIn->resourceType,
422                                                        pIn->swizzleMode,
423                                                        elemLog2,
424                                                        numFragLog2,
425                                                        pIn->dccKeyFlags.pipeAligned,
426                                                        &metaBlk);
427 
428             pOut->dccRamBaseAlign   = metaBlkSize;
429             pOut->metaBlkWidth      = metaBlk.w;
430             pOut->metaBlkHeight     = metaBlk.h;
431             pOut->metaBlkDepth      = metaBlk.d;
432             pOut->metaBlkSize       = metaBlkSize;
433 
434             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
435             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
436             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
437 
438             if (pIn->numMipLevels > 1)
439             {
440                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
441 
442                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
443 
444                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
445                 {
446                     UINT_32 mipWidth, mipHeight;
447 
448                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
449 
450                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
451                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
452 
453                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
454                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
455                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
456 
457                     if (pOut->pMipInfo != NULL)
458                     {
459                         pOut->pMipInfo[i].inMiptail = FALSE;
460                         pOut->pMipInfo[i].offset    = offset;
461                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
462                     }
463 
464                     offset += mipSliceSize;
465                 }
466 
467                 pOut->dccRamSliceSize    = offset;
468                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
469                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
470 
471                 if (pOut->pMipInfo != NULL)
472                 {
473                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
474                     {
475                         pOut->pMipInfo[i].inMiptail = TRUE;
476                         pOut->pMipInfo[i].offset    = 0;
477                         pOut->pMipInfo[i].sliceSize = 0;
478                     }
479 
480                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
481                     {
482                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
483                     }
484                 }
485             }
486             else
487             {
488                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
489                 const UINT_32 heightInM = pOut->height / metaBlk.h;
490 
491                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
492                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
493                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
494 
495                 if (pOut->pMipInfo != NULL)
496                 {
497                     pOut->pMipInfo[0].inMiptail = FALSE;
498                     pOut->pMipInfo[0].offset    = 0;
499                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
500                 }
501             }
502 
503             // Get the DCC address equation (copied from DccAddrFromCoord)
504             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
505             const UINT_32 numPipeLog2 = m_pipesLog2;
506             UINT_32       index       = m_dccBaseIndex + elemLog2;
507             const UINT_8* patIdxTable;
508 
509             if (m_settings.supportRbPlus)
510             {
511                 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
512 
513                 if (pIn->dccKeyFlags.pipeAligned)
514                 {
515                     index += MaxNumOfBpp;
516 
517                     if (m_numPkrLog2 < 2)
518                     {
519                         index += m_pipesLog2 * MaxNumOfBpp;
520                     }
521                     else
522                     {
523                         // 4 groups for "m_numPkrLog2 < 2" case
524                         index += 4 * MaxNumOfBpp;
525 
526                         const UINT_32 dccPipePerPkr = 3;
527 
528                         index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
529                                  (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
530                     }
531                 }
532             }
533             else
534             {
535                 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
536 
537                 if (pIn->dccKeyFlags.pipeAligned)
538                 {
539                     index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
540                 }
541                 else
542                 {
543                     index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
544                 }
545             }
546 
547             ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
548             pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
549         }
550     }
551 
552     return ret;
553 }
554 
555 /**
556 ************************************************************************************************************************
557 *   Gfx10Lib::HwlComputeCmaskAddrFromCoord
558 *
559 *   @brief
560 *       Interface function stub of AddrComputeCmaskAddrFromCoord
561 *
562 *   @return
563 *       ADDR_E_RETURNCODE
564 ************************************************************************************************************************
565 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)566 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
567     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
568     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
569 {
570     // Only support pipe aligned CMask
571     ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
572 
573     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
574     input.size            = sizeof(input);
575     input.cMaskFlags      = pIn->cMaskFlags;
576     input.colorFlags      = pIn->colorFlags;
577     input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
578     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
579     input.numSlices       = Max(pIn->numSlices,       1u);
580     input.swizzleMode     = pIn->swizzleMode;
581     input.resourceType    = pIn->resourceType;
582 
583     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
584     output.size = sizeof(output);
585 
586     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
587 
588     if (returnCode == ADDR_OK)
589     {
590         const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
591         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
592         const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
593         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
594         const UINT_8*  patIdxTable   =
595             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
596             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
597 
598         const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
599         const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
600         const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
601                                                                       blkSizeLog2 + 1, // +1 for nibble offset
602                                                                       pIn->x,
603                                                                       pIn->y,
604                                                                       pIn->slice,
605                                                                       0);
606         const UINT_32 xb       = pIn->x / output.metaBlkWidth;
607         const UINT_32 yb       = pIn->y / output.metaBlkHeight;
608         const UINT_32 pb       = output.pitch / output.metaBlkWidth;
609         const UINT_32 blkIndex = (yb * pb) + xb;
610         const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
611 
612         pOut->addr = (output.sliceSize * pIn->slice) +
613                      (blkIndex * (1 << blkSizeLog2)) +
614                      ((blkOffset >> 1) ^ pipeXor);
615         pOut->bitPosition = (blkOffset & 1) << 2;
616     }
617 
618     return returnCode;
619 }
620 
621 /**
622 ************************************************************************************************************************
623 *   Gfx10Lib::HwlComputeHtileAddrFromCoord
624 *
625 *   @brief
626 *       Interface function stub of AddrComputeHtileAddrFromCoord
627 *
628 *   @return
629 *       ADDR_E_RETURNCODE
630 ************************************************************************************************************************
631 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)632 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
633     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
634     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
635 {
636     ADDR_E_RETURNCODE returnCode = ADDR_OK;
637 
638     if (pIn->numMipLevels > 1)
639     {
640         returnCode = ADDR_NOTIMPLEMENTED;
641     }
642     else
643     {
644         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
645         input.size            = sizeof(input);
646         input.hTileFlags      = pIn->hTileFlags;
647         input.depthFlags      = pIn->depthflags;
648         input.swizzleMode     = pIn->swizzleMode;
649         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
650         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
651         input.numSlices       = Max(pIn->numSlices,       1u);
652         input.numMipLevels    = 1;
653 
654         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
655         output.size = sizeof(output);
656 
657         returnCode = ComputeHtileInfo(&input, &output);
658 
659         if (returnCode == ADDR_OK)
660         {
661             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
662             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
663             const UINT_32  index         = m_htileBaseIndex + numSampleLog2;
664             const UINT_8*  patIdxTable   = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
665 
666             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
667             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
668             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
669                                                                            blkSizeLog2 + 1, // +1 for nibble offset
670                                                                            pIn->x,
671                                                                            pIn->y,
672                                                                            pIn->slice,
673                                                                            0);
674             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
675             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
676             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
677             const UINT_32 blkIndex = (yb * pb) + xb;
678             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
679 
680             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
681                          (blkIndex * (1 << blkSizeLog2)) +
682                          ((blkOffset >> 1) ^ pipeXor);
683         }
684     }
685 
686     return returnCode;
687 }
688 
689 /**
690 ************************************************************************************************************************
691 *   Gfx10Lib::HwlComputeHtileCoordFromAddr
692 *
693 *   @brief
694 *       Interface function stub of AddrComputeHtileCoordFromAddr
695 *
696 *   @return
697 *       ADDR_E_RETURNCODE
698 ************************************************************************************************************************
699 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)700 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
701     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
702     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
703 {
704     ADDR_NOT_IMPLEMENTED();
705 
706     return ADDR_OK;
707 }
708 
709 /**
710 ************************************************************************************************************************
711 *   Gfx10Lib::HwlSupportComputeDccAddrFromCoord
712 *
713 *   @brief
714 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
715 *
716 *   @return
717 *       ADDR_E_RETURNCODE
718 ************************************************************************************************************************
719 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)720 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
721     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
722 {
723     ADDR_E_RETURNCODE returnCode = ADDR_OK;
724 
725     if ((pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
726         (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
727         (pIn->dccKeyFlags.linear == TRUE)             ||
728         (pIn->numFrags           >  1)                ||
729         (pIn->numMipLevels       >  1)                ||
730         (pIn->mipId              >  0))
731     {
732         returnCode = ADDR_NOTSUPPORTED;
733     }
734     else if ((pIn->pitch == 0)         ||
735              (pIn->metaBlkWidth == 0)  ||
736              (pIn->metaBlkHeight == 0) ||
737              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
738     {
739         returnCode = ADDR_NOTSUPPORTED;
740     }
741 
742     return returnCode;
743 }
744 
745 /**
746 ************************************************************************************************************************
747 *   Gfx10Lib::HwlComputeDccAddrFromCoord
748 *
749 *   @brief
750 *       Interface function stub of AddrComputeDccAddrFromCoord
751 *
752 *   @return
753 *       N/A
754 ************************************************************************************************************************
755 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)756 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
757     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
758     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
759 {
760     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
761     const UINT_32 numPipeLog2 = m_pipesLog2;
762     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
763     UINT_32       index       = m_dccBaseIndex + elemLog2;
764     const UINT_8* patIdxTable;
765 
766     if (m_settings.supportRbPlus)
767     {
768         patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
769 
770         if (pIn->dccKeyFlags.pipeAligned)
771         {
772             index += MaxNumOfBpp;
773 
774             if (m_numPkrLog2 < 2)
775             {
776                 index += m_pipesLog2 * MaxNumOfBpp;
777             }
778             else
779             {
780                 // 4 groups for "m_numPkrLog2 < 2" case
781                 index += 4 * MaxNumOfBpp;
782 
783                 const UINT_32 dccPipePerPkr = 3;
784 
785                 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
786                          (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
787             }
788         }
789     }
790     else
791     {
792         patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
793 
794         if (pIn->dccKeyFlags.pipeAligned)
795         {
796             index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
797         }
798         else
799         {
800             index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
801         }
802     }
803 
804     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
805     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
806     const UINT_32  blkOffset   =
807         ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
808                                         blkSizeLog2 + 1, // +1 for nibble offset
809                                         pIn->x,
810                                         pIn->y,
811                                         pIn->slice,
812                                         0);
813     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
814     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
815     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
816     const UINT_32 blkIndex = (yb * pb) + xb;
817     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
818 
819     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
820                  (blkIndex * (1 << blkSizeLog2)) +
821                  ((blkOffset >> 1) ^ pipeXor);
822 }
823 
824 /**
825 ************************************************************************************************************************
826 *   Gfx10Lib::HwlInitGlobalParams
827 *
828 *   @brief
829 *       Initializes global parameters
830 *
831 *   @return
832 *       TRUE if all settings are valid
833 *
834 ************************************************************************************************************************
835 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)836 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
837     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
838 {
839     BOOL_32              valid = TRUE;
840     GB_ADDR_CONFIG_GFX10 gbAddrConfig;
841 
842     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
843 
844     // These values are copied from CModel code
845     switch (gbAddrConfig.bits.NUM_PIPES)
846     {
847         case ADDR_CONFIG_1_PIPE:
848             m_pipes     = 1;
849             m_pipesLog2 = 0;
850             break;
851         case ADDR_CONFIG_2_PIPE:
852             m_pipes     = 2;
853             m_pipesLog2 = 1;
854             break;
855         case ADDR_CONFIG_4_PIPE:
856             m_pipes     = 4;
857             m_pipesLog2 = 2;
858             break;
859         case ADDR_CONFIG_8_PIPE:
860             m_pipes     = 8;
861             m_pipesLog2 = 3;
862             break;
863         case ADDR_CONFIG_16_PIPE:
864             m_pipes     = 16;
865             m_pipesLog2 = 4;
866             break;
867         case ADDR_CONFIG_32_PIPE:
868             m_pipes     = 32;
869             m_pipesLog2 = 5;
870             break;
871         case ADDR_CONFIG_64_PIPE:
872             m_pipes     = 64;
873             m_pipesLog2 = 6;
874             break;
875         default:
876             ADDR_ASSERT_ALWAYS();
877             valid = FALSE;
878             break;
879     }
880 
881     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
882     {
883         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
884             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
885             m_pipeInterleaveLog2  = 8;
886             break;
887         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
888             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
889             m_pipeInterleaveLog2  = 9;
890             break;
891         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
892             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
893             m_pipeInterleaveLog2  = 10;
894             break;
895         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
896             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
897             m_pipeInterleaveLog2  = 11;
898             break;
899         default:
900             ADDR_ASSERT_ALWAYS();
901             valid = FALSE;
902             break;
903     }
904 
905     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
906     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
907     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
908     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
909 
910     switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
911     {
912         case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
913             m_maxCompFrag     = 1;
914             m_maxCompFragLog2 = 0;
915             break;
916         case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
917             m_maxCompFrag     = 2;
918             m_maxCompFragLog2 = 1;
919             break;
920         case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
921             m_maxCompFrag     = 4;
922             m_maxCompFragLog2 = 2;
923             break;
924         case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
925             m_maxCompFrag     = 8;
926             m_maxCompFragLog2 = 3;
927             break;
928         default:
929             ADDR_ASSERT_ALWAYS();
930             valid = FALSE;
931             break;
932     }
933 
934     {
935         // Skip unaligned case
936         m_xmaskBaseIndex += MaxNumOfBppCMask;
937         m_htileBaseIndex += MaxNumOfAA;
938 
939         m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask;
940         m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
941         m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
942 
943         if (m_settings.supportRbPlus)
944         {
945             m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
946             m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
947 
948             ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
949 
950             ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
951                           sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
952 
953             if (m_numPkrLog2 >= 2)
954             {
955                 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
956                 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask;
957                 m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
958             }
959         }
960         else
961         {
962             const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
963                                         static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
964                                         1;
965 
966             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
967             ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) ==
968                           (numPipeType + 1) * MaxNumOfBppCMask);
969         }
970     }
971 
972     if (m_settings.supportRbPlus)
973     {
974         // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
975         // corresponding SW_64KB_* mode
976         m_blockVarSizeLog2 = m_pipesLog2 + 14;
977     }
978 
979     if (valid)
980     {
981         InitEquationTable();
982     }
983 
984     return valid;
985 }
986 
987 /**
988 ************************************************************************************************************************
989 *   Gfx10Lib::HwlConvertChipFamily
990 *
991 *   @brief
992 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
993 *   @return
994 *       ChipFamily
995 ************************************************************************************************************************
996 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)997 ChipFamily Gfx10Lib::HwlConvertChipFamily(
998     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
999     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1000 {
1001     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1002 
1003     m_settings.dccUnsup3DSwDis  = 1;
1004     m_settings.dsMipmapHtileFix = 1;
1005 
1006     switch (chipFamily)
1007     {
1008         case FAMILY_NV:
1009             if (ASICREV_IS_NAVI10_P(chipRevision))
1010             {
1011                 m_settings.dsMipmapHtileFix = 0;
1012                 m_settings.isDcn20          = 1;
1013             }
1014 
1015             if (ASICREV_IS_NAVI12_P(chipRevision))
1016             {
1017                 m_settings.isDcn20 = 1;
1018             }
1019 
1020             if (ASICREV_IS_NAVI14_M(chipRevision))
1021             {
1022                 m_settings.isDcn20 = 1;
1023             }
1024 
1025             if (ASICREV_IS_NAVI21_M(chipRevision))
1026             {
1027                 m_settings.supportRbPlus   = 1;
1028                 m_settings.dccUnsup3DSwDis = 0;
1029             }
1030 
1031             if (ASICREV_IS_NAVI22_P(chipRevision))
1032             {
1033                 m_settings.supportRbPlus   = 1;
1034                 m_settings.dccUnsup3DSwDis = 0;
1035             }
1036 
1037             if (ASICREV_IS_NAVI23_P(chipRevision))
1038             {
1039                 m_settings.supportRbPlus   = 1;
1040                 m_settings.dccUnsup3DSwDis = 0;
1041             }
1042 
1043             if (ASICREV_IS_NAVI24_P(chipRevision))
1044             {
1045                 m_settings.supportRbPlus   = 1;
1046                 m_settings.dccUnsup3DSwDis = 0;
1047             }
1048             break;
1049 
1050         case FAMILY_VGH:
1051             if (ASICREV_IS_VANGOGH(chipRevision))
1052             {
1053                 m_settings.supportRbPlus   = 1;
1054                 m_settings.dccUnsup3DSwDis = 0;
1055             }
1056             else
1057             {
1058                 ADDR_ASSERT(!"Unknown chip revision");
1059             }
1060             break;
1061 
1062         case FAMILY_RMB:
1063             if (ASICREV_IS_REMBRANDT(chipRevision))
1064             {
1065                 m_settings.supportRbPlus   = 1;
1066                 m_settings.dccUnsup3DSwDis = 0;
1067             }
1068             else
1069             {
1070                 ADDR_ASSERT(!"Unknown chip revision");
1071             }
1072             break;
1073         case FAMILY_RPL:
1074             if (ASICREV_IS_RAPHAEL(chipRevision))
1075             {
1076                 m_settings.supportRbPlus   = 1;
1077                 m_settings.dccUnsup3DSwDis = 0;
1078             }
1079             break;
1080         case FAMILY_MDN:
1081             if (ASICREV_IS_MENDOCINO(chipRevision))
1082             {
1083                 m_settings.supportRbPlus   = 1;
1084                 m_settings.dccUnsup3DSwDis = 0;
1085             }
1086             else
1087             {
1088                 ADDR_ASSERT(!"Unknown chip revision");
1089             }
1090             break;
1091         default:
1092             ADDR_ASSERT(!"Unknown chip family");
1093             break;
1094     }
1095 
1096     m_configFlags.use32bppFor422Fmt = TRUE;
1097 
1098     return family;
1099 }
1100 
1101 /**
1102 ************************************************************************************************************************
1103 *   Gfx10Lib::GetBlk256SizeLog2
1104 *
1105 *   @brief
1106 *       Get block 256 size
1107 *
1108 *   @return
1109 *       N/A
1110 ************************************************************************************************************************
1111 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1112 void Gfx10Lib::GetBlk256SizeLog2(
1113     AddrResourceType resourceType,      ///< [in] Resource type
1114     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1115     UINT_32          elemLog2,          ///< [in] element size log2
1116     UINT_32          numSamplesLog2,    ///< [in] number of samples
1117     Dim3d*           pBlock             ///< [out] block size
1118     ) const
1119 {
1120     if (IsThin(resourceType, swizzleMode))
1121     {
1122         UINT_32 blockBits = 8 - elemLog2;
1123 
1124         if (IsZOrderSwizzle(swizzleMode))
1125         {
1126             blockBits -= numSamplesLog2;
1127         }
1128 
1129         pBlock->w = (blockBits >> 1) + (blockBits & 1);
1130         pBlock->h = (blockBits >> 1);
1131         pBlock->d = 0;
1132     }
1133     else
1134     {
1135         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1136 
1137         UINT_32 blockBits = 8 - elemLog2;
1138 
1139         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1140         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1141         pBlock->h = (blockBits / 3);
1142     }
1143 }
1144 
1145 /**
1146 ************************************************************************************************************************
1147 *   Gfx10Lib::GetCompressedBlockSizeLog2
1148 *
1149 *   @brief
1150 *       Get compress block size
1151 *
1152 *   @return
1153 *       N/A
1154 ************************************************************************************************************************
1155 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1156 void Gfx10Lib::GetCompressedBlockSizeLog2(
1157     Gfx10DataType    dataType,          ///< [in] Data type
1158     AddrResourceType resourceType,      ///< [in] Resource type
1159     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1160     UINT_32          elemLog2,          ///< [in] element size log2
1161     UINT_32          numSamplesLog2,    ///< [in] number of samples
1162     Dim3d*           pBlock             ///< [out] block size
1163     ) const
1164 {
1165     if (dataType == Gfx10DataColor)
1166     {
1167         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1168     }
1169     else
1170     {
1171         ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1172         pBlock->w = 3;
1173         pBlock->h = 3;
1174         pBlock->d = 0;
1175     }
1176 }
1177 
1178 /**
1179 ************************************************************************************************************************
1180 *   Gfx10Lib::GetMetaOverlapLog2
1181 *
1182 *   @brief
1183 *       Get meta block overlap
1184 *
1185 *   @return
1186 *       N/A
1187 ************************************************************************************************************************
1188 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1189 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1190     Gfx10DataType    dataType,          ///< [in] Data type
1191     AddrResourceType resourceType,      ///< [in] Resource type
1192     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1193     UINT_32          elemLog2,          ///< [in] element size log2
1194     UINT_32          numSamplesLog2     ///< [in] number of samples
1195     ) const
1196 {
1197     Dim3d compBlock;
1198     Dim3d microBlock;
1199 
1200     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1201     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1202 
1203     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
1204     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1205     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
1206     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
1207     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
1208 
1209     if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1210     {
1211         overlap++;
1212     }
1213 
1214     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1215     if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1216     {
1217         overlap--;
1218     }
1219     overlap = Max(overlap, 0);
1220     return overlap;
1221 }
1222 
1223 /**
1224 ************************************************************************************************************************
1225 *   Gfx10Lib::Get3DMetaOverlapLog2
1226 *
1227 *   @brief
1228 *       Get 3d meta block overlap
1229 *
1230 *   @return
1231 *       N/A
1232 ************************************************************************************************************************
1233 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1234 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1235     AddrResourceType resourceType,      ///< [in] Resource type
1236     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1237     UINT_32          elemLog2           ///< [in] element size log2
1238     ) const
1239 {
1240     Dim3d microBlock;
1241     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1242 
1243     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1244 
1245     if (m_settings.supportRbPlus)
1246     {
1247         overlap++;
1248     }
1249 
1250     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1251     {
1252         overlap = 0;
1253     }
1254     return overlap;
1255 }
1256 
1257 /**
1258 ************************************************************************************************************************
1259 *   Gfx10Lib::GetPipeRotateAmount
1260 *
1261 *   @brief
1262 *       Get pipe rotate amount
1263 *
1264 *   @return
1265 *       Pipe rotate amount
1266 ************************************************************************************************************************
1267 */
1268 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1269 INT_32 Gfx10Lib::GetPipeRotateAmount(
1270     AddrResourceType resourceType,      ///< [in] Resource type
1271     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
1272     ) const
1273 {
1274     INT_32 amount = 0;
1275 
1276     if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1277     {
1278         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1279                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
1280     }
1281 
1282     return amount;
1283 }
1284 
1285 /**
1286 ************************************************************************************************************************
1287 *   Gfx10Lib::GetMetaBlkSize
1288 *
1289 *   @brief
1290 *       Get metadata block size
1291 *
1292 *   @return
1293 *       Meta block size
1294 ************************************************************************************************************************
1295 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1296 UINT_32 Gfx10Lib::GetMetaBlkSize(
1297     Gfx10DataType    dataType,          ///< [in] Data type
1298     AddrResourceType resourceType,      ///< [in] Resource type
1299     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1300     UINT_32          elemLog2,          ///< [in] element size log2
1301     UINT_32          numSamplesLog2,    ///< [in] number of samples
1302     BOOL_32          pipeAlign,         ///< [in] pipe align
1303     Dim3d*           pBlock             ///< [out] block size
1304     ) const
1305 {
1306     INT_32 metablkSizeLog2;
1307 
1308     {
1309         const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
1310         const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
1311         const INT_32 compBlkSizeLog2    = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1312         const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1313                                           numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1314         const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
1315         INT_32       numPipesLog2       = m_pipesLog2;
1316 
1317         if (IsThin(resourceType, swizzleMode))
1318         {
1319             if ((pipeAlign == FALSE) ||
1320                 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1321                 (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
1322             {
1323                 if (pipeAlign)
1324                 {
1325                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1326                     metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1327                 }
1328                 else
1329                 {
1330                     metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1331                 }
1332             }
1333             else
1334             {
1335                 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1336                 {
1337                     numPipesLog2++;
1338                 }
1339 
1340                 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1341 
1342                 if (numPipesLog2 >= 4)
1343                 {
1344                     INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1345 
1346                     // In 16Bpe 8xaa, we have an extra overlap bit
1347                     if ((pipeRotateLog2 > 0)  &&
1348                         (elemLog2 == 4)       &&
1349                         (numSamplesLog2 == 3) &&
1350                         (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1351                     {
1352                         overlapLog2++;
1353                     }
1354 
1355                     metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1356                     metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1357 
1358                     if (m_settings.supportRbPlus    &&
1359                         IsRtOptSwizzle(swizzleMode) &&
1360                         (numPipesLog2 == 6)         &&
1361                         (numSamplesLog2 == 3)       &&
1362                         (m_maxCompFragLog2 == 3)    &&
1363                         (metablkSizeLog2 < 15))
1364                     {
1365                         metablkSizeLog2 = 15;
1366                     }
1367                 }
1368                 else
1369                 {
1370                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1371                 }
1372 
1373                 if (dataType == Gfx10DataDepthStencil)
1374                 {
1375                     // For htile surfaces, pad meta block size to 2K * num_pipes
1376                     metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1377                 }
1378 
1379                 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1380 
1381                 if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1382                 {
1383                     const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1384 
1385                     metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1386                 }
1387             }
1388 
1389             const INT_32 metablkBitsLog2 =
1390                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1391             pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1392             pBlock->h = 1 << (metablkBitsLog2 >> 1);
1393             pBlock->d = 1;
1394         }
1395         else
1396         {
1397             ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1398 
1399             if (pipeAlign)
1400             {
1401                 if (m_settings.supportRbPlus         &&
1402                     (m_pipesLog2 == m_numSaLog2 + 1) &&
1403                     (m_pipesLog2 > 1)                &&
1404                     IsRbAligned(resourceType, swizzleMode))
1405                 {
1406                     numPipesLog2++;
1407                 }
1408 
1409                 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1410 
1411                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1412                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1413                 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1414             }
1415             else
1416             {
1417                 metablkSizeLog2 = 12;
1418             }
1419 
1420             const INT_32 metablkBitsLog2 =
1421                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1422             pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1423             pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1424             pBlock->d = 1 << (metablkBitsLog2 / 3);
1425         }
1426     }
1427 
1428     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1429 }
1430 
1431 /**
1432 ************************************************************************************************************************
1433 *   Gfx10Lib::ConvertSwizzlePatternToEquation
1434 *
1435 *   @brief
1436 *       Convert swizzle pattern to equation.
1437 *
1438 *   @return
1439 *       N/A
1440 ************************************************************************************************************************
1441 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1442 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1443     UINT_32                elemLog2,  ///< [in] element bytes log2
1444     AddrResourceType       rsrcType,  ///< [in] resource type
1445     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1446     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern infor
1447     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1448     const
1449 {
1450     // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
1451     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1452     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1453 
1454     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1455     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1456     memset(pEquation, 0, sizeof(ADDR_EQUATION));
1457     pEquation->numBits            = blockSizeLog2;
1458     pEquation->numBitComponents   = pPatInfo->maxItemCount;
1459     pEquation->stackedDepthSlices = FALSE;
1460 
1461     for (UINT_32 i = 0; i < elemLog2; i++)
1462     {
1463         pEquation->addr[i].channel = 0;
1464         pEquation->addr[i].valid   = 1;
1465         pEquation->addr[i].index   = i;
1466     }
1467 
1468     if (IsXor(swMode) == FALSE)
1469     {
1470         // Use simplified logic when we only have one bit-component
1471         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1472         {
1473             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1474 
1475             if (pSwizzle[i].x != 0)
1476             {
1477                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1478 
1479                 pEquation->addr[i].channel = 0;
1480                 pEquation->addr[i].valid   = 1;
1481                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1482             }
1483             else if (pSwizzle[i].y != 0)
1484             {
1485                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1486 
1487                 pEquation->addr[i].channel = 1;
1488                 pEquation->addr[i].valid   = 1;
1489                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1490             }
1491             else
1492             {
1493                 ADDR_ASSERT(pSwizzle[i].z != 0);
1494                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1495 
1496                 pEquation->addr[i].channel = 2;
1497                 pEquation->addr[i].valid   = 1;
1498                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1499             }
1500         }
1501     }
1502     else
1503     {
1504         Dim3d dim;
1505         ComputeBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, rsrcType, swMode);
1506 
1507         const UINT_32 blkXLog2 = Log2(dim.w);
1508         const UINT_32 blkYLog2 = Log2(dim.h);
1509         const UINT_32 blkZLog2 = Log2(dim.d);
1510         const UINT_32 blkXMask = dim.w - 1;
1511         const UINT_32 blkYMask = dim.h - 1;
1512         const UINT_32 blkZMask = dim.d - 1;
1513 
1514         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1515         memcpy(&swizzle, pSwizzle, sizeof(swizzle));
1516         UINT_32          xMask = 0;
1517         UINT_32          yMask = 0;
1518         UINT_32          zMask = 0;
1519 
1520         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1521         {
1522             for (UINT_32 bitComp = 0; bitComp < ADDR_MAX_EQUATION_COMP; bitComp++)
1523             {
1524                 if (swizzle[i].value == 0)
1525                 {
1526                     ADDR_ASSERT(bitComp != 0); // Bits above element size must have at least one addr-bit
1527                     ADDR_ASSERT(bitComp <= pPatInfo->maxItemCount);
1528                     break;
1529                 }
1530 
1531                 if (swizzle[i].x != 0)
1532                 {
1533                     const UINT_32 xLog2 = BitScanForward(swizzle[i].x);
1534                     swizzle[i].x = UnsetLeastBit(swizzle[i].x);
1535                     xMask |= (1 << xLog2);
1536 
1537                     pEquation->comps[bitComp][i].channel = 0;
1538                     pEquation->comps[bitComp][i].valid   = 1;
1539                     pEquation->comps[bitComp][i].index   = xLog2 + elemLog2;
1540                 }
1541                 else if (swizzle[i].y != 0)
1542                 {
1543                     const UINT_32 yLog2 = BitScanForward(swizzle[i].y);
1544                     swizzle[i].y = UnsetLeastBit(swizzle[i].y);
1545                     yMask |= (1 << yLog2);
1546 
1547                     pEquation->comps[bitComp][i].channel = 1;
1548                     pEquation->comps[bitComp][i].valid   = 1;
1549                     pEquation->comps[bitComp][i].index   = yLog2;
1550                 }
1551                 else if (swizzle[i].z != 0)
1552                 {
1553                     const UINT_32 zLog2 = BitScanForward(swizzle[i].z);
1554                     swizzle[i].z = UnsetLeastBit(swizzle[i].z);
1555                     zMask |= (1 << zLog2);
1556 
1557                     pEquation->comps[bitComp][i].channel = 2;
1558                     pEquation->comps[bitComp][i].valid   = 1;
1559                     pEquation->comps[bitComp][i].index   = zLog2;
1560                 }
1561                 else
1562                 {
1563                     // This function doesn't handle MSAA (must update block dims, here, and consumers)
1564                     ADDR_ASSERT_ALWAYS();
1565                 }
1566             }
1567             ADDR_ASSERT(swizzle[i].value == 0); // We missed an xor? Are there too many?
1568         }
1569 
1570         // We missed an address bit for coords inside the block?
1571         // That means two coords will land on the same addr, which is bad.
1572         ADDR_ASSERT(((xMask & blkXMask) == blkXMask) &&
1573                     ((yMask & blkYMask) == blkYMask) &&
1574                     ((zMask & blkZMask) == blkZMask));
1575         // We're sourcing from outside our block? That won't fly for PRTs, which need to be movable.
1576         // Non-xor modes can also be used for 2D PRTs but they're handled in the simplified logic above.
1577         ADDR_ASSERT((IsPrt(swMode) == false) ||
1578                     ((xMask == blkXMask) &&
1579                      (yMask == blkYMask) &&
1580                      (zMask == blkZMask)));
1581     }
1582 }
1583 
1584 /**
1585 ************************************************************************************************************************
1586 *   Gfx10Lib::InitEquationTable
1587 *
1588 *   @brief
1589 *       Initialize Equation table.
1590 *
1591 *   @return
1592 *       N/A
1593 ************************************************************************************************************************
1594 */
InitEquationTable()1595 VOID Gfx10Lib::InitEquationTable()
1596 {
1597     memset(m_equationTable, 0, sizeof(m_equationTable));
1598 
1599     // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1600     // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1601     // computing 2D resources.
1602     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1603     {
1604         // Add offset. Start iterating from ADDR_RSRC_TEX_2D
1605         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1606 
1607         // Iterate through the maximum number of swizzlemodes a type can hold
1608         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1609         {
1610             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1611 
1612             // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
1613             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1614             {
1615                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1616                 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
1617                 // overwriting the choice.
1618                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1619 
1620                 if (pPatInfo != NULL)
1621                 {
1622                     ADDR_ASSERT(IsValidSwMode(swMode));
1623                     ADDR_EQUATION equation = {};
1624 
1625                     // Passing in pPatInfo to get the addr equation
1626                     ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1627 
1628                     equationIndex = m_numEquations;
1629                     ADDR_ASSERT(equationIndex < EquationTableSize);
1630                     // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
1631                     m_equationTable[equationIndex] = equation;
1632                     // Increment m_numEquations
1633                     m_numEquations++;
1634                 }
1635                 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
1636                 // iteration in this nested for-loop
1637                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1638             }
1639         }
1640     }
1641 }
1642 
1643 /**
1644 ************************************************************************************************************************
1645 *   Gfx10Lib::HwlGetEquationIndex
1646 *
1647 *   @brief
1648 *       Interface function stub of GetEquationIndex
1649 *
1650 *   @return
1651 *       ADDR_E_RETURNCODE
1652 ************************************************************************************************************************
1653 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1654 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1655     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
1656     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
1657     ) const
1658 {
1659     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1660 
1661     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1662         (pIn->resourceType == ADDR_RSRC_TEX_3D))
1663     {
1664         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1665         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
1666         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
1667 
1668         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1669     }
1670 
1671     if (pOut->pMipInfo != NULL)
1672     {
1673         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1674         {
1675             pOut->pMipInfo[i].equationIndex = equationIdx;
1676         }
1677     }
1678 
1679     return equationIdx;
1680 }
1681 
1682 /**
1683 ************************************************************************************************************************
1684 *   Gfx10Lib::GetValidDisplaySwizzleModes
1685 *
1686 *   @brief
1687 *       Get valid swizzle modes mask for displayable surface
1688 *
1689 *   @return
1690 *       Valid swizzle modes mask for displayable surface
1691 ************************************************************************************************************************
1692 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1693 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
1694     UINT_32 bpp
1695     ) const
1696 {
1697     UINT_32 swModeMask = 0;
1698 
1699     if (bpp <= 64)
1700     {
1701         if (m_settings.isDcn20)
1702         {
1703             swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
1704         }
1705         else
1706         {
1707             swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
1708         }
1709     }
1710 
1711     return swModeMask;
1712 }
1713 
1714 /**
1715 ************************************************************************************************************************
1716 *   Gfx10Lib::IsValidDisplaySwizzleMode
1717 *
1718 *   @brief
1719 *       Check if a swizzle mode is supported by display engine
1720 *
1721 *   @return
1722 *       TRUE is swizzle mode is supported by display engine
1723 ************************************************************************************************************************
1724 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const1725 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1726     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
1727     ) const
1728 {
1729     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1730 
1731     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
1732 }
1733 
1734 /**
1735 ************************************************************************************************************************
1736 *   Gfx10Lib::GetMaxNumMipsInTail
1737 *
1738 *   @brief
1739 *       Return max number of mips in tails
1740 *
1741 *   @return
1742 *       Max number of mips in tails
1743 ************************************************************************************************************************
1744 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const1745 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1746     UINT_32 blockSizeLog2,     ///< block size log2
1747     BOOL_32 isThin             ///< is thin or thick
1748     ) const
1749 {
1750     UINT_32 effectiveLog2 = blockSizeLog2;
1751 
1752     if (isThin == FALSE)
1753     {
1754         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1755     }
1756 
1757     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1758 }
1759 
1760 /**
1761 ************************************************************************************************************************
1762 *   Gfx10Lib::HwlComputePipeBankXor
1763 *
1764 *   @brief
1765 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1766 *
1767 *   @return
1768 *       PipeBankXor value
1769 ************************************************************************************************************************
1770 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1771 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
1772     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
1773     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
1774     ) const
1775 {
1776     if (IsNonPrtXor(pIn->swizzleMode))
1777     {
1778         const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
1779 
1780         // No pipe xor...
1781         const UINT_32 pipeXor = 0;
1782         UINT_32       bankXor = 0;
1783 
1784         const UINT_32         XorPatternLen = 8;
1785         static const UINT_32  XorBankRot1b[XorPatternLen] = {0,  1,  0,  1,  0,  1,  0,  1};
1786         static const UINT_32  XorBankRot2b[XorPatternLen] = {0,  2,  1,  3,  2,  0,  3,  1};
1787         static const UINT_32  XorBankRot3b[XorPatternLen] = {0,  4,  2,  6,  1,  5,  3,  7};
1788         static const UINT_32  XorBankRot4b[XorPatternLen] = {0,  8,  4, 12,  2, 10,  6, 14};
1789         static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
1790 
1791         switch (bankBits)
1792         {
1793             case 1:
1794             case 2:
1795             case 3:
1796             case 4:
1797                 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
1798                 break;
1799             default:
1800                 // valid bank bits should be 0~4
1801                 ADDR_ASSERT_ALWAYS();
1802             case 0:
1803                 break;
1804         }
1805 
1806         pOut->pipeBankXor = bankXor | pipeXor;
1807     }
1808     else
1809     {
1810         pOut->pipeBankXor = 0;
1811     }
1812 
1813     return ADDR_OK;
1814 }
1815 
1816 /**
1817 ************************************************************************************************************************
1818 *   Gfx10Lib::HwlComputeSlicePipeBankXor
1819 *
1820 *   @brief
1821 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1822 *
1823 *   @return
1824 *       PipeBankXor value
1825 ************************************************************************************************************************
1826 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1827 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
1828     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
1829     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
1830     ) const
1831 {
1832     if (IsNonPrtXor(pIn->swizzleMode))
1833     {
1834         const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
1835         const UINT_32 pipeBits  = GetPipeXorBits(blockBits);
1836         const UINT_32 pipeXor   = ReverseBitVector(pIn->slice, pipeBits);
1837 
1838         pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
1839 
1840         if (pIn->bpe != 0)
1841         {
1842             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1843                                                                     pIn->resourceType,
1844                                                                     Log2(pIn->bpe >> 3),
1845                                                                     1);
1846 
1847             if (pPatInfo != NULL)
1848             {
1849                 ADDR_BIT_SETTING fullSwizzlePattern[20];
1850                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1851 
1852                 const UINT_32 pipeBankXorOffset =
1853                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
1854                                                     blockBits,
1855                                                     0,
1856                                                     0,
1857                                                     pIn->slice,
1858                                                     0);
1859 
1860                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1861 
1862                 // Should have no bit set under pipe interleave
1863                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1864 
1865                 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
1866                 ADDR_ASSERT(pipeBankXor == pipeXor);
1867 
1868                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1869             }
1870         }
1871     }
1872     else
1873     {
1874         pOut->pipeBankXor = 0;
1875     }
1876 
1877     return ADDR_OK;
1878 }
1879 
1880 /**
1881 ************************************************************************************************************************
1882 *   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1883 *
1884 *   @brief
1885 *       Compute sub resource offset to support swizzle pattern
1886 *
1887 *   @return
1888 *       Offset
1889 ************************************************************************************************************************
1890 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1891 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1892     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
1893     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
1894     ) const
1895 {
1896     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
1897 
1898     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1899 
1900     return ADDR_OK;
1901 }
1902 
1903 /**
1904 ************************************************************************************************************************
1905 *   Gfx10Lib::HwlComputeNonBlockCompressedView
1906 *
1907 *   @brief
1908 *       Compute non-block-compressed view for a given mipmap level/slice.
1909 *
1910 *   @return
1911 *       ADDR_E_RETURNCODE
1912 ************************************************************************************************************************
1913 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1914 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
1915     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
1916     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
1917     ) const
1918 {
1919     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1920 
1921     if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
1922     {
1923         // Only thin swizzle mode can have a NonBC view...
1924         returnCode = ADDR_INVALIDPARAMS;
1925     }
1926     else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1927              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1928     {
1929         // Only support BC1~BC7, ASTC, or ETC2 for now...
1930         returnCode = ADDR_NOTSUPPORTED;
1931     }
1932     else
1933     {
1934         UINT_32 bcWidth, bcHeight;
1935         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1936 
1937         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1938         infoIn.flags        = pIn->flags;
1939         infoIn.swizzleMode  = pIn->swizzleMode;
1940         infoIn.resourceType = pIn->resourceType;
1941         infoIn.bpp          = bpp;
1942         infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
1943         infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
1944         infoIn.numSlices    = pIn->numSlices;
1945         infoIn.numMipLevels = pIn->numMipLevels;
1946         infoIn.numSamples   = 1;
1947         infoIn.numFrags     = 1;
1948 
1949         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
1950         ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
1951 
1952         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1953         infoOut.pMipInfo = mipInfo;
1954 
1955         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
1956 
1957         if (tiled)
1958         {
1959             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
1960         }
1961         else
1962         {
1963             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
1964         }
1965 
1966         if (returnCode == ADDR_OK)
1967         {
1968             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1969             subOffIn.swizzleMode      = infoIn.swizzleMode;
1970             subOffIn.resourceType     = infoIn.resourceType;
1971             subOffIn.slice            = pIn->slice;
1972             subOffIn.sliceSize        = infoOut.sliceSize;
1973             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1974             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
1975 
1976             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1977 
1978             // For any mipmap level, move nonBc view base address by offset
1979             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1980             pOut->offset = subOffOut.offset;
1981 
1982             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1983             slicePbXorIn.bpe             = infoIn.bpp;
1984             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
1985             slicePbXorIn.resourceType    = infoIn.resourceType;
1986             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
1987             slicePbXorIn.slice           = pIn->slice;
1988 
1989             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
1990 
1991             // For any mipmap level, nonBc view should use computed pbXor
1992             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
1993             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
1994 
1995             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
1996             const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
1997             const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
1998 
1999             if (inTail)
2000             {
2001                 // For mipmap level that is in mip tail block, hack a lot of things...
2002                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2003                 // are fit in tail block:
2004 
2005                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2006                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2007 
2008                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2009                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2010 
2011                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2012                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2013 
2014                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2015                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2016             }
2017             // This check should cover at least mipId == 0
2018             else if (requestMipWidth << pIn->mipId == infoIn.width)
2019             {
2020                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2021                 // - only one mipmap level and mipId = 0
2022                 pOut->mipId        = 0;
2023                 pOut->numMipLevels = 1;
2024 
2025                 // (mip0) width = requestMipWidth
2026                 pOut->unalignedWidth = requestMipWidth;
2027 
2028                 // (mip0) height = requestMipHeight
2029                 pOut->unalignedHeight = requestMipHeight;
2030             }
2031             else
2032             {
2033                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2034                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2035                 // because single mip view may have different pitch value than original (multiple) mip view...
2036                 // A simple case would be:
2037                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2038                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2039                 //   mip0 width = 0x101/mip1 width = 0x80
2040                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2041                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2042 
2043                 // - 2 levels and mipId = 1
2044                 pOut->mipId        = 1;
2045                 pOut->numMipLevels = 2;
2046 
2047                 const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2048                 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2049 
2050                 const BOOL_32 needToAvoidInTail =
2051                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2052                     TRUE : FALSE;
2053 
2054                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2055                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2056 
2057                 const BOOL_32 needExtraWidth =
2058                     ((upperMipWidth < requestMipWidth * 2) ||
2059                      ((upperMipWidth == requestMipWidth * 2) &&
2060                       ((needToAvoidInTail == TRUE) ||
2061                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2062 
2063                 const BOOL_32 needExtraHeight =
2064                     ((upperMipHeight < requestMipHeight * 2) ||
2065                      ((upperMipHeight == requestMipHeight * 2) &&
2066                       ((needToAvoidInTail == TRUE) ||
2067                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2068 
2069                 // (mip0) width = requestLastMipLevelWidth
2070                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2071 
2072                 // (mip0) height = requestLastMipLevelHeight
2073                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2074             }
2075 
2076             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2077             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2078             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2079             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2080         }
2081     }
2082 
2083     return returnCode;
2084 }
2085 
2086 /**
2087 ************************************************************************************************************************
2088 *   Gfx10Lib::ValidateNonSwModeParams
2089 *
2090 *   @brief
2091 *       Validate compute surface info params except swizzle mode
2092 *
2093 *   @return
2094 *       TRUE if parameters are valid, FALSE otherwise
2095 ************************************************************************************************************************
2096 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2097 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2098     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2099 {
2100     BOOL_32 valid = TRUE;
2101 
2102     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2103     {
2104         ADDR_ASSERT_ALWAYS();
2105         valid = FALSE;
2106     }
2107 
2108     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2109     {
2110         ADDR_ASSERT_ALWAYS();
2111         valid = FALSE;
2112     }
2113 
2114     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2115     const AddrResourceType    rsrcType = pIn->resourceType;
2116     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2117     const BOOL_32             msaa     = (pIn->numFrags > 1);
2118     const BOOL_32             display  = flags.display;
2119     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2120     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2121     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2122     const BOOL_32             stereo   = flags.qbStereo;
2123 
2124     // Resource type check
2125     if (tex1d)
2126     {
2127         if (msaa || display || stereo)
2128         {
2129             ADDR_ASSERT_ALWAYS();
2130             valid = FALSE;
2131         }
2132     }
2133     else if (tex2d)
2134     {
2135         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2136         {
2137             ADDR_ASSERT_ALWAYS();
2138             valid = FALSE;
2139         }
2140     }
2141     else if (tex3d)
2142     {
2143         if (msaa || display || stereo)
2144         {
2145             ADDR_ASSERT_ALWAYS();
2146             valid = FALSE;
2147         }
2148     }
2149     else
2150     {
2151         ADDR_ASSERT_ALWAYS();
2152         valid = FALSE;
2153     }
2154 
2155     return valid;
2156 }
2157 
2158 /**
2159 ************************************************************************************************************************
2160 *   Gfx10Lib::ValidateSwModeParams
2161 *
2162 *   @brief
2163 *       Validate compute surface info related to swizzle mode
2164 *
2165 *   @return
2166 *       TRUE if parameters are valid, FALSE otherwise
2167 ************************************************************************************************************************
2168 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2169 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2170     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2171 {
2172     BOOL_32 valid = TRUE;
2173 
2174     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2175     {
2176         ADDR_ASSERT_ALWAYS();
2177         valid = FALSE;
2178     }
2179     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2180     {
2181         {
2182             ADDR_ASSERT_ALWAYS();
2183             valid = FALSE;
2184         }
2185     }
2186 
2187     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2188     const AddrResourceType    rsrcType    = pIn->resourceType;
2189     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2190     const BOOL_32             msaa        = (pIn->numFrags > 1);
2191     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2192     const BOOL_32             color       = flags.color;
2193     const BOOL_32             display     = flags.display;
2194     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2195     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2196     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2197     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2198     const BOOL_32             linear      = IsLinear(swizzle);
2199     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2200     const BOOL_32             blkVar      = IsBlockVariable(swizzle);
2201     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2202     const BOOL_32             prt         = flags.prt;
2203     const BOOL_32             fmask       = flags.fmask;
2204 
2205     // Misc check
2206     if ((pIn->numFrags > 1) &&
2207         (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2208     {
2209         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2210         ADDR_ASSERT_ALWAYS();
2211         valid = FALSE;
2212     }
2213 
2214     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2215     {
2216         ADDR_ASSERT_ALWAYS();
2217         valid = FALSE;
2218     }
2219 
2220     if ((pIn->bpp == 96) && (linear == FALSE))
2221     {
2222         ADDR_ASSERT_ALWAYS();
2223         valid = FALSE;
2224     }
2225 
2226     const UINT_32 swizzleMask = 1 << swizzle;
2227 
2228     // Resource type check
2229     if (tex1d)
2230     {
2231         if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2232         {
2233             ADDR_ASSERT_ALWAYS();
2234             valid = FALSE;
2235         }
2236     }
2237     else if (tex2d)
2238     {
2239         if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2240         {
2241             {
2242                 ADDR_ASSERT_ALWAYS();
2243                 valid = FALSE;
2244             }
2245         }
2246         else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2247                  (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2248         {
2249             ADDR_ASSERT_ALWAYS();
2250             valid = FALSE;
2251         }
2252     }
2253     else if (tex3d)
2254     {
2255         if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2256             (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2257             (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2258         {
2259             ADDR_ASSERT_ALWAYS();
2260             valid = FALSE;
2261         }
2262     }
2263 
2264     // Swizzle type check
2265     if (linear)
2266     {
2267         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2268         {
2269             ADDR_ASSERT_ALWAYS();
2270             valid = FALSE;
2271         }
2272     }
2273     else if (IsZOrderSwizzle(swizzle))
2274     {
2275         if ((pIn->bpp > 64)                         ||
2276             (msaa && (color || (pIn->bpp > 32)))    ||
2277             ElemLib::IsBlockCompressed(pIn->format) ||
2278             ElemLib::IsMacroPixelPacked(pIn->format))
2279         {
2280             ADDR_ASSERT_ALWAYS();
2281             valid = FALSE;
2282         }
2283     }
2284     else if (IsStandardSwizzle(rsrcType, swizzle))
2285     {
2286         if (zbuffer || msaa)
2287         {
2288             ADDR_ASSERT_ALWAYS();
2289             valid = FALSE;
2290         }
2291     }
2292     else if (IsDisplaySwizzle(rsrcType, swizzle))
2293     {
2294         if (zbuffer || msaa)
2295         {
2296             ADDR_ASSERT_ALWAYS();
2297             valid = FALSE;
2298         }
2299     }
2300     else if (IsRtOptSwizzle(swizzle))
2301     {
2302         if (zbuffer)
2303         {
2304             ADDR_ASSERT_ALWAYS();
2305             valid = FALSE;
2306         }
2307     }
2308     else
2309     {
2310         {
2311             ADDR_ASSERT_ALWAYS();
2312             valid = FALSE;
2313         }
2314     }
2315 
2316     // Block type check
2317     if (blk256B)
2318     {
2319         if (zbuffer || tex3d || msaa)
2320         {
2321             ADDR_ASSERT_ALWAYS();
2322             valid = FALSE;
2323         }
2324     }
2325     else if (blkVar)
2326     {
2327         if (m_blockVarSizeLog2 == 0)
2328         {
2329             ADDR_ASSERT_ALWAYS();
2330             valid = FALSE;
2331         }
2332     }
2333 
2334     return valid;
2335 }
2336 
2337 /**
2338 ************************************************************************************************************************
2339 *   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2340 *
2341 *   @brief
2342 *       Compute surface info sanity check
2343 *
2344 *   @return
2345 *       Offset
2346 ************************************************************************************************************************
2347 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2348 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2349     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2350     ) const
2351 {
2352     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2353 }
2354 
2355 /**
2356 ************************************************************************************************************************
2357 *   Gfx10Lib::HwlGetPreferredSurfaceSetting
2358 *
2359 *   @brief
2360 *       Internal function to get suggested surface information for client to use
2361 *
2362 *   @return
2363 *       ADDR_E_RETURNCODE
2364 ************************************************************************************************************************
2365 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2366 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2367     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2368     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2369     ) const
2370 {
2371     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2372 
2373     if (pIn->flags.fmask)
2374     {
2375         const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2376         const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2377 
2378         if (forbid64KbBlockType && forbidVarBlockType)
2379         {
2380             // Invalid combination...
2381             ADDR_ASSERT_ALWAYS();
2382             returnCode = ADDR_INVALIDPARAMS;
2383         }
2384         else
2385         {
2386             pOut->resourceType                   = ADDR_RSRC_TEX_2D;
2387             pOut->validBlockSet.value            = 0;
2388             pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
2389             pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
2390             pOut->validSwModeSet.value           = 0;
2391             pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
2392             pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
2393             pOut->canXor                         = TRUE;
2394             pOut->validSwTypeSet.value           = AddrSwSetZ;
2395             pOut->clientPreferredSwSet           = pOut->validSwTypeSet;
2396 
2397             BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2398 
2399             if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2400             {
2401                 const UINT_8  maxFmaskSwizzleModeType = 2;
2402                 const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2403                 const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2404                 const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2405                 const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
2406                 const UINT_32 width                   = Max(pIn->width, 1u);
2407                 const UINT_32 height                  = Max(pIn->height, 1u);
2408                 const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2409 
2410                 AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2411                 Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
2412                 Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
2413                 UINT_64         padSize[maxFmaskSwizzleModeType] = {};
2414 
2415                 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2416                 {
2417                     ComputeBlockDimensionForSurf(&blkDim[i].w,
2418                                                  &blkDim[i].h,
2419                                                  &blkDim[i].d,
2420                                                  fmaskBpp,
2421                                                  1,
2422                                                  pOut->resourceType,
2423                                                  swMode[i]);
2424 
2425                     padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2426                     padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2427                 }
2428 
2429                 if (Addr2BlockTypeWithinMemoryBudget(padSize[0],
2430                                                 padSize[1],
2431                                                 ratioLow,
2432                                                 ratioHi,
2433                                                 pIn->memoryBudget,
2434                                                 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2435                 {
2436                     use64KbBlockType = FALSE;
2437                 }
2438             }
2439             else if (forbidVarBlockType)
2440             {
2441                 use64KbBlockType = TRUE;
2442             }
2443 
2444             if (use64KbBlockType)
2445             {
2446                 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2447             }
2448             else
2449             {
2450                 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2451             }
2452         }
2453     }
2454     else
2455     {
2456         UINT_32 bpp    = pIn->bpp;
2457         UINT_32 width  = Max(pIn->width, 1u);
2458         UINT_32 height = Max(pIn->height, 1u);
2459 
2460         // Set format to INVALID will skip this conversion
2461         if (pIn->format != ADDR_FMT_INVALID)
2462         {
2463             ElemMode elemMode = ADDR_UNCOMPRESSED;
2464             UINT_32 expandX, expandY;
2465 
2466             // Get compression/expansion factors and element mode which indicates compression/expansion
2467             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2468                                                 &elemMode,
2469                                                 &expandX,
2470                                                 &expandY);
2471 
2472             UINT_32 basePitch = 0;
2473             GetElemLib()->AdjustSurfaceInfo(elemMode,
2474                                             expandX,
2475                                             expandY,
2476                                             &bpp,
2477                                             &basePitch,
2478                                             &width,
2479                                             &height);
2480         }
2481 
2482         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2483         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2484         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2485         const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2486         const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
2487 
2488         // Pre sanity check on non swizzle mode parameters
2489         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2490         localIn.flags        = pIn->flags;
2491         localIn.resourceType = pIn->resourceType;
2492         localIn.format       = pIn->format;
2493         localIn.bpp          = bpp;
2494         localIn.width        = width;
2495         localIn.height       = height;
2496         localIn.numSlices    = numSlices;
2497         localIn.numMipLevels = numMipLevels;
2498         localIn.numSamples   = numSamples;
2499         localIn.numFrags     = numFrags;
2500 
2501         if (ValidateNonSwModeParams(&localIn))
2502         {
2503             // Forbid swizzle mode(s) by client setting
2504             ADDR2_SWMODE_SET allowedSwModeSet = {};
2505             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2506             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
2507             allowedSwModeSet.value |=
2508                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2509                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2510             allowedSwModeSet.value |=
2511                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2512                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2513             allowedSwModeSet.value |=
2514                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2515                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2516             allowedSwModeSet.value |=
2517                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2518                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2519             allowedSwModeSet.value |=
2520                 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2521 
2522             if (pIn->preferredSwSet.value != 0)
2523             {
2524                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2525                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2526                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2527                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2528             }
2529 
2530             if (pIn->noXor)
2531             {
2532                 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2533             }
2534 
2535             if (pIn->maxAlign > 0)
2536             {
2537                 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2538                 {
2539                     allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2540                 }
2541 
2542                 if (pIn->maxAlign < Size64K)
2543                 {
2544                     allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2545                 }
2546 
2547                 if (pIn->maxAlign < Size4K)
2548                 {
2549                     allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2550                 }
2551 
2552                 if (pIn->maxAlign < Size256)
2553                 {
2554                     allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2555                 }
2556             }
2557 
2558             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2559             switch (pIn->resourceType)
2560             {
2561                 case ADDR_RSRC_TEX_1D:
2562                     allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2563                     break;
2564 
2565                 case ADDR_RSRC_TEX_2D:
2566                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2567                     break;
2568 
2569                 case ADDR_RSRC_TEX_3D:
2570                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2571 
2572                     if (pIn->flags.view3dAs2dArray)
2573                     {
2574                         allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2575                     }
2576                     break;
2577 
2578                 default:
2579                     ADDR_ASSERT_ALWAYS();
2580                     allowedSwModeSet.value = 0;
2581                     break;
2582             }
2583 
2584             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2585                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2586                 (bpp > 64)                               ||
2587                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2588             {
2589                 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2590             }
2591 
2592             if (pIn->format == ADDR_FMT_32_32_32)
2593             {
2594                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2595             }
2596 
2597             if (msaa)
2598             {
2599                 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2600             }
2601 
2602             if (pIn->flags.depth || pIn->flags.stencil)
2603             {
2604                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2605             }
2606 
2607             if (pIn->flags.display)
2608             {
2609                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2610             }
2611 
2612             if (allowedSwModeSet.value != 0)
2613             {
2614 #if DEBUG
2615                 // Post sanity check, at least AddrLib should accept the output generated by its own
2616                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2617 
2618                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2619                 {
2620                     if (validateSwModeSet & 1)
2621                     {
2622                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2623                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
2624                     }
2625 
2626                     validateSwModeSet >>= 1;
2627                 }
2628 #endif
2629 
2630                 pOut->resourceType   = pIn->resourceType;
2631                 pOut->validSwModeSet = allowedSwModeSet;
2632                 pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2633                 pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2634                 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2635 
2636                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2637 
2638                 if (pOut->clientPreferredSwSet.value == 0)
2639                 {
2640                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
2641                 }
2642 
2643                 // Apply optional restrictions
2644                 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2645                 {
2646                     if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2647                     {
2648                         // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2649                         // the GL2 in VAR mode, so it should be avoided.
2650                         allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2651                     }
2652                     else
2653                     {
2654                         // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2655                         // But we have to suffer from low performance because there is no other choice...
2656                         ADDR_ASSERT_ALWAYS();
2657                     }
2658                 }
2659 
2660                 if (pIn->flags.needEquation)
2661                 {
2662                     UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
2663                                                                         ADDR_MAX_LEGACY_EQUATION_COMP;
2664                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
2665                 }
2666 
2667                 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2668                 {
2669                     pOut->swizzleMode = ADDR_SW_LINEAR;
2670                 }
2671                 else
2672                 {
2673                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
2674 
2675                     if ((height > 1) && (computeMinSize == FALSE))
2676                     {
2677                         // Always ignore linear swizzle mode if:
2678                         // 1. This is a (2D/3D) resource with height > 1
2679                         // 2. Client doesn't require computing minimize size
2680                         allowedSwModeSet.swLinear = 0;
2681                     }
2682 
2683                     // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
2684                     ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2685 
2686                     // Determine block size if there are 2 or more block type candidates
2687                     if (IsPow2(allowedBlockSet.value) == FALSE)
2688                     {
2689                         // Tracks a valid SwizzleMode for each valid block type
2690                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
2691 
2692                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
2693 
2694                         if (m_blockVarSizeLog2 != 0)
2695                         {
2696                             swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
2697                         }
2698 
2699                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2700                         {
2701                             swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
2702                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
2703                             swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2704                         }
2705                         else
2706                         {
2707                             swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
2708                             swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
2709                             swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2710                         }
2711 
2712                         // Tracks the size of each valid swizzle mode's surface in bytes
2713                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
2714 
2715                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
2716                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
2717                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2718                         UINT_32       minSizeBlk         = AddrBlockMicro; // Tracks the most optimal block to use
2719                         UINT_64       minSize            = 0;              // Tracks the minimum acceptable block type
2720 
2721                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
2722 
2723                         // Iterate through all block types
2724                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
2725                         {
2726                             if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2727                             {
2728                                 localIn.swizzleMode = swMode[i];
2729 
2730                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
2731                                 {
2732                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
2733                                 }
2734                                 else
2735                                 {
2736                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
2737                                 }
2738 
2739                                 if (returnCode == ADDR_OK)
2740                                 {
2741                                     padSize[i] = localOut.surfSize;
2742 
2743                                     if (minSize == 0)
2744                                     {
2745                                         minSize    = padSize[i];
2746                                         minSizeBlk = i;
2747                                     }
2748                                     else
2749                                     {
2750                                         // Checks if the block type is within the memory budget but favors larger blocks
2751                                         if (Addr2BlockTypeWithinMemoryBudget(
2752                                                 minSize,
2753                                                 padSize[i],
2754                                                 ratioLow,
2755                                                 ratioHi,
2756                                                 0.0,
2757                                                 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
2758                                         {
2759                                             minSize    = padSize[i];
2760                                             minSizeBlk = i;
2761                                         }
2762                                     }
2763                                 }
2764                                 else
2765                                 {
2766                                     ADDR_ASSERT_ALWAYS();
2767                                     break;
2768                                 }
2769                             }
2770                         }
2771 
2772                         if (pIn->memoryBudget > 1.0)
2773                         {
2774                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
2775                             // smaller-block type again in coming loop
2776                             switch (minSizeBlk)
2777                             {
2778                                 case AddrBlockThick64KB:
2779                                     allowedBlockSet.macroThin64KB = 0;
2780                                 case AddrBlockThinVar:
2781                                 case AddrBlockThin64KB:
2782                                     allowedBlockSet.macroThick4KB = 0;
2783                                 case AddrBlockThick4KB:
2784                                     allowedBlockSet.macroThin4KB = 0;
2785                                 case AddrBlockThin4KB:
2786                                     allowedBlockSet.micro  = 0;
2787                                 case AddrBlockMicro:
2788                                     allowedBlockSet.linear = 0;
2789                                 case AddrBlockLinear:
2790                                     break;
2791 
2792                                 default:
2793                                     ADDR_ASSERT_ALWAYS();
2794                                     break;
2795                             }
2796 
2797                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2798                             {
2799                                 if ((i != minSizeBlk) &&
2800                                     Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2801                                 {
2802                                     if (Addr2BlockTypeWithinMemoryBudget(
2803                                             minSize,
2804                                             padSize[i],
2805                                             0,
2806                                             0,
2807                                             pIn->memoryBudget,
2808                                             GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
2809                                     {
2810                                         // Clear the block type if the memory waste is unacceptable
2811                                         allowedBlockSet.value &= ~(1u << (i - 1));
2812                                     }
2813                                 }
2814                             }
2815 
2816                             // Remove VAR block type if bigger block type is allowed
2817                             if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
2818                             {
2819                                 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
2820                                 {
2821                                     allowedBlockSet.var = 0;
2822                                 }
2823                             }
2824 
2825                             // Remove linear block type if 2 or more block types are allowed
2826                             if (IsPow2(allowedBlockSet.value) == FALSE)
2827                             {
2828                                 allowedBlockSet.linear = 0;
2829                             }
2830 
2831                             // Select the biggest allowed block type
2832                             minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
2833 
2834                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
2835                             {
2836                                 minSizeBlk = AddrBlockLinear;
2837                             }
2838                         }
2839 
2840                         switch (minSizeBlk)
2841                         {
2842                             case AddrBlockLinear:
2843                                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2844                                 break;
2845 
2846                             case AddrBlockMicro:
2847                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2848                                 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2849                                 break;
2850 
2851                             case AddrBlockThin4KB:
2852                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2853                                 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2854                                 break;
2855 
2856                             case AddrBlockThick4KB:
2857                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2858                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2859                                 break;
2860 
2861                             case AddrBlockThin64KB:
2862                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2863                                                           Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2864                                 break;
2865 
2866                             case AddrBlockThick64KB:
2867                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2868                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2869                                 break;
2870 
2871                             case AddrBlockThinVar:
2872                                 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2873                                 break;
2874 
2875                             default:
2876                                 ADDR_ASSERT_ALWAYS();
2877                                 allowedSwModeSet.value = 0;
2878                                 break;
2879                         }
2880                     }
2881 
2882                     // Block type should be determined.
2883                     ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2884 
2885                     ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2886 
2887                     // Determine swizzle type if there are 2 or more swizzle type candidates
2888                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
2889                     {
2890                         if (ElemLib::IsBlockCompressed(pIn->format))
2891                         {
2892                             if (allowedSwSet.sw_D)
2893                             {
2894                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2895                             }
2896                             else if (allowedSwSet.sw_S)
2897                             {
2898                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2899                             }
2900                             else
2901                             {
2902                                 ADDR_ASSERT(allowedSwSet.sw_R);
2903                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2904                             }
2905                         }
2906                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
2907                         {
2908                             if (allowedSwSet.sw_S)
2909                             {
2910                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2911                             }
2912                             else if (allowedSwSet.sw_D)
2913                             {
2914                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2915                             }
2916                             else
2917                             {
2918                                 ADDR_ASSERT(allowedSwSet.sw_R);
2919                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2920                             }
2921                         }
2922                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2923                         {
2924                             if (pIn->flags.color &&
2925                                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2926                                 allowedSwSet.sw_D)
2927                             {
2928                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2929                             }
2930                             else if (allowedSwSet.sw_S)
2931                             {
2932                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2933                             }
2934                             else if (allowedSwSet.sw_R)
2935                             {
2936                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2937                             }
2938                             else
2939                             {
2940                                 ADDR_ASSERT(allowedSwSet.sw_Z);
2941                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2942                             }
2943                         }
2944                         else
2945                         {
2946                             if (allowedSwSet.sw_R)
2947                             {
2948                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2949                             }
2950                             else if (allowedSwSet.sw_D)
2951                             {
2952                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2953                             }
2954                             else if (allowedSwSet.sw_S)
2955                             {
2956                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2957                             }
2958                             else
2959                             {
2960                                 ADDR_ASSERT(allowedSwSet.sw_Z);
2961                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2962                             }
2963                         }
2964 
2965                         // Swizzle type should be determined.
2966                         ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2967                     }
2968 
2969                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2970                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2971                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2972                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2973                 }
2974             }
2975             else
2976             {
2977                 // Invalid combination...
2978                 ADDR_ASSERT_ALWAYS();
2979                 returnCode = ADDR_INVALIDPARAMS;
2980             }
2981         }
2982         else
2983         {
2984             // Invalid combination...
2985             ADDR_ASSERT_ALWAYS();
2986             returnCode = ADDR_INVALIDPARAMS;
2987         }
2988     }
2989 
2990     return returnCode;
2991 }
2992 
2993 /**
2994 ************************************************************************************************************************
2995 *   Gfx10Lib::ComputeStereoInfo
2996 *
2997 *   @brief
2998 *       Compute height alignment and right eye pipeBankXor for stereo surface
2999 *
3000 *   @return
3001 *       Error code
3002 *
3003 ************************************************************************************************************************
3004 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3005 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3006     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3007     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3008     UINT_32*                                pRightXor   ///< Right eye xor
3009     ) const
3010 {
3011     ADDR_E_RETURNCODE ret = ADDR_OK;
3012 
3013     *pRightXor = 0;
3014 
3015     if (IsNonPrtXor(pIn->swizzleMode))
3016     {
3017         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3018         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3019         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3020         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3021         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3022 
3023         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3024         {
3025             UINT_32 yMax     = 0;
3026             UINT_32 yPosMask = 0;
3027 
3028             // First get "max y bit"
3029             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3030             {
3031                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3032 
3033                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3034                     (m_equationTable[eqIndex].addr[i].index > yMax))
3035                 {
3036                     yMax = m_equationTable[eqIndex].addr[i].index;
3037                 }
3038 
3039                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3040                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3041                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3042                 {
3043                     yMax = m_equationTable[eqIndex].xor1[i].index;
3044                 }
3045 
3046                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3047                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3048                     (m_equationTable[eqIndex].xor2[i].index > yMax))
3049                 {
3050                     yMax = m_equationTable[eqIndex].xor2[i].index;
3051                 }
3052             }
3053 
3054             // Then loop again for populating a position mask of "max Y bit"
3055             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3056             {
3057                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3058                     (m_equationTable[eqIndex].addr[i].index == yMax))
3059                 {
3060                     yPosMask |= 1u << i;
3061                 }
3062                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3063                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3064                          (m_equationTable[eqIndex].xor1[i].index == yMax))
3065                 {
3066                     yPosMask |= 1u << i;
3067                 }
3068                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3069                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3070                          (m_equationTable[eqIndex].xor2[i].index == yMax))
3071                 {
3072                     yPosMask |= 1u << i;
3073                 }
3074             }
3075 
3076             const UINT_32 additionalAlign = 1 << yMax;
3077 
3078             if (additionalAlign >= *pAlignY)
3079             {
3080                 *pAlignY = additionalAlign;
3081 
3082                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3083 
3084                 if ((alignedHeight >> yMax) & 1)
3085                 {
3086                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3087                 }
3088             }
3089         }
3090         else
3091         {
3092             ret = ADDR_INVALIDPARAMS;
3093         }
3094     }
3095 
3096     return ret;
3097 }
3098 
3099 /**
3100 ************************************************************************************************************************
3101 *   Gfx10Lib::HwlComputeSurfaceInfoTiled
3102 *
3103 *   @brief
3104 *       Internal function to calculate alignment for tiled surface
3105 *
3106 *   @return
3107 *       ADDR_E_RETURNCODE
3108 ************************************************************************************************************************
3109 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3110 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3111      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3112      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3113      ) const
3114 {
3115     ADDR_E_RETURNCODE ret;
3116 
3117     // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3118     pOut->mipChainPitch    = 0;
3119     pOut->mipChainHeight   = 0;
3120     pOut->mipChainSlice    = 0;
3121     pOut->epitchIsHeight   = FALSE;
3122 
3123     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3124     pOut->mipChainInTail   = FALSE;
3125     pOut->firstMipIdInTail = pIn->numMipLevels;
3126 
3127     if (IsBlock256b(pIn->swizzleMode))
3128     {
3129         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3130     }
3131     else
3132     {
3133         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3134     }
3135 
3136     return ret;
3137 }
3138 
3139 /**
3140 ************************************************************************************************************************
3141 *   Gfx10Lib::ComputeSurfaceInfoMicroTiled
3142 *
3143 *   @brief
3144 *       Internal function to calculate alignment for micro tiled surface
3145 *
3146 *   @return
3147 *       ADDR_E_RETURNCODE
3148 ************************************************************************************************************************
3149 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3150 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3151      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3152      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3153      ) const
3154 {
3155     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3156                                                          &pOut->blockHeight,
3157                                                          &pOut->blockSlices,
3158                                                          pIn->bpp,
3159                                                          pIn->numFrags,
3160                                                          pIn->resourceType,
3161                                                          pIn->swizzleMode);
3162 
3163     if (ret == ADDR_OK)
3164     {
3165         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3166 
3167         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3168         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3169         pOut->numSlices = pIn->numSlices;
3170         pOut->baseAlign = blockSize;
3171 
3172         if (pIn->numMipLevels > 1)
3173         {
3174             const UINT_32 mip0Width    = pIn->width;
3175             const UINT_32 mip0Height   = pIn->height;
3176             UINT_64       mipSliceSize = 0;
3177 
3178             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3179             {
3180                 UINT_32 mipWidth, mipHeight;
3181 
3182                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3183 
3184                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3185                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3186 
3187                 if (pOut->pMipInfo != NULL)
3188                 {
3189                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3190                     pOut->pMipInfo[i].height           = mipActualHeight;
3191                     pOut->pMipInfo[i].depth            = 1;
3192                     pOut->pMipInfo[i].offset           = mipSliceSize;
3193                     pOut->pMipInfo[i].mipTailOffset    = 0;
3194                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3195                 }
3196 
3197                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3198             }
3199 
3200             pOut->sliceSize = mipSliceSize;
3201             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3202         }
3203         else
3204         {
3205             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3206             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3207 
3208             if (pOut->pMipInfo != NULL)
3209             {
3210                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3211                 pOut->pMipInfo[0].height           = pOut->height;
3212                 pOut->pMipInfo[0].depth            = 1;
3213                 pOut->pMipInfo[0].offset           = 0;
3214                 pOut->pMipInfo[0].mipTailOffset    = 0;
3215                 pOut->pMipInfo[0].macroBlockOffset = 0;
3216             }
3217         }
3218 
3219     }
3220 
3221     return ret;
3222 }
3223 
3224 /**
3225 ************************************************************************************************************************
3226 *   Gfx10Lib::ComputeSurfaceInfoMacroTiled
3227 *
3228 *   @brief
3229 *       Internal function to calculate alignment for macro tiled surface
3230 *
3231 *   @return
3232 *       ADDR_E_RETURNCODE
3233 ************************************************************************************************************************
3234 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3235 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3236      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3237      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3238      ) const
3239 {
3240     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3241                                                                 &pOut->blockHeight,
3242                                                                 &pOut->blockSlices,
3243                                                                 pIn->bpp,
3244                                                                 pIn->numFrags,
3245                                                                 pIn->resourceType,
3246                                                                 pIn->swizzleMode);
3247 
3248     if (returnCode == ADDR_OK)
3249     {
3250         UINT_32 heightAlign = pOut->blockHeight;
3251 
3252         if (pIn->flags.qbStereo)
3253         {
3254             UINT_32 rightXor = 0;
3255 
3256             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3257 
3258             if (returnCode == ADDR_OK)
3259             {
3260                 pOut->pStereoInfo->rightSwizzle = rightXor;
3261             }
3262         }
3263 
3264         if (returnCode == ADDR_OK)
3265         {
3266             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3267             const UINT_32 blockSize     = 1 << blockSizeLog2;
3268 
3269             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3270             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3271             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3272             pOut->baseAlign = blockSize;
3273 
3274             if (pIn->numMipLevels > 1)
3275             {
3276                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3277                                                                 pIn->swizzleMode,
3278                                                                 pOut->blockWidth,
3279                                                                 pOut->blockHeight,
3280                                                                 pOut->blockSlices);
3281                 const UINT_32 mip0Width         = pIn->width;
3282                 const UINT_32 mip0Height        = pIn->height;
3283                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3284                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3285                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3286                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3287                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3288                 UINT_64       mipChainSliceSize = 0;
3289                 UINT_64       mipSize[MaxMipLevels];
3290                 UINT_64       mipSliceSize[MaxMipLevels];
3291 
3292                 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3293                 Dim3d fixedTailMaxDim = tailMaxDim;
3294 
3295                 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3296                 {
3297                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3298                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3299                 }
3300 
3301                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3302                 {
3303                     UINT_32 mipWidth, mipHeight, mipDepth;
3304 
3305                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3306 
3307                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3308                     {
3309                         firstMipInTail     = i;
3310                         mipChainSliceSize += blockSize / pOut->blockSlices;
3311                         break;
3312                     }
3313                     else
3314                     {
3315                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3316                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3317                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3318                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3319 
3320                         mipSize[i]         = sliceSize * depth;
3321                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3322                         mipChainSliceSize += sliceSize;
3323 
3324                         if (pOut->pMipInfo != NULL)
3325                         {
3326                             pOut->pMipInfo[i].pitch  = pitch;
3327                             pOut->pMipInfo[i].height = height;
3328                             pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3329                         }
3330                     }
3331                 }
3332 
3333                 pOut->sliceSize        = mipChainSliceSize;
3334                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3335                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3336                 pOut->firstMipIdInTail = firstMipInTail;
3337 
3338                 if (pOut->pMipInfo != NULL)
3339                 {
3340                     UINT_64 offset         = 0;
3341                     UINT_64 macroBlkOffset = 0;
3342                     UINT_32 tailMaxDepth   = 0;
3343 
3344                     if (firstMipInTail != pIn->numMipLevels)
3345                     {
3346                         UINT_32 mipWidth, mipHeight;
3347 
3348                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3349                                    &mipWidth, &mipHeight, &tailMaxDepth);
3350 
3351                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3352                         macroBlkOffset = blockSize;
3353                     }
3354 
3355                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3356                     {
3357                         pOut->pMipInfo[i].offset           = offset;
3358                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3359                         pOut->pMipInfo[i].mipTailOffset    = 0;
3360 
3361                         offset         += mipSize[i];
3362                         macroBlkOffset += mipSliceSize[i];
3363                     }
3364 
3365                     UINT_32 pitch  = tailMaxDim.w;
3366                     UINT_32 height = tailMaxDim.h;
3367                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3368 
3369                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3370 
3371                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3372                     {
3373                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3374                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3375 
3376                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3377                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3378                         pOut->pMipInfo[i].macroBlockOffset = 0;
3379 
3380                         pOut->pMipInfo[i].pitch  = pitch;
3381                         pOut->pMipInfo[i].height = height;
3382                         pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3383 
3384                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3385                                        ((mipOffset >> 10) & 2)  |
3386                                        ((mipOffset >> 11) & 4)  |
3387                                        ((mipOffset >> 12) & 8)  |
3388                                        ((mipOffset >> 13) & 16) |
3389                                        ((mipOffset >> 14) & 32);
3390                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3391                                        ((mipOffset >> 9)  & 2)  |
3392                                        ((mipOffset >> 10) & 4)  |
3393                                        ((mipOffset >> 11) & 8)  |
3394                                        ((mipOffset >> 12) & 16) |
3395                                        ((mipOffset >> 13) & 32);
3396 
3397                         if (blockSizeLog2 & 1)
3398                         {
3399                             const UINT_32 temp = mipX;
3400                             mipX = mipY;
3401                             mipY = temp;
3402 
3403                             if (index & 1)
3404                             {
3405                                 mipY = (mipY << 1) | (mipX & 1);
3406                                 mipX = mipX >> 1;
3407                             }
3408                         }
3409 
3410                         if (isThin)
3411                         {
3412                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3413                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3414                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3415 
3416                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3417                             height = Max(height >> 1, Block256_2d[index].h);
3418                         }
3419                         else
3420                         {
3421                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3422                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3423                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3424 
3425                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3426                             height = Max(height >> 1, Block256_3d[index].h);
3427                         }
3428                     }
3429                 }
3430             }
3431             else
3432             {
3433                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3434                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3435 
3436                 if (pOut->pMipInfo != NULL)
3437                 {
3438                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3439                     pOut->pMipInfo[0].height           = pOut->height;
3440                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3441                     pOut->pMipInfo[0].offset           = 0;
3442                     pOut->pMipInfo[0].mipTailOffset    = 0;
3443                     pOut->pMipInfo[0].macroBlockOffset = 0;
3444                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3445                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3446                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3447                 }
3448             }
3449         }
3450     }
3451 
3452     return returnCode;
3453 }
3454 
3455 /**
3456 ************************************************************************************************************************
3457 *   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3458 *
3459 *   @brief
3460 *       Internal function to calculate address from coord for tiled swizzle surface
3461 *
3462 *   @return
3463 *       ADDR_E_RETURNCODE
3464 ************************************************************************************************************************
3465 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3466 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3467      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3468      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3469      ) const
3470 {
3471     ADDR_E_RETURNCODE ret;
3472 
3473     if (IsBlock256b(pIn->swizzleMode))
3474     {
3475         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3476     }
3477     else
3478     {
3479         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3480     }
3481 
3482     return ret;
3483 }
3484 
3485 /**
3486 ************************************************************************************************************************
3487 *   Gfx10Lib::ComputeOffsetFromEquation
3488 *
3489 *   @brief
3490 *       Compute offset from equation
3491 *
3492 *   @return
3493 *       Offset
3494 ************************************************************************************************************************
3495 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3496 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3497     const ADDR_EQUATION* pEq,   ///< Equation
3498     UINT_32              x,     ///< x coord in bytes
3499     UINT_32              y,     ///< y coord in pixel
3500     UINT_32              z      ///< z coord in slice
3501     ) const
3502 {
3503     UINT_32 offset = 0;
3504 
3505     for (UINT_32 i = 0; i < pEq->numBits; i++)
3506     {
3507         UINT_32 v = 0;
3508 
3509         for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
3510         {
3511             if (pEq->comps[c][i].valid)
3512             {
3513                 if (pEq->comps[c][i].channel == 0)
3514                 {
3515                     v ^= (x >> pEq->comps[c][i].index) & 1;
3516                 }
3517                 else if (pEq->comps[c][i].channel == 1)
3518                 {
3519                     v ^= (y >> pEq->comps[c][i].index) & 1;
3520                 }
3521                 else
3522                 {
3523                     ADDR_ASSERT(pEq->comps[c][i].channel == 2);
3524                     v ^= (z >> pEq->comps[c][i].index) & 1;
3525                 }
3526             }
3527         }
3528 
3529         offset |= (v << i);
3530     }
3531 
3532     return offset;
3533 }
3534 
3535 /**
3536 ************************************************************************************************************************
3537 *   Gfx10Lib::ComputeOffsetFromSwizzlePattern
3538 *
3539 *   @brief
3540 *       Compute offset from swizzle pattern
3541 *
3542 *   @return
3543 *       Offset
3544 ************************************************************************************************************************
3545 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3546 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3547     const UINT_64* pPattern,    ///< Swizzle pattern
3548     UINT_32        numBits,     ///< Number of bits in pattern
3549     UINT_32        x,           ///< x coord in pixel
3550     UINT_32        y,           ///< y coord in pixel
3551     UINT_32        z,           ///< z coord in slice
3552     UINT_32        s            ///< sample id
3553     ) const
3554 {
3555     UINT_32                 offset          = 0;
3556     const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3557 
3558     for (UINT_32 i = 0; i < numBits; i++)
3559     {
3560         UINT_32 v = 0;
3561 
3562         if (pSwizzlePattern[i].x != 0)
3563         {
3564             UINT_16 mask  = pSwizzlePattern[i].x;
3565             UINT_32 xBits = x;
3566 
3567             while (mask != 0)
3568             {
3569                 if (mask & 1)
3570                 {
3571                     v ^= xBits & 1;
3572                 }
3573 
3574                 xBits >>= 1;
3575                 mask  >>= 1;
3576             }
3577         }
3578 
3579         if (pSwizzlePattern[i].y != 0)
3580         {
3581             UINT_16 mask  = pSwizzlePattern[i].y;
3582             UINT_32 yBits = y;
3583 
3584             while (mask != 0)
3585             {
3586                 if (mask & 1)
3587                 {
3588                     v ^= yBits & 1;
3589                 }
3590 
3591                 yBits >>= 1;
3592                 mask  >>= 1;
3593             }
3594         }
3595 
3596         if (pSwizzlePattern[i].z != 0)
3597         {
3598             UINT_16 mask  = pSwizzlePattern[i].z;
3599             UINT_32 zBits = z;
3600 
3601             while (mask != 0)
3602             {
3603                 if (mask & 1)
3604                 {
3605                     v ^= zBits & 1;
3606                 }
3607 
3608                 zBits >>= 1;
3609                 mask  >>= 1;
3610             }
3611         }
3612 
3613         if (pSwizzlePattern[i].s != 0)
3614         {
3615             UINT_16 mask  = pSwizzlePattern[i].s;
3616             UINT_32 sBits = s;
3617 
3618             while (mask != 0)
3619             {
3620                 if (mask & 1)
3621                 {
3622                     v ^= sBits & 1;
3623                 }
3624 
3625                 sBits >>= 1;
3626                 mask  >>= 1;
3627             }
3628         }
3629 
3630         offset |= (v << i);
3631     }
3632 
3633     return offset;
3634 }
3635 
3636 /**
3637 ************************************************************************************************************************
3638 *   Gfx10Lib::GetSwizzlePatternInfo
3639 *
3640 *   @brief
3641 *       Get swizzle pattern
3642 *
3643 *   @return
3644 *       Swizzle pattern information
3645 ************************************************************************************************************************
3646 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3647 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
3648     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
3649     AddrResourceType resourceType,      ///< Resource type
3650     UINT_32          elemLog2,          ///< Element size in bytes log2
3651     UINT_32          numFrag            ///< Number of fragment
3652     ) const
3653 {
3654     // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from
3655     // the right location
3656     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3657     const ADDR_SW_PATINFO* patInfo     = NULL;
3658     const UINT_32          swizzleMask = 1 << swizzleMode;
3659 
3660     if (IsBlockVariable(swizzleMode))
3661     {
3662         if (m_blockVarSizeLog2 != 0)
3663         {
3664             ADDR_ASSERT(m_settings.supportRbPlus);
3665 
3666             if (IsRtOptSwizzle(swizzleMode))
3667             {
3668                 if (numFrag == 1)
3669                 {
3670                     patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
3671                 }
3672                 else if (numFrag == 2)
3673                 {
3674                     patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
3675                 }
3676                 else if (numFrag == 4)
3677                 {
3678                     patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
3679                 }
3680                 else
3681                 {
3682                     ADDR_ASSERT(numFrag == 8);
3683                     patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
3684                 }
3685             }
3686             else if (IsZOrderSwizzle(swizzleMode))
3687             {
3688                 if (numFrag == 1)
3689                 {
3690                     patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
3691                 }
3692                 else if (numFrag == 2)
3693                 {
3694                     patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
3695                 }
3696                 else if (numFrag == 4)
3697                 {
3698                     patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
3699                 }
3700                 else
3701                 {
3702                     ADDR_ASSERT(numFrag == 8);
3703                     patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
3704                 }
3705             }
3706         }
3707     }
3708     else if (IsLinear(swizzleMode) == FALSE)
3709     {
3710         if (resourceType == ADDR_RSRC_TEX_3D)
3711         {
3712             ADDR_ASSERT(numFrag == 1);
3713 
3714             if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
3715             {
3716                 if (IsRtOptSwizzle(swizzleMode))
3717                 {
3718                     if (swizzleMode == ADDR_SW_4KB_R_X)
3719                     {
3720                         patInfo = NULL;
3721                     }
3722                     else
3723                     {
3724                         patInfo = m_settings.supportRbPlus ?
3725                                   GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3726                     }
3727                 }
3728                 else if (IsZOrderSwizzle(swizzleMode))
3729                 {
3730                     patInfo = m_settings.supportRbPlus ?
3731                               GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3732                 }
3733                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3734                 {
3735                     ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3736                     patInfo = m_settings.supportRbPlus ?
3737                               GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
3738                 }
3739                 else
3740                 {
3741                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3742 
3743                     if (IsBlock4kb(swizzleMode))
3744                     {
3745                         if (swizzleMode == ADDR_SW_4KB_S)
3746                         {
3747                             patInfo = m_settings.supportRbPlus ?
3748                                       GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
3749                         }
3750                         else
3751                         {
3752                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3753                             patInfo = m_settings.supportRbPlus ?
3754                                       GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
3755                         }
3756                     }
3757                     else
3758                     {
3759                         if (swizzleMode == ADDR_SW_64KB_S)
3760                         {
3761                             patInfo = m_settings.supportRbPlus ?
3762                                       GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
3763                         }
3764                         else if (swizzleMode == ADDR_SW_64KB_S_X)
3765                         {
3766                             patInfo = m_settings.supportRbPlus ?
3767                                       GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
3768                         }
3769                         else
3770                         {
3771                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3772                             patInfo = m_settings.supportRbPlus ?
3773                                       GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
3774                         }
3775                     }
3776                 }
3777             }
3778         }
3779         else
3780         {
3781             if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
3782             {
3783                 if (IsBlock256b(swizzleMode))
3784                 {
3785                     if (swizzleMode == ADDR_SW_256B_S)
3786                     {
3787                         patInfo = m_settings.supportRbPlus ?
3788                                   GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
3789                     }
3790                     else
3791                     {
3792                         ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3793                         patInfo = m_settings.supportRbPlus ?
3794                                   GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
3795                     }
3796                 }
3797                 else if (IsBlock4kb(swizzleMode))
3798                 {
3799                     if (IsStandardSwizzle(resourceType, swizzleMode))
3800                     {
3801                         if (swizzleMode == ADDR_SW_4KB_S)
3802                         {
3803                             patInfo = m_settings.supportRbPlus ?
3804                                       GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
3805                         }
3806                         else
3807                         {
3808                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3809                             patInfo = m_settings.supportRbPlus ?
3810                                       GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
3811                         }
3812                     }
3813                     else
3814                     {
3815                         if (swizzleMode == ADDR_SW_4KB_D)
3816                         {
3817                             patInfo = m_settings.supportRbPlus ?
3818                                       GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
3819                         }
3820                         else if (swizzleMode == ADDR_SW_4KB_R_X)
3821                         {
3822                             patInfo = NULL;
3823                         }
3824                         else
3825                         {
3826                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3827                             patInfo = m_settings.supportRbPlus ?
3828                                       GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
3829                         }
3830                     }
3831                 }
3832                 else
3833                 {
3834                     if (IsRtOptSwizzle(swizzleMode))
3835                     {
3836                         if (numFrag == 1)
3837                         {
3838                             patInfo = m_settings.supportRbPlus ?
3839                                       GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3840                         }
3841                         else if (numFrag == 2)
3842                         {
3843                             patInfo = m_settings.supportRbPlus ?
3844                                       GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
3845                         }
3846                         else if (numFrag == 4)
3847                         {
3848                             patInfo = m_settings.supportRbPlus ?
3849                                       GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
3850                         }
3851                         else
3852                         {
3853                             ADDR_ASSERT(numFrag == 8);
3854                             patInfo = m_settings.supportRbPlus ?
3855                                       GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
3856                         }
3857                     }
3858                     else if (IsZOrderSwizzle(swizzleMode))
3859                     {
3860                         if (numFrag == 1)
3861                         {
3862                             patInfo = m_settings.supportRbPlus ?
3863                                       GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3864                         }
3865                         else if (numFrag == 2)
3866                         {
3867                             patInfo = m_settings.supportRbPlus ?
3868                                       GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
3869                         }
3870                         else if (numFrag == 4)
3871                         {
3872                             patInfo = m_settings.supportRbPlus ?
3873                                       GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
3874                         }
3875                         else
3876                         {
3877                             ADDR_ASSERT(numFrag == 8);
3878                             patInfo = m_settings.supportRbPlus ?
3879                                       GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
3880                         }
3881                     }
3882                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
3883                     {
3884                         if (swizzleMode == ADDR_SW_64KB_D)
3885                         {
3886                             patInfo = m_settings.supportRbPlus ?
3887                                       GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
3888                         }
3889                         else if (swizzleMode == ADDR_SW_64KB_D_X)
3890                         {
3891                             patInfo = m_settings.supportRbPlus ?
3892                                       GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
3893                         }
3894                         else
3895                         {
3896                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3897                             patInfo = m_settings.supportRbPlus ?
3898                                       GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
3899                         }
3900                     }
3901                     else
3902                     {
3903                         if (swizzleMode == ADDR_SW_64KB_S)
3904                         {
3905                             patInfo = m_settings.supportRbPlus ?
3906                                       GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
3907                         }
3908                         else if (swizzleMode == ADDR_SW_64KB_S_X)
3909                         {
3910                             patInfo = m_settings.supportRbPlus ?
3911                                       GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
3912                         }
3913                         else
3914                         {
3915                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3916                             patInfo = m_settings.supportRbPlus ?
3917                                       GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
3918                         }
3919                     }
3920                 }
3921             }
3922         }
3923     }
3924 
3925     return (patInfo != NULL) ? &patInfo[index] : NULL;
3926 }
3927 
3928 /**
3929 ************************************************************************************************************************
3930 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3931 *
3932 *   @brief
3933 *       Internal function to calculate address from coord for micro tiled swizzle surface
3934 *
3935 *   @return
3936 *       ADDR_E_RETURNCODE
3937 ************************************************************************************************************************
3938 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3939 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3940      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3941      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3942      ) const
3943 {
3944     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
3945     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3946     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
3947     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3948 
3949     localIn.swizzleMode  = pIn->swizzleMode;
3950     localIn.flags        = pIn->flags;
3951     localIn.resourceType = pIn->resourceType;
3952     localIn.bpp          = pIn->bpp;
3953     localIn.width        = Max(pIn->unalignedWidth,  1u);
3954     localIn.height       = Max(pIn->unalignedHeight, 1u);
3955     localIn.numSlices    = Max(pIn->numSlices,       1u);
3956     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
3957     localIn.numSamples   = Max(pIn->numSamples,      1u);
3958     localIn.numFrags     = Max(pIn->numFrags,        1u);
3959     localOut.pMipInfo    = mipInfo;
3960 
3961     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3962 
3963     if (ret == ADDR_OK)
3964     {
3965         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3966         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3967         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
3968         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
3969 
3970         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3971         {
3972             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3973             const UINT_32 yb           = pIn->y / localOut.blockHeight;
3974             const UINT_32 xb           = pIn->x / localOut.blockWidth;
3975             const UINT_32 blockIndex   = yb * pb + xb;
3976             const UINT_32 blockSize    = 256;
3977             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3978                                                                    pIn->x << elemLog2,
3979                                                                    pIn->y,
3980                                                                    0);
3981             pOut->addr = localOut.sliceSize * pIn->slice +
3982                          mipInfo[pIn->mipId].macroBlockOffset +
3983                          (blockIndex * blockSize) +
3984                          blk256Offset;
3985         }
3986         else
3987         {
3988             ret = ADDR_INVALIDPARAMS;
3989         }
3990     }
3991 
3992     return ret;
3993 }
3994 
3995 /**
3996 ************************************************************************************************************************
3997 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3998 *
3999 *   @brief
4000 *       Internal function to calculate address from coord for macro tiled swizzle surface
4001 *
4002 *   @return
4003 *       ADDR_E_RETURNCODE
4004 ************************************************************************************************************************
4005 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4006 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4007      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4008      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4009      ) const
4010 {
4011     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4012     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4013     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4014     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4015 
4016     localIn.swizzleMode  = pIn->swizzleMode;
4017     localIn.flags        = pIn->flags;
4018     localIn.resourceType = pIn->resourceType;
4019     localIn.bpp          = pIn->bpp;
4020     localIn.width        = Max(pIn->unalignedWidth,  1u);
4021     localIn.height       = Max(pIn->unalignedHeight, 1u);
4022     localIn.numSlices    = Max(pIn->numSlices,       1u);
4023     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4024     localIn.numSamples   = Max(pIn->numSamples,      1u);
4025     localIn.numFrags     = Max(pIn->numFrags,        1u);
4026     localOut.pMipInfo    = mipInfo;
4027 
4028     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4029 
4030     if (ret == ADDR_OK)
4031     {
4032         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4033         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4034         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4035         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4036         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4037         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4038                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4039 
4040         if (localIn.numFrags > 1)
4041         {
4042             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4043                                                                     pIn->resourceType,
4044                                                                     elemLog2,
4045                                                                     localIn.numFrags);
4046 
4047             if (pPatInfo != NULL)
4048             {
4049                 const UINT_32 pb        = localOut.pitch / localOut.blockWidth;
4050                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4051                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4052                 const UINT_64 blkIdx    = yb * pb + xb;
4053 
4054                 ADDR_BIT_SETTING fullSwizzlePattern[20];
4055                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4056 
4057                 const UINT_32 blkOffset =
4058                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4059                                                     blkSizeLog2,
4060                                                     pIn->x,
4061                                                     pIn->y,
4062                                                     pIn->slice,
4063                                                     pIn->sample);
4064 
4065                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4066                              (blkIdx << blkSizeLog2) +
4067                              (blkOffset ^ pipeBankXor);
4068             }
4069             else
4070             {
4071                 ret = ADDR_INVALIDPARAMS;
4072             }
4073         }
4074         else
4075         {
4076             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4077             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4078             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4079 
4080             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4081             {
4082                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4083                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4084                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4085                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4086                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4087                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4088                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4089                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4090                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4091                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4092                 const UINT_64 blkIdx    = yb * pb + xb;
4093                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4094                                                                     x << elemLog2,
4095                                                                     y,
4096                                                                     z);
4097                 pOut->addr = sliceSize * sliceId +
4098                              mipInfo[pIn->mipId].macroBlockOffset +
4099                              (blkIdx << blkSizeLog2) +
4100                              (blkOffset ^ pipeBankXor);
4101             }
4102             else
4103             {
4104                 ret = ADDR_INVALIDPARAMS;
4105             }
4106         }
4107     }
4108 
4109     return ret;
4110 }
4111 
4112 /**
4113 ************************************************************************************************************************
4114 *   Gfx10Lib::HwlComputeMaxBaseAlignments
4115 *
4116 *   @brief
4117 *       Gets maximum alignments
4118 *   @return
4119 *       maximum alignments
4120 ************************************************************************************************************************
4121 */
HwlComputeMaxBaseAlignments() const4122 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4123 {
4124     return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4125 }
4126 
4127 /**
4128 ************************************************************************************************************************
4129 *   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4130 *
4131 *   @brief
4132 *       Gets maximum alignments for metadata
4133 *   @return
4134 *       maximum alignments for metadata
4135 ************************************************************************************************************************
4136 */
HwlComputeMaxMetaBaseAlignments() const4137 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4138 {
4139     Dim3d metaBlk;
4140 
4141     const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4142     {
4143         ADDR_SW_64KB_Z_X,
4144         m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4145     };
4146 
4147     UINT_32 maxBaseAlignHtile = 0;
4148     UINT_32 maxBaseAlignCmask = 0;
4149 
4150     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4151     {
4152         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4153         {
4154             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4155             {
4156                 // Max base alignment for Htile
4157                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4158                                                                 ADDR_RSRC_TEX_2D,
4159                                                                 ValidSwizzleModeForXmask[swIdx],
4160                                                                 bppLog2,
4161                                                                 numFragLog2,
4162                                                                 TRUE,
4163                                                                 &metaBlk);
4164 
4165                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4166             }
4167         }
4168 
4169         // Max base alignment for Cmask
4170         const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4171                                                         ADDR_RSRC_TEX_2D,
4172                                                         ValidSwizzleModeForXmask[swIdx],
4173                                                         0,
4174                                                         0,
4175                                                         TRUE,
4176                                                         &metaBlk);
4177 
4178         maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4179     }
4180 
4181     // Max base alignment for 2D Dcc
4182     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4183     {
4184         ADDR_SW_64KB_S_X,
4185         ADDR_SW_64KB_D_X,
4186         ADDR_SW_64KB_R_X,
4187         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4188     };
4189 
4190     UINT_32 maxBaseAlignDcc2D = 0;
4191 
4192     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4193     {
4194         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4195         {
4196             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4197             {
4198                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4199                                                              ADDR_RSRC_TEX_2D,
4200                                                              ValidSwizzleModeForDcc2D[swIdx],
4201                                                              bppLog2,
4202                                                              numFragLog2,
4203                                                              TRUE,
4204                                                              &metaBlk);
4205 
4206                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4207             }
4208         }
4209     }
4210 
4211     // Max base alignment for 3D Dcc
4212     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4213     {
4214         ADDR_SW_64KB_Z_X,
4215         ADDR_SW_64KB_S_X,
4216         ADDR_SW_64KB_D_X,
4217         ADDR_SW_64KB_R_X,
4218         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4219     };
4220 
4221     UINT_32 maxBaseAlignDcc3D = 0;
4222 
4223     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4224     {
4225         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4226         {
4227             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4228                                                          ADDR_RSRC_TEX_3D,
4229                                                          ValidSwizzleModeForDcc3D[swIdx],
4230                                                          bppLog2,
4231                                                          0,
4232                                                          TRUE,
4233                                                          &metaBlk);
4234 
4235             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4236         }
4237     }
4238 
4239     return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4240 }
4241 
4242 /**
4243 ************************************************************************************************************************
4244 *   Gfx10Lib::GetMetaElementSizeLog2
4245 *
4246 *   @brief
4247 *       Gets meta data element size log2
4248 *   @return
4249 *       Meta data element size log2
4250 ************************************************************************************************************************
4251 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4252 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4253     Gfx10DataType dataType) ///< Data surface type
4254 {
4255     INT_32 elemSizeLog2 = 0;
4256 
4257     if (dataType == Gfx10DataColor)
4258     {
4259         elemSizeLog2 = 0;
4260     }
4261     else if (dataType == Gfx10DataDepthStencil)
4262     {
4263         elemSizeLog2 = 2;
4264     }
4265     else
4266     {
4267         ADDR_ASSERT(dataType == Gfx10DataFmask);
4268         elemSizeLog2 = -1;
4269     }
4270 
4271     return elemSizeLog2;
4272 }
4273 
4274 /**
4275 ************************************************************************************************************************
4276 *   Gfx10Lib::GetMetaCacheSizeLog2
4277 *
4278 *   @brief
4279 *       Gets meta data cache line size log2
4280 *   @return
4281 *       Meta data cache line size log2
4282 ************************************************************************************************************************
4283 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4284 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4285     Gfx10DataType dataType) ///< Data surface type
4286 {
4287     INT_32 cacheSizeLog2 = 0;
4288 
4289     if (dataType == Gfx10DataColor)
4290     {
4291         cacheSizeLog2 = 6;
4292     }
4293     else if (dataType == Gfx10DataDepthStencil)
4294     {
4295         cacheSizeLog2 = 8;
4296     }
4297     else
4298     {
4299         ADDR_ASSERT(dataType == Gfx10DataFmask);
4300         cacheSizeLog2 = 8;
4301     }
4302     return cacheSizeLog2;
4303 }
4304 
4305 /**
4306 ************************************************************************************************************************
4307 *   Gfx10Lib::HwlComputeSurfaceInfoLinear
4308 *
4309 *   @brief
4310 *       Internal function to calculate alignment for linear surface
4311 *
4312 *   @return
4313 *       ADDR_E_RETURNCODE
4314 ************************************************************************************************************************
4315 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4316 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4317      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4318      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4319      ) const
4320 {
4321     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4322 
4323     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4324     {
4325         returnCode = ADDR_INVALIDPARAMS;
4326     }
4327     else
4328     {
4329         const UINT_32 elementBytes = pIn->bpp >> 3;
4330         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4331         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4332         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4333         UINT_32       actualHeight = pIn->height;
4334         UINT_64       sliceSize    = 0;
4335 
4336         if (pIn->numMipLevels > 1)
4337         {
4338             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4339             {
4340                 UINT_32 mipWidth, mipHeight;
4341 
4342                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4343 
4344                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4345 
4346                 if (pOut->pMipInfo != NULL)
4347                 {
4348                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4349                     pOut->pMipInfo[i].height           = mipHeight;
4350                     pOut->pMipInfo[i].depth            = mipDepth;
4351                     pOut->pMipInfo[i].offset           = sliceSize;
4352                     pOut->pMipInfo[i].mipTailOffset    = 0;
4353                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4354                 }
4355 
4356                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4357             }
4358         }
4359         else
4360         {
4361             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4362 
4363             if (returnCode == ADDR_OK)
4364             {
4365                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4366 
4367                 if (pOut->pMipInfo != NULL)
4368                 {
4369                     pOut->pMipInfo[0].pitch            = pitch;
4370                     pOut->pMipInfo[0].height           = actualHeight;
4371                     pOut->pMipInfo[0].depth            = mipDepth;
4372                     pOut->pMipInfo[0].offset           = 0;
4373                     pOut->pMipInfo[0].mipTailOffset    = 0;
4374                     pOut->pMipInfo[0].macroBlockOffset = 0;
4375                 }
4376             }
4377         }
4378 
4379         if (returnCode == ADDR_OK)
4380         {
4381             pOut->pitch          = pitch;
4382             pOut->height         = actualHeight;
4383             pOut->numSlices      = pIn->numSlices;
4384             pOut->sliceSize      = sliceSize;
4385             pOut->surfSize       = sliceSize * pOut->numSlices;
4386             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4387             pOut->blockWidth     = pitchAlign;
4388             pOut->blockHeight    = 1;
4389             pOut->blockSlices    = 1;
4390 
4391             // Following members are useless on GFX10
4392             pOut->mipChainPitch  = 0;
4393             pOut->mipChainHeight = 0;
4394             pOut->mipChainSlice  = 0;
4395             pOut->epitchIsHeight = FALSE;
4396 
4397             // Post calculation validate
4398             ADDR_ASSERT(pOut->sliceSize > 0);
4399         }
4400     }
4401 
4402     return returnCode;
4403 }
4404 
4405 } // V2
4406 } // Addr
4407