• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE
23 *
24 ***********************************************************************************************************************/
25 
26 /**
27 ************************************************************************************************************************
28 * @file  gfx10addrlib.cpp
29 * @brief Contain the implementation for the Gfx10Lib class.
30 ************************************************************************************************************************
31 */
32 
33 #include "gfx10addrlib.h"
34 #include "gfx10_gb_reg.h"
35 
36 #include "amdgpu_asic_addr.h"
37 
38 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 
41 namespace Addr
42 {
43 /**
44 ************************************************************************************************************************
45 *   Gfx10HwlInit
46 *
47 *   @brief
48 *       Creates an Gfx10Lib object.
49 *
50 *   @return
51 *       Returns an Gfx10Lib object pointer.
52 ************************************************************************************************************************
53 */
Gfx10HwlInit(const Client * pClient)54 Addr::Lib* Gfx10HwlInit(const Client* pClient)
55 {
56     return V2::Gfx10Lib::CreateObj(pClient);
57 }
58 
59 namespace V2
60 {
61 
62 ////////////////////////////////////////////////////////////////////////////////////////////////////
63 //                               Static Const Member
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65 
66 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
67 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
68     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
69     {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
70     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
71     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
72 
73     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
74     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
75     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
76     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
77 
78     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
79     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
80     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
81     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
82 
83     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
84     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
85     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
86     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
87 
88     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
89     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
90     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
91     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
92 
93     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
94     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
95     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
96     {{0,    0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_4KB_R_X
97 
98     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
99     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
100     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
101     {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X
102 
103     {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_VAR_Z_X
104     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
105     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
106     {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_VAR_R_X
107     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
108 };
109 
110 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
111 
112 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
113 const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
114 
115 /**
116 ************************************************************************************************************************
117 *   Gfx10Lib::Gfx10Lib
118 *
119 *   @brief
120 *       Constructor
121 *
122 ************************************************************************************************************************
123 */
Gfx10Lib(const Client * pClient)124 Gfx10Lib::Gfx10Lib(const Client* pClient)
125     :
126     Lib(pClient),
127     m_numPkrLog2(0),
128     m_numSaLog2(0),
129     m_colorBaseIndex(0),
130     m_xmaskBaseIndex(0),
131     m_dccBaseIndex(0)
132 {
133     memset(&m_settings, 0, sizeof(m_settings));
134     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
135 }
136 
137 /**
138 ************************************************************************************************************************
139 *   Gfx10Lib::~Gfx10Lib
140 *
141 *   @brief
142 *       Destructor
143 ************************************************************************************************************************
144 */
~Gfx10Lib()145 Gfx10Lib::~Gfx10Lib()
146 {
147 }
148 
149 /**
150 ************************************************************************************************************************
151 *   Gfx10Lib::HwlComputeHtileInfo
152 *
153 *   @brief
154 *       Interface function stub of AddrComputeHtilenfo
155 *
156 *   @return
157 *       ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
161     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
162     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
163     ) const
164 {
165     ADDR_E_RETURNCODE ret = ADDR_OK;
166 
167     if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
168          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
169         (pIn->hTileFlags.pipeAligned != TRUE))
170     {
171         ret = ADDR_INVALIDPARAMS;
172     }
173     else
174     {
175         Dim3d         metaBlk     = {};
176         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
177                                                    ADDR_RSRC_TEX_2D,
178                                                    pIn->swizzleMode,
179                                                    0,
180                                                    0,
181                                                    TRUE,
182                                                    &metaBlk);
183 
184         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
185         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
186         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
187         pOut->metaBlkWidth  = metaBlk.w;
188         pOut->metaBlkHeight = metaBlk.h;
189 
190         if (pIn->numMipLevels > 1)
191         {
192             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
193 
194             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
195 
196             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
197             {
198                 UINT_32 mipWidth, mipHeight;
199 
200                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
201 
202                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
203                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
204 
205                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
206                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
207                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
208 
209                 if (pOut->pMipInfo != NULL)
210                 {
211                     pOut->pMipInfo[i].inMiptail = FALSE;
212                     pOut->pMipInfo[i].offset    = offset;
213                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
214                 }
215 
216                 offset += mipSliceSize;
217             }
218 
219             pOut->sliceSize          = offset;
220             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
221             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
222 
223             if (pOut->pMipInfo != NULL)
224             {
225                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
226                 {
227                     pOut->pMipInfo[i].inMiptail = TRUE;
228                     pOut->pMipInfo[i].offset    = 0;
229                     pOut->pMipInfo[i].sliceSize = 0;
230                 }
231 
232                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
233                 {
234                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
235                 }
236             }
237         }
238         else
239         {
240             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
241             const UINT_32 heightInM = pOut->height / metaBlk.h;
242 
243             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
244             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
245             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
246 
247             if (pOut->pMipInfo != NULL)
248             {
249                 pOut->pMipInfo[0].inMiptail = FALSE;
250                 pOut->pMipInfo[0].offset    = 0;
251                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
252             }
253         }
254 
255         // Get the HTILE address equation (copied from HtileAddrFromCoord).
256         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
257         const UINT_32 index = m_xmaskBaseIndex;
258         const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
259 
260         ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
261         pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
262     }
263 
264     return ret;
265 }
266 
267 /**
268 ************************************************************************************************************************
269 *   Gfx10Lib::HwlComputeCmaskInfo
270 *
271 *   @brief
272 *       Interface function stub of AddrComputeCmaskInfo
273 *
274 *   @return
275 *       ADDR_E_RETURNCODE
276 ************************************************************************************************************************
277 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const278 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
279     const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
280     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
281     ) const
282 {
283     ADDR_E_RETURNCODE ret = ADDR_OK;
284 
285     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
286         (pIn->cMaskFlags.pipeAligned != TRUE)   ||
287         ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
288          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
289     {
290         ret = ADDR_INVALIDPARAMS;
291     }
292     else
293     {
294         Dim3d         metaBlk     = {};
295         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
296                                                    ADDR_RSRC_TEX_2D,
297                                                    pIn->swizzleMode,
298                                                    0,
299                                                    0,
300                                                    TRUE,
301                                                    &metaBlk);
302 
303         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
304         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
305         pOut->baseAlign     = metaBlkSize;
306         pOut->metaBlkWidth  = metaBlk.w;
307         pOut->metaBlkHeight = metaBlk.h;
308 
309         if (pIn->numMipLevels > 1)
310         {
311             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
312 
313             UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
314 
315             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
316             {
317                 UINT_32 mipWidth, mipHeight;
318 
319                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
320 
321                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
322                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
323 
324                 const UINT_32 pitchInM  = mipWidth  / metaBlk.w;
325                 const UINT_32 heightInM = mipHeight / metaBlk.h;
326 
327                 if (pOut->pMipInfo != NULL)
328                 {
329                     pOut->pMipInfo[i].inMiptail = FALSE;
330                     pOut->pMipInfo[i].offset    = metaBlkPerSlice * metaBlkSize;
331                     pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
332                 }
333 
334                 metaBlkPerSlice += pitchInM * heightInM;
335             }
336 
337             pOut->metaBlkNumPerSlice = metaBlkPerSlice;
338 
339             if (pOut->pMipInfo != NULL)
340             {
341                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
342                 {
343                     pOut->pMipInfo[i].inMiptail = TRUE;
344                     pOut->pMipInfo[i].offset    = 0;
345                     pOut->pMipInfo[i].sliceSize = 0;
346                 }
347 
348                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
349                 {
350                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
351                 }
352             }
353         }
354         else
355         {
356             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
357             const UINT_32 heightInM = pOut->height / metaBlk.h;
358 
359             pOut->metaBlkNumPerSlice = pitchInM * heightInM;
360 
361             if (pOut->pMipInfo != NULL)
362             {
363                 pOut->pMipInfo[0].inMiptail = FALSE;
364                 pOut->pMipInfo[0].offset    = 0;
365                 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
366             }
367         }
368 
369         pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlkSize;
370         pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
371 
372         // Get the CMASK address equation (copied from CmaskAddrFromCoord)
373         const UINT_32  fmaskBpp      = GetFmaskBpp(1, 1);
374         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
375         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
376         const UINT_8*  patIdxTable   =
377             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
378             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
379 
380         ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
381         pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
382     }
383 
384     return ret;
385 }
386 
387 /**
388 ************************************************************************************************************************
389 *   Gfx10Lib::HwlComputeDccInfo
390 *
391 *   @brief
392 *       Interface function to compute DCC key info
393 *
394 *   @return
395 *       ADDR_E_RETURNCODE
396 ************************************************************************************************************************
397 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const398 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
399     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
400     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
401     ) const
402 {
403     ADDR_E_RETURNCODE ret = ADDR_OK;
404 
405     if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
406     {
407         // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
408         // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
409         ret = ADDR_INVALIDPARAMS;
410     }
411     else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
412     {
413         // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
414         ret = ADDR_INVALIDPARAMS;
415     }
416     else
417     {
418         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
419 
420         {
421             // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
422             ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
423 
424             const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
425 
426             pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
427             pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
428             pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
429         }
430 
431         if (ret == ADDR_OK)
432         {
433             Dim3d         metaBlk     = {};
434             const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
435             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
436                                                        pIn->resourceType,
437                                                        pIn->swizzleMode,
438                                                        elemLog2,
439                                                        numFragLog2,
440                                                        pIn->dccKeyFlags.pipeAligned,
441                                                        &metaBlk);
442 
443             pOut->dccRamBaseAlign   = metaBlkSize;
444             pOut->metaBlkWidth      = metaBlk.w;
445             pOut->metaBlkHeight     = metaBlk.h;
446             pOut->metaBlkDepth      = metaBlk.d;
447             pOut->metaBlkSize       = metaBlkSize;
448 
449             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
450             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
451             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
452 
453             if (pIn->numMipLevels > 1)
454             {
455                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
456 
457                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
458 
459                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
460                 {
461                     UINT_32 mipWidth, mipHeight;
462 
463                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
464 
465                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
466                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
467 
468                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
469                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
470                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
471 
472                     if (pOut->pMipInfo != NULL)
473                     {
474                         pOut->pMipInfo[i].inMiptail = FALSE;
475                         pOut->pMipInfo[i].offset    = offset;
476                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
477                     }
478 
479                     offset += mipSliceSize;
480                 }
481 
482                 pOut->dccRamSliceSize    = offset;
483                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
484                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
485 
486                 if (pOut->pMipInfo != NULL)
487                 {
488                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
489                     {
490                         pOut->pMipInfo[i].inMiptail = TRUE;
491                         pOut->pMipInfo[i].offset    = 0;
492                         pOut->pMipInfo[i].sliceSize = 0;
493                     }
494 
495                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
496                     {
497                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
498                     }
499                 }
500             }
501             else
502             {
503                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
504                 const UINT_32 heightInM = pOut->height / metaBlk.h;
505 
506                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
507                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
508                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
509 
510                 if (pOut->pMipInfo != NULL)
511                 {
512                     pOut->pMipInfo[0].inMiptail = FALSE;
513                     pOut->pMipInfo[0].offset    = 0;
514                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
515                 }
516             }
517 
518             // Get the DCC address equation (copied from DccAddrFromCoord)
519             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
520             const UINT_32 numPipeLog2 = m_pipesLog2;
521             UINT_32       index       = m_dccBaseIndex + elemLog2;
522             const UINT_8* patIdxTable;
523 
524             if (m_settings.supportRbPlus)
525             {
526                 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
527 
528                 if (pIn->dccKeyFlags.pipeAligned)
529                 {
530                     index += MaxNumOfBpp;
531 
532                     if (m_numPkrLog2 < 2)
533                     {
534                         index += m_pipesLog2 * MaxNumOfBpp;
535                     }
536                     else
537                     {
538                         // 4 groups for "m_numPkrLog2 < 2" case
539                         index += 4 * MaxNumOfBpp;
540 
541                         const UINT_32 dccPipePerPkr = 3;
542 
543                         index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
544                                  (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
545                     }
546                 }
547             }
548             else
549             {
550                 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
551 
552                 if (pIn->dccKeyFlags.pipeAligned)
553                 {
554                     index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
555                 }
556                 else
557                 {
558                     index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
559                 }
560             }
561 
562             ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
563             pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
564         }
565     }
566 
567     return ret;
568 }
569 
570 /**
571 ************************************************************************************************************************
572 *   Gfx10Lib::HwlComputeCmaskAddrFromCoord
573 *
574 *   @brief
575 *       Interface function stub of AddrComputeCmaskAddrFromCoord
576 *
577 *   @return
578 *       ADDR_E_RETURNCODE
579 ************************************************************************************************************************
580 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)581 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
582     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
583     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
584 {
585     // Only support pipe aligned CMask
586     ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
587 
588     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
589     input.size            = sizeof(input);
590     input.cMaskFlags      = pIn->cMaskFlags;
591     input.colorFlags      = pIn->colorFlags;
592     input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
593     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
594     input.numSlices       = Max(pIn->numSlices,       1u);
595     input.swizzleMode     = pIn->swizzleMode;
596     input.resourceType    = pIn->resourceType;
597 
598     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
599     output.size = sizeof(output);
600 
601     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
602 
603     if (returnCode == ADDR_OK)
604     {
605         const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
606         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
607         const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
608         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
609         const UINT_8*  patIdxTable   =
610             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
611             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
612 
613         const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
614         const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
615         const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
616                                                                       blkSizeLog2 + 1, // +1 for nibble offset
617                                                                       pIn->x,
618                                                                       pIn->y,
619                                                                       pIn->slice,
620                                                                       0);
621         const UINT_32 xb       = pIn->x / output.metaBlkWidth;
622         const UINT_32 yb       = pIn->y / output.metaBlkHeight;
623         const UINT_32 pb       = output.pitch / output.metaBlkWidth;
624         const UINT_32 blkIndex = (yb * pb) + xb;
625         const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
626 
627         pOut->addr = (output.sliceSize * pIn->slice) +
628                      (blkIndex * (1 << blkSizeLog2)) +
629                      ((blkOffset >> 1) ^ pipeXor);
630         pOut->bitPosition = (blkOffset & 1) << 2;
631     }
632 
633     return returnCode;
634 }
635 
636 /**
637 ************************************************************************************************************************
638 *   Gfx10Lib::HwlComputeHtileAddrFromCoord
639 *
640 *   @brief
641 *       Interface function stub of AddrComputeHtileAddrFromCoord
642 *
643 *   @return
644 *       ADDR_E_RETURNCODE
645 ************************************************************************************************************************
646 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)647 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
648     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
649     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
650 {
651     ADDR_E_RETURNCODE returnCode = ADDR_OK;
652 
653     if (pIn->numMipLevels > 1)
654     {
655         returnCode = ADDR_NOTIMPLEMENTED;
656     }
657     else
658     {
659         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
660         input.size            = sizeof(input);
661         input.hTileFlags      = pIn->hTileFlags;
662         input.depthFlags      = pIn->depthflags;
663         input.swizzleMode     = pIn->swizzleMode;
664         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
665         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
666         input.numSlices       = Max(pIn->numSlices,       1u);
667         input.numMipLevels    = 1;
668 
669         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
670         output.size = sizeof(output);
671 
672         returnCode = ComputeHtileInfo(&input, &output);
673 
674         if (returnCode == ADDR_OK)
675         {
676             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
677             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
678             const UINT_32  index         = m_xmaskBaseIndex + numSampleLog2;
679             const UINT_8*  patIdxTable   = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
680 
681             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
682             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
683             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
684                                                                            blkSizeLog2 + 1, // +1 for nibble offset
685                                                                            pIn->x,
686                                                                            pIn->y,
687                                                                            pIn->slice,
688                                                                            0);
689             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
690             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
691             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
692             const UINT_32 blkIndex = (yb * pb) + xb;
693             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
694 
695             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
696                          (blkIndex * (1 << blkSizeLog2)) +
697                          ((blkOffset >> 1) ^ pipeXor);
698         }
699     }
700 
701     return returnCode;
702 }
703 
704 /**
705 ************************************************************************************************************************
706 *   Gfx10Lib::HwlComputeHtileCoordFromAddr
707 *
708 *   @brief
709 *       Interface function stub of AddrComputeHtileCoordFromAddr
710 *
711 *   @return
712 *       ADDR_E_RETURNCODE
713 ************************************************************************************************************************
714 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)715 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
716     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
717     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
718 {
719     ADDR_NOT_IMPLEMENTED();
720 
721     return ADDR_OK;
722 }
723 
724 /**
725 ************************************************************************************************************************
726 *   Gfx10Lib::HwlSupportComputeDccAddrFromCoord
727 *
728 *   @brief
729 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
730 *
731 *   @return
732 *       ADDR_E_RETURNCODE
733 ************************************************************************************************************************
734 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)735 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
736     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
737 {
738     ADDR_E_RETURNCODE returnCode = ADDR_OK;
739 
740     if ((pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
741         (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
742         (pIn->dccKeyFlags.linear == TRUE)             ||
743         (pIn->numFrags           >  1)                ||
744         (pIn->numMipLevels       >  1)                ||
745         (pIn->mipId              >  0))
746     {
747         returnCode = ADDR_NOTSUPPORTED;
748     }
749     else if ((pIn->pitch == 0)         ||
750              (pIn->metaBlkWidth == 0)  ||
751              (pIn->metaBlkHeight == 0) ||
752              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
753     {
754         returnCode = ADDR_NOTSUPPORTED;
755     }
756 
757     return returnCode;
758 }
759 
760 /**
761 ************************************************************************************************************************
762 *   Gfx10Lib::HwlComputeDccAddrFromCoord
763 *
764 *   @brief
765 *       Interface function stub of AddrComputeDccAddrFromCoord
766 *
767 *   @return
768 *       N/A
769 ************************************************************************************************************************
770 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)771 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
772     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
773     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
774 {
775     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
776     const UINT_32 numPipeLog2 = m_pipesLog2;
777     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
778     UINT_32       index       = m_dccBaseIndex + elemLog2;
779     const UINT_8* patIdxTable;
780 
781     if (m_settings.supportRbPlus)
782     {
783         patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
784 
785         if (pIn->dccKeyFlags.pipeAligned)
786         {
787             index += MaxNumOfBpp;
788 
789             if (m_numPkrLog2 < 2)
790             {
791                 index += m_pipesLog2 * MaxNumOfBpp;
792             }
793             else
794             {
795                 // 4 groups for "m_numPkrLog2 < 2" case
796                 index += 4 * MaxNumOfBpp;
797 
798                 const UINT_32 dccPipePerPkr = 3;
799 
800                 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
801                          (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
802             }
803         }
804     }
805     else
806     {
807         patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
808 
809         if (pIn->dccKeyFlags.pipeAligned)
810         {
811             index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
812         }
813         else
814         {
815             index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
816         }
817     }
818 
819     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
820     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
821     const UINT_32  blkOffset   =
822         ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
823                                         blkSizeLog2 + 1, // +1 for nibble offset
824                                         pIn->x,
825                                         pIn->y,
826                                         pIn->slice,
827                                         0);
828     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
829     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
830     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
831     const UINT_32 blkIndex = (yb * pb) + xb;
832     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
833 
834     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
835                  (blkIndex * (1 << blkSizeLog2)) +
836                  ((blkOffset >> 1) ^ pipeXor);
837 }
838 
839 /**
840 ************************************************************************************************************************
841 *   Gfx10Lib::HwlInitGlobalParams
842 *
843 *   @brief
844 *       Initializes global parameters
845 *
846 *   @return
847 *       TRUE if all settings are valid
848 *
849 ************************************************************************************************************************
850 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)851 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
852     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
853 {
854     BOOL_32              valid = TRUE;
855     GB_ADDR_CONFIG_GFX10 gbAddrConfig;
856 
857     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
858 
859     // These values are copied from CModel code
860     switch (gbAddrConfig.bits.NUM_PIPES)
861     {
862         case ADDR_CONFIG_1_PIPE:
863             m_pipes     = 1;
864             m_pipesLog2 = 0;
865             break;
866         case ADDR_CONFIG_2_PIPE:
867             m_pipes     = 2;
868             m_pipesLog2 = 1;
869             break;
870         case ADDR_CONFIG_4_PIPE:
871             m_pipes     = 4;
872             m_pipesLog2 = 2;
873             break;
874         case ADDR_CONFIG_8_PIPE:
875             m_pipes     = 8;
876             m_pipesLog2 = 3;
877             break;
878         case ADDR_CONFIG_16_PIPE:
879             m_pipes     = 16;
880             m_pipesLog2 = 4;
881             break;
882         case ADDR_CONFIG_32_PIPE:
883             m_pipes     = 32;
884             m_pipesLog2 = 5;
885             break;
886         case ADDR_CONFIG_64_PIPE:
887             m_pipes     = 64;
888             m_pipesLog2 = 6;
889             break;
890         default:
891             ADDR_ASSERT_ALWAYS();
892             valid = FALSE;
893             break;
894     }
895 
896     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
897     {
898         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
899             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
900             m_pipeInterleaveLog2  = 8;
901             break;
902         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
903             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
904             m_pipeInterleaveLog2  = 9;
905             break;
906         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
907             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
908             m_pipeInterleaveLog2  = 10;
909             break;
910         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
911             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
912             m_pipeInterleaveLog2  = 11;
913             break;
914         default:
915             ADDR_ASSERT_ALWAYS();
916             valid = FALSE;
917             break;
918     }
919 
920     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
921     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
922     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
923     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
924 
925     switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
926     {
927         case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
928             m_maxCompFrag     = 1;
929             m_maxCompFragLog2 = 0;
930             break;
931         case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
932             m_maxCompFrag     = 2;
933             m_maxCompFragLog2 = 1;
934             break;
935         case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
936             m_maxCompFrag     = 4;
937             m_maxCompFragLog2 = 2;
938             break;
939         case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
940             m_maxCompFrag     = 8;
941             m_maxCompFragLog2 = 3;
942             break;
943         default:
944             ADDR_ASSERT_ALWAYS();
945             valid = FALSE;
946             break;
947     }
948 
949     {
950         // Skip unaligned case
951         m_xmaskBaseIndex += MaxNumOfAA;
952 
953         m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
954         m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
955 
956         if (m_settings.supportRbPlus)
957         {
958             m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
959             m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
960 
961             ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
962 
963             ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
964                           sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
965 
966             if (m_numPkrLog2 >= 2)
967             {
968                 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
969                 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
970             }
971         }
972         else
973         {
974             const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
975                                         static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
976                                         1;
977 
978             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
979 
980             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
981                           sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
982         }
983     }
984 
985     if (m_settings.supportRbPlus)
986     {
987         // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
988         // corresponding SW_64KB_* mode
989         m_blockVarSizeLog2 = m_pipesLog2 + 14;
990     }
991 
992     if (valid)
993     {
994         InitEquationTable();
995     }
996 
997     return valid;
998 }
999 
1000 /**
1001 ************************************************************************************************************************
1002 *   Gfx10Lib::HwlConvertChipFamily
1003 *
1004 *   @brief
1005 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1006 *   @return
1007 *       ChipFamily
1008 ************************************************************************************************************************
1009 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1010 ChipFamily Gfx10Lib::HwlConvertChipFamily(
1011     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
1012     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1013 {
1014     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1015 
1016     m_settings.dccUnsup3DSwDis  = 1;
1017     m_settings.dsMipmapHtileFix = 1;
1018 
1019     switch (chipFamily)
1020     {
1021         case FAMILY_NV:
1022             if (ASICREV_IS_NAVI10_P(chipRevision))
1023             {
1024                 m_settings.dsMipmapHtileFix = 0;
1025                 m_settings.isDcn20          = 1;
1026             }
1027 
1028             if (ASICREV_IS_NAVI12_P(chipRevision))
1029             {
1030                 m_settings.isDcn20 = 1;
1031             }
1032 
1033             if (ASICREV_IS_NAVI14_M(chipRevision))
1034             {
1035                 m_settings.isDcn20 = 1;
1036             }
1037 
1038             if (ASICREV_IS_NAVI21_M(chipRevision))
1039             {
1040                 m_settings.supportRbPlus   = 1;
1041                 m_settings.dccUnsup3DSwDis = 0;
1042             }
1043 
1044             if (ASICREV_IS_NAVI22_P(chipRevision))
1045             {
1046                 m_settings.supportRbPlus   = 1;
1047                 m_settings.dccUnsup3DSwDis = 0;
1048             }
1049 
1050             if (ASICREV_IS_NAVI23_P(chipRevision))
1051             {
1052                 m_settings.supportRbPlus   = 1;
1053                 m_settings.dccUnsup3DSwDis = 0;
1054             }
1055 
1056             if (ASICREV_IS_NAVI24_P(chipRevision))
1057             {
1058                 m_settings.supportRbPlus   = 1;
1059                 m_settings.dccUnsup3DSwDis = 0;
1060             }
1061             break;
1062 
1063         case FAMILY_VGH:
1064             if (ASICREV_IS_VANGOGH(chipRevision))
1065             {
1066                 m_settings.supportRbPlus   = 1;
1067                 m_settings.dccUnsup3DSwDis = 0;
1068             }
1069             else
1070             {
1071                 ADDR_ASSERT(!"Unknown chip revision");
1072             }
1073             break;
1074 
1075         case FAMILY_RMB:
1076             if (ASICREV_IS_REMBRANDT(chipRevision))
1077             {
1078                 m_settings.supportRbPlus   = 1;
1079                 m_settings.dccUnsup3DSwDis = 0;
1080             }
1081             else
1082             {
1083                 ADDR_ASSERT(!"Unknown chip revision");
1084             }
1085             break;
1086         case FAMILY_GC_10_3_6:
1087             if (ASICREV_IS_GFX1036(chipRevision))
1088             {
1089                 m_settings.supportRbPlus   = 1;
1090                 m_settings.dccUnsup3DSwDis = 0;
1091             }
1092             break;
1093         case FAMILY_GC_10_3_7:
1094             if (ASICREV_IS_GFX1037(chipRevision))
1095             {
1096                 m_settings.supportRbPlus   = 1;
1097                 m_settings.dccUnsup3DSwDis = 0;
1098             }
1099             else
1100             {
1101                 ADDR_ASSERT(!"Unknown chip revision");
1102             }
1103             break;
1104         default:
1105             ADDR_ASSERT(!"Unknown chip family");
1106             break;
1107     }
1108 
1109     m_configFlags.use32bppFor422Fmt = TRUE;
1110 
1111     return family;
1112 }
1113 
1114 /**
1115 ************************************************************************************************************************
1116 *   Gfx10Lib::GetBlk256SizeLog2
1117 *
1118 *   @brief
1119 *       Get block 256 size
1120 *
1121 *   @return
1122 *       N/A
1123 ************************************************************************************************************************
1124 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1125 void Gfx10Lib::GetBlk256SizeLog2(
1126     AddrResourceType resourceType,      ///< [in] Resource type
1127     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1128     UINT_32          elemLog2,          ///< [in] element size log2
1129     UINT_32          numSamplesLog2,    ///< [in] number of samples
1130     Dim3d*           pBlock             ///< [out] block size
1131     ) const
1132 {
1133     if (IsThin(resourceType, swizzleMode))
1134     {
1135         UINT_32 blockBits = 8 - elemLog2;
1136 
1137         if (IsZOrderSwizzle(swizzleMode))
1138         {
1139             blockBits -= numSamplesLog2;
1140         }
1141 
1142         pBlock->w = (blockBits >> 1) + (blockBits & 1);
1143         pBlock->h = (blockBits >> 1);
1144         pBlock->d = 0;
1145     }
1146     else
1147     {
1148         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1149 
1150         UINT_32 blockBits = 8 - elemLog2;
1151 
1152         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1153         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1154         pBlock->h = (blockBits / 3);
1155     }
1156 }
1157 
1158 /**
1159 ************************************************************************************************************************
1160 *   Gfx10Lib::GetCompressedBlockSizeLog2
1161 *
1162 *   @brief
1163 *       Get compress block size
1164 *
1165 *   @return
1166 *       N/A
1167 ************************************************************************************************************************
1168 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1169 void Gfx10Lib::GetCompressedBlockSizeLog2(
1170     Gfx10DataType    dataType,          ///< [in] Data type
1171     AddrResourceType resourceType,      ///< [in] Resource type
1172     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1173     UINT_32          elemLog2,          ///< [in] element size log2
1174     UINT_32          numSamplesLog2,    ///< [in] number of samples
1175     Dim3d*           pBlock             ///< [out] block size
1176     ) const
1177 {
1178     if (dataType == Gfx10DataColor)
1179     {
1180         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1181     }
1182     else
1183     {
1184         ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1185         pBlock->w = 3;
1186         pBlock->h = 3;
1187         pBlock->d = 0;
1188     }
1189 }
1190 
1191 /**
1192 ************************************************************************************************************************
1193 *   Gfx10Lib::GetMetaOverlapLog2
1194 *
1195 *   @brief
1196 *       Get meta block overlap
1197 *
1198 *   @return
1199 *       N/A
1200 ************************************************************************************************************************
1201 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1202 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1203     Gfx10DataType    dataType,          ///< [in] Data type
1204     AddrResourceType resourceType,      ///< [in] Resource type
1205     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1206     UINT_32          elemLog2,          ///< [in] element size log2
1207     UINT_32          numSamplesLog2     ///< [in] number of samples
1208     ) const
1209 {
1210     Dim3d compBlock;
1211     Dim3d microBlock;
1212 
1213     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1214     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1215 
1216     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
1217     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1218     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
1219     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
1220     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
1221 
1222     if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1223     {
1224         overlap++;
1225     }
1226 
1227     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1228     if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1229     {
1230         overlap--;
1231     }
1232     overlap = Max(overlap, 0);
1233     return overlap;
1234 }
1235 
1236 /**
1237 ************************************************************************************************************************
1238 *   Gfx10Lib::Get3DMetaOverlapLog2
1239 *
1240 *   @brief
1241 *       Get 3d meta block overlap
1242 *
1243 *   @return
1244 *       N/A
1245 ************************************************************************************************************************
1246 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1247 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1248     AddrResourceType resourceType,      ///< [in] Resource type
1249     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1250     UINT_32          elemLog2           ///< [in] element size log2
1251     ) const
1252 {
1253     Dim3d microBlock;
1254     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1255 
1256     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1257 
1258     if (m_settings.supportRbPlus)
1259     {
1260         overlap++;
1261     }
1262 
1263     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1264     {
1265         overlap = 0;
1266     }
1267     return overlap;
1268 }
1269 
1270 /**
1271 ************************************************************************************************************************
1272 *   Gfx10Lib::GetPipeRotateAmount
1273 *
1274 *   @brief
1275 *       Get pipe rotate amount
1276 *
1277 *   @return
1278 *       Pipe rotate amount
1279 ************************************************************************************************************************
1280 */
1281 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1282 INT_32 Gfx10Lib::GetPipeRotateAmount(
1283     AddrResourceType resourceType,      ///< [in] Resource type
1284     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
1285     ) const
1286 {
1287     INT_32 amount = 0;
1288 
1289     if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1290     {
1291         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1292                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
1293     }
1294 
1295     return amount;
1296 }
1297 
1298 /**
1299 ************************************************************************************************************************
1300 *   Gfx10Lib::GetMetaBlkSize
1301 *
1302 *   @brief
1303 *       Get metadata block size
1304 *
1305 *   @return
1306 *       Meta block size
1307 ************************************************************************************************************************
1308 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1309 UINT_32 Gfx10Lib::GetMetaBlkSize(
1310     Gfx10DataType    dataType,          ///< [in] Data type
1311     AddrResourceType resourceType,      ///< [in] Resource type
1312     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1313     UINT_32          elemLog2,          ///< [in] element size log2
1314     UINT_32          numSamplesLog2,    ///< [in] number of samples
1315     BOOL_32          pipeAlign,         ///< [in] pipe align
1316     Dim3d*           pBlock             ///< [out] block size
1317     ) const
1318 {
1319     INT_32 metablkSizeLog2;
1320 
1321     {
1322         const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
1323         const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
1324         const INT_32 compBlkSizeLog2    = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1325         const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1326                                           numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1327         const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
1328         INT_32       numPipesLog2       = m_pipesLog2;
1329 
1330         if (IsThin(resourceType, swizzleMode))
1331         {
1332             if ((pipeAlign == FALSE) ||
1333                 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1334                 (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
1335             {
1336                 if (pipeAlign)
1337                 {
1338                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1339                     metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1340                 }
1341                 else
1342                 {
1343                     metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1344                 }
1345             }
1346             else
1347             {
1348                 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1349                 {
1350                     numPipesLog2++;
1351                 }
1352 
1353                 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1354 
1355                 if (numPipesLog2 >= 4)
1356                 {
1357                     INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1358 
1359                     // In 16Bpe 8xaa, we have an extra overlap bit
1360                     if ((pipeRotateLog2 > 0)  &&
1361                         (elemLog2 == 4)       &&
1362                         (numSamplesLog2 == 3) &&
1363                         (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1364                     {
1365                         overlapLog2++;
1366                     }
1367 
1368                     metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1369                     metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1370 
1371                     if (m_settings.supportRbPlus    &&
1372                         IsRtOptSwizzle(swizzleMode) &&
1373                         (numPipesLog2 == 6)         &&
1374                         (numSamplesLog2 == 3)       &&
1375                         (m_maxCompFragLog2 == 3)    &&
1376                         (metablkSizeLog2 < 15))
1377                     {
1378                         metablkSizeLog2 = 15;
1379                     }
1380                 }
1381                 else
1382                 {
1383                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1384                 }
1385 
1386                 if (dataType == Gfx10DataDepthStencil)
1387                 {
1388                     // For htile surfaces, pad meta block size to 2K * num_pipes
1389                     metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1390                 }
1391 
1392                 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1393 
1394                 if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1395                 {
1396                     const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1397 
1398                     metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1399                 }
1400             }
1401 
1402             const INT_32 metablkBitsLog2 =
1403                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1404             pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1405             pBlock->h = 1 << (metablkBitsLog2 >> 1);
1406             pBlock->d = 1;
1407         }
1408         else
1409         {
1410             ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1411 
1412             if (pipeAlign)
1413             {
1414                 if (m_settings.supportRbPlus         &&
1415                     (m_pipesLog2 == m_numSaLog2 + 1) &&
1416                     (m_pipesLog2 > 1)                &&
1417                     IsRbAligned(resourceType, swizzleMode))
1418                 {
1419                     numPipesLog2++;
1420                 }
1421 
1422                 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1423 
1424                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1425                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1426                 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1427             }
1428             else
1429             {
1430                 metablkSizeLog2 = 12;
1431             }
1432 
1433             const INT_32 metablkBitsLog2 =
1434                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1435             pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1436             pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1437             pBlock->d = 1 << (metablkBitsLog2 / 3);
1438         }
1439     }
1440 
1441     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1442 }
1443 
1444 /**
1445 ************************************************************************************************************************
1446 *   Gfx10Lib::ConvertSwizzlePatternToEquation
1447 *
1448 *   @brief
1449 *       Convert swizzle pattern to equation.
     *
     *       Fills pEquation for the address bits [0, GetBlockSizeLog2(swMode)): every address bit receives one
     *       coordinate bit in addr[] and, for xor swizzle modes, up to two additional coordinate bits in xor1[] and
     *       xor2[]. Channel 0/1/2 is used for the x/y/z masks of the pattern respectively, and x indices are stored
     *       offset by elemLog2.
     *
     *       NOTE(review): in the xor paths only the fields that are needed get written and xor1/xor2 are tested
     *       against 0 before use, so pEquation is presumably expected to be zero-initialized by the caller - confirm.
1450 *
1451 *   @return
1452 *       N/A
1453 ************************************************************************************************************************
1454 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1455 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1456     UINT_32                elemLog2,  ///< [in] element bytes log2
1457     AddrResourceType       rsrcType,  ///< [in] resource type
1458     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1459     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
1460     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1461     const
1462 {
1463     // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
1464     ADDR_BIT_SETTING fullSwizzlePattern[20];
1465     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1466 
         // pSwizzle[i] is read below as the x/y/z coordinate bit masks that feed address bit i.
1467     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1468     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1469     pEquation->numBits            = blockSizeLog2;
1470     pEquation->stackedDepthSlices = FALSE;
1471 
         // Address bits below elemLog2 map one-to-one onto bits 0..elemLog2-1 of channel 0.
1472     for (UINT_32 i = 0; i < elemLog2; i++)
1473     {
1474         pEquation->addr[i].channel = 0;
1475         pEquation->addr[i].valid   = 1;
1476         pEquation->addr[i].index   = i;
1477     }
1478 
         // Case 1 - non-xor swizzle modes: every remaining address bit is asserted to be driven by exactly one
         // coordinate bit, so only addr[] is filled and both xor terms are cleared.
1479     if (IsXor(swMode) == FALSE)
1480     {
1481         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1482         {
1483             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1484 
1485             if (pSwizzle[i].x != 0)
1486             {
1487                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1488 
1489                 pEquation->addr[i].channel = 0;
1490                 pEquation->addr[i].valid   = 1;
1491                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1492             }
1493             else if (pSwizzle[i].y != 0)
1494             {
1495                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1496 
1497                 pEquation->addr[i].channel = 1;
1498                 pEquation->addr[i].valid   = 1;
1499                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1500             }
1501             else
1502             {
1503                 ADDR_ASSERT(pSwizzle[i].z != 0);
1504                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1505 
1506                 pEquation->addr[i].channel = 2;
1507                 pEquation->addr[i].valid   = 1;
1508                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1509             }
1510 
1511             pEquation->xor1[i].value = 0;
1512             pEquation->xor2[i].value = 0;
1513         }
1514     }
         // Case 2 - xor swizzle mode on a thin resource: only x and y bits can become addr[] terms; a z bit in a
         // multi-bit entry is emitted directly as an xor2 term.
1515     else if (IsThin(rsrcType, swMode))
1516     {
1517         Dim3d dim;
1518         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1519 
1520         const UINT_32 blkXLog2 = Log2(dim.w);
1521         const UINT_32 blkYLog2 = Log2(dim.h);
1522         const UINT_32 blkXMask = dim.w - 1;
1523         const UINT_32 blkYMask = dim.h - 1;
1524 
             // swizzle[i] : x/y mask bits of address bit i that have not yet been assigned to addr/xor1/xor2.
             // xMask/yMask: coordinate bits already used as the addr[] term of some address bit.
             // bMask      : address bits that are completely resolved (starts with the element-byte bits).
1525         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1526         UINT_32          xMask = 0;
1527         UINT_32          yMask = 0;
1528         UINT_32          bMask = (1 << elemLog2) - 1;
1529 
             // Stage 1: resolve what can be decided from each pattern entry on its own.
1530         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1531         {
1532             if (IsPow2(pSwizzle[i].value))
1533             {
                     // A single coordinate bit drives this address bit: it becomes the addr[] term.
1534                 if (pSwizzle[i].x != 0)
1535                 {
1536                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1537                     xMask |= pSwizzle[i].x;
1538 
1539                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1540 
1541                     ADDR_ASSERT(xLog2 < blkXLog2);
1542 
1543                     pEquation->addr[i].channel = 0;
1544                     pEquation->addr[i].valid   = 1;
1545                     pEquation->addr[i].index   = xLog2 + elemLog2;
1546                 }
1547                 else
1548                 {
1549                     ADDR_ASSERT(pSwizzle[i].y != 0);
1550                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1551                     yMask |= pSwizzle[i].y;
1552 
1553                     pEquation->addr[i].channel = 1;
1554                     pEquation->addr[i].valid   = 1;
1555                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1556 
1557                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1558                 }
1559 
1560                 swizzle[i].value = 0;
1561                 bMask |= 1 << i;
1562             }
1563             else
1564             {
                     // Several coordinate bits are combined into this address bit.
1565                 if (pSwizzle[i].z != 0)
1566                 {
1567                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1568 
1569                     pEquation->xor2[i].channel = 2;
1570                     pEquation->xor2[i].valid   = 1;
1571                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1572                 }
1573 
                     // Only x and y are carried forward; z (handled above) and s are dropped from the remainder.
1574                 swizzle[i].x = pSwizzle[i].x;
1575                 swizzle[i].y = pSwizzle[i].y;
1576                 swizzle[i].z = swizzle[i].s = 0;
1577 
1578                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1579 
                     // x bits outside blkXMask (i.e. beyond the block width) go to xor1 immediately and are removed
                     // from the remainder.
1580                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1581 
1582                 if (xHi != 0)
1583                 {
1584                     ADDR_ASSERT(IsPow2(xHi));
1585                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1586 
1587                     pEquation->xor1[i].channel = 0;
1588                     pEquation->xor1[i].valid   = 1;
1589                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1590 
1591                     swizzle[i].x &= blkXMask;
1592                 }
1593 
                     // Same for y bits outside blkYMask: xor1 if x did not take it, otherwise xor2.
1594                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1595 
1596                 if (yHi != 0)
1597                 {
1598                     ADDR_ASSERT(IsPow2(yHi));
1599 
1600                     if (xHi == 0)
1601                     {
1602                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1603                         pEquation->xor1[i].channel = 1;
1604                         pEquation->xor1[i].valid   = 1;
1605                         pEquation->xor1[i].index   = Log2(yHi);
1606                     }
1607                     else
1608                     {
1609                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1610                         pEquation->xor2[i].channel = 1;
1611                         pEquation->xor2[i].valid   = 1;
1612                         pEquation->xor2[i].index   = Log2(yHi);
1613                     }
1614 
1615                     swizzle[i].y &= blkYMask;
1616                 }
1617 
                     // Nothing left inside the block: this address bit is done.
1618                 if (swizzle[i].value == 0)
1619                 {
1620                     bMask |= 1 << i;
1621                 }
1622             }
1623         }
1624 
1625         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1626         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1627 
             // The sweep below starts at m_pipeInterleaveLog2, so all lower address bits must already be resolved.
1628         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1629 
             // Stage 2: sweep the unresolved address bits until every bit of the block is resolved.
             //  - A remainder that has shrunk to a single coordinate bit becomes the addr[] term and claims that bit
             //    in xMask/yMask.
             //  - Otherwise, coordinate bits already claimed by another address bit are moved into the first free
             //    xor slot (xor1, then xor2) and removed from the remainder.
             // NOTE(review): the loop only ends once bMask == blockMask, so termination relies on every sweep making
             // progress, i.e. on well-formed pattern data - confirm.
1630         while (bMask != blockMask)
1631         {
1632             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1633             {
1634                 if ((bMask & (1 << i)) == 0)
1635                 {
1636                     if (IsPow2(swizzle[i].value))
1637                     {
1638                         if (swizzle[i].x != 0)
1639                         {
1640                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1641                             xMask |= swizzle[i].x;
1642 
1643                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1644 
1645                             ADDR_ASSERT(xLog2 < blkXLog2);
1646 
1647                             pEquation->addr[i].channel = 0;
1648                             pEquation->addr[i].valid   = 1;
1649                             pEquation->addr[i].index   = xLog2 + elemLog2;
1650                         }
1651                         else
1652                         {
1653                             ADDR_ASSERT(swizzle[i].y != 0);
1654                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1655                             yMask |= swizzle[i].y;
1656 
1657                             pEquation->addr[i].channel = 1;
1658                             pEquation->addr[i].valid   = 1;
1659                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1660 
1661                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1662                         }
1663 
1664                         swizzle[i].value = 0;
1665                         bMask |= 1 << i;
1666                     }
1667                     else
1668                     {
                             // Coordinate bits of this remainder that some other address bit already owns.
1669                         const UINT_32 x = swizzle[i].x & xMask;
1670                         const UINT_32 y = swizzle[i].y & yMask;
1671 
1672                         if (x != 0)
1673                         {
1674                             ADDR_ASSERT(IsPow2(x));
1675 
1676                             if (pEquation->xor1[i].value == 0)
1677                             {
1678                                 pEquation->xor1[i].channel = 0;
1679                                 pEquation->xor1[i].valid   = 1;
1680                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1681                             }
1682                             else
1683                             {
1684                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1685                                 pEquation->xor2[i].channel = 0;
1686                                 pEquation->xor2[i].valid   = 1;
1687                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1688                             }
1689                         }
1690 
1691                         if (y != 0)
1692                         {
1693                             ADDR_ASSERT(IsPow2(y));
1694 
1695                             if (pEquation->xor1[i].value == 0)
1696                             {
1697                                 pEquation->xor1[i].channel = 1;
1698                                 pEquation->xor1[i].valid   = 1;
1699                                 pEquation->xor1[i].index   = Log2(y);
1700                             }
1701                             else
1702                             {
1703                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1704                                 pEquation->xor2[i].channel = 1;
1705                                 pEquation->xor2[i].valid   = 1;
1706                                 pEquation->xor2[i].index   = Log2(y);
1707                             }
1708                         }
1709 
1710                         swizzle[i].x &= ~x;
1711                         swizzle[i].y &= ~y;
1712                     }
1713                 }
1714             }
1715         }
1716 
             // Every x/y bit inside the block must have ended up as the addr[] term of exactly one address bit.
1717         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1718     }
         // Case 3 - xor swizzle mode on a resource that is not thin: the same two-stage scheme as case 2, with the z
         // channel treated like x and y. Block dimensions are read from the 4K/64K 3D block tables by elemLog2.
1719     else
1720     {
1721         const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1722         const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1723         const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1724         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1725         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1726         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1727 
             // Same bookkeeping as in case 2, plus zMask for the z channel.
1728         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1729         UINT_32          xMask = 0;
1730         UINT_32          yMask = 0;
1731         UINT_32          zMask = 0;
1732         UINT_32          bMask = (1 << elemLog2) - 1;
1733 
             // Stage 1: resolve what can be decided from each pattern entry on its own.
1734         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1735         {
1736             if (IsPow2(pSwizzle[i].value))
1737             {
1738                 if (pSwizzle[i].x != 0)
1739                 {
1740                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1741                     xMask |= pSwizzle[i].x;
1742 
1743                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1744 
1745                     ADDR_ASSERT(xLog2 < blkXLog2);
1746 
1747                     pEquation->addr[i].channel = 0;
1748                     pEquation->addr[i].valid   = 1;
1749                     pEquation->addr[i].index   = xLog2 + elemLog2;
1750                 }
1751                 else if (pSwizzle[i].y != 0)
1752                 {
1753                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1754                     yMask |= pSwizzle[i].y;
1755 
1756                     pEquation->addr[i].channel = 1;
1757                     pEquation->addr[i].valid   = 1;
1758                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1759 
1760                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1761                 }
1762                 else
1763                 {
1764                     ADDR_ASSERT(pSwizzle[i].z != 0);
1765                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1766                     zMask |= pSwizzle[i].z;
1767 
1768                     pEquation->addr[i].channel = 2;
1769                     pEquation->addr[i].valid   = 1;
1770                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1771 
1772                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1773                 }
1774 
1775                 swizzle[i].value = 0;
1776                 bMask |= 1 << i;
1777             }
1778             else
1779             {
1780                 swizzle[i].x = pSwizzle[i].x;
1781                 swizzle[i].y = pSwizzle[i].y;
1782                 swizzle[i].z = pSwizzle[i].z;
1783                 swizzle[i].s = 0;
1784 
1785                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1786 
                     // Coordinate bits lying outside the block dimensions.
1787                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1788                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1789                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1790 
                     // Only two xor slots exist, so at most two of the three channels may have such bits.
1791                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1792 
1793                 if (xHi != 0)
1794                 {
1795                     ADDR_ASSERT(IsPow2(xHi));
1796                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1797 
1798                     pEquation->xor1[i].channel = 0;
1799                     pEquation->xor1[i].valid   = 1;
1800                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1801 
1802                     swizzle[i].x &= blkXMask;
1803                 }
1804 
1805                 if (yHi != 0)
1806                 {
1807                     ADDR_ASSERT(IsPow2(yHi));
1808 
1809                     if (pEquation->xor1[i].value == 0)
1810                     {
1811                         pEquation->xor1[i].channel = 1;
1812                         pEquation->xor1[i].valid   = 1;
1813                         pEquation->xor1[i].index   = Log2(yHi);
1814                     }
1815                     else
1816                     {
1817                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1818                         pEquation->xor2[i].channel = 1;
1819                         pEquation->xor2[i].valid   = 1;
1820                         pEquation->xor2[i].index   = Log2(yHi);
1821                     }
1822 
1823                     swizzle[i].y &= blkYMask;
1824                 }
1825 
1826                 if (zHi != 0)
1827                 {
1828                     ADDR_ASSERT(IsPow2(zHi));
1829 
1830                     if (pEquation->xor1[i].value == 0)
1831                     {
1832                         pEquation->xor1[i].channel = 2;
1833                         pEquation->xor1[i].valid   = 1;
1834                         pEquation->xor1[i].index   = Log2(zHi);
1835                     }
1836                     else
1837                     {
1838                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1839                         pEquation->xor2[i].channel = 2;
1840                         pEquation->xor2[i].valid   = 1;
1841                         pEquation->xor2[i].index   = Log2(zHi);
1842                     }
1843 
1844                     swizzle[i].z &= blkZMask;
1845                 }
1846 
1847                 if (swizzle[i].value == 0)
1848                 {
1849                     bMask |= 1 << i;
1850                 }
1851             }
1852         }
1853 
1854         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1855         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1856 
1857         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1858 
             // Stage 2: sweep the unresolved address bits until all are resolved (see case 2 for the rules; here z is
             // handled alongside x and y).
             // NOTE(review): termination again relies on every sweep making progress - confirm.
1859         while (bMask != blockMask)
1860         {
1861             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1862             {
1863                 if ((bMask & (1 << i)) == 0)
1864                 {
1865                     if (IsPow2(swizzle[i].value))
1866                     {
1867                         if (swizzle[i].x != 0)
1868                         {
1869                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1870                             xMask |= swizzle[i].x;
1871 
1872                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1873 
1874                             ADDR_ASSERT(xLog2 < blkXLog2);
1875 
1876                             pEquation->addr[i].channel = 0;
1877                             pEquation->addr[i].valid   = 1;
1878                             pEquation->addr[i].index   = xLog2 + elemLog2;
1879                         }
1880                         else if (swizzle[i].y != 0)
1881                         {
1882                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1883                             yMask |= swizzle[i].y;
1884 
1885                             pEquation->addr[i].channel = 1;
1886                             pEquation->addr[i].valid   = 1;
1887                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1888 
1889                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1890                         }
1891                         else
1892                         {
1893                             ADDR_ASSERT(swizzle[i].z != 0);
1894                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1895                             zMask |= swizzle[i].z;
1896 
1897                             pEquation->addr[i].channel = 2;
1898                             pEquation->addr[i].valid   = 1;
1899                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1900 
1901                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1902                         }
1903 
1904                         swizzle[i].value = 0;
1905                         bMask |= 1 << i;
1906                     }
1907                     else
1908                     {
1909                         const UINT_32 x = swizzle[i].x & xMask;
1910                         const UINT_32 y = swizzle[i].y & yMask;
1911                         const UINT_32 z = swizzle[i].z & zMask;
1912 
1913                         if (x != 0)
1914                         {
1915                             ADDR_ASSERT(IsPow2(x));
1916 
1917                             if (pEquation->xor1[i].value == 0)
1918                             {
1919                                 pEquation->xor1[i].channel = 0;
1920                                 pEquation->xor1[i].valid   = 1;
1921                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1922                             }
1923                             else
1924                             {
1925                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1926                                 pEquation->xor2[i].channel = 0;
1927                                 pEquation->xor2[i].valid   = 1;
1928                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1929                             }
1930                         }
1931 
1932                         if (y != 0)
1933                         {
1934                             ADDR_ASSERT(IsPow2(y));
1935 
1936                             if (pEquation->xor1[i].value == 0)
1937                             {
1938                                 pEquation->xor1[i].channel = 1;
1939                                 pEquation->xor1[i].valid   = 1;
1940                                 pEquation->xor1[i].index   = Log2(y);
1941                             }
1942                             else
1943                             {
1944                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1945                                 pEquation->xor2[i].channel = 1;
1946                                 pEquation->xor2[i].valid   = 1;
1947                                 pEquation->xor2[i].index   = Log2(y);
1948                             }
1949                         }
1950 
1951                         if (z != 0)
1952                         {
1953                             ADDR_ASSERT(IsPow2(z));
1954 
1955                             if (pEquation->xor1[i].value == 0)
1956                             {
1957                                 pEquation->xor1[i].channel = 2;
1958                                 pEquation->xor1[i].valid   = 1;
1959                                 pEquation->xor1[i].index   = Log2(z);
1960                             }
1961                             else
1962                             {
1963                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1964                                 pEquation->xor2[i].channel = 2;
1965                                 pEquation->xor2[i].valid   = 1;
1966                                 pEquation->xor2[i].index   = Log2(z);
1967                             }
1968                         }
1969 
1970                         swizzle[i].x &= ~x;
1971                         swizzle[i].y &= ~y;
1972                         swizzle[i].z &= ~z;
1973                     }
1974                 }
1975             }
1976         }
1977 
             // Every x/y/z bit inside the block must have ended up as the addr[] term of exactly one address bit.
1978         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1979     }
1980 }
1981 
1982 /**
1983 ************************************************************************************************************************
1984 *   Gfx10Lib::InitEquationTable
1985 *
1986 *   @brief
1987 *       Initialize Equation table.
1988 *
1989 *   @return
1990 *       N/A
1991 ************************************************************************************************************************
1992 */
InitEquationTable()1993 VOID Gfx10Lib::InitEquationTable()
1994 {
1995     memset(m_equationTable, 0, sizeof(m_equationTable));
1996 
1997     // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1998     // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1999     // computing 2D resources.
2000     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2001     {
2002         // Add offset. Start iterating from ADDR_RSRC_TEX_2D
2003         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2004 
2005         // Iterate through the maximum number of swizzlemodes a type can hold
2006         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2007         {
2008             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2009 
2010             // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
2011             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
2012             {
2013                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
2014                 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
2015                 // overwriting the choice.
2016                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
2017 
2018                 if (pPatInfo != NULL)
2019                 {
2020                     ADDR_ASSERT(IsValidSwMode(swMode));
2021 
2022                     if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
2023                     {
2024                         ADDR_EQUATION equation = {};
2025 
2026                         // Passing in pPatInfo to get the addr equation
2027                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2028 
2029                         equationIndex = m_numEquations;
2030                         ADDR_ASSERT(equationIndex < EquationTableSize);
2031                         // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
2032                         m_equationTable[equationIndex] = equation;
2033                         // Increment m_numEquations
2034                         m_numEquations++;
2035                     }
2036                     else // There is no equationIndex
2037                     {
2038                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2039                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2040                         ADDR_ASSERT(rsrcTypeIdx == 1);
2041                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2042                         ADDR_ASSERT(m_settings.supportRbPlus == 1);
2043                     }
2044                 }
2045                 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
2046                 // iteration in this nested for-loop
2047                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2048             }
2049         }
2050     }
2051 }
2052 
2053 /**
2054 ************************************************************************************************************************
2055 *   Gfx10Lib::HwlGetEquationIndex
2056 *
2057 *   @brief
2058 *       Interface function stub of GetEquationIndex
2059 *
2060 *   @return
2061 *       ADDR_E_RETURNCODE
2062 ************************************************************************************************************************
2063 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2064 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2065     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
2066     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
2067     ) const
2068 {
2069     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2070 
2071     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2072         (pIn->resourceType == ADDR_RSRC_TEX_3D))
2073     {
2074         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2075         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
2076         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
2077 
2078         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2079     }
2080 
2081     if (pOut->pMipInfo != NULL)
2082     {
2083         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2084         {
2085             pOut->pMipInfo[i].equationIndex = equationIdx;
2086         }
2087     }
2088 
2089     return equationIdx;
2090 }
2091 
2092 /**
2093 ************************************************************************************************************************
2094 *   Gfx10Lib::GetValidDisplaySwizzleModes
2095 *
2096 *   @brief
2097 *       Get valid swizzle modes mask for displayable surface
2098 *
2099 *   @return
2100 *       Valid swizzle modes mask for displayable surface
2101 ************************************************************************************************************************
2102 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2103 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2104     UINT_32 bpp
2105     ) const
2106 {
2107     UINT_32 swModeMask = 0;
2108 
2109     if (bpp <= 64)
2110     {
2111         if (m_settings.isDcn20)
2112         {
2113             swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2114         }
2115         else
2116         {
2117             swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2118         }
2119     }
2120 
2121     return swModeMask;
2122 }
2123 
2124 /**
2125 ************************************************************************************************************************
2126 *   Gfx10Lib::IsValidDisplaySwizzleMode
2127 *
2128 *   @brief
2129 *       Check if a swizzle mode is supported by display engine
2130 *
2131 *   @return
2132 *       TRUE is swizzle mode is supported by display engine
2133 ************************************************************************************************************************
2134 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2135 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2136     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2137     ) const
2138 {
2139     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2140 
2141     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2142 }
2143 
2144 /**
2145 ************************************************************************************************************************
2146 *   Gfx10Lib::GetMaxNumMipsInTail
2147 *
2148 *   @brief
2149 *       Return max number of mips in tails
2150 *
2151 *   @return
2152 *       Max number of mips in tails
2153 ************************************************************************************************************************
2154 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2155 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2156     UINT_32 blockSizeLog2,     ///< block size log2
2157     BOOL_32 isThin             ///< is thin or thick
2158     ) const
2159 {
2160     UINT_32 effectiveLog2 = blockSizeLog2;
2161 
2162     if (isThin == FALSE)
2163     {
2164         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2165     }
2166 
2167     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2168 }
2169 
2170 /**
2171 ************************************************************************************************************************
2172 *   Gfx10Lib::HwlComputePipeBankXor
2173 *
2174 *   @brief
2175 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2176 *
2177 *   @return
2178 *       PipeBankXor value
2179 ************************************************************************************************************************
2180 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2181 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2182     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
2183     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
2184     ) const
2185 {
2186     if (IsNonPrtXor(pIn->swizzleMode))
2187     {
2188         const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2189 
2190         // No pipe xor...
2191         const UINT_32 pipeXor = 0;
2192         UINT_32       bankXor = 0;
2193 
2194         const UINT_32         XorPatternLen = 8;
2195         static const UINT_32  XorBankRot1b[XorPatternLen] = {0,  1,  0,  1,  0,  1,  0,  1};
2196         static const UINT_32  XorBankRot2b[XorPatternLen] = {0,  2,  1,  3,  2,  0,  3,  1};
2197         static const UINT_32  XorBankRot3b[XorPatternLen] = {0,  4,  2,  6,  1,  5,  3,  7};
2198         static const UINT_32  XorBankRot4b[XorPatternLen] = {0,  8,  4, 12,  2, 10,  6, 14};
2199         static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2200 
2201         switch (bankBits)
2202         {
2203             case 1:
2204             case 2:
2205             case 3:
2206             case 4:
2207                 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2208                 break;
2209             default:
2210                 // valid bank bits should be 0~4
2211                 ADDR_ASSERT_ALWAYS();
2212             case 0:
2213                 break;
2214         }
2215 
2216         pOut->pipeBankXor = bankXor | pipeXor;
2217     }
2218     else
2219     {
2220         pOut->pipeBankXor = 0;
2221     }
2222 
2223     return ADDR_OK;
2224 }
2225 
2226 /**
2227 ************************************************************************************************************************
2228 *   Gfx10Lib::HwlComputeSlicePipeBankXor
2229 *
2230 *   @brief
2231 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2232 *
2233 *   @return
2234 *       PipeBankXor value
2235 ************************************************************************************************************************
2236 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2237 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2238     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
2239     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
2240     ) const
2241 {
2242     if (IsNonPrtXor(pIn->swizzleMode))
2243     {
2244         const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2245         const UINT_32 pipeBits  = GetPipeXorBits(blockBits);
2246         const UINT_32 pipeXor   = ReverseBitVector(pIn->slice, pipeBits);
2247 
2248         pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2249 
2250         if (pIn->bpe != 0)
2251         {
2252             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2253                                                                     pIn->resourceType,
2254                                                                     Log2(pIn->bpe >> 3),
2255                                                                     1);
2256 
2257             if (pPatInfo != NULL)
2258             {
2259                 ADDR_BIT_SETTING fullSwizzlePattern[20];
2260                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2261 
2262                 const UINT_32 pipeBankXorOffset =
2263                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2264                                                     blockBits,
2265                                                     0,
2266                                                     0,
2267                                                     pIn->slice,
2268                                                     0);
2269 
2270                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2271 
2272                 // Should have no bit set under pipe interleave
2273                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2274 
2275                 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
2276                 ADDR_ASSERT(pipeBankXor == pipeXor);
2277 
2278                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2279             }
2280         }
2281     }
2282     else
2283     {
2284         pOut->pipeBankXor = 0;
2285     }
2286 
2287     return ADDR_OK;
2288 }
2289 
2290 /**
2291 ************************************************************************************************************************
2292 *   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2293 *
2294 *   @brief
2295 *       Compute sub resource offset to support swizzle pattern
2296 *
2297 *   @return
2298 *       Offset
2299 ************************************************************************************************************************
2300 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2301 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2302     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
2303     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
2304     ) const
2305 {
2306     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2307 
2308     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2309 
2310     return ADDR_OK;
2311 }
2312 
2313 /**
2314 ************************************************************************************************************************
2315 *   Gfx10Lib::HwlComputeNonBlockCompressedView
2316 *
2317 *   @brief
2318 *       Compute non-block-compressed view for a given mipmap level/slice.
2319 *
2320 *   @return
2321 *       ADDR_E_RETURNCODE
2322 ************************************************************************************************************************
2323 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const2324 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2325     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
2326     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
2327     ) const
2328 {
2329     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2330 
2331     if (pIn->resourceType != ADDR_RSRC_TEX_2D)
2332     {
2333         // Only 2D resource can have a NonBC view...
2334         returnCode = ADDR_INVALIDPARAMS;
2335     }
2336     else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
2337              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2338     {
2339         // Only support BC1~BC7, ASTC, or ETC2 for now...
2340         returnCode = ADDR_NOTSUPPORTED;
2341     }
2342     else
2343     {
2344         UINT_32 bcWidth, bcHeight;
2345         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2346 
2347         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2348         infoIn.flags        = pIn->flags;
2349         infoIn.swizzleMode  = pIn->swizzleMode;
2350         infoIn.resourceType = pIn->resourceType;
2351         infoIn.bpp          = bpp;
2352         infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
2353         infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
2354         infoIn.numSlices    = pIn->numSlices;
2355         infoIn.numMipLevels = pIn->numMipLevels;
2356         infoIn.numSamples   = 1;
2357         infoIn.numFrags     = 1;
2358 
2359         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2360 
2361         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2362         infoOut.pMipInfo = mipInfo;
2363 
2364         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2365 
2366         if (tiled)
2367         {
2368             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2369         }
2370         else
2371         {
2372             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2373         }
2374 
2375         if (returnCode == ADDR_OK)
2376         {
2377             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2378             subOffIn.swizzleMode      = infoIn.swizzleMode;
2379             subOffIn.resourceType     = infoIn.resourceType;
2380             subOffIn.slice            = pIn->slice;
2381             subOffIn.sliceSize        = infoOut.sliceSize;
2382             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2383             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
2384 
2385             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2386 
2387             // For any mipmap level, move nonBc view base address by offset
2388             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2389             pOut->offset = subOffOut.offset;
2390 
2391             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2392             slicePbXorIn.bpe             = infoIn.bpp;
2393             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
2394             slicePbXorIn.resourceType    = infoIn.resourceType;
2395             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2396             slicePbXorIn.slice           = pIn->slice;
2397 
2398             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2399 
2400             // For any mipmap level, nonBc view should use computed pbXor
2401             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2402             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2403 
2404             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2405             const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2406             const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2407 
2408             if (inTail)
2409             {
2410                 // For mipmap level that is in mip tail block, hack a lot of things...
2411                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2412                 // are fit in tail block:
2413 
2414                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2415                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2416 
2417                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2418                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2419 
2420                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2421                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2422 
2423                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2424                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2425             }
2426             // This check should cover at least mipId == 0
2427             else if (requestMipWidth << pIn->mipId == infoIn.width)
2428             {
2429                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2430                 // - only one mipmap level and mipId = 0
2431                 pOut->mipId        = 0;
2432                 pOut->numMipLevels = 1;
2433 
2434                 // (mip0) width = requestMipWidth
2435                 pOut->unalignedWidth = requestMipWidth;
2436 
2437                 // (mip0) height = requestMipHeight
2438                 pOut->unalignedHeight = requestMipHeight;
2439             }
2440             else
2441             {
2442                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2443                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2444                 // because single mip view may have different pitch value than original (multiple) mip view...
2445                 // A simple case would be:
2446                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2447                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2448                 //   mip0 width = 0x101/mip1 width = 0x80
2449                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2450                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2451 
2452                 // - 2 levels and mipId = 1
2453                 pOut->mipId        = 1;
2454                 pOut->numMipLevels = 2;
2455 
2456                 const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2457                 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2458 
2459                 const BOOL_32 needToAvoidInTail =
2460                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2461                     TRUE : FALSE;
2462 
2463                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2464                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2465 
2466                 const BOOL_32 needExtraWidth =
2467                     ((upperMipWidth < requestMipWidth * 2) ||
2468                      ((upperMipWidth == requestMipWidth * 2) &&
2469                       ((needToAvoidInTail == TRUE) ||
2470                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2471 
2472                 const BOOL_32 needExtraHeight =
2473                     ((upperMipHeight < requestMipHeight * 2) ||
2474                      ((upperMipHeight == requestMipHeight * 2) &&
2475                       ((needToAvoidInTail == TRUE) ||
2476                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2477 
2478                 // (mip0) width = requestLastMipLevelWidth
2479                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2480 
2481                 // (mip0) height = requestLastMipLevelHeight
2482                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2483             }
2484 
2485             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2486             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2487             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2488             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2489         }
2490     }
2491 
2492     return returnCode;
2493 }
2494 
2495 /**
2496 ************************************************************************************************************************
2497 *   Gfx10Lib::ValidateNonSwModeParams
2498 *
2499 *   @brief
2500 *       Validate compute surface info params except swizzle mode
2501 *
2502 *   @return
2503 *       TRUE if parameters are valid, FALSE otherwise
2504 ************************************************************************************************************************
2505 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2506 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2507     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2508 {
2509     BOOL_32 valid = TRUE;
2510 
2511     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2512     {
2513         ADDR_ASSERT_ALWAYS();
2514         valid = FALSE;
2515     }
2516 
2517     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2518     {
2519         ADDR_ASSERT_ALWAYS();
2520         valid = FALSE;
2521     }
2522 
2523     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2524     const AddrResourceType    rsrcType = pIn->resourceType;
2525     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2526     const BOOL_32             msaa     = (pIn->numFrags > 1);
2527     const BOOL_32             display  = flags.display;
2528     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2529     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2530     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2531     const BOOL_32             stereo   = flags.qbStereo;
2532 
2533     // Resource type check
2534     if (tex1d)
2535     {
2536         if (msaa || display || stereo)
2537         {
2538             ADDR_ASSERT_ALWAYS();
2539             valid = FALSE;
2540         }
2541     }
2542     else if (tex2d)
2543     {
2544         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2545         {
2546             ADDR_ASSERT_ALWAYS();
2547             valid = FALSE;
2548         }
2549     }
2550     else if (tex3d)
2551     {
2552         if (msaa || display || stereo)
2553         {
2554             ADDR_ASSERT_ALWAYS();
2555             valid = FALSE;
2556         }
2557     }
2558     else
2559     {
2560         ADDR_ASSERT_ALWAYS();
2561         valid = FALSE;
2562     }
2563 
2564     return valid;
2565 }
2566 
2567 /**
2568 ************************************************************************************************************************
2569 *   Gfx10Lib::ValidateSwModeParams
2570 *
2571 *   @brief
2572 *       Validate compute surface info related to swizzle mode
2573 *
2574 *   @return
2575 *       TRUE if parameters are valid, FALSE otherwise
2576 ************************************************************************************************************************
2577 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2578 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2579     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2580 {
2581     BOOL_32 valid = TRUE;
2582 
2583     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2584     {
2585         ADDR_ASSERT_ALWAYS();
2586         valid = FALSE;
2587     }
2588     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2589     {
2590         {
2591             ADDR_ASSERT_ALWAYS();
2592             valid = FALSE;
2593         }
2594     }
2595 
2596     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2597     const AddrResourceType    rsrcType    = pIn->resourceType;
2598     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2599     const BOOL_32             msaa        = (pIn->numFrags > 1);
2600     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2601     const BOOL_32             color       = flags.color;
2602     const BOOL_32             display     = flags.display;
2603     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2604     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2605     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2606     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2607     const BOOL_32             linear      = IsLinear(swizzle);
2608     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2609     const BOOL_32             blkVar      = IsBlockVariable(swizzle);
2610     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2611     const BOOL_32             prt         = flags.prt;
2612     const BOOL_32             fmask       = flags.fmask;
2613 
2614     // Misc check
2615     if ((pIn->numFrags > 1) &&
2616         (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2617     {
2618         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2619         ADDR_ASSERT_ALWAYS();
2620         valid = FALSE;
2621     }
2622 
2623     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2624     {
2625         ADDR_ASSERT_ALWAYS();
2626         valid = FALSE;
2627     }
2628 
2629     if ((pIn->bpp == 96) && (linear == FALSE))
2630     {
2631         ADDR_ASSERT_ALWAYS();
2632         valid = FALSE;
2633     }
2634 
2635     const UINT_32 swizzleMask = 1 << swizzle;
2636 
2637     // Resource type check
2638     if (tex1d)
2639     {
2640         if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2641         {
2642             ADDR_ASSERT_ALWAYS();
2643             valid = FALSE;
2644         }
2645     }
2646     else if (tex2d)
2647     {
2648         if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2649         {
2650             {
2651                 ADDR_ASSERT_ALWAYS();
2652                 valid = FALSE;
2653             }
2654         }
2655         else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2656                  (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2657         {
2658             ADDR_ASSERT_ALWAYS();
2659             valid = FALSE;
2660         }
2661     }
2662     else if (tex3d)
2663     {
2664         if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2665             (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2666             (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2667         {
2668             ADDR_ASSERT_ALWAYS();
2669             valid = FALSE;
2670         }
2671     }
2672 
2673     // Swizzle type check
2674     if (linear)
2675     {
2676         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2677         {
2678             ADDR_ASSERT_ALWAYS();
2679             valid = FALSE;
2680         }
2681     }
2682     else if (IsZOrderSwizzle(swizzle))
2683     {
2684         if ((pIn->bpp > 64)                         ||
2685             (msaa && (color || (pIn->bpp > 32)))    ||
2686             ElemLib::IsBlockCompressed(pIn->format) ||
2687             ElemLib::IsMacroPixelPacked(pIn->format))
2688         {
2689             ADDR_ASSERT_ALWAYS();
2690             valid = FALSE;
2691         }
2692     }
2693     else if (IsStandardSwizzle(rsrcType, swizzle))
2694     {
2695         if (zbuffer || msaa)
2696         {
2697             ADDR_ASSERT_ALWAYS();
2698             valid = FALSE;
2699         }
2700     }
2701     else if (IsDisplaySwizzle(rsrcType, swizzle))
2702     {
2703         if (zbuffer || msaa)
2704         {
2705             ADDR_ASSERT_ALWAYS();
2706             valid = FALSE;
2707         }
2708     }
2709     else if (IsRtOptSwizzle(swizzle))
2710     {
2711         if (zbuffer)
2712         {
2713             ADDR_ASSERT_ALWAYS();
2714             valid = FALSE;
2715         }
2716     }
2717     else
2718     {
2719         {
2720             ADDR_ASSERT_ALWAYS();
2721             valid = FALSE;
2722         }
2723     }
2724 
2725     // Block type check
2726     if (blk256B)
2727     {
2728         if (zbuffer || tex3d || msaa)
2729         {
2730             ADDR_ASSERT_ALWAYS();
2731             valid = FALSE;
2732         }
2733     }
2734     else if (blkVar)
2735     {
2736         if (m_blockVarSizeLog2 == 0)
2737         {
2738             ADDR_ASSERT_ALWAYS();
2739             valid = FALSE;
2740         }
2741     }
2742 
2743     return valid;
2744 }
2745 
2746 /**
2747 ************************************************************************************************************************
2748 *   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2749 *
2750 *   @brief
2751 *       Compute surface info sanity check
2752 *
2753 *   @return
2754 *       Offset
2755 ************************************************************************************************************************
2756 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2757 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2758     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2759     ) const
2760 {
2761     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2762 }
2763 
2764 /**
2765 ************************************************************************************************************************
2766 *   Gfx10Lib::HwlGetPreferredSurfaceSetting
2767 *
2768 *   @brief
2769 *       Internal function to get suggested surface information for client to use
2770 *
2771 *   @return
2772 *       ADDR_E_RETURNCODE
2773 ************************************************************************************************************************
2774 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2775 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2776     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2777     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2778     ) const
2779 {
2780     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2781 
2782     if (pIn->flags.fmask)
2783     {
2784         const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2785         const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2786 
2787         if (forbid64KbBlockType && forbidVarBlockType)
2788         {
2789             // Invalid combination...
2790             ADDR_ASSERT_ALWAYS();
2791             returnCode = ADDR_INVALIDPARAMS;
2792         }
2793         else
2794         {
2795             pOut->resourceType                   = ADDR_RSRC_TEX_2D;
2796             pOut->validBlockSet.value            = 0;
2797             pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
2798             pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
2799             pOut->validSwModeSet.value           = 0;
2800             pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
2801             pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
2802             pOut->canXor                         = TRUE;
2803             pOut->validSwTypeSet.value           = AddrSwSetZ;
2804             pOut->clientPreferredSwSet           = pOut->validSwTypeSet;
2805 
2806             BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2807 
2808             if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2809             {
2810                 const UINT_8  maxFmaskSwizzleModeType = 2;
2811                 const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2812                 const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2813                 const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2814                 const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
2815                 const UINT_32 width                   = Max(pIn->width, 1u);
2816                 const UINT_32 height                  = Max(pIn->height, 1u);
2817                 const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2818 
2819                 AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2820                 Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
2821                 Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
2822                 UINT_64         padSize[maxFmaskSwizzleModeType] = {};
2823 
2824                 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2825                 {
2826                     ComputeBlockDimensionForSurf(&blkDim[i].w,
2827                                                  &blkDim[i].h,
2828                                                  &blkDim[i].d,
2829                                                  fmaskBpp,
2830                                                  1,
2831                                                  pOut->resourceType,
2832                                                  swMode[i]);
2833 
2834                     padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2835                     padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2836                 }
2837 
2838                 if (BlockTypeWithinMemoryBudget(padSize[0],
2839                                                 padSize[1],
2840                                                 ratioLow,
2841                                                 ratioHi,
2842                                                 pIn->memoryBudget,
2843                                                 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2844                 {
2845                     use64KbBlockType = FALSE;
2846                 }
2847             }
2848             else if (forbidVarBlockType)
2849             {
2850                 use64KbBlockType = TRUE;
2851             }
2852 
2853             if (use64KbBlockType)
2854             {
2855                 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2856             }
2857             else
2858             {
2859                 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2860             }
2861         }
2862     }
2863     else
2864     {
2865         UINT_32 bpp    = pIn->bpp;
2866         UINT_32 width  = Max(pIn->width, 1u);
2867         UINT_32 height = Max(pIn->height, 1u);
2868 
2869         // Set format to INVALID will skip this conversion
2870         if (pIn->format != ADDR_FMT_INVALID)
2871         {
2872             ElemMode elemMode = ADDR_UNCOMPRESSED;
2873             UINT_32 expandX, expandY;
2874 
2875             // Get compression/expansion factors and element mode which indicates compression/expansion
2876             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2877                                                 &elemMode,
2878                                                 &expandX,
2879                                                 &expandY);
2880 
2881             UINT_32 basePitch = 0;
2882             GetElemLib()->AdjustSurfaceInfo(elemMode,
2883                                             expandX,
2884                                             expandY,
2885                                             &bpp,
2886                                             &basePitch,
2887                                             &width,
2888                                             &height);
2889         }
2890 
2891         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2892         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2893         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2894         const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2895         const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
2896 
2897         // Pre sanity check on non swizzle mode parameters
2898         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2899         localIn.flags        = pIn->flags;
2900         localIn.resourceType = pIn->resourceType;
2901         localIn.format       = pIn->format;
2902         localIn.bpp          = bpp;
2903         localIn.width        = width;
2904         localIn.height       = height;
2905         localIn.numSlices    = numSlices;
2906         localIn.numMipLevels = numMipLevels;
2907         localIn.numSamples   = numSamples;
2908         localIn.numFrags     = numFrags;
2909 
2910         if (ValidateNonSwModeParams(&localIn))
2911         {
2912             // Forbid swizzle mode(s) by client setting
2913             ADDR2_SWMODE_SET allowedSwModeSet = {};
2914             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2915             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
2916             allowedSwModeSet.value |=
2917                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2918                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2919             allowedSwModeSet.value |=
2920                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2921                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2922             allowedSwModeSet.value |=
2923                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2924                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2925             allowedSwModeSet.value |=
2926                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2927                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2928             allowedSwModeSet.value |=
2929                 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2930 
2931             if (pIn->preferredSwSet.value != 0)
2932             {
2933                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2934                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2935                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2936                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2937             }
2938 
2939             if (pIn->noXor)
2940             {
2941                 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2942             }
2943 
2944             if (pIn->maxAlign > 0)
2945             {
2946                 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2947                 {
2948                     allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2949                 }
2950 
2951                 if (pIn->maxAlign < Size64K)
2952                 {
2953                     allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2954                 }
2955 
2956                 if (pIn->maxAlign < Size4K)
2957                 {
2958                     allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2959                 }
2960 
2961                 if (pIn->maxAlign < Size256)
2962                 {
2963                     allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2964                 }
2965             }
2966 
2967             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2968             switch (pIn->resourceType)
2969             {
2970                 case ADDR_RSRC_TEX_1D:
2971                     allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2972                     break;
2973 
2974                 case ADDR_RSRC_TEX_2D:
2975                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2976                     break;
2977 
2978                 case ADDR_RSRC_TEX_3D:
2979                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2980 
2981                     if (pIn->flags.view3dAs2dArray)
2982                     {
2983                         allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2984                     }
2985                     break;
2986 
2987                 default:
2988                     ADDR_ASSERT_ALWAYS();
2989                     allowedSwModeSet.value = 0;
2990                     break;
2991             }
2992 
2993             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2994                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2995                 (bpp > 64)                               ||
2996                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2997             {
2998                 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2999             }
3000 
3001             if (pIn->format == ADDR_FMT_32_32_32)
3002             {
3003                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3004             }
3005 
3006             if (msaa)
3007             {
3008                 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
3009             }
3010 
3011             if (pIn->flags.depth || pIn->flags.stencil)
3012             {
3013                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3014             }
3015 
3016             if (pIn->flags.display)
3017             {
3018                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3019             }
3020 
3021 
3022             if (allowedSwModeSet.value != 0)
3023             {
3024 #if DEBUG
3025                 // Post sanity check, at least AddrLib should accept the output generated by its own
3026                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3027 
3028                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3029                 {
3030                     if (validateSwModeSet & 1)
3031                     {
3032                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3033                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
3034                     }
3035 
3036                     validateSwModeSet >>= 1;
3037                 }
3038 #endif
3039 
3040                 pOut->resourceType   = pIn->resourceType;
3041                 pOut->validSwModeSet = allowedSwModeSet;
3042                 pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3043                 pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3044                 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3045 
3046                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3047 
3048                 if (pOut->clientPreferredSwSet.value == 0)
3049                 {
3050                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
3051                 }
3052 
3053                 // Apply optional restrictions
3054                 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3055                 {
3056                     if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3057                     {
3058                         // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3059                         // the GL2 in VAR mode, so it should be avoided.
3060                         allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3061                     }
3062                     else
3063                     {
3064                         // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3065                         // But we have to suffer from low performance because there is no other choice...
3066                         ADDR_ASSERT_ALWAYS();
3067                     }
3068                 }
3069 
3070                 if (pIn->flags.needEquation)
3071                 {
3072                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3073                 }
3074 
3075                 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3076                 {
3077                     pOut->swizzleMode = ADDR_SW_LINEAR;
3078                 }
3079                 else
3080                 {
3081                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3082 
3083                     if ((height > 1) && (computeMinSize == FALSE))
3084                     {
3085                         // Always ignore linear swizzle mode if:
3086                         // 1. This is a (2D/3D) resource with height > 1
3087                         // 2. Client doesn't require computing minimize size
3088                         allowedSwModeSet.swLinear = 0;
3089                     }
3090 
3091                     // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
3092                     ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3093 
3094                     // Determine block size if there are 2 or more block type candidates
3095                     if (IsPow2(allowedBlockSet.value) == FALSE)
3096                     {
3097                         // Tracks a valid SwizzleMode for each valid block type
3098                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3099 
3100                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3101 
3102                         if (m_blockVarSizeLog2 != 0)
3103                         {
3104                             swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3105                         }
3106 
3107                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3108                         {
3109                             swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3110                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
3111                             swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3112                         }
3113                         else
3114                         {
3115                             swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
3116                             swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
3117                             swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3118                         }
3119 
3120                         // Tracks the size of each valid swizzle mode's surface in bytes
3121                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
3122 
3123                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3124                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3125                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3126                         UINT_32       minSizeBlk         = AddrBlockMicro; // Tracks the most optimal block to use
3127                         UINT_64       minSize            = 0;              // Tracks the minimum acceptable block type
3128 
3129                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3130 
3131                         // Iterate through all block types
3132                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3133                         {
3134                             if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3135                             {
3136                                 localIn.swizzleMode = swMode[i];
3137 
3138                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3139                                 {
3140                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3141                                 }
3142                                 else
3143                                 {
3144                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3145                                 }
3146 
3147                                 if (returnCode == ADDR_OK)
3148                                 {
3149                                     padSize[i] = localOut.surfSize;
3150 
3151                                     if (minSize == 0)
3152                                     {
3153                                         minSize    = padSize[i];
3154                                         minSizeBlk = i;
3155                                     }
3156                                     else
3157                                     {
3158                                         // Checks if the block type is within the memory budget but favors larger blocks
3159                                         if (BlockTypeWithinMemoryBudget(
3160                                                 minSize,
3161                                                 padSize[i],
3162                                                 ratioLow,
3163                                                 ratioHi,
3164                                                 0.0,
3165                                                 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3166                                         {
3167                                             minSize    = padSize[i];
3168                                             minSizeBlk = i;
3169                                         }
3170                                     }
3171                                 }
3172                                 else
3173                                 {
3174                                     ADDR_ASSERT_ALWAYS();
3175                                     break;
3176                                 }
3177                             }
3178                         }
3179 
3180                         if (pIn->memoryBudget > 1.0)
3181                         {
3182                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3183                             // smaller-block type again in coming loop
3184                             switch (minSizeBlk)
3185                             {
3186                                 case AddrBlockThick64KB:
3187                                     allowedBlockSet.macroThin64KB = 0;
3188                                 case AddrBlockThinVar:
3189                                 case AddrBlockThin64KB:
3190                                     allowedBlockSet.macroThick4KB = 0;
3191                                 case AddrBlockThick4KB:
3192                                     allowedBlockSet.macroThin4KB = 0;
3193                                 case AddrBlockThin4KB:
3194                                     allowedBlockSet.micro  = 0;
3195                                 case AddrBlockMicro:
3196                                     allowedBlockSet.linear = 0;
3197                                 case AddrBlockLinear:
3198                                     break;
3199 
3200                                 default:
3201                                     ADDR_ASSERT_ALWAYS();
3202                                     break;
3203                             }
3204 
3205                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3206                             {
3207                                 if ((i != minSizeBlk) &&
3208                                     IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3209                                 {
3210                                     if (BlockTypeWithinMemoryBudget(
3211                                             minSize,
3212                                             padSize[i],
3213                                             0,
3214                                             0,
3215                                             pIn->memoryBudget,
3216                                             GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3217                                     {
3218                                         // Clear the block type if the memory waste is unacceptable
3219                                         allowedBlockSet.value &= ~(1u << (i - 1));
3220                                     }
3221                                 }
3222                             }
3223 
3224                             // Remove VAR block type if bigger block type is allowed
3225                             if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3226                             {
3227                                 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3228                                 {
3229                                     allowedBlockSet.var = 0;
3230                                 }
3231                             }
3232 
3233                             // Remove linear block type if 2 or more block types are allowed
3234                             if (IsPow2(allowedBlockSet.value) == FALSE)
3235                             {
3236                                 allowedBlockSet.linear = 0;
3237                             }
3238 
3239                             // Select the biggest allowed block type
3240                             minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3241 
3242                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3243                             {
3244                                 minSizeBlk = AddrBlockLinear;
3245                             }
3246                         }
3247 
3248                         switch (minSizeBlk)
3249                         {
3250                             case AddrBlockLinear:
3251                                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3252                                 break;
3253 
3254                             case AddrBlockMicro:
3255                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3256                                 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3257                                 break;
3258 
3259                             case AddrBlockThin4KB:
3260                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3261                                 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3262                                 break;
3263 
3264                             case AddrBlockThick4KB:
3265                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3266                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3267                                 break;
3268 
3269                             case AddrBlockThin64KB:
3270                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3271                                                           Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3272                                 break;
3273 
3274                             case AddrBlockThick64KB:
3275                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3276                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3277                                 break;
3278 
3279                             case AddrBlockThinVar:
3280                                 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3281                                 break;
3282 
3283                             default:
3284                                 ADDR_ASSERT_ALWAYS();
3285                                 allowedSwModeSet.value = 0;
3286                                 break;
3287                         }
3288                     }
3289 
3290                     // Block type should be determined.
3291                     ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3292 
3293                     ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3294 
3295                     // Determine swizzle type if there are 2 or more swizzle type candidates
3296                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3297                     {
3298                         if (ElemLib::IsBlockCompressed(pIn->format))
3299                         {
3300                             if (allowedSwSet.sw_D)
3301                             {
3302                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3303                             }
3304                             else if (allowedSwSet.sw_S)
3305                             {
3306                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3307                             }
3308                             else
3309                             {
3310                                 ADDR_ASSERT(allowedSwSet.sw_R);
3311                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3312                             }
3313                         }
3314                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
3315                         {
3316                             if (allowedSwSet.sw_S)
3317                             {
3318                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3319                             }
3320                             else if (allowedSwSet.sw_D)
3321                             {
3322                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3323                             }
3324                             else
3325                             {
3326                                 ADDR_ASSERT(allowedSwSet.sw_R);
3327                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3328                             }
3329                         }
3330                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3331                         {
3332                             if (pIn->flags.color &&
3333                                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3334                                 allowedSwSet.sw_D)
3335                             {
3336                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3337                             }
3338                             else if (allowedSwSet.sw_S)
3339                             {
3340                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3341                             }
3342                             else if (allowedSwSet.sw_R)
3343                             {
3344                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3345                             }
3346                             else
3347                             {
3348                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3349                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3350                             }
3351                         }
3352                         else
3353                         {
3354                             if (allowedSwSet.sw_R)
3355                             {
3356                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3357                             }
3358                             else if (allowedSwSet.sw_D)
3359                             {
3360                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3361                             }
3362                             else if (allowedSwSet.sw_S)
3363                             {
3364                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3365                             }
3366                             else
3367                             {
3368                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3369                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3370                             }
3371                         }
3372 
3373                         // Swizzle type should be determined.
3374                         ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3375                     }
3376 
3377                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3378                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3379                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3380                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3381                 }
3382             }
3383             else
3384             {
3385                 // Invalid combination...
3386                 ADDR_ASSERT_ALWAYS();
3387                 returnCode = ADDR_INVALIDPARAMS;
3388             }
3389         }
3390         else
3391         {
3392             // Invalid combination...
3393             ADDR_ASSERT_ALWAYS();
3394             returnCode = ADDR_INVALIDPARAMS;
3395         }
3396     }
3397 
3398     return returnCode;
3399 }
3400 
3401 /**
3402 ************************************************************************************************************************
3403 *   Gfx10Lib::ComputeStereoInfo
3404 *
3405 *   @brief
3406 *       Compute height alignment and right eye pipeBankXor for stereo surface
3407 *
3408 *   @return
3409 *       Error code
3410 *
3411 ************************************************************************************************************************
3412 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3413 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3414     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3415     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3416     UINT_32*                                pRightXor   ///< Right eye xor
3417     ) const
3418 {
3419     ADDR_E_RETURNCODE ret = ADDR_OK;
3420 
3421     *pRightXor = 0;
3422 
3423     if (IsNonPrtXor(pIn->swizzleMode))
3424     {
3425         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3426         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3427         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3428         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3429         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3430 
3431         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3432         {
3433             UINT_32 yMax     = 0;
3434             UINT_32 yPosMask = 0;
3435 
3436             // First get "max y bit"
3437             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3438             {
3439                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3440 
3441                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3442                     (m_equationTable[eqIndex].addr[i].index > yMax))
3443                 {
3444                     yMax = m_equationTable[eqIndex].addr[i].index;
3445                 }
3446 
3447                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3448                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3449                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3450                 {
3451                     yMax = m_equationTable[eqIndex].xor1[i].index;
3452                 }
3453 
3454                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3455                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3456                     (m_equationTable[eqIndex].xor2[i].index > yMax))
3457                 {
3458                     yMax = m_equationTable[eqIndex].xor2[i].index;
3459                 }
3460             }
3461 
3462             // Then loop again for populating a position mask of "max Y bit"
3463             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3464             {
3465                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3466                     (m_equationTable[eqIndex].addr[i].index == yMax))
3467                 {
3468                     yPosMask |= 1u << i;
3469                 }
3470                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3471                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3472                          (m_equationTable[eqIndex].xor1[i].index == yMax))
3473                 {
3474                     yPosMask |= 1u << i;
3475                 }
3476                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3477                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3478                          (m_equationTable[eqIndex].xor2[i].index == yMax))
3479                 {
3480                     yPosMask |= 1u << i;
3481                 }
3482             }
3483 
3484             const UINT_32 additionalAlign = 1 << yMax;
3485 
3486             if (additionalAlign >= *pAlignY)
3487             {
3488                 *pAlignY = additionalAlign;
3489 
3490                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3491 
3492                 if ((alignedHeight >> yMax) & 1)
3493                 {
3494                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3495                 }
3496             }
3497         }
3498         else
3499         {
3500             ret = ADDR_INVALIDPARAMS;
3501         }
3502     }
3503 
3504     return ret;
3505 }
3506 
3507 /**
3508 ************************************************************************************************************************
3509 *   Gfx10Lib::HwlComputeSurfaceInfoTiled
3510 *
3511 *   @brief
3512 *       Internal function to calculate alignment for tiled surface
3513 *
3514 *   @return
3515 *       ADDR_E_RETURNCODE
3516 ************************************************************************************************************************
3517 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3518 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3519      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3520      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3521      ) const
3522 {
3523     ADDR_E_RETURNCODE ret;
3524 
3525     // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3526     pOut->mipChainPitch    = 0;
3527     pOut->mipChainHeight   = 0;
3528     pOut->mipChainSlice    = 0;
3529     pOut->epitchIsHeight   = FALSE;
3530 
3531     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3532     pOut->mipChainInTail   = FALSE;
3533     pOut->firstMipIdInTail = pIn->numMipLevels;
3534 
3535     if (IsBlock256b(pIn->swizzleMode))
3536     {
3537         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3538     }
3539     else
3540     {
3541         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3542     }
3543 
3544     return ret;
3545 }
3546 
3547 /**
3548 ************************************************************************************************************************
3549 *   Gfx10Lib::ComputeSurfaceInfoMicroTiled
3550 *
3551 *   @brief
3552 *       Internal function to calculate alignment for micro tiled surface
3553 *
3554 *   @return
3555 *       ADDR_E_RETURNCODE
3556 ************************************************************************************************************************
3557 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3558 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3559      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3560      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3561      ) const
3562 {
3563     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3564                                                          &pOut->blockHeight,
3565                                                          &pOut->blockSlices,
3566                                                          pIn->bpp,
3567                                                          pIn->numFrags,
3568                                                          pIn->resourceType,
3569                                                          pIn->swizzleMode);
3570 
3571     if (ret == ADDR_OK)
3572     {
3573         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3574 
3575         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3576         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3577         pOut->numSlices = pIn->numSlices;
3578         pOut->baseAlign = blockSize;
3579 
3580         if (pIn->numMipLevels > 1)
3581         {
3582             const UINT_32 mip0Width    = pIn->width;
3583             const UINT_32 mip0Height   = pIn->height;
3584             UINT_64       mipSliceSize = 0;
3585 
3586             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3587             {
3588                 UINT_32 mipWidth, mipHeight;
3589 
3590                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3591 
3592                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3593                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3594 
3595                 if (pOut->pMipInfo != NULL)
3596                 {
3597                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3598                     pOut->pMipInfo[i].height           = mipActualHeight;
3599                     pOut->pMipInfo[i].depth            = 1;
3600                     pOut->pMipInfo[i].offset           = mipSliceSize;
3601                     pOut->pMipInfo[i].mipTailOffset    = 0;
3602                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3603                 }
3604 
3605                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3606             }
3607 
3608             pOut->sliceSize = mipSliceSize;
3609             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3610         }
3611         else
3612         {
3613             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3614             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3615 
3616             if (pOut->pMipInfo != NULL)
3617             {
3618                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3619                 pOut->pMipInfo[0].height           = pOut->height;
3620                 pOut->pMipInfo[0].depth            = 1;
3621                 pOut->pMipInfo[0].offset           = 0;
3622                 pOut->pMipInfo[0].mipTailOffset    = 0;
3623                 pOut->pMipInfo[0].macroBlockOffset = 0;
3624             }
3625         }
3626 
3627     }
3628 
3629     return ret;
3630 }
3631 
3632 /**
3633 ************************************************************************************************************************
3634 *   Gfx10Lib::ComputeSurfaceInfoMacroTiled
3635 *
3636 *   @brief
3637 *       Internal function to calculate alignment for macro tiled surface
3638 *
3639 *   @return
3640 *       ADDR_E_RETURNCODE
3641 ************************************************************************************************************************
3642 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3643 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3644      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3645      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3646      ) const
3647 {
3648     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3649                                                                 &pOut->blockHeight,
3650                                                                 &pOut->blockSlices,
3651                                                                 pIn->bpp,
3652                                                                 pIn->numFrags,
3653                                                                 pIn->resourceType,
3654                                                                 pIn->swizzleMode);
3655 
3656     if (returnCode == ADDR_OK)
3657     {
3658         UINT_32 heightAlign = pOut->blockHeight;
3659 
3660         if (pIn->flags.qbStereo)
3661         {
3662             UINT_32 rightXor = 0;
3663 
3664             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3665 
3666             if (returnCode == ADDR_OK)
3667             {
3668                 pOut->pStereoInfo->rightSwizzle = rightXor;
3669             }
3670         }
3671 
3672         if (returnCode == ADDR_OK)
3673         {
3674             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3675             const UINT_32 blockSize     = 1 << blockSizeLog2;
3676 
3677             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3678             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3679             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3680             pOut->baseAlign = blockSize;
3681 
3682             if (pIn->numMipLevels > 1)
3683             {
3684                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3685                                                                 pIn->swizzleMode,
3686                                                                 pOut->blockWidth,
3687                                                                 pOut->blockHeight,
3688                                                                 pOut->blockSlices);
3689                 const UINT_32 mip0Width         = pIn->width;
3690                 const UINT_32 mip0Height        = pIn->height;
3691                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3692                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3693                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3694                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3695                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3696                 UINT_64       mipChainSliceSize = 0;
3697                 UINT_64       mipSize[MaxMipLevels];
3698                 UINT_64       mipSliceSize[MaxMipLevels];
3699 
3700                 Dim3d fixedTailMaxDim = tailMaxDim;
3701 
3702                 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3703                 {
3704                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3705                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3706                 }
3707 
3708                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3709                 {
3710                     UINT_32 mipWidth, mipHeight, mipDepth;
3711 
3712                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3713 
3714                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3715                     {
3716                         firstMipInTail     = i;
3717                         mipChainSliceSize += blockSize / pOut->blockSlices;
3718                         break;
3719                     }
3720                     else
3721                     {
3722                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3723                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3724                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3725                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3726 
3727                         mipSize[i]         = sliceSize * depth;
3728                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3729                         mipChainSliceSize += sliceSize;
3730 
3731                         if (pOut->pMipInfo != NULL)
3732                         {
3733                             pOut->pMipInfo[i].pitch  = pitch;
3734                             pOut->pMipInfo[i].height = height;
3735                             pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3736                         }
3737                     }
3738                 }
3739 
3740                 pOut->sliceSize        = mipChainSliceSize;
3741                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3742                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3743                 pOut->firstMipIdInTail = firstMipInTail;
3744 
3745                 if (pOut->pMipInfo != NULL)
3746                 {
3747                     UINT_64 offset         = 0;
3748                     UINT_64 macroBlkOffset = 0;
3749                     UINT_32 tailMaxDepth   = 0;
3750 
3751                     if (firstMipInTail != pIn->numMipLevels)
3752                     {
3753                         UINT_32 mipWidth, mipHeight;
3754 
3755                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3756                                    &mipWidth, &mipHeight, &tailMaxDepth);
3757 
3758                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3759                         macroBlkOffset = blockSize;
3760                     }
3761 
3762                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3763                     {
3764                         pOut->pMipInfo[i].offset           = offset;
3765                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3766                         pOut->pMipInfo[i].mipTailOffset    = 0;
3767 
3768                         offset         += mipSize[i];
3769                         macroBlkOffset += mipSliceSize[i];
3770                     }
3771 
3772                     UINT_32 pitch  = tailMaxDim.w;
3773                     UINT_32 height = tailMaxDim.h;
3774                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3775 
3776                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3777 
3778                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3779                     {
3780                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3781                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3782 
3783                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3784                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3785                         pOut->pMipInfo[i].macroBlockOffset = 0;
3786 
3787                         pOut->pMipInfo[i].pitch  = pitch;
3788                         pOut->pMipInfo[i].height = height;
3789                         pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3790 
3791                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3792                                        ((mipOffset >> 10) & 2)  |
3793                                        ((mipOffset >> 11) & 4)  |
3794                                        ((mipOffset >> 12) & 8)  |
3795                                        ((mipOffset >> 13) & 16) |
3796                                        ((mipOffset >> 14) & 32);
3797                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3798                                        ((mipOffset >> 9)  & 2)  |
3799                                        ((mipOffset >> 10) & 4)  |
3800                                        ((mipOffset >> 11) & 8)  |
3801                                        ((mipOffset >> 12) & 16) |
3802                                        ((mipOffset >> 13) & 32);
3803 
3804                         if (blockSizeLog2 & 1)
3805                         {
3806                             const UINT_32 temp = mipX;
3807                             mipX = mipY;
3808                             mipY = temp;
3809 
3810                             if (index & 1)
3811                             {
3812                                 mipY = (mipY << 1) | (mipX & 1);
3813                                 mipX = mipX >> 1;
3814                             }
3815                         }
3816 
3817                         if (isThin)
3818                         {
3819                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3820                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3821                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3822 
3823                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3824                             height = Max(height >> 1, Block256_2d[index].h);
3825                         }
3826                         else
3827                         {
3828                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3829                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3830                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3831 
3832                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3833                             height = Max(height >> 1, Block256_3d[index].h);
3834                         }
3835                     }
3836                 }
3837             }
3838             else
3839             {
3840                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3841                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3842 
3843                 if (pOut->pMipInfo != NULL)
3844                 {
3845                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3846                     pOut->pMipInfo[0].height           = pOut->height;
3847                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3848                     pOut->pMipInfo[0].offset           = 0;
3849                     pOut->pMipInfo[0].mipTailOffset    = 0;
3850                     pOut->pMipInfo[0].macroBlockOffset = 0;
3851                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3852                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3853                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3854                 }
3855             }
3856         }
3857     }
3858 
3859     return returnCode;
3860 }
3861 
3862 /**
3863 ************************************************************************************************************************
3864 *   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3865 *
3866 *   @brief
3867 *       Internal function to calculate address from coord for tiled swizzle surface
3868 *
3869 *   @return
3870 *       ADDR_E_RETURNCODE
3871 ************************************************************************************************************************
3872 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3873 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3874      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3875      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3876      ) const
3877 {
3878     ADDR_E_RETURNCODE ret;
3879 
3880     if (IsBlock256b(pIn->swizzleMode))
3881     {
3882         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3883     }
3884     else
3885     {
3886         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3887     }
3888 
3889     return ret;
3890 }
3891 
3892 /**
3893 ************************************************************************************************************************
3894 *   Gfx10Lib::ComputeOffsetFromEquation
3895 *
3896 *   @brief
3897 *       Compute offset from equation
3898 *
3899 *   @return
3900 *       Offset
3901 ************************************************************************************************************************
3902 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3903 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3904     const ADDR_EQUATION* pEq,   ///< Equation
3905     UINT_32              x,     ///< x coord in bytes
3906     UINT_32              y,     ///< y coord in pixel
3907     UINT_32              z      ///< z coord in slice
3908     ) const
3909 {
3910     UINT_32 offset = 0;
3911 
3912     for (UINT_32 i = 0; i < pEq->numBits; i++)
3913     {
3914         UINT_32 v = 0;
3915 
3916         if (pEq->addr[i].valid)
3917         {
3918             if (pEq->addr[i].channel == 0)
3919             {
3920                 v ^= (x >> pEq->addr[i].index) & 1;
3921             }
3922             else if (pEq->addr[i].channel == 1)
3923             {
3924                 v ^= (y >> pEq->addr[i].index) & 1;
3925             }
3926             else
3927             {
3928                 ADDR_ASSERT(pEq->addr[i].channel == 2);
3929                 v ^= (z >> pEq->addr[i].index) & 1;
3930             }
3931         }
3932 
3933         if (pEq->xor1[i].valid)
3934         {
3935             if (pEq->xor1[i].channel == 0)
3936             {
3937                 v ^= (x >> pEq->xor1[i].index) & 1;
3938             }
3939             else if (pEq->xor1[i].channel == 1)
3940             {
3941                 v ^= (y >> pEq->xor1[i].index) & 1;
3942             }
3943             else
3944             {
3945                 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3946                 v ^= (z >> pEq->xor1[i].index) & 1;
3947             }
3948         }
3949 
3950         if (pEq->xor2[i].valid)
3951         {
3952             if (pEq->xor2[i].channel == 0)
3953             {
3954                 v ^= (x >> pEq->xor2[i].index) & 1;
3955             }
3956             else if (pEq->xor2[i].channel == 1)
3957             {
3958                 v ^= (y >> pEq->xor2[i].index) & 1;
3959             }
3960             else
3961             {
3962                 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3963                 v ^= (z >> pEq->xor2[i].index) & 1;
3964             }
3965         }
3966 
3967         offset |= (v << i);
3968     }
3969 
3970     return offset;
3971 }
3972 
3973 /**
3974 ************************************************************************************************************************
3975 *   Gfx10Lib::ComputeOffsetFromSwizzlePattern
3976 *
3977 *   @brief
3978 *       Compute offset from swizzle pattern
3979 *
3980 *   @return
3981 *       Offset
3982 ************************************************************************************************************************
3983 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3984 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3985     const UINT_64* pPattern,    ///< Swizzle pattern
3986     UINT_32        numBits,     ///< Number of bits in pattern
3987     UINT_32        x,           ///< x coord in pixel
3988     UINT_32        y,           ///< y coord in pixel
3989     UINT_32        z,           ///< z coord in slice
3990     UINT_32        s            ///< sample id
3991     ) const
3992 {
3993     UINT_32                 offset          = 0;
3994     const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3995 
3996     for (UINT_32 i = 0; i < numBits; i++)
3997     {
3998         UINT_32 v = 0;
3999 
4000         if (pSwizzlePattern[i].x != 0)
4001         {
4002             UINT_16 mask  = pSwizzlePattern[i].x;
4003             UINT_32 xBits = x;
4004 
4005             while (mask != 0)
4006             {
4007                 if (mask & 1)
4008                 {
4009                     v ^= xBits & 1;
4010                 }
4011 
4012                 xBits >>= 1;
4013                 mask  >>= 1;
4014             }
4015         }
4016 
4017         if (pSwizzlePattern[i].y != 0)
4018         {
4019             UINT_16 mask  = pSwizzlePattern[i].y;
4020             UINT_32 yBits = y;
4021 
4022             while (mask != 0)
4023             {
4024                 if (mask & 1)
4025                 {
4026                     v ^= yBits & 1;
4027                 }
4028 
4029                 yBits >>= 1;
4030                 mask  >>= 1;
4031             }
4032         }
4033 
4034         if (pSwizzlePattern[i].z != 0)
4035         {
4036             UINT_16 mask  = pSwizzlePattern[i].z;
4037             UINT_32 zBits = z;
4038 
4039             while (mask != 0)
4040             {
4041                 if (mask & 1)
4042                 {
4043                     v ^= zBits & 1;
4044                 }
4045 
4046                 zBits >>= 1;
4047                 mask  >>= 1;
4048             }
4049         }
4050 
4051         if (pSwizzlePattern[i].s != 0)
4052         {
4053             UINT_16 mask  = pSwizzlePattern[i].s;
4054             UINT_32 sBits = s;
4055 
4056             while (mask != 0)
4057             {
4058                 if (mask & 1)
4059                 {
4060                     v ^= sBits & 1;
4061                 }
4062 
4063                 sBits >>= 1;
4064                 mask  >>= 1;
4065             }
4066         }
4067 
4068         offset |= (v << i);
4069     }
4070 
4071     return offset;
4072 }
4073 
4074 /**
4075 ************************************************************************************************************************
4076 *   Gfx10Lib::GetSwizzlePatternInfo
4077 *
4078 *   @brief
4079 *       Get swizzle pattern
4080 *
4081 *   @return
4082 *       Swizzle pattern information
4083 ************************************************************************************************************************
4084 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4085 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4086     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
4087     AddrResourceType resourceType,      ///< Resource type
4088     UINT_32          elemLog2,          ///< Element size in bytes log2
4089     UINT_32          numFrag            ///< Number of fragment
4090     ) const
4091 {
4092     // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from
4093     // the right location
4094     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4095     const ADDR_SW_PATINFO* patInfo     = NULL;
4096     const UINT_32          swizzleMask = 1 << swizzleMode;
4097 
4098     if (IsBlockVariable(swizzleMode))
4099     {
4100         if (m_blockVarSizeLog2 != 0)
4101         {
4102             ADDR_ASSERT(m_settings.supportRbPlus);
4103 
4104             if (IsRtOptSwizzle(swizzleMode))
4105             {
4106                 if (numFrag == 1)
4107                 {
4108                     patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4109                 }
4110                 else if (numFrag == 2)
4111                 {
4112                     patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4113                 }
4114                 else if (numFrag == 4)
4115                 {
4116                     patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4117                 }
4118                 else
4119                 {
4120                     ADDR_ASSERT(numFrag == 8);
4121                     patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4122                 }
4123             }
4124             else if (IsZOrderSwizzle(swizzleMode))
4125             {
4126                 if (numFrag == 1)
4127                 {
4128                     patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4129                 }
4130                 else if (numFrag == 2)
4131                 {
4132                     patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4133                 }
4134                 else if (numFrag == 4)
4135                 {
4136                     patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4137                 }
4138                 else
4139                 {
4140                     ADDR_ASSERT(numFrag == 8);
4141                     patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4142                 }
4143             }
4144         }
4145     }
4146     else if (IsLinear(swizzleMode) == FALSE)
4147     {
4148         if (resourceType == ADDR_RSRC_TEX_3D)
4149         {
4150             ADDR_ASSERT(numFrag == 1);
4151 
4152             if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4153             {
4154                 if (IsRtOptSwizzle(swizzleMode))
4155                 {
4156                     if (swizzleMode == ADDR_SW_4KB_R_X)
4157                     {
4158                         patInfo = NULL;
4159                     }
4160                     else
4161                     {
4162                         patInfo = m_settings.supportRbPlus ?
4163                                   GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4164                     }
4165                 }
4166                 else if (IsZOrderSwizzle(swizzleMode))
4167                 {
4168                     patInfo = m_settings.supportRbPlus ?
4169                               GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4170                 }
4171                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4172                 {
4173                     ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4174                     patInfo = m_settings.supportRbPlus ?
4175                               GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4176                 }
4177                 else
4178                 {
4179                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4180 
4181                     if (IsBlock4kb(swizzleMode))
4182                     {
4183                         if (swizzleMode == ADDR_SW_4KB_S)
4184                         {
4185                             patInfo = m_settings.supportRbPlus ?
4186                                       GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4187                         }
4188                         else
4189                         {
4190                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4191                             patInfo = m_settings.supportRbPlus ?
4192                                       GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4193                         }
4194                     }
4195                     else
4196                     {
4197                         if (swizzleMode == ADDR_SW_64KB_S)
4198                         {
4199                             patInfo = m_settings.supportRbPlus ?
4200                                       GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4201                         }
4202                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4203                         {
4204                             patInfo = m_settings.supportRbPlus ?
4205                                       GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4206                         }
4207                         else
4208                         {
4209                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4210                             patInfo = m_settings.supportRbPlus ?
4211                                       GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4212                         }
4213                     }
4214                 }
4215             }
4216         }
4217         else
4218         {
4219             if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4220             {
4221                 if (IsBlock256b(swizzleMode))
4222                 {
4223                     if (swizzleMode == ADDR_SW_256B_S)
4224                     {
4225                         patInfo = m_settings.supportRbPlus ?
4226                                   GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4227                     }
4228                     else
4229                     {
4230                         ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4231                         patInfo = m_settings.supportRbPlus ?
4232                                   GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4233                     }
4234                 }
4235                 else if (IsBlock4kb(swizzleMode))
4236                 {
4237                     if (IsStandardSwizzle(resourceType, swizzleMode))
4238                     {
4239                         if (swizzleMode == ADDR_SW_4KB_S)
4240                         {
4241                             patInfo = m_settings.supportRbPlus ?
4242                                       GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4243                         }
4244                         else
4245                         {
4246                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4247                             patInfo = m_settings.supportRbPlus ?
4248                                       GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4249                         }
4250                     }
4251                     else
4252                     {
4253                         if (swizzleMode == ADDR_SW_4KB_D)
4254                         {
4255                             patInfo = m_settings.supportRbPlus ?
4256                                       GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4257                         }
4258                         else if (swizzleMode == ADDR_SW_4KB_R_X)
4259                         {
4260                             patInfo = NULL;
4261                         }
4262                         else
4263                         {
4264                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4265                             patInfo = m_settings.supportRbPlus ?
4266                                       GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4267                         }
4268                     }
4269                 }
4270                 else
4271                 {
4272                     if (IsRtOptSwizzle(swizzleMode))
4273                     {
4274                         if (numFrag == 1)
4275                         {
4276                             patInfo = m_settings.supportRbPlus ?
4277                                       GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4278                         }
4279                         else if (numFrag == 2)
4280                         {
4281                             patInfo = m_settings.supportRbPlus ?
4282                                       GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4283                         }
4284                         else if (numFrag == 4)
4285                         {
4286                             patInfo = m_settings.supportRbPlus ?
4287                                       GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4288                         }
4289                         else
4290                         {
4291                             ADDR_ASSERT(numFrag == 8);
4292                             patInfo = m_settings.supportRbPlus ?
4293                                       GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4294                         }
4295                     }
4296                     else if (IsZOrderSwizzle(swizzleMode))
4297                     {
4298                         if (numFrag == 1)
4299                         {
4300                             patInfo = m_settings.supportRbPlus ?
4301                                       GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4302                         }
4303                         else if (numFrag == 2)
4304                         {
4305                             patInfo = m_settings.supportRbPlus ?
4306                                       GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4307                         }
4308                         else if (numFrag == 4)
4309                         {
4310                             patInfo = m_settings.supportRbPlus ?
4311                                       GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4312                         }
4313                         else
4314                         {
4315                             ADDR_ASSERT(numFrag == 8);
4316                             patInfo = m_settings.supportRbPlus ?
4317                                       GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4318                         }
4319                     }
4320                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4321                     {
4322                         if (swizzleMode == ADDR_SW_64KB_D)
4323                         {
4324                             patInfo = m_settings.supportRbPlus ?
4325                                       GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4326                         }
4327                         else if (swizzleMode == ADDR_SW_64KB_D_X)
4328                         {
4329                             patInfo = m_settings.supportRbPlus ?
4330                                       GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4331                         }
4332                         else
4333                         {
4334                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4335                             patInfo = m_settings.supportRbPlus ?
4336                                       GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4337                         }
4338                     }
4339                     else
4340                     {
4341                         if (swizzleMode == ADDR_SW_64KB_S)
4342                         {
4343                             patInfo = m_settings.supportRbPlus ?
4344                                       GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4345                         }
4346                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4347                         {
4348                             patInfo = m_settings.supportRbPlus ?
4349                                       GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4350                         }
4351                         else
4352                         {
4353                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4354                             patInfo = m_settings.supportRbPlus ?
4355                                       GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4356                         }
4357                     }
4358                 }
4359             }
4360         }
4361     }
4362 
4363     return (patInfo != NULL) ? &patInfo[index] : NULL;
4364 }
4365 
4366 /**
4367 ************************************************************************************************************************
4368 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4369 *
4370 *   @brief
4371 *       Internal function to calculate address from coord for micro tiled swizzle surface
4372 *
4373 *   @return
4374 *       ADDR_E_RETURNCODE
4375 ************************************************************************************************************************
4376 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4377 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4378      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4379      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4380      ) const
4381 {
4382     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4383     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4384     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4385 
4386     localIn.swizzleMode  = pIn->swizzleMode;
4387     localIn.flags        = pIn->flags;
4388     localIn.resourceType = pIn->resourceType;
4389     localIn.bpp          = pIn->bpp;
4390     localIn.width        = Max(pIn->unalignedWidth,  1u);
4391     localIn.height       = Max(pIn->unalignedHeight, 1u);
4392     localIn.numSlices    = Max(pIn->numSlices,       1u);
4393     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4394     localIn.numSamples   = Max(pIn->numSamples,      1u);
4395     localIn.numFrags     = Max(pIn->numFrags,        1u);
4396     localOut.pMipInfo    = mipInfo;
4397 
4398     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4399 
4400     if (ret == ADDR_OK)
4401     {
4402         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4403         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4404         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
4405         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
4406 
4407         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4408         {
4409             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4410             const UINT_32 yb           = pIn->y / localOut.blockHeight;
4411             const UINT_32 xb           = pIn->x / localOut.blockWidth;
4412             const UINT_32 blockIndex   = yb * pb + xb;
4413             const UINT_32 blockSize    = 256;
4414             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4415                                                                    pIn->x << elemLog2,
4416                                                                    pIn->y,
4417                                                                    0);
4418             pOut->addr = localOut.sliceSize * pIn->slice +
4419                          mipInfo[pIn->mipId].macroBlockOffset +
4420                          (blockIndex * blockSize) +
4421                          blk256Offset;
4422         }
4423         else
4424         {
4425             ret = ADDR_INVALIDPARAMS;
4426         }
4427     }
4428 
4429     return ret;
4430 }
4431 
4432 /**
4433 ************************************************************************************************************************
4434 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4435 *
4436 *   @brief
4437 *       Internal function to calculate address from coord for macro tiled swizzle surface
4438 *
4439 *   @return
4440 *       ADDR_E_RETURNCODE
4441 ************************************************************************************************************************
4442 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4443 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4444      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4445      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4446      ) const
4447 {
4448     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4449     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4450     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4451 
4452     localIn.swizzleMode  = pIn->swizzleMode;
4453     localIn.flags        = pIn->flags;
4454     localIn.resourceType = pIn->resourceType;
4455     localIn.bpp          = pIn->bpp;
4456     localIn.width        = Max(pIn->unalignedWidth,  1u);
4457     localIn.height       = Max(pIn->unalignedHeight, 1u);
4458     localIn.numSlices    = Max(pIn->numSlices,       1u);
4459     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4460     localIn.numSamples   = Max(pIn->numSamples,      1u);
4461     localIn.numFrags     = Max(pIn->numFrags,        1u);
4462     localOut.pMipInfo    = mipInfo;
4463 
4464     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4465 
4466     if (ret == ADDR_OK)
4467     {
4468         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4469         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4470         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4471         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4472         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4473         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4474                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4475 
4476         if (localIn.numFrags > 1)
4477         {
4478             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4479                                                                     pIn->resourceType,
4480                                                                     elemLog2,
4481                                                                     localIn.numFrags);
4482 
4483             if (pPatInfo != NULL)
4484             {
4485                 const UINT_32 pb        = localOut.pitch / localOut.blockWidth;
4486                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4487                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4488                 const UINT_64 blkIdx    = yb * pb + xb;
4489 
4490                 ADDR_BIT_SETTING fullSwizzlePattern[20];
4491                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4492 
4493                 const UINT_32 blkOffset =
4494                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4495                                                     blkSizeLog2,
4496                                                     pIn->x,
4497                                                     pIn->y,
4498                                                     pIn->slice,
4499                                                     pIn->sample);
4500 
4501                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4502                              (blkIdx << blkSizeLog2) +
4503                              (blkOffset ^ pipeBankXor);
4504             }
4505             else
4506             {
4507                 ret = ADDR_INVALIDPARAMS;
4508             }
4509         }
4510         else
4511         {
4512             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4513             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4514             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4515 
4516             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4517             {
4518                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4519                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4520                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4521                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4522                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4523                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4524                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4525                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4526                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4527                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4528                 const UINT_64 blkIdx    = yb * pb + xb;
4529                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4530                                                                     x << elemLog2,
4531                                                                     y,
4532                                                                     z);
4533                 pOut->addr = sliceSize * sliceId +
4534                              mipInfo[pIn->mipId].macroBlockOffset +
4535                              (blkIdx << blkSizeLog2) +
4536                              (blkOffset ^ pipeBankXor);
4537             }
4538             else
4539             {
4540                 ret = ADDR_INVALIDPARAMS;
4541             }
4542         }
4543     }
4544 
4545     return ret;
4546 }
4547 
4548 /**
4549 ************************************************************************************************************************
4550 *   Gfx10Lib::HwlComputeMaxBaseAlignments
4551 *
4552 *   @brief
4553 *       Gets maximum alignments
4554 *   @return
4555 *       maximum alignments
4556 ************************************************************************************************************************
4557 */
HwlComputeMaxBaseAlignments() const4558 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4559 {
4560     return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4561 }
4562 
4563 /**
4564 ************************************************************************************************************************
4565 *   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4566 *
4567 *   @brief
4568 *       Gets maximum alignments for metadata
4569 *   @return
4570 *       maximum alignments for metadata
4571 ************************************************************************************************************************
4572 */
HwlComputeMaxMetaBaseAlignments() const4573 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4574 {
4575     Dim3d metaBlk;
4576 
4577     const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4578     {
4579         ADDR_SW_64KB_Z_X,
4580         m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4581     };
4582 
4583     UINT_32 maxBaseAlignHtile = 0;
4584     UINT_32 maxBaseAlignCmask = 0;
4585 
4586     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4587     {
4588         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4589         {
4590             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4591             {
4592                 // Max base alignment for Htile
4593                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4594                                                                 ADDR_RSRC_TEX_2D,
4595                                                                 ValidSwizzleModeForXmask[swIdx],
4596                                                                 bppLog2,
4597                                                                 numFragLog2,
4598                                                                 TRUE,
4599                                                                 &metaBlk);
4600 
4601                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4602             }
4603         }
4604 
4605         // Max base alignment for Cmask
4606         const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4607                                                         ADDR_RSRC_TEX_2D,
4608                                                         ValidSwizzleModeForXmask[swIdx],
4609                                                         0,
4610                                                         0,
4611                                                         TRUE,
4612                                                         &metaBlk);
4613 
4614         maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4615     }
4616 
4617     // Max base alignment for 2D Dcc
4618     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4619     {
4620         ADDR_SW_64KB_S_X,
4621         ADDR_SW_64KB_D_X,
4622         ADDR_SW_64KB_R_X,
4623         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4624     };
4625 
4626     UINT_32 maxBaseAlignDcc2D = 0;
4627 
4628     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4629     {
4630         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4631         {
4632             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4633             {
4634                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4635                                                              ADDR_RSRC_TEX_2D,
4636                                                              ValidSwizzleModeForDcc2D[swIdx],
4637                                                              bppLog2,
4638                                                              numFragLog2,
4639                                                              TRUE,
4640                                                              &metaBlk);
4641 
4642                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4643             }
4644         }
4645     }
4646 
4647     // Max base alignment for 3D Dcc
4648     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4649     {
4650         ADDR_SW_64KB_Z_X,
4651         ADDR_SW_64KB_S_X,
4652         ADDR_SW_64KB_D_X,
4653         ADDR_SW_64KB_R_X,
4654         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4655     };
4656 
4657     UINT_32 maxBaseAlignDcc3D = 0;
4658 
4659     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4660     {
4661         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4662         {
4663             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4664                                                          ADDR_RSRC_TEX_3D,
4665                                                          ValidSwizzleModeForDcc3D[swIdx],
4666                                                          bppLog2,
4667                                                          0,
4668                                                          TRUE,
4669                                                          &metaBlk);
4670 
4671             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4672         }
4673     }
4674 
4675     return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4676 }
4677 
4678 /**
4679 ************************************************************************************************************************
4680 *   Gfx10Lib::GetMetaElementSizeLog2
4681 *
4682 *   @brief
4683 *       Gets meta data element size log2
4684 *   @return
4685 *       Meta data element size log2
4686 ************************************************************************************************************************
4687 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4688 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4689     Gfx10DataType dataType) ///< Data surface type
4690 {
4691     INT_32 elemSizeLog2 = 0;
4692 
4693     if (dataType == Gfx10DataColor)
4694     {
4695         elemSizeLog2 = 0;
4696     }
4697     else if (dataType == Gfx10DataDepthStencil)
4698     {
4699         elemSizeLog2 = 2;
4700     }
4701     else
4702     {
4703         ADDR_ASSERT(dataType == Gfx10DataFmask);
4704         elemSizeLog2 = -1;
4705     }
4706 
4707     return elemSizeLog2;
4708 }
4709 
4710 /**
4711 ************************************************************************************************************************
4712 *   Gfx10Lib::GetMetaCacheSizeLog2
4713 *
4714 *   @brief
4715 *       Gets meta data cache line size log2
4716 *   @return
4717 *       Meta data cache line size log2
4718 ************************************************************************************************************************
4719 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4720 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4721     Gfx10DataType dataType) ///< Data surface type
4722 {
4723     INT_32 cacheSizeLog2 = 0;
4724 
4725     if (dataType == Gfx10DataColor)
4726     {
4727         cacheSizeLog2 = 6;
4728     }
4729     else if (dataType == Gfx10DataDepthStencil)
4730     {
4731         cacheSizeLog2 = 8;
4732     }
4733     else
4734     {
4735         ADDR_ASSERT(dataType == Gfx10DataFmask);
4736         cacheSizeLog2 = 8;
4737     }
4738     return cacheSizeLog2;
4739 }
4740 
4741 /**
4742 ************************************************************************************************************************
4743 *   Gfx10Lib::HwlComputeSurfaceInfoLinear
4744 *
4745 *   @brief
4746 *       Internal function to calculate alignment for linear surface
4747 *
4748 *   @return
4749 *       ADDR_E_RETURNCODE
4750 ************************************************************************************************************************
4751 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4752 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4753      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4754      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4755      ) const
4756 {
4757     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4758 
4759     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4760     {
4761         returnCode = ADDR_INVALIDPARAMS;
4762     }
4763     else
4764     {
4765         const UINT_32 elementBytes = pIn->bpp >> 3;
4766         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4767         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4768         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4769         UINT_32       actualHeight = pIn->height;
4770         UINT_64       sliceSize    = 0;
4771 
4772         if (pIn->numMipLevels > 1)
4773         {
4774             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4775             {
4776                 UINT_32 mipWidth, mipHeight;
4777 
4778                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4779 
4780                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4781 
4782                 if (pOut->pMipInfo != NULL)
4783                 {
4784                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4785                     pOut->pMipInfo[i].height           = mipHeight;
4786                     pOut->pMipInfo[i].depth            = mipDepth;
4787                     pOut->pMipInfo[i].offset           = sliceSize;
4788                     pOut->pMipInfo[i].mipTailOffset    = 0;
4789                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4790                 }
4791 
4792                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4793             }
4794         }
4795         else
4796         {
4797             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4798 
4799             if (returnCode == ADDR_OK)
4800             {
4801                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4802 
4803                 if (pOut->pMipInfo != NULL)
4804                 {
4805                     pOut->pMipInfo[0].pitch            = pitch;
4806                     pOut->pMipInfo[0].height           = actualHeight;
4807                     pOut->pMipInfo[0].depth            = mipDepth;
4808                     pOut->pMipInfo[0].offset           = 0;
4809                     pOut->pMipInfo[0].mipTailOffset    = 0;
4810                     pOut->pMipInfo[0].macroBlockOffset = 0;
4811                 }
4812             }
4813         }
4814 
4815         if (returnCode == ADDR_OK)
4816         {
4817             pOut->pitch          = pitch;
4818             pOut->height         = actualHeight;
4819             pOut->numSlices      = pIn->numSlices;
4820             pOut->sliceSize      = sliceSize;
4821             pOut->surfSize       = sliceSize * pOut->numSlices;
4822             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4823             pOut->blockWidth     = pitchAlign;
4824             pOut->blockHeight    = 1;
4825             pOut->blockSlices    = 1;
4826 
4827             // Following members are useless on GFX10
4828             pOut->mipChainPitch  = 0;
4829             pOut->mipChainHeight = 0;
4830             pOut->mipChainSlice  = 0;
4831             pOut->epitchIsHeight = FALSE;
4832 
4833             // Post calculation validate
4834             ADDR_ASSERT(pOut->sliceSize > 0);
4835         }
4836     }
4837 
4838     return returnCode;
4839 }
4840 
4841 } // V2
4842 } // Addr
4843