• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /**
28 ************************************************************************************************************************
29 * @file  gfx10addrlib.cpp
30 * @brief Contain the implementation for the Gfx10Lib class.
31 ************************************************************************************************************************
32 */
33 
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36 
37 #include "amdgpu_asic_addr.h"
38 
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41 
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 *   Gfx10HwlInit
47 *
48 *   @brief
49 *       Creates an Gfx10Lib object.
50 *
51 *   @return
52 *       Returns an Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
Gfx10HwlInit(const Client * pClient)55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57     return V2::Gfx10Lib::CreateObj(pClient);
58 }
59 
60 namespace V2
61 {
62 
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 //                               Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 
67 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
69     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
70     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
71     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
72     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
73 
74     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
75     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
76     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
77     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
78 
79     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
80     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
81     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
82     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
83 
84     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
85     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
86     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
87     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
88 
89     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
90     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
91     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
92     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
93 
94     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
95     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_X
96     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_X
97     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
98 
99     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
100     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
101     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
102     {0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}, // ADDR_SW_64KB_R_X
103 
104     {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_Z_X
105     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
106     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
107     {0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}, // ADDR_SW_VAR_R_X
108     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
109 };
110 
111 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112 
113 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115 
116 /**
117 ************************************************************************************************************************
118 *   Gfx10Lib::Gfx10Lib
119 *
120 *   @brief
121 *       Constructor
122 *
123 ************************************************************************************************************************
124 */
Gfx10Lib(const Client * pClient)125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126     :
127     Lib(pClient),
128     m_numPkrLog2(0),
129     m_numSaLog2(0),
130     m_colorBaseIndex(0),
131     m_xmaskBaseIndex(0),
132     m_dccBaseIndex(0)
133 {
134     memset(&m_settings, 0, sizeof(m_settings));
135     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
136 }
137 
138 /**
139 ************************************************************************************************************************
140 *   Gfx10Lib::~Gfx10Lib
141 *
142 *   @brief
143 *       Destructor
144 ************************************************************************************************************************
145 */
~Gfx10Lib()146 Gfx10Lib::~Gfx10Lib()
147 {
148 }
149 
150 /**
151 ************************************************************************************************************************
152 *   Gfx10Lib::HwlComputeHtileInfo
153 *
154 *   @brief
155 *       Interface function stub of AddrComputeHtilenfo
156 *
157 *   @return
158 *       ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
162     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
163     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
164     ) const
165 {
166     ADDR_E_RETURNCODE ret = ADDR_OK;
167 
168     if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
169          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
170         (pIn->hTileFlags.pipeAligned != TRUE))
171     {
172         ret = ADDR_INVALIDPARAMS;
173     }
174     else
175     {
176         Dim3d         metaBlk     = {0};
177         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
178                                                    ADDR_RSRC_TEX_2D,
179                                                    pIn->swizzleMode,
180                                                    0,
181                                                    0,
182                                                    TRUE,
183                                                    &metaBlk);
184 
185         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
186         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
187         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
188         pOut->metaBlkWidth  = metaBlk.w;
189         pOut->metaBlkHeight = metaBlk.h;
190 
191         if (pIn->numMipLevels > 1)
192         {
193             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
194 
195             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
196 
197             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
198             {
199                 UINT_32 mipWidth, mipHeight;
200 
201                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
202 
203                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
204                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
205 
206                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
207                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
208                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
209 
210                 if (pOut->pMipInfo != NULL)
211                 {
212                     pOut->pMipInfo[i].inMiptail = FALSE;
213                     pOut->pMipInfo[i].offset    = offset;
214                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
215                 }
216 
217                 offset += mipSliceSize;
218             }
219 
220             pOut->sliceSize          = offset;
221             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
222             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
223 
224             if (pOut->pMipInfo != NULL)
225             {
226                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
227                 {
228                     pOut->pMipInfo[i].inMiptail = TRUE;
229                     pOut->pMipInfo[i].offset    = 0;
230                     pOut->pMipInfo[i].sliceSize = 0;
231                 }
232 
233                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
234                 {
235                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
236                 }
237             }
238         }
239         else
240         {
241             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
242             const UINT_32 heightInM = pOut->height / metaBlk.h;
243 
244             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
245             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
246             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
247 
248             if (pOut->pMipInfo != NULL)
249             {
250                 pOut->pMipInfo[0].inMiptail = FALSE;
251                 pOut->pMipInfo[0].offset    = 0;
252                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
253             }
254         }
255     }
256 
257     return ret;
258 }
259 
260 /**
261 ************************************************************************************************************************
262 *   Gfx10Lib::HwlComputeCmaskInfo
263 *
264 *   @brief
265 *       Interface function stub of AddrComputeCmaskInfo
266 *
267 *   @return
268 *       ADDR_E_RETURNCODE
269 ************************************************************************************************************************
270 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const271 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
272     const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
273     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
274     ) const
275 {
276     ADDR_E_RETURNCODE ret = ADDR_OK;
277 
278     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
279         (pIn->cMaskFlags.pipeAligned != TRUE)   ||
280         ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
281          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
282     {
283         ret = ADDR_INVALIDPARAMS;
284     }
285     else
286     {
287         Dim3d         metaBlk     = {0};
288         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
289                                                    ADDR_RSRC_TEX_2D,
290                                                    pIn->swizzleMode,
291                                                    0,
292                                                    0,
293                                                    TRUE,
294                                                    &metaBlk);
295 
296         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
297         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
298         pOut->baseAlign     = metaBlkSize;
299         pOut->metaBlkWidth  = metaBlk.w;
300         pOut->metaBlkHeight = metaBlk.h;
301 
302         if (pIn->numMipLevels > 1)
303         {
304             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
305 
306             UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
307 
308             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
309             {
310                 UINT_32 mipWidth, mipHeight;
311 
312                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
313 
314                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
315                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
316 
317                 const UINT_32 pitchInM  = mipWidth  / metaBlk.w;
318                 const UINT_32 heightInM = mipHeight / metaBlk.h;
319 
320                 if (pOut->pMipInfo != NULL)
321                 {
322                     pOut->pMipInfo[i].inMiptail = FALSE;
323                     pOut->pMipInfo[i].offset    = metaBlkPerSlice * metaBlkSize;
324                     pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
325                 }
326 
327                 metaBlkPerSlice += pitchInM * heightInM;
328             }
329 
330             pOut->metaBlkNumPerSlice = metaBlkPerSlice;
331 
332             if (pOut->pMipInfo != NULL)
333             {
334                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
335                 {
336                     pOut->pMipInfo[i].inMiptail = TRUE;
337                     pOut->pMipInfo[i].offset    = 0;
338                     pOut->pMipInfo[i].sliceSize = 0;
339                 }
340 
341                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
342                 {
343                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
344                 }
345             }
346         }
347         else
348         {
349             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
350             const UINT_32 heightInM = pOut->height / metaBlk.h;
351 
352             pOut->metaBlkNumPerSlice = pitchInM * heightInM;
353 
354             if (pOut->pMipInfo != NULL)
355             {
356                 pOut->pMipInfo[0].inMiptail = FALSE;
357                 pOut->pMipInfo[0].offset    = 0;
358                 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
359             }
360         }
361 
362         pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlkSize;
363         pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
364     }
365 
366     return ret;
367 }
368 
369 /**
370 ************************************************************************************************************************
371 *   Gfx10Lib::HwlComputeDccInfo
372 *
373 *   @brief
374 *       Interface function to compute DCC key info
375 *
376 *   @return
377 *       ADDR_E_RETURNCODE
378 ************************************************************************************************************************
379 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const380 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
381     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
382     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
383     ) const
384 {
385     ADDR_E_RETURNCODE ret = ADDR_OK;
386 
387     if (pIn->swizzleMode != ADDR_SW_64KB_Z_X && pIn->swizzleMode != ADDR_SW_64KB_R_X)
388     {
389         // Hardware does not support DCC for this swizzle mode.
390         ret = ADDR_INVALIDPARAMS;
391     }
392     else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
393     {
394         // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
395         ret = ADDR_INVALIDPARAMS;
396     }
397     else
398     {
399         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
400 
401         {
402             // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
403             ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
404 
405             const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
406 
407             pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
408             pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
409             pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
410         }
411 
412         if (ret == ADDR_OK)
413         {
414             Dim3d         metaBlk     = {0};
415             const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
416             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
417                                                        pIn->resourceType,
418                                                        pIn->swizzleMode,
419                                                        elemLog2,
420                                                        numFragLog2,
421                                                        pIn->dccKeyFlags.pipeAligned,
422                                                        &metaBlk);
423 
424             pOut->dccRamBaseAlign   = metaBlkSize;
425             pOut->metaBlkWidth      = metaBlk.w;
426             pOut->metaBlkHeight     = metaBlk.h;
427             pOut->metaBlkDepth      = metaBlk.d;
428             pOut->metaBlkSize       = metaBlkSize;
429 
430             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
431             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
432             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
433 
434             if (pIn->numMipLevels > 1)
435             {
436                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
437 
438                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
439 
440                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
441                 {
442                     UINT_32 mipWidth, mipHeight;
443 
444                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
445 
446                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
447                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
448 
449                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
450                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
451                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
452 
453                     if (pOut->pMipInfo != NULL)
454                     {
455                         pOut->pMipInfo[i].inMiptail = FALSE;
456                         pOut->pMipInfo[i].offset    = offset;
457                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
458                     }
459 
460                     offset += mipSliceSize;
461                 }
462 
463                 pOut->dccRamSliceSize    = offset;
464                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
465                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
466 
467                 if (pOut->pMipInfo != NULL)
468                 {
469                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
470                     {
471                         pOut->pMipInfo[i].inMiptail = TRUE;
472                         pOut->pMipInfo[i].offset    = 0;
473                         pOut->pMipInfo[i].sliceSize = 0;
474                     }
475 
476                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
477                     {
478                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
479                     }
480                 }
481             }
482             else
483             {
484                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
485                 const UINT_32 heightInM = pOut->height / metaBlk.h;
486 
487                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
488                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
489                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
490 
491                 if (pOut->pMipInfo != NULL)
492                 {
493                     pOut->pMipInfo[0].inMiptail = FALSE;
494                     pOut->pMipInfo[0].offset    = 0;
495                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
496                 }
497             }
498         }
499     }
500 
501     return ret;
502 }
503 
504 /**
505 ************************************************************************************************************************
506 *   Gfx10Lib::HwlComputeCmaskAddrFromCoord
507 *
508 *   @brief
509 *       Interface function stub of AddrComputeCmaskAddrFromCoord
510 *
511 *   @return
512 *       ADDR_E_RETURNCODE
513 ************************************************************************************************************************
514 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)515 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
516     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
517     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
518 {
519     // Only support pipe aligned CMask
520     ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
521 
522     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
523     input.size            = sizeof(input);
524     input.cMaskFlags      = pIn->cMaskFlags;
525     input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
526     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
527     input.numSlices       = Max(pIn->numSlices,       1u);
528     input.swizzleMode     = pIn->swizzleMode;
529     input.resourceType    = pIn->resourceType;
530 
531     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
532     output.size = sizeof(output);
533 
534     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
535 
536     if (returnCode == ADDR_OK)
537     {
538         const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
539         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
540         const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
541         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
542         const UINT_8*  patIdxTable   =
543             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
544             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
545 
546 
547         const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
548         const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
549         const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
550                                                                       blkSizeLog2 + 1, // +1 for nibble offset
551                                                                       pIn->x,
552                                                                       pIn->y,
553                                                                       pIn->slice,
554                                                                       0);
555         const UINT_32 xb       = pIn->x / output.metaBlkWidth;
556         const UINT_32 yb       = pIn->y / output.metaBlkHeight;
557         const UINT_32 pb       = output.pitch / output.metaBlkWidth;
558         const UINT_32 blkIndex = (yb * pb) + xb;
559         const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
560 
561         pOut->addr = (output.sliceSize * pIn->slice) +
562                      (blkIndex * (1 << blkSizeLog2)) +
563                      ((blkOffset >> 1) ^ pipeXor);
564         pOut->bitPosition = (blkOffset & 1) << 2;
565     }
566 
567     return returnCode;
568 }
569 
570 /**
571 ************************************************************************************************************************
572 *   Gfx10Lib::HwlComputeHtileAddrFromCoord
573 *
574 *   @brief
575 *       Interface function stub of AddrComputeHtileAddrFromCoord
576 *
577 *   @return
578 *       ADDR_E_RETURNCODE
579 ************************************************************************************************************************
580 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)581 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
582     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
583     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
584 {
585     ADDR_E_RETURNCODE returnCode = ADDR_OK;
586 
587     if (pIn->numMipLevels > 1)
588     {
589         returnCode = ADDR_NOTIMPLEMENTED;
590     }
591     else
592     {
593         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
594         input.size            = sizeof(input);
595         input.hTileFlags      = pIn->hTileFlags;
596         input.depthFlags      = pIn->depthflags;
597         input.swizzleMode     = pIn->swizzleMode;
598         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
599         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
600         input.numSlices       = Max(pIn->numSlices,       1u);
601         input.numMipLevels    = 1;
602 
603         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
604         output.size = sizeof(output);
605 
606         returnCode = ComputeHtileInfo(&input, &output);
607 
608         if (returnCode == ADDR_OK)
609         {
610             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
611             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
612             const UINT_32  index         = m_xmaskBaseIndex + numSampleLog2;
613             const UINT_8*  patIdxTable   = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
614 
615 
616             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
617             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
618             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
619                                                                            blkSizeLog2 + 1, // +1 for nibble offset
620                                                                            pIn->x,
621                                                                            pIn->y,
622                                                                            pIn->slice,
623                                                                            0);
624             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
625             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
626             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
627             const UINT_32 blkIndex = (yb * pb) + xb;
628             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
629 
630             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
631                          (blkIndex * (1 << blkSizeLog2)) +
632                          ((blkOffset >> 1) ^ pipeXor);
633         }
634     }
635 
636     return returnCode;
637 }
638 
639 /**
640 ************************************************************************************************************************
641 *   Gfx10Lib::HwlComputeHtileCoordFromAddr
642 *
643 *   @brief
644 *       Interface function stub of AddrComputeHtileCoordFromAddr
645 *
646 *   @return
647 *       ADDR_E_RETURNCODE
648 ************************************************************************************************************************
649 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)650 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
651     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
652     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
653 {
654     ADDR_NOT_IMPLEMENTED();
655 
656     return ADDR_OK;
657 }
658 
659 /**
660 ************************************************************************************************************************
661 *   Gfx10Lib::HwlComputeDccAddrFromCoord
662 *
663 *   @brief
664 *       Interface function stub of AddrComputeDccAddrFromCoord
665 *
666 *   @return
667 *       ADDR_E_RETURNCODE
668 ************************************************************************************************************************
669 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)670 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
671     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
672     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
673 {
674     ADDR_E_RETURNCODE returnCode = ADDR_OK;
675 
676     if ((pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
677         (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
678         (pIn->dccKeyFlags.linear == TRUE)             ||
679         (pIn->numFrags           >  1)                ||
680         (pIn->numMipLevels       >  1)                ||
681         (pIn->mipId              >  0))
682     {
683         returnCode = ADDR_NOTSUPPORTED;
684     }
685     else
686     {
687         const UINT_32  elemLog2    = Log2(pIn->bpp >> 3);
688         const UINT_32  numPipeLog2 = m_pipesLog2;
689         const UINT_32  pipeMask    = (1 << numPipeLog2) - 1;
690         UINT_32        index       = m_dccBaseIndex + elemLog2;
691         const UINT_8*  patIdxTable;
692 
693         if (m_settings.supportRbPlus)
694         {
695             patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
696 
697             if (pIn->dccKeyFlags.pipeAligned)
698             {
699                 index += MaxNumOfBpp;
700 
701                 if (m_numPkrLog2 < 2)
702                 {
703                     index += m_pipesLog2 * MaxNumOfBpp;
704                 }
705                 else
706                 {
707                     // 4 groups for "m_numPkrLog2 < 2" case
708                     index += 4 * MaxNumOfBpp;
709 
710                     const UINT_32 dccPipePerPkr = 3;
711 
712                     index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
713                              (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
714                 }
715             }
716         }
717         else
718         {
719             patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
720 
721             if (pIn->dccKeyFlags.pipeAligned)
722             {
723                 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
724             }
725             else
726             {
727                 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
728             }
729         }
730 
731         const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
732         const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
733         const UINT_32  blkOffset   =
734             ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
735                                             blkSizeLog2 + 1, // +1 for nibble offset
736                                             pIn->x,
737                                             pIn->y,
738                                             pIn->slice,
739                                             0);
740         const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
741         const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
742         const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
743         const UINT_32 blkIndex = (yb * pb) + xb;
744         const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
745 
746         pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
747                      (blkIndex * (1 << blkSizeLog2)) +
748                      ((blkOffset >> 1) ^ pipeXor);
749     }
750 
751     return returnCode;
752 }
753 
754 /**
755 ************************************************************************************************************************
756 *   Gfx10Lib::HwlInitGlobalParams
757 *
758 *   @brief
759 *       Initializes global parameters
760 *
761 *   @return
762 *       TRUE if all settings are valid
763 *
764 ************************************************************************************************************************
765 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)766 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
767     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
768 {
769     BOOL_32        valid = TRUE;
770     GB_ADDR_CONFIG_gfx10 gbAddrConfig;
771 
772     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
773 
774     // These values are copied from CModel code
775     switch (gbAddrConfig.bits.NUM_PIPES)
776     {
777         case ADDR_CONFIG_1_PIPE:
778             m_pipes     = 1;
779             m_pipesLog2 = 0;
780             break;
781         case ADDR_CONFIG_2_PIPE:
782             m_pipes     = 2;
783             m_pipesLog2 = 1;
784             break;
785         case ADDR_CONFIG_4_PIPE:
786             m_pipes     = 4;
787             m_pipesLog2 = 2;
788             break;
789         case ADDR_CONFIG_8_PIPE:
790             m_pipes     = 8;
791             m_pipesLog2 = 3;
792             break;
793         case ADDR_CONFIG_16_PIPE:
794             m_pipes     = 16;
795             m_pipesLog2 = 4;
796             break;
797         case ADDR_CONFIG_32_PIPE:
798             m_pipes     = 32;
799             m_pipesLog2 = 5;
800             break;
801         case ADDR_CONFIG_64_PIPE:
802             m_pipes     = 64;
803             m_pipesLog2 = 6;
804             break;
805         default:
806             ADDR_ASSERT_ALWAYS();
807             valid = FALSE;
808             break;
809     }
810 
811     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
812     {
813         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
814             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
815             m_pipeInterleaveLog2  = 8;
816             break;
817         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
818             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
819             m_pipeInterleaveLog2  = 9;
820             break;
821         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
822             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
823             m_pipeInterleaveLog2  = 10;
824             break;
825         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
826             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
827             m_pipeInterleaveLog2  = 11;
828             break;
829         default:
830             ADDR_ASSERT_ALWAYS();
831             valid = FALSE;
832             break;
833     }
834 
835     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
836     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
837     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
838     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
839 
840     switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
841     {
842         case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
843             m_maxCompFrag     = 1;
844             m_maxCompFragLog2 = 0;
845             break;
846         case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
847             m_maxCompFrag     = 2;
848             m_maxCompFragLog2 = 1;
849             break;
850         case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
851             m_maxCompFrag     = 4;
852             m_maxCompFragLog2 = 2;
853             break;
854         case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
855             m_maxCompFrag     = 8;
856             m_maxCompFragLog2 = 3;
857             break;
858         default:
859             ADDR_ASSERT_ALWAYS();
860             valid = FALSE;
861             break;
862     }
863 
864     {
865         // Skip unaligned case
866         m_xmaskBaseIndex += MaxNumOfAA;
867 
868         m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
869         m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
870 
871         if (m_settings.supportRbPlus)
872         {
873             m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
874             m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
875 
876             ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
877 
878             ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
879                           sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
880 
881             if (m_numPkrLog2 >= 2)
882             {
883                 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
884                 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
885             }
886         }
887         else
888         {
889             const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
890                                         static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
891                                         1;
892 
893             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
894 
895             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
896                           sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
897         }
898     }
899 
900     if (m_settings.supportRbPlus)
901     {
902         // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
903         // corresponding SW_64KB_* mode
904         m_blockVarSizeLog2 = m_pipesLog2 + 14;
905     }
906 
907 
908     if (valid)
909     {
910         InitEquationTable();
911     }
912 
913     return valid;
914 }
915 
916 /**
917 ************************************************************************************************************************
918 *   Gfx10Lib::HwlConvertChipFamily
919 *
920 *   @brief
921 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
922 *   @return
923 *       ChipFamily
924 ************************************************************************************************************************
925 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)926 ChipFamily Gfx10Lib::HwlConvertChipFamily(
927     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
928     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
929 {
930     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
931 
932     m_settings.dccUnsup3DSwDis  = 1;
933     m_settings.dsMipmapHtileFix = 1;
934 
935     switch (chipFamily)
936     {
937         case FAMILY_NV:
938             if (ASICREV_IS_NAVI10_P(chipRevision))
939             {
940                 m_settings.dsMipmapHtileFix = 0;
941                 m_settings.isDcn20          = 1;
942             }
943 
944             if (ASICREV_IS_NAVI12_P(chipRevision))
945             {
946                 m_settings.isDcn20 = 1;
947             }
948 
949             if (ASICREV_IS_NAVI14_M(chipRevision))
950             {
951                 m_settings.isDcn20 = 1;
952             }
953 
954             if (ASICREV_IS_SIENNA_CICHLID(chipRevision))
955             {
956                 m_settings.supportRbPlus   = 1;
957                 m_settings.dccUnsup3DSwDis = 0;
958             }
959 
960             if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))
961             {
962                 m_settings.supportRbPlus   = 1;
963                 m_settings.dccUnsup3DSwDis = 0;
964             }
965 
966             if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))
967             {
968                 m_settings.supportRbPlus   = 1;
969                 m_settings.dccUnsup3DSwDis = 0;
970             }
971             break;
972 
973         case FAMILY_VGH:
974             if (ASICREV_IS_VANGOGH(chipRevision))
975             {
976                 m_settings.supportRbPlus   = 1;
977                 m_settings.dccUnsup3DSwDis = 0;
978             }
979             else
980             {
981                 ADDR_ASSERT(!"Unknown chip revision");
982             }
983             break;
984 
985         default:
986             ADDR_ASSERT(!"Unknown chip family");
987             break;
988     }
989 
990     m_configFlags.use32bppFor422Fmt = TRUE;
991 
992     return family;
993 }
994 
995 /**
996 ************************************************************************************************************************
997 *   Gfx10Lib::GetBlk256SizeLog2
998 *
999 *   @brief
1000 *       Get block 256 size
1001 *
1002 *   @return
1003 *       N/A
1004 ************************************************************************************************************************
1005 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1006 void Gfx10Lib::GetBlk256SizeLog2(
1007     AddrResourceType resourceType,      ///< [in] Resource type
1008     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1009     UINT_32          elemLog2,          ///< [in] element size log2
1010     UINT_32          numSamplesLog2,    ///< [in] number of samples
1011     Dim3d*           pBlock             ///< [out] block size
1012     ) const
1013 {
1014     if (IsThin(resourceType, swizzleMode))
1015     {
1016         UINT_32 blockBits = 8 - elemLog2;
1017 
1018         if (IsZOrderSwizzle(swizzleMode))
1019         {
1020             blockBits -= numSamplesLog2;
1021         }
1022 
1023         pBlock->w = (blockBits >> 1) + (blockBits & 1);
1024         pBlock->h = (blockBits >> 1);
1025         pBlock->d = 0;
1026     }
1027     else
1028     {
1029         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1030 
1031         UINT_32 blockBits = 8 - elemLog2;
1032 
1033         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1034         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1035         pBlock->h = (blockBits / 3);
1036     }
1037 }
1038 
1039 /**
1040 ************************************************************************************************************************
1041 *   Gfx10Lib::GetCompressedBlockSizeLog2
1042 *
1043 *   @brief
1044 *       Get compress block size
1045 *
1046 *   @return
1047 *       N/A
1048 ************************************************************************************************************************
1049 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1050 void Gfx10Lib::GetCompressedBlockSizeLog2(
1051     Gfx10DataType    dataType,          ///< [in] Data type
1052     AddrResourceType resourceType,      ///< [in] Resource type
1053     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1054     UINT_32          elemLog2,          ///< [in] element size log2
1055     UINT_32          numSamplesLog2,    ///< [in] number of samples
1056     Dim3d*           pBlock             ///< [out] block size
1057     ) const
1058 {
1059     if (dataType == Gfx10DataColor)
1060     {
1061         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1062     }
1063     else
1064     {
1065         ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1066         pBlock->w = 3;
1067         pBlock->h = 3;
1068         pBlock->d = 0;
1069     }
1070 }
1071 
1072 /**
1073 ************************************************************************************************************************
1074 *   Gfx10Lib::GetMetaOverlapLog2
1075 *
1076 *   @brief
1077 *       Get meta block overlap
1078 *
1079 *   @return
1080 *       N/A
1081 ************************************************************************************************************************
1082 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1083 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1084     Gfx10DataType    dataType,          ///< [in] Data type
1085     AddrResourceType resourceType,      ///< [in] Resource type
1086     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1087     UINT_32          elemLog2,          ///< [in] element size log2
1088     UINT_32          numSamplesLog2     ///< [in] number of samples
1089     ) const
1090 {
1091     Dim3d compBlock;
1092     Dim3d microBlock;
1093 
1094     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1095     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1096 
1097     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
1098     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1099     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
1100     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
1101     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
1102 
1103     if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1104     {
1105         overlap++;
1106     }
1107 
1108     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1109     if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1110     {
1111         overlap--;
1112     }
1113     overlap = Max(overlap, 0);
1114     return overlap;
1115 }
1116 
1117 /**
1118 ************************************************************************************************************************
1119 *   Gfx10Lib::Get3DMetaOverlapLog2
1120 *
1121 *   @brief
1122 *       Get 3d meta block overlap
1123 *
1124 *   @return
1125 *       N/A
1126 ************************************************************************************************************************
1127 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1128 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1129     AddrResourceType resourceType,      ///< [in] Resource type
1130     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1131     UINT_32          elemLog2           ///< [in] element size log2
1132     ) const
1133 {
1134     Dim3d microBlock;
1135     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1136 
1137     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1138 
1139     if (m_settings.supportRbPlus)
1140     {
1141         overlap++;
1142     }
1143 
1144     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1145     {
1146         overlap = 0;
1147     }
1148     return overlap;
1149 }
1150 
1151 /**
1152 ************************************************************************************************************************
1153 *   Gfx10Lib::GetPipeRotateAmount
1154 *
1155 *   @brief
1156 *       Get pipe rotate amount
1157 *
1158 *   @return
1159 *       Pipe rotate amount
1160 ************************************************************************************************************************
1161 */
1162 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1163 INT_32 Gfx10Lib::GetPipeRotateAmount(
1164     AddrResourceType resourceType,      ///< [in] Resource type
1165     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
1166     ) const
1167 {
1168     INT_32 amount = 0;
1169 
1170     if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1171     {
1172         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1173                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
1174     }
1175 
1176     return amount;
1177 }
1178 
1179 /**
1180 ************************************************************************************************************************
1181 *   Gfx10Lib::GetMetaBlkSize
1182 *
1183 *   @brief
1184 *       Get metadata block size
1185 *
1186 *   @return
1187 *       Meta block size
1188 ************************************************************************************************************************
1189 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1190 UINT_32 Gfx10Lib::GetMetaBlkSize(
1191     Gfx10DataType    dataType,          ///< [in] Data type
1192     AddrResourceType resourceType,      ///< [in] Resource type
1193     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1194     UINT_32          elemLog2,          ///< [in] element size log2
1195     UINT_32          numSamplesLog2,    ///< [in] number of samples
1196     BOOL_32          pipeAlign,         ///< [in] pipe align
1197     Dim3d*           pBlock             ///< [out] block size
1198     ) const
1199 {
1200     INT_32 metablkSizeLog2;
1201 
1202     {
1203         const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
1204         const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
1205         const INT_32 compBlkSizeLog2    = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1206         const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1207                                           numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1208         const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
1209         INT_32       numPipesLog2       = m_pipesLog2;
1210 
1211         if (IsThin(resourceType, swizzleMode))
1212         {
1213             if ((pipeAlign == FALSE) ||
1214                 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1215                 (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
1216             {
1217                 if (pipeAlign)
1218                 {
1219                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1220                     metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1221                 }
1222                 else
1223                 {
1224                     metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1225                 }
1226             }
1227             else
1228             {
1229                 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1230                 {
1231                     numPipesLog2++;
1232                 }
1233 
1234                 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1235 
1236                 if (numPipesLog2 >= 4)
1237                 {
1238                     INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1239 
1240                     // In 16Bpe 8xaa, we have an extra overlap bit
1241                     if ((pipeRotateLog2 > 0)  &&
1242                         (elemLog2 == 4)       &&
1243                         (numSamplesLog2 == 3) &&
1244                         (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1245                     {
1246                         overlapLog2++;
1247                     }
1248 
1249                     metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1250                     metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1251 
1252                     if (m_settings.supportRbPlus    &&
1253                         IsRtOptSwizzle(swizzleMode) &&
1254                         (numPipesLog2 == 6)         &&
1255                         (numSamplesLog2 == 3)       &&
1256                         (m_maxCompFragLog2 == 3)    &&
1257                         (metablkSizeLog2 < 15))
1258                     {
1259                         metablkSizeLog2 = 15;
1260                     }
1261                 }
1262                 else
1263                 {
1264                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1265                 }
1266 
1267                 if (dataType == Gfx10DataDepthStencil)
1268                 {
1269                     // For htile surfaces, pad meta block size to 2K * num_pipes
1270                     metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1271                 }
1272 
1273                 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1274 
1275                 if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1276                 {
1277                     const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1278 
1279                     metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1280                 }
1281             }
1282 
1283             const INT_32 metablkBitsLog2 =
1284                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1285             pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1286             pBlock->h = 1 << (metablkBitsLog2 >> 1);
1287             pBlock->d = 1;
1288         }
1289         else
1290         {
1291             ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1292 
1293             if (pipeAlign)
1294             {
1295                 if (m_settings.supportRbPlus         &&
1296                     (m_pipesLog2 == m_numSaLog2 + 1) &&
1297                     (m_pipesLog2 > 1)                &&
1298                     IsRbAligned(resourceType, swizzleMode))
1299                 {
1300                     numPipesLog2++;
1301                 }
1302 
1303                 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1304 
1305                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1306                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1307                 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1308             }
1309             else
1310             {
1311                 metablkSizeLog2 = 12;
1312             }
1313 
1314             const INT_32 metablkBitsLog2 =
1315                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1316             pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1317             pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1318             pBlock->d = 1 << (metablkBitsLog2 / 3);
1319         }
1320     }
1321 
1322     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1323 }
1324 
1325 /**
1326 ************************************************************************************************************************
1327 *   Gfx10Lib::ConvertSwizzlePatternToEquation
1328 *
1329 *   @brief
1330 *       Convert swizzle pattern to equation.
1331 *
1332 *   @return
1333 *       N/A
1334 ************************************************************************************************************************
1335 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1336 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1337     UINT_32                elemLog2,  ///< [in] element bytes log2
1338     AddrResourceType       rsrcType,  ///< [in] resource type
1339     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1340     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern infor
1341     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1342     const
1343 {
1344     ADDR_BIT_SETTING fullSwizzlePattern[20];
1345     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1346 
1347     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1348     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1349 
1350     pEquation->numBits            = blockSizeLog2;
1351     pEquation->stackedDepthSlices = FALSE;
1352 
1353     for (UINT_32 i = 0; i < elemLog2; i++)
1354     {
1355         pEquation->addr[i].channel = 0;
1356         pEquation->addr[i].valid   = 1;
1357         pEquation->addr[i].index   = i;
1358     }
1359 
1360     if (IsXor(swMode) == FALSE)
1361     {
1362         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1363         {
1364             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1365 
1366             if (pSwizzle[i].x != 0)
1367             {
1368                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1369 
1370                 pEquation->addr[i].channel = 0;
1371                 pEquation->addr[i].valid   = 1;
1372                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1373             }
1374             else if (pSwizzle[i].y != 0)
1375             {
1376                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1377 
1378                 pEquation->addr[i].channel = 1;
1379                 pEquation->addr[i].valid   = 1;
1380                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1381             }
1382             else
1383             {
1384                 ADDR_ASSERT(pSwizzle[i].z != 0);
1385                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1386 
1387                 pEquation->addr[i].channel = 2;
1388                 pEquation->addr[i].valid   = 1;
1389                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1390             }
1391 
1392             pEquation->xor1[i].value = 0;
1393             pEquation->xor2[i].value = 0;
1394         }
1395     }
1396     else if (IsThin(rsrcType, swMode))
1397     {
1398         Dim3d dim;
1399         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1400 
1401         const UINT_32 blkXLog2 = Log2(dim.w);
1402         const UINT_32 blkYLog2 = Log2(dim.h);
1403         const UINT_32 blkXMask = dim.w - 1;
1404         const UINT_32 blkYMask = dim.h - 1;
1405 
1406         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1407         UINT_32          xMask = 0;
1408         UINT_32          yMask = 0;
1409         UINT_32          bMask = (1 << elemLog2) - 1;
1410 
1411         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1412         {
1413             if (IsPow2(pSwizzle[i].value))
1414             {
1415                 if (pSwizzle[i].x != 0)
1416                 {
1417                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1418                     xMask |= pSwizzle[i].x;
1419 
1420                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1421 
1422                     ADDR_ASSERT(xLog2 < blkXLog2);
1423 
1424                     pEquation->addr[i].channel = 0;
1425                     pEquation->addr[i].valid   = 1;
1426                     pEquation->addr[i].index   = xLog2 + elemLog2;
1427                 }
1428                 else
1429                 {
1430                     ADDR_ASSERT(pSwizzle[i].y != 0);
1431                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1432                     yMask |= pSwizzle[i].y;
1433 
1434                     pEquation->addr[i].channel = 1;
1435                     pEquation->addr[i].valid   = 1;
1436                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1437 
1438                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1439                 }
1440 
1441                 swizzle[i].value = 0;
1442                 bMask |= 1 << i;
1443             }
1444             else
1445             {
1446                 if (pSwizzle[i].z != 0)
1447                 {
1448                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1449 
1450                     pEquation->xor2[i].channel = 2;
1451                     pEquation->xor2[i].valid   = 1;
1452                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1453                 }
1454 
1455                 swizzle[i].x = pSwizzle[i].x;
1456                 swizzle[i].y = pSwizzle[i].y;
1457                 swizzle[i].z = swizzle[i].s = 0;
1458 
1459                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1460 
1461                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1462 
1463                 if (xHi != 0)
1464                 {
1465                     ADDR_ASSERT(IsPow2(xHi));
1466                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1467 
1468                     pEquation->xor1[i].channel = 0;
1469                     pEquation->xor1[i].valid   = 1;
1470                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1471 
1472                     swizzle[i].x &= blkXMask;
1473                 }
1474 
1475                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1476 
1477                 if (yHi != 0)
1478                 {
1479                     ADDR_ASSERT(IsPow2(yHi));
1480 
1481                     if (xHi == 0)
1482                     {
1483                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1484                         pEquation->xor1[i].channel = 1;
1485                         pEquation->xor1[i].valid   = 1;
1486                         pEquation->xor1[i].index   = Log2(yHi);
1487                     }
1488                     else
1489                     {
1490                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1491                         pEquation->xor2[i].channel = 1;
1492                         pEquation->xor2[i].valid   = 1;
1493                         pEquation->xor2[i].index   = Log2(yHi);
1494                     }
1495 
1496                     swizzle[i].y &= blkYMask;
1497                 }
1498 
1499                 if (swizzle[i].value == 0)
1500                 {
1501                     bMask |= 1 << i;
1502                 }
1503             }
1504         }
1505 
1506         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1507         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1508 
1509         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1510 
1511         while (bMask != blockMask)
1512         {
1513             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1514             {
1515                 if ((bMask & (1 << i)) == 0)
1516                 {
1517                     if (IsPow2(swizzle[i].value))
1518                     {
1519                         if (swizzle[i].x != 0)
1520                         {
1521                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1522                             xMask |= swizzle[i].x;
1523 
1524                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1525 
1526                             ADDR_ASSERT(xLog2 < blkXLog2);
1527 
1528                             pEquation->addr[i].channel = 0;
1529                             pEquation->addr[i].valid   = 1;
1530                             pEquation->addr[i].index   = xLog2 + elemLog2;
1531                         }
1532                         else
1533                         {
1534                             ADDR_ASSERT(swizzle[i].y != 0);
1535                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1536                             yMask |= swizzle[i].y;
1537 
1538                             pEquation->addr[i].channel = 1;
1539                             pEquation->addr[i].valid   = 1;
1540                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1541 
1542                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1543                         }
1544 
1545                         swizzle[i].value = 0;
1546                         bMask |= 1 << i;
1547                     }
1548                     else
1549                     {
1550                         const UINT_32 x = swizzle[i].x & xMask;
1551                         const UINT_32 y = swizzle[i].y & yMask;
1552 
1553                         if (x != 0)
1554                         {
1555                             ADDR_ASSERT(IsPow2(x));
1556 
1557                             if (pEquation->xor1[i].value == 0)
1558                             {
1559                                 pEquation->xor1[i].channel = 0;
1560                                 pEquation->xor1[i].valid   = 1;
1561                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1562                             }
1563                             else
1564                             {
1565                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1566                                 pEquation->xor2[i].channel = 0;
1567                                 pEquation->xor2[i].valid   = 1;
1568                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1569                             }
1570                         }
1571 
1572                         if (y != 0)
1573                         {
1574                             ADDR_ASSERT(IsPow2(y));
1575 
1576                             if (pEquation->xor1[i].value == 0)
1577                             {
1578                                 pEquation->xor1[i].channel = 1;
1579                                 pEquation->xor1[i].valid   = 1;
1580                                 pEquation->xor1[i].index   = Log2(y);
1581                             }
1582                             else
1583                             {
1584                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1585                                 pEquation->xor2[i].channel = 1;
1586                                 pEquation->xor2[i].valid   = 1;
1587                                 pEquation->xor2[i].index   = Log2(y);
1588                             }
1589                         }
1590 
1591                         swizzle[i].x &= ~x;
1592                         swizzle[i].y &= ~y;
1593                     }
1594                 }
1595             }
1596         }
1597 
1598         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1599     }
1600     else
1601     {
1602         const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1603         const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1604         const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1605         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1606         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1607         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1608 
1609         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1610         UINT_32          xMask = 0;
1611         UINT_32          yMask = 0;
1612         UINT_32          zMask = 0;
1613         UINT_32          bMask = (1 << elemLog2) - 1;
1614 
1615         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1616         {
1617             if (IsPow2(pSwizzle[i].value))
1618             {
1619                 if (pSwizzle[i].x != 0)
1620                 {
1621                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1622                     xMask |= pSwizzle[i].x;
1623 
1624                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1625 
1626                     ADDR_ASSERT(xLog2 < blkXLog2);
1627 
1628                     pEquation->addr[i].channel = 0;
1629                     pEquation->addr[i].valid   = 1;
1630                     pEquation->addr[i].index   = xLog2 + elemLog2;
1631                 }
1632                 else if (pSwizzle[i].y != 0)
1633                 {
1634                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1635                     yMask |= pSwizzle[i].y;
1636 
1637                     pEquation->addr[i].channel = 1;
1638                     pEquation->addr[i].valid   = 1;
1639                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1640 
1641                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1642                 }
1643                 else
1644                 {
1645                     ADDR_ASSERT(pSwizzle[i].z != 0);
1646                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1647                     zMask |= pSwizzle[i].z;
1648 
1649                     pEquation->addr[i].channel = 2;
1650                     pEquation->addr[i].valid   = 1;
1651                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1652 
1653                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1654                 }
1655 
1656                 swizzle[i].value = 0;
1657                 bMask |= 1 << i;
1658             }
1659             else
1660             {
1661                 swizzle[i].x = pSwizzle[i].x;
1662                 swizzle[i].y = pSwizzle[i].y;
1663                 swizzle[i].z = pSwizzle[i].z;
1664                 swizzle[i].s = 0;
1665 
1666                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1667 
1668                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1669                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1670                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1671 
1672                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1673 
1674                 if (xHi != 0)
1675                 {
1676                     ADDR_ASSERT(IsPow2(xHi));
1677                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1678 
1679                     pEquation->xor1[i].channel = 0;
1680                     pEquation->xor1[i].valid   = 1;
1681                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1682 
1683                     swizzle[i].x &= blkXMask;
1684                 }
1685 
1686                 if (yHi != 0)
1687                 {
1688                     ADDR_ASSERT(IsPow2(yHi));
1689 
1690                     if (pEquation->xor1[i].value == 0)
1691                     {
1692                         pEquation->xor1[i].channel = 1;
1693                         pEquation->xor1[i].valid   = 1;
1694                         pEquation->xor1[i].index   = Log2(yHi);
1695                     }
1696                     else
1697                     {
1698                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1699                         pEquation->xor2[i].channel = 1;
1700                         pEquation->xor2[i].valid   = 1;
1701                         pEquation->xor2[i].index   = Log2(yHi);
1702                     }
1703 
1704                     swizzle[i].y &= blkYMask;
1705                 }
1706 
1707                 if (zHi != 0)
1708                 {
1709                     ADDR_ASSERT(IsPow2(zHi));
1710 
1711                     if (pEquation->xor1[i].value == 0)
1712                     {
1713                         pEquation->xor1[i].channel = 2;
1714                         pEquation->xor1[i].valid   = 1;
1715                         pEquation->xor1[i].index   = Log2(zHi);
1716                     }
1717                     else
1718                     {
1719                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1720                         pEquation->xor2[i].channel = 2;
1721                         pEquation->xor2[i].valid   = 1;
1722                         pEquation->xor2[i].index   = Log2(zHi);
1723                     }
1724 
1725                     swizzle[i].z &= blkZMask;
1726                 }
1727 
1728                 if (swizzle[i].value == 0)
1729                 {
1730                     bMask |= 1 << i;
1731                 }
1732             }
1733         }
1734 
1735         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1736         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1737 
1738         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1739 
1740         while (bMask != blockMask)
1741         {
1742             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1743             {
1744                 if ((bMask & (1 << i)) == 0)
1745                 {
1746                     if (IsPow2(swizzle[i].value))
1747                     {
1748                         if (swizzle[i].x != 0)
1749                         {
1750                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1751                             xMask |= swizzle[i].x;
1752 
1753                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1754 
1755                             ADDR_ASSERT(xLog2 < blkXLog2);
1756 
1757                             pEquation->addr[i].channel = 0;
1758                             pEquation->addr[i].valid   = 1;
1759                             pEquation->addr[i].index   = xLog2 + elemLog2;
1760                         }
1761                         else if (swizzle[i].y != 0)
1762                         {
1763                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1764                             yMask |= swizzle[i].y;
1765 
1766                             pEquation->addr[i].channel = 1;
1767                             pEquation->addr[i].valid   = 1;
1768                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1769 
1770                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1771                         }
1772                         else
1773                         {
1774                             ADDR_ASSERT(swizzle[i].z != 0);
1775                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1776                             zMask |= swizzle[i].z;
1777 
1778                             pEquation->addr[i].channel = 2;
1779                             pEquation->addr[i].valid   = 1;
1780                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1781 
1782                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1783                         }
1784 
1785                         swizzle[i].value = 0;
1786                         bMask |= 1 << i;
1787                     }
1788                     else
1789                     {
1790                         const UINT_32 x = swizzle[i].x & xMask;
1791                         const UINT_32 y = swizzle[i].y & yMask;
1792                         const UINT_32 z = swizzle[i].z & zMask;
1793 
1794                         if (x != 0)
1795                         {
1796                             ADDR_ASSERT(IsPow2(x));
1797 
1798                             if (pEquation->xor1[i].value == 0)
1799                             {
1800                                 pEquation->xor1[i].channel = 0;
1801                                 pEquation->xor1[i].valid   = 1;
1802                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1803                             }
1804                             else
1805                             {
1806                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1807                                 pEquation->xor2[i].channel = 0;
1808                                 pEquation->xor2[i].valid   = 1;
1809                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1810                             }
1811                         }
1812 
1813                         if (y != 0)
1814                         {
1815                             ADDR_ASSERT(IsPow2(y));
1816 
1817                             if (pEquation->xor1[i].value == 0)
1818                             {
1819                                 pEquation->xor1[i].channel = 1;
1820                                 pEquation->xor1[i].valid   = 1;
1821                                 pEquation->xor1[i].index   = Log2(y);
1822                             }
1823                             else
1824                             {
1825                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1826                                 pEquation->xor2[i].channel = 1;
1827                                 pEquation->xor2[i].valid   = 1;
1828                                 pEquation->xor2[i].index   = Log2(y);
1829                             }
1830                         }
1831 
1832                         if (z != 0)
1833                         {
1834                             ADDR_ASSERT(IsPow2(z));
1835 
1836                             if (pEquation->xor1[i].value == 0)
1837                             {
1838                                 pEquation->xor1[i].channel = 2;
1839                                 pEquation->xor1[i].valid   = 1;
1840                                 pEquation->xor1[i].index   = Log2(z);
1841                             }
1842                             else
1843                             {
1844                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1845                                 pEquation->xor2[i].channel = 2;
1846                                 pEquation->xor2[i].valid   = 1;
1847                                 pEquation->xor2[i].index   = Log2(z);
1848                             }
1849                         }
1850 
1851                         swizzle[i].x &= ~x;
1852                         swizzle[i].y &= ~y;
1853                         swizzle[i].z &= ~z;
1854                     }
1855                 }
1856             }
1857         }
1858 
1859         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1860     }
1861 }
1862 
1863 /**
1864 ************************************************************************************************************************
1865 *   Gfx10Lib::InitEquationTable
1866 *
1867 *   @brief
1868 *       Initialize Equation table.
1869 *
1870 *   @return
1871 *       N/A
1872 ************************************************************************************************************************
1873 */
InitEquationTable()1874 VOID Gfx10Lib::InitEquationTable()
1875 {
1876     memset(m_equationTable, 0, sizeof(m_equationTable));
1877 
1878     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1879     {
1880         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1881 
1882         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1883         {
1884             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1885 
1886             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1887             {
1888                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1889                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1890 
1891                 if (pPatInfo != NULL)
1892                 {
1893                     ADDR_ASSERT(IsValidSwMode(swMode));
1894 
1895                     if (pPatInfo->maxItemCount <= 3)
1896                     {
1897                         ADDR_EQUATION equation = {0};
1898 
1899                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1900 
1901                         equationIndex = m_numEquations;
1902                         ADDR_ASSERT(equationIndex < EquationTableSize);
1903 
1904                         m_equationTable[equationIndex] = equation;
1905 
1906                         m_numEquations++;
1907                     }
1908                     else
1909                     {
1910                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1911                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1912                         ADDR_ASSERT(rsrcTypeIdx == 1);
1913                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1914                         ADDR_ASSERT(m_settings.supportRbPlus == 1);
1915                     }
1916                 }
1917 
1918                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1919             }
1920         }
1921     }
1922 }
1923 
1924 /**
1925 ************************************************************************************************************************
1926 *   Gfx10Lib::HwlGetEquationIndex
1927 *
1928 *   @brief
1929 *       Interface function stub of GetEquationIndex
1930 *
1931 *   @return
1932 *       ADDR_E_RETURNCODE
1933 ************************************************************************************************************************
1934 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1935 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1936     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
1937     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
1938     ) const
1939 {
1940     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1941 
1942     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1943         (pIn->resourceType == ADDR_RSRC_TEX_3D))
1944     {
1945         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1946         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
1947         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
1948 
1949         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1950     }
1951 
1952     if (pOut->pMipInfo != NULL)
1953     {
1954         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1955         {
1956             pOut->pMipInfo[i].equationIndex = equationIdx;
1957         }
1958     }
1959 
1960     return equationIdx;
1961 }
1962 
1963 /**
1964 ************************************************************************************************************************
1965 *   Gfx10Lib::GetValidDisplaySwizzleModes
1966 *
1967 *   @brief
1968 *       Get valid swizzle modes mask for displayable surface
1969 *
1970 *   @return
1971 *       Valid swizzle modes mask for displayable surface
1972 ************************************************************************************************************************
1973 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1974 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
1975     UINT_32 bpp
1976     ) const
1977 {
1978     UINT_32 swModeMask = 0;
1979 
1980     if (bpp <= 64)
1981     {
1982         if (m_settings.isDcn20)
1983         {
1984             swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
1985         }
1986         else
1987         {
1988             swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
1989         }
1990     }
1991 
1992     return swModeMask;
1993 }
1994 
1995 /**
1996 ************************************************************************************************************************
1997 *   Gfx10Lib::IsValidDisplaySwizzleMode
1998 *
1999 *   @brief
2000 *       Check if a swizzle mode is supported by display engine
2001 *
2002 *   @return
2003 *       TRUE is swizzle mode is supported by display engine
2004 ************************************************************************************************************************
2005 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2006 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2007     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2008     ) const
2009 {
2010     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2011 
2012     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2013 }
2014 
2015 /**
2016 ************************************************************************************************************************
2017 *   Gfx10Lib::GetMaxNumMipsInTail
2018 *
2019 *   @brief
2020 *       Return max number of mips in tails
2021 *
2022 *   @return
2023 *       Max number of mips in tails
2024 ************************************************************************************************************************
2025 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2026 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2027     UINT_32 blockSizeLog2,     ///< block size log2
2028     BOOL_32 isThin             ///< is thin or thick
2029     ) const
2030 {
2031     UINT_32 effectiveLog2 = blockSizeLog2;
2032 
2033     if (isThin == FALSE)
2034     {
2035         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2036     }
2037 
2038     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2039 }
2040 
2041 /**
2042 ************************************************************************************************************************
2043 *   Gfx10Lib::HwlComputePipeBankXor
2044 *
2045 *   @brief
2046 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2047 *
2048 *   @return
2049 *       PipeBankXor value
2050 ************************************************************************************************************************
2051 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2052 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2053     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
2054     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
2055     ) const
2056 {
2057     if (IsNonPrtXor(pIn->swizzleMode))
2058     {
2059         const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2060 
2061         // No pipe xor...
2062         const UINT_32 pipeXor = 0;
2063         UINT_32       bankXor = 0;
2064 
2065         const UINT_32         XorPatternLen = 8;
2066         static const UINT_32  XorBankRot1b[XorPatternLen] = {0,  1,  0,  1,  0,  1,  0,  1};
2067         static const UINT_32  XorBankRot2b[XorPatternLen] = {0,  2,  1,  3,  2,  0,  3,  1};
2068         static const UINT_32  XorBankRot3b[XorPatternLen] = {0,  4,  2,  6,  1,  5,  3,  7};
2069         static const UINT_32  XorBankRot4b[XorPatternLen] = {0,  8,  4, 12,  2, 10,  6, 14};
2070         static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2071 
2072         switch (bankBits)
2073         {
2074             case 1:
2075             case 2:
2076             case 3:
2077             case 4:
2078                 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2079                 break;
2080             default:
2081                 // valid bank bits should be 0~4
2082                 ADDR_ASSERT_ALWAYS();
2083             case 0:
2084                 break;
2085         }
2086 
2087         pOut->pipeBankXor = bankXor | pipeXor;
2088     }
2089     else
2090     {
2091         pOut->pipeBankXor = 0;
2092     }
2093 
2094     return ADDR_OK;
2095 }
2096 
2097 /**
2098 ************************************************************************************************************************
2099 *   Gfx10Lib::HwlComputeSlicePipeBankXor
2100 *
2101 *   @brief
2102 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2103 *
2104 *   @return
2105 *       PipeBankXor value
2106 ************************************************************************************************************************
2107 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2108 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2109     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
2110     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
2111     ) const
2112 {
2113     if (IsNonPrtXor(pIn->swizzleMode))
2114     {
2115         const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2116         const UINT_32 pipeBits  = GetPipeXorBits(blockBits);
2117         const UINT_32 pipeXor   = ReverseBitVector(pIn->slice, pipeBits);
2118 
2119         pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2120 
2121         if (pIn->bpe != 0)
2122         {
2123             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2124                                                                     pIn->resourceType,
2125                                                                     Log2(pIn->bpe >> 3),
2126                                                                     1);
2127 
2128             if (pPatInfo != NULL)
2129             {
2130                 ADDR_BIT_SETTING fullSwizzlePattern[20];
2131                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2132 
2133                 const UINT_32 pipeBankXorOffset =
2134                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2135                                                     blockBits,
2136                                                     0,
2137                                                     0,
2138                                                     pIn->slice,
2139                                                     0);
2140 
2141                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2142 
2143                 // Should have no bit set under pipe interleave
2144                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2145 
2146                 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
2147                 ADDR_ASSERT(pipeBankXor == pipeXor);
2148 
2149                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2150             }
2151         }
2152     }
2153     else
2154     {
2155         pOut->pipeBankXor = 0;
2156     }
2157 
2158     return ADDR_OK;
2159 }
2160 
2161 /**
2162 ************************************************************************************************************************
2163 *   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2164 *
2165 *   @brief
2166 *       Compute sub resource offset to support swizzle pattern
2167 *
2168 *   @return
2169 *       Offset
2170 ************************************************************************************************************************
2171 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2172 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2173     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
2174     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
2175     ) const
2176 {
2177     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2178 
2179     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2180 
2181     return ADDR_OK;
2182 }
2183 
2184 /**
2185 ************************************************************************************************************************
2186 *   Gfx10Lib::ValidateNonSwModeParams
2187 *
2188 *   @brief
2189 *       Validate compute surface info params except swizzle mode
2190 *
2191 *   @return
2192 *       TRUE if parameters are valid, FALSE otherwise
2193 ************************************************************************************************************************
2194 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2195 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2196     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2197 {
2198     BOOL_32 valid = TRUE;
2199 
2200     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2201     {
2202         ADDR_ASSERT_ALWAYS();
2203         valid = FALSE;
2204     }
2205 
2206     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2207     {
2208         ADDR_ASSERT_ALWAYS();
2209         valid = FALSE;
2210     }
2211 
2212     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2213     const AddrResourceType    rsrcType = pIn->resourceType;
2214     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2215     const BOOL_32             msaa     = (pIn->numFrags > 1);
2216     const BOOL_32             display  = flags.display;
2217     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2218     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2219     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2220     const BOOL_32             stereo   = flags.qbStereo;
2221 
2222 
2223     // Resource type check
2224     if (tex1d)
2225     {
2226         if (msaa || display || stereo)
2227         {
2228             ADDR_ASSERT_ALWAYS();
2229             valid = FALSE;
2230         }
2231     }
2232     else if (tex2d)
2233     {
2234         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2235         {
2236             ADDR_ASSERT_ALWAYS();
2237             valid = FALSE;
2238         }
2239     }
2240     else if (tex3d)
2241     {
2242         if (msaa || display || stereo)
2243         {
2244             ADDR_ASSERT_ALWAYS();
2245             valid = FALSE;
2246         }
2247     }
2248     else
2249     {
2250         ADDR_ASSERT_ALWAYS();
2251         valid = FALSE;
2252     }
2253 
2254     return valid;
2255 }
2256 
2257 /**
2258 ************************************************************************************************************************
2259 *   Gfx10Lib::ValidateSwModeParams
2260 *
2261 *   @brief
2262 *       Validate compute surface info related to swizzle mode
2263 *
2264 *   @return
2265 *       TRUE if parameters are valid, FALSE otherwise
2266 ************************************************************************************************************************
2267 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2268 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2269     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2270 {
2271     BOOL_32 valid = TRUE;
2272 
2273     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2274     {
2275         ADDR_ASSERT_ALWAYS();
2276         valid = FALSE;
2277     }
2278     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2279     {
2280         {
2281             ADDR_ASSERT_ALWAYS();
2282             valid = FALSE;
2283         }
2284     }
2285 
2286     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2287     const AddrResourceType    rsrcType    = pIn->resourceType;
2288     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2289     const BOOL_32             msaa        = (pIn->numFrags > 1);
2290     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2291     const BOOL_32             color       = flags.color;
2292     const BOOL_32             display     = flags.display;
2293     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2294     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2295     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2296     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2297     const BOOL_32             linear      = IsLinear(swizzle);
2298     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2299     const BOOL_32             blkVar      = IsBlockVariable(swizzle);
2300     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2301     const BOOL_32             prt         = flags.prt;
2302     const BOOL_32             fmask       = flags.fmask;
2303 
2304     // Misc check
2305     if ((pIn->numFrags > 1) &&
2306         (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2307     {
2308         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2309         ADDR_ASSERT_ALWAYS();
2310         valid = FALSE;
2311     }
2312 
2313     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2314     {
2315         ADDR_ASSERT_ALWAYS();
2316         valid = FALSE;
2317     }
2318 
2319     if ((pIn->bpp == 96) && (linear == FALSE))
2320     {
2321         ADDR_ASSERT_ALWAYS();
2322         valid = FALSE;
2323     }
2324 
2325     const UINT_32 swizzleMask = 1 << swizzle;
2326 
2327     // Resource type check
2328     if (tex1d)
2329     {
2330         if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2331         {
2332             ADDR_ASSERT_ALWAYS();
2333             valid = FALSE;
2334         }
2335     }
2336     else if (tex2d)
2337     {
2338         if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2339         {
2340             {
2341                 ADDR_ASSERT_ALWAYS();
2342                 valid = FALSE;
2343             }
2344         }
2345         else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2346                  (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2347         {
2348             ADDR_ASSERT_ALWAYS();
2349             valid = FALSE;
2350         }
2351 
2352     }
2353     else if (tex3d)
2354     {
2355         if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2356             (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2357             (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2358         {
2359             ADDR_ASSERT_ALWAYS();
2360             valid = FALSE;
2361         }
2362     }
2363 
2364     // Swizzle type check
2365     if (linear)
2366     {
2367         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2368         {
2369             ADDR_ASSERT_ALWAYS();
2370             valid = FALSE;
2371         }
2372     }
2373     else if (IsZOrderSwizzle(swizzle))
2374     {
2375         if ((pIn->bpp > 64)                         ||
2376             (msaa && (color || (pIn->bpp > 32)))    ||
2377             ElemLib::IsBlockCompressed(pIn->format) ||
2378             ElemLib::IsMacroPixelPacked(pIn->format))
2379         {
2380             ADDR_ASSERT_ALWAYS();
2381             valid = FALSE;
2382         }
2383     }
2384     else if (IsStandardSwizzle(rsrcType, swizzle))
2385     {
2386         if (zbuffer || msaa)
2387         {
2388             ADDR_ASSERT_ALWAYS();
2389             valid = FALSE;
2390         }
2391     }
2392     else if (IsDisplaySwizzle(rsrcType, swizzle))
2393     {
2394         if (zbuffer || msaa)
2395         {
2396             ADDR_ASSERT_ALWAYS();
2397             valid = FALSE;
2398         }
2399     }
2400     else if (IsRtOptSwizzle(swizzle))
2401     {
2402         if (zbuffer)
2403         {
2404             ADDR_ASSERT_ALWAYS();
2405             valid = FALSE;
2406         }
2407     }
2408     else
2409     {
2410         {
2411             ADDR_ASSERT_ALWAYS();
2412             valid = FALSE;
2413         }
2414     }
2415 
2416     // Block type check
2417     if (blk256B)
2418     {
2419         if (zbuffer || tex3d || msaa)
2420         {
2421             ADDR_ASSERT_ALWAYS();
2422             valid = FALSE;
2423         }
2424     }
2425     else if (blkVar)
2426     {
2427         if (m_blockVarSizeLog2 == 0)
2428         {
2429             ADDR_ASSERT_ALWAYS();
2430             valid = FALSE;
2431         }
2432     }
2433 
2434     return valid;
2435 }
2436 
2437 /**
2438 ************************************************************************************************************************
2439 *   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2440 *
2441 *   @brief
2442 *       Compute surface info sanity check
2443 *
2444 *   @return
2445 *       Offset
2446 ************************************************************************************************************************
2447 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2448 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2449     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2450     ) const
2451 {
2452     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2453 }
2454 
/**
************************************************************************************************************************
*   Gfx10Lib::HwlGetPreferredSurfaceSetting
*
*   @brief
*       Internal function to get suggested surface information for client to use
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2466 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2467     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2468     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2469     ) const
2470 {
2471     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2472 
2473     if (pIn->flags.fmask)
2474     {
2475         const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2476         const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2477 
2478         if (forbid64KbBlockType && forbidVarBlockType)
2479         {
2480             // Invalid combination...
2481             ADDR_ASSERT_ALWAYS();
2482             returnCode = ADDR_INVALIDPARAMS;
2483         }
2484         else
2485         {
2486             pOut->resourceType                   = ADDR_RSRC_TEX_2D;
2487             pOut->validBlockSet.value            = 0;
2488             pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
2489             pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
2490             pOut->validSwModeSet.value           = 0;
2491             pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
2492             pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
2493             pOut->canXor                         = TRUE;
2494             pOut->validSwTypeSet.value           = AddrSwSetZ;
2495             pOut->clientPreferredSwSet           = pOut->validSwTypeSet;
2496 
2497             BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2498 
2499             if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2500             {
2501                 const UINT_8  maxFmaskSwizzleModeType = 2;
2502                 const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2503                 const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2504                 const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2505                 const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
2506                 const UINT_32 width                   = Max(pIn->width, 1u);
2507                 const UINT_32 height                  = Max(pIn->height, 1u);
2508                 const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2509 
2510                 AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2511                 Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
2512                 Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
2513                 UINT_64         padSize[maxFmaskSwizzleModeType] = {0};
2514 
2515                 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2516                 {
2517                     ComputeBlockDimensionForSurf(&blkDim[i].w,
2518                                                  &blkDim[i].h,
2519                                                  &blkDim[i].d,
2520                                                  fmaskBpp,
2521                                                  1,
2522                                                  pOut->resourceType,
2523                                                  swMode[i]);
2524 
2525                     padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2526                     padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2527                 }
2528 
2529                 if (GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))
2530                 {
2531                     if ((padSize[1] * ratioHi) <= (padSize[0] * ratioLow))
2532                     {
2533                         use64KbBlockType = FALSE;
2534                     }
2535                 }
2536                 else
2537                 {
2538                     if ((padSize[1] * ratioLow) < (padSize[0] * ratioHi))
2539                     {
2540                         use64KbBlockType = FALSE;
2541                     }
2542                 }
2543             }
2544             else if (forbidVarBlockType)
2545             {
2546                 use64KbBlockType = TRUE;
2547             }
2548 
2549             if (use64KbBlockType)
2550             {
2551                 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2552             }
2553             else
2554             {
2555                 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2556             }
2557         }
2558     }
2559     else
2560     {
2561         UINT_32 bpp    = pIn->bpp;
2562         UINT_32 width  = Max(pIn->width, 1u);
2563         UINT_32 height = Max(pIn->height, 1u);
2564 
2565         // Set format to INVALID will skip this conversion
2566         if (pIn->format != ADDR_FMT_INVALID)
2567         {
2568             ElemMode elemMode = ADDR_UNCOMPRESSED;
2569             UINT_32 expandX, expandY;
2570 
2571             // Get compression/expansion factors and element mode which indicates compression/expansion
2572             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2573                                                 &elemMode,
2574                                                 &expandX,
2575                                                 &expandY);
2576 
2577             UINT_32 basePitch = 0;
2578             GetElemLib()->AdjustSurfaceInfo(elemMode,
2579                                             expandX,
2580                                             expandY,
2581                                             &bpp,
2582                                             &basePitch,
2583                                             &width,
2584                                             &height);
2585         }
2586 
2587         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2588         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2589         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2590         const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2591         const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
2592 
2593         // Pre sanity check on non swizzle mode parameters
2594         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
2595         localIn.flags        = pIn->flags;
2596         localIn.resourceType = pIn->resourceType;
2597         localIn.format       = pIn->format;
2598         localIn.bpp          = bpp;
2599         localIn.width        = width;
2600         localIn.height       = height;
2601         localIn.numSlices    = numSlices;
2602         localIn.numMipLevels = numMipLevels;
2603         localIn.numSamples   = numSamples;
2604         localIn.numFrags     = numFrags;
2605 
2606         if (ValidateNonSwModeParams(&localIn))
2607         {
2608             // Forbid swizzle mode(s) by client setting
2609             ADDR2_SWMODE_SET allowedSwModeSet = {0};
2610             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2611             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
2612             allowedSwModeSet.value |=
2613                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2614                 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2615             allowedSwModeSet.value |=
2616                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2617                 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2618             allowedSwModeSet.value |=
2619                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2620                 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2621             allowedSwModeSet.value |=
2622                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2623                 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2624             allowedSwModeSet.value |=
2625                 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2626 
2627             if (pIn->preferredSwSet.value != 0)
2628             {
2629                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2630                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2631                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2632                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2633             }
2634 
2635             if (pIn->noXor)
2636             {
2637                 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2638             }
2639 
2640             if (pIn->maxAlign > 0)
2641             {
2642                 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2643                 {
2644                     allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2645                 }
2646 
2647                 if (pIn->maxAlign < Size64K)
2648                 {
2649                     allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2650                 }
2651 
2652                 if (pIn->maxAlign < Size4K)
2653                 {
2654                     allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2655                 }
2656 
2657                 if (pIn->maxAlign < Size256)
2658                 {
2659                     allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2660                 }
2661             }
2662 
2663             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2664             switch (pIn->resourceType)
2665             {
2666                 case ADDR_RSRC_TEX_1D:
2667                     allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2668                     break;
2669 
2670                 case ADDR_RSRC_TEX_2D:
2671                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2672 
2673                     break;
2674 
2675                 case ADDR_RSRC_TEX_3D:
2676                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2677 
2678                     if (pIn->flags.view3dAs2dArray)
2679                     {
2680                         allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2681                     }
2682                     break;
2683 
2684                 default:
2685                     ADDR_ASSERT_ALWAYS();
2686                     allowedSwModeSet.value = 0;
2687                     break;
2688             }
2689 
2690             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2691                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2692                 (bpp > 64)                               ||
2693                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2694             {
2695                 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2696             }
2697 
2698             if (pIn->format == ADDR_FMT_32_32_32)
2699             {
2700                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2701             }
2702 
2703             if (msaa)
2704             {
2705                 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2706             }
2707 
2708             if (pIn->flags.depth || pIn->flags.stencil)
2709             {
2710                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2711             }
2712 
2713             if (pIn->flags.display)
2714             {
2715                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2716             }
2717 
2718             if (allowedSwModeSet.value != 0)
2719             {
2720 #if DEBUG
2721                 // Post sanity check, at least AddrLib should accept the output generated by its own
2722                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2723 
2724                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2725                 {
2726                     if (validateSwModeSet & 1)
2727                     {
2728                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2729                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
2730                     }
2731 
2732                     validateSwModeSet >>= 1;
2733                 }
2734 #endif
2735 
2736                 pOut->resourceType   = pIn->resourceType;
2737                 pOut->validSwModeSet = allowedSwModeSet;
2738                 pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2739                 pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2740                 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2741 
2742                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2743 
2744                 if (pOut->clientPreferredSwSet.value == 0)
2745                 {
2746                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
2747                 }
2748 
2749                 // Apply optional restrictions
2750                 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2751                 {
2752                     if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2753                     {
2754                         // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2755                         // the GL2 in VAR mode, so it should be avoided.
2756                         allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2757                     }
2758                     else
2759                     {
2760                         // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2761                         // But we have to suffer from low performance because there is no other choice...
2762                         ADDR_ASSERT_ALWAYS();
2763                     }
2764                 }
2765 
2766                 if (pIn->flags.needEquation)
2767                 {
2768                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2769                 }
2770 
2771                 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2772                 {
2773                     pOut->swizzleMode = ADDR_SW_LINEAR;
2774                 }
2775                 else
2776                 {
2777                     // Always ignore linear swizzle mode if there is other choice.
2778                     allowedSwModeSet.swLinear = 0;
2779 
2780                     ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2781 
2782                     // Determine block size if there are 2 or more block type candidates
2783                     if (IsPow2(allowedBlockSet.value) == FALSE)
2784                     {
2785                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_LINEAR};
2786 
2787                         if (m_blockVarSizeLog2 != 0)
2788                         {
2789                             swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
2790                         }
2791 
2792                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2793                         {
2794                             swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
2795                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
2796                             swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2797                         }
2798                         else
2799                         {
2800                             swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
2801                             swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
2802                             swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2803                         }
2804 
2805                         Dim3d   blkDim[AddrBlockMaxTiledType]  = {0};
2806                         Dim3d   padDim[AddrBlockMaxTiledType]  = {0};
2807                         UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2808 
2809                         const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2810                         const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2811                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2812                         UINT_32       minSizeBlk         = AddrBlockMicro;
2813                         UINT_64       minSize            = 0;
2814 
2815                         for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2816                         {
2817                             if (allowedBlockSet.value & (1 << i))
2818                             {
2819                                 ComputeBlockDimensionForSurf(&blkDim[i].w,
2820                                                              &blkDim[i].h,
2821                                                              &blkDim[i].d,
2822                                                              bpp,
2823                                                              numFrags,
2824                                                              pOut->resourceType,
2825                                                              swMode[i]);
2826 
2827                                 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2828                                 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
2829 
2830                                 if (minSize == 0)
2831                                 {
2832                                     minSize    = padSize[i];
2833                                     minSizeBlk = i;
2834                                 }
2835                                 else
2836                                 {
2837                                     // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2838                                     // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2839                                     // smaller block type to bigger block type. So we have to correct comparing logic
2840                                     // according to the size of existing "minimun block" and size of coming/comparing
2841                                     // block. The new logic can also be useful to any future change about AddrBlockType.
2842                                     if (GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))
2843                                     {
2844                                         if ((padSize[i] * ratioHi) <= (minSize * ratioLow))
2845                                         {
2846                                             minSize    = padSize[i];
2847                                             minSizeBlk = i;
2848                                         }
2849                                     }
2850                                     else
2851                                     {
2852                                         if ((padSize[i] * ratioLow) < (minSize * ratioHi))
2853                                         {
2854                                             minSize    = padSize[i];
2855                                             minSizeBlk = i;
2856                                         }
2857                                     }
2858                                 }
2859                             }
2860                         }
2861 
2862                         if ((allowedBlockSet.micro == TRUE)      &&
2863                             (width  <= blkDim[AddrBlockMicro].w) &&
2864                             (height <= blkDim[AddrBlockMicro].h))
2865                         {
2866                             minSizeBlk = AddrBlockMicro;
2867                         }
2868 
2869                         if (minSizeBlk == AddrBlockMicro)
2870                         {
2871                             ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2872                             allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2873                         }
2874                         else if (minSizeBlk == AddrBlockThick4KB)
2875                         {
2876                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2877                             allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2878                         }
2879                         else if (minSizeBlk == AddrBlockThin4KB)
2880                         {
2881                             ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2882                             allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2883                         }
2884                         else if (minSizeBlk == AddrBlockThick64KB)
2885                         {
2886                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2887                             allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2888                         }
2889                         else if (minSizeBlk == AddrBlockThin64KB)
2890                         {
2891                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2892                                                       Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2893                         }
2894                         else
2895                         {
2896                             ADDR_ASSERT(minSizeBlk == AddrBlockThinVar);
2897                             allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2898                         }
2899                     }
2900 
2901                     // Block type should be determined.
2902                     ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2903 
2904                     ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2905 
2906                     // Determine swizzle type if there are 2 or more swizzle type candidates
2907                     if (IsPow2(allowedSwSet.value) == FALSE)
2908                     {
2909                         if (ElemLib::IsBlockCompressed(pIn->format))
2910                         {
2911                             if (allowedSwSet.sw_D)
2912                             {
2913                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2914                             }
2915                             else if (allowedSwSet.sw_S)
2916                             {
2917                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2918                             }
2919                             else
2920                             {
2921                                 ADDR_ASSERT(allowedSwSet.sw_R);
2922                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2923                             }
2924                         }
2925                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
2926                         {
2927                             if (allowedSwSet.sw_S)
2928                             {
2929                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2930                             }
2931                             else if (allowedSwSet.sw_D)
2932                             {
2933                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2934                             }
2935                             else
2936                             {
2937                                 ADDR_ASSERT(allowedSwSet.sw_R);
2938                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2939                             }
2940                         }
2941                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2942                         {
2943                             if (pIn->flags.color &&
2944                                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2945                                 allowedSwSet.sw_D)
2946                             {
2947                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2948                             }
2949                             else if (allowedSwSet.sw_S)
2950                             {
2951                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2952                             }
2953                             else if (allowedSwSet.sw_R)
2954                             {
2955                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2956                             }
2957                             else
2958                             {
2959                                 ADDR_ASSERT(allowedSwSet.sw_Z);
2960                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2961                             }
2962                         }
2963                         else
2964                         {
2965                             if (allowedSwSet.sw_R)
2966                             {
2967                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2968                             }
2969                             else if (allowedSwSet.sw_D)
2970                             {
2971                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2972                             }
2973                             else if (allowedSwSet.sw_S)
2974                             {
2975                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2976                             }
2977                             else
2978                             {
2979                                 ADDR_ASSERT(allowedSwSet.sw_Z);
2980                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2981                             }
2982                         }
2983                     }
2984 
2985                     // Swizzle type should be determined.
2986                     ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2987 
2988                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2989                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2990                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2991                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2992                 }
2993             }
2994             else
2995             {
2996                 // Invalid combination...
2997                 ADDR_ASSERT_ALWAYS();
2998                 returnCode = ADDR_INVALIDPARAMS;
2999             }
3000         }
3001         else
3002         {
3003             // Invalid combination...
3004             ADDR_ASSERT_ALWAYS();
3005             returnCode = ADDR_INVALIDPARAMS;
3006         }
3007     }
3008 
3009     return returnCode;
3010 }
3011 
3012 /**
3013 ************************************************************************************************************************
3014 *   Gfx10Lib::ComputeStereoInfo
3015 *
3016 *   @brief
3017 *       Compute height alignment and right eye pipeBankXor for stereo surface
3018 *
3019 *   @return
3020 *       Error code
3021 *
3022 ************************************************************************************************************************
3023 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 blkHeight,UINT_32 * pAlignY,UINT_32 * pRightXor) const3024 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3025     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3026     UINT_32                                 blkHeight,  ///< Block height
3027     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3028     UINT_32*                                pRightXor   ///< Right eye xor
3029     ) const
3030 {
3031     ADDR_E_RETURNCODE ret = ADDR_OK;
3032 
3033     *pAlignY   = 1;
3034     *pRightXor = 0;
3035 
3036     if (IsNonPrtXor(pIn->swizzleMode))
3037     {
3038         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3039         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3040         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3041         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3042         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3043 
3044         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3045         {
3046             UINT_32 yMax = 0;
3047             UINT_32 yPos = 0;
3048 
3049             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3050             {
3051                 if (m_equationTable[eqIndex].xor1[i].value == 0)
3052                 {
3053                     break;
3054                 }
3055 
3056                 ADDR_ASSERT(m_equationTable[eqIndex].xor1[i].valid == 1);
3057 
3058                 if ((m_equationTable[eqIndex].xor1[i].channel == 1) &&
3059                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3060                 {
3061                     yMax = m_equationTable[eqIndex].xor1[i].index;
3062                     yPos = i;
3063                 }
3064             }
3065 
3066             const UINT_32 additionalAlign = 1 << yMax;
3067 
3068             if (additionalAlign >= blkHeight)
3069             {
3070                 *pAlignY *= (additionalAlign / blkHeight);
3071 
3072                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3073 
3074                 if ((alignedHeight >> yMax) & 1)
3075                 {
3076                     *pRightXor = 1 << (yPos - m_pipeInterleaveLog2);
3077                 }
3078             }
3079         }
3080         else
3081         {
3082             ret = ADDR_INVALIDPARAMS;
3083         }
3084     }
3085 
3086     return ret;
3087 }
3088 
3089 /**
3090 ************************************************************************************************************************
3091 *   Gfx10Lib::HwlComputeSurfaceInfoTiled
3092 *
3093 *   @brief
3094 *       Internal function to calculate alignment for tiled surface
3095 *
3096 *   @return
3097 *       ADDR_E_RETURNCODE
3098 ************************************************************************************************************************
3099 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3100 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3101      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3102      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3103      ) const
3104 {
3105     ADDR_E_RETURNCODE ret;
3106 
3107     // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3108     pOut->mipChainPitch    = 0;
3109     pOut->mipChainHeight   = 0;
3110     pOut->mipChainSlice    = 0;
3111     pOut->epitchIsHeight   = FALSE;
3112 
3113     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3114     pOut->mipChainInTail   = FALSE;
3115     pOut->firstMipIdInTail = pIn->numMipLevels;
3116 
3117     if (IsBlock256b(pIn->swizzleMode))
3118     {
3119         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3120     }
3121     else
3122     {
3123         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3124     }
3125 
3126     return ret;
3127 }
3128 
3129 
3130 /**
3131 ************************************************************************************************************************
3132 *   Gfx10Lib::ComputeSurfaceInfoMicroTiled
3133 *
3134 *   @brief
3135 *       Internal function to calculate alignment for micro tiled surface
3136 *
3137 *   @return
3138 *       ADDR_E_RETURNCODE
3139 ************************************************************************************************************************
3140 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3141 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3142      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3143      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3144      ) const
3145 {
3146     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3147                                                          &pOut->blockHeight,
3148                                                          &pOut->blockSlices,
3149                                                          pIn->bpp,
3150                                                          pIn->numFrags,
3151                                                          pIn->resourceType,
3152                                                          pIn->swizzleMode);
3153 
3154     if (ret == ADDR_OK)
3155     {
3156         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3157 
3158         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3159         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3160         pOut->numSlices = pIn->numSlices;
3161         pOut->baseAlign = blockSize;
3162 
3163         if (pIn->numMipLevels > 1)
3164         {
3165             const UINT_32 mip0Width    = pIn->width;
3166             const UINT_32 mip0Height   = pIn->height;
3167             UINT_64       mipSliceSize = 0;
3168 
3169             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3170             {
3171                 UINT_32 mipWidth, mipHeight;
3172 
3173                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3174 
3175                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3176                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3177 
3178                 if (pOut->pMipInfo != NULL)
3179                 {
3180                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3181                     pOut->pMipInfo[i].height           = mipActualHeight;
3182                     pOut->pMipInfo[i].depth            = 1;
3183                     pOut->pMipInfo[i].offset           = mipSliceSize;
3184                     pOut->pMipInfo[i].mipTailOffset    = 0;
3185                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3186                 }
3187 
3188                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3189             }
3190 
3191             pOut->sliceSize = mipSliceSize;
3192             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3193         }
3194         else
3195         {
3196             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3197             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3198 
3199             if (pOut->pMipInfo != NULL)
3200             {
3201                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3202                 pOut->pMipInfo[0].height           = pOut->height;
3203                 pOut->pMipInfo[0].depth            = 1;
3204                 pOut->pMipInfo[0].offset           = 0;
3205                 pOut->pMipInfo[0].mipTailOffset    = 0;
3206                 pOut->pMipInfo[0].macroBlockOffset = 0;
3207             }
3208         }
3209 
3210     }
3211 
3212     return ret;
3213 }
3214 
3215 /**
3216 ************************************************************************************************************************
3217 *   Gfx10Lib::ComputeSurfaceInfoMacroTiled
3218 *
3219 *   @brief
3220 *       Internal function to calculate alignment for macro tiled surface
3221 *
3222 *   @return
3223 *       ADDR_E_RETURNCODE
3224 ************************************************************************************************************************
3225 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3226 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3227      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3228      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3229      ) const
3230 {
3231     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3232                                                                 &pOut->blockHeight,
3233                                                                 &pOut->blockSlices,
3234                                                                 pIn->bpp,
3235                                                                 pIn->numFrags,
3236                                                                 pIn->resourceType,
3237                                                                 pIn->swizzleMode);
3238 
3239     if (returnCode == ADDR_OK)
3240     {
3241         UINT_32 heightAlign = pOut->blockHeight;
3242 
3243         if (pIn->flags.qbStereo)
3244         {
3245             UINT_32 rightXor = 0;
3246             UINT_32 alignY   = 1;
3247 
3248             returnCode = ComputeStereoInfo(pIn, heightAlign, &alignY, &rightXor);
3249 
3250             if (returnCode == ADDR_OK)
3251             {
3252                 pOut->pStereoInfo->rightSwizzle = rightXor;
3253 
3254                 heightAlign *= alignY;
3255             }
3256         }
3257 
3258         if (returnCode == ADDR_OK)
3259         {
3260             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3261             const UINT_32 blockSize     = 1 << blockSizeLog2;
3262 
3263             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3264             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3265             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3266             pOut->baseAlign = blockSize;
3267 
3268             if (pIn->numMipLevels > 1)
3269             {
3270                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3271                                                                 pIn->swizzleMode,
3272                                                                 pOut->blockWidth,
3273                                                                 pOut->blockHeight,
3274                                                                 pOut->blockSlices);
3275                 const UINT_32 mip0Width         = pIn->width;
3276                 const UINT_32 mip0Height        = pIn->height;
3277                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3278                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3279                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3280                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3281                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3282                 UINT_64       mipChainSliceSize = 0;
3283                 UINT_64       mipSize[MaxMipLevels];
3284                 UINT_64       mipSliceSize[MaxMipLevels];
3285 
3286                 Dim3d fixedTailMaxDim = tailMaxDim;
3287 
3288                 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3289                 {
3290                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3291                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3292                 }
3293 
3294                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3295                 {
3296                     UINT_32 mipWidth, mipHeight, mipDepth;
3297 
3298                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3299 
3300                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3301                     {
3302                         firstMipInTail     = i;
3303                         mipChainSliceSize += blockSize / pOut->blockSlices;
3304                         break;
3305                     }
3306                     else
3307                     {
3308                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3309                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3310                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3311                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3312 
3313                         mipSize[i]         = sliceSize * depth;
3314                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3315                         mipChainSliceSize += sliceSize;
3316 
3317                         if (pOut->pMipInfo != NULL)
3318                         {
3319                             pOut->pMipInfo[i].pitch  = pitch;
3320                             pOut->pMipInfo[i].height = height;
3321                             pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3322                         }
3323                     }
3324                 }
3325 
3326                 pOut->sliceSize        = mipChainSliceSize;
3327                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3328                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3329                 pOut->firstMipIdInTail = firstMipInTail;
3330 
3331                 if (pOut->pMipInfo != NULL)
3332                 {
3333                     UINT_64 offset         = 0;
3334                     UINT_64 macroBlkOffset = 0;
3335                     UINT_32 tailMaxDepth   = 0;
3336 
3337                     if (firstMipInTail != pIn->numMipLevels)
3338                     {
3339                         UINT_32 mipWidth, mipHeight;
3340 
3341                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3342                                    &mipWidth, &mipHeight, &tailMaxDepth);
3343 
3344                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3345                         macroBlkOffset = blockSize;
3346                     }
3347 
3348                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3349                     {
3350                         pOut->pMipInfo[i].offset           = offset;
3351                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3352                         pOut->pMipInfo[i].mipTailOffset    = 0;
3353 
3354                         offset         += mipSize[i];
3355                         macroBlkOffset += mipSliceSize[i];
3356                     }
3357 
3358                     UINT_32 pitch  = tailMaxDim.w;
3359                     UINT_32 height = tailMaxDim.h;
3360                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3361 
3362                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3363 
3364                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3365                     {
3366                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3367                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3368 
3369                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3370                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3371                         pOut->pMipInfo[i].macroBlockOffset = 0;
3372 
3373                         pOut->pMipInfo[i].pitch  = pitch;
3374                         pOut->pMipInfo[i].height = height;
3375                         pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3376 
3377                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3378                                        ((mipOffset >> 10) & 2)  |
3379                                        ((mipOffset >> 11) & 4)  |
3380                                        ((mipOffset >> 12) & 8)  |
3381                                        ((mipOffset >> 13) & 16) |
3382                                        ((mipOffset >> 14) & 32);
3383                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3384                                        ((mipOffset >> 9)  & 2)  |
3385                                        ((mipOffset >> 10) & 4)  |
3386                                        ((mipOffset >> 11) & 8)  |
3387                                        ((mipOffset >> 12) & 16) |
3388                                        ((mipOffset >> 13) & 32);
3389 
3390                         if (blockSizeLog2 & 1)
3391                         {
3392                             const UINT_32 temp = mipX;
3393                             mipX = mipY;
3394                             mipY = temp;
3395 
3396                             if (index & 1)
3397                             {
3398                                 mipY = (mipY << 1) | (mipX & 1);
3399                                 mipX = mipX >> 1;
3400                             }
3401                         }
3402 
3403                         if (isThin)
3404                         {
3405                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3406                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3407                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3408 
3409                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3410                             height = Max(height >> 1, Block256_2d[index].h);
3411                         }
3412                         else
3413                         {
3414                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3415                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3416                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3417 
3418                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3419                             height = Max(height >> 1, Block256_3d[index].h);
3420                         }
3421                     }
3422                 }
3423             }
3424             else
3425             {
3426                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3427                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3428 
3429                 if (pOut->pMipInfo != NULL)
3430                 {
3431                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3432                     pOut->pMipInfo[0].height           = pOut->height;
3433                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3434                     pOut->pMipInfo[0].offset           = 0;
3435                     pOut->pMipInfo[0].mipTailOffset    = 0;
3436                     pOut->pMipInfo[0].macroBlockOffset = 0;
3437                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3438                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3439                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3440                 }
3441             }
3442         }
3443     }
3444 
3445     return returnCode;
3446 }
3447 
3448 /**
3449 ************************************************************************************************************************
3450 *   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3451 *
3452 *   @brief
3453 *       Internal function to calculate address from coord for tiled swizzle surface
3454 *
3455 *   @return
3456 *       ADDR_E_RETURNCODE
3457 ************************************************************************************************************************
3458 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3459 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3460      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3461      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3462      ) const
3463 {
3464     ADDR_E_RETURNCODE ret;
3465 
3466     if (IsBlock256b(pIn->swizzleMode))
3467     {
3468         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3469     }
3470     else
3471     {
3472         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3473     }
3474 
3475     return ret;
3476 }
3477 
3478 /**
3479 ************************************************************************************************************************
3480 *   Gfx10Lib::ComputeOffsetFromEquation
3481 *
3482 *   @brief
3483 *       Compute offset from equation
3484 *
3485 *   @return
3486 *       Offset
3487 ************************************************************************************************************************
3488 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3489 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3490     const ADDR_EQUATION* pEq,   ///< Equation
3491     UINT_32              x,     ///< x coord in bytes
3492     UINT_32              y,     ///< y coord in pixel
3493     UINT_32              z      ///< z coord in slice
3494     ) const
3495 {
3496     UINT_32 offset = 0;
3497 
3498     for (UINT_32 i = 0; i < pEq->numBits; i++)
3499     {
3500         UINT_32 v = 0;
3501 
3502         if (pEq->addr[i].valid)
3503         {
3504             if (pEq->addr[i].channel == 0)
3505             {
3506                 v ^= (x >> pEq->addr[i].index) & 1;
3507             }
3508             else if (pEq->addr[i].channel == 1)
3509             {
3510                 v ^= (y >> pEq->addr[i].index) & 1;
3511             }
3512             else
3513             {
3514                 ADDR_ASSERT(pEq->addr[i].channel == 2);
3515                 v ^= (z >> pEq->addr[i].index) & 1;
3516             }
3517         }
3518 
3519         if (pEq->xor1[i].valid)
3520         {
3521             if (pEq->xor1[i].channel == 0)
3522             {
3523                 v ^= (x >> pEq->xor1[i].index) & 1;
3524             }
3525             else if (pEq->xor1[i].channel == 1)
3526             {
3527                 v ^= (y >> pEq->xor1[i].index) & 1;
3528             }
3529             else
3530             {
3531                 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3532                 v ^= (z >> pEq->xor1[i].index) & 1;
3533             }
3534         }
3535 
3536         if (pEq->xor2[i].valid)
3537         {
3538             if (pEq->xor2[i].channel == 0)
3539             {
3540                 v ^= (x >> pEq->xor2[i].index) & 1;
3541             }
3542             else if (pEq->xor2[i].channel == 1)
3543             {
3544                 v ^= (y >> pEq->xor2[i].index) & 1;
3545             }
3546             else
3547             {
3548                 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3549                 v ^= (z >> pEq->xor2[i].index) & 1;
3550             }
3551         }
3552 
3553         offset |= (v << i);
3554     }
3555 
3556     return offset;
3557 }
3558 
3559 /**
3560 ************************************************************************************************************************
3561 *   Gfx10Lib::ComputeOffsetFromSwizzlePattern
3562 *
3563 *   @brief
3564 *       Compute offset from swizzle pattern
3565 *
3566 *   @return
3567 *       Offset
3568 ************************************************************************************************************************
3569 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3570 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3571     const UINT_64* pPattern,    ///< Swizzle pattern
3572     UINT_32        numBits,     ///< Number of bits in pattern
3573     UINT_32        x,           ///< x coord in pixel
3574     UINT_32        y,           ///< y coord in pixel
3575     UINT_32        z,           ///< z coord in slice
3576     UINT_32        s            ///< sample id
3577     ) const
3578 {
3579     UINT_32                 offset          = 0;
3580     const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3581 
3582     for (UINT_32 i = 0; i < numBits; i++)
3583     {
3584         UINT_32 v = 0;
3585 
3586         if (pSwizzlePattern[i].x != 0)
3587         {
3588             UINT_16 mask  = pSwizzlePattern[i].x;
3589             UINT_32 xBits = x;
3590 
3591             while (mask != 0)
3592             {
3593                 if (mask & 1)
3594                 {
3595                     v ^= xBits & 1;
3596                 }
3597 
3598                 xBits >>= 1;
3599                 mask  >>= 1;
3600             }
3601         }
3602 
3603         if (pSwizzlePattern[i].y != 0)
3604         {
3605             UINT_16 mask  = pSwizzlePattern[i].y;
3606             UINT_32 yBits = y;
3607 
3608             while (mask != 0)
3609             {
3610                 if (mask & 1)
3611                 {
3612                     v ^= yBits & 1;
3613                 }
3614 
3615                 yBits >>= 1;
3616                 mask  >>= 1;
3617             }
3618         }
3619 
3620         if (pSwizzlePattern[i].z != 0)
3621         {
3622             UINT_16 mask  = pSwizzlePattern[i].z;
3623             UINT_32 zBits = z;
3624 
3625             while (mask != 0)
3626             {
3627                 if (mask & 1)
3628                 {
3629                     v ^= zBits & 1;
3630                 }
3631 
3632                 zBits >>= 1;
3633                 mask  >>= 1;
3634             }
3635         }
3636 
3637         if (pSwizzlePattern[i].s != 0)
3638         {
3639             UINT_16 mask  = pSwizzlePattern[i].s;
3640             UINT_32 sBits = s;
3641 
3642             while (mask != 0)
3643             {
3644                 if (mask & 1)
3645                 {
3646                     v ^= sBits & 1;
3647                 }
3648 
3649                 sBits >>= 1;
3650                 mask  >>= 1;
3651             }
3652         }
3653 
3654         offset |= (v << i);
3655     }
3656 
3657     return offset;
3658 }
3659 
3660 /**
3661 ************************************************************************************************************************
3662 *   Gfx10Lib::GetSwizzlePatternInfo
3663 *
3664 *   @brief
3665 *       Get swizzle pattern
3666 *
3667 *   @return
3668 *       Swizzle pattern information
3669 ************************************************************************************************************************
3670 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3671 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
3672     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
3673     AddrResourceType resourceType,      ///< Resource type
3674     UINT_32          elemLog2,          ///< Element size in bytes log2
3675     UINT_32          numFrag            ///< Number of fragment
3676     ) const
3677 {
3678     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3679     const ADDR_SW_PATINFO* patInfo     = NULL;
3680     const UINT_32          swizzleMask = 1 << swizzleMode;
3681 
3682     if (IsBlockVariable(swizzleMode))
3683     {
3684         if (m_blockVarSizeLog2 != 0)
3685         {
3686             ADDR_ASSERT(m_settings.supportRbPlus);
3687 
3688             if (IsRtOptSwizzle(swizzleMode))
3689             {
3690                 if (numFrag == 1)
3691                 {
3692                     patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
3693                 }
3694                 else if (numFrag == 2)
3695                 {
3696                     patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
3697                 }
3698                 else if (numFrag == 4)
3699                 {
3700                     patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
3701                 }
3702                 else
3703                 {
3704                     ADDR_ASSERT(numFrag == 8);
3705                     patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
3706                 }
3707             }
3708             else if (IsZOrderSwizzle(swizzleMode))
3709             {
3710                 if (numFrag == 1)
3711                 {
3712                     patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
3713                 }
3714                 else if (numFrag == 2)
3715                 {
3716                     patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
3717                 }
3718                 else if (numFrag == 4)
3719                 {
3720                     patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
3721                 }
3722                 else
3723                 {
3724                     ADDR_ASSERT(numFrag == 8);
3725                     patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
3726                 }
3727             }
3728         }
3729     }
3730     else if (IsLinear(swizzleMode) == FALSE)
3731     {
3732         if (resourceType == ADDR_RSRC_TEX_3D)
3733         {
3734             ADDR_ASSERT(numFrag == 1);
3735 
3736             if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
3737             {
3738                 if (IsRtOptSwizzle(swizzleMode))
3739                 {
3740                     patInfo = m_settings.supportRbPlus ?
3741                               GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3742                 }
3743                 else if (IsZOrderSwizzle(swizzleMode))
3744                 {
3745                     patInfo = m_settings.supportRbPlus ?
3746                               GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3747                 }
3748                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3749                 {
3750                     ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3751                     patInfo = m_settings.supportRbPlus ?
3752                               GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
3753                 }
3754                 else
3755                 {
3756                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3757 
3758                     if (IsBlock4kb(swizzleMode))
3759                     {
3760                         if (swizzleMode == ADDR_SW_4KB_S)
3761                         {
3762                             patInfo = m_settings.supportRbPlus ?
3763                                       GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
3764                         }
3765                         else
3766                         {
3767                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3768                             patInfo = m_settings.supportRbPlus ?
3769                                       GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
3770                         }
3771                     }
3772                     else
3773                     {
3774                         if (swizzleMode == ADDR_SW_64KB_S)
3775                         {
3776                             patInfo = m_settings.supportRbPlus ?
3777                                       GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
3778                         }
3779                         else if (swizzleMode == ADDR_SW_64KB_S_X)
3780                         {
3781                             patInfo = m_settings.supportRbPlus ?
3782                                       GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
3783                         }
3784                         else
3785                         {
3786                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3787                             patInfo = m_settings.supportRbPlus ?
3788                                       GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
3789                         }
3790                     }
3791                 }
3792             }
3793         }
3794         else
3795         {
3796             if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
3797             {
3798                 if (IsBlock256b(swizzleMode))
3799                 {
3800                     if (swizzleMode == ADDR_SW_256B_S)
3801                     {
3802                         patInfo = m_settings.supportRbPlus ?
3803                                   GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
3804                     }
3805                     else
3806                     {
3807                         ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3808                         patInfo = m_settings.supportRbPlus ?
3809                                   GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
3810                     }
3811                 }
3812                 else if (IsBlock4kb(swizzleMode))
3813                 {
3814                     if (IsStandardSwizzle(resourceType, swizzleMode))
3815                     {
3816                         if (swizzleMode == ADDR_SW_4KB_S)
3817                         {
3818                             patInfo = m_settings.supportRbPlus ?
3819                                       GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
3820                         }
3821                         else
3822                         {
3823                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3824                             patInfo = m_settings.supportRbPlus ?
3825                                       GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
3826                         }
3827                     }
3828                     else
3829                     {
3830                         if (swizzleMode == ADDR_SW_4KB_D)
3831                         {
3832                             patInfo = m_settings.supportRbPlus ?
3833                                       GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
3834                         }
3835                         else
3836                         {
3837                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3838                             patInfo = m_settings.supportRbPlus ?
3839                                       GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
3840                         }
3841                     }
3842                 }
3843                 else
3844                 {
3845                     if (IsRtOptSwizzle(swizzleMode))
3846                     {
3847                         if (numFrag == 1)
3848                         {
3849                             patInfo = m_settings.supportRbPlus ?
3850                                       GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3851                         }
3852                         else if (numFrag == 2)
3853                         {
3854                             patInfo = m_settings.supportRbPlus ?
3855                                       GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
3856                         }
3857                         else if (numFrag == 4)
3858                         {
3859                             patInfo = m_settings.supportRbPlus ?
3860                                       GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
3861                         }
3862                         else
3863                         {
3864                             ADDR_ASSERT(numFrag == 8);
3865                             patInfo = m_settings.supportRbPlus ?
3866                                       GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
3867                         }
3868                     }
3869                     else if (IsZOrderSwizzle(swizzleMode))
3870                     {
3871                         if (numFrag == 1)
3872                         {
3873                             patInfo = m_settings.supportRbPlus ?
3874                                       GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3875                         }
3876                         else if (numFrag == 2)
3877                         {
3878                             patInfo = m_settings.supportRbPlus ?
3879                                       GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
3880                         }
3881                         else if (numFrag == 4)
3882                         {
3883                             patInfo = m_settings.supportRbPlus ?
3884                                       GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
3885                         }
3886                         else
3887                         {
3888                             ADDR_ASSERT(numFrag == 8);
3889                             patInfo = m_settings.supportRbPlus ?
3890                                       GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
3891                         }
3892                     }
3893                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
3894                     {
3895                         if (swizzleMode == ADDR_SW_64KB_D)
3896                         {
3897                             patInfo = m_settings.supportRbPlus ?
3898                                       GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
3899                         }
3900                         else if (swizzleMode == ADDR_SW_64KB_D_X)
3901                         {
3902                             patInfo = m_settings.supportRbPlus ?
3903                                       GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
3904                         }
3905                         else
3906                         {
3907                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3908                             patInfo = m_settings.supportRbPlus ?
3909                                       GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
3910                         }
3911                     }
3912                     else
3913                     {
3914                         if (swizzleMode == ADDR_SW_64KB_S)
3915                         {
3916                             patInfo = m_settings.supportRbPlus ?
3917                                       GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
3918                         }
3919                         else if (swizzleMode == ADDR_SW_64KB_S_X)
3920                         {
3921                             patInfo = m_settings.supportRbPlus ?
3922                                       GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
3923                         }
3924                         else
3925                         {
3926                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3927                             patInfo = m_settings.supportRbPlus ?
3928                                       GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
3929                         }
3930                     }
3931                 }
3932             }
3933         }
3934     }
3935 
3936     return (patInfo != NULL) ? &patInfo[index] : NULL;
3937 }
3938 
3939 
3940 /**
3941 ************************************************************************************************************************
3942 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3943 *
3944 *   @brief
3945 *       Internal function to calculate address from coord for micro tiled swizzle surface
3946 *
3947 *   @return
3948 *       ADDR_E_RETURNCODE
3949 ************************************************************************************************************************
3950 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3951 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3952      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3953      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3954      ) const
3955 {
3956     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
3957     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3958     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
3959 
3960     localIn.swizzleMode  = pIn->swizzleMode;
3961     localIn.flags        = pIn->flags;
3962     localIn.resourceType = pIn->resourceType;
3963     localIn.bpp          = pIn->bpp;
3964     localIn.width        = Max(pIn->unalignedWidth,  1u);
3965     localIn.height       = Max(pIn->unalignedHeight, 1u);
3966     localIn.numSlices    = Max(pIn->numSlices,       1u);
3967     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
3968     localIn.numSamples   = Max(pIn->numSamples,      1u);
3969     localIn.numFrags     = Max(pIn->numFrags,        1u);
3970     localOut.pMipInfo    = mipInfo;
3971 
3972     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3973 
3974     if (ret == ADDR_OK)
3975     {
3976         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3977         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3978         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
3979         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
3980 
3981         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3982         {
3983             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3984             const UINT_32 yb           = pIn->y / localOut.blockHeight;
3985             const UINT_32 xb           = pIn->x / localOut.blockWidth;
3986             const UINT_32 blockIndex   = yb * pb + xb;
3987             const UINT_32 blockSize    = 256;
3988             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3989                                                                    pIn->x << elemLog2,
3990                                                                    pIn->y,
3991                                                                    0);
3992             pOut->addr = localOut.sliceSize * pIn->slice +
3993                          mipInfo[pIn->mipId].macroBlockOffset +
3994                          (blockIndex * blockSize) +
3995                          blk256Offset;
3996         }
3997         else
3998         {
3999             ret = ADDR_INVALIDPARAMS;
4000         }
4001     }
4002 
4003     return ret;
4004 }
4005 
4006 /**
4007 ************************************************************************************************************************
4008 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4009 *
4010 *   @brief
4011 *       Internal function to calculate address from coord for macro tiled swizzle surface
4012 *
4013 *   @return
4014 *       ADDR_E_RETURNCODE
4015 ************************************************************************************************************************
4016 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4017 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4018      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4019      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4020      ) const
4021 {
4022     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
4023     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4024     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4025 
4026     localIn.swizzleMode  = pIn->swizzleMode;
4027     localIn.flags        = pIn->flags;
4028     localIn.resourceType = pIn->resourceType;
4029     localIn.bpp          = pIn->bpp;
4030     localIn.width        = Max(pIn->unalignedWidth,  1u);
4031     localIn.height       = Max(pIn->unalignedHeight, 1u);
4032     localIn.numSlices    = Max(pIn->numSlices,       1u);
4033     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4034     localIn.numSamples   = Max(pIn->numSamples,      1u);
4035     localIn.numFrags     = Max(pIn->numFrags,        1u);
4036     localOut.pMipInfo    = mipInfo;
4037 
4038     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4039 
4040     if (ret == ADDR_OK)
4041     {
4042         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4043         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4044         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4045         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4046         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4047         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4048                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4049 
4050         if (localIn.numFrags > 1)
4051         {
4052             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4053                                                                     pIn->resourceType,
4054                                                                     elemLog2,
4055                                                                     localIn.numFrags);
4056 
4057             if (pPatInfo != NULL)
4058             {
4059                 const UINT_32 pb        = localOut.pitch / localOut.blockWidth;
4060                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4061                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4062                 const UINT_64 blkIdx    = yb * pb + xb;
4063 
4064                 ADDR_BIT_SETTING fullSwizzlePattern[20];
4065                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4066 
4067                 const UINT_32 blkOffset =
4068                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4069                                                     blkSizeLog2,
4070                                                     pIn->x,
4071                                                     pIn->y,
4072                                                     pIn->slice,
4073                                                     pIn->sample);
4074 
4075                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4076                              (blkIdx << blkSizeLog2) +
4077                              (blkOffset ^ pipeBankXor);
4078             }
4079             else
4080             {
4081                 ret = ADDR_INVALIDPARAMS;
4082             }
4083         }
4084         else
4085         {
4086             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4087             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4088             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4089 
4090             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4091             {
4092                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4093                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4094                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4095                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4096                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4097                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4098                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4099                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4100                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4101                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4102                 const UINT_64 blkIdx    = yb * pb + xb;
4103                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4104                                                                     x << elemLog2,
4105                                                                     y,
4106                                                                     z);
4107                 pOut->addr = sliceSize * sliceId +
4108                              mipInfo[pIn->mipId].macroBlockOffset +
4109                              (blkIdx << blkSizeLog2) +
4110                              (blkOffset ^ pipeBankXor);
4111             }
4112             else
4113             {
4114                 ret = ADDR_INVALIDPARAMS;
4115             }
4116         }
4117     }
4118 
4119     return ret;
4120 }
4121 
4122 /**
4123 ************************************************************************************************************************
4124 *   Gfx10Lib::HwlComputeMaxBaseAlignments
4125 *
4126 *   @brief
4127 *       Gets maximum alignments
4128 *   @return
4129 *       maximum alignments
4130 ************************************************************************************************************************
4131 */
HwlComputeMaxBaseAlignments() const4132 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4133 {
4134     return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4135 }
4136 
4137 /**
4138 ************************************************************************************************************************
4139 *   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4140 *
4141 *   @brief
4142 *       Gets maximum alignments for metadata
4143 *   @return
4144 *       maximum alignments for metadata
4145 ************************************************************************************************************************
4146 */
HwlComputeMaxMetaBaseAlignments() const4147 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4148 {
4149     Dim3d metaBlk;
4150 
4151     const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4152     {
4153         ADDR_SW_64KB_Z_X,
4154         m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4155     };
4156 
4157     UINT_32 maxBaseAlignHtile = 0;
4158     UINT_32 maxBaseAlignCmask = 0;
4159 
4160     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4161     {
4162         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4163         {
4164             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4165             {
4166                 // Max base alignment for Htile
4167                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4168                                                                 ADDR_RSRC_TEX_2D,
4169                                                                 ValidSwizzleModeForXmask[swIdx],
4170                                                                 bppLog2,
4171                                                                 numFragLog2,
4172                                                                 TRUE,
4173                                                                 &metaBlk);
4174 
4175                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4176             }
4177         }
4178 
4179         // Max base alignment for Cmask
4180         const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4181                                                         ADDR_RSRC_TEX_2D,
4182                                                         ValidSwizzleModeForXmask[swIdx],
4183                                                         0,
4184                                                         0,
4185                                                         TRUE,
4186                                                         &metaBlk);
4187 
4188         maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4189     }
4190 
4191     // Max base alignment for 2D Dcc
4192     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4193     {
4194         ADDR_SW_64KB_S_X,
4195         ADDR_SW_64KB_D_X,
4196         ADDR_SW_64KB_R_X,
4197         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4198     };
4199 
4200     UINT_32 maxBaseAlignDcc2D = 0;
4201 
4202     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4203     {
4204         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4205         {
4206             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4207             {
4208                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4209                                                              ADDR_RSRC_TEX_2D,
4210                                                              ValidSwizzleModeForDcc2D[swIdx],
4211                                                              bppLog2,
4212                                                              numFragLog2,
4213                                                              TRUE,
4214                                                              &metaBlk);
4215 
4216                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4217             }
4218         }
4219     }
4220 
4221     // Max base alignment for 3D Dcc
4222     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4223     {
4224         ADDR_SW_64KB_Z_X,
4225         ADDR_SW_64KB_S_X,
4226         ADDR_SW_64KB_D_X,
4227         ADDR_SW_64KB_R_X,
4228         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4229     };
4230 
4231     UINT_32 maxBaseAlignDcc3D = 0;
4232 
4233     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4234     {
4235         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4236         {
4237             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4238                                                          ADDR_RSRC_TEX_3D,
4239                                                          ValidSwizzleModeForDcc3D[swIdx],
4240                                                          bppLog2,
4241                                                          0,
4242                                                          TRUE,
4243                                                          &metaBlk);
4244 
4245             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4246         }
4247     }
4248 
4249     return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4250 }
4251 
4252 /**
4253 ************************************************************************************************************************
4254 *   Gfx10Lib::GetMetaElementSizeLog2
4255 *
4256 *   @brief
4257 *       Gets meta data element size log2
4258 *   @return
4259 *       Meta data element size log2
4260 ************************************************************************************************************************
4261 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4262 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4263     Gfx10DataType dataType) ///< Data surface type
4264 {
4265     INT_32 elemSizeLog2 = 0;
4266 
4267     if (dataType == Gfx10DataColor)
4268     {
4269         elemSizeLog2 = 0;
4270     }
4271     else if (dataType == Gfx10DataDepthStencil)
4272     {
4273         elemSizeLog2 = 2;
4274     }
4275     else
4276     {
4277         ADDR_ASSERT(dataType == Gfx10DataFmask);
4278         elemSizeLog2 = -1;
4279     }
4280 
4281     return elemSizeLog2;
4282 }
4283 
4284 /**
4285 ************************************************************************************************************************
4286 *   Gfx10Lib::GetMetaCacheSizeLog2
4287 *
4288 *   @brief
4289 *       Gets meta data cache line size log2
4290 *   @return
4291 *       Meta data cache line size log2
4292 ************************************************************************************************************************
4293 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4294 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4295     Gfx10DataType dataType) ///< Data surface type
4296 {
4297     INT_32 cacheSizeLog2 = 0;
4298 
4299     if (dataType == Gfx10DataColor)
4300     {
4301         cacheSizeLog2 = 6;
4302     }
4303     else if (dataType == Gfx10DataDepthStencil)
4304     {
4305         cacheSizeLog2 = 8;
4306     }
4307     else
4308     {
4309         ADDR_ASSERT(dataType == Gfx10DataFmask);
4310         cacheSizeLog2 = 8;
4311     }
4312     return cacheSizeLog2;
4313 }
4314 
4315 /**
4316 ************************************************************************************************************************
4317 *   Gfx10Lib::HwlComputeSurfaceInfoLinear
4318 *
4319 *   @brief
4320 *       Internal function to calculate alignment for linear surface
4321 *
4322 *   @return
4323 *       ADDR_E_RETURNCODE
4324 ************************************************************************************************************************
4325 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4326 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4327      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4328      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4329      ) const
4330 {
4331     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4332 
4333     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4334     {
4335         returnCode = ADDR_INVALIDPARAMS;
4336     }
4337     else
4338     {
4339         const UINT_32 elementBytes = pIn->bpp >> 3;
4340         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4341         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4342         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4343         UINT_32       actualHeight = pIn->height;
4344         UINT_64       sliceSize    = 0;
4345 
4346         if (pIn->numMipLevels > 1)
4347         {
4348             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4349             {
4350                 UINT_32 mipWidth, mipHeight;
4351 
4352                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4353 
4354                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4355 
4356                 if (pOut->pMipInfo != NULL)
4357                 {
4358                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4359                     pOut->pMipInfo[i].height           = mipHeight;
4360                     pOut->pMipInfo[i].depth            = mipDepth;
4361                     pOut->pMipInfo[i].offset           = sliceSize;
4362                     pOut->pMipInfo[i].mipTailOffset    = 0;
4363                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4364                 }
4365 
4366                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4367             }
4368         }
4369         else
4370         {
4371             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4372 
4373             if (returnCode == ADDR_OK)
4374             {
4375                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4376 
4377                 if (pOut->pMipInfo != NULL)
4378                 {
4379                     pOut->pMipInfo[0].pitch            = pitch;
4380                     pOut->pMipInfo[0].height           = actualHeight;
4381                     pOut->pMipInfo[0].depth            = mipDepth;
4382                     pOut->pMipInfo[0].offset           = 0;
4383                     pOut->pMipInfo[0].mipTailOffset    = 0;
4384                     pOut->pMipInfo[0].macroBlockOffset = 0;
4385                 }
4386             }
4387         }
4388 
4389         if (returnCode == ADDR_OK)
4390         {
4391             pOut->pitch          = pitch;
4392             pOut->height         = actualHeight;
4393             pOut->numSlices      = pIn->numSlices;
4394             pOut->sliceSize      = sliceSize;
4395             pOut->surfSize       = sliceSize * pOut->numSlices;
4396             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4397             pOut->blockWidth     = pitchAlign;
4398             pOut->blockHeight    = 1;
4399             pOut->blockSlices    = 1;
4400 
4401             // Following members are useless on GFX10
4402             pOut->mipChainPitch  = 0;
4403             pOut->mipChainHeight = 0;
4404             pOut->mipChainSlice  = 0;
4405             pOut->epitchIsHeight = FALSE;
4406 
4407             // Post calculation validate
4408             ADDR_ASSERT(pOut->sliceSize > 0);
4409         }
4410     }
4411 
4412     return returnCode;
4413 }
4414 
4415 } // V2
4416 } // Addr
4417