1 /*
2  * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /**
28 ************************************************************************************************************************
29 * @file  gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33 
34 #include "gfx9addrlib.h"
35 
36 #include "gfx9_gb_reg.h"
37 
38 #include "amdgpu_asic_addr.h"
39 
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////
42 
43 namespace Addr
44 {
45 
46 /**
47 ************************************************************************************************************************
48 *   Gfx9HwlInit
49 *
50 *   @brief
51 *       Creates a Gfx9Lib object.
52 *
53 *   @return
54 *       Returns a Gfx9Lib object pointer.
55 ************************************************************************************************************************
56 */
Gfx9HwlInit(const Client * pClient)57 Addr::Lib* Gfx9HwlInit(const Client* pClient)
58 {
59     return V2::Gfx9Lib::CreateObj(pClient);
60 }
61 
62 namespace V2
63 {
64 
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 //                               Static Const Member
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 
69 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
70 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
71     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
72     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
73     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_D
74     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_R
75 
76     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
77     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
78     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_D
79     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_R
80 
81     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
82     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
83     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_D
84     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_R
85 
86     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
87     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
88     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
89     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
90 
91     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_Z_T
92     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_S_T
93     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_D_T
94     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}, // ADDR_SW_64KB_R_T
95 
96     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_Z_x
97     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_S_x
98     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_D_x
99     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_4KB_R_x
100 
101     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_Z_X
102     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_S_X
103     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_D_X
104     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}, // ADDR_SW_64KB_R_X
105 
106     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
107     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
108     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
109     {0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // Reserved
110     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
111 };
112 
113 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
114 
115 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
116 
117 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
118 
119 /**
120 ************************************************************************************************************************
121 *   Gfx9Lib::Gfx9Lib
122 *
123 *   @brief
124 *       Constructor
125 *
126 ************************************************************************************************************************
127 */
Gfx9Lib(const Client * pClient)128 Gfx9Lib::Gfx9Lib(const Client* pClient)
129     :
130     Lib(pClient)
131 {
132     memset(&m_settings, 0, sizeof(m_settings));
133     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
134     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
135     m_metaEqOverrideIndex = 0;
136 }
137 
138 /**
139 ************************************************************************************************************************
140 *   Gfx9Lib::~Gfx9Lib
141 *
142 *   @brief
143 *       Destructor
144 ************************************************************************************************************************
145 */
~Gfx9Lib()146 Gfx9Lib::~Gfx9Lib()
147 {
148 }
149 
150 /**
151 ************************************************************************************************************************
152 *   Gfx9Lib::HwlComputeHtileInfo
153 *
154 *   @brief
155 *       Interface function stub of AddrComputeHtileInfo
156 *
157 *   @return
158 *       ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
162     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
163     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
164     ) const
165 {
166     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
167                                                        pIn->swizzleMode);
168 
169     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
170 
171     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
172 
173     if ((numPipeTotal == 1) && (numRbTotal == 1))
174     {
175         numCompressBlkPerMetaBlkLog2 = 10;
176     }
177     else
178     {
179         if (m_settings.applyAliasFix)
180         {
181             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
182         }
183         else
184         {
185             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
186         }
187     }
188 
189     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
190 
191     Dim3d   metaBlkDim   = {8, 8, 1};
192     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
193     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
194     UINT_32 heightAmp    = totalAmpBits - widthAmp;
195     metaBlkDim.w <<= widthAmp;
196     metaBlkDim.h <<= heightAmp;
197 
198 #if DEBUG
199     Dim3d metaBlkDimDbg = {8, 8, 1};
200     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
201     {
202         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
203             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
204         {
205             metaBlkDimDbg.h <<= 1;
206         }
207         else
208         {
209             metaBlkDimDbg.w <<= 1;
210         }
211     }
212     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
213 #endif
214 
215     UINT_32 numMetaBlkX;
216     UINT_32 numMetaBlkY;
217     UINT_32 numMetaBlkZ;
218 
219     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
220                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
221                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
222 
223     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
224     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
225 
226     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
227     {
228         align *= (numPipeTotal >> 1);
229     }
230 
231     align = Max(align, metaBlkSize);
232 
233     if (m_settings.metaBaseAlignFix)
234     {
235         align = Max(align, GetBlockSize(pIn->swizzleMode));
236     }
237 
238     if (m_settings.htileAlignFix)
239     {
240         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
241         const INT_32 htileCachelineSizeLog2 = 11;
242         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
243 
244         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
245 
246         align <<= rbMaskPadding;
247     }
248 
249     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
250     pOut->height     = numMetaBlkY * metaBlkDim.h;
251     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
252 
253     pOut->metaBlkWidth       = metaBlkDim.w;
254     pOut->metaBlkHeight      = metaBlkDim.h;
255     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
256 
257     pOut->baseAlign  = align;
258     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
259 
260     return ADDR_OK;
261 }
262 
263 /**
264 ************************************************************************************************************************
265 *   Gfx9Lib::HwlComputeCmaskInfo
266 *
267 *   @brief
268 *       Interface function stub of AddrComputeCmaskInfo
269 *
270 *   @return
271 *       ADDR_E_RETURNCODE
272 ************************************************************************************************************************
273 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const274 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
275     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
276     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
277     ) const
278 {
279     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
280 
281     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
282                                                        pIn->swizzleMode);
283 
284     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
285 
286     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
287 
288     if ((numPipeTotal == 1) && (numRbTotal == 1))
289     {
290         numCompressBlkPerMetaBlkLog2 = 13;
291     }
292     else
293     {
294         if (m_settings.applyAliasFix)
295         {
296             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
297         }
298         else
299         {
300             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
301         }
302 
303         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
304     }
305 
306     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
307 
308     Dim2d metaBlkDim = {8, 8};
309     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
310     UINT_32 heightAmp = totalAmpBits >> 1;
311     UINT_32 widthAmp = totalAmpBits - heightAmp;
312     metaBlkDim.w <<= widthAmp;
313     metaBlkDim.h <<= heightAmp;
314 
315 #if DEBUG
316     Dim2d metaBlkDimDbg = {8, 8};
317     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
318     {
319         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
320         {
321             metaBlkDimDbg.h <<= 1;
322         }
323         else
324         {
325             metaBlkDimDbg.w <<= 1;
326         }
327     }
328     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
329 #endif
330 
331     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
332     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
333     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
334 
335     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
336 
337     if (m_settings.metaBaseAlignFix)
338     {
339         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
340     }
341 
342     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
343     pOut->height     = numMetaBlkY * metaBlkDim.h;
344     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
345     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
346     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
347 
348     pOut->metaBlkWidth = metaBlkDim.w;
349     pOut->metaBlkHeight = metaBlkDim.h;
350 
351     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
352 
353     return ADDR_OK;
354 }
355 
356 /**
357 ************************************************************************************************************************
358 *   Gfx9Lib::GetMetaMipInfo
359 *
360 *   @brief
361 *       Get meta mip info
362 *
363 *   @return
364 *       N/A
365 ************************************************************************************************************************
366 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const367 VOID Gfx9Lib::GetMetaMipInfo(
368     UINT_32 numMipLevels,           ///< [in]  number of mip levels
369     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
370     BOOL_32 dataThick,              ///< [in]  data surface is thick
371     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
372     UINT_32 mip0Width,              ///< [in]  mip0 width
373     UINT_32 mip0Height,             ///< [in]  mip0 height
374     UINT_32 mip0Depth,              ///< [in]  mip0 depth
375     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
376     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
377     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
378     const
379 {
380     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
381     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
382     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
383     UINT_32 tailWidth   = pMetaBlkDim->w;
384     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
385     UINT_32 tailDepth   = pMetaBlkDim->d;
386     BOOL_32 inTail      = FALSE;
387     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
388 
389     if (numMipLevels > 1)
390     {
391         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
392         {
393             // Z major
394             major = ADDR_MAJOR_Z;
395         }
396         else if (numMetaBlkX >= numMetaBlkY)
397         {
398             // X major
399             major = ADDR_MAJOR_X;
400         }
401         else
402         {
403             // Y major
404             major = ADDR_MAJOR_Y;
405         }
406 
407         inTail = ((mip0Width <= tailWidth) &&
408                   (mip0Height <= tailHeight) &&
409                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
410 
411         if (inTail == FALSE)
412         {
413             UINT_32 orderLimit;
414             UINT_32 *pMipDim;
415             UINT_32 *pOrderDim;
416 
417             if (major == ADDR_MAJOR_Z)
418             {
419                 // Z major
420                 pMipDim = &numMetaBlkY;
421                 pOrderDim = &numMetaBlkZ;
422                 orderLimit = 4;
423             }
424             else if (major == ADDR_MAJOR_X)
425             {
426                 // X major
427                 pMipDim = &numMetaBlkY;
428                 pOrderDim = &numMetaBlkX;
429                 orderLimit = 4;
430             }
431             else
432             {
433                 // Y major
434                 pMipDim = &numMetaBlkX;
435                 pOrderDim = &numMetaBlkY;
436                 orderLimit = 2;
437             }
438 
439             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
440             {
441                 *pMipDim += 2;
442             }
443             else
444             {
445                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
446             }
447         }
448     }
449 
450     if (pInfo != NULL)
451     {
452         UINT_32 mipWidth  = mip0Width;
453         UINT_32 mipHeight = mip0Height;
454         UINT_32 mipDepth  = mip0Depth;
455         Dim3d   mipCoord  = {0};
456 
457         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
458         {
459             if (inTail)
460             {
461                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
462                                    pMetaBlkDim);
463                 break;
464             }
465             else
466             {
467                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
468                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
469                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
470 
471                 pInfo[mip].inMiptail = FALSE;
472                 pInfo[mip].startX = mipCoord.w;
473                 pInfo[mip].startY = mipCoord.h;
474                 pInfo[mip].startZ = mipCoord.d;
475                 pInfo[mip].width  = mipWidth;
476                 pInfo[mip].height = mipHeight;
477                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
478 
479                 if ((mip >= 3) || (mip & 1))
480                 {
481                     switch (major)
482                     {
483                         case ADDR_MAJOR_X:
484                             mipCoord.w += mipWidth;
485                             break;
486                         case ADDR_MAJOR_Y:
487                             mipCoord.h += mipHeight;
488                             break;
489                         case ADDR_MAJOR_Z:
490                             mipCoord.d += mipDepth;
491                             break;
492                         default:
493                             break;
494                     }
495                 }
496                 else
497                 {
498                     switch (major)
499                     {
500                         case ADDR_MAJOR_X:
501                             mipCoord.h += mipHeight;
502                             break;
503                         case ADDR_MAJOR_Y:
504                             mipCoord.w += mipWidth;
505                             break;
506                         case ADDR_MAJOR_Z:
507                             mipCoord.h += mipHeight;
508                             break;
509                         default:
510                             break;
511                     }
512                 }
513 
514                 mipWidth  = Max(mipWidth >> 1, 1u);
515                 mipHeight = Max(mipHeight >> 1, 1u);
516                 mipDepth = Max(mipDepth >> 1, 1u);
517 
518                 inTail = ((mipWidth <= tailWidth) &&
519                           (mipHeight <= tailHeight) &&
520                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
521             }
522         }
523     }
524 
525     *pNumMetaBlkX = numMetaBlkX;
526     *pNumMetaBlkY = numMetaBlkY;
527     *pNumMetaBlkZ = numMetaBlkZ;
528 }
529 
530 /**
531 ************************************************************************************************************************
532 *   Gfx9Lib::HwlComputeDccInfo
533 *
534 *   @brief
535 *       Interface function to compute DCC key info
536 *
537 *   @return
538 *       ADDR_E_RETURNCODE
539 ************************************************************************************************************************
540 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const541 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
542     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
543     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
544     ) const
545 {
546     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
547     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
548     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
549 
550     if (dataLinear)
551     {
552         metaLinear = TRUE;
553     }
554     else if (metaLinear == TRUE)
555     {
556         pipeAligned = FALSE;
557     }
558 
559     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
560 
561     if (metaLinear)
562     {
563         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
564         ADDR_ASSERT_ALWAYS();
565 
566         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
567         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
568     }
569     else
570     {
571         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
572 
573         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
574 
575         UINT_32 numFrags = Max(pIn->numFrags, 1u);
576         UINT_32 numSlices = Max(pIn->numSlices, 1u);
577 
578         minMetaBlkSize /= numFrags;
579 
580         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
581 
582         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
583 
584         if ((numPipeTotal > 1) || (numRbTotal > 1))
585         {
586             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
587 
588             numCompressBlkPerMetaBlk =
589                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
590 
591             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
592             {
593                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
594             }
595         }
596 
597         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
598         Dim3d metaBlkDim = compressBlkDim;
599 
600         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
601         {
602             if ((metaBlkDim.h < metaBlkDim.w) ||
603                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
604             {
605                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
606                 {
607                     metaBlkDim.h <<= 1;
608                 }
609                 else
610                 {
611                     metaBlkDim.d <<= 1;
612                 }
613             }
614             else
615             {
616                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
617                 {
618                     metaBlkDim.w <<= 1;
619                 }
620                 else
621                 {
622                     metaBlkDim.d <<= 1;
623                 }
624             }
625         }
626 
627         UINT_32 numMetaBlkX;
628         UINT_32 numMetaBlkY;
629         UINT_32 numMetaBlkZ;
630 
631         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
632                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
633                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
634 
635         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
636 
637         if (numFrags > m_maxCompFrag)
638         {
639             sizeAlign *= (numFrags / m_maxCompFrag);
640         }
641 
642         if (m_settings.metaBaseAlignFix)
643         {
644             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
645         }
646 
647         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
648                            numCompressBlkPerMetaBlk * numFrags;
649         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
650         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
651 
652         pOut->pitch = numMetaBlkX * metaBlkDim.w;
653         pOut->height = numMetaBlkY * metaBlkDim.h;
654         pOut->depth = numMetaBlkZ * metaBlkDim.d;
655 
656         pOut->compressBlkWidth = compressBlkDim.w;
657         pOut->compressBlkHeight = compressBlkDim.h;
658         pOut->compressBlkDepth = compressBlkDim.d;
659 
660         pOut->metaBlkWidth = metaBlkDim.w;
661         pOut->metaBlkHeight = metaBlkDim.h;
662         pOut->metaBlkDepth = metaBlkDim.d;
663         pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
664 
665         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
666         pOut->fastClearSizePerSlice =
667             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
668     }
669 
670     return ADDR_OK;
671 }
672 
673 /**
674 ************************************************************************************************************************
675 *   Gfx9Lib::HwlComputeMaxBaseAlignments
676 *
677 *   @brief
678 *       Gets maximum alignments
679 *   @return
680 *       maximum alignments
681 ************************************************************************************************************************
682 */
HwlComputeMaxBaseAlignments() const683 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
684 {
685     return Size64K;
686 }
687 
688 /**
689 ************************************************************************************************************************
690 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
691 *
692 *   @brief
693 *       Gets maximum alignments for metadata
694 *   @return
695 *       maximum alignments for metadata
696 ************************************************************************************************************************
697 */
HwlComputeMaxMetaBaseAlignments() const698 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
699 {
700     // Max base alignment for Htile
701     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
702     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
703 
704     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
705     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
706     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
707     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
708 
709     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
710 
711     if (maxNumPipeTotal > 2)
712     {
713         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
714     }
715 
716     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
717 
718     if (m_settings.metaBaseAlignFix)
719     {
720         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
721     }
722 
723     if (m_settings.htileAlignFix)
724     {
725         maxBaseAlignHtile *= maxNumPipeTotal;
726     }
727 
728     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
729 
730     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
731     UINT_32 maxBaseAlignDcc3D = 65536;
732 
733     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
734     {
735         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
736     }
737 
738     // Max base alignment for Msaa Dcc
739     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
740 
741     if (m_settings.metaBaseAlignFix)
742     {
743         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
744     }
745 
746     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
747 }
748 
749 /**
750 ************************************************************************************************************************
751 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
752 *
753 *   @brief
754 *       Interface function stub of AddrComputeCmaskAddrFromCoord
755 *
756 *   @return
757 *       ADDR_E_RETURNCODE
758 ************************************************************************************************************************
759 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)760 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
761     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
762     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
763 {
764     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
765     input.size            = sizeof(input);
766     input.cMaskFlags      = pIn->cMaskFlags;
767     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
768     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
769     input.numSlices       = Max(pIn->numSlices, 1u);
770     input.swizzleMode     = pIn->swizzleMode;
771     input.resourceType    = pIn->resourceType;
772 
773     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
774     output.size = sizeof(output);
775 
776     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
777 
778     if (returnCode == ADDR_OK)
779     {
780         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
781         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
782         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
783         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
784 
785         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
786                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
787                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
788 
789         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
790 
791         UINT_32 xb = pIn->x / output.metaBlkWidth;
792         UINT_32 yb = pIn->y / output.metaBlkHeight;
793         UINT_32 zb = pIn->slice;
794 
795         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
796         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
797         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
798 
799         UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
800         UINT_64 address = pMetaEq->solve(coords);
801 
802         pOut->addr = address >> 1;
803         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
804 
805 
806         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
807                                                            pIn->swizzleMode);
808 
809         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
810 
811         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
812     }
813 
814     return returnCode;
815 }
816 
817 /**
818 ************************************************************************************************************************
819 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
820 *
821 *   @brief
822 *       Interface function stub of AddrComputeHtileAddrFromCoord
823 *
824 *   @return
825 *       ADDR_E_RETURNCODE
826 ************************************************************************************************************************
827 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)828 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
829     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
830     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
831 {
832     ADDR_E_RETURNCODE returnCode = ADDR_OK;
833 
834     if (pIn->numMipLevels > 1)
835     {
836         returnCode = ADDR_NOTIMPLEMENTED;
837     }
838     else
839     {
840         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
841         input.size            = sizeof(input);
842         input.hTileFlags      = pIn->hTileFlags;
843         input.depthFlags      = pIn->depthflags;
844         input.swizzleMode     = pIn->swizzleMode;
845         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
846         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
847         input.numSlices       = Max(pIn->numSlices, 1u);
848         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
849 
850         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
851         output.size = sizeof(output);
852 
853         returnCode = ComputeHtileInfo(&input, &output);
854 
855         if (returnCode == ADDR_OK)
856         {
857             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
858             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
859             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
860             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
861 
862             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
863                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
864                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
865 
866             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
867 
868             UINT_32 xb = pIn->x / output.metaBlkWidth;
869             UINT_32 yb = pIn->y / output.metaBlkHeight;
870             UINT_32 zb = pIn->slice;
871 
872             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
873             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
874             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
875 
876             UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
877             UINT_64 address = pMetaEq->solve(coords);
878 
879             pOut->addr = address >> 1;
880 
881             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
882                                                                pIn->swizzleMode);
883 
884             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
885 
886             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
887         }
888     }
889 
890     return returnCode;
891 }
892 
893 /**
894 ************************************************************************************************************************
895 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
896 *
897 *   @brief
898 *       Interface function stub of AddrComputeHtileCoordFromAddr
899 *
900 *   @return
901 *       ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
905     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
906     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
907 {
908     ADDR_E_RETURNCODE returnCode = ADDR_OK;
909 
910     if (pIn->numMipLevels > 1)
911     {
912         returnCode = ADDR_NOTIMPLEMENTED;
913     }
914     else
915     {
916         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
917         input.size            = sizeof(input);
918         input.hTileFlags      = pIn->hTileFlags;
919         input.swizzleMode     = pIn->swizzleMode;
920         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
921         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
922         input.numSlices       = Max(pIn->numSlices, 1u);
923         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
924 
925         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
926         output.size = sizeof(output);
927 
928         returnCode = ComputeHtileInfo(&input, &output);
929 
930         if (returnCode == ADDR_OK)
931         {
932             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
933             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
934             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
935             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
936 
937             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
938                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
939                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
940 
941             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
942 
943             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
944                                                                pIn->swizzleMode);
945 
946             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
947 
948             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
949 
950             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
951             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
952 
953             UINT_32 coords[NUM_DIMS];
954             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
955 
956             pOut->slice = coords[DIM_M] / sliceSizeInBlock;
957             pOut->y     = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
958             pOut->x     = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
959         }
960     }
961 
962     return returnCode;
963 }
964 
965 /**
966 ************************************************************************************************************************
967 *   Gfx9Lib::HwlComputeDccAddrFromCoord
968 *
969 *   @brief
970 *       Interface function stub of AddrComputeDccAddrFromCoord
971 *
972 *   @return
973 *       ADDR_E_RETURNCODE
974 ************************************************************************************************************************
975 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)976 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
977     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
978     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
979 {
980     ADDR_E_RETURNCODE returnCode = ADDR_OK;
981 
982     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
983     {
984         returnCode = ADDR_NOTIMPLEMENTED;
985     }
986     else
987     {
988         UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
989         UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
990         UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
991         UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
992         UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
993         UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
994         UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
995         UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
996 
997         MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
998                                      Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
999                                      metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1000                                      compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1001 
1002         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1003 
1004         UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1005         UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1006         UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1007 
1008         UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
1009         UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1010         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1011 
1012         UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1013         UINT_64 address = pMetaEq->solve(coords);
1014 
1015         pOut->addr = address >> 1;
1016 
1017         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1018                                                            pIn->swizzleMode);
1019 
1020         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1021 
1022         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1023     }
1024 
1025     return returnCode;
1026 }
1027 
1028 /**
1029 ************************************************************************************************************************
1030 *   Gfx9Lib::HwlInitGlobalParams
1031 *
1032 *   @brief
1033 *       Initializes global parameters
1034 *
1035 *   @return
1036 *       TRUE if all settings are valid
1037 *
1038 ************************************************************************************************************************
1039 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1040 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1041     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1042 {
1043     BOOL_32 valid = TRUE;
1044 
1045     if (m_settings.isArcticIsland)
1046     {
1047         GB_ADDR_CONFIG_gfx9 gbAddrConfig;
1048 
1049         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1050 
1051         // These values are copied from CModel code
1052         switch (gbAddrConfig.bits.NUM_PIPES)
1053         {
1054             case ADDR_CONFIG_1_PIPE:
1055                 m_pipes = 1;
1056                 m_pipesLog2 = 0;
1057                 break;
1058             case ADDR_CONFIG_2_PIPE:
1059                 m_pipes = 2;
1060                 m_pipesLog2 = 1;
1061                 break;
1062             case ADDR_CONFIG_4_PIPE:
1063                 m_pipes = 4;
1064                 m_pipesLog2 = 2;
1065                 break;
1066             case ADDR_CONFIG_8_PIPE:
1067                 m_pipes = 8;
1068                 m_pipesLog2 = 3;
1069                 break;
1070             case ADDR_CONFIG_16_PIPE:
1071                 m_pipes = 16;
1072                 m_pipesLog2 = 4;
1073                 break;
1074             case ADDR_CONFIG_32_PIPE:
1075                 m_pipes = 32;
1076                 m_pipesLog2 = 5;
1077                 break;
1078             default:
1079                 ADDR_ASSERT_ALWAYS();
1080                 break;
1081         }
1082 
1083         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1084         {
1085             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1086                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1087                 m_pipeInterleaveLog2 = 8;
1088                 break;
1089             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1090                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1091                 m_pipeInterleaveLog2 = 9;
1092                 break;
1093             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1094                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1095                 m_pipeInterleaveLog2 = 10;
1096                 break;
1097             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1098                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1099                 m_pipeInterleaveLog2 = 11;
1100                 break;
1101             default:
1102                 ADDR_ASSERT_ALWAYS();
1103                 break;
1104         }
1105 
1106         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1107         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1108         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1109 
1110         switch (gbAddrConfig.bits.NUM_BANKS)
1111         {
1112             case ADDR_CONFIG_1_BANK:
1113                 m_banks = 1;
1114                 m_banksLog2 = 0;
1115                 break;
1116             case ADDR_CONFIG_2_BANK:
1117                 m_banks = 2;
1118                 m_banksLog2 = 1;
1119                 break;
1120             case ADDR_CONFIG_4_BANK:
1121                 m_banks = 4;
1122                 m_banksLog2 = 2;
1123                 break;
1124             case ADDR_CONFIG_8_BANK:
1125                 m_banks = 8;
1126                 m_banksLog2 = 3;
1127                 break;
1128             case ADDR_CONFIG_16_BANK:
1129                 m_banks = 16;
1130                 m_banksLog2 = 4;
1131                 break;
1132             default:
1133                 ADDR_ASSERT_ALWAYS();
1134                 break;
1135         }
1136 
1137         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1138         {
1139             case ADDR_CONFIG_1_SHADER_ENGINE:
1140                 m_se = 1;
1141                 m_seLog2 = 0;
1142                 break;
1143             case ADDR_CONFIG_2_SHADER_ENGINE:
1144                 m_se = 2;
1145                 m_seLog2 = 1;
1146                 break;
1147             case ADDR_CONFIG_4_SHADER_ENGINE:
1148                 m_se = 4;
1149                 m_seLog2 = 2;
1150                 break;
1151             case ADDR_CONFIG_8_SHADER_ENGINE:
1152                 m_se = 8;
1153                 m_seLog2 = 3;
1154                 break;
1155             default:
1156                 ADDR_ASSERT_ALWAYS();
1157                 break;
1158         }
1159 
1160         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1161         {
1162             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1163                 m_rbPerSe = 1;
1164                 m_rbPerSeLog2 = 0;
1165                 break;
1166             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1167                 m_rbPerSe = 2;
1168                 m_rbPerSeLog2 = 1;
1169                 break;
1170             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1171                 m_rbPerSe = 4;
1172                 m_rbPerSeLog2 = 2;
1173                 break;
1174             default:
1175                 ADDR_ASSERT_ALWAYS();
1176                 break;
1177         }
1178 
1179         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1180         {
1181             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1182                 m_maxCompFrag = 1;
1183                 m_maxCompFragLog2 = 0;
1184                 break;
1185             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1186                 m_maxCompFrag = 2;
1187                 m_maxCompFragLog2 = 1;
1188                 break;
1189             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1190                 m_maxCompFrag = 4;
1191                 m_maxCompFragLog2 = 2;
1192                 break;
1193             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1194                 m_maxCompFrag = 8;
1195                 m_maxCompFragLog2 = 3;
1196                 break;
1197             default:
1198                 ADDR_ASSERT_ALWAYS();
1199                 break;
1200         }
1201 
1202         if ((m_rbPerSeLog2 == 1) &&
1203             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1204              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1205         {
1206             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1207 
1208             ADDR_ASSERT(m_settings.isRaven == FALSE);
1209 
1210             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1211 
1212             if (m_settings.isVega12)
1213             {
1214                 m_settings.htileCacheRbConflict = 1;
1215             }
1216         }
1217 
1218         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1219         m_blockVarSizeLog2 = 0;
1220     }
1221     else
1222     {
1223         valid = FALSE;
1224         ADDR_NOT_IMPLEMENTED();
1225     }
1226 
1227     if (valid)
1228     {
1229         InitEquationTable();
1230     }
1231 
1232     return valid;
1233 }
1234 
1235 /**
1236 ************************************************************************************************************************
1237 *   Gfx9Lib::HwlConvertChipFamily
1238 *
1239 *   @brief
1240 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1241 *   @return
1242 *       ChipFamily
1243 ************************************************************************************************************************
1244 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1245 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1246     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1247     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1248 {
1249     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1250 
1251     switch (uChipFamily)
1252     {
1253         case FAMILY_AI:
1254             m_settings.isArcticIsland = 1;
1255             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1256             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1257             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1258             m_settings.isDce12 = 1;
1259 
1260             if (m_settings.isVega10 == 0)
1261             {
1262                 m_settings.htileAlignFix = 1;
1263                 m_settings.applyAliasFix = 1;
1264             }
1265 
1266             m_settings.metaBaseAlignFix = 1;
1267 
1268             m_settings.depthPipeXorDisable = 1;
1269             break;
1270         case FAMILY_RV:
1271             m_settings.isArcticIsland = 1;
1272 
1273             if (ASICREV_IS_RAVEN(uChipRevision))
1274             {
1275                 m_settings.isRaven = 1;
1276 
1277                 m_settings.depthPipeXorDisable = 1;
1278             }
1279 
1280             if (ASICREV_IS_RAVEN2(uChipRevision))
1281             {
1282                 m_settings.isRaven = 1;
1283             }
1284 
1285             if (m_settings.isRaven == 0)
1286             {
1287                 m_settings.htileAlignFix = 1;
1288                 m_settings.applyAliasFix = 1;
1289             }
1290 
1291             m_settings.isDcn1 = m_settings.isRaven;
1292 
1293             if (ASICREV_IS_RENOIR(uChipRevision))
1294             {
1295                 m_settings.isRaven = 1;
1296                 m_settings.isDcn2  = 1;
1297             }
1298 
1299             m_settings.metaBaseAlignFix = 1;
1300             break;
1301 
1302         default:
1303             ADDR_ASSERT(!"No Chip found");
1304             break;
1305     }
1306 
1307     return family;
1308 }
1309 
1310 /**
1311 ************************************************************************************************************************
1312 *   Gfx9Lib::InitRbEquation
1313 *
1314 *   @brief
1315 *       Init RB equation
1316 *   @return
1317 *       N/A
1318 ************************************************************************************************************************
1319 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1320 VOID Gfx9Lib::GetRbEquation(
1321     CoordEq* pRbEq,             ///< [out] rb equation
1322     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1323     UINT_32  numSeLog2)         ///< [in] number of shader engine
1324     const
1325 {
1326     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1327     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1328     Coordinate cx(DIM_X, rbRegion);
1329     Coordinate cy(DIM_Y, rbRegion);
1330 
1331     UINT_32 start = 0;
1332     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1333 
1334     // Clear the rb equation
1335     pRbEq->resize(0);
1336     pRbEq->resize(numRbTotalLog2);
1337 
1338     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1339     {
1340         // Special case when more than 1 SE, and 2 RB per SE
1341         (*pRbEq)[0].add(cx);
1342         (*pRbEq)[0].add(cy);
1343         cx++;
1344         cy++;
1345 
1346         if (m_settings.applyAliasFix == false)
1347         {
1348             (*pRbEq)[0].add(cy);
1349         }
1350 
1351         (*pRbEq)[0].add(cy);
1352         start++;
1353     }
1354 
1355     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1356 
1357     for (UINT_32 i = 0; i < numBits; i++)
1358     {
1359         UINT_32 idx =
1360             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1361 
1362         if ((i % 2) == 1)
1363         {
1364             (*pRbEq)[idx].add(cx);
1365             cx++;
1366         }
1367         else
1368         {
1369             (*pRbEq)[idx].add(cy);
1370             cy++;
1371         }
1372     }
1373 }
1374 
1375 /**
1376 ************************************************************************************************************************
1377 *   Gfx9Lib::GetDataEquation
1378 *
1379 *   @brief
1380 *       Get data equation for fmask and Z
1381 *   @return
1382 *       N/A
1383 ************************************************************************************************************************
1384 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1385 VOID Gfx9Lib::GetDataEquation(
1386     CoordEq* pDataEq,               ///< [out] data surface equation
1387     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1388     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1389     AddrResourceType resourceType,  ///< [in] data surface resource type
1390     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1391     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1392     const
1393 {
1394     Coordinate cx(DIM_X, 0);
1395     Coordinate cy(DIM_Y, 0);
1396     Coordinate cz(DIM_Z, 0);
1397     Coordinate cs(DIM_S, 0);
1398 
1399     // Clear the equation
1400     pDataEq->resize(0);
1401     pDataEq->resize(27);
1402 
1403     if (dataSurfaceType == Gfx9DataColor)
1404     {
1405         if (IsLinear(swizzleMode))
1406         {
1407             Coordinate cm(DIM_M, 0);
1408 
1409             pDataEq->resize(49);
1410 
1411             for (UINT_32 i = 0; i < 49; i++)
1412             {
1413                 (*pDataEq)[i].add(cm);
1414                 cm++;
1415             }
1416         }
1417         else if (IsThick(resourceType, swizzleMode))
1418         {
1419             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1420             UINT_32 i;
1421             if (IsStandardSwizzle(resourceType, swizzleMode))
1422             {
1423                 // Standard 3d swizzle
1424                 // Fill in bottom x bits
1425                 for (i = elementBytesLog2; i < 4; i++)
1426                 {
1427                     (*pDataEq)[i].add(cx);
1428                     cx++;
1429                 }
1430                 // Fill in 2 bits of y and then z
1431                 for (i = 4; i < 6; i++)
1432                 {
1433                     (*pDataEq)[i].add(cy);
1434                     cy++;
1435                 }
1436                 for (i = 6; i < 8; i++)
1437                 {
1438                     (*pDataEq)[i].add(cz);
1439                     cz++;
1440                 }
1441                 if (elementBytesLog2 < 2)
1442                 {
1443                     // fill in z & y bit
1444                     (*pDataEq)[8].add(cz);
1445                     (*pDataEq)[9].add(cy);
1446                     cz++;
1447                     cy++;
1448                 }
1449                 else if (elementBytesLog2 == 2)
1450                 {
1451                     // fill in y and x bit
1452                     (*pDataEq)[8].add(cy);
1453                     (*pDataEq)[9].add(cx);
1454                     cy++;
1455                     cx++;
1456                 }
1457                 else
1458                 {
1459                     // fill in 2 x bits
1460                     (*pDataEq)[8].add(cx);
1461                     cx++;
1462                     (*pDataEq)[9].add(cx);
1463                     cx++;
1464                 }
1465             }
1466             else
1467             {
1468                 // Z 3d swizzle
1469                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1470                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1471                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1472                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1473                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1474                 {
1475                     (*pDataEq)[i].add(cz);
1476                     cz++;
1477                 }
1478                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1479                 {
1480                     // add an x and z
1481                     (*pDataEq)[6].add(cx);
1482                     (*pDataEq)[7].add(cz);
1483                     cx++;
1484                     cz++;
1485                 }
1486                 else if (elementBytesLog2 == 2)
1487                 {
1488                     // add a y and z
1489                     (*pDataEq)[6].add(cy);
1490                     (*pDataEq)[7].add(cz);
1491                     cy++;
1492                     cz++;
1493                 }
1494                 // add y and x
1495                 (*pDataEq)[8].add(cy);
1496                 (*pDataEq)[9].add(cx);
1497                 cy++;
1498                 cx++;
1499             }
1500             // Fill in bit 10 and up
1501             pDataEq->mort3d( cz, cy, cx, 10 );
1502         }
1503         else if (IsThin(resourceType, swizzleMode))
1504         {
1505             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1506             // Color 2D
1507             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1508             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1509             UINT_32 i;
1510             // Fill in bottom x bits
1511             for (i = elementBytesLog2; i < 4; i++)
1512             {
1513                 (*pDataEq)[i].add(cx);
1514                 cx++;
1515             }
1516             // Fill in bottom y bits
1517             for (i = 4; i < 4 + microYBits; i++)
1518             {
1519                 (*pDataEq)[i].add(cy);
1520                 cy++;
1521             }
1522             // Fill in last of the micro_x bits
1523             for (i = 4 + microYBits; i < 8; i++)
1524             {
1525                 (*pDataEq)[i].add(cx);
1526                 cx++;
1527             }
1528             // Fill in x/y bits below sample split
1529             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1530             // Fill in sample bits
1531             for (i = 0; i < numSamplesLog2; i++)
1532             {
1533                 cs.set(DIM_S, i);
1534                 (*pDataEq)[tileSplitStart + i].add(cs);
1535             }
1536             // Fill in x/y bits above sample split
1537             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1538             {
1539                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1540             }
1541             else
1542             {
1543                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1544             }
1545         }
1546         else
1547         {
1548             ADDR_ASSERT_ALWAYS();
1549         }
1550     }
1551     else
1552     {
1553         // Fmask or depth
1554         UINT_32 sampleStart = elementBytesLog2;
1555         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1556         UINT_32 ymajStart = 6 + numSamplesLog2;
1557 
1558         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1559         {
1560             cs.set(DIM_S, s);
1561             (*pDataEq)[sampleStart + s].add(cs);
1562         }
1563 
1564         // Put in the x-major order pixel bits
1565         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1566         // Put in the y-major order pixel bits
1567         pDataEq->mort2d(cy, cx, ymajStart);
1568     }
1569 }
1570 
1571 /**
1572 ************************************************************************************************************************
1573 *   Gfx9Lib::GetPipeEquation
1574 *
1575 *   @brief
1576 *       Get pipe equation
1577 *   @return
1578 *       N/A
1579 ************************************************************************************************************************
1580 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1581 VOID Gfx9Lib::GetPipeEquation(
1582     CoordEq*         pPipeEq,            ///< [out] pipe equation
1583     CoordEq*         pDataEq,            ///< [in] data equation
1584     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1585     UINT_32          numPipeLog2,        ///< [in] number of pipes
1586     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1587     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1588     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1589     AddrResourceType resourceType        ///< [in] data surface resource type
1590     ) const
1591 {
1592     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1593     CoordEq dataEq;
1594 
1595     pDataEq->copy(dataEq);
1596 
1597     if (dataSurfaceType == Gfx9DataColor)
1598     {
1599         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1600         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1601     }
1602 
1603     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1604 
1605     // This section should only apply to z/stencil, maybe fmask
1606     // If the pipe bit is below the comp block size,
1607     // then keep moving up the address until we find a bit that is above
1608     UINT_32 pipeStart = 0;
1609 
1610     if (dataSurfaceType != Gfx9DataColor)
1611     {
1612         Coordinate tileMin(DIM_X, 3);
1613 
1614         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1615         {
1616             pipeStart++;
1617         }
1618 
1619         // if pipe is 0, then the first pipe bit is above the comp block size,
1620         // so we don't need to do anything
1621         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1622         // we will get the same pipe equation
1623         if (pipeStart != 0)
1624         {
1625             for (UINT_32 i = 0; i < numPipeLog2; i++)
1626             {
1627                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1628                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1629             }
1630         }
1631     }
1632 
1633     if (IsPrt(swizzleMode))
1634     {
1635         // Clear out bits above the block size if prt's are enabled
1636         dataEq.resize(blockSizeLog2);
1637         dataEq.resize(48);
1638     }
1639 
1640     if (IsXor(swizzleMode))
1641     {
1642         CoordEq xorMask;
1643 
1644         if (IsThick(resourceType, swizzleMode))
1645         {
1646             CoordEq xorMask2;
1647 
1648             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1649 
1650             xorMask.resize(numPipeLog2);
1651 
1652             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1653             {
1654                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1655                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1656             }
1657         }
1658         else
1659         {
1660             // Xor in the bits above the pipe+gpu bits
1661             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1662 
1663             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1664             {
1665                 Coordinate co;
1666                 CoordEq xorMask2;
1667                 // if 1xaa and not prt, then xor in the z bits
1668                 xorMask2.resize(0);
1669                 xorMask2.resize(numPipeLog2);
1670                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1671                 {
1672                     co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1673                     xorMask2[pipeIdx].add(co);
1674                 }
1675 
1676                 pPipeEq->xorin(xorMask2);
1677             }
1678         }
1679 
1680         xorMask.reverse();
1681         pPipeEq->xorin(xorMask);
1682     }
1683 }
1684 /**
1685 ************************************************************************************************************************
1686 *   Gfx9Lib::GetMetaEquation
1687 *
1688 *   @brief
1689 *       Get meta equation for cmask/htile/DCC
1690 *   @return
1691 *       Pointer to a calculated meta equation
1692 ************************************************************************************************************************
1693 */
GetMetaEquation(const MetaEqParams & metaEqParams)1694 const CoordEq* Gfx9Lib::GetMetaEquation(
1695     const MetaEqParams& metaEqParams)
1696 {
1697     UINT_32 cachedMetaEqIndex;
1698 
1699     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1700     {
1701         if (memcmp(&metaEqParams,
1702                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1703                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1704         {
1705             break;
1706         }
1707     }
1708 
1709     CoordEq* pMetaEq = NULL;
1710 
1711     if (cachedMetaEqIndex < MaxCachedMetaEq)
1712     {
1713         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1714     }
1715     else
1716     {
1717         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1718 
1719         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1720 
1721         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1722 
1723         GenMetaEquation(pMetaEq,
1724                         metaEqParams.maxMip,
1725                         metaEqParams.elementBytesLog2,
1726                         metaEqParams.numSamplesLog2,
1727                         metaEqParams.metaFlag,
1728                         metaEqParams.dataSurfaceType,
1729                         metaEqParams.swizzleMode,
1730                         metaEqParams.resourceType,
1731                         metaEqParams.metaBlkWidthLog2,
1732                         metaEqParams.metaBlkHeightLog2,
1733                         metaEqParams.metaBlkDepthLog2,
1734                         metaEqParams.compBlkWidthLog2,
1735                         metaEqParams.compBlkHeightLog2,
1736                         metaEqParams.compBlkDepthLog2);
1737     }
1738 
1739     return pMetaEq;
1740 }
1741 
1742 /**
1743 ************************************************************************************************************************
1744 *   Gfx9Lib::GenMetaEquation
1745 *
1746 *   @brief
1747 *       Get meta equation for cmask/htile/DCC
1748 *   @return
1749 *       N/A
1750 ************************************************************************************************************************
1751 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1752 VOID Gfx9Lib::GenMetaEquation(
1753     CoordEq*         pMetaEq,               ///< [out] meta equation
1754     UINT_32          maxMip,                ///< [in] max mip Id
1755     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1756     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1757     ADDR2_META_FLAGS metaFlag,              ///< [in] meta falg
1758     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1759     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1760     AddrResourceType resourceType,          ///< [in] data surface resource type
1761     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1762     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1763     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1764     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1765     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1766     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1767     const
1768 {
1769     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1770     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1771 
1772     // Get the correct data address and rb equation
1773     CoordEq dataEq;
1774     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1775                     elementBytesLog2, numSamplesLog2);
1776 
1777     // Get pipe and rb equations
1778     CoordEq pipeEquation;
1779     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1780                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1781     numPipeTotalLog2 = pipeEquation.getsize();
1782 
1783     if (metaFlag.linear)
1784     {
1785         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1786         ADDR_ASSERT_ALWAYS();
1787 
1788         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1789 
1790         dataEq.copy(*pMetaEq);
1791 
1792         if (IsLinear(swizzleMode))
1793         {
1794             if (metaFlag.pipeAligned)
1795             {
1796                 // Remove the pipe bits
1797                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1798                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1799             }
1800             // Divide by comp block size, which for linear (which is always color) is 256 B
1801             pMetaEq->shift(-8);
1802 
1803             if (metaFlag.pipeAligned)
1804             {
1805                 // Put pipe bits back in
1806                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1807 
1808                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1809                 {
1810                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1811                 }
1812             }
1813         }
1814 
1815         pMetaEq->shift(1);
1816     }
1817     else
1818     {
1819         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1820         UINT_32 compFragLog2 =
1821             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1822             maxCompFragLog2 : numSamplesLog2;
1823 
1824         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1825 
1826         // Make sure the metaaddr is cleared
1827         pMetaEq->resize(0);
1828         pMetaEq->resize(27);
1829 
1830         if (IsThick(resourceType, swizzleMode))
1831         {
1832             Coordinate cx(DIM_X, 0);
1833             Coordinate cy(DIM_Y, 0);
1834             Coordinate cz(DIM_Z, 0);
1835 
1836             if (maxMip > 0)
1837             {
1838                 pMetaEq->mort3d(cy, cx, cz);
1839             }
1840             else
1841             {
1842                 pMetaEq->mort3d(cx, cy, cz);
1843             }
1844         }
1845         else
1846         {
1847             Coordinate cx(DIM_X, 0);
1848             Coordinate cy(DIM_Y, 0);
1849             Coordinate cs;
1850 
1851             if (maxMip > 0)
1852             {
1853                 pMetaEq->mort2d(cy, cx, compFragLog2);
1854             }
1855             else
1856             {
1857                 pMetaEq->mort2d(cx, cy, compFragLog2);
1858             }
1859 
1860             //------------------------------------------------------------------------------------------------------------------------
1861             // Put the compressible fragments at the lsb
1862             // the uncompressible frags will be at the msb of the micro address
1863             //------------------------------------------------------------------------------------------------------------------------
1864             for (UINT_32 s = 0; s < compFragLog2; s++)
1865             {
1866                 cs.set(DIM_S, s);
1867                 (*pMetaEq)[s].add(cs);
1868             }
1869         }
1870 
1871         // Keep a copy of the pipe equations
1872         CoordEq origPipeEquation;
1873         pipeEquation.copy(origPipeEquation);
1874 
1875         Coordinate co;
1876         // filter out everything under the compressed block size
1877         co.set(DIM_X, compBlkWidthLog2);
1878         pMetaEq->Filter('<', co, 0, DIM_X);
1879         co.set(DIM_Y, compBlkHeightLog2);
1880         pMetaEq->Filter('<', co, 0, DIM_Y);
1881         co.set(DIM_Z, compBlkDepthLog2);
1882         pMetaEq->Filter('<', co, 0, DIM_Z);
1883 
1884         // For non-color, filter out sample bits
1885         if (dataSurfaceType != Gfx9DataColor)
1886         {
1887             co.set(DIM_X, 0);
1888             pMetaEq->Filter('<', co, 0, DIM_S);
1889         }
1890 
1891         // filter out everything above the metablock size
1892         co.set(DIM_X, metaBlkWidthLog2 - 1);
1893         pMetaEq->Filter('>', co, 0, DIM_X);
1894         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1895         pMetaEq->Filter('>', co, 0, DIM_Y);
1896         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1897         pMetaEq->Filter('>', co, 0, DIM_Z);
1898 
1899         // filter out everything above the metablock size for the channel bits
1900         co.set(DIM_X, metaBlkWidthLog2 - 1);
1901         pipeEquation.Filter('>', co, 0, DIM_X);
1902         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1903         pipeEquation.Filter('>', co, 0, DIM_Y);
1904         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1905         pipeEquation.Filter('>', co, 0, DIM_Z);
1906 
1907         // Make sure we still have the same number of channel bits
1908         if (pipeEquation.getsize() != numPipeTotalLog2)
1909         {
1910             ADDR_ASSERT_ALWAYS();
1911         }
1912 
1913         // Loop through all channel and rb bits,
1914         // and make sure these components exist in the metadata address
1915         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1916         {
1917             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1918             {
1919                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1920                 {
1921                     ADDR_ASSERT_ALWAYS();
1922                 }
1923             }
1924         }
1925 
1926         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
1927         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1928         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1929         CoordEq       origRbEquation;
1930 
1931         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1932 
1933         CoordEq rbEquation = origRbEquation;
1934 
1935         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1936         {
1937             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1938             {
1939                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1940                 {
1941                     ADDR_ASSERT_ALWAYS();
1942                 }
1943             }
1944         }
1945 
1946         if (m_settings.applyAliasFix)
1947         {
1948             co.set(DIM_Z, -1);
1949         }
1950 
1951         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1952         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1953         {
1954             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1955             {
1956                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1957 
1958                 if (m_settings.applyAliasFix)
1959                 {
1960                     CoordTerm filteredPipeEq;
1961                     filteredPipeEq = pipeEquation[j];
1962 
1963                     filteredPipeEq.Filter('>', co, 0, DIM_Z);
1964 
1965                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1966                 }
1967                 else
1968                 {
1969                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1970                 }
1971 
1972                 if (isRbEquationInPipeEquation)
1973                 {
1974                     rbEquation[i].Clear();
1975                 }
1976             }
1977         }
1978 
1979          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {0};
1980 
1981         // Loop through each bit of the channel, get the smallest coordinate,
1982         // and remove it from the metaaddr, and rb_equation
1983         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1984         {
1985             pipeEquation[i].getsmallest(co);
1986 
1987             UINT_32 old_size = pMetaEq->getsize();
1988             pMetaEq->Filter('=', co);
1989             UINT_32 new_size = pMetaEq->getsize();
1990             if (new_size != old_size-1)
1991             {
1992                 ADDR_ASSERT_ALWAYS();
1993             }
1994             pipeEquation.remove(co);
1995             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1996             {
1997                 if (rbEquation[j].remove(co))
1998                 {
1999                     // if we actually removed something from this bit, then add the remaining
2000                     // channel bits, as these can be removed for this bit
2001                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2002                     {
2003                         if (pipeEquation[i][k] != co)
2004                         {
2005                             rbEquation[j].add(pipeEquation[i][k]);
2006                             rbAppendedWithPipeBits[j] = true;
2007                         }
2008                     }
2009                 }
2010             }
2011         }
2012 
2013         // Loop through the rb bits and see what remain;
2014         // filter out the smallest coordinate if it remains
2015         UINT_32 rbBitsLeft = 0;
2016         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2017         {
2018             BOOL_32 isRbEqAppended = FALSE;
2019 
2020             if (m_settings.applyAliasFix)
2021             {
2022                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2023             }
2024             else
2025             {
2026                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2027             }
2028 
2029             if (isRbEqAppended)
2030             {
2031                 rbBitsLeft++;
2032                 rbEquation[i].getsmallest(co);
2033                 UINT_32 old_size = pMetaEq->getsize();
2034                 pMetaEq->Filter('=', co);
2035                 UINT_32 new_size = pMetaEq->getsize();
2036                 if (new_size != old_size - 1)
2037                 {
2038                     // assert warning
2039                 }
2040                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2041                 {
2042                     if (rbEquation[j].remove(co))
2043                     {
2044                         // if we actually removed something from this bit, then add the remaining
2045                         // rb bits, as these can be removed for this bit
2046                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2047                         {
2048                             if (rbEquation[i][k] != co)
2049                             {
2050                                 rbEquation[j].add(rbEquation[i][k]);
2051                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2052                             }
2053                         }
2054                     }
2055                 }
2056             }
2057         }
2058 
2059         // capture the size of the metaaddr
2060         UINT_32 metaSize = pMetaEq->getsize();
2061         // resize to 49 bits...make this a nibble address
2062         pMetaEq->resize(49);
2063         // Concatenate the macro address above the current address
2064         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2065         {
2066             co.set(DIM_M, j);
2067             (*pMetaEq)[i].add(co);
2068         }
2069 
2070         // Multiply by meta element size (in nibbles)
2071         if (dataSurfaceType == Gfx9DataColor)
2072         {
2073             pMetaEq->shift(1);
2074         }
2075         else if (dataSurfaceType == Gfx9DataDepthStencil)
2076         {
2077             pMetaEq->shift(3);
2078         }
2079 
2080         //------------------------------------------------------------------------------------------
2081         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2082         // Shift up from pipe interleave number of channel
2083         // and rb bits left, and uncompressed fragments
2084         //------------------------------------------------------------------------------------------
2085 
2086         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2087 
2088         // Put in the channel bits
2089         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2090         {
2091             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2092         }
2093 
2094         // Put in remaining rb bits
2095         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2096         {
2097             BOOL_32 isRbEqAppended = FALSE;
2098 
2099             if (m_settings.applyAliasFix)
2100             {
2101                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2102             }
2103             else
2104             {
2105                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2106             }
2107 
2108             if (isRbEqAppended)
2109             {
2110                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2111                 // Mark any rb bit we add in to the rb mask
2112                 j++;
2113             }
2114         }
2115 
2116         //------------------------------------------------------------------------------------------
2117         // Put in the uncompressed fragment bits
2118         //------------------------------------------------------------------------------------------
2119         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2120         {
2121             co.set(DIM_S, compFragLog2 + i);
2122             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2123         }
2124     }
2125 }
2126 
2127 /**
2128 ************************************************************************************************************************
2129 *   Gfx9Lib::IsEquationSupported
2130 *
2131 *   @brief
2132 *       Check if equation is supported for given swizzle mode and resource type.
2133 *
2134 *   @return
2135 *       TRUE if supported
2136 ************************************************************************************************************************
2137 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2138 BOOL_32 Gfx9Lib::IsEquationSupported(
2139     AddrResourceType rsrcType,
2140     AddrSwizzleMode  swMode,
2141     UINT_32          elementBytesLog2) const
2142 {
2143     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2144                         (IsValidSwMode(swMode) == TRUE) &&
2145                         (IsLinear(swMode) == FALSE) &&
2146                         (((IsTex2d(rsrcType) == TRUE) &&
2147                           ((elementBytesLog2 < 4) ||
2148                            ((IsRotateSwizzle(swMode) == FALSE) &&
2149                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2150                          ((IsTex3d(rsrcType) == TRUE) &&
2151                           (IsRotateSwizzle(swMode) == FALSE) &&
2152                           (IsBlock256b(swMode) == FALSE)));
2153 
2154     return supported;
2155 }
2156 
2157 /**
2158 ************************************************************************************************************************
2159 *   Gfx9Lib::InitEquationTable
2160 *
2161 *   @brief
2162 *       Initialize Equation table.
2163 *
2164 *   @return
2165 *       N/A
2166 ************************************************************************************************************************
2167 */
InitEquationTable()2168 VOID Gfx9Lib::InitEquationTable()
2169 {
2170     memset(m_equationTable, 0, sizeof(m_equationTable));
2171 
2172     // Loop all possible resource type (2D/3D)
2173     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2174     {
2175         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2176 
2177         // Loop all possible swizzle mode
2178         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2179         {
2180             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2181 
2182             // Loop all possible bpp
2183             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2184             {
2185                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2186 
2187                 // Check if the input is supported
2188                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2189                 {
2190                     ADDR_EQUATION     equation;
2191                     ADDR_E_RETURNCODE retCode;
2192 
2193                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2194 
2195                     // Generate the equation
2196                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2197                     {
2198                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2199                     }
2200                     else if (IsThin(rsrcType, swMode))
2201                     {
2202                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2203                     }
2204                     else
2205                     {
2206                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2207                     }
2208 
2209                     // Only fill the equation into the table if the return code is ADDR_OK,
2210                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2211                     // a valid input, we do nothing but just fill invalid equation index
2212                     // into the lookup table.
2213                     if (retCode == ADDR_OK)
2214                     {
2215                         equationIndex = m_numEquations;
2216                         ADDR_ASSERT(equationIndex < EquationTableSize);
2217 
2218                         m_equationTable[equationIndex] = equation;
2219 
2220                         m_numEquations++;
2221                     }
2222                     else
2223                     {
2224                         ADDR_ASSERT_ALWAYS();
2225                     }
2226                 }
2227 
2228                 // Fill the index into the lookup table, if the combination is not supported
2229                 // fill the invalid equation index
2230                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2231             }
2232         }
2233     }
2234 }
2235 
2236 /**
2237 ************************************************************************************************************************
2238 *   Gfx9Lib::HwlGetEquationIndex
2239 *
2240 *   @brief
2241 *       Interface function stub of GetEquationIndex
2242 *
2243 *   @return
2244 *       ADDR_E_RETURNCODE
2245 ************************************************************************************************************************
2246 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2247 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2248     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2249     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2250     ) const
2251 {
2252     AddrResourceType rsrcType         = pIn->resourceType;
2253     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2254     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2255     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2256 
2257     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2258     {
2259         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2260         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2261 
2262         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2263     }
2264 
2265     if (pOut->pMipInfo != NULL)
2266     {
2267         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2268         {
2269             pOut->pMipInfo[i].equationIndex = index;
2270         }
2271     }
2272 
2273     return index;
2274 }
2275 
2276 /**
2277 ************************************************************************************************************************
2278 *   Gfx9Lib::HwlComputeBlock256Equation
2279 *
2280 *   @brief
2281 *       Interface function stub of ComputeBlock256Equation
2282 *
2283 *   @return
2284 *       ADDR_E_RETURNCODE
2285 ************************************************************************************************************************
2286 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2287 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2288     AddrResourceType rsrcType,
2289     AddrSwizzleMode  swMode,
2290     UINT_32          elementBytesLog2,
2291     ADDR_EQUATION*   pEquation) const
2292 {
2293     ADDR_E_RETURNCODE ret = ADDR_OK;
2294 
2295     pEquation->numBits = 8;
2296 
2297     UINT_32 i = 0;
2298     for (; i < elementBytesLog2; i++)
2299     {
2300         InitChannel(1, 0 , i, &pEquation->addr[i]);
2301     }
2302 
2303     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2304 
2305     const UINT_32 maxBitsUsed = 4;
2306     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0};
2307     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0};
2308 
2309     for (i = 0; i < maxBitsUsed; i++)
2310     {
2311         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2312         InitChannel(1, 1, i, &y[i]);
2313     }
2314 
2315     if (IsStandardSwizzle(rsrcType, swMode))
2316     {
2317         switch (elementBytesLog2)
2318         {
2319             case 0:
2320                 pixelBit[0] = x[0];
2321                 pixelBit[1] = x[1];
2322                 pixelBit[2] = x[2];
2323                 pixelBit[3] = x[3];
2324                 pixelBit[4] = y[0];
2325                 pixelBit[5] = y[1];
2326                 pixelBit[6] = y[2];
2327                 pixelBit[7] = y[3];
2328                 break;
2329             case 1:
2330                 pixelBit[0] = x[0];
2331                 pixelBit[1] = x[1];
2332                 pixelBit[2] = x[2];
2333                 pixelBit[3] = y[0];
2334                 pixelBit[4] = y[1];
2335                 pixelBit[5] = y[2];
2336                 pixelBit[6] = x[3];
2337                 break;
2338             case 2:
2339                 pixelBit[0] = x[0];
2340                 pixelBit[1] = x[1];
2341                 pixelBit[2] = y[0];
2342                 pixelBit[3] = y[1];
2343                 pixelBit[4] = y[2];
2344                 pixelBit[5] = x[2];
2345                 break;
2346             case 3:
2347                 pixelBit[0] = x[0];
2348                 pixelBit[1] = y[0];
2349                 pixelBit[2] = y[1];
2350                 pixelBit[3] = x[1];
2351                 pixelBit[4] = x[2];
2352                 break;
2353             case 4:
2354                 pixelBit[0] = y[0];
2355                 pixelBit[1] = y[1];
2356                 pixelBit[2] = x[0];
2357                 pixelBit[3] = x[1];
2358                 break;
2359             default:
2360                 ADDR_ASSERT_ALWAYS();
2361                 ret = ADDR_INVALIDPARAMS;
2362                 break;
2363         }
2364     }
2365     else if (IsDisplaySwizzle(rsrcType, swMode))
2366     {
2367         switch (elementBytesLog2)
2368         {
2369             case 0:
2370                 pixelBit[0] = x[0];
2371                 pixelBit[1] = x[1];
2372                 pixelBit[2] = x[2];
2373                 pixelBit[3] = y[1];
2374                 pixelBit[4] = y[0];
2375                 pixelBit[5] = y[2];
2376                 pixelBit[6] = x[3];
2377                 pixelBit[7] = y[3];
2378                 break;
2379             case 1:
2380                 pixelBit[0] = x[0];
2381                 pixelBit[1] = x[1];
2382                 pixelBit[2] = x[2];
2383                 pixelBit[3] = y[0];
2384                 pixelBit[4] = y[1];
2385                 pixelBit[5] = y[2];
2386                 pixelBit[6] = x[3];
2387                 break;
2388             case 2:
2389                 pixelBit[0] = x[0];
2390                 pixelBit[1] = x[1];
2391                 pixelBit[2] = y[0];
2392                 pixelBit[3] = x[2];
2393                 pixelBit[4] = y[1];
2394                 pixelBit[5] = y[2];
2395                 break;
2396             case 3:
2397                 pixelBit[0] = x[0];
2398                 pixelBit[1] = y[0];
2399                 pixelBit[2] = x[1];
2400                 pixelBit[3] = x[2];
2401                 pixelBit[4] = y[1];
2402                 break;
2403             case 4:
2404                 pixelBit[0] = x[0];
2405                 pixelBit[1] = y[0];
2406                 pixelBit[2] = x[1];
2407                 pixelBit[3] = y[1];
2408                 break;
2409             default:
2410                 ADDR_ASSERT_ALWAYS();
2411                 ret = ADDR_INVALIDPARAMS;
2412                 break;
2413         }
2414     }
2415     else if (IsRotateSwizzle(swMode))
2416     {
2417         switch (elementBytesLog2)
2418         {
2419             case 0:
2420                 pixelBit[0] = y[0];
2421                 pixelBit[1] = y[1];
2422                 pixelBit[2] = y[2];
2423                 pixelBit[3] = x[1];
2424                 pixelBit[4] = x[0];
2425                 pixelBit[5] = x[2];
2426                 pixelBit[6] = x[3];
2427                 pixelBit[7] = y[3];
2428                 break;
2429             case 1:
2430                 pixelBit[0] = y[0];
2431                 pixelBit[1] = y[1];
2432                 pixelBit[2] = y[2];
2433                 pixelBit[3] = x[0];
2434                 pixelBit[4] = x[1];
2435                 pixelBit[5] = x[2];
2436                 pixelBit[6] = x[3];
2437                 break;
2438             case 2:
2439                 pixelBit[0] = y[0];
2440                 pixelBit[1] = y[1];
2441                 pixelBit[2] = x[0];
2442                 pixelBit[3] = y[2];
2443                 pixelBit[4] = x[1];
2444                 pixelBit[5] = x[2];
2445                 break;
2446             case 3:
2447                 pixelBit[0] = y[0];
2448                 pixelBit[1] = x[0];
2449                 pixelBit[2] = y[1];
2450                 pixelBit[3] = x[1];
2451                 pixelBit[4] = x[2];
2452                 break;
2453             default:
2454                 ADDR_ASSERT_ALWAYS();
2455             case 4:
2456                 ret = ADDR_INVALIDPARAMS;
2457                 break;
2458         }
2459     }
2460     else
2461     {
2462         ADDR_ASSERT_ALWAYS();
2463         ret = ADDR_INVALIDPARAMS;
2464     }
2465 
2466     // Post validation
2467     if (ret == ADDR_OK)
2468     {
2469         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2470         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2471                     (microBlockDim.w * (1 << elementBytesLog2)));
2472         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2473     }
2474 
2475     return ret;
2476 }
2477 
2478 /**
2479 ************************************************************************************************************************
2480 *   Gfx9Lib::HwlComputeThinEquation
2481 *
2482 *   @brief
2483 *       Interface function stub of ComputeThinEquation
2484 *
2485 *   @return
2486 *       ADDR_E_RETURNCODE
2487 ************************************************************************************************************************
2488 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2489 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2490     AddrResourceType rsrcType,
2491     AddrSwizzleMode  swMode,
2492     UINT_32          elementBytesLog2,
2493     ADDR_EQUATION*   pEquation) const
2494 {
2495     ADDR_E_RETURNCODE ret = ADDR_OK;
2496 
2497     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2498 
2499     UINT_32 maxXorBits = blockSizeLog2;
2500     if (IsNonPrtXor(swMode))
2501     {
2502         // For non-prt-xor, maybe need to initialize some more bits for xor
2503         // The highest xor bit used in equation will be max the following 3 items:
2504         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2505         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2506         // 3. blockSizeLog2
2507 
2508         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2509         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2510                                      GetPipeXorBits(blockSizeLog2) +
2511                                      2 * GetBankXorBits(blockSizeLog2));
2512     }
2513 
2514     const UINT_32 maxBitsUsed = 14;
2515     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2516     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0};
2517     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0};
2518 
2519     const UINT_32 extraXorBits = 16;
2520     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2521     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {0};
2522 
2523     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2524     {
2525         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2526         InitChannel(1, 1, i, &y[i]);
2527     }
2528 
2529     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2530 
2531     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2532     {
2533         InitChannel(1, 0 , i, &pixelBit[i]);
2534     }
2535 
2536     UINT_32 xIdx = 0;
2537     UINT_32 yIdx = 0;
2538     UINT_32 lowBits = 0;
2539 
2540     if (IsZOrderSwizzle(swMode))
2541     {
2542         if (elementBytesLog2 <= 3)
2543         {
2544             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2545             {
2546                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2547             }
2548 
2549             lowBits = 6;
2550         }
2551         else
2552         {
2553             ret = ADDR_INVALIDPARAMS;
2554         }
2555     }
2556     else
2557     {
2558         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2559 
2560         if (ret == ADDR_OK)
2561         {
2562             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2563             xIdx = Log2(microBlockDim.w);
2564             yIdx = Log2(microBlockDim.h);
2565             lowBits = 8;
2566         }
2567     }
2568 
2569     if (ret == ADDR_OK)
2570     {
2571         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2572         {
2573             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2574         }
2575 
2576         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2577         {
2578             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2579         }
2580 
2581         if (IsXor(swMode))
2582         {
2583             // Fill XOR bits
2584             UINT_32 pipeStart = m_pipeInterleaveLog2;
2585             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2586 
2587             UINT_32 bankStart = pipeStart + pipeXorBits;
2588             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2589 
2590             for (UINT_32 i = 0; i < pipeXorBits; i++)
2591             {
2592                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2593                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2594                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2595 
2596                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2597             }
2598 
2599             for (UINT_32 i = 0; i < bankXorBits; i++)
2600             {
2601                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2602                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2603                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2604 
2605                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2606             }
2607 
2608             if (IsPrt(swMode) == FALSE)
2609             {
2610                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2611                 {
2612                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2613                 }
2614 
2615                 for (UINT_32 i = 0; i < bankXorBits; i++)
2616                 {
2617                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2618                 }
2619             }
2620         }
2621 
2622         pEquation->numBits = blockSizeLog2;
2623     }
2624 
2625     return ret;
2626 }
2627 
2628 /**
2629 ************************************************************************************************************************
2630 *   Gfx9Lib::HwlComputeThickEquation
2631 *
2632 *   @brief
2633 *       Interface function stub of ComputeThickEquation
2634 *
2635 *   @return
2636 *       ADDR_E_RETURNCODE
2637 ************************************************************************************************************************
2638 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2639 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2640     AddrResourceType rsrcType,
2641     AddrSwizzleMode  swMode,
2642     UINT_32          elementBytesLog2,
2643     ADDR_EQUATION*   pEquation) const
2644 {
2645     ADDR_E_RETURNCODE ret = ADDR_OK;
2646 
2647     ADDR_ASSERT(IsTex3d(rsrcType));
2648 
2649     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2650 
2651     UINT_32 maxXorBits = blockSizeLog2;
2652     if (IsNonPrtXor(swMode))
2653     {
2654         // For non-prt-xor, maybe need to initialize some more bits for xor
2655         // The highest xor bit used in equation will be max the following 3:
2656         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2657         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2658         // 3. blockSizeLog2
2659 
2660         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2661         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2662                                      GetPipeXorBits(blockSizeLog2) +
2663                                      3 * GetBankXorBits(blockSizeLog2));
2664     }
2665 
2666     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2667     {
2668         InitChannel(1, 0 , i, &pEquation->addr[i]);
2669     }
2670 
2671     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2672 
2673     const UINT_32 maxBitsUsed = 12;
2674     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2675     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0};
2676     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0};
2677     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {0};
2678 
2679     const UINT_32 extraXorBits = 24;
2680     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2681     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {0};
2682 
2683     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2684     {
2685         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2686         InitChannel(1, 1, i, &y[i]);
2687         InitChannel(1, 2, i, &z[i]);
2688     }
2689 
2690     if (IsZOrderSwizzle(swMode))
2691     {
2692         switch (elementBytesLog2)
2693         {
2694             case 0:
2695                 pixelBit[0]  = x[0];
2696                 pixelBit[1]  = y[0];
2697                 pixelBit[2]  = x[1];
2698                 pixelBit[3]  = y[1];
2699                 pixelBit[4]  = z[0];
2700                 pixelBit[5]  = z[1];
2701                 pixelBit[6]  = x[2];
2702                 pixelBit[7]  = z[2];
2703                 pixelBit[8]  = y[2];
2704                 pixelBit[9]  = x[3];
2705                 break;
2706             case 1:
2707                 pixelBit[0]  = x[0];
2708                 pixelBit[1]  = y[0];
2709                 pixelBit[2]  = x[1];
2710                 pixelBit[3]  = y[1];
2711                 pixelBit[4]  = z[0];
2712                 pixelBit[5]  = z[1];
2713                 pixelBit[6]  = z[2];
2714                 pixelBit[7]  = y[2];
2715                 pixelBit[8]  = x[2];
2716                 break;
2717             case 2:
2718                 pixelBit[0]  = x[0];
2719                 pixelBit[1]  = y[0];
2720                 pixelBit[2]  = x[1];
2721                 pixelBit[3]  = z[0];
2722                 pixelBit[4]  = y[1];
2723                 pixelBit[5]  = z[1];
2724                 pixelBit[6]  = y[2];
2725                 pixelBit[7]  = x[2];
2726                 break;
2727             case 3:
2728                 pixelBit[0]  = x[0];
2729                 pixelBit[1]  = y[0];
2730                 pixelBit[2]  = z[0];
2731                 pixelBit[3]  = x[1];
2732                 pixelBit[4]  = z[1];
2733                 pixelBit[5]  = y[1];
2734                 pixelBit[6]  = x[2];
2735                 break;
2736             case 4:
2737                 pixelBit[0]  = x[0];
2738                 pixelBit[1]  = y[0];
2739                 pixelBit[2]  = z[0];
2740                 pixelBit[3]  = z[1];
2741                 pixelBit[4]  = y[1];
2742                 pixelBit[5]  = x[1];
2743                 break;
2744             default:
2745                 ADDR_ASSERT_ALWAYS();
2746                 ret = ADDR_INVALIDPARAMS;
2747                 break;
2748         }
2749     }
2750     else if (IsStandardSwizzle(rsrcType, swMode))
2751     {
2752         switch (elementBytesLog2)
2753         {
2754             case 0:
2755                 pixelBit[0]  = x[0];
2756                 pixelBit[1]  = x[1];
2757                 pixelBit[2]  = x[2];
2758                 pixelBit[3]  = x[3];
2759                 pixelBit[4]  = y[0];
2760                 pixelBit[5]  = y[1];
2761                 pixelBit[6]  = z[0];
2762                 pixelBit[7]  = z[1];
2763                 pixelBit[8]  = z[2];
2764                 pixelBit[9]  = y[2];
2765                 break;
2766             case 1:
2767                 pixelBit[0]  = x[0];
2768                 pixelBit[1]  = x[1];
2769                 pixelBit[2]  = x[2];
2770                 pixelBit[3]  = y[0];
2771                 pixelBit[4]  = y[1];
2772                 pixelBit[5]  = z[0];
2773                 pixelBit[6]  = z[1];
2774                 pixelBit[7]  = z[2];
2775                 pixelBit[8]  = y[2];
2776                 break;
2777             case 2:
2778                 pixelBit[0]  = x[0];
2779                 pixelBit[1]  = x[1];
2780                 pixelBit[2]  = y[0];
2781                 pixelBit[3]  = y[1];
2782                 pixelBit[4]  = z[0];
2783                 pixelBit[5]  = z[1];
2784                 pixelBit[6]  = y[2];
2785                 pixelBit[7]  = x[2];
2786                 break;
2787             case 3:
2788                 pixelBit[0]  = x[0];
2789                 pixelBit[1]  = y[0];
2790                 pixelBit[2]  = y[1];
2791                 pixelBit[3]  = z[0];
2792                 pixelBit[4]  = z[1];
2793                 pixelBit[5]  = x[1];
2794                 pixelBit[6]  = x[2];
2795                 break;
2796             case 4:
2797                 pixelBit[0]  = y[0];
2798                 pixelBit[1]  = y[1];
2799                 pixelBit[2]  = z[0];
2800                 pixelBit[3]  = z[1];
2801                 pixelBit[4]  = x[0];
2802                 pixelBit[5]  = x[1];
2803                 break;
2804             default:
2805                 ADDR_ASSERT_ALWAYS();
2806                 ret = ADDR_INVALIDPARAMS;
2807                 break;
2808         }
2809     }
2810     else
2811     {
2812         ADDR_ASSERT_ALWAYS();
2813         ret = ADDR_INVALIDPARAMS;
2814     }
2815 
2816     if (ret == ADDR_OK)
2817     {
2818         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2819         UINT_32 xIdx = Log2(microBlockDim.w);
2820         UINT_32 yIdx = Log2(microBlockDim.h);
2821         UINT_32 zIdx = Log2(microBlockDim.d);
2822 
2823         pixelBit = pEquation->addr;
2824 
2825         const UINT_32 lowBits = 10;
2826         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2827         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2828 
2829         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2830         {
2831             if ((i % 3) == 0)
2832             {
2833                 pixelBit[i] = x[xIdx++];
2834             }
2835             else if ((i % 3) == 1)
2836             {
2837                 pixelBit[i] = z[zIdx++];
2838             }
2839             else
2840             {
2841                 pixelBit[i] = y[yIdx++];
2842             }
2843         }
2844 
2845         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2846         {
2847             if ((i % 3) == 0)
2848             {
2849                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2850             }
2851             else if ((i % 3) == 1)
2852             {
2853                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2854             }
2855             else
2856             {
2857                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2858             }
2859         }
2860 
2861         if (IsXor(swMode))
2862         {
2863             // Fill XOR bits
2864             UINT_32 pipeStart = m_pipeInterleaveLog2;
2865             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2866             for (UINT_32 i = 0; i < pipeXorBits; i++)
2867             {
2868                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2869                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2870                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2871 
2872                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2873 
2874                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2875                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2876                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2877 
2878                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2879             }
2880 
2881             UINT_32 bankStart = pipeStart + pipeXorBits;
2882             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2883             for (UINT_32 i = 0; i < bankXorBits; i++)
2884             {
2885                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2886                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2887                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2888 
2889                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2890 
2891                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2892                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2893                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2894 
2895                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2896             }
2897         }
2898 
2899         pEquation->numBits = blockSizeLog2;
2900     }
2901 
2902     return ret;
2903 }
2904 
2905 /**
2906 ************************************************************************************************************************
2907 *   Gfx9Lib::IsValidDisplaySwizzleMode
2908 *
2909 *   @brief
2910 *       Check if a swizzle mode is supported by display engine
2911 *
2912 *   @return
2913 *       TRUE is swizzle mode is supported by display engine
2914 ************************************************************************************************************************
2915 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2916 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2917     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2918 {
2919     BOOL_32 support = FALSE;
2920 
2921     const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
2922 
2923     if (m_settings.isDce12)
2924     {
2925         if (pIn->bpp == 32)
2926         {
2927             support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
2928         }
2929         else if (pIn->bpp <= 64)
2930         {
2931             support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
2932         }
2933     }
2934     else if (m_settings.isDcn1)
2935     {
2936         if (pIn->bpp < 64)
2937         {
2938             support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2939         }
2940         else if (pIn->bpp == 64)
2941         {
2942             support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2943         }
2944     }
2945     else if (m_settings.isDcn2)
2946     {
2947         if (pIn->bpp < 64)
2948         {
2949             support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2950         }
2951         else if (pIn->bpp == 64)
2952         {
2953             support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2954         }
2955     }
2956     else
2957     {
2958         ADDR_NOT_IMPLEMENTED();
2959     }
2960 
2961     return support;
2962 }
2963 
2964 /**
2965 ************************************************************************************************************************
2966 *   Gfx9Lib::HwlComputePipeBankXor
2967 *
2968 *   @brief
2969 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2970 *
2971 *   @return
2972 *       PipeBankXor value
2973 ************************************************************************************************************************
2974 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2975 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2976     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2977     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
2978 {
2979     if (IsXor(pIn->swizzleMode))
2980     {
2981         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2982         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2983         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2984 
2985         UINT_32 pipeXor = 0;
2986         UINT_32 bankXor = 0;
2987 
2988         const UINT_32 bankMask = (1 << bankBits) - 1;
2989         const UINT_32 index    = pIn->surfIndex & bankMask;
2990 
2991         const UINT_32 bpp      = pIn->flags.fmask ?
2992                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2993         if (bankBits == 4)
2994         {
2995             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2996             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2997 
2998             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2999         }
3000         else if (bankBits > 0)
3001         {
3002             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3003             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3004             bankXor = (index * bankIncrease) & bankMask;
3005         }
3006 
3007         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3008     }
3009     else
3010     {
3011         pOut->pipeBankXor = 0;
3012     }
3013 
3014     return ADDR_OK;
3015 }
3016 
3017 /**
3018 ************************************************************************************************************************
3019 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3020 *
3021 *   @brief
3022 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3023 *
3024 *   @return
3025 *       PipeBankXor value
3026 ************************************************************************************************************************
3027 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3028 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3029     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3030     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3031 {
3032     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3033     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3034     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3035 
3036     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3037     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3038 
3039     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3040 
3041     return ADDR_OK;
3042 }
3043 
3044 /**
3045 ************************************************************************************************************************
3046 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3047 *
3048 *   @brief
3049 *       Compute sub resource offset to support swizzle pattern
3050 *
3051 *   @return
3052 *       Offset
3053 ************************************************************************************************************************
3054 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3055 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3056     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3057     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3058 {
3059     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3060 
3061     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3062     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3063     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3064     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3065     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3066     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3067 
3068     pOut->offset = pIn->slice * pIn->sliceSize +
3069                    pIn->macroBlockOffset +
3070                    (pIn->mipTailOffset ^ pipeBankXor) -
3071                    static_cast<UINT_64>(pipeBankXor);
3072     return ADDR_OK;
3073 }
3074 
3075 /**
3076 ************************************************************************************************************************
3077 *   Gfx9Lib::ValidateNonSwModeParams
3078 *
3079 *   @brief
3080 *       Validate compute surface info params except swizzle mode
3081 *
3082 *   @return
3083 *       TRUE if parameters are valid, FALSE otherwise
3084 ************************************************************************************************************************
3085 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3086 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3087     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3088 {
3089     BOOL_32 valid = TRUE;
3090 
3091     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3092     {
3093         ADDR_ASSERT_ALWAYS();
3094         valid = FALSE;
3095     }
3096 
3097     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3098     {
3099         ADDR_ASSERT_ALWAYS();
3100         valid = FALSE;
3101     }
3102 
3103     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3104     const BOOL_32 msaa   = (pIn->numFrags > 1);
3105     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3106 
3107     const AddrResourceType rsrcType = pIn->resourceType;
3108     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3109     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3110     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3111 
3112     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3113     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3114     const BOOL_32             display = flags.display || flags.rotated;
3115     const BOOL_32             stereo  = flags.qbStereo;
3116     const BOOL_32             fmask   = flags.fmask;
3117 
3118     // Resource type check
3119     if (tex1d)
3120     {
3121         if (msaa || zbuffer || display || stereo || isBc || fmask)
3122         {
3123             ADDR_ASSERT_ALWAYS();
3124             valid = FALSE;
3125         }
3126     }
3127     else if (tex2d)
3128     {
3129         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3130         {
3131             ADDR_ASSERT_ALWAYS();
3132             valid = FALSE;
3133         }
3134     }
3135     else if (tex3d)
3136     {
3137         if (msaa || zbuffer || display || stereo || fmask)
3138         {
3139             ADDR_ASSERT_ALWAYS();
3140             valid = FALSE;
3141         }
3142     }
3143     else
3144     {
3145         ADDR_ASSERT_ALWAYS();
3146         valid = FALSE;
3147     }
3148 
3149     return valid;
3150 }
3151 
3152 /**
3153 ************************************************************************************************************************
3154 *   Gfx9Lib::ValidateSwModeParams
3155 *
3156 *   @brief
3157 *       Validate compute surface info related to swizzle mode
3158 *
3159 *   @return
3160 *       TRUE if parameters are valid, FALSE otherwise
3161 ************************************************************************************************************************
3162 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3163 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3164     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3165 {
3166     BOOL_32 valid = TRUE;
3167 
3168     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3169     {
3170         ADDR_ASSERT_ALWAYS();
3171         valid = FALSE;
3172     }
3173 
3174     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3175     const BOOL_32 msaa   = (pIn->numFrags > 1);
3176     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3177     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3178 
3179     const AddrResourceType rsrcType = pIn->resourceType;
3180     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3181     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3182     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3183 
3184     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3185     const BOOL_32          linear      = IsLinear(swizzle);
3186     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3187     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3188 
3189     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3190     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3191     const BOOL_32             color   = flags.color;
3192     const BOOL_32             texture = flags.texture;
3193     const BOOL_32             display = flags.display || flags.rotated;
3194     const BOOL_32             prt     = flags.prt;
3195     const BOOL_32             fmask   = flags.fmask;
3196 
3197     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3198     const BOOL_32             zMaxMip = tex3d && mipmap &&
3199                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3200 
3201     // Misc check
3202     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3203     {
3204         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3205         ADDR_ASSERT_ALWAYS();
3206         valid = FALSE;
3207     }
3208 
3209     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3210     {
3211         ADDR_ASSERT_ALWAYS();
3212         valid = FALSE;
3213     }
3214 
3215     if ((pIn->bpp == 96) && (linear == FALSE))
3216     {
3217         ADDR_ASSERT_ALWAYS();
3218         valid = FALSE;
3219     }
3220 
3221     if (prt && isNonPrtXor)
3222     {
3223         ADDR_ASSERT_ALWAYS();
3224         valid = FALSE;
3225     }
3226 
3227     // Resource type check
3228     if (tex1d)
3229     {
3230         if (linear == FALSE)
3231         {
3232             ADDR_ASSERT_ALWAYS();
3233             valid = FALSE;
3234         }
3235     }
3236 
3237     // Swizzle type check
3238     if (linear)
3239     {
3240         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3241             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3242         {
3243             ADDR_ASSERT_ALWAYS();
3244             valid = FALSE;
3245         }
3246     }
3247     else if (IsZOrderSwizzle(swizzle))
3248     {
3249         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3250         {
3251             ADDR_ASSERT_ALWAYS();
3252             valid = FALSE;
3253         }
3254     }
3255     else if (IsStandardSwizzle(swizzle))
3256     {
3257         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3258         {
3259             ADDR_ASSERT_ALWAYS();
3260             valid = FALSE;
3261         }
3262     }
3263     else if (IsDisplaySwizzle(swizzle))
3264     {
3265         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3266         {
3267             ADDR_ASSERT_ALWAYS();
3268             valid = FALSE;
3269         }
3270     }
3271     else if (IsRotateSwizzle(swizzle))
3272     {
3273         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3274         {
3275             ADDR_ASSERT_ALWAYS();
3276             valid = FALSE;
3277         }
3278     }
3279     else
3280     {
3281         ADDR_ASSERT_ALWAYS();
3282         valid = FALSE;
3283     }
3284 
3285     // Block type check
3286     if (blk256B)
3287     {
3288         if (prt || zbuffer || tex3d || mipmap || msaa)
3289         {
3290             ADDR_ASSERT_ALWAYS();
3291             valid = FALSE;
3292         }
3293     }
3294 
3295     return valid;
3296 }
3297 
3298 /**
3299 ************************************************************************************************************************
3300 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3301 *
3302 *   @brief
3303 *       Compute surface info sanity check
3304 *
3305 *   @return
3306 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3307 ************************************************************************************************************************
3308 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3309 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3310     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3311 {
3312     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3313 }
3314 
3315 /**
3316 ************************************************************************************************************************
3317 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3318 *
3319 *   @brief
3320 *       Internal function to get suggested surface information for cliet to use
3321 *
3322 *   @return
3323 *       ADDR_E_RETURNCODE
3324 ************************************************************************************************************************
3325 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3326 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3327     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3328     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3329 {
3330     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3331     ElemLib*          pElemLib   = GetElemLib();
3332 
3333     UINT_32 bpp        = pIn->bpp;
3334     UINT_32 width      = Max(pIn->width, 1u);
3335     UINT_32 height     = Max(pIn->height, 1u);
3336     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3337     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3338 
3339     if (pIn->flags.fmask)
3340     {
3341         bpp                = GetFmaskBpp(numSamples, numFrags);
3342         numFrags           = 1;
3343         numSamples         = 1;
3344         pOut->resourceType = ADDR_RSRC_TEX_2D;
3345     }
3346     else
3347     {
3348         // Set format to INVALID will skip this conversion
3349         if (pIn->format != ADDR_FMT_INVALID)
3350         {
3351             UINT_32 expandX, expandY;
3352 
3353             // Don't care for this case
3354             ElemMode elemMode = ADDR_UNCOMPRESSED;
3355 
3356             // Get compression/expansion factors and element mode which indicates compression/expansion
3357             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3358                                             &elemMode,
3359                                             &expandX,
3360                                             &expandY);
3361 
3362             UINT_32 basePitch = 0;
3363             GetElemLib()->AdjustSurfaceInfo(elemMode,
3364                                             expandX,
3365                                             expandY,
3366                                             &bpp,
3367                                             &basePitch,
3368                                             &width,
3369                                             &height);
3370         }
3371 
3372         // The output may get changed for volume(3D) texture resource in future
3373         pOut->resourceType = pIn->resourceType;
3374     }
3375 
3376     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3377     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3378     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3379     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3380 
3381     // Pre sanity check on non swizzle mode parameters
3382     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3383     localIn.flags        = pIn->flags;
3384     localIn.resourceType = pOut->resourceType;
3385     localIn.format       = pIn->format;
3386     localIn.bpp          = bpp;
3387     localIn.width        = width;
3388     localIn.height       = height;
3389     localIn.numSlices    = numSlices;
3390     localIn.numMipLevels = numMipLevels;
3391     localIn.numSamples   = numSamples;
3392     localIn.numFrags     = numFrags;
3393 
3394     if (ValidateNonSwModeParams(&localIn))
3395     {
3396         // Forbid swizzle mode(s) by client setting
3397         ADDR2_SWMODE_SET allowedSwModeSet = {0};
3398         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3399         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3400         allowedSwModeSet.value |=
3401             pIn->forbiddenBlock.macroThin4KB ? 0 :
3402             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3403         allowedSwModeSet.value |=
3404             pIn->forbiddenBlock.macroThick4KB ? 0 :
3405             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3406         allowedSwModeSet.value |=
3407             pIn->forbiddenBlock.macroThin64KB ? 0 :
3408             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3409         allowedSwModeSet.value |=
3410             pIn->forbiddenBlock.macroThick64KB ? 0 :
3411             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3412 
3413         if (pIn->preferredSwSet.value != 0)
3414         {
3415             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3416             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3417             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3418             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3419         }
3420 
3421         if (pIn->noXor)
3422         {
3423             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3424         }
3425 
3426         if (pIn->maxAlign > 0)
3427         {
3428             if (pIn->maxAlign < Size64K)
3429             {
3430                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3431             }
3432 
3433             if (pIn->maxAlign < Size4K)
3434             {
3435                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3436             }
3437 
3438             if (pIn->maxAlign < Size256)
3439             {
3440                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3441             }
3442         }
3443 
3444         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3445         switch (pOut->resourceType)
3446         {
3447             case ADDR_RSRC_TEX_1D:
3448                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3449                 break;
3450 
3451             case ADDR_RSRC_TEX_2D:
3452                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3453 
3454                 if (bpp > 64)
3455                 {
3456                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3457                 }
3458                 break;
3459 
3460             case ADDR_RSRC_TEX_3D:
3461                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3462 
3463                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3464                 {
3465                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3466                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3467                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3468                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3469                 }
3470 
3471                 if ((bpp == 128) && pIn->flags.color)
3472                 {
3473                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3474                 }
3475 
3476                 if (pIn->flags.view3dAs2dArray)
3477                 {
3478                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3479                 }
3480                 break;
3481 
3482             default:
3483                 ADDR_ASSERT_ALWAYS();
3484                 allowedSwModeSet.value = 0;
3485                 break;
3486         }
3487 
3488         if (pIn->format == ADDR_FMT_32_32_32)
3489         {
3490             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3491         }
3492 
3493         if (ElemLib::IsBlockCompressed(pIn->format))
3494         {
3495             if (pIn->flags.texture)
3496             {
3497                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3498             }
3499             else
3500             {
3501                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3502             }
3503         }
3504 
3505         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3506             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3507         {
3508             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3509         }
3510 
3511         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3512         {
3513             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3514 
3515             if (pIn->flags.noMetadata == FALSE)
3516             {
3517                 if (pIn->flags.depth &&
3518                     pIn->flags.texture &&
3519                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3520                 {
3521                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3522                     // equation from wrong address within memory range a tile covered and use the
3523                     // garbage data for compressed Z reading which finally leads to corruption.
3524                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3525                 }
3526 
3527                 if (m_settings.htileCacheRbConflict &&
3528                     (pIn->flags.depth || pIn->flags.stencil) &&
3529                     (numSlices > 1) &&
3530                     (pIn->flags.metaRbUnaligned == FALSE) &&
3531                     (pIn->flags.metaPipeUnaligned == FALSE))
3532                 {
3533                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3534                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3535                 }
3536             }
3537         }
3538 
3539         if (msaa)
3540         {
3541             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3542         }
3543 
3544         if ((numFrags > 1) &&
3545             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3546         {
3547             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3548             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3549         }
3550 
3551         if (numMipLevels > 1)
3552         {
3553             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3554         }
3555 
3556         if (displayRsrc)
3557         {
3558             if (m_settings.isDce12)
3559             {
3560                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3561             }
3562             else if (m_settings.isDcn1)
3563             {
3564                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3565             }
3566             else if (m_settings.isDcn2)
3567             {
3568                 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3569             }
3570             else
3571             {
3572                 ADDR_NOT_IMPLEMENTED();
3573             }
3574         }
3575 
3576         if (allowedSwModeSet.value != 0)
3577         {
3578 #if DEBUG
3579             // Post sanity check, at least AddrLib should accept the output generated by its own
3580             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3581 
3582             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3583             {
3584                 if (validateSwModeSet & 1)
3585                 {
3586                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3587                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3588                 }
3589 
3590                 validateSwModeSet >>= 1;
3591             }
3592 #endif
3593 
3594             pOut->validSwModeSet = allowedSwModeSet;
3595             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3596             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3597             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3598 
3599             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3600 
3601             if (pOut->clientPreferredSwSet.value == 0)
3602             {
3603                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3604             }
3605 
3606             // Apply optional restrictions
3607             if (pIn->flags.needEquation)
3608             {
3609                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3610             }
3611 
3612             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3613             {
3614                 pOut->swizzleMode = ADDR_SW_LINEAR;
3615             }
3616             else
3617             {
3618                 // Always ignore linear swizzle mode if there is other choice.
3619                 allowedSwModeSet.swLinear = 0;
3620 
3621                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3622 
3623                 // Determine block size if there are 2 or more block type candidates
3624                 if (IsPow2(allowedBlockSet.value) == FALSE)
3625                 {
3626                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_LINEAR};
3627 
3628                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3629                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3630                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3631 
3632                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3633                     {
3634                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3635                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3636                     }
3637 
3638                     Dim3d   blkDim[AddrBlockMaxTiledType]  = {0};
3639                     Dim3d   padDim[AddrBlockMaxTiledType]  = {0};
3640                     UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3641 
3642                     const UINT_32 ratioLow           = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3643                     const UINT_32 ratioHi            = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3644                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3645                     UINT_32       minSizeBlk         = AddrBlockMicro;
3646                     UINT_64       minSize            = 0;
3647 
3648                     for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3649                     {
3650                         if (allowedBlockSet.value & (1 << i))
3651                         {
3652                             ComputeBlockDimensionForSurf(&blkDim[i].w,
3653                                                          &blkDim[i].h,
3654                                                          &blkDim[i].d,
3655                                                          bpp,
3656                                                          numFrags,
3657                                                          pOut->resourceType,
3658                                                          swMode[i]);
3659 
3660                             if (displayRsrc)
3661                             {
3662                                 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3663                             }
3664 
3665                             padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3666                             padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3667 
3668                             if ((minSize == 0) ||
3669                                 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3670                             {
3671                                 minSize    = padSize[i];
3672                                 minSizeBlk = i;
3673                             }
3674                         }
3675                     }
3676 
3677                     if ((allowedBlockSet.micro == TRUE)      &&
3678                         (width  <= blkDim[AddrBlockMicro].w) &&
3679                         (height <= blkDim[AddrBlockMicro].h) &&
3680                         (NextPow2(pIn->minSizeAlign) <= Size256))
3681                     {
3682                         minSizeBlk = AddrBlockMicro;
3683                     }
3684 
3685                     if (minSizeBlk == AddrBlockMicro)
3686                     {
3687                         ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3688                         allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3689                     }
3690                     else if (minSizeBlk == AddrBlockThick4KB)
3691                     {
3692                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3693                         allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3694                     }
3695                     else if (minSizeBlk == AddrBlockThin4KB)
3696                     {
3697                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3698                                                   Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3699                     }
3700                     else if (minSizeBlk == AddrBlockThick64KB)
3701                     {
3702                         ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3703                         allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3704                     }
3705                     else
3706                     {
3707                         ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3708                         allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3709                                                   Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3710                     }
3711                 }
3712 
3713                 // Block type should be determined.
3714                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3715 
3716                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3717 
3718                 // Determine swizzle type if there are 2 or more swizzle type candidates
3719                 if (IsPow2(allowedSwSet.value) == FALSE)
3720                 {
3721                     if (ElemLib::IsBlockCompressed(pIn->format))
3722                     {
3723                         if (allowedSwSet.sw_D)
3724                         {
3725                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3726                         }
3727                         else
3728                         {
3729                             ADDR_ASSERT(allowedSwSet.sw_S);
3730                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3731                         }
3732                     }
3733                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3734                     {
3735                         if (allowedSwSet.sw_S)
3736                         {
3737                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3738                         }
3739                         else if (allowedSwSet.sw_D)
3740                         {
3741                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3742                         }
3743                         else
3744                         {
3745                             ADDR_ASSERT(allowedSwSet.sw_R);
3746                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3747                         }
3748                     }
3749                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3750                     {
3751                         if (pIn->flags.color && allowedSwSet.sw_D)
3752                         {
3753                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3754                         }
3755                         else if (allowedSwSet.sw_Z)
3756                         {
3757                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3758                         }
3759                         else
3760                         {
3761                             ADDR_ASSERT(allowedSwSet.sw_S);
3762                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3763                         }
3764                     }
3765                     else
3766                     {
3767                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3768                         {
3769                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3770                         }
3771                         else if (allowedSwSet.sw_D)
3772                         {
3773                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3774                         }
3775                         else if (allowedSwSet.sw_S)
3776                         {
3777                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3778                         }
3779                         else
3780                         {
3781                             ADDR_ASSERT(allowedSwSet.sw_Z);
3782                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3783                         }
3784                     }
3785                 }
3786 
3787                 // Swizzle type should be determined.
3788                 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3789 
3790                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3791                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3792                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3793                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3794             }
3795 
3796             returnCode = ADDR_OK;
3797         }
3798         else
3799         {
3800             // Invalid combination...
3801             ADDR_ASSERT_ALWAYS();
3802         }
3803     }
3804     else
3805     {
3806         // Invalid combination...
3807         ADDR_ASSERT_ALWAYS();
3808     }
3809 
3810     return returnCode;
3811 }
3812 
3813 /**
3814 ************************************************************************************************************************
3815 *   Gfx9Lib::ComputeStereoInfo
3816 *
3817 *   @brief
3818 *       Compute height alignment and right eye pipeBankXor for stereo surface
3819 *
3820 *   @return
3821 *       Error code
3822 *
3823 ************************************************************************************************************************
3824 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3825 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3826     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3827     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3828     UINT_32*                                pHeightAlign
3829     ) const
3830 {
3831     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3832 
3833     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3834 
3835     if (eqIndex < m_numEquations)
3836     {
3837         if (IsXor(pIn->swizzleMode))
3838         {
3839             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3840             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3841             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3842             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3843             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3844             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
3845 
3846             ADDR_ASSERT(maxYCoordBlock256 ==
3847                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3848 
3849             const UINT_32 maxYCoordInBaseEquation =
3850                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3851 
3852             ADDR_ASSERT(maxYCoordInBaseEquation ==
3853                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3854 
3855             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3856 
3857             ADDR_ASSERT(maxYCoordInPipeXor ==
3858                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3859 
3860             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3861                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3862 
3863             ADDR_ASSERT(maxYCoordInBankXor ==
3864                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3865 
3866             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3867 
3868             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3869             {
3870                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3871 
3872                 if (pOut->pStereoInfo != NULL)
3873                 {
3874                     pOut->pStereoInfo->rightSwizzle = 0;
3875 
3876                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3877                     {
3878                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3879                         {
3880                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3881                         }
3882 
3883                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3884                         {
3885                             pOut->pStereoInfo->rightSwizzle |=
3886                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3887                         }
3888 
3889                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3890                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3891                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3892                     }
3893                 }
3894             }
3895         }
3896     }
3897     else
3898     {
3899         ADDR_ASSERT_ALWAYS();
3900         returnCode = ADDR_ERROR;
3901     }
3902 
3903     return returnCode;
3904 }
3905 
3906 /**
3907 ************************************************************************************************************************
3908 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3909 *
3910 *   @brief
3911 *       Internal function to calculate alignment for tiled surface
3912 *
3913 *   @return
3914 *       ADDR_E_RETURNCODE
3915 ************************************************************************************************************************
3916 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3917 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3918      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3919      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3920      ) const
3921 {
3922     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3923                                                                 &pOut->blockHeight,
3924                                                                 &pOut->blockSlices,
3925                                                                 pIn->bpp,
3926                                                                 pIn->numFrags,
3927                                                                 pIn->resourceType,
3928                                                                 pIn->swizzleMode);
3929 
3930     if (returnCode == ADDR_OK)
3931     {
3932         UINT_32 pitchAlignInElement = pOut->blockWidth;
3933 
3934         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3935             (pIn->flags.display || pIn->flags.rotated) &&
3936             (pIn->numMipLevels <= 1) &&
3937             (pIn->numSamples <= 1) &&
3938             (pIn->numFrags <= 1))
3939         {
3940             // Display engine needs pitch align to be at least 32 pixels.
3941             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3942         }
3943 
3944         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3945 
3946         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3947         {
3948             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3949             {
3950                 returnCode = ADDR_INVALIDPARAMS;
3951             }
3952             else if (pIn->pitchInElement < pOut->pitch)
3953             {
3954                 returnCode = ADDR_INVALIDPARAMS;
3955             }
3956             else
3957             {
3958                 pOut->pitch = pIn->pitchInElement;
3959             }
3960         }
3961 
3962         UINT_32 heightAlign = 0;
3963 
3964         if (pIn->flags.qbStereo)
3965         {
3966             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3967         }
3968 
3969         if (returnCode == ADDR_OK)
3970         {
3971             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3972 
3973             if (heightAlign > 1)
3974             {
3975                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3976             }
3977 
3978             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3979 
3980             pOut->epitchIsHeight   = FALSE;
3981             pOut->mipChainInTail   = FALSE;
3982             pOut->firstMipIdInTail = pIn->numMipLevels;
3983 
3984             pOut->mipChainPitch    = pOut->pitch;
3985             pOut->mipChainHeight   = pOut->height;
3986             pOut->mipChainSlice    = pOut->numSlices;
3987 
3988             if (pIn->numMipLevels > 1)
3989             {
3990                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3991                                                          pIn->swizzleMode,
3992                                                          pIn->bpp,
3993                                                          pIn->width,
3994                                                          pIn->height,
3995                                                          pIn->numSlices,
3996                                                          pOut->blockWidth,
3997                                                          pOut->blockHeight,
3998                                                          pOut->blockSlices,
3999                                                          pIn->numMipLevels,
4000                                                          pOut->pMipInfo);
4001 
4002                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4003 
4004                 if (endingMipId == 0)
4005                 {
4006                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4007                                                            pIn->swizzleMode,
4008                                                            pOut->blockWidth,
4009                                                            pOut->blockHeight,
4010                                                            pOut->blockSlices);
4011 
4012                     pOut->epitchIsHeight = TRUE;
4013                     pOut->pitch          = tailMaxDim.w;
4014                     pOut->height         = tailMaxDim.h;
4015                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4016                                            tailMaxDim.d : pIn->numSlices;
4017                     pOut->mipChainInTail = TRUE;
4018                 }
4019                 else
4020                 {
4021                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4022                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4023 
4024                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4025                                                            pIn->swizzleMode,
4026                                                            mip0WidthInBlk,
4027                                                            mip0HeightInBlk,
4028                                                            pOut->numSlices / pOut->blockSlices);
4029                     if (majorMode == ADDR_MAJOR_Y)
4030                     {
4031                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4032 
4033                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4034                         {
4035                             mip1WidthInBlk++;
4036                         }
4037 
4038                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4039 
4040                         pOut->epitchIsHeight = FALSE;
4041                     }
4042                     else
4043                     {
4044                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4045 
4046                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4047                         {
4048                             mip1HeightInBlk++;
4049                         }
4050 
4051                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4052 
4053                         pOut->epitchIsHeight = TRUE;
4054                     }
4055                 }
4056 
4057                 if (pOut->pMipInfo != NULL)
4058                 {
4059                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4060 
4061                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4062                     {
4063                         Dim3d   mipStartPos          = {0};
4064                         UINT_32 mipTailOffsetInBytes = 0;
4065 
4066                         mipStartPos = GetMipStartPos(pIn->resourceType,
4067                                                      pIn->swizzleMode,
4068                                                      pOut->pitch,
4069                                                      pOut->height,
4070                                                      pOut->numSlices,
4071                                                      pOut->blockWidth,
4072                                                      pOut->blockHeight,
4073                                                      pOut->blockSlices,
4074                                                      i,
4075                                                      elementBytesLog2,
4076                                                      &mipTailOffsetInBytes);
4077 
4078                         UINT_32 pitchInBlock     =
4079                             pOut->mipChainPitch / pOut->blockWidth;
4080                         UINT_32 sliceInBlock     =
4081                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4082                         UINT_64 blockIndex       =
4083                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4084                         UINT_64 macroBlockOffset =
4085                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4086 
4087                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4088                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4089                     }
4090                 }
4091             }
4092             else if (pOut->pMipInfo != NULL)
4093             {
4094                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4095                 pOut->pMipInfo[0].height = pOut->height;
4096                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4097                 pOut->pMipInfo[0].offset = 0;
4098             }
4099 
4100             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4101                               (pIn->bpp >> 3) * pIn->numFrags;
4102             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4103             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4104 
4105             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4106                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4107                 (pIn->flags.texture == TRUE) &&
4108                 (pIn->flags.noMetadata == FALSE) &&
4109                 (pIn->flags.metaPipeUnaligned == FALSE))
4110             {
4111                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4112                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4113                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4114                 // them, which may cause invalid metadata to be fetched.
4115                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4116             }
4117 
4118             if (pIn->flags.prt)
4119             {
4120                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4121             }
4122         }
4123     }
4124 
4125     return returnCode;
4126 }
4127 
4128 /**
4129 ************************************************************************************************************************
4130 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4131 *
4132 *   @brief
4133 *       Internal function to calculate alignment for linear surface
4134 *
4135 *   @return
4136 *       ADDR_E_RETURNCODE
4137 ************************************************************************************************************************
4138 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4139 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4140      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4141      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4142      ) const
4143 {
4144     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4145     UINT_32           pitch        = 0;
4146     UINT_32           actualHeight = 0;
4147     UINT_32           elementBytes = pIn->bpp >> 3;
4148     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4149 
4150     if (IsTex1d(pIn->resourceType))
4151     {
4152         if (pIn->height > 1)
4153         {
4154             returnCode = ADDR_INVALIDPARAMS;
4155         }
4156         else
4157         {
4158             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4159 
4160             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4161             actualHeight = pIn->numMipLevels;
4162 
4163             if (pIn->flags.prt == FALSE)
4164             {
4165                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4166                                                         &pitch, &actualHeight);
4167             }
4168 
4169             if (returnCode == ADDR_OK)
4170             {
4171                 if (pOut->pMipInfo != NULL)
4172                 {
4173                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4174                     {
4175                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4176                         pOut->pMipInfo[i].pitch  = pitch;
4177                         pOut->pMipInfo[i].height = 1;
4178                         pOut->pMipInfo[i].depth  = 1;
4179                     }
4180                 }
4181             }
4182         }
4183     }
4184     else
4185     {
4186         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4187     }
4188 
4189     if ((pitch == 0) || (actualHeight == 0))
4190     {
4191         returnCode = ADDR_INVALIDPARAMS;
4192     }
4193 
4194     if (returnCode == ADDR_OK)
4195     {
4196         pOut->pitch          = pitch;
4197         pOut->height         = pIn->height;
4198         pOut->numSlices      = pIn->numSlices;
4199         pOut->mipChainPitch  = pitch;
4200         pOut->mipChainHeight = actualHeight;
4201         pOut->mipChainSlice  = pOut->numSlices;
4202         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4203         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4204         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4205         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4206         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4207         pOut->blockHeight    = 1;
4208         pOut->blockSlices    = 1;
4209     }
4210 
4211     // Post calculation validate
4212     ADDR_ASSERT(pOut->sliceSize > 0);
4213 
4214     return returnCode;
4215 }
4216 
4217 /**
4218 ************************************************************************************************************************
4219 *   Gfx9Lib::GetMipChainInfo
4220 *
4221 *   @brief
4222 *       Internal function to get out information about mip chain
4223 *
4224 *   @return
4225 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4226 ************************************************************************************************************************
4227 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4228 UINT_32 Gfx9Lib::GetMipChainInfo(
4229     AddrResourceType  resourceType,
4230     AddrSwizzleMode   swizzleMode,
4231     UINT_32           bpp,
4232     UINT_32           mip0Width,
4233     UINT_32           mip0Height,
4234     UINT_32           mip0Depth,
4235     UINT_32           blockWidth,
4236     UINT_32           blockHeight,
4237     UINT_32           blockDepth,
4238     UINT_32           numMipLevel,
4239     ADDR2_MIP_INFO*   pMipInfo) const
4240 {
4241     const Dim3d tailMaxDim =
4242         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4243 
4244     UINT_32 mipPitch         = mip0Width;
4245     UINT_32 mipHeight        = mip0Height;
4246     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4247     UINT_32 offset           = 0;
4248     UINT_32 firstMipIdInTail = numMipLevel;
4249     BOOL_32 inTail           = FALSE;
4250     BOOL_32 finalDim         = FALSE;
4251     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4252     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4253 
4254     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4255     {
4256         if (inTail)
4257         {
4258             if (finalDim == FALSE)
4259             {
4260                 UINT_32 mipSize;
4261 
4262                 if (is3dThick)
4263                 {
4264                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4265                 }
4266                 else
4267                 {
4268                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4269                 }
4270 
4271                 if (mipSize <= 256)
4272                 {
4273                     UINT_32 index = Log2(bpp >> 3);
4274 
4275                     if (is3dThick)
4276                     {
4277                         mipPitch  = Block256_3dZ[index].w;
4278                         mipHeight = Block256_3dZ[index].h;
4279                         mipDepth  = Block256_3dZ[index].d;
4280                     }
4281                     else
4282                     {
4283                         mipPitch  = Block256_2d[index].w;
4284                         mipHeight = Block256_2d[index].h;
4285                     }
4286 
4287                     finalDim = TRUE;
4288                 }
4289             }
4290         }
4291         else
4292         {
4293             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4294                                  mipPitch, mipHeight, mipDepth);
4295 
4296             if (inTail)
4297             {
4298                 firstMipIdInTail = mipId;
4299                 mipPitch         = tailMaxDim.w;
4300                 mipHeight        = tailMaxDim.h;
4301 
4302                 if (is3dThick)
4303                 {
4304                     mipDepth = tailMaxDim.d;
4305                 }
4306             }
4307             else
4308             {
4309                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4310                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4311 
4312                 if (is3dThick)
4313                 {
4314                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4315                 }
4316             }
4317         }
4318 
4319         if (pMipInfo != NULL)
4320         {
4321             pMipInfo[mipId].pitch  = mipPitch;
4322             pMipInfo[mipId].height = mipHeight;
4323             pMipInfo[mipId].depth  = mipDepth;
4324             pMipInfo[mipId].offset = offset;
4325         }
4326 
4327         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4328 
4329         if (finalDim)
4330         {
4331             if (is3dThin)
4332             {
4333                 mipDepth = Max(mipDepth >> 1, 1u);
4334             }
4335         }
4336         else
4337         {
4338             mipPitch  = Max(mipPitch >> 1, 1u);
4339             mipHeight = Max(mipHeight >> 1, 1u);
4340 
4341             if (is3dThick || is3dThin)
4342             {
4343                 mipDepth = Max(mipDepth >> 1, 1u);
4344             }
4345         }
4346     }
4347 
4348     return firstMipIdInTail;
4349 }
4350 
4351 /**
4352 ************************************************************************************************************************
4353 *   Gfx9Lib::GetMetaMiptailInfo
4354 *
4355 *   @brief
4356 *       Get mip tail coordinate information.
4357 *
4358 *   @return
4359 *       N/A
4360 ************************************************************************************************************************
4361 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4362 VOID Gfx9Lib::GetMetaMiptailInfo(
4363     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4364     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4365     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4366     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4367     ) const
4368 {
4369     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4370     UINT_32 mipWidth  = pMetaBlkDim->w;
4371     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4372     UINT_32 mipDepth  = pMetaBlkDim->d;
4373     UINT_32 minInc;
4374 
4375     if (isThick)
4376     {
4377         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4378     }
4379     else if (pMetaBlkDim->h >= 1024)
4380     {
4381         minInc = 256;
4382     }
4383     else if (pMetaBlkDim->h == 512)
4384     {
4385         minInc = 128;
4386     }
4387     else
4388     {
4389         minInc = 64;
4390     }
4391 
4392     UINT_32 blk32MipId = 0xFFFFFFFF;
4393 
4394     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4395     {
4396         pInfo[mip].inMiptail = TRUE;
4397         pInfo[mip].startX = mipCoord.w;
4398         pInfo[mip].startY = mipCoord.h;
4399         pInfo[mip].startZ = mipCoord.d;
4400         pInfo[mip].width = mipWidth;
4401         pInfo[mip].height = mipHeight;
4402         pInfo[mip].depth = mipDepth;
4403 
4404         if (mipWidth <= 32)
4405         {
4406             if (blk32MipId == 0xFFFFFFFF)
4407             {
4408                 blk32MipId = mip;
4409             }
4410 
4411             mipCoord.w = pInfo[blk32MipId].startX;
4412             mipCoord.h = pInfo[blk32MipId].startY;
4413             mipCoord.d = pInfo[blk32MipId].startZ;
4414 
4415             switch (mip - blk32MipId)
4416             {
4417                 case 0:
4418                     mipCoord.w += 32;       // 16x16
4419                     break;
4420                 case 1:
4421                     mipCoord.h += 32;       // 8x8
4422                     break;
4423                 case 2:
4424                     mipCoord.h += 32;       // 4x4
4425                     mipCoord.w += 16;
4426                     break;
4427                 case 3:
4428                     mipCoord.h += 32;       // 2x2
4429                     mipCoord.w += 32;
4430                     break;
4431                 case 4:
4432                     mipCoord.h += 32;       // 1x1
4433                     mipCoord.w += 48;
4434                     break;
4435                 // The following are for BC/ASTC formats
4436                 case 5:
4437                     mipCoord.h += 48;       // 1/2 x 1/2
4438                     break;
4439                 case 6:
4440                     mipCoord.h += 48;       // 1/4 x 1/4
4441                     mipCoord.w += 16;
4442                     break;
4443                 case 7:
4444                     mipCoord.h += 48;       // 1/8 x 1/8
4445                     mipCoord.w += 32;
4446                     break;
4447                 case 8:
4448                     mipCoord.h += 48;       // 1/16 x 1/16
4449                     mipCoord.w += 48;
4450                     break;
4451                 default:
4452                     ADDR_ASSERT_ALWAYS();
4453                     break;
4454             }
4455 
4456             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4457             mipHeight = mipWidth;
4458 
4459             if (isThick)
4460             {
4461                 mipDepth = mipWidth;
4462             }
4463         }
4464         else
4465         {
4466             if (mipWidth <= minInc)
4467             {
4468                 // if we're below the minimal increment...
4469                 if (isThick)
4470                 {
4471                     // For 3d, just go in z direction
4472                     mipCoord.d += mipDepth;
4473                 }
4474                 else
4475                 {
4476                     // For 2d, first go across, then down
4477                     if ((mipWidth * 2) == minInc)
4478                     {
4479                         // if we're 2 mips below, that's when we go back in x, and down in y
4480                         mipCoord.w -= minInc;
4481                         mipCoord.h += minInc;
4482                     }
4483                     else
4484                     {
4485                         // otherwise, just go across in x
4486                         mipCoord.w += minInc;
4487                     }
4488                 }
4489             }
4490             else
4491             {
4492                 // On even mip, go down, otherwise, go across
4493                 if (mip & 1)
4494                 {
4495                     mipCoord.w += mipWidth;
4496                 }
4497                 else
4498                 {
4499                     mipCoord.h += mipHeight;
4500                 }
4501             }
4502             // Divide the width by 2
4503             mipWidth >>= 1;
4504             // After the first mip in tail, the mip is always a square
4505             mipHeight = mipWidth;
4506             // ...or for 3d, a cube
4507             if (isThick)
4508             {
4509                 mipDepth = mipWidth;
4510             }
4511         }
4512     }
4513 }
4514 
4515 /**
4516 ************************************************************************************************************************
4517 *   Gfx9Lib::GetMipStartPos
4518 *
4519 *   @brief
4520 *       Internal function to get out information about mip logical start position
4521 *
4522 *   @return
4523 *       logical start position in macro block width/heith/depth of one mip level within one slice
4524 ************************************************************************************************************************
4525 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4526 Dim3d Gfx9Lib::GetMipStartPos(
4527     AddrResourceType  resourceType,
4528     AddrSwizzleMode   swizzleMode,
4529     UINT_32           width,
4530     UINT_32           height,
4531     UINT_32           depth,
4532     UINT_32           blockWidth,
4533     UINT_32           blockHeight,
4534     UINT_32           blockDepth,
4535     UINT_32           mipId,
4536     UINT_32           log2ElementBytes,
4537     UINT_32*          pMipTailBytesOffset) const
4538 {
4539     Dim3d       mipStartPos = {0};
4540     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4541 
4542     // Report mip in tail if Mip0 is already in mip tail
4543     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4544     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4545     UINT_32 mipIndexInTail = mipId;
4546 
4547     if (inMipTail == FALSE)
4548     {
4549         // Mip 0 dimension, unit in block
4550         UINT_32 mipWidthInBlk   = width  / blockWidth;
4551         UINT_32 mipHeightInBlk  = height / blockHeight;
4552         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4553         AddrMajorMode majorMode = GetMajorMode(resourceType,
4554                                                swizzleMode,
4555                                                mipWidthInBlk,
4556                                                mipHeightInBlk,
4557                                                mipDepthInBlk);
4558 
4559         UINT_32 endingMip = mipId + 1;
4560 
4561         for (UINT_32 i = 1; i <= mipId; i++)
4562         {
4563             if ((i == 1) || (i == 3))
4564             {
4565                 if (majorMode == ADDR_MAJOR_Y)
4566                 {
4567                     mipStartPos.w += mipWidthInBlk;
4568                 }
4569                 else
4570                 {
4571                     mipStartPos.h += mipHeightInBlk;
4572                 }
4573             }
4574             else
4575             {
4576                 if (majorMode == ADDR_MAJOR_X)
4577                 {
4578                    mipStartPos.w += mipWidthInBlk;
4579                 }
4580                 else if (majorMode == ADDR_MAJOR_Y)
4581                 {
4582                    mipStartPos.h += mipHeightInBlk;
4583                 }
4584                 else
4585                 {
4586                    mipStartPos.d += mipDepthInBlk;
4587                 }
4588             }
4589 
4590             BOOL_32 inTail = FALSE;
4591 
4592             if (IsThick(resourceType, swizzleMode))
4593             {
4594                 UINT_32 dim = log2BlkSize % 3;
4595 
4596                 if (dim == 0)
4597                 {
4598                     inTail =
4599                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4600                 }
4601                 else if (dim == 1)
4602                 {
4603                     inTail =
4604                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4605                 }
4606                 else
4607                 {
4608                     inTail =
4609                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4610                 }
4611             }
4612             else
4613             {
4614                 if (log2BlkSize & 1)
4615                 {
4616                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4617                 }
4618                 else
4619                 {
4620                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4621                 }
4622             }
4623 
4624             if (inTail)
4625             {
4626                 endingMip = i;
4627                 break;
4628             }
4629 
4630             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4631             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4632             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4633         }
4634 
4635         if (mipId >= endingMip)
4636         {
4637             inMipTail      = TRUE;
4638             mipIndexInTail = mipId - endingMip;
4639         }
4640     }
4641 
4642     if (inMipTail)
4643     {
4644         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4645         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4646         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4647     }
4648 
4649     return mipStartPos;
4650 }
4651 
4652 /**
4653 ************************************************************************************************************************
4654 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4655 *
4656 *   @brief
4657 *       Internal function to calculate address from coord for tiled swizzle surface
4658 *
4659 *   @return
4660 *       ADDR_E_RETURNCODE
4661 ************************************************************************************************************************
4662 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4663 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4664      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4665      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4666      ) const
4667 {
4668     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4669     localIn.swizzleMode  = pIn->swizzleMode;
4670     localIn.flags        = pIn->flags;
4671     localIn.resourceType = pIn->resourceType;
4672     localIn.bpp          = pIn->bpp;
4673     localIn.width        = Max(pIn->unalignedWidth, 1u);
4674     localIn.height       = Max(pIn->unalignedHeight, 1u);
4675     localIn.numSlices    = Max(pIn->numSlices, 1u);
4676     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4677     localIn.numSamples   = Max(pIn->numSamples, 1u);
4678     localIn.numFrags     = Max(pIn->numFrags, 1u);
4679     if (localIn.numMipLevels <= 1)
4680     {
4681         localIn.pitchInElement = pIn->pitchInElement;
4682     }
4683 
4684     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4685     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4686 
4687     BOOL_32 valid = (returnCode == ADDR_OK) &&
4688                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4689                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4690                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4691 
4692     if (valid)
4693     {
4694         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4695         Dim3d   mipStartPos        = {0};
4696         UINT_32 mipTailBytesOffset = 0;
4697 
4698         if (pIn->numMipLevels > 1)
4699         {
4700             // Mip-map chain cannot be MSAA surface
4701             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4702 
4703             mipStartPos = GetMipStartPos(pIn->resourceType,
4704                                          pIn->swizzleMode,
4705                                          localOut.pitch,
4706                                          localOut.height,
4707                                          localOut.numSlices,
4708                                          localOut.blockWidth,
4709                                          localOut.blockHeight,
4710                                          localOut.blockSlices,
4711                                          pIn->mipId,
4712                                          log2ElementBytes,
4713                                          &mipTailBytesOffset);
4714         }
4715 
4716         UINT_32 interleaveOffset = 0;
4717         UINT_32 pipeBits = 0;
4718         UINT_32 pipeXor = 0;
4719         UINT_32 bankBits = 0;
4720         UINT_32 bankXor = 0;
4721 
4722         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4723         {
4724             UINT_32 blockOffset = 0;
4725             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4726 
4727             if (IsZOrderSwizzle(pIn->swizzleMode))
4728             {
4729                 // Morton generation
4730                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4731                 {
4732                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4733                     UINT_32 mortBits = totalLowBits / 2;
4734                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4735                     // Are 9 bits enough?
4736                     UINT_32 highBitsValue =
4737                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4738                     blockOffset = lowBitsValue | highBitsValue;
4739                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4740                 }
4741                 else
4742                 {
4743                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4744                 }
4745 
4746                 // Fill LSBs with sample bits
4747                 if (pIn->numSamples > 1)
4748                 {
4749                     blockOffset *= pIn->numSamples;
4750                     blockOffset |= pIn->sample;
4751                 }
4752 
4753                 // Shift according to BytesPP
4754                 blockOffset <<= log2ElementBytes;
4755             }
4756             else
4757             {
4758                 // Micro block offset
4759                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4760                 blockOffset = microBlockOffset;
4761 
4762                 // Micro block dimension
4763                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4764                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4765                 // Morton generation, does 12 bit enough?
4766                 blockOffset |=
4767                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4768 
4769                 // Sample bits start location
4770                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4771                 // Join sample bits information to the highest Macro block bits
4772                 if (IsNonPrtXor(pIn->swizzleMode))
4773                 {
4774                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4775                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4776                 }
4777                 else
4778                 {
4779                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4780                     // after this op, the blockOffset only contains log2 Macro block size bits
4781                     blockOffset %= (1 << sampleStart);
4782                     blockOffset |= (pIn->sample << sampleStart);
4783                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4784                 }
4785             }
4786 
4787             if (IsXor(pIn->swizzleMode))
4788             {
4789                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4790                 if (IsPrt(pIn->swizzleMode))
4791                 {
4792                     blockOffset &= ((1 << log2BlkSize) - 1);
4793                 }
4794 
4795                 // Preserve offset inside pipe interleave
4796                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4797                 blockOffset >>= m_pipeInterleaveLog2;
4798 
4799                 // Pipe/Se xor bits
4800                 pipeBits = GetPipeXorBits(log2BlkSize);
4801                 // Pipe xor
4802                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4803                 blockOffset >>= pipeBits;
4804 
4805                 // Bank xor bits
4806                 bankBits = GetBankXorBits(log2BlkSize);
4807                 // Bank Xor
4808                 bankXor = FoldXor2d(blockOffset, bankBits);
4809                 blockOffset >>= bankBits;
4810 
4811                 // Put all the part back together
4812                 blockOffset <<= bankBits;
4813                 blockOffset |= bankXor;
4814                 blockOffset <<= pipeBits;
4815                 blockOffset |= pipeXor;
4816                 blockOffset <<= m_pipeInterleaveLog2;
4817                 blockOffset |= interleaveOffset;
4818             }
4819 
4820             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4821             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4822 
4823             blockOffset |= mipTailBytesOffset;
4824 
4825             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4826             {
4827                 // Apply slice xor if not MSAA/PRT
4828                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4829                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4830                                 (m_pipeInterleaveLog2 + pipeBits));
4831             }
4832 
4833             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4834                                                   bankBits, pipeBits, &blockOffset);
4835 
4836             blockOffset %= (1 << log2BlkSize);
4837 
4838             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4839             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4840             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4841             UINT_64 macroBlockIndex =
4842                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4843                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4844                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4845 
4846             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4847         }
4848         else
4849         {
4850             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4851 
4852             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4853 
4854             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4855                                               (pIn->y / microBlockDim.h),
4856                                               (pIn->slice / microBlockDim.d),
4857                                               8);
4858 
4859             blockOffset <<= 10;
4860             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4861 
4862             if (IsXor(pIn->swizzleMode))
4863             {
4864                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4865                 if (IsPrt(pIn->swizzleMode))
4866                 {
4867                     blockOffset &= ((1 << log2BlkSize) - 1);
4868                 }
4869 
4870                 // Preserve offset inside pipe interleave
4871                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4872                 blockOffset >>= m_pipeInterleaveLog2;
4873 
4874                 // Pipe/Se xor bits
4875                 pipeBits = GetPipeXorBits(log2BlkSize);
4876                 // Pipe xor
4877                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4878                 blockOffset >>= pipeBits;
4879 
4880                 // Bank xor bits
4881                 bankBits = GetBankXorBits(log2BlkSize);
4882                 // Bank Xor
4883                 bankXor = FoldXor3d(blockOffset, bankBits);
4884                 blockOffset >>= bankBits;
4885 
4886                 // Put all the part back together
4887                 blockOffset <<= bankBits;
4888                 blockOffset |= bankXor;
4889                 blockOffset <<= pipeBits;
4890                 blockOffset |= pipeXor;
4891                 blockOffset <<= m_pipeInterleaveLog2;
4892                 blockOffset |= interleaveOffset;
4893             }
4894 
4895             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4896             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4897             blockOffset |= mipTailBytesOffset;
4898 
4899             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4900                                                   bankBits, pipeBits, &blockOffset);
4901 
4902             blockOffset %= (1 << log2BlkSize);
4903 
4904             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4905             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4906             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4907 
4908             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4909             UINT_32 sliceSizeInBlock =
4910                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4911             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4912 
4913             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4914         }
4915     }
4916     else
4917     {
4918         returnCode = ADDR_INVALIDPARAMS;
4919     }
4920 
4921     return returnCode;
4922 }
4923 
4924 /**
4925 ************************************************************************************************************************
4926 *   Gfx9Lib::ComputeSurfaceInfoLinear
4927 *
4928 *   @brief
4929 *       Internal function to calculate padding for linear swizzle 2D/3D surface
4930 *
4931 *   @return
4932 *       N/A
4933 ************************************************************************************************************************
4934 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const4935 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4936     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
4937     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4938     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4939     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
4940     ) const
4941 {
4942     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4943 
4944     UINT_32 elementBytes        = pIn->bpp >> 3;
4945     UINT_32 pitchAlignInElement = 0;
4946 
4947     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4948     {
4949         ADDR_ASSERT(pIn->numMipLevels <= 1);
4950         ADDR_ASSERT(pIn->numSlices <= 1);
4951         pitchAlignInElement = 1;
4952     }
4953     else
4954     {
4955         pitchAlignInElement = (256 / elementBytes);
4956     }
4957 
4958     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4959     UINT_32 slice0PaddedHeight = pIn->height;
4960 
4961     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4962                                             &mipChainWidth, &slice0PaddedHeight);
4963 
4964     if (returnCode == ADDR_OK)
4965     {
4966         UINT_32 mipChainHeight = 0;
4967         UINT_32 mipHeight      = pIn->height;
4968         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4969 
4970         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4971         {
4972             if (pMipInfo != NULL)
4973             {
4974                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4975                 pMipInfo[i].pitch  = mipChainWidth;
4976                 pMipInfo[i].height = mipHeight;
4977                 pMipInfo[i].depth  = mipDepth;
4978             }
4979 
4980             mipChainHeight += mipHeight;
4981             mipHeight = RoundHalf(mipHeight);
4982             mipHeight = Max(mipHeight, 1u);
4983         }
4984 
4985         *pMipmap0PaddedWidth = mipChainWidth;
4986         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4987     }
4988 
4989     return returnCode;
4990 }
4991 
4992 /**
4993 ************************************************************************************************************************
4994 *   Gfx9Lib::ComputeThinBlockDimension
4995 *
4996 *   @brief
4997 *       Internal function to get thin block width/height/depth in element from surface input params.
4998 *
4999 *   @return
5000 *       N/A
5001 ************************************************************************************************************************
5002 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5003 VOID Gfx9Lib::ComputeThinBlockDimension(
5004     UINT_32*         pWidth,
5005     UINT_32*         pHeight,
5006     UINT_32*         pDepth,
5007     UINT_32          bpp,
5008     UINT_32          numSamples,
5009     AddrResourceType resourceType,
5010     AddrSwizzleMode  swizzleMode) const
5011 {
5012     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5013 
5014     const UINT_32 log2BlkSize              = GetBlockSizeLog2(swizzleMode);
5015     const UINT_32 eleBytes                 = bpp >> 3;
5016     const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5017     const UINT_32 log2blkSizeIn256B        = log2BlkSize - 8;
5018     const UINT_32 widthAmp                 = log2blkSizeIn256B / 2;
5019     const UINT_32 heightAmp                = log2blkSizeIn256B - widthAmp;
5020 
5021     ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5022 
5023     *pWidth  = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5024     *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5025     *pDepth  = 1;
5026 
5027     if (numSamples > 1)
5028     {
5029         const UINT_32 log2sample = Log2(numSamples);
5030         const UINT_32 q          = log2sample >> 1;
5031         const UINT_32 r          = log2sample & 1;
5032 
5033         if (log2BlkSize & 1)
5034         {
5035             *pWidth  >>= q;
5036             *pHeight >>= (q + r);
5037         }
5038         else
5039         {
5040             *pWidth  >>= (q + r);
5041             *pHeight >>= q;
5042         }
5043     }
5044 }
5045 
5046 } // V2
5047 } // Addr
5048