• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx9addrlib.cpp
12 * @brief Contains the implementation for the Gfx9Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx9addrlib.h"
17 
18 #include "gfx9_gb_reg.h"
19 
20 #include "amdgpu_asic_addr.h"
21 
22 ////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////
24 
25 namespace Addr
26 {
27 
28 /**
29 ************************************************************************************************************************
30 *   Gfx9HwlInit
31 *
32 *   @brief
33 *       Creates an Gfx9Lib object.
34 *
35 *   @return
36 *       Returns an Gfx9Lib object pointer.
37 ************************************************************************************************************************
38 */
Gfx9HwlInit(const Client * pClient)39 Addr::Lib* Gfx9HwlInit(const Client* pClient)
40 {
41     return V2::Gfx9Lib::CreateObj(pClient);
42 }
43 
44 namespace V2
45 {
46 
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 //                               Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50 
// Per-swizzle-mode flag table for GFX9: one row per swizzle mode (the array is sized by ADDR_SW_MAX_TYPE), one
// column per flag in the order given by the header comment on the opening brace line. The Gfx9Lib constructor
// copies this table into m_swizzleModeTable.
//
// Reading the rows: every non-reserved mode sets exactly one block-size column (Linear/256B/4KB/64KB); the
// non-linear modes additionally set one of Z/Std/Disp/Rot. The *_X modes also set XOR, and the *_T modes set both
// XOR and T. Rows marked "Reserved" are all zero. ADDR_SW_LINEAR_GENERAL carries the same flags as ADDR_SW_LINEAR.
51 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
52 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
53     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
54     {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
55     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
56     {{0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_256B_R
57 
58     {{0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_Z
59     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
60     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
61     {{0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_4KB_R
62 
63     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_Z
64     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
65     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
66     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_64KB_R
67 
68     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
71     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
72 
73     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_Z_T
74     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
75     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
76     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}}, // ADDR_SW_64KB_R_T
77 
78     {{0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_Z_x
79     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_x
80     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_x
81     {{0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_4KB_R_x
82 
83     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
84     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
85     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
86     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_64KB_R_X
87 
88     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
89     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
90     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
91     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
92     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
93 };
94 
// Static lookup tables. None of them is referenced in this part of the file, so the notes below describe only
// what the data shows.
//
// MipTailOffset256B: 16 entries, strictly decreasing from 2048 to 0.
// NOTE(review): presumably per-mip offsets inside a mip tail, in 256-byte units and indexed by position in the
// tail -- confirm against the code that reads it.
95 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
96 
// Block256_3dS: five {w, h, d} entries; only the width changes (16 down to 1), height and depth stay 4.
// NOTE(review): presumably the 256B block dimensions for 3D "S" swizzle, indexed by log2(bytes per element) -- confirm.
97 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
98 
// Block256_3dZ: five {w, h, d} entries shrinking across all three dimensions.
// NOTE(review): presumably the 256B block dimensions for 3D "Z" swizzle, indexed the same way -- confirm.
99 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
100 
101 /**
102 ************************************************************************************************************************
103 *   Gfx9Lib::Gfx9Lib
104 *
105 *   @brief
106 *       Constructor
107 *
108 ************************************************************************************************************************
109 */
Gfx9Lib(const Client * pClient)110 Gfx9Lib::Gfx9Lib(const Client* pClient)
111     :
112     Lib(pClient)
113 {
114     memset(&m_settings, 0, sizeof(m_settings));
115     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
116     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
117     m_metaEqOverrideIndex = 0;
118 }
119 
120 /**
121 ************************************************************************************************************************
122 *   Gfx9Lib::~Gfx9Lib
123 *
124 *   @brief
125 *       Destructor
126 ************************************************************************************************************************
127 */
~Gfx9Lib()128 Gfx9Lib::~Gfx9Lib()
129 {
130 }
131 
132 /**
133 ************************************************************************************************************************
134 *   Gfx9Lib::HwlComputeHtileInfo
135 *
136 *   @brief
137 *       Interface function stub of AddrComputeHtilenfo
138 *
139 *   @return
140 *       ADDR_E_RETURNCODE
141 ************************************************************************************************************************
142 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const143 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
144     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
145     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
146     ) const
147 {
148     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
149                                                        pIn->swizzleMode);
150 
151     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
152 
153     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
154 
155     if ((numPipeTotal == 1) && (numRbTotal == 1))
156     {
157         numCompressBlkPerMetaBlkLog2 = 10;
158     }
159     else
160     {
161         if (m_settings.applyAliasFix)
162         {
163             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
164         }
165         else
166         {
167             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
168         }
169     }
170 
171     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
172 
173     Dim3d   metaBlkDim   = {8, 8, 1};
174     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
175     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
176     UINT_32 heightAmp    = totalAmpBits - widthAmp;
177     metaBlkDim.w <<= widthAmp;
178     metaBlkDim.h <<= heightAmp;
179 
180 #if DEBUG
181     Dim3d metaBlkDimDbg = {8, 8, 1};
182     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
183     {
184         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
185             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
186         {
187             metaBlkDimDbg.h <<= 1;
188         }
189         else
190         {
191             metaBlkDimDbg.w <<= 1;
192         }
193     }
194     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
195 #endif
196 
197     UINT_32 numMetaBlkX;
198     UINT_32 numMetaBlkY;
199     UINT_32 numMetaBlkZ;
200 
201     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
202                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
203                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
204 
205     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
206     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
207 
208     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
209     {
210         align *= (numPipeTotal >> 1);
211     }
212 
213     align = Max(align, metaBlkSize);
214 
215     if (m_settings.metaBaseAlignFix)
216     {
217         align = Max(align, GetBlockSize(pIn->swizzleMode));
218     }
219 
220     if (m_settings.htileAlignFix)
221     {
222         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
223         const INT_32 htileCachelineSizeLog2 = 11;
224         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
225 
226         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
227 
228         align <<= rbMaskPadding;
229     }
230 
231     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
232     pOut->height     = numMetaBlkY * metaBlkDim.h;
233     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
234 
235     pOut->metaBlkWidth       = metaBlkDim.w;
236     pOut->metaBlkHeight      = metaBlkDim.h;
237     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
238 
239     pOut->baseAlign  = align;
240     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
241 
242     return ADDR_OK;
243 }
244 
245 /**
246 ************************************************************************************************************************
247 *   Gfx9Lib::HwlComputeCmaskInfo
248 *
249 *   @brief
250 *       Interface function stub of AddrComputeCmaskInfo
251 *
252 *   @return
253 *       ADDR_E_RETURNCODE
254 ************************************************************************************************************************
255 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const256 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
257     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
258     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
259     ) const
260 {
261     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
262 
263     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
264                                                        pIn->swizzleMode);
265 
266     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
267 
268     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
269 
270     if ((numPipeTotal == 1) && (numRbTotal == 1))
271     {
272         numCompressBlkPerMetaBlkLog2 = 13;
273     }
274     else
275     {
276         if (m_settings.applyAliasFix)
277         {
278             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
279         }
280         else
281         {
282             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
283         }
284 
285         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
286     }
287 
288     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
289 
290     Dim2d metaBlkDim = {8, 8};
291     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
292     UINT_32 heightAmp = totalAmpBits >> 1;
293     UINT_32 widthAmp = totalAmpBits - heightAmp;
294     metaBlkDim.w <<= widthAmp;
295     metaBlkDim.h <<= heightAmp;
296 
297 #if DEBUG
298     Dim2d metaBlkDimDbg = {8, 8};
299     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
300     {
301         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
302         {
303             metaBlkDimDbg.h <<= 1;
304         }
305         else
306         {
307             metaBlkDimDbg.w <<= 1;
308         }
309     }
310     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
311 #endif
312 
313     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
314     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
315     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
316 
317     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
318 
319     if (m_settings.metaBaseAlignFix)
320     {
321         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
322     }
323 
324     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
325     pOut->height     = numMetaBlkY * metaBlkDim.h;
326     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
327     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
328     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
329 
330     pOut->metaBlkWidth = metaBlkDim.w;
331     pOut->metaBlkHeight = metaBlkDim.h;
332 
333     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
334 
335     // Get the CMASK address equation (copied from CmaskAddrFromCoord)
336     UINT_32 fmaskBpp              = GetFmaskBpp(1, 1);
337     UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
338     UINT_32 metaBlkWidthLog2      = Log2(pOut->metaBlkWidth);
339     UINT_32 metaBlkHeightLog2     = Log2(pOut->metaBlkHeight);
340 
341     MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
342                                 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
343                                 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
344 
345     CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
346 
347     // Generate the CMASK address equation.
348     pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
349     bool checked = false;
350     for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
351        CoordTerm &bit = (*eq)[b];
352 
353        unsigned c;
354        for (c = 0; c < bit.getsize(); c++) {
355           Coordinate &coord = bit[c];
356           pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
357           pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
358        }
359        for (; c < 5; c++)
360           pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
361     }
362 
363     // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
364     for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
365        CoordTerm &prev = (*eq)[b - 1];
366        CoordTerm &cur = (*eq)[b];
367 
368        if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
369           prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
370           prev[0].getord() + 1 == cur[0].getord())
371           pOut->equation.gfx9.num_bits = b;
372        else
373           break;
374     }
375 
376     pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
377                                                                    pIn->swizzleMode);
378 
379     return ADDR_OK;
380 }
381 
382 /**
383 ************************************************************************************************************************
384 *   Gfx9Lib::GetMetaMipInfo
385 *
386 *   @brief
387 *       Get meta mip info
388 *
389 *   @return
390 *       N/A
391 ************************************************************************************************************************
392 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const393 VOID Gfx9Lib::GetMetaMipInfo(
394     UINT_32 numMipLevels,           ///< [in]  number of mip levels
395     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
396     BOOL_32 dataThick,              ///< [in]  data surface is thick
397     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
398     UINT_32 mip0Width,              ///< [in]  mip0 width
399     UINT_32 mip0Height,             ///< [in]  mip0 height
400     UINT_32 mip0Depth,              ///< [in]  mip0 depth
401     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
402     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
403     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
404     const
405 {
406     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
407     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
408     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
409     UINT_32 tailWidth   = pMetaBlkDim->w;
410     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
411     UINT_32 tailDepth   = pMetaBlkDim->d;
412     BOOL_32 inTail      = FALSE;
413     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
414 
415     if (numMipLevels > 1)
416     {
417         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
418         {
419             // Z major
420             major = ADDR_MAJOR_Z;
421         }
422         else if (numMetaBlkX >= numMetaBlkY)
423         {
424             // X major
425             major = ADDR_MAJOR_X;
426         }
427         else
428         {
429             // Y major
430             major = ADDR_MAJOR_Y;
431         }
432 
433         inTail = ((mip0Width <= tailWidth) &&
434                   (mip0Height <= tailHeight) &&
435                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
436 
437         if (inTail == FALSE)
438         {
439             UINT_32 orderLimit;
440             UINT_32 *pMipDim;
441             UINT_32 *pOrderDim;
442 
443             if (major == ADDR_MAJOR_Z)
444             {
445                 // Z major
446                 pMipDim = &numMetaBlkY;
447                 pOrderDim = &numMetaBlkZ;
448                 orderLimit = 4;
449             }
450             else if (major == ADDR_MAJOR_X)
451             {
452                 // X major
453                 pMipDim = &numMetaBlkY;
454                 pOrderDim = &numMetaBlkX;
455                 orderLimit = 4;
456             }
457             else
458             {
459                 // Y major
460                 pMipDim = &numMetaBlkX;
461                 pOrderDim = &numMetaBlkY;
462                 orderLimit = 2;
463             }
464 
465             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
466             {
467                 *pMipDim += 2;
468             }
469             else
470             {
471                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
472             }
473         }
474     }
475 
476     if (pInfo != NULL)
477     {
478         UINT_32 mipWidth  = mip0Width;
479         UINT_32 mipHeight = mip0Height;
480         UINT_32 mipDepth  = mip0Depth;
481         Dim3d   mipCoord  = {0};
482 
483         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
484         {
485             if (inTail)
486             {
487                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
488                                    pMetaBlkDim);
489                 break;
490             }
491             else
492             {
493                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
494                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
495                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
496 
497                 pInfo[mip].inMiptail = FALSE;
498                 pInfo[mip].startX = mipCoord.w;
499                 pInfo[mip].startY = mipCoord.h;
500                 pInfo[mip].startZ = mipCoord.d;
501                 pInfo[mip].width  = mipWidth;
502                 pInfo[mip].height = mipHeight;
503                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
504 
505                 if ((mip >= 3) || (mip & 1))
506                 {
507                     switch (major)
508                     {
509                         case ADDR_MAJOR_X:
510                             mipCoord.w += mipWidth;
511                             break;
512                         case ADDR_MAJOR_Y:
513                             mipCoord.h += mipHeight;
514                             break;
515                         case ADDR_MAJOR_Z:
516                             mipCoord.d += mipDepth;
517                             break;
518                         default:
519                             break;
520                     }
521                 }
522                 else
523                 {
524                     switch (major)
525                     {
526                         case ADDR_MAJOR_X:
527                             mipCoord.h += mipHeight;
528                             break;
529                         case ADDR_MAJOR_Y:
530                             mipCoord.w += mipWidth;
531                             break;
532                         case ADDR_MAJOR_Z:
533                             mipCoord.h += mipHeight;
534                             break;
535                         default:
536                             break;
537                     }
538                 }
539 
540                 mipWidth  = Max(mipWidth >> 1, 1u);
541                 mipHeight = Max(mipHeight >> 1, 1u);
542                 mipDepth = Max(mipDepth >> 1, 1u);
543 
544                 inTail = ((mipWidth <= tailWidth) &&
545                           (mipHeight <= tailHeight) &&
546                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
547             }
548         }
549     }
550 
551     *pNumMetaBlkX = numMetaBlkX;
552     *pNumMetaBlkY = numMetaBlkY;
553     *pNumMetaBlkZ = numMetaBlkZ;
554 }
555 
556 /**
557 ************************************************************************************************************************
558 *   Gfx9Lib::HwlComputeDccInfo
559 *
560 *   @brief
561 *       Interface function to compute DCC key info
562 *
563 *   @return
564 *       ADDR_E_RETURNCODE
565 ************************************************************************************************************************
566 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const567 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
568     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
569     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
570     ) const
571 {
572     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
573     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
574     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
575 
576     if (dataLinear)
577     {
578         metaLinear = TRUE;
579     }
580     else if (metaLinear == TRUE)
581     {
582         pipeAligned = FALSE;
583     }
584 
585     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
586 
587     if (metaLinear)
588     {
589         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
590         ADDR_ASSERT_ALWAYS();
591 
592         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
593         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
594     }
595     else
596     {
597         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
598 
599         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
600 
601         UINT_32 numFrags = Max(pIn->numFrags, 1u);
602         UINT_32 numSlices = Max(pIn->numSlices, 1u);
603 
604         minMetaBlkSize /= numFrags;
605 
606         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
607 
608         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
609 
610         if ((numPipeTotal > 1) || (numRbTotal > 1))
611         {
612             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
613 
614             numCompressBlkPerMetaBlk =
615                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
616 
617             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
618             {
619                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
620             }
621         }
622 
623         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
624         Dim3d metaBlkDim = compressBlkDim;
625 
626         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
627         {
628             if ((metaBlkDim.h < metaBlkDim.w) ||
629                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
630             {
631                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
632                 {
633                     metaBlkDim.h <<= 1;
634                 }
635                 else
636                 {
637                     metaBlkDim.d <<= 1;
638                 }
639             }
640             else
641             {
642                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
643                 {
644                     metaBlkDim.w <<= 1;
645                 }
646                 else
647                 {
648                     metaBlkDim.d <<= 1;
649                 }
650             }
651         }
652 
653         UINT_32 numMetaBlkX;
654         UINT_32 numMetaBlkY;
655         UINT_32 numMetaBlkZ;
656 
657         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
658                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
659                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
660 
661         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
662 
663         if (numFrags > m_maxCompFrag)
664         {
665             sizeAlign *= (numFrags / m_maxCompFrag);
666         }
667 
668         if (m_settings.metaBaseAlignFix)
669         {
670             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
671         }
672 
673         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
674                            numCompressBlkPerMetaBlk * numFrags;
675         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
676         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
677 
678         pOut->pitch = numMetaBlkX * metaBlkDim.w;
679         pOut->height = numMetaBlkY * metaBlkDim.h;
680         pOut->depth = numMetaBlkZ * metaBlkDim.d;
681 
682         pOut->compressBlkWidth = compressBlkDim.w;
683         pOut->compressBlkHeight = compressBlkDim.h;
684         pOut->compressBlkDepth = compressBlkDim.d;
685 
686         pOut->metaBlkWidth = metaBlkDim.w;
687         pOut->metaBlkHeight = metaBlkDim.h;
688         pOut->metaBlkDepth = metaBlkDim.d;
689         pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
690 
691         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
692         pOut->fastClearSizePerSlice =
693             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
694 
695         // Get the DCC address equation (copied from DccAddrFromCoord)
696         UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
697         UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
698         UINT_32 metaBlkWidthLog2  = Log2(pOut->metaBlkWidth);
699         UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
700         UINT_32 metaBlkDepthLog2  = Log2(pOut->metaBlkDepth);
701         UINT_32 compBlkWidthLog2  = Log2(pOut->compressBlkWidth);
702         UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
703         UINT_32 compBlkDepthLog2  = Log2(pOut->compressBlkDepth);
704 
705         MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
706                                      Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
707                                      metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
708                                      compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
709 
710         CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
711 
712         // Generate the DCC address equation.
713         pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
714         bool checked = false;
715         for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
716            CoordTerm &bit = (*eq)[b];
717 
718            unsigned c;
719            for (c = 0; c < bit.getsize(); c++) {
720               Coordinate &coord = bit[c];
721               pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
722               pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
723            }
724            for (; c < 5; c++)
725               pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
726         }
727 
728         // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
729         for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
730            CoordTerm &prev = (*eq)[b - 1];
731            CoordTerm &cur = (*eq)[b];
732 
733            if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
734                prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
735                prev[0].getord() + 1 == cur[0].getord())
736               pOut->equation.gfx9.num_bits = b;
737            else
738               break;
739         }
740 
741         pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
742                                                                        pIn->swizzleMode);
743     }
744 
745     return ADDR_OK;
746 }
747 
748 /**
749 ************************************************************************************************************************
750 *   Gfx9Lib::HwlComputeMaxBaseAlignments
751 *
752 *   @brief
753 *       Gets maximum alignments
754 *   @return
755 *       maximum alignments
756 ************************************************************************************************************************
757 */
HwlComputeMaxBaseAlignments() const758 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
759 {
760     return Size64K;
761 }
762 
763 /**
764 ************************************************************************************************************************
765 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
766 *
767 *   @brief
768 *       Gets maximum alignments for metadata
769 *   @return
770 *       maximum alignments for metadata
771 ************************************************************************************************************************
772 */
HwlComputeMaxMetaBaseAlignments() const773 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
774 {
775     // Max base alignment for Htile
776     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
777     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
778 
779     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
780     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
781     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
782     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
783 
784     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
785 
786     if (maxNumPipeTotal > 2)
787     {
788         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
789     }
790 
791     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
792 
793     if (m_settings.metaBaseAlignFix)
794     {
795         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
796     }
797 
798     if (m_settings.htileAlignFix)
799     {
800         maxBaseAlignHtile *= maxNumPipeTotal;
801     }
802 
803     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
804 
805     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
806     UINT_32 maxBaseAlignDcc3D = 65536;
807 
808     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
809     {
810         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
811     }
812 
813     // Max base alignment for Msaa Dcc
814     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
815 
816     if (m_settings.metaBaseAlignFix)
817     {
818         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
819     }
820 
821     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
822 }
823 
824 /**
825 ************************************************************************************************************************
826 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
827 *
828 *   @brief
829 *       Interface function stub of AddrComputeCmaskAddrFromCoord
830 *
831 *   @return
832 *       ADDR_E_RETURNCODE
833 ************************************************************************************************************************
834 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)835 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
836     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
837     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
838 {
839     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
840     input.size            = sizeof(input);
841     input.cMaskFlags      = pIn->cMaskFlags;
842     input.colorFlags      = pIn->colorFlags;
843     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
844     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
845     input.numSlices       = Max(pIn->numSlices, 1u);
846     input.swizzleMode     = pIn->swizzleMode;
847     input.resourceType    = pIn->resourceType;
848 
849     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
850     output.size = sizeof(output);
851 
852     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
853 
854     if (returnCode == ADDR_OK)
855     {
856         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
857         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
858         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
859         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
860 
861         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
862                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
863                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
864 
865         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
866 
867         UINT_32 xb = pIn->x / output.metaBlkWidth;
868         UINT_32 yb = pIn->y / output.metaBlkHeight;
869         UINT_32 zb = pIn->slice;
870 
871         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
872         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
873         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
874 
875         UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
876         UINT_64 address  = pMetaEq->solve(coords);
877 
878         pOut->addr = address >> 1;
879         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
880 
881 
882         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
883                                                            pIn->swizzleMode);
884 
885         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
886 
887         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
888     }
889 
890     return returnCode;
891 }
892 
893 /**
894 ************************************************************************************************************************
895 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
896 *
897 *   @brief
898 *       Interface function stub of AddrComputeHtileAddrFromCoord
899 *
900 *   @return
901 *       ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
905     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
906     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
907 {
908     ADDR_E_RETURNCODE returnCode = ADDR_OK;
909 
910     if (pIn->numMipLevels > 1)
911     {
912         returnCode = ADDR_NOTIMPLEMENTED;
913     }
914     else
915     {
916         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
917         input.size            = sizeof(input);
918         input.hTileFlags      = pIn->hTileFlags;
919         input.depthFlags      = pIn->depthflags;
920         input.swizzleMode     = pIn->swizzleMode;
921         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
922         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
923         input.numSlices       = Max(pIn->numSlices, 1u);
924         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
925 
926         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
927         output.size = sizeof(output);
928 
929         returnCode = ComputeHtileInfo(&input, &output);
930 
931         if (returnCode == ADDR_OK)
932         {
933             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
934             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
935             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
936             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
937 
938             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
939                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
940                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
941 
942             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
943 
944             UINT_32 xb = pIn->x / output.metaBlkWidth;
945             UINT_32 yb = pIn->y / output.metaBlkHeight;
946             UINT_32 zb = pIn->slice;
947 
948             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
949             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
950             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
951 
952             UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
953             UINT_64 address  = pMetaEq->solve(coords);
954 
955             pOut->addr = address >> 1;
956 
957             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
958                                                                pIn->swizzleMode);
959 
960             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
961 
962             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
963         }
964     }
965 
966     return returnCode;
967 }
968 
969 /**
970 ************************************************************************************************************************
971 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
972 *
973 *   @brief
974 *       Interface function stub of AddrComputeHtileCoordFromAddr
975 *
976 *   @return
977 *       ADDR_E_RETURNCODE
978 ************************************************************************************************************************
979 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)980 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
981     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
982     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
983 {
984     ADDR_E_RETURNCODE returnCode = ADDR_OK;
985 
986     if (pIn->numMipLevels > 1)
987     {
988         returnCode = ADDR_NOTIMPLEMENTED;
989     }
990     else
991     {
992         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
993         input.size            = sizeof(input);
994         input.hTileFlags      = pIn->hTileFlags;
995         input.swizzleMode     = pIn->swizzleMode;
996         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
997         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
998         input.numSlices       = Max(pIn->numSlices, 1u);
999         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1000 
1001         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1002         output.size = sizeof(output);
1003 
1004         returnCode = ComputeHtileInfo(&input, &output);
1005 
1006         if (returnCode == ADDR_OK)
1007         {
1008             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1009             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1010             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1011             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
1012 
1013             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1014                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1015                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1016 
1017             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1018 
1019             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1020                                                                pIn->swizzleMode);
1021 
1022             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1023 
1024             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1025 
1026             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1027             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1028 
1029             UINT_32 coords[NUM_DIMS];
1030             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1031 
1032             pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1033             pOut->y     = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1034             pOut->x     = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1035         }
1036     }
1037 
1038     return returnCode;
1039 }
1040 
1041 /**
1042 ************************************************************************************************************************
1043 *   Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1044 *
1045 *   @brief
1046 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1047 *
1048 *   @return
1049 *       ADDR_E_RETURNCODE
1050 ************************************************************************************************************************
1051 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1052 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1053     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1054 {
1055     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1056 
1057     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1058     {
1059         returnCode = ADDR_NOTSUPPORTED;
1060     }
1061     else if ((pIn->pitch == 0)             ||
1062              (pIn->height == 0)            ||
1063              (pIn->compressBlkWidth == 0)  ||
1064              (pIn->compressBlkHeight == 0) ||
1065              (pIn->compressBlkDepth == 0)  ||
1066              (pIn->metaBlkWidth == 0)      ||
1067              (pIn->metaBlkHeight == 0)     ||
1068              (pIn->metaBlkDepth == 0)      ||
1069              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1070     {
1071         returnCode = ADDR_NOTSUPPORTED;
1072     }
1073 
1074     return returnCode;
1075 }
1076 
1077 /**
1078 ************************************************************************************************************************
1079 *   Gfx9Lib::HwlComputeDccAddrFromCoord
1080 *
1081 *   @brief
1082 *       Interface function stub of AddrComputeDccAddrFromCoord
1083 *
1084 *   @return
1085 *       N/A
1086 ************************************************************************************************************************
1087 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1088 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1089     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
1090     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1091 {
1092     UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1093     UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1094     UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
1095     UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1096     UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
1097     UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
1098     UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1099     UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
1100 
1101     MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1102                                  Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1103                                  metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1104                                  compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1105 
1106     const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1107 
1108     UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1109     UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1110     UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1111 
1112     UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
1113     UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1114     UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1115 
1116     UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1117     UINT_64 address  = pMetaEq->solve(coords);
1118 
1119     pOut->addr = address >> 1;
1120 
1121     UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1122                                                        pIn->swizzleMode);
1123 
1124     UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1125 
1126     pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1127 }
1128 
1129 /**
1130 ************************************************************************************************************************
1131 *   Gfx9Lib::HwlInitGlobalParams
1132 *
1133 *   @brief
1134 *       Initializes global parameters
1135 *
1136 *   @return
1137 *       TRUE if all settings are valid
1138 *
1139 ************************************************************************************************************************
1140 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1141 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1142     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1143 {
1144     BOOL_32 valid = TRUE;
1145 
1146     if (m_settings.isArcticIsland)
1147     {
1148         GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1149 
1150         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1151 
1152         // These values are copied from CModel code
1153         switch (gbAddrConfig.bits.NUM_PIPES)
1154         {
1155             case ADDR_CONFIG_1_PIPE:
1156                 m_pipes = 1;
1157                 m_pipesLog2 = 0;
1158                 break;
1159             case ADDR_CONFIG_2_PIPE:
1160                 m_pipes = 2;
1161                 m_pipesLog2 = 1;
1162                 break;
1163             case ADDR_CONFIG_4_PIPE:
1164                 m_pipes = 4;
1165                 m_pipesLog2 = 2;
1166                 break;
1167             case ADDR_CONFIG_8_PIPE:
1168                 m_pipes = 8;
1169                 m_pipesLog2 = 3;
1170                 break;
1171             case ADDR_CONFIG_16_PIPE:
1172                 m_pipes = 16;
1173                 m_pipesLog2 = 4;
1174                 break;
1175             case ADDR_CONFIG_32_PIPE:
1176                 m_pipes = 32;
1177                 m_pipesLog2 = 5;
1178                 break;
1179             default:
1180                 ADDR_ASSERT_ALWAYS();
1181                 break;
1182         }
1183 
1184         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1185         {
1186             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1187                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1188                 m_pipeInterleaveLog2 = 8;
1189                 break;
1190             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1191                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1192                 m_pipeInterleaveLog2 = 9;
1193                 break;
1194             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1195                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1196                 m_pipeInterleaveLog2 = 10;
1197                 break;
1198             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1199                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1200                 m_pipeInterleaveLog2 = 11;
1201                 break;
1202             default:
1203                 ADDR_ASSERT_ALWAYS();
1204                 break;
1205         }
1206 
1207         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1208         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1209         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1210 
1211         switch (gbAddrConfig.bits.NUM_BANKS)
1212         {
1213             case ADDR_CONFIG_1_BANK:
1214                 m_banks = 1;
1215                 m_banksLog2 = 0;
1216                 break;
1217             case ADDR_CONFIG_2_BANK:
1218                 m_banks = 2;
1219                 m_banksLog2 = 1;
1220                 break;
1221             case ADDR_CONFIG_4_BANK:
1222                 m_banks = 4;
1223                 m_banksLog2 = 2;
1224                 break;
1225             case ADDR_CONFIG_8_BANK:
1226                 m_banks = 8;
1227                 m_banksLog2 = 3;
1228                 break;
1229             case ADDR_CONFIG_16_BANK:
1230                 m_banks = 16;
1231                 m_banksLog2 = 4;
1232                 break;
1233             default:
1234                 ADDR_ASSERT_ALWAYS();
1235                 break;
1236         }
1237 
1238         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1239         {
1240             case ADDR_CONFIG_1_SHADER_ENGINE:
1241                 m_se = 1;
1242                 m_seLog2 = 0;
1243                 break;
1244             case ADDR_CONFIG_2_SHADER_ENGINE:
1245                 m_se = 2;
1246                 m_seLog2 = 1;
1247                 break;
1248             case ADDR_CONFIG_4_SHADER_ENGINE:
1249                 m_se = 4;
1250                 m_seLog2 = 2;
1251                 break;
1252             case ADDR_CONFIG_8_SHADER_ENGINE:
1253                 m_se = 8;
1254                 m_seLog2 = 3;
1255                 break;
1256             default:
1257                 ADDR_ASSERT_ALWAYS();
1258                 break;
1259         }
1260 
1261         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1262         {
1263             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1264                 m_rbPerSe = 1;
1265                 m_rbPerSeLog2 = 0;
1266                 break;
1267             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1268                 m_rbPerSe = 2;
1269                 m_rbPerSeLog2 = 1;
1270                 break;
1271             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1272                 m_rbPerSe = 4;
1273                 m_rbPerSeLog2 = 2;
1274                 break;
1275             default:
1276                 ADDR_ASSERT_ALWAYS();
1277                 break;
1278         }
1279 
1280         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1281         {
1282             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1283                 m_maxCompFrag = 1;
1284                 m_maxCompFragLog2 = 0;
1285                 break;
1286             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1287                 m_maxCompFrag = 2;
1288                 m_maxCompFragLog2 = 1;
1289                 break;
1290             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1291                 m_maxCompFrag = 4;
1292                 m_maxCompFragLog2 = 2;
1293                 break;
1294             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1295                 m_maxCompFrag = 8;
1296                 m_maxCompFragLog2 = 3;
1297                 break;
1298             default:
1299                 ADDR_ASSERT_ALWAYS();
1300                 break;
1301         }
1302 
1303         if ((m_rbPerSeLog2 == 1) &&
1304             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1305              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1306         {
1307             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1308 
1309             ADDR_ASSERT(m_settings.isRaven == FALSE);
1310 
1311             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1312 
1313             if (m_settings.isVega12)
1314             {
1315                 m_settings.htileCacheRbConflict = 1;
1316             }
1317         }
1318 
1319         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1320         m_blockVarSizeLog2 = 0;
1321     }
1322     else
1323     {
1324         valid = FALSE;
1325         ADDR_NOT_IMPLEMENTED();
1326     }
1327 
1328     if (valid)
1329     {
1330         InitEquationTable();
1331     }
1332 
1333     return valid;
1334 }
1335 
1336 /**
1337 ************************************************************************************************************************
1338 *   Gfx9Lib::HwlConvertChipFamily
1339 *
1340 *   @brief
1341 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1342 *   @return
1343 *       ChipFamily
1344 ************************************************************************************************************************
1345 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1346 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1347     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1348     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1349 {
1350     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1351 
1352     switch (uChipFamily)
1353     {
1354         case FAMILY_AI:
1355             m_settings.isArcticIsland = 1;
1356             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1357             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1358             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1359             m_settings.isDce12 = 1;
1360 
1361             if (m_settings.isVega10 == 0)
1362             {
1363                 m_settings.htileAlignFix = 1;
1364                 m_settings.applyAliasFix = 1;
1365             }
1366 
1367             m_settings.metaBaseAlignFix = 1;
1368 
1369             m_settings.depthPipeXorDisable = 1;
1370             break;
1371         case FAMILY_RV:
1372             m_settings.isArcticIsland = 1;
1373 
1374             if (ASICREV_IS_RAVEN(uChipRevision))
1375             {
1376                 m_settings.isRaven = 1;
1377 
1378                 m_settings.depthPipeXorDisable = 1;
1379             }
1380 
1381             if (ASICREV_IS_RAVEN2(uChipRevision))
1382             {
1383                 m_settings.isRaven = 1;
1384             }
1385 
1386             if (m_settings.isRaven == 0)
1387             {
1388                 m_settings.htileAlignFix = 1;
1389                 m_settings.applyAliasFix = 1;
1390             }
1391 
1392             m_settings.isDcn1 = m_settings.isRaven;
1393 
1394             if (ASICREV_IS_RENOIR(uChipRevision))
1395             {
1396                 m_settings.isRaven = 1;
1397                 m_settings.isDcn2  = 1;
1398             }
1399 
1400             m_settings.metaBaseAlignFix = 1;
1401             break;
1402 
1403         default:
1404             ADDR_ASSERT(!"No Chip found");
1405             break;
1406     }
1407 
1408     return family;
1409 }
1410 
1411 /**
1412 ************************************************************************************************************************
1413 *   Gfx9Lib::InitRbEquation
1414 *
1415 *   @brief
1416 *       Init RB equation
1417 *   @return
1418 *       N/A
1419 ************************************************************************************************************************
1420 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1421 VOID Gfx9Lib::GetRbEquation(
1422     CoordEq* pRbEq,             ///< [out] rb equation
1423     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1424     UINT_32  numSeLog2)         ///< [in] number of shader engine
1425     const
1426 {
1427     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1428     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1429     Coordinate cx(DIM_X, rbRegion);
1430     Coordinate cy(DIM_Y, rbRegion);
1431 
1432     UINT_32 start = 0;
1433     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1434 
1435     // Clear the rb equation
1436     pRbEq->resize(0);
1437     pRbEq->resize(numRbTotalLog2);
1438 
1439     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1440     {
1441         // Special case when more than 1 SE, and 2 RB per SE
1442         (*pRbEq)[0].add(cx);
1443         (*pRbEq)[0].add(cy);
1444         cx++;
1445         cy++;
1446 
1447         if (m_settings.applyAliasFix == false)
1448         {
1449             (*pRbEq)[0].add(cy);
1450         }
1451 
1452         (*pRbEq)[0].add(cy);
1453         start++;
1454     }
1455 
1456     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1457 
1458     for (UINT_32 i = 0; i < numBits; i++)
1459     {
1460         UINT_32 idx =
1461             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1462 
1463         if ((i % 2) == 1)
1464         {
1465             (*pRbEq)[idx].add(cx);
1466             cx++;
1467         }
1468         else
1469         {
1470             (*pRbEq)[idx].add(cy);
1471             cy++;
1472         }
1473     }
1474 }
1475 
1476 /**
1477 ************************************************************************************************************************
1478 *   Gfx9Lib::GetDataEquation
1479 *
1480 *   @brief
1481 *       Get data equation for fmask and Z
1482 *   @return
1483 *       N/A
1484 ************************************************************************************************************************
1485 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1486 VOID Gfx9Lib::GetDataEquation(
1487     CoordEq* pDataEq,               ///< [out] data surface equation
1488     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1489     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1490     AddrResourceType resourceType,  ///< [in] data surface resource type
1491     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1492     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1493     const
1494 {
1495     Coordinate cx(DIM_X, 0);
1496     Coordinate cy(DIM_Y, 0);
1497     Coordinate cz(DIM_Z, 0);
1498     Coordinate cs(DIM_S, 0);
1499 
1500     // Clear the equation
1501     pDataEq->resize(0);
1502     pDataEq->resize(27);
1503 
1504     if (dataSurfaceType == Gfx9DataColor)
1505     {
1506         if (IsLinear(swizzleMode))
1507         {
1508             Coordinate cm(DIM_M, 0);
1509 
1510             pDataEq->resize(49);
1511 
1512             for (UINT_32 i = 0; i < 49; i++)
1513             {
1514                 (*pDataEq)[i].add(cm);
1515                 cm++;
1516             }
1517         }
1518         else if (IsThick(resourceType, swizzleMode))
1519         {
1520             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1521             UINT_32 i;
1522             if (IsStandardSwizzle(resourceType, swizzleMode))
1523             {
1524                 // Standard 3d swizzle
1525                 // Fill in bottom x bits
1526                 for (i = elementBytesLog2; i < 4; i++)
1527                 {
1528                     (*pDataEq)[i].add(cx);
1529                     cx++;
1530                 }
1531                 // Fill in 2 bits of y and then z
1532                 for (i = 4; i < 6; i++)
1533                 {
1534                     (*pDataEq)[i].add(cy);
1535                     cy++;
1536                 }
1537                 for (i = 6; i < 8; i++)
1538                 {
1539                     (*pDataEq)[i].add(cz);
1540                     cz++;
1541                 }
1542                 if (elementBytesLog2 < 2)
1543                 {
1544                     // fill in z & y bit
1545                     (*pDataEq)[8].add(cz);
1546                     (*pDataEq)[9].add(cy);
1547                     cz++;
1548                     cy++;
1549                 }
1550                 else if (elementBytesLog2 == 2)
1551                 {
1552                     // fill in y and x bit
1553                     (*pDataEq)[8].add(cy);
1554                     (*pDataEq)[9].add(cx);
1555                     cy++;
1556                     cx++;
1557                 }
1558                 else
1559                 {
1560                     // fill in 2 x bits
1561                     (*pDataEq)[8].add(cx);
1562                     cx++;
1563                     (*pDataEq)[9].add(cx);
1564                     cx++;
1565                 }
1566             }
1567             else
1568             {
1569                 // Z 3d swizzle
1570                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1571                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1572                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1573                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1574                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1575                 {
1576                     (*pDataEq)[i].add(cz);
1577                     cz++;
1578                 }
1579                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1580                 {
1581                     // add an x and z
1582                     (*pDataEq)[6].add(cx);
1583                     (*pDataEq)[7].add(cz);
1584                     cx++;
1585                     cz++;
1586                 }
1587                 else if (elementBytesLog2 == 2)
1588                 {
1589                     // add a y and z
1590                     (*pDataEq)[6].add(cy);
1591                     (*pDataEq)[7].add(cz);
1592                     cy++;
1593                     cz++;
1594                 }
1595                 // add y and x
1596                 (*pDataEq)[8].add(cy);
1597                 (*pDataEq)[9].add(cx);
1598                 cy++;
1599                 cx++;
1600             }
1601             // Fill in bit 10 and up
1602             pDataEq->mort3d( cz, cy, cx, 10 );
1603         }
1604         else if (IsThin(resourceType, swizzleMode))
1605         {
1606             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1607             // Color 2D
1608             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1609             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1610             UINT_32 i;
1611             // Fill in bottom x bits
1612             for (i = elementBytesLog2; i < 4; i++)
1613             {
1614                 (*pDataEq)[i].add(cx);
1615                 cx++;
1616             }
1617             // Fill in bottom y bits
1618             for (i = 4; i < 4 + microYBits; i++)
1619             {
1620                 (*pDataEq)[i].add(cy);
1621                 cy++;
1622             }
1623             // Fill in last of the micro_x bits
1624             for (i = 4 + microYBits; i < 8; i++)
1625             {
1626                 (*pDataEq)[i].add(cx);
1627                 cx++;
1628             }
1629             // Fill in x/y bits below sample split
1630             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1631             // Fill in sample bits
1632             for (i = 0; i < numSamplesLog2; i++)
1633             {
1634                 cs.set(DIM_S, i);
1635                 (*pDataEq)[tileSplitStart + i].add(cs);
1636             }
1637             // Fill in x/y bits above sample split
1638             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1639             {
1640                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1641             }
1642             else
1643             {
1644                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1645             }
1646         }
1647         else
1648         {
1649             ADDR_ASSERT_ALWAYS();
1650         }
1651     }
1652     else
1653     {
1654         // Fmask or depth
1655         UINT_32 sampleStart = elementBytesLog2;
1656         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1657         UINT_32 ymajStart = 6 + numSamplesLog2;
1658 
1659         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1660         {
1661             cs.set(DIM_S, s);
1662             (*pDataEq)[sampleStart + s].add(cs);
1663         }
1664 
1665         // Put in the x-major order pixel bits
1666         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1667         // Put in the y-major order pixel bits
1668         pDataEq->mort2d(cy, cx, ymajStart);
1669     }
1670 }
1671 
1672 /**
1673 ************************************************************************************************************************
1674 *   Gfx9Lib::GetPipeEquation
1675 *
1676 *   @brief
1677 *       Get pipe equation
1678 *   @return
1679 *       N/A
1680 ************************************************************************************************************************
1681 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1682 VOID Gfx9Lib::GetPipeEquation(
1683     CoordEq*         pPipeEq,            ///< [out] pipe equation
1684     CoordEq*         pDataEq,            ///< [in] data equation
1685     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1686     UINT_32          numPipeLog2,        ///< [in] number of pipes
1687     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1688     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1689     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1690     AddrResourceType resourceType        ///< [in] data surface resource type
1691     ) const
1692 {
1693     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1694     CoordEq dataEq;
1695 
1696     pDataEq->copy(dataEq);
1697 
1698     if (dataSurfaceType == Gfx9DataColor)
1699     {
1700         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1701         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1702     }
1703 
1704     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1705 
1706     // This section should only apply to z/stencil, maybe fmask
1707     // If the pipe bit is below the comp block size,
1708     // then keep moving up the address until we find a bit that is above
1709     UINT_32 pipeStart = 0;
1710 
1711     if (dataSurfaceType != Gfx9DataColor)
1712     {
1713         Coordinate tileMin(DIM_X, 3);
1714 
1715         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1716         {
1717             pipeStart++;
1718         }
1719 
1720         // if pipe is 0, then the first pipe bit is above the comp block size,
1721         // so we don't need to do anything
1722         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1723         // we will get the same pipe equation
1724         if (pipeStart != 0)
1725         {
1726             for (UINT_32 i = 0; i < numPipeLog2; i++)
1727             {
1728                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1729                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1730             }
1731         }
1732     }
1733 
1734     if (IsPrt(swizzleMode))
1735     {
1736         // Clear out bits above the block size if prt's are enabled
1737         dataEq.resize(blockSizeLog2);
1738         dataEq.resize(48);
1739     }
1740 
1741     if (IsXor(swizzleMode))
1742     {
1743         CoordEq xorMask;
1744 
1745         if (IsThick(resourceType, swizzleMode))
1746         {
1747             CoordEq xorMask2;
1748 
1749             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1750 
1751             xorMask.resize(numPipeLog2);
1752 
1753             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1754             {
1755                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1756                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1757             }
1758         }
1759         else
1760         {
1761             // Xor in the bits above the pipe+gpu bits
1762             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1763 
1764             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1765             {
1766                 Coordinate co;
1767                 CoordEq xorMask2;
1768                 // if 1xaa and not prt, then xor in the z bits
1769                 xorMask2.resize(0);
1770                 xorMask2.resize(numPipeLog2);
1771                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1772                 {
1773                     co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1774                     xorMask2[pipeIdx].add(co);
1775                 }
1776 
1777                 pPipeEq->xorin(xorMask2);
1778             }
1779         }
1780 
1781         xorMask.reverse();
1782         pPipeEq->xorin(xorMask);
1783     }
1784 }
1785 /**
1786 ************************************************************************************************************************
1787 *   Gfx9Lib::GetMetaEquation
1788 *
1789 *   @brief
1790 *       Get meta equation for cmask/htile/DCC
1791 *   @return
1792 *       Pointer to a calculated meta equation
1793 ************************************************************************************************************************
1794 */
GetMetaEquation(const MetaEqParams & metaEqParams)1795 const CoordEq* Gfx9Lib::GetMetaEquation(
1796     const MetaEqParams& metaEqParams)
1797 {
1798     UINT_32 cachedMetaEqIndex;
1799 
1800     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1801     {
1802         if (memcmp(&metaEqParams,
1803                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1804                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1805         {
1806             break;
1807         }
1808     }
1809 
1810     CoordEq* pMetaEq = NULL;
1811 
1812     if (cachedMetaEqIndex < MaxCachedMetaEq)
1813     {
1814         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1815     }
1816     else
1817     {
1818         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1819 
1820         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1821 
1822         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1823 
1824         GenMetaEquation(pMetaEq,
1825                         metaEqParams.maxMip,
1826                         metaEqParams.elementBytesLog2,
1827                         metaEqParams.numSamplesLog2,
1828                         metaEqParams.metaFlag,
1829                         metaEqParams.dataSurfaceType,
1830                         metaEqParams.swizzleMode,
1831                         metaEqParams.resourceType,
1832                         metaEqParams.metaBlkWidthLog2,
1833                         metaEqParams.metaBlkHeightLog2,
1834                         metaEqParams.metaBlkDepthLog2,
1835                         metaEqParams.compBlkWidthLog2,
1836                         metaEqParams.compBlkHeightLog2,
1837                         metaEqParams.compBlkDepthLog2);
1838     }
1839 
1840     return pMetaEq;
1841 }
1842 
1843 /**
1844 ************************************************************************************************************************
1845 *   Gfx9Lib::GenMetaEquation
1846 *
1847 *   @brief
1848 *       Get meta equation for cmask/htile/DCC
1849 *   @return
1850 *       N/A
1851 ************************************************************************************************************************
1852 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1853 VOID Gfx9Lib::GenMetaEquation(
1854     CoordEq*         pMetaEq,               ///< [out] meta equation
1855     UINT_32          maxMip,                ///< [in] max mip Id
1856     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1857     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1858     ADDR2_META_FLAGS metaFlag,              ///< [in] meta falg
1859     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1860     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1861     AddrResourceType resourceType,          ///< [in] data surface resource type
1862     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1863     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1864     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1865     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1866     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1867     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1868     const
1869 {
1870     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1871     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1872 
1873     // Get the correct data address and rb equation
1874     CoordEq dataEq;
1875     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1876                     elementBytesLog2, numSamplesLog2);
1877 
1878     // Get pipe and rb equations
1879     CoordEq pipeEquation;
1880     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1881                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1882     numPipeTotalLog2 = pipeEquation.getsize();
1883 
1884     if (metaFlag.linear)
1885     {
1886         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1887         ADDR_ASSERT_ALWAYS();
1888 
1889         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1890 
1891         dataEq.copy(*pMetaEq);
1892 
1893         if (IsLinear(swizzleMode))
1894         {
1895             if (metaFlag.pipeAligned)
1896             {
1897                 // Remove the pipe bits
1898                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1899                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1900             }
1901             // Divide by comp block size, which for linear (which is always color) is 256 B
1902             pMetaEq->shift(-8);
1903 
1904             if (metaFlag.pipeAligned)
1905             {
1906                 // Put pipe bits back in
1907                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1908 
1909                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1910                 {
1911                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1912                 }
1913             }
1914         }
1915 
1916         pMetaEq->shift(1);
1917     }
1918     else
1919     {
1920         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1921         UINT_32 compFragLog2 =
1922             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1923             maxCompFragLog2 : numSamplesLog2;
1924 
1925         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1926 
1927         // Make sure the metaaddr is cleared
1928         pMetaEq->resize(0);
1929         pMetaEq->resize(27);
1930 
1931         if (IsThick(resourceType, swizzleMode))
1932         {
1933             Coordinate cx(DIM_X, 0);
1934             Coordinate cy(DIM_Y, 0);
1935             Coordinate cz(DIM_Z, 0);
1936 
1937             if (maxMip > 0)
1938             {
1939                 pMetaEq->mort3d(cy, cx, cz);
1940             }
1941             else
1942             {
1943                 pMetaEq->mort3d(cx, cy, cz);
1944             }
1945         }
1946         else
1947         {
1948             Coordinate cx(DIM_X, 0);
1949             Coordinate cy(DIM_Y, 0);
1950             Coordinate cs;
1951 
1952             if (maxMip > 0)
1953             {
1954                 pMetaEq->mort2d(cy, cx, compFragLog2);
1955             }
1956             else
1957             {
1958                 pMetaEq->mort2d(cx, cy, compFragLog2);
1959             }
1960 
1961             //------------------------------------------------------------------------------------------------------------------------
1962             // Put the compressible fragments at the lsb
1963             // the uncompressible frags will be at the msb of the micro address
1964             //------------------------------------------------------------------------------------------------------------------------
1965             for (UINT_32 s = 0; s < compFragLog2; s++)
1966             {
1967                 cs.set(DIM_S, s);
1968                 (*pMetaEq)[s].add(cs);
1969             }
1970         }
1971 
1972         // Keep a copy of the pipe equations
1973         CoordEq origPipeEquation;
1974         pipeEquation.copy(origPipeEquation);
1975 
1976         Coordinate co;
1977         // filter out everything under the compressed block size
1978         co.set(DIM_X, compBlkWidthLog2);
1979         pMetaEq->Filter('<', co, 0, DIM_X);
1980         co.set(DIM_Y, compBlkHeightLog2);
1981         pMetaEq->Filter('<', co, 0, DIM_Y);
1982         co.set(DIM_Z, compBlkDepthLog2);
1983         pMetaEq->Filter('<', co, 0, DIM_Z);
1984 
1985         // For non-color, filter out sample bits
1986         if (dataSurfaceType != Gfx9DataColor)
1987         {
1988             co.set(DIM_X, 0);
1989             pMetaEq->Filter('<', co, 0, DIM_S);
1990         }
1991 
1992         // filter out everything above the metablock size
1993         co.set(DIM_X, metaBlkWidthLog2 - 1);
1994         pMetaEq->Filter('>', co, 0, DIM_X);
1995         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1996         pMetaEq->Filter('>', co, 0, DIM_Y);
1997         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1998         pMetaEq->Filter('>', co, 0, DIM_Z);
1999 
2000         // filter out everything above the metablock size for the channel bits
2001         co.set(DIM_X, metaBlkWidthLog2 - 1);
2002         pipeEquation.Filter('>', co, 0, DIM_X);
2003         co.set(DIM_Y, metaBlkHeightLog2 - 1);
2004         pipeEquation.Filter('>', co, 0, DIM_Y);
2005         co.set(DIM_Z, metaBlkDepthLog2 - 1);
2006         pipeEquation.Filter('>', co, 0, DIM_Z);
2007 
2008         // Make sure we still have the same number of channel bits
2009         if (pipeEquation.getsize() != numPipeTotalLog2)
2010         {
2011             ADDR_ASSERT_ALWAYS();
2012         }
2013 
2014         // Loop through all channel and rb bits,
2015         // and make sure these components exist in the metadata address
2016         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2017         {
2018             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2019             {
2020                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2021                 {
2022                     ADDR_ASSERT_ALWAYS();
2023                 }
2024             }
2025         }
2026 
2027         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
2028         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2029         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2030         CoordEq       origRbEquation;
2031 
2032         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2033 
2034         CoordEq rbEquation = origRbEquation;
2035 
2036         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2037         {
2038             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2039             {
2040                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2041                 {
2042                     ADDR_ASSERT_ALWAYS();
2043                 }
2044             }
2045         }
2046 
2047         if (m_settings.applyAliasFix)
2048         {
2049             co.set(DIM_Z, -1);
2050         }
2051 
2052         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2053         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2054         {
2055             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2056             {
2057                 BOOL_32 isRbEquationInPipeEquation = FALSE;
2058 
2059                 if (m_settings.applyAliasFix)
2060                 {
2061                     CoordTerm filteredPipeEq;
2062                     filteredPipeEq = pipeEquation[j];
2063 
2064                     filteredPipeEq.Filter('>', co, 0, DIM_Z);
2065 
2066                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2067                 }
2068                 else
2069                 {
2070                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2071                 }
2072 
2073                 if (isRbEquationInPipeEquation)
2074                 {
2075                     rbEquation[i].Clear();
2076                 }
2077             }
2078         }
2079 
2080          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2081 
2082         // Loop through each bit of the channel, get the smallest coordinate,
2083         // and remove it from the metaaddr, and rb_equation
2084         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2085         {
2086             pipeEquation[i].getsmallest(co);
2087 
2088             UINT_32 old_size = pMetaEq->getsize();
2089             pMetaEq->Filter('=', co);
2090             UINT_32 new_size = pMetaEq->getsize();
2091             if (new_size != old_size-1)
2092             {
2093                 ADDR_ASSERT_ALWAYS();
2094             }
2095             pipeEquation.remove(co);
2096             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2097             {
2098                 if (rbEquation[j].remove(co))
2099                 {
2100                     // if we actually removed something from this bit, then add the remaining
2101                     // channel bits, as these can be removed for this bit
2102                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2103                     {
2104                         if (pipeEquation[i][k] != co)
2105                         {
2106                             rbEquation[j].add(pipeEquation[i][k]);
2107                             rbAppendedWithPipeBits[j] = true;
2108                         }
2109                     }
2110                 }
2111             }
2112         }
2113 
2114         // Loop through the rb bits and see what remain;
2115         // filter out the smallest coordinate if it remains
2116         UINT_32 rbBitsLeft = 0;
2117         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2118         {
2119             BOOL_32 isRbEqAppended = FALSE;
2120 
2121             if (m_settings.applyAliasFix)
2122             {
2123                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2124             }
2125             else
2126             {
2127                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2128             }
2129 
2130             if (isRbEqAppended)
2131             {
2132                 rbBitsLeft++;
2133                 rbEquation[i].getsmallest(co);
2134                 UINT_32 old_size = pMetaEq->getsize();
2135                 pMetaEq->Filter('=', co);
2136                 UINT_32 new_size = pMetaEq->getsize();
2137                 if (new_size != old_size - 1)
2138                 {
2139                     // assert warning
2140                 }
2141                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2142                 {
2143                     if (rbEquation[j].remove(co))
2144                     {
2145                         // if we actually removed something from this bit, then add the remaining
2146                         // rb bits, as these can be removed for this bit
2147                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2148                         {
2149                             if (rbEquation[i][k] != co)
2150                             {
2151                                 rbEquation[j].add(rbEquation[i][k]);
2152                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2153                             }
2154                         }
2155                     }
2156                 }
2157             }
2158         }
2159 
2160         // capture the size of the metaaddr
2161         UINT_32 metaSize = pMetaEq->getsize();
2162         // resize to 49 bits...make this a nibble address
2163         pMetaEq->resize(49);
2164         // Concatenate the macro address above the current address
2165         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2166         {
2167             co.set(DIM_M, j);
2168             (*pMetaEq)[i].add(co);
2169         }
2170 
2171         // Multiply by meta element size (in nibbles)
2172         if (dataSurfaceType == Gfx9DataColor)
2173         {
2174             pMetaEq->shift(1);
2175         }
2176         else if (dataSurfaceType == Gfx9DataDepthStencil)
2177         {
2178             pMetaEq->shift(3);
2179         }
2180 
2181         //------------------------------------------------------------------------------------------
2182         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2183         // Shift up from pipe interleave number of channel
2184         // and rb bits left, and uncompressed fragments
2185         //------------------------------------------------------------------------------------------
2186 
2187         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2188 
2189         // Put in the channel bits
2190         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2191         {
2192             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2193         }
2194 
2195         // Put in remaining rb bits
2196         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2197         {
2198             BOOL_32 isRbEqAppended = FALSE;
2199 
2200             if (m_settings.applyAliasFix)
2201             {
2202                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2203             }
2204             else
2205             {
2206                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2207             }
2208 
2209             if (isRbEqAppended)
2210             {
2211                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2212                 // Mark any rb bit we add in to the rb mask
2213                 j++;
2214             }
2215         }
2216 
2217         //------------------------------------------------------------------------------------------
2218         // Put in the uncompressed fragment bits
2219         //------------------------------------------------------------------------------------------
2220         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2221         {
2222             co.set(DIM_S, compFragLog2 + i);
2223             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2224         }
2225     }
2226 }
2227 
2228 /**
2229 ************************************************************************************************************************
2230 *   Gfx9Lib::IsEquationSupported
2231 *
2232 *   @brief
2233 *       Check if equation is supported for given swizzle mode and resource type.
2234 *
2235 *   @return
2236 *       TRUE if supported
2237 ************************************************************************************************************************
2238 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2239 BOOL_32 Gfx9Lib::IsEquationSupported(
2240     AddrResourceType rsrcType,
2241     AddrSwizzleMode  swMode,
2242     UINT_32          elementBytesLog2) const
2243 {
2244     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2245                         (IsValidSwMode(swMode) == TRUE) &&
2246                         (IsLinear(swMode) == FALSE) &&
2247                         (((IsTex2d(rsrcType) == TRUE) &&
2248                           ((elementBytesLog2 < 4) ||
2249                            ((IsRotateSwizzle(swMode) == FALSE) &&
2250                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2251                          ((IsTex3d(rsrcType) == TRUE) &&
2252                           (IsRotateSwizzle(swMode) == FALSE) &&
2253                           (IsBlock256b(swMode) == FALSE)));
2254 
2255     return supported;
2256 }
2257 
2258 /**
2259 ************************************************************************************************************************
2260 *   Gfx9Lib::InitEquationTable
2261 *
2262 *   @brief
2263 *       Initialize Equation table.
2264 *
2265 *   @return
2266 *       N/A
2267 ************************************************************************************************************************
2268 */
InitEquationTable()2269 VOID Gfx9Lib::InitEquationTable()
2270 {
2271     memset(m_equationTable, 0, sizeof(m_equationTable));
2272 
2273     // Loop all possible resource type (2D/3D)
2274     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2275     {
2276         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2277 
2278         // Loop all possible swizzle mode
2279         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2280         {
2281             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2282 
2283             // Loop all possible bpp
2284             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2285             {
2286                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2287 
2288                 // Check if the input is supported
2289                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2290                 {
2291                     ADDR_EQUATION     equation;
2292                     ADDR_E_RETURNCODE retCode;
2293 
2294                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2295 
2296                     // Generate the equation
2297                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2298                     {
2299                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2300                     }
2301                     else if (IsThin(rsrcType, swMode))
2302                     {
2303                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2304                     }
2305                     else
2306                     {
2307                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2308                     }
2309 
2310                     // Only fill the equation into the table if the return code is ADDR_OK,
2311                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2312                     // a valid input, we do nothing but just fill invalid equation index
2313                     // into the lookup table.
2314                     if (retCode == ADDR_OK)
2315                     {
2316                         equationIndex = m_numEquations;
2317                         ADDR_ASSERT(equationIndex < EquationTableSize);
2318 
2319                         m_equationTable[equationIndex] = equation;
2320 
2321                         m_numEquations++;
2322                     }
2323                     else
2324                     {
2325                         ADDR_ASSERT_ALWAYS();
2326                     }
2327                 }
2328 
2329                 // Fill the index into the lookup table, if the combination is not supported
2330                 // fill the invalid equation index
2331                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2332             }
2333         }
2334     }
2335 }
2336 
2337 /**
2338 ************************************************************************************************************************
2339 *   Gfx9Lib::HwlGetEquationIndex
2340 *
2341 *   @brief
2342 *       Interface function stub of GetEquationIndex
2343 *
2344 *   @return
2345 *       ADDR_E_RETURNCODE
2346 ************************************************************************************************************************
2347 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2348 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2349     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2350     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2351     ) const
2352 {
2353     AddrResourceType rsrcType         = pIn->resourceType;
2354     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2355     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2356     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2357 
2358     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2359     {
2360         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2361         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2362 
2363         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2364     }
2365 
2366     if (pOut->pMipInfo != NULL)
2367     {
2368         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2369         {
2370             pOut->pMipInfo[i].equationIndex = index;
2371         }
2372     }
2373 
2374     return index;
2375 }
2376 
2377 /**
2378 ************************************************************************************************************************
2379 *   Gfx9Lib::HwlComputeBlock256Equation
2380 *
2381 *   @brief
2382 *       Interface function stub of ComputeBlock256Equation
2383 *
2384 *   @return
2385 *       ADDR_E_RETURNCODE
2386 ************************************************************************************************************************
2387 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2388 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2389     AddrResourceType rsrcType,
2390     AddrSwizzleMode  swMode,
2391     UINT_32          elementBytesLog2,
2392     ADDR_EQUATION*   pEquation) const
2393 {
2394     ADDR_E_RETURNCODE ret = ADDR_OK;
2395 
2396     pEquation->numBits = 8;
2397     pEquation->numBitComponents = 1;
2398 
2399     UINT_32 i = 0;
2400     for (; i < elementBytesLog2; i++)
2401     {
2402         InitChannel(1, 0 , i, &pEquation->addr[i]);
2403     }
2404 
2405     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2406 
2407     const UINT_32 maxBitsUsed = 4;
2408     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2409     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2410 
2411     for (i = 0; i < maxBitsUsed; i++)
2412     {
2413         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2414         InitChannel(1, 1, i, &y[i]);
2415     }
2416 
2417     if (IsStandardSwizzle(rsrcType, swMode))
2418     {
2419         switch (elementBytesLog2)
2420         {
2421             case 0:
2422                 pixelBit[0] = x[0];
2423                 pixelBit[1] = x[1];
2424                 pixelBit[2] = x[2];
2425                 pixelBit[3] = x[3];
2426                 pixelBit[4] = y[0];
2427                 pixelBit[5] = y[1];
2428                 pixelBit[6] = y[2];
2429                 pixelBit[7] = y[3];
2430                 break;
2431             case 1:
2432                 pixelBit[0] = x[0];
2433                 pixelBit[1] = x[1];
2434                 pixelBit[2] = x[2];
2435                 pixelBit[3] = y[0];
2436                 pixelBit[4] = y[1];
2437                 pixelBit[5] = y[2];
2438                 pixelBit[6] = x[3];
2439                 break;
2440             case 2:
2441                 pixelBit[0] = x[0];
2442                 pixelBit[1] = x[1];
2443                 pixelBit[2] = y[0];
2444                 pixelBit[3] = y[1];
2445                 pixelBit[4] = y[2];
2446                 pixelBit[5] = x[2];
2447                 break;
2448             case 3:
2449                 pixelBit[0] = x[0];
2450                 pixelBit[1] = y[0];
2451                 pixelBit[2] = y[1];
2452                 pixelBit[3] = x[1];
2453                 pixelBit[4] = x[2];
2454                 break;
2455             case 4:
2456                 pixelBit[0] = y[0];
2457                 pixelBit[1] = y[1];
2458                 pixelBit[2] = x[0];
2459                 pixelBit[3] = x[1];
2460                 break;
2461             default:
2462                 ADDR_ASSERT_ALWAYS();
2463                 ret = ADDR_INVALIDPARAMS;
2464                 break;
2465         }
2466     }
2467     else if (IsDisplaySwizzle(rsrcType, swMode))
2468     {
2469         switch (elementBytesLog2)
2470         {
2471             case 0:
2472                 pixelBit[0] = x[0];
2473                 pixelBit[1] = x[1];
2474                 pixelBit[2] = x[2];
2475                 pixelBit[3] = y[1];
2476                 pixelBit[4] = y[0];
2477                 pixelBit[5] = y[2];
2478                 pixelBit[6] = x[3];
2479                 pixelBit[7] = y[3];
2480                 break;
2481             case 1:
2482                 pixelBit[0] = x[0];
2483                 pixelBit[1] = x[1];
2484                 pixelBit[2] = x[2];
2485                 pixelBit[3] = y[0];
2486                 pixelBit[4] = y[1];
2487                 pixelBit[5] = y[2];
2488                 pixelBit[6] = x[3];
2489                 break;
2490             case 2:
2491                 pixelBit[0] = x[0];
2492                 pixelBit[1] = x[1];
2493                 pixelBit[2] = y[0];
2494                 pixelBit[3] = x[2];
2495                 pixelBit[4] = y[1];
2496                 pixelBit[5] = y[2];
2497                 break;
2498             case 3:
2499                 pixelBit[0] = x[0];
2500                 pixelBit[1] = y[0];
2501                 pixelBit[2] = x[1];
2502                 pixelBit[3] = x[2];
2503                 pixelBit[4] = y[1];
2504                 break;
2505             case 4:
2506                 pixelBit[0] = x[0];
2507                 pixelBit[1] = y[0];
2508                 pixelBit[2] = x[1];
2509                 pixelBit[3] = y[1];
2510                 break;
2511             default:
2512                 ADDR_ASSERT_ALWAYS();
2513                 ret = ADDR_INVALIDPARAMS;
2514                 break;
2515         }
2516     }
2517     else if (IsRotateSwizzle(swMode))
2518     {
2519         switch (elementBytesLog2)
2520         {
2521             case 0:
2522                 pixelBit[0] = y[0];
2523                 pixelBit[1] = y[1];
2524                 pixelBit[2] = y[2];
2525                 pixelBit[3] = x[1];
2526                 pixelBit[4] = x[0];
2527                 pixelBit[5] = x[2];
2528                 pixelBit[6] = x[3];
2529                 pixelBit[7] = y[3];
2530                 break;
2531             case 1:
2532                 pixelBit[0] = y[0];
2533                 pixelBit[1] = y[1];
2534                 pixelBit[2] = y[2];
2535                 pixelBit[3] = x[0];
2536                 pixelBit[4] = x[1];
2537                 pixelBit[5] = x[2];
2538                 pixelBit[6] = x[3];
2539                 break;
2540             case 2:
2541                 pixelBit[0] = y[0];
2542                 pixelBit[1] = y[1];
2543                 pixelBit[2] = x[0];
2544                 pixelBit[3] = y[2];
2545                 pixelBit[4] = x[1];
2546                 pixelBit[5] = x[2];
2547                 break;
2548             case 3:
2549                 pixelBit[0] = y[0];
2550                 pixelBit[1] = x[0];
2551                 pixelBit[2] = y[1];
2552                 pixelBit[3] = x[1];
2553                 pixelBit[4] = x[2];
2554                 break;
2555             default:
2556                 ADDR_ASSERT_ALWAYS();
2557             case 4:
2558                 ret = ADDR_INVALIDPARAMS;
2559                 break;
2560         }
2561     }
2562     else
2563     {
2564         ADDR_ASSERT_ALWAYS();
2565         ret = ADDR_INVALIDPARAMS;
2566     }
2567 
2568     // Post validation
2569     if (ret == ADDR_OK)
2570     {
2571         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2572         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2573                     (microBlockDim.w * (1 << elementBytesLog2)));
2574         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2575     }
2576 
2577     return ret;
2578 }
2579 
2580 /**
2581 ************************************************************************************************************************
2582 *   Gfx9Lib::HwlComputeThinEquation
2583 *
2584 *   @brief
2585 *       Interface function stub of ComputeThinEquation
2586 *
2587 *   @return
2588 *       ADDR_E_RETURNCODE
2589 ************************************************************************************************************************
2590 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2591 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2592     AddrResourceType rsrcType,
2593     AddrSwizzleMode  swMode,
2594     UINT_32          elementBytesLog2,
2595     ADDR_EQUATION*   pEquation) const
2596 {
2597     ADDR_E_RETURNCODE ret = ADDR_OK;
2598 
2599     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2600 
2601     UINT_32 maxXorBits = blockSizeLog2;
2602     if (IsNonPrtXor(swMode))
2603     {
2604         // For non-prt-xor, maybe need to initialize some more bits for xor
2605         // The highest xor bit used in equation will be max the following 3 items:
2606         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2607         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2608         // 3. blockSizeLog2
2609 
2610         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2611         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2612                                      GetPipeXorBits(blockSizeLog2) +
2613                                      2 * GetBankXorBits(blockSizeLog2));
2614     }
2615 
2616     const UINT_32 maxBitsUsed = 14;
2617     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2618     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2619     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2620 
2621     const UINT_32 extraXorBits = 16;
2622     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2623     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2624 
2625     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2626     {
2627         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2628         InitChannel(1, 1, i, &y[i]);
2629     }
2630 
2631     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2632 
2633     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2634     {
2635         InitChannel(1, 0 , i, &pixelBit[i]);
2636     }
2637 
2638     UINT_32 xIdx = 0;
2639     UINT_32 yIdx = 0;
2640     UINT_32 lowBits = 0;
2641 
2642     if (IsZOrderSwizzle(swMode))
2643     {
2644         if (elementBytesLog2 <= 3)
2645         {
2646             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2647             {
2648                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2649             }
2650 
2651             lowBits = 6;
2652         }
2653         else
2654         {
2655             ret = ADDR_INVALIDPARAMS;
2656         }
2657     }
2658     else
2659     {
2660         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2661 
2662         if (ret == ADDR_OK)
2663         {
2664             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2665             xIdx = Log2(microBlockDim.w);
2666             yIdx = Log2(microBlockDim.h);
2667             lowBits = 8;
2668         }
2669     }
2670 
2671     if (ret == ADDR_OK)
2672     {
2673         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2674         {
2675             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2676         }
2677 
2678         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2679         {
2680             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2681         }
2682 
2683         if (IsXor(swMode))
2684         {
2685             // Fill XOR bits
2686             UINT_32 pipeStart = m_pipeInterleaveLog2;
2687             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2688 
2689             UINT_32 bankStart = pipeStart + pipeXorBits;
2690             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2691 
2692             for (UINT_32 i = 0; i < pipeXorBits; i++)
2693             {
2694                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2695                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2696                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2697 
2698                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2699             }
2700 
2701             for (UINT_32 i = 0; i < bankXorBits; i++)
2702             {
2703                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2704                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2705                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2706 
2707                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2708             }
2709 
2710             if (IsPrt(swMode) == FALSE)
2711             {
2712                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2713                 {
2714                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2715                 }
2716 
2717                 for (UINT_32 i = 0; i < bankXorBits; i++)
2718                 {
2719                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2720                 }
2721             }
2722         }
2723 
2724         FillEqBitComponents(pEquation);
2725         pEquation->numBits = blockSizeLog2;
2726     }
2727 
2728     return ret;
2729 }
2730 
2731 /**
2732 ************************************************************************************************************************
2733 *   Gfx9Lib::HwlComputeThickEquation
2734 *
2735 *   @brief
2736 *       Interface function stub of ComputeThickEquation
2737 *
2738 *   @return
2739 *       ADDR_E_RETURNCODE
2740 ************************************************************************************************************************
2741 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2742 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2743     AddrResourceType rsrcType,
2744     AddrSwizzleMode  swMode,
2745     UINT_32          elementBytesLog2,
2746     ADDR_EQUATION*   pEquation) const
2747 {
2748     ADDR_E_RETURNCODE ret = ADDR_OK;
2749 
2750     ADDR_ASSERT(IsTex3d(rsrcType));
2751 
2752     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2753 
2754     UINT_32 maxXorBits = blockSizeLog2;
2755     if (IsNonPrtXor(swMode))
2756     {
2757         // For non-prt-xor, maybe need to initialize some more bits for xor
2758         // The highest xor bit used in equation will be max the following 3:
2759         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2760         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2761         // 3. blockSizeLog2
2762 
2763         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2764         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2765                                      GetPipeXorBits(blockSizeLog2) +
2766                                      3 * GetBankXorBits(blockSizeLog2));
2767     }
2768 
2769     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2770     {
2771         InitChannel(1, 0 , i, &pEquation->addr[i]);
2772     }
2773 
2774     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2775 
2776     const UINT_32 maxBitsUsed = 12;
2777     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2778     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2779     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2780     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2781 
2782     const UINT_32 extraXorBits = 24;
2783     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2784     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2785 
2786     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2787     {
2788         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2789         InitChannel(1, 1, i, &y[i]);
2790         InitChannel(1, 2, i, &z[i]);
2791     }
2792 
2793     if (IsZOrderSwizzle(swMode))
2794     {
2795         switch (elementBytesLog2)
2796         {
2797             case 0:
2798                 pixelBit[0]  = x[0];
2799                 pixelBit[1]  = y[0];
2800                 pixelBit[2]  = x[1];
2801                 pixelBit[3]  = y[1];
2802                 pixelBit[4]  = z[0];
2803                 pixelBit[5]  = z[1];
2804                 pixelBit[6]  = x[2];
2805                 pixelBit[7]  = z[2];
2806                 pixelBit[8]  = y[2];
2807                 pixelBit[9]  = x[3];
2808                 break;
2809             case 1:
2810                 pixelBit[0]  = x[0];
2811                 pixelBit[1]  = y[0];
2812                 pixelBit[2]  = x[1];
2813                 pixelBit[3]  = y[1];
2814                 pixelBit[4]  = z[0];
2815                 pixelBit[5]  = z[1];
2816                 pixelBit[6]  = z[2];
2817                 pixelBit[7]  = y[2];
2818                 pixelBit[8]  = x[2];
2819                 break;
2820             case 2:
2821                 pixelBit[0]  = x[0];
2822                 pixelBit[1]  = y[0];
2823                 pixelBit[2]  = x[1];
2824                 pixelBit[3]  = z[0];
2825                 pixelBit[4]  = y[1];
2826                 pixelBit[5]  = z[1];
2827                 pixelBit[6]  = y[2];
2828                 pixelBit[7]  = x[2];
2829                 break;
2830             case 3:
2831                 pixelBit[0]  = x[0];
2832                 pixelBit[1]  = y[0];
2833                 pixelBit[2]  = z[0];
2834                 pixelBit[3]  = x[1];
2835                 pixelBit[4]  = z[1];
2836                 pixelBit[5]  = y[1];
2837                 pixelBit[6]  = x[2];
2838                 break;
2839             case 4:
2840                 pixelBit[0]  = x[0];
2841                 pixelBit[1]  = y[0];
2842                 pixelBit[2]  = z[0];
2843                 pixelBit[3]  = z[1];
2844                 pixelBit[4]  = y[1];
2845                 pixelBit[5]  = x[1];
2846                 break;
2847             default:
2848                 ADDR_ASSERT_ALWAYS();
2849                 ret = ADDR_INVALIDPARAMS;
2850                 break;
2851         }
2852     }
2853     else if (IsStandardSwizzle(rsrcType, swMode))
2854     {
2855         switch (elementBytesLog2)
2856         {
2857             case 0:
2858                 pixelBit[0]  = x[0];
2859                 pixelBit[1]  = x[1];
2860                 pixelBit[2]  = x[2];
2861                 pixelBit[3]  = x[3];
2862                 pixelBit[4]  = y[0];
2863                 pixelBit[5]  = y[1];
2864                 pixelBit[6]  = z[0];
2865                 pixelBit[7]  = z[1];
2866                 pixelBit[8]  = z[2];
2867                 pixelBit[9]  = y[2];
2868                 break;
2869             case 1:
2870                 pixelBit[0]  = x[0];
2871                 pixelBit[1]  = x[1];
2872                 pixelBit[2]  = x[2];
2873                 pixelBit[3]  = y[0];
2874                 pixelBit[4]  = y[1];
2875                 pixelBit[5]  = z[0];
2876                 pixelBit[6]  = z[1];
2877                 pixelBit[7]  = z[2];
2878                 pixelBit[8]  = y[2];
2879                 break;
2880             case 2:
2881                 pixelBit[0]  = x[0];
2882                 pixelBit[1]  = x[1];
2883                 pixelBit[2]  = y[0];
2884                 pixelBit[3]  = y[1];
2885                 pixelBit[4]  = z[0];
2886                 pixelBit[5]  = z[1];
2887                 pixelBit[6]  = y[2];
2888                 pixelBit[7]  = x[2];
2889                 break;
2890             case 3:
2891                 pixelBit[0]  = x[0];
2892                 pixelBit[1]  = y[0];
2893                 pixelBit[2]  = y[1];
2894                 pixelBit[3]  = z[0];
2895                 pixelBit[4]  = z[1];
2896                 pixelBit[5]  = x[1];
2897                 pixelBit[6]  = x[2];
2898                 break;
2899             case 4:
2900                 pixelBit[0]  = y[0];
2901                 pixelBit[1]  = y[1];
2902                 pixelBit[2]  = z[0];
2903                 pixelBit[3]  = z[1];
2904                 pixelBit[4]  = x[0];
2905                 pixelBit[5]  = x[1];
2906                 break;
2907             default:
2908                 ADDR_ASSERT_ALWAYS();
2909                 ret = ADDR_INVALIDPARAMS;
2910                 break;
2911         }
2912     }
2913     else
2914     {
2915         ADDR_ASSERT_ALWAYS();
2916         ret = ADDR_INVALIDPARAMS;
2917     }
2918 
2919     if (ret == ADDR_OK)
2920     {
2921         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2922         UINT_32 xIdx = Log2(microBlockDim.w);
2923         UINT_32 yIdx = Log2(microBlockDim.h);
2924         UINT_32 zIdx = Log2(microBlockDim.d);
2925 
2926         pixelBit = pEquation->addr;
2927 
2928         const UINT_32 lowBits = 10;
2929         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2930         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2931 
2932         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2933         {
2934             if ((i % 3) == 0)
2935             {
2936                 pixelBit[i] = x[xIdx++];
2937             }
2938             else if ((i % 3) == 1)
2939             {
2940                 pixelBit[i] = z[zIdx++];
2941             }
2942             else
2943             {
2944                 pixelBit[i] = y[yIdx++];
2945             }
2946         }
2947 
2948         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2949         {
2950             if ((i % 3) == 0)
2951             {
2952                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2953             }
2954             else if ((i % 3) == 1)
2955             {
2956                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2957             }
2958             else
2959             {
2960                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2961             }
2962         }
2963 
2964         if (IsXor(swMode))
2965         {
2966             // Fill XOR bits
2967             UINT_32 pipeStart = m_pipeInterleaveLog2;
2968             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2969             for (UINT_32 i = 0; i < pipeXorBits; i++)
2970             {
2971                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2972                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2973                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2974 
2975                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2976 
2977                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2978                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2979                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2980 
2981                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2982             }
2983 
2984             UINT_32 bankStart = pipeStart + pipeXorBits;
2985             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2986             for (UINT_32 i = 0; i < bankXorBits; i++)
2987             {
2988                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2989                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2990                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2991 
2992                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2993 
2994                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2995                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2996                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2997 
2998                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2999             }
3000         }
3001 
3002         FillEqBitComponents(pEquation);
3003         pEquation->numBits = blockSizeLog2;
3004     }
3005 
3006     return ret;
3007 }
3008 
3009 /**
3010 ************************************************************************************************************************
3011 *   Gfx9Lib::IsValidDisplaySwizzleMode
3012 *
3013 *   @brief
3014 *       Check if a swizzle mode is supported by display engine
3015 *
3016 *   @return
3017 *       TRUE is swizzle mode is supported by display engine
3018 ************************************************************************************************************************
3019 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3020 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3021     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3022 {
3023     BOOL_32 support = FALSE;
3024 
3025     const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3026 
3027     if (m_settings.isDce12)
3028     {
3029         if (pIn->bpp == 32)
3030         {
3031             support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3032         }
3033         else if (pIn->bpp <= 64)
3034         {
3035             support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3036         }
3037     }
3038     else if (m_settings.isDcn1)
3039     {
3040         if (pIn->bpp < 64)
3041         {
3042             support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3043         }
3044         else if (pIn->bpp == 64)
3045         {
3046             support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3047         }
3048     }
3049     else if (m_settings.isDcn2)
3050     {
3051         if (pIn->bpp < 64)
3052         {
3053             support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3054         }
3055         else if (pIn->bpp == 64)
3056         {
3057             support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3058         }
3059     }
3060     else
3061     {
3062         ADDR_NOT_IMPLEMENTED();
3063     }
3064 
3065     return support;
3066 }
3067 
3068 /**
3069 ************************************************************************************************************************
3070 *   Gfx9Lib::HwlComputePipeBankXor
3071 *
3072 *   @brief
3073 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3074 *
3075 *   @return
3076 *       PipeBankXor value
3077 ************************************************************************************************************************
3078 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3079 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3080     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3081     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3082 {
3083     if (IsXor(pIn->swizzleMode))
3084     {
3085         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3086         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3087         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3088 
3089         UINT_32 pipeXor = 0;
3090         UINT_32 bankXor = 0;
3091 
3092         const UINT_32 bankMask = (1 << bankBits) - 1;
3093         const UINT_32 index    = pIn->surfIndex & bankMask;
3094 
3095         const UINT_32 bpp      = pIn->flags.fmask ?
3096                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3097         if (bankBits == 4)
3098         {
3099             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3100             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3101 
3102             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3103         }
3104         else if (bankBits > 0)
3105         {
3106             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3107             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3108             bankXor = (index * bankIncrease) & bankMask;
3109         }
3110 
3111         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3112     }
3113     else
3114     {
3115         pOut->pipeBankXor = 0;
3116     }
3117 
3118     return ADDR_OK;
3119 }
3120 
3121 /**
3122 ************************************************************************************************************************
3123 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3124 *
3125 *   @brief
3126 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3127 *
3128 *   @return
3129 *       PipeBankXor value
3130 ************************************************************************************************************************
3131 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3132 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3133     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3134     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3135 {
3136     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3137     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3138     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3139 
3140     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3141     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3142 
3143     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3144 
3145     return ADDR_OK;
3146 }
3147 
3148 /**
3149 ************************************************************************************************************************
3150 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3151 *
3152 *   @brief
3153 *       Compute sub resource offset to support swizzle pattern
3154 *
3155 *   @return
3156 *       Offset
3157 ************************************************************************************************************************
3158 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3159 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3160     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3161     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3162 {
3163     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3164 
3165     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3166     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3167     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3168     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3169     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3170     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3171 
3172     pOut->offset = pIn->slice * pIn->sliceSize +
3173                    pIn->macroBlockOffset +
3174                    (pIn->mipTailOffset ^ pipeBankXor) -
3175                    static_cast<UINT_64>(pipeBankXor);
3176     return ADDR_OK;
3177 }
3178 
3179 /**
3180 ************************************************************************************************************************
3181 *   Gfx9Lib::ValidateNonSwModeParams
3182 *
3183 *   @brief
3184 *       Validate compute surface info params except swizzle mode
3185 *
3186 *   @return
3187 *       TRUE if parameters are valid, FALSE otherwise
3188 ************************************************************************************************************************
3189 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3190 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3191     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3192 {
3193     BOOL_32 valid = TRUE;
3194 
3195     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3196     {
3197         ADDR_ASSERT_ALWAYS();
3198         valid = FALSE;
3199     }
3200 
3201     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3202     {
3203         ADDR_ASSERT_ALWAYS();
3204         valid = FALSE;
3205     }
3206 
3207     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3208     const BOOL_32 msaa   = (pIn->numFrags > 1);
3209     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3210 
3211     const AddrResourceType rsrcType = pIn->resourceType;
3212     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3213     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3214     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3215 
3216     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3217     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3218     const BOOL_32             display = flags.display || flags.rotated;
3219     const BOOL_32             stereo  = flags.qbStereo;
3220     const BOOL_32             fmask   = flags.fmask;
3221 
3222     // Resource type check
3223     if (tex1d)
3224     {
3225         if (msaa || zbuffer || display || stereo || isBc || fmask)
3226         {
3227             ADDR_ASSERT_ALWAYS();
3228             valid = FALSE;
3229         }
3230     }
3231     else if (tex2d)
3232     {
3233         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3234         {
3235             ADDR_ASSERT_ALWAYS();
3236             valid = FALSE;
3237         }
3238     }
3239     else if (tex3d)
3240     {
3241         if (msaa || zbuffer || display || stereo || fmask)
3242         {
3243             ADDR_ASSERT_ALWAYS();
3244             valid = FALSE;
3245         }
3246     }
3247     else
3248     {
3249         ADDR_ASSERT_ALWAYS();
3250         valid = FALSE;
3251     }
3252 
3253     return valid;
3254 }
3255 
3256 /**
3257 ************************************************************************************************************************
3258 *   Gfx9Lib::ValidateSwModeParams
3259 *
3260 *   @brief
3261 *       Validate compute surface info related to swizzle mode
3262 *
3263 *   @return
3264 *       TRUE if parameters are valid, FALSE otherwise
3265 ************************************************************************************************************************
3266 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3267 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3268     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3269 {
3270     BOOL_32 valid = TRUE;
3271 
3272     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3273     {
3274         ADDR_ASSERT_ALWAYS();
3275         valid = FALSE;
3276     }
3277 
3278     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3279     const BOOL_32 msaa   = (pIn->numFrags > 1);
3280     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3281     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3282 
3283     const AddrResourceType rsrcType = pIn->resourceType;
3284     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3285     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3286     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3287 
3288     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3289     const BOOL_32          linear      = IsLinear(swizzle);
3290     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3291     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3292 
3293     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3294     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3295     const BOOL_32             color   = flags.color;
3296     const BOOL_32             texture = flags.texture;
3297     const BOOL_32             display = flags.display || flags.rotated;
3298     const BOOL_32             prt     = flags.prt;
3299     const BOOL_32             fmask   = flags.fmask;
3300 
3301     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3302     const BOOL_32             zMaxMip = tex3d && mipmap &&
3303                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3304 
3305     // Misc check
3306     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3307     {
3308         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3309         ADDR_ASSERT_ALWAYS();
3310         valid = FALSE;
3311     }
3312 
3313     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3314     {
3315         ADDR_ASSERT_ALWAYS();
3316         valid = FALSE;
3317     }
3318 
3319     if ((pIn->bpp == 96) && (linear == FALSE))
3320     {
3321         ADDR_ASSERT_ALWAYS();
3322         valid = FALSE;
3323     }
3324 
3325     if (prt && isNonPrtXor)
3326     {
3327         ADDR_ASSERT_ALWAYS();
3328         valid = FALSE;
3329     }
3330 
3331     // Resource type check
3332     if (tex1d)
3333     {
3334         if (linear == FALSE)
3335         {
3336             ADDR_ASSERT_ALWAYS();
3337             valid = FALSE;
3338         }
3339     }
3340 
3341     // Swizzle type check
3342     if (linear)
3343     {
3344         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3345             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3346         {
3347             ADDR_ASSERT_ALWAYS();
3348             valid = FALSE;
3349         }
3350     }
3351     else if (IsZOrderSwizzle(swizzle))
3352     {
3353         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3354         {
3355             ADDR_ASSERT_ALWAYS();
3356             valid = FALSE;
3357         }
3358     }
3359     else if (IsStandardSwizzle(swizzle))
3360     {
3361         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3362         {
3363             ADDR_ASSERT_ALWAYS();
3364             valid = FALSE;
3365         }
3366     }
3367     else if (IsDisplaySwizzle(swizzle))
3368     {
3369         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3370         {
3371             ADDR_ASSERT_ALWAYS();
3372             valid = FALSE;
3373         }
3374     }
3375     else if (IsRotateSwizzle(swizzle))
3376     {
3377         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3378         {
3379             ADDR_ASSERT_ALWAYS();
3380             valid = FALSE;
3381         }
3382     }
3383     else
3384     {
3385         ADDR_ASSERT_ALWAYS();
3386         valid = FALSE;
3387     }
3388 
3389     // Block type check
3390     if (blk256B)
3391     {
3392         if (prt || zbuffer || tex3d || mipmap || msaa)
3393         {
3394             ADDR_ASSERT_ALWAYS();
3395             valid = FALSE;
3396         }
3397     }
3398 
3399     return valid;
3400 }
3401 
3402 /**
3403 ************************************************************************************************************************
3404 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3405 *
3406 *   @brief
3407 *       Compute surface info sanity check
3408 *
3409 *   @return
3410 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3411 ************************************************************************************************************************
3412 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3413 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3414     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3415 {
3416     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3417 }
3418 
3419 /**
3420 ************************************************************************************************************************
3421 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3422 *
3423 *   @brief
3424 *       Internal function to get suggested surface information for client to use
3425 *
3426 *   @return
3427 *       ADDR_E_RETURNCODE
3428 ************************************************************************************************************************
3429 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3430 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3431     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3432     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3433 {
3434     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3435     ElemLib*          pElemLib   = GetElemLib();
3436 
3437     UINT_32 bpp        = pIn->bpp;
3438     UINT_32 width      = Max(pIn->width, 1u);
3439     UINT_32 height     = Max(pIn->height, 1u);
3440     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3441     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3442 
3443     if (pIn->flags.fmask)
3444     {
3445         bpp                = GetFmaskBpp(numSamples, numFrags);
3446         numFrags           = 1;
3447         numSamples         = 1;
3448         pOut->resourceType = ADDR_RSRC_TEX_2D;
3449     }
3450     else
3451     {
3452         // Set format to INVALID will skip this conversion
3453         if (pIn->format != ADDR_FMT_INVALID)
3454         {
3455             UINT_32 expandX, expandY;
3456 
3457             // Don't care for this case
3458             ElemMode elemMode = ADDR_UNCOMPRESSED;
3459 
3460             // Get compression/expansion factors and element mode which indicates compression/expansion
3461             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3462                                             &elemMode,
3463                                             &expandX,
3464                                             &expandY);
3465 
3466             UINT_32 basePitch = 0;
3467             GetElemLib()->AdjustSurfaceInfo(elemMode,
3468                                             expandX,
3469                                             expandY,
3470                                             &bpp,
3471                                             &basePitch,
3472                                             &width,
3473                                             &height);
3474         }
3475 
3476         // The output may get changed for volume(3D) texture resource in future
3477         pOut->resourceType = pIn->resourceType;
3478     }
3479 
3480     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3481     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3482     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3483     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3484 
3485     // Pre sanity check on non swizzle mode parameters
3486     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3487     localIn.flags        = pIn->flags;
3488     localIn.resourceType = pOut->resourceType;
3489     localIn.format       = pIn->format;
3490     localIn.bpp          = bpp;
3491     localIn.width        = width;
3492     localIn.height       = height;
3493     localIn.numSlices    = numSlices;
3494     localIn.numMipLevels = numMipLevels;
3495     localIn.numSamples   = numSamples;
3496     localIn.numFrags     = numFrags;
3497 
3498     if (ValidateNonSwModeParams(&localIn))
3499     {
3500         // Forbid swizzle mode(s) by client setting
3501         ADDR2_SWMODE_SET allowedSwModeSet = {};
3502         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3503         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3504         allowedSwModeSet.value |=
3505             pIn->forbiddenBlock.macroThin4KB ? 0 :
3506             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3507         allowedSwModeSet.value |=
3508             pIn->forbiddenBlock.macroThick4KB ? 0 :
3509             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3510         allowedSwModeSet.value |=
3511             pIn->forbiddenBlock.macroThin64KB ? 0 :
3512             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3513         allowedSwModeSet.value |=
3514             pIn->forbiddenBlock.macroThick64KB ? 0 :
3515             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3516 
3517         if (pIn->preferredSwSet.value != 0)
3518         {
3519             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3520             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3521             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3522             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3523         }
3524 
3525         if (pIn->noXor)
3526         {
3527             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3528         }
3529 
3530         if (pIn->maxAlign > 0)
3531         {
3532             if (pIn->maxAlign < Size64K)
3533             {
3534                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3535             }
3536 
3537             if (pIn->maxAlign < Size4K)
3538             {
3539                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3540             }
3541 
3542             if (pIn->maxAlign < Size256)
3543             {
3544                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3545             }
3546         }
3547 
3548         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3549         switch (pOut->resourceType)
3550         {
3551             case ADDR_RSRC_TEX_1D:
3552                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3553                 break;
3554 
3555             case ADDR_RSRC_TEX_2D:
3556                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3557 
3558                 if (bpp > 64)
3559                 {
3560                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3561                 }
3562                 break;
3563 
3564             case ADDR_RSRC_TEX_3D:
3565                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3566 
3567                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3568                 {
3569                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3570                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3571                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3572                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3573                 }
3574 
3575                 if ((bpp == 128) && pIn->flags.color)
3576                 {
3577                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3578                 }
3579 
3580                 if (pIn->flags.view3dAs2dArray)
3581                 {
3582                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3583                 }
3584                 break;
3585 
3586             default:
3587                 ADDR_ASSERT_ALWAYS();
3588                 allowedSwModeSet.value = 0;
3589                 break;
3590         }
3591 
3592         if (pIn->format == ADDR_FMT_32_32_32)
3593         {
3594             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3595         }
3596 
3597         if (ElemLib::IsBlockCompressed(pIn->format))
3598         {
3599             if (pIn->flags.texture)
3600             {
3601                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3602             }
3603             else
3604             {
3605                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3606             }
3607         }
3608 
3609         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3610             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3611         {
3612             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3613         }
3614 
3615         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3616         {
3617             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3618 
3619             if (pIn->flags.noMetadata == FALSE)
3620             {
3621                 if (pIn->flags.depth &&
3622                     pIn->flags.texture &&
3623                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3624                 {
3625                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3626                     // equation from wrong address within memory range a tile covered and use the
3627                     // garbage data for compressed Z reading which finally leads to corruption.
3628                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3629                 }
3630 
3631                 if (m_settings.htileCacheRbConflict &&
3632                     (pIn->flags.depth || pIn->flags.stencil) &&
3633                     (numSlices > 1) &&
3634                     (pIn->flags.metaRbUnaligned == FALSE) &&
3635                     (pIn->flags.metaPipeUnaligned == FALSE))
3636                 {
3637                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3638                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3639                 }
3640             }
3641         }
3642 
3643         if (msaa)
3644         {
3645             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3646         }
3647 
3648         if ((numFrags > 1) &&
3649             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3650         {
3651             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3652             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3653         }
3654 
3655         if (numMipLevels > 1)
3656         {
3657             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3658         }
3659 
3660         if (displayRsrc)
3661         {
3662             if (m_settings.isDce12)
3663             {
3664                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3665             }
3666             else if (m_settings.isDcn1)
3667             {
3668                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3669             }
3670             else if (m_settings.isDcn2)
3671             {
3672                 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3673             }
3674             else
3675             {
3676                 ADDR_NOT_IMPLEMENTED();
3677             }
3678         }
3679 
3680         if (allowedSwModeSet.value != 0)
3681         {
3682 #if DEBUG
3683             // Post sanity check, at least AddrLib should accept the output generated by its own
3684             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3685 
3686             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3687             {
3688                 if (validateSwModeSet & 1)
3689                 {
3690                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3691                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3692                 }
3693 
3694                 validateSwModeSet >>= 1;
3695             }
3696 #endif
3697 
3698             pOut->validSwModeSet = allowedSwModeSet;
3699             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3700             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3701             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3702 
3703             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3704 
3705             if (pOut->clientPreferredSwSet.value == 0)
3706             {
3707                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3708             }
3709 
3710             // Apply optional restrictions
3711             if (pIn->flags.needEquation)
3712             {
3713                 UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
3714                                                                     ADDR_MAX_LEGACY_EQUATION_COMP;
3715                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3716             }
3717 
3718             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3719             {
3720                 pOut->swizzleMode = ADDR_SW_LINEAR;
3721             }
3722             else
3723             {
3724                 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3725 
3726                 if ((height > 1) && (computeMinSize == FALSE))
3727                 {
3728                     // Always ignore linear swizzle mode if:
3729                     // 1. This is a (2D/3D) resource with height > 1
3730                     // 2. Client doesn't require computing minimize size
3731                     allowedSwModeSet.swLinear = 0;
3732                 }
3733 
3734                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3735 
3736                 // Determine block size if there are 2 or more block type candidates
3737                 if (IsPow2(allowedBlockSet.value) == FALSE)
3738                 {
3739                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3740 
3741                     swMode[AddrBlockLinear]   = ADDR_SW_LINEAR;
3742                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3743                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3744                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3745 
3746                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3747                     {
3748                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3749                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3750                     }
3751 
3752                     UINT_64 padSize[AddrBlockMaxTiledType] = {};
3753 
3754                     const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3755                     const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3756                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3757                     UINT_32       minSizeBlk         = AddrBlockMicro;
3758                     UINT_64       minSize            = 0;
3759 
3760                     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3761 
3762                     for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3763                     {
3764                         if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3765                         {
3766                             localIn.swizzleMode = swMode[i];
3767 
3768                             if (localIn.swizzleMode == ADDR_SW_LINEAR)
3769                             {
3770                                 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3771                             }
3772                             else
3773                             {
3774                                 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3775                             }
3776 
3777                             if (returnCode == ADDR_OK)
3778                             {
3779                                 padSize[i] = localOut.surfSize;
3780 
3781                                 if ((minSize == 0) ||
3782                                     Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3783                                 {
3784                                     minSize    = padSize[i];
3785                                     minSizeBlk = i;
3786                                 }
3787                             }
3788                             else
3789                             {
3790                                 ADDR_ASSERT_ALWAYS();
3791                                 break;
3792                             }
3793                         }
3794                     }
3795 
3796                     if (pIn->memoryBudget > 1.0)
3797                     {
3798                         // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3799                         // smaller-block type again in coming loop
3800                         switch (minSizeBlk)
3801                         {
3802                             case AddrBlockThick64KB:
3803                                 allowedBlockSet.macroThin64KB = 0;
3804                             case AddrBlockThin64KB:
3805                                 allowedBlockSet.macroThick4KB = 0;
3806                             case AddrBlockThick4KB:
3807                                 allowedBlockSet.macroThin4KB = 0;
3808                             case AddrBlockThin4KB:
3809                                 allowedBlockSet.micro  = 0;
3810                             case AddrBlockMicro:
3811                                 allowedBlockSet.linear = 0;
3812                             case AddrBlockLinear:
3813                                 break;
3814 
3815                             default:
3816                                 ADDR_ASSERT_ALWAYS();
3817                                 break;
3818                         }
3819 
3820                         for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3821                         {
3822                             if ((i != minSizeBlk) &&
3823                                 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3824                             {
3825                                 if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3826                                 {
3827                                     // Clear the block type if the memory waste is unacceptable
3828                                     allowedBlockSet.value &= ~(1u << (i - 1));
3829                                 }
3830                             }
3831                         }
3832 
3833                         // Remove linear block type if 2 or more block types are allowed
3834                         if (IsPow2(allowedBlockSet.value) == FALSE)
3835                         {
3836                             allowedBlockSet.linear = 0;
3837                         }
3838 
3839                         // Select the biggest allowed block type
3840                         minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3841 
3842                         if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3843                         {
3844                             minSizeBlk = AddrBlockLinear;
3845                         }
3846                     }
3847 
3848                     switch (minSizeBlk)
3849                     {
3850                         case AddrBlockLinear:
3851                             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3852                             break;
3853 
3854                         case AddrBlockMicro:
3855                             ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3856                             allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3857                             break;
3858 
3859                         case AddrBlockThin4KB:
3860                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3861                                                       Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3862                             break;
3863 
3864                         case AddrBlockThick4KB:
3865                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3866                             allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3867                             break;
3868 
3869                         case AddrBlockThin64KB:
3870                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3871                                                       Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3872                             break;
3873 
3874                         case AddrBlockThick64KB:
3875                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3876                             allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3877                             break;
3878 
3879                         default:
3880                             ADDR_ASSERT_ALWAYS();
3881                             allowedSwModeSet.value = 0;
3882                             break;
3883                     }
3884                 }
3885 
3886                 // Block type should be determined.
3887                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3888 
3889                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3890 
3891                 // Determine swizzle type if there are 2 or more swizzle type candidates
3892                 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3893                 {
3894                     if (ElemLib::IsBlockCompressed(pIn->format))
3895                     {
3896                         if (allowedSwSet.sw_D)
3897                         {
3898                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3899                         }
3900                         else
3901                         {
3902                             ADDR_ASSERT(allowedSwSet.sw_S);
3903                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3904                         }
3905                     }
3906                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3907                     {
3908                         if (allowedSwSet.sw_S)
3909                         {
3910                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3911                         }
3912                         else if (allowedSwSet.sw_D)
3913                         {
3914                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3915                         }
3916                         else
3917                         {
3918                             ADDR_ASSERT(allowedSwSet.sw_R);
3919                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3920                         }
3921                     }
3922                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3923                     {
3924                         if (pIn->flags.color && allowedSwSet.sw_D)
3925                         {
3926                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3927                         }
3928                         else if (allowedSwSet.sw_Z)
3929                         {
3930                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3931                         }
3932                         else
3933                         {
3934                             ADDR_ASSERT(allowedSwSet.sw_S);
3935                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3936                         }
3937                     }
3938                     else
3939                     {
3940                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3941                         {
3942                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3943                         }
3944                         else if (allowedSwSet.sw_D)
3945                         {
3946                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3947                         }
3948                         else if (allowedSwSet.sw_S)
3949                         {
3950                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3951                         }
3952                         else
3953                         {
3954                             ADDR_ASSERT(allowedSwSet.sw_Z);
3955                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3956                         }
3957                     }
3958 
3959                     // Swizzle type should be determined.
3960                     ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3961                 }
3962 
3963                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3964                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3965                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3966                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3967             }
3968 
3969             returnCode = ADDR_OK;
3970         }
3971         else
3972         {
3973             // Invalid combination...
3974             ADDR_ASSERT_ALWAYS();
3975         }
3976     }
3977     else
3978     {
3979         // Invalid combination...
3980         ADDR_ASSERT_ALWAYS();
3981     }
3982 
3983     return returnCode;
3984 }
3985 
3986 /**
3987 ************************************************************************************************************************
3988 *   Gfx9Lib::ComputeStereoInfo
3989 *
3990 *   @brief
3991 *       Compute height alignment and right eye pipeBankXor for stereo surface
3992 *
3993 *   @return
3994 *       Error code
3995 *
3996 ************************************************************************************************************************
3997 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3998 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3999     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
4000     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
4001     UINT_32*                                pHeightAlign
4002     ) const
4003 {
4004     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4005 
4006     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4007 
4008     if (eqIndex < m_numEquations)
4009     {
4010         if (IsXor(pIn->swizzleMode))
4011         {
4012             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
4013             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
4014             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
4015             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
4016             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4017             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
4018 
4019             ADDR_ASSERT(maxYCoordBlock256 ==
4020                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4021 
4022             const UINT_32 maxYCoordInBaseEquation =
4023                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4024 
4025             ADDR_ASSERT(maxYCoordInBaseEquation ==
4026                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4027 
4028             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4029 
4030             ADDR_ASSERT(maxYCoordInPipeXor ==
4031                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4032 
4033             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4034                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4035 
4036             ADDR_ASSERT(maxYCoordInBankXor ==
4037                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4038 
4039             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4040 
4041             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4042             {
4043                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4044 
4045                 if (pOut->pStereoInfo != NULL)
4046                 {
4047                     pOut->pStereoInfo->rightSwizzle = 0;
4048 
4049                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4050                     {
4051                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4052                         {
4053                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4054                         }
4055 
4056                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4057                         {
4058                             pOut->pStereoInfo->rightSwizzle |=
4059                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4060                         }
4061 
4062                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4063                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4064                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4065                     }
4066                 }
4067             }
4068         }
4069     }
4070     else
4071     {
4072         ADDR_ASSERT_ALWAYS();
4073         returnCode = ADDR_ERROR;
4074     }
4075 
4076     return returnCode;
4077 }
4078 
4079 /**
4080 ************************************************************************************************************************
4081 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
4082 *
4083 *   @brief
4084 *       Internal function to calculate alignment for tiled surface
4085 *
4086 *   @return
4087 *       ADDR_E_RETURNCODE
4088 ************************************************************************************************************************
4089 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4090 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4091      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4092      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4093      ) const
4094 {
4095     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4096                                                                 &pOut->blockHeight,
4097                                                                 &pOut->blockSlices,
4098                                                                 pIn->bpp,
4099                                                                 pIn->numFrags,
4100                                                                 pIn->resourceType,
4101                                                                 pIn->swizzleMode);
4102 
4103     if (returnCode == ADDR_OK)
4104     {
4105         UINT_32 pitchAlignInElement = pOut->blockWidth;
4106 
4107         if ((IsTex2d(pIn->resourceType) == TRUE) &&
4108             (pIn->flags.display || pIn->flags.rotated) &&
4109             (pIn->numMipLevels <= 1) &&
4110             (pIn->numSamples <= 1) &&
4111             (pIn->numFrags <= 1))
4112         {
4113             // Display engine needs pitch align to be at least 32 pixels.
4114             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4115         }
4116 
4117         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4118 
4119         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4120         {
4121             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4122             {
4123                 returnCode = ADDR_INVALIDPARAMS;
4124             }
4125             else if (pIn->pitchInElement < pOut->pitch)
4126             {
4127                 returnCode = ADDR_INVALIDPARAMS;
4128             }
4129             else
4130             {
4131                 pOut->pitch = pIn->pitchInElement;
4132             }
4133         }
4134 
4135         UINT_32 heightAlign = 0;
4136 
4137         if (pIn->flags.qbStereo)
4138         {
4139             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4140         }
4141 
4142         if (returnCode == ADDR_OK)
4143         {
4144             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4145 
4146             if (heightAlign > 1)
4147             {
4148                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4149             }
4150 
4151             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4152 
4153             pOut->epitchIsHeight   = FALSE;
4154             pOut->mipChainInTail   = FALSE;
4155             pOut->firstMipIdInTail = pIn->numMipLevels;
4156 
4157             pOut->mipChainPitch    = pOut->pitch;
4158             pOut->mipChainHeight   = pOut->height;
4159             pOut->mipChainSlice    = pOut->numSlices;
4160 
4161             if (pIn->numMipLevels > 1)
4162             {
4163                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4164                                                          pIn->swizzleMode,
4165                                                          pIn->bpp,
4166                                                          pIn->width,
4167                                                          pIn->height,
4168                                                          pIn->numSlices,
4169                                                          pOut->blockWidth,
4170                                                          pOut->blockHeight,
4171                                                          pOut->blockSlices,
4172                                                          pIn->numMipLevels,
4173                                                          pOut->pMipInfo);
4174 
4175                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4176 
4177                 if (endingMipId == 0)
4178                 {
4179                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4180                                                            pIn->swizzleMode,
4181                                                            pOut->blockWidth,
4182                                                            pOut->blockHeight,
4183                                                            pOut->blockSlices);
4184 
4185                     pOut->epitchIsHeight = TRUE;
4186                     pOut->pitch          = tailMaxDim.w;
4187                     pOut->height         = tailMaxDim.h;
4188                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4189                                            tailMaxDim.d : pIn->numSlices;
4190                     pOut->mipChainInTail = TRUE;
4191                 }
4192                 else
4193                 {
4194                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4195                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4196 
4197                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4198                                                            pIn->swizzleMode,
4199                                                            mip0WidthInBlk,
4200                                                            mip0HeightInBlk,
4201                                                            pOut->numSlices / pOut->blockSlices);
4202                     if (majorMode == ADDR_MAJOR_Y)
4203                     {
4204                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4205 
4206                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4207                         {
4208                             mip1WidthInBlk++;
4209                         }
4210 
4211                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4212 
4213                         pOut->epitchIsHeight = FALSE;
4214                     }
4215                     else
4216                     {
4217                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4218 
4219                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4220                         {
4221                             mip1HeightInBlk++;
4222                         }
4223 
4224                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4225 
4226                         pOut->epitchIsHeight = TRUE;
4227                     }
4228                 }
4229 
4230                 if (pOut->pMipInfo != NULL)
4231                 {
4232                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4233 
4234                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4235                     {
4236                         Dim3d   mipStartPos          = {0};
4237                         UINT_32 mipTailOffsetInBytes = 0;
4238 
4239                         mipStartPos = GetMipStartPos(pIn->resourceType,
4240                                                      pIn->swizzleMode,
4241                                                      pOut->pitch,
4242                                                      pOut->height,
4243                                                      pOut->numSlices,
4244                                                      pOut->blockWidth,
4245                                                      pOut->blockHeight,
4246                                                      pOut->blockSlices,
4247                                                      i,
4248                                                      elementBytesLog2,
4249                                                      &mipTailOffsetInBytes);
4250 
4251                         UINT_32 pitchInBlock     =
4252                             pOut->mipChainPitch / pOut->blockWidth;
4253                         UINT_32 sliceInBlock     =
4254                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4255                         UINT_64 blockIndex       =
4256                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4257                         UINT_64 macroBlockOffset =
4258                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4259 
4260                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4261                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4262                     }
4263                 }
4264             }
4265             else if (pOut->pMipInfo != NULL)
4266             {
4267                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4268                 pOut->pMipInfo[0].height = pOut->height;
4269                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4270                 pOut->pMipInfo[0].offset = 0;
4271             }
4272 
4273             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4274                               (pIn->bpp >> 3) * pIn->numFrags;
4275             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4276             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4277 
4278             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4279                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4280                 (pIn->flags.texture == TRUE) &&
4281                 (pIn->flags.noMetadata == FALSE) &&
4282                 (pIn->flags.metaPipeUnaligned == FALSE))
4283             {
4284                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4285                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4286                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4287                 // them, which may cause invalid metadata to be fetched.
4288                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4289             }
4290 
4291             if (pIn->flags.prt)
4292             {
4293                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4294             }
4295         }
4296     }
4297 
4298     return returnCode;
4299 }
4300 
4301 /**
4302 ************************************************************************************************************************
4303 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4304 *
4305 *   @brief
4306 *       Internal function to calculate alignment for linear surface
4307 *
4308 *   @return
4309 *       ADDR_E_RETURNCODE
4310 ************************************************************************************************************************
4311 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4312 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4313      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4314      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4315      ) const
4316 {
4317     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4318     UINT_32           pitch        = 0;
4319     UINT_32           actualHeight = 0;
4320     UINT_32           elementBytes = pIn->bpp >> 3;
4321     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4322 
4323     if (IsTex1d(pIn->resourceType))
4324     {
4325         if (pIn->height > 1)
4326         {
4327             returnCode = ADDR_INVALIDPARAMS;
4328         }
4329         else
4330         {
4331             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4332 
4333             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4334             actualHeight = pIn->numMipLevels;
4335 
4336             if (pIn->flags.prt == FALSE)
4337             {
4338                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4339                                                         &pitch, &actualHeight);
4340             }
4341 
4342             if (returnCode == ADDR_OK)
4343             {
4344                 if (pOut->pMipInfo != NULL)
4345                 {
4346                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4347                     {
4348                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4349                         pOut->pMipInfo[i].pitch  = pitch;
4350                         pOut->pMipInfo[i].height = 1;
4351                         pOut->pMipInfo[i].depth  = 1;
4352                     }
4353                 }
4354             }
4355         }
4356     }
4357     else
4358     {
4359         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4360     }
4361 
4362     if ((pitch == 0) || (actualHeight == 0))
4363     {
4364         returnCode = ADDR_INVALIDPARAMS;
4365     }
4366 
4367     if (returnCode == ADDR_OK)
4368     {
4369         pOut->pitch          = pitch;
4370         pOut->height         = pIn->height;
4371         pOut->numSlices      = pIn->numSlices;
4372         pOut->mipChainPitch  = pitch;
4373         pOut->mipChainHeight = actualHeight;
4374         pOut->mipChainSlice  = pOut->numSlices;
4375         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4376         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4377         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4378         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4379         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4380         pOut->blockHeight    = 1;
4381         pOut->blockSlices    = 1;
4382     }
4383 
4384     // Post calculation validate
4385     ADDR_ASSERT(pOut->sliceSize > 0);
4386 
4387     return returnCode;
4388 }
4389 
4390 /**
4391 ************************************************************************************************************************
4392 *   Gfx9Lib::GetMipChainInfo
4393 *
4394 *   @brief
4395 *       Internal function to get out information about mip chain
4396 *
4397 *   @return
4398 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4399 ************************************************************************************************************************
4400 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4401 UINT_32 Gfx9Lib::GetMipChainInfo(
4402     AddrResourceType  resourceType,
4403     AddrSwizzleMode   swizzleMode,
4404     UINT_32           bpp,
4405     UINT_32           mip0Width,
4406     UINT_32           mip0Height,
4407     UINT_32           mip0Depth,
4408     UINT_32           blockWidth,
4409     UINT_32           blockHeight,
4410     UINT_32           blockDepth,
4411     UINT_32           numMipLevel,
4412     ADDR2_MIP_INFO*   pMipInfo) const
4413 {
4414     const Dim3d tailMaxDim =
4415         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4416 
4417     UINT_32 mipPitch         = mip0Width;
4418     UINT_32 mipHeight        = mip0Height;
4419     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4420     UINT_32 offset           = 0;
4421     UINT_32 firstMipIdInTail = numMipLevel;
4422     BOOL_32 inTail           = FALSE;
4423     BOOL_32 finalDim         = FALSE;
4424     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4425     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4426 
4427     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4428     {
4429         if (inTail)
4430         {
4431             if (finalDim == FALSE)
4432             {
4433                 UINT_32 mipSize;
4434 
4435                 if (is3dThick)
4436                 {
4437                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4438                 }
4439                 else
4440                 {
4441                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4442                 }
4443 
4444                 if (mipSize <= 256)
4445                 {
4446                     UINT_32 index = Log2(bpp >> 3);
4447 
4448                     if (is3dThick)
4449                     {
4450                         mipPitch  = Block256_3dZ[index].w;
4451                         mipHeight = Block256_3dZ[index].h;
4452                         mipDepth  = Block256_3dZ[index].d;
4453                     }
4454                     else
4455                     {
4456                         mipPitch  = Block256_2d[index].w;
4457                         mipHeight = Block256_2d[index].h;
4458                     }
4459 
4460                     finalDim = TRUE;
4461                 }
4462             }
4463         }
4464         else
4465         {
4466             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4467                                  mipPitch, mipHeight, mipDepth);
4468 
4469             if (inTail)
4470             {
4471                 firstMipIdInTail = mipId;
4472                 mipPitch         = tailMaxDim.w;
4473                 mipHeight        = tailMaxDim.h;
4474 
4475                 if (is3dThick)
4476                 {
4477                     mipDepth = tailMaxDim.d;
4478                 }
4479             }
4480             else
4481             {
4482                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4483                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4484 
4485                 if (is3dThick)
4486                 {
4487                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4488                 }
4489             }
4490         }
4491 
4492         if (pMipInfo != NULL)
4493         {
4494             pMipInfo[mipId].pitch  = mipPitch;
4495             pMipInfo[mipId].height = mipHeight;
4496             pMipInfo[mipId].depth  = mipDepth;
4497             pMipInfo[mipId].offset = offset;
4498         }
4499 
4500         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4501 
4502         if (finalDim)
4503         {
4504             if (is3dThin)
4505             {
4506                 mipDepth = Max(mipDepth >> 1, 1u);
4507             }
4508         }
4509         else
4510         {
4511             mipPitch  = Max(mipPitch >> 1, 1u);
4512             mipHeight = Max(mipHeight >> 1, 1u);
4513 
4514             if (is3dThick || is3dThin)
4515             {
4516                 mipDepth = Max(mipDepth >> 1, 1u);
4517             }
4518         }
4519     }
4520 
4521     return firstMipIdInTail;
4522 }
4523 
4524 /**
4525 ************************************************************************************************************************
4526 *   Gfx9Lib::GetMetaMiptailInfo
4527 *
4528 *   @brief
4529 *       Get mip tail coordinate information.
4530 *
4531 *   @return
4532 *       N/A
4533 ************************************************************************************************************************
4534 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4535 VOID Gfx9Lib::GetMetaMiptailInfo(
4536     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4537     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4538     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4539     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4540     ) const
4541 {
4542     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4543     UINT_32 mipWidth  = pMetaBlkDim->w;
4544     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4545     UINT_32 mipDepth  = pMetaBlkDim->d;
4546     UINT_32 minInc;
4547 
4548     if (isThick)
4549     {
4550         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4551     }
4552     else if (pMetaBlkDim->h >= 1024)
4553     {
4554         minInc = 256;
4555     }
4556     else if (pMetaBlkDim->h == 512)
4557     {
4558         minInc = 128;
4559     }
4560     else
4561     {
4562         minInc = 64;
4563     }
4564 
4565     UINT_32 blk32MipId = 0xFFFFFFFF;
4566 
4567     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4568     {
4569         pInfo[mip].inMiptail = TRUE;
4570         pInfo[mip].startX = mipCoord.w;
4571         pInfo[mip].startY = mipCoord.h;
4572         pInfo[mip].startZ = mipCoord.d;
4573         pInfo[mip].width = mipWidth;
4574         pInfo[mip].height = mipHeight;
4575         pInfo[mip].depth = mipDepth;
4576 
4577         if (mipWidth <= 32)
4578         {
4579             if (blk32MipId == 0xFFFFFFFF)
4580             {
4581                 blk32MipId = mip;
4582             }
4583 
4584             mipCoord.w = pInfo[blk32MipId].startX;
4585             mipCoord.h = pInfo[blk32MipId].startY;
4586             mipCoord.d = pInfo[blk32MipId].startZ;
4587 
4588             switch (mip - blk32MipId)
4589             {
4590                 case 0:
4591                     mipCoord.w += 32;       // 16x16
4592                     break;
4593                 case 1:
4594                     mipCoord.h += 32;       // 8x8
4595                     break;
4596                 case 2:
4597                     mipCoord.h += 32;       // 4x4
4598                     mipCoord.w += 16;
4599                     break;
4600                 case 3:
4601                     mipCoord.h += 32;       // 2x2
4602                     mipCoord.w += 32;
4603                     break;
4604                 case 4:
4605                     mipCoord.h += 32;       // 1x1
4606                     mipCoord.w += 48;
4607                     break;
4608                 // The following are for BC/ASTC formats
4609                 case 5:
4610                     mipCoord.h += 48;       // 1/2 x 1/2
4611                     break;
4612                 case 6:
4613                     mipCoord.h += 48;       // 1/4 x 1/4
4614                     mipCoord.w += 16;
4615                     break;
4616                 case 7:
4617                     mipCoord.h += 48;       // 1/8 x 1/8
4618                     mipCoord.w += 32;
4619                     break;
4620                 case 8:
4621                     mipCoord.h += 48;       // 1/16 x 1/16
4622                     mipCoord.w += 48;
4623                     break;
4624                 default:
4625                     ADDR_ASSERT_ALWAYS();
4626                     break;
4627             }
4628 
4629             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4630             mipHeight = mipWidth;
4631 
4632             if (isThick)
4633             {
4634                 mipDepth = mipWidth;
4635             }
4636         }
4637         else
4638         {
4639             if (mipWidth <= minInc)
4640             {
4641                 // if we're below the minimal increment...
4642                 if (isThick)
4643                 {
4644                     // For 3d, just go in z direction
4645                     mipCoord.d += mipDepth;
4646                 }
4647                 else
4648                 {
4649                     // For 2d, first go across, then down
4650                     if ((mipWidth * 2) == minInc)
4651                     {
4652                         // if we're 2 mips below, that's when we go back in x, and down in y
4653                         mipCoord.w -= minInc;
4654                         mipCoord.h += minInc;
4655                     }
4656                     else
4657                     {
4658                         // otherwise, just go across in x
4659                         mipCoord.w += minInc;
4660                     }
4661                 }
4662             }
4663             else
4664             {
4665                 // On even mip, go down, otherwise, go across
4666                 if (mip & 1)
4667                 {
4668                     mipCoord.w += mipWidth;
4669                 }
4670                 else
4671                 {
4672                     mipCoord.h += mipHeight;
4673                 }
4674             }
4675             // Divide the width by 2
4676             mipWidth >>= 1;
4677             // After the first mip in tail, the mip is always a square
4678             mipHeight = mipWidth;
4679             // ...or for 3d, a cube
4680             if (isThick)
4681             {
4682                 mipDepth = mipWidth;
4683             }
4684         }
4685     }
4686 }
4687 
4688 /**
4689 ************************************************************************************************************************
4690 *   Gfx9Lib::GetMipStartPos
4691 *
4692 *   @brief
4693 *       Internal function to get out information about mip logical start position
4694 *
4695 *   @return
4696 *       logical start position in macro block width/heith/depth of one mip level within one slice
4697 ************************************************************************************************************************
4698 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4699 Dim3d Gfx9Lib::GetMipStartPos(
4700     AddrResourceType  resourceType,
4701     AddrSwizzleMode   swizzleMode,
4702     UINT_32           width,
4703     UINT_32           height,
4704     UINT_32           depth,
4705     UINT_32           blockWidth,
4706     UINT_32           blockHeight,
4707     UINT_32           blockDepth,
4708     UINT_32           mipId,
4709     UINT_32           log2ElementBytes,
4710     UINT_32*          pMipTailBytesOffset) const
4711 {
4712     Dim3d       mipStartPos = {0};
4713     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4714 
4715     // Report mip in tail if Mip0 is already in mip tail
4716     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4717     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4718     UINT_32 mipIndexInTail = mipId;
4719 
4720     if (inMipTail == FALSE)
4721     {
4722         // Mip 0 dimension, unit in block
4723         UINT_32 mipWidthInBlk   = width  / blockWidth;
4724         UINT_32 mipHeightInBlk  = height / blockHeight;
4725         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4726         AddrMajorMode majorMode = GetMajorMode(resourceType,
4727                                                swizzleMode,
4728                                                mipWidthInBlk,
4729                                                mipHeightInBlk,
4730                                                mipDepthInBlk);
4731 
4732         UINT_32 endingMip = mipId + 1;
4733 
4734         for (UINT_32 i = 1; i <= mipId; i++)
4735         {
4736             if ((i == 1) || (i == 3))
4737             {
4738                 if (majorMode == ADDR_MAJOR_Y)
4739                 {
4740                     mipStartPos.w += mipWidthInBlk;
4741                 }
4742                 else
4743                 {
4744                     mipStartPos.h += mipHeightInBlk;
4745                 }
4746             }
4747             else
4748             {
4749                 if (majorMode == ADDR_MAJOR_X)
4750                 {
4751                    mipStartPos.w += mipWidthInBlk;
4752                 }
4753                 else if (majorMode == ADDR_MAJOR_Y)
4754                 {
4755                    mipStartPos.h += mipHeightInBlk;
4756                 }
4757                 else
4758                 {
4759                    mipStartPos.d += mipDepthInBlk;
4760                 }
4761             }
4762 
4763             BOOL_32 inTail = FALSE;
4764 
4765             if (IsThick(resourceType, swizzleMode))
4766             {
4767                 UINT_32 dim = log2BlkSize % 3;
4768 
4769                 if (dim == 0)
4770                 {
4771                     inTail =
4772                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4773                 }
4774                 else if (dim == 1)
4775                 {
4776                     inTail =
4777                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4778                 }
4779                 else
4780                 {
4781                     inTail =
4782                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4783                 }
4784             }
4785             else
4786             {
4787                 if (log2BlkSize & 1)
4788                 {
4789                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4790                 }
4791                 else
4792                 {
4793                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4794                 }
4795             }
4796 
4797             if (inTail)
4798             {
4799                 endingMip = i;
4800                 break;
4801             }
4802 
4803             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4804             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4805             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4806         }
4807 
4808         if (mipId >= endingMip)
4809         {
4810             inMipTail      = TRUE;
4811             mipIndexInTail = mipId - endingMip;
4812         }
4813     }
4814 
4815     if (inMipTail)
4816     {
4817         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4818         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4819         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4820     }
4821 
4822     return mipStartPos;
4823 }
4824 
4825 /**
4826 ************************************************************************************************************************
4827 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4828 *
4829 *   @brief
4830 *       Internal function to calculate address from coord for tiled swizzle surface
4831 *
4832 *   @return
4833 *       ADDR_E_RETURNCODE
4834 ************************************************************************************************************************
4835 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4836 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4837      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4838      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4839      ) const
4840 {
4841     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4842     localIn.swizzleMode  = pIn->swizzleMode;
4843     localIn.flags        = pIn->flags;
4844     localIn.resourceType = pIn->resourceType;
4845     localIn.bpp          = pIn->bpp;
4846     localIn.width        = Max(pIn->unalignedWidth, 1u);
4847     localIn.height       = Max(pIn->unalignedHeight, 1u);
4848     localIn.numSlices    = Max(pIn->numSlices, 1u);
4849     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4850     localIn.numSamples   = Max(pIn->numSamples, 1u);
4851     localIn.numFrags     = Max(pIn->numFrags, 1u);
4852     if (localIn.numMipLevels <= 1)
4853     {
4854         localIn.pitchInElement = pIn->pitchInElement;
4855     }
4856 
4857     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4858     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4859 
4860     BOOL_32 valid = (returnCode == ADDR_OK) &&
4861                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4862                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4863                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4864 
4865     if (valid)
4866     {
4867         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4868         Dim3d   mipStartPos        = {0};
4869         UINT_32 mipTailBytesOffset = 0;
4870 
4871         if (pIn->numMipLevels > 1)
4872         {
4873             // Mip-map chain cannot be MSAA surface
4874             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4875 
4876             mipStartPos = GetMipStartPos(pIn->resourceType,
4877                                          pIn->swizzleMode,
4878                                          localOut.pitch,
4879                                          localOut.height,
4880                                          localOut.numSlices,
4881                                          localOut.blockWidth,
4882                                          localOut.blockHeight,
4883                                          localOut.blockSlices,
4884                                          pIn->mipId,
4885                                          log2ElementBytes,
4886                                          &mipTailBytesOffset);
4887         }
4888 
4889         UINT_32 interleaveOffset = 0;
4890         UINT_32 pipeBits = 0;
4891         UINT_32 pipeXor = 0;
4892         UINT_32 bankBits = 0;
4893         UINT_32 bankXor = 0;
4894 
4895         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4896         {
4897             UINT_32 blockOffset = 0;
4898             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4899 
4900             if (IsZOrderSwizzle(pIn->swizzleMode))
4901             {
4902                 // Morton generation
4903                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4904                 {
4905                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4906                     UINT_32 mortBits = totalLowBits / 2;
4907                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4908                     // Are 9 bits enough?
4909                     UINT_32 highBitsValue =
4910                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4911                     blockOffset = lowBitsValue | highBitsValue;
4912                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4913                 }
4914                 else
4915                 {
4916                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4917                 }
4918 
4919                 // Fill LSBs with sample bits
4920                 if (pIn->numSamples > 1)
4921                 {
4922                     blockOffset *= pIn->numSamples;
4923                     blockOffset |= pIn->sample;
4924                 }
4925 
4926                 // Shift according to BytesPP
4927                 blockOffset <<= log2ElementBytes;
4928             }
4929             else
4930             {
4931                 // Micro block offset
4932                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4933                 blockOffset = microBlockOffset;
4934 
4935                 // Micro block dimension
4936                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4937                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4938                 // Morton generation, does 12 bit enough?
4939                 blockOffset |=
4940                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4941 
4942                 // Sample bits start location
4943                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4944                 // Join sample bits information to the highest Macro block bits
4945                 if (IsNonPrtXor(pIn->swizzleMode))
4946                 {
4947                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4948                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4949                 }
4950                 else
4951                 {
4952                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4953                     // after this op, the blockOffset only contains log2 Macro block size bits
4954                     blockOffset %= (1 << sampleStart);
4955                     blockOffset |= (pIn->sample << sampleStart);
4956                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4957                 }
4958             }
4959 
4960             if (IsXor(pIn->swizzleMode))
4961             {
4962                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4963                 if (IsPrt(pIn->swizzleMode))
4964                 {
4965                     blockOffset &= ((1 << log2BlkSize) - 1);
4966                 }
4967 
4968                 // Preserve offset inside pipe interleave
4969                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4970                 blockOffset >>= m_pipeInterleaveLog2;
4971 
4972                 // Pipe/Se xor bits
4973                 pipeBits = GetPipeXorBits(log2BlkSize);
4974                 // Pipe xor
4975                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4976                 blockOffset >>= pipeBits;
4977 
4978                 // Bank xor bits
4979                 bankBits = GetBankXorBits(log2BlkSize);
4980                 // Bank Xor
4981                 bankXor = FoldXor2d(blockOffset, bankBits);
4982                 blockOffset >>= bankBits;
4983 
4984                 // Put all the part back together
4985                 blockOffset <<= bankBits;
4986                 blockOffset |= bankXor;
4987                 blockOffset <<= pipeBits;
4988                 blockOffset |= pipeXor;
4989                 blockOffset <<= m_pipeInterleaveLog2;
4990                 blockOffset |= interleaveOffset;
4991             }
4992 
4993             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4994             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4995 
4996             blockOffset |= mipTailBytesOffset;
4997 
4998             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4999             {
5000                 // Apply slice xor if not MSAA/PRT
5001                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
5002                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5003                                 (m_pipeInterleaveLog2 + pipeBits));
5004             }
5005 
5006             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5007                                                   bankBits, pipeBits, &blockOffset);
5008 
5009             blockOffset %= (1 << log2BlkSize);
5010 
5011             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5012             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5013             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5014             UINT_64 macroBlockIndex =
5015                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5016                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5017                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5018 
5019             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5020         }
5021         else
5022         {
5023             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5024 
5025             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5026 
5027             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5028                                               (pIn->y / microBlockDim.h),
5029                                               (pIn->slice / microBlockDim.d),
5030                                               8);
5031 
5032             blockOffset <<= 10;
5033             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5034 
5035             if (IsXor(pIn->swizzleMode))
5036             {
5037                 // Mask off bits above Macro block bits to keep page synonyms working for prt
5038                 if (IsPrt(pIn->swizzleMode))
5039                 {
5040                     blockOffset &= ((1 << log2BlkSize) - 1);
5041                 }
5042 
5043                 // Preserve offset inside pipe interleave
5044                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5045                 blockOffset >>= m_pipeInterleaveLog2;
5046 
5047                 // Pipe/Se xor bits
5048                 pipeBits = GetPipeXorBits(log2BlkSize);
5049                 // Pipe xor
5050                 pipeXor = FoldXor3d(blockOffset, pipeBits);
5051                 blockOffset >>= pipeBits;
5052 
5053                 // Bank xor bits
5054                 bankBits = GetBankXorBits(log2BlkSize);
5055                 // Bank Xor
5056                 bankXor = FoldXor3d(blockOffset, bankBits);
5057                 blockOffset >>= bankBits;
5058 
5059                 // Put all the part back together
5060                 blockOffset <<= bankBits;
5061                 blockOffset |= bankXor;
5062                 blockOffset <<= pipeBits;
5063                 blockOffset |= pipeXor;
5064                 blockOffset <<= m_pipeInterleaveLog2;
5065                 blockOffset |= interleaveOffset;
5066             }
5067 
5068             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5069             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5070             blockOffset |= mipTailBytesOffset;
5071 
5072             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5073                                                   bankBits, pipeBits, &blockOffset);
5074 
5075             blockOffset %= (1 << log2BlkSize);
5076 
5077             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
5078             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5079             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5080 
5081             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5082             UINT_32 sliceSizeInBlock =
5083                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5084             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5085 
5086             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5087         }
5088     }
5089     else
5090     {
5091         returnCode = ADDR_INVALIDPARAMS;
5092     }
5093 
5094     return returnCode;
5095 }
5096 
5097 /**
5098 ************************************************************************************************************************
5099 *   Gfx9Lib::ComputeSurfaceInfoLinear
5100 *
5101 *   @brief
5102 *       Internal function to calculate padding for linear swizzle 2D/3D surface
5103 *
5104 *   @return
5105 *       N/A
5106 ************************************************************************************************************************
5107 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5108 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5109     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
5110     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
5111     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
5112     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
5113     ) const
5114 {
5115     ADDR_E_RETURNCODE returnCode = ADDR_OK;
5116 
5117     UINT_32 elementBytes        = pIn->bpp >> 3;
5118     UINT_32 pitchAlignInElement = 0;
5119 
5120     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5121     {
5122         ADDR_ASSERT(pIn->numMipLevels <= 1);
5123         ADDR_ASSERT(pIn->numSlices <= 1);
5124         pitchAlignInElement = 1;
5125     }
5126     else
5127     {
5128         pitchAlignInElement = (256 / elementBytes);
5129     }
5130 
5131     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
5132     UINT_32 slice0PaddedHeight = pIn->height;
5133 
5134     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5135                                             &mipChainWidth, &slice0PaddedHeight);
5136 
5137     if (returnCode == ADDR_OK)
5138     {
5139         UINT_32 mipChainHeight = 0;
5140         UINT_32 mipHeight      = pIn->height;
5141         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5142 
5143         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5144         {
5145             if (pMipInfo != NULL)
5146             {
5147                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5148                 pMipInfo[i].pitch  = mipChainWidth;
5149                 pMipInfo[i].height = mipHeight;
5150                 pMipInfo[i].depth  = mipDepth;
5151             }
5152 
5153             mipChainHeight += mipHeight;
5154             mipHeight = RoundHalf(mipHeight);
5155             mipHeight = Max(mipHeight, 1u);
5156         }
5157 
5158         *pMipmap0PaddedWidth = mipChainWidth;
5159         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5160     }
5161 
5162     return returnCode;
5163 }
5164 
5165 /**
5166 ************************************************************************************************************************
5167 *   Gfx9Lib::ComputeThinBlockDimension
5168 *
5169 *   @brief
5170 *       Internal function to get thin block width/height/depth in element from surface input params.
5171 *
5172 *   @return
5173 *       N/A
5174 ************************************************************************************************************************
5175 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5176 VOID Gfx9Lib::ComputeThinBlockDimension(
5177     UINT_32*         pWidth,
5178     UINT_32*         pHeight,
5179     UINT_32*         pDepth,
5180     UINT_32          bpp,
5181     UINT_32          numSamples,
5182     AddrResourceType resourceType,
5183     AddrSwizzleMode  swizzleMode) const
5184 {
5185     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5186 
5187     const UINT_32 log2BlkSize              = GetBlockSizeLog2(swizzleMode);
5188     const UINT_32 eleBytes                 = bpp >> 3;
5189     const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5190     const UINT_32 log2blkSizeIn256B        = log2BlkSize - 8;
5191     const UINT_32 widthAmp                 = log2blkSizeIn256B / 2;
5192     const UINT_32 heightAmp                = log2blkSizeIn256B - widthAmp;
5193 
5194     ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5195 
5196     *pWidth  = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5197     *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5198     *pDepth  = 1;
5199 
5200     if (numSamples > 1)
5201     {
5202         const UINT_32 log2sample = Log2(numSamples);
5203         const UINT_32 q          = log2sample >> 1;
5204         const UINT_32 r          = log2sample & 1;
5205 
5206         if (log2BlkSize & 1)
5207         {
5208             *pWidth  >>= q;
5209             *pHeight >>= (q + r);
5210         }
5211         else
5212         {
5213             *pWidth  >>= (q + r);
5214             *pHeight >>= q;
5215         }
5216     }
5217 }
5218 
5219 } // V2
5220 } // Addr
5221