• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE
23 *
24 ***********************************************************************************************************************/
25 
26 /**
27 ************************************************************************************************************************
28 * @file  gfx9addrlib.cpp
29 * @brief Contains the implementation for the Gfx9Lib class.
30 ************************************************************************************************************************
31 */
32 
33 #include "gfx9addrlib.h"
34 
35 #include "gfx9_gb_reg.h"
36 
37 #include "amdgpu_asic_addr.h"
38 
39 ////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 
42 namespace Addr
43 {
44 
45 /**
46 ************************************************************************************************************************
47 *   Gfx9HwlInit
48 *
49 *   @brief
50 *       Creates a Gfx9Lib object.
51 *
52 *   @return
53 *       Returns a Gfx9Lib object pointer.
54 ************************************************************************************************************************
55 */
Gfx9HwlInit(const Client * pClient)56 Addr::Lib* Gfx9HwlInit(const Client* pClient)
57 {
58     return V2::Gfx9Lib::CreateObj(pClient);
59 }
60 
61 namespace V2
62 {
63 
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65 //                               Static Const Member
66 ////////////////////////////////////////////////////////////////////////////////////////////////////
67 
// Static table of per-swizzle-mode flags with ADDR_SW_MAX_TYPE rows. The trailing comment on each row names the
// swizzle mode it describes, and the header comment on the opening line names the flag held by each column.
// Rows marked "Reserved" have every flag cleared. The Gfx9Lib constructor copies this table into
// m_swizzleModeTable.
68 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
69 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
70     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
71     {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
72     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
73     {{0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_256B_R
74 
75     {{0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_Z
76     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
77     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
78     {{0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_4KB_R
79 
80     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_Z
81     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
82     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
83     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_64KB_R
84 
85     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
86     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
87     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
88     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
89 
90     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_Z_T
91     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
92     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
93     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}}, // ADDR_SW_64KB_R_T
94 
95     {{0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_Z_X
96     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
97     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
98     {{0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_4KB_R_X
99 
100     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
101     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
102     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
103     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_64KB_R_X
104 
105     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
106     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
107     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
108     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
109     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
110 };
111 
// Sixteen-entry constant table of UINT_32 values.
// NOTE(review): judging by the name these are mip-tail offsets expressed in 256-byte units; the users of this
// table are not in this part of the file -- confirm before relying on that reading.
112 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
113 
// Five-entry constant tables of Dim3d dimensions; every entry multiplies out to at most 256 elements.
// NOTE(review): presumably 256-byte block dimensions for the 3D "S" and "Z" swizzle variants, one entry per
// element size -- confirm against the code that indexes them.
114 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
115 
116 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
117 
118 /**
119 ************************************************************************************************************************
120 *   Gfx9Lib::Gfx9Lib
121 *
122 *   @brief
123 *       Constructor
124 *
125 ************************************************************************************************************************
126 */
Gfx9Lib(const Client * pClient)127 Gfx9Lib::Gfx9Lib(const Client* pClient)
128     :
129     Lib(pClient)
130 {
131     memset(&m_settings, 0, sizeof(m_settings));
132     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
133     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
134     m_metaEqOverrideIndex = 0;
135 }
136 
137 /**
138 ************************************************************************************************************************
139 *   Gfx9Lib::~Gfx9Lib
140 *
141 *   @brief
142 *       Destructor
143 ************************************************************************************************************************
144 */
~Gfx9Lib()145 Gfx9Lib::~Gfx9Lib()
146 {
147 }
148 
149 /**
150 ************************************************************************************************************************
151 *   Gfx9Lib::HwlComputeHtileInfo
152 *
153 *   @brief
154 *       Interface function stub of AddrComputeHtileInfo
155 *
156 *   @return
157 *       ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const160 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
161     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
162     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
163     ) const
164 {
165     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
166                                                        pIn->swizzleMode);
167 
168     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
169 
170     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
171 
172     if ((numPipeTotal == 1) && (numRbTotal == 1))
173     {
174         numCompressBlkPerMetaBlkLog2 = 10;
175     }
176     else
177     {
178         if (m_settings.applyAliasFix)
179         {
180             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
181         }
182         else
183         {
184             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
185         }
186     }
187 
188     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
189 
190     Dim3d   metaBlkDim   = {8, 8, 1};
191     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
192     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
193     UINT_32 heightAmp    = totalAmpBits - widthAmp;
194     metaBlkDim.w <<= widthAmp;
195     metaBlkDim.h <<= heightAmp;
196 
197 #if DEBUG
198     Dim3d metaBlkDimDbg = {8, 8, 1};
199     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
200     {
201         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
202             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
203         {
204             metaBlkDimDbg.h <<= 1;
205         }
206         else
207         {
208             metaBlkDimDbg.w <<= 1;
209         }
210     }
211     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
212 #endif
213 
214     UINT_32 numMetaBlkX;
215     UINT_32 numMetaBlkY;
216     UINT_32 numMetaBlkZ;
217 
218     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
219                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
220                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
221 
222     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
223     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
224 
225     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
226     {
227         align *= (numPipeTotal >> 1);
228     }
229 
230     align = Max(align, metaBlkSize);
231 
232     if (m_settings.metaBaseAlignFix)
233     {
234         align = Max(align, GetBlockSize(pIn->swizzleMode));
235     }
236 
237     if (m_settings.htileAlignFix)
238     {
239         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
240         const INT_32 htileCachelineSizeLog2 = 11;
241         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
242 
243         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
244 
245         align <<= rbMaskPadding;
246     }
247 
248     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
249     pOut->height     = numMetaBlkY * metaBlkDim.h;
250     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
251 
252     pOut->metaBlkWidth       = metaBlkDim.w;
253     pOut->metaBlkHeight      = metaBlkDim.h;
254     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
255 
256     pOut->baseAlign  = align;
257     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
258 
259     return ADDR_OK;
260 }
261 
262 /**
263 ************************************************************************************************************************
264 *   Gfx9Lib::HwlComputeCmaskInfo
265 *
266 *   @brief
267 *       Interface function stub of AddrComputeCmaskInfo
268 *
269 *   @return
270 *       ADDR_E_RETURNCODE
271 ************************************************************************************************************************
272 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const273 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
274     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
275     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
276     ) const
277 {
278     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
279 
280     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
281                                                        pIn->swizzleMode);
282 
283     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
284 
285     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
286 
287     if ((numPipeTotal == 1) && (numRbTotal == 1))
288     {
289         numCompressBlkPerMetaBlkLog2 = 13;
290     }
291     else
292     {
293         if (m_settings.applyAliasFix)
294         {
295             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
296         }
297         else
298         {
299             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
300         }
301 
302         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
303     }
304 
305     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
306 
307     Dim2d metaBlkDim = {8, 8};
308     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
309     UINT_32 heightAmp = totalAmpBits >> 1;
310     UINT_32 widthAmp = totalAmpBits - heightAmp;
311     metaBlkDim.w <<= widthAmp;
312     metaBlkDim.h <<= heightAmp;
313 
314 #if DEBUG
315     Dim2d metaBlkDimDbg = {8, 8};
316     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
317     {
318         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
319         {
320             metaBlkDimDbg.h <<= 1;
321         }
322         else
323         {
324             metaBlkDimDbg.w <<= 1;
325         }
326     }
327     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
328 #endif
329 
330     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
331     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
332     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
333 
334     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
335 
336     if (m_settings.metaBaseAlignFix)
337     {
338         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
339     }
340 
341     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
342     pOut->height     = numMetaBlkY * metaBlkDim.h;
343     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
344     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
345     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
346 
347     pOut->metaBlkWidth = metaBlkDim.w;
348     pOut->metaBlkHeight = metaBlkDim.h;
349 
350     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
351 
352     // Get the CMASK address equation (copied from CmaskAddrFromCoord)
353     UINT_32 fmaskBpp              = GetFmaskBpp(1, 1);
354     UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
355     UINT_32 metaBlkWidthLog2      = Log2(pOut->metaBlkWidth);
356     UINT_32 metaBlkHeightLog2     = Log2(pOut->metaBlkHeight);
357 
358     MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
359                                 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
360                                 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
361 
362     CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
363 
364     // Generate the CMASK address equation.
365     pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
366     bool checked = false;
367     for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
368        CoordTerm &bit = (*eq)[b];
369 
370        unsigned c;
371        for (c = 0; c < bit.getsize(); c++) {
372           Coordinate &coord = bit[c];
373           pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
374           pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
375        }
376        for (; c < 5; c++)
377           pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
378     }
379 
380     // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
381     for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
382        CoordTerm &prev = (*eq)[b - 1];
383        CoordTerm &cur = (*eq)[b];
384 
385        if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
386           prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
387           prev[0].getord() + 1 == cur[0].getord())
388           pOut->equation.gfx9.num_bits = b;
389        else
390           break;
391     }
392 
393     pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
394                                                                    pIn->swizzleMode);
395 
396     return ADDR_OK;
397 }
398 
399 /**
400 ************************************************************************************************************************
401 *   Gfx9Lib::GetMetaMipInfo
402 *
403 *   @brief
404 *       Get meta mip info
405 *
406 *   @return
407 *       N/A
408 ************************************************************************************************************************
409 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const410 VOID Gfx9Lib::GetMetaMipInfo(
411     UINT_32 numMipLevels,           ///< [in]  number of mip levels
412     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
413     BOOL_32 dataThick,              ///< [in]  data surface is thick
414     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
415     UINT_32 mip0Width,              ///< [in]  mip0 width
416     UINT_32 mip0Height,             ///< [in]  mip0 height
417     UINT_32 mip0Depth,              ///< [in]  mip0 depth
418     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
419     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
420     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
421     const
422 {
423     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
424     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
425     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
426     UINT_32 tailWidth   = pMetaBlkDim->w;
427     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
428     UINT_32 tailDepth   = pMetaBlkDim->d;
429     BOOL_32 inTail      = FALSE;
430     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
431 
432     if (numMipLevels > 1)
433     {
434         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
435         {
436             // Z major
437             major = ADDR_MAJOR_Z;
438         }
439         else if (numMetaBlkX >= numMetaBlkY)
440         {
441             // X major
442             major = ADDR_MAJOR_X;
443         }
444         else
445         {
446             // Y major
447             major = ADDR_MAJOR_Y;
448         }
449 
450         inTail = ((mip0Width <= tailWidth) &&
451                   (mip0Height <= tailHeight) &&
452                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
453 
454         if (inTail == FALSE)
455         {
456             UINT_32 orderLimit;
457             UINT_32 *pMipDim;
458             UINT_32 *pOrderDim;
459 
460             if (major == ADDR_MAJOR_Z)
461             {
462                 // Z major
463                 pMipDim = &numMetaBlkY;
464                 pOrderDim = &numMetaBlkZ;
465                 orderLimit = 4;
466             }
467             else if (major == ADDR_MAJOR_X)
468             {
469                 // X major
470                 pMipDim = &numMetaBlkY;
471                 pOrderDim = &numMetaBlkX;
472                 orderLimit = 4;
473             }
474             else
475             {
476                 // Y major
477                 pMipDim = &numMetaBlkX;
478                 pOrderDim = &numMetaBlkY;
479                 orderLimit = 2;
480             }
481 
482             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
483             {
484                 *pMipDim += 2;
485             }
486             else
487             {
488                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
489             }
490         }
491     }
492 
493     if (pInfo != NULL)
494     {
495         UINT_32 mipWidth  = mip0Width;
496         UINT_32 mipHeight = mip0Height;
497         UINT_32 mipDepth  = mip0Depth;
498         Dim3d   mipCoord  = {0};
499 
500         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
501         {
502             if (inTail)
503             {
504                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
505                                    pMetaBlkDim);
506                 break;
507             }
508             else
509             {
510                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
511                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
512                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
513 
514                 pInfo[mip].inMiptail = FALSE;
515                 pInfo[mip].startX = mipCoord.w;
516                 pInfo[mip].startY = mipCoord.h;
517                 pInfo[mip].startZ = mipCoord.d;
518                 pInfo[mip].width  = mipWidth;
519                 pInfo[mip].height = mipHeight;
520                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
521 
522                 if ((mip >= 3) || (mip & 1))
523                 {
524                     switch (major)
525                     {
526                         case ADDR_MAJOR_X:
527                             mipCoord.w += mipWidth;
528                             break;
529                         case ADDR_MAJOR_Y:
530                             mipCoord.h += mipHeight;
531                             break;
532                         case ADDR_MAJOR_Z:
533                             mipCoord.d += mipDepth;
534                             break;
535                         default:
536                             break;
537                     }
538                 }
539                 else
540                 {
541                     switch (major)
542                     {
543                         case ADDR_MAJOR_X:
544                             mipCoord.h += mipHeight;
545                             break;
546                         case ADDR_MAJOR_Y:
547                             mipCoord.w += mipWidth;
548                             break;
549                         case ADDR_MAJOR_Z:
550                             mipCoord.h += mipHeight;
551                             break;
552                         default:
553                             break;
554                     }
555                 }
556 
557                 mipWidth  = Max(mipWidth >> 1, 1u);
558                 mipHeight = Max(mipHeight >> 1, 1u);
559                 mipDepth = Max(mipDepth >> 1, 1u);
560 
561                 inTail = ((mipWidth <= tailWidth) &&
562                           (mipHeight <= tailHeight) &&
563                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
564             }
565         }
566     }
567 
568     *pNumMetaBlkX = numMetaBlkX;
569     *pNumMetaBlkY = numMetaBlkY;
570     *pNumMetaBlkZ = numMetaBlkZ;
571 }
572 
573 /**
574 ************************************************************************************************************************
575 *   Gfx9Lib::HwlComputeDccInfo
576 *
577 *   @brief
578 *       Interface function to compute DCC key info
579 *
580 *   @return
581 *       ADDR_E_RETURNCODE
582 ************************************************************************************************************************
583 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const584 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
585     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
586     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
587     ) const
588 {
589     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
590     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
591     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
592 
593     if (dataLinear)
594     {
595         metaLinear = TRUE;
596     }
597     else if (metaLinear == TRUE)
598     {
599         pipeAligned = FALSE;
600     }
601 
602     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
603 
604     if (metaLinear)
605     {
606         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
607         ADDR_ASSERT_ALWAYS();
608 
609         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
610         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
611     }
612     else
613     {
614         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
615 
616         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
617 
618         UINT_32 numFrags = Max(pIn->numFrags, 1u);
619         UINT_32 numSlices = Max(pIn->numSlices, 1u);
620 
621         minMetaBlkSize /= numFrags;
622 
623         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
624 
625         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
626 
627         if ((numPipeTotal > 1) || (numRbTotal > 1))
628         {
629             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
630 
631             numCompressBlkPerMetaBlk =
632                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
633 
634             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
635             {
636                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
637             }
638         }
639 
640         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
641         Dim3d metaBlkDim = compressBlkDim;
642 
643         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
644         {
645             if ((metaBlkDim.h < metaBlkDim.w) ||
646                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
647             {
648                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
649                 {
650                     metaBlkDim.h <<= 1;
651                 }
652                 else
653                 {
654                     metaBlkDim.d <<= 1;
655                 }
656             }
657             else
658             {
659                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
660                 {
661                     metaBlkDim.w <<= 1;
662                 }
663                 else
664                 {
665                     metaBlkDim.d <<= 1;
666                 }
667             }
668         }
669 
670         UINT_32 numMetaBlkX;
671         UINT_32 numMetaBlkY;
672         UINT_32 numMetaBlkZ;
673 
674         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
675                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
676                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
677 
678         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
679 
680         if (numFrags > m_maxCompFrag)
681         {
682             sizeAlign *= (numFrags / m_maxCompFrag);
683         }
684 
685         if (m_settings.metaBaseAlignFix)
686         {
687             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
688         }
689 
690         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
691                            numCompressBlkPerMetaBlk * numFrags;
692         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
693         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
694 
695         pOut->pitch = numMetaBlkX * metaBlkDim.w;
696         pOut->height = numMetaBlkY * metaBlkDim.h;
697         pOut->depth = numMetaBlkZ * metaBlkDim.d;
698 
699         pOut->compressBlkWidth = compressBlkDim.w;
700         pOut->compressBlkHeight = compressBlkDim.h;
701         pOut->compressBlkDepth = compressBlkDim.d;
702 
703         pOut->metaBlkWidth = metaBlkDim.w;
704         pOut->metaBlkHeight = metaBlkDim.h;
705         pOut->metaBlkDepth = metaBlkDim.d;
706         pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
707 
708         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
709         pOut->fastClearSizePerSlice =
710             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
711 
712         // Get the DCC address equation (copied from DccAddrFromCoord)
713         UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
714         UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
715         UINT_32 metaBlkWidthLog2  = Log2(pOut->metaBlkWidth);
716         UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
717         UINT_32 metaBlkDepthLog2  = Log2(pOut->metaBlkDepth);
718         UINT_32 compBlkWidthLog2  = Log2(pOut->compressBlkWidth);
719         UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
720         UINT_32 compBlkDepthLog2  = Log2(pOut->compressBlkDepth);
721 
722         MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
723                                      Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
724                                      metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
725                                      compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
726 
727         CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
728 
729         // Generate the DCC address equation.
730         pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
731         bool checked = false;
732         for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
733            CoordTerm &bit = (*eq)[b];
734 
735            unsigned c;
736            for (c = 0; c < bit.getsize(); c++) {
737               Coordinate &coord = bit[c];
738               pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
739               pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
740            }
741            for (; c < 5; c++)
742               pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
743         }
744 
745         // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
746         for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
747            CoordTerm &prev = (*eq)[b - 1];
748            CoordTerm &cur = (*eq)[b];
749 
750            if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
751                prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
752                prev[0].getord() + 1 == cur[0].getord())
753               pOut->equation.gfx9.num_bits = b;
754            else
755               break;
756         }
757 
758         pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
759                                                                        pIn->swizzleMode);
760     }
761 
762     return ADDR_OK;
763 }
764 
765 /**
766 ************************************************************************************************************************
767 *   Gfx9Lib::HwlComputeMaxBaseAlignments
768 *
769 *   @brief
770 *       Gets maximum alignments
771 *   @return
772 *       maximum alignments
773 ************************************************************************************************************************
774 */
HwlComputeMaxBaseAlignments() const775 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
776 {
777     return Size64K;
778 }
779 
780 /**
781 ************************************************************************************************************************
782 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
783 *
784 *   @brief
785 *       Gets maximum alignments for metadata
786 *   @return
787 *       maximum alignments for metadata
788 ************************************************************************************************************************
789 */
HwlComputeMaxMetaBaseAlignments() const790 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
791 {
792     // Max base alignment for Htile
793     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
794     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
795 
796     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
797     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
798     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
799     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
800 
801     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
802 
803     if (maxNumPipeTotal > 2)
804     {
805         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
806     }
807 
808     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
809 
810     if (m_settings.metaBaseAlignFix)
811     {
812         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
813     }
814 
815     if (m_settings.htileAlignFix)
816     {
817         maxBaseAlignHtile *= maxNumPipeTotal;
818     }
819 
820     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
821 
822     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
823     UINT_32 maxBaseAlignDcc3D = 65536;
824 
825     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
826     {
827         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
828     }
829 
830     // Max base alignment for Msaa Dcc
831     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
832 
833     if (m_settings.metaBaseAlignFix)
834     {
835         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
836     }
837 
838     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
839 }
840 
841 /**
842 ************************************************************************************************************************
843 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
844 *
845 *   @brief
846 *       Interface function stub of AddrComputeCmaskAddrFromCoord
847 *
848 *   @return
849 *       ADDR_E_RETURNCODE
850 ************************************************************************************************************************
851 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)852 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
853     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
854     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
855 {
856     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
857     input.size            = sizeof(input);
858     input.cMaskFlags      = pIn->cMaskFlags;
859     input.colorFlags      = pIn->colorFlags;
860     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
861     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
862     input.numSlices       = Max(pIn->numSlices, 1u);
863     input.swizzleMode     = pIn->swizzleMode;
864     input.resourceType    = pIn->resourceType;
865 
866     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
867     output.size = sizeof(output);
868 
869     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
870 
871     if (returnCode == ADDR_OK)
872     {
873         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
874         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
875         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
876         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
877 
878         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
879                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
880                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
881 
882         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
883 
884         UINT_32 xb = pIn->x / output.metaBlkWidth;
885         UINT_32 yb = pIn->y / output.metaBlkHeight;
886         UINT_32 zb = pIn->slice;
887 
888         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
889         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
890         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
891 
892         UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
893         UINT_64 address  = pMetaEq->solve(coords);
894 
895         pOut->addr = address >> 1;
896         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
897 
898 
899         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
900                                                            pIn->swizzleMode);
901 
902         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
903 
904         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
905     }
906 
907     return returnCode;
908 }
909 
910 /**
911 ************************************************************************************************************************
912 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
913 *
914 *   @brief
915 *       Interface function stub of AddrComputeHtileAddrFromCoord
916 *
917 *   @return
918 *       ADDR_E_RETURNCODE
919 ************************************************************************************************************************
920 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)921 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
922     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
923     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
924 {
925     ADDR_E_RETURNCODE returnCode = ADDR_OK;
926 
927     if (pIn->numMipLevels > 1)
928     {
929         returnCode = ADDR_NOTIMPLEMENTED;
930     }
931     else
932     {
933         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
934         input.size            = sizeof(input);
935         input.hTileFlags      = pIn->hTileFlags;
936         input.depthFlags      = pIn->depthflags;
937         input.swizzleMode     = pIn->swizzleMode;
938         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
939         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
940         input.numSlices       = Max(pIn->numSlices, 1u);
941         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
942 
943         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
944         output.size = sizeof(output);
945 
946         returnCode = ComputeHtileInfo(&input, &output);
947 
948         if (returnCode == ADDR_OK)
949         {
950             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
951             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
952             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
953             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
954 
955             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
956                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
957                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
958 
959             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
960 
961             UINT_32 xb = pIn->x / output.metaBlkWidth;
962             UINT_32 yb = pIn->y / output.metaBlkHeight;
963             UINT_32 zb = pIn->slice;
964 
965             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
966             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
967             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
968 
969             UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
970             UINT_64 address  = pMetaEq->solve(coords);
971 
972             pOut->addr = address >> 1;
973 
974             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
975                                                                pIn->swizzleMode);
976 
977             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
978 
979             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
980         }
981     }
982 
983     return returnCode;
984 }
985 
986 /**
987 ************************************************************************************************************************
988 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
989 *
990 *   @brief
991 *       Interface function stub of AddrComputeHtileCoordFromAddr
992 *
993 *   @return
994 *       ADDR_E_RETURNCODE
995 ************************************************************************************************************************
996 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)997 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
998     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
999     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
1000 {
1001     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1002 
1003     if (pIn->numMipLevels > 1)
1004     {
1005         returnCode = ADDR_NOTIMPLEMENTED;
1006     }
1007     else
1008     {
1009         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
1010         input.size            = sizeof(input);
1011         input.hTileFlags      = pIn->hTileFlags;
1012         input.swizzleMode     = pIn->swizzleMode;
1013         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
1014         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1015         input.numSlices       = Max(pIn->numSlices, 1u);
1016         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1017 
1018         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1019         output.size = sizeof(output);
1020 
1021         returnCode = ComputeHtileInfo(&input, &output);
1022 
1023         if (returnCode == ADDR_OK)
1024         {
1025             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1026             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1027             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1028             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
1029 
1030             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1031                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1032                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1033 
1034             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1035 
1036             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1037                                                                pIn->swizzleMode);
1038 
1039             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1040 
1041             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1042 
1043             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1044             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1045 
1046             UINT_32 coords[NUM_DIMS];
1047             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1048 
1049             pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1050             pOut->y     = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1051             pOut->x     = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1052         }
1053     }
1054 
1055     return returnCode;
1056 }
1057 
1058 /**
1059 ************************************************************************************************************************
1060 *   Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1061 *
1062 *   @brief
1063 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1064 *
1065 *   @return
1066 *       ADDR_E_RETURNCODE
1067 ************************************************************************************************************************
1068 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1069 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1070     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1071 {
1072     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1073 
1074     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1075     {
1076         returnCode = ADDR_NOTSUPPORTED;
1077     }
1078     else if ((pIn->pitch == 0)             ||
1079              (pIn->height == 0)            ||
1080              (pIn->compressBlkWidth == 0)  ||
1081              (pIn->compressBlkHeight == 0) ||
1082              (pIn->compressBlkDepth == 0)  ||
1083              (pIn->metaBlkWidth == 0)      ||
1084              (pIn->metaBlkHeight == 0)     ||
1085              (pIn->metaBlkDepth == 0)      ||
1086              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1087     {
1088         returnCode = ADDR_NOTSUPPORTED;
1089     }
1090 
1091     return returnCode;
1092 }
1093 
1094 /**
1095 ************************************************************************************************************************
1096 *   Gfx9Lib::HwlComputeDccAddrFromCoord
1097 *
1098 *   @brief
1099 *       Interface function stub of AddrComputeDccAddrFromCoord
1100 *
1101 *   @return
1102 *       N/A
1103 ************************************************************************************************************************
1104 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1105 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1106     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
1107     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1108 {
1109     UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1110     UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1111     UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
1112     UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1113     UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
1114     UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
1115     UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1116     UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
1117 
1118     MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1119                                  Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1120                                  metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1121                                  compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1122 
1123     const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1124 
1125     UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1126     UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1127     UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1128 
1129     UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
1130     UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1131     UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1132 
1133     UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1134     UINT_64 address  = pMetaEq->solve(coords);
1135 
1136     pOut->addr = address >> 1;
1137 
1138     UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1139                                                        pIn->swizzleMode);
1140 
1141     UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1142 
1143     pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1144 }
1145 
1146 /**
1147 ************************************************************************************************************************
1148 *   Gfx9Lib::HwlInitGlobalParams
1149 *
1150 *   @brief
1151 *       Initializes global parameters
1152 *
1153 *   @return
1154 *       TRUE if all settings are valid
1155 *
1156 ************************************************************************************************************************
1157 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1158 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1159     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1160 {
1161     BOOL_32 valid = TRUE;
1162 
1163     if (m_settings.isArcticIsland)
1164     {
1165         GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1166 
1167         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1168 
1169         // These values are copied from CModel code
1170         switch (gbAddrConfig.bits.NUM_PIPES)
1171         {
1172             case ADDR_CONFIG_1_PIPE:
1173                 m_pipes = 1;
1174                 m_pipesLog2 = 0;
1175                 break;
1176             case ADDR_CONFIG_2_PIPE:
1177                 m_pipes = 2;
1178                 m_pipesLog2 = 1;
1179                 break;
1180             case ADDR_CONFIG_4_PIPE:
1181                 m_pipes = 4;
1182                 m_pipesLog2 = 2;
1183                 break;
1184             case ADDR_CONFIG_8_PIPE:
1185                 m_pipes = 8;
1186                 m_pipesLog2 = 3;
1187                 break;
1188             case ADDR_CONFIG_16_PIPE:
1189                 m_pipes = 16;
1190                 m_pipesLog2 = 4;
1191                 break;
1192             case ADDR_CONFIG_32_PIPE:
1193                 m_pipes = 32;
1194                 m_pipesLog2 = 5;
1195                 break;
1196             default:
1197                 ADDR_ASSERT_ALWAYS();
1198                 break;
1199         }
1200 
1201         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1202         {
1203             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1204                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1205                 m_pipeInterleaveLog2 = 8;
1206                 break;
1207             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1208                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1209                 m_pipeInterleaveLog2 = 9;
1210                 break;
1211             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1212                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1213                 m_pipeInterleaveLog2 = 10;
1214                 break;
1215             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1216                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1217                 m_pipeInterleaveLog2 = 11;
1218                 break;
1219             default:
1220                 ADDR_ASSERT_ALWAYS();
1221                 break;
1222         }
1223 
1224         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1225         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1226         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1227 
1228         switch (gbAddrConfig.bits.NUM_BANKS)
1229         {
1230             case ADDR_CONFIG_1_BANK:
1231                 m_banks = 1;
1232                 m_banksLog2 = 0;
1233                 break;
1234             case ADDR_CONFIG_2_BANK:
1235                 m_banks = 2;
1236                 m_banksLog2 = 1;
1237                 break;
1238             case ADDR_CONFIG_4_BANK:
1239                 m_banks = 4;
1240                 m_banksLog2 = 2;
1241                 break;
1242             case ADDR_CONFIG_8_BANK:
1243                 m_banks = 8;
1244                 m_banksLog2 = 3;
1245                 break;
1246             case ADDR_CONFIG_16_BANK:
1247                 m_banks = 16;
1248                 m_banksLog2 = 4;
1249                 break;
1250             default:
1251                 ADDR_ASSERT_ALWAYS();
1252                 break;
1253         }
1254 
1255         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1256         {
1257             case ADDR_CONFIG_1_SHADER_ENGINE:
1258                 m_se = 1;
1259                 m_seLog2 = 0;
1260                 break;
1261             case ADDR_CONFIG_2_SHADER_ENGINE:
1262                 m_se = 2;
1263                 m_seLog2 = 1;
1264                 break;
1265             case ADDR_CONFIG_4_SHADER_ENGINE:
1266                 m_se = 4;
1267                 m_seLog2 = 2;
1268                 break;
1269             case ADDR_CONFIG_8_SHADER_ENGINE:
1270                 m_se = 8;
1271                 m_seLog2 = 3;
1272                 break;
1273             default:
1274                 ADDR_ASSERT_ALWAYS();
1275                 break;
1276         }
1277 
1278         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1279         {
1280             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1281                 m_rbPerSe = 1;
1282                 m_rbPerSeLog2 = 0;
1283                 break;
1284             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1285                 m_rbPerSe = 2;
1286                 m_rbPerSeLog2 = 1;
1287                 break;
1288             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1289                 m_rbPerSe = 4;
1290                 m_rbPerSeLog2 = 2;
1291                 break;
1292             default:
1293                 ADDR_ASSERT_ALWAYS();
1294                 break;
1295         }
1296 
1297         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1298         {
1299             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1300                 m_maxCompFrag = 1;
1301                 m_maxCompFragLog2 = 0;
1302                 break;
1303             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1304                 m_maxCompFrag = 2;
1305                 m_maxCompFragLog2 = 1;
1306                 break;
1307             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1308                 m_maxCompFrag = 4;
1309                 m_maxCompFragLog2 = 2;
1310                 break;
1311             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1312                 m_maxCompFrag = 8;
1313                 m_maxCompFragLog2 = 3;
1314                 break;
1315             default:
1316                 ADDR_ASSERT_ALWAYS();
1317                 break;
1318         }
1319 
1320         if ((m_rbPerSeLog2 == 1) &&
1321             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1322              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1323         {
1324             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1325 
1326             ADDR_ASSERT(m_settings.isRaven == FALSE);
1327 
1328             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1329 
1330             if (m_settings.isVega12)
1331             {
1332                 m_settings.htileCacheRbConflict = 1;
1333             }
1334         }
1335 
1336         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1337         m_blockVarSizeLog2 = 0;
1338     }
1339     else
1340     {
1341         valid = FALSE;
1342         ADDR_NOT_IMPLEMENTED();
1343     }
1344 
1345     if (valid)
1346     {
1347         InitEquationTable();
1348     }
1349 
1350     return valid;
1351 }
1352 
1353 /**
1354 ************************************************************************************************************************
1355 *   Gfx9Lib::HwlConvertChipFamily
1356 *
1357 *   @brief
1358 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1359 *   @return
1360 *       ChipFamily
1361 ************************************************************************************************************************
1362 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1363 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1364     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1365     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1366 {
1367     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1368 
1369     switch (uChipFamily)
1370     {
1371         case FAMILY_AI:
1372             m_settings.isArcticIsland = 1;
1373             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1374             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1375             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1376             m_settings.isDce12 = 1;
1377 
1378             if (m_settings.isVega10 == 0)
1379             {
1380                 m_settings.htileAlignFix = 1;
1381                 m_settings.applyAliasFix = 1;
1382             }
1383 
1384             m_settings.metaBaseAlignFix = 1;
1385 
1386             m_settings.depthPipeXorDisable = 1;
1387             break;
1388         case FAMILY_RV:
1389             m_settings.isArcticIsland = 1;
1390 
1391             if (ASICREV_IS_RAVEN(uChipRevision))
1392             {
1393                 m_settings.isRaven = 1;
1394 
1395                 m_settings.depthPipeXorDisable = 1;
1396             }
1397 
1398             if (ASICREV_IS_RAVEN2(uChipRevision))
1399             {
1400                 m_settings.isRaven = 1;
1401             }
1402 
1403             if (m_settings.isRaven == 0)
1404             {
1405                 m_settings.htileAlignFix = 1;
1406                 m_settings.applyAliasFix = 1;
1407             }
1408 
1409             m_settings.isDcn1 = m_settings.isRaven;
1410 
1411             if (ASICREV_IS_RENOIR(uChipRevision))
1412             {
1413                 m_settings.isRaven = 1;
1414                 m_settings.isDcn2  = 1;
1415             }
1416 
1417             m_settings.metaBaseAlignFix = 1;
1418             break;
1419 
1420         default:
1421             ADDR_ASSERT(!"No Chip found");
1422             break;
1423     }
1424 
1425     return family;
1426 }
1427 
1428 /**
1429 ************************************************************************************************************************
1430 *   Gfx9Lib::InitRbEquation
1431 *
1432 *   @brief
1433 *       Init RB equation
1434 *   @return
1435 *       N/A
1436 ************************************************************************************************************************
1437 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1438 VOID Gfx9Lib::GetRbEquation(
1439     CoordEq* pRbEq,             ///< [out] rb equation
1440     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1441     UINT_32  numSeLog2)         ///< [in] number of shader engine
1442     const
1443 {
1444     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1445     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1446     Coordinate cx(DIM_X, rbRegion);
1447     Coordinate cy(DIM_Y, rbRegion);
1448 
1449     UINT_32 start = 0;
1450     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1451 
1452     // Clear the rb equation
1453     pRbEq->resize(0);
1454     pRbEq->resize(numRbTotalLog2);
1455 
1456     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1457     {
1458         // Special case when more than 1 SE, and 2 RB per SE
1459         (*pRbEq)[0].add(cx);
1460         (*pRbEq)[0].add(cy);
1461         cx++;
1462         cy++;
1463 
1464         if (m_settings.applyAliasFix == false)
1465         {
1466             (*pRbEq)[0].add(cy);
1467         }
1468 
1469         (*pRbEq)[0].add(cy);
1470         start++;
1471     }
1472 
1473     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1474 
1475     for (UINT_32 i = 0; i < numBits; i++)
1476     {
1477         UINT_32 idx =
1478             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1479 
1480         if ((i % 2) == 1)
1481         {
1482             (*pRbEq)[idx].add(cx);
1483             cx++;
1484         }
1485         else
1486         {
1487             (*pRbEq)[idx].add(cy);
1488             cy++;
1489         }
1490     }
1491 }
1492 
1493 /**
1494 ************************************************************************************************************************
1495 *   Gfx9Lib::GetDataEquation
1496 *
1497 *   @brief
1498 *       Get data equation for fmask and Z
1499 *   @return
1500 *       N/A
1501 ************************************************************************************************************************
1502 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1503 VOID Gfx9Lib::GetDataEquation(
1504     CoordEq* pDataEq,               ///< [out] data surface equation
1505     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1506     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1507     AddrResourceType resourceType,  ///< [in] data surface resource type
1508     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1509     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1510     const
1511 {
1512     Coordinate cx(DIM_X, 0);
1513     Coordinate cy(DIM_Y, 0);
1514     Coordinate cz(DIM_Z, 0);
1515     Coordinate cs(DIM_S, 0);
1516 
1517     // Clear the equation
1518     pDataEq->resize(0);
1519     pDataEq->resize(27);
1520 
1521     if (dataSurfaceType == Gfx9DataColor)
1522     {
1523         if (IsLinear(swizzleMode))
1524         {
1525             Coordinate cm(DIM_M, 0);
1526 
1527             pDataEq->resize(49);
1528 
1529             for (UINT_32 i = 0; i < 49; i++)
1530             {
1531                 (*pDataEq)[i].add(cm);
1532                 cm++;
1533             }
1534         }
1535         else if (IsThick(resourceType, swizzleMode))
1536         {
1537             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1538             UINT_32 i;
1539             if (IsStandardSwizzle(resourceType, swizzleMode))
1540             {
1541                 // Standard 3d swizzle
1542                 // Fill in bottom x bits
1543                 for (i = elementBytesLog2; i < 4; i++)
1544                 {
1545                     (*pDataEq)[i].add(cx);
1546                     cx++;
1547                 }
1548                 // Fill in 2 bits of y and then z
1549                 for (i = 4; i < 6; i++)
1550                 {
1551                     (*pDataEq)[i].add(cy);
1552                     cy++;
1553                 }
1554                 for (i = 6; i < 8; i++)
1555                 {
1556                     (*pDataEq)[i].add(cz);
1557                     cz++;
1558                 }
1559                 if (elementBytesLog2 < 2)
1560                 {
1561                     // fill in z & y bit
1562                     (*pDataEq)[8].add(cz);
1563                     (*pDataEq)[9].add(cy);
1564                     cz++;
1565                     cy++;
1566                 }
1567                 else if (elementBytesLog2 == 2)
1568                 {
1569                     // fill in y and x bit
1570                     (*pDataEq)[8].add(cy);
1571                     (*pDataEq)[9].add(cx);
1572                     cy++;
1573                     cx++;
1574                 }
1575                 else
1576                 {
1577                     // fill in 2 x bits
1578                     (*pDataEq)[8].add(cx);
1579                     cx++;
1580                     (*pDataEq)[9].add(cx);
1581                     cx++;
1582                 }
1583             }
1584             else
1585             {
1586                 // Z 3d swizzle
1587                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1588                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1589                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1590                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1591                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1592                 {
1593                     (*pDataEq)[i].add(cz);
1594                     cz++;
1595                 }
1596                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1597                 {
1598                     // add an x and z
1599                     (*pDataEq)[6].add(cx);
1600                     (*pDataEq)[7].add(cz);
1601                     cx++;
1602                     cz++;
1603                 }
1604                 else if (elementBytesLog2 == 2)
1605                 {
1606                     // add a y and z
1607                     (*pDataEq)[6].add(cy);
1608                     (*pDataEq)[7].add(cz);
1609                     cy++;
1610                     cz++;
1611                 }
1612                 // add y and x
1613                 (*pDataEq)[8].add(cy);
1614                 (*pDataEq)[9].add(cx);
1615                 cy++;
1616                 cx++;
1617             }
1618             // Fill in bit 10 and up
1619             pDataEq->mort3d( cz, cy, cx, 10 );
1620         }
1621         else if (IsThin(resourceType, swizzleMode))
1622         {
1623             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1624             // Color 2D
1625             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1626             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1627             UINT_32 i;
1628             // Fill in bottom x bits
1629             for (i = elementBytesLog2; i < 4; i++)
1630             {
1631                 (*pDataEq)[i].add(cx);
1632                 cx++;
1633             }
1634             // Fill in bottom y bits
1635             for (i = 4; i < 4 + microYBits; i++)
1636             {
1637                 (*pDataEq)[i].add(cy);
1638                 cy++;
1639             }
1640             // Fill in last of the micro_x bits
1641             for (i = 4 + microYBits; i < 8; i++)
1642             {
1643                 (*pDataEq)[i].add(cx);
1644                 cx++;
1645             }
1646             // Fill in x/y bits below sample split
1647             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1648             // Fill in sample bits
1649             for (i = 0; i < numSamplesLog2; i++)
1650             {
1651                 cs.set(DIM_S, i);
1652                 (*pDataEq)[tileSplitStart + i].add(cs);
1653             }
1654             // Fill in x/y bits above sample split
1655             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1656             {
1657                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1658             }
1659             else
1660             {
1661                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1662             }
1663         }
1664         else
1665         {
1666             ADDR_ASSERT_ALWAYS();
1667         }
1668     }
1669     else
1670     {
1671         // Fmask or depth
1672         UINT_32 sampleStart = elementBytesLog2;
1673         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1674         UINT_32 ymajStart = 6 + numSamplesLog2;
1675 
1676         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1677         {
1678             cs.set(DIM_S, s);
1679             (*pDataEq)[sampleStart + s].add(cs);
1680         }
1681 
1682         // Put in the x-major order pixel bits
1683         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1684         // Put in the y-major order pixel bits
1685         pDataEq->mort2d(cy, cx, ymajStart);
1686     }
1687 }
1688 
1689 /**
1690 ************************************************************************************************************************
1691 *   Gfx9Lib::GetPipeEquation
1692 *
1693 *   @brief
1694 *       Get pipe equation
1695 *   @return
1696 *       N/A
1697 ************************************************************************************************************************
1698 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1699 VOID Gfx9Lib::GetPipeEquation(
1700     CoordEq*         pPipeEq,            ///< [out] pipe equation
1701     CoordEq*         pDataEq,            ///< [in] data equation
1702     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1703     UINT_32          numPipeLog2,        ///< [in] number of pipes
1704     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1705     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1706     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1707     AddrResourceType resourceType        ///< [in] data surface resource type
1708     ) const
1709 {
1710     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1711     CoordEq dataEq;
1712 
1713     pDataEq->copy(dataEq);
1714 
1715     if (dataSurfaceType == Gfx9DataColor)
1716     {
1717         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1718         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1719     }
1720 
1721     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1722 
1723     // This section should only apply to z/stencil, maybe fmask
1724     // If the pipe bit is below the comp block size,
1725     // then keep moving up the address until we find a bit that is above
1726     UINT_32 pipeStart = 0;
1727 
1728     if (dataSurfaceType != Gfx9DataColor)
1729     {
1730         Coordinate tileMin(DIM_X, 3);
1731 
1732         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1733         {
1734             pipeStart++;
1735         }
1736 
1737         // if pipe is 0, then the first pipe bit is above the comp block size,
1738         // so we don't need to do anything
1739         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1740         // we will get the same pipe equation
1741         if (pipeStart != 0)
1742         {
1743             for (UINT_32 i = 0; i < numPipeLog2; i++)
1744             {
1745                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1746                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1747             }
1748         }
1749     }
1750 
1751     if (IsPrt(swizzleMode))
1752     {
1753         // Clear out bits above the block size if prt's are enabled
1754         dataEq.resize(blockSizeLog2);
1755         dataEq.resize(48);
1756     }
1757 
1758     if (IsXor(swizzleMode))
1759     {
1760         CoordEq xorMask;
1761 
1762         if (IsThick(resourceType, swizzleMode))
1763         {
1764             CoordEq xorMask2;
1765 
1766             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1767 
1768             xorMask.resize(numPipeLog2);
1769 
1770             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1771             {
1772                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1773                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1774             }
1775         }
1776         else
1777         {
1778             // Xor in the bits above the pipe+gpu bits
1779             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1780 
1781             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1782             {
1783                 Coordinate co;
1784                 CoordEq xorMask2;
1785                 // if 1xaa and not prt, then xor in the z bits
1786                 xorMask2.resize(0);
1787                 xorMask2.resize(numPipeLog2);
1788                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1789                 {
1790                     co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1791                     xorMask2[pipeIdx].add(co);
1792                 }
1793 
1794                 pPipeEq->xorin(xorMask2);
1795             }
1796         }
1797 
1798         xorMask.reverse();
1799         pPipeEq->xorin(xorMask);
1800     }
1801 }
1802 /**
1803 ************************************************************************************************************************
1804 *   Gfx9Lib::GetMetaEquation
1805 *
1806 *   @brief
1807 *       Get meta equation for cmask/htile/DCC
1808 *   @return
1809 *       Pointer to a calculated meta equation
1810 ************************************************************************************************************************
1811 */
GetMetaEquation(const MetaEqParams & metaEqParams)1812 const CoordEq* Gfx9Lib::GetMetaEquation(
1813     const MetaEqParams& metaEqParams)
1814 {
1815     UINT_32 cachedMetaEqIndex;
1816 
1817     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1818     {
1819         if (memcmp(&metaEqParams,
1820                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1821                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1822         {
1823             break;
1824         }
1825     }
1826 
1827     CoordEq* pMetaEq = NULL;
1828 
1829     if (cachedMetaEqIndex < MaxCachedMetaEq)
1830     {
1831         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1832     }
1833     else
1834     {
1835         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1836 
1837         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1838 
1839         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1840 
1841         GenMetaEquation(pMetaEq,
1842                         metaEqParams.maxMip,
1843                         metaEqParams.elementBytesLog2,
1844                         metaEqParams.numSamplesLog2,
1845                         metaEqParams.metaFlag,
1846                         metaEqParams.dataSurfaceType,
1847                         metaEqParams.swizzleMode,
1848                         metaEqParams.resourceType,
1849                         metaEqParams.metaBlkWidthLog2,
1850                         metaEqParams.metaBlkHeightLog2,
1851                         metaEqParams.metaBlkDepthLog2,
1852                         metaEqParams.compBlkWidthLog2,
1853                         metaEqParams.compBlkHeightLog2,
1854                         metaEqParams.compBlkDepthLog2);
1855     }
1856 
1857     return pMetaEq;
1858 }
1859 
1860 /**
1861 ************************************************************************************************************************
1862 *   Gfx9Lib::GenMetaEquation
1863 *
1864 *   @brief
1865 *       Get meta equation for cmask/htile/DCC
1866 *   @return
1867 *       N/A
1868 ************************************************************************************************************************
1869 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1870 VOID Gfx9Lib::GenMetaEquation(
1871     CoordEq*         pMetaEq,               ///< [out] meta equation
1872     UINT_32          maxMip,                ///< [in] max mip Id
1873     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1874     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1875     ADDR2_META_FLAGS metaFlag,              ///< [in] meta falg
1876     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1877     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1878     AddrResourceType resourceType,          ///< [in] data surface resource type
1879     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1880     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1881     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1882     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1883     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1884     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1885     const
1886 {
1887     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1888     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1889 
1890     // Get the correct data address and rb equation
1891     CoordEq dataEq;
1892     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1893                     elementBytesLog2, numSamplesLog2);
1894 
1895     // Get pipe and rb equations
1896     CoordEq pipeEquation;
1897     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1898                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1899     numPipeTotalLog2 = pipeEquation.getsize();
1900 
1901     if (metaFlag.linear)
1902     {
1903         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1904         ADDR_ASSERT_ALWAYS();
1905 
1906         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1907 
1908         dataEq.copy(*pMetaEq);
1909 
1910         if (IsLinear(swizzleMode))
1911         {
1912             if (metaFlag.pipeAligned)
1913             {
1914                 // Remove the pipe bits
1915                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1916                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1917             }
1918             // Divide by comp block size, which for linear (which is always color) is 256 B
1919             pMetaEq->shift(-8);
1920 
1921             if (metaFlag.pipeAligned)
1922             {
1923                 // Put pipe bits back in
1924                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1925 
1926                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1927                 {
1928                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1929                 }
1930             }
1931         }
1932 
1933         pMetaEq->shift(1);
1934     }
1935     else
1936     {
1937         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1938         UINT_32 compFragLog2 =
1939             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1940             maxCompFragLog2 : numSamplesLog2;
1941 
1942         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1943 
1944         // Make sure the metaaddr is cleared
1945         pMetaEq->resize(0);
1946         pMetaEq->resize(27);
1947 
1948         if (IsThick(resourceType, swizzleMode))
1949         {
1950             Coordinate cx(DIM_X, 0);
1951             Coordinate cy(DIM_Y, 0);
1952             Coordinate cz(DIM_Z, 0);
1953 
1954             if (maxMip > 0)
1955             {
1956                 pMetaEq->mort3d(cy, cx, cz);
1957             }
1958             else
1959             {
1960                 pMetaEq->mort3d(cx, cy, cz);
1961             }
1962         }
1963         else
1964         {
1965             Coordinate cx(DIM_X, 0);
1966             Coordinate cy(DIM_Y, 0);
1967             Coordinate cs;
1968 
1969             if (maxMip > 0)
1970             {
1971                 pMetaEq->mort2d(cy, cx, compFragLog2);
1972             }
1973             else
1974             {
1975                 pMetaEq->mort2d(cx, cy, compFragLog2);
1976             }
1977 
1978             //------------------------------------------------------------------------------------------------------------------------
1979             // Put the compressible fragments at the lsb
1980             // the uncompressible frags will be at the msb of the micro address
1981             //------------------------------------------------------------------------------------------------------------------------
1982             for (UINT_32 s = 0; s < compFragLog2; s++)
1983             {
1984                 cs.set(DIM_S, s);
1985                 (*pMetaEq)[s].add(cs);
1986             }
1987         }
1988 
1989         // Keep a copy of the pipe equations
1990         CoordEq origPipeEquation;
1991         pipeEquation.copy(origPipeEquation);
1992 
1993         Coordinate co;
1994         // filter out everything under the compressed block size
1995         co.set(DIM_X, compBlkWidthLog2);
1996         pMetaEq->Filter('<', co, 0, DIM_X);
1997         co.set(DIM_Y, compBlkHeightLog2);
1998         pMetaEq->Filter('<', co, 0, DIM_Y);
1999         co.set(DIM_Z, compBlkDepthLog2);
2000         pMetaEq->Filter('<', co, 0, DIM_Z);
2001 
2002         // For non-color, filter out sample bits
2003         if (dataSurfaceType != Gfx9DataColor)
2004         {
2005             co.set(DIM_X, 0);
2006             pMetaEq->Filter('<', co, 0, DIM_S);
2007         }
2008 
2009         // filter out everything above the metablock size
2010         co.set(DIM_X, metaBlkWidthLog2 - 1);
2011         pMetaEq->Filter('>', co, 0, DIM_X);
2012         co.set(DIM_Y, metaBlkHeightLog2 - 1);
2013         pMetaEq->Filter('>', co, 0, DIM_Y);
2014         co.set(DIM_Z, metaBlkDepthLog2 - 1);
2015         pMetaEq->Filter('>', co, 0, DIM_Z);
2016 
2017         // filter out everything above the metablock size for the channel bits
2018         co.set(DIM_X, metaBlkWidthLog2 - 1);
2019         pipeEquation.Filter('>', co, 0, DIM_X);
2020         co.set(DIM_Y, metaBlkHeightLog2 - 1);
2021         pipeEquation.Filter('>', co, 0, DIM_Y);
2022         co.set(DIM_Z, metaBlkDepthLog2 - 1);
2023         pipeEquation.Filter('>', co, 0, DIM_Z);
2024 
2025         // Make sure we still have the same number of channel bits
2026         if (pipeEquation.getsize() != numPipeTotalLog2)
2027         {
2028             ADDR_ASSERT_ALWAYS();
2029         }
2030 
2031         // Loop through all channel and rb bits,
2032         // and make sure these components exist in the metadata address
2033         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2034         {
2035             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2036             {
2037                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2038                 {
2039                     ADDR_ASSERT_ALWAYS();
2040                 }
2041             }
2042         }
2043 
2044         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
2045         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2046         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2047         CoordEq       origRbEquation;
2048 
2049         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2050 
2051         CoordEq rbEquation = origRbEquation;
2052 
2053         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2054         {
2055             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2056             {
2057                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2058                 {
2059                     ADDR_ASSERT_ALWAYS();
2060                 }
2061             }
2062         }
2063 
2064         if (m_settings.applyAliasFix)
2065         {
2066             co.set(DIM_Z, -1);
2067         }
2068 
2069         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2070         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2071         {
2072             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2073             {
2074                 BOOL_32 isRbEquationInPipeEquation = FALSE;
2075 
2076                 if (m_settings.applyAliasFix)
2077                 {
2078                     CoordTerm filteredPipeEq;
2079                     filteredPipeEq = pipeEquation[j];
2080 
2081                     filteredPipeEq.Filter('>', co, 0, DIM_Z);
2082 
2083                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2084                 }
2085                 else
2086                 {
2087                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2088                 }
2089 
2090                 if (isRbEquationInPipeEquation)
2091                 {
2092                     rbEquation[i].Clear();
2093                 }
2094             }
2095         }
2096 
2097          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2098 
2099         // Loop through each bit of the channel, get the smallest coordinate,
2100         // and remove it from the metaaddr, and rb_equation
2101         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2102         {
2103             pipeEquation[i].getsmallest(co);
2104 
2105             UINT_32 old_size = pMetaEq->getsize();
2106             pMetaEq->Filter('=', co);
2107             UINT_32 new_size = pMetaEq->getsize();
2108             if (new_size != old_size-1)
2109             {
2110                 ADDR_ASSERT_ALWAYS();
2111             }
2112             pipeEquation.remove(co);
2113             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2114             {
2115                 if (rbEquation[j].remove(co))
2116                 {
2117                     // if we actually removed something from this bit, then add the remaining
2118                     // channel bits, as these can be removed for this bit
2119                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2120                     {
2121                         if (pipeEquation[i][k] != co)
2122                         {
2123                             rbEquation[j].add(pipeEquation[i][k]);
2124                             rbAppendedWithPipeBits[j] = true;
2125                         }
2126                     }
2127                 }
2128             }
2129         }
2130 
2131         // Loop through the rb bits and see what remain;
2132         // filter out the smallest coordinate if it remains
2133         UINT_32 rbBitsLeft = 0;
2134         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2135         {
2136             BOOL_32 isRbEqAppended = FALSE;
2137 
2138             if (m_settings.applyAliasFix)
2139             {
2140                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2141             }
2142             else
2143             {
2144                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2145             }
2146 
2147             if (isRbEqAppended)
2148             {
2149                 rbBitsLeft++;
2150                 rbEquation[i].getsmallest(co);
2151                 UINT_32 old_size = pMetaEq->getsize();
2152                 pMetaEq->Filter('=', co);
2153                 UINT_32 new_size = pMetaEq->getsize();
2154                 if (new_size != old_size - 1)
2155                 {
2156                     // assert warning
2157                 }
2158                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2159                 {
2160                     if (rbEquation[j].remove(co))
2161                     {
2162                         // if we actually removed something from this bit, then add the remaining
2163                         // rb bits, as these can be removed for this bit
2164                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2165                         {
2166                             if (rbEquation[i][k] != co)
2167                             {
2168                                 rbEquation[j].add(rbEquation[i][k]);
2169                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2170                             }
2171                         }
2172                     }
2173                 }
2174             }
2175         }
2176 
2177         // capture the size of the metaaddr
2178         UINT_32 metaSize = pMetaEq->getsize();
2179         // resize to 49 bits...make this a nibble address
2180         pMetaEq->resize(49);
2181         // Concatenate the macro address above the current address
2182         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2183         {
2184             co.set(DIM_M, j);
2185             (*pMetaEq)[i].add(co);
2186         }
2187 
2188         // Multiply by meta element size (in nibbles)
2189         if (dataSurfaceType == Gfx9DataColor)
2190         {
2191             pMetaEq->shift(1);
2192         }
2193         else if (dataSurfaceType == Gfx9DataDepthStencil)
2194         {
2195             pMetaEq->shift(3);
2196         }
2197 
2198         //------------------------------------------------------------------------------------------
2199         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2200         // Shift up from pipe interleave number of channel
2201         // and rb bits left, and uncompressed fragments
2202         //------------------------------------------------------------------------------------------
2203 
2204         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2205 
2206         // Put in the channel bits
2207         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2208         {
2209             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2210         }
2211 
2212         // Put in remaining rb bits
2213         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2214         {
2215             BOOL_32 isRbEqAppended = FALSE;
2216 
2217             if (m_settings.applyAliasFix)
2218             {
2219                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2220             }
2221             else
2222             {
2223                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2224             }
2225 
2226             if (isRbEqAppended)
2227             {
2228                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2229                 // Mark any rb bit we add in to the rb mask
2230                 j++;
2231             }
2232         }
2233 
2234         //------------------------------------------------------------------------------------------
2235         // Put in the uncompressed fragment bits
2236         //------------------------------------------------------------------------------------------
2237         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2238         {
2239             co.set(DIM_S, compFragLog2 + i);
2240             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2241         }
2242     }
2243 }
2244 
2245 /**
2246 ************************************************************************************************************************
2247 *   Gfx9Lib::IsEquationSupported
2248 *
2249 *   @brief
2250 *       Check if equation is supported for given swizzle mode and resource type.
2251 *
2252 *   @return
2253 *       TRUE if supported
2254 ************************************************************************************************************************
2255 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2256 BOOL_32 Gfx9Lib::IsEquationSupported(
2257     AddrResourceType rsrcType,
2258     AddrSwizzleMode  swMode,
2259     UINT_32          elementBytesLog2) const
2260 {
2261     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2262                         (IsValidSwMode(swMode) == TRUE) &&
2263                         (IsLinear(swMode) == FALSE) &&
2264                         (((IsTex2d(rsrcType) == TRUE) &&
2265                           ((elementBytesLog2 < 4) ||
2266                            ((IsRotateSwizzle(swMode) == FALSE) &&
2267                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2268                          ((IsTex3d(rsrcType) == TRUE) &&
2269                           (IsRotateSwizzle(swMode) == FALSE) &&
2270                           (IsBlock256b(swMode) == FALSE)));
2271 
2272     return supported;
2273 }
2274 
2275 /**
2276 ************************************************************************************************************************
2277 *   Gfx9Lib::InitEquationTable
2278 *
2279 *   @brief
2280 *       Initialize Equation table.
2281 *
2282 *   @return
2283 *       N/A
2284 ************************************************************************************************************************
2285 */
InitEquationTable()2286 VOID Gfx9Lib::InitEquationTable()
2287 {
2288     memset(m_equationTable, 0, sizeof(m_equationTable));
2289 
2290     // Loop all possible resource type (2D/3D)
2291     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2292     {
2293         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2294 
2295         // Loop all possible swizzle mode
2296         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2297         {
2298             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2299 
2300             // Loop all possible bpp
2301             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2302             {
2303                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2304 
2305                 // Check if the input is supported
2306                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2307                 {
2308                     ADDR_EQUATION     equation;
2309                     ADDR_E_RETURNCODE retCode;
2310 
2311                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2312 
2313                     // Generate the equation
2314                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2315                     {
2316                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2317                     }
2318                     else if (IsThin(rsrcType, swMode))
2319                     {
2320                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2321                     }
2322                     else
2323                     {
2324                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2325                     }
2326 
2327                     // Only fill the equation into the table if the return code is ADDR_OK,
2328                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2329                     // a valid input, we do nothing but just fill invalid equation index
2330                     // into the lookup table.
2331                     if (retCode == ADDR_OK)
2332                     {
2333                         equationIndex = m_numEquations;
2334                         ADDR_ASSERT(equationIndex < EquationTableSize);
2335 
2336                         m_equationTable[equationIndex] = equation;
2337 
2338                         m_numEquations++;
2339                     }
2340                     else
2341                     {
2342                         ADDR_ASSERT_ALWAYS();
2343                     }
2344                 }
2345 
2346                 // Fill the index into the lookup table, if the combination is not supported
2347                 // fill the invalid equation index
2348                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2349             }
2350         }
2351     }
2352 }
2353 
2354 /**
2355 ************************************************************************************************************************
2356 *   Gfx9Lib::HwlGetEquationIndex
2357 *
2358 *   @brief
2359 *       Interface function stub of GetEquationIndex
2360 *
2361 *   @return
2362 *       ADDR_E_RETURNCODE
2363 ************************************************************************************************************************
2364 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2365 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2366     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2367     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2368     ) const
2369 {
2370     AddrResourceType rsrcType         = pIn->resourceType;
2371     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2372     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2373     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2374 
2375     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2376     {
2377         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2378         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2379 
2380         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2381     }
2382 
2383     if (pOut->pMipInfo != NULL)
2384     {
2385         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2386         {
2387             pOut->pMipInfo[i].equationIndex = index;
2388         }
2389     }
2390 
2391     return index;
2392 }
2393 
2394 /**
2395 ************************************************************************************************************************
2396 *   Gfx9Lib::HwlComputeBlock256Equation
2397 *
2398 *   @brief
2399 *       Interface function stub of ComputeBlock256Equation
2400 *
2401 *   @return
2402 *       ADDR_E_RETURNCODE
2403 ************************************************************************************************************************
2404 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2405 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2406     AddrResourceType rsrcType,
2407     AddrSwizzleMode  swMode,
2408     UINT_32          elementBytesLog2,
2409     ADDR_EQUATION*   pEquation) const
2410 {
2411     ADDR_E_RETURNCODE ret = ADDR_OK;
2412 
2413     pEquation->numBits = 8;
2414 
2415     UINT_32 i = 0;
2416     for (; i < elementBytesLog2; i++)
2417     {
2418         InitChannel(1, 0 , i, &pEquation->addr[i]);
2419     }
2420 
2421     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2422 
2423     const UINT_32 maxBitsUsed = 4;
2424     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2425     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2426 
2427     for (i = 0; i < maxBitsUsed; i++)
2428     {
2429         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2430         InitChannel(1, 1, i, &y[i]);
2431     }
2432 
2433     if (IsStandardSwizzle(rsrcType, swMode))
2434     {
2435         switch (elementBytesLog2)
2436         {
2437             case 0:
2438                 pixelBit[0] = x[0];
2439                 pixelBit[1] = x[1];
2440                 pixelBit[2] = x[2];
2441                 pixelBit[3] = x[3];
2442                 pixelBit[4] = y[0];
2443                 pixelBit[5] = y[1];
2444                 pixelBit[6] = y[2];
2445                 pixelBit[7] = y[3];
2446                 break;
2447             case 1:
2448                 pixelBit[0] = x[0];
2449                 pixelBit[1] = x[1];
2450                 pixelBit[2] = x[2];
2451                 pixelBit[3] = y[0];
2452                 pixelBit[4] = y[1];
2453                 pixelBit[5] = y[2];
2454                 pixelBit[6] = x[3];
2455                 break;
2456             case 2:
2457                 pixelBit[0] = x[0];
2458                 pixelBit[1] = x[1];
2459                 pixelBit[2] = y[0];
2460                 pixelBit[3] = y[1];
2461                 pixelBit[4] = y[2];
2462                 pixelBit[5] = x[2];
2463                 break;
2464             case 3:
2465                 pixelBit[0] = x[0];
2466                 pixelBit[1] = y[0];
2467                 pixelBit[2] = y[1];
2468                 pixelBit[3] = x[1];
2469                 pixelBit[4] = x[2];
2470                 break;
2471             case 4:
2472                 pixelBit[0] = y[0];
2473                 pixelBit[1] = y[1];
2474                 pixelBit[2] = x[0];
2475                 pixelBit[3] = x[1];
2476                 break;
2477             default:
2478                 ADDR_ASSERT_ALWAYS();
2479                 ret = ADDR_INVALIDPARAMS;
2480                 break;
2481         }
2482     }
2483     else if (IsDisplaySwizzle(rsrcType, swMode))
2484     {
2485         switch (elementBytesLog2)
2486         {
2487             case 0:
2488                 pixelBit[0] = x[0];
2489                 pixelBit[1] = x[1];
2490                 pixelBit[2] = x[2];
2491                 pixelBit[3] = y[1];
2492                 pixelBit[4] = y[0];
2493                 pixelBit[5] = y[2];
2494                 pixelBit[6] = x[3];
2495                 pixelBit[7] = y[3];
2496                 break;
2497             case 1:
2498                 pixelBit[0] = x[0];
2499                 pixelBit[1] = x[1];
2500                 pixelBit[2] = x[2];
2501                 pixelBit[3] = y[0];
2502                 pixelBit[4] = y[1];
2503                 pixelBit[5] = y[2];
2504                 pixelBit[6] = x[3];
2505                 break;
2506             case 2:
2507                 pixelBit[0] = x[0];
2508                 pixelBit[1] = x[1];
2509                 pixelBit[2] = y[0];
2510                 pixelBit[3] = x[2];
2511                 pixelBit[4] = y[1];
2512                 pixelBit[5] = y[2];
2513                 break;
2514             case 3:
2515                 pixelBit[0] = x[0];
2516                 pixelBit[1] = y[0];
2517                 pixelBit[2] = x[1];
2518                 pixelBit[3] = x[2];
2519                 pixelBit[4] = y[1];
2520                 break;
2521             case 4:
2522                 pixelBit[0] = x[0];
2523                 pixelBit[1] = y[0];
2524                 pixelBit[2] = x[1];
2525                 pixelBit[3] = y[1];
2526                 break;
2527             default:
2528                 ADDR_ASSERT_ALWAYS();
2529                 ret = ADDR_INVALIDPARAMS;
2530                 break;
2531         }
2532     }
2533     else if (IsRotateSwizzle(swMode))
2534     {
2535         switch (elementBytesLog2)
2536         {
2537             case 0:
2538                 pixelBit[0] = y[0];
2539                 pixelBit[1] = y[1];
2540                 pixelBit[2] = y[2];
2541                 pixelBit[3] = x[1];
2542                 pixelBit[4] = x[0];
2543                 pixelBit[5] = x[2];
2544                 pixelBit[6] = x[3];
2545                 pixelBit[7] = y[3];
2546                 break;
2547             case 1:
2548                 pixelBit[0] = y[0];
2549                 pixelBit[1] = y[1];
2550                 pixelBit[2] = y[2];
2551                 pixelBit[3] = x[0];
2552                 pixelBit[4] = x[1];
2553                 pixelBit[5] = x[2];
2554                 pixelBit[6] = x[3];
2555                 break;
2556             case 2:
2557                 pixelBit[0] = y[0];
2558                 pixelBit[1] = y[1];
2559                 pixelBit[2] = x[0];
2560                 pixelBit[3] = y[2];
2561                 pixelBit[4] = x[1];
2562                 pixelBit[5] = x[2];
2563                 break;
2564             case 3:
2565                 pixelBit[0] = y[0];
2566                 pixelBit[1] = x[0];
2567                 pixelBit[2] = y[1];
2568                 pixelBit[3] = x[1];
2569                 pixelBit[4] = x[2];
2570                 break;
2571             default:
2572                 ADDR_ASSERT_ALWAYS();
2573             case 4:
2574                 ret = ADDR_INVALIDPARAMS;
2575                 break;
2576         }
2577     }
2578     else
2579     {
2580         ADDR_ASSERT_ALWAYS();
2581         ret = ADDR_INVALIDPARAMS;
2582     }
2583 
2584     // Post validation
2585     if (ret == ADDR_OK)
2586     {
2587         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2588         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2589                     (microBlockDim.w * (1 << elementBytesLog2)));
2590         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2591     }
2592 
2593     return ret;
2594 }
2595 
2596 /**
2597 ************************************************************************************************************************
2598 *   Gfx9Lib::HwlComputeThinEquation
2599 *
2600 *   @brief
2601 *       Interface function stub of ComputeThinEquation
2602 *
2603 *   @return
2604 *       ADDR_E_RETURNCODE
2605 ************************************************************************************************************************
2606 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2607 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2608     AddrResourceType rsrcType,
2609     AddrSwizzleMode  swMode,
2610     UINT_32          elementBytesLog2,
2611     ADDR_EQUATION*   pEquation) const
2612 {
2613     ADDR_E_RETURNCODE ret = ADDR_OK;
2614 
2615     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2616 
2617     UINT_32 maxXorBits = blockSizeLog2;
2618     if (IsNonPrtXor(swMode))
2619     {
2620         // For non-prt-xor, maybe need to initialize some more bits for xor
2621         // The highest xor bit used in equation will be max the following 3 items:
2622         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2623         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2624         // 3. blockSizeLog2
2625 
2626         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2627         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2628                                      GetPipeXorBits(blockSizeLog2) +
2629                                      2 * GetBankXorBits(blockSizeLog2));
2630     }
2631 
2632     const UINT_32 maxBitsUsed = 14;
2633     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2634     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2635     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2636 
2637     const UINT_32 extraXorBits = 16;
2638     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2639     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2640 
2641     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2642     {
2643         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2644         InitChannel(1, 1, i, &y[i]);
2645     }
2646 
2647     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2648 
2649     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2650     {
2651         InitChannel(1, 0 , i, &pixelBit[i]);
2652     }
2653 
2654     UINT_32 xIdx = 0;
2655     UINT_32 yIdx = 0;
2656     UINT_32 lowBits = 0;
2657 
2658     if (IsZOrderSwizzle(swMode))
2659     {
2660         if (elementBytesLog2 <= 3)
2661         {
2662             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2663             {
2664                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2665             }
2666 
2667             lowBits = 6;
2668         }
2669         else
2670         {
2671             ret = ADDR_INVALIDPARAMS;
2672         }
2673     }
2674     else
2675     {
2676         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2677 
2678         if (ret == ADDR_OK)
2679         {
2680             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2681             xIdx = Log2(microBlockDim.w);
2682             yIdx = Log2(microBlockDim.h);
2683             lowBits = 8;
2684         }
2685     }
2686 
2687     if (ret == ADDR_OK)
2688     {
2689         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2690         {
2691             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2692         }
2693 
2694         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2695         {
2696             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2697         }
2698 
2699         if (IsXor(swMode))
2700         {
2701             // Fill XOR bits
2702             UINT_32 pipeStart = m_pipeInterleaveLog2;
2703             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2704 
2705             UINT_32 bankStart = pipeStart + pipeXorBits;
2706             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2707 
2708             for (UINT_32 i = 0; i < pipeXorBits; i++)
2709             {
2710                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2711                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2712                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2713 
2714                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2715             }
2716 
2717             for (UINT_32 i = 0; i < bankXorBits; i++)
2718             {
2719                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2720                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2721                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2722 
2723                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2724             }
2725 
2726             if (IsPrt(swMode) == FALSE)
2727             {
2728                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2729                 {
2730                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2731                 }
2732 
2733                 for (UINT_32 i = 0; i < bankXorBits; i++)
2734                 {
2735                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2736                 }
2737             }
2738         }
2739 
2740         pEquation->numBits = blockSizeLog2;
2741     }
2742 
2743     return ret;
2744 }
2745 
2746 /**
2747 ************************************************************************************************************************
2748 *   Gfx9Lib::HwlComputeThickEquation
2749 *
2750 *   @brief
2751 *       Interface function stub of ComputeThickEquation
2752 *
2753 *   @return
2754 *       ADDR_E_RETURNCODE
2755 ************************************************************************************************************************
2756 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2757 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2758     AddrResourceType rsrcType,
2759     AddrSwizzleMode  swMode,
2760     UINT_32          elementBytesLog2,
2761     ADDR_EQUATION*   pEquation) const
2762 {
2763     ADDR_E_RETURNCODE ret = ADDR_OK;
2764 
2765     ADDR_ASSERT(IsTex3d(rsrcType));
2766 
2767     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2768 
2769     UINT_32 maxXorBits = blockSizeLog2;
2770     if (IsNonPrtXor(swMode))
2771     {
2772         // For non-prt-xor, maybe need to initialize some more bits for xor
2773         // The highest xor bit used in equation will be max the following 3:
2774         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2775         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2776         // 3. blockSizeLog2
2777 
2778         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2779         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2780                                      GetPipeXorBits(blockSizeLog2) +
2781                                      3 * GetBankXorBits(blockSizeLog2));
2782     }
2783 
2784     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2785     {
2786         InitChannel(1, 0 , i, &pEquation->addr[i]);
2787     }
2788 
2789     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2790 
2791     const UINT_32 maxBitsUsed = 12;
2792     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2793     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2794     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2795     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2796 
2797     const UINT_32 extraXorBits = 24;
2798     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2799     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2800 
2801     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2802     {
2803         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2804         InitChannel(1, 1, i, &y[i]);
2805         InitChannel(1, 2, i, &z[i]);
2806     }
2807 
2808     if (IsZOrderSwizzle(swMode))
2809     {
2810         switch (elementBytesLog2)
2811         {
2812             case 0:
2813                 pixelBit[0]  = x[0];
2814                 pixelBit[1]  = y[0];
2815                 pixelBit[2]  = x[1];
2816                 pixelBit[3]  = y[1];
2817                 pixelBit[4]  = z[0];
2818                 pixelBit[5]  = z[1];
2819                 pixelBit[6]  = x[2];
2820                 pixelBit[7]  = z[2];
2821                 pixelBit[8]  = y[2];
2822                 pixelBit[9]  = x[3];
2823                 break;
2824             case 1:
2825                 pixelBit[0]  = x[0];
2826                 pixelBit[1]  = y[0];
2827                 pixelBit[2]  = x[1];
2828                 pixelBit[3]  = y[1];
2829                 pixelBit[4]  = z[0];
2830                 pixelBit[5]  = z[1];
2831                 pixelBit[6]  = z[2];
2832                 pixelBit[7]  = y[2];
2833                 pixelBit[8]  = x[2];
2834                 break;
2835             case 2:
2836                 pixelBit[0]  = x[0];
2837                 pixelBit[1]  = y[0];
2838                 pixelBit[2]  = x[1];
2839                 pixelBit[3]  = z[0];
2840                 pixelBit[4]  = y[1];
2841                 pixelBit[5]  = z[1];
2842                 pixelBit[6]  = y[2];
2843                 pixelBit[7]  = x[2];
2844                 break;
2845             case 3:
2846                 pixelBit[0]  = x[0];
2847                 pixelBit[1]  = y[0];
2848                 pixelBit[2]  = z[0];
2849                 pixelBit[3]  = x[1];
2850                 pixelBit[4]  = z[1];
2851                 pixelBit[5]  = y[1];
2852                 pixelBit[6]  = x[2];
2853                 break;
2854             case 4:
2855                 pixelBit[0]  = x[0];
2856                 pixelBit[1]  = y[0];
2857                 pixelBit[2]  = z[0];
2858                 pixelBit[3]  = z[1];
2859                 pixelBit[4]  = y[1];
2860                 pixelBit[5]  = x[1];
2861                 break;
2862             default:
2863                 ADDR_ASSERT_ALWAYS();
2864                 ret = ADDR_INVALIDPARAMS;
2865                 break;
2866         }
2867     }
2868     else if (IsStandardSwizzle(rsrcType, swMode))
2869     {
2870         switch (elementBytesLog2)
2871         {
2872             case 0:
2873                 pixelBit[0]  = x[0];
2874                 pixelBit[1]  = x[1];
2875                 pixelBit[2]  = x[2];
2876                 pixelBit[3]  = x[3];
2877                 pixelBit[4]  = y[0];
2878                 pixelBit[5]  = y[1];
2879                 pixelBit[6]  = z[0];
2880                 pixelBit[7]  = z[1];
2881                 pixelBit[8]  = z[2];
2882                 pixelBit[9]  = y[2];
2883                 break;
2884             case 1:
2885                 pixelBit[0]  = x[0];
2886                 pixelBit[1]  = x[1];
2887                 pixelBit[2]  = x[2];
2888                 pixelBit[3]  = y[0];
2889                 pixelBit[4]  = y[1];
2890                 pixelBit[5]  = z[0];
2891                 pixelBit[6]  = z[1];
2892                 pixelBit[7]  = z[2];
2893                 pixelBit[8]  = y[2];
2894                 break;
2895             case 2:
2896                 pixelBit[0]  = x[0];
2897                 pixelBit[1]  = x[1];
2898                 pixelBit[2]  = y[0];
2899                 pixelBit[3]  = y[1];
2900                 pixelBit[4]  = z[0];
2901                 pixelBit[5]  = z[1];
2902                 pixelBit[6]  = y[2];
2903                 pixelBit[7]  = x[2];
2904                 break;
2905             case 3:
2906                 pixelBit[0]  = x[0];
2907                 pixelBit[1]  = y[0];
2908                 pixelBit[2]  = y[1];
2909                 pixelBit[3]  = z[0];
2910                 pixelBit[4]  = z[1];
2911                 pixelBit[5]  = x[1];
2912                 pixelBit[6]  = x[2];
2913                 break;
2914             case 4:
2915                 pixelBit[0]  = y[0];
2916                 pixelBit[1]  = y[1];
2917                 pixelBit[2]  = z[0];
2918                 pixelBit[3]  = z[1];
2919                 pixelBit[4]  = x[0];
2920                 pixelBit[5]  = x[1];
2921                 break;
2922             default:
2923                 ADDR_ASSERT_ALWAYS();
2924                 ret = ADDR_INVALIDPARAMS;
2925                 break;
2926         }
2927     }
2928     else
2929     {
2930         ADDR_ASSERT_ALWAYS();
2931         ret = ADDR_INVALIDPARAMS;
2932     }
2933 
2934     if (ret == ADDR_OK)
2935     {
2936         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2937         UINT_32 xIdx = Log2(microBlockDim.w);
2938         UINT_32 yIdx = Log2(microBlockDim.h);
2939         UINT_32 zIdx = Log2(microBlockDim.d);
2940 
2941         pixelBit = pEquation->addr;
2942 
2943         const UINT_32 lowBits = 10;
2944         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2945         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2946 
2947         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2948         {
2949             if ((i % 3) == 0)
2950             {
2951                 pixelBit[i] = x[xIdx++];
2952             }
2953             else if ((i % 3) == 1)
2954             {
2955                 pixelBit[i] = z[zIdx++];
2956             }
2957             else
2958             {
2959                 pixelBit[i] = y[yIdx++];
2960             }
2961         }
2962 
2963         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2964         {
2965             if ((i % 3) == 0)
2966             {
2967                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2968             }
2969             else if ((i % 3) == 1)
2970             {
2971                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2972             }
2973             else
2974             {
2975                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2976             }
2977         }
2978 
2979         if (IsXor(swMode))
2980         {
2981             // Fill XOR bits
2982             UINT_32 pipeStart = m_pipeInterleaveLog2;
2983             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2984             for (UINT_32 i = 0; i < pipeXorBits; i++)
2985             {
2986                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2987                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2988                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2989 
2990                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2991 
2992                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2993                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2994                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2995 
2996                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2997             }
2998 
2999             UINT_32 bankStart = pipeStart + pipeXorBits;
3000             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
3001             for (UINT_32 i = 0; i < bankXorBits; i++)
3002             {
3003                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
3004                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
3005                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
3006 
3007                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
3008 
3009                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
3010                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
3011                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
3012 
3013                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
3014             }
3015         }
3016 
3017         pEquation->numBits = blockSizeLog2;
3018     }
3019 
3020     return ret;
3021 }
3022 
3023 /**
3024 ************************************************************************************************************************
3025 *   Gfx9Lib::IsValidDisplaySwizzleMode
3026 *
3027 *   @brief
3028 *       Check if a swizzle mode is supported by display engine
3029 *
3030 *   @return
3031 *       TRUE is swizzle mode is supported by display engine
3032 ************************************************************************************************************************
3033 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3034 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3035     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3036 {
3037     BOOL_32 support = FALSE;
3038 
3039     const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3040 
3041     if (m_settings.isDce12)
3042     {
3043         if (pIn->bpp == 32)
3044         {
3045             support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3046         }
3047         else if (pIn->bpp <= 64)
3048         {
3049             support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3050         }
3051     }
3052     else if (m_settings.isDcn1)
3053     {
3054         if (pIn->bpp < 64)
3055         {
3056             support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3057         }
3058         else if (pIn->bpp == 64)
3059         {
3060             support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3061         }
3062     }
3063     else if (m_settings.isDcn2)
3064     {
3065         if (pIn->bpp < 64)
3066         {
3067             support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3068         }
3069         else if (pIn->bpp == 64)
3070         {
3071             support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3072         }
3073     }
3074     else
3075     {
3076         ADDR_NOT_IMPLEMENTED();
3077     }
3078 
3079     return support;
3080 }
3081 
3082 /**
3083 ************************************************************************************************************************
3084 *   Gfx9Lib::HwlComputePipeBankXor
3085 *
3086 *   @brief
3087 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3088 *
3089 *   @return
3090 *       PipeBankXor value
3091 ************************************************************************************************************************
3092 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3093 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3094     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3095     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3096 {
3097     if (IsXor(pIn->swizzleMode))
3098     {
3099         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3100         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3101         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3102 
3103         UINT_32 pipeXor = 0;
3104         UINT_32 bankXor = 0;
3105 
3106         const UINT_32 bankMask = (1 << bankBits) - 1;
3107         const UINT_32 index    = pIn->surfIndex & bankMask;
3108 
3109         const UINT_32 bpp      = pIn->flags.fmask ?
3110                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3111         if (bankBits == 4)
3112         {
3113             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3114             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3115 
3116             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3117         }
3118         else if (bankBits > 0)
3119         {
3120             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3121             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3122             bankXor = (index * bankIncrease) & bankMask;
3123         }
3124 
3125         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3126     }
3127     else
3128     {
3129         pOut->pipeBankXor = 0;
3130     }
3131 
3132     return ADDR_OK;
3133 }
3134 
3135 /**
3136 ************************************************************************************************************************
3137 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3138 *
3139 *   @brief
3140 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3141 *
3142 *   @return
3143 *       PipeBankXor value
3144 ************************************************************************************************************************
3145 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3146 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3147     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3148     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3149 {
3150     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3151     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3152     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3153 
3154     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3155     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3156 
3157     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3158 
3159     return ADDR_OK;
3160 }
3161 
3162 /**
3163 ************************************************************************************************************************
3164 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3165 *
3166 *   @brief
3167 *       Compute sub resource offset to support swizzle pattern
3168 *
3169 *   @return
3170 *       Offset
3171 ************************************************************************************************************************
3172 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3173 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3174     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3175     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3176 {
3177     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3178 
3179     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3180     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3181     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3182     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3183     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3184     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3185 
3186     pOut->offset = pIn->slice * pIn->sliceSize +
3187                    pIn->macroBlockOffset +
3188                    (pIn->mipTailOffset ^ pipeBankXor) -
3189                    static_cast<UINT_64>(pipeBankXor);
3190     return ADDR_OK;
3191 }
3192 
3193 /**
3194 ************************************************************************************************************************
3195 *   Gfx9Lib::ValidateNonSwModeParams
3196 *
3197 *   @brief
3198 *       Validate compute surface info params except swizzle mode
3199 *
3200 *   @return
3201 *       TRUE if parameters are valid, FALSE otherwise
3202 ************************************************************************************************************************
3203 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3204 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3205     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3206 {
3207     BOOL_32 valid = TRUE;
3208 
3209     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3210     {
3211         ADDR_ASSERT_ALWAYS();
3212         valid = FALSE;
3213     }
3214 
3215     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3216     {
3217         ADDR_ASSERT_ALWAYS();
3218         valid = FALSE;
3219     }
3220 
3221     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3222     const BOOL_32 msaa   = (pIn->numFrags > 1);
3223     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3224 
3225     const AddrResourceType rsrcType = pIn->resourceType;
3226     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3227     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3228     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3229 
3230     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3231     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3232     const BOOL_32             display = flags.display || flags.rotated;
3233     const BOOL_32             stereo  = flags.qbStereo;
3234     const BOOL_32             fmask   = flags.fmask;
3235 
3236     // Resource type check
3237     if (tex1d)
3238     {
3239         if (msaa || zbuffer || display || stereo || isBc || fmask)
3240         {
3241             ADDR_ASSERT_ALWAYS();
3242             valid = FALSE;
3243         }
3244     }
3245     else if (tex2d)
3246     {
3247         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3248         {
3249             ADDR_ASSERT_ALWAYS();
3250             valid = FALSE;
3251         }
3252     }
3253     else if (tex3d)
3254     {
3255         if (msaa || zbuffer || display || stereo || fmask)
3256         {
3257             ADDR_ASSERT_ALWAYS();
3258             valid = FALSE;
3259         }
3260     }
3261     else
3262     {
3263         ADDR_ASSERT_ALWAYS();
3264         valid = FALSE;
3265     }
3266 
3267     return valid;
3268 }
3269 
3270 /**
3271 ************************************************************************************************************************
3272 *   Gfx9Lib::ValidateSwModeParams
3273 *
3274 *   @brief
3275 *       Validate compute surface info related to swizzle mode
3276 *
3277 *   @return
3278 *       TRUE if parameters are valid, FALSE otherwise
3279 ************************************************************************************************************************
3280 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3281 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3282     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3283 {
3284     BOOL_32 valid = TRUE;
3285 
3286     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3287     {
3288         ADDR_ASSERT_ALWAYS();
3289         valid = FALSE;
3290     }
3291 
3292     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3293     const BOOL_32 msaa   = (pIn->numFrags > 1);
3294     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3295     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3296 
3297     const AddrResourceType rsrcType = pIn->resourceType;
3298     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3299     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3300     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3301 
3302     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3303     const BOOL_32          linear      = IsLinear(swizzle);
3304     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3305     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3306 
3307     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3308     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3309     const BOOL_32             color   = flags.color;
3310     const BOOL_32             texture = flags.texture;
3311     const BOOL_32             display = flags.display || flags.rotated;
3312     const BOOL_32             prt     = flags.prt;
3313     const BOOL_32             fmask   = flags.fmask;
3314 
3315     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3316     const BOOL_32             zMaxMip = tex3d && mipmap &&
3317                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3318 
3319     // Misc check
3320     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3321     {
3322         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3323         ADDR_ASSERT_ALWAYS();
3324         valid = FALSE;
3325     }
3326 
3327     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3328     {
3329         ADDR_ASSERT_ALWAYS();
3330         valid = FALSE;
3331     }
3332 
3333     if ((pIn->bpp == 96) && (linear == FALSE))
3334     {
3335         ADDR_ASSERT_ALWAYS();
3336         valid = FALSE;
3337     }
3338 
3339     if (prt && isNonPrtXor)
3340     {
3341         ADDR_ASSERT_ALWAYS();
3342         valid = FALSE;
3343     }
3344 
3345     // Resource type check
3346     if (tex1d)
3347     {
3348         if (linear == FALSE)
3349         {
3350             ADDR_ASSERT_ALWAYS();
3351             valid = FALSE;
3352         }
3353     }
3354 
3355     // Swizzle type check
3356     if (linear)
3357     {
3358         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3359             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3360         {
3361             ADDR_ASSERT_ALWAYS();
3362             valid = FALSE;
3363         }
3364     }
3365     else if (IsZOrderSwizzle(swizzle))
3366     {
3367         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3368         {
3369             ADDR_ASSERT_ALWAYS();
3370             valid = FALSE;
3371         }
3372     }
3373     else if (IsStandardSwizzle(swizzle))
3374     {
3375         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3376         {
3377             ADDR_ASSERT_ALWAYS();
3378             valid = FALSE;
3379         }
3380     }
3381     else if (IsDisplaySwizzle(swizzle))
3382     {
3383         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3384         {
3385             ADDR_ASSERT_ALWAYS();
3386             valid = FALSE;
3387         }
3388     }
3389     else if (IsRotateSwizzle(swizzle))
3390     {
3391         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3392         {
3393             ADDR_ASSERT_ALWAYS();
3394             valid = FALSE;
3395         }
3396     }
3397     else
3398     {
3399         ADDR_ASSERT_ALWAYS();
3400         valid = FALSE;
3401     }
3402 
3403     // Block type check
3404     if (blk256B)
3405     {
3406         if (prt || zbuffer || tex3d || mipmap || msaa)
3407         {
3408             ADDR_ASSERT_ALWAYS();
3409             valid = FALSE;
3410         }
3411     }
3412 
3413     return valid;
3414 }
3415 
3416 /**
3417 ************************************************************************************************************************
3418 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3419 *
3420 *   @brief
3421 *       Compute surface info sanity check
3422 *
3423 *   @return
3424 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3425 ************************************************************************************************************************
3426 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3427 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3428     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3429 {
3430     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3431 }
3432 
3433 /**
3434 ************************************************************************************************************************
3435 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3436 *
3437 *   @brief
3438 *       Internal function to get suggested surface information for client to use
3439 *
3440 *   @return
3441 *       ADDR_E_RETURNCODE
3442 ************************************************************************************************************************
3443 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3444 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3445     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3446     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3447 {
3448     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3449     ElemLib*          pElemLib   = GetElemLib();
3450 
3451     UINT_32 bpp        = pIn->bpp;
3452     UINT_32 width      = Max(pIn->width, 1u);
3453     UINT_32 height     = Max(pIn->height, 1u);
3454     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3455     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3456 
3457     if (pIn->flags.fmask)
3458     {
3459         bpp                = GetFmaskBpp(numSamples, numFrags);
3460         numFrags           = 1;
3461         numSamples         = 1;
3462         pOut->resourceType = ADDR_RSRC_TEX_2D;
3463     }
3464     else
3465     {
3466         // Set format to INVALID will skip this conversion
3467         if (pIn->format != ADDR_FMT_INVALID)
3468         {
3469             UINT_32 expandX, expandY;
3470 
3471             // Don't care for this case
3472             ElemMode elemMode = ADDR_UNCOMPRESSED;
3473 
3474             // Get compression/expansion factors and element mode which indicates compression/expansion
3475             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3476                                             &elemMode,
3477                                             &expandX,
3478                                             &expandY);
3479 
3480             UINT_32 basePitch = 0;
3481             GetElemLib()->AdjustSurfaceInfo(elemMode,
3482                                             expandX,
3483                                             expandY,
3484                                             &bpp,
3485                                             &basePitch,
3486                                             &width,
3487                                             &height);
3488         }
3489 
3490         // The output may get changed for volume(3D) texture resource in future
3491         pOut->resourceType = pIn->resourceType;
3492     }
3493 
3494     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3495     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3496     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3497     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3498 
3499     // Pre sanity check on non swizzle mode parameters
3500     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3501     localIn.flags        = pIn->flags;
3502     localIn.resourceType = pOut->resourceType;
3503     localIn.format       = pIn->format;
3504     localIn.bpp          = bpp;
3505     localIn.width        = width;
3506     localIn.height       = height;
3507     localIn.numSlices    = numSlices;
3508     localIn.numMipLevels = numMipLevels;
3509     localIn.numSamples   = numSamples;
3510     localIn.numFrags     = numFrags;
3511 
3512     if (ValidateNonSwModeParams(&localIn))
3513     {
3514         // Forbid swizzle mode(s) by client setting
3515         ADDR2_SWMODE_SET allowedSwModeSet = {};
3516         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3517         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3518         allowedSwModeSet.value |=
3519             pIn->forbiddenBlock.macroThin4KB ? 0 :
3520             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3521         allowedSwModeSet.value |=
3522             pIn->forbiddenBlock.macroThick4KB ? 0 :
3523             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3524         allowedSwModeSet.value |=
3525             pIn->forbiddenBlock.macroThin64KB ? 0 :
3526             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3527         allowedSwModeSet.value |=
3528             pIn->forbiddenBlock.macroThick64KB ? 0 :
3529             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3530 
3531         if (pIn->preferredSwSet.value != 0)
3532         {
3533             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3534             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3535             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3536             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3537         }
3538 
3539         if (pIn->noXor)
3540         {
3541             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3542         }
3543 
3544         if (pIn->maxAlign > 0)
3545         {
3546             if (pIn->maxAlign < Size64K)
3547             {
3548                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3549             }
3550 
3551             if (pIn->maxAlign < Size4K)
3552             {
3553                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3554             }
3555 
3556             if (pIn->maxAlign < Size256)
3557             {
3558                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3559             }
3560         }
3561 
3562         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3563         switch (pOut->resourceType)
3564         {
3565             case ADDR_RSRC_TEX_1D:
3566                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3567                 break;
3568 
3569             case ADDR_RSRC_TEX_2D:
3570                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3571 
3572                 if (bpp > 64)
3573                 {
3574                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3575                 }
3576                 break;
3577 
3578             case ADDR_RSRC_TEX_3D:
3579                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3580 
3581                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3582                 {
3583                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3584                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3585                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3586                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3587                 }
3588 
3589                 if ((bpp == 128) && pIn->flags.color)
3590                 {
3591                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3592                 }
3593 
3594                 if (pIn->flags.view3dAs2dArray)
3595                 {
3596                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3597                 }
3598                 break;
3599 
3600             default:
3601                 ADDR_ASSERT_ALWAYS();
3602                 allowedSwModeSet.value = 0;
3603                 break;
3604         }
3605 
3606         if (pIn->format == ADDR_FMT_32_32_32)
3607         {
3608             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3609         }
3610 
3611         if (ElemLib::IsBlockCompressed(pIn->format))
3612         {
3613             if (pIn->flags.texture)
3614             {
3615                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3616             }
3617             else
3618             {
3619                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3620             }
3621         }
3622 
3623         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3624             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3625         {
3626             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3627         }
3628 
3629         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3630         {
3631             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3632 
3633             if (pIn->flags.noMetadata == FALSE)
3634             {
3635                 if (pIn->flags.depth &&
3636                     pIn->flags.texture &&
3637                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3638                 {
3639                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3640                     // equation from wrong address within memory range a tile covered and use the
3641                     // garbage data for compressed Z reading which finally leads to corruption.
3642                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3643                 }
3644 
3645                 if (m_settings.htileCacheRbConflict &&
3646                     (pIn->flags.depth || pIn->flags.stencil) &&
3647                     (numSlices > 1) &&
3648                     (pIn->flags.metaRbUnaligned == FALSE) &&
3649                     (pIn->flags.metaPipeUnaligned == FALSE))
3650                 {
3651                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3652                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3653                 }
3654             }
3655         }
3656 
3657         if (msaa)
3658         {
3659             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3660         }
3661 
3662         if ((numFrags > 1) &&
3663             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3664         {
3665             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3666             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3667         }
3668 
3669         if (numMipLevels > 1)
3670         {
3671             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3672         }
3673 
3674         if (displayRsrc)
3675         {
3676             if (m_settings.isDce12)
3677             {
3678                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3679             }
3680             else if (m_settings.isDcn1)
3681             {
3682                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3683             }
3684             else if (m_settings.isDcn2)
3685             {
3686                 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3687             }
3688             else
3689             {
3690                 ADDR_NOT_IMPLEMENTED();
3691             }
3692         }
3693 
3694         if (allowedSwModeSet.value != 0)
3695         {
3696 #if DEBUG
3697             // Post sanity check, at least AddrLib should accept the output generated by its own
3698             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3699 
3700             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3701             {
3702                 if (validateSwModeSet & 1)
3703                 {
3704                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3705                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3706                 }
3707 
3708                 validateSwModeSet >>= 1;
3709             }
3710 #endif
3711 
3712             pOut->validSwModeSet = allowedSwModeSet;
3713             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3714             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3715             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3716 
3717             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3718 
3719             if (pOut->clientPreferredSwSet.value == 0)
3720             {
3721                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3722             }
3723 
3724             // Apply optional restrictions
3725             if (pIn->flags.needEquation)
3726             {
3727                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3728             }
3729 
3730             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3731             {
3732                 pOut->swizzleMode = ADDR_SW_LINEAR;
3733             }
3734             else
3735             {
3736                 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3737 
3738                 if ((height > 1) && (computeMinSize == FALSE))
3739                 {
3740                     // Always ignore linear swizzle mode if:
3741                     // 1. This is a (2D/3D) resource with height > 1
3742                     // 2. Client doesn't require computing minimize size
3743                     allowedSwModeSet.swLinear = 0;
3744                 }
3745 
3746                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3747 
3748                 // Determine block size if there are 2 or more block type candidates
3749                 if (IsPow2(allowedBlockSet.value) == FALSE)
3750                 {
3751                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3752 
3753                     swMode[AddrBlockLinear]   = ADDR_SW_LINEAR;
3754                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3755                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3756                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3757 
3758                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3759                     {
3760                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3761                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3762                     }
3763 
3764                     UINT_64 padSize[AddrBlockMaxTiledType] = {};
3765 
3766                     const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3767                     const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3768                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3769                     UINT_32       minSizeBlk         = AddrBlockMicro;
3770                     UINT_64       minSize            = 0;
3771 
3772                     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3773 
3774                     for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3775                     {
3776                         if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3777                         {
3778                             localIn.swizzleMode = swMode[i];
3779 
3780                             if (localIn.swizzleMode == ADDR_SW_LINEAR)
3781                             {
3782                                 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3783                             }
3784                             else
3785                             {
3786                                 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3787                             }
3788 
3789                             if (returnCode == ADDR_OK)
3790                             {
3791                                 padSize[i] = localOut.surfSize;
3792 
3793                                 if ((minSize == 0) ||
3794                                     BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3795                                 {
3796                                     minSize    = padSize[i];
3797                                     minSizeBlk = i;
3798                                 }
3799                             }
3800                             else
3801                             {
3802                                 ADDR_ASSERT_ALWAYS();
3803                                 break;
3804                             }
3805                         }
3806                     }
3807 
3808                     if (pIn->memoryBudget > 1.0)
3809                     {
3810                         // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3811                         // smaller-block type again in coming loop
3812                         switch (minSizeBlk)
3813                         {
3814                             case AddrBlockThick64KB:
3815                                 allowedBlockSet.macroThin64KB = 0;
3816                             case AddrBlockThin64KB:
3817                                 allowedBlockSet.macroThick4KB = 0;
3818                             case AddrBlockThick4KB:
3819                                 allowedBlockSet.macroThin4KB = 0;
3820                             case AddrBlockThin4KB:
3821                                 allowedBlockSet.micro  = 0;
3822                             case AddrBlockMicro:
3823                                 allowedBlockSet.linear = 0;
3824                             case AddrBlockLinear:
3825                                 break;
3826 
3827                             default:
3828                                 ADDR_ASSERT_ALWAYS();
3829                                 break;
3830                         }
3831 
3832                         for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3833                         {
3834                             if ((i != minSizeBlk) &&
3835                                 IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3836                             {
3837                                 if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3838                                 {
3839                                     // Clear the block type if the memory waste is unacceptable
3840                                     allowedBlockSet.value &= ~(1u << (i - 1));
3841                                 }
3842                             }
3843                         }
3844 
3845                         // Remove linear block type if 2 or more block types are allowed
3846                         if (IsPow2(allowedBlockSet.value) == FALSE)
3847                         {
3848                             allowedBlockSet.linear = 0;
3849                         }
3850 
3851                         // Select the biggest allowed block type
3852                         minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3853 
3854                         if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3855                         {
3856                             minSizeBlk = AddrBlockLinear;
3857                         }
3858                     }
3859 
3860                     switch (minSizeBlk)
3861                     {
3862                         case AddrBlockLinear:
3863                             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3864                             break;
3865 
3866                         case AddrBlockMicro:
3867                             ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3868                             allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3869                             break;
3870 
3871                         case AddrBlockThin4KB:
3872                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3873                                                       Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3874                             break;
3875 
3876                         case AddrBlockThick4KB:
3877                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3878                             allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3879                             break;
3880 
3881                         case AddrBlockThin64KB:
3882                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3883                                                       Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3884                             break;
3885 
3886                         case AddrBlockThick64KB:
3887                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3888                             allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3889                             break;
3890 
3891                         default:
3892                             ADDR_ASSERT_ALWAYS();
3893                             allowedSwModeSet.value = 0;
3894                             break;
3895                     }
3896                 }
3897 
3898                 // Block type should be determined.
3899                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3900 
3901                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3902 
3903                 // Determine swizzle type if there are 2 or more swizzle type candidates
3904                 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3905                 {
3906                     if (ElemLib::IsBlockCompressed(pIn->format))
3907                     {
3908                         if (allowedSwSet.sw_D)
3909                         {
3910                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3911                         }
3912                         else
3913                         {
3914                             ADDR_ASSERT(allowedSwSet.sw_S);
3915                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3916                         }
3917                     }
3918                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3919                     {
3920                         if (allowedSwSet.sw_S)
3921                         {
3922                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3923                         }
3924                         else if (allowedSwSet.sw_D)
3925                         {
3926                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3927                         }
3928                         else
3929                         {
3930                             ADDR_ASSERT(allowedSwSet.sw_R);
3931                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3932                         }
3933                     }
3934                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3935                     {
3936                         if (pIn->flags.color && allowedSwSet.sw_D)
3937                         {
3938                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3939                         }
3940                         else if (allowedSwSet.sw_Z)
3941                         {
3942                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3943                         }
3944                         else
3945                         {
3946                             ADDR_ASSERT(allowedSwSet.sw_S);
3947                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3948                         }
3949                     }
3950                     else
3951                     {
3952                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3953                         {
3954                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3955                         }
3956                         else if (allowedSwSet.sw_D)
3957                         {
3958                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3959                         }
3960                         else if (allowedSwSet.sw_S)
3961                         {
3962                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3963                         }
3964                         else
3965                         {
3966                             ADDR_ASSERT(allowedSwSet.sw_Z);
3967                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3968                         }
3969                     }
3970 
3971                     // Swizzle type should be determined.
3972                     ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3973                 }
3974 
3975                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3976                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3977                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3978                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3979             }
3980 
3981             returnCode = ADDR_OK;
3982         }
3983         else
3984         {
3985             // Invalid combination...
3986             ADDR_ASSERT_ALWAYS();
3987         }
3988     }
3989     else
3990     {
3991         // Invalid combination...
3992         ADDR_ASSERT_ALWAYS();
3993     }
3994 
3995     return returnCode;
3996 }
3997 
3998 /**
3999 ************************************************************************************************************************
4000 *   Gfx9Lib::ComputeStereoInfo
4001 *
4002 *   @brief
4003 *       Compute height alignment and right eye pipeBankXor for stereo surface
4004 *
4005 *   @return
4006 *       Error code
4007 *
4008 ************************************************************************************************************************
4009 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const4010 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
4011     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
4012     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
4013     UINT_32*                                pHeightAlign
4014     ) const
4015 {
4016     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4017 
4018     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4019 
4020     if (eqIndex < m_numEquations)
4021     {
4022         if (IsXor(pIn->swizzleMode))
4023         {
4024             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
4025             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
4026             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
4027             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
4028             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4029             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
4030 
4031             ADDR_ASSERT(maxYCoordBlock256 ==
4032                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4033 
4034             const UINT_32 maxYCoordInBaseEquation =
4035                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4036 
4037             ADDR_ASSERT(maxYCoordInBaseEquation ==
4038                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4039 
4040             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4041 
4042             ADDR_ASSERT(maxYCoordInPipeXor ==
4043                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4044 
4045             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4046                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4047 
4048             ADDR_ASSERT(maxYCoordInBankXor ==
4049                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4050 
4051             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4052 
4053             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4054             {
4055                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4056 
4057                 if (pOut->pStereoInfo != NULL)
4058                 {
4059                     pOut->pStereoInfo->rightSwizzle = 0;
4060 
4061                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4062                     {
4063                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4064                         {
4065                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4066                         }
4067 
4068                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4069                         {
4070                             pOut->pStereoInfo->rightSwizzle |=
4071                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4072                         }
4073 
4074                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4075                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4076                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4077                     }
4078                 }
4079             }
4080         }
4081     }
4082     else
4083     {
4084         ADDR_ASSERT_ALWAYS();
4085         returnCode = ADDR_ERROR;
4086     }
4087 
4088     return returnCode;
4089 }
4090 
4091 /**
4092 ************************************************************************************************************************
4093 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
4094 *
4095 *   @brief
4096 *       Internal function to calculate alignment for tiled surface
4097 *
4098 *   @return
4099 *       ADDR_E_RETURNCODE
4100 ************************************************************************************************************************
4101 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4102 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4103      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4104      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4105      ) const
4106 {
4107     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4108                                                                 &pOut->blockHeight,
4109                                                                 &pOut->blockSlices,
4110                                                                 pIn->bpp,
4111                                                                 pIn->numFrags,
4112                                                                 pIn->resourceType,
4113                                                                 pIn->swizzleMode);
4114 
4115     if (returnCode == ADDR_OK)
4116     {
4117         UINT_32 pitchAlignInElement = pOut->blockWidth;
4118 
4119         if ((IsTex2d(pIn->resourceType) == TRUE) &&
4120             (pIn->flags.display || pIn->flags.rotated) &&
4121             (pIn->numMipLevels <= 1) &&
4122             (pIn->numSamples <= 1) &&
4123             (pIn->numFrags <= 1))
4124         {
4125             // Display engine needs pitch align to be at least 32 pixels.
4126             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4127         }
4128 
4129         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4130 
4131         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4132         {
4133             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4134             {
4135                 returnCode = ADDR_INVALIDPARAMS;
4136             }
4137             else if (pIn->pitchInElement < pOut->pitch)
4138             {
4139                 returnCode = ADDR_INVALIDPARAMS;
4140             }
4141             else
4142             {
4143                 pOut->pitch = pIn->pitchInElement;
4144             }
4145         }
4146 
4147         UINT_32 heightAlign = 0;
4148 
4149         if (pIn->flags.qbStereo)
4150         {
4151             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4152         }
4153 
4154         if (returnCode == ADDR_OK)
4155         {
4156             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4157 
4158             if (heightAlign > 1)
4159             {
4160                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4161             }
4162 
4163             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4164 
4165             pOut->epitchIsHeight   = FALSE;
4166             pOut->mipChainInTail   = FALSE;
4167             pOut->firstMipIdInTail = pIn->numMipLevels;
4168 
4169             pOut->mipChainPitch    = pOut->pitch;
4170             pOut->mipChainHeight   = pOut->height;
4171             pOut->mipChainSlice    = pOut->numSlices;
4172 
4173             if (pIn->numMipLevels > 1)
4174             {
4175                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4176                                                          pIn->swizzleMode,
4177                                                          pIn->bpp,
4178                                                          pIn->width,
4179                                                          pIn->height,
4180                                                          pIn->numSlices,
4181                                                          pOut->blockWidth,
4182                                                          pOut->blockHeight,
4183                                                          pOut->blockSlices,
4184                                                          pIn->numMipLevels,
4185                                                          pOut->pMipInfo);
4186 
4187                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4188 
4189                 if (endingMipId == 0)
4190                 {
4191                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4192                                                            pIn->swizzleMode,
4193                                                            pOut->blockWidth,
4194                                                            pOut->blockHeight,
4195                                                            pOut->blockSlices);
4196 
4197                     pOut->epitchIsHeight = TRUE;
4198                     pOut->pitch          = tailMaxDim.w;
4199                     pOut->height         = tailMaxDim.h;
4200                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4201                                            tailMaxDim.d : pIn->numSlices;
4202                     pOut->mipChainInTail = TRUE;
4203                 }
4204                 else
4205                 {
4206                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4207                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4208 
4209                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4210                                                            pIn->swizzleMode,
4211                                                            mip0WidthInBlk,
4212                                                            mip0HeightInBlk,
4213                                                            pOut->numSlices / pOut->blockSlices);
4214                     if (majorMode == ADDR_MAJOR_Y)
4215                     {
4216                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4217 
4218                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4219                         {
4220                             mip1WidthInBlk++;
4221                         }
4222 
4223                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4224 
4225                         pOut->epitchIsHeight = FALSE;
4226                     }
4227                     else
4228                     {
4229                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4230 
4231                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4232                         {
4233                             mip1HeightInBlk++;
4234                         }
4235 
4236                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4237 
4238                         pOut->epitchIsHeight = TRUE;
4239                     }
4240                 }
4241 
4242                 if (pOut->pMipInfo != NULL)
4243                 {
4244                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4245 
4246                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4247                     {
4248                         Dim3d   mipStartPos          = {0};
4249                         UINT_32 mipTailOffsetInBytes = 0;
4250 
4251                         mipStartPos = GetMipStartPos(pIn->resourceType,
4252                                                      pIn->swizzleMode,
4253                                                      pOut->pitch,
4254                                                      pOut->height,
4255                                                      pOut->numSlices,
4256                                                      pOut->blockWidth,
4257                                                      pOut->blockHeight,
4258                                                      pOut->blockSlices,
4259                                                      i,
4260                                                      elementBytesLog2,
4261                                                      &mipTailOffsetInBytes);
4262 
4263                         UINT_32 pitchInBlock     =
4264                             pOut->mipChainPitch / pOut->blockWidth;
4265                         UINT_32 sliceInBlock     =
4266                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4267                         UINT_64 blockIndex       =
4268                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4269                         UINT_64 macroBlockOffset =
4270                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4271 
4272                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4273                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4274                     }
4275                 }
4276             }
4277             else if (pOut->pMipInfo != NULL)
4278             {
4279                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4280                 pOut->pMipInfo[0].height = pOut->height;
4281                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4282                 pOut->pMipInfo[0].offset = 0;
4283             }
4284 
4285             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4286                               (pIn->bpp >> 3) * pIn->numFrags;
4287             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4288             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4289 
4290             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4291                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4292                 (pIn->flags.texture == TRUE) &&
4293                 (pIn->flags.noMetadata == FALSE) &&
4294                 (pIn->flags.metaPipeUnaligned == FALSE))
4295             {
4296                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4297                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4298                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4299                 // them, which may cause invalid metadata to be fetched.
4300                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4301             }
4302 
4303             if (pIn->flags.prt)
4304             {
4305                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4306             }
4307         }
4308     }
4309 
4310     return returnCode;
4311 }
4312 
4313 /**
4314 ************************************************************************************************************************
4315 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4316 *
4317 *   @brief
4318 *       Internal function to calculate alignment for linear surface
4319 *
4320 *   @return
4321 *       ADDR_E_RETURNCODE
4322 ************************************************************************************************************************
4323 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4324 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4325      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4326      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4327      ) const
4328 {
4329     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4330     UINT_32           pitch        = 0;
4331     UINT_32           actualHeight = 0;
4332     UINT_32           elementBytes = pIn->bpp >> 3;
4333     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4334 
4335     if (IsTex1d(pIn->resourceType))
4336     {
4337         if (pIn->height > 1)
4338         {
4339             returnCode = ADDR_INVALIDPARAMS;
4340         }
4341         else
4342         {
4343             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4344 
4345             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4346             actualHeight = pIn->numMipLevels;
4347 
4348             if (pIn->flags.prt == FALSE)
4349             {
4350                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4351                                                         &pitch, &actualHeight);
4352             }
4353 
4354             if (returnCode == ADDR_OK)
4355             {
4356                 if (pOut->pMipInfo != NULL)
4357                 {
4358                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4359                     {
4360                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4361                         pOut->pMipInfo[i].pitch  = pitch;
4362                         pOut->pMipInfo[i].height = 1;
4363                         pOut->pMipInfo[i].depth  = 1;
4364                     }
4365                 }
4366             }
4367         }
4368     }
4369     else
4370     {
4371         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4372     }
4373 
4374     if ((pitch == 0) || (actualHeight == 0))
4375     {
4376         returnCode = ADDR_INVALIDPARAMS;
4377     }
4378 
4379     if (returnCode == ADDR_OK)
4380     {
4381         pOut->pitch          = pitch;
4382         pOut->height         = pIn->height;
4383         pOut->numSlices      = pIn->numSlices;
4384         pOut->mipChainPitch  = pitch;
4385         pOut->mipChainHeight = actualHeight;
4386         pOut->mipChainSlice  = pOut->numSlices;
4387         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4388         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4389         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4390         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4391         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4392         pOut->blockHeight    = 1;
4393         pOut->blockSlices    = 1;
4394     }
4395 
4396     // Post calculation validate
4397     ADDR_ASSERT(pOut->sliceSize > 0);
4398 
4399     return returnCode;
4400 }
4401 
4402 /**
4403 ************************************************************************************************************************
4404 *   Gfx9Lib::GetMipChainInfo
4405 *
4406 *   @brief
4407 *       Internal function to get out information about mip chain
4408 *
4409 *   @return
4410 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4411 ************************************************************************************************************************
4412 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4413 UINT_32 Gfx9Lib::GetMipChainInfo(
4414     AddrResourceType  resourceType,
4415     AddrSwizzleMode   swizzleMode,
4416     UINT_32           bpp,
4417     UINT_32           mip0Width,
4418     UINT_32           mip0Height,
4419     UINT_32           mip0Depth,
4420     UINT_32           blockWidth,
4421     UINT_32           blockHeight,
4422     UINT_32           blockDepth,
4423     UINT_32           numMipLevel,
4424     ADDR2_MIP_INFO*   pMipInfo) const
4425 {
4426     const Dim3d tailMaxDim =
4427         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4428 
4429     UINT_32 mipPitch         = mip0Width;
4430     UINT_32 mipHeight        = mip0Height;
4431     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4432     UINT_32 offset           = 0;
4433     UINT_32 firstMipIdInTail = numMipLevel;
4434     BOOL_32 inTail           = FALSE;
4435     BOOL_32 finalDim         = FALSE;
4436     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4437     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4438 
4439     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4440     {
4441         if (inTail)
4442         {
4443             if (finalDim == FALSE)
4444             {
4445                 UINT_32 mipSize;
4446 
4447                 if (is3dThick)
4448                 {
4449                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4450                 }
4451                 else
4452                 {
4453                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4454                 }
4455 
4456                 if (mipSize <= 256)
4457                 {
4458                     UINT_32 index = Log2(bpp >> 3);
4459 
4460                     if (is3dThick)
4461                     {
4462                         mipPitch  = Block256_3dZ[index].w;
4463                         mipHeight = Block256_3dZ[index].h;
4464                         mipDepth  = Block256_3dZ[index].d;
4465                     }
4466                     else
4467                     {
4468                         mipPitch  = Block256_2d[index].w;
4469                         mipHeight = Block256_2d[index].h;
4470                     }
4471 
4472                     finalDim = TRUE;
4473                 }
4474             }
4475         }
4476         else
4477         {
4478             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4479                                  mipPitch, mipHeight, mipDepth);
4480 
4481             if (inTail)
4482             {
4483                 firstMipIdInTail = mipId;
4484                 mipPitch         = tailMaxDim.w;
4485                 mipHeight        = tailMaxDim.h;
4486 
4487                 if (is3dThick)
4488                 {
4489                     mipDepth = tailMaxDim.d;
4490                 }
4491             }
4492             else
4493             {
4494                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4495                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4496 
4497                 if (is3dThick)
4498                 {
4499                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4500                 }
4501             }
4502         }
4503 
4504         if (pMipInfo != NULL)
4505         {
4506             pMipInfo[mipId].pitch  = mipPitch;
4507             pMipInfo[mipId].height = mipHeight;
4508             pMipInfo[mipId].depth  = mipDepth;
4509             pMipInfo[mipId].offset = offset;
4510         }
4511 
4512         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4513 
4514         if (finalDim)
4515         {
4516             if (is3dThin)
4517             {
4518                 mipDepth = Max(mipDepth >> 1, 1u);
4519             }
4520         }
4521         else
4522         {
4523             mipPitch  = Max(mipPitch >> 1, 1u);
4524             mipHeight = Max(mipHeight >> 1, 1u);
4525 
4526             if (is3dThick || is3dThin)
4527             {
4528                 mipDepth = Max(mipDepth >> 1, 1u);
4529             }
4530         }
4531     }
4532 
4533     return firstMipIdInTail;
4534 }
4535 
4536 /**
4537 ************************************************************************************************************************
4538 *   Gfx9Lib::GetMetaMiptailInfo
4539 *
4540 *   @brief
4541 *       Get mip tail coordinate information.
4542 *
4543 *   @return
4544 *       N/A
4545 ************************************************************************************************************************
4546 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4547 VOID Gfx9Lib::GetMetaMiptailInfo(
4548     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4549     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4550     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4551     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4552     ) const
4553 {
4554     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4555     UINT_32 mipWidth  = pMetaBlkDim->w;
4556     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4557     UINT_32 mipDepth  = pMetaBlkDim->d;
4558     UINT_32 minInc;
4559 
4560     if (isThick)
4561     {
4562         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4563     }
4564     else if (pMetaBlkDim->h >= 1024)
4565     {
4566         minInc = 256;
4567     }
4568     else if (pMetaBlkDim->h == 512)
4569     {
4570         minInc = 128;
4571     }
4572     else
4573     {
4574         minInc = 64;
4575     }
4576 
4577     UINT_32 blk32MipId = 0xFFFFFFFF;
4578 
4579     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4580     {
4581         pInfo[mip].inMiptail = TRUE;
4582         pInfo[mip].startX = mipCoord.w;
4583         pInfo[mip].startY = mipCoord.h;
4584         pInfo[mip].startZ = mipCoord.d;
4585         pInfo[mip].width = mipWidth;
4586         pInfo[mip].height = mipHeight;
4587         pInfo[mip].depth = mipDepth;
4588 
4589         if (mipWidth <= 32)
4590         {
4591             if (blk32MipId == 0xFFFFFFFF)
4592             {
4593                 blk32MipId = mip;
4594             }
4595 
4596             mipCoord.w = pInfo[blk32MipId].startX;
4597             mipCoord.h = pInfo[blk32MipId].startY;
4598             mipCoord.d = pInfo[blk32MipId].startZ;
4599 
4600             switch (mip - blk32MipId)
4601             {
4602                 case 0:
4603                     mipCoord.w += 32;       // 16x16
4604                     break;
4605                 case 1:
4606                     mipCoord.h += 32;       // 8x8
4607                     break;
4608                 case 2:
4609                     mipCoord.h += 32;       // 4x4
4610                     mipCoord.w += 16;
4611                     break;
4612                 case 3:
4613                     mipCoord.h += 32;       // 2x2
4614                     mipCoord.w += 32;
4615                     break;
4616                 case 4:
4617                     mipCoord.h += 32;       // 1x1
4618                     mipCoord.w += 48;
4619                     break;
4620                 // The following are for BC/ASTC formats
4621                 case 5:
4622                     mipCoord.h += 48;       // 1/2 x 1/2
4623                     break;
4624                 case 6:
4625                     mipCoord.h += 48;       // 1/4 x 1/4
4626                     mipCoord.w += 16;
4627                     break;
4628                 case 7:
4629                     mipCoord.h += 48;       // 1/8 x 1/8
4630                     mipCoord.w += 32;
4631                     break;
4632                 case 8:
4633                     mipCoord.h += 48;       // 1/16 x 1/16
4634                     mipCoord.w += 48;
4635                     break;
4636                 default:
4637                     ADDR_ASSERT_ALWAYS();
4638                     break;
4639             }
4640 
4641             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4642             mipHeight = mipWidth;
4643 
4644             if (isThick)
4645             {
4646                 mipDepth = mipWidth;
4647             }
4648         }
4649         else
4650         {
4651             if (mipWidth <= minInc)
4652             {
4653                 // if we're below the minimal increment...
4654                 if (isThick)
4655                 {
4656                     // For 3d, just go in z direction
4657                     mipCoord.d += mipDepth;
4658                 }
4659                 else
4660                 {
4661                     // For 2d, first go across, then down
4662                     if ((mipWidth * 2) == minInc)
4663                     {
4664                         // if we're 2 mips below, that's when we go back in x, and down in y
4665                         mipCoord.w -= minInc;
4666                         mipCoord.h += minInc;
4667                     }
4668                     else
4669                     {
4670                         // otherwise, just go across in x
4671                         mipCoord.w += minInc;
4672                     }
4673                 }
4674             }
4675             else
4676             {
4677                 // On even mip, go down, otherwise, go across
4678                 if (mip & 1)
4679                 {
4680                     mipCoord.w += mipWidth;
4681                 }
4682                 else
4683                 {
4684                     mipCoord.h += mipHeight;
4685                 }
4686             }
4687             // Divide the width by 2
4688             mipWidth >>= 1;
4689             // After the first mip in tail, the mip is always a square
4690             mipHeight = mipWidth;
4691             // ...or for 3d, a cube
4692             if (isThick)
4693             {
4694                 mipDepth = mipWidth;
4695             }
4696         }
4697     }
4698 }
4699 
4700 /**
4701 ************************************************************************************************************************
4702 *   Gfx9Lib::GetMipStartPos
4703 *
4704 *   @brief
4705 *       Internal function to get out information about mip logical start position
4706 *
4707 *   @return
4708 *       logical start position in macro block width/height/depth of one mip level within one slice
4709 ************************************************************************************************************************
4710 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4711 Dim3d Gfx9Lib::GetMipStartPos(
4712     AddrResourceType  resourceType,
4713     AddrSwizzleMode   swizzleMode,
4714     UINT_32           width,
4715     UINT_32           height,
4716     UINT_32           depth,
4717     UINT_32           blockWidth,
4718     UINT_32           blockHeight,
4719     UINT_32           blockDepth,
4720     UINT_32           mipId,
4721     UINT_32           log2ElementBytes,
4722     UINT_32*          pMipTailBytesOffset) const
4723 {
4724     Dim3d       mipStartPos = {0};
4725     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4726 
4727     // Report mip in tail if Mip0 is already in mip tail
4728     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4729     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4730     UINT_32 mipIndexInTail = mipId;
4731 
4732     if (inMipTail == FALSE)
4733     {
4734         // Mip 0 dimension, unit in block
4735         UINT_32 mipWidthInBlk   = width  / blockWidth;
4736         UINT_32 mipHeightInBlk  = height / blockHeight;
4737         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4738         AddrMajorMode majorMode = GetMajorMode(resourceType,
4739                                                swizzleMode,
4740                                                mipWidthInBlk,
4741                                                mipHeightInBlk,
4742                                                mipDepthInBlk);
4743 
4744         UINT_32 endingMip = mipId + 1;
4745 
4746         for (UINT_32 i = 1; i <= mipId; i++)
4747         {
4748             if ((i == 1) || (i == 3))
4749             {
4750                 if (majorMode == ADDR_MAJOR_Y)
4751                 {
4752                     mipStartPos.w += mipWidthInBlk;
4753                 }
4754                 else
4755                 {
4756                     mipStartPos.h += mipHeightInBlk;
4757                 }
4758             }
4759             else
4760             {
4761                 if (majorMode == ADDR_MAJOR_X)
4762                 {
4763                    mipStartPos.w += mipWidthInBlk;
4764                 }
4765                 else if (majorMode == ADDR_MAJOR_Y)
4766                 {
4767                    mipStartPos.h += mipHeightInBlk;
4768                 }
4769                 else
4770                 {
4771                    mipStartPos.d += mipDepthInBlk;
4772                 }
4773             }
4774 
4775             BOOL_32 inTail = FALSE;
4776 
4777             if (IsThick(resourceType, swizzleMode))
4778             {
4779                 UINT_32 dim = log2BlkSize % 3;
4780 
4781                 if (dim == 0)
4782                 {
4783                     inTail =
4784                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4785                 }
4786                 else if (dim == 1)
4787                 {
4788                     inTail =
4789                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4790                 }
4791                 else
4792                 {
4793                     inTail =
4794                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4795                 }
4796             }
4797             else
4798             {
4799                 if (log2BlkSize & 1)
4800                 {
4801                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4802                 }
4803                 else
4804                 {
4805                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4806                 }
4807             }
4808 
4809             if (inTail)
4810             {
4811                 endingMip = i;
4812                 break;
4813             }
4814 
4815             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4816             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4817             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4818         }
4819 
4820         if (mipId >= endingMip)
4821         {
4822             inMipTail      = TRUE;
4823             mipIndexInTail = mipId - endingMip;
4824         }
4825     }
4826 
4827     if (inMipTail)
4828     {
4829         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4830         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4831         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4832     }
4833 
4834     return mipStartPos;
4835 }
4836 
4837 /**
4838 ************************************************************************************************************************
4839 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4840 *
4841 *   @brief
4842 *       Internal function to calculate address from coord for tiled swizzle surface
4843 *
4844 *   @return
4845 *       ADDR_E_RETURNCODE
4846 ************************************************************************************************************************
4847 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4848 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4849      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4850      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4851      ) const
4852 {
4853     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4854     localIn.swizzleMode  = pIn->swizzleMode;
4855     localIn.flags        = pIn->flags;
4856     localIn.resourceType = pIn->resourceType;
4857     localIn.bpp          = pIn->bpp;
4858     localIn.width        = Max(pIn->unalignedWidth, 1u);
4859     localIn.height       = Max(pIn->unalignedHeight, 1u);
4860     localIn.numSlices    = Max(pIn->numSlices, 1u);
4861     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4862     localIn.numSamples   = Max(pIn->numSamples, 1u);
4863     localIn.numFrags     = Max(pIn->numFrags, 1u);
4864     if (localIn.numMipLevels <= 1)
4865     {
4866         localIn.pitchInElement = pIn->pitchInElement;
4867     }
4868 
4869     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4870     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4871 
4872     BOOL_32 valid = (returnCode == ADDR_OK) &&
4873                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4874                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4875                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4876 
4877     if (valid)
4878     {
4879         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4880         Dim3d   mipStartPos        = {0};
4881         UINT_32 mipTailBytesOffset = 0;
4882 
4883         if (pIn->numMipLevels > 1)
4884         {
4885             // Mip-map chain cannot be MSAA surface
4886             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4887 
4888             mipStartPos = GetMipStartPos(pIn->resourceType,
4889                                          pIn->swizzleMode,
4890                                          localOut.pitch,
4891                                          localOut.height,
4892                                          localOut.numSlices,
4893                                          localOut.blockWidth,
4894                                          localOut.blockHeight,
4895                                          localOut.blockSlices,
4896                                          pIn->mipId,
4897                                          log2ElementBytes,
4898                                          &mipTailBytesOffset);
4899         }
4900 
4901         UINT_32 interleaveOffset = 0;
4902         UINT_32 pipeBits = 0;
4903         UINT_32 pipeXor = 0;
4904         UINT_32 bankBits = 0;
4905         UINT_32 bankXor = 0;
4906 
4907         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4908         {
4909             UINT_32 blockOffset = 0;
4910             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4911 
4912             if (IsZOrderSwizzle(pIn->swizzleMode))
4913             {
4914                 // Morton generation
4915                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4916                 {
4917                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4918                     UINT_32 mortBits = totalLowBits / 2;
4919                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4920                     // Are 9 bits enough?
4921                     UINT_32 highBitsValue =
4922                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4923                     blockOffset = lowBitsValue | highBitsValue;
4924                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4925                 }
4926                 else
4927                 {
4928                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4929                 }
4930 
4931                 // Fill LSBs with sample bits
4932                 if (pIn->numSamples > 1)
4933                 {
4934                     blockOffset *= pIn->numSamples;
4935                     blockOffset |= pIn->sample;
4936                 }
4937 
4938                 // Shift according to BytesPP
4939                 blockOffset <<= log2ElementBytes;
4940             }
4941             else
4942             {
4943                 // Micro block offset
4944                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4945                 blockOffset = microBlockOffset;
4946 
4947                 // Micro block dimension
4948                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4949                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4950                 // Morton generation, does 12 bit enough?
4951                 blockOffset |=
4952                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4953 
4954                 // Sample bits start location
4955                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4956                 // Join sample bits information to the highest Macro block bits
4957                 if (IsNonPrtXor(pIn->swizzleMode))
4958                 {
4959                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4960                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4961                 }
4962                 else
4963                 {
4964                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4965                     // after this op, the blockOffset only contains log2 Macro block size bits
4966                     blockOffset %= (1 << sampleStart);
4967                     blockOffset |= (pIn->sample << sampleStart);
4968                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4969                 }
4970             }
4971 
4972             if (IsXor(pIn->swizzleMode))
4973             {
4974                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4975                 if (IsPrt(pIn->swizzleMode))
4976                 {
4977                     blockOffset &= ((1 << log2BlkSize) - 1);
4978                 }
4979 
4980                 // Preserve offset inside pipe interleave
4981                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4982                 blockOffset >>= m_pipeInterleaveLog2;
4983 
4984                 // Pipe/Se xor bits
4985                 pipeBits = GetPipeXorBits(log2BlkSize);
4986                 // Pipe xor
4987                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4988                 blockOffset >>= pipeBits;
4989 
4990                 // Bank xor bits
4991                 bankBits = GetBankXorBits(log2BlkSize);
4992                 // Bank Xor
4993                 bankXor = FoldXor2d(blockOffset, bankBits);
4994                 blockOffset >>= bankBits;
4995 
4996                 // Put all the part back together
4997                 blockOffset <<= bankBits;
4998                 blockOffset |= bankXor;
4999                 blockOffset <<= pipeBits;
5000                 blockOffset |= pipeXor;
5001                 blockOffset <<= m_pipeInterleaveLog2;
5002                 blockOffset |= interleaveOffset;
5003             }
5004 
5005             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5006             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5007 
5008             blockOffset |= mipTailBytesOffset;
5009 
5010             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
5011             {
5012                 // Apply slice xor if not MSAA/PRT
5013                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
5014                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5015                                 (m_pipeInterleaveLog2 + pipeBits));
5016             }
5017 
5018             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5019                                                   bankBits, pipeBits, &blockOffset);
5020 
5021             blockOffset %= (1 << log2BlkSize);
5022 
5023             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5024             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5025             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5026             UINT_64 macroBlockIndex =
5027                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5028                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5029                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5030 
5031             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5032         }
5033         else
5034         {
5035             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5036 
5037             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5038 
5039             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5040                                               (pIn->y / microBlockDim.h),
5041                                               (pIn->slice / microBlockDim.d),
5042                                               8);
5043 
5044             blockOffset <<= 10;
5045             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5046 
5047             if (IsXor(pIn->swizzleMode))
5048             {
5049                 // Mask off bits above Macro block bits to keep page synonyms working for prt
5050                 if (IsPrt(pIn->swizzleMode))
5051                 {
5052                     blockOffset &= ((1 << log2BlkSize) - 1);
5053                 }
5054 
5055                 // Preserve offset inside pipe interleave
5056                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5057                 blockOffset >>= m_pipeInterleaveLog2;
5058 
5059                 // Pipe/Se xor bits
5060                 pipeBits = GetPipeXorBits(log2BlkSize);
5061                 // Pipe xor
5062                 pipeXor = FoldXor3d(blockOffset, pipeBits);
5063                 blockOffset >>= pipeBits;
5064 
5065                 // Bank xor bits
5066                 bankBits = GetBankXorBits(log2BlkSize);
5067                 // Bank Xor
5068                 bankXor = FoldXor3d(blockOffset, bankBits);
5069                 blockOffset >>= bankBits;
5070 
5071                 // Put all the part back together
5072                 blockOffset <<= bankBits;
5073                 blockOffset |= bankXor;
5074                 blockOffset <<= pipeBits;
5075                 blockOffset |= pipeXor;
5076                 blockOffset <<= m_pipeInterleaveLog2;
5077                 blockOffset |= interleaveOffset;
5078             }
5079 
5080             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5081             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5082             blockOffset |= mipTailBytesOffset;
5083 
5084             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5085                                                   bankBits, pipeBits, &blockOffset);
5086 
5087             blockOffset %= (1 << log2BlkSize);
5088 
5089             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
5090             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5091             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5092 
5093             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5094             UINT_32 sliceSizeInBlock =
5095                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5096             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5097 
5098             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5099         }
5100     }
5101     else
5102     {
5103         returnCode = ADDR_INVALIDPARAMS;
5104     }
5105 
5106     return returnCode;
5107 }
5108 
5109 /**
5110 ************************************************************************************************************************
5111 *   Gfx9Lib::ComputeSurfaceLinearPadding
5112 *
5113 *   @brief
5114 *       Internal function to calculate padding for linear swizzle 2D/3D surface
5115 *
5116 *   @return
5117 *       ADDR_E_RETURNCODE
5118 ************************************************************************************************************************
5119 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5120 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5121     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
5122     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
5123     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
5124     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
5125     ) const
5126 {
5127     ADDR_E_RETURNCODE returnCode = ADDR_OK;
5128 
5129     UINT_32 elementBytes        = pIn->bpp >> 3;
5130     UINT_32 pitchAlignInElement = 0;
5131 
5132     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5133     {
5134         ADDR_ASSERT(pIn->numMipLevels <= 1);
5135         ADDR_ASSERT(pIn->numSlices <= 1);
5136         pitchAlignInElement = 1;
5137     }
5138     else
5139     {
5140         pitchAlignInElement = (256 / elementBytes);
5141     }
5142 
5143     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
5144     UINT_32 slice0PaddedHeight = pIn->height;
5145 
5146     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5147                                             &mipChainWidth, &slice0PaddedHeight);
5148 
5149     if (returnCode == ADDR_OK)
5150     {
5151         UINT_32 mipChainHeight = 0;
5152         UINT_32 mipHeight      = pIn->height;
5153         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5154 
5155         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5156         {
5157             if (pMipInfo != NULL)
5158             {
5159                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5160                 pMipInfo[i].pitch  = mipChainWidth;
5161                 pMipInfo[i].height = mipHeight;
5162                 pMipInfo[i].depth  = mipDepth;
5163             }
5164 
5165             mipChainHeight += mipHeight;
5166             mipHeight = RoundHalf(mipHeight);
5167             mipHeight = Max(mipHeight, 1u);
5168         }
5169 
5170         *pMipmap0PaddedWidth = mipChainWidth;
5171         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5172     }
5173 
5174     return returnCode;
5175 }
5176 
5177 /**
5178 ************************************************************************************************************************
5179 *   Gfx9Lib::ComputeThinBlockDimension
5180 *
5181 *   @brief
5182 *       Internal function to get thin block width/height/depth in element from surface input params.
5183 *
5184 *   @return
5185 *       N/A
5186 ************************************************************************************************************************
5187 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5188 VOID Gfx9Lib::ComputeThinBlockDimension(
5189     UINT_32*         pWidth,
5190     UINT_32*         pHeight,
5191     UINT_32*         pDepth,
5192     UINT_32          bpp,
5193     UINT_32          numSamples,
5194     AddrResourceType resourceType,
5195     AddrSwizzleMode  swizzleMode) const
5196 {
5197     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5198 
5199     const UINT_32 log2BlkSize              = GetBlockSizeLog2(swizzleMode);
5200     const UINT_32 eleBytes                 = bpp >> 3;
5201     const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5202     const UINT_32 log2blkSizeIn256B        = log2BlkSize - 8;
5203     const UINT_32 widthAmp                 = log2blkSizeIn256B / 2;
5204     const UINT_32 heightAmp                = log2blkSizeIn256B - widthAmp;
5205 
5206     ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5207 
5208     *pWidth  = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5209     *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5210     *pDepth  = 1;
5211 
5212     if (numSamples > 1)
5213     {
5214         const UINT_32 log2sample = Log2(numSamples);
5215         const UINT_32 q          = log2sample >> 1;
5216         const UINT_32 r          = log2sample & 1;
5217 
5218         if (log2BlkSize & 1)
5219         {
5220             *pWidth  >>= q;
5221             *pHeight >>= (q + r);
5222         }
5223         else
5224         {
5225             *pWidth  >>= (q + r);
5226             *pHeight >>= q;
5227         }
5228     }
5229 }
5230 
5231 } // V2
5232 } // Addr
5233