• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx12addrlib.cpp
12 * @brief Contain the implementation for the Gfx12Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx12addrlib.h"
17 #include "gfx12_gb_reg.h"
18 #include "addrswizzler.h"
19 
20 #include "amdgpu_asic_addr.h"
21 
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24 
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 *   Gfx12HwlInit
30 *
31 *   @brief
32 *       Creates an Gfx12Lib object.
33 *
34 *   @return
35 *       Returns an Gfx12Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx12HwlInit(const Client * pClient)38 Addr::Lib* Gfx12HwlInit(
39     const Client* pClient)
40 {
41     return V3::Gfx12Lib::CreateObj(pClient);
42 }
43 
44 namespace V3
45 {
46 
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 //                               Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50 const SwizzleModeFlags Gfx12Lib::SwizzleModeTable[ADDR3_MAX_TYPE] =
51 {//Linear 2d   3d  256B  4KB  64KB  256KB  Reserved
52     {{1,   0,   0,    0,   0,    0,     0,    0}}, // ADDR3_LINEAR
53     {{0,   1,   0,    1,   0,    0,     0,    0}}, // ADDR3_256B_2D
54     {{0,   1,   0,    0,   1,    0,     0,    0}}, // ADDR3_4KB_2D
55     {{0,   1,   0,    0,   0,    1,     0,    0}}, // ADDR3_64KB_2D
56     {{0,   1,   0,    0,   0,    0,     1,    0}}, // ADDR3_256KB_2D
57     {{0,   0,   1,    0,   1,    0,     0,    0}}, // ADDR3_4KB_3D
58     {{0,   0,   1,    0,   0,    1,     0,    0}}, // ADDR3_64KB_3D
59     {{0,   0,   1,    0,   0,    0,     1,    0}}, // ADDR3_256KB_3D
60 };
61 
62 /**
63 ************************************************************************************************************************
64 *   Gfx12Lib::Gfx12Lib
65 *
66 *   @brief
67 *       Constructor
68 *
69 ************************************************************************************************************************
70 */
Gfx12Lib(const Client * pClient)71 Gfx12Lib::Gfx12Lib(
72     const Client* pClient)
73     :
74     Lib(pClient),
75     m_numSwizzleBits(0)
76 {
77     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
78 }
79 
80 /**
81 ************************************************************************************************************************
82 *   Gfx12Lib::~Gfx12Lib
83 *
84 *   @brief
85 *       Destructor
86 ************************************************************************************************************************
87 */
~Gfx12Lib()88 Gfx12Lib::~Gfx12Lib()
89 {
90 }
91 
92 /**
93 ************************************************************************************************************************
94 *   Gfx12Lib::ConvertSwizzlePatternToEquation
95 *
96 *   @brief
97 *       Convert swizzle pattern to equation.
98 *
99 *   @return
100 *       N/A
101 ************************************************************************************************************************
102 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,Addr3SwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const103 VOID Gfx12Lib::ConvertSwizzlePatternToEquation(
104     UINT_32                elemLog2,  ///< [in] element bytes log2
105     Addr3SwizzleMode       swMode,    ///< [in] swizzle mode
106     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
107     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
108     const
109 {
110     ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K];
111     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
112 
113     const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
114     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode, TRUE);
115 
116     pEquation->numBits = blockSizeLog2;
117     pEquation->stackedDepthSlices = FALSE;
118 
119     for (UINT_32 i = 0; i < elemLog2; i++)
120     {
121         pEquation->addr[i].channel = 0;
122         pEquation->addr[i].valid = 1;
123         pEquation->addr[i].index = i;
124     }
125 
126     for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
127     {
128         ADDR_ASSERT(IsPow2(pSwizzle[i].value));
129 
130         if (pSwizzle[i].x != 0)
131         {
132             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
133 
134             pEquation->addr[i].channel = 0;
135             pEquation->addr[i].valid = 1;
136             pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
137         }
138         else if (pSwizzle[i].y != 0)
139         {
140             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
141 
142             pEquation->addr[i].channel = 1;
143             pEquation->addr[i].valid = 1;
144             pEquation->addr[i].index = Log2(pSwizzle[i].y);
145         }
146         else if (pSwizzle[i].z != 0)
147         {
148             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
149 
150             pEquation->addr[i].channel = 2;
151             pEquation->addr[i].valid = 1;
152             pEquation->addr[i].index = Log2(pSwizzle[i].z);
153         }
154         else if (pSwizzle[i].s != 0)
155         {
156             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].s)));
157 
158             pEquation->addr[i].channel = 3;
159             pEquation->addr[i].valid = 1;
160             pEquation->addr[i].index = Log2(pSwizzle[i].s);
161         }
162         else
163         {
164             ADDR_ASSERT_ALWAYS();
165         }
166     }
167 }
168 
169 /**
170 ************************************************************************************************************************
171 *   Gfx12Lib::InitEquationTable
172 *
173 *   @brief
174 *       Initialize Equation table.
175 *
176 *   @return
177 *       N/A
178 ************************************************************************************************************************
179 */
InitEquationTable()180 VOID Gfx12Lib::InitEquationTable()
181 {
182     memset(m_equationTable, 0, sizeof(m_equationTable));
183 
184     for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
185     {
186         const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
187 
188         // Skip linear equation (data table is not useful for 2D/3D images-- only contains x-coordinate bits)
189         if (IsValidSwMode(swMode) && (IsLinear(swMode) == false))
190         {
191             const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
192 
193             for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
194             {
195                 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
196                 {
197                     UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
198                     const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, elemLog2, 1 << msaaIdx);
199 
200                     if (pPatInfo != NULL)
201                     {
202                         ADDR_EQUATION equation = {};
203 
204                         ConvertSwizzlePatternToEquation(elemLog2, swMode, pPatInfo, &equation);
205 
206                         equationIndex = m_numEquations;
207                         ADDR_ASSERT(equationIndex < NumSwizzlePatterns);
208 
209                         m_equationTable[equationIndex] = equation;
210                         m_numEquations++;
211                     }
212                     SetEquationTableEntry(swMode, msaaIdx, elemLog2, equationIndex);
213                 } // loop through bpp sizes
214             } // loop through MSAA rates
215         } // End check for valid non-linear modes
216     } // loop through swizzle modes
217 }
218 
219 /**
220 ************************************************************************************************************************
221 *   Gfx12Lib::HwlGetEquationIndex
222 *
223 *   @brief
224 *       Return equationIndex by surface info input
225 *
226 *   @return
227 *       equationIndex
228 ************************************************************************************************************************
229 */
HwlGetEquationIndex(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const230 UINT_32 Gfx12Lib::HwlGetEquationIndex(
231     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn    ///< [in] input structure
232     ) const
233 {
234     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
235 
236     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
237         (pIn->resourceType == ADDR_RSRC_TEX_3D))
238     {
239         equationIdx = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), Log2(pIn->bpp >> 3));
240     }
241 
242     return equationIdx;
243 }
244 
245 /**
246 ************************************************************************************************************************
247 *   Gfx12Lib::InitBlockDimensionTable
248 *
249 *   @brief
250 *       Initialize block dimension table for all swizzle modes + msaa samples + bpp bundles.
251 *
252 *   @return
253 *       N/A
254 ************************************************************************************************************************
255 */
InitBlockDimensionTable()256 VOID Gfx12Lib::InitBlockDimensionTable()
257 {
258     memset(m_blockDimensionTable, 0, sizeof(m_blockDimensionTable));
259 
260     ADDR3_COMPUTE_SURFACE_INFO_INPUT surfaceInfo {};
261 
262 
263     for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
264     {
265         const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
266 
267         if (IsValidSwMode(swMode))
268         {
269             surfaceInfo.swizzleMode = swMode;
270             const UINT_32 maxMsaa   = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
271 
272             for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
273             {
274                 surfaceInfo.numSamples = (1u << msaaIdx);
275                 for (UINT_32 elementBytesLog2 = 0; elementBytesLog2 < MaxElementBytesLog2; elementBytesLog2++)
276                 {
277                     surfaceInfo.bpp = (1u << (elementBytesLog2 + 3));
278                     ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &surfaceInfo };
279                     ComputeBlockDimensionForSurf(&input, &m_blockDimensionTable[swModeIdx][msaaIdx][elementBytesLog2]);
280                 } // end loop through bpp sizes
281             } // end loop through MSAA rates
282         } // end check for valid swizzle modes
283     } // end loop through swizzle modes
284 }
285 
286 /**
287 ************************************************************************************************************************
288 *   Gfx12Lib::GetMipOrigin
289 *
290 *   @brief
291 *       Internal function to calculate origins of the mip levels
292 *
293 *   @return
294 *       ADDR_E_RETURNCODE
295 ************************************************************************************************************************
296 */
GetMipOrigin(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR_EXTENT3D & mipExtentFirstInTail,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const297 VOID Gfx12Lib::GetMipOrigin(
298      const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,        ///< [in] input structure
299      const ADDR_EXTENT3D&                           mipExtentFirstInTail,
300      ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*             pOut        ///< [out] output structure
301      ) const
302 {
303     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
304     const BOOL_32        is3d             = (pSurfInfo->resourceType == ADDR_RSRC_TEX_3D);
305     const UINT_32        bytesPerPixel    = pSurfInfo->bpp >> 3;
306     const UINT_32        elementBytesLog2 = Log2(bytesPerPixel);
307     const UINT_32        samplesLog2      = Log2(pSurfInfo->numSamples);
308 
309     // Calculate the width/height/depth for the given microblock, because the mip offset calculation
310     // is in units of microblocks but we want it in elements.
311     ADDR_EXTENT3D        microBlockExtent = HwlGetMicroBlockSize(pIn);
312     const ADDR_EXTENT3D  tailMaxDim       = GetMipTailDim(pIn, pOut->blockExtent);
313     const UINT_32        blockSizeLog2    = GetBlockSizeLog2(pSurfInfo->swizzleMode);
314 
315     UINT_32 pitch  = tailMaxDim.width;
316     UINT_32 height = tailMaxDim.height;
317     UINT_32 depth  = (is3d ? PowTwoAlign(mipExtentFirstInTail.depth, microBlockExtent.depth) : 1);
318 
319     const UINT_32 tailMaxDepth   = (is3d ? (depth / microBlockExtent.depth) : 1);
320 
321     for (UINT_32 i = pOut->firstMipIdInTail; i < pSurfInfo->numMipLevels; i++)
322     {
323         const INT_32  mipInTail = CalcMipInTail(pIn, pOut, i);
324         const UINT_32 mipOffset = CalcMipOffset(pIn, mipInTail);
325 
326         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
327         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
328         pOut->pMipInfo[i].macroBlockOffset = 0;
329 
330         pOut->pMipInfo[i].pitch  = pitch;
331         pOut->pMipInfo[i].height = height;
332         pOut->pMipInfo[i].depth  = depth;
333         if (IsLinear(pSurfInfo->swizzleMode))
334         {
335             pOut->pMipInfo[i].mipTailCoordX = mipOffset >> 8;
336             pOut->pMipInfo[i].mipTailCoordY = 0;
337             pOut->pMipInfo[i].mipTailCoordZ = 0;
338         }
339         else
340         {
341             UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
342                            ((mipOffset >> 10) & 2)  |
343                            ((mipOffset >> 11) & 4)  |
344                            ((mipOffset >> 12) & 8)  |
345                            ((mipOffset >> 13) & 16) |
346                            ((mipOffset >> 14) & 32);
347             UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
348                            ((mipOffset >> 9)  & 2)  |
349                            ((mipOffset >> 10) & 4)  |
350                            ((mipOffset >> 11) & 8)  |
351                            ((mipOffset >> 12) & 16) |
352                            ((mipOffset >> 13) & 32);
353 
354             pOut->pMipInfo[i].mipTailCoordX = mipX * microBlockExtent.width;
355             pOut->pMipInfo[i].mipTailCoordY = mipY * microBlockExtent.height;
356             pOut->pMipInfo[i].mipTailCoordZ = 0;
357         }
358         if (IsLinear(pSurfInfo->swizzleMode))
359         {
360             pitch = Max(pitch >> 1, 1u);
361         }
362         else
363         {
364             pOut->pMipInfo[i].pitch  = PowTwoAlign(pitch,  microBlockExtent.width);
365             pOut->pMipInfo[i].height = PowTwoAlign(height, microBlockExtent.height);
366             pOut->pMipInfo[i].depth  = PowTwoAlign(depth,  microBlockExtent.depth);
367             pitch  = Max(pitch >> 1,  1u);
368             height = Max(height >> 1, 1u);
369             depth  = Max(depth >> 1,  1u);
370         }
371     }
372 }
373 
374 /**
375 ************************************************************************************************************************
376 *   Gfx12Lib::GetMipOffset
377 *
378 *   @brief
379 *       Internal function to calculate alignment for a surface
380 *
381 *   @return
382 *       ADDR_E_RETURNCODE
383 ************************************************************************************************************************
384 */
GetMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const385 VOID Gfx12Lib::GetMipOffset(
386      const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,    ///< [in] input structure
387      ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*             pOut    ///< [out] output structure
388      ) const
389 {
390     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
391     const UINT_32        bytesPerPixel    = pSurfInfo->bpp >> 3;
392     const UINT_32        elementBytesLog2 = Log2(bytesPerPixel);
393     const UINT_32        blockSizeLog2    = GetBlockSizeLog2(pSurfInfo->swizzleMode);
394     const UINT_32        blockSize        = 1 << blockSizeLog2;
395     const ADDR_EXTENT3D  tailMaxDim       = GetMipTailDim(pIn, pOut->blockExtent);;
396     const ADDR_EXTENT3D  mip0Dims         = GetBaseMipExtents(pSurfInfo);
397     const UINT_32        maxMipsInTail    = GetMaxNumMipsInTail(pIn);
398     const bool           isLinear         = IsLinear(pSurfInfo->swizzleMode);
399 
400     UINT_32 firstMipInTail    = pSurfInfo->numMipLevels;
401     UINT_64 mipChainSliceSize = 0;
402     UINT_64 mipChainSliceSizeDense  = 0;
403     UINT_64 mipSize[MaxMipLevels];
404     UINT_64 mipSliceSize[MaxMipLevels];
405 
406     const BOOL_32 useCustomPitch    = UseCustomPitch(pSurfInfo);
407     for (UINT_32 mipIdx = 0; mipIdx < pSurfInfo->numMipLevels; mipIdx++)
408     {
409         const ADDR_EXTENT3D  mipExtents = GetMipExtent(mip0Dims, mipIdx);
410 
411         if (Lib::SupportsMipTail(pSurfInfo->swizzleMode) &&
412             (pSurfInfo->numMipLevels > 1)                &&
413             IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
414         {
415             firstMipInTail          = mipIdx;
416             mipChainSliceSize      += blockSize / pOut->blockExtent.depth;
417             mipChainSliceSizeDense += blockSize / pOut->blockExtent.depth;
418             break;
419         }
420         else
421         {
422             UINT_32 pitchImgData   = 0u;
423             UINT_32 pitchSliceSize = 0u;
424             if (isLinear)
425             {
426                 // The slice size of a linear image is calculated as if the "pitch" is 256 byte aligned.
427                 // However, the rendering pitch is aligned to 128 bytes, and that is what needs to be reported
428                 // to our clients in the normal 'pitch' field.
429                 // Note this is NOT the same as the total size of the image being aligned to 256 bytes!
430                 pitchImgData   = (useCustomPitch ? pOut->pitch : PowTwoAlign(mipExtents.width, 128u / bytesPerPixel));
431                 pitchSliceSize = PowTwoAlign(pitchImgData, blockSize / bytesPerPixel);
432             }
433             else
434             {
435                 pitchImgData   = PowTwoAlign(mipExtents.width, pOut->blockExtent.width);
436                 pitchSliceSize = pitchImgData;
437             }
438 
439             UINT_32 height = UseCustomHeight(pSurfInfo)
440                                         ? pOut->height
441                                         : PowTwoAlign(mipExtents.height, pOut->blockExtent.height);
442             const UINT_32 depth  = PowTwoAlign(mipExtents.depth, pOut->blockExtent.depth);
443 
444             if (isLinear && pSurfInfo->flags.denseSliceExact && ((pitchImgData % blockSize) != 0))
445             {
446                 // If we want size to exactly equal (data)pitch * height, make sure that value is 256B aligned.
447                 // Essentially, if the pitch is less aligned, ensure the height is padded so total alignment is 256B.
448                 ADDR_ASSERT((blockSize % 128) == 0);
449                 height = PowTwoAlign(height, blockSize / 128u);
450             }
451 
452             // The original "blockExtent" calculation does subtraction of logs (i.e., division) to get the
453             // sizes.  We aligned our pitch and height to those sizes, which means we need to multiply the various
454             // factors back together to get back to the slice size.
455             UINT_64 sizeExceptPitch = static_cast<UINT_64>(height) * pSurfInfo->numSamples * (pSurfInfo->bpp >> 3);
456             UINT_64 sliceSize       = static_cast<UINT_64>(pitchSliceSize) * sizeExceptPitch;
457             UINT_64 sliceDataSize   = PowTwoAlign(static_cast<UINT_64>(pitchImgData) * sizeExceptPitch,
458                                                   static_cast<UINT_64>(blockSize));
459 
460             UINT_64 hwSliceSize     = sliceSize * pOut->blockExtent.depth;
461             ADDR_ASSERT(PowTwoAlign(hwSliceSize, static_cast<UINT_64>(blockSize)) == hwSliceSize);
462 
463             if ((mipIdx == 0) && CanTrimLinearPadding(pSurfInfo))
464             {
465                 // When this is the last linear subresource of the whole image (as laid out in memory), then we don't
466                 // need to worry about the real slice size and can reduce it to the end of the image data (or some
467                 // inflated value to meet a custom depth pitch)
468                 pitchSliceSize = pitchImgData;
469                 if (UseCustomHeight(pSurfInfo))
470                 {
471                     sliceSize = pSurfInfo->sliceAlign;
472                 }
473                 else
474                 {
475                     sliceSize = sliceDataSize;
476                 }
477                 // CanTrimLinearPadding is always false for 3D swizzles, so block depth is always 1.
478                 hwSliceSize = sliceSize;
479             }
480 
481             mipSize[mipIdx]         = sliceSize * depth;
482             mipSliceSize[mipIdx]    = hwSliceSize;
483             mipChainSliceSize      += sliceSize;
484             mipChainSliceSizeDense += (mipIdx == 0) ? sliceDataSize : sliceSize;
485 
486             if (pOut->pMipInfo != NULL)
487             {
488                 pOut->pMipInfo[mipIdx].pitch         = pitchImgData;
489                 pOut->pMipInfo[mipIdx].pitchForSlice = pitchSliceSize;
490                 pOut->pMipInfo[mipIdx].height        = height;
491                 pOut->pMipInfo[mipIdx].depth         = depth;
492             }
493         }
494     }
495 
496     pOut->sliceSize            = mipChainSliceSize;
497     pOut->sliceSizeDensePacked = mipChainSliceSizeDense;
498     pOut->surfSize             = mipChainSliceSize * pOut->numSlices;
499     pOut->mipChainInTail       = (firstMipInTail == 0) ? TRUE : FALSE;
500     pOut->firstMipIdInTail     = firstMipInTail;
501 
502     if (pOut->pMipInfo != NULL)
503     {
504         if (isLinear)
505         {
506             // 1. Linear swizzle mode doesn't have miptails.
507             // 2. The organization of linear 3D mipmap resource is same as GFX11, we should use mip slice size to
508             // caculate mip offset.
509             ADDR_ASSERT(firstMipInTail == pSurfInfo->numMipLevels);
510 
511             UINT_64 sliceSize = 0;
512 
513             for (INT_32 i = static_cast<INT_32>(pSurfInfo->numMipLevels) - 1; i >= 0; i--)
514             {
515                 pOut->pMipInfo[i].offset           = sliceSize;
516                 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
517                 pOut->pMipInfo[i].mipTailOffset    = 0;
518 
519                 sliceSize += mipSliceSize[i];
520             }
521         }
522         else
523         {
524             UINT_64 offset         = 0;
525             UINT_64 macroBlkOffset = 0;
526 
527             // Even though "firstMipInTail" is zero-based while "numMipLevels" is one-based, from definition of
528             // _ADDR3_COMPUTE_SURFACE_INFO_OUTPUT struct,
529             // UINT_32             firstMipIdInTail;     ///< The id of first mip in tail, if there is no mip
530             //                                           ///  in tail, it will be set to number of mip levels
531             // See initialization:
532             //              UINT_32       firstMipInTail    = pIn->numMipLevels
533             // It is possible that they are equal if
534             //      1. a single mip level image that's larger than the largest mip that would fit in the mip tail if
535             //         the mip tail existed
536             //      2. 256B_2D and linear images which don't have miptails from HWAL functionality
537             //
538             // We can use firstMipInTail != pIn->numMipLevels to check it has mip in tails and do mipInfo assignment.
539             if (firstMipInTail != pSurfInfo->numMipLevels)
540             {
541                 // Determine the application dimensions of the first mip level that resides in the tail.
542                 // This is distinct from "tailMaxDim" which is the maximum size of a mip level that will fit in the
543                 // tail.
544                 ADDR_EXTENT3D mipExtentFirstInTail = GetMipExtent(mip0Dims, firstMipInTail);
545 
546                 // For a 2D image, "alignedDepth" is always "1".
547                 // For a 3D image, this is effectively the number of application slices associated with the first mip
548                 //                 in the tail (up-aligned to HW requirements).
549                 const UINT_32 alignedDepth = PowTwoAlign(mipExtentFirstInTail.depth, pOut->blockExtent.depth);
550 
551                 // "hwSlices" is the number of HW blocks required to represent the first mip level in the tail.
552                 const UINT_32 hwSlices = alignedDepth / pOut->blockExtent.depth;
553 
554                 // Note that for 3D images that utilize a 2D swizzle mode, there really can be multiple
555                 // HW slices that encompass the mip tail; i.e., hwSlices is not necessarily one.
556                 // For example, you could have a single mip level 8x8x32 image with a 4KB_2D swizzle mode
557                 // The 8x8 region fits into a 4KB block (so it's "in the tail"), but because we have a 2D
558                 // swizzle mode (where each slice is its own block, so blockExtent.depth == 1), hwSlices
559                 // will now be equivalent to the number of application slices, or 32.
560 
561                 // Mip tails are stored in "reverse" order -- i.e., the mip-tail itself is stored first, so the
562                 // first mip level outside the tail has an offset that's the dimension of the tail itself, or one
563                 // swizzle block in size.
564                 offset         = blockSize * hwSlices;
565                 macroBlkOffset = blockSize;
566 
567                 // And determine the per-mip information for everything inside the mip tail.
568                 GetMipOrigin(pIn, mipExtentFirstInTail, pOut);
569             }
570 
571             // Again, because mip-levels are stored backwards (smallest first), we start determining mip-level
572             // offsets from the smallest to the largest.
573             // Note that firstMipInTail == 0 immediately terminates the loop, so there is no need to check for this
574             // case.
575             for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
576             {
577                 pOut->pMipInfo[i].offset           = offset;
578                 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
579                 pOut->pMipInfo[i].mipTailOffset    = 0;
580 
581                 offset         += mipSize[i];
582                 macroBlkOffset += mipSliceSize[i];
583             }
584         }
585     }
586 }
587 
588 /**
589 ************************************************************************************************************************
590 *   Gfx12Lib::HwlComputeSurfaceInfo
591 *
592 *   @brief
593 *       Internal function to calculate alignment for a surface
594 *
595 *   @return
596 *       VOID
597 ************************************************************************************************************************
598 */
HwlComputeSurfaceInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfo,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const599 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo(
600      const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pSurfInfo,  ///< [in] input structure
601      ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*       pOut        ///< [out] output structure
602      ) const
603 {
604     ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ pSurfInfo };
605 
606     // Check that only 2D swizzle mode supports MSAA
607     const UINT_32 samplesLog2 = Is2dSwizzle(pSurfInfo->swizzleMode) ? Log2(pSurfInfo->numSamples) : 0;
608 
609     // The block dimension width/height/depth is determined only by swizzle mode, MSAA samples and bpp
610     pOut->blockExtent = GetBlockDimensionTableEntry(pSurfInfo->swizzleMode, samplesLog2, Log2(pSurfInfo->bpp >> 3));
611 
612     ADDR_E_RETURNCODE  returnCode = ApplyCustomizedPitchHeight(pSurfInfo, pOut);
613 
614     if (returnCode == ADDR_OK)
615     {
616         pOut->numSlices = PowTwoAlign(pSurfInfo->numSlices, pOut->blockExtent.depth);
617         pOut->baseAlign = 1 << GetBlockSizeLog2(pSurfInfo->swizzleMode);
618 
619         GetMipOffset(&input, pOut);
620 
621         SanityCheckSurfSize(&input, pOut);
622 
623         // Slices must be exact multiples of the block sizes.  However:
624         // - with 3D images, one block will contain multiple slices, so that needs to be taken into account.
625         // - with linear images that have only one slice, we may trim and use the pitch alignment for size.
626         ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) %
627                      GetBlockSize(pSurfInfo->swizzleMode, CanTrimLinearPadding(pSurfInfo))) == 0);
628     }
629 
630     return returnCode;
631 }
632 
633 /**
634 ************************************************************************************************************************
635 *   Gfx12Lib::GetBaseMipExtents
636 *
637 *   @brief
638 *       Return the size of the base mip level in a nice cozy little structure.
639 *
640 ************************************************************************************************************************
641 */
GetBaseMipExtents(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const642 ADDR_EXTENT3D Gfx12Lib::GetBaseMipExtents(
643     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn
644     ) const
645 {
646     return { pIn->width,
647              pIn->height,
648              (IsTex3d(pIn->resourceType) ? pIn->numSlices : 1) }; // slices is depth for 3d
649 }
650 
651 /**
652 ************************************************************************************************************************
653 *   Gfx12Lib::GetMaxNumMipsInTail
654 *
655 *   @brief
656 *       Return max number of mips in tails
657 *
658 *   @return
659 *       Max number of mips in tails
660 ************************************************************************************************************************
661 */
GetMaxNumMipsInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const662 UINT_32 Gfx12Lib::GetMaxNumMipsInTail(
663     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
664     ) const
665 {
666     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
667     const UINT_32  blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
668 
669     UINT_32 effectiveLog2 = blockSizeLog2;
670     UINT_32 mipsInTail    = 1;
671 
672     if (Is3dSwizzle(pSurfInfo->swizzleMode))
673     {
674         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
675     }
676 
677     if (effectiveLog2 > 8)
678     {
679         mipsInTail = (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
680     }
681 
682     return mipsInTail;
683 }
684 
685 /**
686 ************************************************************************************************************************
687 *   Gfx12Lib::HwlCalcMipInTail
688 *
689 *   @brief
690 *       Internal function to calculate the "mipInTail" parameter.
691 *
692 *   @return
693 *       The magic "mipInTail" parameter.
694 ************************************************************************************************************************
695 */
CalcMipInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 mipLevel) const696 INT_32 Gfx12Lib::CalcMipInTail(
697     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
698     const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,
699     UINT_32                                        mipLevel
700     ) const
701 {
702     const INT_32  firstMipIdInTail = static_cast<INT_32>(pOut->firstMipIdInTail);
703 
704     INT_32  mipInTail = 0;
705 
706     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
707     mipInTail = static_cast<INT_32>(mipLevel) - firstMipIdInTail;
708     if ((mipInTail < 0) || (pSurfInfo->numMipLevels == 1) || (GetBlockSize(pSurfInfo->swizzleMode) <= 256))
709     {
710         mipInTail = MaxMipLevels;
711     }
712 
713     return mipInTail;
714 }
715 
716 /**
717 ************************************************************************************************************************
718 *   Gfx12Lib::CalcMipOffset
719 *
720 *   @brief
721 *
722 *   @return
723 *       The magic "mipInTail" parameter.
724 ************************************************************************************************************************
725 */
CalcMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,UINT_32 mipInTail) const726 UINT_32 Gfx12Lib::CalcMipOffset(
727     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
728     UINT_32                                        mipInTail
729     ) const
730 {
731     const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
732 
733     const INT_32  signedM       = static_cast<INT_32>(maxMipsInTail) - static_cast<INT_32>(1) - mipInTail;
734     const UINT_32 m             = Max(0, signedM);
735     const UINT_32 mipOffset     = (m > 6) ? (16 << m) : (m << 8);
736 
737     return mipOffset;
738 }
739 
740 /**
741 ************************************************************************************************************************
742 *   Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear
743 *
744 *   @brief
745 *       Internal function to calculate address from coord for linear swizzle surface
746 *
747 *   @return
748 *       ADDR_E_RETURNCODE
749 ************************************************************************************************************************
750 */
HwlComputeSurfaceAddrFromCoordLinear(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfoIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const751 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear(
752     const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,         ///< [in] input structure
753     const ADDR3_COMPUTE_SURFACE_INFO_INPUT*          pSurfInfoIn, ///< [in] input structure
754     ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut         ///< [out] output structure
755     ) const
756 {
757     ADDR3_MIP_INFO mipInfo[MaxMipLevels];
758     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
759 
760     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT surfInfoOut = {0};
761     surfInfoOut.size     = sizeof(surfInfoOut);
762     surfInfoOut.pMipInfo = mipInfo;
763 
764     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfo(pSurfInfoIn, &surfInfoOut);
765 
766     if (returnCode == ADDR_OK)
767     {
768         pOut->addr        = (surfInfoOut.sliceSize * pIn->slice) +
769                             mipInfo[pIn->mipId].offset +
770                             (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3);
771 
772         pOut->bitPosition = 0;
773     }
774 
775     return returnCode;
776 }
777 
778 /**
779 ************************************************************************************************************************
780 *   Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled
781 *
782 *   @brief
783 *       Internal function to calculate address from coord for tiled swizzle surface
784 *
785 *   @return
786 *       ADDR_E_RETURNCODE
787 ************************************************************************************************************************
788 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const789 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
790      const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
791      ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
792      ) const
793 {
794     // 256B block cannot support 3D image.
795     ADDR_ASSERT((IsTex3d(pIn->resourceType) && IsBlock256b(pIn->swizzleMode)) == FALSE);
796 
797     ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn               = {};
798     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut              = {};
799     ADDR3_MIP_INFO                    mipInfo[MaxMipLevels] = {};
800 
801     localIn.size         = sizeof(localIn);
802     localIn.flags        = pIn->flags;
803     localIn.swizzleMode  = pIn->swizzleMode;
804     localIn.resourceType = pIn->resourceType;
805     localIn.format       = ADDR_FMT_INVALID;
806     localIn.bpp          = pIn->bpp;
807     localIn.width        = Max(pIn->unAlignedDims.width, 1u);
808     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
809     localIn.numSlices    = Max(pIn->unAlignedDims.depth, 1u);
810     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
811     localIn.numSamples   = Max(pIn->numSamples, 1u);
812 
813     localOut.size        = sizeof(localOut);
814     localOut.pMipInfo    = mipInfo;
815     ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &localIn };
816 
817     ADDR_E_RETURNCODE ret = ComputeSurfaceInfo(&localIn, &localOut);
818 
819     if (ret == ADDR_OK)
820     {
821         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
822         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
823 
824         // Addr3 equation table excludes linear swizzle mode, and fortunately HwlComputeSurfaceAddrFromCoordTiled() is
825         // only called for non-linear swizzle mode.
826         const UINT_32 eqIndex     = GetEquationTableEntry(pIn->swizzleMode, Log2(localIn.numSamples), elemLog2);
827 
828         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
829         {
830             ADDR3_COORD  coords = {};
831 
832             // For a 3D image, one swizzle block contains multiple application slices.
833             // For any given image, each HW slice is addressed identically to any other HW slice.
834             // hwSliceSizeBytes is the size of one HW slice; i.e., the number of bytes for the pattern to repeat.
835             // hwSliceId is the index (0, 1, 2...) of the HW slice that an application slice resides in.
836             const UINT_64 hwSliceSizeBytes = localOut.sliceSize * localOut.blockExtent.depth;
837             const UINT_32 hwSliceId = pIn->slice / localOut.blockExtent.depth;
838 
839             const UINT_32 pb     = mipInfo[pIn->mipId].pitch / localOut.blockExtent.width;
840             const UINT_32 yb     = pIn->y / localOut.blockExtent.height;
841             const UINT_32 xb     = pIn->x / localOut.blockExtent.width;
842             const UINT_64 blkIdx = yb * pb + xb;
843 
844             // Technically, the addition of "mipTailCoordX" is only necessary if we're in the mip-tail.
845             // The "mipTailCoordXYZ" values should be zero if we're not in the mip-tail.
846             const BOOL_32 inTail = ((mipInfo[pIn->mipId].mipTailOffset != 0) && (blkSizeLog2 != Log2Size256));
847 
848             ADDR_ASSERT((inTail == TRUE) ||
849                         // If we're not in the tail, then all of these must be zero.
850                         ((mipInfo[pIn->mipId].mipTailCoordX == 0) &&
851                          (mipInfo[pIn->mipId].mipTailCoordY == 0) &&
852                          (mipInfo[pIn->mipId].mipTailCoordZ == 0)));
853 
854             coords.x = pIn->x     + mipInfo[pIn->mipId].mipTailCoordX;
855             coords.y = pIn->y     + mipInfo[pIn->mipId].mipTailCoordY;
856             coords.z = pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ;
857 
858             // Note that in this path, blkIdx does not account for the HW slice ID, so we need to
859             // add it in here.
860             pOut->addr = hwSliceSizeBytes * hwSliceId;
861 
862             const UINT_32 blkOffset  = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
863                                                                  coords.x << elemLog2,
864                                                                  coords.y,
865                                                                  coords.z,
866                                                                  pIn->sample);
867 
868             pOut->addr += mipInfo[pIn->mipId].macroBlockOffset +
869                           (blkIdx << blkSizeLog2)              +
870                           blkOffset;
871 
872             ADDR_ASSERT(pOut->addr < localOut.surfSize);
873         }
874         else
875         {
876             ret = ADDR_INVALIDPARAMS;
877         }
878     }
879 
880     return ret;
881 }
882 
883 /**
884 ************************************************************************************************************************
885 *   Gfx12Lib::HwlCopyMemToSurface
886 *
887 *   @brief
888 *       Copy multiple regions from memory to a non-linear surface.
889 *
890 *   @return
891 *       Error or success.
892 ************************************************************************************************************************
893 */
HwlCopyMemToSurface(const ADDR3_COPY_MEMSURFACE_INPUT * pIn,const ADDR3_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const894 ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
895     const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,
896     const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
897     UINT_32                             regionCount
898     ) const
899 {
900     // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
901     // optimized for a particular micro-swizzle mode if available.
902     ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
903     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
904     ADDR3_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
905     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
906     ADDR_E_RETURNCODE returnCode = ADDR_OK;
907 
908     if (pIn->numSamples > 1)
909     {
910         // TODO: MSAA
911         returnCode = ADDR_NOTIMPLEMENTED;
912     }
913 
914     localIn.size         = sizeof(localIn);
915     localIn.flags        = pIn->flags;
916     localIn.swizzleMode  = pIn->swizzleMode;
917     localIn.resourceType = pIn->resourceType;
918     localIn.format       = pIn->format;
919     localIn.bpp          = pIn->bpp;
920     localIn.width        = Max(pIn->unAlignedDims.width,  1u);
921     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
922     localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
923     localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
924     localIn.numSamples   = Max(pIn->numSamples,           1u);
925 
926     localOut.size     = sizeof(localOut);
927     localOut.pMipInfo = mipInfo;
928 
929     if (returnCode == ADDR_OK)
930     {
931         returnCode = ComputeSurfaceInfo(&localIn, &localOut);
932     }
933 
934     LutAddresser addresser = LutAddresser();
935     UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
936     if (returnCode == ADDR_OK)
937     {
938         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
939         const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
940                                                                 Log2(pIn->bpp >> 3),
941                                                                 pIn->numSamples);
942 
943         ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
944         GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
945         addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
946         pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
947         if (pfnCopyUnaligned == nullptr)
948         {
949             ADDR_ASSERT_ALWAYS(); // What format is this?
950             returnCode = ADDR_INVALIDPARAMS;
951         }
952     }
953 
954     if (returnCode == ADDR_OK)
955     {
956         for (UINT_32  regionIdx = 0; regionIdx < regionCount; regionIdx++)
957         {
958             const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
959             const ADDR3_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
960             UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
961             UINT_32 yBlks = pMipInfo->pitch / localOut.blockExtent.width;
962 
963             UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
964             UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
965             UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
966 
967             for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
968             {
969                 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
970                 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
971                 // for unaligned copies.
972                 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
973                 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
974 
975                 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
976                 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
977 
978                 ADDR_COORD2D sliceOrigin = { xStart, yStart };
979                 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
980 
981                 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
982                                  VoidPtrInc(pCurRegion->pMem, memOffset),
983                                  pCurRegion->memRowPitch,
984                                  yBlks,
985                                  sliceOrigin,
986                                  sliceExtent,
987                                  sliceXor,
988                                  addresser);
989             }
990         }
991     }
992     return returnCode;
993 }
994 
995 /**
996 ************************************************************************************************************************
997 *   Gfx12Lib::HwlCopySurfaceToMem
998 *
999 *   @brief
1000 *       Copy multiple regions from a non-linear surface to memory.
1001 *
1002 *   @return
1003 *       Error or success.
1004 ************************************************************************************************************************
1005 */
HwlCopySurfaceToMem(const ADDR3_COPY_MEMSURFACE_INPUT * pIn,const ADDR3_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const1006 ADDR_E_RETURNCODE Gfx12Lib::HwlCopySurfaceToMem(
1007     const ADDR3_COPY_MEMSURFACE_INPUT*  pIn,
1008     const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
1009     UINT_32                             regionCount
1010     ) const
1011 {
1012     // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
1013     // optimized for a particular micro-swizzle mode if available.
1014     ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
1015     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
1016     ADDR3_MIP_INFO                    mipInfo[MaxMipLevels] = {{0}};
1017     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
1018     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1019 
1020     if (pIn->numSamples > 1)
1021     {
1022         // TODO: MSAA
1023         returnCode = ADDR_NOTIMPLEMENTED;
1024     }
1025 
1026     localIn.size         = sizeof(localIn);
1027     localIn.flags        = pIn->flags;
1028     localIn.swizzleMode  = pIn->swizzleMode;
1029     localIn.resourceType = pIn->resourceType;
1030     localIn.format       = pIn->format;
1031     localIn.bpp          = pIn->bpp;
1032     localIn.width        = Max(pIn->unAlignedDims.width,  1u);
1033     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
1034     localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
1035     localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
1036     localIn.numSamples   = Max(pIn->numSamples,           1u);
1037 
1038     localOut.size     = sizeof(localOut);
1039     localOut.pMipInfo = mipInfo;
1040 
1041     if (returnCode == ADDR_OK)
1042     {
1043         returnCode = ComputeSurfaceInfo(&localIn, &localOut);
1044     }
1045 
1046     LutAddresser addresser = LutAddresser();
1047     UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
1048     if (returnCode == ADDR_OK)
1049     {
1050         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
1051         const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1052                                                                 Log2(pIn->bpp >> 3),
1053                                                                 pIn->numSamples);
1054 
1055         ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
1056         GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1057         addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
1058         pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
1059         if (pfnCopyUnaligned == nullptr)
1060         {
1061             ADDR_ASSERT_ALWAYS(); // What format is this?
1062             returnCode = ADDR_INVALIDPARAMS;
1063         }
1064     }
1065 
1066     if (returnCode == ADDR_OK)
1067     {
1068         for (UINT_32  regionIdx = 0; regionIdx < regionCount; regionIdx++)
1069         {
1070             const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
1071             const ADDR3_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
1072             UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
1073             UINT_32 yBlks = pMipInfo->pitch / localOut.blockExtent.width;
1074 
1075             UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
1076             UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
1077             UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
1078 
1079             for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
1080             {
1081                 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
1082                 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
1083                 // for unaligned copies.
1084                 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
1085                 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
1086 
1087                 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
1088                 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
1089 
1090                 ADDR_COORD2D sliceOrigin = { xStart, yStart };
1091                 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
1092 
1093                 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
1094                                  VoidPtrInc(pCurRegion->pMem, memOffset),
1095                                  pCurRegion->memRowPitch,
1096                                  yBlks,
1097                                  sliceOrigin,
1098                                  sliceExtent,
1099                                  sliceXor,
1100                                  addresser);
1101             }
1102         }
1103     }
1104     return returnCode;
1105 }
1106 
1107 
1108 /**
1109 ************************************************************************************************************************
1110 *   Gfx12Lib::HwlComputePipeBankXor
1111 *
1112 *   @brief
1113 *       Generate a PipeBankXor value to be ORed into bits above numSwizzleBits of address
1114 *
1115 *   @return
1116 *       PipeBankXor value
1117 ************************************************************************************************************************
1118 */
HwlComputePipeBankXor(const ADDR3_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1119 ADDR_E_RETURNCODE Gfx12Lib::HwlComputePipeBankXor(
1120     const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
1121     ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
1122     ) const
1123 {
1124     if ((m_numSwizzleBits != 0)               && // does this configuration support swizzling
1125         //         base address XOR in GFX12 will be applied to all blk_size = 4KB, 64KB, or 256KB swizzle modes,
1126         //         Note that Linear and 256B are excluded.
1127         (IsLinear(pIn->swizzleMode) == FALSE) &&
1128         (IsBlock256b(pIn->swizzleMode) == FALSE))
1129     {
1130         pOut->pipeBankXor = pIn->surfIndex % (1 << m_numSwizzleBits);
1131     }
1132     else
1133     {
1134         pOut->pipeBankXor = 0;
1135     }
1136 
1137     return ADDR_OK;
1138 }
1139 
1140 /**
1141 ************************************************************************************************************************
1142 *   Gfx12Lib::ComputeOffsetFromEquation
1143 *
1144 *   @brief
1145 *       Compute offset from equation
1146 *
1147 *   @return
1148 *       Offset
1149 ************************************************************************************************************************
1150 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const1151 UINT_32 Gfx12Lib::ComputeOffsetFromEquation(
1152     const ADDR_EQUATION* pEq,   ///< Equation
1153     UINT_32              x,     ///< x coord in bytes
1154     UINT_32              y,     ///< y coord in pixel
1155     UINT_32              z,     ///< z coord in slice
1156     UINT_32              s      ///< MSAA sample index
1157     ) const
1158 {
1159     UINT_32 offset = 0;
1160 
1161     for (UINT_32 i = 0; i < pEq->numBits; i++)
1162     {
1163         UINT_32 v = 0;
1164 
1165         if (pEq->addr[i].valid)
1166         {
1167             if (pEq->addr[i].channel == 0)
1168             {
1169                 v ^= (x >> pEq->addr[i].index) & 1;
1170             }
1171             else if (pEq->addr[i].channel == 1)
1172             {
1173                 v ^= (y >> pEq->addr[i].index) & 1;
1174             }
1175             else if (pEq->addr[i].channel == 2)
1176             {
1177                 v ^= (z >> pEq->addr[i].index) & 1;
1178             }
1179             else if (pEq->addr[i].channel == 3)
1180             {
1181                 v ^= (s >> pEq->addr[i].index) & 1;
1182             }
1183             else
1184             {
1185                 ADDR_ASSERT_ALWAYS();
1186             }
1187         }
1188 
1189         offset |= (v << i);
1190     }
1191 
1192     return offset;
1193 }
1194 
1195 /**
1196 ************************************************************************************************************************
1197 *   Gfx12Lib::GetSwizzlePatternInfo
1198 *
1199 *   @brief
1200 *       Get swizzle pattern
1201 *
1202 *   @return
1203 *       Swizzle pattern information
1204 ************************************************************************************************************************
1205 */
GetSwizzlePatternInfo(Addr3SwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numFrag) const1206 const ADDR_SW_PATINFO* Gfx12Lib::GetSwizzlePatternInfo(
1207     Addr3SwizzleMode swizzleMode,       ///< Swizzle mode
1208     UINT_32          elemLog2,          ///< Element size in bytes log2
1209     UINT_32          numFrag            ///< Number of fragment
1210     ) const
1211 {
1212     const ADDR_SW_PATINFO* patInfo = NULL;
1213 
1214     if (Is2dSwizzle(swizzleMode) == FALSE)
1215     {
1216         ADDR_ASSERT(numFrag == 1);
1217     }
1218 
1219     switch (swizzleMode)
1220     {
1221     case ADDR3_256KB_2D:
1222         switch (numFrag)
1223         {
1224         case 1:
1225             patInfo = GFX12_SW_256KB_2D_1xAA_PATINFO;
1226             break;
1227         case 2:
1228             patInfo = GFX12_SW_256KB_2D_2xAA_PATINFO;
1229             break;
1230         case 4:
1231             patInfo = GFX12_SW_256KB_2D_4xAA_PATINFO;
1232             break;
1233         case 8:
1234             patInfo = GFX12_SW_256KB_2D_8xAA_PATINFO;
1235             break;
1236         default:
1237             ADDR_ASSERT_ALWAYS();
1238         }
1239         break;
1240     case ADDR3_256KB_3D:
1241         patInfo = GFX12_SW_256KB_3D_PATINFO;
1242         break;
1243     case ADDR3_64KB_2D:
1244         switch (numFrag)
1245         {
1246         case 1:
1247             patInfo = GFX12_SW_64KB_2D_1xAA_PATINFO;
1248             break;
1249         case 2:
1250             patInfo = GFX12_SW_64KB_2D_2xAA_PATINFO;
1251             break;
1252         case 4:
1253             patInfo = GFX12_SW_64KB_2D_4xAA_PATINFO;
1254             break;
1255         case 8:
1256             patInfo = GFX12_SW_64KB_2D_8xAA_PATINFO;
1257             break;
1258         default:
1259             ADDR_ASSERT_ALWAYS();
1260         }
1261         break;
1262     case ADDR3_64KB_3D:
1263         patInfo = GFX12_SW_64KB_3D_PATINFO;
1264         break;
1265     case ADDR3_4KB_2D:
1266         switch (numFrag)
1267         {
1268         case 1:
1269             patInfo = GFX12_SW_4KB_2D_1xAA_PATINFO;
1270             break;
1271         case 2:
1272             patInfo = GFX12_SW_4KB_2D_2xAA_PATINFO;
1273             break;
1274         case 4:
1275             patInfo = GFX12_SW_4KB_2D_4xAA_PATINFO;
1276             break;
1277         case 8:
1278             patInfo = GFX12_SW_4KB_2D_8xAA_PATINFO;
1279             break;
1280         default:
1281             ADDR_ASSERT_ALWAYS();
1282         }
1283         break;
1284     case ADDR3_4KB_3D:
1285         patInfo = GFX12_SW_4KB_3D_PATINFO;
1286         break;
1287     case ADDR3_256B_2D:
1288         switch (numFrag)
1289         {
1290         case 1:
1291             patInfo = GFX12_SW_256B_2D_1xAA_PATINFO;
1292             break;
1293         case 2:
1294             patInfo = GFX12_SW_256B_2D_2xAA_PATINFO;
1295             break;
1296         case 4:
1297             patInfo = GFX12_SW_256B_2D_4xAA_PATINFO;
1298             break;
1299         case 8:
1300             patInfo = GFX12_SW_256B_2D_8xAA_PATINFO;
1301             break;
1302         default:
1303             break;
1304         }
1305         break;
1306     default:
1307         ADDR_ASSERT_ALWAYS();
1308         break;
1309     }
1310 
1311     return (patInfo != NULL) ? &patInfo[elemLog2] : NULL;
1312 }
1313 /**
1314 ************************************************************************************************************************
1315 *   Gfx12Lib::HwlInitGlobalParams
1316 *
1317 *   @brief
1318 *       Initializes global parameters
1319 *
1320 *   @return
1321 *       TRUE if all settings are valid
1322 *
1323 ************************************************************************************************************************
1324 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1325 BOOL_32 Gfx12Lib::HwlInitGlobalParams(
1326     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1327 {
1328     BOOL_32              valid = TRUE;
1329     GB_ADDR_CONFIG_GFX12 gbAddrConfig;
1330 
1331     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1332 
1333     switch (gbAddrConfig.bits.NUM_PIPES)
1334     {
1335         case ADDR_CONFIG_1_PIPE:
1336             m_pipesLog2 = 0;
1337             break;
1338         case ADDR_CONFIG_2_PIPE:
1339             m_pipesLog2 = 1;
1340             break;
1341         case ADDR_CONFIG_4_PIPE:
1342             m_pipesLog2 = 2;
1343             break;
1344         case ADDR_CONFIG_8_PIPE:
1345             m_pipesLog2 = 3;
1346             break;
1347         case ADDR_CONFIG_16_PIPE:
1348             m_pipesLog2 = 4;
1349             break;
1350         case ADDR_CONFIG_32_PIPE:
1351             m_pipesLog2 = 5;
1352             break;
1353         case ADDR_CONFIG_64_PIPE:
1354             m_pipesLog2 = 6;
1355             break;
1356         default:
1357             ADDR_ASSERT_ALWAYS();
1358             valid = FALSE;
1359             break;
1360     }
1361 
1362     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1363     {
1364         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1365             m_pipeInterleaveLog2 = 8;
1366             break;
1367         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1368             m_pipeInterleaveLog2 = 9;
1369             break;
1370         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1371             m_pipeInterleaveLog2 = 10;
1372             break;
1373         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1374             m_pipeInterleaveLog2 = 11;
1375             break;
1376         default:
1377             ADDR_ASSERT_ALWAYS();
1378             valid = FALSE;
1379             break;
1380     }
1381 
1382     m_numSwizzleBits = ((m_pipesLog2 >= 3) ? m_pipesLog2 - 2 : 0);
1383 
1384     if (valid)
1385     {
1386         InitEquationTable();
1387         InitBlockDimensionTable();
1388     }
1389 
1390     return valid;
1391 }
1392 
1393 /**
1394 ************************************************************************************************************************
1395 *   Gfx12Lib::HwlComputeNonBlockCompressedView
1396 *
1397 *   @brief
1398 *       Compute non-block-compressed view for a given mipmap level/slice.
1399 *
1400 *   @return
1401 *       ADDR_E_RETURNCODE
1402 ************************************************************************************************************************
1403 */
HwlComputeNonBlockCompressedView(const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1404 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeNonBlockCompressedView(
1405     const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
1406     ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
1407     ) const
1408 {
1409     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1410 
1411     if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1412         ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1413     {
1414         // Only support BC1~BC7, ASTC, or ETC2 for now...
1415         returnCode = ADDR_NOTSUPPORTED;
1416     }
1417     else
1418     {
1419         UINT_32 bcWidth, bcHeight;
1420         const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1421 
1422         ADDR3_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1423         infoIn.size         = sizeof(infoIn);
1424         infoIn.flags        = pIn->flags;
1425         infoIn.swizzleMode  = pIn->swizzleMode;
1426         infoIn.resourceType = pIn->resourceType;
1427         infoIn.format       = pIn->format;
1428         infoIn.bpp          = bpp;
1429         infoIn.width        = RoundUpQuotient(pIn->unAlignedDims.width, bcWidth);
1430         infoIn.height       = RoundUpQuotient(pIn->unAlignedDims.height, bcHeight);
1431         infoIn.numSlices    = pIn->unAlignedDims.depth;
1432         infoIn.numMipLevels = pIn->numMipLevels;
1433         infoIn.numSamples   = 1;
1434 
1435         ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {};
1436 
1437         ADDR3_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1438         infoOut.size     = sizeof(infoOut);
1439         infoOut.pMipInfo = mipInfo;
1440 
1441         returnCode = HwlComputeSurfaceInfo(&infoIn, &infoOut);
1442 
1443         if (returnCode == ADDR_OK)
1444         {
1445             ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1446             subOffIn.size             = sizeof(subOffIn);
1447             subOffIn.swizzleMode      = infoIn.swizzleMode;
1448             subOffIn.resourceType     = infoIn.resourceType;
1449             subOffIn.pipeBankXor      = pIn->pipeBankXor;
1450             subOffIn.slice            = pIn->slice;
1451             subOffIn.sliceSize        = infoOut.sliceSize;
1452             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1453             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
1454 
1455             ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1456             subOffOut.size = sizeof(subOffOut);
1457 
1458             // For any mipmap level, move nonBc view base address by offset
1459             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1460             pOut->offset = subOffOut.offset;
1461 
1462             ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1463             slicePbXorIn.size            = sizeof(slicePbXorIn);
1464             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
1465             slicePbXorIn.resourceType    = infoIn.resourceType;
1466             slicePbXorIn.bpe             = infoIn.bpp;
1467             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
1468             slicePbXorIn.slice           = pIn->slice;
1469             slicePbXorIn.numSamples      = 1;
1470 
1471             ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
1472             slicePbXorOut.size = sizeof(slicePbXorOut);
1473 
1474             // For any mipmap level, nonBc view should use computed pbXor
1475             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
1476             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
1477 
1478             const BOOL_32 tiled            = (pIn->swizzleMode != ADDR3_LINEAR);
1479             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail);
1480             const UINT_32 requestMipWidth  =
1481                     RoundUpQuotient(Max(pIn->unAlignedDims.width  >> pIn->mipId, 1u), bcWidth);
1482             const UINT_32 requestMipHeight =
1483                     RoundUpQuotient(Max(pIn->unAlignedDims.height >> pIn->mipId, 1u), bcHeight);
1484 
1485             if (inTail)
1486             {
1487                 // For mipmap level that is in mip tail block, hack a lot of things...
1488                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
1489                 // are fit in tail block:
1490 
1491                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
1492                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
1493 
1494                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
1495                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
1496 
1497                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
1498                 pOut->unAlignedDims.width  = Min(requestMipWidth << pOut->mipId, infoOut.blockExtent.width / 2);
1499 
1500                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
1501                 pOut->unAlignedDims.height = Min(requestMipHeight << pOut->mipId, infoOut.blockExtent.height);
1502             }
1503             // This check should cover at least mipId == 0
1504             else if ((requestMipWidth << pIn->mipId) == infoIn.width)
1505             {
1506                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
1507                 // - only one mipmap level and mipId = 0
1508                 pOut->mipId        = 0;
1509                 pOut->numMipLevels = 1;
1510 
1511                 // (mip0) width = requestMipWidth
1512                 pOut->unAlignedDims.width  = requestMipWidth;
1513 
1514                 // (mip0) height = requestMipHeight
1515                 pOut->unAlignedDims.height = requestMipHeight;
1516             }
1517             else
1518             {
1519                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
1520                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
1521                 // because single mip view may have different pitch value than original (multiple) mip view...
1522                 // A simple case would be:
1523                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
1524                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
1525                 //   mip0 width = 0x101/mip1 width = 0x80
1526                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
1527                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
1528 
1529                 // - 2 levels and mipId = 1
1530                 pOut->mipId        = 1;
1531                 pOut->numMipLevels = 2;
1532 
1533                 const UINT_32 upperMipWidth  =
1534                     RoundUpQuotient(Max(pIn->unAlignedDims.width  >> (pIn->mipId - 1), 1u), bcWidth);
1535                 const UINT_32 upperMipHeight =
1536                     RoundUpQuotient(Max(pIn->unAlignedDims.height >> (pIn->mipId - 1), 1u), bcHeight);
1537 
1538                 const BOOL_32 needToAvoidInTail = tiled                                              &&
1539                                                   (requestMipWidth <= infoOut.blockExtent.width / 2) &&
1540                                                   (requestMipHeight <= infoOut.blockExtent.height);
1541 
1542                 const UINT_32 hwMipWidth  =
1543                     PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockExtent.width);
1544                 const UINT_32 hwMipHeight =
1545                     PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockExtent.height);
1546 
1547                 const BOOL_32 needExtraWidth =
1548                     ((upperMipWidth < requestMipWidth * 2) ||
1549                      ((upperMipWidth == requestMipWidth * 2) &&
1550                       ((needToAvoidInTail == TRUE) ||
1551                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockExtent.width)))));
1552 
1553                 const BOOL_32 needExtraHeight =
1554                     ((upperMipHeight < requestMipHeight * 2) ||
1555                      ((upperMipHeight == requestMipHeight * 2) &&
1556                       ((needToAvoidInTail == TRUE) ||
1557                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockExtent.height)))));
1558 
1559                 // (mip0) width = requestLastMipLevelWidth
1560                 pOut->unAlignedDims.width  = upperMipWidth + (needExtraWidth ? 1: 0);
1561 
1562                 // (mip0) height = requestLastMipLevelHeight
1563                 pOut->unAlignedDims.height = upperMipHeight + (needExtraHeight ? 1: 0);
1564             }
1565 
1566             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
1567             ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.width, pOut->mipId)  == requestMipWidth);
1568             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
1569             ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.height, pOut->mipId) == requestMipHeight);
1570         }
1571     }
1572 
1573     return returnCode;
1574 }
1575 
1576 /**
1577 ************************************************************************************************************************
1578 *   Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1579 *
1580 *   @brief
1581 *       Compute sub resource offset to support swizzle pattern
1582 *
1583 *   @return
1584 *       VOID
1585 ************************************************************************************************************************
1586 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1587 VOID Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1588     const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
1589     ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
1590     ) const
1591 {
1592     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1593 }
1594 
1595 /**
1596 ************************************************************************************************************************
1597 *   Gfx12Lib::HwlComputeSlicePipeBankXor
1598 *
1599 *   @brief
1600 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1601 *
1602 *   @return
1603 *       PipeBankXor value
1604 ************************************************************************************************************************
1605 */
HwlComputeSlicePipeBankXor(const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1606 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSlicePipeBankXor(
1607     const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
1608     ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
1609     ) const
1610 {
1611     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1612 
1613     // PipeBankXor is only applied to 4KB, 64KB and 256KB on GFX12.
1614     if ((IsLinear(pIn->swizzleMode) == FALSE) && (IsBlock256b(pIn->swizzleMode) == FALSE))
1615     {
1616         if (pIn->bpe == 0)
1617         {
1618             // Require a valid bytes-per-element value passed from client...
1619             returnCode = ADDR_INVALIDPARAMS;
1620         }
1621         else
1622         {
1623             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1624                                                                     Log2(pIn->bpe >> 3),
1625                                                                     1);
1626 
1627             if (pPatInfo != NULL)
1628             {
1629                 const UINT_32 elemLog2    = Log2(pIn->bpe >> 3);
1630 
1631                 // Addr3 equation table excludes linear swizzle mode, and fortunately when calling
1632                 // HwlComputeSlicePipeBankXor the swizzle mode is non-linear, so we don't need to worry about negative
1633                 // table index.
1634                 const UINT_32 eqIndex     = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), elemLog2);
1635 
1636                 const UINT_32 pipeBankXorOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
1637                                                                             0,
1638                                                                             0,
1639                                                                             pIn->slice,
1640                                                                             0);
1641 
1642                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1643 
1644                 // Should have no bit set under pipe interleave
1645                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1646 
1647                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1648             }
1649             else
1650             {
1651                 // Should never come here...
1652                 ADDR_NOT_IMPLEMENTED();
1653 
1654                 returnCode = ADDR_NOTSUPPORTED;
1655             }
1656         }
1657     }
1658     else
1659     {
1660         pOut->pipeBankXor = 0;
1661     }
1662 
1663     return returnCode;
1664 }
1665 
1666 /**
1667 ************************************************************************************************************************
1668 *   Gfx12Lib::HwlConvertChipFamily
1669 *
1670 *   @brief
1671 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1672 *   @return
1673 *       ChipFamily
1674 ************************************************************************************************************************
1675 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1676 ChipFamily Gfx12Lib::HwlConvertChipFamily(
1677     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
1678     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1679 {
1680     return ADDR_CHIP_FAMILY_NAVI;
1681 }
1682 
1683 /**
1684 ************************************************************************************************************************
1685 *   Gfx12Lib::SanityCheckSurfSize
1686 *
1687 *   @brief
1688 *       Calculate the surface size via the exact hardware algorithm to see if it matches.
1689 *
1690 *   @return
1691 ************************************************************************************************************************
1692 */
SanityCheckSurfSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1693 void Gfx12Lib::SanityCheckSurfSize(
1694     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1695     const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*       pOut
1696     ) const
1697 {
1698 #if DEBUG
1699     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1700     // Verify that the requested image size is valid for the below algorithm.  The below code includes
1701     // implicit assumptions about the surface dimensions being less than "MaxImageDim"; otherwise, it can't
1702     // calculate "firstMipInTail" accurately and the below assertion will trip incorrectly.
1703     //
1704     // Surfaces destined for use only on the SDMA engine can exceed the gfx-engine-imposed limitations of
1705     // the "maximum" image dimensions.
1706     if ((pSurfInfo->width <= MaxImageDim)         &&
1707         (pSurfInfo->height <= MaxImageDim)        &&
1708         (pSurfInfo->numMipLevels <= MaxMipLevels) &&
1709         (UseCustomPitch(pSurfInfo) == FALSE)      &&
1710         (UseCustomHeight(pSurfInfo) == FALSE)     &&
1711         // HiZS surfaces have a reduced image size (i.e,. each pixel represents an 8x8 region of the parent
1712         // image, at least for single samples) but they still have the same number of mip levels as the
1713         // parent image.  This disconnect produces false assertions below as the image size doesn't apparently
1714         // support the specified number of mip levels.
1715         ((pSurfInfo->flags.hiZHiS == 0) || (pSurfInfo->numMipLevels == 1)))
1716     {
1717         UINT_32  lastMipSize = 1;
1718         UINT_64  dataChainSize = 0;
1719 
1720         const ADDR_EXTENT3D  mip0Dims      = GetBaseMipExtents(pSurfInfo);
1721         const UINT_32        blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1722         const ADDR_EXTENT3D  tailMaxDim    = GetMipTailDim(pIn, pOut->blockExtent);
1723         const UINT_32        maxMipsInTail = GetMaxNumMipsInTail(pIn);
1724 
1725         UINT_32  firstMipInTail = 0;
1726         for (INT_32 mipIdx = MaxMipLevels - 1; mipIdx >= 0; mipIdx--)
1727         {
1728             const ADDR_EXTENT3D  mipExtents = GetMipExtent(mip0Dims, mipIdx);
1729 
1730             if (IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
1731             {
1732                 firstMipInTail = mipIdx;
1733             }
1734         }
1735 
1736         for (INT_32 mipIdx = firstMipInTail - 1; mipIdx >= -1; mipIdx--)
1737         {
1738             if (mipIdx < (static_cast<INT_32>(pSurfInfo->numMipLevels) - 1))
1739             {
1740                 dataChainSize += lastMipSize;
1741             }
1742 
1743             if (mipIdx >= 0)
1744             {
1745                 const ADDR_EXTENT3D  mipExtents     = GetMipExtent(mip0Dims, mipIdx);
1746                 const UINT_32        mipBlockWidth  = ShiftCeil(mipExtents.width, Log2(pOut->blockExtent.width));
1747                 const UINT_32        mipBlockHeight = ShiftCeil(mipExtents.height, Log2(pOut->blockExtent.height));
1748 
1749                 lastMipSize = 4 * lastMipSize
1750                     - ((mipBlockWidth & 1) ? mipBlockHeight : 0)
1751                     - ((mipBlockHeight & 1) ? mipBlockWidth : 0)
1752                     - ((mipBlockWidth & mipBlockHeight & 1) ? 1 : 0);
1753             }
1754         }
1755 
1756         if (CanTrimLinearPadding(pSurfInfo))
1757         {
1758             ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) <= (dataChainSize << blockSizeLog2));
1759         }
1760         else
1761         {
1762             ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) == (dataChainSize << blockSizeLog2));
1763         }
1764     }
1765 #endif
1766 }
1767 
1768 /**
1769 ************************************************************************************************************************
1770 *   Gfx12Lib::HwlGetMicroBlockSize
1771 *
1772 *   @brief
1773 *       Determines the dimensions of a 256B microblock
1774 *
1775 *   @return
1776 ************************************************************************************************************************
1777 */
HwlGetMicroBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const1778 ADDR_EXTENT3D Gfx12Lib::HwlGetMicroBlockSize(
1779     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
1780     ) const
1781 {
1782     ADDR_EXTENT3D out = {};
1783     INT_32 widthLog2  = 0;
1784     INT_32 heightLog2 = 0;
1785     INT_32 depthLog2  = 0;
1786     Addr3SwizzleMode swMode    = pIn->pSurfInfo->swizzleMode;
1787     UINT_32          bppLog2   = Log2(pIn->pSurfInfo->bpp >> 3);
1788     UINT_32          blockBits = 8 - bppLog2;
1789     if (IsLinear(swMode))
1790     {
1791         widthLog2 = blockBits;
1792     }
1793     else if (Is2dSwizzle(swMode))
1794     {
1795         widthLog2  = (blockBits >> 1) + (blockBits & 1);
1796         heightLog2 = (blockBits >> 1);
1797     }
1798     else
1799     {
1800         ADDR_ASSERT(Is3dSwizzle(swMode));
1801         depthLog2  = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1802         widthLog2  = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1803         heightLog2 = (blockBits / 3);
1804     }
1805     out.width  = 1 << widthLog2;
1806     out.height = 1 << heightLog2;
1807     out.depth  = 1 << depthLog2;
1808     return out;
1809 }
1810 
1811 /**
1812 ************************************************************************************************************************
1813 *   Gfx12Lib::HwlCalcBlockSize
1814 *
1815 *   @brief
1816 *       Determines the extent, in pixels of a swizzle block.
1817 *
1818 *   @return
1819 ************************************************************************************************************************
1820 */
HwlCalcBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR_EXTENT3D * pExtent) const1821 VOID Gfx12Lib::HwlCalcBlockSize(
1822     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1823     ADDR_EXTENT3D*                                 pExtent
1824     ) const
1825 {
1826     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1827     const UINT_32                           log2BlkSize = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1828     const UINT_32 eleBytes     = pSurfInfo->bpp >> 3;
1829     const UINT_32 log2EleBytes = Log2(eleBytes);
1830 
1831     if (IsLinear(pSurfInfo->swizzleMode))
1832     {
1833         // 1D swizzle mode doesn't support MSAA, so there is no need to consider log2(samples)
1834         pExtent->width  = 1 << (log2BlkSize - log2EleBytes);
1835         pExtent->height = 1;
1836         pExtent->depth  = 1;
1837     }
1838     else if (Is3dSwizzle(pSurfInfo->swizzleMode))
1839     {
1840         // 3D swizlze mode doesn't support MSAA, so there is no need to consider log2(samples)
1841         const UINT_32 base             = (log2BlkSize / 3) - (log2EleBytes / 3);
1842         const UINT_32 log2BlkSizeMod3  = log2BlkSize % 3;
1843         const UINT_32 log2EleBytesMod3 = log2EleBytes % 3;
1844 
1845         UINT_32  x = base;
1846         UINT_32  y = base;
1847         UINT_32  z = base;
1848 
1849         if (log2BlkSizeMod3 > 0)
1850         {
1851             x++;
1852         }
1853 
1854         if (log2BlkSizeMod3 > 1)
1855         {
1856             z++;
1857         }
1858 
1859         if (log2EleBytesMod3 > 0)
1860         {
1861             x--;
1862         }
1863 
1864         if (log2EleBytesMod3 > 1)
1865         {
1866             z--;
1867         }
1868 
1869         pExtent->width  = 1u << x;
1870         pExtent->height = 1u << y;
1871         pExtent->depth  = 1u << z;
1872     }
1873     else
1874     {
1875         // Only 2D swizzle mode supports MSAA...
1876         // Since for gfx12 MSAA is unconditionally supported by all 2D swizzle modes, we don't need to restrict samples
1877         // to be 1 for ADDR3_256B_2D and ADDR3_4KB_2D as gfx10/11 did.
1878         const UINT_32 log2Samples = Log2(pSurfInfo->numSamples);
1879         const UINT_32 log2Width   = (log2BlkSize  >> 1)  -
1880                                     (log2EleBytes >> 1)  -
1881                                     (log2Samples  >> 1)  -
1882                                     (log2EleBytes & log2Samples & 1);
1883         const UINT_32 log2Height  = (log2BlkSize  >> 1)  -
1884                                     (log2EleBytes >> 1)  -
1885                                     (log2Samples  >> 1)  -
1886                                     ((log2EleBytes | log2Samples) & 1);
1887 
1888         // Return the extent in actual units, not log2
1889         pExtent->width  = 1u << log2Width;
1890         pExtent->height = 1u << log2Height;
1891         pExtent->depth  = 1;
1892     }
1893 }
1894 
1895 /**
1896 ************************************************************************************************************************
1897 *   Gfx12Lib::HwlGetMipInTailMaxSize
1898 *
1899 *   @brief
1900 *       Determines the max size of a mip level that fits in the mip-tail.
1901 *
1902 *   @return
1903 ************************************************************************************************************************
1904 */
HwlGetMipInTailMaxSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR_EXTENT3D & blockDims) const1905 ADDR_EXTENT3D Gfx12Lib::HwlGetMipInTailMaxSize(
1906     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1907     const ADDR_EXTENT3D&                           blockDims) const
1908 {
1909     ADDR_EXTENT3D mipTailDim = {};
1910     const Addr3SwizzleMode swizzleMode = pIn->pSurfInfo->swizzleMode;
1911     const UINT_32          log2BlkSize = GetBlockSizeLog2(swizzleMode);
1912 
1913     mipTailDim = blockDims;
1914 
1915     if (Is3dSwizzle(swizzleMode))
1916     {
1917         const UINT_32 dim = log2BlkSize % 3;
1918 
1919         if (dim == 0)
1920         {
1921             mipTailDim.height >>= 1;
1922         }
1923         else if (dim == 1)
1924         {
1925             mipTailDim.width >>= 1;
1926         }
1927         else
1928         {
1929             mipTailDim.depth >>= 1;
1930         }
1931     }
1932     else
1933     {
1934         if ((log2BlkSize % 2) == 0)
1935         {
1936             mipTailDim.width >>= 1;
1937         }
1938         else
1939         {
1940             mipTailDim.height >>= 1;
1941         }
1942     }
1943     return mipTailDim;
1944 }
1945 
1946 
1947 /**
1948 ************************************************************************************************************************
1949 *   Lib::GetPossibleSwizzleModes
1950 *
1951 *   @brief
1952 *       GFX12 specific implementation of Addr3GetPossibleSwizzleModes
1953 *
1954 *   @return
1955 *       ADDR_E_RETURNCODE
1956 ************************************************************************************************************************
1957 */
HwlGetPossibleSwizzleModes(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn,ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT * pOut) const1958 ADDR_E_RETURNCODE Gfx12Lib::HwlGetPossibleSwizzleModes(
1959      const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn,    ///< [in] input structure
1960      ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT*      pOut    ///< [out] output structure
1961      ) const
1962 {
1963     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1964 
1965     const ADDR3_SURFACE_FLAGS flags = pIn->flags;
1966 
1967     if (pIn->bpp == 96)
1968     {
1969         pOut->validModes.swLinear = 1;
1970     }
1971     // Depth/Stencil images can't be linear and must be 2D swizzle modes.
1972     // These three are related to DB block that supports only SW_64KB_2D and SW_256KB_2D for DSV.
1973     else if (flags.depth || flags.stencil)
1974     {
1975         pOut->validModes.sw2d64kB  = 1;
1976         pOut->validModes.sw2d256kB = 1;
1977     }
1978     // The organization of elements in the hierarchical surface is the same as any other surface, and it can support
1979     // any 2D swizzle mode (SW_256_2D, SW_4KB_2D, SW_64KB_2D, or SW_256KB_2D).  The swizzle mode can be selected
1980     // orthogonally to the underlying z or stencil surface.
1981     else if (pIn->flags.hiZHiS)
1982     {
1983         pOut->validModes.sw2d256B  = 1;
1984         pOut->validModes.sw2d4kB   = 1;
1985         pOut->validModes.sw2d64kB  = 1;
1986         pOut->validModes.sw2d256kB = 1;
1987     }
1988     // MSAA can't be linear and must be 2D swizzle modes.
1989     else if (pIn->numSamples > 1)
1990     {
1991         pOut->validModes.sw2d256B  = 1;
1992         pOut->validModes.sw2d4kB   = 1;
1993         pOut->validModes.sw2d64kB  = 1;
1994         pOut->validModes.sw2d256kB = 1;
1995     }
1996     // Some APIs (like Vulkan) require that PRT should always use 64KB blocks
1997     else if (flags.standardPrt)
1998     {
1999         if (IsTex3d(pIn->resourceType) && (flags.view3dAs2dArray == 0))
2000         {
2001             pOut->validModes.sw3d64kB = 1;
2002         }
2003         else
2004         {
2005             pOut->validModes.sw2d64kB = 1;
2006         }
2007     }
2008     else if (// Block-compressed images need to be either using 2D or linear swizzle modes.
2009              flags.blockCompressed                 ||
2010              // Only 3D w/ view3dAs2dArray == 0 will use 1D/2D block swizzle modes
2011              (IsTex3d(pIn->resourceType) == FALSE) || flags.view3dAs2dArray ||
2012              //      NV12 and P010 support
2013              //      SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
2014              // There could be more multimedia formats that require more hw specific tiling modes...
2015              flags.nv12                            || flags.p010)
2016     {
2017         // Linear is not allowed for VRS images.
2018         if (flags.isVrsImage == 0)
2019         {
2020             pOut->validModes.swLinear = 1;
2021         }
2022 
2023         // 3D resources can't use SW_256B_2D
2024         if (IsTex3d(pIn->resourceType) == FALSE)
2025         {
2026             pOut->validModes.sw2d256B = 1;
2027         }
2028         pOut->validModes.sw2d4kB   = 1;
2029         pOut->validModes.sw2d64kB  = 1;
2030         pOut->validModes.sw2d256kB = 1;
2031     }
2032     else if (IsTex3d(pIn->resourceType))
2033     {
2034         // An eventual determination would be based on pal setting of height_watermark and depth_watermark.
2035         // However, we just adopt the simpler logic currently.
2036         // For 3D images w/ view3dAs2dArray = 0, SW_3D is preferred.
2037         // For 3D images w/ view3dAs2dArray = 1, it should go to 2D path above.
2038         // Enable linear since client may force linear tiling for 3D texture that does not set view3dAs2dArray.
2039         pOut->validModes.swLinear  = 1;
2040         pOut->validModes.sw3d4kB   = 1;
2041         pOut->validModes.sw3d64kB  = 1;
2042         pOut->validModes.sw3d256kB = 1;
2043     }
2044 
2045     // If client specifies a max alignment, remove swizzles that require alignment beyond it.
2046     if (pIn->maxAlign != 0)
2047     {
2048         if (pIn->maxAlign < Size256K)
2049         {
2050             pOut->validModes.value &= ~Blk256KBSwModeMask;
2051         }
2052 
2053         if (pIn->maxAlign < Size64K)
2054         {
2055             pOut->validModes.value &= ~Blk64KBSwModeMask;
2056         }
2057 
2058         if (pIn->maxAlign < Size4K)
2059         {
2060             pOut->validModes.value &= ~Blk4KBSwModeMask;
2061         }
2062 
2063         if (pIn->maxAlign < Size256)
2064         {
2065             pOut->validModes.value &= ~Blk256BSwModeMask;
2066         }
2067     }
2068 
2069     return returnCode;
2070 }
2071 
2072 /**
2073 ************************************************************************************************************************
2074 *   Gfx12Lib::HwlComputeStereoInfo
2075 *
2076 *   @brief
2077 *       Compute height alignment and right eye pipeBankXor for stereo surface
2078 *
2079 *   @return
2080 *       Error code
2081 *
2082 ************************************************************************************************************************
2083 */
HwlComputeStereoInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const2084 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeStereoInfo(
2085     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
2086     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
2087     UINT_32*                                pRightXor   ///< Right eye xor
2088     ) const
2089 {
2090     ADDR_E_RETURNCODE ret = ADDR_OK;
2091 
2092     *pRightXor = 0;
2093 
2094     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
2095     const UINT_32 samplesLog2 = Log2(pIn->numSamples);
2096     const UINT_32 eqIndex     = GetEquationTableEntry(pIn->swizzleMode, samplesLog2, elemLog2);
2097 
2098     if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2099     {
2100         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2101 
2102         UINT_32 yMax     = 0;
2103         UINT_32 yPosMask = 0;
2104 
2105         // First get "max y bit"
2106         for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2107         {
2108             ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
2109 
2110             if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2111                 (m_equationTable[eqIndex].addr[i].index > yMax))
2112             {
2113                 yMax = m_equationTable[eqIndex].addr[i].index;
2114             }
2115         }
2116 
2117         // Then loop again for populating a position mask of "max Y bit"
2118         for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2119         {
2120             if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2121                 (m_equationTable[eqIndex].addr[i].index == yMax))
2122             {
2123                 yPosMask |= 1u << i;
2124             }
2125         }
2126 
2127         const UINT_32 additionalAlign = 1 << yMax;
2128 
2129         if (additionalAlign >= *pAlignY)
2130         {
2131             *pAlignY = additionalAlign;
2132 
2133             const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2134 
2135             if ((alignedHeight >> yMax) & 1)
2136             {
2137                 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
2138             }
2139         }
2140     }
2141     else
2142     {
2143         ret = ADDR_INVALIDPARAMS;
2144     }
2145 
2146     return ret;
2147 }
2148 
2149 /**
2150 ************************************************************************************************************************
2151 *   Gfx12Lib::HwlValidateNonSwModeParams
2152 *
2153 *   @brief
2154 *       Validate compute surface info params except swizzle mode
2155 *
2156 *   @return
2157 *       TRUE if parameters are valid, FALSE otherwise
2158 ************************************************************************************************************************
2159 */
HwlValidateNonSwModeParams(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn) const2160 BOOL_32 Gfx12Lib::HwlValidateNonSwModeParams(
2161     const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn
2162     ) const
2163 {
2164     const ADDR3_SURFACE_FLAGS flags     = pIn->flags;
2165     const AddrResourceType    rsrcType  = pIn->resourceType;
2166     const BOOL_32             isVrs     = flags.isVrsImage;
2167     const BOOL_32             isStereo  = flags.qbStereo;
2168     const BOOL_32             isDisplay = flags.display;
2169     const BOOL_32             isMipmap  = (pIn->numMipLevels > 1);
2170     const BOOL_32             isMsaa    = (pIn->numSamples > 1);
2171     const UINT_32             bpp       = pIn->bpp;
2172 
2173     BOOL_32                   valid     = TRUE;
2174     if ((bpp == 0) || (bpp > 128) || (pIn->width == 0) || (pIn->numSamples > 8))
2175     {
2176         ADDR_ASSERT_ALWAYS();
2177         valid = FALSE;
2178     }
2179 
2180     // Resource type check
2181     if (IsTex1d(rsrcType))
2182     {
2183         if (isMsaa || isStereo || isVrs || isDisplay)
2184         {
2185             ADDR_ASSERT_ALWAYS();
2186             valid = FALSE;
2187         }
2188     }
2189     else if (IsTex2d(rsrcType))
2190     {
2191         if ((isMsaa && isMipmap) || (isStereo && isMsaa) || (isStereo && isMipmap) ||
2192             // VRS surface needs to be 8BPP format
2193             (isVrs && (bpp != 8)))
2194         {
2195             ADDR_ASSERT_ALWAYS();
2196             valid = FALSE;
2197         }
2198     }
2199     else if (IsTex3d(rsrcType))
2200     {
2201         if (isMsaa || isStereo || isVrs || isDisplay)
2202         {
2203             ADDR_ASSERT_ALWAYS();
2204             valid = FALSE;
2205         }
2206     }
2207     else
2208     {
2209         // An invalid resource type that is not 1D, 2D or 3D.
2210         ADDR_ASSERT_ALWAYS();
2211         valid = FALSE;
2212     }
2213 
2214     return valid;
2215 }
2216 
2217 } // V3
2218 } // Addr
2219