1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx12addrlib.cpp
12 * @brief Contain the implementation for the Gfx12Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx12addrlib.h"
17 #include "gfx12_gb_reg.h"
18 #include "addrswizzler.h"
19
20 #include "amdgpu_asic_addr.h"
21
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 * Gfx12HwlInit
30 *
31 * @brief
32 * Creates an Gfx12Lib object.
33 *
34 * @return
35 * Returns an Gfx12Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx12HwlInit(const Client * pClient)38 Addr::Lib* Gfx12HwlInit(
39 const Client* pClient)
40 {
41 return V3::Gfx12Lib::CreateObj(pClient);
42 }
43
44 namespace V3
45 {
46
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 // Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50 const SwizzleModeFlags Gfx12Lib::SwizzleModeTable[ADDR3_MAX_TYPE] =
51 {//Linear 2d 3d 256B 4KB 64KB 256KB Reserved
52 {{1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR3_LINEAR
53 {{0, 1, 0, 1, 0, 0, 0, 0}}, // ADDR3_256B_2D
54 {{0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR3_4KB_2D
55 {{0, 1, 0, 0, 0, 1, 0, 0}}, // ADDR3_64KB_2D
56 {{0, 1, 0, 0, 0, 0, 1, 0}}, // ADDR3_256KB_2D
57 {{0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR3_4KB_3D
58 {{0, 0, 1, 0, 0, 1, 0, 0}}, // ADDR3_64KB_3D
59 {{0, 0, 1, 0, 0, 0, 1, 0}}, // ADDR3_256KB_3D
60 };
61
62 /**
63 ************************************************************************************************************************
64 * Gfx12Lib::Gfx12Lib
65 *
66 * @brief
67 * Constructor
68 *
69 ************************************************************************************************************************
70 */
Gfx12Lib(const Client * pClient)71 Gfx12Lib::Gfx12Lib(
72 const Client* pClient)
73 :
74 Lib(pClient),
75 m_numSwizzleBits(0)
76 {
77 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
78 }
79
80 /**
81 ************************************************************************************************************************
82 * Gfx12Lib::~Gfx12Lib
83 *
84 * @brief
85 * Destructor
86 ************************************************************************************************************************
87 */
~Gfx12Lib()88 Gfx12Lib::~Gfx12Lib()
89 {
90 }
91
92 /**
93 ************************************************************************************************************************
94 * Gfx12Lib::ConvertSwizzlePatternToEquation
95 *
96 * @brief
97 * Convert swizzle pattern to equation.
98 *
99 * @return
100 * N/A
101 ************************************************************************************************************************
102 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,Addr3SwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const103 VOID Gfx12Lib::ConvertSwizzlePatternToEquation(
104 UINT_32 elemLog2, ///< [in] element bytes log2
105 Addr3SwizzleMode swMode, ///< [in] swizzle mode
106 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern info
107 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
108 const
109 {
110 ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K];
111 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
112
113 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
114 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode, TRUE);
115
116 pEquation->numBits = blockSizeLog2;
117 pEquation->stackedDepthSlices = FALSE;
118
119 for (UINT_32 i = 0; i < elemLog2; i++)
120 {
121 pEquation->addr[i].channel = 0;
122 pEquation->addr[i].valid = 1;
123 pEquation->addr[i].index = i;
124 }
125
126 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
127 {
128 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
129
130 if (pSwizzle[i].x != 0)
131 {
132 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
133
134 pEquation->addr[i].channel = 0;
135 pEquation->addr[i].valid = 1;
136 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
137 }
138 else if (pSwizzle[i].y != 0)
139 {
140 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
141
142 pEquation->addr[i].channel = 1;
143 pEquation->addr[i].valid = 1;
144 pEquation->addr[i].index = Log2(pSwizzle[i].y);
145 }
146 else if (pSwizzle[i].z != 0)
147 {
148 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
149
150 pEquation->addr[i].channel = 2;
151 pEquation->addr[i].valid = 1;
152 pEquation->addr[i].index = Log2(pSwizzle[i].z);
153 }
154 else if (pSwizzle[i].s != 0)
155 {
156 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].s)));
157
158 pEquation->addr[i].channel = 3;
159 pEquation->addr[i].valid = 1;
160 pEquation->addr[i].index = Log2(pSwizzle[i].s);
161 }
162 else
163 {
164 ADDR_ASSERT_ALWAYS();
165 }
166 }
167 }
168
169 /**
170 ************************************************************************************************************************
171 * Gfx12Lib::InitEquationTable
172 *
173 * @brief
174 * Initialize Equation table.
175 *
176 * @return
177 * N/A
178 ************************************************************************************************************************
179 */
InitEquationTable()180 VOID Gfx12Lib::InitEquationTable()
181 {
182 memset(m_equationTable, 0, sizeof(m_equationTable));
183
184 for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
185 {
186 const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
187
188 // Skip linear equation (data table is not useful for 2D/3D images-- only contains x-coordinate bits)
189 if (IsValidSwMode(swMode) && (IsLinear(swMode) == false))
190 {
191 const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
192
193 for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
194 {
195 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
196 {
197 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
198 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, elemLog2, 1 << msaaIdx);
199
200 if (pPatInfo != NULL)
201 {
202 ADDR_EQUATION equation = {};
203
204 ConvertSwizzlePatternToEquation(elemLog2, swMode, pPatInfo, &equation);
205
206 equationIndex = m_numEquations;
207 ADDR_ASSERT(equationIndex < NumSwizzlePatterns);
208
209 m_equationTable[equationIndex] = equation;
210 m_numEquations++;
211 }
212 SetEquationTableEntry(swMode, msaaIdx, elemLog2, equationIndex);
213 } // loop through bpp sizes
214 } // loop through MSAA rates
215 } // End check for valid non-linear modes
216 } // loop through swizzle modes
217 }
218
219 /**
220 ************************************************************************************************************************
221 * Gfx12Lib::HwlGetEquationIndex
222 *
223 * @brief
224 * Return equationIndex by surface info input
225 *
226 * @return
227 * equationIndex
228 ************************************************************************************************************************
229 */
HwlGetEquationIndex(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const230 UINT_32 Gfx12Lib::HwlGetEquationIndex(
231 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
232 ) const
233 {
234 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
235
236 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
237 (pIn->resourceType == ADDR_RSRC_TEX_3D))
238 {
239 equationIdx = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), Log2(pIn->bpp >> 3));
240 }
241
242 return equationIdx;
243 }
244
245 /**
246 ************************************************************************************************************************
247 * Gfx12Lib::InitBlockDimensionTable
248 *
249 * @brief
250 * Initialize block dimension table for all swizzle modes + msaa samples + bpp bundles.
251 *
252 * @return
253 * N/A
254 ************************************************************************************************************************
255 */
InitBlockDimensionTable()256 VOID Gfx12Lib::InitBlockDimensionTable()
257 {
258 memset(m_blockDimensionTable, 0, sizeof(m_blockDimensionTable));
259
260 ADDR3_COMPUTE_SURFACE_INFO_INPUT surfaceInfo {};
261
262
263 for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
264 {
265 const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
266
267 if (IsValidSwMode(swMode))
268 {
269 surfaceInfo.swizzleMode = swMode;
270 const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxNumMsaaRates : 1;
271
272 for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
273 {
274 surfaceInfo.numSamples = (1u << msaaIdx);
275 for (UINT_32 elementBytesLog2 = 0; elementBytesLog2 < MaxElementBytesLog2; elementBytesLog2++)
276 {
277 surfaceInfo.bpp = (1u << (elementBytesLog2 + 3));
278 ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &surfaceInfo };
279 ComputeBlockDimensionForSurf(&input, &m_blockDimensionTable[swModeIdx][msaaIdx][elementBytesLog2]);
280 } // end loop through bpp sizes
281 } // end loop through MSAA rates
282 } // end check for valid swizzle modes
283 } // end loop through swizzle modes
284 }
285
286 /**
287 ************************************************************************************************************************
288 * Gfx12Lib::GetMipOrigin
289 *
290 * @brief
291 * Internal function to calculate origins of the mip levels
292 *
293 * @return
294 * ADDR_E_RETURNCODE
295 ************************************************************************************************************************
296 */
GetMipOrigin(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR_EXTENT3D & mipExtentFirstInTail,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const297 VOID Gfx12Lib::GetMipOrigin(
298 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn, ///< [in] input structure
299 const ADDR_EXTENT3D& mipExtentFirstInTail,
300 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
301 ) const
302 {
303 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
304 const BOOL_32 is3d = (pSurfInfo->resourceType == ADDR_RSRC_TEX_3D);
305 const UINT_32 bytesPerPixel = pSurfInfo->bpp >> 3;
306 const UINT_32 elementBytesLog2 = Log2(bytesPerPixel);
307 const UINT_32 samplesLog2 = Log2(pSurfInfo->numSamples);
308
309 // Calculate the width/height/depth for the given microblock, because the mip offset calculation
310 // is in units of microblocks but we want it in elements.
311 ADDR_EXTENT3D microBlockExtent = HwlGetMicroBlockSize(pIn);
312 const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn, pOut->blockExtent);
313 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
314
315 UINT_32 pitch = tailMaxDim.width;
316 UINT_32 height = tailMaxDim.height;
317 UINT_32 depth = (is3d ? PowTwoAlign(mipExtentFirstInTail.depth, microBlockExtent.depth) : 1);
318
319 const UINT_32 tailMaxDepth = (is3d ? (depth / microBlockExtent.depth) : 1);
320
321 for (UINT_32 i = pOut->firstMipIdInTail; i < pSurfInfo->numMipLevels; i++)
322 {
323 const INT_32 mipInTail = CalcMipInTail(pIn, pOut, i);
324 const UINT_32 mipOffset = CalcMipOffset(pIn, mipInTail);
325
326 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
327 pOut->pMipInfo[i].mipTailOffset = mipOffset;
328 pOut->pMipInfo[i].macroBlockOffset = 0;
329
330 pOut->pMipInfo[i].pitch = pitch;
331 pOut->pMipInfo[i].height = height;
332 pOut->pMipInfo[i].depth = depth;
333 if (IsLinear(pSurfInfo->swizzleMode))
334 {
335 pOut->pMipInfo[i].mipTailCoordX = mipOffset >> 8;
336 pOut->pMipInfo[i].mipTailCoordY = 0;
337 pOut->pMipInfo[i].mipTailCoordZ = 0;
338 }
339 else
340 {
341 UINT_32 mipX = ((mipOffset >> 9) & 1) |
342 ((mipOffset >> 10) & 2) |
343 ((mipOffset >> 11) & 4) |
344 ((mipOffset >> 12) & 8) |
345 ((mipOffset >> 13) & 16) |
346 ((mipOffset >> 14) & 32);
347 UINT_32 mipY = ((mipOffset >> 8) & 1) |
348 ((mipOffset >> 9) & 2) |
349 ((mipOffset >> 10) & 4) |
350 ((mipOffset >> 11) & 8) |
351 ((mipOffset >> 12) & 16) |
352 ((mipOffset >> 13) & 32);
353
354 pOut->pMipInfo[i].mipTailCoordX = mipX * microBlockExtent.width;
355 pOut->pMipInfo[i].mipTailCoordY = mipY * microBlockExtent.height;
356 pOut->pMipInfo[i].mipTailCoordZ = 0;
357 }
358 if (IsLinear(pSurfInfo->swizzleMode))
359 {
360 pitch = Max(pitch >> 1, 1u);
361 }
362 else
363 {
364 pOut->pMipInfo[i].pitch = PowTwoAlign(pitch, microBlockExtent.width);
365 pOut->pMipInfo[i].height = PowTwoAlign(height, microBlockExtent.height);
366 pOut->pMipInfo[i].depth = PowTwoAlign(depth, microBlockExtent.depth);
367 pitch = Max(pitch >> 1, 1u);
368 height = Max(height >> 1, 1u);
369 depth = Max(depth >> 1, 1u);
370 }
371 }
372 }
373
374 /**
375 ************************************************************************************************************************
376 * Gfx12Lib::GetMipOffset
377 *
378 * @brief
379 * Internal function to calculate alignment for a surface
380 *
381 * @return
382 * ADDR_E_RETURNCODE
383 ************************************************************************************************************************
384 */
GetMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const385 VOID Gfx12Lib::GetMipOffset(
386 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn, ///< [in] input structure
387 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
388 ) const
389 {
390 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
391 const UINT_32 bytesPerPixel = pSurfInfo->bpp >> 3;
392 const UINT_32 elementBytesLog2 = Log2(bytesPerPixel);
393 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
394 const UINT_32 blockSize = 1 << blockSizeLog2;
395 const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn, pOut->blockExtent);;
396 const ADDR_EXTENT3D mip0Dims = GetBaseMipExtents(pSurfInfo);
397 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
398 const bool isLinear = IsLinear(pSurfInfo->swizzleMode);
399
400 UINT_32 firstMipInTail = pSurfInfo->numMipLevels;
401 UINT_64 mipChainSliceSize = 0;
402 UINT_64 mipChainSliceSizeDense = 0;
403 UINT_64 mipSize[MaxMipLevels];
404 UINT_64 mipSliceSize[MaxMipLevels];
405
406 const BOOL_32 useCustomPitch = UseCustomPitch(pSurfInfo);
407 for (UINT_32 mipIdx = 0; mipIdx < pSurfInfo->numMipLevels; mipIdx++)
408 {
409 const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx);
410
411 if (Lib::SupportsMipTail(pSurfInfo->swizzleMode) &&
412 (pSurfInfo->numMipLevels > 1) &&
413 IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
414 {
415 firstMipInTail = mipIdx;
416 mipChainSliceSize += blockSize / pOut->blockExtent.depth;
417 mipChainSliceSizeDense += blockSize / pOut->blockExtent.depth;
418 break;
419 }
420 else
421 {
422 UINT_32 pitchImgData = 0u;
423 UINT_32 pitchSliceSize = 0u;
424 if (isLinear)
425 {
426 // The slice size of a linear image is calculated as if the "pitch" is 256 byte aligned.
427 // However, the rendering pitch is aligned to 128 bytes, and that is what needs to be reported
428 // to our clients in the normal 'pitch' field.
429 // Note this is NOT the same as the total size of the image being aligned to 256 bytes!
430 pitchImgData = (useCustomPitch ? pOut->pitch : PowTwoAlign(mipExtents.width, 128u / bytesPerPixel));
431 pitchSliceSize = PowTwoAlign(pitchImgData, blockSize / bytesPerPixel);
432 }
433 else
434 {
435 pitchImgData = PowTwoAlign(mipExtents.width, pOut->blockExtent.width);
436 pitchSliceSize = pitchImgData;
437 }
438
439 UINT_32 height = UseCustomHeight(pSurfInfo)
440 ? pOut->height
441 : PowTwoAlign(mipExtents.height, pOut->blockExtent.height);
442 const UINT_32 depth = PowTwoAlign(mipExtents.depth, pOut->blockExtent.depth);
443
444 if (isLinear && pSurfInfo->flags.denseSliceExact && ((pitchImgData % blockSize) != 0))
445 {
446 // If we want size to exactly equal (data)pitch * height, make sure that value is 256B aligned.
447 // Essentially, if the pitch is less aligned, ensure the height is padded so total alignment is 256B.
448 ADDR_ASSERT((blockSize % 128) == 0);
449 height = PowTwoAlign(height, blockSize / 128u);
450 }
451
452 // The original "blockExtent" calculation does subtraction of logs (i.e., division) to get the
453 // sizes. We aligned our pitch and height to those sizes, which means we need to multiply the various
454 // factors back together to get back to the slice size.
455 UINT_64 sizeExceptPitch = static_cast<UINT_64>(height) * pSurfInfo->numSamples * (pSurfInfo->bpp >> 3);
456 UINT_64 sliceSize = static_cast<UINT_64>(pitchSliceSize) * sizeExceptPitch;
457 UINT_64 sliceDataSize = PowTwoAlign(static_cast<UINT_64>(pitchImgData) * sizeExceptPitch,
458 static_cast<UINT_64>(blockSize));
459
460 UINT_64 hwSliceSize = sliceSize * pOut->blockExtent.depth;
461 ADDR_ASSERT(PowTwoAlign(hwSliceSize, static_cast<UINT_64>(blockSize)) == hwSliceSize);
462
463 if ((mipIdx == 0) && CanTrimLinearPadding(pSurfInfo))
464 {
465 // When this is the last linear subresource of the whole image (as laid out in memory), then we don't
466 // need to worry about the real slice size and can reduce it to the end of the image data (or some
467 // inflated value to meet a custom depth pitch)
468 pitchSliceSize = pitchImgData;
469 if (UseCustomHeight(pSurfInfo))
470 {
471 sliceSize = pSurfInfo->sliceAlign;
472 }
473 else
474 {
475 sliceSize = sliceDataSize;
476 }
477 // CanTrimLinearPadding is always false for 3D swizzles, so block depth is always 1.
478 hwSliceSize = sliceSize;
479 }
480
481 mipSize[mipIdx] = sliceSize * depth;
482 mipSliceSize[mipIdx] = hwSliceSize;
483 mipChainSliceSize += sliceSize;
484 mipChainSliceSizeDense += (mipIdx == 0) ? sliceDataSize : sliceSize;
485
486 if (pOut->pMipInfo != NULL)
487 {
488 pOut->pMipInfo[mipIdx].pitch = pitchImgData;
489 pOut->pMipInfo[mipIdx].pitchForSlice = pitchSliceSize;
490 pOut->pMipInfo[mipIdx].height = height;
491 pOut->pMipInfo[mipIdx].depth = depth;
492 }
493 }
494 }
495
496 pOut->sliceSize = mipChainSliceSize;
497 pOut->sliceSizeDensePacked = mipChainSliceSizeDense;
498 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
499 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
500 pOut->firstMipIdInTail = firstMipInTail;
501
502 if (pOut->pMipInfo != NULL)
503 {
504 if (isLinear)
505 {
506 // 1. Linear swizzle mode doesn't have miptails.
507 // 2. The organization of linear 3D mipmap resource is same as GFX11, we should use mip slice size to
508 // caculate mip offset.
509 ADDR_ASSERT(firstMipInTail == pSurfInfo->numMipLevels);
510
511 UINT_64 sliceSize = 0;
512
513 for (INT_32 i = static_cast<INT_32>(pSurfInfo->numMipLevels) - 1; i >= 0; i--)
514 {
515 pOut->pMipInfo[i].offset = sliceSize;
516 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
517 pOut->pMipInfo[i].mipTailOffset = 0;
518
519 sliceSize += mipSliceSize[i];
520 }
521 }
522 else
523 {
524 UINT_64 offset = 0;
525 UINT_64 macroBlkOffset = 0;
526
527 // Even though "firstMipInTail" is zero-based while "numMipLevels" is one-based, from definition of
528 // _ADDR3_COMPUTE_SURFACE_INFO_OUTPUT struct,
529 // UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if there is no mip
530 // /// in tail, it will be set to number of mip levels
531 // See initialization:
532 // UINT_32 firstMipInTail = pIn->numMipLevels
533 // It is possible that they are equal if
534 // 1. a single mip level image that's larger than the largest mip that would fit in the mip tail if
535 // the mip tail existed
536 // 2. 256B_2D and linear images which don't have miptails from HWAL functionality
537 //
538 // We can use firstMipInTail != pIn->numMipLevels to check it has mip in tails and do mipInfo assignment.
539 if (firstMipInTail != pSurfInfo->numMipLevels)
540 {
541 // Determine the application dimensions of the first mip level that resides in the tail.
542 // This is distinct from "tailMaxDim" which is the maximum size of a mip level that will fit in the
543 // tail.
544 ADDR_EXTENT3D mipExtentFirstInTail = GetMipExtent(mip0Dims, firstMipInTail);
545
546 // For a 2D image, "alignedDepth" is always "1".
547 // For a 3D image, this is effectively the number of application slices associated with the first mip
548 // in the tail (up-aligned to HW requirements).
549 const UINT_32 alignedDepth = PowTwoAlign(mipExtentFirstInTail.depth, pOut->blockExtent.depth);
550
551 // "hwSlices" is the number of HW blocks required to represent the first mip level in the tail.
552 const UINT_32 hwSlices = alignedDepth / pOut->blockExtent.depth;
553
554 // Note that for 3D images that utilize a 2D swizzle mode, there really can be multiple
555 // HW slices that encompass the mip tail; i.e., hwSlices is not necessarily one.
556 // For example, you could have a single mip level 8x8x32 image with a 4KB_2D swizzle mode
557 // The 8x8 region fits into a 4KB block (so it's "in the tail"), but because we have a 2D
558 // swizzle mode (where each slice is its own block, so blockExtent.depth == 1), hwSlices
559 // will now be equivalent to the number of application slices, or 32.
560
561 // Mip tails are stored in "reverse" order -- i.e., the mip-tail itself is stored first, so the
562 // first mip level outside the tail has an offset that's the dimension of the tail itself, or one
563 // swizzle block in size.
564 offset = blockSize * hwSlices;
565 macroBlkOffset = blockSize;
566
567 // And determine the per-mip information for everything inside the mip tail.
568 GetMipOrigin(pIn, mipExtentFirstInTail, pOut);
569 }
570
571 // Again, because mip-levels are stored backwards (smallest first), we start determining mip-level
572 // offsets from the smallest to the largest.
573 // Note that firstMipInTail == 0 immediately terminates the loop, so there is no need to check for this
574 // case.
575 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
576 {
577 pOut->pMipInfo[i].offset = offset;
578 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
579 pOut->pMipInfo[i].mipTailOffset = 0;
580
581 offset += mipSize[i];
582 macroBlkOffset += mipSliceSize[i];
583 }
584 }
585 }
586 }
587
588 /**
589 ************************************************************************************************************************
590 * Gfx12Lib::HwlComputeSurfaceInfo
591 *
592 * @brief
593 * Internal function to calculate alignment for a surface
594 *
595 * @return
596 * VOID
597 ************************************************************************************************************************
598 */
HwlComputeSurfaceInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfo,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const599 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo(
600 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo, ///< [in] input structure
601 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
602 ) const
603 {
604 ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ pSurfInfo };
605
606 // Check that only 2D swizzle mode supports MSAA
607 const UINT_32 samplesLog2 = Is2dSwizzle(pSurfInfo->swizzleMode) ? Log2(pSurfInfo->numSamples) : 0;
608
609 // The block dimension width/height/depth is determined only by swizzle mode, MSAA samples and bpp
610 pOut->blockExtent = GetBlockDimensionTableEntry(pSurfInfo->swizzleMode, samplesLog2, Log2(pSurfInfo->bpp >> 3));
611
612 ADDR_E_RETURNCODE returnCode = ApplyCustomizedPitchHeight(pSurfInfo, pOut);
613
614 if (returnCode == ADDR_OK)
615 {
616 pOut->numSlices = PowTwoAlign(pSurfInfo->numSlices, pOut->blockExtent.depth);
617 pOut->baseAlign = 1 << GetBlockSizeLog2(pSurfInfo->swizzleMode);
618
619 GetMipOffset(&input, pOut);
620
621 SanityCheckSurfSize(&input, pOut);
622
623 // Slices must be exact multiples of the block sizes. However:
624 // - with 3D images, one block will contain multiple slices, so that needs to be taken into account.
625 // - with linear images that have only one slice, we may trim and use the pitch alignment for size.
626 ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) %
627 GetBlockSize(pSurfInfo->swizzleMode, CanTrimLinearPadding(pSurfInfo))) == 0);
628 }
629
630 return returnCode;
631 }
632
633 /**
634 ************************************************************************************************************************
635 * Gfx12Lib::GetBaseMipExtents
636 *
637 * @brief
638 * Return the size of the base mip level in a nice cozy little structure.
639 *
640 ************************************************************************************************************************
641 */
GetBaseMipExtents(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const642 ADDR_EXTENT3D Gfx12Lib::GetBaseMipExtents(
643 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn
644 ) const
645 {
646 return { pIn->width,
647 pIn->height,
648 (IsTex3d(pIn->resourceType) ? pIn->numSlices : 1) }; // slices is depth for 3d
649 }
650
651 /**
652 ************************************************************************************************************************
653 * Gfx12Lib::GetMaxNumMipsInTail
654 *
655 * @brief
656 * Return max number of mips in tails
657 *
658 * @return
659 * Max number of mips in tails
660 ************************************************************************************************************************
661 */
GetMaxNumMipsInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const662 UINT_32 Gfx12Lib::GetMaxNumMipsInTail(
663 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
664 ) const
665 {
666 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
667 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
668
669 UINT_32 effectiveLog2 = blockSizeLog2;
670 UINT_32 mipsInTail = 1;
671
672 if (Is3dSwizzle(pSurfInfo->swizzleMode))
673 {
674 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
675 }
676
677 if (effectiveLog2 > 8)
678 {
679 mipsInTail = (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
680 }
681
682 return mipsInTail;
683 }
684
685 /**
686 ************************************************************************************************************************
687 * Gfx12Lib::HwlCalcMipInTail
688 *
689 * @brief
690 * Internal function to calculate the "mipInTail" parameter.
691 *
692 * @return
693 * The magic "mipInTail" parameter.
694 ************************************************************************************************************************
695 */
CalcMipInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 mipLevel) const696 INT_32 Gfx12Lib::CalcMipInTail(
697 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
698 const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
699 UINT_32 mipLevel
700 ) const
701 {
702 const INT_32 firstMipIdInTail = static_cast<INT_32>(pOut->firstMipIdInTail);
703
704 INT_32 mipInTail = 0;
705
706 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
707 mipInTail = static_cast<INT_32>(mipLevel) - firstMipIdInTail;
708 if ((mipInTail < 0) || (pSurfInfo->numMipLevels == 1) || (GetBlockSize(pSurfInfo->swizzleMode) <= 256))
709 {
710 mipInTail = MaxMipLevels;
711 }
712
713 return mipInTail;
714 }
715
716 /**
717 ************************************************************************************************************************
718 * Gfx12Lib::CalcMipOffset
719 *
720 * @brief
721 *
722 * @return
723 * The magic "mipInTail" parameter.
724 ************************************************************************************************************************
725 */
CalcMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,UINT_32 mipInTail) const726 UINT_32 Gfx12Lib::CalcMipOffset(
727 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
728 UINT_32 mipInTail
729 ) const
730 {
731 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
732
733 const INT_32 signedM = static_cast<INT_32>(maxMipsInTail) - static_cast<INT_32>(1) - mipInTail;
734 const UINT_32 m = Max(0, signedM);
735 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
736
737 return mipOffset;
738 }
739
740 /**
741 ************************************************************************************************************************
742 * Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear
743 *
744 * @brief
745 * Internal function to calculate address from coord for linear swizzle surface
746 *
747 * @return
748 * ADDR_E_RETURNCODE
749 ************************************************************************************************************************
750 */
HwlComputeSurfaceAddrFromCoordLinear(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfoIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const751 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear(
752 const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
753 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfoIn, ///< [in] input structure
754 ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
755 ) const
756 {
757 ADDR3_MIP_INFO mipInfo[MaxMipLevels];
758 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
759
760 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT surfInfoOut = {0};
761 surfInfoOut.size = sizeof(surfInfoOut);
762 surfInfoOut.pMipInfo = mipInfo;
763
764 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfo(pSurfInfoIn, &surfInfoOut);
765
766 if (returnCode == ADDR_OK)
767 {
768 pOut->addr = (surfInfoOut.sliceSize * pIn->slice) +
769 mipInfo[pIn->mipId].offset +
770 (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3);
771
772 pOut->bitPosition = 0;
773 }
774
775 return returnCode;
776 }
777
778 /**
779 ************************************************************************************************************************
780 * Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled
781 *
782 * @brief
783 * Internal function to calculate address from coord for tiled swizzle surface
784 *
785 * @return
786 * ADDR_E_RETURNCODE
787 ************************************************************************************************************************
788 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const789 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
790 const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
791 ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
792 ) const
793 {
794 // 256B block cannot support 3D image.
795 ADDR_ASSERT((IsTex3d(pIn->resourceType) && IsBlock256b(pIn->swizzleMode)) == FALSE);
796
797 ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {};
798 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
799 ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {};
800
801 localIn.size = sizeof(localIn);
802 localIn.flags = pIn->flags;
803 localIn.swizzleMode = pIn->swizzleMode;
804 localIn.resourceType = pIn->resourceType;
805 localIn.format = ADDR_FMT_INVALID;
806 localIn.bpp = pIn->bpp;
807 localIn.width = Max(pIn->unAlignedDims.width, 1u);
808 localIn.height = Max(pIn->unAlignedDims.height, 1u);
809 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
810 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
811 localIn.numSamples = Max(pIn->numSamples, 1u);
812
813 localOut.size = sizeof(localOut);
814 localOut.pMipInfo = mipInfo;
815 ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &localIn };
816
817 ADDR_E_RETURNCODE ret = ComputeSurfaceInfo(&localIn, &localOut);
818
819 if (ret == ADDR_OK)
820 {
821 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
822 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
823
824 // Addr3 equation table excludes linear swizzle mode, and fortunately HwlComputeSurfaceAddrFromCoordTiled() is
825 // only called for non-linear swizzle mode.
826 const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, Log2(localIn.numSamples), elemLog2);
827
828 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
829 {
830 ADDR3_COORD coords = {};
831
832 // For a 3D image, one swizzle block contains multiple application slices.
833 // For any given image, each HW slice is addressed identically to any other HW slice.
834 // hwSliceSizeBytes is the size of one HW slice; i.e., the number of bytes for the pattern to repeat.
835 // hwSliceId is the index (0, 1, 2...) of the HW slice that an application slice resides in.
836 const UINT_64 hwSliceSizeBytes = localOut.sliceSize * localOut.blockExtent.depth;
837 const UINT_32 hwSliceId = pIn->slice / localOut.blockExtent.depth;
838
839 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockExtent.width;
840 const UINT_32 yb = pIn->y / localOut.blockExtent.height;
841 const UINT_32 xb = pIn->x / localOut.blockExtent.width;
842 const UINT_64 blkIdx = yb * pb + xb;
843
844 // Technically, the addition of "mipTailCoordX" is only necessary if we're in the mip-tail.
845 // The "mipTailCoordXYZ" values should be zero if we're not in the mip-tail.
846 const BOOL_32 inTail = ((mipInfo[pIn->mipId].mipTailOffset != 0) && (blkSizeLog2 != Log2Size256));
847
848 ADDR_ASSERT((inTail == TRUE) ||
849 // If we're not in the tail, then all of these must be zero.
850 ((mipInfo[pIn->mipId].mipTailCoordX == 0) &&
851 (mipInfo[pIn->mipId].mipTailCoordY == 0) &&
852 (mipInfo[pIn->mipId].mipTailCoordZ == 0)));
853
854 coords.x = pIn->x + mipInfo[pIn->mipId].mipTailCoordX;
855 coords.y = pIn->y + mipInfo[pIn->mipId].mipTailCoordY;
856 coords.z = pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ;
857
858 // Note that in this path, blkIdx does not account for the HW slice ID, so we need to
859 // add it in here.
860 pOut->addr = hwSliceSizeBytes * hwSliceId;
861
862 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
863 coords.x << elemLog2,
864 coords.y,
865 coords.z,
866 pIn->sample);
867
868 pOut->addr += mipInfo[pIn->mipId].macroBlockOffset +
869 (blkIdx << blkSizeLog2) +
870 blkOffset;
871
872 ADDR_ASSERT(pOut->addr < localOut.surfSize);
873 }
874 else
875 {
876 ret = ADDR_INVALIDPARAMS;
877 }
878 }
879
880 return ret;
881 }
882
883 /**
884 ************************************************************************************************************************
885 * Gfx12Lib::HwlCopyMemToSurface
886 *
887 * @brief
888 * Copy multiple regions from memory to a non-linear surface.
889 *
890 * @return
891 * Error or success.
892 ************************************************************************************************************************
893 */
HwlCopyMemToSurface(const ADDR3_COPY_MEMSURFACE_INPUT * pIn,const ADDR3_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const894 ADDR_E_RETURNCODE Gfx12Lib::HwlCopyMemToSurface(
895 const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
896 const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
897 UINT_32 regionCount
898 ) const
899 {
900 // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
901 // optimized for a particular micro-swizzle mode if available.
902 ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
903 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
904 ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
905 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
906 ADDR_E_RETURNCODE returnCode = ADDR_OK;
907
908 if (pIn->numSamples > 1)
909 {
910 // TODO: MSAA
911 returnCode = ADDR_NOTIMPLEMENTED;
912 }
913
914 localIn.size = sizeof(localIn);
915 localIn.flags = pIn->flags;
916 localIn.swizzleMode = pIn->swizzleMode;
917 localIn.resourceType = pIn->resourceType;
918 localIn.format = pIn->format;
919 localIn.bpp = pIn->bpp;
920 localIn.width = Max(pIn->unAlignedDims.width, 1u);
921 localIn.height = Max(pIn->unAlignedDims.height, 1u);
922 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
923 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
924 localIn.numSamples = Max(pIn->numSamples, 1u);
925
926 localOut.size = sizeof(localOut);
927 localOut.pMipInfo = mipInfo;
928
929 if (returnCode == ADDR_OK)
930 {
931 returnCode = ComputeSurfaceInfo(&localIn, &localOut);
932 }
933
934 LutAddresser addresser = LutAddresser();
935 UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
936 if (returnCode == ADDR_OK)
937 {
938 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
939 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
940 Log2(pIn->bpp >> 3),
941 pIn->numSamples);
942
943 ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
944 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
945 addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
946 pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
947 if (pfnCopyUnaligned == nullptr)
948 {
949 ADDR_ASSERT_ALWAYS(); // What format is this?
950 returnCode = ADDR_INVALIDPARAMS;
951 }
952 }
953
954 if (returnCode == ADDR_OK)
955 {
956 for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
957 {
958 const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
959 const ADDR3_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
960 UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
961 UINT_32 yBlks = pMipInfo->pitch / localOut.blockExtent.width;
962
963 UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
964 UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
965 UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
966
967 for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
968 {
969 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
970 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
971 // for unaligned copies.
972 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
973 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
974
975 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
976 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
977
978 ADDR_COORD2D sliceOrigin = { xStart, yStart };
979 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
980
981 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
982 VoidPtrInc(pCurRegion->pMem, memOffset),
983 pCurRegion->memRowPitch,
984 yBlks,
985 sliceOrigin,
986 sliceExtent,
987 sliceXor,
988 addresser);
989 }
990 }
991 }
992 return returnCode;
993 }
994
995 /**
996 ************************************************************************************************************************
997 * Gfx12Lib::HwlCopySurfaceToMem
998 *
999 * @brief
1000 * Copy multiple regions from a non-linear surface to memory.
1001 *
1002 * @return
1003 * Error or success.
1004 ************************************************************************************************************************
1005 */
HwlCopySurfaceToMem(const ADDR3_COPY_MEMSURFACE_INPUT * pIn,const ADDR3_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const1006 ADDR_E_RETURNCODE Gfx12Lib::HwlCopySurfaceToMem(
1007 const ADDR3_COPY_MEMSURFACE_INPUT* pIn,
1008 const ADDR3_COPY_MEMSURFACE_REGION* pRegions,
1009 UINT_32 regionCount
1010 ) const
1011 {
1012 // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
1013 // optimized for a particular micro-swizzle mode if available.
1014 ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
1015 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
1016 ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
1017 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
1018 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1019
1020 if (pIn->numSamples > 1)
1021 {
1022 // TODO: MSAA
1023 returnCode = ADDR_NOTIMPLEMENTED;
1024 }
1025
1026 localIn.size = sizeof(localIn);
1027 localIn.flags = pIn->flags;
1028 localIn.swizzleMode = pIn->swizzleMode;
1029 localIn.resourceType = pIn->resourceType;
1030 localIn.format = pIn->format;
1031 localIn.bpp = pIn->bpp;
1032 localIn.width = Max(pIn->unAlignedDims.width, 1u);
1033 localIn.height = Max(pIn->unAlignedDims.height, 1u);
1034 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
1035 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
1036 localIn.numSamples = Max(pIn->numSamples, 1u);
1037
1038 localOut.size = sizeof(localOut);
1039 localOut.pMipInfo = mipInfo;
1040
1041 if (returnCode == ADDR_OK)
1042 {
1043 returnCode = ComputeSurfaceInfo(&localIn, &localOut);
1044 }
1045
1046 LutAddresser addresser = LutAddresser();
1047 UnalignedCopyMemImgFunc pfnCopyUnaligned = nullptr;
1048 if (returnCode == ADDR_OK)
1049 {
1050 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
1051 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1052 Log2(pIn->bpp >> 3),
1053 pIn->numSamples);
1054
1055 ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K] = {};
1056 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1057 addresser.Init(fullSwizzlePattern, Log2Size256K, localOut.blockExtent, blkSizeLog2);
1058 pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
1059 if (pfnCopyUnaligned == nullptr)
1060 {
1061 ADDR_ASSERT_ALWAYS(); // What format is this?
1062 returnCode = ADDR_INVALIDPARAMS;
1063 }
1064 }
1065
1066 if (returnCode == ADDR_OK)
1067 {
1068 for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
1069 {
1070 const ADDR3_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
1071 const ADDR3_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
1072 UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
1073 UINT_32 yBlks = pMipInfo->pitch / localOut.blockExtent.width;
1074
1075 UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
1076 UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
1077 UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
1078
1079 for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
1080 {
1081 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
1082 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
1083 // for unaligned copies.
1084 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockExtent.depth);
1085 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
1086
1087 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
1088 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
1089
1090 ADDR_COORD2D sliceOrigin = { xStart, yStart };
1091 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
1092
1093 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
1094 VoidPtrInc(pCurRegion->pMem, memOffset),
1095 pCurRegion->memRowPitch,
1096 yBlks,
1097 sliceOrigin,
1098 sliceExtent,
1099 sliceXor,
1100 addresser);
1101 }
1102 }
1103 }
1104 return returnCode;
1105 }
1106
1107
1108 /**
1109 ************************************************************************************************************************
1110 * Gfx12Lib::HwlComputePipeBankXor
1111 *
1112 * @brief
1113 * Generate a PipeBankXor value to be ORed into bits above numSwizzleBits of address
1114 *
1115 * @return
1116 * PipeBankXor value
1117 ************************************************************************************************************************
1118 */
HwlComputePipeBankXor(const ADDR3_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1119 ADDR_E_RETURNCODE Gfx12Lib::HwlComputePipeBankXor(
1120 const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1121 ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1122 ) const
1123 {
1124 if ((m_numSwizzleBits != 0) && // does this configuration support swizzling
1125 // base address XOR in GFX12 will be applied to all blk_size = 4KB, 64KB, or 256KB swizzle modes,
1126 // Note that Linear and 256B are excluded.
1127 (IsLinear(pIn->swizzleMode) == FALSE) &&
1128 (IsBlock256b(pIn->swizzleMode) == FALSE))
1129 {
1130 pOut->pipeBankXor = pIn->surfIndex % (1 << m_numSwizzleBits);
1131 }
1132 else
1133 {
1134 pOut->pipeBankXor = 0;
1135 }
1136
1137 return ADDR_OK;
1138 }
1139
1140 /**
1141 ************************************************************************************************************************
1142 * Gfx12Lib::ComputeOffsetFromEquation
1143 *
1144 * @brief
1145 * Compute offset from equation
1146 *
1147 * @return
1148 * Offset
1149 ************************************************************************************************************************
1150 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const1151 UINT_32 Gfx12Lib::ComputeOffsetFromEquation(
1152 const ADDR_EQUATION* pEq, ///< Equation
1153 UINT_32 x, ///< x coord in bytes
1154 UINT_32 y, ///< y coord in pixel
1155 UINT_32 z, ///< z coord in slice
1156 UINT_32 s ///< MSAA sample index
1157 ) const
1158 {
1159 UINT_32 offset = 0;
1160
1161 for (UINT_32 i = 0; i < pEq->numBits; i++)
1162 {
1163 UINT_32 v = 0;
1164
1165 if (pEq->addr[i].valid)
1166 {
1167 if (pEq->addr[i].channel == 0)
1168 {
1169 v ^= (x >> pEq->addr[i].index) & 1;
1170 }
1171 else if (pEq->addr[i].channel == 1)
1172 {
1173 v ^= (y >> pEq->addr[i].index) & 1;
1174 }
1175 else if (pEq->addr[i].channel == 2)
1176 {
1177 v ^= (z >> pEq->addr[i].index) & 1;
1178 }
1179 else if (pEq->addr[i].channel == 3)
1180 {
1181 v ^= (s >> pEq->addr[i].index) & 1;
1182 }
1183 else
1184 {
1185 ADDR_ASSERT_ALWAYS();
1186 }
1187 }
1188
1189 offset |= (v << i);
1190 }
1191
1192 return offset;
1193 }
1194
1195 /**
1196 ************************************************************************************************************************
1197 * Gfx12Lib::GetSwizzlePatternInfo
1198 *
1199 * @brief
1200 * Get swizzle pattern
1201 *
1202 * @return
1203 * Swizzle pattern information
1204 ************************************************************************************************************************
1205 */
GetSwizzlePatternInfo(Addr3SwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numFrag) const1206 const ADDR_SW_PATINFO* Gfx12Lib::GetSwizzlePatternInfo(
1207 Addr3SwizzleMode swizzleMode, ///< Swizzle mode
1208 UINT_32 elemLog2, ///< Element size in bytes log2
1209 UINT_32 numFrag ///< Number of fragment
1210 ) const
1211 {
1212 const ADDR_SW_PATINFO* patInfo = NULL;
1213
1214 if (Is2dSwizzle(swizzleMode) == FALSE)
1215 {
1216 ADDR_ASSERT(numFrag == 1);
1217 }
1218
1219 switch (swizzleMode)
1220 {
1221 case ADDR3_256KB_2D:
1222 switch (numFrag)
1223 {
1224 case 1:
1225 patInfo = GFX12_SW_256KB_2D_1xAA_PATINFO;
1226 break;
1227 case 2:
1228 patInfo = GFX12_SW_256KB_2D_2xAA_PATINFO;
1229 break;
1230 case 4:
1231 patInfo = GFX12_SW_256KB_2D_4xAA_PATINFO;
1232 break;
1233 case 8:
1234 patInfo = GFX12_SW_256KB_2D_8xAA_PATINFO;
1235 break;
1236 default:
1237 ADDR_ASSERT_ALWAYS();
1238 }
1239 break;
1240 case ADDR3_256KB_3D:
1241 patInfo = GFX12_SW_256KB_3D_PATINFO;
1242 break;
1243 case ADDR3_64KB_2D:
1244 switch (numFrag)
1245 {
1246 case 1:
1247 patInfo = GFX12_SW_64KB_2D_1xAA_PATINFO;
1248 break;
1249 case 2:
1250 patInfo = GFX12_SW_64KB_2D_2xAA_PATINFO;
1251 break;
1252 case 4:
1253 patInfo = GFX12_SW_64KB_2D_4xAA_PATINFO;
1254 break;
1255 case 8:
1256 patInfo = GFX12_SW_64KB_2D_8xAA_PATINFO;
1257 break;
1258 default:
1259 ADDR_ASSERT_ALWAYS();
1260 }
1261 break;
1262 case ADDR3_64KB_3D:
1263 patInfo = GFX12_SW_64KB_3D_PATINFO;
1264 break;
1265 case ADDR3_4KB_2D:
1266 switch (numFrag)
1267 {
1268 case 1:
1269 patInfo = GFX12_SW_4KB_2D_1xAA_PATINFO;
1270 break;
1271 case 2:
1272 patInfo = GFX12_SW_4KB_2D_2xAA_PATINFO;
1273 break;
1274 case 4:
1275 patInfo = GFX12_SW_4KB_2D_4xAA_PATINFO;
1276 break;
1277 case 8:
1278 patInfo = GFX12_SW_4KB_2D_8xAA_PATINFO;
1279 break;
1280 default:
1281 ADDR_ASSERT_ALWAYS();
1282 }
1283 break;
1284 case ADDR3_4KB_3D:
1285 patInfo = GFX12_SW_4KB_3D_PATINFO;
1286 break;
1287 case ADDR3_256B_2D:
1288 switch (numFrag)
1289 {
1290 case 1:
1291 patInfo = GFX12_SW_256B_2D_1xAA_PATINFO;
1292 break;
1293 case 2:
1294 patInfo = GFX12_SW_256B_2D_2xAA_PATINFO;
1295 break;
1296 case 4:
1297 patInfo = GFX12_SW_256B_2D_4xAA_PATINFO;
1298 break;
1299 case 8:
1300 patInfo = GFX12_SW_256B_2D_8xAA_PATINFO;
1301 break;
1302 default:
1303 break;
1304 }
1305 break;
1306 default:
1307 ADDR_ASSERT_ALWAYS();
1308 break;
1309 }
1310
1311 return (patInfo != NULL) ? &patInfo[elemLog2] : NULL;
1312 }
1313 /**
1314 ************************************************************************************************************************
1315 * Gfx12Lib::HwlInitGlobalParams
1316 *
1317 * @brief
1318 * Initializes global parameters
1319 *
1320 * @return
1321 * TRUE if all settings are valid
1322 *
1323 ************************************************************************************************************************
1324 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1325 BOOL_32 Gfx12Lib::HwlInitGlobalParams(
1326 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1327 {
1328 BOOL_32 valid = TRUE;
1329 GB_ADDR_CONFIG_GFX12 gbAddrConfig;
1330
1331 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1332
1333 switch (gbAddrConfig.bits.NUM_PIPES)
1334 {
1335 case ADDR_CONFIG_1_PIPE:
1336 m_pipesLog2 = 0;
1337 break;
1338 case ADDR_CONFIG_2_PIPE:
1339 m_pipesLog2 = 1;
1340 break;
1341 case ADDR_CONFIG_4_PIPE:
1342 m_pipesLog2 = 2;
1343 break;
1344 case ADDR_CONFIG_8_PIPE:
1345 m_pipesLog2 = 3;
1346 break;
1347 case ADDR_CONFIG_16_PIPE:
1348 m_pipesLog2 = 4;
1349 break;
1350 case ADDR_CONFIG_32_PIPE:
1351 m_pipesLog2 = 5;
1352 break;
1353 case ADDR_CONFIG_64_PIPE:
1354 m_pipesLog2 = 6;
1355 break;
1356 default:
1357 ADDR_ASSERT_ALWAYS();
1358 valid = FALSE;
1359 break;
1360 }
1361
1362 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1363 {
1364 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1365 m_pipeInterleaveLog2 = 8;
1366 break;
1367 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1368 m_pipeInterleaveLog2 = 9;
1369 break;
1370 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1371 m_pipeInterleaveLog2 = 10;
1372 break;
1373 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1374 m_pipeInterleaveLog2 = 11;
1375 break;
1376 default:
1377 ADDR_ASSERT_ALWAYS();
1378 valid = FALSE;
1379 break;
1380 }
1381
1382 m_numSwizzleBits = ((m_pipesLog2 >= 3) ? m_pipesLog2 - 2 : 0);
1383
1384 if (valid)
1385 {
1386 InitEquationTable();
1387 InitBlockDimensionTable();
1388 }
1389
1390 return valid;
1391 }
1392
1393 /**
1394 ************************************************************************************************************************
1395 * Gfx12Lib::HwlComputeNonBlockCompressedView
1396 *
1397 * @brief
1398 * Compute non-block-compressed view for a given mipmap level/slice.
1399 *
1400 * @return
1401 * ADDR_E_RETURNCODE
1402 ************************************************************************************************************************
1403 */
HwlComputeNonBlockCompressedView(const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1404 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeNonBlockCompressedView(
1405 const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
1406 ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
1407 ) const
1408 {
1409 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1410
1411 if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1412 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1413 {
1414 // Only support BC1~BC7, ASTC, or ETC2 for now...
1415 returnCode = ADDR_NOTSUPPORTED;
1416 }
1417 else
1418 {
1419 UINT_32 bcWidth, bcHeight;
1420 const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1421
1422 ADDR3_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1423 infoIn.size = sizeof(infoIn);
1424 infoIn.flags = pIn->flags;
1425 infoIn.swizzleMode = pIn->swizzleMode;
1426 infoIn.resourceType = pIn->resourceType;
1427 infoIn.format = pIn->format;
1428 infoIn.bpp = bpp;
1429 infoIn.width = RoundUpQuotient(pIn->unAlignedDims.width, bcWidth);
1430 infoIn.height = RoundUpQuotient(pIn->unAlignedDims.height, bcHeight);
1431 infoIn.numSlices = pIn->unAlignedDims.depth;
1432 infoIn.numMipLevels = pIn->numMipLevels;
1433 infoIn.numSamples = 1;
1434
1435 ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {};
1436
1437 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1438 infoOut.size = sizeof(infoOut);
1439 infoOut.pMipInfo = mipInfo;
1440
1441 returnCode = HwlComputeSurfaceInfo(&infoIn, &infoOut);
1442
1443 if (returnCode == ADDR_OK)
1444 {
1445 ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1446 subOffIn.size = sizeof(subOffIn);
1447 subOffIn.swizzleMode = infoIn.swizzleMode;
1448 subOffIn.resourceType = infoIn.resourceType;
1449 subOffIn.pipeBankXor = pIn->pipeBankXor;
1450 subOffIn.slice = pIn->slice;
1451 subOffIn.sliceSize = infoOut.sliceSize;
1452 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1453 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
1454
1455 ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1456 subOffOut.size = sizeof(subOffOut);
1457
1458 // For any mipmap level, move nonBc view base address by offset
1459 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1460 pOut->offset = subOffOut.offset;
1461
1462 ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1463 slicePbXorIn.size = sizeof(slicePbXorIn);
1464 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
1465 slicePbXorIn.resourceType = infoIn.resourceType;
1466 slicePbXorIn.bpe = infoIn.bpp;
1467 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
1468 slicePbXorIn.slice = pIn->slice;
1469 slicePbXorIn.numSamples = 1;
1470
1471 ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
1472 slicePbXorOut.size = sizeof(slicePbXorOut);
1473
1474 // For any mipmap level, nonBc view should use computed pbXor
1475 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
1476 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
1477
1478 const BOOL_32 tiled = (pIn->swizzleMode != ADDR3_LINEAR);
1479 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail);
1480 const UINT_32 requestMipWidth =
1481 RoundUpQuotient(Max(pIn->unAlignedDims.width >> pIn->mipId, 1u), bcWidth);
1482 const UINT_32 requestMipHeight =
1483 RoundUpQuotient(Max(pIn->unAlignedDims.height >> pIn->mipId, 1u), bcHeight);
1484
1485 if (inTail)
1486 {
1487 // For mipmap level that is in mip tail block, hack a lot of things...
1488 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
1489 // are fit in tail block:
1490
1491 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
1492 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
1493
1494 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
1495 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
1496
1497 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
1498 pOut->unAlignedDims.width = Min(requestMipWidth << pOut->mipId, infoOut.blockExtent.width / 2);
1499
1500 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
1501 pOut->unAlignedDims.height = Min(requestMipHeight << pOut->mipId, infoOut.blockExtent.height);
1502 }
1503 // This check should cover at least mipId == 0
1504 else if ((requestMipWidth << pIn->mipId) == infoIn.width)
1505 {
1506 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
1507 // - only one mipmap level and mipId = 0
1508 pOut->mipId = 0;
1509 pOut->numMipLevels = 1;
1510
1511 // (mip0) width = requestMipWidth
1512 pOut->unAlignedDims.width = requestMipWidth;
1513
1514 // (mip0) height = requestMipHeight
1515 pOut->unAlignedDims.height = requestMipHeight;
1516 }
1517 else
1518 {
1519 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
1520 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
1521 // because single mip view may have different pitch value than original (multiple) mip view...
1522 // A simple case would be:
1523 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
1524 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
1525 // mip0 width = 0x101/mip1 width = 0x80
1526 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
1527 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
1528
1529 // - 2 levels and mipId = 1
1530 pOut->mipId = 1;
1531 pOut->numMipLevels = 2;
1532
1533 const UINT_32 upperMipWidth =
1534 RoundUpQuotient(Max(pIn->unAlignedDims.width >> (pIn->mipId - 1), 1u), bcWidth);
1535 const UINT_32 upperMipHeight =
1536 RoundUpQuotient(Max(pIn->unAlignedDims.height >> (pIn->mipId - 1), 1u), bcHeight);
1537
1538 const BOOL_32 needToAvoidInTail = tiled &&
1539 (requestMipWidth <= infoOut.blockExtent.width / 2) &&
1540 (requestMipHeight <= infoOut.blockExtent.height);
1541
1542 const UINT_32 hwMipWidth =
1543 PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockExtent.width);
1544 const UINT_32 hwMipHeight =
1545 PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockExtent.height);
1546
1547 const BOOL_32 needExtraWidth =
1548 ((upperMipWidth < requestMipWidth * 2) ||
1549 ((upperMipWidth == requestMipWidth * 2) &&
1550 ((needToAvoidInTail == TRUE) ||
1551 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockExtent.width)))));
1552
1553 const BOOL_32 needExtraHeight =
1554 ((upperMipHeight < requestMipHeight * 2) ||
1555 ((upperMipHeight == requestMipHeight * 2) &&
1556 ((needToAvoidInTail == TRUE) ||
1557 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockExtent.height)))));
1558
1559 // (mip0) width = requestLastMipLevelWidth
1560 pOut->unAlignedDims.width = upperMipWidth + (needExtraWidth ? 1: 0);
1561
1562 // (mip0) height = requestLastMipLevelHeight
1563 pOut->unAlignedDims.height = upperMipHeight + (needExtraHeight ? 1: 0);
1564 }
1565
1566 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
1567 ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.width, pOut->mipId) == requestMipWidth);
1568 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
1569 ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.height, pOut->mipId) == requestMipHeight);
1570 }
1571 }
1572
1573 return returnCode;
1574 }
1575
1576 /**
1577 ************************************************************************************************************************
1578 * Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1579 *
1580 * @brief
1581 * Compute sub resource offset to support swizzle pattern
1582 *
1583 * @return
1584 * VOID
1585 ************************************************************************************************************************
1586 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1587 VOID Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1588 const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
1589 ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
1590 ) const
1591 {
1592 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1593 }
1594
1595 /**
1596 ************************************************************************************************************************
1597 * Gfx12Lib::HwlComputeSlicePipeBankXor
1598 *
1599 * @brief
1600 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1601 *
1602 * @return
1603 * PipeBankXor value
1604 ************************************************************************************************************************
1605 */
HwlComputeSlicePipeBankXor(const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1606 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSlicePipeBankXor(
1607 const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1608 ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1609 ) const
1610 {
1611 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1612
1613 // PipeBankXor is only applied to 4KB, 64KB and 256KB on GFX12.
1614 if ((IsLinear(pIn->swizzleMode) == FALSE) && (IsBlock256b(pIn->swizzleMode) == FALSE))
1615 {
1616 if (pIn->bpe == 0)
1617 {
1618 // Require a valid bytes-per-element value passed from client...
1619 returnCode = ADDR_INVALIDPARAMS;
1620 }
1621 else
1622 {
1623 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1624 Log2(pIn->bpe >> 3),
1625 1);
1626
1627 if (pPatInfo != NULL)
1628 {
1629 const UINT_32 elemLog2 = Log2(pIn->bpe >> 3);
1630
1631 // Addr3 equation table excludes linear swizzle mode, and fortunately when calling
1632 // HwlComputeSlicePipeBankXor the swizzle mode is non-linear, so we don't need to worry about negative
1633 // table index.
1634 const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), elemLog2);
1635
1636 const UINT_32 pipeBankXorOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
1637 0,
1638 0,
1639 pIn->slice,
1640 0);
1641
1642 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1643
1644 // Should have no bit set under pipe interleave
1645 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1646
1647 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1648 }
1649 else
1650 {
1651 // Should never come here...
1652 ADDR_NOT_IMPLEMENTED();
1653
1654 returnCode = ADDR_NOTSUPPORTED;
1655 }
1656 }
1657 }
1658 else
1659 {
1660 pOut->pipeBankXor = 0;
1661 }
1662
1663 return returnCode;
1664 }
1665
1666 /**
1667 ************************************************************************************************************************
1668 * Gfx12Lib::HwlConvertChipFamily
1669 *
1670 * @brief
1671 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1672 * @return
1673 * ChipFamily
1674 ************************************************************************************************************************
1675 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1676 ChipFamily Gfx12Lib::HwlConvertChipFamily(
1677 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
1678 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1679 {
1680 return ADDR_CHIP_FAMILY_NAVI;
1681 }
1682
1683 /**
1684 ************************************************************************************************************************
1685 * Gfx12Lib::SanityCheckSurfSize
1686 *
1687 * @brief
1688 * Calculate the surface size via the exact hardware algorithm to see if it matches.
1689 *
1690 * @return
1691 ************************************************************************************************************************
1692 */
SanityCheckSurfSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1693 void Gfx12Lib::SanityCheckSurfSize(
1694 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1695 const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut
1696 ) const
1697 {
1698 #if DEBUG
1699 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1700 // Verify that the requested image size is valid for the below algorithm. The below code includes
1701 // implicit assumptions about the surface dimensions being less than "MaxImageDim"; otherwise, it can't
1702 // calculate "firstMipInTail" accurately and the below assertion will trip incorrectly.
1703 //
1704 // Surfaces destined for use only on the SDMA engine can exceed the gfx-engine-imposed limitations of
1705 // the "maximum" image dimensions.
1706 if ((pSurfInfo->width <= MaxImageDim) &&
1707 (pSurfInfo->height <= MaxImageDim) &&
1708 (pSurfInfo->numMipLevels <= MaxMipLevels) &&
1709 (UseCustomPitch(pSurfInfo) == FALSE) &&
1710 (UseCustomHeight(pSurfInfo) == FALSE) &&
1711 // HiZS surfaces have a reduced image size (i.e,. each pixel represents an 8x8 region of the parent
1712 // image, at least for single samples) but they still have the same number of mip levels as the
1713 // parent image. This disconnect produces false assertions below as the image size doesn't apparently
1714 // support the specified number of mip levels.
1715 ((pSurfInfo->flags.hiZHiS == 0) || (pSurfInfo->numMipLevels == 1)))
1716 {
1717 UINT_32 lastMipSize = 1;
1718 UINT_64 dataChainSize = 0;
1719
1720 const ADDR_EXTENT3D mip0Dims = GetBaseMipExtents(pSurfInfo);
1721 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1722 const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn, pOut->blockExtent);
1723 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
1724
1725 UINT_32 firstMipInTail = 0;
1726 for (INT_32 mipIdx = MaxMipLevels - 1; mipIdx >= 0; mipIdx--)
1727 {
1728 const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx);
1729
1730 if (IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
1731 {
1732 firstMipInTail = mipIdx;
1733 }
1734 }
1735
1736 for (INT_32 mipIdx = firstMipInTail - 1; mipIdx >= -1; mipIdx--)
1737 {
1738 if (mipIdx < (static_cast<INT_32>(pSurfInfo->numMipLevels) - 1))
1739 {
1740 dataChainSize += lastMipSize;
1741 }
1742
1743 if (mipIdx >= 0)
1744 {
1745 const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx);
1746 const UINT_32 mipBlockWidth = ShiftCeil(mipExtents.width, Log2(pOut->blockExtent.width));
1747 const UINT_32 mipBlockHeight = ShiftCeil(mipExtents.height, Log2(pOut->blockExtent.height));
1748
1749 lastMipSize = 4 * lastMipSize
1750 - ((mipBlockWidth & 1) ? mipBlockHeight : 0)
1751 - ((mipBlockHeight & 1) ? mipBlockWidth : 0)
1752 - ((mipBlockWidth & mipBlockHeight & 1) ? 1 : 0);
1753 }
1754 }
1755
1756 if (CanTrimLinearPadding(pSurfInfo))
1757 {
1758 ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) <= (dataChainSize << blockSizeLog2));
1759 }
1760 else
1761 {
1762 ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) == (dataChainSize << blockSizeLog2));
1763 }
1764 }
1765 #endif
1766 }
1767
1768 /**
1769 ************************************************************************************************************************
1770 * Gfx12Lib::HwlGetMicroBlockSize
1771 *
1772 * @brief
1773 * Determines the dimensions of a 256B microblock
1774 *
1775 * @return
1776 ************************************************************************************************************************
1777 */
HwlGetMicroBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const1778 ADDR_EXTENT3D Gfx12Lib::HwlGetMicroBlockSize(
1779 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
1780 ) const
1781 {
1782 ADDR_EXTENT3D out = {};
1783 INT_32 widthLog2 = 0;
1784 INT_32 heightLog2 = 0;
1785 INT_32 depthLog2 = 0;
1786 Addr3SwizzleMode swMode = pIn->pSurfInfo->swizzleMode;
1787 UINT_32 bppLog2 = Log2(pIn->pSurfInfo->bpp >> 3);
1788 UINT_32 blockBits = 8 - bppLog2;
1789 if (IsLinear(swMode))
1790 {
1791 widthLog2 = blockBits;
1792 }
1793 else if (Is2dSwizzle(swMode))
1794 {
1795 widthLog2 = (blockBits >> 1) + (blockBits & 1);
1796 heightLog2 = (blockBits >> 1);
1797 }
1798 else
1799 {
1800 ADDR_ASSERT(Is3dSwizzle(swMode));
1801 depthLog2 = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1802 widthLog2 = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1803 heightLog2 = (blockBits / 3);
1804 }
1805 out.width = 1 << widthLog2;
1806 out.height = 1 << heightLog2;
1807 out.depth = 1 << depthLog2;
1808 return out;
1809 }
1810
1811 /**
1812 ************************************************************************************************************************
1813 * Gfx12Lib::HwlCalcBlockSize
1814 *
1815 * @brief
1816 * Determines the extent, in pixels of a swizzle block.
1817 *
1818 * @return
1819 ************************************************************************************************************************
1820 */
HwlCalcBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR_EXTENT3D * pExtent) const1821 VOID Gfx12Lib::HwlCalcBlockSize(
1822 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1823 ADDR_EXTENT3D* pExtent
1824 ) const
1825 {
1826 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1827 const UINT_32 log2BlkSize = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1828 const UINT_32 eleBytes = pSurfInfo->bpp >> 3;
1829 const UINT_32 log2EleBytes = Log2(eleBytes);
1830
1831 if (IsLinear(pSurfInfo->swizzleMode))
1832 {
1833 // 1D swizzle mode doesn't support MSAA, so there is no need to consider log2(samples)
1834 pExtent->width = 1 << (log2BlkSize - log2EleBytes);
1835 pExtent->height = 1;
1836 pExtent->depth = 1;
1837 }
1838 else if (Is3dSwizzle(pSurfInfo->swizzleMode))
1839 {
1840 // 3D swizlze mode doesn't support MSAA, so there is no need to consider log2(samples)
1841 const UINT_32 base = (log2BlkSize / 3) - (log2EleBytes / 3);
1842 const UINT_32 log2BlkSizeMod3 = log2BlkSize % 3;
1843 const UINT_32 log2EleBytesMod3 = log2EleBytes % 3;
1844
1845 UINT_32 x = base;
1846 UINT_32 y = base;
1847 UINT_32 z = base;
1848
1849 if (log2BlkSizeMod3 > 0)
1850 {
1851 x++;
1852 }
1853
1854 if (log2BlkSizeMod3 > 1)
1855 {
1856 z++;
1857 }
1858
1859 if (log2EleBytesMod3 > 0)
1860 {
1861 x--;
1862 }
1863
1864 if (log2EleBytesMod3 > 1)
1865 {
1866 z--;
1867 }
1868
1869 pExtent->width = 1u << x;
1870 pExtent->height = 1u << y;
1871 pExtent->depth = 1u << z;
1872 }
1873 else
1874 {
1875 // Only 2D swizzle mode supports MSAA...
1876 // Since for gfx12 MSAA is unconditionally supported by all 2D swizzle modes, we don't need to restrict samples
1877 // to be 1 for ADDR3_256B_2D and ADDR3_4KB_2D as gfx10/11 did.
1878 const UINT_32 log2Samples = Log2(pSurfInfo->numSamples);
1879 const UINT_32 log2Width = (log2BlkSize >> 1) -
1880 (log2EleBytes >> 1) -
1881 (log2Samples >> 1) -
1882 (log2EleBytes & log2Samples & 1);
1883 const UINT_32 log2Height = (log2BlkSize >> 1) -
1884 (log2EleBytes >> 1) -
1885 (log2Samples >> 1) -
1886 ((log2EleBytes | log2Samples) & 1);
1887
1888 // Return the extent in actual units, not log2
1889 pExtent->width = 1u << log2Width;
1890 pExtent->height = 1u << log2Height;
1891 pExtent->depth = 1;
1892 }
1893 }
1894
1895 /**
1896 ************************************************************************************************************************
1897 * Gfx12Lib::HwlGetMipInTailMaxSize
1898 *
1899 * @brief
1900 * Determines the max size of a mip level that fits in the mip-tail.
1901 *
1902 * @return
1903 ************************************************************************************************************************
1904 */
HwlGetMipInTailMaxSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR_EXTENT3D & blockDims) const1905 ADDR_EXTENT3D Gfx12Lib::HwlGetMipInTailMaxSize(
1906 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1907 const ADDR_EXTENT3D& blockDims) const
1908 {
1909 ADDR_EXTENT3D mipTailDim = {};
1910 const Addr3SwizzleMode swizzleMode = pIn->pSurfInfo->swizzleMode;
1911 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
1912
1913 mipTailDim = blockDims;
1914
1915 if (Is3dSwizzle(swizzleMode))
1916 {
1917 const UINT_32 dim = log2BlkSize % 3;
1918
1919 if (dim == 0)
1920 {
1921 mipTailDim.height >>= 1;
1922 }
1923 else if (dim == 1)
1924 {
1925 mipTailDim.width >>= 1;
1926 }
1927 else
1928 {
1929 mipTailDim.depth >>= 1;
1930 }
1931 }
1932 else
1933 {
1934 if ((log2BlkSize % 2) == 0)
1935 {
1936 mipTailDim.width >>= 1;
1937 }
1938 else
1939 {
1940 mipTailDim.height >>= 1;
1941 }
1942 }
1943 return mipTailDim;
1944 }
1945
1946
1947 /**
1948 ************************************************************************************************************************
1949 * Lib::GetPossibleSwizzleModes
1950 *
1951 * @brief
1952 * GFX12 specific implementation of Addr3GetPossibleSwizzleModes
1953 *
1954 * @return
1955 * ADDR_E_RETURNCODE
1956 ************************************************************************************************************************
1957 */
HwlGetPossibleSwizzleModes(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn,ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT * pOut) const1958 ADDR_E_RETURNCODE Gfx12Lib::HwlGetPossibleSwizzleModes(
1959 const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn, ///< [in] input structure
1960 ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT* pOut ///< [out] output structure
1961 ) const
1962 {
1963 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1964
1965 const ADDR3_SURFACE_FLAGS flags = pIn->flags;
1966
1967 if (pIn->bpp == 96)
1968 {
1969 pOut->validModes.swLinear = 1;
1970 }
1971 // Depth/Stencil images can't be linear and must be 2D swizzle modes.
1972 // These three are related to DB block that supports only SW_64KB_2D and SW_256KB_2D for DSV.
1973 else if (flags.depth || flags.stencil)
1974 {
1975 pOut->validModes.sw2d64kB = 1;
1976 pOut->validModes.sw2d256kB = 1;
1977 }
1978 // The organization of elements in the hierarchical surface is the same as any other surface, and it can support
1979 // any 2D swizzle mode (SW_256_2D, SW_4KB_2D, SW_64KB_2D, or SW_256KB_2D). The swizzle mode can be selected
1980 // orthogonally to the underlying z or stencil surface.
1981 else if (pIn->flags.hiZHiS)
1982 {
1983 pOut->validModes.sw2d256B = 1;
1984 pOut->validModes.sw2d4kB = 1;
1985 pOut->validModes.sw2d64kB = 1;
1986 pOut->validModes.sw2d256kB = 1;
1987 }
1988 // MSAA can't be linear and must be 2D swizzle modes.
1989 else if (pIn->numSamples > 1)
1990 {
1991 pOut->validModes.sw2d256B = 1;
1992 pOut->validModes.sw2d4kB = 1;
1993 pOut->validModes.sw2d64kB = 1;
1994 pOut->validModes.sw2d256kB = 1;
1995 }
1996 // Some APIs (like Vulkan) require that PRT should always use 64KB blocks
1997 else if (flags.standardPrt)
1998 {
1999 if (IsTex3d(pIn->resourceType) && (flags.view3dAs2dArray == 0))
2000 {
2001 pOut->validModes.sw3d64kB = 1;
2002 }
2003 else
2004 {
2005 pOut->validModes.sw2d64kB = 1;
2006 }
2007 }
2008 else if (// Block-compressed images need to be either using 2D or linear swizzle modes.
2009 flags.blockCompressed ||
2010 // Only 3D w/ view3dAs2dArray == 0 will use 1D/2D block swizzle modes
2011 (IsTex3d(pIn->resourceType) == FALSE) || flags.view3dAs2dArray ||
2012 // NV12 and P010 support
2013 // SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
2014 // There could be more multimedia formats that require more hw specific tiling modes...
2015 flags.nv12 || flags.p010)
2016 {
2017 // Linear is not allowed for VRS images.
2018 if (flags.isVrsImage == 0)
2019 {
2020 pOut->validModes.swLinear = 1;
2021 }
2022
2023 // 3D resources can't use SW_256B_2D
2024 if (IsTex3d(pIn->resourceType) == FALSE)
2025 {
2026 pOut->validModes.sw2d256B = 1;
2027 }
2028 pOut->validModes.sw2d4kB = 1;
2029 pOut->validModes.sw2d64kB = 1;
2030 pOut->validModes.sw2d256kB = 1;
2031 }
2032 else if (IsTex3d(pIn->resourceType))
2033 {
2034 // An eventual determination would be based on pal setting of height_watermark and depth_watermark.
2035 // However, we just adopt the simpler logic currently.
2036 // For 3D images w/ view3dAs2dArray = 0, SW_3D is preferred.
2037 // For 3D images w/ view3dAs2dArray = 1, it should go to 2D path above.
2038 // Enable linear since client may force linear tiling for 3D texture that does not set view3dAs2dArray.
2039 pOut->validModes.swLinear = 1;
2040 pOut->validModes.sw3d4kB = 1;
2041 pOut->validModes.sw3d64kB = 1;
2042 pOut->validModes.sw3d256kB = 1;
2043 }
2044
2045 // If client specifies a max alignment, remove swizzles that require alignment beyond it.
2046 if (pIn->maxAlign != 0)
2047 {
2048 if (pIn->maxAlign < Size256K)
2049 {
2050 pOut->validModes.value &= ~Blk256KBSwModeMask;
2051 }
2052
2053 if (pIn->maxAlign < Size64K)
2054 {
2055 pOut->validModes.value &= ~Blk64KBSwModeMask;
2056 }
2057
2058 if (pIn->maxAlign < Size4K)
2059 {
2060 pOut->validModes.value &= ~Blk4KBSwModeMask;
2061 }
2062
2063 if (pIn->maxAlign < Size256)
2064 {
2065 pOut->validModes.value &= ~Blk256BSwModeMask;
2066 }
2067 }
2068
2069 return returnCode;
2070 }
2071
2072 /**
2073 ************************************************************************************************************************
2074 * Gfx12Lib::HwlComputeStereoInfo
2075 *
2076 * @brief
2077 * Compute height alignment and right eye pipeBankXor for stereo surface
2078 *
2079 * @return
2080 * Error code
2081 *
2082 ************************************************************************************************************************
2083 */
HwlComputeStereoInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const2084 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeStereoInfo(
2085 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2086 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2087 UINT_32* pRightXor ///< Right eye xor
2088 ) const
2089 {
2090 ADDR_E_RETURNCODE ret = ADDR_OK;
2091
2092 *pRightXor = 0;
2093
2094 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2095 const UINT_32 samplesLog2 = Log2(pIn->numSamples);
2096 const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, samplesLog2, elemLog2);
2097
2098 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2099 {
2100 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2101
2102 UINT_32 yMax = 0;
2103 UINT_32 yPosMask = 0;
2104
2105 // First get "max y bit"
2106 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2107 {
2108 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
2109
2110 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2111 (m_equationTable[eqIndex].addr[i].index > yMax))
2112 {
2113 yMax = m_equationTable[eqIndex].addr[i].index;
2114 }
2115 }
2116
2117 // Then loop again for populating a position mask of "max Y bit"
2118 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2119 {
2120 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2121 (m_equationTable[eqIndex].addr[i].index == yMax))
2122 {
2123 yPosMask |= 1u << i;
2124 }
2125 }
2126
2127 const UINT_32 additionalAlign = 1 << yMax;
2128
2129 if (additionalAlign >= *pAlignY)
2130 {
2131 *pAlignY = additionalAlign;
2132
2133 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2134
2135 if ((alignedHeight >> yMax) & 1)
2136 {
2137 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
2138 }
2139 }
2140 }
2141 else
2142 {
2143 ret = ADDR_INVALIDPARAMS;
2144 }
2145
2146 return ret;
2147 }
2148
2149 /**
2150 ************************************************************************************************************************
2151 * Gfx12Lib::HwlValidateNonSwModeParams
2152 *
2153 * @brief
2154 * Validate compute surface info params except swizzle mode
2155 *
2156 * @return
2157 * TRUE if parameters are valid, FALSE otherwise
2158 ************************************************************************************************************************
2159 */
HwlValidateNonSwModeParams(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn) const2160 BOOL_32 Gfx12Lib::HwlValidateNonSwModeParams(
2161 const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn
2162 ) const
2163 {
2164 const ADDR3_SURFACE_FLAGS flags = pIn->flags;
2165 const AddrResourceType rsrcType = pIn->resourceType;
2166 const BOOL_32 isVrs = flags.isVrsImage;
2167 const BOOL_32 isStereo = flags.qbStereo;
2168 const BOOL_32 isDisplay = flags.display;
2169 const BOOL_32 isMipmap = (pIn->numMipLevels > 1);
2170 const BOOL_32 isMsaa = (pIn->numSamples > 1);
2171 const UINT_32 bpp = pIn->bpp;
2172
2173 BOOL_32 valid = TRUE;
2174 if ((bpp == 0) || (bpp > 128) || (pIn->width == 0) || (pIn->numSamples > 8))
2175 {
2176 ADDR_ASSERT_ALWAYS();
2177 valid = FALSE;
2178 }
2179
2180 // Resource type check
2181 if (IsTex1d(rsrcType))
2182 {
2183 if (isMsaa || isStereo || isVrs || isDisplay)
2184 {
2185 ADDR_ASSERT_ALWAYS();
2186 valid = FALSE;
2187 }
2188 }
2189 else if (IsTex2d(rsrcType))
2190 {
2191 if ((isMsaa && isMipmap) || (isStereo && isMsaa) || (isStereo && isMipmap) ||
2192 // VRS surface needs to be 8BPP format
2193 (isVrs && (bpp != 8)))
2194 {
2195 ADDR_ASSERT_ALWAYS();
2196 valid = FALSE;
2197 }
2198 }
2199 else if (IsTex3d(rsrcType))
2200 {
2201 if (isMsaa || isStereo || isVrs || isDisplay)
2202 {
2203 ADDR_ASSERT_ALWAYS();
2204 valid = FALSE;
2205 }
2206 }
2207 else
2208 {
2209 // An invalid resource type that is not 1D, 2D or 3D.
2210 ADDR_ASSERT_ALWAYS();
2211 valid = FALSE;
2212 }
2213
2214 return valid;
2215 }
2216
2217 } // V3
2218 } // Addr
2219