1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx11addrlib.cpp
* @brief  Contains the implementation for the Gfx11Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx11addrlib.h"
17 #include "gfx11_gb_reg.h"
18 #include "addrswizzler.h"
19
20 #include "amdgpu_asic_addr.h"
21
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 * Gfx11HwlInit
30 *
31 * @brief
*   Creates a Gfx11Lib object.
33 *
34 * @return
*   Returns a Gfx11Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx11HwlInit(const Client * pClient)38 Addr::Lib* Gfx11HwlInit(const Client* pClient)
39 {
40 return V2::Gfx11Lib::CreateObj(pClient);
41 }
42
43 namespace V2
44 {
45
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 // Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49
50 const SwizzleModeFlags Gfx11Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
51 {//Linear 256B 4KB 64KB 256KB Z Std Disp Rot XOR T RtOpt Reserved
52 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
53 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
54 {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
55 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
56
57 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
58 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
59 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
60 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
61
62 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
63 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
64 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
65 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
66
67 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
68 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
69 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
70 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
71
72 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
73 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
74 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
75 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
76
77 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
78 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X
79 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X
80 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
81
82 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
83 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
84 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
85 {{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_64KB_R_X
86
87 {{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_256KB_Z_X
88 {{0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_256KB_S_X
89 {{0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_256KB_D_X
90 {{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_256KB_R_X
91 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
92 };
93
94 const Dim3d Gfx11Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
95
96 const Dim3d Gfx11Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}};
97 const Dim3d Gfx11Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
98 const Dim3d Gfx11Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
99
100 /**
101 ************************************************************************************************************************
102 * Gfx11Lib::Gfx11Lib
103 *
104 * @brief
105 * Constructor
106 *
107 ************************************************************************************************************************
108 */
Gfx11Lib(const Client * pClient)109 Gfx11Lib::Gfx11Lib(const Client* pClient)
110 :
111 Lib(pClient),
112 m_numPkrLog2(0),
113 m_numSaLog2(0),
114 m_colorBaseIndex(0),
115 m_htileBaseIndex(0),
116 m_dccBaseIndex(0)
117 {
118 memset(&m_settings, 0, sizeof(m_settings));
119 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
120 }
121
122 /**
123 ************************************************************************************************************************
124 * Gfx11Lib::~Gfx11Lib
125 *
126 * @brief
127 * Destructor
128 ************************************************************************************************************************
129 */
~Gfx11Lib()130 Gfx11Lib::~Gfx11Lib()
131 {
132 }
133
134 /**
135 ************************************************************************************************************************
136 * Gfx11Lib::HwlComputeHtileInfo
137 *
138 * @brief
*   Interface function stub of AddrComputeHtileInfo
140 *
141 * @return
142 * ADDR_E_RETURNCODE
143 ************************************************************************************************************************
144 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const145 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileInfo(
146 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
147 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
148 ) const
149 {
150 ADDR_E_RETURNCODE ret = ADDR_OK;
151
152 if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
153 (pIn->swizzleMode != ADDR_SW_256KB_Z_X) &&
154 (pIn->hTileFlags.pipeAligned != TRUE))
155 {
156 ret = ADDR_INVALIDPARAMS;
157 }
158 else
159 {
160 Dim3d metaBlk = {};
161 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataDepthStencil,
162 ADDR_RSRC_TEX_2D,
163 pIn->swizzleMode,
164 0,
165 0,
166 TRUE,
167 &metaBlk);
168
169 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
170 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
171 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
172 pOut->metaBlkWidth = metaBlk.w;
173 pOut->metaBlkHeight = metaBlk.h;
174
175 if (pIn->numMipLevels > 1)
176 {
177 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
178
179 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
180
181 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
182 {
183 UINT_32 mipWidth, mipHeight;
184
185 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
186
187 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
188 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
189
190 const UINT_32 pitchInM = mipWidth / metaBlk.w;
191 const UINT_32 heightInM = mipHeight / metaBlk.h;
192 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
193
194 if (pOut->pMipInfo != NULL)
195 {
196 pOut->pMipInfo[i].inMiptail = FALSE;
197 pOut->pMipInfo[i].offset = offset;
198 pOut->pMipInfo[i].sliceSize = mipSliceSize;
199 }
200
201 offset += mipSliceSize;
202 }
203
204 pOut->sliceSize = offset;
205 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
206 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
207
208 if (pOut->pMipInfo != NULL)
209 {
210 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
211 {
212 pOut->pMipInfo[i].inMiptail = TRUE;
213 pOut->pMipInfo[i].offset = 0;
214 pOut->pMipInfo[i].sliceSize = 0;
215 }
216
217 if (pIn->firstMipIdInTail != pIn->numMipLevels)
218 {
219 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
220 }
221 }
222 }
223 else
224 {
225 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
226 const UINT_32 heightInM = pOut->height / metaBlk.h;
227
228 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
229 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
230 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
231
232 if (pOut->pMipInfo != NULL)
233 {
234 pOut->pMipInfo[0].inMiptail = FALSE;
235 pOut->pMipInfo[0].offset = 0;
236 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
237 }
238 }
239
240 // Get the HTILE address equation (copied from HtileAddrFromCoord).
241 // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
242 const UINT_32 index = m_htileBaseIndex;
243 const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;
244
245 ADDR_C_ASSERT(sizeof(GFX11_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
246 pOut->equation.gfx10_bits = (UINT_16 *)GFX11_HTILE_SW_PATTERN[patIdxTable[index]];
247 }
248
249 return ret;
250 }
251
252 /**
253 ************************************************************************************************************************
254 * Gfx11Lib::HwlComputeDccInfo
255 *
256 * @brief
257 * Interface function to compute DCC key info
258 *
259 * @return
260 * ADDR_E_RETURNCODE
261 ************************************************************************************************************************
262 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const263 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo(
264 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
265 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
266 ) const
267 {
268 ADDR_E_RETURNCODE ret = ADDR_OK;
269
270 if (IsLinear(pIn->swizzleMode))
271 {
272 ret = ADDR_INVALIDPARAMS;
273 }
274 else if (pIn->dccKeyFlags.pipeAligned &&
275 (IsStandardSwizzle(pIn->swizzleMode) ||
276 IsDisplaySwizzle(pIn->swizzleMode)))
277 {
278 ret = ADDR_INVALIDPARAMS;
279 }
280 else
281 {
282 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
283 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
284 Dim3d compBlock = {};
285
286 GetCompressedBlockSizeLog2(Gfx11DataColor,
287 pIn->resourceType,
288 pIn->swizzleMode,
289 elemLog2,
290 numFragLog2,
291 &compBlock);
292 pOut->compressBlkWidth = 1 << compBlock.w;
293 pOut->compressBlkHeight = 1 << compBlock.h;
294 pOut->compressBlkDepth = 1 << compBlock.d;
295
296 if (ret == ADDR_OK)
297 {
298 Dim3d metaBlk = {};
299 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataColor,
300 pIn->resourceType,
301 pIn->swizzleMode,
302 elemLog2,
303 numFragLog2,
304 pIn->dccKeyFlags.pipeAligned,
305 &metaBlk);
306
307 pOut->dccRamBaseAlign = metaBlkSize;
308 pOut->metaBlkWidth = metaBlk.w;
309 pOut->metaBlkHeight = metaBlk.h;
310 pOut->metaBlkDepth = metaBlk.d;
311 pOut->metaBlkSize = metaBlkSize;
312
313 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
314 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
315 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
316
317 if (pIn->numMipLevels > 1)
318 {
319 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
320
321 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
322
323 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
324 {
325 UINT_32 mipWidth, mipHeight;
326
327 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
328
329 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
330 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
331
332 const UINT_32 pitchInM = mipWidth / metaBlk.w;
333 const UINT_32 heightInM = mipHeight / metaBlk.h;
334 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
335
336 if (pOut->pMipInfo != NULL)
337 {
338 pOut->pMipInfo[i].inMiptail = FALSE;
339 pOut->pMipInfo[i].offset = offset;
340 pOut->pMipInfo[i].sliceSize = mipSliceSize;
341 }
342
343 offset += mipSliceSize;
344 }
345
346 pOut->dccRamSliceSize = offset;
347 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
348 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
349
350 if (pOut->pMipInfo != NULL)
351 {
352 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
353 {
354 pOut->pMipInfo[i].inMiptail = TRUE;
355 pOut->pMipInfo[i].offset = 0;
356 pOut->pMipInfo[i].sliceSize = 0;
357 }
358
359 if (pIn->firstMipIdInTail != pIn->numMipLevels)
360 {
361 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
362 }
363 }
364 }
365 else
366 {
367 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
368 const UINT_32 heightInM = pOut->height / metaBlk.h;
369
370 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
371 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
372 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
373
374 if (pOut->pMipInfo != NULL)
375 {
376 pOut->pMipInfo[0].inMiptail = FALSE;
377 pOut->pMipInfo[0].offset = 0;
378 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
379 }
380 }
381
382 // Get the DCC address equation (copied from DccAddrFromCoord)
383 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
384 const UINT_32 numPipeLog2 = m_pipesLog2;
385 UINT_32 index = m_dccBaseIndex + elemLog2;
386 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
387 GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
388
389 if (pIn->dccKeyFlags.pipeAligned)
390 {
391 index += MaxNumOfBpp;
392
393 if (m_numPkrLog2 < 2)
394 {
395 index += m_pipesLog2 * MaxNumOfBpp;
396 }
397 else
398 {
399 // 4 groups for "m_numPkrLog2 < 2" case
400 index += 4 * MaxNumOfBpp;
401
402 const UINT_32 dccPipePerPkr = 3;
403
404 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
405 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
406 }
407 }
408
409 ADDR_C_ASSERT(sizeof(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
410 pOut->equation.gfx10_bits = (UINT_16*)GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]];
411 }
412 }
413
414 return ret;
415 }
416
417 /**
418 ************************************************************************************************************************
419 * Gfx11Lib::HwlComputeHtileAddrFromCoord
420 *
421 * @brief
422 * Interface function stub of AddrComputeHtileAddrFromCoord
423 *
424 * @return
425 * ADDR_E_RETURNCODE
426 ************************************************************************************************************************
427 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)428 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileAddrFromCoord(
429 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
430 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
431 {
432 ADDR_E_RETURNCODE returnCode = ADDR_OK;
433
434 if (pIn->numMipLevels > 1)
435 {
436 returnCode = ADDR_NOTIMPLEMENTED;
437 }
438 else
439 {
440 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
441 input.size = sizeof(input);
442 input.hTileFlags = pIn->hTileFlags;
443 input.depthFlags = pIn->depthflags;
444 input.swizzleMode = pIn->swizzleMode;
445 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
446 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
447 input.numSlices = Max(pIn->numSlices, 1u);
448 input.numMipLevels = 1;
449
450 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
451 output.size = sizeof(output);
452
453 returnCode = ComputeHtileInfo(&input, &output);
454
455 if (returnCode == ADDR_OK)
456 {
457 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
458 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
459 const UINT_32 index = m_htileBaseIndex + numSampleLog2;
460 const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;
461 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
462 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
463 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX11_HTILE_SW_PATTERN[patIdxTable[index]],
464 blkSizeLog2 + 1, // +1 for nibble offset
465 pIn->x,
466 pIn->y,
467 pIn->slice,
468 0);
469 const UINT_32 xb = pIn->x / output.metaBlkWidth;
470 const UINT_32 yb = pIn->y / output.metaBlkHeight;
471 const UINT_32 pb = output.pitch / output.metaBlkWidth;
472 const UINT_32 blkIndex = (yb * pb) + xb;
473 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
474
475 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
476 (blkIndex * (1 << blkSizeLog2)) +
477 ((blkOffset >> 1) ^ pipeXor);
478 }
479 }
480
481 return returnCode;
482 }
483
484 /**
485 ************************************************************************************************************************
486 * Gfx11Lib::HwlComputeHtileCoordFromAddr
487 *
488 * @brief
489 * Interface function stub of AddrComputeHtileCoordFromAddr
490 *
491 * @return
492 * ADDR_E_RETURNCODE
493 ************************************************************************************************************************
494 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)495 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileCoordFromAddr(
496 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
497 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
498 {
499 ADDR_NOT_IMPLEMENTED();
500
501 return ADDR_OK;
502 }
503
504 /**
505 ************************************************************************************************************************
506 * Gfx11Lib::HwlSupportComputeDccAddrFromCoord
507 *
508 * @brief
509 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
510 *
511 * @return
512 * ADDR_E_RETURNCODE
513 ************************************************************************************************************************
514 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)515 ADDR_E_RETURNCODE Gfx11Lib::HwlSupportComputeDccAddrFromCoord(
516 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
517 {
518 ADDR_E_RETURNCODE returnCode = ADDR_OK;
519
520 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
521 ((pIn->swizzleMode != ADDR_SW_64KB_R_X) &&
522 (pIn->swizzleMode != ADDR_SW_256KB_R_X)) ||
523 (pIn->dccKeyFlags.linear == TRUE) ||
524 (pIn->numFrags > 1) ||
525 (pIn->numMipLevels > 1) ||
526 (pIn->mipId > 0))
527 {
528 returnCode = ADDR_NOTSUPPORTED;
529 }
530 else if ((pIn->pitch == 0) ||
531 (pIn->metaBlkWidth == 0) ||
532 (pIn->metaBlkHeight == 0) ||
533 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
534 {
535 returnCode = ADDR_NOTSUPPORTED;
536 }
537
538 return returnCode;
539 }
540
541 /**
542 ************************************************************************************************************************
543 * Gfx11Lib::HwlComputeDccAddrFromCoord
544 *
545 * @brief
546 * Interface function stub of AddrComputeDccAddrFromCoord
547 *
548 * @return
549 * N/A
550 ************************************************************************************************************************
551 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)552 VOID Gfx11Lib::HwlComputeDccAddrFromCoord(
553 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
554 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
555 {
556 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
557 const UINT_32 numPipeLog2 = m_pipesLog2;
558 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
559 UINT_32 index = m_dccBaseIndex + elemLog2;
560 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
561 GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
562
563 if (pIn->dccKeyFlags.pipeAligned)
564 {
565 index += MaxNumOfBpp;
566
567 if (m_numPkrLog2 < 2)
568 {
569 index += m_pipesLog2 * MaxNumOfBpp;
570 }
571 else
572 {
573 // 4 groups for "m_numPkrLog2 < 2" case
574 index += 4 * MaxNumOfBpp;
575
576 const UINT_32 dccPipePerPkr = 3;
577
578 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
579 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
580 }
581 }
582
583 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
584 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
585 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]],
586 blkSizeLog2 + 1, // +1 for nibble offset
587 pIn->x,
588 pIn->y,
589 pIn->slice,
590 0);
591 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
592 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
593 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
594 const UINT_32 blkIndex = (yb * pb) + xb;
595 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
596
597 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
598 (blkIndex * (1 << blkSizeLog2)) +
599 ((blkOffset >> 1) ^ pipeXor);
600 }
601
602 /**
603 ************************************************************************************************************************
604 * Gfx11Lib::HwlInitGlobalParams
605 *
606 * @brief
607 * Initializes global parameters
608 *
609 * @return
610 * TRUE if all settings are valid
611 *
612 ************************************************************************************************************************
613 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)614 BOOL_32 Gfx11Lib::HwlInitGlobalParams(
615 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
616 {
617 BOOL_32 valid = TRUE;
618 GB_ADDR_CONFIG_GFX11 gbAddrConfig;
619
620 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
621
622 switch (gbAddrConfig.bits.NUM_PIPES)
623 {
624 case ADDR_CONFIG_1_PIPE:
625 m_pipes = 1;
626 m_pipesLog2 = 0;
627 break;
628 case ADDR_CONFIG_2_PIPE:
629 m_pipes = 2;
630 m_pipesLog2 = 1;
631 break;
632 case ADDR_CONFIG_4_PIPE:
633 m_pipes = 4;
634 m_pipesLog2 = 2;
635 break;
636 case ADDR_CONFIG_8_PIPE:
637 m_pipes = 8;
638 m_pipesLog2 = 3;
639 break;
640 case ADDR_CONFIG_16_PIPE:
641 m_pipes = 16;
642 m_pipesLog2 = 4;
643 break;
644 case ADDR_CONFIG_32_PIPE:
645 m_pipes = 32;
646 m_pipesLog2 = 5;
647 break;
648 case ADDR_CONFIG_64_PIPE:
649 m_pipes = 64;
650 m_pipesLog2 = 6;
651 break;
652 default:
653 ADDR_ASSERT_ALWAYS();
654 valid = FALSE;
655 break;
656 }
657
658 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
659 {
660 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
661 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
662 m_pipeInterleaveLog2 = 8;
663 break;
664 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
665 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
666 m_pipeInterleaveLog2 = 9;
667 break;
668 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
669 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
670 m_pipeInterleaveLog2 = 10;
671 break;
672 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
673 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
674 m_pipeInterleaveLog2 = 11;
675 break;
676 default:
677 ADDR_ASSERT_ALWAYS();
678 valid = FALSE;
679 break;
680 }
681
682 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
683 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
684 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
685 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
686
687 // These fields are deprecated on GFX11; they do nothing on HW.
688 m_maxCompFrag = 1;
689 m_maxCompFragLog2 = 0;
690
691 // Skip unaligned case
692 m_htileBaseIndex += MaxNumOfAA;
693
694 m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
695 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
696
697 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
698 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
699
700 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
701
702 if (m_numPkrLog2 >= 2)
703 {
704 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
705 m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
706 }
707
708 // There is no so-called VAR swizzle mode on GFX11 and instead there are 4 256KB swizzle modes. Here we treat 256KB
709 // swizzle mode as "VAR" swizzle mode for reusing exising facilities (e.g GetBlockSizeLog2()) provided by base class
710 m_blockVarSizeLog2 = 18;
711
712 if (valid)
713 {
714 InitEquationTable();
715 }
716
717 return valid;
718 }
719
720 /**
721 ************************************************************************************************************************
722 * Gfx11Lib::HwlConvertChipFamily
723 *
724 * @brief
725 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
726 * @return
727 * ChipFamily
728 ************************************************************************************************************************
729 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)730 ChipFamily Gfx11Lib::HwlConvertChipFamily(
731 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
732 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
733 {
734 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
735
736 switch (chipFamily)
737 {
738 case FAMILY_NV3:
739 if (ASICREV_IS_NAVI31_P(chipRevision))
740 {
741 }
742 if (ASICREV_IS_NAVI32_P(chipRevision))
743 {
744 }
745 if (ASICREV_IS_NAVI33_P(chipRevision))
746 {
747 }
748 break;
749
750 case FAMILY_GFX1150:
751 {
752 m_settings.isGfx1150 = 1;
753 }
754 break;
755 case FAMILY_PHX:
756 m_settings.isPhoenix = 1;
757 break;
758 default:
759 ADDR_ASSERT(!"Unknown chip family");
760 break;
761 }
762
763 m_configFlags.use32bppFor422Fmt = TRUE;
764
765 return family;
766 }
767
768 /**
769 ************************************************************************************************************************
770 * Gfx11Lib::GetBlk256SizeLog2
771 *
772 * @brief
773 * Get block 256 size
774 *
775 * @return
776 * N/A
777 ************************************************************************************************************************
778 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const779 void Gfx11Lib::GetBlk256SizeLog2(
780 AddrResourceType resourceType, ///< [in] Resource type
781 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
782 UINT_32 elemLog2, ///< [in] element size log2
783 UINT_32 numSamplesLog2, ///< [in] number of samples
784 Dim3d* pBlock ///< [out] block size
785 ) const
786 {
787 if (IsThin(resourceType, swizzleMode))
788 {
789 UINT_32 blockBits = 8 - elemLog2;
790
791 // On GFX11, Z and R modes are the same thing.
792 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
793 {
794 blockBits -= numSamplesLog2;
795 }
796
797 pBlock->w = (blockBits >> 1) + (blockBits & 1);
798 pBlock->h = (blockBits >> 1);
799 pBlock->d = 0;
800 }
801 else
802 {
803 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
804
805 UINT_32 blockBits = 8 - elemLog2;
806
807 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
808 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
809 pBlock->h = (blockBits / 3);
810 }
811 }
812
813 /**
814 ************************************************************************************************************************
815 * Gfx11Lib::GetCompressedBlockSizeLog2
816 *
817 * @brief
818 * Get compress block size
819 *
820 * @return
821 * N/A
822 ************************************************************************************************************************
823 */
GetCompressedBlockSizeLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const824 void Gfx11Lib::GetCompressedBlockSizeLog2(
825 Gfx11DataType dataType, ///< [in] Data type
826 AddrResourceType resourceType, ///< [in] Resource type
827 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
828 UINT_32 elemLog2, ///< [in] element size log2
829 UINT_32 numSamplesLog2, ///< [in] number of samples
830 Dim3d* pBlock ///< [out] block size
831 ) const
832 {
833 if (dataType == Gfx11DataColor)
834 {
835 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
836 }
837 else
838 {
839 ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
840 pBlock->w = 3;
841 pBlock->h = 3;
842 pBlock->d = 0;
843 }
844 }
845
846 /**
847 ************************************************************************************************************************
848 * Gfx11Lib::GetMetaOverlapLog2
849 *
850 * @brief
851 * Get meta block overlap
852 *
853 * @return
854 * N/A
855 ************************************************************************************************************************
856 */
GetMetaOverlapLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const857 INT_32 Gfx11Lib::GetMetaOverlapLog2(
858 Gfx11DataType dataType, ///< [in] Data type
859 AddrResourceType resourceType, ///< [in] Resource type
860 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
861 UINT_32 elemLog2, ///< [in] element size log2
862 UINT_32 numSamplesLog2 ///< [in] number of samples
863 ) const
864 {
865 Dim3d compBlock;
866 Dim3d microBlock;
867
868 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
869 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
870
871 const INT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);
872 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
873 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
874 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
875 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
876 INT_32 overlap = numPipesLog2 - maxSizeLog2;
877
878 if (numPipesLog2 > 1)
879 {
880 overlap++;
881 }
882
883 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
884 if ((elemLog2 == 4) && (numSamplesLog2 == 3) && (blkSizeLog2 == 16))
885 {
886 overlap--;
887 }
888 overlap += 16 - blkSizeLog2;
889 overlap = Max(overlap, 0);
890 return overlap;
891 }
892
893 /**
894 ************************************************************************************************************************
895 * Gfx11Lib::Get3DMetaOverlapLog2
896 *
897 * @brief
898 * Get 3d meta block overlap
899 *
900 * @return
901 * N/A
902 ************************************************************************************************************************
903 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const904 INT_32 Gfx11Lib::Get3DMetaOverlapLog2(
905 AddrResourceType resourceType, ///< [in] Resource type
906 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
907 UINT_32 elemLog2 ///< [in] element size log2
908 ) const
909 {
910 Dim3d microBlock;
911 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
912
913 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
914
915 overlap++;
916
917 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
918 {
919 overlap = 0;
920 }
921 return overlap;
922 }
923
924 /**
925 ************************************************************************************************************************
926 * Gfx11Lib::GetPipeRotateAmount
927 *
928 * @brief
929 * Get pipe rotate amount
930 *
931 * @return
932 * Pipe rotate amount
933 ************************************************************************************************************************
934 */
935
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const936 INT_32 Gfx11Lib::GetPipeRotateAmount(
937 AddrResourceType resourceType, ///< [in] Resource type
938 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
939 ) const
940 {
941 INT_32 amount = 0;
942
943 if ((m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
944 {
945 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
946 1 : m_pipesLog2 - (m_numSaLog2 + 1);
947 }
948
949 return amount;
950 }
951
952 /**
953 ************************************************************************************************************************
954 * Gfx11Lib::GetMetaBlkSize
955 *
956 * @brief
957 * Get metadata block size
958 *
959 * @return
960 * Meta block size
961 ************************************************************************************************************************
962 */
GetMetaBlkSize(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const963 UINT_32 Gfx11Lib::GetMetaBlkSize(
964 Gfx11DataType dataType, ///< [in] Data type
965 AddrResourceType resourceType, ///< [in] Resource type
966 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
967 UINT_32 elemLog2, ///< [in] element size log2
968 UINT_32 numSamplesLog2, ///< [in] number of samples
969 BOOL_32 pipeAlign, ///< [in] pipe align
970 Dim3d* pBlock ///< [out] block size
971 ) const
972 {
973 INT_32 metablkSizeLog2;
974
975 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
976 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
977 const INT_32 compBlkSizeLog2 = (dataType == Gfx11DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
978 const INT_32 metaBlkSamplesLog2 = numSamplesLog2;
979 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
980 INT_32 numPipesLog2 = m_pipesLog2;
981
982 if (IsThin(resourceType, swizzleMode))
983 {
984 if ((pipeAlign == FALSE) ||
985 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
986 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
987 {
988 if (pipeAlign)
989 {
990 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
991 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
992 }
993 else
994 {
995 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
996 }
997 }
998 else
999 {
1000 if ((m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1001 {
1002 numPipesLog2++;
1003 }
1004
1005 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1006
1007 if (numPipesLog2 >= 4)
1008 {
1009 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1010
1011 // In 16Bpe 8xaa, we have an extra overlap bit
1012 if ((pipeRotateLog2 > 0) &&
1013 (elemLog2 == 4) &&
1014 (numSamplesLog2 == 3) &&
1015 (IsZOrderSwizzle(swizzleMode) ||
1016 IsRtOptSwizzle(swizzleMode) ||
1017 (GetEffectiveNumPipes() > 3)))
1018 {
1019 overlapLog2++;
1020 }
1021
1022 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1023 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1024 }
1025 else
1026 {
1027 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1028 }
1029
1030 if (dataType == Gfx11DataDepthStencil)
1031 {
1032 // For htile surfaces, pad meta block size to 2K * num_pipes
1033 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1034 }
1035
1036 /* This chunk is not part of upstream addrlib. See !28268 */
1037 const INT_32 compFragLog2 = numSamplesLog2;
1038
1039 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1040 {
1041 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1042
1043 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1044 }
1045 /* End of the non-upstream chunk. */
1046 }
1047
1048 const INT_32 metablkBitsLog2 =
1049 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1050 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1051 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1052 pBlock->d = 1;
1053 }
1054 else
1055 {
1056 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1057
1058 if (pipeAlign)
1059 {
1060 if ((m_pipesLog2 == m_numSaLog2 + 1) &&
1061 (m_pipesLog2 > 1) &&
1062 IsRbAligned(resourceType, swizzleMode))
1063 {
1064 numPipesLog2++;
1065 }
1066
1067 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1068
1069 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1070 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1071 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1072 }
1073 else
1074 {
1075 metablkSizeLog2 = 12;
1076 }
1077
1078 const INT_32 metablkBitsLog2 =
1079 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1080 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1081 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1082 pBlock->d = 1 << (metablkBitsLog2 / 3);
1083 }
1084
1085 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1086 }
1087
1088 /**
1089 ************************************************************************************************************************
1090 * Gfx11Lib::ConvertSwizzlePatternToEquation
1091 *
1092 * @brief
1093 * Convert swizzle pattern to equation.
1094 *
1095 * @return
1096 * N/A
1097 ************************************************************************************************************************
1098 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1099 VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
1100 UINT_32 elemLog2, ///< [in] element bytes log2
1101 AddrResourceType rsrcType, ///< [in] resource type
1102 AddrSwizzleMode swMode, ///< [in] swizzle mode
1103 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1104 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1105 const
1106 {
1107 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1108 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1109
1110 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1111 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1112 memset(pEquation, 0, sizeof(ADDR_EQUATION));
1113 pEquation->numBits = blockSizeLog2;
1114 pEquation->numBitComponents = pPatInfo->maxItemCount;
1115 pEquation->stackedDepthSlices = FALSE;
1116
1117 for (UINT_32 i = 0; i < elemLog2; i++)
1118 {
1119 pEquation->addr[i].channel = 0;
1120 pEquation->addr[i].valid = 1;
1121 pEquation->addr[i].index = i;
1122 }
1123
1124 if (IsXor(swMode) == FALSE)
1125 {
1126 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1127 {
1128 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1129
1130 if (pSwizzle[i].x != 0)
1131 {
1132 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1133
1134 pEquation->addr[i].channel = 0;
1135 pEquation->addr[i].valid = 1;
1136 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1137 }
1138 else if (pSwizzle[i].y != 0)
1139 {
1140 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1141
1142 pEquation->addr[i].channel = 1;
1143 pEquation->addr[i].valid = 1;
1144 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1145 }
1146 else
1147 {
1148 ADDR_ASSERT(pSwizzle[i].z != 0);
1149 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1150
1151 pEquation->addr[i].channel = 2;
1152 pEquation->addr[i].valid = 1;
1153 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1154 }
1155
1156 pEquation->xor1[i].value = 0;
1157 pEquation->xor2[i].value = 0;
1158 }
1159 }
1160 else if (IsThin(rsrcType, swMode))
1161 {
1162 Dim3d dim;
1163 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1164
1165 const UINT_32 blkXLog2 = Log2(dim.w);
1166 const UINT_32 blkYLog2 = Log2(dim.h);
1167 const UINT_32 blkXMask = dim.w - 1;
1168 const UINT_32 blkYMask = dim.h - 1;
1169
1170 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1171 UINT_32 xMask = 0;
1172 UINT_32 yMask = 0;
1173 UINT_32 bMask = (1 << elemLog2) - 1;
1174
1175 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1176 {
1177 if (IsPow2(pSwizzle[i].value))
1178 {
1179 if (pSwizzle[i].x != 0)
1180 {
1181 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1182 xMask |= pSwizzle[i].x;
1183
1184 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1185
1186 ADDR_ASSERT(xLog2 < blkXLog2);
1187
1188 pEquation->addr[i].channel = 0;
1189 pEquation->addr[i].valid = 1;
1190 pEquation->addr[i].index = xLog2 + elemLog2;
1191 }
1192 else
1193 {
1194 ADDR_ASSERT(pSwizzle[i].y != 0);
1195 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1196 yMask |= pSwizzle[i].y;
1197
1198 pEquation->addr[i].channel = 1;
1199 pEquation->addr[i].valid = 1;
1200 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1201
1202 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1203 }
1204
1205 swizzle[i].value = 0;
1206 bMask |= 1 << i;
1207 }
1208 else
1209 {
1210 if (pSwizzle[i].z != 0)
1211 {
1212 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1213
1214 pEquation->xor2[i].channel = 2;
1215 pEquation->xor2[i].valid = 1;
1216 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1217 }
1218
1219 swizzle[i].x = pSwizzle[i].x;
1220 swizzle[i].y = pSwizzle[i].y;
1221 swizzle[i].z = swizzle[i].s = 0;
1222
1223 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1224
1225 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1226
1227 if (xHi != 0)
1228 {
1229 ADDR_ASSERT(IsPow2(xHi));
1230 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1231
1232 pEquation->xor1[i].channel = 0;
1233 pEquation->xor1[i].valid = 1;
1234 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1235
1236 swizzle[i].x &= blkXMask;
1237 }
1238
1239 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1240
1241 if (yHi != 0)
1242 {
1243 ADDR_ASSERT(IsPow2(yHi));
1244
1245 if (xHi == 0)
1246 {
1247 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1248 pEquation->xor1[i].channel = 1;
1249 pEquation->xor1[i].valid = 1;
1250 pEquation->xor1[i].index = Log2(yHi);
1251 }
1252 else
1253 {
1254 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1255 pEquation->xor2[i].channel = 1;
1256 pEquation->xor2[i].valid = 1;
1257 pEquation->xor2[i].index = Log2(yHi);
1258 }
1259
1260 swizzle[i].y &= blkYMask;
1261 }
1262
1263 if (swizzle[i].value == 0)
1264 {
1265 bMask |= 1 << i;
1266 }
1267 }
1268 }
1269
1270 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1271 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1272
1273 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1274
1275 while (bMask != blockMask)
1276 {
1277 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1278 {
1279 if ((bMask & (1 << i)) == 0)
1280 {
1281 if (IsPow2(swizzle[i].value))
1282 {
1283 if (swizzle[i].x != 0)
1284 {
1285 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1286 xMask |= swizzle[i].x;
1287
1288 const UINT_32 xLog2 = Log2(swizzle[i].x);
1289
1290 ADDR_ASSERT(xLog2 < blkXLog2);
1291
1292 pEquation->addr[i].channel = 0;
1293 pEquation->addr[i].valid = 1;
1294 pEquation->addr[i].index = xLog2 + elemLog2;
1295 }
1296 else
1297 {
1298 ADDR_ASSERT(swizzle[i].y != 0);
1299 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1300 yMask |= swizzle[i].y;
1301
1302 pEquation->addr[i].channel = 1;
1303 pEquation->addr[i].valid = 1;
1304 pEquation->addr[i].index = Log2(swizzle[i].y);
1305
1306 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1307 }
1308
1309 swizzle[i].value = 0;
1310 bMask |= 1 << i;
1311 }
1312 else
1313 {
1314 const UINT_32 x = swizzle[i].x & xMask;
1315 const UINT_32 y = swizzle[i].y & yMask;
1316
1317 if (x != 0)
1318 {
1319 ADDR_ASSERT(IsPow2(x));
1320
1321 if (pEquation->xor1[i].value == 0)
1322 {
1323 pEquation->xor1[i].channel = 0;
1324 pEquation->xor1[i].valid = 1;
1325 pEquation->xor1[i].index = Log2(x) + elemLog2;
1326 }
1327 else
1328 {
1329 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1330 pEquation->xor2[i].channel = 0;
1331 pEquation->xor2[i].valid = 1;
1332 pEquation->xor2[i].index = Log2(x) + elemLog2;
1333 }
1334 }
1335
1336 if (y != 0)
1337 {
1338 ADDR_ASSERT(IsPow2(y));
1339
1340 if (pEquation->xor1[i].value == 0)
1341 {
1342 pEquation->xor1[i].channel = 1;
1343 pEquation->xor1[i].valid = 1;
1344 pEquation->xor1[i].index = Log2(y);
1345 }
1346 else
1347 {
1348 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1349 pEquation->xor2[i].channel = 1;
1350 pEquation->xor2[i].valid = 1;
1351 pEquation->xor2[i].index = Log2(y);
1352 }
1353 }
1354
1355 swizzle[i].x &= ~x;
1356 swizzle[i].y &= ~y;
1357 }
1358 }
1359 }
1360 }
1361
1362 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1363 }
1364 else
1365 {
1366 const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ?
1367 Block256K_Log2_3d[elemLog2] :
1368 ((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]);
1369
1370 const UINT_32 blkXLog2 = blkDim.w;
1371 const UINT_32 blkYLog2 = blkDim.h;
1372 const UINT_32 blkZLog2 = blkDim.d;
1373 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1374 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1375 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1376
1377 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1378 UINT_32 xMask = 0;
1379 UINT_32 yMask = 0;
1380 UINT_32 zMask = 0;
1381 UINT_32 bMask = (1 << elemLog2) - 1;
1382
1383 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1384 {
1385 if (IsPow2(pSwizzle[i].value))
1386 {
1387 if (pSwizzle[i].x != 0)
1388 {
1389 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1390 xMask |= pSwizzle[i].x;
1391
1392 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1393
1394 ADDR_ASSERT(xLog2 < blkXLog2);
1395
1396 pEquation->addr[i].channel = 0;
1397 pEquation->addr[i].valid = 1;
1398 pEquation->addr[i].index = xLog2 + elemLog2;
1399 }
1400 else if (pSwizzle[i].y != 0)
1401 {
1402 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1403 yMask |= pSwizzle[i].y;
1404
1405 pEquation->addr[i].channel = 1;
1406 pEquation->addr[i].valid = 1;
1407 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1408
1409 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1410 }
1411 else
1412 {
1413 ADDR_ASSERT(pSwizzle[i].z != 0);
1414 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1415 zMask |= pSwizzle[i].z;
1416
1417 pEquation->addr[i].channel = 2;
1418 pEquation->addr[i].valid = 1;
1419 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1420
1421 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1422 }
1423
1424 swizzle[i].value = 0;
1425 bMask |= 1 << i;
1426 }
1427 else
1428 {
1429 swizzle[i].x = pSwizzle[i].x;
1430 swizzle[i].y = pSwizzle[i].y;
1431 swizzle[i].z = pSwizzle[i].z;
1432 swizzle[i].s = 0;
1433
1434 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1435
1436 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1437 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1438 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1439
1440 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1441
1442 if (xHi != 0)
1443 {
1444 ADDR_ASSERT(IsPow2(xHi));
1445 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1446
1447 pEquation->xor1[i].channel = 0;
1448 pEquation->xor1[i].valid = 1;
1449 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1450
1451 swizzle[i].x &= blkXMask;
1452 }
1453
1454 if (yHi != 0)
1455 {
1456 ADDR_ASSERT(IsPow2(yHi));
1457
1458 if (pEquation->xor1[i].value == 0)
1459 {
1460 pEquation->xor1[i].channel = 1;
1461 pEquation->xor1[i].valid = 1;
1462 pEquation->xor1[i].index = Log2(yHi);
1463 }
1464 else
1465 {
1466 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1467 pEquation->xor2[i].channel = 1;
1468 pEquation->xor2[i].valid = 1;
1469 pEquation->xor2[i].index = Log2(yHi);
1470 }
1471
1472 swizzle[i].y &= blkYMask;
1473 }
1474
1475 if (zHi != 0)
1476 {
1477 ADDR_ASSERT(IsPow2(zHi));
1478
1479 if (pEquation->xor1[i].value == 0)
1480 {
1481 pEquation->xor1[i].channel = 2;
1482 pEquation->xor1[i].valid = 1;
1483 pEquation->xor1[i].index = Log2(zHi);
1484 }
1485 else
1486 {
1487 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1488 pEquation->xor2[i].channel = 2;
1489 pEquation->xor2[i].valid = 1;
1490 pEquation->xor2[i].index = Log2(zHi);
1491 }
1492
1493 swizzle[i].z &= blkZMask;
1494 }
1495
1496 if (swizzle[i].value == 0)
1497 {
1498 bMask |= 1 << i;
1499 }
1500 }
1501 }
1502
1503 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1504 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1505
1506 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1507
1508 while (bMask != blockMask)
1509 {
1510 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1511 {
1512 if ((bMask & (1 << i)) == 0)
1513 {
1514 if (IsPow2(swizzle[i].value))
1515 {
1516 if (swizzle[i].x != 0)
1517 {
1518 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1519 xMask |= swizzle[i].x;
1520
1521 const UINT_32 xLog2 = Log2(swizzle[i].x);
1522
1523 ADDR_ASSERT(xLog2 < blkXLog2);
1524
1525 pEquation->addr[i].channel = 0;
1526 pEquation->addr[i].valid = 1;
1527 pEquation->addr[i].index = xLog2 + elemLog2;
1528 }
1529 else if (swizzle[i].y != 0)
1530 {
1531 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1532 yMask |= swizzle[i].y;
1533
1534 pEquation->addr[i].channel = 1;
1535 pEquation->addr[i].valid = 1;
1536 pEquation->addr[i].index = Log2(swizzle[i].y);
1537
1538 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1539 }
1540 else
1541 {
1542 ADDR_ASSERT(swizzle[i].z != 0);
1543 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1544 zMask |= swizzle[i].z;
1545
1546 pEquation->addr[i].channel = 2;
1547 pEquation->addr[i].valid = 1;
1548 pEquation->addr[i].index = Log2(swizzle[i].z);
1549
1550 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1551 }
1552
1553 swizzle[i].value = 0;
1554 bMask |= 1 << i;
1555 }
1556 else
1557 {
1558 const UINT_32 x = swizzle[i].x & xMask;
1559 const UINT_32 y = swizzle[i].y & yMask;
1560 const UINT_32 z = swizzle[i].z & zMask;
1561
1562 if (x != 0)
1563 {
1564 ADDR_ASSERT(IsPow2(x));
1565
1566 if (pEquation->xor1[i].value == 0)
1567 {
1568 pEquation->xor1[i].channel = 0;
1569 pEquation->xor1[i].valid = 1;
1570 pEquation->xor1[i].index = Log2(x) + elemLog2;
1571 }
1572 else
1573 {
1574 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1575 pEquation->xor2[i].channel = 0;
1576 pEquation->xor2[i].valid = 1;
1577 pEquation->xor2[i].index = Log2(x) + elemLog2;
1578 }
1579 }
1580
1581 if (y != 0)
1582 {
1583 ADDR_ASSERT(IsPow2(y));
1584
1585 if (pEquation->xor1[i].value == 0)
1586 {
1587 pEquation->xor1[i].channel = 1;
1588 pEquation->xor1[i].valid = 1;
1589 pEquation->xor1[i].index = Log2(y);
1590 }
1591 else
1592 {
1593 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1594 pEquation->xor2[i].channel = 1;
1595 pEquation->xor2[i].valid = 1;
1596 pEquation->xor2[i].index = Log2(y);
1597 }
1598 }
1599
1600 if (z != 0)
1601 {
1602 ADDR_ASSERT(IsPow2(z));
1603
1604 if (pEquation->xor1[i].value == 0)
1605 {
1606 pEquation->xor1[i].channel = 2;
1607 pEquation->xor1[i].valid = 1;
1608 pEquation->xor1[i].index = Log2(z);
1609 }
1610 else
1611 {
1612 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1613 pEquation->xor2[i].channel = 2;
1614 pEquation->xor2[i].valid = 1;
1615 pEquation->xor2[i].index = Log2(z);
1616 }
1617 }
1618
1619 swizzle[i].x &= ~x;
1620 swizzle[i].y &= ~y;
1621 swizzle[i].z &= ~z;
1622 }
1623 }
1624 }
1625 }
1626
1627 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1628 }
1629 }
1630
1631 /**
1632 ************************************************************************************************************************
1633 * Gfx11Lib::InitEquationTable
1634 *
1635 * @brief
1636 * Initialize Equation table.
1637 *
1638 * @return
1639 * N/A
1640 ************************************************************************************************************************
1641 */
InitEquationTable()1642 VOID Gfx11Lib::InitEquationTable()
1643 {
1644 memset(m_equationTable, 0, sizeof(m_equationTable));
1645
1646 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1647 {
1648 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1649
1650 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1651 {
1652 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1653
1654 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1655 {
1656 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1657 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1658
1659 if (pPatInfo != NULL)
1660 {
1661 ADDR_ASSERT(IsValidSwMode(swMode));
1662
1663 if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
1664 {
1665 ADDR_EQUATION equation = {};
1666
1667 // Passing in pPatInfo to get the addr equation
1668 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1669
1670 equationIndex = m_numEquations;
1671 ADDR_ASSERT(equationIndex < EquationTableSize);
1672 // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
1673 m_equationTable[equationIndex] = equation;
1674 // Increment m_numEquations
1675 m_numEquations++;
1676 }
1677 else // There is no equationIndex
1678 {
1679 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X
1680 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1681 ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D);
1682 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1683 }
1684 }
1685
1686 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1687 }
1688 }
1689 }
1690 }
1691
1692 /**
1693 ************************************************************************************************************************
1694 * Gfx11Lib::HwlGetEquationIndex
1695 *
1696 * @brief
1697 * Interface function stub of GetEquationIndex
1698 *
1699 * @return
1700 * ADDR_E_RETURNCODE
1701 ************************************************************************************************************************
1702 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1703 UINT_32 Gfx11Lib::HwlGetEquationIndex(
1704 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1705 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1706 ) const
1707 {
1708 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1709
1710 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1711 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1712 {
1713 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1714 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1715 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1716
1717 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1718 }
1719
1720 if (pOut->pMipInfo != NULL)
1721 {
1722 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1723 {
1724 pOut->pMipInfo[i].equationIndex = equationIdx;
1725 }
1726 }
1727
1728 return equationIdx;
1729 }
1730
1731 /**
1732 ************************************************************************************************************************
1733 * Gfx11Lib::GetValidDisplaySwizzleModes
1734 *
1735 * @brief
1736 * Get valid swizzle modes mask for displayable surface
1737 *
1738 * @return
1739 * Valid swizzle modes mask for displayable surface
1740 ************************************************************************************************************************
1741 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1742 UINT_32 Gfx11Lib::GetValidDisplaySwizzleModes(
1743 UINT_32 bpp
1744 ) const
1745 {
1746 UINT_32 swModeMask = 0;
1747
1748 if (bpp <= 64)
1749 {
1750 const ChipFamily family = GetChipFamily();
1751
1752 swModeMask = Dcn32SwModeMask;
1753
1754 if (false
1755 || (m_settings.isPhoenix)
1756 || (m_settings.isGfx1150)
1757 )
1758 {
1759 // Not all GPUs support displaying with 256kB swizzle modes.
1760 swModeMask &= ~((1u << ADDR_SW_256KB_D_X) |
1761 (1u << ADDR_SW_256KB_R_X));
1762 }
1763 }
1764
1765 return swModeMask;
1766 }
1767
1768 /**
1769 ************************************************************************************************************************
1770 * Gfx11Lib::IsValidDisplaySwizzleMode
1771 *
1772 * @brief
1773 * Check if a swizzle mode is supported by display engine
1774 *
1775 * @return
1776 * TRUE is swizzle mode is supported by display engine
1777 ************************************************************************************************************************
1778 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const1779 BOOL_32 Gfx11Lib::IsValidDisplaySwizzleMode(
1780 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1781 ) const
1782 {
1783 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1784
1785 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
1786 }
1787
1788 /**
1789 ************************************************************************************************************************
1790 * Gfx11Lib::GetMaxNumMipsInTail
1791 *
1792 * @brief
1793 * Return max number of mips in tails
1794 *
1795 * @return
1796 * Max number of mips in tails
1797 ************************************************************************************************************************
1798 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const1799 UINT_32 Gfx11Lib::GetMaxNumMipsInTail(
1800 UINT_32 blockSizeLog2, ///< block size log2
1801 BOOL_32 isThin ///< is thin or thick
1802 ) const
1803 {
1804 UINT_32 effectiveLog2 = blockSizeLog2;
1805
1806 if (isThin == FALSE)
1807 {
1808 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1809 }
1810
1811 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1812 }
1813
1814 /**
1815 ************************************************************************************************************************
1816 * Gfx11Lib::HwlComputePipeBankXor
1817 *
1818 * @brief
1819 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1820 *
1821 * @return
1822 * PipeBankXor value
1823 ************************************************************************************************************************
1824 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1825 ADDR_E_RETURNCODE Gfx11Lib::HwlComputePipeBankXor(
1826 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1827 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1828 ) const
1829 {
1830 if (IsNonPrtXor(pIn->swizzleMode))
1831 {
1832 pOut->pipeBankXor = 0;
1833 }
1834 else
1835 {
1836 pOut->pipeBankXor = 0;
1837 }
1838
1839 return ADDR_OK;
1840 }
1841
1842 /**
1843 ************************************************************************************************************************
1844 * Gfx11Lib::HwlComputeSlicePipeBankXor
1845 *
1846 * @brief
1847 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1848 *
1849 * @return
1850 * PipeBankXor value
1851 ************************************************************************************************************************
1852 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1853 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
1854 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1855 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1856 ) const
1857 {
1858 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1859
1860 if (IsNonPrtXor(pIn->swizzleMode))
1861 {
1862 if (pIn->bpe == 0)
1863 {
1864 ADDR_ASSERT_ALWAYS();
1865
1866 // Require a valid bytes-per-element value passed from client...
1867 returnCode = ADDR_INVALIDPARAMS;
1868 }
1869 else
1870 {
1871 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1872 pIn->resourceType,
1873 Log2(pIn->bpe >> 3),
1874 1);
1875
1876 if (pPatInfo != NULL)
1877 {
1878 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1879 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1880
1881 const UINT_32 pipeBankXorOffset =
1882 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
1883 GetBlockSizeLog2(pIn->swizzleMode),
1884 0,
1885 0,
1886 pIn->slice,
1887 0);
1888
1889 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1890
1891 // Should have no bit set under pipe interleave
1892 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1893
1894 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1895 }
1896 else
1897 {
1898 // Should never come here...
1899 ADDR_NOT_IMPLEMENTED();
1900
1901 returnCode = ADDR_NOTSUPPORTED;
1902 }
1903 }
1904 }
1905 else
1906 {
1907 pOut->pipeBankXor = 0;
1908 }
1909
1910 return returnCode;
1911 }
1912
1913 /**
1914 ************************************************************************************************************************
1915 * Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1916 *
1917 * @brief
1918 * Compute sub resource offset to support swizzle pattern
1919 *
1920 * @return
1921 * Offset
1922 ************************************************************************************************************************
1923 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1924 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1925 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
1926 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
1927 ) const
1928 {
1929 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
1930
1931 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1932
1933 return ADDR_OK;
1934 }
1935
1936 /**
1937 ************************************************************************************************************************
1938 * Gfx11Lib::HwlComputeNonBlockCompressedView
1939 *
1940 * @brief
1941 * Compute non-block-compressed view for a given mipmap level/slice.
1942 *
1943 * @return
1944 * ADDR_E_RETURNCODE
1945 ************************************************************************************************************************
1946 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1947 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView(
1948 const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
1949 ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
1950 ) const
1951 {
1952 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1953
1954 if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
1955 {
1956 // Only thin swizzle mode can have a NonBC view...
1957 returnCode = ADDR_INVALIDPARAMS;
1958 }
1959 else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1960 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1961 {
1962 // Only support BC1~BC7, ASTC, or ETC2 for now...
1963 returnCode = ADDR_NOTSUPPORTED;
1964 }
1965 else
1966 {
1967 UINT_32 bcWidth, bcHeight;
1968 UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1969
1970 ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1971 infoIn.flags = pIn->flags;
1972 infoIn.swizzleMode = pIn->swizzleMode;
1973 infoIn.resourceType = pIn->resourceType;
1974 infoIn.bpp = bpp;
1975 infoIn.width = RoundUpQuotient(pIn->width, bcWidth);
1976 infoIn.height = RoundUpQuotient(pIn->height, bcHeight);
1977 infoIn.numSlices = pIn->numSlices;
1978 infoIn.numMipLevels = pIn->numMipLevels;
1979 infoIn.numSamples = 1;
1980 infoIn.numFrags = 1;
1981
1982 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
1983
1984 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1985 infoOut.pMipInfo = mipInfo;
1986
1987 const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
1988
1989 if (tiled)
1990 {
1991 returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
1992 }
1993 else
1994 {
1995 returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
1996 }
1997
1998 if (returnCode == ADDR_OK)
1999 {
2000 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2001 subOffIn.swizzleMode = infoIn.swizzleMode;
2002 subOffIn.resourceType = infoIn.resourceType;
2003 subOffIn.slice = pIn->slice;
2004 subOffIn.sliceSize = infoOut.sliceSize;
2005 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2006 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
2007
2008 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2009
2010 // For any mipmap level, move nonBc view base address by offset
2011 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2012 pOut->offset = subOffOut.offset;
2013
2014 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2015 slicePbXorIn.bpe = infoIn.bpp;
2016 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
2017 slicePbXorIn.resourceType = infoIn.resourceType;
2018 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2019 slicePbXorIn.slice = pIn->slice;
2020
2021 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2022
2023 // For any mipmap level, nonBc view should use computed pbXor
2024 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2025 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2026
2027 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2028 const UINT_32 requestMipWidth = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2029 const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2030
2031 if (inTail)
2032 {
2033 // For mipmap level that is in mip tail block, hack a lot of things...
2034 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2035 // are fit in tail block:
2036
2037 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2038 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2039
2040 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2041 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2042
2043 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2044 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2045
2046 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2047 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2048 }
2049 // This check should cover at least mipId == 0
2050 else if (requestMipWidth << pIn->mipId == infoIn.width)
2051 {
2052 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2053 // - only one mipmap level and mipId = 0
2054 pOut->mipId = 0;
2055 pOut->numMipLevels = 1;
2056
2057 // (mip0) width = requestMipWidth
2058 pOut->unalignedWidth = requestMipWidth;
2059
2060 // (mip0) height = requestMipHeight
2061 pOut->unalignedHeight = requestMipHeight;
2062 }
2063 else
2064 {
2065 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2066 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2067 // because single mip view may have different pitch value than original (multiple) mip view...
2068 // A simple case would be:
2069 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2070 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2071 // mip0 width = 0x101/mip1 width = 0x80
2072 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2073 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2074
2075 // - 2 levels and mipId = 1
2076 pOut->mipId = 1;
2077 pOut->numMipLevels = 2;
2078
2079 const UINT_32 upperMipWidth = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2080 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2081
2082 const BOOL_32 needToAvoidInTail =
2083 tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2084 TRUE : FALSE;
2085
2086 const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2087 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2088
2089 const BOOL_32 needExtraWidth =
2090 ((upperMipWidth < requestMipWidth * 2) ||
2091 ((upperMipWidth == requestMipWidth * 2) &&
2092 ((needToAvoidInTail == TRUE) ||
2093 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2094
2095 const BOOL_32 needExtraHeight =
2096 ((upperMipHeight < requestMipHeight * 2) ||
2097 ((upperMipHeight == requestMipHeight * 2) &&
2098 ((needToAvoidInTail == TRUE) ||
2099 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2100
2101 // (mip0) width = requestLastMipLevelWidth
2102 pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0);
2103
2104 // (mip0) height = requestLastMipLevelHeight
2105 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2106 }
2107
2108 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2109 ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2110 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2111 ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2112 }
2113 }
2114
2115 return returnCode;
2116 }
2117
2118 /**
2119 ************************************************************************************************************************
2120 * Gfx11Lib::ValidateNonSwModeParams
2121 *
2122 * @brief
2123 * Validate compute surface info params except swizzle mode
2124 *
2125 * @return
2126 * TRUE if parameters are valid, FALSE otherwise
2127 ************************************************************************************************************************
2128 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2129 BOOL_32 Gfx11Lib::ValidateNonSwModeParams(
2130 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2131 {
2132 BOOL_32 valid = TRUE;
2133
2134 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8))
2135 {
2136 ADDR_ASSERT_ALWAYS();
2137 valid = FALSE;
2138 }
2139 else if (pIn->flags.fmask == 1)
2140 {
2141 // There is no FMASK for GFX11 ASICs
2142 ADDR_ASSERT_ALWAYS();
2143 valid = FALSE;
2144 }
2145 else if (pIn->numSamples > 8)
2146 {
2147 // There is no EQAA support for GFX11 ASICs, so the max number of sample is 8
2148 ADDR_ASSERT_ALWAYS();
2149 valid = FALSE;
2150 }
2151 else if ((pIn->numFrags != 0) && (pIn->numSamples != pIn->numFrags))
2152 {
2153 // There is no EQAA support for GFX11 ASICs, so the number of sample has to be same as number of fragment
2154 ADDR_ASSERT_ALWAYS();
2155 valid = FALSE;
2156 }
2157
2158 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2159 const AddrResourceType rsrcType = pIn->resourceType;
2160 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2161 const BOOL_32 msaa = (pIn->numSamples > 1);
2162 const BOOL_32 display = flags.display;
2163 const BOOL_32 tex3d = IsTex3d(rsrcType);
2164 const BOOL_32 tex2d = IsTex2d(rsrcType);
2165 const BOOL_32 tex1d = IsTex1d(rsrcType);
2166 const BOOL_32 stereo = flags.qbStereo;
2167
2168 // Resource type check
2169 if (tex1d)
2170 {
2171 if (msaa || display || stereo)
2172 {
2173 ADDR_ASSERT_ALWAYS();
2174 valid = FALSE;
2175 }
2176 }
2177 else if (tex2d)
2178 {
2179 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2180 {
2181 ADDR_ASSERT_ALWAYS();
2182 valid = FALSE;
2183 }
2184 }
2185 else if (tex3d)
2186 {
2187 if (msaa || display || stereo)
2188 {
2189 ADDR_ASSERT_ALWAYS();
2190 valid = FALSE;
2191 }
2192 }
2193 else
2194 {
2195 ADDR_ASSERT_ALWAYS();
2196 valid = FALSE;
2197 }
2198
2199 return valid;
2200 }
2201
2202 /**
2203 ************************************************************************************************************************
2204 * Gfx11Lib::ValidateSwModeParams
2205 *
2206 * @brief
2207 * Validate compute surface info related to swizzle mode
2208 *
2209 * @return
2210 * TRUE if parameters are valid, FALSE otherwise
2211 ************************************************************************************************************************
2212 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2213 BOOL_32 Gfx11Lib::ValidateSwModeParams(
2214 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2215 {
2216 BOOL_32 valid = TRUE;
2217
2218 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2219 {
2220 ADDR_ASSERT_ALWAYS();
2221 valid = FALSE;
2222 }
2223 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2224 {
2225 ADDR_ASSERT_ALWAYS();
2226 valid = FALSE;
2227 }
2228
2229 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2230 const AddrResourceType rsrcType = pIn->resourceType;
2231 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2232 const BOOL_32 msaa = (pIn->numSamples > 1);
2233 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2234 const BOOL_32 color = flags.color;
2235 const BOOL_32 display = flags.display;
2236 const BOOL_32 tex3d = IsTex3d(rsrcType);
2237 const BOOL_32 tex2d = IsTex2d(rsrcType);
2238 const BOOL_32 tex1d = IsTex1d(rsrcType);
2239 const BOOL_32 thin3d = flags.view3dAs2dArray;
2240 const BOOL_32 linear = IsLinear(swizzle);
2241 const BOOL_32 blk256B = IsBlock256b(swizzle);
2242 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2243 const BOOL_32 prt = flags.prt;
2244
2245 // Misc check
2246 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numSamples)))
2247 {
2248 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2249 ADDR_ASSERT_ALWAYS();
2250 valid = FALSE;
2251 }
2252
2253 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2254 {
2255 ADDR_ASSERT_ALWAYS();
2256 valid = FALSE;
2257 }
2258
2259 if ((pIn->bpp == 96) && (linear == FALSE))
2260 {
2261 ADDR_ASSERT_ALWAYS();
2262 valid = FALSE;
2263 }
2264
2265 const UINT_32 swizzleMask = 1 << swizzle;
2266
2267 // Resource type check
2268 if (tex1d)
2269 {
2270 if ((swizzleMask & Gfx11Rsrc1dSwModeMask) == 0)
2271 {
2272 ADDR_ASSERT_ALWAYS();
2273 valid = FALSE;
2274 }
2275 }
2276 else if (tex2d)
2277 {
2278 if ((swizzleMask & Gfx11Rsrc2dSwModeMask) == 0)
2279 {
2280 ADDR_ASSERT_ALWAYS();
2281 valid = FALSE;
2282 }
2283 else if (prt && ((swizzleMask & Gfx11Rsrc2dPrtSwModeMask) == 0))
2284 {
2285 ADDR_ASSERT_ALWAYS();
2286 valid = FALSE;
2287 }
2288 }
2289 else if (tex3d)
2290 {
2291 if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) ||
2292 (prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) ||
2293 (thin3d && ((swizzleMask & Gfx11Rsrc3dViewAs2dSwModeMask) == 0)))
2294 {
2295 ADDR_ASSERT_ALWAYS();
2296 valid = FALSE;
2297 }
2298 }
2299
2300 // Swizzle type check
2301 if (linear)
2302 {
2303 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2304 {
2305 ADDR_ASSERT_ALWAYS();
2306 valid = FALSE;
2307 }
2308 }
2309 else if (IsZOrderSwizzle(swizzle))
2310 {
2311 if ((pIn->bpp > 64) ||
2312 (msaa && (color || (pIn->bpp > 32))) ||
2313 ElemLib::IsBlockCompressed(pIn->format) ||
2314 ElemLib::IsMacroPixelPacked(pIn->format))
2315 {
2316 ADDR_ASSERT_ALWAYS();
2317 valid = FALSE;
2318 }
2319 }
2320 else if (IsStandardSwizzle(rsrcType, swizzle))
2321 {
2322 if (zbuffer || msaa)
2323 {
2324 ADDR_ASSERT_ALWAYS();
2325 valid = FALSE;
2326 }
2327 }
2328 else if (IsDisplaySwizzle(rsrcType, swizzle))
2329 {
2330 if (zbuffer || msaa)
2331 {
2332 ADDR_ASSERT_ALWAYS();
2333 valid = FALSE;
2334 }
2335 }
2336 else if (IsRtOptSwizzle(swizzle))
2337 {
2338 if (zbuffer)
2339 {
2340 ADDR_ASSERT_ALWAYS();
2341 valid = FALSE;
2342 }
2343 }
2344 else
2345 {
2346 ADDR_ASSERT_ALWAYS();
2347 valid = FALSE;
2348 }
2349
2350 // Block type check
2351 if (blk256B)
2352 {
2353 if (zbuffer || tex3d || msaa)
2354 {
2355 ADDR_ASSERT_ALWAYS();
2356 valid = FALSE;
2357 }
2358 }
2359
2360 return valid;
2361 }
2362
2363 /**
2364 ************************************************************************************************************************
2365 * Gfx11Lib::HwlComputeSurfaceInfoSanityCheck
2366 *
2367 * @brief
2368 * Compute surface info sanity check
2369 *
2370 * @return
2371 * ADDR_OK if all parameters are valid, ADDR_INVALIDPARAMS otherwise
2372 ************************************************************************************************************************
2373 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2374 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoSanityCheck(
2375 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2376 ) const
2377 {
2378 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2379 }
2380
2381 /**
2382 ************************************************************************************************************************
2383 * Gfx11Lib::HwlGetPreferredSurfaceSetting
2384 *
2385 * @brief
2386 * Internal function to get suggested surface information for client to use
2387 *
2388 * @return
2389 * ADDR_E_RETURNCODE
2390 ************************************************************************************************************************
2391 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2392 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
2393 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2394 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2395 ) const
2396 {
2397 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2398
2399 if (pIn->flags.fmask)
2400 {
2401 // There is no FMASK for GFX11 ASICs.
2402 ADDR_ASSERT_ALWAYS();
2403
2404 returnCode = ADDR_INVALIDPARAMS;
2405 }
2406 else
2407 {
2408 UINT_32 bpp = pIn->bpp;
2409 UINT_32 width = Max(pIn->width, 1u);
2410 UINT_32 height = Max(pIn->height, 1u);
2411
2412 // Set format to INVALID will skip this conversion
2413 if (pIn->format != ADDR_FMT_INVALID)
2414 {
2415 ElemMode elemMode = ADDR_UNCOMPRESSED;
2416 UINT_32 expandX, expandY;
2417
2418 // Get compression/expansion factors and element mode which indicates compression/expansion
2419 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2420 &elemMode,
2421 &expandX,
2422 &expandY);
2423
2424 UINT_32 basePitch = 0;
2425 GetElemLib()->AdjustSurfaceInfo(elemMode,
2426 expandX,
2427 expandY,
2428 &bpp,
2429 &basePitch,
2430 &width,
2431 &height);
2432 }
2433
2434 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2435 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2436 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2437 const BOOL_32 msaa = numSamples > 1;
2438
2439 // Pre sanity check on non swizzle mode parameters
2440 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2441 localIn.flags = pIn->flags;
2442 localIn.resourceType = pIn->resourceType;
2443 localIn.format = pIn->format;
2444 localIn.bpp = bpp;
2445 localIn.width = width;
2446 localIn.height = height;
2447 localIn.numSlices = numSlices;
2448 localIn.numMipLevels = numMipLevels;
2449 localIn.numSamples = numSamples;
2450 localIn.numFrags = numSamples;
2451
2452 if (ValidateNonSwModeParams(&localIn))
2453 {
2454 // Forbid swizzle mode(s) by client setting
2455 ADDR2_SWMODE_SET allowedSwModeSet = {};
2456 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx11LinearSwModeMask;
2457 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx11Blk256BSwModeMask;
2458 allowedSwModeSet.value |=
2459 pIn->forbiddenBlock.macroThin4KB ? 0 :
2460 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx11Blk4KBSwModeMask);
2461 allowedSwModeSet.value |=
2462 pIn->forbiddenBlock.macroThick4KB ? 0 :
2463 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick4KBSwModeMask : 0);
2464 allowedSwModeSet.value |=
2465 pIn->forbiddenBlock.macroThin64KB ? 0 :
2466 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask);
2467 allowedSwModeSet.value |=
2468 pIn->forbiddenBlock.macroThick64KB ? 0 :
2469 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick64KBSwModeMask : 0);
2470 allowedSwModeSet.value |=
2471 pIn->forbiddenBlock.gfx11.thin256KB ? 0 :
2472 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask);
2473 allowedSwModeSet.value |=
2474 pIn->forbiddenBlock.gfx11.thick256KB ? 0 :
2475 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick256KBSwModeMask : 0);
2476
2477 if (pIn->preferredSwSet.value != 0)
2478 {
2479 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx11ZSwModeMask;
2480 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx11StandardSwModeMask;
2481 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx11DisplaySwModeMask;
2482 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx11RenderSwModeMask;
2483 }
2484
2485 if (pIn->noXor)
2486 {
2487 allowedSwModeSet.value &= ~Gfx11XorSwModeMask;
2488 }
2489
2490 if (pIn->maxAlign > 0)
2491 {
2492 if (pIn->maxAlign < Size256K)
2493 {
2494 allowedSwModeSet.value &= ~Gfx11Blk256KBSwModeMask;
2495 }
2496
2497 if (pIn->maxAlign < Size64K)
2498 {
2499 allowedSwModeSet.value &= ~Gfx11Blk64KBSwModeMask;
2500 }
2501
2502 if (pIn->maxAlign < Size4K)
2503 {
2504 allowedSwModeSet.value &= ~Gfx11Blk4KBSwModeMask;
2505 }
2506
2507 if (pIn->maxAlign < Size256)
2508 {
2509 allowedSwModeSet.value &= ~Gfx11Blk256BSwModeMask;
2510 }
2511 }
2512
2513 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2514 switch (pIn->resourceType)
2515 {
2516 case ADDR_RSRC_TEX_1D:
2517 allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
2518 break;
2519
2520 case ADDR_RSRC_TEX_2D:
2521 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
2522 break;
2523
2524 case ADDR_RSRC_TEX_3D:
2525 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
2526
2527 if (pIn->flags.view3dAs2dArray)
2528 {
2529 // SW_LINEAR can be used for 3D thin images, including BCn image format.
2530 allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
2531 }
2532 break;
2533
2534 default:
2535 ADDR_ASSERT_ALWAYS();
2536 allowedSwModeSet.value = 0;
2537 break;
2538 }
2539
2540 if (ElemLib::IsBlockCompressed(pIn->format) ||
2541 ElemLib::IsMacroPixelPacked(pIn->format) ||
2542 (bpp > 64) ||
2543 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2544 {
2545 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
2546 }
2547
2548 if (pIn->format == ADDR_FMT_32_32_32)
2549 {
2550 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2551 }
2552
2553 if (msaa)
2554 {
2555 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
2556 }
2557
2558 if (pIn->flags.depth || pIn->flags.stencil)
2559 {
2560 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2561 }
2562
2563 if (pIn->flags.requireMetadata)
2564 {
2565 // Linear images can never be compressed
2566 allowedSwModeSet.value &= ~Gfx11LinearSwModeMask;
2567 if (pIn->flags.color)
2568 {
2569 // 256B formats must not be pipe-aligned (can't use in CB)
2570 allowedSwModeSet.value &= ~(Gfx11Blk256BSwModeMask);
2571 // D/S formats must not be pipe-aligned
2572 allowedSwModeSet.value &= ~(Gfx11DisplaySwModeMask | Gfx11StandardSwModeMask);
2573 }
2574 }
2575
2576 if (pIn->flags.display)
2577 {
2578 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2579 }
2580
2581 if (allowedSwModeSet.value != 0)
2582 {
2583 #if DEBUG
2584 // Post sanity check, at least AddrLib should accept the output generated by its own
2585 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2586
2587 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2588 {
2589 if (validateSwModeSet & 1)
2590 {
2591 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2592 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2593 }
2594
2595 validateSwModeSet >>= 1;
2596 }
2597 #endif
2598
2599 pOut->resourceType = pIn->resourceType;
2600 pOut->validSwModeSet = allowedSwModeSet;
2601 pOut->canXor = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
2602
2603 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &(pOut->validBlockSet));
2604 GetAllowedSwSet(allowedSwModeSet, &(pOut->validSwTypeSet));
2605
2606 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2607
2608 if (pOut->clientPreferredSwSet.value == 0)
2609 {
2610 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2611 }
2612
2613 // Apply optional restrictions
2614 if (pIn->flags.needEquation)
2615 {
2616 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
2617 ADDR_MAX_LEGACY_EQUATION_COMP;
2618 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
2619 }
2620
2621 if (allowedSwModeSet.value == Gfx11LinearSwModeMask)
2622 {
2623 pOut->swizzleMode = ADDR_SW_LINEAR;
2624 }
2625 else
2626 {
2627 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
2628
2629 if ((height > 1) && (computeMinSize == FALSE))
2630 {
2631 // Always ignore linear swizzle mode if:
2632 // 1. This is a (2D/3D) resource with height > 1
2633 // 2. Client doesn't require computing minimize size
2634 allowedSwModeSet.swLinear = 0;
2635 }
2636
2637 ADDR2_BLOCK_SET allowedBlockSet = {};
2638 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);
2639
2640 // Determine block size if there are 2 or more block type candidates
2641 if (IsPow2(allowedBlockSet.value) == FALSE)
2642 {
2643 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
2644
2645 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
2646
2647 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2648 {
2649 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S_X;
2650 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2651 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S_X;
2652 swMode[AddrBlockThin256KB] = ADDR_SW_256KB_R_X;
2653 swMode[AddrBlockThick256KB] = ADDR_SW_256KB_S_X;
2654 }
2655 else
2656 {
2657 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
2658 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D_X;
2659 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D_X;
2660 swMode[AddrBlockThin256KB] = ADDR_SW_256KB_D_X;
2661 }
2662
2663 UINT_64 padSize[AddrBlockMaxTiledType] = {};
2664
2665 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
2666 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
2667 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2668 UINT_32 minSizeBlk = AddrBlockMicro;
2669 UINT_64 minSize = 0;
2670
2671 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
2672
2673 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
2674 {
2675 if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2676 {
2677 localIn.swizzleMode = swMode[i];
2678
2679 if (localIn.swizzleMode == ADDR_SW_LINEAR)
2680 {
2681 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
2682 }
2683 else
2684 {
2685 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
2686 }
2687
2688 if (returnCode == ADDR_OK)
2689 {
2690 padSize[i] = localOut.surfSize;
2691
2692 if ((minSize == 0) ||
2693 Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
2694 {
2695 minSize = padSize[i];
2696 minSizeBlk = i;
2697 }
2698 }
2699 else
2700 {
2701 ADDR_ASSERT_ALWAYS();
2702 break;
2703 }
2704 }
2705 }
2706
2707 if (pIn->memoryBudget > 1.0)
2708 {
2709 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
2710 // smaller-block type again in coming loop
2711 switch (minSizeBlk)
2712 {
2713 case AddrBlockThick256KB:
2714 allowedBlockSet.gfx11.thin256KB = 0;
2715 case AddrBlockThin256KB:
2716 allowedBlockSet.macroThick64KB = 0;
2717 case AddrBlockThick64KB:
2718 allowedBlockSet.macroThin64KB = 0;
2719 case AddrBlockThin64KB:
2720 allowedBlockSet.macroThick4KB = 0;
2721 case AddrBlockThick4KB:
2722 allowedBlockSet.macroThin4KB = 0;
2723 case AddrBlockThin4KB:
2724 allowedBlockSet.micro = 0;
2725 case AddrBlockMicro:
2726 allowedBlockSet.linear = 0;
2727 case AddrBlockLinear:
2728 break;
2729
2730 default:
2731 ADDR_ASSERT_ALWAYS();
2732 break;
2733 }
2734
2735 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2736 {
2737 if ((i != minSizeBlk) &&
2738 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2739 {
2740 if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
2741 {
2742 // Clear the block type if the memory waste is unacceptable
2743 allowedBlockSet.value &= ~(1u << (i - 1));
2744 }
2745 }
2746 }
2747
2748 // Remove linear block type if 2 or more block types are allowed
2749 if (IsPow2(allowedBlockSet.value) == FALSE)
2750 {
2751 allowedBlockSet.linear = 0;
2752 }
2753
2754 // Select the biggest allowed block type
2755 minSizeBlk = Log2(allowedBlockSet.value) + 1;
2756
2757 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
2758 {
2759 minSizeBlk = AddrBlockLinear;
2760 }
2761 }
2762
2763 switch (minSizeBlk)
2764 {
2765 case AddrBlockLinear:
2766 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2767 break;
2768
2769 case AddrBlockMicro:
2770 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2771 allowedSwModeSet.value &= Gfx11Blk256BSwModeMask;
2772 break;
2773
2774 case AddrBlockThin4KB:
2775 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2776 allowedSwModeSet.value &= Gfx11Blk4KBSwModeMask;
2777 break;
2778
2779 case AddrBlockThick4KB:
2780 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2781 allowedSwModeSet.value &= Gfx11Rsrc3dThick4KBSwModeMask;
2782 break;
2783
2784 case AddrBlockThin64KB:
2785 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2786 Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask;
2787 break;
2788
2789 case AddrBlockThick64KB:
2790 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2791 allowedSwModeSet.value &= Gfx11Rsrc3dThick64KBSwModeMask;
2792 break;
2793
2794 case AddrBlockThin256KB:
2795 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2796 Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask;
2797 break;
2798
2799 case AddrBlockThick256KB:
2800 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2801 allowedSwModeSet.value &= Gfx11Rsrc3dThick256KBSwModeMask;
2802 break;
2803
2804 default:
2805 ADDR_ASSERT_ALWAYS();
2806 allowedSwModeSet.value = 0;
2807 break;
2808 }
2809 }
2810
2811 // Block type should be determined.
2812 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);
2813 ADDR_ASSERT(IsPow2(allowedBlockSet.value));
2814
2815 ADDR2_SWTYPE_SET allowedSwSet = {};
2816 GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);
2817
2818 // Determine swizzle type if there are 2 or more swizzle type candidates
2819 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
2820 {
2821 if (ElemLib::IsBlockCompressed(pIn->format))
2822 {
2823 if (allowedSwSet.sw_D)
2824 {
2825 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2826 }
2827 else if (allowedSwSet.sw_S)
2828 {
2829 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2830 }
2831 else
2832 {
2833 ADDR_ASSERT(allowedSwSet.sw_R);
2834 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2835 }
2836 }
2837 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2838 {
2839 if (allowedSwSet.sw_S)
2840 {
2841 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2842 }
2843 else if (allowedSwSet.sw_D)
2844 {
2845 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2846 }
2847 else
2848 {
2849 ADDR_ASSERT(allowedSwSet.sw_R);
2850 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2851 }
2852 }
2853 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2854 {
2855 if (pIn->flags.color && allowedSwSet.sw_R)
2856 {
2857 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2858 }
2859 else if (allowedSwSet.sw_S)
2860 {
2861 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2862 }
2863 else if (allowedSwSet.sw_D)
2864 {
2865 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2866 }
2867 else
2868 {
2869 ADDR_ASSERT(allowedSwSet.sw_Z);
2870 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2871 }
2872 }
2873 else
2874 {
2875 if (allowedSwSet.sw_R)
2876 {
2877 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2878 }
2879 else if (allowedSwSet.sw_D)
2880 {
2881 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2882 }
2883 else if (allowedSwSet.sw_Z)
2884 {
2885 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2886 }
2887 else
2888 {
2889 ADDR_ASSERT_ALWAYS();
2890 }
2891 }
2892
2893 // Swizzle type should be determined.
2894 GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);
2895 ADDR_ASSERT(IsPow2(allowedSwSet.value));
2896 }
2897
2898 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2899 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2900 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2901 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
2902 }
2903 }
2904 else
2905 {
2906 // Invalid combination...
2907 ADDR_ASSERT_ALWAYS();
2908 returnCode = ADDR_INVALIDPARAMS;
2909 }
2910 }
2911 else
2912 {
2913 // Invalid combination...
2914 ADDR_ASSERT_ALWAYS();
2915 returnCode = ADDR_INVALIDPARAMS;
2916 }
2917 }
2918
2919 return returnCode;
2920 }
2921
2922 /**
2923 ************************************************************************************************************************
2924 * Gfx11Lib::HwlGetPossibleSwizzleModes
2925 *
2926 * @brief
2927 * Returns a list of swizzle modes that are valid from the hardware's perspective for the client to choose from
2928 *
2929 * @return
2930 * ADDR_E_RETURNCODE
2931 ************************************************************************************************************************
2932 */
HwlGetPossibleSwizzleModes(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2933 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPossibleSwizzleModes(
2934 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2935 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2936 ) const
2937 {
2938 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2939
2940 if (pIn->flags.fmask)
2941 {
2942 // There is no FMASK for GFX11 ASICs.
2943 ADDR_ASSERT_ALWAYS();
2944
2945 returnCode = ADDR_INVALIDPARAMS;
2946 }
2947 else
2948 {
2949 UINT_32 bpp = pIn->bpp;
2950 UINT_32 width = Max(pIn->width, 1u);
2951 UINT_32 height = Max(pIn->height, 1u);
2952
2953 // Set format to INVALID will skip this conversion
2954 if (pIn->format != ADDR_FMT_INVALID)
2955 {
2956 ElemMode elemMode = ADDR_UNCOMPRESSED;
2957 UINT_32 expandX, expandY;
2958
2959 // Get compression/expansion factors and element mode which indicates compression/expansion
2960 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2961 &elemMode,
2962 &expandX,
2963 &expandY);
2964
2965 UINT_32 basePitch = 0;
2966 GetElemLib()->AdjustSurfaceInfo(elemMode,
2967 expandX,
2968 expandY,
2969 &bpp,
2970 &basePitch,
2971 &width,
2972 &height);
2973 }
2974
2975 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2976 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2977 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2978 const BOOL_32 msaa = numSamples > 1;
2979
2980 // Pre sanity check on non swizzle mode parameters
2981 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2982 localIn.flags = pIn->flags;
2983 localIn.resourceType = pIn->resourceType;
2984 localIn.format = pIn->format;
2985 localIn.bpp = bpp;
2986 localIn.width = width;
2987 localIn.height = height;
2988 localIn.numSlices = numSlices;
2989 localIn.numMipLevels = numMipLevels;
2990 localIn.numSamples = numSamples;
2991 localIn.numFrags = numSamples;
2992
2993 if (ValidateNonSwModeParams(&localIn))
2994 {
2995 // Allow appropriate swizzle modes by default
2996 ADDR2_SWMODE_SET allowedSwModeSet = {};
2997 allowedSwModeSet.value |= Gfx11LinearSwModeMask | Gfx11Blk256BSwModeMask;
2998 if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2999 {
3000 allowedSwModeSet.value |= Gfx11Rsrc3dThick4KBSwModeMask |
3001 Gfx11Rsrc3dThin64KBSwModeMask |
3002 Gfx11Rsrc3dThick64KBSwModeMask |
3003 Gfx11Rsrc3dThin256KBSwModeMask |
3004 Gfx11Rsrc3dThick256KBSwModeMask;
3005 }
3006 else
3007 {
3008 allowedSwModeSet.value |= Gfx11Blk4KBSwModeMask | Gfx11Blk64KBSwModeMask | Gfx11Blk256KBSwModeMask;
3009 }
3010
3011 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3012 switch (pIn->resourceType)
3013 {
3014 case ADDR_RSRC_TEX_1D:
3015 allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
3016 break;
3017
3018 case ADDR_RSRC_TEX_2D:
3019 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
3020 break;
3021
3022 case ADDR_RSRC_TEX_3D:
3023 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
3024
3025 if (pIn->flags.view3dAs2dArray)
3026 {
3027 // SW_LINEAR can be used for 3D thin images, including BCn image format.
3028 allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
3029 }
3030 break;
3031
3032 default:
3033 ADDR_ASSERT_ALWAYS();
3034 allowedSwModeSet.value = 0;
3035 break;
3036 }
3037
3038 // TODO: figure out if following restrictions are correct on GFX11...
3039 if (ElemLib::IsBlockCompressed(pIn->format) ||
3040 ElemLib::IsMacroPixelPacked(pIn->format) ||
3041 (bpp > 64) ||
3042 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3043 {
3044 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
3045 }
3046
3047 if (pIn->format == ADDR_FMT_32_32_32)
3048 {
3049 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
3050 }
3051
3052 if (msaa)
3053 {
3054 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
3055 }
3056
3057 if (pIn->flags.depth || pIn->flags.stencil)
3058 {
3059 allowedSwModeSet.value &= Gfx11ZSwModeMask;
3060 }
3061
3062 if (pIn->flags.requireMetadata)
3063 {
3064 // Linear images can never be compressed
3065 allowedSwModeSet.value &= ~Gfx11LinearSwModeMask;
3066 if (pIn->flags.color)
3067 {
3068 // 256B formats must not be pipe-aligned (can't use in CB)
3069 allowedSwModeSet.value &= ~(Gfx11Blk256BSwModeMask);
3070 // D/S formats must not be pipe-aligned
3071 allowedSwModeSet.value &= ~(Gfx11DisplaySwModeMask | Gfx11StandardSwModeMask);
3072 }
3073 }
3074
3075 if (pIn->flags.display)
3076 {
3077 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3078 }
3079
3080 if (allowedSwModeSet.value != 0)
3081 {
3082 #if DEBUG
3083 // Post sanity check, at least AddrLib should accept the output generated by its own
3084 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3085
3086 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3087 {
3088 if (validateSwModeSet & 1)
3089 {
3090 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3091 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3092 }
3093
3094 validateSwModeSet >>= 1;
3095 }
3096 #endif
3097
3098 pOut->resourceType = pIn->resourceType;
3099 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3100
3101 if (pOut->clientPreferredSwSet.value == 0)
3102 {
3103 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3104 }
3105
3106 if (pIn->flags.needEquation)
3107 {
3108 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
3109 ADDR_MAX_LEGACY_EQUATION_COMP;
3110 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3111 }
3112
3113 pOut->validSwModeSet = allowedSwModeSet;
3114 pOut->canXor = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
3115 }
3116 else
3117 {
3118 // Invalid combination...
3119 ADDR_ASSERT_ALWAYS();
3120 returnCode = ADDR_INVALIDPARAMS;
3121 }
3122 }
3123 else
3124 {
3125 // Invalid combination...
3126 ADDR_ASSERT_ALWAYS();
3127 returnCode = ADDR_INVALIDPARAMS;
3128 }
3129 }
3130
3131 return returnCode;
3132 }
3133
3134 /**
3135 ************************************************************************************************************************
3136 * Gfx11Lib::HwlGetAllowedBlockSet
3137 *
3138 * @brief
3139 * Returns the set of allowed block sizes given the allowed swizzle modes and resource type
3140 *
3141 * @return
3142 * ADDR_E_RETURNCODE
3143 ************************************************************************************************************************
3144 */
HwlGetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet,AddrResourceType rsrcType,ADDR2_BLOCK_SET * pAllowedBlockSet) const3145 ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedBlockSet(
3146 ADDR2_SWMODE_SET allowedSwModeSet, ///< [in] allowed swizzle modes
3147 AddrResourceType rsrcType, ///< [in] resource type
3148 ADDR2_BLOCK_SET* pAllowedBlockSet ///< [out] allowed block sizes
3149 ) const
3150 {
3151 ADDR2_BLOCK_SET allowedBlockSet = {};
3152
3153 allowedBlockSet.micro = (allowedSwModeSet.value & Gfx11Blk256BSwModeMask) ? TRUE : FALSE;
3154 allowedBlockSet.linear = (allowedSwModeSet.value & Gfx11LinearSwModeMask) ? TRUE : FALSE;
3155
3156 if (rsrcType == ADDR_RSRC_TEX_3D)
3157 {
3158 allowedBlockSet.macroThick4KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick4KBSwModeMask) ? TRUE : FALSE;
3159 allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx11Rsrc3dThin64KBSwModeMask) ? TRUE : FALSE;
3160 allowedBlockSet.macroThick64KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick64KBSwModeMask) ? TRUE : FALSE;
3161 allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThin256KBSwModeMask) ? TRUE : FALSE;
3162 allowedBlockSet.gfx11.thick256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick256KBSwModeMask) ? TRUE : FALSE;
3163 }
3164 else
3165 {
3166 allowedBlockSet.macroThin4KB = (allowedSwModeSet.value & Gfx11Blk4KBSwModeMask) ? TRUE : FALSE;
3167 allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx11Blk64KBSwModeMask) ? TRUE : FALSE;
3168 allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Blk256KBSwModeMask) ? TRUE : FALSE;
3169 }
3170
3171 *pAllowedBlockSet = allowedBlockSet;
3172 return ADDR_OK;
3173 }
3174
3175 /**
3176 ************************************************************************************************************************
3177 * Gfx11Lib::HwlGetAllowedSwSet
3178 *
3179 * @brief
3180 * Returns the set of allowed swizzle types given the allowed swizzle modes
3181 * @return
3182 * ADDR_E_RETURNCODE
3183 ************************************************************************************************************************
3184 */
HwlGetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet,ADDR2_SWTYPE_SET * pAllowedSwSet) const3185 ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedSwSet(
3186 ADDR2_SWMODE_SET allowedSwModeSet, ///< [in] allowed swizzle modes
3187 ADDR2_SWTYPE_SET* pAllowedSwSet ///< [out] allowed swizzle types
3188 ) const
3189 {
3190 ADDR2_SWTYPE_SET allowedSwSet = {};
3191
3192 allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx11ZSwModeMask) ? TRUE : FALSE;
3193 allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx11StandardSwModeMask) ? TRUE : FALSE;
3194 allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx11DisplaySwModeMask) ? TRUE : FALSE;
3195 allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx11RenderSwModeMask) ? TRUE : FALSE;
3196
3197 *pAllowedSwSet = allowedSwSet;
3198 return ADDR_OK;
3199 }
3200
3201 /**
3202 ************************************************************************************************************************
3203 * Gfx11Lib::ComputeStereoInfo
3204 *
3205 * @brief
3206 * Compute height alignment and right eye pipeBankXor for stereo surface
3207 *
3208 * @return
3209 * Error code
3210 *
3211 ************************************************************************************************************************
3212 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3213 ADDR_E_RETURNCODE Gfx11Lib::ComputeStereoInfo(
3214 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
3215 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
3216 UINT_32* pRightXor ///< Right eye xor
3217 ) const
3218 {
3219 ADDR_E_RETURNCODE ret = ADDR_OK;
3220
3221 *pRightXor = 0;
3222
3223 if (IsNonPrtXor(pIn->swizzleMode))
3224 {
3225 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3226 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3227 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3228 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3229 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3230
3231 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3232 {
3233 UINT_32 yMax = 0;
3234 UINT_32 yPosMask = 0;
3235
3236 // First get "max y bit"
3237 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3238 {
3239 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3240
3241 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3242 (m_equationTable[eqIndex].addr[i].index > yMax))
3243 {
3244 yMax = m_equationTable[eqIndex].addr[i].index;
3245 }
3246
3247 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3248 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3249 (m_equationTable[eqIndex].xor1[i].index > yMax))
3250 {
3251 yMax = m_equationTable[eqIndex].xor1[i].index;
3252 }
3253
3254 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3255 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3256 (m_equationTable[eqIndex].xor2[i].index > yMax))
3257 {
3258 yMax = m_equationTable[eqIndex].xor2[i].index;
3259 }
3260 }
3261
3262 // Then loop again for populating a position mask of "max Y bit"
3263 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3264 {
3265 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3266 (m_equationTable[eqIndex].addr[i].index == yMax))
3267 {
3268 yPosMask |= 1u << i;
3269 }
3270 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3271 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3272 (m_equationTable[eqIndex].xor1[i].index == yMax))
3273 {
3274 yPosMask |= 1u << i;
3275 }
3276 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3277 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3278 (m_equationTable[eqIndex].xor2[i].index == yMax))
3279 {
3280 yPosMask |= 1u << i;
3281 }
3282 }
3283
3284 const UINT_32 additionalAlign = 1 << yMax;
3285
3286 if (additionalAlign >= *pAlignY)
3287 {
3288 *pAlignY = additionalAlign;
3289
3290 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3291
3292 if ((alignedHeight >> yMax) & 1)
3293 {
3294 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3295 }
3296 }
3297 }
3298 else
3299 {
3300 ret = ADDR_INVALIDPARAMS;
3301 }
3302 }
3303
3304 return ret;
3305 }
3306
3307 /**
3308 ************************************************************************************************************************
3309 * Gfx11Lib::HwlComputeSurfaceInfoTiled
3310 *
3311 * @brief
3312 * Internal function to calculate alignment for tiled surface
3313 *
3314 * @return
3315 * ADDR_E_RETURNCODE
3316 ************************************************************************************************************************
3317 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3318 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoTiled(
3319 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3320 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3321 ) const
3322 {
3323 ADDR_E_RETURNCODE ret;
3324
3325 // Mip chain dimesion and epitch has no meaning in GFX11, set to default value
3326 pOut->mipChainPitch = 0;
3327 pOut->mipChainHeight = 0;
3328 pOut->mipChainSlice = 0;
3329 pOut->epitchIsHeight = FALSE;
3330
3331 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3332 pOut->mipChainInTail = FALSE;
3333 pOut->firstMipIdInTail = pIn->numMipLevels;
3334
3335 if (IsBlock256b(pIn->swizzleMode))
3336 {
3337 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3338 }
3339 else
3340 {
3341 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3342 }
3343
3344 return ret;
3345 }
3346
3347 /**
3348 ************************************************************************************************************************
3349 * Gfx11Lib::ComputeSurfaceInfoMicroTiled
3350 *
3351 * @brief
3352 * Internal function to calculate alignment for micro tiled surface
3353 *
3354 * @return
3355 * ADDR_E_RETURNCODE
3356 ************************************************************************************************************************
3357 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3358 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMicroTiled(
3359 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3360 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3361 ) const
3362 {
3363 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3364 &pOut->blockHeight,
3365 &pOut->blockSlices,
3366 pIn->bpp,
3367 pIn->numSamples,
3368 pIn->resourceType,
3369 pIn->swizzleMode);
3370
3371 if (ret == ADDR_OK)
3372 {
3373 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3374
3375 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3376 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3377 pOut->numSlices = pIn->numSlices;
3378 pOut->baseAlign = blockSize;
3379
3380 if (pIn->numMipLevels > 1)
3381 {
3382 const UINT_32 mip0Width = pIn->width;
3383 const UINT_32 mip0Height = pIn->height;
3384 UINT_64 mipSliceSize = 0;
3385
3386 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3387 {
3388 UINT_32 mipWidth, mipHeight;
3389
3390 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3391
3392 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3393 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3394
3395 if (pOut->pMipInfo != NULL)
3396 {
3397 pOut->pMipInfo[i].pitch = mipActualWidth;
3398 pOut->pMipInfo[i].height = mipActualHeight;
3399 pOut->pMipInfo[i].depth = 1;
3400 pOut->pMipInfo[i].offset = mipSliceSize;
3401 pOut->pMipInfo[i].mipTailOffset = 0;
3402 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3403 }
3404
3405 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3406 }
3407
3408 pOut->sliceSize = mipSliceSize;
3409 pOut->surfSize = mipSliceSize * pOut->numSlices;
3410 }
3411 else
3412 {
3413 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3414 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3415
3416 if (pOut->pMipInfo != NULL)
3417 {
3418 pOut->pMipInfo[0].pitch = pOut->pitch;
3419 pOut->pMipInfo[0].height = pOut->height;
3420 pOut->pMipInfo[0].depth = 1;
3421 pOut->pMipInfo[0].offset = 0;
3422 pOut->pMipInfo[0].mipTailOffset = 0;
3423 pOut->pMipInfo[0].macroBlockOffset = 0;
3424 }
3425 }
3426
3427 }
3428
3429 return ret;
3430 }
3431
3432 /**
3433 ************************************************************************************************************************
3434 * Gfx11Lib::ComputeSurfaceInfoMacroTiled
3435 *
3436 * @brief
3437 * Internal function to calculate alignment for macro tiled surface
3438 *
3439 * @return
3440 * ADDR_E_RETURNCODE
3441 ************************************************************************************************************************
3442 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3443 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMacroTiled(
3444 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3445 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3446 ) const
3447 {
3448 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3449 &pOut->blockHeight,
3450 &pOut->blockSlices,
3451 pIn->bpp,
3452 pIn->numSamples,
3453 pIn->resourceType,
3454 pIn->swizzleMode);
3455
3456 if (returnCode == ADDR_OK)
3457 {
3458 UINT_32 heightAlign = pOut->blockHeight;
3459
3460 if (pIn->flags.qbStereo)
3461 {
3462 UINT_32 rightXor = 0;
3463
3464 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3465
3466 if (returnCode == ADDR_OK)
3467 {
3468 pOut->pStereoInfo->rightSwizzle = rightXor;
3469 }
3470 }
3471
3472 if (returnCode == ADDR_OK)
3473 {
3474 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3475 const UINT_32 blockSize = 1 << blockSizeLog2;
3476
3477 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3478 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3479 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3480 pOut->baseAlign = blockSize;
3481
3482 if (pIn->numMipLevels > 1)
3483 {
3484 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3485 pIn->swizzleMode,
3486 pOut->blockWidth,
3487 pOut->blockHeight,
3488 pOut->blockSlices);
3489 const UINT_32 mip0Width = pIn->width;
3490 const UINT_32 mip0Height = pIn->height;
3491 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3492 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3493 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3494 const UINT_32 index = Log2(pIn->bpp >> 3);
3495 UINT_32 firstMipInTail = pIn->numMipLevels;
3496 UINT_64 mipChainSliceSize = 0;
3497 UINT_64 mipSize[MaxMipLevels];
3498 UINT_64 mipSliceSize[MaxMipLevels];
3499
3500 // For htile, we need to make z16 and stencil enter the mip tail at the same time as z32 would
3501 Dim3d fixedTailMaxDim = tailMaxDim;
3502 if (IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3503 {
3504 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3505 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3506 }
3507
3508 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3509 {
3510 UINT_32 mipWidth, mipHeight, mipDepth;
3511
3512 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3513
3514 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3515 {
3516 firstMipInTail = i;
3517 mipChainSliceSize += blockSize / pOut->blockSlices;
3518 break;
3519 }
3520 else
3521 {
3522 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3523 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3524 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3525 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3526
3527 mipSize[i] = sliceSize * depth;
3528 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3529 mipChainSliceSize += sliceSize;
3530
3531 if (pOut->pMipInfo != NULL)
3532 {
3533 pOut->pMipInfo[i].pitch = pitch;
3534 pOut->pMipInfo[i].height = height;
3535 pOut->pMipInfo[i].depth = depth;
3536 }
3537 }
3538 }
3539
3540 pOut->sliceSize = mipChainSliceSize;
3541 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3542 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3543 pOut->firstMipIdInTail = firstMipInTail;
3544
3545 if (pOut->pMipInfo != NULL)
3546 {
3547 UINT_64 offset = 0;
3548 UINT_64 macroBlkOffset = 0;
3549 UINT_32 tailMaxDepth = 0;
3550
3551 if (firstMipInTail != pIn->numMipLevels)
3552 {
3553 UINT_32 mipWidth, mipHeight;
3554
3555 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3556 &mipWidth, &mipHeight, &tailMaxDepth);
3557
3558 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3559 macroBlkOffset = blockSize;
3560 }
3561
3562 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3563 {
3564 pOut->pMipInfo[i].offset = offset;
3565 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3566 pOut->pMipInfo[i].mipTailOffset = 0;
3567
3568 offset += mipSize[i];
3569 macroBlkOffset += mipSliceSize[i];
3570 }
3571
3572 UINT_32 pitch = tailMaxDim.w;
3573 UINT_32 height = tailMaxDim.h;
3574 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3575
3576 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3577
3578 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3579 {
3580 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3581 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3582
3583 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3584 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3585 pOut->pMipInfo[i].macroBlockOffset = 0;
3586
3587 pOut->pMipInfo[i].pitch = pitch;
3588 pOut->pMipInfo[i].height = height;
3589 pOut->pMipInfo[i].depth = depth;
3590
3591 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3592 ((mipOffset >> 10) & 2) |
3593 ((mipOffset >> 11) & 4) |
3594 ((mipOffset >> 12) & 8) |
3595 ((mipOffset >> 13) & 16) |
3596 ((mipOffset >> 14) & 32);
3597 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3598 ((mipOffset >> 9) & 2) |
3599 ((mipOffset >> 10) & 4) |
3600 ((mipOffset >> 11) & 8) |
3601 ((mipOffset >> 12) & 16) |
3602 ((mipOffset >> 13) & 32);
3603
3604 if (blockSizeLog2 & 1)
3605 {
3606 const UINT_32 temp = mipX;
3607 mipX = mipY;
3608 mipY = temp;
3609
3610 if (index & 1)
3611 {
3612 mipY = (mipY << 1) | (mipX & 1);
3613 mipX = mipX >> 1;
3614 }
3615 }
3616
3617 if (isThin)
3618 {
3619 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3620 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3621 pOut->pMipInfo[i].mipTailCoordZ = 0;
3622
3623 pitch = Max(pitch >> 1, Block256_2d[index].w);
3624 height = Max(height >> 1, Block256_2d[index].h);
3625 depth = 1;
3626 }
3627 else
3628 {
3629 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3630 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3631 pOut->pMipInfo[i].mipTailCoordZ = 0;
3632
3633 pitch = Max(pitch >> 1, Block256_3d[index].w);
3634 height = Max(height >> 1, Block256_3d[index].h);
3635 depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d);
3636 }
3637 }
3638 }
3639 }
3640 else
3641 {
3642 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numSamples;
3643 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3644
3645 if (pOut->pMipInfo != NULL)
3646 {
3647 pOut->pMipInfo[0].pitch = pOut->pitch;
3648 pOut->pMipInfo[0].height = pOut->height;
3649 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3650 pOut->pMipInfo[0].offset = 0;
3651 pOut->pMipInfo[0].mipTailOffset = 0;
3652 pOut->pMipInfo[0].macroBlockOffset = 0;
3653 pOut->pMipInfo[0].mipTailCoordX = 0;
3654 pOut->pMipInfo[0].mipTailCoordY = 0;
3655 pOut->pMipInfo[0].mipTailCoordZ = 0;
3656 }
3657 }
3658 }
3659 }
3660
3661 return returnCode;
3662 }
3663
3664 /**
3665 ************************************************************************************************************************
3666 * Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled
3667 *
3668 * @brief
3669 * Internal function to calculate address from coord for tiled swizzle surface
3670 *
3671 * @return
3672 * ADDR_E_RETURNCODE
3673 ************************************************************************************************************************
3674 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3675 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
3676 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3677 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3678 ) const
3679 {
3680 ADDR_E_RETURNCODE ret;
3681
3682 if (IsBlock256b(pIn->swizzleMode))
3683 {
3684 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3685 }
3686 else
3687 {
3688 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3689 }
3690
3691 return ret;
3692 }
3693
3694 /**
3695 ************************************************************************************************************************
3696 * Gfx11Lib::HwlCopyMemToSurface
3697 *
3698 * @brief
3699 * Copy multiple regions from memory to a non-linear surface.
3700 *
3701 * @return
3702 * Error or success.
3703 ************************************************************************************************************************
3704 */
HwlCopyMemToSurface(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const3705 ADDR_E_RETURNCODE Gfx11Lib::HwlCopyMemToSurface(
3706 const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
3707 const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
3708 UINT_32 regionCount
3709 ) const
3710 {
3711 // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
3712 // optimized for a particular micro-swizzle mode if available.
3713 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3714 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3715 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
3716 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3717 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3718
3719 if (pIn->numSamples > 1)
3720 {
3721 // TODO: MSAA
3722 returnCode = ADDR_NOTIMPLEMENTED;
3723 }
3724 if (IsBlockVariable(pIn->swizzleMode))
3725 {
3726 // TODO: larger LUTs for worst-case 256KB swizzle.
3727 returnCode = ADDR_NOTIMPLEMENTED;
3728 }
3729
3730 localIn.size = sizeof(localIn);
3731 localIn.flags = pIn->flags;
3732 localIn.swizzleMode = pIn->swizzleMode;
3733 localIn.resourceType = pIn->resourceType;
3734 localIn.format = pIn->format;
3735 localIn.bpp = pIn->bpp;
3736 localIn.width = Max(pIn->unAlignedDims.width, 1u);
3737 localIn.height = Max(pIn->unAlignedDims.height, 1u);
3738 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
3739 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3740 localIn.numSamples = Max(pIn->numSamples, 1u);
3741
3742 localOut.size = sizeof(localOut);
3743 localOut.pMipInfo = mipInfo;
3744
3745 if (returnCode == ADDR_OK)
3746 {
3747 returnCode = ComputeSurfaceInfo(&localIn, &localOut);
3748 }
3749 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3750 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3751 pIn->resourceType,
3752 Log2(pIn->bpp >> 3),
3753 pIn->numSamples);
3754
3755 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
3756 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3757 ADDR_EXTENT3D blockExtent = {
3758 localOut.blockWidth,
3759 localOut.blockHeight,
3760 localOut.blockSlices
3761 };
3762
3763 LutAddresser addresser = LutAddresser();
3764 addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
3765 UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
3766 if (pfnCopyUnaligned == nullptr)
3767 {
3768 ADDR_ASSERT_ALWAYS();
3769 returnCode = ADDR_INVALIDPARAMS;
3770 }
3771
3772 if (returnCode == ADDR_OK)
3773 {
3774 for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
3775 {
3776 const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
3777 const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
3778 UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
3779 UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
3780
3781 UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
3782 UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
3783 UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
3784
3785 for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
3786 {
3787 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
3788 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
3789 // for unaligned copies.
3790 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
3791 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
3792
3793 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
3794 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
3795
3796 ADDR_COORD2D sliceOrigin = { xStart, yStart };
3797 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
3798
3799 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
3800 VoidPtrInc(pCurRegion->pMem, memOffset),
3801 pCurRegion->memRowPitch,
3802 yBlks,
3803 sliceOrigin,
3804 sliceExtent,
3805 sliceXor,
3806 addresser);
3807 }
3808 }
3809 }
3810 return returnCode;
3811 }
3812
3813 /**
3814 ************************************************************************************************************************
3815 * Gfx11Lib::HwlCopySurfaceToMem
3816 *
3817 * @brief
3818 * Copy multiple regions from a non-linear surface to memory.
3819 *
3820 * @return
3821 * Error or success.
3822 ************************************************************************************************************************
3823 */
HwlCopySurfaceToMem(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const3824 ADDR_E_RETURNCODE Gfx11Lib::HwlCopySurfaceToMem(
3825 const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
3826 const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
3827 UINT_32 regionCount
3828 ) const
3829 {
3830 // Copy tiled surface to memory. We will use the 'swizzler' object to dispatch to a version of the copy routine
3831 // optimized for a particular micro-swizzle mode if available.
3832 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3833 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3834 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
3835 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3836 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3837
3838 if (pIn->numSamples > 1)
3839 {
3840 // TODO: MSAA
3841 returnCode = ADDR_NOTIMPLEMENTED;
3842 }
3843 if (IsBlockVariable(pIn->swizzleMode))
3844 {
3845 // TODO: larger LUTs for worst-case 256KB swizzle.
3846 returnCode = ADDR_NOTIMPLEMENTED;
3847 }
3848
3849 localIn.size = sizeof(localIn);
3850 localIn.flags = pIn->flags;
3851 localIn.swizzleMode = pIn->swizzleMode;
3852 localIn.resourceType = pIn->resourceType;
3853 localIn.format = pIn->format;
3854 localIn.bpp = pIn->bpp;
3855 localIn.width = Max(pIn->unAlignedDims.width, 1u);
3856 localIn.height = Max(pIn->unAlignedDims.height, 1u);
3857 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
3858 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3859 localIn.numSamples = Max(pIn->numSamples, 1u);
3860
3861 localOut.size = sizeof(localOut);
3862 localOut.pMipInfo = mipInfo;
3863
3864 if (returnCode == ADDR_OK)
3865 {
3866 returnCode = ComputeSurfaceInfo(&localIn, &localOut);
3867 }
3868 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3869 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3870 pIn->resourceType,
3871 Log2(pIn->bpp >> 3),
3872 pIn->numSamples);
3873
3874 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
3875 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3876 ADDR_EXTENT3D blockExtent = {
3877 localOut.blockWidth,
3878 localOut.blockHeight,
3879 localOut.blockSlices
3880 };
3881
3882 LutAddresser addresser = LutAddresser();
3883 addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
3884 UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
3885 if (pfnCopyUnaligned == nullptr)
3886 {
3887 ADDR_ASSERT_ALWAYS();
3888 returnCode = ADDR_INVALIDPARAMS;
3889 }
3890
3891 if (returnCode == ADDR_OK)
3892 {
3893 for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
3894 {
3895 const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
3896 const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
3897 UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
3898 UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
3899
3900 UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
3901 UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
3902 UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
3903
3904 for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
3905 {
3906 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
3907 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
3908 // for unaligned copies.
3909 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
3910 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
3911
3912 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
3913 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
3914
3915 ADDR_COORD2D sliceOrigin = { xStart, yStart };
3916 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
3917
3918 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
3919 VoidPtrInc(pCurRegion->pMem, memOffset),
3920 pCurRegion->memRowPitch,
3921 yBlks,
3922 sliceOrigin,
3923 sliceExtent,
3924 sliceXor,
3925 addresser);
3926 }
3927 }
3928 }
3929 return returnCode;
3930 }
3931
3932
3933 /**
3934 ************************************************************************************************************************
3935 * Gfx11Lib::ComputeOffsetFromEquation
3936 *
3937 * @brief
3938 * Compute offset from equation
3939 *
3940 * @return
3941 * Offset
3942 ************************************************************************************************************************
3943 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3944 UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
3945 const ADDR_EQUATION* pEq, ///< Equation
3946 UINT_32 x, ///< x coord in bytes
3947 UINT_32 y, ///< y coord in pixel
3948 UINT_32 z ///< z coord in slice
3949 ) const
3950 {
3951 UINT_32 offset = 0;
3952
3953 for (UINT_32 i = 0; i < pEq->numBits; i++)
3954 {
3955 UINT_32 v = 0;
3956
3957 for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
3958 {
3959 if (pEq->comps[c][i].valid)
3960 {
3961 if (pEq->comps[c][i].channel == 0)
3962 {
3963 v ^= (x >> pEq->comps[c][i].index) & 1;
3964 }
3965 else if (pEq->comps[c][i].channel == 1)
3966 {
3967 v ^= (y >> pEq->comps[c][i].index) & 1;
3968 }
3969 else
3970 {
3971 ADDR_ASSERT(pEq->comps[c][i].channel == 2);
3972 v ^= (z >> pEq->comps[c][i].index) & 1;
3973 }
3974 }
3975 }
3976
3977 offset |= (v << i);
3978 }
3979
3980 return offset;
3981 }
3982
3983 /**
3984 ************************************************************************************************************************
3985 * Gfx11Lib::GetSwizzlePatternInfo
3986 *
3987 * @brief
3988 * Get swizzle pattern
3989 *
3990 * @return
3991 * Swizzle pattern information
3992 ************************************************************************************************************************
3993 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3994 const ADDR_SW_PATINFO* Gfx11Lib::GetSwizzlePatternInfo(
3995 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3996 AddrResourceType resourceType, ///< Resource type
3997 UINT_32 elemLog2, ///< Element size in bytes log2
3998 UINT_32 numFrag ///< Number of fragment
3999 ) const
4000 {
4001 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4002 const ADDR_SW_PATINFO* patInfo = NULL;
4003 const UINT_32 swizzleMask = 1 << swizzleMode;
4004 const BOOL_32 isBlock256k = IsBlock256kb(swizzleMode);
4005 const BOOL_32 isBlock64K = IsBlock64kb(swizzleMode);
4006
4007 if (IsLinear(swizzleMode) == FALSE)
4008 {
4009 if (resourceType == ADDR_RSRC_TEX_3D)
4010 {
4011 ADDR_ASSERT(numFrag == 1);
4012
4013 if ((swizzleMask & Gfx11Rsrc3dSwModeMask) != 0)
4014 {
4015 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
4016 {
4017 if (isBlock256k)
4018 {
4019 ADDR_ASSERT((swizzleMode == ADDR_SW_256KB_Z_X) || (swizzleMode == ADDR_SW_256KB_R_X));
4020 patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
4021 }
4022 else if (isBlock64K)
4023 {
4024 ADDR_ASSERT((swizzleMode == ADDR_SW_64KB_Z_X) || (swizzleMode == ADDR_SW_64KB_R_X));
4025 patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
4026 }
4027 else
4028 {
4029 ADDR_ASSERT_ALWAYS();
4030 }
4031 }
4032 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4033 {
4034 if (isBlock256k)
4035 {
4036 ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
4037 // patInfo = GFX11_SW_256K_D3_X_PATINFO;
4038 }
4039 else if (isBlock64K)
4040 {
4041 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4042 patInfo = GFX11_SW_64K_D3_X_PATINFO;
4043 }
4044 else
4045 {
4046 ADDR_ASSERT_ALWAYS();
4047 }
4048 }
4049 else
4050 {
4051 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4052
4053 if (isBlock256k)
4054 {
4055 ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_S_X);
4056 patInfo = GFX11_SW_256K_S3_X_PATINFO;
4057 }
4058 else if (isBlock64K)
4059 {
4060 if (swizzleMode == ADDR_SW_64KB_S)
4061 {
4062 patInfo = GFX11_SW_64K_S3_PATINFO;
4063 }
4064 else if (swizzleMode == ADDR_SW_64KB_S_X)
4065 {
4066 patInfo = GFX11_SW_64K_S3_X_PATINFO;
4067 }
4068 else if (swizzleMode == ADDR_SW_64KB_S_T)
4069 {
4070 patInfo = GFX11_SW_64K_S3_T_PATINFO;
4071 }
4072 else
4073 {
4074 ADDR_ASSERT_ALWAYS();
4075 }
4076 }
4077 else if (IsBlock4kb(swizzleMode))
4078 {
4079 if (swizzleMode == ADDR_SW_4KB_S)
4080 {
4081 patInfo = GFX11_SW_4K_S3_PATINFO;
4082 }
4083 else if (swizzleMode == ADDR_SW_4KB_S_X)
4084 {
4085 patInfo = GFX11_SW_4K_S3_X_PATINFO;
4086 }
4087 else
4088 {
4089 ADDR_ASSERT_ALWAYS();
4090 }
4091 }
4092 else
4093 {
4094 ADDR_ASSERT_ALWAYS();
4095 }
4096 }
4097 }
4098 }
4099 else
4100 {
4101 if ((swizzleMask & Gfx11Rsrc2dSwModeMask) != 0)
4102 {
4103 if (IsBlock256b(swizzleMode))
4104 {
4105 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4106 patInfo = GFX11_SW_256_D_PATINFO;
4107 }
4108 else if (IsBlock4kb(swizzleMode))
4109 {
4110 if (swizzleMode == ADDR_SW_4KB_D)
4111 {
4112 patInfo = GFX11_SW_4K_D_PATINFO;
4113 }
4114 else if (swizzleMode == ADDR_SW_4KB_D_X)
4115 {
4116 patInfo = GFX11_SW_4K_D_X_PATINFO;
4117 }
4118 else
4119 {
4120 ADDR_ASSERT_ALWAYS();
4121 }
4122 }
4123 else if (isBlock64K)
4124 {
4125 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
4126 {
4127 if (numFrag == 1)
4128 {
4129 patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
4130 }
4131 else if (numFrag == 2)
4132 {
4133 patInfo = GFX11_SW_64K_ZR_X_2xaa_PATINFO;
4134 }
4135 else if (numFrag == 4)
4136 {
4137 patInfo = GFX11_SW_64K_ZR_X_4xaa_PATINFO;
4138 }
4139 else if (numFrag == 8)
4140 {
4141 patInfo = GFX11_SW_64K_ZR_X_8xaa_PATINFO;
4142 }
4143 else
4144 {
4145 ADDR_ASSERT_ALWAYS();
4146 }
4147 }
4148 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4149 {
4150 if (swizzleMode == ADDR_SW_64KB_D)
4151 {
4152 patInfo = GFX11_SW_64K_D_PATINFO;
4153 }
4154 else if (swizzleMode == ADDR_SW_64KB_D_X)
4155 {
4156 patInfo = GFX11_SW_64K_D_X_PATINFO;
4157 }
4158 else if (swizzleMode == ADDR_SW_64KB_D_T)
4159 {
4160 patInfo = GFX11_SW_64K_D_T_PATINFO;
4161 }
4162 else
4163 {
4164 ADDR_ASSERT_ALWAYS();
4165 }
4166 }
4167 else
4168 {
4169 ADDR_ASSERT_ALWAYS();
4170 }
4171 }
4172 else if (isBlock256k)
4173 {
4174 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
4175 {
4176 if (numFrag == 1)
4177 {
4178 patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
4179 }
4180 else if (numFrag == 2)
4181 {
4182 patInfo = GFX11_SW_256K_ZR_X_2xaa_PATINFO;
4183 }
4184 else if (numFrag == 4)
4185 {
4186 patInfo = GFX11_SW_256K_ZR_X_4xaa_PATINFO;
4187 }
4188 else if (numFrag == 8)
4189 {
4190 patInfo = GFX11_SW_256K_ZR_X_8xaa_PATINFO;
4191 }
4192 else
4193 {
4194 ADDR_ASSERT_ALWAYS();
4195 }
4196 }
4197 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4198 {
4199 ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
4200 patInfo = GFX11_SW_256K_D_X_PATINFO;
4201 }
4202 else
4203 {
4204 ADDR_ASSERT_ALWAYS();
4205 }
4206 }
4207 else
4208 {
4209 ADDR_ASSERT_ALWAYS();
4210 }
4211 }
4212 }
4213 }
4214
4215 return (patInfo != NULL) ? &patInfo[index] : NULL;
4216 }
4217
4218 /**
4219 ************************************************************************************************************************
4220 * Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled
4221 *
4222 * @brief
4223 * Internal function to calculate address from coord for micro tiled swizzle surface
4224 *
4225 * @return
4226 * ADDR_E_RETURNCODE
4227 ************************************************************************************************************************
4228 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4229 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4230 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4231 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4232 ) const
4233 {
4234 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4235 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4236 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4237
4238 localIn.swizzleMode = pIn->swizzleMode;
4239 localIn.flags = pIn->flags;
4240 localIn.resourceType = pIn->resourceType;
4241 localIn.bpp = pIn->bpp;
4242 localIn.width = Max(pIn->unalignedWidth, 1u);
4243 localIn.height = Max(pIn->unalignedHeight, 1u);
4244 localIn.numSlices = Max(pIn->numSlices, 1u);
4245 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4246 localIn.numSamples = Max(pIn->numSamples, 1u);
4247 localIn.numFrags = localIn.numSamples;
4248 localOut.pMipInfo = mipInfo;
4249
4250 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4251
4252 if (ret == ADDR_OK)
4253 {
4254 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4255 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4256 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4257 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
4258
4259 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4260 {
4261 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4262 const UINT_32 yb = pIn->y / localOut.blockHeight;
4263 const UINT_32 xb = pIn->x / localOut.blockWidth;
4264 const UINT_32 blockIndex = yb * pb + xb;
4265 const UINT_32 blockSize = 256;
4266 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4267 pIn->x << elemLog2,
4268 pIn->y,
4269 0);
4270 pOut->addr = localOut.sliceSize * pIn->slice +
4271 mipInfo[pIn->mipId].macroBlockOffset +
4272 (blockIndex * blockSize) +
4273 blk256Offset;
4274 }
4275 else
4276 {
4277 ret = ADDR_INVALIDPARAMS;
4278 }
4279 }
4280
4281 return ret;
4282 }
4283
4284 /**
4285 ************************************************************************************************************************
4286 * Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled
4287 *
4288 * @brief
4289 * Internal function to calculate address from coord for macro tiled swizzle surface
4290 *
4291 * @return
4292 * ADDR_E_RETURNCODE
4293 ************************************************************************************************************************
4294 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4295 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4296 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4297 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4298 ) const
4299 {
4300 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4301 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4302 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4303
4304 localIn.swizzleMode = pIn->swizzleMode;
4305 localIn.flags = pIn->flags;
4306 localIn.resourceType = pIn->resourceType;
4307 localIn.bpp = pIn->bpp;
4308 localIn.width = Max(pIn->unalignedWidth, 1u);
4309 localIn.height = Max(pIn->unalignedHeight, 1u);
4310 localIn.numSlices = Max(pIn->numSlices, 1u);
4311 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4312 localIn.numSamples = Max(pIn->numSamples, 1u);
4313 localIn.numFrags = localIn.numSamples;
4314 localOut.pMipInfo = mipInfo;
4315
4316 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4317
4318 if (ret == ADDR_OK)
4319 {
4320 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4321 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4322 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4323 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4324 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4325 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4326 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4327
4328 if (localIn.numSamples > 1)
4329 {
4330 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4331 pIn->resourceType,
4332 elemLog2,
4333 localIn.numSamples);
4334
4335 if (pPatInfo != NULL)
4336 {
4337 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4338 const UINT_32 yb = pIn->y / localOut.blockHeight;
4339 const UINT_32 xb = pIn->x / localOut.blockWidth;
4340 const UINT_64 blkIdx = yb * pb + xb;
4341
4342 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
4343 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4344
4345 const UINT_32 blkOffset =
4346 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4347 blkSizeLog2,
4348 pIn->x,
4349 pIn->y,
4350 pIn->slice,
4351 pIn->sample);
4352
4353 pOut->addr = (localOut.sliceSize * pIn->slice) +
4354 (blkIdx << blkSizeLog2) +
4355 (blkOffset ^ pipeBankXor);
4356 }
4357 else
4358 {
4359 ret = ADDR_INVALIDPARAMS;
4360 }
4361 }
4362 else
4363 {
4364 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4365 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4366 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4367
4368 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4369 {
4370 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4371 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4372 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4373 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4374 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4375 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4376 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4377 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4378 const UINT_32 yb = pIn->y / localOut.blockHeight;
4379 const UINT_32 xb = pIn->x / localOut.blockWidth;
4380 const UINT_64 blkIdx = yb * pb + xb;
4381 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4382 x << elemLog2,
4383 y,
4384 z);
4385 pOut->addr = sliceSize * sliceId +
4386 mipInfo[pIn->mipId].macroBlockOffset +
4387 (blkIdx << blkSizeLog2) +
4388 (blkOffset ^ pipeBankXor);
4389 }
4390 else
4391 {
4392 ret = ADDR_INVALIDPARAMS;
4393 }
4394 }
4395 }
4396
4397 return ret;
4398 }
4399
4400 /**
4401 ************************************************************************************************************************
4402 * Gfx11Lib::HwlComputeMaxBaseAlignments
4403 *
4404 * @brief
4405 * Gets maximum alignments
4406 * @return
4407 * maximum alignments
4408 ************************************************************************************************************************
4409 */
HwlComputeMaxBaseAlignments() const4410 UINT_32 Gfx11Lib::HwlComputeMaxBaseAlignments() const
4411 {
4412 return Size256K;
4413 }
4414
4415 /**
4416 ************************************************************************************************************************
4417 * Gfx11Lib::HwlComputeMaxMetaBaseAlignments
4418 *
4419 * @brief
4420 * Gets maximum alignments for metadata
4421 * @return
4422 * maximum alignments for metadata
4423 ************************************************************************************************************************
4424 */
HwlComputeMaxMetaBaseAlignments() const4425 UINT_32 Gfx11Lib::HwlComputeMaxMetaBaseAlignments() const
4426 {
4427 Dim3d metaBlk;
4428
4429 // Max base alignment for Htile
4430 const AddrSwizzleMode ValidSwizzleModeForHtile[] =
4431 {
4432 ADDR_SW_64KB_Z_X,
4433 ADDR_SW_256KB_Z_X,
4434 };
4435
4436 UINT_32 maxBaseAlignHtile = 0;
4437
4438 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForHtile) / sizeof(ValidSwizzleModeForHtile[0]); swIdx++)
4439 {
4440 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4441 {
4442 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4443 {
4444 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx11DataDepthStencil,
4445 ADDR_RSRC_TEX_2D,
4446 ValidSwizzleModeForHtile[swIdx],
4447 bppLog2,
4448 numFragLog2,
4449 TRUE,
4450 &metaBlk);
4451
4452 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4453 }
4454 }
4455 }
4456
4457 // Max base alignment for 2D Dcc
4458 // swizzle mode support DCC...
4459 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4460 {
4461 ADDR_SW_64KB_R_X,
4462 ADDR_SW_256KB_R_X,
4463 };
4464
4465 UINT_32 maxBaseAlignDcc2D = 0;
4466
4467 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4468 {
4469 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4470 {
4471 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4472 {
4473 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx11DataColor,
4474 ADDR_RSRC_TEX_2D,
4475 ValidSwizzleModeForDcc2D[swIdx],
4476 bppLog2,
4477 numFragLog2,
4478 TRUE,
4479 &metaBlk);
4480
4481 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4482 }
4483 }
4484 }
4485
4486 // Max base alignment for 3D Dcc
4487 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4488 {
4489 ADDR_SW_64KB_S_X,
4490 ADDR_SW_64KB_D_X,
4491 ADDR_SW_64KB_R_X,
4492 ADDR_SW_256KB_S_X,
4493 ADDR_SW_256KB_D_X,
4494 ADDR_SW_256KB_R_X,
4495 };
4496
4497 UINT_32 maxBaseAlignDcc3D = 0;
4498
4499 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4500 {
4501 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4502 {
4503 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx11DataColor,
4504 ADDR_RSRC_TEX_3D,
4505 ValidSwizzleModeForDcc3D[swIdx],
4506 bppLog2,
4507 0,
4508 TRUE,
4509 &metaBlk);
4510
4511 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4512 }
4513 }
4514
4515 return Max(maxBaseAlignHtile, Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4516 }
4517
4518 /**
4519 ************************************************************************************************************************
4520 * Gfx11Lib::GetMetaElementSizeLog2
4521 *
4522 * @brief
4523 * Gets meta data element size log2
4524 * @return
4525 * Meta data element size log2
4526 ************************************************************************************************************************
4527 */
GetMetaElementSizeLog2(Gfx11DataType dataType)4528 INT_32 Gfx11Lib::GetMetaElementSizeLog2(
4529 Gfx11DataType dataType) ///< Data surface type
4530 {
4531 INT_32 elemSizeLog2 = 0;
4532
4533 if (dataType == Gfx11DataColor)
4534 {
4535 elemSizeLog2 = 0;
4536 }
4537 else
4538 {
4539 ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4540 elemSizeLog2 = 2;
4541 }
4542
4543 return elemSizeLog2;
4544 }
4545
4546 /**
4547 ************************************************************************************************************************
4548 * Gfx11Lib::GetMetaCacheSizeLog2
4549 *
4550 * @brief
4551 * Gets meta data cache line size log2
4552 * @return
4553 * Meta data cache line size log2
4554 ************************************************************************************************************************
4555 */
GetMetaCacheSizeLog2(Gfx11DataType dataType)4556 INT_32 Gfx11Lib::GetMetaCacheSizeLog2(
4557 Gfx11DataType dataType) ///< Data surface type
4558 {
4559 INT_32 cacheSizeLog2 = 0;
4560
4561 if (dataType == Gfx11DataColor)
4562 {
4563 cacheSizeLog2 = 6;
4564 }
4565 else
4566 {
4567 ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4568 cacheSizeLog2 = 8;
4569 }
4570
4571 return cacheSizeLog2;
4572 }
4573
4574 /**
4575 ************************************************************************************************************************
4576 * Gfx11Lib::HwlComputeSurfaceInfoLinear
4577 *
4578 * @brief
4579 * Internal function to calculate alignment for linear surface
4580 *
4581 * @return
4582 * ADDR_E_RETURNCODE
4583 ************************************************************************************************************************
4584 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4585 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoLinear(
4586 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4587 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4588 ) const
4589 {
4590 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4591
4592 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4593 {
4594 returnCode = ADDR_INVALIDPARAMS;
4595 }
4596 else
4597 {
4598 const UINT_32 elementBytes = pIn->bpp >> 3;
4599 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4600 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4601 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4602 UINT_32 actualHeight = pIn->height;
4603 UINT_64 sliceSize = 0;
4604
4605 if (pIn->numMipLevels > 1)
4606 {
4607 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4608 {
4609 UINT_32 mipWidth, mipHeight;
4610
4611 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4612
4613 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4614
4615 if (pOut->pMipInfo != NULL)
4616 {
4617 pOut->pMipInfo[i].pitch = mipActualWidth;
4618 pOut->pMipInfo[i].height = mipHeight;
4619 pOut->pMipInfo[i].depth = mipDepth;
4620 pOut->pMipInfo[i].offset = sliceSize;
4621 pOut->pMipInfo[i].mipTailOffset = 0;
4622 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4623 }
4624
4625 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4626 }
4627 }
4628 else
4629 {
4630 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4631
4632 if (returnCode == ADDR_OK)
4633 {
4634 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4635
4636 if (pOut->pMipInfo != NULL)
4637 {
4638 pOut->pMipInfo[0].pitch = pitch;
4639 pOut->pMipInfo[0].height = actualHeight;
4640 pOut->pMipInfo[0].depth = mipDepth;
4641 pOut->pMipInfo[0].offset = 0;
4642 pOut->pMipInfo[0].mipTailOffset = 0;
4643 pOut->pMipInfo[0].macroBlockOffset = 0;
4644 }
4645 }
4646 }
4647
4648 if (returnCode == ADDR_OK)
4649 {
4650 pOut->pitch = pitch;
4651 pOut->height = actualHeight;
4652 pOut->numSlices = pIn->numSlices;
4653 pOut->sliceSize = sliceSize;
4654 pOut->surfSize = sliceSize * pOut->numSlices;
4655 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4656 pOut->blockWidth = pitchAlign;
4657 pOut->blockHeight = 1;
4658 pOut->blockSlices = 1;
4659
4660 // Following members are useless on GFX11
4661 pOut->mipChainPitch = 0;
4662 pOut->mipChainHeight = 0;
4663 pOut->mipChainSlice = 0;
4664 pOut->epitchIsHeight = FALSE;
4665
4666 // Post calculation validate
4667 ADDR_ASSERT(pOut->sliceSize > 0);
4668 }
4669 }
4670
4671 return returnCode;
4672 }
4673
4674 } // V2
4675 } // Addr
4676