1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE
23 *
24 ***********************************************************************************************************************/
25
/**
************************************************************************************************************************
* @file  gfx11addrlib.cpp
* @brief Contains the implementation of the Gfx11Lib class.
************************************************************************************************************************
*/
32
33 #include "gfx11addrlib.h"
34 #include "gfx11_gb_reg.h"
35
36 #include "amdgpu_asic_addr.h"
37
38 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40
41 namespace Addr
42 {
43 /**
44 ************************************************************************************************************************
45 * Gfx11HwlInit
46 *
47 * @brief
48 * Creates an Gfx11Lib object.
49 *
50 * @return
51 * Returns an Gfx11Lib object pointer.
52 ************************************************************************************************************************
53 */
Gfx11HwlInit(const Client * pClient)54 Addr::Lib* Gfx11HwlInit(const Client* pClient)
55 {
56 return V2::Gfx11Lib::CreateObj(pClient);
57 }
58
59 namespace V2
60 {
61
62 ////////////////////////////////////////////////////////////////////////////////////////////////////
63 // Static Const Member
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65
66 const SwizzleModeFlags Gfx11Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
67 {//Linear 256B 4KB 64KB 256KB Z Std Disp Rot XOR T RtOpt Reserved
68 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
69 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
70 {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
71 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
72
73 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
74 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
75 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
76 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
77
78 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
79 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
80 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
81 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
82
83 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
84 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
85 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
86 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
87
88 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
89 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
90 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
91 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
92
93 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
94 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X
95 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X
96 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
97
98 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
99 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
100 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
101 {{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_64KB_R_X
102
103 {{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_256KB_Z_X
104 {{0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_256KB_S_X
105 {{0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_256KB_D_X
106 {{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_256KB_R_X
107 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
108 };
109
110 const Dim3d Gfx11Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
111
112 const Dim3d Gfx11Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}};
113 const Dim3d Gfx11Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx11Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116 /**
117 ************************************************************************************************************************
118 * Gfx11Lib::Gfx11Lib
119 *
120 * @brief
121 * Constructor
122 *
123 ************************************************************************************************************************
124 */
Gfx11Lib(const Client * pClient)125 Gfx11Lib::Gfx11Lib(const Client* pClient)
126 :
127 Lib(pClient),
128 m_numPkrLog2(0),
129 m_numSaLog2(0),
130 m_colorBaseIndex(0),
131 m_htileBaseIndex(0),
132 m_dccBaseIndex(0)
133 {
134 memset(&m_settings, 0, sizeof(m_settings));
135 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
136 }
137
138 /**
139 ************************************************************************************************************************
140 * Gfx11Lib::~Gfx11Lib
141 *
142 * @brief
143 * Destructor
144 ************************************************************************************************************************
145 */
~Gfx11Lib()146 Gfx11Lib::~Gfx11Lib()
147 {
148 }
149
150 /**
151 ************************************************************************************************************************
152 * Gfx11Lib::HwlComputeHtileInfo
153 *
154 * @brief
155 * Interface function stub of AddrComputeHtilenfo
156 *
157 * @return
158 * ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileInfo(
162 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
163 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
164 ) const
165 {
166 ADDR_E_RETURNCODE ret = ADDR_OK;
167
168 if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
169 (pIn->swizzleMode != ADDR_SW_256KB_Z_X) &&
170 (pIn->hTileFlags.pipeAligned != TRUE))
171 {
172 ret = ADDR_INVALIDPARAMS;
173 }
174 else
175 {
176 Dim3d metaBlk = {};
177 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataDepthStencil,
178 ADDR_RSRC_TEX_2D,
179 pIn->swizzleMode,
180 0,
181 0,
182 TRUE,
183 &metaBlk);
184
185 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
186 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
187 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
188 pOut->metaBlkWidth = metaBlk.w;
189 pOut->metaBlkHeight = metaBlk.h;
190
191 if (pIn->numMipLevels > 1)
192 {
193 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
194
195 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
196
197 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
198 {
199 UINT_32 mipWidth, mipHeight;
200
201 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
202
203 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
204 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
205
206 const UINT_32 pitchInM = mipWidth / metaBlk.w;
207 const UINT_32 heightInM = mipHeight / metaBlk.h;
208 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
209
210 if (pOut->pMipInfo != NULL)
211 {
212 pOut->pMipInfo[i].inMiptail = FALSE;
213 pOut->pMipInfo[i].offset = offset;
214 pOut->pMipInfo[i].sliceSize = mipSliceSize;
215 }
216
217 offset += mipSliceSize;
218 }
219
220 pOut->sliceSize = offset;
221 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
222 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
223
224 if (pOut->pMipInfo != NULL)
225 {
226 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
227 {
228 pOut->pMipInfo[i].inMiptail = TRUE;
229 pOut->pMipInfo[i].offset = 0;
230 pOut->pMipInfo[i].sliceSize = 0;
231 }
232
233 if (pIn->firstMipIdInTail != pIn->numMipLevels)
234 {
235 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
236 }
237 }
238 }
239 else
240 {
241 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
242 const UINT_32 heightInM = pOut->height / metaBlk.h;
243
244 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
245 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
246 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
247
248 if (pOut->pMipInfo != NULL)
249 {
250 pOut->pMipInfo[0].inMiptail = FALSE;
251 pOut->pMipInfo[0].offset = 0;
252 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
253 }
254 }
255
256 // Get the HTILE address equation (copied from HtileAddrFromCoord).
257 // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
258 const UINT_32 index = m_htileBaseIndex;
259 const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;
260
261 ADDR_C_ASSERT(sizeof(GFX11_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
262 pOut->equation.gfx10_bits = (UINT_16 *)GFX11_HTILE_SW_PATTERN[patIdxTable[index]];
263 }
264
265 return ret;
266 }
267
268 /**
269 ************************************************************************************************************************
270 * Gfx11Lib::HwlComputeDccInfo
271 *
272 * @brief
273 * Interface function to compute DCC key info
274 *
275 * @return
276 * ADDR_E_RETURNCODE
277 ************************************************************************************************************************
278 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const279 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo(
280 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
281 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
282 ) const
283 {
284 ADDR_E_RETURNCODE ret = ADDR_OK;
285
286 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
287 {
288 ret = ADDR_INVALIDPARAMS;
289 }
290 else
291 {
292 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
293
294 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
295
296 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
297 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
298 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
299
300 if (ret == ADDR_OK)
301 {
302 Dim3d metaBlk = {};
303 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
304 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataColor,
305 pIn->resourceType,
306 pIn->swizzleMode,
307 elemLog2,
308 numFragLog2,
309 pIn->dccKeyFlags.pipeAligned,
310 &metaBlk);
311
312 pOut->dccRamBaseAlign = metaBlkSize;
313 pOut->metaBlkWidth = metaBlk.w;
314 pOut->metaBlkHeight = metaBlk.h;
315 pOut->metaBlkDepth = metaBlk.d;
316 pOut->metaBlkSize = metaBlkSize;
317
318 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
319 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
320 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
321
322 if (pIn->numMipLevels > 1)
323 {
324 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
325
326 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
327
328 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
329 {
330 UINT_32 mipWidth, mipHeight;
331
332 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
333
334 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
335 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
336
337 const UINT_32 pitchInM = mipWidth / metaBlk.w;
338 const UINT_32 heightInM = mipHeight / metaBlk.h;
339 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
340
341 if (pOut->pMipInfo != NULL)
342 {
343 pOut->pMipInfo[i].inMiptail = FALSE;
344 pOut->pMipInfo[i].offset = offset;
345 pOut->pMipInfo[i].sliceSize = mipSliceSize;
346 }
347
348 offset += mipSliceSize;
349 }
350
351 pOut->dccRamSliceSize = offset;
352 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
353 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
354
355 if (pOut->pMipInfo != NULL)
356 {
357 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
358 {
359 pOut->pMipInfo[i].inMiptail = TRUE;
360 pOut->pMipInfo[i].offset = 0;
361 pOut->pMipInfo[i].sliceSize = 0;
362 }
363
364 if (pIn->firstMipIdInTail != pIn->numMipLevels)
365 {
366 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
367 }
368 }
369 }
370 else
371 {
372 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
373 const UINT_32 heightInM = pOut->height / metaBlk.h;
374
375 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
376 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
377 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
378
379 if (pOut->pMipInfo != NULL)
380 {
381 pOut->pMipInfo[0].inMiptail = FALSE;
382 pOut->pMipInfo[0].offset = 0;
383 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
384 }
385 }
386
387 // Get the DCC address equation (copied from DccAddrFromCoord)
388 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
389 const UINT_32 numPipeLog2 = m_pipesLog2;
390 UINT_32 index = m_dccBaseIndex + elemLog2;
391 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
392 GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
393
394 if (pIn->dccKeyFlags.pipeAligned)
395 {
396 index += MaxNumOfBpp;
397
398 if (m_numPkrLog2 < 2)
399 {
400 index += m_pipesLog2 * MaxNumOfBpp;
401 }
402 else
403 {
404 // 4 groups for "m_numPkrLog2 < 2" case
405 index += 4 * MaxNumOfBpp;
406
407 const UINT_32 dccPipePerPkr = 3;
408
409 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
410 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
411 }
412 }
413
414 ADDR_C_ASSERT(sizeof(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
415 pOut->equation.gfx10_bits = (UINT_16*)GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]];
416 }
417 }
418
419 return ret;
420 }
421
422 /**
423 ************************************************************************************************************************
424 * Gfx11Lib::HwlComputeHtileAddrFromCoord
425 *
426 * @brief
427 * Interface function stub of AddrComputeHtileAddrFromCoord
428 *
429 * @return
430 * ADDR_E_RETURNCODE
431 ************************************************************************************************************************
432 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)433 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileAddrFromCoord(
434 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
435 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
436 {
437 ADDR_E_RETURNCODE returnCode = ADDR_OK;
438
439 if (pIn->numMipLevels > 1)
440 {
441 returnCode = ADDR_NOTIMPLEMENTED;
442 }
443 else
444 {
445 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
446 input.size = sizeof(input);
447 input.hTileFlags = pIn->hTileFlags;
448 input.depthFlags = pIn->depthflags;
449 input.swizzleMode = pIn->swizzleMode;
450 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
451 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
452 input.numSlices = Max(pIn->numSlices, 1u);
453 input.numMipLevels = 1;
454
455 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
456 output.size = sizeof(output);
457
458 returnCode = ComputeHtileInfo(&input, &output);
459
460 if (returnCode == ADDR_OK)
461 {
462 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
463 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
464 const UINT_32 index = m_htileBaseIndex + numSampleLog2;
465 const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;
466 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
467 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
468 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX11_HTILE_SW_PATTERN[patIdxTable[index]],
469 blkSizeLog2 + 1, // +1 for nibble offset
470 pIn->x,
471 pIn->y,
472 pIn->slice,
473 0);
474 const UINT_32 xb = pIn->x / output.metaBlkWidth;
475 const UINT_32 yb = pIn->y / output.metaBlkHeight;
476 const UINT_32 pb = output.pitch / output.metaBlkWidth;
477 const UINT_32 blkIndex = (yb * pb) + xb;
478 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
479
480 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
481 (blkIndex * (1 << blkSizeLog2)) +
482 ((blkOffset >> 1) ^ pipeXor);
483 }
484 }
485
486 return returnCode;
487 }
488
489 /**
490 ************************************************************************************************************************
491 * Gfx11Lib::HwlComputeHtileCoordFromAddr
492 *
493 * @brief
494 * Interface function stub of AddrComputeHtileCoordFromAddr
495 *
496 * @return
497 * ADDR_E_RETURNCODE
498 ************************************************************************************************************************
499 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)500 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileCoordFromAddr(
501 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
502 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
503 {
504 ADDR_NOT_IMPLEMENTED();
505
506 return ADDR_OK;
507 }
508
509 /**
510 ************************************************************************************************************************
511 * Gfx11Lib::HwlSupportComputeDccAddrFromCoord
512 *
513 * @brief
514 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
515 *
516 * @return
517 * ADDR_E_RETURNCODE
518 ************************************************************************************************************************
519 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)520 ADDR_E_RETURNCODE Gfx11Lib::HwlSupportComputeDccAddrFromCoord(
521 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
522 {
523 ADDR_E_RETURNCODE returnCode = ADDR_OK;
524
525 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
526 ((pIn->swizzleMode != ADDR_SW_64KB_R_X) &&
527 (pIn->swizzleMode != ADDR_SW_256KB_R_X)) ||
528 (pIn->dccKeyFlags.linear == TRUE) ||
529 (pIn->numFrags > 1) ||
530 (pIn->numMipLevels > 1) ||
531 (pIn->mipId > 0))
532 {
533 returnCode = ADDR_NOTSUPPORTED;
534 }
535 else if ((pIn->pitch == 0) ||
536 (pIn->metaBlkWidth == 0) ||
537 (pIn->metaBlkHeight == 0) ||
538 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
539 {
540 returnCode = ADDR_NOTSUPPORTED;
541 }
542
543 return returnCode;
544 }
545
546 /**
547 ************************************************************************************************************************
548 * Gfx11Lib::HwlComputeDccAddrFromCoord
549 *
550 * @brief
551 * Interface function stub of AddrComputeDccAddrFromCoord
552 *
553 * @return
554 * N/A
555 ************************************************************************************************************************
556 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)557 VOID Gfx11Lib::HwlComputeDccAddrFromCoord(
558 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
559 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
560 {
561 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
562 const UINT_32 numPipeLog2 = m_pipesLog2;
563 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
564 UINT_32 index = m_dccBaseIndex + elemLog2;
565 const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
566 GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
567
568 if (pIn->dccKeyFlags.pipeAligned)
569 {
570 index += MaxNumOfBpp;
571
572 if (m_numPkrLog2 < 2)
573 {
574 index += m_pipesLog2 * MaxNumOfBpp;
575 }
576 else
577 {
578 // 4 groups for "m_numPkrLog2 < 2" case
579 index += 4 * MaxNumOfBpp;
580
581 const UINT_32 dccPipePerPkr = 3;
582
583 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
584 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
585 }
586 }
587
588 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
589 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
590 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]],
591 blkSizeLog2 + 1, // +1 for nibble offset
592 pIn->x,
593 pIn->y,
594 pIn->slice,
595 0);
596 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
597 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
598 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
599 const UINT_32 blkIndex = (yb * pb) + xb;
600 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
601
602 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
603 (blkIndex * (1 << blkSizeLog2)) +
604 ((blkOffset >> 1) ^ pipeXor);
605 }
606
607 /**
608 ************************************************************************************************************************
609 * Gfx11Lib::HwlInitGlobalParams
610 *
611 * @brief
612 * Initializes global parameters
613 *
614 * @return
615 * TRUE if all settings are valid
616 *
617 ************************************************************************************************************************
618 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)619 BOOL_32 Gfx11Lib::HwlInitGlobalParams(
620 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
621 {
622 BOOL_32 valid = TRUE;
623 GB_ADDR_CONFIG_GFX11 gbAddrConfig;
624
625 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
626
627 switch (gbAddrConfig.bits.NUM_PIPES)
628 {
629 case ADDR_CONFIG_1_PIPE:
630 m_pipes = 1;
631 m_pipesLog2 = 0;
632 break;
633 case ADDR_CONFIG_2_PIPE:
634 m_pipes = 2;
635 m_pipesLog2 = 1;
636 break;
637 case ADDR_CONFIG_4_PIPE:
638 m_pipes = 4;
639 m_pipesLog2 = 2;
640 break;
641 case ADDR_CONFIG_8_PIPE:
642 m_pipes = 8;
643 m_pipesLog2 = 3;
644 break;
645 case ADDR_CONFIG_16_PIPE:
646 m_pipes = 16;
647 m_pipesLog2 = 4;
648 break;
649 case ADDR_CONFIG_32_PIPE:
650 m_pipes = 32;
651 m_pipesLog2 = 5;
652 break;
653 case ADDR_CONFIG_64_PIPE:
654 m_pipes = 64;
655 m_pipesLog2 = 6;
656 break;
657 default:
658 ADDR_ASSERT_ALWAYS();
659 valid = FALSE;
660 break;
661 }
662
663 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
664 {
665 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
666 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
667 m_pipeInterleaveLog2 = 8;
668 break;
669 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
670 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
671 m_pipeInterleaveLog2 = 9;
672 break;
673 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
674 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
675 m_pipeInterleaveLog2 = 10;
676 break;
677 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
678 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
679 m_pipeInterleaveLog2 = 11;
680 break;
681 default:
682 ADDR_ASSERT_ALWAYS();
683 valid = FALSE;
684 break;
685 }
686
687 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
688 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
689 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
690 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
691
692 // These fields are deprecated on GFX11; they do nothing on HW.
693 m_maxCompFrag = 1;
694 m_maxCompFragLog2 = 0;
695
696 // Skip unaligned case
697 m_htileBaseIndex += MaxNumOfAA;
698
699 m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
700 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
701
702 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
703 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
704
705 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
706
707 if (m_numPkrLog2 >= 2)
708 {
709 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
710 m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
711 }
712
713 // There is no so-called VAR swizzle mode on GFX11 and instead there are 4 256KB swizzle modes. Here we treat 256KB
714 // swizzle mode as "VAR" swizzle mode for reusing exising facilities (e.g GetBlockSizeLog2()) provided by base class
715 m_blockVarSizeLog2 = 18;
716
717 if (valid)
718 {
719 InitEquationTable();
720 }
721
722 return valid;
723 }
724
725 /**
726 ************************************************************************************************************************
727 * Gfx11Lib::HwlConvertChipFamily
728 *
729 * @brief
730 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
731 * @return
732 * ChipFamily
733 ************************************************************************************************************************
734 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)735 ChipFamily Gfx11Lib::HwlConvertChipFamily(
736 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
737 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
738 {
739 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
740
741 switch (chipFamily)
742 {
743 case FAMILY_GFX1100:
744 if (ASICREV_IS_GFX1100(chipRevision))
745 {
746 }
747 if (ASICREV_IS_GFX1101(chipRevision))
748 {
749 }
750 if (ASICREV_IS_GFX1102(chipRevision))
751 {
752 }
753 break;
754 case FAMILY_GFX1103:
755 if (ASICREV_IS_GFX1103(chipRevision))
756 {
757 }
758 break;
759 default:
760 ADDR_ASSERT(!"Unknown chip family");
761 break;
762 }
763
764 m_configFlags.use32bppFor422Fmt = TRUE;
765
766 return family;
767 }
768
769 /**
770 ************************************************************************************************************************
771 * Gfx11Lib::GetBlk256SizeLog2
772 *
773 * @brief
774 * Get block 256 size
775 *
776 * @return
777 * N/A
778 ************************************************************************************************************************
779 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const780 void Gfx11Lib::GetBlk256SizeLog2(
781 AddrResourceType resourceType, ///< [in] Resource type
782 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
783 UINT_32 elemLog2, ///< [in] element size log2
784 UINT_32 numSamplesLog2, ///< [in] number of samples
785 Dim3d* pBlock ///< [out] block size
786 ) const
787 {
788 if (IsThin(resourceType, swizzleMode))
789 {
790 UINT_32 blockBits = 8 - elemLog2;
791
792 // On GFX11, Z and R modes are the same thing.
793 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
794 {
795 blockBits -= numSamplesLog2;
796 }
797
798 pBlock->w = (blockBits >> 1) + (blockBits & 1);
799 pBlock->h = (blockBits >> 1);
800 pBlock->d = 0;
801 }
802 else
803 {
804 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
805
806 UINT_32 blockBits = 8 - elemLog2;
807
808 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
809 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
810 pBlock->h = (blockBits / 3);
811 }
812 }
813
814 /**
815 ************************************************************************************************************************
816 * Gfx11Lib::GetCompressedBlockSizeLog2
817 *
818 * @brief
819 * Get compress block size
820 *
821 * @return
822 * N/A
823 ************************************************************************************************************************
824 */
GetCompressedBlockSizeLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const825 void Gfx11Lib::GetCompressedBlockSizeLog2(
826 Gfx11DataType dataType, ///< [in] Data type
827 AddrResourceType resourceType, ///< [in] Resource type
828 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
829 UINT_32 elemLog2, ///< [in] element size log2
830 UINT_32 numSamplesLog2, ///< [in] number of samples
831 Dim3d* pBlock ///< [out] block size
832 ) const
833 {
834 if (dataType == Gfx11DataColor)
835 {
836 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
837 }
838 else
839 {
840 ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
841 pBlock->w = 3;
842 pBlock->h = 3;
843 pBlock->d = 0;
844 }
845 }
846
847 /**
848 ************************************************************************************************************************
849 * Gfx11Lib::GetMetaOverlapLog2
850 *
851 * @brief
852 * Get meta block overlap
853 *
854 * @return
855 * N/A
856 ************************************************************************************************************************
857 */
GetMetaOverlapLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const858 INT_32 Gfx11Lib::GetMetaOverlapLog2(
859 Gfx11DataType dataType, ///< [in] Data type
860 AddrResourceType resourceType, ///< [in] Resource type
861 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
862 UINT_32 elemLog2, ///< [in] element size log2
863 UINT_32 numSamplesLog2 ///< [in] number of samples
864 ) const
865 {
866 Dim3d compBlock;
867 Dim3d microBlock;
868
869 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
870 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
871
872 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
873 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
874 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
875 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
876 INT_32 overlap = numPipesLog2 - maxSizeLog2;
877
878 if (numPipesLog2 > 1)
879 {
880 overlap++;
881 }
882
883 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
884 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
885 {
886 overlap--;
887 }
888 overlap = Max(overlap, 0);
889 return overlap;
890 }
891
892 /**
893 ************************************************************************************************************************
894 * Gfx11Lib::Get3DMetaOverlapLog2
895 *
896 * @brief
897 * Get 3d meta block overlap
898 *
899 * @return
900 * N/A
901 ************************************************************************************************************************
902 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const903 INT_32 Gfx11Lib::Get3DMetaOverlapLog2(
904 AddrResourceType resourceType, ///< [in] Resource type
905 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
906 UINT_32 elemLog2 ///< [in] element size log2
907 ) const
908 {
909 Dim3d microBlock;
910 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
911
912 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
913
914 overlap++;
915
916 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
917 {
918 overlap = 0;
919 }
920 return overlap;
921 }
922
923 /**
924 ************************************************************************************************************************
925 * Gfx11Lib::GetPipeRotateAmount
926 *
927 * @brief
928 * Get pipe rotate amount
929 *
930 * @return
931 * Pipe rotate amount
932 ************************************************************************************************************************
933 */
934
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const935 INT_32 Gfx11Lib::GetPipeRotateAmount(
936 AddrResourceType resourceType, ///< [in] Resource type
937 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
938 ) const
939 {
940 INT_32 amount = 0;
941
942 if ((m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
943 {
944 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
945 1 : m_pipesLog2 - (m_numSaLog2 + 1);
946 }
947
948 return amount;
949 }
950
951 /**
952 ************************************************************************************************************************
953 * Gfx11Lib::GetMetaBlkSize
954 *
955 * @brief
956 * Get metadata block size
957 *
958 * @return
959 * Meta block size
960 ************************************************************************************************************************
961 */
GetMetaBlkSize(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const962 UINT_32 Gfx11Lib::GetMetaBlkSize(
963 Gfx11DataType dataType, ///< [in] Data type
964 AddrResourceType resourceType, ///< [in] Resource type
965 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
966 UINT_32 elemLog2, ///< [in] element size log2
967 UINT_32 numSamplesLog2, ///< [in] number of samples
968 BOOL_32 pipeAlign, ///< [in] pipe align
969 Dim3d* pBlock ///< [out] block size
970 ) const
971 {
972 INT_32 metablkSizeLog2;
973
974 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
975 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
976 const INT_32 compBlkSizeLog2 = (dataType == Gfx11DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
977 const INT_32 metaBlkSamplesLog2 = numSamplesLog2;
978 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
979 INT_32 numPipesLog2 = m_pipesLog2;
980
981 if (IsThin(resourceType, swizzleMode))
982 {
983 if ((pipeAlign == FALSE) ||
984 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
985 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
986 {
987 if (pipeAlign)
988 {
989 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
990 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
991 }
992 else
993 {
994 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
995 }
996 }
997 else
998 {
999 if ((m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1000 {
1001 numPipesLog2++;
1002 }
1003
1004 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1005
1006 if (numPipesLog2 >= 4)
1007 {
1008 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1009
1010 // In 16Bpe 8xaa, we have an extra overlap bit
1011 if ((pipeRotateLog2 > 0) &&
1012 (elemLog2 == 4) &&
1013 (numSamplesLog2 == 3) &&
1014 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1015 {
1016 overlapLog2++;
1017 }
1018
1019 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1020 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1021 }
1022 else
1023 {
1024 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1025 }
1026
1027 if (dataType == Gfx11DataDepthStencil)
1028 {
1029 // For htile surfaces, pad meta block size to 2K * num_pipes
1030 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1031 }
1032
1033 const INT_32 compFragLog2 = numSamplesLog2;
1034
1035 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1036 {
1037 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1038
1039 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1040 }
1041 }
1042
1043 const INT_32 metablkBitsLog2 =
1044 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1045 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1046 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1047 pBlock->d = 1;
1048 }
1049 else
1050 {
1051 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1052
1053 if (pipeAlign)
1054 {
1055 if ((m_pipesLog2 == m_numSaLog2 + 1) &&
1056 (m_pipesLog2 > 1) &&
1057 IsRbAligned(resourceType, swizzleMode))
1058 {
1059 numPipesLog2++;
1060 }
1061
1062 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1063
1064 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1065 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1066 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1067 }
1068 else
1069 {
1070 metablkSizeLog2 = 12;
1071 }
1072
1073 const INT_32 metablkBitsLog2 =
1074 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1075 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1076 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1077 pBlock->d = 1 << (metablkBitsLog2 / 3);
1078 }
1079
1080 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1081 }
1082
1083 /**
1084 ************************************************************************************************************************
1085 * Gfx11Lib::ConvertSwizzlePatternToEquation
1086 *
1087 * @brief
1088 * Convert swizzle pattern to equation.
1089 *
1090 * @return
1091 * N/A
1092 ************************************************************************************************************************
1093 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1094 VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
1095 UINT_32 elemLog2, ///< [in] element bytes log2
1096 AddrResourceType rsrcType, ///< [in] resource type
1097 AddrSwizzleMode swMode, ///< [in] swizzle mode
1098 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1099 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1100 const
1101 {
1102 ADDR_BIT_SETTING fullSwizzlePattern[20];
1103 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1104
1105 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1106 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1107
1108 pEquation->numBits = blockSizeLog2;
1109 pEquation->stackedDepthSlices = FALSE;
1110
1111 for (UINT_32 i = 0; i < elemLog2; i++)
1112 {
1113 pEquation->addr[i].channel = 0;
1114 pEquation->addr[i].valid = 1;
1115 pEquation->addr[i].index = i;
1116 }
1117
1118 if (IsXor(swMode) == FALSE)
1119 {
1120 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1121 {
1122 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1123
1124 if (pSwizzle[i].x != 0)
1125 {
1126 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1127
1128 pEquation->addr[i].channel = 0;
1129 pEquation->addr[i].valid = 1;
1130 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1131 }
1132 else if (pSwizzle[i].y != 0)
1133 {
1134 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1135
1136 pEquation->addr[i].channel = 1;
1137 pEquation->addr[i].valid = 1;
1138 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1139 }
1140 else
1141 {
1142 ADDR_ASSERT(pSwizzle[i].z != 0);
1143 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1144
1145 pEquation->addr[i].channel = 2;
1146 pEquation->addr[i].valid = 1;
1147 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1148 }
1149
1150 pEquation->xor1[i].value = 0;
1151 pEquation->xor2[i].value = 0;
1152 }
1153 }
1154 else if (IsThin(rsrcType, swMode))
1155 {
1156 Dim3d dim;
1157 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1158
1159 const UINT_32 blkXLog2 = Log2(dim.w);
1160 const UINT_32 blkYLog2 = Log2(dim.h);
1161 const UINT_32 blkXMask = dim.w - 1;
1162 const UINT_32 blkYMask = dim.h - 1;
1163
1164 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1165 UINT_32 xMask = 0;
1166 UINT_32 yMask = 0;
1167 UINT_32 bMask = (1 << elemLog2) - 1;
1168
1169 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1170 {
1171 if (IsPow2(pSwizzle[i].value))
1172 {
1173 if (pSwizzle[i].x != 0)
1174 {
1175 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1176 xMask |= pSwizzle[i].x;
1177
1178 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1179
1180 ADDR_ASSERT(xLog2 < blkXLog2);
1181
1182 pEquation->addr[i].channel = 0;
1183 pEquation->addr[i].valid = 1;
1184 pEquation->addr[i].index = xLog2 + elemLog2;
1185 }
1186 else
1187 {
1188 ADDR_ASSERT(pSwizzle[i].y != 0);
1189 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1190 yMask |= pSwizzle[i].y;
1191
1192 pEquation->addr[i].channel = 1;
1193 pEquation->addr[i].valid = 1;
1194 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1195
1196 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1197 }
1198
1199 swizzle[i].value = 0;
1200 bMask |= 1 << i;
1201 }
1202 else
1203 {
1204 if (pSwizzle[i].z != 0)
1205 {
1206 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1207
1208 pEquation->xor2[i].channel = 2;
1209 pEquation->xor2[i].valid = 1;
1210 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1211 }
1212
1213 swizzle[i].x = pSwizzle[i].x;
1214 swizzle[i].y = pSwizzle[i].y;
1215 swizzle[i].z = swizzle[i].s = 0;
1216
1217 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1218
1219 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1220
1221 if (xHi != 0)
1222 {
1223 ADDR_ASSERT(IsPow2(xHi));
1224 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1225
1226 pEquation->xor1[i].channel = 0;
1227 pEquation->xor1[i].valid = 1;
1228 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1229
1230 swizzle[i].x &= blkXMask;
1231 }
1232
1233 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1234
1235 if (yHi != 0)
1236 {
1237 ADDR_ASSERT(IsPow2(yHi));
1238
1239 if (xHi == 0)
1240 {
1241 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1242 pEquation->xor1[i].channel = 1;
1243 pEquation->xor1[i].valid = 1;
1244 pEquation->xor1[i].index = Log2(yHi);
1245 }
1246 else
1247 {
1248 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1249 pEquation->xor2[i].channel = 1;
1250 pEquation->xor2[i].valid = 1;
1251 pEquation->xor2[i].index = Log2(yHi);
1252 }
1253
1254 swizzle[i].y &= blkYMask;
1255 }
1256
1257 if (swizzle[i].value == 0)
1258 {
1259 bMask |= 1 << i;
1260 }
1261 }
1262 }
1263
1264 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1265 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1266
1267 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1268
1269 while (bMask != blockMask)
1270 {
1271 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1272 {
1273 if ((bMask & (1 << i)) == 0)
1274 {
1275 if (IsPow2(swizzle[i].value))
1276 {
1277 if (swizzle[i].x != 0)
1278 {
1279 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1280 xMask |= swizzle[i].x;
1281
1282 const UINT_32 xLog2 = Log2(swizzle[i].x);
1283
1284 ADDR_ASSERT(xLog2 < blkXLog2);
1285
1286 pEquation->addr[i].channel = 0;
1287 pEquation->addr[i].valid = 1;
1288 pEquation->addr[i].index = xLog2 + elemLog2;
1289 }
1290 else
1291 {
1292 ADDR_ASSERT(swizzle[i].y != 0);
1293 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1294 yMask |= swizzle[i].y;
1295
1296 pEquation->addr[i].channel = 1;
1297 pEquation->addr[i].valid = 1;
1298 pEquation->addr[i].index = Log2(swizzle[i].y);
1299
1300 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1301 }
1302
1303 swizzle[i].value = 0;
1304 bMask |= 1 << i;
1305 }
1306 else
1307 {
1308 const UINT_32 x = swizzle[i].x & xMask;
1309 const UINT_32 y = swizzle[i].y & yMask;
1310
1311 if (x != 0)
1312 {
1313 ADDR_ASSERT(IsPow2(x));
1314
1315 if (pEquation->xor1[i].value == 0)
1316 {
1317 pEquation->xor1[i].channel = 0;
1318 pEquation->xor1[i].valid = 1;
1319 pEquation->xor1[i].index = Log2(x) + elemLog2;
1320 }
1321 else
1322 {
1323 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1324 pEquation->xor2[i].channel = 0;
1325 pEquation->xor2[i].valid = 1;
1326 pEquation->xor2[i].index = Log2(x) + elemLog2;
1327 }
1328 }
1329
1330 if (y != 0)
1331 {
1332 ADDR_ASSERT(IsPow2(y));
1333
1334 if (pEquation->xor1[i].value == 0)
1335 {
1336 pEquation->xor1[i].channel = 1;
1337 pEquation->xor1[i].valid = 1;
1338 pEquation->xor1[i].index = Log2(y);
1339 }
1340 else
1341 {
1342 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1343 pEquation->xor2[i].channel = 1;
1344 pEquation->xor2[i].valid = 1;
1345 pEquation->xor2[i].index = Log2(y);
1346 }
1347 }
1348
1349 swizzle[i].x &= ~x;
1350 swizzle[i].y &= ~y;
1351 }
1352 }
1353 }
1354 }
1355
1356 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1357 }
1358 else
1359 {
1360 const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ?
1361 Block256K_Log2_3d[elemLog2] :
1362 ((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]);
1363
1364 const UINT_32 blkXLog2 = blkDim.w;
1365 const UINT_32 blkYLog2 = blkDim.h;
1366 const UINT_32 blkZLog2 = blkDim.d;
1367 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1368 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1369 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1370
1371 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1372 UINT_32 xMask = 0;
1373 UINT_32 yMask = 0;
1374 UINT_32 zMask = 0;
1375 UINT_32 bMask = (1 << elemLog2) - 1;
1376
1377 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1378 {
1379 if (IsPow2(pSwizzle[i].value))
1380 {
1381 if (pSwizzle[i].x != 0)
1382 {
1383 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1384 xMask |= pSwizzle[i].x;
1385
1386 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1387
1388 ADDR_ASSERT(xLog2 < blkXLog2);
1389
1390 pEquation->addr[i].channel = 0;
1391 pEquation->addr[i].valid = 1;
1392 pEquation->addr[i].index = xLog2 + elemLog2;
1393 }
1394 else if (pSwizzle[i].y != 0)
1395 {
1396 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1397 yMask |= pSwizzle[i].y;
1398
1399 pEquation->addr[i].channel = 1;
1400 pEquation->addr[i].valid = 1;
1401 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1402
1403 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1404 }
1405 else
1406 {
1407 ADDR_ASSERT(pSwizzle[i].z != 0);
1408 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1409 zMask |= pSwizzle[i].z;
1410
1411 pEquation->addr[i].channel = 2;
1412 pEquation->addr[i].valid = 1;
1413 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1414
1415 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1416 }
1417
1418 swizzle[i].value = 0;
1419 bMask |= 1 << i;
1420 }
1421 else
1422 {
1423 swizzle[i].x = pSwizzle[i].x;
1424 swizzle[i].y = pSwizzle[i].y;
1425 swizzle[i].z = pSwizzle[i].z;
1426 swizzle[i].s = 0;
1427
1428 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1429
1430 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1431 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1432 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1433
1434 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1435
1436 if (xHi != 0)
1437 {
1438 ADDR_ASSERT(IsPow2(xHi));
1439 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1440
1441 pEquation->xor1[i].channel = 0;
1442 pEquation->xor1[i].valid = 1;
1443 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1444
1445 swizzle[i].x &= blkXMask;
1446 }
1447
1448 if (yHi != 0)
1449 {
1450 ADDR_ASSERT(IsPow2(yHi));
1451
1452 if (pEquation->xor1[i].value == 0)
1453 {
1454 pEquation->xor1[i].channel = 1;
1455 pEquation->xor1[i].valid = 1;
1456 pEquation->xor1[i].index = Log2(yHi);
1457 }
1458 else
1459 {
1460 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1461 pEquation->xor2[i].channel = 1;
1462 pEquation->xor2[i].valid = 1;
1463 pEquation->xor2[i].index = Log2(yHi);
1464 }
1465
1466 swizzle[i].y &= blkYMask;
1467 }
1468
1469 if (zHi != 0)
1470 {
1471 ADDR_ASSERT(IsPow2(zHi));
1472
1473 if (pEquation->xor1[i].value == 0)
1474 {
1475 pEquation->xor1[i].channel = 2;
1476 pEquation->xor1[i].valid = 1;
1477 pEquation->xor1[i].index = Log2(zHi);
1478 }
1479 else
1480 {
1481 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1482 pEquation->xor2[i].channel = 2;
1483 pEquation->xor2[i].valid = 1;
1484 pEquation->xor2[i].index = Log2(zHi);
1485 }
1486
1487 swizzle[i].z &= blkZMask;
1488 }
1489
1490 if (swizzle[i].value == 0)
1491 {
1492 bMask |= 1 << i;
1493 }
1494 }
1495 }
1496
1497 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1498 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1499
1500 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1501
1502 while (bMask != blockMask)
1503 {
1504 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1505 {
1506 if ((bMask & (1 << i)) == 0)
1507 {
1508 if (IsPow2(swizzle[i].value))
1509 {
1510 if (swizzle[i].x != 0)
1511 {
1512 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1513 xMask |= swizzle[i].x;
1514
1515 const UINT_32 xLog2 = Log2(swizzle[i].x);
1516
1517 ADDR_ASSERT(xLog2 < blkXLog2);
1518
1519 pEquation->addr[i].channel = 0;
1520 pEquation->addr[i].valid = 1;
1521 pEquation->addr[i].index = xLog2 + elemLog2;
1522 }
1523 else if (swizzle[i].y != 0)
1524 {
1525 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1526 yMask |= swizzle[i].y;
1527
1528 pEquation->addr[i].channel = 1;
1529 pEquation->addr[i].valid = 1;
1530 pEquation->addr[i].index = Log2(swizzle[i].y);
1531
1532 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1533 }
1534 else
1535 {
1536 ADDR_ASSERT(swizzle[i].z != 0);
1537 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1538 zMask |= swizzle[i].z;
1539
1540 pEquation->addr[i].channel = 2;
1541 pEquation->addr[i].valid = 1;
1542 pEquation->addr[i].index = Log2(swizzle[i].z);
1543
1544 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1545 }
1546
1547 swizzle[i].value = 0;
1548 bMask |= 1 << i;
1549 }
1550 else
1551 {
1552 const UINT_32 x = swizzle[i].x & xMask;
1553 const UINT_32 y = swizzle[i].y & yMask;
1554 const UINT_32 z = swizzle[i].z & zMask;
1555
1556 if (x != 0)
1557 {
1558 ADDR_ASSERT(IsPow2(x));
1559
1560 if (pEquation->xor1[i].value == 0)
1561 {
1562 pEquation->xor1[i].channel = 0;
1563 pEquation->xor1[i].valid = 1;
1564 pEquation->xor1[i].index = Log2(x) + elemLog2;
1565 }
1566 else
1567 {
1568 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1569 pEquation->xor2[i].channel = 0;
1570 pEquation->xor2[i].valid = 1;
1571 pEquation->xor2[i].index = Log2(x) + elemLog2;
1572 }
1573 }
1574
1575 if (y != 0)
1576 {
1577 ADDR_ASSERT(IsPow2(y));
1578
1579 if (pEquation->xor1[i].value == 0)
1580 {
1581 pEquation->xor1[i].channel = 1;
1582 pEquation->xor1[i].valid = 1;
1583 pEquation->xor1[i].index = Log2(y);
1584 }
1585 else
1586 {
1587 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1588 pEquation->xor2[i].channel = 1;
1589 pEquation->xor2[i].valid = 1;
1590 pEquation->xor2[i].index = Log2(y);
1591 }
1592 }
1593
1594 if (z != 0)
1595 {
1596 ADDR_ASSERT(IsPow2(z));
1597
1598 if (pEquation->xor1[i].value == 0)
1599 {
1600 pEquation->xor1[i].channel = 2;
1601 pEquation->xor1[i].valid = 1;
1602 pEquation->xor1[i].index = Log2(z);
1603 }
1604 else
1605 {
1606 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1607 pEquation->xor2[i].channel = 2;
1608 pEquation->xor2[i].valid = 1;
1609 pEquation->xor2[i].index = Log2(z);
1610 }
1611 }
1612
1613 swizzle[i].x &= ~x;
1614 swizzle[i].y &= ~y;
1615 swizzle[i].z &= ~z;
1616 }
1617 }
1618 }
1619 }
1620
1621 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1622 }
1623 }
1624
1625 /**
1626 ************************************************************************************************************************
1627 * Gfx11Lib::InitEquationTable
1628 *
1629 * @brief
1630 * Initialize Equation table.
1631 *
1632 * @return
1633 * N/A
1634 ************************************************************************************************************************
1635 */
InitEquationTable()1636 VOID Gfx11Lib::InitEquationTable()
1637 {
1638 memset(m_equationTable, 0, sizeof(m_equationTable));
1639
1640 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1641 {
1642 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1643
1644 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1645 {
1646 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1647
1648 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1649 {
1650 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1651 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1652
1653 if (pPatInfo != NULL)
1654 {
1655 ADDR_ASSERT(IsValidSwMode(swMode));
1656
1657 if (pPatInfo->maxItemCount <= 3)
1658 {
1659 ADDR_EQUATION equation = {};
1660
1661 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1662
1663 equationIndex = m_numEquations;
1664 ADDR_ASSERT(equationIndex < EquationTableSize);
1665
1666 m_equationTable[equationIndex] = equation;
1667
1668 m_numEquations++;
1669 }
1670 else
1671 {
1672 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X
1673 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1674 ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D);
1675 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1676 }
1677 }
1678
1679 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1680 }
1681 }
1682 }
1683 }
1684
1685 /**
1686 ************************************************************************************************************************
1687 * Gfx11Lib::HwlGetEquationIndex
1688 *
1689 * @brief
1690 * Interface function stub of GetEquationIndex
1691 *
1692 * @return
1693 * ADDR_E_RETURNCODE
1694 ************************************************************************************************************************
1695 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1696 UINT_32 Gfx11Lib::HwlGetEquationIndex(
1697 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1698 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1699 ) const
1700 {
1701 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1702
1703 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1704 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1705 {
1706 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1707 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1708 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1709
1710 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1711 }
1712
1713 if (pOut->pMipInfo != NULL)
1714 {
1715 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1716 {
1717 pOut->pMipInfo[i].equationIndex = equationIdx;
1718 }
1719 }
1720
1721 return equationIdx;
1722 }
1723
1724 /**
1725 ************************************************************************************************************************
1726 * Gfx11Lib::GetValidDisplaySwizzleModes
1727 *
1728 * @brief
1729 * Get valid swizzle modes mask for displayable surface
1730 *
1731 * @return
1732 * Valid swizzle modes mask for displayable surface
1733 ************************************************************************************************************************
1734 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1735 UINT_32 Gfx11Lib::GetValidDisplaySwizzleModes(
1736 UINT_32 bpp
1737 ) const
1738 {
1739 UINT_32 swModeMask = 0;
1740
1741 if (bpp <= 64)
1742 {
1743 swModeMask = Dcn32SwModeMask;
1744 }
1745
1746 return swModeMask;
1747 }
1748
1749 /**
1750 ************************************************************************************************************************
1751 * Gfx11Lib::IsValidDisplaySwizzleMode
1752 *
1753 * @brief
1754 * Check if a swizzle mode is supported by display engine
1755 *
1756 * @return
1757 * TRUE is swizzle mode is supported by display engine
1758 ************************************************************************************************************************
1759 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const1760 BOOL_32 Gfx11Lib::IsValidDisplaySwizzleMode(
1761 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1762 ) const
1763 {
1764 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1765
1766 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
1767 }
1768
1769 /**
1770 ************************************************************************************************************************
1771 * Gfx11Lib::GetMaxNumMipsInTail
1772 *
1773 * @brief
1774 * Return max number of mips in tails
1775 *
1776 * @return
1777 * Max number of mips in tails
1778 ************************************************************************************************************************
1779 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const1780 UINT_32 Gfx11Lib::GetMaxNumMipsInTail(
1781 UINT_32 blockSizeLog2, ///< block size log2
1782 BOOL_32 isThin ///< is thin or thick
1783 ) const
1784 {
1785 UINT_32 effectiveLog2 = blockSizeLog2;
1786
1787 if (isThin == FALSE)
1788 {
1789 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1790 }
1791
1792 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1793 }
1794
1795 /**
1796 ************************************************************************************************************************
1797 * Gfx11Lib::HwlComputePipeBankXor
1798 *
1799 * @brief
1800 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1801 *
1802 * @return
1803 * PipeBankXor value
1804 ************************************************************************************************************************
1805 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1806 ADDR_E_RETURNCODE Gfx11Lib::HwlComputePipeBankXor(
1807 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1808 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1809 ) const
1810 {
1811 if (IsNonPrtXor(pIn->swizzleMode))
1812 {
1813 pOut->pipeBankXor = 0;
1814 }
1815 else
1816 {
1817 pOut->pipeBankXor = 0;
1818 }
1819
1820 return ADDR_OK;
1821 }
1822
1823 /**
1824 ************************************************************************************************************************
1825 * Gfx11Lib::HwlComputeSlicePipeBankXor
1826 *
1827 * @brief
1828 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1829 *
1830 * @return
1831 * PipeBankXor value
1832 ************************************************************************************************************************
1833 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1834 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
1835 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1836 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1837 ) const
1838 {
1839 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1840
1841 if (IsNonPrtXor(pIn->swizzleMode))
1842 {
1843 if (pIn->bpe == 0)
1844 {
1845 ADDR_ASSERT_ALWAYS();
1846
1847 // Require a valid bytes-per-element value passed from client...
1848 returnCode = ADDR_INVALIDPARAMS;
1849 }
1850 else
1851 {
1852 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1853 pIn->resourceType,
1854 Log2(pIn->bpe >> 3),
1855 1);
1856
1857 if (pPatInfo != NULL)
1858 {
1859 ADDR_BIT_SETTING fullSwizzlePattern[20];
1860 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1861
1862 const UINT_32 pipeBankXorOffset =
1863 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
1864 GetBlockSizeLog2(pIn->swizzleMode),
1865 0,
1866 0,
1867 pIn->slice,
1868 0);
1869
1870 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1871
1872 // Should have no bit set under pipe interleave
1873 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1874
1875 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1876 }
1877 else
1878 {
1879 // Should never come here...
1880 ADDR_NOT_IMPLEMENTED();
1881
1882 returnCode = ADDR_NOTSUPPORTED;
1883 }
1884 }
1885 }
1886 else
1887 {
1888 pOut->pipeBankXor = 0;
1889 }
1890
1891 return returnCode;
1892 }
1893
1894 /**
1895 ************************************************************************************************************************
1896 * Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1897 *
1898 * @brief
1899 * Compute sub resource offset to support swizzle pattern
1900 *
1901 * @return
1902 * Offset
1903 ************************************************************************************************************************
1904 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1905 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1906 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
1907 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
1908 ) const
1909 {
1910 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
1911
1912 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1913
1914 return ADDR_OK;
1915 }
1916
1917 /**
1918 ************************************************************************************************************************
1919 * Gfx11Lib::HwlComputeNonBlockCompressedView
1920 *
1921 * @brief
1922 * Compute non-block-compressed view for a given mipmap level/slice.
1923 *
1924 * @return
1925 * ADDR_E_RETURNCODE
1926 ************************************************************************************************************************
1927 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1928 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView(
1929 const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
1930 ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
1931 ) const
1932 {
1933 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1934
1935 if (pIn->resourceType != ADDR_RSRC_TEX_2D)
1936 {
1937 // Only 2D resource can have a NonBC view...
1938 returnCode = ADDR_INVALIDPARAMS;
1939 }
1940 else if ((pIn->format != ADDR_FMT_ASTC_8x8) &&
1941 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1942 {
1943 // Only support BC1~BC7 or ASTC_8x8 for now...
1944 returnCode = ADDR_NOTSUPPORTED;
1945 }
1946 else
1947 {
1948 UINT_32 bcWidth, bcHeight;
1949 UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1950
1951 ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1952 infoIn.flags = pIn->flags;
1953 infoIn.swizzleMode = pIn->swizzleMode;
1954 infoIn.resourceType = pIn->resourceType;
1955 infoIn.bpp = bpp;
1956 infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth;
1957 infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight;
1958 infoIn.numSlices = pIn->numSlices;
1959 infoIn.numMipLevels = pIn->numMipLevels;
1960 infoIn.numSamples = 1;
1961 infoIn.numFrags = 1;
1962
1963 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
1964
1965 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1966 infoOut.pMipInfo = mipInfo;
1967
1968 const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
1969
1970 if (tiled)
1971 {
1972 returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
1973 }
1974 else
1975 {
1976 returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
1977 }
1978
1979 if (returnCode == ADDR_OK)
1980 {
1981 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1982 subOffIn.swizzleMode = infoIn.swizzleMode;
1983 subOffIn.resourceType = infoIn.resourceType;
1984 subOffIn.slice = pIn->slice;
1985 subOffIn.sliceSize = infoOut.sliceSize;
1986 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1987 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
1988
1989 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1990
1991 // For any mipmap level, move nonBc view base address by offset
1992 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1993 pOut->offset = subOffOut.offset;
1994
1995 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1996 slicePbXorIn.bpe = infoIn.bpp;
1997 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
1998 slicePbXorIn.resourceType = infoIn.resourceType;
1999 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2000 slicePbXorIn.slice = pIn->slice;
2001
2002 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2003
2004 // For any mipmap level, nonBc view should use computed pbXor
2005 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2006 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2007
2008 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2009 const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth;
2010 const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight;
2011
2012 if (inTail)
2013 {
2014 // For mipmap level that is in mip tail block, hack a lot of things...
2015 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2016 // are fit in tail block:
2017
2018 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2019 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2020
2021 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2022 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2023
2024 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2025 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2026
2027 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2028 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2029 }
2030 // This check should cover at least mipId == 0
2031 else if (requestMipWidth << pIn->mipId == infoIn.width)
2032 {
2033 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2034 // - only one mipmap level and mipId = 0
2035 pOut->mipId = 0;
2036 pOut->numMipLevels = 1;
2037
2038 // (mip0) width = requestMipWidth
2039 pOut->unalignedWidth = requestMipWidth;
2040
2041 // (mip0) height = requestMipHeight
2042 pOut->unalignedHeight = requestMipHeight;
2043 }
2044 else
2045 {
2046 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2047 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2048 // because single mip view may have different pitch value than original (multiple) mip view...
2049 // A simple case would be:
2050 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2051 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2052 // mip0 width = 0x101/mip1 width = 0x80
2053 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2054 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2055
2056 // - 2 levels and mipId = 1
2057 pOut->mipId = 1;
2058 pOut->numMipLevels = 2;
2059
2060 const UINT_32 upperMipWidth =
2061 PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth;
2062 const UINT_32 upperMipHeight =
2063 PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight;
2064
2065 const BOOL_32 needToAvoidInTail =
2066 tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2067 TRUE : FALSE;
2068
2069 const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2070 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2071
2072 const BOOL_32 needExtraWidth =
2073 ((upperMipWidth < requestMipWidth * 2) ||
2074 ((upperMipWidth == requestMipWidth * 2) &&
2075 ((needToAvoidInTail == TRUE) ||
2076 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2077
2078 const BOOL_32 needExtraHeight =
2079 ((upperMipHeight < requestMipHeight * 2) ||
2080 ((upperMipHeight == requestMipHeight * 2) &&
2081 ((needToAvoidInTail == TRUE) ||
2082 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2083
2084 // (mip0) width = requestLastMipLevelWidth
2085 pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0);
2086
2087 // (mip0) height = requestLastMipLevelHeight
2088 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2089 }
2090
2091 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2092 ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2093 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2094 ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2095 }
2096 }
2097
2098 return returnCode;
2099 }
2100
2101 /**
2102 ************************************************************************************************************************
2103 * Gfx11Lib::ValidateNonSwModeParams
2104 *
2105 * @brief
2106 * Validate compute surface info params except swizzle mode
2107 *
2108 * @return
2109 * TRUE if parameters are valid, FALSE otherwise
2110 ************************************************************************************************************************
2111 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2112 BOOL_32 Gfx11Lib::ValidateNonSwModeParams(
2113 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2114 {
2115 BOOL_32 valid = TRUE;
2116
2117 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8))
2118 {
2119 ADDR_ASSERT_ALWAYS();
2120 valid = FALSE;
2121 }
2122 else if (pIn->flags.fmask == 1)
2123 {
2124 // There is no FMASK for GFX11 ASICs
2125 ADDR_ASSERT_ALWAYS();
2126 valid = FALSE;
2127 }
2128 else if (pIn->numSamples > 8)
2129 {
2130 // There is no EQAA support for GFX11 ASICs, so the max number of sample is 8
2131 ADDR_ASSERT_ALWAYS();
2132 valid = FALSE;
2133 }
2134 else if ((pIn->numFrags != 0) && (pIn->numSamples != pIn->numFrags))
2135 {
2136 // There is no EQAA support for GFX11 ASICs, so the number of sample has to be same as number of fragment
2137 ADDR_ASSERT_ALWAYS();
2138 valid = FALSE;
2139 }
2140
2141 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2142 const AddrResourceType rsrcType = pIn->resourceType;
2143 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2144 const BOOL_32 msaa = (pIn->numSamples > 1);
2145 const BOOL_32 display = flags.display;
2146 const BOOL_32 tex3d = IsTex3d(rsrcType);
2147 const BOOL_32 tex2d = IsTex2d(rsrcType);
2148 const BOOL_32 tex1d = IsTex1d(rsrcType);
2149 const BOOL_32 stereo = flags.qbStereo;
2150
2151 // Resource type check
2152 if (tex1d)
2153 {
2154 if (msaa || display || stereo)
2155 {
2156 ADDR_ASSERT_ALWAYS();
2157 valid = FALSE;
2158 }
2159 }
2160 else if (tex2d)
2161 {
2162 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2163 {
2164 ADDR_ASSERT_ALWAYS();
2165 valid = FALSE;
2166 }
2167 }
2168 else if (tex3d)
2169 {
2170 if (msaa || display || stereo)
2171 {
2172 ADDR_ASSERT_ALWAYS();
2173 valid = FALSE;
2174 }
2175 }
2176 else
2177 {
2178 ADDR_ASSERT_ALWAYS();
2179 valid = FALSE;
2180 }
2181
2182 return valid;
2183 }
2184
2185 /**
2186 ************************************************************************************************************************
2187 * Gfx11Lib::ValidateSwModeParams
2188 *
2189 * @brief
2190 * Validate compute surface info related to swizzle mode
2191 *
2192 * @return
2193 * TRUE if parameters are valid, FALSE otherwise
2194 ************************************************************************************************************************
2195 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2196 BOOL_32 Gfx11Lib::ValidateSwModeParams(
2197 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2198 {
2199 BOOL_32 valid = TRUE;
2200
2201 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2202 {
2203 ADDR_ASSERT_ALWAYS();
2204 valid = FALSE;
2205 }
2206 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2207 {
2208 ADDR_ASSERT_ALWAYS();
2209 valid = FALSE;
2210 }
2211
2212 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2213 const AddrResourceType rsrcType = pIn->resourceType;
2214 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2215 const BOOL_32 msaa = (pIn->numSamples > 1);
2216 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2217 const BOOL_32 color = flags.color;
2218 const BOOL_32 display = flags.display;
2219 const BOOL_32 tex3d = IsTex3d(rsrcType);
2220 const BOOL_32 tex2d = IsTex2d(rsrcType);
2221 const BOOL_32 tex1d = IsTex1d(rsrcType);
2222 const BOOL_32 thin3d = flags.view3dAs2dArray;
2223 const BOOL_32 linear = IsLinear(swizzle);
2224 const BOOL_32 blk256B = IsBlock256b(swizzle);
2225 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2226 const BOOL_32 prt = flags.prt;
2227
2228 // Misc check
2229 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numSamples)))
2230 {
2231 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2232 ADDR_ASSERT_ALWAYS();
2233 valid = FALSE;
2234 }
2235
2236 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2237 {
2238 ADDR_ASSERT_ALWAYS();
2239 valid = FALSE;
2240 }
2241
2242 if ((pIn->bpp == 96) && (linear == FALSE))
2243 {
2244 ADDR_ASSERT_ALWAYS();
2245 valid = FALSE;
2246 }
2247
2248 const UINT_32 swizzleMask = 1 << swizzle;
2249
2250 // Resource type check
2251 if (tex1d)
2252 {
2253 if ((swizzleMask & Gfx11Rsrc1dSwModeMask) == 0)
2254 {
2255 ADDR_ASSERT_ALWAYS();
2256 valid = FALSE;
2257 }
2258 }
2259 else if (tex2d)
2260 {
2261 if ((swizzleMask & Gfx11Rsrc2dSwModeMask) == 0)
2262 {
2263 ADDR_ASSERT_ALWAYS();
2264 valid = FALSE;
2265 }
2266 else if (prt && ((swizzleMask & Gfx11Rsrc2dPrtSwModeMask) == 0))
2267 {
2268 ADDR_ASSERT_ALWAYS();
2269 valid = FALSE;
2270 }
2271 }
2272 else if (tex3d)
2273 {
2274 if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) ||
2275 (prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) ||
2276 (thin3d && ((swizzleMask & Gfx11Rsrc3dThinSwModeMask) == 0)))
2277 {
2278 ADDR_ASSERT_ALWAYS();
2279 valid = FALSE;
2280 }
2281 }
2282
2283 // Swizzle type check
2284 if (linear)
2285 {
2286 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2287 {
2288 ADDR_ASSERT_ALWAYS();
2289 valid = FALSE;
2290 }
2291 }
2292 else if (IsZOrderSwizzle(swizzle))
2293 {
2294 if ((pIn->bpp > 64) ||
2295 (msaa && (color || (pIn->bpp > 32))) ||
2296 ElemLib::IsBlockCompressed(pIn->format) ||
2297 ElemLib::IsMacroPixelPacked(pIn->format))
2298 {
2299 ADDR_ASSERT_ALWAYS();
2300 valid = FALSE;
2301 }
2302 }
2303 else if (IsStandardSwizzle(rsrcType, swizzle))
2304 {
2305 if (zbuffer || msaa)
2306 {
2307 ADDR_ASSERT_ALWAYS();
2308 valid = FALSE;
2309 }
2310 }
2311 else if (IsDisplaySwizzle(rsrcType, swizzle))
2312 {
2313 if (zbuffer || msaa)
2314 {
2315 ADDR_ASSERT_ALWAYS();
2316 valid = FALSE;
2317 }
2318 }
2319 else if (IsRtOptSwizzle(swizzle))
2320 {
2321 if (zbuffer)
2322 {
2323 ADDR_ASSERT_ALWAYS();
2324 valid = FALSE;
2325 }
2326 }
2327 else
2328 {
2329 ADDR_ASSERT_ALWAYS();
2330 valid = FALSE;
2331 }
2332
2333 // Block type check
2334 if (blk256B)
2335 {
2336 if (zbuffer || tex3d || msaa)
2337 {
2338 ADDR_ASSERT_ALWAYS();
2339 valid = FALSE;
2340 }
2341 }
2342
2343 return valid;
2344 }
2345
2346 /**
2347 ************************************************************************************************************************
2348 * Gfx11Lib::HwlComputeSurfaceInfoSanityCheck
2349 *
2350 * @brief
2351 * Compute surface info sanity check
2352 *
2353 * @return
2354 * ADDR_OK if the parameters are valid, ADDR_INVALIDPARAMS otherwise
2355 ************************************************************************************************************************
2356 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2357 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoSanityCheck(
2358 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2359 ) const
2360 {
2361 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2362 }
2363
2364 /**
2365 ************************************************************************************************************************
2366 * Gfx11Lib::HwlGetPreferredSurfaceSetting
2367 *
2368 * @brief
2369 * Internal function to get suggested surface information for client to use
2370 *
2371 * @return
2372 * ADDR_E_RETURNCODE
2373 ************************************************************************************************************************
2374 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2375 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
2376 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2377 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2378 ) const
2379 {
2380 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2381
2382 if (pIn->flags.fmask)
2383 {
2384 // There is no FMASK for GFX11 ASICs.
2385 ADDR_ASSERT_ALWAYS();
2386
2387 returnCode = ADDR_INVALIDPARAMS;
2388 }
2389 else
2390 {
2391 UINT_32 bpp = pIn->bpp;
2392 UINT_32 width = Max(pIn->width, 1u);
2393 UINT_32 height = Max(pIn->height, 1u);
2394
2395 // Set format to INVALID will skip this conversion
2396 if (pIn->format != ADDR_FMT_INVALID)
2397 {
2398 ElemMode elemMode = ADDR_UNCOMPRESSED;
2399 UINT_32 expandX, expandY;
2400
2401 // Get compression/expansion factors and element mode which indicates compression/expansion
2402 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2403 &elemMode,
2404 &expandX,
2405 &expandY);
2406
2407 UINT_32 basePitch = 0;
2408 GetElemLib()->AdjustSurfaceInfo(elemMode,
2409 expandX,
2410 expandY,
2411 &bpp,
2412 &basePitch,
2413 &width,
2414 &height);
2415 }
2416
2417 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2418 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2419 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2420 const BOOL_32 msaa = numSamples > 1;
2421
2422 // Pre sanity check on non swizzle mode parameters
2423 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2424 localIn.flags = pIn->flags;
2425 localIn.resourceType = pIn->resourceType;
2426 localIn.format = pIn->format;
2427 localIn.bpp = bpp;
2428 localIn.width = width;
2429 localIn.height = height;
2430 localIn.numSlices = numSlices;
2431 localIn.numMipLevels = numMipLevels;
2432 localIn.numSamples = numSamples;
2433 localIn.numFrags = numSamples;
2434
2435 if (ValidateNonSwModeParams(&localIn))
2436 {
2437 // Forbid swizzle mode(s) by client setting
2438 ADDR2_SWMODE_SET allowedSwModeSet = {};
2439 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx11LinearSwModeMask;
2440 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx11Blk256BSwModeMask;
2441 allowedSwModeSet.value |=
2442 pIn->forbiddenBlock.macroThin4KB ? 0 :
2443 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx11Blk4KBSwModeMask);
2444 allowedSwModeSet.value |=
2445 pIn->forbiddenBlock.macroThick4KB ? 0 :
2446 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick4KBSwModeMask : 0);
2447 allowedSwModeSet.value |=
2448 pIn->forbiddenBlock.macroThin64KB ? 0 :
2449 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask);
2450 allowedSwModeSet.value |=
2451 pIn->forbiddenBlock.macroThick64KB ? 0 :
2452 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick64KBSwModeMask : 0);
2453 allowedSwModeSet.value |=
2454 pIn->forbiddenBlock.gfx11.thin256KB ? 0 :
2455 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask);
2456 allowedSwModeSet.value |=
2457 pIn->forbiddenBlock.gfx11.thick256KB ? 0 :
2458 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick256KBSwModeMask : 0);
2459
2460 if (pIn->preferredSwSet.value != 0)
2461 {
2462 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx11ZSwModeMask;
2463 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx11StandardSwModeMask;
2464 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx11DisplaySwModeMask;
2465 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx11RenderSwModeMask;
2466 }
2467
2468 if (pIn->noXor)
2469 {
2470 allowedSwModeSet.value &= ~Gfx11XorSwModeMask;
2471 }
2472
2473 if (pIn->maxAlign > 0)
2474 {
2475 if (pIn->maxAlign < Size256K)
2476 {
2477 allowedSwModeSet.value &= ~Gfx11Blk256KBSwModeMask;
2478 }
2479
2480 if (pIn->maxAlign < Size64K)
2481 {
2482 allowedSwModeSet.value &= ~Gfx11Blk64KBSwModeMask;
2483 }
2484
2485 if (pIn->maxAlign < Size4K)
2486 {
2487 allowedSwModeSet.value &= ~Gfx11Blk4KBSwModeMask;
2488 }
2489
2490 if (pIn->maxAlign < Size256)
2491 {
2492 allowedSwModeSet.value &= ~Gfx11Blk256BSwModeMask;
2493 }
2494 }
2495
2496 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2497 switch (pIn->resourceType)
2498 {
2499 case ADDR_RSRC_TEX_1D:
2500 allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
2501 break;
2502
2503 case ADDR_RSRC_TEX_2D:
2504 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
2505 break;
2506
2507 case ADDR_RSRC_TEX_3D:
2508 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
2509
2510 if (pIn->flags.view3dAs2dArray)
2511 {
2512 allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask;
2513 }
2514 break;
2515
2516 default:
2517 ADDR_ASSERT_ALWAYS();
2518 allowedSwModeSet.value = 0;
2519 break;
2520 }
2521
2522 if (ElemLib::IsBlockCompressed(pIn->format) ||
2523 ElemLib::IsMacroPixelPacked(pIn->format) ||
2524 (bpp > 64) ||
2525 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2526 {
2527 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
2528 }
2529
2530 if (pIn->format == ADDR_FMT_32_32_32)
2531 {
2532 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2533 }
2534
2535 if (msaa)
2536 {
2537 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
2538 }
2539
2540 if (pIn->flags.depth || pIn->flags.stencil)
2541 {
2542 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2543 }
2544
2545 if (pIn->flags.display)
2546 {
2547 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2548 }
2549
2550 if (allowedSwModeSet.value != 0)
2551 {
2552 #if DEBUG
2553 // Post sanity check, at least AddrLib should accept the output generated by its own
2554 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2555
2556 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2557 {
2558 if (validateSwModeSet & 1)
2559 {
2560 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2561 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2562 }
2563
2564 validateSwModeSet >>= 1;
2565 }
2566 #endif
2567
2568 pOut->resourceType = pIn->resourceType;
2569 pOut->validSwModeSet = allowedSwModeSet;
2570 pOut->canXor = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
2571 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2572 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2573
2574 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2575
2576 if (pOut->clientPreferredSwSet.value == 0)
2577 {
2578 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2579 }
2580
2581 // Apply optional restrictions
2582 if (pIn->flags.needEquation)
2583 {
2584 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2585 }
2586
2587 if (allowedSwModeSet.value == Gfx11LinearSwModeMask)
2588 {
2589 pOut->swizzleMode = ADDR_SW_LINEAR;
2590 }
2591 else
2592 {
2593 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
2594
2595 if ((height > 1) && (computeMinSize == FALSE))
2596 {
2597 // Always ignore linear swizzle mode if:
2598 // 1. This is a (2D/3D) resource with height > 1
2599 // 2. Client doesn't require computing minimize size
2600 allowedSwModeSet.swLinear = 0;
2601 }
2602
2603 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2604
2605 // Determine block size if there are 2 or more block type candidates
2606 if (IsPow2(allowedBlockSet.value) == FALSE)
2607 {
2608 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
2609
2610 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
2611
2612 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2613 {
2614 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S_X;
2615 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2616 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S_X;
2617 swMode[AddrBlockThin256KB] = ADDR_SW_256KB_R_X;
2618 swMode[AddrBlockThick256KB] = ADDR_SW_256KB_S_X;
2619 }
2620 else
2621 {
2622 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
2623 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D_X;
2624 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D_X;
2625 swMode[AddrBlockThin256KB] = ADDR_SW_256KB_D_X;
2626 }
2627
2628 UINT_64 padSize[AddrBlockMaxTiledType] = {};
2629
2630 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
2631 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
2632 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2633 UINT_32 minSizeBlk = AddrBlockMicro;
2634 UINT_64 minSize = 0;
2635
2636 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
2637
2638 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
2639 {
2640 if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
2641 {
2642 localIn.swizzleMode = swMode[i];
2643
2644 if (localIn.swizzleMode == ADDR_SW_LINEAR)
2645 {
2646 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
2647 }
2648 else
2649 {
2650 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
2651 }
2652
2653 if (returnCode == ADDR_OK)
2654 {
2655 padSize[i] = localOut.surfSize;
2656
2657 if ((minSize == 0) ||
2658 BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
2659 {
2660 minSize = padSize[i];
2661 minSizeBlk = i;
2662 }
2663 }
2664 else
2665 {
2666 ADDR_ASSERT_ALWAYS();
2667 break;
2668 }
2669 }
2670 }
2671
2672 if (pIn->memoryBudget > 1.0)
2673 {
2674 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
2675 // smaller-block type again in coming loop
2676 switch (minSizeBlk)
2677 {
2678 case AddrBlockThick256KB:
2679 allowedBlockSet.gfx11.thin256KB = 0;
2680 case AddrBlockThin256KB:
2681 allowedBlockSet.macroThick64KB = 0;
2682 case AddrBlockThick64KB:
2683 allowedBlockSet.macroThin64KB = 0;
2684 case AddrBlockThin64KB:
2685 allowedBlockSet.macroThick4KB = 0;
2686 case AddrBlockThick4KB:
2687 allowedBlockSet.macroThin4KB = 0;
2688 case AddrBlockThin4KB:
2689 allowedBlockSet.micro = 0;
2690 case AddrBlockMicro:
2691 allowedBlockSet.linear = 0;
2692 case AddrBlockLinear:
2693 break;
2694
2695 default:
2696 ADDR_ASSERT_ALWAYS();
2697 break;
2698 }
2699
2700 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2701 {
2702 if ((i != minSizeBlk) &&
2703 IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
2704 {
2705 if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
2706 {
2707 // Clear the block type if the memory waste is unacceptable
2708 allowedBlockSet.value &= ~(1u << (i - 1));
2709 }
2710 }
2711 }
2712
2713 // Remove linear block type if 2 or more block types are allowed
2714 if (IsPow2(allowedBlockSet.value) == FALSE)
2715 {
2716 allowedBlockSet.linear = 0;
2717 }
2718
2719 // Select the biggest allowed block type
2720 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
2721
2722 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
2723 {
2724 minSizeBlk = AddrBlockLinear;
2725 }
2726 }
2727
2728 switch (minSizeBlk)
2729 {
2730 case AddrBlockLinear:
2731 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2732 break;
2733
2734 case AddrBlockMicro:
2735 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2736 allowedSwModeSet.value &= Gfx11Blk256BSwModeMask;
2737 break;
2738
2739 case AddrBlockThin4KB:
2740 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2741 allowedSwModeSet.value &= Gfx11Blk4KBSwModeMask;
2742 break;
2743
2744 case AddrBlockThick4KB:
2745 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2746 allowedSwModeSet.value &= Gfx11Rsrc3dThick4KBSwModeMask;
2747 break;
2748
2749 case AddrBlockThin64KB:
2750 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2751 Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask;
2752 break;
2753
2754 case AddrBlockThick64KB:
2755 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2756 allowedSwModeSet.value &= Gfx11Rsrc3dThick64KBSwModeMask;
2757 break;
2758
2759 case AddrBlockThin256KB:
2760 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2761 Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask;
2762 break;
2763
2764 case AddrBlockThick256KB:
2765 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2766 allowedSwModeSet.value &= Gfx11Rsrc3dThick256KBSwModeMask;
2767 break;
2768
2769 default:
2770 ADDR_ASSERT_ALWAYS();
2771 allowedSwModeSet.value = 0;
2772 break;
2773 }
2774 }
2775
2776 // Block type should be determined.
2777 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2778
2779 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2780
2781 // Determine swizzle type if there are 2 or more swizzle type candidates
2782 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
2783 {
2784 if (ElemLib::IsBlockCompressed(pIn->format))
2785 {
2786 if (allowedSwSet.sw_D)
2787 {
2788 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2789 }
2790 else if (allowedSwSet.sw_S)
2791 {
2792 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2793 }
2794 else
2795 {
2796 ADDR_ASSERT(allowedSwSet.sw_R);
2797 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2798 }
2799 }
2800 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2801 {
2802 if (allowedSwSet.sw_S)
2803 {
2804 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2805 }
2806 else if (allowedSwSet.sw_D)
2807 {
2808 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2809 }
2810 else
2811 {
2812 ADDR_ASSERT(allowedSwSet.sw_R);
2813 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2814 }
2815 }
2816 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2817 {
2818 if (allowedSwSet.sw_D)
2819 {
2820 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2821 }
2822 else if (allowedSwSet.sw_S)
2823 {
2824 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2825 }
2826 else if (allowedSwSet.sw_R)
2827 {
2828 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2829 }
2830 else
2831 {
2832 ADDR_ASSERT(allowedSwSet.sw_Z);
2833 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2834 }
2835 }
2836 else
2837 {
2838 if (allowedSwSet.sw_R)
2839 {
2840 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2841 }
2842 else if (allowedSwSet.sw_D)
2843 {
2844 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2845 }
2846 else if (allowedSwSet.sw_Z)
2847 {
2848 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2849 }
2850 else
2851 {
2852 ADDR_ASSERT_ALWAYS();
2853 }
2854 }
2855
2856 // Swizzle type should be determined.
2857 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2858 }
2859
2860 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2861 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2862 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2863 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2864 }
2865 }
2866 else
2867 {
2868 // Invalid combination...
2869 ADDR_ASSERT_ALWAYS();
2870 returnCode = ADDR_INVALIDPARAMS;
2871 }
2872 }
2873 else
2874 {
2875 // Invalid combination...
2876 ADDR_ASSERT_ALWAYS();
2877 returnCode = ADDR_INVALIDPARAMS;
2878 }
2879 }
2880
2881 return returnCode;
2882 }
2883
2884 /**
2885 ************************************************************************************************************************
2886 * Gfx11Lib::ComputeStereoInfo
2887 *
2888 * @brief
2889 * Compute height alignment and right eye pipeBankXor for stereo surface
2890 *
2891 * @return
2892 * Error code
2893 *
2894 ************************************************************************************************************************
2895 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const2896 ADDR_E_RETURNCODE Gfx11Lib::ComputeStereoInfo(
2897 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
2898 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
2899 UINT_32* pRightXor ///< Right eye xor
2900 ) const
2901 {
2902 ADDR_E_RETURNCODE ret = ADDR_OK;
2903
2904 *pRightXor = 0;
2905
2906 if (IsNonPrtXor(pIn->swizzleMode))
2907 {
2908 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
2909 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2910 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
2911 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
2912 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
2913
2914 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
2915 {
2916 UINT_32 yMax = 0;
2917 UINT_32 yPosMask = 0;
2918
2919 // First get "max y bit"
2920 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2921 {
2922 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
2923
2924 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2925 (m_equationTable[eqIndex].addr[i].index > yMax))
2926 {
2927 yMax = m_equationTable[eqIndex].addr[i].index;
2928 }
2929
2930 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
2931 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
2932 (m_equationTable[eqIndex].xor1[i].index > yMax))
2933 {
2934 yMax = m_equationTable[eqIndex].xor1[i].index;
2935 }
2936
2937 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
2938 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
2939 (m_equationTable[eqIndex].xor2[i].index > yMax))
2940 {
2941 yMax = m_equationTable[eqIndex].xor2[i].index;
2942 }
2943 }
2944
2945 // Then loop again for populating a position mask of "max Y bit"
2946 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
2947 {
2948 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
2949 (m_equationTable[eqIndex].addr[i].index == yMax))
2950 {
2951 yPosMask |= 1u << i;
2952 }
2953 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
2954 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
2955 (m_equationTable[eqIndex].xor1[i].index == yMax))
2956 {
2957 yPosMask |= 1u << i;
2958 }
2959 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
2960 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
2961 (m_equationTable[eqIndex].xor2[i].index == yMax))
2962 {
2963 yPosMask |= 1u << i;
2964 }
2965 }
2966
2967 const UINT_32 additionalAlign = 1 << yMax;
2968
2969 if (additionalAlign >= *pAlignY)
2970 {
2971 *pAlignY = additionalAlign;
2972
2973 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
2974
2975 if ((alignedHeight >> yMax) & 1)
2976 {
2977 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
2978 }
2979 }
2980 }
2981 else
2982 {
2983 ret = ADDR_INVALIDPARAMS;
2984 }
2985 }
2986
2987 return ret;
2988 }
2989
2990 /**
2991 ************************************************************************************************************************
2992 * Gfx11Lib::HwlComputeSurfaceInfoTiled
2993 *
2994 * @brief
2995 * Internal function to calculate alignment for tiled surface
2996 *
2997 * @return
2998 * ADDR_E_RETURNCODE
2999 ************************************************************************************************************************
3000 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3001 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoTiled(
3002 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3003 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3004 ) const
3005 {
3006 ADDR_E_RETURNCODE ret;
3007
3008 // Mip chain dimesion and epitch has no meaning in GFX11, set to default value
3009 pOut->mipChainPitch = 0;
3010 pOut->mipChainHeight = 0;
3011 pOut->mipChainSlice = 0;
3012 pOut->epitchIsHeight = FALSE;
3013
3014 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3015 pOut->mipChainInTail = FALSE;
3016 pOut->firstMipIdInTail = pIn->numMipLevels;
3017
3018 if (IsBlock256b(pIn->swizzleMode))
3019 {
3020 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3021 }
3022 else
3023 {
3024 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3025 }
3026
3027 return ret;
3028 }
3029
3030 /**
3031 ************************************************************************************************************************
3032 * Gfx11Lib::ComputeSurfaceInfoMicroTiled
3033 *
3034 * @brief
3035 * Internal function to calculate alignment for micro tiled surface
3036 *
3037 * @return
3038 * ADDR_E_RETURNCODE
3039 ************************************************************************************************************************
3040 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3041 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMicroTiled(
3042 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3043 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3044 ) const
3045 {
3046 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3047 &pOut->blockHeight,
3048 &pOut->blockSlices,
3049 pIn->bpp,
3050 pIn->numSamples,
3051 pIn->resourceType,
3052 pIn->swizzleMode);
3053
3054 if (ret == ADDR_OK)
3055 {
3056 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3057
3058 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3059 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3060 pOut->numSlices = pIn->numSlices;
3061 pOut->baseAlign = blockSize;
3062
3063 if (pIn->numMipLevels > 1)
3064 {
3065 const UINT_32 mip0Width = pIn->width;
3066 const UINT_32 mip0Height = pIn->height;
3067 UINT_64 mipSliceSize = 0;
3068
3069 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3070 {
3071 UINT_32 mipWidth, mipHeight;
3072
3073 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3074
3075 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3076 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3077
3078 if (pOut->pMipInfo != NULL)
3079 {
3080 pOut->pMipInfo[i].pitch = mipActualWidth;
3081 pOut->pMipInfo[i].height = mipActualHeight;
3082 pOut->pMipInfo[i].depth = 1;
3083 pOut->pMipInfo[i].offset = mipSliceSize;
3084 pOut->pMipInfo[i].mipTailOffset = 0;
3085 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3086 }
3087
3088 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3089 }
3090
3091 pOut->sliceSize = mipSliceSize;
3092 pOut->surfSize = mipSliceSize * pOut->numSlices;
3093 }
3094 else
3095 {
3096 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3097 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3098
3099 if (pOut->pMipInfo != NULL)
3100 {
3101 pOut->pMipInfo[0].pitch = pOut->pitch;
3102 pOut->pMipInfo[0].height = pOut->height;
3103 pOut->pMipInfo[0].depth = 1;
3104 pOut->pMipInfo[0].offset = 0;
3105 pOut->pMipInfo[0].mipTailOffset = 0;
3106 pOut->pMipInfo[0].macroBlockOffset = 0;
3107 }
3108 }
3109
3110 }
3111
3112 return ret;
3113 }
3114
3115 /**
3116 ************************************************************************************************************************
3117 * Gfx11Lib::ComputeSurfaceInfoMacroTiled
3118 *
3119 * @brief
3120 * Internal function to calculate alignment for macro tiled surface
3121 *
3122 * @return
3123 * ADDR_E_RETURNCODE
3124 ************************************************************************************************************************
3125 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3126 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMacroTiled(
3127 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3128 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3129 ) const
3130 {
3131 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3132 &pOut->blockHeight,
3133 &pOut->blockSlices,
3134 pIn->bpp,
3135 pIn->numSamples,
3136 pIn->resourceType,
3137 pIn->swizzleMode);
3138
3139 if (returnCode == ADDR_OK)
3140 {
3141 UINT_32 heightAlign = pOut->blockHeight;
3142
3143 if (pIn->flags.qbStereo)
3144 {
3145 UINT_32 rightXor = 0;
3146
3147 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3148
3149 if (returnCode == ADDR_OK)
3150 {
3151 pOut->pStereoInfo->rightSwizzle = rightXor;
3152 }
3153 }
3154
3155 if (returnCode == ADDR_OK)
3156 {
3157 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3158 const UINT_32 blockSize = 1 << blockSizeLog2;
3159
3160 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3161 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3162 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3163 pOut->baseAlign = blockSize;
3164
3165 if (pIn->numMipLevels > 1)
3166 {
3167 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3168 pIn->swizzleMode,
3169 pOut->blockWidth,
3170 pOut->blockHeight,
3171 pOut->blockSlices);
3172 const UINT_32 mip0Width = pIn->width;
3173 const UINT_32 mip0Height = pIn->height;
3174 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3175 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3176 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3177 const UINT_32 index = Log2(pIn->bpp >> 3);
3178 UINT_32 firstMipInTail = pIn->numMipLevels;
3179 UINT_64 mipChainSliceSize = 0;
3180 UINT_64 mipSize[MaxMipLevels];
3181 UINT_64 mipSliceSize[MaxMipLevels];
3182
3183 Dim3d fixedTailMaxDim = tailMaxDim;
3184
3185 if ((IsZOrderSwizzle(pIn->swizzleMode) || IsRtOptSwizzle(pIn->swizzleMode)) && (index <= 1))
3186 {
3187 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3188 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3189 }
3190
3191 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3192 {
3193 UINT_32 mipWidth, mipHeight, mipDepth;
3194
3195 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3196
3197 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3198 {
3199 firstMipInTail = i;
3200 mipChainSliceSize += blockSize / pOut->blockSlices;
3201 break;
3202 }
3203 else
3204 {
3205 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3206 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3207 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3208 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3209
3210 mipSize[i] = sliceSize * depth;
3211 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3212 mipChainSliceSize += sliceSize;
3213
3214 if (pOut->pMipInfo != NULL)
3215 {
3216 pOut->pMipInfo[i].pitch = pitch;
3217 pOut->pMipInfo[i].height = height;
3218 pOut->pMipInfo[i].depth = depth;
3219 }
3220 }
3221 }
3222
3223 pOut->sliceSize = mipChainSliceSize;
3224 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3225 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3226 pOut->firstMipIdInTail = firstMipInTail;
3227
3228 if (pOut->pMipInfo != NULL)
3229 {
3230 UINT_64 offset = 0;
3231 UINT_64 macroBlkOffset = 0;
3232 UINT_32 tailMaxDepth = 0;
3233
3234 if (firstMipInTail != pIn->numMipLevels)
3235 {
3236 UINT_32 mipWidth, mipHeight;
3237
3238 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3239 &mipWidth, &mipHeight, &tailMaxDepth);
3240
3241 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3242 macroBlkOffset = blockSize;
3243 }
3244
3245 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3246 {
3247 pOut->pMipInfo[i].offset = offset;
3248 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3249 pOut->pMipInfo[i].mipTailOffset = 0;
3250
3251 offset += mipSize[i];
3252 macroBlkOffset += mipSliceSize[i];
3253 }
3254
3255 UINT_32 pitch = tailMaxDim.w;
3256 UINT_32 height = tailMaxDim.h;
3257 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3258
3259 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3260
3261 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3262 {
3263 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3264 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3265
3266 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3267 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3268 pOut->pMipInfo[i].macroBlockOffset = 0;
3269
3270 pOut->pMipInfo[i].pitch = pitch;
3271 pOut->pMipInfo[i].height = height;
3272 pOut->pMipInfo[i].depth = depth;
3273
3274 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3275 ((mipOffset >> 10) & 2) |
3276 ((mipOffset >> 11) & 4) |
3277 ((mipOffset >> 12) & 8) |
3278 ((mipOffset >> 13) & 16) |
3279 ((mipOffset >> 14) & 32);
3280 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3281 ((mipOffset >> 9) & 2) |
3282 ((mipOffset >> 10) & 4) |
3283 ((mipOffset >> 11) & 8) |
3284 ((mipOffset >> 12) & 16) |
3285 ((mipOffset >> 13) & 32);
3286
3287 if (blockSizeLog2 & 1)
3288 {
3289 const UINT_32 temp = mipX;
3290 mipX = mipY;
3291 mipY = temp;
3292
3293 if (index & 1)
3294 {
3295 mipY = (mipY << 1) | (mipX & 1);
3296 mipX = mipX >> 1;
3297 }
3298 }
3299
3300 if (isThin)
3301 {
3302 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3303 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3304 pOut->pMipInfo[i].mipTailCoordZ = 0;
3305
3306 pitch = Max(pitch >> 1, Block256_2d[index].w);
3307 height = Max(height >> 1, Block256_2d[index].h);
3308 depth = 1;
3309 }
3310 else
3311 {
3312 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3313 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3314 pOut->pMipInfo[i].mipTailCoordZ = 0;
3315
3316 pitch = Max(pitch >> 1, Block256_3d[index].w);
3317 height = Max(height >> 1, Block256_3d[index].h);
3318 depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d);
3319 }
3320 }
3321 }
3322 }
3323 else
3324 {
3325 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numSamples;
3326 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3327
3328 if (pOut->pMipInfo != NULL)
3329 {
3330 pOut->pMipInfo[0].pitch = pOut->pitch;
3331 pOut->pMipInfo[0].height = pOut->height;
3332 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3333 pOut->pMipInfo[0].offset = 0;
3334 pOut->pMipInfo[0].mipTailOffset = 0;
3335 pOut->pMipInfo[0].macroBlockOffset = 0;
3336 pOut->pMipInfo[0].mipTailCoordX = 0;
3337 pOut->pMipInfo[0].mipTailCoordY = 0;
3338 pOut->pMipInfo[0].mipTailCoordZ = 0;
3339 }
3340 }
3341 }
3342 }
3343
3344 return returnCode;
3345 }
3346
3347 /**
3348 ************************************************************************************************************************
3349 * Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled
3350 *
3351 * @brief
3352 * Internal function to calculate address from coord for tiled swizzle surface
3353 *
3354 * @return
3355 * ADDR_E_RETURNCODE
3356 ************************************************************************************************************************
3357 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3358 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
3359 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3360 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3361 ) const
3362 {
3363 ADDR_E_RETURNCODE ret;
3364
3365 if (IsBlock256b(pIn->swizzleMode))
3366 {
3367 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3368 }
3369 else
3370 {
3371 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3372 }
3373
3374 return ret;
3375 }
3376
3377 /**
3378 ************************************************************************************************************************
3379 * Gfx11Lib::ComputeOffsetFromEquation
3380 *
3381 * @brief
3382 * Compute offset from equation
3383 *
3384 * @return
3385 * Offset
3386 ************************************************************************************************************************
3387 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3388 UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
3389 const ADDR_EQUATION* pEq, ///< Equation
3390 UINT_32 x, ///< x coord in bytes
3391 UINT_32 y, ///< y coord in pixel
3392 UINT_32 z ///< z coord in slice
3393 ) const
3394 {
3395 UINT_32 offset = 0;
3396
3397 for (UINT_32 i = 0; i < pEq->numBits; i++)
3398 {
3399 UINT_32 v = 0;
3400
3401 if (pEq->addr[i].valid)
3402 {
3403 if (pEq->addr[i].channel == 0)
3404 {
3405 v ^= (x >> pEq->addr[i].index) & 1;
3406 }
3407 else if (pEq->addr[i].channel == 1)
3408 {
3409 v ^= (y >> pEq->addr[i].index) & 1;
3410 }
3411 else
3412 {
3413 ADDR_ASSERT(pEq->addr[i].channel == 2);
3414 v ^= (z >> pEq->addr[i].index) & 1;
3415 }
3416 }
3417
3418 if (pEq->xor1[i].valid)
3419 {
3420 if (pEq->xor1[i].channel == 0)
3421 {
3422 v ^= (x >> pEq->xor1[i].index) & 1;
3423 }
3424 else if (pEq->xor1[i].channel == 1)
3425 {
3426 v ^= (y >> pEq->xor1[i].index) & 1;
3427 }
3428 else
3429 {
3430 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3431 v ^= (z >> pEq->xor1[i].index) & 1;
3432 }
3433 }
3434
3435 if (pEq->xor2[i].valid)
3436 {
3437 if (pEq->xor2[i].channel == 0)
3438 {
3439 v ^= (x >> pEq->xor2[i].index) & 1;
3440 }
3441 else if (pEq->xor2[i].channel == 1)
3442 {
3443 v ^= (y >> pEq->xor2[i].index) & 1;
3444 }
3445 else
3446 {
3447 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3448 v ^= (z >> pEq->xor2[i].index) & 1;
3449 }
3450 }
3451
3452 offset |= (v << i);
3453 }
3454
3455 return offset;
3456 }
3457
3458 /**
3459 ************************************************************************************************************************
3460 * Gfx11Lib::ComputeOffsetFromSwizzlePattern
3461 *
3462 * @brief
3463 * Compute offset from swizzle pattern
3464 *
3465 * @return
3466 * Offset
3467 ************************************************************************************************************************
3468 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3469 UINT_32 Gfx11Lib::ComputeOffsetFromSwizzlePattern(
3470 const UINT_64* pPattern, ///< Swizzle pattern
3471 UINT_32 numBits, ///< Number of bits in pattern
3472 UINT_32 x, ///< x coord in pixel
3473 UINT_32 y, ///< y coord in pixel
3474 UINT_32 z, ///< z coord in slice
3475 UINT_32 s ///< sample id
3476 ) const
3477 {
3478 UINT_32 offset = 0;
3479 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3480
3481 for (UINT_32 i = 0; i < numBits; i++)
3482 {
3483 UINT_32 v = 0;
3484
3485 if (pSwizzlePattern[i].x != 0)
3486 {
3487 UINT_16 mask = pSwizzlePattern[i].x;
3488 UINT_32 xBits = x;
3489
3490 while (mask != 0)
3491 {
3492 if (mask & 1)
3493 {
3494 v ^= xBits & 1;
3495 }
3496
3497 xBits >>= 1;
3498 mask >>= 1;
3499 }
3500 }
3501
3502 if (pSwizzlePattern[i].y != 0)
3503 {
3504 UINT_16 mask = pSwizzlePattern[i].y;
3505 UINT_32 yBits = y;
3506
3507 while (mask != 0)
3508 {
3509 if (mask & 1)
3510 {
3511 v ^= yBits & 1;
3512 }
3513
3514 yBits >>= 1;
3515 mask >>= 1;
3516 }
3517 }
3518
3519 if (pSwizzlePattern[i].z != 0)
3520 {
3521 UINT_16 mask = pSwizzlePattern[i].z;
3522 UINT_32 zBits = z;
3523
3524 while (mask != 0)
3525 {
3526 if (mask & 1)
3527 {
3528 v ^= zBits & 1;
3529 }
3530
3531 zBits >>= 1;
3532 mask >>= 1;
3533 }
3534 }
3535
3536 if (pSwizzlePattern[i].s != 0)
3537 {
3538 UINT_16 mask = pSwizzlePattern[i].s;
3539 UINT_32 sBits = s;
3540
3541 while (mask != 0)
3542 {
3543 if (mask & 1)
3544 {
3545 v ^= sBits & 1;
3546 }
3547
3548 sBits >>= 1;
3549 mask >>= 1;
3550 }
3551 }
3552
3553 offset |= (v << i);
3554 }
3555
3556 return offset;
3557 }
3558
3559 /**
3560 ************************************************************************************************************************
3561 * Gfx11Lib::GetSwizzlePatternInfo
3562 *
3563 * @brief
3564 * Get swizzle pattern
3565 *
3566 * @return
3567 * Swizzle pattern information
3568 ************************************************************************************************************************
3569 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3570 const ADDR_SW_PATINFO* Gfx11Lib::GetSwizzlePatternInfo(
3571 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3572 AddrResourceType resourceType, ///< Resource type
3573 UINT_32 elemLog2, ///< Element size in bytes log2
3574 UINT_32 numFrag ///< Number of fragment
3575 ) const
3576 {
3577 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3578 const ADDR_SW_PATINFO* patInfo = NULL;
3579 const UINT_32 swizzleMask = 1 << swizzleMode;
3580 const BOOL_32 isBlock256k = IsBlock256kb(swizzleMode);
3581 const BOOL_32 isBlock64K = IsBlock64kb(swizzleMode);
3582
3583 if (IsLinear(swizzleMode) == FALSE)
3584 {
3585 if (resourceType == ADDR_RSRC_TEX_3D)
3586 {
3587 ADDR_ASSERT(numFrag == 1);
3588
3589 if ((swizzleMask & Gfx11Rsrc3dSwModeMask) != 0)
3590 {
3591 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3592 {
3593 if (isBlock256k)
3594 {
3595 ADDR_ASSERT((swizzleMode == ADDR_SW_256KB_Z_X) || (swizzleMode == ADDR_SW_256KB_R_X));
3596 patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
3597 }
3598 else if (isBlock64K)
3599 {
3600 ADDR_ASSERT((swizzleMode == ADDR_SW_64KB_Z_X) || (swizzleMode == ADDR_SW_64KB_R_X));
3601 patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
3602 }
3603 else
3604 {
3605 ADDR_ASSERT_ALWAYS();
3606 }
3607 }
3608 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3609 {
3610 if (isBlock256k)
3611 {
3612 ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
3613 // patInfo = GFX11_SW_256K_D3_X_PATINFO;
3614 }
3615 else if (isBlock64K)
3616 {
3617 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3618 patInfo = GFX11_SW_64K_D3_X_PATINFO;
3619 }
3620 else
3621 {
3622 ADDR_ASSERT_ALWAYS();
3623 }
3624 }
3625 else
3626 {
3627 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3628
3629 if (isBlock256k)
3630 {
3631 ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_S_X);
3632 patInfo = GFX11_SW_256K_S3_X_PATINFO;
3633 }
3634 else if (isBlock64K)
3635 {
3636 if (swizzleMode == ADDR_SW_64KB_S)
3637 {
3638 patInfo = GFX11_SW_64K_S3_PATINFO;
3639 }
3640 else if (swizzleMode == ADDR_SW_64KB_S_X)
3641 {
3642 patInfo = GFX11_SW_64K_S3_X_PATINFO;
3643 }
3644 else if (swizzleMode == ADDR_SW_64KB_S_T)
3645 {
3646 patInfo = GFX11_SW_64K_S3_T_PATINFO;
3647 }
3648 else
3649 {
3650 ADDR_ASSERT_ALWAYS();
3651 }
3652 }
3653 else if (IsBlock4kb(swizzleMode))
3654 {
3655 if (swizzleMode == ADDR_SW_4KB_S)
3656 {
3657 patInfo = GFX11_SW_4K_S3_PATINFO;
3658 }
3659 else if (swizzleMode == ADDR_SW_4KB_S_X)
3660 {
3661 patInfo = GFX11_SW_4K_S3_X_PATINFO;
3662 }
3663 else
3664 {
3665 ADDR_ASSERT_ALWAYS();
3666 }
3667 }
3668 else
3669 {
3670 ADDR_ASSERT_ALWAYS();
3671 }
3672 }
3673 }
3674 }
3675 else
3676 {
3677 if ((swizzleMask & Gfx11Rsrc2dSwModeMask) != 0)
3678 {
3679 if (IsBlock256b(swizzleMode))
3680 {
3681 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3682 patInfo = GFX11_SW_256_D_PATINFO;
3683 }
3684 else if (IsBlock4kb(swizzleMode))
3685 {
3686 if (swizzleMode == ADDR_SW_4KB_D)
3687 {
3688 patInfo = GFX11_SW_4K_D_PATINFO;
3689 }
3690 else if (swizzleMode == ADDR_SW_4KB_D_X)
3691 {
3692 patInfo = GFX11_SW_4K_D_X_PATINFO;
3693 }
3694 else
3695 {
3696 ADDR_ASSERT_ALWAYS();
3697 }
3698 }
3699 else if (isBlock64K)
3700 {
3701 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3702 {
3703 if (numFrag == 1)
3704 {
3705 patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
3706 }
3707 else if (numFrag == 2)
3708 {
3709 patInfo = GFX11_SW_64K_ZR_X_2xaa_PATINFO;
3710 }
3711 else if (numFrag == 4)
3712 {
3713 patInfo = GFX11_SW_64K_ZR_X_4xaa_PATINFO;
3714 }
3715 else if (numFrag == 8)
3716 {
3717 patInfo = GFX11_SW_64K_ZR_X_8xaa_PATINFO;
3718 }
3719 else
3720 {
3721 ADDR_ASSERT_ALWAYS();
3722 }
3723 }
3724 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3725 {
3726 if (swizzleMode == ADDR_SW_64KB_D)
3727 {
3728 patInfo = GFX11_SW_64K_D_PATINFO;
3729 }
3730 else if (swizzleMode == ADDR_SW_64KB_D_X)
3731 {
3732 patInfo = GFX11_SW_64K_D_X_PATINFO;
3733 }
3734 else if (swizzleMode == ADDR_SW_64KB_D_T)
3735 {
3736 patInfo = GFX11_SW_64K_D_T_PATINFO;
3737 }
3738 else
3739 {
3740 ADDR_ASSERT_ALWAYS();
3741 }
3742 }
3743 else
3744 {
3745 ADDR_ASSERT_ALWAYS();
3746 }
3747 }
3748 else if (isBlock256k)
3749 {
3750 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3751 {
3752 if (numFrag == 1)
3753 {
3754 patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
3755 }
3756 else if (numFrag == 2)
3757 {
3758 patInfo = GFX11_SW_256K_ZR_X_2xaa_PATINFO;
3759 }
3760 else if (numFrag == 4)
3761 {
3762 patInfo = GFX11_SW_256K_ZR_X_4xaa_PATINFO;
3763 }
3764 else if (numFrag == 8)
3765 {
3766 patInfo = GFX11_SW_256K_ZR_X_8xaa_PATINFO;
3767 }
3768 else
3769 {
3770 ADDR_ASSERT_ALWAYS();
3771 }
3772 }
3773 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3774 {
3775 ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
3776 patInfo = GFX11_SW_256K_D_X_PATINFO;
3777 }
3778 else
3779 {
3780 ADDR_ASSERT_ALWAYS();
3781 }
3782 }
3783 else
3784 {
3785 ADDR_ASSERT_ALWAYS();
3786 }
3787 }
3788 }
3789 }
3790
3791 return (patInfo != NULL) ? &patInfo[index] : NULL;
3792 }
3793
3794 /**
3795 ************************************************************************************************************************
3796 * Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled
3797 *
3798 * @brief
3799 * Internal function to calculate address from coord for micro tiled swizzle surface
3800 *
3801 * @return
3802 * ADDR_E_RETURNCODE
3803 ************************************************************************************************************************
3804 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3805 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3806 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3807 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3808 ) const
3809 {
3810 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3811 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3812 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3813
3814 localIn.swizzleMode = pIn->swizzleMode;
3815 localIn.flags = pIn->flags;
3816 localIn.resourceType = pIn->resourceType;
3817 localIn.bpp = pIn->bpp;
3818 localIn.width = Max(pIn->unalignedWidth, 1u);
3819 localIn.height = Max(pIn->unalignedHeight, 1u);
3820 localIn.numSlices = Max(pIn->numSlices, 1u);
3821 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3822 localIn.numSamples = Max(pIn->numSamples, 1u);
3823 localIn.numFrags = localIn.numSamples;
3824 localOut.pMipInfo = mipInfo;
3825
3826 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3827
3828 if (ret == ADDR_OK)
3829 {
3830 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3831 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3832 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3833 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3834
3835 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3836 {
3837 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3838 const UINT_32 yb = pIn->y / localOut.blockHeight;
3839 const UINT_32 xb = pIn->x / localOut.blockWidth;
3840 const UINT_32 blockIndex = yb * pb + xb;
3841 const UINT_32 blockSize = 256;
3842 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3843 pIn->x << elemLog2,
3844 pIn->y,
3845 0);
3846 pOut->addr = localOut.sliceSize * pIn->slice +
3847 mipInfo[pIn->mipId].macroBlockOffset +
3848 (blockIndex * blockSize) +
3849 blk256Offset;
3850 }
3851 else
3852 {
3853 ret = ADDR_INVALIDPARAMS;
3854 }
3855 }
3856
3857 return ret;
3858 }
3859
3860 /**
3861 ************************************************************************************************************************
3862 * Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled
3863 *
3864 * @brief
3865 * Internal function to calculate address from coord for macro tiled swizzle surface
3866 *
3867 * @return
3868 * ADDR_E_RETURNCODE
3869 ************************************************************************************************************************
3870 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3871 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
3872 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3873 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3874 ) const
3875 {
3876 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3877 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3878 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3879
3880 localIn.swizzleMode = pIn->swizzleMode;
3881 localIn.flags = pIn->flags;
3882 localIn.resourceType = pIn->resourceType;
3883 localIn.bpp = pIn->bpp;
3884 localIn.width = Max(pIn->unalignedWidth, 1u);
3885 localIn.height = Max(pIn->unalignedHeight, 1u);
3886 localIn.numSlices = Max(pIn->numSlices, 1u);
3887 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3888 localIn.numSamples = Max(pIn->numSamples, 1u);
3889 localIn.numFrags = localIn.numSamples;
3890 localOut.pMipInfo = mipInfo;
3891
3892 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
3893
3894 if (ret == ADDR_OK)
3895 {
3896 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3897 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3898 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
3899 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
3900 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
3901 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
3902 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
3903
3904 if (localIn.numSamples > 1)
3905 {
3906 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
3907 pIn->resourceType,
3908 elemLog2,
3909 localIn.numSamples);
3910
3911 if (pPatInfo != NULL)
3912 {
3913 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
3914 const UINT_32 yb = pIn->y / localOut.blockHeight;
3915 const UINT_32 xb = pIn->x / localOut.blockWidth;
3916 const UINT_64 blkIdx = yb * pb + xb;
3917
3918 ADDR_BIT_SETTING fullSwizzlePattern[20];
3919 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
3920
3921 const UINT_32 blkOffset =
3922 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
3923 blkSizeLog2,
3924 pIn->x,
3925 pIn->y,
3926 pIn->slice,
3927 pIn->sample);
3928
3929 pOut->addr = (localOut.sliceSize * pIn->slice) +
3930 (blkIdx << blkSizeLog2) +
3931 (blkOffset ^ pipeBankXor);
3932 }
3933 else
3934 {
3935 ret = ADDR_INVALIDPARAMS;
3936 }
3937 }
3938 else
3939 {
3940 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
3941 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3942 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
3943
3944 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3945 {
3946 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
3947 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3948 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
3949 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
3950 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
3951 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
3952 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
3953 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3954 const UINT_32 yb = pIn->y / localOut.blockHeight;
3955 const UINT_32 xb = pIn->x / localOut.blockWidth;
3956 const UINT_64 blkIdx = yb * pb + xb;
3957 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3958 x << elemLog2,
3959 y,
3960 z);
3961 pOut->addr = sliceSize * sliceId +
3962 mipInfo[pIn->mipId].macroBlockOffset +
3963 (blkIdx << blkSizeLog2) +
3964 (blkOffset ^ pipeBankXor);
3965 }
3966 else
3967 {
3968 ret = ADDR_INVALIDPARAMS;
3969 }
3970 }
3971 }
3972
3973 return ret;
3974 }
3975
3976 /**
3977 ************************************************************************************************************************
3978 * Gfx11Lib::HwlComputeMaxBaseAlignments
3979 *
3980 * @brief
3981 * Gets maximum alignments
3982 * @return
3983 * maximum alignments
3984 ************************************************************************************************************************
3985 */
HwlComputeMaxBaseAlignments() const3986 UINT_32 Gfx11Lib::HwlComputeMaxBaseAlignments() const
3987 {
3988 return Size256K;
3989 }
3990
3991 /**
3992 ************************************************************************************************************************
3993 * Gfx11Lib::HwlComputeMaxMetaBaseAlignments
3994 *
3995 * @brief
3996 * Gets maximum alignments for metadata
3997 * @return
3998 * maximum alignments for metadata
3999 ************************************************************************************************************************
4000 */
HwlComputeMaxMetaBaseAlignments() const4001 UINT_32 Gfx11Lib::HwlComputeMaxMetaBaseAlignments() const
4002 {
4003 Dim3d metaBlk;
4004
4005 // Max base alignment for Htile
4006 const AddrSwizzleMode ValidSwizzleModeForHtile[] =
4007 {
4008 ADDR_SW_64KB_Z_X,
4009 ADDR_SW_256KB_Z_X,
4010 };
4011
4012 UINT_32 maxBaseAlignHtile = 0;
4013
4014 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForHtile) / sizeof(ValidSwizzleModeForHtile[0]); swIdx++)
4015 {
4016 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4017 {
4018 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4019 {
4020 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx11DataDepthStencil,
4021 ADDR_RSRC_TEX_2D,
4022 ValidSwizzleModeForHtile[swIdx],
4023 bppLog2,
4024 numFragLog2,
4025 TRUE,
4026 &metaBlk);
4027
4028 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4029 }
4030 }
4031 }
4032
4033 // Max base alignment for 2D Dcc
4034 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4035 {
4036 ADDR_SW_64KB_R_X,
4037 ADDR_SW_256KB_R_X,
4038 };
4039
4040 UINT_32 maxBaseAlignDcc2D = 0;
4041
4042 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4043 {
4044 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4045 {
4046 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4047 {
4048 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx11DataColor,
4049 ADDR_RSRC_TEX_2D,
4050 ValidSwizzleModeForDcc2D[swIdx],
4051 bppLog2,
4052 numFragLog2,
4053 TRUE,
4054 &metaBlk);
4055
4056 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4057 }
4058 }
4059 }
4060
4061 // Max base alignment for 3D Dcc
4062 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4063 {
4064 ADDR_SW_64KB_S_X,
4065 ADDR_SW_64KB_D_X,
4066 ADDR_SW_64KB_R_X,
4067 ADDR_SW_256KB_S_X,
4068 ADDR_SW_256KB_D_X,
4069 ADDR_SW_256KB_R_X,
4070 };
4071
4072 UINT_32 maxBaseAlignDcc3D = 0;
4073
4074 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4075 {
4076 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4077 {
4078 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx11DataColor,
4079 ADDR_RSRC_TEX_3D,
4080 ValidSwizzleModeForDcc3D[swIdx],
4081 bppLog2,
4082 0,
4083 TRUE,
4084 &metaBlk);
4085
4086 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4087 }
4088 }
4089
4090 return Max(maxBaseAlignHtile, Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4091 }
4092
4093 /**
4094 ************************************************************************************************************************
4095 * Gfx11Lib::GetMetaElementSizeLog2
4096 *
4097 * @brief
4098 * Gets meta data element size log2
4099 * @return
4100 * Meta data element size log2
4101 ************************************************************************************************************************
4102 */
GetMetaElementSizeLog2(Gfx11DataType dataType)4103 INT_32 Gfx11Lib::GetMetaElementSizeLog2(
4104 Gfx11DataType dataType) ///< Data surface type
4105 {
4106 INT_32 elemSizeLog2 = 0;
4107
4108 if (dataType == Gfx11DataColor)
4109 {
4110 elemSizeLog2 = 0;
4111 }
4112 else
4113 {
4114 ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4115 elemSizeLog2 = 2;
4116 }
4117
4118 return elemSizeLog2;
4119 }
4120
4121 /**
4122 ************************************************************************************************************************
4123 * Gfx11Lib::GetMetaCacheSizeLog2
4124 *
4125 * @brief
4126 * Gets meta data cache line size log2
4127 * @return
4128 * Meta data cache line size log2
4129 ************************************************************************************************************************
4130 */
GetMetaCacheSizeLog2(Gfx11DataType dataType)4131 INT_32 Gfx11Lib::GetMetaCacheSizeLog2(
4132 Gfx11DataType dataType) ///< Data surface type
4133 {
4134 INT_32 cacheSizeLog2 = 0;
4135
4136 if (dataType == Gfx11DataColor)
4137 {
4138 cacheSizeLog2 = 6;
4139 }
4140 else
4141 {
4142 ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4143 cacheSizeLog2 = 8;
4144 }
4145
4146 return cacheSizeLog2;
4147 }
4148
4149 /**
4150 ************************************************************************************************************************
4151 * Gfx11Lib::HwlComputeSurfaceInfoLinear
4152 *
4153 * @brief
4154 * Internal function to calculate alignment for linear surface
4155 *
4156 * @return
4157 * ADDR_E_RETURNCODE
4158 ************************************************************************************************************************
4159 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4160 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoLinear(
4161 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4162 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4163 ) const
4164 {
4165 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4166
4167 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4168 {
4169 returnCode = ADDR_INVALIDPARAMS;
4170 }
4171 else
4172 {
4173 const UINT_32 elementBytes = pIn->bpp >> 3;
4174 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4175 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4176 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4177 UINT_32 actualHeight = pIn->height;
4178 UINT_64 sliceSize = 0;
4179
4180 if (pIn->numMipLevels > 1)
4181 {
4182 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4183 {
4184 UINT_32 mipWidth, mipHeight;
4185
4186 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4187
4188 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4189
4190 if (pOut->pMipInfo != NULL)
4191 {
4192 pOut->pMipInfo[i].pitch = mipActualWidth;
4193 pOut->pMipInfo[i].height = mipHeight;
4194 pOut->pMipInfo[i].depth = mipDepth;
4195 pOut->pMipInfo[i].offset = sliceSize;
4196 pOut->pMipInfo[i].mipTailOffset = 0;
4197 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4198 }
4199
4200 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4201 }
4202 }
4203 else
4204 {
4205 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4206
4207 if (returnCode == ADDR_OK)
4208 {
4209 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4210
4211 if (pOut->pMipInfo != NULL)
4212 {
4213 pOut->pMipInfo[0].pitch = pitch;
4214 pOut->pMipInfo[0].height = actualHeight;
4215 pOut->pMipInfo[0].depth = mipDepth;
4216 pOut->pMipInfo[0].offset = 0;
4217 pOut->pMipInfo[0].mipTailOffset = 0;
4218 pOut->pMipInfo[0].macroBlockOffset = 0;
4219 }
4220 }
4221 }
4222
4223 if (returnCode == ADDR_OK)
4224 {
4225 pOut->pitch = pitch;
4226 pOut->height = actualHeight;
4227 pOut->numSlices = pIn->numSlices;
4228 pOut->sliceSize = sliceSize;
4229 pOut->surfSize = sliceSize * pOut->numSlices;
4230 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4231 pOut->blockWidth = pitchAlign;
4232 pOut->blockHeight = 1;
4233 pOut->blockSlices = 1;
4234
4235 // Following members are useless on GFX11
4236 pOut->mipChainPitch = 0;
4237 pOut->mipChainHeight = 0;
4238 pOut->mipChainSlice = 0;
4239 pOut->epitchIsHeight = FALSE;
4240
4241 // Post calculation validate
4242 ADDR_ASSERT(pOut->sliceSize > 0);
4243 }
4244 }
4245
4246 return returnCode;
4247 }
4248
4249 } // V2
4250 } // Addr
4251