1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx9addrlib.cpp
12 * @brief Contains the implementation for the Gfx9Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx9addrlib.h"
17
18 #include "gfx9_gb_reg.h"
19
20 #include "amdgpu_asic_addr.h"
21
22 ////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////
24
25 namespace Addr
26 {
27
28 /**
29 ************************************************************************************************************************
30 * Gfx9HwlInit
31 *
32 * @brief
33 * Creates a Gfx9Lib object.
34 *
35 * @return
36 * Returns a Gfx9Lib object pointer.
37 ************************************************************************************************************************
38 */
Gfx9HwlInit(const Client * pClient)39 Addr::Lib* Gfx9HwlInit(const Client* pClient)
40 {
41 return V2::Gfx9Lib::CreateObj(pClient);
42 }
43
44 namespace V2
45 {
46
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 // Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50
51 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
52 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
53 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
54 {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S
55 {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
56 {{0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_256B_R
57
58 {{0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_Z
59 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
60 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
61 {{0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_4KB_R
62
63 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_Z
64 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
65 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
66 {{0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_64KB_R
67
68 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
69 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
70 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
71 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
72
73 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_Z_T
74 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
75 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
76 {{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}}, // ADDR_SW_64KB_R_T
77
78 {{0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_Z_x
79 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_x
80 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_x
81 {{0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}}, // ADDR_SW_4KB_R_x
82
83 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
84 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
85 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
86 {{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}}, // ADDR_SW_64KB_R_X
87
88 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
89 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
90 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
91 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
92 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
93 };
94
95 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
96
97 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
98
99 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
100
101 /**
102 ************************************************************************************************************************
103 * Gfx9Lib::Gfx9Lib
104 *
105 * @brief
106 * Constructor
107 *
108 ************************************************************************************************************************
109 */
Gfx9Lib(const Client * pClient)110 Gfx9Lib::Gfx9Lib(const Client* pClient)
111 :
112 Lib(pClient)
113 {
114 memset(&m_settings, 0, sizeof(m_settings));
115 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
116 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
117 m_metaEqOverrideIndex = 0;
118 }
119
120 /**
121 ************************************************************************************************************************
122 * Gfx9Lib::~Gfx9Lib
123 *
124 * @brief
125 * Destructor
126 ************************************************************************************************************************
127 */
~Gfx9Lib()128 Gfx9Lib::~Gfx9Lib()
129 {
130 }
131
132 /**
133 ************************************************************************************************************************
134 * Gfx9Lib::HwlComputeHtileInfo
135 *
136 * @brief
137 * Interface function stub of AddrComputeHtileInfo
138 *
139 * @return
140 * ADDR_E_RETURNCODE
141 ************************************************************************************************************************
142 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const143 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
144 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
145 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
146 ) const
147 {
148 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
149 pIn->swizzleMode);
150
151 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
152
153 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
154
155 if ((numPipeTotal == 1) && (numRbTotal == 1))
156 {
157 numCompressBlkPerMetaBlkLog2 = 10;
158 }
159 else
160 {
161 if (m_settings.applyAliasFix)
162 {
163 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
164 }
165 else
166 {
167 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
168 }
169 }
170
171 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
172
173 Dim3d metaBlkDim = {8, 8, 1};
174 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
175 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
176 UINT_32 heightAmp = totalAmpBits - widthAmp;
177 metaBlkDim.w <<= widthAmp;
178 metaBlkDim.h <<= heightAmp;
179
180 #if DEBUG
181 Dim3d metaBlkDimDbg = {8, 8, 1};
182 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
183 {
184 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
185 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
186 {
187 metaBlkDimDbg.h <<= 1;
188 }
189 else
190 {
191 metaBlkDimDbg.w <<= 1;
192 }
193 }
194 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
195 #endif
196
197 UINT_32 numMetaBlkX;
198 UINT_32 numMetaBlkY;
199 UINT_32 numMetaBlkZ;
200
201 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
202 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
203 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
204
205 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
206 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
207
208 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
209 {
210 align *= (numPipeTotal >> 1);
211 }
212
213 align = Max(align, metaBlkSize);
214
215 if (m_settings.metaBaseAlignFix)
216 {
217 align = Max(align, GetBlockSize(pIn->swizzleMode));
218 }
219
220 if (m_settings.htileAlignFix)
221 {
222 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
223 const INT_32 htileCachelineSizeLog2 = 11;
224 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
225
226 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
227
228 align <<= rbMaskPadding;
229 }
230
231 pOut->pitch = numMetaBlkX * metaBlkDim.w;
232 pOut->height = numMetaBlkY * metaBlkDim.h;
233 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
234
235 pOut->metaBlkWidth = metaBlkDim.w;
236 pOut->metaBlkHeight = metaBlkDim.h;
237 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
238
239 pOut->baseAlign = align;
240 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
241
242 return ADDR_OK;
243 }
244
245 /**
246 ************************************************************************************************************************
247 * Gfx9Lib::HwlComputeCmaskInfo
248 *
249 * @brief
250 * Interface function stub of AddrComputeCmaskInfo
251 *
252 * @return
253 * ADDR_E_RETURNCODE
254 ************************************************************************************************************************
255 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const256 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
257 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
258 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
259 ) const
260 {
261 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
262
263 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
264 pIn->swizzleMode);
265
266 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
267
268 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
269
270 if ((numPipeTotal == 1) && (numRbTotal == 1))
271 {
272 numCompressBlkPerMetaBlkLog2 = 13;
273 }
274 else
275 {
276 if (m_settings.applyAliasFix)
277 {
278 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
279 }
280 else
281 {
282 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
283 }
284
285 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
286 }
287
288 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
289
290 Dim2d metaBlkDim = {8, 8};
291 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
292 UINT_32 heightAmp = totalAmpBits >> 1;
293 UINT_32 widthAmp = totalAmpBits - heightAmp;
294 metaBlkDim.w <<= widthAmp;
295 metaBlkDim.h <<= heightAmp;
296
297 #if DEBUG
298 Dim2d metaBlkDimDbg = {8, 8};
299 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
300 {
301 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
302 {
303 metaBlkDimDbg.h <<= 1;
304 }
305 else
306 {
307 metaBlkDimDbg.w <<= 1;
308 }
309 }
310 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
311 #endif
312
313 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
314 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
315 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
316
317 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
318
319 if (m_settings.metaBaseAlignFix)
320 {
321 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
322 }
323
324 pOut->pitch = numMetaBlkX * metaBlkDim.w;
325 pOut->height = numMetaBlkY * metaBlkDim.h;
326 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
327 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
328 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
329
330 pOut->metaBlkWidth = metaBlkDim.w;
331 pOut->metaBlkHeight = metaBlkDim.h;
332
333 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
334
335 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
336 UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
337 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
338 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
339 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
340
341 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
342 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
343 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
344
345 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
346
347 // Generate the CMASK address equation.
348 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
349 bool checked = false;
350 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
351 CoordTerm &bit = (*eq)[b];
352
353 unsigned c;
354 for (c = 0; c < bit.getsize(); c++) {
355 Coordinate &coord = bit[c];
356 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
357 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
358 }
359 for (; c < 5; c++)
360 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
361 }
362
363 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
364 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
365 CoordTerm &prev = (*eq)[b - 1];
366 CoordTerm &cur = (*eq)[b];
367
368 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
369 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
370 prev[0].getord() + 1 == cur[0].getord())
371 pOut->equation.gfx9.num_bits = b;
372 else
373 break;
374 }
375
376 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
377 pIn->swizzleMode);
378
379 return ADDR_OK;
380 }
381
382 /**
383 ************************************************************************************************************************
384 * Gfx9Lib::GetMetaMipInfo
385 *
386 * @brief
387 * Get meta mip info
388 *
389 * @return
390 * N/A
391 ************************************************************************************************************************
392 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const393 VOID Gfx9Lib::GetMetaMipInfo(
394 UINT_32 numMipLevels, ///< [in] number of mip levels
395 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
396 BOOL_32 dataThick, ///< [in] data surface is thick
397 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
398 UINT_32 mip0Width, ///< [in] mip0 width
399 UINT_32 mip0Height, ///< [in] mip0 height
400 UINT_32 mip0Depth, ///< [in] mip0 depth
401 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
402 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
403 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
404 const
405 {
406 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
407 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
408 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
409 UINT_32 tailWidth = pMetaBlkDim->w;
410 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
411 UINT_32 tailDepth = pMetaBlkDim->d;
412 BOOL_32 inTail = FALSE;
413 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
414
415 if (numMipLevels > 1)
416 {
417 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
418 {
419 // Z major
420 major = ADDR_MAJOR_Z;
421 }
422 else if (numMetaBlkX >= numMetaBlkY)
423 {
424 // X major
425 major = ADDR_MAJOR_X;
426 }
427 else
428 {
429 // Y major
430 major = ADDR_MAJOR_Y;
431 }
432
433 inTail = ((mip0Width <= tailWidth) &&
434 (mip0Height <= tailHeight) &&
435 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
436
437 if (inTail == FALSE)
438 {
439 UINT_32 orderLimit;
440 UINT_32 *pMipDim;
441 UINT_32 *pOrderDim;
442
443 if (major == ADDR_MAJOR_Z)
444 {
445 // Z major
446 pMipDim = &numMetaBlkY;
447 pOrderDim = &numMetaBlkZ;
448 orderLimit = 4;
449 }
450 else if (major == ADDR_MAJOR_X)
451 {
452 // X major
453 pMipDim = &numMetaBlkY;
454 pOrderDim = &numMetaBlkX;
455 orderLimit = 4;
456 }
457 else
458 {
459 // Y major
460 pMipDim = &numMetaBlkX;
461 pOrderDim = &numMetaBlkY;
462 orderLimit = 2;
463 }
464
465 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
466 {
467 *pMipDim += 2;
468 }
469 else
470 {
471 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
472 }
473 }
474 }
475
476 if (pInfo != NULL)
477 {
478 UINT_32 mipWidth = mip0Width;
479 UINT_32 mipHeight = mip0Height;
480 UINT_32 mipDepth = mip0Depth;
481 Dim3d mipCoord = {0};
482
483 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
484 {
485 if (inTail)
486 {
487 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
488 pMetaBlkDim);
489 break;
490 }
491 else
492 {
493 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
494 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
495 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
496
497 pInfo[mip].inMiptail = FALSE;
498 pInfo[mip].startX = mipCoord.w;
499 pInfo[mip].startY = mipCoord.h;
500 pInfo[mip].startZ = mipCoord.d;
501 pInfo[mip].width = mipWidth;
502 pInfo[mip].height = mipHeight;
503 pInfo[mip].depth = dataThick ? mipDepth : 1;
504
505 if ((mip >= 3) || (mip & 1))
506 {
507 switch (major)
508 {
509 case ADDR_MAJOR_X:
510 mipCoord.w += mipWidth;
511 break;
512 case ADDR_MAJOR_Y:
513 mipCoord.h += mipHeight;
514 break;
515 case ADDR_MAJOR_Z:
516 mipCoord.d += mipDepth;
517 break;
518 default:
519 break;
520 }
521 }
522 else
523 {
524 switch (major)
525 {
526 case ADDR_MAJOR_X:
527 mipCoord.h += mipHeight;
528 break;
529 case ADDR_MAJOR_Y:
530 mipCoord.w += mipWidth;
531 break;
532 case ADDR_MAJOR_Z:
533 mipCoord.h += mipHeight;
534 break;
535 default:
536 break;
537 }
538 }
539
540 mipWidth = Max(mipWidth >> 1, 1u);
541 mipHeight = Max(mipHeight >> 1, 1u);
542 mipDepth = Max(mipDepth >> 1, 1u);
543
544 inTail = ((mipWidth <= tailWidth) &&
545 (mipHeight <= tailHeight) &&
546 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
547 }
548 }
549 }
550
551 *pNumMetaBlkX = numMetaBlkX;
552 *pNumMetaBlkY = numMetaBlkY;
553 *pNumMetaBlkZ = numMetaBlkZ;
554 }
555
556 /**
557 ************************************************************************************************************************
558 * Gfx9Lib::HwlComputeDccInfo
559 *
560 * @brief
561 * Interface function to compute DCC key info
562 *
563 * @return
564 * ADDR_E_RETURNCODE
565 ************************************************************************************************************************
566 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const567 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
568 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
569 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
570 ) const
571 {
572 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
573 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
574 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
575
576 if (dataLinear)
577 {
578 metaLinear = TRUE;
579 }
580 else if (metaLinear == TRUE)
581 {
582 pipeAligned = FALSE;
583 }
584
585 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
586
587 if (metaLinear)
588 {
589 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
590 ADDR_ASSERT_ALWAYS();
591
592 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
593 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
594 }
595 else
596 {
597 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
598
599 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
600
601 UINT_32 numFrags = Max(pIn->numFrags, 1u);
602 UINT_32 numSlices = Max(pIn->numSlices, 1u);
603
604 minMetaBlkSize /= numFrags;
605
606 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
607
608 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
609
610 if ((numPipeTotal > 1) || (numRbTotal > 1))
611 {
612 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
613
614 numCompressBlkPerMetaBlk =
615 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
616
617 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
618 {
619 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
620 }
621 }
622
623 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
624 Dim3d metaBlkDim = compressBlkDim;
625
626 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
627 {
628 if ((metaBlkDim.h < metaBlkDim.w) ||
629 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
630 {
631 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
632 {
633 metaBlkDim.h <<= 1;
634 }
635 else
636 {
637 metaBlkDim.d <<= 1;
638 }
639 }
640 else
641 {
642 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
643 {
644 metaBlkDim.w <<= 1;
645 }
646 else
647 {
648 metaBlkDim.d <<= 1;
649 }
650 }
651 }
652
653 UINT_32 numMetaBlkX;
654 UINT_32 numMetaBlkY;
655 UINT_32 numMetaBlkZ;
656
657 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
658 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
659 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
660
661 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
662
663 if (numFrags > m_maxCompFrag)
664 {
665 sizeAlign *= (numFrags / m_maxCompFrag);
666 }
667
668 if (m_settings.metaBaseAlignFix)
669 {
670 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
671 }
672
673 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
674 numCompressBlkPerMetaBlk * numFrags;
675 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
676 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
677
678 pOut->pitch = numMetaBlkX * metaBlkDim.w;
679 pOut->height = numMetaBlkY * metaBlkDim.h;
680 pOut->depth = numMetaBlkZ * metaBlkDim.d;
681
682 pOut->compressBlkWidth = compressBlkDim.w;
683 pOut->compressBlkHeight = compressBlkDim.h;
684 pOut->compressBlkDepth = compressBlkDim.d;
685
686 pOut->metaBlkWidth = metaBlkDim.w;
687 pOut->metaBlkHeight = metaBlkDim.h;
688 pOut->metaBlkDepth = metaBlkDim.d;
689 pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
690
691 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
692 pOut->fastClearSizePerSlice =
693 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
694
695 // Get the DCC address equation (copied from DccAddrFromCoord)
696 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
697 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
698 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
699 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
700 UINT_32 metaBlkDepthLog2 = Log2(pOut->metaBlkDepth);
701 UINT_32 compBlkWidthLog2 = Log2(pOut->compressBlkWidth);
702 UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
703 UINT_32 compBlkDepthLog2 = Log2(pOut->compressBlkDepth);
704
705 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
706 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
707 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
708 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
709
710 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
711
712 // Generate the DCC address equation.
713 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
714 bool checked = false;
715 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
716 CoordTerm &bit = (*eq)[b];
717
718 unsigned c;
719 for (c = 0; c < bit.getsize(); c++) {
720 Coordinate &coord = bit[c];
721 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
722 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
723 }
724 for (; c < 5; c++)
725 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
726 }
727
728 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
729 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
730 CoordTerm &prev = (*eq)[b - 1];
731 CoordTerm &cur = (*eq)[b];
732
733 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
734 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
735 prev[0].getord() + 1 == cur[0].getord())
736 pOut->equation.gfx9.num_bits = b;
737 else
738 break;
739 }
740
741 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
742 pIn->swizzleMode);
743 }
744
745 return ADDR_OK;
746 }
747
748 /**
749 ************************************************************************************************************************
750 * Gfx9Lib::HwlComputeMaxBaseAlignments
751 *
752 * @brief
753 * Gets maximum alignments
754 * @return
755 * maximum alignments
756 ************************************************************************************************************************
757 */
HwlComputeMaxBaseAlignments() const758 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
759 {
760 return Size64K;
761 }
762
763 /**
764 ************************************************************************************************************************
765 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
766 *
767 * @brief
768 * Gets maximum alignments for metadata
769 * @return
770 * maximum alignments for metadata
771 ************************************************************************************************************************
772 */
HwlComputeMaxMetaBaseAlignments() const773 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
774 {
775 // Max base alignment for Htile
776 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
777 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
778
779 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
780 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
781 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
782 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
783
784 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
785
786 if (maxNumPipeTotal > 2)
787 {
788 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
789 }
790
791 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
792
793 if (m_settings.metaBaseAlignFix)
794 {
795 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
796 }
797
798 if (m_settings.htileAlignFix)
799 {
800 maxBaseAlignHtile *= maxNumPipeTotal;
801 }
802
803 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
804
805 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
806 UINT_32 maxBaseAlignDcc3D = 65536;
807
808 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
809 {
810 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
811 }
812
813 // Max base alignment for Msaa Dcc
814 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
815
816 if (m_settings.metaBaseAlignFix)
817 {
818 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
819 }
820
821 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
822 }
823
824 /**
825 ************************************************************************************************************************
826 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
827 *
828 * @brief
829 * Interface function stub of AddrComputeCmaskAddrFromCoord
830 *
831 * @return
832 * ADDR_E_RETURNCODE
833 ************************************************************************************************************************
834 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)835 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
836 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
837 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
838 {
839 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
840 input.size = sizeof(input);
841 input.cMaskFlags = pIn->cMaskFlags;
842 input.colorFlags = pIn->colorFlags;
843 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
844 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
845 input.numSlices = Max(pIn->numSlices, 1u);
846 input.swizzleMode = pIn->swizzleMode;
847 input.resourceType = pIn->resourceType;
848
849 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
850 output.size = sizeof(output);
851
852 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
853
854 if (returnCode == ADDR_OK)
855 {
856 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
857 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
858 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
859 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
860
861 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
862 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
863 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
864
865 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
866
867 UINT_32 xb = pIn->x / output.metaBlkWidth;
868 UINT_32 yb = pIn->y / output.metaBlkHeight;
869 UINT_32 zb = pIn->slice;
870
871 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
872 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
873 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
874
875 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
876 UINT_64 address = pMetaEq->solve(coords);
877
878 pOut->addr = address >> 1;
879 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
880
881
882 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
883 pIn->swizzleMode);
884
885 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
886
887 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
888 }
889
890 return returnCode;
891 }
892
893 /**
894 ************************************************************************************************************************
895 * Gfx9Lib::HwlComputeHtileAddrFromCoord
896 *
897 * @brief
898 * Interface function stub of AddrComputeHtileAddrFromCoord
899 *
900 * @return
901 * ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
905 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
906 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
907 {
908 ADDR_E_RETURNCODE returnCode = ADDR_OK;
909
910 if (pIn->numMipLevels > 1)
911 {
912 returnCode = ADDR_NOTIMPLEMENTED;
913 }
914 else
915 {
916 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
917 input.size = sizeof(input);
918 input.hTileFlags = pIn->hTileFlags;
919 input.depthFlags = pIn->depthflags;
920 input.swizzleMode = pIn->swizzleMode;
921 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
922 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
923 input.numSlices = Max(pIn->numSlices, 1u);
924 input.numMipLevels = Max(pIn->numMipLevels, 1u);
925
926 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
927 output.size = sizeof(output);
928
929 returnCode = ComputeHtileInfo(&input, &output);
930
931 if (returnCode == ADDR_OK)
932 {
933 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
934 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
935 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
936 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
937
938 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
939 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
940 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
941
942 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
943
944 UINT_32 xb = pIn->x / output.metaBlkWidth;
945 UINT_32 yb = pIn->y / output.metaBlkHeight;
946 UINT_32 zb = pIn->slice;
947
948 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
949 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
950 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
951
952 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
953 UINT_64 address = pMetaEq->solve(coords);
954
955 pOut->addr = address >> 1;
956
957 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
958 pIn->swizzleMode);
959
960 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
961
962 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
963 }
964 }
965
966 return returnCode;
967 }
968
969 /**
970 ************************************************************************************************************************
971 * Gfx9Lib::HwlComputeHtileCoordFromAddr
972 *
973 * @brief
974 * Interface function stub of AddrComputeHtileCoordFromAddr
975 *
976 * @return
977 * ADDR_E_RETURNCODE
978 ************************************************************************************************************************
979 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)980 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
981 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
982 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
983 {
984 ADDR_E_RETURNCODE returnCode = ADDR_OK;
985
986 if (pIn->numMipLevels > 1)
987 {
988 returnCode = ADDR_NOTIMPLEMENTED;
989 }
990 else
991 {
992 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
993 input.size = sizeof(input);
994 input.hTileFlags = pIn->hTileFlags;
995 input.swizzleMode = pIn->swizzleMode;
996 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
997 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
998 input.numSlices = Max(pIn->numSlices, 1u);
999 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1000
1001 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1002 output.size = sizeof(output);
1003
1004 returnCode = ComputeHtileInfo(&input, &output);
1005
1006 if (returnCode == ADDR_OK)
1007 {
1008 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1009 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1010 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1011 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
1012
1013 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1014 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1015 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1016
1017 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1018
1019 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1020 pIn->swizzleMode);
1021
1022 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1023
1024 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1025
1026 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1027 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1028
1029 UINT_32 coords[NUM_DIMS];
1030 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1031
1032 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1033 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1034 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1035 }
1036 }
1037
1038 return returnCode;
1039 }
1040
1041 /**
1042 ************************************************************************************************************************
1043 * Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1044 *
1045 * @brief
1046 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1047 *
1048 * @return
1049 * ADDR_E_RETURNCODE
1050 ************************************************************************************************************************
1051 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1052 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1053 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1054 {
1055 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1056
1057 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1058 {
1059 returnCode = ADDR_NOTSUPPORTED;
1060 }
1061 else if ((pIn->pitch == 0) ||
1062 (pIn->height == 0) ||
1063 (pIn->compressBlkWidth == 0) ||
1064 (pIn->compressBlkHeight == 0) ||
1065 (pIn->compressBlkDepth == 0) ||
1066 (pIn->metaBlkWidth == 0) ||
1067 (pIn->metaBlkHeight == 0) ||
1068 (pIn->metaBlkDepth == 0) ||
1069 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1070 {
1071 returnCode = ADDR_NOTSUPPORTED;
1072 }
1073
1074 return returnCode;
1075 }
1076
1077 /**
1078 ************************************************************************************************************************
1079 * Gfx9Lib::HwlComputeDccAddrFromCoord
1080 *
1081 * @brief
1082 * Interface function stub of AddrComputeDccAddrFromCoord
1083 *
1084 * @return
1085 * N/A
1086 ************************************************************************************************************************
1087 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1088 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1089 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
1090 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1091 {
1092 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1093 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1094 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
1095 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1096 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
1097 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
1098 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1099 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
1100
1101 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1102 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1103 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1104 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1105
1106 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1107
1108 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1109 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1110 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1111
1112 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1113 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1114 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1115
1116 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1117 UINT_64 address = pMetaEq->solve(coords);
1118
1119 pOut->addr = address >> 1;
1120
1121 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1122 pIn->swizzleMode);
1123
1124 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1125
1126 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1127 }
1128
1129 /**
1130 ************************************************************************************************************************
1131 * Gfx9Lib::HwlInitGlobalParams
1132 *
1133 * @brief
1134 * Initializes global parameters
1135 *
1136 * @return
1137 * TRUE if all settings are valid
1138 *
1139 ************************************************************************************************************************
1140 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1141 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1142 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1143 {
1144 BOOL_32 valid = TRUE;
1145
1146 if (m_settings.isArcticIsland)
1147 {
1148 GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1149
1150 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1151
1152 // These values are copied from CModel code
1153 switch (gbAddrConfig.bits.NUM_PIPES)
1154 {
1155 case ADDR_CONFIG_1_PIPE:
1156 m_pipes = 1;
1157 m_pipesLog2 = 0;
1158 break;
1159 case ADDR_CONFIG_2_PIPE:
1160 m_pipes = 2;
1161 m_pipesLog2 = 1;
1162 break;
1163 case ADDR_CONFIG_4_PIPE:
1164 m_pipes = 4;
1165 m_pipesLog2 = 2;
1166 break;
1167 case ADDR_CONFIG_8_PIPE:
1168 m_pipes = 8;
1169 m_pipesLog2 = 3;
1170 break;
1171 case ADDR_CONFIG_16_PIPE:
1172 m_pipes = 16;
1173 m_pipesLog2 = 4;
1174 break;
1175 case ADDR_CONFIG_32_PIPE:
1176 m_pipes = 32;
1177 m_pipesLog2 = 5;
1178 break;
1179 default:
1180 ADDR_ASSERT_ALWAYS();
1181 break;
1182 }
1183
1184 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1185 {
1186 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1187 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1188 m_pipeInterleaveLog2 = 8;
1189 break;
1190 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1191 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1192 m_pipeInterleaveLog2 = 9;
1193 break;
1194 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1195 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1196 m_pipeInterleaveLog2 = 10;
1197 break;
1198 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1199 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1200 m_pipeInterleaveLog2 = 11;
1201 break;
1202 default:
1203 ADDR_ASSERT_ALWAYS();
1204 break;
1205 }
1206
1207 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1208 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1209 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1210
1211 switch (gbAddrConfig.bits.NUM_BANKS)
1212 {
1213 case ADDR_CONFIG_1_BANK:
1214 m_banks = 1;
1215 m_banksLog2 = 0;
1216 break;
1217 case ADDR_CONFIG_2_BANK:
1218 m_banks = 2;
1219 m_banksLog2 = 1;
1220 break;
1221 case ADDR_CONFIG_4_BANK:
1222 m_banks = 4;
1223 m_banksLog2 = 2;
1224 break;
1225 case ADDR_CONFIG_8_BANK:
1226 m_banks = 8;
1227 m_banksLog2 = 3;
1228 break;
1229 case ADDR_CONFIG_16_BANK:
1230 m_banks = 16;
1231 m_banksLog2 = 4;
1232 break;
1233 default:
1234 ADDR_ASSERT_ALWAYS();
1235 break;
1236 }
1237
1238 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1239 {
1240 case ADDR_CONFIG_1_SHADER_ENGINE:
1241 m_se = 1;
1242 m_seLog2 = 0;
1243 break;
1244 case ADDR_CONFIG_2_SHADER_ENGINE:
1245 m_se = 2;
1246 m_seLog2 = 1;
1247 break;
1248 case ADDR_CONFIG_4_SHADER_ENGINE:
1249 m_se = 4;
1250 m_seLog2 = 2;
1251 break;
1252 case ADDR_CONFIG_8_SHADER_ENGINE:
1253 m_se = 8;
1254 m_seLog2 = 3;
1255 break;
1256 default:
1257 ADDR_ASSERT_ALWAYS();
1258 break;
1259 }
1260
1261 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1262 {
1263 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1264 m_rbPerSe = 1;
1265 m_rbPerSeLog2 = 0;
1266 break;
1267 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1268 m_rbPerSe = 2;
1269 m_rbPerSeLog2 = 1;
1270 break;
1271 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1272 m_rbPerSe = 4;
1273 m_rbPerSeLog2 = 2;
1274 break;
1275 default:
1276 ADDR_ASSERT_ALWAYS();
1277 break;
1278 }
1279
1280 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1281 {
1282 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1283 m_maxCompFrag = 1;
1284 m_maxCompFragLog2 = 0;
1285 break;
1286 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1287 m_maxCompFrag = 2;
1288 m_maxCompFragLog2 = 1;
1289 break;
1290 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1291 m_maxCompFrag = 4;
1292 m_maxCompFragLog2 = 2;
1293 break;
1294 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1295 m_maxCompFrag = 8;
1296 m_maxCompFragLog2 = 3;
1297 break;
1298 default:
1299 ADDR_ASSERT_ALWAYS();
1300 break;
1301 }
1302
1303 if ((m_rbPerSeLog2 == 1) &&
1304 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1305 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1306 {
1307 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1308
1309 ADDR_ASSERT(m_settings.isRaven == FALSE);
1310
1311 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1312
1313 if (m_settings.isVega12)
1314 {
1315 m_settings.htileCacheRbConflict = 1;
1316 }
1317 }
1318
1319 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1320 m_blockVarSizeLog2 = 0;
1321 }
1322 else
1323 {
1324 valid = FALSE;
1325 ADDR_NOT_IMPLEMENTED();
1326 }
1327
1328 if (valid)
1329 {
1330 InitEquationTable();
1331 }
1332
1333 return valid;
1334 }
1335
1336 /**
1337 ************************************************************************************************************************
1338 * Gfx9Lib::HwlConvertChipFamily
1339 *
1340 * @brief
1341 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1342 * @return
1343 * ChipFamily
1344 ************************************************************************************************************************
1345 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1346 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1347 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1348 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1349 {
1350 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1351
1352 switch (uChipFamily)
1353 {
1354 case FAMILY_AI:
1355 m_settings.isArcticIsland = 1;
1356 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1357 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1358 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1359 m_settings.isDce12 = 1;
1360
1361 if (m_settings.isVega10 == 0)
1362 {
1363 m_settings.htileAlignFix = 1;
1364 m_settings.applyAliasFix = 1;
1365 }
1366
1367 m_settings.metaBaseAlignFix = 1;
1368
1369 m_settings.depthPipeXorDisable = 1;
1370 break;
1371 case FAMILY_RV:
1372 m_settings.isArcticIsland = 1;
1373
1374 if (ASICREV_IS_RAVEN(uChipRevision))
1375 {
1376 m_settings.isRaven = 1;
1377
1378 m_settings.depthPipeXorDisable = 1;
1379 }
1380
1381 if (ASICREV_IS_RAVEN2(uChipRevision))
1382 {
1383 m_settings.isRaven = 1;
1384 }
1385
1386 if (m_settings.isRaven == 0)
1387 {
1388 m_settings.htileAlignFix = 1;
1389 m_settings.applyAliasFix = 1;
1390 }
1391
1392 m_settings.isDcn1 = m_settings.isRaven;
1393
1394 if (ASICREV_IS_RENOIR(uChipRevision))
1395 {
1396 m_settings.isRaven = 1;
1397 m_settings.isDcn2 = 1;
1398 }
1399
1400 m_settings.metaBaseAlignFix = 1;
1401 break;
1402
1403 default:
1404 ADDR_ASSERT(!"No Chip found");
1405 break;
1406 }
1407
1408 return family;
1409 }
1410
1411 /**
1412 ************************************************************************************************************************
1413 * Gfx9Lib::InitRbEquation
1414 *
1415 * @brief
1416 * Init RB equation
1417 * @return
1418 * N/A
1419 ************************************************************************************************************************
1420 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1421 VOID Gfx9Lib::GetRbEquation(
1422 CoordEq* pRbEq, ///< [out] rb equation
1423 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1424 UINT_32 numSeLog2) ///< [in] number of shader engine
1425 const
1426 {
1427 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1428 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1429 Coordinate cx(DIM_X, rbRegion);
1430 Coordinate cy(DIM_Y, rbRegion);
1431
1432 UINT_32 start = 0;
1433 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1434
1435 // Clear the rb equation
1436 pRbEq->resize(0);
1437 pRbEq->resize(numRbTotalLog2);
1438
1439 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1440 {
1441 // Special case when more than 1 SE, and 2 RB per SE
1442 (*pRbEq)[0].add(cx);
1443 (*pRbEq)[0].add(cy);
1444 cx++;
1445 cy++;
1446
1447 if (m_settings.applyAliasFix == false)
1448 {
1449 (*pRbEq)[0].add(cy);
1450 }
1451
1452 (*pRbEq)[0].add(cy);
1453 start++;
1454 }
1455
1456 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1457
1458 for (UINT_32 i = 0; i < numBits; i++)
1459 {
1460 UINT_32 idx =
1461 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1462
1463 if ((i % 2) == 1)
1464 {
1465 (*pRbEq)[idx].add(cx);
1466 cx++;
1467 }
1468 else
1469 {
1470 (*pRbEq)[idx].add(cy);
1471 cy++;
1472 }
1473 }
1474 }
1475
1476 /**
1477 ************************************************************************************************************************
1478 * Gfx9Lib::GetDataEquation
1479 *
1480 * @brief
1481 * Get data equation for fmask and Z
1482 * @return
1483 * N/A
1484 ************************************************************************************************************************
1485 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1486 VOID Gfx9Lib::GetDataEquation(
1487 CoordEq* pDataEq, ///< [out] data surface equation
1488 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1489 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1490 AddrResourceType resourceType, ///< [in] data surface resource type
1491 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1492 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1493 const
1494 {
1495 Coordinate cx(DIM_X, 0);
1496 Coordinate cy(DIM_Y, 0);
1497 Coordinate cz(DIM_Z, 0);
1498 Coordinate cs(DIM_S, 0);
1499
1500 // Clear the equation
1501 pDataEq->resize(0);
1502 pDataEq->resize(27);
1503
1504 if (dataSurfaceType == Gfx9DataColor)
1505 {
1506 if (IsLinear(swizzleMode))
1507 {
1508 Coordinate cm(DIM_M, 0);
1509
1510 pDataEq->resize(49);
1511
1512 for (UINT_32 i = 0; i < 49; i++)
1513 {
1514 (*pDataEq)[i].add(cm);
1515 cm++;
1516 }
1517 }
1518 else if (IsThick(resourceType, swizzleMode))
1519 {
1520 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1521 UINT_32 i;
1522 if (IsStandardSwizzle(resourceType, swizzleMode))
1523 {
1524 // Standard 3d swizzle
1525 // Fill in bottom x bits
1526 for (i = elementBytesLog2; i < 4; i++)
1527 {
1528 (*pDataEq)[i].add(cx);
1529 cx++;
1530 }
1531 // Fill in 2 bits of y and then z
1532 for (i = 4; i < 6; i++)
1533 {
1534 (*pDataEq)[i].add(cy);
1535 cy++;
1536 }
1537 for (i = 6; i < 8; i++)
1538 {
1539 (*pDataEq)[i].add(cz);
1540 cz++;
1541 }
1542 if (elementBytesLog2 < 2)
1543 {
1544 // fill in z & y bit
1545 (*pDataEq)[8].add(cz);
1546 (*pDataEq)[9].add(cy);
1547 cz++;
1548 cy++;
1549 }
1550 else if (elementBytesLog2 == 2)
1551 {
1552 // fill in y and x bit
1553 (*pDataEq)[8].add(cy);
1554 (*pDataEq)[9].add(cx);
1555 cy++;
1556 cx++;
1557 }
1558 else
1559 {
1560 // fill in 2 x bits
1561 (*pDataEq)[8].add(cx);
1562 cx++;
1563 (*pDataEq)[9].add(cx);
1564 cx++;
1565 }
1566 }
1567 else
1568 {
1569 // Z 3d swizzle
1570 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1571 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1572 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1573 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1574 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1575 {
1576 (*pDataEq)[i].add(cz);
1577 cz++;
1578 }
1579 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1580 {
1581 // add an x and z
1582 (*pDataEq)[6].add(cx);
1583 (*pDataEq)[7].add(cz);
1584 cx++;
1585 cz++;
1586 }
1587 else if (elementBytesLog2 == 2)
1588 {
1589 // add a y and z
1590 (*pDataEq)[6].add(cy);
1591 (*pDataEq)[7].add(cz);
1592 cy++;
1593 cz++;
1594 }
1595 // add y and x
1596 (*pDataEq)[8].add(cy);
1597 (*pDataEq)[9].add(cx);
1598 cy++;
1599 cx++;
1600 }
1601 // Fill in bit 10 and up
1602 pDataEq->mort3d( cz, cy, cx, 10 );
1603 }
1604 else if (IsThin(resourceType, swizzleMode))
1605 {
1606 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1607 // Color 2D
1608 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1609 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1610 UINT_32 i;
1611 // Fill in bottom x bits
1612 for (i = elementBytesLog2; i < 4; i++)
1613 {
1614 (*pDataEq)[i].add(cx);
1615 cx++;
1616 }
1617 // Fill in bottom y bits
1618 for (i = 4; i < 4 + microYBits; i++)
1619 {
1620 (*pDataEq)[i].add(cy);
1621 cy++;
1622 }
1623 // Fill in last of the micro_x bits
1624 for (i = 4 + microYBits; i < 8; i++)
1625 {
1626 (*pDataEq)[i].add(cx);
1627 cx++;
1628 }
1629 // Fill in x/y bits below sample split
1630 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1631 // Fill in sample bits
1632 for (i = 0; i < numSamplesLog2; i++)
1633 {
1634 cs.set(DIM_S, i);
1635 (*pDataEq)[tileSplitStart + i].add(cs);
1636 }
1637 // Fill in x/y bits above sample split
1638 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1639 {
1640 pDataEq->mort2d(cx, cy, blockSizeLog2);
1641 }
1642 else
1643 {
1644 pDataEq->mort2d(cy, cx, blockSizeLog2);
1645 }
1646 }
1647 else
1648 {
1649 ADDR_ASSERT_ALWAYS();
1650 }
1651 }
1652 else
1653 {
1654 // Fmask or depth
1655 UINT_32 sampleStart = elementBytesLog2;
1656 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1657 UINT_32 ymajStart = 6 + numSamplesLog2;
1658
1659 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1660 {
1661 cs.set(DIM_S, s);
1662 (*pDataEq)[sampleStart + s].add(cs);
1663 }
1664
1665 // Put in the x-major order pixel bits
1666 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1667 // Put in the y-major order pixel bits
1668 pDataEq->mort2d(cy, cx, ymajStart);
1669 }
1670 }
1671
1672 /**
1673 ************************************************************************************************************************
1674 * Gfx9Lib::GetPipeEquation
1675 *
1676 * @brief
1677 * Get pipe equation
1678 * @return
1679 * N/A
1680 ************************************************************************************************************************
1681 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1682 VOID Gfx9Lib::GetPipeEquation(
1683 CoordEq* pPipeEq, ///< [out] pipe equation
1684 CoordEq* pDataEq, ///< [in] data equation
1685 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1686 UINT_32 numPipeLog2, ///< [in] number of pipes
1687 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1688 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1689 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1690 AddrResourceType resourceType ///< [in] data surface resource type
1691 ) const
1692 {
1693 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1694 CoordEq dataEq;
1695
1696 pDataEq->copy(dataEq);
1697
1698 if (dataSurfaceType == Gfx9DataColor)
1699 {
1700 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1701 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1702 }
1703
1704 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1705
1706 // This section should only apply to z/stencil, maybe fmask
1707 // If the pipe bit is below the comp block size,
1708 // then keep moving up the address until we find a bit that is above
1709 UINT_32 pipeStart = 0;
1710
1711 if (dataSurfaceType != Gfx9DataColor)
1712 {
1713 Coordinate tileMin(DIM_X, 3);
1714
1715 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1716 {
1717 pipeStart++;
1718 }
1719
1720 // if pipe is 0, then the first pipe bit is above the comp block size,
1721 // so we don't need to do anything
1722 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1723 // we will get the same pipe equation
1724 if (pipeStart != 0)
1725 {
1726 for (UINT_32 i = 0; i < numPipeLog2; i++)
1727 {
1728 // Copy the jth bit above pipe interleave to the current pipe equation bit
1729 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1730 }
1731 }
1732 }
1733
1734 if (IsPrt(swizzleMode))
1735 {
1736 // Clear out bits above the block size if prt's are enabled
1737 dataEq.resize(blockSizeLog2);
1738 dataEq.resize(48);
1739 }
1740
1741 if (IsXor(swizzleMode))
1742 {
1743 CoordEq xorMask;
1744
1745 if (IsThick(resourceType, swizzleMode))
1746 {
1747 CoordEq xorMask2;
1748
1749 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1750
1751 xorMask.resize(numPipeLog2);
1752
1753 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1754 {
1755 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1756 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1757 }
1758 }
1759 else
1760 {
1761 // Xor in the bits above the pipe+gpu bits
1762 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1763
1764 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1765 {
1766 Coordinate co;
1767 CoordEq xorMask2;
1768 // if 1xaa and not prt, then xor in the z bits
1769 xorMask2.resize(0);
1770 xorMask2.resize(numPipeLog2);
1771 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1772 {
1773 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1774 xorMask2[pipeIdx].add(co);
1775 }
1776
1777 pPipeEq->xorin(xorMask2);
1778 }
1779 }
1780
1781 xorMask.reverse();
1782 pPipeEq->xorin(xorMask);
1783 }
1784 }
1785 /**
1786 ************************************************************************************************************************
1787 * Gfx9Lib::GetMetaEquation
1788 *
1789 * @brief
1790 * Get meta equation for cmask/htile/DCC
1791 * @return
1792 * Pointer to a calculated meta equation
1793 ************************************************************************************************************************
1794 */
GetMetaEquation(const MetaEqParams & metaEqParams)1795 const CoordEq* Gfx9Lib::GetMetaEquation(
1796 const MetaEqParams& metaEqParams)
1797 {
1798 UINT_32 cachedMetaEqIndex;
1799
1800 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1801 {
1802 if (memcmp(&metaEqParams,
1803 &m_cachedMetaEqKey[cachedMetaEqIndex],
1804 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1805 {
1806 break;
1807 }
1808 }
1809
1810 CoordEq* pMetaEq = NULL;
1811
1812 if (cachedMetaEqIndex < MaxCachedMetaEq)
1813 {
1814 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1815 }
1816 else
1817 {
1818 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1819
1820 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1821
1822 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1823
1824 GenMetaEquation(pMetaEq,
1825 metaEqParams.maxMip,
1826 metaEqParams.elementBytesLog2,
1827 metaEqParams.numSamplesLog2,
1828 metaEqParams.metaFlag,
1829 metaEqParams.dataSurfaceType,
1830 metaEqParams.swizzleMode,
1831 metaEqParams.resourceType,
1832 metaEqParams.metaBlkWidthLog2,
1833 metaEqParams.metaBlkHeightLog2,
1834 metaEqParams.metaBlkDepthLog2,
1835 metaEqParams.compBlkWidthLog2,
1836 metaEqParams.compBlkHeightLog2,
1837 metaEqParams.compBlkDepthLog2);
1838 }
1839
1840 return pMetaEq;
1841 }
1842
1843 /**
1844 ************************************************************************************************************************
1845 * Gfx9Lib::GenMetaEquation
1846 *
1847 * @brief
1848 * Get meta equation for cmask/htile/DCC
1849 * @return
1850 * N/A
1851 ************************************************************************************************************************
1852 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1853 VOID Gfx9Lib::GenMetaEquation(
1854 CoordEq* pMetaEq, ///< [out] meta equation
1855 UINT_32 maxMip, ///< [in] max mip Id
1856 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1857 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1858 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1859 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1860 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1861 AddrResourceType resourceType, ///< [in] data surface resource type
1862 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1863 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1864 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1865 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1866 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1867 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1868 const
1869 {
1870 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1871 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1872
1873 // Get the correct data address and rb equation
1874 CoordEq dataEq;
1875 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1876 elementBytesLog2, numSamplesLog2);
1877
1878 // Get pipe and rb equations
1879 CoordEq pipeEquation;
1880 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1881 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1882 numPipeTotalLog2 = pipeEquation.getsize();
1883
1884 if (metaFlag.linear)
1885 {
1886 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1887 ADDR_ASSERT_ALWAYS();
1888
1889 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1890
1891 dataEq.copy(*pMetaEq);
1892
1893 if (IsLinear(swizzleMode))
1894 {
1895 if (metaFlag.pipeAligned)
1896 {
1897 // Remove the pipe bits
1898 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1899 pMetaEq->shift(-shift, pipeInterleaveLog2);
1900 }
1901 // Divide by comp block size, which for linear (which is always color) is 256 B
1902 pMetaEq->shift(-8);
1903
1904 if (metaFlag.pipeAligned)
1905 {
1906 // Put pipe bits back in
1907 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1908
1909 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1910 {
1911 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1912 }
1913 }
1914 }
1915
1916 pMetaEq->shift(1);
1917 }
1918 else
1919 {
1920 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1921 UINT_32 compFragLog2 =
1922 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1923 maxCompFragLog2 : numSamplesLog2;
1924
1925 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1926
1927 // Make sure the metaaddr is cleared
1928 pMetaEq->resize(0);
1929 pMetaEq->resize(27);
1930
1931 if (IsThick(resourceType, swizzleMode))
1932 {
1933 Coordinate cx(DIM_X, 0);
1934 Coordinate cy(DIM_Y, 0);
1935 Coordinate cz(DIM_Z, 0);
1936
1937 if (maxMip > 0)
1938 {
1939 pMetaEq->mort3d(cy, cx, cz);
1940 }
1941 else
1942 {
1943 pMetaEq->mort3d(cx, cy, cz);
1944 }
1945 }
1946 else
1947 {
1948 Coordinate cx(DIM_X, 0);
1949 Coordinate cy(DIM_Y, 0);
1950 Coordinate cs;
1951
1952 if (maxMip > 0)
1953 {
1954 pMetaEq->mort2d(cy, cx, compFragLog2);
1955 }
1956 else
1957 {
1958 pMetaEq->mort2d(cx, cy, compFragLog2);
1959 }
1960
1961 //------------------------------------------------------------------------------------------------------------------------
1962 // Put the compressible fragments at the lsb
1963 // the uncompressible frags will be at the msb of the micro address
1964 //------------------------------------------------------------------------------------------------------------------------
1965 for (UINT_32 s = 0; s < compFragLog2; s++)
1966 {
1967 cs.set(DIM_S, s);
1968 (*pMetaEq)[s].add(cs);
1969 }
1970 }
1971
1972 // Keep a copy of the pipe equations
1973 CoordEq origPipeEquation;
1974 pipeEquation.copy(origPipeEquation);
1975
1976 Coordinate co;
1977 // filter out everything under the compressed block size
1978 co.set(DIM_X, compBlkWidthLog2);
1979 pMetaEq->Filter('<', co, 0, DIM_X);
1980 co.set(DIM_Y, compBlkHeightLog2);
1981 pMetaEq->Filter('<', co, 0, DIM_Y);
1982 co.set(DIM_Z, compBlkDepthLog2);
1983 pMetaEq->Filter('<', co, 0, DIM_Z);
1984
1985 // For non-color, filter out sample bits
1986 if (dataSurfaceType != Gfx9DataColor)
1987 {
1988 co.set(DIM_X, 0);
1989 pMetaEq->Filter('<', co, 0, DIM_S);
1990 }
1991
1992 // filter out everything above the metablock size
1993 co.set(DIM_X, metaBlkWidthLog2 - 1);
1994 pMetaEq->Filter('>', co, 0, DIM_X);
1995 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1996 pMetaEq->Filter('>', co, 0, DIM_Y);
1997 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1998 pMetaEq->Filter('>', co, 0, DIM_Z);
1999
2000 // filter out everything above the metablock size for the channel bits
2001 co.set(DIM_X, metaBlkWidthLog2 - 1);
2002 pipeEquation.Filter('>', co, 0, DIM_X);
2003 co.set(DIM_Y, metaBlkHeightLog2 - 1);
2004 pipeEquation.Filter('>', co, 0, DIM_Y);
2005 co.set(DIM_Z, metaBlkDepthLog2 - 1);
2006 pipeEquation.Filter('>', co, 0, DIM_Z);
2007
2008 // Make sure we still have the same number of channel bits
2009 if (pipeEquation.getsize() != numPipeTotalLog2)
2010 {
2011 ADDR_ASSERT_ALWAYS();
2012 }
2013
2014 // Loop through all channel and rb bits,
2015 // and make sure these components exist in the metadata address
2016 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2017 {
2018 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2019 {
2020 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2021 {
2022 ADDR_ASSERT_ALWAYS();
2023 }
2024 }
2025 }
2026
2027 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
2028 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2029 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2030 CoordEq origRbEquation;
2031
2032 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2033
2034 CoordEq rbEquation = origRbEquation;
2035
2036 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2037 {
2038 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2039 {
2040 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2041 {
2042 ADDR_ASSERT_ALWAYS();
2043 }
2044 }
2045 }
2046
2047 if (m_settings.applyAliasFix)
2048 {
2049 co.set(DIM_Z, -1);
2050 }
2051
2052 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2053 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2054 {
2055 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2056 {
2057 BOOL_32 isRbEquationInPipeEquation = FALSE;
2058
2059 if (m_settings.applyAliasFix)
2060 {
2061 CoordTerm filteredPipeEq;
2062 filteredPipeEq = pipeEquation[j];
2063
2064 filteredPipeEq.Filter('>', co, 0, DIM_Z);
2065
2066 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2067 }
2068 else
2069 {
2070 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2071 }
2072
2073 if (isRbEquationInPipeEquation)
2074 {
2075 rbEquation[i].Clear();
2076 }
2077 }
2078 }
2079
2080 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2081
2082 // Loop through each bit of the channel, get the smallest coordinate,
2083 // and remove it from the metaaddr, and rb_equation
2084 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2085 {
2086 pipeEquation[i].getsmallest(co);
2087
2088 UINT_32 old_size = pMetaEq->getsize();
2089 pMetaEq->Filter('=', co);
2090 UINT_32 new_size = pMetaEq->getsize();
2091 if (new_size != old_size-1)
2092 {
2093 ADDR_ASSERT_ALWAYS();
2094 }
2095 pipeEquation.remove(co);
2096 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2097 {
2098 if (rbEquation[j].remove(co))
2099 {
2100 // if we actually removed something from this bit, then add the remaining
2101 // channel bits, as these can be removed for this bit
2102 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2103 {
2104 if (pipeEquation[i][k] != co)
2105 {
2106 rbEquation[j].add(pipeEquation[i][k]);
2107 rbAppendedWithPipeBits[j] = true;
2108 }
2109 }
2110 }
2111 }
2112 }
2113
2114 // Loop through the rb bits and see what remain;
2115 // filter out the smallest coordinate if it remains
2116 UINT_32 rbBitsLeft = 0;
2117 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2118 {
2119 BOOL_32 isRbEqAppended = FALSE;
2120
2121 if (m_settings.applyAliasFix)
2122 {
2123 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2124 }
2125 else
2126 {
2127 isRbEqAppended = (rbEquation[i].getsize() > 0);
2128 }
2129
2130 if (isRbEqAppended)
2131 {
2132 rbBitsLeft++;
2133 rbEquation[i].getsmallest(co);
2134 UINT_32 old_size = pMetaEq->getsize();
2135 pMetaEq->Filter('=', co);
2136 UINT_32 new_size = pMetaEq->getsize();
2137 if (new_size != old_size - 1)
2138 {
2139 // assert warning
2140 }
2141 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2142 {
2143 if (rbEquation[j].remove(co))
2144 {
2145 // if we actually removed something from this bit, then add the remaining
2146 // rb bits, as these can be removed for this bit
2147 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2148 {
2149 if (rbEquation[i][k] != co)
2150 {
2151 rbEquation[j].add(rbEquation[i][k]);
2152 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2153 }
2154 }
2155 }
2156 }
2157 }
2158 }
2159
2160 // capture the size of the metaaddr
2161 UINT_32 metaSize = pMetaEq->getsize();
2162 // resize to 49 bits...make this a nibble address
2163 pMetaEq->resize(49);
2164 // Concatenate the macro address above the current address
2165 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2166 {
2167 co.set(DIM_M, j);
2168 (*pMetaEq)[i].add(co);
2169 }
2170
2171 // Multiply by meta element size (in nibbles)
2172 if (dataSurfaceType == Gfx9DataColor)
2173 {
2174 pMetaEq->shift(1);
2175 }
2176 else if (dataSurfaceType == Gfx9DataDepthStencil)
2177 {
2178 pMetaEq->shift(3);
2179 }
2180
2181 //------------------------------------------------------------------------------------------
2182 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2183 // Shift up from pipe interleave number of channel
2184 // and rb bits left, and uncompressed fragments
2185 //------------------------------------------------------------------------------------------
2186
2187 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2188
2189 // Put in the channel bits
2190 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2191 {
2192 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2193 }
2194
2195 // Put in remaining rb bits
2196 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2197 {
2198 BOOL_32 isRbEqAppended = FALSE;
2199
2200 if (m_settings.applyAliasFix)
2201 {
2202 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2203 }
2204 else
2205 {
2206 isRbEqAppended = (rbEquation[i].getsize() > 0);
2207 }
2208
2209 if (isRbEqAppended)
2210 {
2211 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2212 // Mark any rb bit we add in to the rb mask
2213 j++;
2214 }
2215 }
2216
2217 //------------------------------------------------------------------------------------------
2218 // Put in the uncompressed fragment bits
2219 //------------------------------------------------------------------------------------------
2220 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2221 {
2222 co.set(DIM_S, compFragLog2 + i);
2223 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2224 }
2225 }
2226 }
2227
2228 /**
2229 ************************************************************************************************************************
2230 * Gfx9Lib::IsEquationSupported
2231 *
2232 * @brief
2233 * Check if equation is supported for given swizzle mode and resource type.
2234 *
2235 * @return
2236 * TRUE if supported
2237 ************************************************************************************************************************
2238 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2239 BOOL_32 Gfx9Lib::IsEquationSupported(
2240 AddrResourceType rsrcType,
2241 AddrSwizzleMode swMode,
2242 UINT_32 elementBytesLog2) const
2243 {
2244 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2245 (IsValidSwMode(swMode) == TRUE) &&
2246 (IsLinear(swMode) == FALSE) &&
2247 (((IsTex2d(rsrcType) == TRUE) &&
2248 ((elementBytesLog2 < 4) ||
2249 ((IsRotateSwizzle(swMode) == FALSE) &&
2250 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2251 ((IsTex3d(rsrcType) == TRUE) &&
2252 (IsRotateSwizzle(swMode) == FALSE) &&
2253 (IsBlock256b(swMode) == FALSE)));
2254
2255 return supported;
2256 }
2257
2258 /**
2259 ************************************************************************************************************************
2260 * Gfx9Lib::InitEquationTable
2261 *
2262 * @brief
2263 * Initialize Equation table.
2264 *
2265 * @return
2266 * N/A
2267 ************************************************************************************************************************
2268 */
InitEquationTable()2269 VOID Gfx9Lib::InitEquationTable()
2270 {
2271 memset(m_equationTable, 0, sizeof(m_equationTable));
2272
2273 // Loop all possible resource type (2D/3D)
2274 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2275 {
2276 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2277
2278 // Loop all possible swizzle mode
2279 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2280 {
2281 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2282
2283 // Loop all possible bpp
2284 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2285 {
2286 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2287
2288 // Check if the input is supported
2289 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2290 {
2291 ADDR_EQUATION equation;
2292 ADDR_E_RETURNCODE retCode;
2293
2294 memset(&equation, 0, sizeof(ADDR_EQUATION));
2295
2296 // Generate the equation
2297 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2298 {
2299 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2300 }
2301 else if (IsThin(rsrcType, swMode))
2302 {
2303 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2304 }
2305 else
2306 {
2307 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2308 }
2309
2310 // Only fill the equation into the table if the return code is ADDR_OK,
2311 // otherwise if the return code is not ADDR_OK, it indicates this is not
2312 // a valid input, we do nothing but just fill invalid equation index
2313 // into the lookup table.
2314 if (retCode == ADDR_OK)
2315 {
2316 equationIndex = m_numEquations;
2317 ADDR_ASSERT(equationIndex < EquationTableSize);
2318
2319 m_equationTable[equationIndex] = equation;
2320
2321 m_numEquations++;
2322 }
2323 else
2324 {
2325 ADDR_ASSERT_ALWAYS();
2326 }
2327 }
2328
2329 // Fill the index into the lookup table, if the combination is not supported
2330 // fill the invalid equation index
2331 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2332 }
2333 }
2334 }
2335 }
2336
2337 /**
2338 ************************************************************************************************************************
2339 * Gfx9Lib::HwlGetEquationIndex
2340 *
2341 * @brief
2342 * Interface function stub of GetEquationIndex
2343 *
2344 * @return
*   Equation index, or ADDR_INVALID_EQUATION_INDEX if no equation is available
2346 ************************************************************************************************************************
2347 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2348 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2349 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2350 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2351 ) const
2352 {
2353 AddrResourceType rsrcType = pIn->resourceType;
2354 AddrSwizzleMode swMode = pIn->swizzleMode;
2355 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2356 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2357
2358 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2359 {
2360 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2361 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2362
2363 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2364 }
2365
2366 if (pOut->pMipInfo != NULL)
2367 {
2368 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2369 {
2370 pOut->pMipInfo[i].equationIndex = index;
2371 }
2372 }
2373
2374 return index;
2375 }
2376
2377 /**
2378 ************************************************************************************************************************
2379 * Gfx9Lib::HwlComputeBlock256Equation
2380 *
2381 * @brief
2382 * Interface function stub of ComputeBlock256Equation
2383 *
2384 * @return
2385 * ADDR_E_RETURNCODE
2386 ************************************************************************************************************************
2387 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2388 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2389 AddrResourceType rsrcType,
2390 AddrSwizzleMode swMode,
2391 UINT_32 elementBytesLog2,
2392 ADDR_EQUATION* pEquation) const
2393 {
2394 ADDR_E_RETURNCODE ret = ADDR_OK;
2395
2396 pEquation->numBits = 8;
2397 pEquation->numBitComponents = 1;
2398
2399 UINT_32 i = 0;
2400 for (; i < elementBytesLog2; i++)
2401 {
2402 InitChannel(1, 0 , i, &pEquation->addr[i]);
2403 }
2404
2405 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2406
2407 const UINT_32 maxBitsUsed = 4;
2408 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2409 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2410
2411 for (i = 0; i < maxBitsUsed; i++)
2412 {
2413 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2414 InitChannel(1, 1, i, &y[i]);
2415 }
2416
2417 if (IsStandardSwizzle(rsrcType, swMode))
2418 {
2419 switch (elementBytesLog2)
2420 {
2421 case 0:
2422 pixelBit[0] = x[0];
2423 pixelBit[1] = x[1];
2424 pixelBit[2] = x[2];
2425 pixelBit[3] = x[3];
2426 pixelBit[4] = y[0];
2427 pixelBit[5] = y[1];
2428 pixelBit[6] = y[2];
2429 pixelBit[7] = y[3];
2430 break;
2431 case 1:
2432 pixelBit[0] = x[0];
2433 pixelBit[1] = x[1];
2434 pixelBit[2] = x[2];
2435 pixelBit[3] = y[0];
2436 pixelBit[4] = y[1];
2437 pixelBit[5] = y[2];
2438 pixelBit[6] = x[3];
2439 break;
2440 case 2:
2441 pixelBit[0] = x[0];
2442 pixelBit[1] = x[1];
2443 pixelBit[2] = y[0];
2444 pixelBit[3] = y[1];
2445 pixelBit[4] = y[2];
2446 pixelBit[5] = x[2];
2447 break;
2448 case 3:
2449 pixelBit[0] = x[0];
2450 pixelBit[1] = y[0];
2451 pixelBit[2] = y[1];
2452 pixelBit[3] = x[1];
2453 pixelBit[4] = x[2];
2454 break;
2455 case 4:
2456 pixelBit[0] = y[0];
2457 pixelBit[1] = y[1];
2458 pixelBit[2] = x[0];
2459 pixelBit[3] = x[1];
2460 break;
2461 default:
2462 ADDR_ASSERT_ALWAYS();
2463 ret = ADDR_INVALIDPARAMS;
2464 break;
2465 }
2466 }
2467 else if (IsDisplaySwizzle(rsrcType, swMode))
2468 {
2469 switch (elementBytesLog2)
2470 {
2471 case 0:
2472 pixelBit[0] = x[0];
2473 pixelBit[1] = x[1];
2474 pixelBit[2] = x[2];
2475 pixelBit[3] = y[1];
2476 pixelBit[4] = y[0];
2477 pixelBit[5] = y[2];
2478 pixelBit[6] = x[3];
2479 pixelBit[7] = y[3];
2480 break;
2481 case 1:
2482 pixelBit[0] = x[0];
2483 pixelBit[1] = x[1];
2484 pixelBit[2] = x[2];
2485 pixelBit[3] = y[0];
2486 pixelBit[4] = y[1];
2487 pixelBit[5] = y[2];
2488 pixelBit[6] = x[3];
2489 break;
2490 case 2:
2491 pixelBit[0] = x[0];
2492 pixelBit[1] = x[1];
2493 pixelBit[2] = y[0];
2494 pixelBit[3] = x[2];
2495 pixelBit[4] = y[1];
2496 pixelBit[5] = y[2];
2497 break;
2498 case 3:
2499 pixelBit[0] = x[0];
2500 pixelBit[1] = y[0];
2501 pixelBit[2] = x[1];
2502 pixelBit[3] = x[2];
2503 pixelBit[4] = y[1];
2504 break;
2505 case 4:
2506 pixelBit[0] = x[0];
2507 pixelBit[1] = y[0];
2508 pixelBit[2] = x[1];
2509 pixelBit[3] = y[1];
2510 break;
2511 default:
2512 ADDR_ASSERT_ALWAYS();
2513 ret = ADDR_INVALIDPARAMS;
2514 break;
2515 }
2516 }
2517 else if (IsRotateSwizzle(swMode))
2518 {
2519 switch (elementBytesLog2)
2520 {
2521 case 0:
2522 pixelBit[0] = y[0];
2523 pixelBit[1] = y[1];
2524 pixelBit[2] = y[2];
2525 pixelBit[3] = x[1];
2526 pixelBit[4] = x[0];
2527 pixelBit[5] = x[2];
2528 pixelBit[6] = x[3];
2529 pixelBit[7] = y[3];
2530 break;
2531 case 1:
2532 pixelBit[0] = y[0];
2533 pixelBit[1] = y[1];
2534 pixelBit[2] = y[2];
2535 pixelBit[3] = x[0];
2536 pixelBit[4] = x[1];
2537 pixelBit[5] = x[2];
2538 pixelBit[6] = x[3];
2539 break;
2540 case 2:
2541 pixelBit[0] = y[0];
2542 pixelBit[1] = y[1];
2543 pixelBit[2] = x[0];
2544 pixelBit[3] = y[2];
2545 pixelBit[4] = x[1];
2546 pixelBit[5] = x[2];
2547 break;
2548 case 3:
2549 pixelBit[0] = y[0];
2550 pixelBit[1] = x[0];
2551 pixelBit[2] = y[1];
2552 pixelBit[3] = x[1];
2553 pixelBit[4] = x[2];
2554 break;
2555 default:
2556 ADDR_ASSERT_ALWAYS();
2557 case 4:
2558 ret = ADDR_INVALIDPARAMS;
2559 break;
2560 }
2561 }
2562 else
2563 {
2564 ADDR_ASSERT_ALWAYS();
2565 ret = ADDR_INVALIDPARAMS;
2566 }
2567
2568 // Post validation
2569 if (ret == ADDR_OK)
2570 {
2571 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2572 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2573 (microBlockDim.w * (1 << elementBytesLog2)));
2574 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2575 }
2576
2577 return ret;
2578 }
2579
2580 /**
2581 ************************************************************************************************************************
2582 * Gfx9Lib::HwlComputeThinEquation
2583 *
2584 * @brief
2585 * Interface function stub of ComputeThinEquation
2586 *
2587 * @return
2588 * ADDR_E_RETURNCODE
2589 ************************************************************************************************************************
2590 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2591 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2592 AddrResourceType rsrcType,
2593 AddrSwizzleMode swMode,
2594 UINT_32 elementBytesLog2,
2595 ADDR_EQUATION* pEquation) const
2596 {
2597 ADDR_E_RETURNCODE ret = ADDR_OK;
2598
2599 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2600
2601 UINT_32 maxXorBits = blockSizeLog2;
2602 if (IsNonPrtXor(swMode))
2603 {
2604 // For non-prt-xor, maybe need to initialize some more bits for xor
2605 // The highest xor bit used in equation will be max the following 3 items:
2606 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2607 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2608 // 3. blockSizeLog2
2609
2610 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2611 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2612 GetPipeXorBits(blockSizeLog2) +
2613 2 * GetBankXorBits(blockSizeLog2));
2614 }
2615
2616 const UINT_32 maxBitsUsed = 14;
2617 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2618 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2619 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2620
2621 const UINT_32 extraXorBits = 16;
2622 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2623 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2624
2625 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2626 {
2627 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2628 InitChannel(1, 1, i, &y[i]);
2629 }
2630
2631 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2632
2633 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2634 {
2635 InitChannel(1, 0 , i, &pixelBit[i]);
2636 }
2637
2638 UINT_32 xIdx = 0;
2639 UINT_32 yIdx = 0;
2640 UINT_32 lowBits = 0;
2641
2642 if (IsZOrderSwizzle(swMode))
2643 {
2644 if (elementBytesLog2 <= 3)
2645 {
2646 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2647 {
2648 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2649 }
2650
2651 lowBits = 6;
2652 }
2653 else
2654 {
2655 ret = ADDR_INVALIDPARAMS;
2656 }
2657 }
2658 else
2659 {
2660 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2661
2662 if (ret == ADDR_OK)
2663 {
2664 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2665 xIdx = Log2(microBlockDim.w);
2666 yIdx = Log2(microBlockDim.h);
2667 lowBits = 8;
2668 }
2669 }
2670
2671 if (ret == ADDR_OK)
2672 {
2673 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2674 {
2675 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2676 }
2677
2678 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2679 {
2680 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2681 }
2682
2683 if (IsXor(swMode))
2684 {
2685 // Fill XOR bits
2686 UINT_32 pipeStart = m_pipeInterleaveLog2;
2687 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2688
2689 UINT_32 bankStart = pipeStart + pipeXorBits;
2690 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2691
2692 for (UINT_32 i = 0; i < pipeXorBits; i++)
2693 {
2694 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2695 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2696 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2697
2698 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2699 }
2700
2701 for (UINT_32 i = 0; i < bankXorBits; i++)
2702 {
2703 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2704 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2705 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2706
2707 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2708 }
2709
2710 if (IsPrt(swMode) == FALSE)
2711 {
2712 for (UINT_32 i = 0; i < pipeXorBits; i++)
2713 {
2714 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2715 }
2716
2717 for (UINT_32 i = 0; i < bankXorBits; i++)
2718 {
2719 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2720 }
2721 }
2722 }
2723
2724 FillEqBitComponents(pEquation);
2725 pEquation->numBits = blockSizeLog2;
2726 }
2727
2728 return ret;
2729 }
2730
2731 /**
2732 ************************************************************************************************************************
2733 * Gfx9Lib::HwlComputeThickEquation
2734 *
2735 * @brief
2736 * Interface function stub of ComputeThickEquation
2737 *
2738 * @return
2739 * ADDR_E_RETURNCODE
2740 ************************************************************************************************************************
2741 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2742 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2743 AddrResourceType rsrcType,
2744 AddrSwizzleMode swMode,
2745 UINT_32 elementBytesLog2,
2746 ADDR_EQUATION* pEquation) const
2747 {
2748 ADDR_E_RETURNCODE ret = ADDR_OK;
2749
2750 ADDR_ASSERT(IsTex3d(rsrcType));
2751
2752 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2753
2754 UINT_32 maxXorBits = blockSizeLog2;
2755 if (IsNonPrtXor(swMode))
2756 {
2757 // For non-prt-xor, maybe need to initialize some more bits for xor
2758 // The highest xor bit used in equation will be max the following 3:
2759 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2760 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2761 // 3. blockSizeLog2
2762
2763 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2764 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2765 GetPipeXorBits(blockSizeLog2) +
2766 3 * GetBankXorBits(blockSizeLog2));
2767 }
2768
2769 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2770 {
2771 InitChannel(1, 0 , i, &pEquation->addr[i]);
2772 }
2773
2774 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2775
2776 const UINT_32 maxBitsUsed = 12;
2777 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2778 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2779 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2780 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2781
2782 const UINT_32 extraXorBits = 24;
2783 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2784 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2785
2786 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2787 {
2788 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2789 InitChannel(1, 1, i, &y[i]);
2790 InitChannel(1, 2, i, &z[i]);
2791 }
2792
2793 if (IsZOrderSwizzle(swMode))
2794 {
2795 switch (elementBytesLog2)
2796 {
2797 case 0:
2798 pixelBit[0] = x[0];
2799 pixelBit[1] = y[0];
2800 pixelBit[2] = x[1];
2801 pixelBit[3] = y[1];
2802 pixelBit[4] = z[0];
2803 pixelBit[5] = z[1];
2804 pixelBit[6] = x[2];
2805 pixelBit[7] = z[2];
2806 pixelBit[8] = y[2];
2807 pixelBit[9] = x[3];
2808 break;
2809 case 1:
2810 pixelBit[0] = x[0];
2811 pixelBit[1] = y[0];
2812 pixelBit[2] = x[1];
2813 pixelBit[3] = y[1];
2814 pixelBit[4] = z[0];
2815 pixelBit[5] = z[1];
2816 pixelBit[6] = z[2];
2817 pixelBit[7] = y[2];
2818 pixelBit[8] = x[2];
2819 break;
2820 case 2:
2821 pixelBit[0] = x[0];
2822 pixelBit[1] = y[0];
2823 pixelBit[2] = x[1];
2824 pixelBit[3] = z[0];
2825 pixelBit[4] = y[1];
2826 pixelBit[5] = z[1];
2827 pixelBit[6] = y[2];
2828 pixelBit[7] = x[2];
2829 break;
2830 case 3:
2831 pixelBit[0] = x[0];
2832 pixelBit[1] = y[0];
2833 pixelBit[2] = z[0];
2834 pixelBit[3] = x[1];
2835 pixelBit[4] = z[1];
2836 pixelBit[5] = y[1];
2837 pixelBit[6] = x[2];
2838 break;
2839 case 4:
2840 pixelBit[0] = x[0];
2841 pixelBit[1] = y[0];
2842 pixelBit[2] = z[0];
2843 pixelBit[3] = z[1];
2844 pixelBit[4] = y[1];
2845 pixelBit[5] = x[1];
2846 break;
2847 default:
2848 ADDR_ASSERT_ALWAYS();
2849 ret = ADDR_INVALIDPARAMS;
2850 break;
2851 }
2852 }
2853 else if (IsStandardSwizzle(rsrcType, swMode))
2854 {
2855 switch (elementBytesLog2)
2856 {
2857 case 0:
2858 pixelBit[0] = x[0];
2859 pixelBit[1] = x[1];
2860 pixelBit[2] = x[2];
2861 pixelBit[3] = x[3];
2862 pixelBit[4] = y[0];
2863 pixelBit[5] = y[1];
2864 pixelBit[6] = z[0];
2865 pixelBit[7] = z[1];
2866 pixelBit[8] = z[2];
2867 pixelBit[9] = y[2];
2868 break;
2869 case 1:
2870 pixelBit[0] = x[0];
2871 pixelBit[1] = x[1];
2872 pixelBit[2] = x[2];
2873 pixelBit[3] = y[0];
2874 pixelBit[4] = y[1];
2875 pixelBit[5] = z[0];
2876 pixelBit[6] = z[1];
2877 pixelBit[7] = z[2];
2878 pixelBit[8] = y[2];
2879 break;
2880 case 2:
2881 pixelBit[0] = x[0];
2882 pixelBit[1] = x[1];
2883 pixelBit[2] = y[0];
2884 pixelBit[3] = y[1];
2885 pixelBit[4] = z[0];
2886 pixelBit[5] = z[1];
2887 pixelBit[6] = y[2];
2888 pixelBit[7] = x[2];
2889 break;
2890 case 3:
2891 pixelBit[0] = x[0];
2892 pixelBit[1] = y[0];
2893 pixelBit[2] = y[1];
2894 pixelBit[3] = z[0];
2895 pixelBit[4] = z[1];
2896 pixelBit[5] = x[1];
2897 pixelBit[6] = x[2];
2898 break;
2899 case 4:
2900 pixelBit[0] = y[0];
2901 pixelBit[1] = y[1];
2902 pixelBit[2] = z[0];
2903 pixelBit[3] = z[1];
2904 pixelBit[4] = x[0];
2905 pixelBit[5] = x[1];
2906 break;
2907 default:
2908 ADDR_ASSERT_ALWAYS();
2909 ret = ADDR_INVALIDPARAMS;
2910 break;
2911 }
2912 }
2913 else
2914 {
2915 ADDR_ASSERT_ALWAYS();
2916 ret = ADDR_INVALIDPARAMS;
2917 }
2918
2919 if (ret == ADDR_OK)
2920 {
2921 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2922 UINT_32 xIdx = Log2(microBlockDim.w);
2923 UINT_32 yIdx = Log2(microBlockDim.h);
2924 UINT_32 zIdx = Log2(microBlockDim.d);
2925
2926 pixelBit = pEquation->addr;
2927
2928 const UINT_32 lowBits = 10;
2929 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2930 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2931
2932 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2933 {
2934 if ((i % 3) == 0)
2935 {
2936 pixelBit[i] = x[xIdx++];
2937 }
2938 else if ((i % 3) == 1)
2939 {
2940 pixelBit[i] = z[zIdx++];
2941 }
2942 else
2943 {
2944 pixelBit[i] = y[yIdx++];
2945 }
2946 }
2947
2948 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2949 {
2950 if ((i % 3) == 0)
2951 {
2952 xorExtra[i - blockSizeLog2] = x[xIdx++];
2953 }
2954 else if ((i % 3) == 1)
2955 {
2956 xorExtra[i - blockSizeLog2] = z[zIdx++];
2957 }
2958 else
2959 {
2960 xorExtra[i - blockSizeLog2] = y[yIdx++];
2961 }
2962 }
2963
2964 if (IsXor(swMode))
2965 {
2966 // Fill XOR bits
2967 UINT_32 pipeStart = m_pipeInterleaveLog2;
2968 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2969 for (UINT_32 i = 0; i < pipeXorBits; i++)
2970 {
2971 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2972 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2973 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2974
2975 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2976
2977 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2978 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2979 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2980
2981 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2982 }
2983
2984 UINT_32 bankStart = pipeStart + pipeXorBits;
2985 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2986 for (UINT_32 i = 0; i < bankXorBits; i++)
2987 {
2988 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2989 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2990 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2991
2992 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2993
2994 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2995 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2996 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2997
2998 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2999 }
3000 }
3001
3002 FillEqBitComponents(pEquation);
3003 pEquation->numBits = blockSizeLog2;
3004 }
3005
3006 return ret;
3007 }
3008
3009 /**
3010 ************************************************************************************************************************
3011 * Gfx9Lib::IsValidDisplaySwizzleMode
3012 *
3013 * @brief
3014 * Check if a swizzle mode is supported by display engine
3015 *
3016 * @return
*   TRUE if swizzle mode is supported by display engine
3018 ************************************************************************************************************************
3019 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3020 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3021 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3022 {
3023 BOOL_32 support = FALSE;
3024
3025 const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3026
3027 if (m_settings.isDce12)
3028 {
3029 if (pIn->bpp == 32)
3030 {
3031 support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3032 }
3033 else if (pIn->bpp <= 64)
3034 {
3035 support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3036 }
3037 }
3038 else if (m_settings.isDcn1)
3039 {
3040 if (pIn->bpp < 64)
3041 {
3042 support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3043 }
3044 else if (pIn->bpp == 64)
3045 {
3046 support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3047 }
3048 }
3049 else if (m_settings.isDcn2)
3050 {
3051 if (pIn->bpp < 64)
3052 {
3053 support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3054 }
3055 else if (pIn->bpp == 64)
3056 {
3057 support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3058 }
3059 }
3060 else
3061 {
3062 ADDR_NOT_IMPLEMENTED();
3063 }
3064
3065 return support;
3066 }
3067
3068 /**
3069 ************************************************************************************************************************
3070 * Gfx9Lib::HwlComputePipeBankXor
3071 *
3072 * @brief
3073 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3074 *
3075 * @return
3076 * PipeBankXor value
3077 ************************************************************************************************************************
3078 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3079 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3080 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3081 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3082 {
3083 if (IsXor(pIn->swizzleMode))
3084 {
3085 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3086 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3087 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3088
3089 UINT_32 pipeXor = 0;
3090 UINT_32 bankXor = 0;
3091
3092 const UINT_32 bankMask = (1 << bankBits) - 1;
3093 const UINT_32 index = pIn->surfIndex & bankMask;
3094
3095 const UINT_32 bpp = pIn->flags.fmask ?
3096 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3097 if (bankBits == 4)
3098 {
3099 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3100 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3101
3102 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3103 }
3104 else if (bankBits > 0)
3105 {
3106 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3107 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3108 bankXor = (index * bankIncrease) & bankMask;
3109 }
3110
3111 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3112 }
3113 else
3114 {
3115 pOut->pipeBankXor = 0;
3116 }
3117
3118 return ADDR_OK;
3119 }
3120
3121 /**
3122 ************************************************************************************************************************
3123 * Gfx9Lib::HwlComputeSlicePipeBankXor
3124 *
3125 * @brief
3126 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3127 *
3128 * @return
3129 * PipeBankXor value
3130 ************************************************************************************************************************
3131 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3132 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3133 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3134 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3135 {
3136 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3137 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3138 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3139
3140 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3141 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3142
3143 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3144
3145 return ADDR_OK;
3146 }
3147
3148 /**
3149 ************************************************************************************************************************
3150 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3151 *
3152 * @brief
3153 * Compute sub resource offset to support swizzle pattern
3154 *
3155 * @return
3156 * Offset
3157 ************************************************************************************************************************
3158 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3159 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3160 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3161 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3162 {
3163 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3164
3165 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3166 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3167 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3168 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3169 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3170 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3171
3172 pOut->offset = pIn->slice * pIn->sliceSize +
3173 pIn->macroBlockOffset +
3174 (pIn->mipTailOffset ^ pipeBankXor) -
3175 static_cast<UINT_64>(pipeBankXor);
3176 return ADDR_OK;
3177 }
3178
3179 /**
3180 ************************************************************************************************************************
3181 * Gfx9Lib::ValidateNonSwModeParams
3182 *
3183 * @brief
3184 * Validate compute surface info params except swizzle mode
3185 *
3186 * @return
3187 * TRUE if parameters are valid, FALSE otherwise
3188 ************************************************************************************************************************
3189 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3190 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3191 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3192 {
3193 BOOL_32 valid = TRUE;
3194
3195 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3196 {
3197 ADDR_ASSERT_ALWAYS();
3198 valid = FALSE;
3199 }
3200
3201 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3202 {
3203 ADDR_ASSERT_ALWAYS();
3204 valid = FALSE;
3205 }
3206
3207 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3208 const BOOL_32 msaa = (pIn->numFrags > 1);
3209 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3210
3211 const AddrResourceType rsrcType = pIn->resourceType;
3212 const BOOL_32 tex3d = IsTex3d(rsrcType);
3213 const BOOL_32 tex2d = IsTex2d(rsrcType);
3214 const BOOL_32 tex1d = IsTex1d(rsrcType);
3215
3216 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3217 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3218 const BOOL_32 display = flags.display || flags.rotated;
3219 const BOOL_32 stereo = flags.qbStereo;
3220 const BOOL_32 fmask = flags.fmask;
3221
3222 // Resource type check
3223 if (tex1d)
3224 {
3225 if (msaa || zbuffer || display || stereo || isBc || fmask)
3226 {
3227 ADDR_ASSERT_ALWAYS();
3228 valid = FALSE;
3229 }
3230 }
3231 else if (tex2d)
3232 {
3233 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3234 {
3235 ADDR_ASSERT_ALWAYS();
3236 valid = FALSE;
3237 }
3238 }
3239 else if (tex3d)
3240 {
3241 if (msaa || zbuffer || display || stereo || fmask)
3242 {
3243 ADDR_ASSERT_ALWAYS();
3244 valid = FALSE;
3245 }
3246 }
3247 else
3248 {
3249 ADDR_ASSERT_ALWAYS();
3250 valid = FALSE;
3251 }
3252
3253 return valid;
3254 }
3255
3256 /**
3257 ************************************************************************************************************************
3258 * Gfx9Lib::ValidateSwModeParams
3259 *
3260 * @brief
3261 * Validate compute surface info related to swizzle mode
3262 *
3263 * @return
3264 * TRUE if parameters are valid, FALSE otherwise
3265 ************************************************************************************************************************
3266 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3267 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3268 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3269 {
3270 BOOL_32 valid = TRUE;
3271
3272 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3273 {
3274 ADDR_ASSERT_ALWAYS();
3275 valid = FALSE;
3276 }
3277
3278 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3279 const BOOL_32 msaa = (pIn->numFrags > 1);
3280 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3281 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3282
3283 const AddrResourceType rsrcType = pIn->resourceType;
3284 const BOOL_32 tex3d = IsTex3d(rsrcType);
3285 const BOOL_32 tex2d = IsTex2d(rsrcType);
3286 const BOOL_32 tex1d = IsTex1d(rsrcType);
3287
3288 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3289 const BOOL_32 linear = IsLinear(swizzle);
3290 const BOOL_32 blk256B = IsBlock256b(swizzle);
3291 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3292
3293 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3294 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3295 const BOOL_32 color = flags.color;
3296 const BOOL_32 texture = flags.texture;
3297 const BOOL_32 display = flags.display || flags.rotated;
3298 const BOOL_32 prt = flags.prt;
3299 const BOOL_32 fmask = flags.fmask;
3300
3301 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3302 const BOOL_32 zMaxMip = tex3d && mipmap &&
3303 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3304
3305 // Misc check
3306 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3307 {
3308 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3309 ADDR_ASSERT_ALWAYS();
3310 valid = FALSE;
3311 }
3312
3313 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3314 {
3315 ADDR_ASSERT_ALWAYS();
3316 valid = FALSE;
3317 }
3318
3319 if ((pIn->bpp == 96) && (linear == FALSE))
3320 {
3321 ADDR_ASSERT_ALWAYS();
3322 valid = FALSE;
3323 }
3324
3325 if (prt && isNonPrtXor)
3326 {
3327 ADDR_ASSERT_ALWAYS();
3328 valid = FALSE;
3329 }
3330
3331 // Resource type check
3332 if (tex1d)
3333 {
3334 if (linear == FALSE)
3335 {
3336 ADDR_ASSERT_ALWAYS();
3337 valid = FALSE;
3338 }
3339 }
3340
3341 // Swizzle type check
3342 if (linear)
3343 {
3344 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3345 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3346 {
3347 ADDR_ASSERT_ALWAYS();
3348 valid = FALSE;
3349 }
3350 }
3351 else if (IsZOrderSwizzle(swizzle))
3352 {
3353 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3354 {
3355 ADDR_ASSERT_ALWAYS();
3356 valid = FALSE;
3357 }
3358 }
3359 else if (IsStandardSwizzle(swizzle))
3360 {
3361 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3362 {
3363 ADDR_ASSERT_ALWAYS();
3364 valid = FALSE;
3365 }
3366 }
3367 else if (IsDisplaySwizzle(swizzle))
3368 {
3369 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3370 {
3371 ADDR_ASSERT_ALWAYS();
3372 valid = FALSE;
3373 }
3374 }
3375 else if (IsRotateSwizzle(swizzle))
3376 {
3377 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3378 {
3379 ADDR_ASSERT_ALWAYS();
3380 valid = FALSE;
3381 }
3382 }
3383 else
3384 {
3385 ADDR_ASSERT_ALWAYS();
3386 valid = FALSE;
3387 }
3388
3389 // Block type check
3390 if (blk256B)
3391 {
3392 if (prt || zbuffer || tex3d || mipmap || msaa)
3393 {
3394 ADDR_ASSERT_ALWAYS();
3395 valid = FALSE;
3396 }
3397 }
3398
3399 return valid;
3400 }
3401
3402 /**
3403 ************************************************************************************************************************
3404 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3405 *
3406 * @brief
3407 * Compute surface info sanity check
3408 *
3409 * @return
3410 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3411 ************************************************************************************************************************
3412 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3413 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3414 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3415 {
3416 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3417 }
3418
3419 /**
3420 ************************************************************************************************************************
3421 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3422 *
3423 * @brief
3424 * Internal function to get suggested surface information for cliet to use
3425 *
3426 * @return
3427 * ADDR_E_RETURNCODE
3428 ************************************************************************************************************************
3429 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3430 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3431 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3432 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3433 {
3434 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3435 ElemLib* pElemLib = GetElemLib();
3436
3437 UINT_32 bpp = pIn->bpp;
3438 UINT_32 width = Max(pIn->width, 1u);
3439 UINT_32 height = Max(pIn->height, 1u);
3440 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3441 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3442
3443 if (pIn->flags.fmask)
3444 {
3445 bpp = GetFmaskBpp(numSamples, numFrags);
3446 numFrags = 1;
3447 numSamples = 1;
3448 pOut->resourceType = ADDR_RSRC_TEX_2D;
3449 }
3450 else
3451 {
3452 // Set format to INVALID will skip this conversion
3453 if (pIn->format != ADDR_FMT_INVALID)
3454 {
3455 UINT_32 expandX, expandY;
3456
3457 // Don't care for this case
3458 ElemMode elemMode = ADDR_UNCOMPRESSED;
3459
3460 // Get compression/expansion factors and element mode which indicates compression/expansion
3461 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3462 &elemMode,
3463 &expandX,
3464 &expandY);
3465
3466 UINT_32 basePitch = 0;
3467 GetElemLib()->AdjustSurfaceInfo(elemMode,
3468 expandX,
3469 expandY,
3470 &bpp,
3471 &basePitch,
3472 &width,
3473 &height);
3474 }
3475
3476 // The output may get changed for volume(3D) texture resource in future
3477 pOut->resourceType = pIn->resourceType;
3478 }
3479
3480 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3481 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3482 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3483 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3484
3485 // Pre sanity check on non swizzle mode parameters
3486 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3487 localIn.flags = pIn->flags;
3488 localIn.resourceType = pOut->resourceType;
3489 localIn.format = pIn->format;
3490 localIn.bpp = bpp;
3491 localIn.width = width;
3492 localIn.height = height;
3493 localIn.numSlices = numSlices;
3494 localIn.numMipLevels = numMipLevels;
3495 localIn.numSamples = numSamples;
3496 localIn.numFrags = numFrags;
3497
3498 if (ValidateNonSwModeParams(&localIn))
3499 {
3500 // Forbid swizzle mode(s) by client setting
3501 ADDR2_SWMODE_SET allowedSwModeSet = {};
3502 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3503 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3504 allowedSwModeSet.value |=
3505 pIn->forbiddenBlock.macroThin4KB ? 0 :
3506 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3507 allowedSwModeSet.value |=
3508 pIn->forbiddenBlock.macroThick4KB ? 0 :
3509 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3510 allowedSwModeSet.value |=
3511 pIn->forbiddenBlock.macroThin64KB ? 0 :
3512 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3513 allowedSwModeSet.value |=
3514 pIn->forbiddenBlock.macroThick64KB ? 0 :
3515 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3516
3517 if (pIn->preferredSwSet.value != 0)
3518 {
3519 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3520 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3521 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3522 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3523 }
3524
3525 if (pIn->noXor)
3526 {
3527 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3528 }
3529
3530 if (pIn->maxAlign > 0)
3531 {
3532 if (pIn->maxAlign < Size64K)
3533 {
3534 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3535 }
3536
3537 if (pIn->maxAlign < Size4K)
3538 {
3539 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3540 }
3541
3542 if (pIn->maxAlign < Size256)
3543 {
3544 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3545 }
3546 }
3547
3548 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3549 switch (pOut->resourceType)
3550 {
3551 case ADDR_RSRC_TEX_1D:
3552 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3553 break;
3554
3555 case ADDR_RSRC_TEX_2D:
3556 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3557
3558 if (bpp > 64)
3559 {
3560 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3561 }
3562 break;
3563
3564 case ADDR_RSRC_TEX_3D:
3565 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3566
3567 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3568 {
3569 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3570 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3571 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3572 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3573 }
3574
3575 if ((bpp == 128) && pIn->flags.color)
3576 {
3577 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3578 }
3579
3580 if (pIn->flags.view3dAs2dArray)
3581 {
3582 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3583 }
3584 break;
3585
3586 default:
3587 ADDR_ASSERT_ALWAYS();
3588 allowedSwModeSet.value = 0;
3589 break;
3590 }
3591
3592 if (pIn->format == ADDR_FMT_32_32_32)
3593 {
3594 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3595 }
3596
3597 if (ElemLib::IsBlockCompressed(pIn->format))
3598 {
3599 if (pIn->flags.texture)
3600 {
3601 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3602 }
3603 else
3604 {
3605 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3606 }
3607 }
3608
3609 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3610 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3611 {
3612 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3613 }
3614
3615 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3616 {
3617 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3618
3619 if (pIn->flags.noMetadata == FALSE)
3620 {
3621 if (pIn->flags.depth &&
3622 pIn->flags.texture &&
3623 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3624 {
3625 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3626 // equation from wrong address within memory range a tile covered and use the
3627 // garbage data for compressed Z reading which finally leads to corruption.
3628 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3629 }
3630
3631 if (m_settings.htileCacheRbConflict &&
3632 (pIn->flags.depth || pIn->flags.stencil) &&
3633 (numSlices > 1) &&
3634 (pIn->flags.metaRbUnaligned == FALSE) &&
3635 (pIn->flags.metaPipeUnaligned == FALSE))
3636 {
3637 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3638 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3639 }
3640 }
3641 }
3642
3643 if (msaa)
3644 {
3645 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3646 }
3647
3648 if ((numFrags > 1) &&
3649 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3650 {
3651 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3652 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3653 }
3654
3655 if (numMipLevels > 1)
3656 {
3657 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3658 }
3659
3660 if (displayRsrc)
3661 {
3662 if (m_settings.isDce12)
3663 {
3664 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3665 }
3666 else if (m_settings.isDcn1)
3667 {
3668 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3669 }
3670 else if (m_settings.isDcn2)
3671 {
3672 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3673 }
3674 else
3675 {
3676 ADDR_NOT_IMPLEMENTED();
3677 }
3678 }
3679
3680 if (allowedSwModeSet.value != 0)
3681 {
3682 #if DEBUG
3683 // Post sanity check, at least AddrLib should accept the output generated by its own
3684 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3685
3686 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3687 {
3688 if (validateSwModeSet & 1)
3689 {
3690 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3691 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3692 }
3693
3694 validateSwModeSet >>= 1;
3695 }
3696 #endif
3697
3698 pOut->validSwModeSet = allowedSwModeSet;
3699 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3700 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3701 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3702
3703 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3704
3705 if (pOut->clientPreferredSwSet.value == 0)
3706 {
3707 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3708 }
3709
3710 // Apply optional restrictions
3711 if (pIn->flags.needEquation)
3712 {
3713 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
3714 ADDR_MAX_LEGACY_EQUATION_COMP;
3715 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3716 }
3717
3718 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3719 {
3720 pOut->swizzleMode = ADDR_SW_LINEAR;
3721 }
3722 else
3723 {
3724 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3725
3726 if ((height > 1) && (computeMinSize == FALSE))
3727 {
3728 // Always ignore linear swizzle mode if:
3729 // 1. This is a (2D/3D) resource with height > 1
3730 // 2. Client doesn't require computing minimize size
3731 allowedSwModeSet.swLinear = 0;
3732 }
3733
3734 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3735
3736 // Determine block size if there are 2 or more block type candidates
3737 if (IsPow2(allowedBlockSet.value) == FALSE)
3738 {
3739 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3740
3741 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3742 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3743 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3744 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3745
3746 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3747 {
3748 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3749 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3750 }
3751
3752 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3753
3754 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3755 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3756 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3757 UINT_32 minSizeBlk = AddrBlockMicro;
3758 UINT_64 minSize = 0;
3759
3760 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3761
3762 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3763 {
3764 if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3765 {
3766 localIn.swizzleMode = swMode[i];
3767
3768 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3769 {
3770 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3771 }
3772 else
3773 {
3774 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3775 }
3776
3777 if (returnCode == ADDR_OK)
3778 {
3779 padSize[i] = localOut.surfSize;
3780
3781 if ((minSize == 0) ||
3782 Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3783 {
3784 minSize = padSize[i];
3785 minSizeBlk = i;
3786 }
3787 }
3788 else
3789 {
3790 ADDR_ASSERT_ALWAYS();
3791 break;
3792 }
3793 }
3794 }
3795
3796 if (pIn->memoryBudget > 1.0)
3797 {
3798 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3799 // smaller-block type again in coming loop
3800 switch (minSizeBlk)
3801 {
3802 case AddrBlockThick64KB:
3803 allowedBlockSet.macroThin64KB = 0;
3804 case AddrBlockThin64KB:
3805 allowedBlockSet.macroThick4KB = 0;
3806 case AddrBlockThick4KB:
3807 allowedBlockSet.macroThin4KB = 0;
3808 case AddrBlockThin4KB:
3809 allowedBlockSet.micro = 0;
3810 case AddrBlockMicro:
3811 allowedBlockSet.linear = 0;
3812 case AddrBlockLinear:
3813 break;
3814
3815 default:
3816 ADDR_ASSERT_ALWAYS();
3817 break;
3818 }
3819
3820 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3821 {
3822 if ((i != minSizeBlk) &&
3823 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3824 {
3825 if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3826 {
3827 // Clear the block type if the memory waste is unacceptable
3828 allowedBlockSet.value &= ~(1u << (i - 1));
3829 }
3830 }
3831 }
3832
3833 // Remove linear block type if 2 or more block types are allowed
3834 if (IsPow2(allowedBlockSet.value) == FALSE)
3835 {
3836 allowedBlockSet.linear = 0;
3837 }
3838
3839 // Select the biggest allowed block type
3840 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3841
3842 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3843 {
3844 minSizeBlk = AddrBlockLinear;
3845 }
3846 }
3847
3848 switch (minSizeBlk)
3849 {
3850 case AddrBlockLinear:
3851 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3852 break;
3853
3854 case AddrBlockMicro:
3855 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3856 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3857 break;
3858
3859 case AddrBlockThin4KB:
3860 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3861 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3862 break;
3863
3864 case AddrBlockThick4KB:
3865 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3866 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3867 break;
3868
3869 case AddrBlockThin64KB:
3870 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3871 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3872 break;
3873
3874 case AddrBlockThick64KB:
3875 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3876 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3877 break;
3878
3879 default:
3880 ADDR_ASSERT_ALWAYS();
3881 allowedSwModeSet.value = 0;
3882 break;
3883 }
3884 }
3885
3886 // Block type should be determined.
3887 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3888
3889 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3890
3891 // Determine swizzle type if there are 2 or more swizzle type candidates
3892 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3893 {
3894 if (ElemLib::IsBlockCompressed(pIn->format))
3895 {
3896 if (allowedSwSet.sw_D)
3897 {
3898 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3899 }
3900 else
3901 {
3902 ADDR_ASSERT(allowedSwSet.sw_S);
3903 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3904 }
3905 }
3906 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3907 {
3908 if (allowedSwSet.sw_S)
3909 {
3910 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3911 }
3912 else if (allowedSwSet.sw_D)
3913 {
3914 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3915 }
3916 else
3917 {
3918 ADDR_ASSERT(allowedSwSet.sw_R);
3919 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3920 }
3921 }
3922 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3923 {
3924 if (pIn->flags.color && allowedSwSet.sw_D)
3925 {
3926 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3927 }
3928 else if (allowedSwSet.sw_Z)
3929 {
3930 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3931 }
3932 else
3933 {
3934 ADDR_ASSERT(allowedSwSet.sw_S);
3935 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3936 }
3937 }
3938 else
3939 {
3940 if (pIn->flags.rotated && allowedSwSet.sw_R)
3941 {
3942 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3943 }
3944 else if (allowedSwSet.sw_D)
3945 {
3946 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3947 }
3948 else if (allowedSwSet.sw_S)
3949 {
3950 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3951 }
3952 else
3953 {
3954 ADDR_ASSERT(allowedSwSet.sw_Z);
3955 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3956 }
3957 }
3958
3959 // Swizzle type should be determined.
3960 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3961 }
3962
3963 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3964 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3965 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3966 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3967 }
3968
3969 returnCode = ADDR_OK;
3970 }
3971 else
3972 {
3973 // Invalid combination...
3974 ADDR_ASSERT_ALWAYS();
3975 }
3976 }
3977 else
3978 {
3979 // Invalid combination...
3980 ADDR_ASSERT_ALWAYS();
3981 }
3982
3983 return returnCode;
3984 }
3985
3986 /**
3987 ************************************************************************************************************************
3988 * Gfx9Lib::ComputeStereoInfo
3989 *
3990 * @brief
3991 * Compute height alignment and right eye pipeBankXor for stereo surface
3992 *
3993 * @return
3994 * Error code
3995 *
3996 ************************************************************************************************************************
3997 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3998 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3999 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
4000 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
4001 UINT_32* pHeightAlign
4002 ) const
4003 {
4004 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4005
4006 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4007
4008 if (eqIndex < m_numEquations)
4009 {
4010 if (IsXor(pIn->swizzleMode))
4011 {
4012 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4013 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
4014 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
4015 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
4016 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4017 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
4018
4019 ADDR_ASSERT(maxYCoordBlock256 ==
4020 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4021
4022 const UINT_32 maxYCoordInBaseEquation =
4023 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4024
4025 ADDR_ASSERT(maxYCoordInBaseEquation ==
4026 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4027
4028 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4029
4030 ADDR_ASSERT(maxYCoordInPipeXor ==
4031 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4032
4033 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4034 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4035
4036 ADDR_ASSERT(maxYCoordInBankXor ==
4037 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4038
4039 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4040
4041 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4042 {
4043 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4044
4045 if (pOut->pStereoInfo != NULL)
4046 {
4047 pOut->pStereoInfo->rightSwizzle = 0;
4048
4049 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4050 {
4051 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4052 {
4053 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4054 }
4055
4056 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4057 {
4058 pOut->pStereoInfo->rightSwizzle |=
4059 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4060 }
4061
4062 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4063 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4064 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4065 }
4066 }
4067 }
4068 }
4069 }
4070 else
4071 {
4072 ADDR_ASSERT_ALWAYS();
4073 returnCode = ADDR_ERROR;
4074 }
4075
4076 return returnCode;
4077 }
4078
4079 /**
4080 ************************************************************************************************************************
4081 * Gfx9Lib::HwlComputeSurfaceInfoTiled
4082 *
4083 * @brief
4084 * Internal function to calculate alignment for tiled surface
4085 *
4086 * @return
4087 * ADDR_E_RETURNCODE
4088 ************************************************************************************************************************
4089 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4090 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4091 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4092 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4093 ) const
4094 {
4095 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4096 &pOut->blockHeight,
4097 &pOut->blockSlices,
4098 pIn->bpp,
4099 pIn->numFrags,
4100 pIn->resourceType,
4101 pIn->swizzleMode);
4102
4103 if (returnCode == ADDR_OK)
4104 {
4105 UINT_32 pitchAlignInElement = pOut->blockWidth;
4106
4107 if ((IsTex2d(pIn->resourceType) == TRUE) &&
4108 (pIn->flags.display || pIn->flags.rotated) &&
4109 (pIn->numMipLevels <= 1) &&
4110 (pIn->numSamples <= 1) &&
4111 (pIn->numFrags <= 1))
4112 {
4113 // Display engine needs pitch align to be at least 32 pixels.
4114 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4115 }
4116
4117 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4118
4119 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4120 {
4121 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4122 {
4123 returnCode = ADDR_INVALIDPARAMS;
4124 }
4125 else if (pIn->pitchInElement < pOut->pitch)
4126 {
4127 returnCode = ADDR_INVALIDPARAMS;
4128 }
4129 else
4130 {
4131 pOut->pitch = pIn->pitchInElement;
4132 }
4133 }
4134
4135 UINT_32 heightAlign = 0;
4136
4137 if (pIn->flags.qbStereo)
4138 {
4139 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4140 }
4141
4142 if (returnCode == ADDR_OK)
4143 {
4144 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4145
4146 if (heightAlign > 1)
4147 {
4148 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4149 }
4150
4151 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4152
4153 pOut->epitchIsHeight = FALSE;
4154 pOut->mipChainInTail = FALSE;
4155 pOut->firstMipIdInTail = pIn->numMipLevels;
4156
4157 pOut->mipChainPitch = pOut->pitch;
4158 pOut->mipChainHeight = pOut->height;
4159 pOut->mipChainSlice = pOut->numSlices;
4160
4161 if (pIn->numMipLevels > 1)
4162 {
4163 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4164 pIn->swizzleMode,
4165 pIn->bpp,
4166 pIn->width,
4167 pIn->height,
4168 pIn->numSlices,
4169 pOut->blockWidth,
4170 pOut->blockHeight,
4171 pOut->blockSlices,
4172 pIn->numMipLevels,
4173 pOut->pMipInfo);
4174
4175 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4176
4177 if (endingMipId == 0)
4178 {
4179 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4180 pIn->swizzleMode,
4181 pOut->blockWidth,
4182 pOut->blockHeight,
4183 pOut->blockSlices);
4184
4185 pOut->epitchIsHeight = TRUE;
4186 pOut->pitch = tailMaxDim.w;
4187 pOut->height = tailMaxDim.h;
4188 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4189 tailMaxDim.d : pIn->numSlices;
4190 pOut->mipChainInTail = TRUE;
4191 }
4192 else
4193 {
4194 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4195 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4196
4197 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4198 pIn->swizzleMode,
4199 mip0WidthInBlk,
4200 mip0HeightInBlk,
4201 pOut->numSlices / pOut->blockSlices);
4202 if (majorMode == ADDR_MAJOR_Y)
4203 {
4204 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4205
4206 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4207 {
4208 mip1WidthInBlk++;
4209 }
4210
4211 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4212
4213 pOut->epitchIsHeight = FALSE;
4214 }
4215 else
4216 {
4217 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4218
4219 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4220 {
4221 mip1HeightInBlk++;
4222 }
4223
4224 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4225
4226 pOut->epitchIsHeight = TRUE;
4227 }
4228 }
4229
4230 if (pOut->pMipInfo != NULL)
4231 {
4232 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4233
4234 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4235 {
4236 Dim3d mipStartPos = {0};
4237 UINT_32 mipTailOffsetInBytes = 0;
4238
4239 mipStartPos = GetMipStartPos(pIn->resourceType,
4240 pIn->swizzleMode,
4241 pOut->pitch,
4242 pOut->height,
4243 pOut->numSlices,
4244 pOut->blockWidth,
4245 pOut->blockHeight,
4246 pOut->blockSlices,
4247 i,
4248 elementBytesLog2,
4249 &mipTailOffsetInBytes);
4250
4251 UINT_32 pitchInBlock =
4252 pOut->mipChainPitch / pOut->blockWidth;
4253 UINT_32 sliceInBlock =
4254 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4255 UINT_64 blockIndex =
4256 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4257 UINT_64 macroBlockOffset =
4258 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4259
4260 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4261 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4262 }
4263 }
4264 }
4265 else if (pOut->pMipInfo != NULL)
4266 {
4267 pOut->pMipInfo[0].pitch = pOut->pitch;
4268 pOut->pMipInfo[0].height = pOut->height;
4269 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4270 pOut->pMipInfo[0].offset = 0;
4271 }
4272
4273 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4274 (pIn->bpp >> 3) * pIn->numFrags;
4275 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4276 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4277
4278 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4279 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4280 (pIn->flags.texture == TRUE) &&
4281 (pIn->flags.noMetadata == FALSE) &&
4282 (pIn->flags.metaPipeUnaligned == FALSE))
4283 {
4284 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4285 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4286 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4287 // them, which may cause invalid metadata to be fetched.
4288 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4289 }
4290
4291 if (pIn->flags.prt)
4292 {
4293 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4294 }
4295 }
4296 }
4297
4298 return returnCode;
4299 }
4300
4301 /**
4302 ************************************************************************************************************************
4303 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4304 *
4305 * @brief
4306 * Internal function to calculate alignment for linear surface
4307 *
4308 * @return
4309 * ADDR_E_RETURNCODE
4310 ************************************************************************************************************************
4311 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4312 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4313 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4314 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4315 ) const
4316 {
4317 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4318 UINT_32 pitch = 0;
4319 UINT_32 actualHeight = 0;
4320 UINT_32 elementBytes = pIn->bpp >> 3;
4321 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4322
4323 if (IsTex1d(pIn->resourceType))
4324 {
4325 if (pIn->height > 1)
4326 {
4327 returnCode = ADDR_INVALIDPARAMS;
4328 }
4329 else
4330 {
4331 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4332
4333 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4334 actualHeight = pIn->numMipLevels;
4335
4336 if (pIn->flags.prt == FALSE)
4337 {
4338 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4339 &pitch, &actualHeight);
4340 }
4341
4342 if (returnCode == ADDR_OK)
4343 {
4344 if (pOut->pMipInfo != NULL)
4345 {
4346 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4347 {
4348 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4349 pOut->pMipInfo[i].pitch = pitch;
4350 pOut->pMipInfo[i].height = 1;
4351 pOut->pMipInfo[i].depth = 1;
4352 }
4353 }
4354 }
4355 }
4356 }
4357 else
4358 {
4359 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4360 }
4361
4362 if ((pitch == 0) || (actualHeight == 0))
4363 {
4364 returnCode = ADDR_INVALIDPARAMS;
4365 }
4366
4367 if (returnCode == ADDR_OK)
4368 {
4369 pOut->pitch = pitch;
4370 pOut->height = pIn->height;
4371 pOut->numSlices = pIn->numSlices;
4372 pOut->mipChainPitch = pitch;
4373 pOut->mipChainHeight = actualHeight;
4374 pOut->mipChainSlice = pOut->numSlices;
4375 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4376 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4377 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4378 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4379 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4380 pOut->blockHeight = 1;
4381 pOut->blockSlices = 1;
4382 }
4383
4384 // Post calculation validate
4385 ADDR_ASSERT(pOut->sliceSize > 0);
4386
4387 return returnCode;
4388 }
4389
4390 /**
4391 ************************************************************************************************************************
4392 * Gfx9Lib::GetMipChainInfo
4393 *
4394 * @brief
4395 * Internal function to get out information about mip chain
4396 *
4397 * @return
4398 * Id of the first mip that fits in the mip tail, or numMipLevel if no mip level is in the mip tail
4399 ************************************************************************************************************************
4400 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4401 UINT_32 Gfx9Lib::GetMipChainInfo(
4402 AddrResourceType resourceType,
4403 AddrSwizzleMode swizzleMode,
4404 UINT_32 bpp,
4405 UINT_32 mip0Width,
4406 UINT_32 mip0Height,
4407 UINT_32 mip0Depth,
4408 UINT_32 blockWidth,
4409 UINT_32 blockHeight,
4410 UINT_32 blockDepth,
4411 UINT_32 numMipLevel,
4412 ADDR2_MIP_INFO* pMipInfo) const
4413 {
4414 const Dim3d tailMaxDim =
4415 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4416
4417 UINT_32 mipPitch = mip0Width;
4418 UINT_32 mipHeight = mip0Height;
4419 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4420 UINT_32 offset = 0;
4421 UINT_32 firstMipIdInTail = numMipLevel;
4422 BOOL_32 inTail = FALSE;
4423 BOOL_32 finalDim = FALSE;
4424 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4425 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4426
4427 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4428 {
4429 if (inTail)
4430 {
4431 if (finalDim == FALSE)
4432 {
4433 UINT_32 mipSize;
4434
4435 if (is3dThick)
4436 {
4437 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4438 }
4439 else
4440 {
4441 mipSize = mipPitch * mipHeight * (bpp >> 3);
4442 }
4443
4444 if (mipSize <= 256)
4445 {
4446 UINT_32 index = Log2(bpp >> 3);
4447
4448 if (is3dThick)
4449 {
4450 mipPitch = Block256_3dZ[index].w;
4451 mipHeight = Block256_3dZ[index].h;
4452 mipDepth = Block256_3dZ[index].d;
4453 }
4454 else
4455 {
4456 mipPitch = Block256_2d[index].w;
4457 mipHeight = Block256_2d[index].h;
4458 }
4459
4460 finalDim = TRUE;
4461 }
4462 }
4463 }
4464 else
4465 {
4466 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4467 mipPitch, mipHeight, mipDepth);
4468
4469 if (inTail)
4470 {
4471 firstMipIdInTail = mipId;
4472 mipPitch = tailMaxDim.w;
4473 mipHeight = tailMaxDim.h;
4474
4475 if (is3dThick)
4476 {
4477 mipDepth = tailMaxDim.d;
4478 }
4479 }
4480 else
4481 {
4482 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4483 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4484
4485 if (is3dThick)
4486 {
4487 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4488 }
4489 }
4490 }
4491
4492 if (pMipInfo != NULL)
4493 {
4494 pMipInfo[mipId].pitch = mipPitch;
4495 pMipInfo[mipId].height = mipHeight;
4496 pMipInfo[mipId].depth = mipDepth;
4497 pMipInfo[mipId].offset = offset;
4498 }
4499
4500 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4501
4502 if (finalDim)
4503 {
4504 if (is3dThin)
4505 {
4506 mipDepth = Max(mipDepth >> 1, 1u);
4507 }
4508 }
4509 else
4510 {
4511 mipPitch = Max(mipPitch >> 1, 1u);
4512 mipHeight = Max(mipHeight >> 1, 1u);
4513
4514 if (is3dThick || is3dThin)
4515 {
4516 mipDepth = Max(mipDepth >> 1, 1u);
4517 }
4518 }
4519 }
4520
4521 return firstMipIdInTail;
4522 }
4523
4524 /**
4525 ************************************************************************************************************************
4526 * Gfx9Lib::GetMetaMiptailInfo
4527 *
4528 * @brief
4529 * Get mip tail coordinate information.
4530 *
4531 * @return
4532 * N/A
4533 ************************************************************************************************************************
4534 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4535 VOID Gfx9Lib::GetMetaMiptailInfo(
4536 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4537 Dim3d mipCoord, ///< [in] mip tail base coord
4538 UINT_32 numMipInTail, ///< [in] number of mips in tail
4539 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4540 ) const
4541 {
4542 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4543 UINT_32 mipWidth = pMetaBlkDim->w;
4544 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4545 UINT_32 mipDepth = pMetaBlkDim->d;
4546 UINT_32 minInc;
4547
4548 if (isThick)
4549 {
4550 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4551 }
4552 else if (pMetaBlkDim->h >= 1024)
4553 {
4554 minInc = 256;
4555 }
4556 else if (pMetaBlkDim->h == 512)
4557 {
4558 minInc = 128;
4559 }
4560 else
4561 {
4562 minInc = 64;
4563 }
4564
4565 UINT_32 blk32MipId = 0xFFFFFFFF;
4566
4567 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4568 {
4569 pInfo[mip].inMiptail = TRUE;
4570 pInfo[mip].startX = mipCoord.w;
4571 pInfo[mip].startY = mipCoord.h;
4572 pInfo[mip].startZ = mipCoord.d;
4573 pInfo[mip].width = mipWidth;
4574 pInfo[mip].height = mipHeight;
4575 pInfo[mip].depth = mipDepth;
4576
4577 if (mipWidth <= 32)
4578 {
4579 if (blk32MipId == 0xFFFFFFFF)
4580 {
4581 blk32MipId = mip;
4582 }
4583
4584 mipCoord.w = pInfo[blk32MipId].startX;
4585 mipCoord.h = pInfo[blk32MipId].startY;
4586 mipCoord.d = pInfo[blk32MipId].startZ;
4587
4588 switch (mip - blk32MipId)
4589 {
4590 case 0:
4591 mipCoord.w += 32; // 16x16
4592 break;
4593 case 1:
4594 mipCoord.h += 32; // 8x8
4595 break;
4596 case 2:
4597 mipCoord.h += 32; // 4x4
4598 mipCoord.w += 16;
4599 break;
4600 case 3:
4601 mipCoord.h += 32; // 2x2
4602 mipCoord.w += 32;
4603 break;
4604 case 4:
4605 mipCoord.h += 32; // 1x1
4606 mipCoord.w += 48;
4607 break;
4608 // The following are for BC/ASTC formats
4609 case 5:
4610 mipCoord.h += 48; // 1/2 x 1/2
4611 break;
4612 case 6:
4613 mipCoord.h += 48; // 1/4 x 1/4
4614 mipCoord.w += 16;
4615 break;
4616 case 7:
4617 mipCoord.h += 48; // 1/8 x 1/8
4618 mipCoord.w += 32;
4619 break;
4620 case 8:
4621 mipCoord.h += 48; // 1/16 x 1/16
4622 mipCoord.w += 48;
4623 break;
4624 default:
4625 ADDR_ASSERT_ALWAYS();
4626 break;
4627 }
4628
4629 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4630 mipHeight = mipWidth;
4631
4632 if (isThick)
4633 {
4634 mipDepth = mipWidth;
4635 }
4636 }
4637 else
4638 {
4639 if (mipWidth <= minInc)
4640 {
4641 // if we're below the minimal increment...
4642 if (isThick)
4643 {
4644 // For 3d, just go in z direction
4645 mipCoord.d += mipDepth;
4646 }
4647 else
4648 {
4649 // For 2d, first go across, then down
4650 if ((mipWidth * 2) == minInc)
4651 {
4652 // if we're 2 mips below, that's when we go back in x, and down in y
4653 mipCoord.w -= minInc;
4654 mipCoord.h += minInc;
4655 }
4656 else
4657 {
4658 // otherwise, just go across in x
4659 mipCoord.w += minInc;
4660 }
4661 }
4662 }
4663 else
4664 {
4665 // On even mip, go down, otherwise, go across
4666 if (mip & 1)
4667 {
4668 mipCoord.w += mipWidth;
4669 }
4670 else
4671 {
4672 mipCoord.h += mipHeight;
4673 }
4674 }
4675 // Divide the width by 2
4676 mipWidth >>= 1;
4677 // After the first mip in tail, the mip is always a square
4678 mipHeight = mipWidth;
4679 // ...or for 3d, a cube
4680 if (isThick)
4681 {
4682 mipDepth = mipWidth;
4683 }
4684 }
4685 }
4686 }
4687
4688 /**
4689 ************************************************************************************************************************
4690 * Gfx9Lib::GetMipStartPos
4691 *
4692 * @brief
4693 * Internal function to get out information about mip logical start position
4694 *
4695 * @return
4696 * logical start position in macro block width/height/depth of one mip level within one slice
4697 ************************************************************************************************************************
4698 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4699 Dim3d Gfx9Lib::GetMipStartPos(
4700 AddrResourceType resourceType,
4701 AddrSwizzleMode swizzleMode,
4702 UINT_32 width,
4703 UINT_32 height,
4704 UINT_32 depth,
4705 UINT_32 blockWidth,
4706 UINT_32 blockHeight,
4707 UINT_32 blockDepth,
4708 UINT_32 mipId,
4709 UINT_32 log2ElementBytes,
4710 UINT_32* pMipTailBytesOffset) const
4711 {
4712 Dim3d mipStartPos = {0};
4713 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4714
4715 // Report mip in tail if Mip0 is already in mip tail
4716 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4717 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4718 UINT_32 mipIndexInTail = mipId;
4719
4720 if (inMipTail == FALSE)
4721 {
4722 // Mip 0 dimension, unit in block
4723 UINT_32 mipWidthInBlk = width / blockWidth;
4724 UINT_32 mipHeightInBlk = height / blockHeight;
4725 UINT_32 mipDepthInBlk = depth / blockDepth;
4726 AddrMajorMode majorMode = GetMajorMode(resourceType,
4727 swizzleMode,
4728 mipWidthInBlk,
4729 mipHeightInBlk,
4730 mipDepthInBlk);
4731
4732 UINT_32 endingMip = mipId + 1;
4733
4734 for (UINT_32 i = 1; i <= mipId; i++)
4735 {
4736 if ((i == 1) || (i == 3))
4737 {
4738 if (majorMode == ADDR_MAJOR_Y)
4739 {
4740 mipStartPos.w += mipWidthInBlk;
4741 }
4742 else
4743 {
4744 mipStartPos.h += mipHeightInBlk;
4745 }
4746 }
4747 else
4748 {
4749 if (majorMode == ADDR_MAJOR_X)
4750 {
4751 mipStartPos.w += mipWidthInBlk;
4752 }
4753 else if (majorMode == ADDR_MAJOR_Y)
4754 {
4755 mipStartPos.h += mipHeightInBlk;
4756 }
4757 else
4758 {
4759 mipStartPos.d += mipDepthInBlk;
4760 }
4761 }
4762
4763 BOOL_32 inTail = FALSE;
4764
4765 if (IsThick(resourceType, swizzleMode))
4766 {
4767 UINT_32 dim = log2BlkSize % 3;
4768
4769 if (dim == 0)
4770 {
4771 inTail =
4772 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4773 }
4774 else if (dim == 1)
4775 {
4776 inTail =
4777 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4778 }
4779 else
4780 {
4781 inTail =
4782 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4783 }
4784 }
4785 else
4786 {
4787 if (log2BlkSize & 1)
4788 {
4789 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4790 }
4791 else
4792 {
4793 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4794 }
4795 }
4796
4797 if (inTail)
4798 {
4799 endingMip = i;
4800 break;
4801 }
4802
4803 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4804 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4805 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4806 }
4807
4808 if (mipId >= endingMip)
4809 {
4810 inMipTail = TRUE;
4811 mipIndexInTail = mipId - endingMip;
4812 }
4813 }
4814
4815 if (inMipTail)
4816 {
4817 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4818 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4819 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4820 }
4821
4822 return mipStartPos;
4823 }
4824
4825 /**
4826 ************************************************************************************************************************
4827 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4828 *
4829 * @brief
4830 * Internal function to calculate address from coord for tiled swizzle surface
4831 *
4832 * @return
4833 * ADDR_E_RETURNCODE
4834 ************************************************************************************************************************
4835 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4836 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4837 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4838 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4839 ) const
4840 {
4841 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4842 localIn.swizzleMode = pIn->swizzleMode;
4843 localIn.flags = pIn->flags;
4844 localIn.resourceType = pIn->resourceType;
4845 localIn.bpp = pIn->bpp;
4846 localIn.width = Max(pIn->unalignedWidth, 1u);
4847 localIn.height = Max(pIn->unalignedHeight, 1u);
4848 localIn.numSlices = Max(pIn->numSlices, 1u);
4849 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4850 localIn.numSamples = Max(pIn->numSamples, 1u);
4851 localIn.numFrags = Max(pIn->numFrags, 1u);
4852 if (localIn.numMipLevels <= 1)
4853 {
4854 localIn.pitchInElement = pIn->pitchInElement;
4855 }
4856
4857 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4858 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4859
4860 BOOL_32 valid = (returnCode == ADDR_OK) &&
4861 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4862 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4863 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4864
4865 if (valid)
4866 {
4867 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4868 Dim3d mipStartPos = {0};
4869 UINT_32 mipTailBytesOffset = 0;
4870
4871 if (pIn->numMipLevels > 1)
4872 {
4873 // Mip-map chain cannot be MSAA surface
4874 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4875
4876 mipStartPos = GetMipStartPos(pIn->resourceType,
4877 pIn->swizzleMode,
4878 localOut.pitch,
4879 localOut.height,
4880 localOut.numSlices,
4881 localOut.blockWidth,
4882 localOut.blockHeight,
4883 localOut.blockSlices,
4884 pIn->mipId,
4885 log2ElementBytes,
4886 &mipTailBytesOffset);
4887 }
4888
4889 UINT_32 interleaveOffset = 0;
4890 UINT_32 pipeBits = 0;
4891 UINT_32 pipeXor = 0;
4892 UINT_32 bankBits = 0;
4893 UINT_32 bankXor = 0;
4894
4895 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4896 {
4897 UINT_32 blockOffset = 0;
4898 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4899
4900 if (IsZOrderSwizzle(pIn->swizzleMode))
4901 {
4902 // Morton generation
4903 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4904 {
4905 UINT_32 totalLowBits = 6 - log2ElementBytes;
4906 UINT_32 mortBits = totalLowBits / 2;
4907 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4908 // Are 9 bits enough?
4909 UINT_32 highBitsValue =
4910 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4911 blockOffset = lowBitsValue | highBitsValue;
4912 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4913 }
4914 else
4915 {
4916 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4917 }
4918
4919 // Fill LSBs with sample bits
4920 if (pIn->numSamples > 1)
4921 {
4922 blockOffset *= pIn->numSamples;
4923 blockOffset |= pIn->sample;
4924 }
4925
4926 // Shift according to BytesPP
4927 blockOffset <<= log2ElementBytes;
4928 }
4929 else
4930 {
4931 // Micro block offset
4932 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4933 blockOffset = microBlockOffset;
4934
4935 // Micro block dimension
4936 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4937 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4938 // Morton generation, does 12 bit enough?
4939 blockOffset |=
4940 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4941
4942 // Sample bits start location
4943 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4944 // Join sample bits information to the highest Macro block bits
4945 if (IsNonPrtXor(pIn->swizzleMode))
4946 {
4947 // Non-prt-Xor : xor highest Macro block bits with sample bits
4948 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4949 }
4950 else
4951 {
4952 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4953 // after this op, the blockOffset only contains log2 Macro block size bits
4954 blockOffset %= (1 << sampleStart);
4955 blockOffset |= (pIn->sample << sampleStart);
4956 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4957 }
4958 }
4959
4960 if (IsXor(pIn->swizzleMode))
4961 {
4962 // Mask off bits above Macro block bits to keep page synonyms working for prt
4963 if (IsPrt(pIn->swizzleMode))
4964 {
4965 blockOffset &= ((1 << log2BlkSize) - 1);
4966 }
4967
4968 // Preserve offset inside pipe interleave
4969 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4970 blockOffset >>= m_pipeInterleaveLog2;
4971
4972 // Pipe/Se xor bits
4973 pipeBits = GetPipeXorBits(log2BlkSize);
4974 // Pipe xor
4975 pipeXor = FoldXor2d(blockOffset, pipeBits);
4976 blockOffset >>= pipeBits;
4977
4978 // Bank xor bits
4979 bankBits = GetBankXorBits(log2BlkSize);
4980 // Bank Xor
4981 bankXor = FoldXor2d(blockOffset, bankBits);
4982 blockOffset >>= bankBits;
4983
4984 // Put all the part back together
4985 blockOffset <<= bankBits;
4986 blockOffset |= bankXor;
4987 blockOffset <<= pipeBits;
4988 blockOffset |= pipeXor;
4989 blockOffset <<= m_pipeInterleaveLog2;
4990 blockOffset |= interleaveOffset;
4991 }
4992
4993 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4994 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4995
4996 blockOffset |= mipTailBytesOffset;
4997
4998 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4999 {
5000 // Apply slice xor if not MSAA/PRT
5001 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
5002 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5003 (m_pipeInterleaveLog2 + pipeBits));
5004 }
5005
5006 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5007 bankBits, pipeBits, &blockOffset);
5008
5009 blockOffset %= (1 << log2BlkSize);
5010
5011 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5012 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5013 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5014 UINT_64 macroBlockIndex =
5015 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5016 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5017 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5018
5019 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5020 }
5021 else
5022 {
5023 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5024
5025 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5026
5027 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5028 (pIn->y / microBlockDim.h),
5029 (pIn->slice / microBlockDim.d),
5030 8);
5031
5032 blockOffset <<= 10;
5033 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5034
5035 if (IsXor(pIn->swizzleMode))
5036 {
5037 // Mask off bits above Macro block bits to keep page synonyms working for prt
5038 if (IsPrt(pIn->swizzleMode))
5039 {
5040 blockOffset &= ((1 << log2BlkSize) - 1);
5041 }
5042
5043 // Preserve offset inside pipe interleave
5044 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5045 blockOffset >>= m_pipeInterleaveLog2;
5046
5047 // Pipe/Se xor bits
5048 pipeBits = GetPipeXorBits(log2BlkSize);
5049 // Pipe xor
5050 pipeXor = FoldXor3d(blockOffset, pipeBits);
5051 blockOffset >>= pipeBits;
5052
5053 // Bank xor bits
5054 bankBits = GetBankXorBits(log2BlkSize);
5055 // Bank Xor
5056 bankXor = FoldXor3d(blockOffset, bankBits);
5057 blockOffset >>= bankBits;
5058
5059 // Put all the part back together
5060 blockOffset <<= bankBits;
5061 blockOffset |= bankXor;
5062 blockOffset <<= pipeBits;
5063 blockOffset |= pipeXor;
5064 blockOffset <<= m_pipeInterleaveLog2;
5065 blockOffset |= interleaveOffset;
5066 }
5067
5068 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5069 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5070 blockOffset |= mipTailBytesOffset;
5071
5072 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5073 bankBits, pipeBits, &blockOffset);
5074
5075 blockOffset %= (1 << log2BlkSize);
5076
5077 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
5078 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5079 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5080
5081 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5082 UINT_32 sliceSizeInBlock =
5083 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5084 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5085
5086 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5087 }
5088 }
5089 else
5090 {
5091 returnCode = ADDR_INVALIDPARAMS;
5092 }
5093
5094 return returnCode;
5095 }
5096
5097 /**
5098 ************************************************************************************************************************
5099 * Gfx9Lib::ComputeSurfaceLinearPadding
5100 *
5101 * @brief
5102 * Internal function to calculate padding for linear swizzle 2D/3D surface
5103 *
5104 * @return
5105 * ADDR_E_RETURNCODE
5106 ************************************************************************************************************************
5107 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5108 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5109 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
5110 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
5111 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
5112 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
5113 ) const
5114 {
5115 ADDR_E_RETURNCODE returnCode = ADDR_OK;
5116
5117 UINT_32 elementBytes = pIn->bpp >> 3;
5118 UINT_32 pitchAlignInElement = 0;
5119
5120 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5121 {
5122 ADDR_ASSERT(pIn->numMipLevels <= 1);
5123 ADDR_ASSERT(pIn->numSlices <= 1);
5124 pitchAlignInElement = 1;
5125 }
5126 else
5127 {
5128 pitchAlignInElement = (256 / elementBytes);
5129 }
5130
5131 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
5132 UINT_32 slice0PaddedHeight = pIn->height;
5133
5134 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5135 &mipChainWidth, &slice0PaddedHeight);
5136
5137 if (returnCode == ADDR_OK)
5138 {
5139 UINT_32 mipChainHeight = 0;
5140 UINT_32 mipHeight = pIn->height;
5141 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5142
5143 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5144 {
5145 if (pMipInfo != NULL)
5146 {
5147 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5148 pMipInfo[i].pitch = mipChainWidth;
5149 pMipInfo[i].height = mipHeight;
5150 pMipInfo[i].depth = mipDepth;
5151 }
5152
5153 mipChainHeight += mipHeight;
5154 mipHeight = RoundHalf(mipHeight);
5155 mipHeight = Max(mipHeight, 1u);
5156 }
5157
5158 *pMipmap0PaddedWidth = mipChainWidth;
5159 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5160 }
5161
5162 return returnCode;
5163 }
5164
5165 /**
5166 ************************************************************************************************************************
5167 * Gfx9Lib::ComputeThinBlockDimension
5168 *
5169 * @brief
5170 * Internal function to get thin block width/height/depth in element from surface input params.
5171 *
5172 * @return
5173 * N/A
5174 ************************************************************************************************************************
5175 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5176 VOID Gfx9Lib::ComputeThinBlockDimension(
5177 UINT_32* pWidth,
5178 UINT_32* pHeight,
5179 UINT_32* pDepth,
5180 UINT_32 bpp,
5181 UINT_32 numSamples,
5182 AddrResourceType resourceType,
5183 AddrSwizzleMode swizzleMode) const
5184 {
5185 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5186
5187 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5188 const UINT_32 eleBytes = bpp >> 3;
5189 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5190 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5191 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5192 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5193
5194 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5195
5196 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5197 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5198 *pDepth = 1;
5199
5200 if (numSamples > 1)
5201 {
5202 const UINT_32 log2sample = Log2(numSamples);
5203 const UINT_32 q = log2sample >> 1;
5204 const UINT_32 r = log2sample & 1;
5205
5206 if (log2BlkSize & 1)
5207 {
5208 *pWidth >>= q;
5209 *pHeight >>= (q + r);
5210 }
5211 else
5212 {
5213 *pWidth >>= (q + r);
5214 *pHeight >>= q;
5215 }
5216 }
5217 }
5218
5219 } // V2
5220 } // Addr
5221