1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////
42
43 namespace Addr
44 {
45
46 /**
47 ************************************************************************************************************************
48 * Gfx9HwlInit
49 *
50 * @brief
51 * Creates a Gfx9Lib object.
52 *
53 * @return
54 * Returns a Gfx9Lib object pointer.
55 ************************************************************************************************************************
56 */
Gfx9HwlInit(const Client * pClient)57 Addr::Lib* Gfx9HwlInit(const Client* pClient)
58 {
59 return V2::Gfx9Lib::CreateObj(pClient);
60 }
61
62 namespace V2
63 {
64
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 // Static Const Member
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68
69 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
70 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
71 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
72 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
73 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
74 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_R
75
76 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
77 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
78 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
79 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_R
80
81 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
82 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
83 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
84 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_R
85
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90
91 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_Z_T
92 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
93 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
94 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}, // ADDR_SW_64KB_R_T
95
96 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_Z_x
97 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_x
98 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_x
99 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_4KB_R_x
100
101 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
102 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
103 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
104 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, // ADDR_SW_64KB_R_X
105
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
108 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
109 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
110 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
111 };
112
113 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
114
115 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
116
117 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
118
119 /**
120 ************************************************************************************************************************
121 * Gfx9Lib::Gfx9Lib
122 *
123 * @brief
124 * Constructor
125 *
126 ************************************************************************************************************************
127 */
Gfx9Lib(const Client * pClient)128 Gfx9Lib::Gfx9Lib(const Client* pClient)
129 :
130 Lib(pClient)
131 {
132 memset(&m_settings, 0, sizeof(m_settings));
133 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
134 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
135 m_metaEqOverrideIndex = 0;
136 }
137
138 /**
139 ************************************************************************************************************************
140 * Gfx9Lib::~Gfx9Lib
141 *
142 * @brief
143 * Destructor
144 ************************************************************************************************************************
145 */
~Gfx9Lib()146 Gfx9Lib::~Gfx9Lib()
147 {
148 }
149
150 /**
151 ************************************************************************************************************************
152 * Gfx9Lib::HwlComputeHtileInfo
153 *
154 * @brief
155 * Interface function stub of AddrComputeHtileInfo
156 *
157 * @return
158 * ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
162 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
163 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
164 ) const
165 {
166 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
167 pIn->swizzleMode);
168
169 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
170
171 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
172
173 if ((numPipeTotal == 1) && (numRbTotal == 1))
174 {
175 numCompressBlkPerMetaBlkLog2 = 10;
176 }
177 else
178 {
179 if (m_settings.applyAliasFix)
180 {
181 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
182 }
183 else
184 {
185 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
186 }
187 }
188
189 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
190
191 Dim3d metaBlkDim = {8, 8, 1};
192 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
193 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
194 UINT_32 heightAmp = totalAmpBits - widthAmp;
195 metaBlkDim.w <<= widthAmp;
196 metaBlkDim.h <<= heightAmp;
197
198 #if DEBUG
199 Dim3d metaBlkDimDbg = {8, 8, 1};
200 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
201 {
202 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
203 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
204 {
205 metaBlkDimDbg.h <<= 1;
206 }
207 else
208 {
209 metaBlkDimDbg.w <<= 1;
210 }
211 }
212 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
213 #endif
214
215 UINT_32 numMetaBlkX;
216 UINT_32 numMetaBlkY;
217 UINT_32 numMetaBlkZ;
218
219 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
220 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
221 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
222
223 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
224 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
225
226 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
227 {
228 align *= (numPipeTotal >> 1);
229 }
230
231 align = Max(align, metaBlkSize);
232
233 if (m_settings.metaBaseAlignFix)
234 {
235 align = Max(align, GetBlockSize(pIn->swizzleMode));
236 }
237
238 if (m_settings.htileAlignFix)
239 {
240 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
241 const INT_32 htileCachelineSizeLog2 = 11;
242 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
243
244 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
245
246 align <<= rbMaskPadding;
247 }
248
249 pOut->pitch = numMetaBlkX * metaBlkDim.w;
250 pOut->height = numMetaBlkY * metaBlkDim.h;
251 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
252
253 pOut->metaBlkWidth = metaBlkDim.w;
254 pOut->metaBlkHeight = metaBlkDim.h;
255 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
256
257 pOut->baseAlign = align;
258 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
259
260 return ADDR_OK;
261 }
262
263 /**
264 ************************************************************************************************************************
265 * Gfx9Lib::HwlComputeCmaskInfo
266 *
267 * @brief
268 * Interface function stub of AddrComputeCmaskInfo
269 *
270 * @return
271 * ADDR_E_RETURNCODE
272 ************************************************************************************************************************
273 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const274 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
275 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
276 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
277 ) const
278 {
279 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
280
281 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
282 pIn->swizzleMode);
283
284 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
285
286 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
287
288 if ((numPipeTotal == 1) && (numRbTotal == 1))
289 {
290 numCompressBlkPerMetaBlkLog2 = 13;
291 }
292 else
293 {
294 if (m_settings.applyAliasFix)
295 {
296 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
297 }
298 else
299 {
300 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
301 }
302
303 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
304 }
305
306 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
307
308 Dim2d metaBlkDim = {8, 8};
309 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
310 UINT_32 heightAmp = totalAmpBits >> 1;
311 UINT_32 widthAmp = totalAmpBits - heightAmp;
312 metaBlkDim.w <<= widthAmp;
313 metaBlkDim.h <<= heightAmp;
314
315 #if DEBUG
316 Dim2d metaBlkDimDbg = {8, 8};
317 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
318 {
319 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
320 {
321 metaBlkDimDbg.h <<= 1;
322 }
323 else
324 {
325 metaBlkDimDbg.w <<= 1;
326 }
327 }
328 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
329 #endif
330
331 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
332 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
333 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
334
335 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
336
337 if (m_settings.metaBaseAlignFix)
338 {
339 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
340 }
341
342 pOut->pitch = numMetaBlkX * metaBlkDim.w;
343 pOut->height = numMetaBlkY * metaBlkDim.h;
344 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
345 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
346 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
347
348 pOut->metaBlkWidth = metaBlkDim.w;
349 pOut->metaBlkHeight = metaBlkDim.h;
350
351 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
352
353 return ADDR_OK;
354 }
355
356 /**
357 ************************************************************************************************************************
358 * Gfx9Lib::GetMetaMipInfo
359 *
360 * @brief
361 * Get meta mip info
362 *
363 * @return
364 * N/A
365 ************************************************************************************************************************
366 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const367 VOID Gfx9Lib::GetMetaMipInfo(
368 UINT_32 numMipLevels, ///< [in] number of mip levels
369 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
370 BOOL_32 dataThick, ///< [in] data surface is thick
371 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
372 UINT_32 mip0Width, ///< [in] mip0 width
373 UINT_32 mip0Height, ///< [in] mip0 height
374 UINT_32 mip0Depth, ///< [in] mip0 depth
375 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
376 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
377 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
378 const
379 {
380 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
381 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
382 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
383 UINT_32 tailWidth = pMetaBlkDim->w;
384 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
385 UINT_32 tailDepth = pMetaBlkDim->d;
386 BOOL_32 inTail = FALSE;
387 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
388
389 if (numMipLevels > 1)
390 {
391 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
392 {
393 // Z major
394 major = ADDR_MAJOR_Z;
395 }
396 else if (numMetaBlkX >= numMetaBlkY)
397 {
398 // X major
399 major = ADDR_MAJOR_X;
400 }
401 else
402 {
403 // Y major
404 major = ADDR_MAJOR_Y;
405 }
406
407 inTail = ((mip0Width <= tailWidth) &&
408 (mip0Height <= tailHeight) &&
409 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
410
411 if (inTail == FALSE)
412 {
413 UINT_32 orderLimit;
414 UINT_32 *pMipDim;
415 UINT_32 *pOrderDim;
416
417 if (major == ADDR_MAJOR_Z)
418 {
419 // Z major
420 pMipDim = &numMetaBlkY;
421 pOrderDim = &numMetaBlkZ;
422 orderLimit = 4;
423 }
424 else if (major == ADDR_MAJOR_X)
425 {
426 // X major
427 pMipDim = &numMetaBlkY;
428 pOrderDim = &numMetaBlkX;
429 orderLimit = 4;
430 }
431 else
432 {
433 // Y major
434 pMipDim = &numMetaBlkX;
435 pOrderDim = &numMetaBlkY;
436 orderLimit = 2;
437 }
438
439 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
440 {
441 *pMipDim += 2;
442 }
443 else
444 {
445 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
446 }
447 }
448 }
449
450 if (pInfo != NULL)
451 {
452 UINT_32 mipWidth = mip0Width;
453 UINT_32 mipHeight = mip0Height;
454 UINT_32 mipDepth = mip0Depth;
455 Dim3d mipCoord = {0};
456
457 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
458 {
459 if (inTail)
460 {
461 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
462 pMetaBlkDim);
463 break;
464 }
465 else
466 {
467 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
468 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
469 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
470
471 pInfo[mip].inMiptail = FALSE;
472 pInfo[mip].startX = mipCoord.w;
473 pInfo[mip].startY = mipCoord.h;
474 pInfo[mip].startZ = mipCoord.d;
475 pInfo[mip].width = mipWidth;
476 pInfo[mip].height = mipHeight;
477 pInfo[mip].depth = dataThick ? mipDepth : 1;
478
479 if ((mip >= 3) || (mip & 1))
480 {
481 switch (major)
482 {
483 case ADDR_MAJOR_X:
484 mipCoord.w += mipWidth;
485 break;
486 case ADDR_MAJOR_Y:
487 mipCoord.h += mipHeight;
488 break;
489 case ADDR_MAJOR_Z:
490 mipCoord.d += mipDepth;
491 break;
492 default:
493 break;
494 }
495 }
496 else
497 {
498 switch (major)
499 {
500 case ADDR_MAJOR_X:
501 mipCoord.h += mipHeight;
502 break;
503 case ADDR_MAJOR_Y:
504 mipCoord.w += mipWidth;
505 break;
506 case ADDR_MAJOR_Z:
507 mipCoord.h += mipHeight;
508 break;
509 default:
510 break;
511 }
512 }
513
514 mipWidth = Max(mipWidth >> 1, 1u);
515 mipHeight = Max(mipHeight >> 1, 1u);
516 mipDepth = Max(mipDepth >> 1, 1u);
517
518 inTail = ((mipWidth <= tailWidth) &&
519 (mipHeight <= tailHeight) &&
520 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
521 }
522 }
523 }
524
525 *pNumMetaBlkX = numMetaBlkX;
526 *pNumMetaBlkY = numMetaBlkY;
527 *pNumMetaBlkZ = numMetaBlkZ;
528 }
529
530 /**
531 ************************************************************************************************************************
532 * Gfx9Lib::HwlComputeDccInfo
533 *
534 * @brief
535 * Interface function to compute DCC key info
536 *
537 * @return
538 * ADDR_E_RETURNCODE
539 ************************************************************************************************************************
540 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const541 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
542 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
543 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
544 ) const
545 {
546 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
547 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
548 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
549
550 if (dataLinear)
551 {
552 metaLinear = TRUE;
553 }
554 else if (metaLinear == TRUE)
555 {
556 pipeAligned = FALSE;
557 }
558
559 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
560
561 if (metaLinear)
562 {
563 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
564 ADDR_ASSERT_ALWAYS();
565
566 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
567 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
568 }
569 else
570 {
571 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
572
573 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
574
575 UINT_32 numFrags = Max(pIn->numFrags, 1u);
576 UINT_32 numSlices = Max(pIn->numSlices, 1u);
577
578 minMetaBlkSize /= numFrags;
579
580 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
581
582 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
583
584 if ((numPipeTotal > 1) || (numRbTotal > 1))
585 {
586 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
587
588 numCompressBlkPerMetaBlk =
589 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
590
591 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
592 {
593 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
594 }
595 }
596
597 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
598 Dim3d metaBlkDim = compressBlkDim;
599
600 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
601 {
602 if ((metaBlkDim.h < metaBlkDim.w) ||
603 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
604 {
605 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
606 {
607 metaBlkDim.h <<= 1;
608 }
609 else
610 {
611 metaBlkDim.d <<= 1;
612 }
613 }
614 else
615 {
616 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
617 {
618 metaBlkDim.w <<= 1;
619 }
620 else
621 {
622 metaBlkDim.d <<= 1;
623 }
624 }
625 }
626
627 UINT_32 numMetaBlkX;
628 UINT_32 numMetaBlkY;
629 UINT_32 numMetaBlkZ;
630
631 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
632 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
633 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
634
635 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
636
637 if (numFrags > m_maxCompFrag)
638 {
639 sizeAlign *= (numFrags / m_maxCompFrag);
640 }
641
642 if (m_settings.metaBaseAlignFix)
643 {
644 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
645 }
646
647 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
648 numCompressBlkPerMetaBlk * numFrags;
649 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
650 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
651
652 pOut->pitch = numMetaBlkX * metaBlkDim.w;
653 pOut->height = numMetaBlkY * metaBlkDim.h;
654 pOut->depth = numMetaBlkZ * metaBlkDim.d;
655
656 pOut->compressBlkWidth = compressBlkDim.w;
657 pOut->compressBlkHeight = compressBlkDim.h;
658 pOut->compressBlkDepth = compressBlkDim.d;
659
660 pOut->metaBlkWidth = metaBlkDim.w;
661 pOut->metaBlkHeight = metaBlkDim.h;
662 pOut->metaBlkDepth = metaBlkDim.d;
663 pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
664
665 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
666 pOut->fastClearSizePerSlice =
667 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
668 }
669
670 return ADDR_OK;
671 }
672
673 /**
674 ************************************************************************************************************************
675 * Gfx9Lib::HwlComputeMaxBaseAlignments
676 *
677 * @brief
678 * Gets maximum alignments
679 * @return
680 * maximum alignments
681 ************************************************************************************************************************
682 */
HwlComputeMaxBaseAlignments() const683 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
684 {
685 return Size64K;
686 }
687
688 /**
689 ************************************************************************************************************************
690 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
691 *
692 * @brief
693 * Gets maximum alignments for metadata
694 * @return
695 * maximum alignments for metadata
696 ************************************************************************************************************************
697 */
HwlComputeMaxMetaBaseAlignments() const698 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
699 {
700 // Max base alignment for Htile
701 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
702 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
703
704 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
705 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
706 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
707 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
708
709 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
710
711 if (maxNumPipeTotal > 2)
712 {
713 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
714 }
715
716 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
717
718 if (m_settings.metaBaseAlignFix)
719 {
720 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
721 }
722
723 if (m_settings.htileAlignFix)
724 {
725 maxBaseAlignHtile *= maxNumPipeTotal;
726 }
727
728 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
729
730 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
731 UINT_32 maxBaseAlignDcc3D = 65536;
732
733 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
734 {
735 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
736 }
737
738 // Max base alignment for Msaa Dcc
739 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
740
741 if (m_settings.metaBaseAlignFix)
742 {
743 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
744 }
745
746 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
747 }
748
749 /**
750 ************************************************************************************************************************
751 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
752 *
753 * @brief
754 * Interface function stub of AddrComputeCmaskAddrFromCoord
755 *
756 * @return
757 * ADDR_E_RETURNCODE
758 ************************************************************************************************************************
759 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)760 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
761 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
762 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
763 {
764 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
765 input.size = sizeof(input);
766 input.cMaskFlags = pIn->cMaskFlags;
767 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
768 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
769 input.numSlices = Max(pIn->numSlices, 1u);
770 input.swizzleMode = pIn->swizzleMode;
771 input.resourceType = pIn->resourceType;
772
773 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
774 output.size = sizeof(output);
775
776 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
777
778 if (returnCode == ADDR_OK)
779 {
780 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
781 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
782 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
783 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
784
785 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
786 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
787 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
788
789 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
790
791 UINT_32 xb = pIn->x / output.metaBlkWidth;
792 UINT_32 yb = pIn->y / output.metaBlkHeight;
793 UINT_32 zb = pIn->slice;
794
795 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
796 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
797 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
798
799 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
800 UINT_64 address = pMetaEq->solve(coords);
801
802 pOut->addr = address >> 1;
803 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
804
805
806 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
807 pIn->swizzleMode);
808
809 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
810
811 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
812 }
813
814 return returnCode;
815 }
816
817 /**
818 ************************************************************************************************************************
819 * Gfx9Lib::HwlComputeHtileAddrFromCoord
820 *
821 * @brief
822 * Interface function stub of AddrComputeHtileAddrFromCoord
823 *
824 * @return
825 * ADDR_E_RETURNCODE
826 ************************************************************************************************************************
827 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)828 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
829 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
830 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
831 {
832 ADDR_E_RETURNCODE returnCode = ADDR_OK;
833
834 if (pIn->numMipLevels > 1)
835 {
836 returnCode = ADDR_NOTIMPLEMENTED;
837 }
838 else
839 {
840 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
841 input.size = sizeof(input);
842 input.hTileFlags = pIn->hTileFlags;
843 input.depthFlags = pIn->depthflags;
844 input.swizzleMode = pIn->swizzleMode;
845 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
846 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
847 input.numSlices = Max(pIn->numSlices, 1u);
848 input.numMipLevels = Max(pIn->numMipLevels, 1u);
849
850 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
851 output.size = sizeof(output);
852
853 returnCode = ComputeHtileInfo(&input, &output);
854
855 if (returnCode == ADDR_OK)
856 {
857 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
858 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
859 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
860 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
861
862 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
863 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
864 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
865
866 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
867
868 UINT_32 xb = pIn->x / output.metaBlkWidth;
869 UINT_32 yb = pIn->y / output.metaBlkHeight;
870 UINT_32 zb = pIn->slice;
871
872 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
873 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
874 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
875
876 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, 0, blockIndex };
877 UINT_64 address = pMetaEq->solve(coords);
878
879 pOut->addr = address >> 1;
880
881 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
882 pIn->swizzleMode);
883
884 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
885
886 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
887 }
888 }
889
890 return returnCode;
891 }
892
893 /**
894 ************************************************************************************************************************
895 * Gfx9Lib::HwlComputeHtileCoordFromAddr
896 *
897 * @brief
898 * Interface function stub of AddrComputeHtileCoordFromAddr
899 *
900 * @return
901 * ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
905 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
906 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
907 {
908 ADDR_E_RETURNCODE returnCode = ADDR_OK;
909
910 if (pIn->numMipLevels > 1)
911 {
912 returnCode = ADDR_NOTIMPLEMENTED;
913 }
914 else
915 {
916 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
917 input.size = sizeof(input);
918 input.hTileFlags = pIn->hTileFlags;
919 input.swizzleMode = pIn->swizzleMode;
920 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
921 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
922 input.numSlices = Max(pIn->numSlices, 1u);
923 input.numMipLevels = Max(pIn->numMipLevels, 1u);
924
925 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
926 output.size = sizeof(output);
927
928 returnCode = ComputeHtileInfo(&input, &output);
929
930 if (returnCode == ADDR_OK)
931 {
932 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
933 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
934 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
935 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
936
937 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
938 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
939 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
940
941 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
942
943 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
944 pIn->swizzleMode);
945
946 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
947
948 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
949
950 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
951 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
952
953 UINT_32 coords[NUM_DIMS];
954 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
955
956 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
957 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
958 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
959 }
960 }
961
962 return returnCode;
963 }
964
965 /**
966 ************************************************************************************************************************
967 * Gfx9Lib::HwlComputeDccAddrFromCoord
968 *
969 * @brief
970 * Interface function stub of AddrComputeDccAddrFromCoord
971 *
972 * @return
973 * ADDR_E_RETURNCODE
974 ************************************************************************************************************************
975 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)976 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
977 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
978 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
979 {
980 ADDR_E_RETURNCODE returnCode = ADDR_OK;
981
982 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
983 {
984 returnCode = ADDR_NOTIMPLEMENTED;
985 }
986 else
987 {
988 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
989 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
990 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
991 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
992 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
993 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
994 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
995 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
996
997 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
998 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
999 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1000 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1001
1002 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1003
1004 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1005 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1006 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1007
1008 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1009 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1010 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1011
1012 UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
1013 UINT_64 address = pMetaEq->solve(coords);
1014
1015 pOut->addr = address >> 1;
1016
1017 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1018 pIn->swizzleMode);
1019
1020 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1021
1022 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1023 }
1024
1025 return returnCode;
1026 }
1027
1028 /**
1029 ************************************************************************************************************************
1030 * Gfx9Lib::HwlInitGlobalParams
1031 *
1032 * @brief
1033 * Initializes global parameters
1034 *
1035 * @return
1036 * TRUE if all settings are valid
1037 *
1038 ************************************************************************************************************************
1039 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1040 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1041 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1042 {
1043 BOOL_32 valid = TRUE;
1044
1045 if (m_settings.isArcticIsland)
1046 {
1047 GB_ADDR_CONFIG_gfx9 gbAddrConfig;
1048
1049 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1050
1051 // These values are copied from CModel code
1052 switch (gbAddrConfig.bits.NUM_PIPES)
1053 {
1054 case ADDR_CONFIG_1_PIPE:
1055 m_pipes = 1;
1056 m_pipesLog2 = 0;
1057 break;
1058 case ADDR_CONFIG_2_PIPE:
1059 m_pipes = 2;
1060 m_pipesLog2 = 1;
1061 break;
1062 case ADDR_CONFIG_4_PIPE:
1063 m_pipes = 4;
1064 m_pipesLog2 = 2;
1065 break;
1066 case ADDR_CONFIG_8_PIPE:
1067 m_pipes = 8;
1068 m_pipesLog2 = 3;
1069 break;
1070 case ADDR_CONFIG_16_PIPE:
1071 m_pipes = 16;
1072 m_pipesLog2 = 4;
1073 break;
1074 case ADDR_CONFIG_32_PIPE:
1075 m_pipes = 32;
1076 m_pipesLog2 = 5;
1077 break;
1078 default:
1079 ADDR_ASSERT_ALWAYS();
1080 break;
1081 }
1082
1083 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1084 {
1085 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1086 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1087 m_pipeInterleaveLog2 = 8;
1088 break;
1089 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1090 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1091 m_pipeInterleaveLog2 = 9;
1092 break;
1093 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1094 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1095 m_pipeInterleaveLog2 = 10;
1096 break;
1097 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1098 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1099 m_pipeInterleaveLog2 = 11;
1100 break;
1101 default:
1102 ADDR_ASSERT_ALWAYS();
1103 break;
1104 }
1105
1106 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1107 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1108 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1109
1110 switch (gbAddrConfig.bits.NUM_BANKS)
1111 {
1112 case ADDR_CONFIG_1_BANK:
1113 m_banks = 1;
1114 m_banksLog2 = 0;
1115 break;
1116 case ADDR_CONFIG_2_BANK:
1117 m_banks = 2;
1118 m_banksLog2 = 1;
1119 break;
1120 case ADDR_CONFIG_4_BANK:
1121 m_banks = 4;
1122 m_banksLog2 = 2;
1123 break;
1124 case ADDR_CONFIG_8_BANK:
1125 m_banks = 8;
1126 m_banksLog2 = 3;
1127 break;
1128 case ADDR_CONFIG_16_BANK:
1129 m_banks = 16;
1130 m_banksLog2 = 4;
1131 break;
1132 default:
1133 ADDR_ASSERT_ALWAYS();
1134 break;
1135 }
1136
1137 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1138 {
1139 case ADDR_CONFIG_1_SHADER_ENGINE:
1140 m_se = 1;
1141 m_seLog2 = 0;
1142 break;
1143 case ADDR_CONFIG_2_SHADER_ENGINE:
1144 m_se = 2;
1145 m_seLog2 = 1;
1146 break;
1147 case ADDR_CONFIG_4_SHADER_ENGINE:
1148 m_se = 4;
1149 m_seLog2 = 2;
1150 break;
1151 case ADDR_CONFIG_8_SHADER_ENGINE:
1152 m_se = 8;
1153 m_seLog2 = 3;
1154 break;
1155 default:
1156 ADDR_ASSERT_ALWAYS();
1157 break;
1158 }
1159
1160 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1161 {
1162 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1163 m_rbPerSe = 1;
1164 m_rbPerSeLog2 = 0;
1165 break;
1166 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1167 m_rbPerSe = 2;
1168 m_rbPerSeLog2 = 1;
1169 break;
1170 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1171 m_rbPerSe = 4;
1172 m_rbPerSeLog2 = 2;
1173 break;
1174 default:
1175 ADDR_ASSERT_ALWAYS();
1176 break;
1177 }
1178
1179 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1180 {
1181 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1182 m_maxCompFrag = 1;
1183 m_maxCompFragLog2 = 0;
1184 break;
1185 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1186 m_maxCompFrag = 2;
1187 m_maxCompFragLog2 = 1;
1188 break;
1189 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1190 m_maxCompFrag = 4;
1191 m_maxCompFragLog2 = 2;
1192 break;
1193 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1194 m_maxCompFrag = 8;
1195 m_maxCompFragLog2 = 3;
1196 break;
1197 default:
1198 ADDR_ASSERT_ALWAYS();
1199 break;
1200 }
1201
1202 if ((m_rbPerSeLog2 == 1) &&
1203 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1204 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1205 {
1206 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1207
1208 ADDR_ASSERT(m_settings.isRaven == FALSE);
1209
1210 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1211
1212 if (m_settings.isVega12)
1213 {
1214 m_settings.htileCacheRbConflict = 1;
1215 }
1216 }
1217
1218 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1219 m_blockVarSizeLog2 = 0;
1220 }
1221 else
1222 {
1223 valid = FALSE;
1224 ADDR_NOT_IMPLEMENTED();
1225 }
1226
1227 if (valid)
1228 {
1229 InitEquationTable();
1230 }
1231
1232 return valid;
1233 }
1234
1235 /**
1236 ************************************************************************************************************************
1237 * Gfx9Lib::HwlConvertChipFamily
1238 *
1239 * @brief
1240 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1241 * @return
1242 * ChipFamily
1243 ************************************************************************************************************************
1244 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1245 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1246 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1247 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1248 {
1249 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1250
1251 switch (uChipFamily)
1252 {
1253 case FAMILY_AI:
1254 m_settings.isArcticIsland = 1;
1255 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1256 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1257 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1258 m_settings.isDce12 = 1;
1259
1260 if (m_settings.isVega10 == 0)
1261 {
1262 m_settings.htileAlignFix = 1;
1263 m_settings.applyAliasFix = 1;
1264 }
1265
1266 m_settings.metaBaseAlignFix = 1;
1267
1268 m_settings.depthPipeXorDisable = 1;
1269 break;
1270 case FAMILY_RV:
1271 m_settings.isArcticIsland = 1;
1272
1273 if (ASICREV_IS_RAVEN(uChipRevision))
1274 {
1275 m_settings.isRaven = 1;
1276
1277 m_settings.depthPipeXorDisable = 1;
1278 }
1279
1280 if (ASICREV_IS_RAVEN2(uChipRevision))
1281 {
1282 m_settings.isRaven = 1;
1283 }
1284
1285 if (m_settings.isRaven == 0)
1286 {
1287 m_settings.htileAlignFix = 1;
1288 m_settings.applyAliasFix = 1;
1289 }
1290
1291 m_settings.isDcn1 = m_settings.isRaven;
1292
1293 if (ASICREV_IS_RENOIR(uChipRevision))
1294 {
1295 m_settings.isRaven = 1;
1296 m_settings.isDcn2 = 1;
1297 }
1298
1299 m_settings.metaBaseAlignFix = 1;
1300 break;
1301
1302 default:
1303 ADDR_ASSERT(!"No Chip found");
1304 break;
1305 }
1306
1307 return family;
1308 }
1309
1310 /**
1311 ************************************************************************************************************************
1312 * Gfx9Lib::GetRbEquation
1313 *
1314 * @brief
1315 * Get RB equation
1316 * @return
1317 * N/A
1318 ************************************************************************************************************************
1319 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1320 VOID Gfx9Lib::GetRbEquation(
1321 CoordEq* pRbEq, ///< [out] rb equation
1322 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1323 UINT_32 numSeLog2) ///< [in] number of shader engine
1324 const
1325 {
1326 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1327 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1328 Coordinate cx(DIM_X, rbRegion);
1329 Coordinate cy(DIM_Y, rbRegion);
1330
1331 UINT_32 start = 0;
1332 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1333
1334 // Clear the rb equation
1335 pRbEq->resize(0);
1336 pRbEq->resize(numRbTotalLog2);
1337
1338 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1339 {
1340 // Special case when more than 1 SE, and 2 RB per SE
1341 (*pRbEq)[0].add(cx);
1342 (*pRbEq)[0].add(cy);
1343 cx++;
1344 cy++;
1345
1346 if (m_settings.applyAliasFix == false)
1347 {
1348 (*pRbEq)[0].add(cy);
1349 }
1350
1351 (*pRbEq)[0].add(cy);
1352 start++;
1353 }
1354
1355 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1356
1357 for (UINT_32 i = 0; i < numBits; i++)
1358 {
1359 UINT_32 idx =
1360 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1361
1362 if ((i % 2) == 1)
1363 {
1364 (*pRbEq)[idx].add(cx);
1365 cx++;
1366 }
1367 else
1368 {
1369 (*pRbEq)[idx].add(cy);
1370 cy++;
1371 }
1372 }
1373 }
1374
1375 /**
1376 ************************************************************************************************************************
1377 * Gfx9Lib::GetDataEquation
1378 *
1379 * @brief
1380 * Get data equation for fmask and Z
1381 * @return
1382 * N/A
1383 ************************************************************************************************************************
1384 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1385 VOID Gfx9Lib::GetDataEquation(
1386 CoordEq* pDataEq, ///< [out] data surface equation
1387 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1388 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1389 AddrResourceType resourceType, ///< [in] data surface resource type
1390 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1391 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1392 const
1393 {
1394 Coordinate cx(DIM_X, 0);
1395 Coordinate cy(DIM_Y, 0);
1396 Coordinate cz(DIM_Z, 0);
1397 Coordinate cs(DIM_S, 0);
1398
1399 // Clear the equation
1400 pDataEq->resize(0);
1401 pDataEq->resize(27);
1402
1403 if (dataSurfaceType == Gfx9DataColor)
1404 {
1405 if (IsLinear(swizzleMode))
1406 {
1407 Coordinate cm(DIM_M, 0);
1408
1409 pDataEq->resize(49);
1410
1411 for (UINT_32 i = 0; i < 49; i++)
1412 {
1413 (*pDataEq)[i].add(cm);
1414 cm++;
1415 }
1416 }
1417 else if (IsThick(resourceType, swizzleMode))
1418 {
1419 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1420 UINT_32 i;
1421 if (IsStandardSwizzle(resourceType, swizzleMode))
1422 {
1423 // Standard 3d swizzle
1424 // Fill in bottom x bits
1425 for (i = elementBytesLog2; i < 4; i++)
1426 {
1427 (*pDataEq)[i].add(cx);
1428 cx++;
1429 }
1430 // Fill in 2 bits of y and then z
1431 for (i = 4; i < 6; i++)
1432 {
1433 (*pDataEq)[i].add(cy);
1434 cy++;
1435 }
1436 for (i = 6; i < 8; i++)
1437 {
1438 (*pDataEq)[i].add(cz);
1439 cz++;
1440 }
1441 if (elementBytesLog2 < 2)
1442 {
1443 // fill in z & y bit
1444 (*pDataEq)[8].add(cz);
1445 (*pDataEq)[9].add(cy);
1446 cz++;
1447 cy++;
1448 }
1449 else if (elementBytesLog2 == 2)
1450 {
1451 // fill in y and x bit
1452 (*pDataEq)[8].add(cy);
1453 (*pDataEq)[9].add(cx);
1454 cy++;
1455 cx++;
1456 }
1457 else
1458 {
1459 // fill in 2 x bits
1460 (*pDataEq)[8].add(cx);
1461 cx++;
1462 (*pDataEq)[9].add(cx);
1463 cx++;
1464 }
1465 }
1466 else
1467 {
1468 // Z 3d swizzle
1469 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1470 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1471 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1472 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1473 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1474 {
1475 (*pDataEq)[i].add(cz);
1476 cz++;
1477 }
1478 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1479 {
1480 // add an x and z
1481 (*pDataEq)[6].add(cx);
1482 (*pDataEq)[7].add(cz);
1483 cx++;
1484 cz++;
1485 }
1486 else if (elementBytesLog2 == 2)
1487 {
1488 // add a y and z
1489 (*pDataEq)[6].add(cy);
1490 (*pDataEq)[7].add(cz);
1491 cy++;
1492 cz++;
1493 }
1494 // add y and x
1495 (*pDataEq)[8].add(cy);
1496 (*pDataEq)[9].add(cx);
1497 cy++;
1498 cx++;
1499 }
1500 // Fill in bit 10 and up
1501 pDataEq->mort3d( cz, cy, cx, 10 );
1502 }
1503 else if (IsThin(resourceType, swizzleMode))
1504 {
1505 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1506 // Color 2D
1507 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1508 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1509 UINT_32 i;
1510 // Fill in bottom x bits
1511 for (i = elementBytesLog2; i < 4; i++)
1512 {
1513 (*pDataEq)[i].add(cx);
1514 cx++;
1515 }
1516 // Fill in bottom y bits
1517 for (i = 4; i < 4 + microYBits; i++)
1518 {
1519 (*pDataEq)[i].add(cy);
1520 cy++;
1521 }
1522 // Fill in last of the micro_x bits
1523 for (i = 4 + microYBits; i < 8; i++)
1524 {
1525 (*pDataEq)[i].add(cx);
1526 cx++;
1527 }
1528 // Fill in x/y bits below sample split
1529 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1530 // Fill in sample bits
1531 for (i = 0; i < numSamplesLog2; i++)
1532 {
1533 cs.set(DIM_S, i);
1534 (*pDataEq)[tileSplitStart + i].add(cs);
1535 }
1536 // Fill in x/y bits above sample split
1537 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1538 {
1539 pDataEq->mort2d(cx, cy, blockSizeLog2);
1540 }
1541 else
1542 {
1543 pDataEq->mort2d(cy, cx, blockSizeLog2);
1544 }
1545 }
1546 else
1547 {
1548 ADDR_ASSERT_ALWAYS();
1549 }
1550 }
1551 else
1552 {
1553 // Fmask or depth
1554 UINT_32 sampleStart = elementBytesLog2;
1555 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1556 UINT_32 ymajStart = 6 + numSamplesLog2;
1557
1558 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1559 {
1560 cs.set(DIM_S, s);
1561 (*pDataEq)[sampleStart + s].add(cs);
1562 }
1563
1564 // Put in the x-major order pixel bits
1565 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1566 // Put in the y-major order pixel bits
1567 pDataEq->mort2d(cy, cx, ymajStart);
1568 }
1569 }
1570
1571 /**
1572 ************************************************************************************************************************
1573 * Gfx9Lib::GetPipeEquation
1574 *
1575 * @brief
1576 * Get pipe equation
1577 * @return
1578 * N/A
1579 ************************************************************************************************************************
1580 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1581 VOID Gfx9Lib::GetPipeEquation(
1582 CoordEq* pPipeEq, ///< [out] pipe equation
1583 CoordEq* pDataEq, ///< [in] data equation
1584 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1585 UINT_32 numPipeLog2, ///< [in] number of pipes
1586 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1587 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1588 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1589 AddrResourceType resourceType ///< [in] data surface resource type
1590 ) const
1591 {
1592 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1593 CoordEq dataEq;
1594
1595 pDataEq->copy(dataEq);
1596
1597 if (dataSurfaceType == Gfx9DataColor)
1598 {
1599 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1600 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1601 }
1602
1603 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1604
1605 // This section should only apply to z/stencil, maybe fmask
1606 // If the pipe bit is below the comp block size,
1607 // then keep moving up the address until we find a bit that is above
1608 UINT_32 pipeStart = 0;
1609
1610 if (dataSurfaceType != Gfx9DataColor)
1611 {
1612 Coordinate tileMin(DIM_X, 3);
1613
1614 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1615 {
1616 pipeStart++;
1617 }
1618
1619 // if pipe is 0, then the first pipe bit is above the comp block size,
1620 // so we don't need to do anything
1621 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1622 // we will get the same pipe equation
1623 if (pipeStart != 0)
1624 {
1625 for (UINT_32 i = 0; i < numPipeLog2; i++)
1626 {
1627 // Copy the jth bit above pipe interleave to the current pipe equation bit
1628 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1629 }
1630 }
1631 }
1632
1633 if (IsPrt(swizzleMode))
1634 {
1635 // Clear out bits above the block size if prt's are enabled
1636 dataEq.resize(blockSizeLog2);
1637 dataEq.resize(48);
1638 }
1639
1640 if (IsXor(swizzleMode))
1641 {
1642 CoordEq xorMask;
1643
1644 if (IsThick(resourceType, swizzleMode))
1645 {
1646 CoordEq xorMask2;
1647
1648 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1649
1650 xorMask.resize(numPipeLog2);
1651
1652 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1653 {
1654 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1655 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1656 }
1657 }
1658 else
1659 {
1660 // Xor in the bits above the pipe+gpu bits
1661 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1662
1663 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1664 {
1665 Coordinate co;
1666 CoordEq xorMask2;
1667 // if 1xaa and not prt, then xor in the z bits
1668 xorMask2.resize(0);
1669 xorMask2.resize(numPipeLog2);
1670 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1671 {
1672 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1673 xorMask2[pipeIdx].add(co);
1674 }
1675
1676 pPipeEq->xorin(xorMask2);
1677 }
1678 }
1679
1680 xorMask.reverse();
1681 pPipeEq->xorin(xorMask);
1682 }
1683 }
1684 /**
1685 ************************************************************************************************************************
1686 * Gfx9Lib::GetMetaEquation
1687 *
1688 * @brief
1689 * Get meta equation for cmask/htile/DCC
1690 * @return
1691 * Pointer to a calculated meta equation
1692 ************************************************************************************************************************
1693 */
GetMetaEquation(const MetaEqParams & metaEqParams)1694 const CoordEq* Gfx9Lib::GetMetaEquation(
1695 const MetaEqParams& metaEqParams)
1696 {
1697 UINT_32 cachedMetaEqIndex;
1698
1699 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1700 {
1701 if (memcmp(&metaEqParams,
1702 &m_cachedMetaEqKey[cachedMetaEqIndex],
1703 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1704 {
1705 break;
1706 }
1707 }
1708
1709 CoordEq* pMetaEq = NULL;
1710
1711 if (cachedMetaEqIndex < MaxCachedMetaEq)
1712 {
1713 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1714 }
1715 else
1716 {
1717 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1718
1719 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1720
1721 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1722
1723 GenMetaEquation(pMetaEq,
1724 metaEqParams.maxMip,
1725 metaEqParams.elementBytesLog2,
1726 metaEqParams.numSamplesLog2,
1727 metaEqParams.metaFlag,
1728 metaEqParams.dataSurfaceType,
1729 metaEqParams.swizzleMode,
1730 metaEqParams.resourceType,
1731 metaEqParams.metaBlkWidthLog2,
1732 metaEqParams.metaBlkHeightLog2,
1733 metaEqParams.metaBlkDepthLog2,
1734 metaEqParams.compBlkWidthLog2,
1735 metaEqParams.compBlkHeightLog2,
1736 metaEqParams.compBlkDepthLog2);
1737 }
1738
1739 return pMetaEq;
1740 }
1741
1742 /**
1743 ************************************************************************************************************************
1744 * Gfx9Lib::GenMetaEquation
1745 *
1746 * @brief
1747 * Generate meta equation for cmask/htile/DCC
1748 * @return
1749 * N/A
1750 ************************************************************************************************************************
1751 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1752 VOID Gfx9Lib::GenMetaEquation(
1753 CoordEq* pMetaEq, ///< [out] meta equation
1754 UINT_32 maxMip, ///< [in] max mip Id
1755 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1756 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1757 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1758 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1759 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1760 AddrResourceType resourceType, ///< [in] data surface resource type
1761 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1762 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1763 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1764 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1765 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1766 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1767 const
1768 {
1769 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1770 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1771
1772 // Get the correct data address and rb equation
1773 CoordEq dataEq;
1774 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1775 elementBytesLog2, numSamplesLog2);
1776
1777 // Get pipe and rb equations
1778 CoordEq pipeEquation;
1779 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1780 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1781 numPipeTotalLog2 = pipeEquation.getsize();
1782
1783 if (metaFlag.linear)
1784 {
1785 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1786 ADDR_ASSERT_ALWAYS();
1787
1788 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1789
1790 dataEq.copy(*pMetaEq);
1791
1792 if (IsLinear(swizzleMode))
1793 {
1794 if (metaFlag.pipeAligned)
1795 {
1796 // Remove the pipe bits
1797 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1798 pMetaEq->shift(-shift, pipeInterleaveLog2);
1799 }
1800 // Divide by comp block size, which for linear (which is always color) is 256 B
1801 pMetaEq->shift(-8);
1802
1803 if (metaFlag.pipeAligned)
1804 {
1805 // Put pipe bits back in
1806 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1807
1808 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1809 {
1810 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1811 }
1812 }
1813 }
1814
1815 pMetaEq->shift(1);
1816 }
1817 else
1818 {
1819 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1820 UINT_32 compFragLog2 =
1821 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1822 maxCompFragLog2 : numSamplesLog2;
1823
1824 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1825
1826 // Make sure the metaaddr is cleared
1827 pMetaEq->resize(0);
1828 pMetaEq->resize(27);
1829
1830 if (IsThick(resourceType, swizzleMode))
1831 {
1832 Coordinate cx(DIM_X, 0);
1833 Coordinate cy(DIM_Y, 0);
1834 Coordinate cz(DIM_Z, 0);
1835
1836 if (maxMip > 0)
1837 {
1838 pMetaEq->mort3d(cy, cx, cz);
1839 }
1840 else
1841 {
1842 pMetaEq->mort3d(cx, cy, cz);
1843 }
1844 }
1845 else
1846 {
1847 Coordinate cx(DIM_X, 0);
1848 Coordinate cy(DIM_Y, 0);
1849 Coordinate cs;
1850
1851 if (maxMip > 0)
1852 {
1853 pMetaEq->mort2d(cy, cx, compFragLog2);
1854 }
1855 else
1856 {
1857 pMetaEq->mort2d(cx, cy, compFragLog2);
1858 }
1859
1860 //------------------------------------------------------------------------------------------------------------------------
1861 // Put the compressible fragments at the lsb
1862 // the uncompressible frags will be at the msb of the micro address
1863 //------------------------------------------------------------------------------------------------------------------------
1864 for (UINT_32 s = 0; s < compFragLog2; s++)
1865 {
1866 cs.set(DIM_S, s);
1867 (*pMetaEq)[s].add(cs);
1868 }
1869 }
1870
1871 // Keep a copy of the pipe equations
1872 CoordEq origPipeEquation;
1873 pipeEquation.copy(origPipeEquation);
1874
1875 Coordinate co;
1876 // filter out everything under the compressed block size
1877 co.set(DIM_X, compBlkWidthLog2);
1878 pMetaEq->Filter('<', co, 0, DIM_X);
1879 co.set(DIM_Y, compBlkHeightLog2);
1880 pMetaEq->Filter('<', co, 0, DIM_Y);
1881 co.set(DIM_Z, compBlkDepthLog2);
1882 pMetaEq->Filter('<', co, 0, DIM_Z);
1883
1884 // For non-color, filter out sample bits
1885 if (dataSurfaceType != Gfx9DataColor)
1886 {
1887 co.set(DIM_X, 0);
1888 pMetaEq->Filter('<', co, 0, DIM_S);
1889 }
1890
1891 // filter out everything above the metablock size
1892 co.set(DIM_X, metaBlkWidthLog2 - 1);
1893 pMetaEq->Filter('>', co, 0, DIM_X);
1894 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1895 pMetaEq->Filter('>', co, 0, DIM_Y);
1896 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1897 pMetaEq->Filter('>', co, 0, DIM_Z);
1898
1899 // filter out everything above the metablock size for the channel bits
1900 co.set(DIM_X, metaBlkWidthLog2 - 1);
1901 pipeEquation.Filter('>', co, 0, DIM_X);
1902 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1903 pipeEquation.Filter('>', co, 0, DIM_Y);
1904 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1905 pipeEquation.Filter('>', co, 0, DIM_Z);
1906
1907 // Make sure we still have the same number of channel bits
1908 if (pipeEquation.getsize() != numPipeTotalLog2)
1909 {
1910 ADDR_ASSERT_ALWAYS();
1911 }
1912
1913 // Loop through all channel and rb bits,
1914 // and make sure these components exist in the metadata address
1915 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1916 {
1917 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1918 {
1919 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1920 {
1921 ADDR_ASSERT_ALWAYS();
1922 }
1923 }
1924 }
1925
1926 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1927 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1928 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1929 CoordEq origRbEquation;
1930
1931 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1932
1933 CoordEq rbEquation = origRbEquation;
1934
1935 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1936 {
1937 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1938 {
1939 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1940 {
1941 ADDR_ASSERT_ALWAYS();
1942 }
1943 }
1944 }
1945
1946 if (m_settings.applyAliasFix)
1947 {
1948 co.set(DIM_Z, -1);
1949 }
1950
1951 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1952 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1953 {
1954 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1955 {
1956 BOOL_32 isRbEquationInPipeEquation = FALSE;
1957
1958 if (m_settings.applyAliasFix)
1959 {
1960 CoordTerm filteredPipeEq;
1961 filteredPipeEq = pipeEquation[j];
1962
1963 filteredPipeEq.Filter('>', co, 0, DIM_Z);
1964
1965 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1966 }
1967 else
1968 {
1969 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1970 }
1971
1972 if (isRbEquationInPipeEquation)
1973 {
1974 rbEquation[i].Clear();
1975 }
1976 }
1977 }
1978
1979 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {0};
1980
1981 // Loop through each bit of the channel, get the smallest coordinate,
1982 // and remove it from the metaaddr, and rb_equation
1983 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1984 {
1985 pipeEquation[i].getsmallest(co);
1986
1987 UINT_32 old_size = pMetaEq->getsize();
1988 pMetaEq->Filter('=', co);
1989 UINT_32 new_size = pMetaEq->getsize();
1990 if (new_size != old_size-1)
1991 {
1992 ADDR_ASSERT_ALWAYS();
1993 }
1994 pipeEquation.remove(co);
1995 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1996 {
1997 if (rbEquation[j].remove(co))
1998 {
1999 // if we actually removed something from this bit, then add the remaining
2000 // channel bits, as these can be removed for this bit
2001 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2002 {
2003 if (pipeEquation[i][k] != co)
2004 {
2005 rbEquation[j].add(pipeEquation[i][k]);
2006 rbAppendedWithPipeBits[j] = true;
2007 }
2008 }
2009 }
2010 }
2011 }
2012
2013 // Loop through the rb bits and see what remain;
2014 // filter out the smallest coordinate if it remains
2015 UINT_32 rbBitsLeft = 0;
2016 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2017 {
2018 BOOL_32 isRbEqAppended = FALSE;
2019
2020 if (m_settings.applyAliasFix)
2021 {
2022 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2023 }
2024 else
2025 {
2026 isRbEqAppended = (rbEquation[i].getsize() > 0);
2027 }
2028
2029 if (isRbEqAppended)
2030 {
2031 rbBitsLeft++;
2032 rbEquation[i].getsmallest(co);
2033 UINT_32 old_size = pMetaEq->getsize();
2034 pMetaEq->Filter('=', co);
2035 UINT_32 new_size = pMetaEq->getsize();
2036 if (new_size != old_size - 1)
2037 {
2038 // assert warning
2039 }
2040 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2041 {
2042 if (rbEquation[j].remove(co))
2043 {
2044 // if we actually removed something from this bit, then add the remaining
2045 // rb bits, as these can be removed for this bit
2046 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2047 {
2048 if (rbEquation[i][k] != co)
2049 {
2050 rbEquation[j].add(rbEquation[i][k]);
2051 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2052 }
2053 }
2054 }
2055 }
2056 }
2057 }
2058
2059 // capture the size of the metaaddr
2060 UINT_32 metaSize = pMetaEq->getsize();
2061 // resize to 49 bits...make this a nibble address
2062 pMetaEq->resize(49);
2063 // Concatenate the macro address above the current address
2064 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2065 {
2066 co.set(DIM_M, j);
2067 (*pMetaEq)[i].add(co);
2068 }
2069
2070 // Multiply by meta element size (in nibbles)
2071 if (dataSurfaceType == Gfx9DataColor)
2072 {
2073 pMetaEq->shift(1);
2074 }
2075 else if (dataSurfaceType == Gfx9DataDepthStencil)
2076 {
2077 pMetaEq->shift(3);
2078 }
2079
2080 //------------------------------------------------------------------------------------------
2081 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2082 // Shift up from pipe interleave number of channel
2083 // and rb bits left, and uncompressed fragments
2084 //------------------------------------------------------------------------------------------
2085
2086 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2087
2088 // Put in the channel bits
2089 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2090 {
2091 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2092 }
2093
2094 // Put in remaining rb bits
2095 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2096 {
2097 BOOL_32 isRbEqAppended = FALSE;
2098
2099 if (m_settings.applyAliasFix)
2100 {
2101 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2102 }
2103 else
2104 {
2105 isRbEqAppended = (rbEquation[i].getsize() > 0);
2106 }
2107
2108 if (isRbEqAppended)
2109 {
2110 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2111 // Mark any rb bit we add in to the rb mask
2112 j++;
2113 }
2114 }
2115
2116 //------------------------------------------------------------------------------------------
2117 // Put in the uncompressed fragment bits
2118 //------------------------------------------------------------------------------------------
2119 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2120 {
2121 co.set(DIM_S, compFragLog2 + i);
2122 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2123 }
2124 }
2125 }
2126
2127 /**
2128 ************************************************************************************************************************
2129 * Gfx9Lib::IsEquationSupported
2130 *
2131 * @brief
2132 * Check if equation is supported for given swizzle mode and resource type.
2133 *
2134 * @return
2135 * TRUE if supported
2136 ************************************************************************************************************************
2137 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2138 BOOL_32 Gfx9Lib::IsEquationSupported(
2139 AddrResourceType rsrcType,
2140 AddrSwizzleMode swMode,
2141 UINT_32 elementBytesLog2) const
2142 {
2143 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2144 (IsValidSwMode(swMode) == TRUE) &&
2145 (IsLinear(swMode) == FALSE) &&
2146 (((IsTex2d(rsrcType) == TRUE) &&
2147 ((elementBytesLog2 < 4) ||
2148 ((IsRotateSwizzle(swMode) == FALSE) &&
2149 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2150 ((IsTex3d(rsrcType) == TRUE) &&
2151 (IsRotateSwizzle(swMode) == FALSE) &&
2152 (IsBlock256b(swMode) == FALSE)));
2153
2154 return supported;
2155 }
2156
2157 /**
2158 ************************************************************************************************************************
2159 * Gfx9Lib::InitEquationTable
2160 *
2161 * @brief
2162 * Initialize Equation table.
2163 *
2164 * @return
2165 * N/A
2166 ************************************************************************************************************************
2167 */
InitEquationTable()2168 VOID Gfx9Lib::InitEquationTable()
2169 {
2170 memset(m_equationTable, 0, sizeof(m_equationTable));
2171
2172 // Loop all possible resource type (2D/3D)
2173 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2174 {
2175 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2176
2177 // Loop all possible swizzle mode
2178 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2179 {
2180 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2181
2182 // Loop all possible bpp
2183 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2184 {
2185 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2186
2187 // Check if the input is supported
2188 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2189 {
2190 ADDR_EQUATION equation;
2191 ADDR_E_RETURNCODE retCode;
2192
2193 memset(&equation, 0, sizeof(ADDR_EQUATION));
2194
2195 // Generate the equation
2196 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2197 {
2198 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2199 }
2200 else if (IsThin(rsrcType, swMode))
2201 {
2202 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2203 }
2204 else
2205 {
2206 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2207 }
2208
2209 // Only fill the equation into the table if the return code is ADDR_OK,
2210 // otherwise if the return code is not ADDR_OK, it indicates this is not
2211 // a valid input, we do nothing but just fill invalid equation index
2212 // into the lookup table.
2213 if (retCode == ADDR_OK)
2214 {
2215 equationIndex = m_numEquations;
2216 ADDR_ASSERT(equationIndex < EquationTableSize);
2217
2218 m_equationTable[equationIndex] = equation;
2219
2220 m_numEquations++;
2221 }
2222 else
2223 {
2224 ADDR_ASSERT_ALWAYS();
2225 }
2226 }
2227
2228 // Fill the index into the lookup table, if the combination is not supported
2229 // fill the invalid equation index
2230 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2231 }
2232 }
2233 }
2234 }
2235
2236 /**
2237 ************************************************************************************************************************
2238 * Gfx9Lib::HwlGetEquationIndex
2239 *
2240 * @brief
2241 * Interface function stub of GetEquationIndex
2242 *
2243 * @return
2244 * ADDR_E_RETURNCODE
2245 ************************************************************************************************************************
2246 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2247 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2248 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2249 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2250 ) const
2251 {
2252 AddrResourceType rsrcType = pIn->resourceType;
2253 AddrSwizzleMode swMode = pIn->swizzleMode;
2254 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2255 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2256
2257 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2258 {
2259 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2260 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2261
2262 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2263 }
2264
2265 if (pOut->pMipInfo != NULL)
2266 {
2267 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2268 {
2269 pOut->pMipInfo[i].equationIndex = index;
2270 }
2271 }
2272
2273 return index;
2274 }
2275
2276 /**
2277 ************************************************************************************************************************
2278 * Gfx9Lib::HwlComputeBlock256Equation
2279 *
2280 * @brief
2281 * Interface function stub of ComputeBlock256Equation
2282 *
2283 * @return
2284 * ADDR_E_RETURNCODE
2285 ************************************************************************************************************************
2286 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2287 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2288 AddrResourceType rsrcType,
2289 AddrSwizzleMode swMode,
2290 UINT_32 elementBytesLog2,
2291 ADDR_EQUATION* pEquation) const
2292 {
2293 ADDR_E_RETURNCODE ret = ADDR_OK;
2294
2295 pEquation->numBits = 8;
2296
2297 UINT_32 i = 0;
2298 for (; i < elementBytesLog2; i++)
2299 {
2300 InitChannel(1, 0 , i, &pEquation->addr[i]);
2301 }
2302
2303 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2304
2305 const UINT_32 maxBitsUsed = 4;
2306 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0};
2307 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0};
2308
2309 for (i = 0; i < maxBitsUsed; i++)
2310 {
2311 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2312 InitChannel(1, 1, i, &y[i]);
2313 }
2314
2315 if (IsStandardSwizzle(rsrcType, swMode))
2316 {
2317 switch (elementBytesLog2)
2318 {
2319 case 0:
2320 pixelBit[0] = x[0];
2321 pixelBit[1] = x[1];
2322 pixelBit[2] = x[2];
2323 pixelBit[3] = x[3];
2324 pixelBit[4] = y[0];
2325 pixelBit[5] = y[1];
2326 pixelBit[6] = y[2];
2327 pixelBit[7] = y[3];
2328 break;
2329 case 1:
2330 pixelBit[0] = x[0];
2331 pixelBit[1] = x[1];
2332 pixelBit[2] = x[2];
2333 pixelBit[3] = y[0];
2334 pixelBit[4] = y[1];
2335 pixelBit[5] = y[2];
2336 pixelBit[6] = x[3];
2337 break;
2338 case 2:
2339 pixelBit[0] = x[0];
2340 pixelBit[1] = x[1];
2341 pixelBit[2] = y[0];
2342 pixelBit[3] = y[1];
2343 pixelBit[4] = y[2];
2344 pixelBit[5] = x[2];
2345 break;
2346 case 3:
2347 pixelBit[0] = x[0];
2348 pixelBit[1] = y[0];
2349 pixelBit[2] = y[1];
2350 pixelBit[3] = x[1];
2351 pixelBit[4] = x[2];
2352 break;
2353 case 4:
2354 pixelBit[0] = y[0];
2355 pixelBit[1] = y[1];
2356 pixelBit[2] = x[0];
2357 pixelBit[3] = x[1];
2358 break;
2359 default:
2360 ADDR_ASSERT_ALWAYS();
2361 ret = ADDR_INVALIDPARAMS;
2362 break;
2363 }
2364 }
2365 else if (IsDisplaySwizzle(rsrcType, swMode))
2366 {
2367 switch (elementBytesLog2)
2368 {
2369 case 0:
2370 pixelBit[0] = x[0];
2371 pixelBit[1] = x[1];
2372 pixelBit[2] = x[2];
2373 pixelBit[3] = y[1];
2374 pixelBit[4] = y[0];
2375 pixelBit[5] = y[2];
2376 pixelBit[6] = x[3];
2377 pixelBit[7] = y[3];
2378 break;
2379 case 1:
2380 pixelBit[0] = x[0];
2381 pixelBit[1] = x[1];
2382 pixelBit[2] = x[2];
2383 pixelBit[3] = y[0];
2384 pixelBit[4] = y[1];
2385 pixelBit[5] = y[2];
2386 pixelBit[6] = x[3];
2387 break;
2388 case 2:
2389 pixelBit[0] = x[0];
2390 pixelBit[1] = x[1];
2391 pixelBit[2] = y[0];
2392 pixelBit[3] = x[2];
2393 pixelBit[4] = y[1];
2394 pixelBit[5] = y[2];
2395 break;
2396 case 3:
2397 pixelBit[0] = x[0];
2398 pixelBit[1] = y[0];
2399 pixelBit[2] = x[1];
2400 pixelBit[3] = x[2];
2401 pixelBit[4] = y[1];
2402 break;
2403 case 4:
2404 pixelBit[0] = x[0];
2405 pixelBit[1] = y[0];
2406 pixelBit[2] = x[1];
2407 pixelBit[3] = y[1];
2408 break;
2409 default:
2410 ADDR_ASSERT_ALWAYS();
2411 ret = ADDR_INVALIDPARAMS;
2412 break;
2413 }
2414 }
2415 else if (IsRotateSwizzle(swMode))
2416 {
2417 switch (elementBytesLog2)
2418 {
2419 case 0:
2420 pixelBit[0] = y[0];
2421 pixelBit[1] = y[1];
2422 pixelBit[2] = y[2];
2423 pixelBit[3] = x[1];
2424 pixelBit[4] = x[0];
2425 pixelBit[5] = x[2];
2426 pixelBit[6] = x[3];
2427 pixelBit[7] = y[3];
2428 break;
2429 case 1:
2430 pixelBit[0] = y[0];
2431 pixelBit[1] = y[1];
2432 pixelBit[2] = y[2];
2433 pixelBit[3] = x[0];
2434 pixelBit[4] = x[1];
2435 pixelBit[5] = x[2];
2436 pixelBit[6] = x[3];
2437 break;
2438 case 2:
2439 pixelBit[0] = y[0];
2440 pixelBit[1] = y[1];
2441 pixelBit[2] = x[0];
2442 pixelBit[3] = y[2];
2443 pixelBit[4] = x[1];
2444 pixelBit[5] = x[2];
2445 break;
2446 case 3:
2447 pixelBit[0] = y[0];
2448 pixelBit[1] = x[0];
2449 pixelBit[2] = y[1];
2450 pixelBit[3] = x[1];
2451 pixelBit[4] = x[2];
2452 break;
2453 default:
2454 ADDR_ASSERT_ALWAYS();
2455 case 4:
2456 ret = ADDR_INVALIDPARAMS;
2457 break;
2458 }
2459 }
2460 else
2461 {
2462 ADDR_ASSERT_ALWAYS();
2463 ret = ADDR_INVALIDPARAMS;
2464 }
2465
2466 // Post validation
2467 if (ret == ADDR_OK)
2468 {
2469 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2470 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2471 (microBlockDim.w * (1 << elementBytesLog2)));
2472 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2473 }
2474
2475 return ret;
2476 }
2477
2478 /**
2479 ************************************************************************************************************************
2480 * Gfx9Lib::HwlComputeThinEquation
2481 *
2482 * @brief
2483 * Interface function stub of ComputeThinEquation
2484 *
2485 * @return
2486 * ADDR_E_RETURNCODE
2487 ************************************************************************************************************************
2488 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2489 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2490 AddrResourceType rsrcType,
2491 AddrSwizzleMode swMode,
2492 UINT_32 elementBytesLog2,
2493 ADDR_EQUATION* pEquation) const
2494 {
2495 ADDR_E_RETURNCODE ret = ADDR_OK;
2496
2497 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2498
2499 UINT_32 maxXorBits = blockSizeLog2;
2500 if (IsNonPrtXor(swMode))
2501 {
2502 // For non-prt-xor, maybe need to initialize some more bits for xor
2503 // The highest xor bit used in equation will be max the following 3 items:
2504 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2505 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2506 // 3. blockSizeLog2
2507
2508 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2509 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2510 GetPipeXorBits(blockSizeLog2) +
2511 2 * GetBankXorBits(blockSizeLog2));
2512 }
2513
2514 const UINT_32 maxBitsUsed = 14;
2515 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2516 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0};
2517 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0};
2518
2519 const UINT_32 extraXorBits = 16;
2520 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2521 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {0};
2522
2523 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2524 {
2525 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2526 InitChannel(1, 1, i, &y[i]);
2527 }
2528
2529 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2530
2531 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2532 {
2533 InitChannel(1, 0 , i, &pixelBit[i]);
2534 }
2535
2536 UINT_32 xIdx = 0;
2537 UINT_32 yIdx = 0;
2538 UINT_32 lowBits = 0;
2539
2540 if (IsZOrderSwizzle(swMode))
2541 {
2542 if (elementBytesLog2 <= 3)
2543 {
2544 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2545 {
2546 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2547 }
2548
2549 lowBits = 6;
2550 }
2551 else
2552 {
2553 ret = ADDR_INVALIDPARAMS;
2554 }
2555 }
2556 else
2557 {
2558 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2559
2560 if (ret == ADDR_OK)
2561 {
2562 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2563 xIdx = Log2(microBlockDim.w);
2564 yIdx = Log2(microBlockDim.h);
2565 lowBits = 8;
2566 }
2567 }
2568
2569 if (ret == ADDR_OK)
2570 {
2571 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2572 {
2573 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2574 }
2575
2576 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2577 {
2578 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2579 }
2580
2581 if (IsXor(swMode))
2582 {
2583 // Fill XOR bits
2584 UINT_32 pipeStart = m_pipeInterleaveLog2;
2585 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2586
2587 UINT_32 bankStart = pipeStart + pipeXorBits;
2588 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2589
2590 for (UINT_32 i = 0; i < pipeXorBits; i++)
2591 {
2592 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2593 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2594 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2595
2596 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2597 }
2598
2599 for (UINT_32 i = 0; i < bankXorBits; i++)
2600 {
2601 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2602 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2603 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2604
2605 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2606 }
2607
2608 if (IsPrt(swMode) == FALSE)
2609 {
2610 for (UINT_32 i = 0; i < pipeXorBits; i++)
2611 {
2612 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2613 }
2614
2615 for (UINT_32 i = 0; i < bankXorBits; i++)
2616 {
2617 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2618 }
2619 }
2620 }
2621
2622 pEquation->numBits = blockSizeLog2;
2623 }
2624
2625 return ret;
2626 }
2627
2628 /**
2629 ************************************************************************************************************************
2630 * Gfx9Lib::HwlComputeThickEquation
2631 *
2632 * @brief
2633 * Interface function stub of ComputeThickEquation
2634 *
2635 * @return
2636 * ADDR_E_RETURNCODE
2637 ************************************************************************************************************************
2638 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2639 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2640 AddrResourceType rsrcType,
2641 AddrSwizzleMode swMode,
2642 UINT_32 elementBytesLog2,
2643 ADDR_EQUATION* pEquation) const
2644 {
2645 ADDR_E_RETURNCODE ret = ADDR_OK;
2646
2647 ADDR_ASSERT(IsTex3d(rsrcType));
2648
2649 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2650
2651 UINT_32 maxXorBits = blockSizeLog2;
2652 if (IsNonPrtXor(swMode))
2653 {
2654 // For non-prt-xor, maybe need to initialize some more bits for xor
2655 // The highest xor bit used in equation will be max the following 3:
2656 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2657 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2658 // 3. blockSizeLog2
2659
2660 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2661 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2662 GetPipeXorBits(blockSizeLog2) +
2663 3 * GetBankXorBits(blockSizeLog2));
2664 }
2665
2666 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2667 {
2668 InitChannel(1, 0 , i, &pEquation->addr[i]);
2669 }
2670
2671 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2672
2673 const UINT_32 maxBitsUsed = 12;
2674 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2675 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0};
2676 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0};
2677 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {0};
2678
2679 const UINT_32 extraXorBits = 24;
2680 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2681 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {0};
2682
2683 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2684 {
2685 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2686 InitChannel(1, 1, i, &y[i]);
2687 InitChannel(1, 2, i, &z[i]);
2688 }
2689
2690 if (IsZOrderSwizzle(swMode))
2691 {
2692 switch (elementBytesLog2)
2693 {
2694 case 0:
2695 pixelBit[0] = x[0];
2696 pixelBit[1] = y[0];
2697 pixelBit[2] = x[1];
2698 pixelBit[3] = y[1];
2699 pixelBit[4] = z[0];
2700 pixelBit[5] = z[1];
2701 pixelBit[6] = x[2];
2702 pixelBit[7] = z[2];
2703 pixelBit[8] = y[2];
2704 pixelBit[9] = x[3];
2705 break;
2706 case 1:
2707 pixelBit[0] = x[0];
2708 pixelBit[1] = y[0];
2709 pixelBit[2] = x[1];
2710 pixelBit[3] = y[1];
2711 pixelBit[4] = z[0];
2712 pixelBit[5] = z[1];
2713 pixelBit[6] = z[2];
2714 pixelBit[7] = y[2];
2715 pixelBit[8] = x[2];
2716 break;
2717 case 2:
2718 pixelBit[0] = x[0];
2719 pixelBit[1] = y[0];
2720 pixelBit[2] = x[1];
2721 pixelBit[3] = z[0];
2722 pixelBit[4] = y[1];
2723 pixelBit[5] = z[1];
2724 pixelBit[6] = y[2];
2725 pixelBit[7] = x[2];
2726 break;
2727 case 3:
2728 pixelBit[0] = x[0];
2729 pixelBit[1] = y[0];
2730 pixelBit[2] = z[0];
2731 pixelBit[3] = x[1];
2732 pixelBit[4] = z[1];
2733 pixelBit[5] = y[1];
2734 pixelBit[6] = x[2];
2735 break;
2736 case 4:
2737 pixelBit[0] = x[0];
2738 pixelBit[1] = y[0];
2739 pixelBit[2] = z[0];
2740 pixelBit[3] = z[1];
2741 pixelBit[4] = y[1];
2742 pixelBit[5] = x[1];
2743 break;
2744 default:
2745 ADDR_ASSERT_ALWAYS();
2746 ret = ADDR_INVALIDPARAMS;
2747 break;
2748 }
2749 }
2750 else if (IsStandardSwizzle(rsrcType, swMode))
2751 {
2752 switch (elementBytesLog2)
2753 {
2754 case 0:
2755 pixelBit[0] = x[0];
2756 pixelBit[1] = x[1];
2757 pixelBit[2] = x[2];
2758 pixelBit[3] = x[3];
2759 pixelBit[4] = y[0];
2760 pixelBit[5] = y[1];
2761 pixelBit[6] = z[0];
2762 pixelBit[7] = z[1];
2763 pixelBit[8] = z[2];
2764 pixelBit[9] = y[2];
2765 break;
2766 case 1:
2767 pixelBit[0] = x[0];
2768 pixelBit[1] = x[1];
2769 pixelBit[2] = x[2];
2770 pixelBit[3] = y[0];
2771 pixelBit[4] = y[1];
2772 pixelBit[5] = z[0];
2773 pixelBit[6] = z[1];
2774 pixelBit[7] = z[2];
2775 pixelBit[8] = y[2];
2776 break;
2777 case 2:
2778 pixelBit[0] = x[0];
2779 pixelBit[1] = x[1];
2780 pixelBit[2] = y[0];
2781 pixelBit[3] = y[1];
2782 pixelBit[4] = z[0];
2783 pixelBit[5] = z[1];
2784 pixelBit[6] = y[2];
2785 pixelBit[7] = x[2];
2786 break;
2787 case 3:
2788 pixelBit[0] = x[0];
2789 pixelBit[1] = y[0];
2790 pixelBit[2] = y[1];
2791 pixelBit[3] = z[0];
2792 pixelBit[4] = z[1];
2793 pixelBit[5] = x[1];
2794 pixelBit[6] = x[2];
2795 break;
2796 case 4:
2797 pixelBit[0] = y[0];
2798 pixelBit[1] = y[1];
2799 pixelBit[2] = z[0];
2800 pixelBit[3] = z[1];
2801 pixelBit[4] = x[0];
2802 pixelBit[5] = x[1];
2803 break;
2804 default:
2805 ADDR_ASSERT_ALWAYS();
2806 ret = ADDR_INVALIDPARAMS;
2807 break;
2808 }
2809 }
2810 else
2811 {
2812 ADDR_ASSERT_ALWAYS();
2813 ret = ADDR_INVALIDPARAMS;
2814 }
2815
2816 if (ret == ADDR_OK)
2817 {
2818 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2819 UINT_32 xIdx = Log2(microBlockDim.w);
2820 UINT_32 yIdx = Log2(microBlockDim.h);
2821 UINT_32 zIdx = Log2(microBlockDim.d);
2822
2823 pixelBit = pEquation->addr;
2824
2825 const UINT_32 lowBits = 10;
2826 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2827 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2828
2829 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2830 {
2831 if ((i % 3) == 0)
2832 {
2833 pixelBit[i] = x[xIdx++];
2834 }
2835 else if ((i % 3) == 1)
2836 {
2837 pixelBit[i] = z[zIdx++];
2838 }
2839 else
2840 {
2841 pixelBit[i] = y[yIdx++];
2842 }
2843 }
2844
2845 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2846 {
2847 if ((i % 3) == 0)
2848 {
2849 xorExtra[i - blockSizeLog2] = x[xIdx++];
2850 }
2851 else if ((i % 3) == 1)
2852 {
2853 xorExtra[i - blockSizeLog2] = z[zIdx++];
2854 }
2855 else
2856 {
2857 xorExtra[i - blockSizeLog2] = y[yIdx++];
2858 }
2859 }
2860
2861 if (IsXor(swMode))
2862 {
2863 // Fill XOR bits
2864 UINT_32 pipeStart = m_pipeInterleaveLog2;
2865 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2866 for (UINT_32 i = 0; i < pipeXorBits; i++)
2867 {
2868 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2869 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2870 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2871
2872 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2873
2874 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2875 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2876 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2877
2878 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2879 }
2880
2881 UINT_32 bankStart = pipeStart + pipeXorBits;
2882 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2883 for (UINT_32 i = 0; i < bankXorBits; i++)
2884 {
2885 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2886 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2887 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2888
2889 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2890
2891 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2892 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2893 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2894
2895 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2896 }
2897 }
2898
2899 pEquation->numBits = blockSizeLog2;
2900 }
2901
2902 return ret;
2903 }
2904
2905 /**
2906 ************************************************************************************************************************
2907 * Gfx9Lib::IsValidDisplaySwizzleMode
2908 *
2909 * @brief
2910 * Check if a swizzle mode is supported by display engine
2911 *
2912 * @return
2913 * TRUE is swizzle mode is supported by display engine
2914 ************************************************************************************************************************
2915 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2916 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2917 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2918 {
2919 BOOL_32 support = FALSE;
2920
2921 const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
2922
2923 if (m_settings.isDce12)
2924 {
2925 if (pIn->bpp == 32)
2926 {
2927 support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
2928 }
2929 else if (pIn->bpp <= 64)
2930 {
2931 support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
2932 }
2933 }
2934 else if (m_settings.isDcn1)
2935 {
2936 if (pIn->bpp < 64)
2937 {
2938 support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2939 }
2940 else if (pIn->bpp == 64)
2941 {
2942 support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2943 }
2944 }
2945 else if (m_settings.isDcn2)
2946 {
2947 if (pIn->bpp < 64)
2948 {
2949 support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2950 }
2951 else if (pIn->bpp == 64)
2952 {
2953 support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
2954 }
2955 }
2956 else
2957 {
2958 ADDR_NOT_IMPLEMENTED();
2959 }
2960
2961 return support;
2962 }
2963
2964 /**
2965 ************************************************************************************************************************
2966 * Gfx9Lib::HwlComputePipeBankXor
2967 *
2968 * @brief
2969 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2970 *
2971 * @return
2972 * PipeBankXor value
2973 ************************************************************************************************************************
2974 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2975 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2976 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2977 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
2978 {
2979 if (IsXor(pIn->swizzleMode))
2980 {
2981 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2982 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2983 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2984
2985 UINT_32 pipeXor = 0;
2986 UINT_32 bankXor = 0;
2987
2988 const UINT_32 bankMask = (1 << bankBits) - 1;
2989 const UINT_32 index = pIn->surfIndex & bankMask;
2990
2991 const UINT_32 bpp = pIn->flags.fmask ?
2992 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2993 if (bankBits == 4)
2994 {
2995 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2996 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2997
2998 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2999 }
3000 else if (bankBits > 0)
3001 {
3002 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3003 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3004 bankXor = (index * bankIncrease) & bankMask;
3005 }
3006
3007 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3008 }
3009 else
3010 {
3011 pOut->pipeBankXor = 0;
3012 }
3013
3014 return ADDR_OK;
3015 }
3016
3017 /**
3018 ************************************************************************************************************************
3019 * Gfx9Lib::HwlComputeSlicePipeBankXor
3020 *
3021 * @brief
3022 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3023 *
3024 * @return
3025 * PipeBankXor value
3026 ************************************************************************************************************************
3027 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3028 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3029 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3030 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3031 {
3032 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3033 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3034 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3035
3036 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3037 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3038
3039 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3040
3041 return ADDR_OK;
3042 }
3043
3044 /**
3045 ************************************************************************************************************************
3046 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3047 *
3048 * @brief
3049 * Compute sub resource offset to support swizzle pattern
3050 *
3051 * @return
3052 * Offset
3053 ************************************************************************************************************************
3054 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3055 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3056 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3057 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3058 {
3059 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3060
3061 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3062 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3063 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3064 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3065 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3066 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3067
3068 pOut->offset = pIn->slice * pIn->sliceSize +
3069 pIn->macroBlockOffset +
3070 (pIn->mipTailOffset ^ pipeBankXor) -
3071 static_cast<UINT_64>(pipeBankXor);
3072 return ADDR_OK;
3073 }
3074
3075 /**
3076 ************************************************************************************************************************
3077 * Gfx9Lib::ValidateNonSwModeParams
3078 *
3079 * @brief
3080 * Validate compute surface info params except swizzle mode
3081 *
3082 * @return
3083 * TRUE if parameters are valid, FALSE otherwise
3084 ************************************************************************************************************************
3085 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3086 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3087 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3088 {
3089 BOOL_32 valid = TRUE;
3090
3091 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3092 {
3093 ADDR_ASSERT_ALWAYS();
3094 valid = FALSE;
3095 }
3096
3097 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3098 {
3099 ADDR_ASSERT_ALWAYS();
3100 valid = FALSE;
3101 }
3102
3103 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3104 const BOOL_32 msaa = (pIn->numFrags > 1);
3105 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3106
3107 const AddrResourceType rsrcType = pIn->resourceType;
3108 const BOOL_32 tex3d = IsTex3d(rsrcType);
3109 const BOOL_32 tex2d = IsTex2d(rsrcType);
3110 const BOOL_32 tex1d = IsTex1d(rsrcType);
3111
3112 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3113 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3114 const BOOL_32 display = flags.display || flags.rotated;
3115 const BOOL_32 stereo = flags.qbStereo;
3116 const BOOL_32 fmask = flags.fmask;
3117
3118 // Resource type check
3119 if (tex1d)
3120 {
3121 if (msaa || zbuffer || display || stereo || isBc || fmask)
3122 {
3123 ADDR_ASSERT_ALWAYS();
3124 valid = FALSE;
3125 }
3126 }
3127 else if (tex2d)
3128 {
3129 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3130 {
3131 ADDR_ASSERT_ALWAYS();
3132 valid = FALSE;
3133 }
3134 }
3135 else if (tex3d)
3136 {
3137 if (msaa || zbuffer || display || stereo || fmask)
3138 {
3139 ADDR_ASSERT_ALWAYS();
3140 valid = FALSE;
3141 }
3142 }
3143 else
3144 {
3145 ADDR_ASSERT_ALWAYS();
3146 valid = FALSE;
3147 }
3148
3149 return valid;
3150 }
3151
3152 /**
3153 ************************************************************************************************************************
3154 * Gfx9Lib::ValidateSwModeParams
3155 *
3156 * @brief
3157 * Validate compute surface info related to swizzle mode
3158 *
3159 * @return
3160 * TRUE if parameters are valid, FALSE otherwise
3161 ************************************************************************************************************************
3162 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3163 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3164 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3165 {
3166 BOOL_32 valid = TRUE;
3167
3168 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3169 {
3170 ADDR_ASSERT_ALWAYS();
3171 valid = FALSE;
3172 }
3173
3174 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3175 const BOOL_32 msaa = (pIn->numFrags > 1);
3176 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3177 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3178
3179 const AddrResourceType rsrcType = pIn->resourceType;
3180 const BOOL_32 tex3d = IsTex3d(rsrcType);
3181 const BOOL_32 tex2d = IsTex2d(rsrcType);
3182 const BOOL_32 tex1d = IsTex1d(rsrcType);
3183
3184 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3185 const BOOL_32 linear = IsLinear(swizzle);
3186 const BOOL_32 blk256B = IsBlock256b(swizzle);
3187 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3188
3189 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3190 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3191 const BOOL_32 color = flags.color;
3192 const BOOL_32 texture = flags.texture;
3193 const BOOL_32 display = flags.display || flags.rotated;
3194 const BOOL_32 prt = flags.prt;
3195 const BOOL_32 fmask = flags.fmask;
3196
3197 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3198 const BOOL_32 zMaxMip = tex3d && mipmap &&
3199 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3200
3201 // Misc check
3202 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3203 {
3204 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3205 ADDR_ASSERT_ALWAYS();
3206 valid = FALSE;
3207 }
3208
3209 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3210 {
3211 ADDR_ASSERT_ALWAYS();
3212 valid = FALSE;
3213 }
3214
3215 if ((pIn->bpp == 96) && (linear == FALSE))
3216 {
3217 ADDR_ASSERT_ALWAYS();
3218 valid = FALSE;
3219 }
3220
3221 if (prt && isNonPrtXor)
3222 {
3223 ADDR_ASSERT_ALWAYS();
3224 valid = FALSE;
3225 }
3226
3227 // Resource type check
3228 if (tex1d)
3229 {
3230 if (linear == FALSE)
3231 {
3232 ADDR_ASSERT_ALWAYS();
3233 valid = FALSE;
3234 }
3235 }
3236
3237 // Swizzle type check
3238 if (linear)
3239 {
3240 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3241 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3242 {
3243 ADDR_ASSERT_ALWAYS();
3244 valid = FALSE;
3245 }
3246 }
3247 else if (IsZOrderSwizzle(swizzle))
3248 {
3249 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3250 {
3251 ADDR_ASSERT_ALWAYS();
3252 valid = FALSE;
3253 }
3254 }
3255 else if (IsStandardSwizzle(swizzle))
3256 {
3257 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3258 {
3259 ADDR_ASSERT_ALWAYS();
3260 valid = FALSE;
3261 }
3262 }
3263 else if (IsDisplaySwizzle(swizzle))
3264 {
3265 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3266 {
3267 ADDR_ASSERT_ALWAYS();
3268 valid = FALSE;
3269 }
3270 }
3271 else if (IsRotateSwizzle(swizzle))
3272 {
3273 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3274 {
3275 ADDR_ASSERT_ALWAYS();
3276 valid = FALSE;
3277 }
3278 }
3279 else
3280 {
3281 ADDR_ASSERT_ALWAYS();
3282 valid = FALSE;
3283 }
3284
3285 // Block type check
3286 if (blk256B)
3287 {
3288 if (prt || zbuffer || tex3d || mipmap || msaa)
3289 {
3290 ADDR_ASSERT_ALWAYS();
3291 valid = FALSE;
3292 }
3293 }
3294
3295 return valid;
3296 }
3297
3298 /**
3299 ************************************************************************************************************************
3300 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3301 *
3302 * @brief
3303 * Compute surface info sanity check
3304 *
3305 * @return
3306 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3307 ************************************************************************************************************************
3308 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3309 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3310 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3311 {
3312 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3313 }
3314
3315 /**
3316 ************************************************************************************************************************
3317 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3318 *
3319 * @brief
3320 * Internal function to get suggested surface information for cliet to use
3321 *
3322 * @return
3323 * ADDR_E_RETURNCODE
3324 ************************************************************************************************************************
3325 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3326 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3327 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3328 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3329 {
3330 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3331 ElemLib* pElemLib = GetElemLib();
3332
3333 UINT_32 bpp = pIn->bpp;
3334 UINT_32 width = Max(pIn->width, 1u);
3335 UINT_32 height = Max(pIn->height, 1u);
3336 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3337 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3338
3339 if (pIn->flags.fmask)
3340 {
3341 bpp = GetFmaskBpp(numSamples, numFrags);
3342 numFrags = 1;
3343 numSamples = 1;
3344 pOut->resourceType = ADDR_RSRC_TEX_2D;
3345 }
3346 else
3347 {
3348 // Set format to INVALID will skip this conversion
3349 if (pIn->format != ADDR_FMT_INVALID)
3350 {
3351 UINT_32 expandX, expandY;
3352
3353 // Don't care for this case
3354 ElemMode elemMode = ADDR_UNCOMPRESSED;
3355
3356 // Get compression/expansion factors and element mode which indicates compression/expansion
3357 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3358 &elemMode,
3359 &expandX,
3360 &expandY);
3361
3362 UINT_32 basePitch = 0;
3363 GetElemLib()->AdjustSurfaceInfo(elemMode,
3364 expandX,
3365 expandY,
3366 &bpp,
3367 &basePitch,
3368 &width,
3369 &height);
3370 }
3371
3372 // The output may get changed for volume(3D) texture resource in future
3373 pOut->resourceType = pIn->resourceType;
3374 }
3375
3376 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3377 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3378 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3379 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3380
3381 // Pre sanity check on non swizzle mode parameters
3382 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3383 localIn.flags = pIn->flags;
3384 localIn.resourceType = pOut->resourceType;
3385 localIn.format = pIn->format;
3386 localIn.bpp = bpp;
3387 localIn.width = width;
3388 localIn.height = height;
3389 localIn.numSlices = numSlices;
3390 localIn.numMipLevels = numMipLevels;
3391 localIn.numSamples = numSamples;
3392 localIn.numFrags = numFrags;
3393
3394 if (ValidateNonSwModeParams(&localIn))
3395 {
3396 // Forbid swizzle mode(s) by client setting
3397 ADDR2_SWMODE_SET allowedSwModeSet = {0};
3398 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3399 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3400 allowedSwModeSet.value |=
3401 pIn->forbiddenBlock.macroThin4KB ? 0 :
3402 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3403 allowedSwModeSet.value |=
3404 pIn->forbiddenBlock.macroThick4KB ? 0 :
3405 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3406 allowedSwModeSet.value |=
3407 pIn->forbiddenBlock.macroThin64KB ? 0 :
3408 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3409 allowedSwModeSet.value |=
3410 pIn->forbiddenBlock.macroThick64KB ? 0 :
3411 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3412
3413 if (pIn->preferredSwSet.value != 0)
3414 {
3415 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3416 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3417 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3418 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3419 }
3420
3421 if (pIn->noXor)
3422 {
3423 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3424 }
3425
3426 if (pIn->maxAlign > 0)
3427 {
3428 if (pIn->maxAlign < Size64K)
3429 {
3430 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3431 }
3432
3433 if (pIn->maxAlign < Size4K)
3434 {
3435 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3436 }
3437
3438 if (pIn->maxAlign < Size256)
3439 {
3440 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3441 }
3442 }
3443
3444 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3445 switch (pOut->resourceType)
3446 {
3447 case ADDR_RSRC_TEX_1D:
3448 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3449 break;
3450
3451 case ADDR_RSRC_TEX_2D:
3452 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3453
3454 if (bpp > 64)
3455 {
3456 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3457 }
3458 break;
3459
3460 case ADDR_RSRC_TEX_3D:
3461 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3462
3463 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3464 {
3465 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3466 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3467 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3468 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3469 }
3470
3471 if ((bpp == 128) && pIn->flags.color)
3472 {
3473 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3474 }
3475
3476 if (pIn->flags.view3dAs2dArray)
3477 {
3478 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3479 }
3480 break;
3481
3482 default:
3483 ADDR_ASSERT_ALWAYS();
3484 allowedSwModeSet.value = 0;
3485 break;
3486 }
3487
3488 if (pIn->format == ADDR_FMT_32_32_32)
3489 {
3490 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3491 }
3492
3493 if (ElemLib::IsBlockCompressed(pIn->format))
3494 {
3495 if (pIn->flags.texture)
3496 {
3497 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3498 }
3499 else
3500 {
3501 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3502 }
3503 }
3504
3505 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3506 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3507 {
3508 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3509 }
3510
3511 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3512 {
3513 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3514
3515 if (pIn->flags.noMetadata == FALSE)
3516 {
3517 if (pIn->flags.depth &&
3518 pIn->flags.texture &&
3519 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3520 {
3521 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3522 // equation from wrong address within memory range a tile covered and use the
3523 // garbage data for compressed Z reading which finally leads to corruption.
3524 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3525 }
3526
3527 if (m_settings.htileCacheRbConflict &&
3528 (pIn->flags.depth || pIn->flags.stencil) &&
3529 (numSlices > 1) &&
3530 (pIn->flags.metaRbUnaligned == FALSE) &&
3531 (pIn->flags.metaPipeUnaligned == FALSE))
3532 {
3533 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3534 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3535 }
3536 }
3537 }
3538
3539 if (msaa)
3540 {
3541 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3542 }
3543
3544 if ((numFrags > 1) &&
3545 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3546 {
3547 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3548 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3549 }
3550
3551 if (numMipLevels > 1)
3552 {
3553 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3554 }
3555
3556 if (displayRsrc)
3557 {
3558 if (m_settings.isDce12)
3559 {
3560 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3561 }
3562 else if (m_settings.isDcn1)
3563 {
3564 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3565 }
3566 else if (m_settings.isDcn2)
3567 {
3568 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3569 }
3570 else
3571 {
3572 ADDR_NOT_IMPLEMENTED();
3573 }
3574 }
3575
3576 if (allowedSwModeSet.value != 0)
3577 {
3578 #if DEBUG
3579 // Post sanity check, at least AddrLib should accept the output generated by its own
3580 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3581
3582 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3583 {
3584 if (validateSwModeSet & 1)
3585 {
3586 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3587 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3588 }
3589
3590 validateSwModeSet >>= 1;
3591 }
3592 #endif
3593
3594 pOut->validSwModeSet = allowedSwModeSet;
3595 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3596 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3597 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3598
3599 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3600
3601 if (pOut->clientPreferredSwSet.value == 0)
3602 {
3603 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3604 }
3605
3606 // Apply optional restrictions
3607 if (pIn->flags.needEquation)
3608 {
3609 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3610 }
3611
3612 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3613 {
3614 pOut->swizzleMode = ADDR_SW_LINEAR;
3615 }
3616 else
3617 {
3618 // Always ignore linear swizzle mode if there is other choice.
3619 allowedSwModeSet.swLinear = 0;
3620
3621 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3622
3623 // Determine block size if there are 2 or more block type candidates
3624 if (IsPow2(allowedBlockSet.value) == FALSE)
3625 {
3626 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_LINEAR};
3627
3628 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3629 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3630 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3631
3632 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3633 {
3634 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3635 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3636 }
3637
3638 Dim3d blkDim[AddrBlockMaxTiledType] = {0};
3639 Dim3d padDim[AddrBlockMaxTiledType] = {0};
3640 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3641
3642 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
3643 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
3644 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3645 UINT_32 minSizeBlk = AddrBlockMicro;
3646 UINT_64 minSize = 0;
3647
3648 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3649 {
3650 if (allowedBlockSet.value & (1 << i))
3651 {
3652 ComputeBlockDimensionForSurf(&blkDim[i].w,
3653 &blkDim[i].h,
3654 &blkDim[i].d,
3655 bpp,
3656 numFrags,
3657 pOut->resourceType,
3658 swMode[i]);
3659
3660 if (displayRsrc)
3661 {
3662 blkDim[i].w = PowTwoAlign(blkDim[i].w, 32);
3663 }
3664
3665 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
3666 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
3667
3668 if ((minSize == 0) ||
3669 ((padSize[i] * ratioHi) <= (minSize * ratioLow)))
3670 {
3671 minSize = padSize[i];
3672 minSizeBlk = i;
3673 }
3674 }
3675 }
3676
3677 if ((allowedBlockSet.micro == TRUE) &&
3678 (width <= blkDim[AddrBlockMicro].w) &&
3679 (height <= blkDim[AddrBlockMicro].h) &&
3680 (NextPow2(pIn->minSizeAlign) <= Size256))
3681 {
3682 minSizeBlk = AddrBlockMicro;
3683 }
3684
3685 if (minSizeBlk == AddrBlockMicro)
3686 {
3687 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3688 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3689 }
3690 else if (minSizeBlk == AddrBlockThick4KB)
3691 {
3692 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3693 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3694 }
3695 else if (minSizeBlk == AddrBlockThin4KB)
3696 {
3697 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3698 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3699 }
3700 else if (minSizeBlk == AddrBlockThick64KB)
3701 {
3702 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3703 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3704 }
3705 else
3706 {
3707 ADDR_ASSERT(minSizeBlk == AddrBlockThin64KB);
3708 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3709 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3710 }
3711 }
3712
3713 // Block type should be determined.
3714 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3715
3716 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3717
3718 // Determine swizzle type if there are 2 or more swizzle type candidates
3719 if (IsPow2(allowedSwSet.value) == FALSE)
3720 {
3721 if (ElemLib::IsBlockCompressed(pIn->format))
3722 {
3723 if (allowedSwSet.sw_D)
3724 {
3725 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3726 }
3727 else
3728 {
3729 ADDR_ASSERT(allowedSwSet.sw_S);
3730 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3731 }
3732 }
3733 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3734 {
3735 if (allowedSwSet.sw_S)
3736 {
3737 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3738 }
3739 else if (allowedSwSet.sw_D)
3740 {
3741 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3742 }
3743 else
3744 {
3745 ADDR_ASSERT(allowedSwSet.sw_R);
3746 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3747 }
3748 }
3749 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3750 {
3751 if (pIn->flags.color && allowedSwSet.sw_D)
3752 {
3753 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3754 }
3755 else if (allowedSwSet.sw_Z)
3756 {
3757 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3758 }
3759 else
3760 {
3761 ADDR_ASSERT(allowedSwSet.sw_S);
3762 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3763 }
3764 }
3765 else
3766 {
3767 if (pIn->flags.rotated && allowedSwSet.sw_R)
3768 {
3769 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3770 }
3771 else if (allowedSwSet.sw_D)
3772 {
3773 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3774 }
3775 else if (allowedSwSet.sw_S)
3776 {
3777 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3778 }
3779 else
3780 {
3781 ADDR_ASSERT(allowedSwSet.sw_Z);
3782 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3783 }
3784 }
3785 }
3786
3787 // Swizzle type should be determined.
3788 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3789
3790 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3791 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3792 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3793 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3794 }
3795
3796 returnCode = ADDR_OK;
3797 }
3798 else
3799 {
3800 // Invalid combination...
3801 ADDR_ASSERT_ALWAYS();
3802 }
3803 }
3804 else
3805 {
3806 // Invalid combination...
3807 ADDR_ASSERT_ALWAYS();
3808 }
3809
3810 return returnCode;
3811 }
3812
3813 /**
3814 ************************************************************************************************************************
3815 * Gfx9Lib::ComputeStereoInfo
3816 *
3817 * @brief
3818 * Compute height alignment and right eye pipeBankXor for stereo surface
3819 *
3820 * @return
3821 * Error code
3822 *
3823 ************************************************************************************************************************
3824 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3825 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3826 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3827 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3828 UINT_32* pHeightAlign
3829 ) const
3830 {
3831 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3832
3833 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3834
3835 if (eqIndex < m_numEquations)
3836 {
3837 if (IsXor(pIn->swizzleMode))
3838 {
3839 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3840 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3841 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3842 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3843 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3844 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3845
3846 ADDR_ASSERT(maxYCoordBlock256 ==
3847 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
3848
3849 const UINT_32 maxYCoordInBaseEquation =
3850 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
3851
3852 ADDR_ASSERT(maxYCoordInBaseEquation ==
3853 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3854
3855 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3856
3857 ADDR_ASSERT(maxYCoordInPipeXor ==
3858 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3859
3860 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3861 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3862
3863 ADDR_ASSERT(maxYCoordInBankXor ==
3864 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3865
3866 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3867
3868 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3869 {
3870 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3871
3872 if (pOut->pStereoInfo != NULL)
3873 {
3874 pOut->pStereoInfo->rightSwizzle = 0;
3875
3876 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3877 {
3878 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3879 {
3880 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3881 }
3882
3883 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3884 {
3885 pOut->pStereoInfo->rightSwizzle |=
3886 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3887 }
3888
3889 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3890 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3891 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3892 }
3893 }
3894 }
3895 }
3896 }
3897 else
3898 {
3899 ADDR_ASSERT_ALWAYS();
3900 returnCode = ADDR_ERROR;
3901 }
3902
3903 return returnCode;
3904 }
3905
3906 /**
3907 ************************************************************************************************************************
3908 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3909 *
3910 * @brief
3911 * Internal function to calculate alignment for tiled surface
3912 *
3913 * @return
3914 * ADDR_E_RETURNCODE
3915 ************************************************************************************************************************
3916 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3917 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3918 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3919 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3920 ) const
3921 {
3922 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3923 &pOut->blockHeight,
3924 &pOut->blockSlices,
3925 pIn->bpp,
3926 pIn->numFrags,
3927 pIn->resourceType,
3928 pIn->swizzleMode);
3929
3930 if (returnCode == ADDR_OK)
3931 {
3932 UINT_32 pitchAlignInElement = pOut->blockWidth;
3933
3934 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3935 (pIn->flags.display || pIn->flags.rotated) &&
3936 (pIn->numMipLevels <= 1) &&
3937 (pIn->numSamples <= 1) &&
3938 (pIn->numFrags <= 1))
3939 {
3940 // Display engine needs pitch align to be at least 32 pixels.
3941 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3942 }
3943
3944 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3945
3946 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3947 {
3948 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3949 {
3950 returnCode = ADDR_INVALIDPARAMS;
3951 }
3952 else if (pIn->pitchInElement < pOut->pitch)
3953 {
3954 returnCode = ADDR_INVALIDPARAMS;
3955 }
3956 else
3957 {
3958 pOut->pitch = pIn->pitchInElement;
3959 }
3960 }
3961
3962 UINT_32 heightAlign = 0;
3963
3964 if (pIn->flags.qbStereo)
3965 {
3966 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3967 }
3968
3969 if (returnCode == ADDR_OK)
3970 {
3971 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3972
3973 if (heightAlign > 1)
3974 {
3975 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3976 }
3977
3978 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3979
3980 pOut->epitchIsHeight = FALSE;
3981 pOut->mipChainInTail = FALSE;
3982 pOut->firstMipIdInTail = pIn->numMipLevels;
3983
3984 pOut->mipChainPitch = pOut->pitch;
3985 pOut->mipChainHeight = pOut->height;
3986 pOut->mipChainSlice = pOut->numSlices;
3987
3988 if (pIn->numMipLevels > 1)
3989 {
3990 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3991 pIn->swizzleMode,
3992 pIn->bpp,
3993 pIn->width,
3994 pIn->height,
3995 pIn->numSlices,
3996 pOut->blockWidth,
3997 pOut->blockHeight,
3998 pOut->blockSlices,
3999 pIn->numMipLevels,
4000 pOut->pMipInfo);
4001
4002 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4003
4004 if (endingMipId == 0)
4005 {
4006 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4007 pIn->swizzleMode,
4008 pOut->blockWidth,
4009 pOut->blockHeight,
4010 pOut->blockSlices);
4011
4012 pOut->epitchIsHeight = TRUE;
4013 pOut->pitch = tailMaxDim.w;
4014 pOut->height = tailMaxDim.h;
4015 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4016 tailMaxDim.d : pIn->numSlices;
4017 pOut->mipChainInTail = TRUE;
4018 }
4019 else
4020 {
4021 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4022 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4023
4024 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4025 pIn->swizzleMode,
4026 mip0WidthInBlk,
4027 mip0HeightInBlk,
4028 pOut->numSlices / pOut->blockSlices);
4029 if (majorMode == ADDR_MAJOR_Y)
4030 {
4031 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4032
4033 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4034 {
4035 mip1WidthInBlk++;
4036 }
4037
4038 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4039
4040 pOut->epitchIsHeight = FALSE;
4041 }
4042 else
4043 {
4044 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4045
4046 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4047 {
4048 mip1HeightInBlk++;
4049 }
4050
4051 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4052
4053 pOut->epitchIsHeight = TRUE;
4054 }
4055 }
4056
4057 if (pOut->pMipInfo != NULL)
4058 {
4059 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4060
4061 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4062 {
4063 Dim3d mipStartPos = {0};
4064 UINT_32 mipTailOffsetInBytes = 0;
4065
4066 mipStartPos = GetMipStartPos(pIn->resourceType,
4067 pIn->swizzleMode,
4068 pOut->pitch,
4069 pOut->height,
4070 pOut->numSlices,
4071 pOut->blockWidth,
4072 pOut->blockHeight,
4073 pOut->blockSlices,
4074 i,
4075 elementBytesLog2,
4076 &mipTailOffsetInBytes);
4077
4078 UINT_32 pitchInBlock =
4079 pOut->mipChainPitch / pOut->blockWidth;
4080 UINT_32 sliceInBlock =
4081 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4082 UINT_64 blockIndex =
4083 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4084 UINT_64 macroBlockOffset =
4085 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4086
4087 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4088 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4089 }
4090 }
4091 }
4092 else if (pOut->pMipInfo != NULL)
4093 {
4094 pOut->pMipInfo[0].pitch = pOut->pitch;
4095 pOut->pMipInfo[0].height = pOut->height;
4096 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4097 pOut->pMipInfo[0].offset = 0;
4098 }
4099
4100 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4101 (pIn->bpp >> 3) * pIn->numFrags;
4102 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4103 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4104
4105 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4106 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4107 (pIn->flags.texture == TRUE) &&
4108 (pIn->flags.noMetadata == FALSE) &&
4109 (pIn->flags.metaPipeUnaligned == FALSE))
4110 {
4111 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4112 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4113 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4114 // them, which may cause invalid metadata to be fetched.
4115 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4116 }
4117
4118 if (pIn->flags.prt)
4119 {
4120 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4121 }
4122 }
4123 }
4124
4125 return returnCode;
4126 }
4127
4128 /**
4129 ************************************************************************************************************************
4130 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4131 *
4132 * @brief
4133 * Internal function to calculate alignment for linear surface
4134 *
4135 * @return
4136 * ADDR_E_RETURNCODE
4137 ************************************************************************************************************************
4138 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4139 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4140 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4141 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4142 ) const
4143 {
4144 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4145 UINT_32 pitch = 0;
4146 UINT_32 actualHeight = 0;
4147 UINT_32 elementBytes = pIn->bpp >> 3;
4148 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4149
4150 if (IsTex1d(pIn->resourceType))
4151 {
4152 if (pIn->height > 1)
4153 {
4154 returnCode = ADDR_INVALIDPARAMS;
4155 }
4156 else
4157 {
4158 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4159
4160 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4161 actualHeight = pIn->numMipLevels;
4162
4163 if (pIn->flags.prt == FALSE)
4164 {
4165 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4166 &pitch, &actualHeight);
4167 }
4168
4169 if (returnCode == ADDR_OK)
4170 {
4171 if (pOut->pMipInfo != NULL)
4172 {
4173 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4174 {
4175 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4176 pOut->pMipInfo[i].pitch = pitch;
4177 pOut->pMipInfo[i].height = 1;
4178 pOut->pMipInfo[i].depth = 1;
4179 }
4180 }
4181 }
4182 }
4183 }
4184 else
4185 {
4186 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4187 }
4188
4189 if ((pitch == 0) || (actualHeight == 0))
4190 {
4191 returnCode = ADDR_INVALIDPARAMS;
4192 }
4193
4194 if (returnCode == ADDR_OK)
4195 {
4196 pOut->pitch = pitch;
4197 pOut->height = pIn->height;
4198 pOut->numSlices = pIn->numSlices;
4199 pOut->mipChainPitch = pitch;
4200 pOut->mipChainHeight = actualHeight;
4201 pOut->mipChainSlice = pOut->numSlices;
4202 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4203 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4204 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4205 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4206 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4207 pOut->blockHeight = 1;
4208 pOut->blockSlices = 1;
4209 }
4210
4211 // Post calculation validate
4212 ADDR_ASSERT(pOut->sliceSize > 0);
4213
4214 return returnCode;
4215 }
4216
4217 /**
4218 ************************************************************************************************************************
4219 * Gfx9Lib::GetMipChainInfo
4220 *
4221 * @brief
4222 * Internal function to get out information about mip chain
4223 *
4224 * @return
4225 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4226 ************************************************************************************************************************
4227 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4228 UINT_32 Gfx9Lib::GetMipChainInfo(
4229 AddrResourceType resourceType,
4230 AddrSwizzleMode swizzleMode,
4231 UINT_32 bpp,
4232 UINT_32 mip0Width,
4233 UINT_32 mip0Height,
4234 UINT_32 mip0Depth,
4235 UINT_32 blockWidth,
4236 UINT_32 blockHeight,
4237 UINT_32 blockDepth,
4238 UINT_32 numMipLevel,
4239 ADDR2_MIP_INFO* pMipInfo) const
4240 {
4241 const Dim3d tailMaxDim =
4242 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4243
4244 UINT_32 mipPitch = mip0Width;
4245 UINT_32 mipHeight = mip0Height;
4246 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4247 UINT_32 offset = 0;
4248 UINT_32 firstMipIdInTail = numMipLevel;
4249 BOOL_32 inTail = FALSE;
4250 BOOL_32 finalDim = FALSE;
4251 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4252 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4253
4254 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4255 {
4256 if (inTail)
4257 {
4258 if (finalDim == FALSE)
4259 {
4260 UINT_32 mipSize;
4261
4262 if (is3dThick)
4263 {
4264 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4265 }
4266 else
4267 {
4268 mipSize = mipPitch * mipHeight * (bpp >> 3);
4269 }
4270
4271 if (mipSize <= 256)
4272 {
4273 UINT_32 index = Log2(bpp >> 3);
4274
4275 if (is3dThick)
4276 {
4277 mipPitch = Block256_3dZ[index].w;
4278 mipHeight = Block256_3dZ[index].h;
4279 mipDepth = Block256_3dZ[index].d;
4280 }
4281 else
4282 {
4283 mipPitch = Block256_2d[index].w;
4284 mipHeight = Block256_2d[index].h;
4285 }
4286
4287 finalDim = TRUE;
4288 }
4289 }
4290 }
4291 else
4292 {
4293 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4294 mipPitch, mipHeight, mipDepth);
4295
4296 if (inTail)
4297 {
4298 firstMipIdInTail = mipId;
4299 mipPitch = tailMaxDim.w;
4300 mipHeight = tailMaxDim.h;
4301
4302 if (is3dThick)
4303 {
4304 mipDepth = tailMaxDim.d;
4305 }
4306 }
4307 else
4308 {
4309 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4310 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4311
4312 if (is3dThick)
4313 {
4314 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4315 }
4316 }
4317 }
4318
4319 if (pMipInfo != NULL)
4320 {
4321 pMipInfo[mipId].pitch = mipPitch;
4322 pMipInfo[mipId].height = mipHeight;
4323 pMipInfo[mipId].depth = mipDepth;
4324 pMipInfo[mipId].offset = offset;
4325 }
4326
4327 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4328
4329 if (finalDim)
4330 {
4331 if (is3dThin)
4332 {
4333 mipDepth = Max(mipDepth >> 1, 1u);
4334 }
4335 }
4336 else
4337 {
4338 mipPitch = Max(mipPitch >> 1, 1u);
4339 mipHeight = Max(mipHeight >> 1, 1u);
4340
4341 if (is3dThick || is3dThin)
4342 {
4343 mipDepth = Max(mipDepth >> 1, 1u);
4344 }
4345 }
4346 }
4347
4348 return firstMipIdInTail;
4349 }
4350
4351 /**
4352 ************************************************************************************************************************
4353 * Gfx9Lib::GetMetaMiptailInfo
4354 *
4355 * @brief
4356 * Get mip tail coordinate information.
4357 *
4358 * @return
4359 * N/A
4360 ************************************************************************************************************************
4361 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4362 VOID Gfx9Lib::GetMetaMiptailInfo(
4363 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4364 Dim3d mipCoord, ///< [in] mip tail base coord
4365 UINT_32 numMipInTail, ///< [in] number of mips in tail
4366 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4367 ) const
4368 {
4369 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4370 UINT_32 mipWidth = pMetaBlkDim->w;
4371 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4372 UINT_32 mipDepth = pMetaBlkDim->d;
4373 UINT_32 minInc;
4374
4375 if (isThick)
4376 {
4377 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4378 }
4379 else if (pMetaBlkDim->h >= 1024)
4380 {
4381 minInc = 256;
4382 }
4383 else if (pMetaBlkDim->h == 512)
4384 {
4385 minInc = 128;
4386 }
4387 else
4388 {
4389 minInc = 64;
4390 }
4391
4392 UINT_32 blk32MipId = 0xFFFFFFFF;
4393
4394 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4395 {
4396 pInfo[mip].inMiptail = TRUE;
4397 pInfo[mip].startX = mipCoord.w;
4398 pInfo[mip].startY = mipCoord.h;
4399 pInfo[mip].startZ = mipCoord.d;
4400 pInfo[mip].width = mipWidth;
4401 pInfo[mip].height = mipHeight;
4402 pInfo[mip].depth = mipDepth;
4403
4404 if (mipWidth <= 32)
4405 {
4406 if (blk32MipId == 0xFFFFFFFF)
4407 {
4408 blk32MipId = mip;
4409 }
4410
4411 mipCoord.w = pInfo[blk32MipId].startX;
4412 mipCoord.h = pInfo[blk32MipId].startY;
4413 mipCoord.d = pInfo[blk32MipId].startZ;
4414
4415 switch (mip - blk32MipId)
4416 {
4417 case 0:
4418 mipCoord.w += 32; // 16x16
4419 break;
4420 case 1:
4421 mipCoord.h += 32; // 8x8
4422 break;
4423 case 2:
4424 mipCoord.h += 32; // 4x4
4425 mipCoord.w += 16;
4426 break;
4427 case 3:
4428 mipCoord.h += 32; // 2x2
4429 mipCoord.w += 32;
4430 break;
4431 case 4:
4432 mipCoord.h += 32; // 1x1
4433 mipCoord.w += 48;
4434 break;
4435 // The following are for BC/ASTC formats
4436 case 5:
4437 mipCoord.h += 48; // 1/2 x 1/2
4438 break;
4439 case 6:
4440 mipCoord.h += 48; // 1/4 x 1/4
4441 mipCoord.w += 16;
4442 break;
4443 case 7:
4444 mipCoord.h += 48; // 1/8 x 1/8
4445 mipCoord.w += 32;
4446 break;
4447 case 8:
4448 mipCoord.h += 48; // 1/16 x 1/16
4449 mipCoord.w += 48;
4450 break;
4451 default:
4452 ADDR_ASSERT_ALWAYS();
4453 break;
4454 }
4455
4456 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4457 mipHeight = mipWidth;
4458
4459 if (isThick)
4460 {
4461 mipDepth = mipWidth;
4462 }
4463 }
4464 else
4465 {
4466 if (mipWidth <= minInc)
4467 {
4468 // if we're below the minimal increment...
4469 if (isThick)
4470 {
4471 // For 3d, just go in z direction
4472 mipCoord.d += mipDepth;
4473 }
4474 else
4475 {
4476 // For 2d, first go across, then down
4477 if ((mipWidth * 2) == minInc)
4478 {
4479 // if we're 2 mips below, that's when we go back in x, and down in y
4480 mipCoord.w -= minInc;
4481 mipCoord.h += minInc;
4482 }
4483 else
4484 {
4485 // otherwise, just go across in x
4486 mipCoord.w += minInc;
4487 }
4488 }
4489 }
4490 else
4491 {
4492 // On even mip, go down, otherwise, go across
4493 if (mip & 1)
4494 {
4495 mipCoord.w += mipWidth;
4496 }
4497 else
4498 {
4499 mipCoord.h += mipHeight;
4500 }
4501 }
4502 // Divide the width by 2
4503 mipWidth >>= 1;
4504 // After the first mip in tail, the mip is always a square
4505 mipHeight = mipWidth;
4506 // ...or for 3d, a cube
4507 if (isThick)
4508 {
4509 mipDepth = mipWidth;
4510 }
4511 }
4512 }
4513 }
4514
4515 /**
4516 ************************************************************************************************************************
4517 * Gfx9Lib::GetMipStartPos
4518 *
4519 * @brief
4520 * Internal function to get out information about mip logical start position
4521 *
4522 * @return
4523 * logical start position in macro block width/heith/depth of one mip level within one slice
4524 ************************************************************************************************************************
4525 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4526 Dim3d Gfx9Lib::GetMipStartPos(
4527 AddrResourceType resourceType,
4528 AddrSwizzleMode swizzleMode,
4529 UINT_32 width,
4530 UINT_32 height,
4531 UINT_32 depth,
4532 UINT_32 blockWidth,
4533 UINT_32 blockHeight,
4534 UINT_32 blockDepth,
4535 UINT_32 mipId,
4536 UINT_32 log2ElementBytes,
4537 UINT_32* pMipTailBytesOffset) const
4538 {
4539 Dim3d mipStartPos = {0};
4540 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4541
4542 // Report mip in tail if Mip0 is already in mip tail
4543 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4544 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4545 UINT_32 mipIndexInTail = mipId;
4546
4547 if (inMipTail == FALSE)
4548 {
4549 // Mip 0 dimension, unit in block
4550 UINT_32 mipWidthInBlk = width / blockWidth;
4551 UINT_32 mipHeightInBlk = height / blockHeight;
4552 UINT_32 mipDepthInBlk = depth / blockDepth;
4553 AddrMajorMode majorMode = GetMajorMode(resourceType,
4554 swizzleMode,
4555 mipWidthInBlk,
4556 mipHeightInBlk,
4557 mipDepthInBlk);
4558
4559 UINT_32 endingMip = mipId + 1;
4560
4561 for (UINT_32 i = 1; i <= mipId; i++)
4562 {
4563 if ((i == 1) || (i == 3))
4564 {
4565 if (majorMode == ADDR_MAJOR_Y)
4566 {
4567 mipStartPos.w += mipWidthInBlk;
4568 }
4569 else
4570 {
4571 mipStartPos.h += mipHeightInBlk;
4572 }
4573 }
4574 else
4575 {
4576 if (majorMode == ADDR_MAJOR_X)
4577 {
4578 mipStartPos.w += mipWidthInBlk;
4579 }
4580 else if (majorMode == ADDR_MAJOR_Y)
4581 {
4582 mipStartPos.h += mipHeightInBlk;
4583 }
4584 else
4585 {
4586 mipStartPos.d += mipDepthInBlk;
4587 }
4588 }
4589
4590 BOOL_32 inTail = FALSE;
4591
4592 if (IsThick(resourceType, swizzleMode))
4593 {
4594 UINT_32 dim = log2BlkSize % 3;
4595
4596 if (dim == 0)
4597 {
4598 inTail =
4599 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4600 }
4601 else if (dim == 1)
4602 {
4603 inTail =
4604 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4605 }
4606 else
4607 {
4608 inTail =
4609 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4610 }
4611 }
4612 else
4613 {
4614 if (log2BlkSize & 1)
4615 {
4616 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4617 }
4618 else
4619 {
4620 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4621 }
4622 }
4623
4624 if (inTail)
4625 {
4626 endingMip = i;
4627 break;
4628 }
4629
4630 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4631 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4632 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4633 }
4634
4635 if (mipId >= endingMip)
4636 {
4637 inMipTail = TRUE;
4638 mipIndexInTail = mipId - endingMip;
4639 }
4640 }
4641
4642 if (inMipTail)
4643 {
4644 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4645 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4646 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4647 }
4648
4649 return mipStartPos;
4650 }
4651
4652 /**
4653 ************************************************************************************************************************
4654 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4655 *
4656 * @brief
4657 * Internal function to calculate address from coord for tiled swizzle surface
4658 *
4659 * @return
4660 * ADDR_E_RETURNCODE
4661 ************************************************************************************************************************
4662 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4663 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4664 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4665 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4666 ) const
4667 {
4668 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4669 localIn.swizzleMode = pIn->swizzleMode;
4670 localIn.flags = pIn->flags;
4671 localIn.resourceType = pIn->resourceType;
4672 localIn.bpp = pIn->bpp;
4673 localIn.width = Max(pIn->unalignedWidth, 1u);
4674 localIn.height = Max(pIn->unalignedHeight, 1u);
4675 localIn.numSlices = Max(pIn->numSlices, 1u);
4676 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4677 localIn.numSamples = Max(pIn->numSamples, 1u);
4678 localIn.numFrags = Max(pIn->numFrags, 1u);
4679 if (localIn.numMipLevels <= 1)
4680 {
4681 localIn.pitchInElement = pIn->pitchInElement;
4682 }
4683
4684 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4685 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4686
4687 BOOL_32 valid = (returnCode == ADDR_OK) &&
4688 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4689 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4690 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4691
4692 if (valid)
4693 {
4694 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4695 Dim3d mipStartPos = {0};
4696 UINT_32 mipTailBytesOffset = 0;
4697
4698 if (pIn->numMipLevels > 1)
4699 {
4700 // Mip-map chain cannot be MSAA surface
4701 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4702
4703 mipStartPos = GetMipStartPos(pIn->resourceType,
4704 pIn->swizzleMode,
4705 localOut.pitch,
4706 localOut.height,
4707 localOut.numSlices,
4708 localOut.blockWidth,
4709 localOut.blockHeight,
4710 localOut.blockSlices,
4711 pIn->mipId,
4712 log2ElementBytes,
4713 &mipTailBytesOffset);
4714 }
4715
4716 UINT_32 interleaveOffset = 0;
4717 UINT_32 pipeBits = 0;
4718 UINT_32 pipeXor = 0;
4719 UINT_32 bankBits = 0;
4720 UINT_32 bankXor = 0;
4721
4722 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4723 {
4724 UINT_32 blockOffset = 0;
4725 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4726
4727 if (IsZOrderSwizzle(pIn->swizzleMode))
4728 {
4729 // Morton generation
4730 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4731 {
4732 UINT_32 totalLowBits = 6 - log2ElementBytes;
4733 UINT_32 mortBits = totalLowBits / 2;
4734 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4735 // Are 9 bits enough?
4736 UINT_32 highBitsValue =
4737 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4738 blockOffset = lowBitsValue | highBitsValue;
4739 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4740 }
4741 else
4742 {
4743 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4744 }
4745
4746 // Fill LSBs with sample bits
4747 if (pIn->numSamples > 1)
4748 {
4749 blockOffset *= pIn->numSamples;
4750 blockOffset |= pIn->sample;
4751 }
4752
4753 // Shift according to BytesPP
4754 blockOffset <<= log2ElementBytes;
4755 }
4756 else
4757 {
4758 // Micro block offset
4759 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4760 blockOffset = microBlockOffset;
4761
4762 // Micro block dimension
4763 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4764 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4765 // Morton generation, does 12 bit enough?
4766 blockOffset |=
4767 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4768
4769 // Sample bits start location
4770 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4771 // Join sample bits information to the highest Macro block bits
4772 if (IsNonPrtXor(pIn->swizzleMode))
4773 {
4774 // Non-prt-Xor : xor highest Macro block bits with sample bits
4775 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4776 }
4777 else
4778 {
4779 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4780 // after this op, the blockOffset only contains log2 Macro block size bits
4781 blockOffset %= (1 << sampleStart);
4782 blockOffset |= (pIn->sample << sampleStart);
4783 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4784 }
4785 }
4786
4787 if (IsXor(pIn->swizzleMode))
4788 {
4789 // Mask off bits above Macro block bits to keep page synonyms working for prt
4790 if (IsPrt(pIn->swizzleMode))
4791 {
4792 blockOffset &= ((1 << log2BlkSize) - 1);
4793 }
4794
4795 // Preserve offset inside pipe interleave
4796 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4797 blockOffset >>= m_pipeInterleaveLog2;
4798
4799 // Pipe/Se xor bits
4800 pipeBits = GetPipeXorBits(log2BlkSize);
4801 // Pipe xor
4802 pipeXor = FoldXor2d(blockOffset, pipeBits);
4803 blockOffset >>= pipeBits;
4804
4805 // Bank xor bits
4806 bankBits = GetBankXorBits(log2BlkSize);
4807 // Bank Xor
4808 bankXor = FoldXor2d(blockOffset, bankBits);
4809 blockOffset >>= bankBits;
4810
4811 // Put all the part back together
4812 blockOffset <<= bankBits;
4813 blockOffset |= bankXor;
4814 blockOffset <<= pipeBits;
4815 blockOffset |= pipeXor;
4816 blockOffset <<= m_pipeInterleaveLog2;
4817 blockOffset |= interleaveOffset;
4818 }
4819
4820 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4821 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4822
4823 blockOffset |= mipTailBytesOffset;
4824
4825 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4826 {
4827 // Apply slice xor if not MSAA/PRT
4828 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4829 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4830 (m_pipeInterleaveLog2 + pipeBits));
4831 }
4832
4833 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4834 bankBits, pipeBits, &blockOffset);
4835
4836 blockOffset %= (1 << log2BlkSize);
4837
4838 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4839 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4840 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4841 UINT_64 macroBlockIndex =
4842 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4843 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4844 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4845
4846 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
4847 }
4848 else
4849 {
4850 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4851
4852 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4853
4854 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4855 (pIn->y / microBlockDim.h),
4856 (pIn->slice / microBlockDim.d),
4857 8);
4858
4859 blockOffset <<= 10;
4860 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4861
4862 if (IsXor(pIn->swizzleMode))
4863 {
4864 // Mask off bits above Macro block bits to keep page synonyms working for prt
4865 if (IsPrt(pIn->swizzleMode))
4866 {
4867 blockOffset &= ((1 << log2BlkSize) - 1);
4868 }
4869
4870 // Preserve offset inside pipe interleave
4871 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4872 blockOffset >>= m_pipeInterleaveLog2;
4873
4874 // Pipe/Se xor bits
4875 pipeBits = GetPipeXorBits(log2BlkSize);
4876 // Pipe xor
4877 pipeXor = FoldXor3d(blockOffset, pipeBits);
4878 blockOffset >>= pipeBits;
4879
4880 // Bank xor bits
4881 bankBits = GetBankXorBits(log2BlkSize);
4882 // Bank Xor
4883 bankXor = FoldXor3d(blockOffset, bankBits);
4884 blockOffset >>= bankBits;
4885
4886 // Put all the part back together
4887 blockOffset <<= bankBits;
4888 blockOffset |= bankXor;
4889 blockOffset <<= pipeBits;
4890 blockOffset |= pipeXor;
4891 blockOffset <<= m_pipeInterleaveLog2;
4892 blockOffset |= interleaveOffset;
4893 }
4894
4895 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4896 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4897 blockOffset |= mipTailBytesOffset;
4898
4899 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4900 bankBits, pipeBits, &blockOffset);
4901
4902 blockOffset %= (1 << log2BlkSize);
4903
4904 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4905 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4906 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4907
4908 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4909 UINT_32 sliceSizeInBlock =
4910 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4911 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4912
4913 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
4914 }
4915 }
4916 else
4917 {
4918 returnCode = ADDR_INVALIDPARAMS;
4919 }
4920
4921 return returnCode;
4922 }
4923
4924 /**
4925 ************************************************************************************************************************
4926 * Gfx9Lib::ComputeSurfaceInfoLinear
4927 *
4928 * @brief
4929 * Internal function to calculate padding for linear swizzle 2D/3D surface
4930 *
4931 * @return
4932 * N/A
4933 ************************************************************************************************************************
4934 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const4935 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4936 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4937 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4938 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4939 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4940 ) const
4941 {
4942 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4943
4944 UINT_32 elementBytes = pIn->bpp >> 3;
4945 UINT_32 pitchAlignInElement = 0;
4946
4947 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4948 {
4949 ADDR_ASSERT(pIn->numMipLevels <= 1);
4950 ADDR_ASSERT(pIn->numSlices <= 1);
4951 pitchAlignInElement = 1;
4952 }
4953 else
4954 {
4955 pitchAlignInElement = (256 / elementBytes);
4956 }
4957
4958 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4959 UINT_32 slice0PaddedHeight = pIn->height;
4960
4961 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4962 &mipChainWidth, &slice0PaddedHeight);
4963
4964 if (returnCode == ADDR_OK)
4965 {
4966 UINT_32 mipChainHeight = 0;
4967 UINT_32 mipHeight = pIn->height;
4968 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4969
4970 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4971 {
4972 if (pMipInfo != NULL)
4973 {
4974 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4975 pMipInfo[i].pitch = mipChainWidth;
4976 pMipInfo[i].height = mipHeight;
4977 pMipInfo[i].depth = mipDepth;
4978 }
4979
4980 mipChainHeight += mipHeight;
4981 mipHeight = RoundHalf(mipHeight);
4982 mipHeight = Max(mipHeight, 1u);
4983 }
4984
4985 *pMipmap0PaddedWidth = mipChainWidth;
4986 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4987 }
4988
4989 return returnCode;
4990 }
4991
4992 /**
4993 ************************************************************************************************************************
4994 * Gfx9Lib::ComputeThinBlockDimension
4995 *
4996 * @brief
4997 * Internal function to get thin block width/height/depth in element from surface input params.
4998 *
4999 * @return
5000 * N/A
5001 ************************************************************************************************************************
5002 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5003 VOID Gfx9Lib::ComputeThinBlockDimension(
5004 UINT_32* pWidth,
5005 UINT_32* pHeight,
5006 UINT_32* pDepth,
5007 UINT_32 bpp,
5008 UINT_32 numSamples,
5009 AddrResourceType resourceType,
5010 AddrSwizzleMode swizzleMode) const
5011 {
5012 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5013
5014 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5015 const UINT_32 eleBytes = bpp >> 3;
5016 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5017 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5018 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5019 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5020
5021 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5022
5023 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5024 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5025 *pDepth = 1;
5026
5027 if (numSamples > 1)
5028 {
5029 const UINT_32 log2sample = Log2(numSamples);
5030 const UINT_32 q = log2sample >> 1;
5031 const UINT_32 r = log2sample & 1;
5032
5033 if (log2BlkSize & 1)
5034 {
5035 *pWidth >>= q;
5036 *pHeight >>= (q + r);
5037 }
5038 else
5039 {
5040 *pWidth >>= (q + r);
5041 *pHeight >>= q;
5042 }
5043 }
5044 }
5045
5046 } // V2
5047 } // Addr
5048