1 /*
2 * Copyright © 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /**
28 ************************************************************************************************************************
29 * @file gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33
34 #include "gfx9addrlib.h"
35
36 #include "gfx9_gb_reg.h"
37
38 #include "amdgpu_asic_addr.h"
39
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////
42
43 namespace Addr
44 {
45
46 /**
47 ************************************************************************************************************************
48 * Gfx9HwlInit
49 *
50 * @brief
51 * Creates an Gfx9Lib object.
52 *
53 * @return
54 * Returns an Gfx9Lib object pointer.
55 ************************************************************************************************************************
56 */
Gfx9HwlInit(const Client * pClient)57 Addr::Lib* Gfx9HwlInit(const Client* pClient)
58 {
59 return V2::Gfx9Lib::CreateObj(pClient);
60 }
61
62 namespace V2
63 {
64
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 // Static Const Member
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68
69 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
70 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt
71 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
72 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
73 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D
74 {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R
75
76 {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z
77 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
78 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D
79 {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R
80
81 {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z
82 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
83 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D
84 {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_R
85
86 {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z
87 {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S
88 {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D
89 {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R
90
91 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T
92 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T
93 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T
94 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T
95
96 {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x
97 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x
98 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x
99 {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x
100
101 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X
102 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X
103 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X
104 {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X
105
106 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X
107 {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X
108 {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X
109 {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X
110 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
111 };
112
113 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
114 8, 6, 5, 4, 3, 2, 1, 0};
115
116 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
117
118 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
119
120 /**
121 ************************************************************************************************************************
122 * Gfx9Lib::Gfx9Lib
123 *
124 * @brief
125 * Constructor
126 *
127 ************************************************************************************************************************
128 */
Gfx9Lib(const Client * pClient)129 Gfx9Lib::Gfx9Lib(const Client* pClient)
130 :
131 Lib(pClient),
132 m_numEquations(0)
133 {
134 m_class = AI_ADDRLIB;
135 memset(&m_settings, 0, sizeof(m_settings));
136 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
137 }
138
139 /**
140 ************************************************************************************************************************
141 * Gfx9Lib::~Gfx9Lib
142 *
143 * @brief
144 * Destructor
145 ************************************************************************************************************************
146 */
~Gfx9Lib()147 Gfx9Lib::~Gfx9Lib()
148 {
149 }
150
151 /**
152 ************************************************************************************************************************
153 * Gfx9Lib::HwlComputeHtileInfo
154 *
155 * @brief
156 * Interface function stub of AddrComputeHtilenfo
157 *
158 * @return
159 * ADDR_E_RETURNCODE
160 ************************************************************************************************************************
161 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const162 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
163 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
164 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
165 ) const
166 {
167 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
168 pIn->swizzleMode);
169
170 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
171
172 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
173
174 if ((numPipeTotal == 1) && (numRbTotal == 1))
175 {
176 numCompressBlkPerMetaBlkLog2 = 10;
177 }
178 else
179 {
180 if (m_settings.applyAliasFix)
181 {
182 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
183 }
184 else
185 {
186 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
187 }
188 }
189
190 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
191
192 Dim3d metaBlkDim = {8, 8, 1};
193 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
194 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
195 UINT_32 heightAmp = totalAmpBits - widthAmp;
196 metaBlkDim.w <<= widthAmp;
197 metaBlkDim.h <<= heightAmp;
198
199 #if DEBUG
200 Dim3d metaBlkDimDbg = {8, 8, 1};
201 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
202 {
203 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
204 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
205 {
206 metaBlkDimDbg.h <<= 1;
207 }
208 else
209 {
210 metaBlkDimDbg.w <<= 1;
211 }
212 }
213 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
214 #endif
215
216 UINT_32 numMetaBlkX;
217 UINT_32 numMetaBlkY;
218 UINT_32 numMetaBlkZ;
219
220 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
221 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
222 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
223
224 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
225
226 if (m_settings.htileAlignFix)
227 {
228 sizeAlign <<= 1;
229 }
230
231 pOut->pitch = numMetaBlkX * metaBlkDim.w;
232 pOut->height = numMetaBlkY * metaBlkDim.h;
233 pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
234
235 pOut->metaBlkWidth = metaBlkDim.w;
236 pOut->metaBlkHeight = metaBlkDim.h;
237 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
238
239 pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
240
241 if (m_settings.metaBaseAlignFix)
242 {
243 pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
244 }
245
246 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
247 {
248 UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
249
250 if (additionalAlign > sizeAlign)
251 {
252 sizeAlign = additionalAlign;
253 }
254 }
255
256 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
257
258 return ADDR_OK;
259 }
260
261 /**
262 ************************************************************************************************************************
263 * Gfx9Lib::HwlComputeCmaskInfo
264 *
265 * @brief
266 * Interface function stub of AddrComputeCmaskInfo
267 *
268 * @return
269 * ADDR_E_RETURNCODE
270 ************************************************************************************************************************
271 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const272 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
273 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
274 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
275 ) const
276 {
277 // TODO: Clarify with AddrLib team
278 // ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
279
280 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
281 pIn->swizzleMode);
282
283 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
284
285 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
286
287 if ((numPipeTotal == 1) && (numRbTotal == 1))
288 {
289 numCompressBlkPerMetaBlkLog2 = 13;
290 }
291 else
292 {
293 if (m_settings.applyAliasFix)
294 {
295 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
296 }
297 else
298 {
299 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
300 }
301
302 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
303 }
304
305 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
306
307 Dim2d metaBlkDim = {8, 8};
308 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
309 UINT_32 heightAmp = totalAmpBits >> 1;
310 UINT_32 widthAmp = totalAmpBits - heightAmp;
311 metaBlkDim.w <<= widthAmp;
312 metaBlkDim.h <<= heightAmp;
313
314 #if DEBUG
315 Dim2d metaBlkDimDbg = {8, 8};
316 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
317 {
318 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
319 {
320 metaBlkDimDbg.h <<= 1;
321 }
322 else
323 {
324 metaBlkDimDbg.w <<= 1;
325 }
326 }
327 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
328 #endif
329
330 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
331 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
332 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
333
334 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
335
336 pOut->pitch = numMetaBlkX * metaBlkDim.w;
337 pOut->height = numMetaBlkY * metaBlkDim.h;
338 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
339 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
340 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
341
342 if (m_settings.metaBaseAlignFix)
343 {
344 pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
345 }
346
347 pOut->metaBlkWidth = metaBlkDim.w;
348 pOut->metaBlkHeight = metaBlkDim.h;
349
350 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
351
352 return ADDR_OK;
353 }
354
355 /**
356 ************************************************************************************************************************
357 * Gfx9Lib::GetMetaMipInfo
358 *
359 * @brief
360 * Get meta mip info
361 *
362 * @return
363 * N/A
364 ************************************************************************************************************************
365 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const366 VOID Gfx9Lib::GetMetaMipInfo(
367 UINT_32 numMipLevels, ///< [in] number of mip levels
368 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
369 BOOL_32 dataThick, ///< [in] data surface is thick
370 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
371 UINT_32 mip0Width, ///< [in] mip0 width
372 UINT_32 mip0Height, ///< [in] mip0 height
373 UINT_32 mip0Depth, ///< [in] mip0 depth
374 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
375 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
376 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
377 const
378 {
379 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
380 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
381 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
382 UINT_32 tailWidth = pMetaBlkDim->w;
383 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
384 UINT_32 tailDepth = pMetaBlkDim->d;
385 BOOL_32 inTail = FALSE;
386 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
387
388 if (numMipLevels > 1)
389 {
390 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
391 {
392 // Z major
393 major = ADDR_MAJOR_Z;
394 }
395 else if (numMetaBlkX >= numMetaBlkY)
396 {
397 // X major
398 major = ADDR_MAJOR_X;
399 }
400 else
401 {
402 // Y major
403 major = ADDR_MAJOR_Y;
404 }
405
406 inTail = ((mip0Width <= tailWidth) &&
407 (mip0Height <= tailHeight) &&
408 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
409
410 if (inTail == FALSE)
411 {
412 UINT_32 orderLimit;
413 UINT_32 *pMipDim;
414 UINT_32 *pOrderDim;
415
416 if (major == ADDR_MAJOR_Z)
417 {
418 // Z major
419 pMipDim = &numMetaBlkY;
420 pOrderDim = &numMetaBlkZ;
421 orderLimit = 4;
422 }
423 else if (major == ADDR_MAJOR_X)
424 {
425 // X major
426 pMipDim = &numMetaBlkY;
427 pOrderDim = &numMetaBlkX;
428 orderLimit = 4;
429 }
430 else
431 {
432 // Y major
433 pMipDim = &numMetaBlkX;
434 pOrderDim = &numMetaBlkY;
435 orderLimit = 2;
436 }
437
438 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
439 {
440 *pMipDim += 2;
441 }
442 else
443 {
444 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
445 }
446 }
447 }
448
449 if (pInfo != NULL)
450 {
451 UINT_32 mipWidth = mip0Width;
452 UINT_32 mipHeight = mip0Height;
453 UINT_32 mipDepth = mip0Depth;
454 Dim3d mipCoord = {0};
455
456 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
457 {
458 if (inTail)
459 {
460 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
461 pMetaBlkDim);
462 break;
463 }
464 else
465 {
466 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
467 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
468 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
469
470 pInfo[mip].inMiptail = FALSE;
471 pInfo[mip].startX = mipCoord.w;
472 pInfo[mip].startY = mipCoord.h;
473 pInfo[mip].startZ = mipCoord.d;
474 pInfo[mip].width = mipWidth;
475 pInfo[mip].height = mipHeight;
476 pInfo[mip].depth = dataThick ? mipDepth : 1;
477
478 if ((mip >= 3) || (mip & 1))
479 {
480 switch (major)
481 {
482 case ADDR_MAJOR_X:
483 mipCoord.w += mipWidth;
484 break;
485 case ADDR_MAJOR_Y:
486 mipCoord.h += mipHeight;
487 break;
488 case ADDR_MAJOR_Z:
489 mipCoord.d += mipDepth;
490 break;
491 default:
492 break;
493 }
494 }
495 else
496 {
497 switch (major)
498 {
499 case ADDR_MAJOR_X:
500 mipCoord.h += mipHeight;
501 break;
502 case ADDR_MAJOR_Y:
503 mipCoord.w += mipWidth;
504 break;
505 case ADDR_MAJOR_Z:
506 mipCoord.h += mipHeight;
507 break;
508 default:
509 break;
510 }
511 }
512
513 mipWidth = Max(mipWidth >> 1, 1u);
514 mipHeight = Max(mipHeight >> 1, 1u);
515 mipDepth = Max(mipDepth >> 1, 1u);
516
517 inTail = ((mipWidth <= tailWidth) &&
518 (mipHeight <= tailHeight) &&
519 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
520 }
521 }
522 }
523
524 *pNumMetaBlkX = numMetaBlkX;
525 *pNumMetaBlkY = numMetaBlkY;
526 *pNumMetaBlkZ = numMetaBlkZ;
527 }
528
529 /**
530 ************************************************************************************************************************
531 * Gfx9Lib::HwlComputeDccInfo
532 *
533 * @brief
534 * Interface function to compute DCC key info
535 *
536 * @return
537 * ADDR_E_RETURNCODE
538 ************************************************************************************************************************
539 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const540 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
541 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
542 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
543 ) const
544 {
545 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
546 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
547 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
548
549 if (dataLinear)
550 {
551 metaLinear = TRUE;
552 }
553 else if (metaLinear == TRUE)
554 {
555 pipeAligned = FALSE;
556 }
557
558 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
559
560 if (metaLinear)
561 {
562 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
563 ADDR_ASSERT_ALWAYS();
564
565 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
566 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
567 }
568 else
569 {
570 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
571
572 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
573
574 UINT_32 numFrags = Max(pIn->numFrags, 1u);
575 UINT_32 numSlices = Max(pIn->numSlices, 1u);
576
577 minMetaBlkSize /= numFrags;
578
579 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
580
581 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
582
583 if ((numPipeTotal > 1) || (numRbTotal > 1))
584 {
585 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
586
587 numCompressBlkPerMetaBlk =
588 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
589
590 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
591 {
592 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
593 }
594 }
595
596 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
597 Dim3d metaBlkDim = compressBlkDim;
598
599 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
600 {
601 if ((metaBlkDim.h < metaBlkDim.w) ||
602 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
603 {
604 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
605 {
606 metaBlkDim.h <<= 1;
607 }
608 else
609 {
610 metaBlkDim.d <<= 1;
611 }
612 }
613 else
614 {
615 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
616 {
617 metaBlkDim.w <<= 1;
618 }
619 else
620 {
621 metaBlkDim.d <<= 1;
622 }
623 }
624 }
625
626 UINT_32 numMetaBlkX;
627 UINT_32 numMetaBlkY;
628 UINT_32 numMetaBlkZ;
629
630 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
631 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
632 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
633
634 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
635
636 if (numFrags > m_maxCompFrag)
637 {
638 sizeAlign *= (numFrags / m_maxCompFrag);
639 }
640
641 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
642 numCompressBlkPerMetaBlk * numFrags;
643 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
644 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
645
646 if (m_settings.metaBaseAlignFix)
647 {
648 pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
649 }
650
651 pOut->pitch = numMetaBlkX * metaBlkDim.w;
652 pOut->height = numMetaBlkY * metaBlkDim.h;
653 pOut->depth = numMetaBlkZ * metaBlkDim.d;
654
655 pOut->compressBlkWidth = compressBlkDim.w;
656 pOut->compressBlkHeight = compressBlkDim.h;
657 pOut->compressBlkDepth = compressBlkDim.d;
658
659 pOut->metaBlkWidth = metaBlkDim.w;
660 pOut->metaBlkHeight = metaBlkDim.h;
661 pOut->metaBlkDepth = metaBlkDim.d;
662
663 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
664 pOut->fastClearSizePerSlice =
665 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
666 }
667
668 return ADDR_OK;
669 }
670
671 /**
672 ************************************************************************************************************************
673 * Gfx9Lib::HwlGetMaxAlignments
674 *
675 * @brief
676 * Gets maximum alignments
677 * @return
678 * ADDR_E_RETURNCODE
679 ************************************************************************************************************************
680 */
HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT * pOut) const681 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
682 ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure
683 ) const
684 {
685 pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
686
687 return ADDR_OK;
688 }
689
690 /**
691 ************************************************************************************************************************
692 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
693 *
694 * @brief
695 * Interface function stub of AddrComputeCmaskAddrFromCoord
696 *
697 * @return
698 * ADDR_E_RETURNCODE
699 ************************************************************************************************************************
700 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)701 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
702 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
703 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
704 {
705 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
706 input.size = sizeof(input);
707 input.cMaskFlags = pIn->cMaskFlags;
708 input.colorFlags = pIn->colorFlags;
709 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
710 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
711 input.numSlices = Max(pIn->numSlices, 1u);
712 input.swizzleMode = pIn->swizzleMode;
713 input.resourceType = pIn->resourceType;
714
715 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
716 output.size = sizeof(output);
717
718 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
719
720 if (returnCode == ADDR_OK)
721 {
722 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
723 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
724 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
725 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
726
727 const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
728 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
729 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
730
731 UINT_32 xb = pIn->x / output.metaBlkWidth;
732 UINT_32 yb = pIn->y / output.metaBlkHeight;
733 UINT_32 zb = pIn->slice;
734
735 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
736 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
737 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
738
739 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
740
741 pOut->addr = address >> 1;
742 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
743
744
745 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
746 pIn->swizzleMode);
747
748 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
749
750 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
751 }
752
753 return returnCode;
754 }
755
756 /**
757 ************************************************************************************************************************
758 * Gfx9Lib::HwlComputeHtileAddrFromCoord
759 *
760 * @brief
761 * Interface function stub of AddrComputeHtileAddrFromCoord
762 *
763 * @return
764 * ADDR_E_RETURNCODE
765 ************************************************************************************************************************
766 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)767 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
768 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
769 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
770 {
771 ADDR_E_RETURNCODE returnCode = ADDR_OK;
772
773 if (pIn->numMipLevels > 1)
774 {
775 returnCode = ADDR_NOTIMPLEMENTED;
776 }
777 else
778 {
779 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
780 input.size = sizeof(input);
781 input.hTileFlags = pIn->hTileFlags;
782 input.depthFlags = pIn->depthflags;
783 input.swizzleMode = pIn->swizzleMode;
784 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
785 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
786 input.numSlices = Max(pIn->numSlices, 1u);
787 input.numMipLevels = Max(pIn->numMipLevels, 1u);
788
789 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
790 output.size = sizeof(output);
791
792 returnCode = ComputeHtileInfo(&input, &output);
793
794 if (returnCode == ADDR_OK)
795 {
796 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
797 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
798 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
799 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
800
801 const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
802 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
803 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
804
805 UINT_32 xb = pIn->x / output.metaBlkWidth;
806 UINT_32 yb = pIn->y / output.metaBlkHeight;
807 UINT_32 zb = pIn->slice;
808
809 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
810 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
811 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
812
813 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
814
815 pOut->addr = address >> 1;
816
817 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
818 pIn->swizzleMode);
819
820 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
821
822 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
823 }
824 }
825
826 return returnCode;
827 }
828
829 /**
830 ************************************************************************************************************************
831 * Gfx9Lib::HwlComputeHtileCoordFromAddr
832 *
833 * @brief
834 * Interface function stub of AddrComputeHtileCoordFromAddr
835 *
836 * @return
837 * ADDR_E_RETURNCODE
838 ************************************************************************************************************************
839 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)840 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
841 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
842 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
843 {
844 ADDR_E_RETURNCODE returnCode = ADDR_OK;
845
846 if (pIn->numMipLevels > 1)
847 {
848 returnCode = ADDR_NOTIMPLEMENTED;
849 }
850 else
851 {
852 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
853 input.size = sizeof(input);
854 input.hTileFlags = pIn->hTileFlags;
855 input.swizzleMode = pIn->swizzleMode;
856 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
857 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
858 input.numSlices = Max(pIn->numSlices, 1u);
859 input.numMipLevels = Max(pIn->numMipLevels, 1u);
860
861 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
862 output.size = sizeof(output);
863
864 returnCode = ComputeHtileInfo(&input, &output);
865
866 if (returnCode == ADDR_OK)
867 {
868 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
869 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
870 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
871 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
872
873 const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
874 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
875 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
876
877 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
878 pIn->swizzleMode);
879
880 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
881
882 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
883
884 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
885 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
886
887 UINT_32 x, y, z, s, m;
888 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
889
890 pOut->slice = m / sliceSizeInBlock;
891 pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
892 pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x;
893 }
894 }
895
896 return returnCode;
897 }
898
899 /**
900 ************************************************************************************************************************
901 * Gfx9Lib::HwlComputeDccAddrFromCoord
902 *
903 * @brief
904 * Interface function stub of AddrComputeDccAddrFromCoord
905 *
906 * @return
907 * ADDR_E_RETURNCODE
908 ************************************************************************************************************************
909 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)910 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
911 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
912 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
913 {
914 ADDR_E_RETURNCODE returnCode = ADDR_OK;
915
916 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
917 {
918 returnCode = ADDR_NOTIMPLEMENTED;
919 }
920 else
921 {
922 ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
923 input.size = sizeof(input);
924 input.dccKeyFlags = pIn->dccKeyFlags;
925 input.colorFlags = pIn->colorFlags;
926 input.swizzleMode = pIn->swizzleMode;
927 input.resourceType = pIn->resourceType;
928 input.bpp = pIn->bpp;
929 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
930 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
931 input.numSlices = Max(pIn->numSlices, 1u);
932 input.numFrags = Max(pIn->numFrags, 1u);
933 input.numMipLevels = Max(pIn->numMipLevels, 1u);
934
935 ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
936 output.size = sizeof(output);
937
938 returnCode = ComputeDccInfo(&input, &output);
939
940 if (returnCode == ADDR_OK)
941 {
942 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
943 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
944 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
945 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
946 UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
947 UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
948 UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
949 UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
950
951 const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
952 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
953 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
954 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2});
955
956 UINT_32 xb = pIn->x / output.metaBlkWidth;
957 UINT_32 yb = pIn->y / output.metaBlkHeight;
958 UINT_32 zb = pIn->slice / output.metaBlkDepth;
959
960 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
961 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
962 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
963
964 UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
965
966 pOut->addr = address >> 1;
967
968 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
969 pIn->swizzleMode);
970
971 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
972
973 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
974 }
975 }
976
977 return returnCode;
978 }
979
980 /**
981 ************************************************************************************************************************
982 * Gfx9Lib::HwlInitGlobalParams
983 *
984 * @brief
985 * Initializes global parameters
986 *
987 * @return
988 * TRUE if all settings are valid
989 *
990 ************************************************************************************************************************
991 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)992 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
993 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
994 {
995 BOOL_32 valid = TRUE;
996
997 if (m_settings.isArcticIsland)
998 {
999 GB_ADDR_CONFIG gbAddrConfig;
1000
1001 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1002
1003 // These values are copied from CModel code
1004 switch (gbAddrConfig.bits.NUM_PIPES)
1005 {
1006 case ADDR_CONFIG_1_PIPE:
1007 m_pipes = 1;
1008 m_pipesLog2 = 0;
1009 break;
1010 case ADDR_CONFIG_2_PIPE:
1011 m_pipes = 2;
1012 m_pipesLog2 = 1;
1013 break;
1014 case ADDR_CONFIG_4_PIPE:
1015 m_pipes = 4;
1016 m_pipesLog2 = 2;
1017 break;
1018 case ADDR_CONFIG_8_PIPE:
1019 m_pipes = 8;
1020 m_pipesLog2 = 3;
1021 break;
1022 case ADDR_CONFIG_16_PIPE:
1023 m_pipes = 16;
1024 m_pipesLog2 = 4;
1025 break;
1026 case ADDR_CONFIG_32_PIPE:
1027 m_pipes = 32;
1028 m_pipesLog2 = 5;
1029 break;
1030 default:
1031 ADDR_ASSERT_ALWAYS();
1032 break;
1033 }
1034
1035 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1036 {
1037 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1038 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1039 m_pipeInterleaveLog2 = 8;
1040 break;
1041 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1042 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1043 m_pipeInterleaveLog2 = 9;
1044 break;
1045 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1046 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1047 m_pipeInterleaveLog2 = 10;
1048 break;
1049 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1050 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1051 m_pipeInterleaveLog2 = 11;
1052 break;
1053 default:
1054 ADDR_ASSERT_ALWAYS();
1055 break;
1056 }
1057
1058 switch (gbAddrConfig.bits.NUM_BANKS)
1059 {
1060 case ADDR_CONFIG_1_BANK:
1061 m_banks = 1;
1062 m_banksLog2 = 0;
1063 break;
1064 case ADDR_CONFIG_2_BANK:
1065 m_banks = 2;
1066 m_banksLog2 = 1;
1067 break;
1068 case ADDR_CONFIG_4_BANK:
1069 m_banks = 4;
1070 m_banksLog2 = 2;
1071 break;
1072 case ADDR_CONFIG_8_BANK:
1073 m_banks = 8;
1074 m_banksLog2 = 3;
1075 break;
1076 case ADDR_CONFIG_16_BANK:
1077 m_banks = 16;
1078 m_banksLog2 = 4;
1079 break;
1080 default:
1081 ADDR_ASSERT_ALWAYS();
1082 break;
1083 }
1084
1085 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1086 {
1087 case ADDR_CONFIG_1_SHADER_ENGINE:
1088 m_se = 1;
1089 m_seLog2 = 0;
1090 break;
1091 case ADDR_CONFIG_2_SHADER_ENGINE:
1092 m_se = 2;
1093 m_seLog2 = 1;
1094 break;
1095 case ADDR_CONFIG_4_SHADER_ENGINE:
1096 m_se = 4;
1097 m_seLog2 = 2;
1098 break;
1099 case ADDR_CONFIG_8_SHADER_ENGINE:
1100 m_se = 8;
1101 m_seLog2 = 3;
1102 break;
1103 default:
1104 ADDR_ASSERT_ALWAYS();
1105 break;
1106 }
1107
1108 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1109 {
1110 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1111 m_rbPerSe = 1;
1112 m_rbPerSeLog2 = 0;
1113 break;
1114 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1115 m_rbPerSe = 2;
1116 m_rbPerSeLog2 = 1;
1117 break;
1118 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1119 m_rbPerSe = 4;
1120 m_rbPerSeLog2 = 2;
1121 break;
1122 default:
1123 ADDR_ASSERT_ALWAYS();
1124 break;
1125 }
1126
1127 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1128 {
1129 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1130 m_maxCompFrag = 1;
1131 m_maxCompFragLog2 = 0;
1132 break;
1133 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1134 m_maxCompFrag = 2;
1135 m_maxCompFragLog2 = 1;
1136 break;
1137 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1138 m_maxCompFrag = 4;
1139 m_maxCompFragLog2 = 2;
1140 break;
1141 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1142 m_maxCompFrag = 8;
1143 m_maxCompFragLog2 = 3;
1144 break;
1145 default:
1146 ADDR_ASSERT_ALWAYS();
1147 break;
1148 }
1149
1150 m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1151 ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1152 ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1153 m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1154 }
1155 else
1156 {
1157 valid = FALSE;
1158 ADDR_NOT_IMPLEMENTED();
1159 }
1160
1161 if (valid)
1162 {
1163 InitEquationTable();
1164 }
1165
1166 return valid;
1167 }
1168
1169 /**
1170 ************************************************************************************************************************
1171 * Gfx9Lib::HwlConvertChipFamily
1172 *
1173 * @brief
1174 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1175 * @return
1176 * ChipFamily
1177 ************************************************************************************************************************
1178 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1179 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1180 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1181 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1182 {
1183 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1184
1185 switch (uChipFamily)
1186 {
1187 case FAMILY_AI:
1188 m_settings.isArcticIsland = 1;
1189 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1190
1191 m_settings.isDce12 = 1;
1192
1193 if (m_settings.isVega10 == 0)
1194 {
1195 m_settings.htileAlignFix = 1;
1196 m_settings.applyAliasFix = 1;
1197 }
1198
1199 m_settings.metaBaseAlignFix = 1;
1200
1201 m_settings.depthPipeXorDisable = 1;
1202 break;
1203 case FAMILY_RV:
1204 m_settings.isArcticIsland = 1;
1205 m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
1206
1207 if (m_settings.isRaven)
1208 {
1209 m_settings.isDcn1 = 1;
1210 }
1211
1212 m_settings.metaBaseAlignFix = 1;
1213
1214 if (ASICREV_IS_RAVEN(uChipRevision))
1215 {
1216 m_settings.depthPipeXorDisable = 1;
1217 }
1218 break;
1219
1220 default:
1221 ADDR_ASSERT(!"This should be a Fusion");
1222 break;
1223 }
1224
1225 return family;
1226 }
1227
1228 /**
1229 ************************************************************************************************************************
1230 * Gfx9Lib::InitRbEquation
1231 *
1232 * @brief
1233 * Init RB equation
1234 * @return
1235 * N/A
1236 ************************************************************************************************************************
1237 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1238 VOID Gfx9Lib::GetRbEquation(
1239 CoordEq* pRbEq, ///< [out] rb equation
1240 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1241 UINT_32 numSeLog2) ///< [in] number of shader engine
1242 const
1243 {
1244 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1245 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1246 Coordinate cx('x', rbRegion);
1247 Coordinate cy('y', rbRegion);
1248
1249 UINT_32 start = 0;
1250 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1251
1252 // Clear the rb equation
1253 pRbEq->resize(0);
1254 pRbEq->resize(numRbTotalLog2);
1255
1256 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1257 {
1258 // Special case when more than 1 SE, and 2 RB per SE
1259 (*pRbEq)[0].add(cx);
1260 (*pRbEq)[0].add(cy);
1261 cx++;
1262 cy++;
1263
1264 if (m_settings.applyAliasFix == false)
1265 {
1266 (*pRbEq)[0].add(cy);
1267 }
1268
1269 (*pRbEq)[0].add(cy);
1270 start++;
1271 }
1272
1273 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1274
1275 for (UINT_32 i = 0; i < numBits; i++)
1276 {
1277 UINT_32 idx =
1278 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1279
1280 if ((i % 2) == 1)
1281 {
1282 (*pRbEq)[idx].add(cx);
1283 cx++;
1284 }
1285 else
1286 {
1287 (*pRbEq)[idx].add(cy);
1288 cy++;
1289 }
1290 }
1291 }
1292
1293 /**
1294 ************************************************************************************************************************
1295 * Gfx9Lib::GetDataEquation
1296 *
1297 * @brief
1298 * Get data equation for fmask and Z
1299 * @return
1300 * N/A
1301 ************************************************************************************************************************
1302 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1303 VOID Gfx9Lib::GetDataEquation(
1304 CoordEq* pDataEq, ///< [out] data surface equation
1305 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1306 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1307 AddrResourceType resourceType, ///< [in] data surface resource type
1308 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1309 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1310 const
1311 {
1312 Coordinate cx('x', 0);
1313 Coordinate cy('y', 0);
1314 Coordinate cz('z', 0);
1315 Coordinate cs('s', 0);
1316
1317 // Clear the equation
1318 pDataEq->resize(0);
1319 pDataEq->resize(27);
1320
1321 if (dataSurfaceType == Gfx9DataColor)
1322 {
1323 if (IsLinear(swizzleMode))
1324 {
1325 Coordinate cm('m', 0);
1326
1327 pDataEq->resize(49);
1328
1329 for (UINT_32 i = 0; i < 49; i++)
1330 {
1331 (*pDataEq)[i].add(cm);
1332 cm++;
1333 }
1334 }
1335 else if (IsThick(resourceType, swizzleMode))
1336 {
1337 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1338 UINT_32 i;
1339 if (IsStandardSwizzle(resourceType, swizzleMode))
1340 {
1341 // Standard 3d swizzle
1342 // Fill in bottom x bits
1343 for (i = elementBytesLog2; i < 4; i++)
1344 {
1345 (*pDataEq)[i].add(cx);
1346 cx++;
1347 }
1348 // Fill in 2 bits of y and then z
1349 for (i = 4; i < 6; i++)
1350 {
1351 (*pDataEq)[i].add(cy);
1352 cy++;
1353 }
1354 for (i = 6; i < 8; i++)
1355 {
1356 (*pDataEq)[i].add(cz);
1357 cz++;
1358 }
1359 if (elementBytesLog2 < 2)
1360 {
1361 // fill in z & y bit
1362 (*pDataEq)[8].add(cz);
1363 (*pDataEq)[9].add(cy);
1364 cz++;
1365 cy++;
1366 }
1367 else if (elementBytesLog2 == 2)
1368 {
1369 // fill in y and x bit
1370 (*pDataEq)[8].add(cy);
1371 (*pDataEq)[9].add(cx);
1372 cy++;
1373 cx++;
1374 }
1375 else
1376 {
1377 // fill in 2 x bits
1378 (*pDataEq)[8].add(cx);
1379 cx++;
1380 (*pDataEq)[9].add(cx);
1381 cx++;
1382 }
1383 }
1384 else
1385 {
1386 // Z 3d swizzle
1387 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1388 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1389 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1390 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1391 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1392 {
1393 (*pDataEq)[i].add(cz);
1394 cz++;
1395 }
1396 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1397 {
1398 // add an x and z
1399 (*pDataEq)[6].add(cx);
1400 (*pDataEq)[7].add(cz);
1401 cx++;
1402 cz++;
1403 }
1404 else if (elementBytesLog2 == 2)
1405 {
1406 // add a y and z
1407 (*pDataEq)[6].add(cy);
1408 (*pDataEq)[7].add(cz);
1409 cy++;
1410 cz++;
1411 }
1412 // add y and x
1413 (*pDataEq)[8].add(cy);
1414 (*pDataEq)[9].add(cx);
1415 cy++;
1416 cx++;
1417 }
1418 // Fill in bit 10 and up
1419 pDataEq->mort3d( cz, cy, cx, 10 );
1420 }
1421 else if (IsThin(resourceType, swizzleMode))
1422 {
1423 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1424 // Color 2D
1425 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1426 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1427 UINT_32 i;
1428 // Fill in bottom x bits
1429 for (i = elementBytesLog2; i < 4; i++)
1430 {
1431 (*pDataEq)[i].add(cx);
1432 cx++;
1433 }
1434 // Fill in bottom y bits
1435 for (i = 4; i < 4 + microYBits; i++)
1436 {
1437 (*pDataEq)[i].add(cy);
1438 cy++;
1439 }
1440 // Fill in last of the micro_x bits
1441 for (i = 4 + microYBits; i < 8; i++)
1442 {
1443 (*pDataEq)[i].add(cx);
1444 cx++;
1445 }
1446 // Fill in x/y bits below sample split
1447 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1448 // Fill in sample bits
1449 for (i = 0; i < numSamplesLog2; i++)
1450 {
1451 cs.set('s', i);
1452 (*pDataEq)[tileSplitStart + i].add(cs);
1453 }
1454 // Fill in x/y bits above sample split
1455 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1456 {
1457 pDataEq->mort2d(cx, cy, blockSizeLog2);
1458 }
1459 else
1460 {
1461 pDataEq->mort2d(cy, cx, blockSizeLog2);
1462 }
1463 }
1464 else
1465 {
1466 ADDR_ASSERT_ALWAYS();
1467 }
1468 }
1469 else
1470 {
1471 // Fmask or depth
1472 UINT_32 sampleStart = elementBytesLog2;
1473 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1474 UINT_32 ymajStart = 6 + numSamplesLog2;
1475
1476 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1477 {
1478 cs.set('s', s);
1479 (*pDataEq)[sampleStart + s].add(cs);
1480 }
1481
1482 // Put in the x-major order pixel bits
1483 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1484 // Put in the y-major order pixel bits
1485 pDataEq->mort2d(cy, cx, ymajStart);
1486 }
1487 }
1488
1489 /**
1490 ************************************************************************************************************************
1491 * Gfx9Lib::GetPipeEquation
1492 *
1493 * @brief
1494 * Get pipe equation
1495 * @return
1496 * N/A
1497 ************************************************************************************************************************
1498 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1499 VOID Gfx9Lib::GetPipeEquation(
1500 CoordEq* pPipeEq, ///< [out] pipe equation
1501 CoordEq* pDataEq, ///< [in] data equation
1502 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1503 UINT_32 numPipeLog2, ///< [in] number of pipes
1504 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1505 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1506 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1507 AddrResourceType resourceType ///< [in] data surface resource type
1508 ) const
1509 {
1510 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1511 CoordEq dataEq;
1512
1513 pDataEq->copy(dataEq);
1514
1515 if (dataSurfaceType == Gfx9DataColor)
1516 {
1517 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1518 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1519 }
1520
1521 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1522
1523 // This section should only apply to z/stencil, maybe fmask
1524 // If the pipe bit is below the comp block size,
1525 // then keep moving up the address until we find a bit that is above
1526 UINT_32 pipeStart = 0;
1527
1528 if (dataSurfaceType != Gfx9DataColor)
1529 {
1530 Coordinate tileMin('x', 3);
1531
1532 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1533 {
1534 pipeStart++;
1535 }
1536
1537 // if pipe is 0, then the first pipe bit is above the comp block size,
1538 // so we don't need to do anything
1539 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1540 // we will get the same pipe equation
1541 if (pipeStart != 0)
1542 {
1543 for (UINT_32 i = 0; i < numPipeLog2; i++)
1544 {
1545 // Copy the jth bit above pipe interleave to the current pipe equation bit
1546 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1547 }
1548 }
1549 }
1550
1551 if (IsPrt(swizzleMode))
1552 {
1553 // Clear out bits above the block size if prt's are enabled
1554 dataEq.resize(blockSizeLog2);
1555 dataEq.resize(48);
1556 }
1557
1558 if (IsXor(swizzleMode))
1559 {
1560 CoordEq xorMask;
1561
1562 if (IsThick(resourceType, swizzleMode))
1563 {
1564 CoordEq xorMask2;
1565
1566 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1567
1568 xorMask.resize(numPipeLog2);
1569
1570 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1571 {
1572 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1573 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1574 }
1575 }
1576 else
1577 {
1578 // Xor in the bits above the pipe+gpu bits
1579 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1580
1581 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1582 {
1583 Coordinate co;
1584 CoordEq xorMask2;
1585 // if 1xaa and not prt, then xor in the z bits
1586 xorMask2.resize(0);
1587 xorMask2.resize(numPipeLog2);
1588 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1589 {
1590 co.set('z', numPipeLog2 - 1 - pipeIdx);
1591 xorMask2[pipeIdx].add(co);
1592 }
1593
1594 pPipeEq->xorin(xorMask2);
1595 }
1596 }
1597
1598 xorMask.reverse();
1599 pPipeEq->xorin(xorMask);
1600 }
1601 }
1602 /**
1603 ************************************************************************************************************************
1604 * Gfx9Lib::GetMetaEquation
1605 *
1606 * @brief
1607 * Get meta equation for cmask/htile/DCC
1608 * @return
1609 * Pointer to a calculated meta equation
1610 ************************************************************************************************************************
1611 */
GetMetaEquation(const MetaEqParams & metaEqParams)1612 const CoordEq* Gfx9Lib::GetMetaEquation(
1613 const MetaEqParams& metaEqParams)
1614 {
1615 UINT_32 cachedMetaEqIndex;
1616
1617 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1618 {
1619 if (memcmp(&metaEqParams,
1620 &m_cachedMetaEqKey[cachedMetaEqIndex],
1621 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1622 {
1623 break;
1624 }
1625 }
1626
1627 CoordEq* pMetaEq = NULL;
1628
1629 if (cachedMetaEqIndex < MaxCachedMetaEq)
1630 {
1631 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1632 }
1633 else
1634 {
1635 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1636
1637 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1638
1639 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1640
1641 GenMetaEquation(pMetaEq,
1642 metaEqParams.maxMip,
1643 metaEqParams.elementBytesLog2,
1644 metaEqParams.numSamplesLog2,
1645 metaEqParams.metaFlag,
1646 metaEqParams.dataSurfaceType,
1647 metaEqParams.swizzleMode,
1648 metaEqParams.resourceType,
1649 metaEqParams.metaBlkWidthLog2,
1650 metaEqParams.metaBlkHeightLog2,
1651 metaEqParams.metaBlkDepthLog2,
1652 metaEqParams.compBlkWidthLog2,
1653 metaEqParams.compBlkHeightLog2,
1654 metaEqParams.compBlkDepthLog2);
1655 }
1656
1657 return pMetaEq;
1658 }
1659
1660 /**
1661 ************************************************************************************************************************
1662 * Gfx9Lib::GenMetaEquation
1663 *
1664 * @brief
1665 * Get meta equation for cmask/htile/DCC
1666 * @return
1667 * N/A
1668 ************************************************************************************************************************
1669 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1670 VOID Gfx9Lib::GenMetaEquation(
1671 CoordEq* pMetaEq, ///< [out] meta equation
1672 UINT_32 maxMip, ///< [in] max mip Id
1673 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1674 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1675 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1676 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1677 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1678 AddrResourceType resourceType, ///< [in] data surface resource type
1679 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1680 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1681 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1682 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1683 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1684 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1685 const
1686 {
1687 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1688 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1689
1690 // Get the correct data address and rb equation
1691 CoordEq dataEq;
1692 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1693 elementBytesLog2, numSamplesLog2);
1694
1695 // Get pipe and rb equations
1696 CoordEq pipeEquation;
1697 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1698 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1699 numPipeTotalLog2 = pipeEquation.getsize();
1700
1701 if (metaFlag.linear)
1702 {
1703 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1704 ADDR_ASSERT_ALWAYS();
1705
1706 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1707
1708 dataEq.copy(*pMetaEq);
1709
1710 if (IsLinear(swizzleMode))
1711 {
1712 if (metaFlag.pipeAligned)
1713 {
1714 // Remove the pipe bits
1715 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1716 pMetaEq->shift(-shift, pipeInterleaveLog2);
1717 }
1718 // Divide by comp block size, which for linear (which is always color) is 256 B
1719 pMetaEq->shift(-8);
1720
1721 if (metaFlag.pipeAligned)
1722 {
1723 // Put pipe bits back in
1724 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1725
1726 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1727 {
1728 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1729 }
1730 }
1731 }
1732
1733 pMetaEq->shift(1);
1734 }
1735 else
1736 {
1737 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1738 UINT_32 compFragLog2 =
1739 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1740 maxCompFragLog2 : numSamplesLog2;
1741
1742 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1743
1744 // Make sure the metaaddr is cleared
1745 pMetaEq->resize(0);
1746 pMetaEq->resize(27);
1747
1748 if (IsThick(resourceType, swizzleMode))
1749 {
1750 Coordinate cx('x', 0);
1751 Coordinate cy('y', 0);
1752 Coordinate cz('z', 0);
1753
1754 if (maxMip > 0)
1755 {
1756 pMetaEq->mort3d(cy, cx, cz);
1757 }
1758 else
1759 {
1760 pMetaEq->mort3d(cx, cy, cz);
1761 }
1762 }
1763 else
1764 {
1765 Coordinate cx('x', 0);
1766 Coordinate cy('y', 0);
1767 Coordinate cs;
1768
1769 if (maxMip > 0)
1770 {
1771 pMetaEq->mort2d(cy, cx, compFragLog2);
1772 }
1773 else
1774 {
1775 pMetaEq->mort2d(cx, cy, compFragLog2);
1776 }
1777
1778 //------------------------------------------------------------------------------------------------------------------------
1779 // Put the compressible fragments at the lsb
1780 // the uncompressible frags will be at the msb of the micro address
1781 //------------------------------------------------------------------------------------------------------------------------
1782 for (UINT_32 s = 0; s < compFragLog2; s++)
1783 {
1784 cs.set('s', s);
1785 (*pMetaEq)[s].add(cs);
1786 }
1787 }
1788
1789 // Keep a copy of the pipe equations
1790 CoordEq origPipeEquation;
1791 pipeEquation.copy(origPipeEquation);
1792
1793 Coordinate co;
1794 // filter out everything under the compressed block size
1795 co.set('x', compBlkWidthLog2);
1796 pMetaEq->Filter('<', co, 0, 'x');
1797 co.set('y', compBlkHeightLog2);
1798 pMetaEq->Filter('<', co, 0, 'y');
1799 co.set('z', compBlkDepthLog2);
1800 pMetaEq->Filter('<', co, 0, 'z');
1801
1802 // For non-color, filter out sample bits
1803 if (dataSurfaceType != Gfx9DataColor)
1804 {
1805 co.set('x', 0);
1806 pMetaEq->Filter('<', co, 0, 's');
1807 }
1808
1809 // filter out everything above the metablock size
1810 co.set('x', metaBlkWidthLog2 - 1);
1811 pMetaEq->Filter('>', co, 0, 'x');
1812 co.set('y', metaBlkHeightLog2 - 1);
1813 pMetaEq->Filter('>', co, 0, 'y');
1814 co.set('z', metaBlkDepthLog2 - 1);
1815 pMetaEq->Filter('>', co, 0, 'z');
1816
1817 // filter out everything above the metablock size for the channel bits
1818 co.set('x', metaBlkWidthLog2 - 1);
1819 pipeEquation.Filter('>', co, 0, 'x');
1820 co.set('y', metaBlkHeightLog2 - 1);
1821 pipeEquation.Filter('>', co, 0, 'y');
1822 co.set('z', metaBlkDepthLog2 - 1);
1823 pipeEquation.Filter('>', co, 0, 'z');
1824
1825 // Make sure we still have the same number of channel bits
1826 if (pipeEquation.getsize() != numPipeTotalLog2)
1827 {
1828 ADDR_ASSERT_ALWAYS();
1829 }
1830
1831 // Loop through all channel and rb bits,
1832 // and make sure these components exist in the metadata address
1833 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1834 {
1835 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1836 {
1837 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1838 {
1839 ADDR_ASSERT_ALWAYS();
1840 }
1841 }
1842 }
1843
1844 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
1845 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1846 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1847 CoordEq origRbEquation;
1848
1849 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1850
1851 CoordEq rbEquation = origRbEquation;
1852
1853 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1854 {
1855 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1856 {
1857 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1858 {
1859 ADDR_ASSERT_ALWAYS();
1860 }
1861 }
1862 }
1863
1864 if (m_settings.applyAliasFix)
1865 {
1866 co.set('z', -1);
1867 }
1868
1869 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1870 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1871 {
1872 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1873 {
1874 BOOL_32 isRbEquationInPipeEquation = FALSE;
1875
1876 if (m_settings.applyAliasFix)
1877 {
1878 CoordTerm filteredPipeEq;
1879 filteredPipeEq = pipeEquation[j];
1880
1881 filteredPipeEq.Filter('>', co, 0, 'z');
1882
1883 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1884 }
1885 else
1886 {
1887 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1888 }
1889
1890 if (isRbEquationInPipeEquation)
1891 {
1892 rbEquation[i].Clear();
1893 }
1894 }
1895 }
1896
1897 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1898
1899 // Loop through each bit of the channel, get the smallest coordinate,
1900 // and remove it from the metaaddr, and rb_equation
1901 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1902 {
1903 pipeEquation[i].getsmallest(co);
1904
1905 UINT_32 old_size = pMetaEq->getsize();
1906 pMetaEq->Filter('=', co);
1907 UINT_32 new_size = pMetaEq->getsize();
1908 if (new_size != old_size-1)
1909 {
1910 ADDR_ASSERT_ALWAYS();
1911 }
1912 pipeEquation.remove(co);
1913 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1914 {
1915 if (rbEquation[j].remove(co))
1916 {
1917 // if we actually removed something from this bit, then add the remaining
1918 // channel bits, as these can be removed for this bit
1919 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
1920 {
1921 if (pipeEquation[i][k] != co)
1922 {
1923 rbEquation[j].add(pipeEquation[i][k]);
1924 rbAppendedWithPipeBits[j] = true;
1925 }
1926 }
1927 }
1928 }
1929 }
1930
1931 // Loop through the rb bits and see what remain;
1932 // filter out the smallest coordinate if it remains
1933 UINT_32 rbBitsLeft = 0;
1934 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1935 {
1936 BOOL_32 isRbEqAppended = FALSE;
1937
1938 if (m_settings.applyAliasFix)
1939 {
1940 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
1941 }
1942 else
1943 {
1944 isRbEqAppended = (rbEquation[i].getsize() > 0);
1945 }
1946
1947 if (isRbEqAppended)
1948 {
1949 rbBitsLeft++;
1950 rbEquation[i].getsmallest(co);
1951 UINT_32 old_size = pMetaEq->getsize();
1952 pMetaEq->Filter('=', co);
1953 UINT_32 new_size = pMetaEq->getsize();
1954 if (new_size != old_size - 1)
1955 {
1956 // assert warning
1957 }
1958 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
1959 {
1960 if (rbEquation[j].remove(co))
1961 {
1962 // if we actually removed something from this bit, then add the remaining
1963 // rb bits, as these can be removed for this bit
1964 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
1965 {
1966 if (rbEquation[i][k] != co)
1967 {
1968 rbEquation[j].add(rbEquation[i][k]);
1969 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
1970 }
1971 }
1972 }
1973 }
1974 }
1975 }
1976
1977 // capture the size of the metaaddr
1978 UINT_32 metaSize = pMetaEq->getsize();
1979 // resize to 49 bits...make this a nibble address
1980 pMetaEq->resize(49);
1981 // Concatenate the macro address above the current address
1982 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
1983 {
1984 co.set('m', j);
1985 (*pMetaEq)[i].add(co);
1986 }
1987
1988 // Multiply by meta element size (in nibbles)
1989 if (dataSurfaceType == Gfx9DataColor)
1990 {
1991 pMetaEq->shift(1);
1992 }
1993 else if (dataSurfaceType == Gfx9DataDepthStencil)
1994 {
1995 pMetaEq->shift(3);
1996 }
1997
1998 //------------------------------------------------------------------------------------------
1999 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2000 // Shift up from pipe interleave number of channel
2001 // and rb bits left, and uncompressed fragments
2002 //------------------------------------------------------------------------------------------
2003
2004 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2005
2006 // Put in the channel bits
2007 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2008 {
2009 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2010 }
2011
2012 // Put in remaining rb bits
2013 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2014 {
2015 BOOL_32 isRbEqAppended = FALSE;
2016
2017 if (m_settings.applyAliasFix)
2018 {
2019 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2020 }
2021 else
2022 {
2023 isRbEqAppended = (rbEquation[i].getsize() > 0);
2024 }
2025
2026 if (isRbEqAppended)
2027 {
2028 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2029 // Mark any rb bit we add in to the rb mask
2030 j++;
2031 }
2032 }
2033
2034 //------------------------------------------------------------------------------------------
2035 // Put in the uncompressed fragment bits
2036 //------------------------------------------------------------------------------------------
2037 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2038 {
2039 co.set('s', compFragLog2 + i);
2040 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2041 }
2042 }
2043 }
2044
2045 /**
2046 ************************************************************************************************************************
2047 * Gfx9Lib::IsEquationSupported
2048 *
2049 * @brief
2050 * Check if equation is supported for given swizzle mode and resource type.
2051 *
2052 * @return
2053 * TRUE if supported
2054 ************************************************************************************************************************
2055 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2056 BOOL_32 Gfx9Lib::IsEquationSupported(
2057 AddrResourceType rsrcType,
2058 AddrSwizzleMode swMode,
2059 UINT_32 elementBytesLog2) const
2060 {
2061 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2062 (IsLinear(swMode) == FALSE) &&
2063 (((IsTex2d(rsrcType) == TRUE) &&
2064 ((elementBytesLog2 < 4) ||
2065 ((IsRotateSwizzle(swMode) == FALSE) &&
2066 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2067 ((IsTex3d(rsrcType) == TRUE) &&
2068 (IsRotateSwizzle(swMode) == FALSE) &&
2069 (IsBlock256b(swMode) == FALSE)));
2070
2071 return supported;
2072 }
2073
2074 /**
2075 ************************************************************************************************************************
2076 * Gfx9Lib::InitEquationTable
2077 *
2078 * @brief
2079 * Initialize Equation table.
2080 *
2081 * @return
2082 * N/A
2083 ************************************************************************************************************************
2084 */
InitEquationTable()2085 VOID Gfx9Lib::InitEquationTable()
2086 {
2087 memset(m_equationTable, 0, sizeof(m_equationTable));
2088
2089 // Loop all possible resource type (2D/3D)
2090 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2091 {
2092 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2093
2094 // Loop all possible swizzle mode
2095 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2096 {
2097 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2098
2099 // Loop all possible bpp
2100 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2101 {
2102 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2103
2104 // Check if the input is supported
2105 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2106 {
2107 ADDR_EQUATION equation;
2108 ADDR_E_RETURNCODE retCode;
2109
2110 memset(&equation, 0, sizeof(ADDR_EQUATION));
2111
2112 // Generate the equation
2113 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2114 {
2115 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2116 }
2117 else if (IsThin(rsrcType, swMode))
2118 {
2119 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2120 }
2121 else
2122 {
2123 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2124 }
2125
2126 // Only fill the equation into the table if the return code is ADDR_OK,
2127 // otherwise if the return code is not ADDR_OK, it indicates this is not
2128 // a valid input, we do nothing but just fill invalid equation index
2129 // into the lookup table.
2130 if (retCode == ADDR_OK)
2131 {
2132 equationIndex = m_numEquations;
2133 ADDR_ASSERT(equationIndex < EquationTableSize);
2134
2135 m_equationTable[equationIndex] = equation;
2136
2137 m_numEquations++;
2138 }
2139 else
2140 {
2141 ADDR_ASSERT_ALWAYS();
2142 }
2143 }
2144
2145 // Fill the index into the lookup table, if the combination is not supported
2146 // fill the invalid equation index
2147 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2148 }
2149 }
2150 }
2151 }
2152
2153 /**
2154 ************************************************************************************************************************
2155 * Gfx9Lib::HwlGetEquationIndex
2156 *
2157 * @brief
2158 * Interface function stub of GetEquationIndex
2159 *
2160 * @return
2161 * ADDR_E_RETURNCODE
2162 ************************************************************************************************************************
2163 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2164 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2165 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2166 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2167 ) const
2168 {
2169 AddrResourceType rsrcType = pIn->resourceType;
2170 AddrSwizzleMode swMode = pIn->swizzleMode;
2171 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2172 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2173
2174 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2175 {
2176 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2177 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2178
2179 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2180 }
2181
2182 if (pOut->pMipInfo != NULL)
2183 {
2184 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2185 {
2186 pOut->pMipInfo[i].equationIndex = index;
2187 }
2188 }
2189
2190 return index;
2191 }
2192
2193 /**
2194 ************************************************************************************************************************
2195 * Gfx9Lib::HwlComputeBlock256Equation
2196 *
2197 * @brief
2198 * Interface function stub of ComputeBlock256Equation
2199 *
2200 * @return
2201 * ADDR_E_RETURNCODE
2202 ************************************************************************************************************************
2203 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2204 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2205 AddrResourceType rsrcType,
2206 AddrSwizzleMode swMode,
2207 UINT_32 elementBytesLog2,
2208 ADDR_EQUATION* pEquation) const
2209 {
2210 ADDR_E_RETURNCODE ret = ADDR_OK;
2211
2212 pEquation->numBits = 8;
2213
2214 UINT_32 i = 0;
2215 for (; i < elementBytesLog2; i++)
2216 {
2217 InitChannel(1, 0 , i, &pEquation->addr[i]);
2218 }
2219
2220 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2221
2222 const UINT_32 maxBitsUsed = 4;
2223 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2224 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2225
2226 for (i = 0; i < maxBitsUsed; i++)
2227 {
2228 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2229 InitChannel(1, 1, i, &y[i]);
2230 }
2231
2232 if (IsStandardSwizzle(rsrcType, swMode))
2233 {
2234 switch (elementBytesLog2)
2235 {
2236 case 0:
2237 pixelBit[0] = x[0];
2238 pixelBit[1] = x[1];
2239 pixelBit[2] = x[2];
2240 pixelBit[3] = x[3];
2241 pixelBit[4] = y[0];
2242 pixelBit[5] = y[1];
2243 pixelBit[6] = y[2];
2244 pixelBit[7] = y[3];
2245 break;
2246 case 1:
2247 pixelBit[0] = x[0];
2248 pixelBit[1] = x[1];
2249 pixelBit[2] = x[2];
2250 pixelBit[3] = y[0];
2251 pixelBit[4] = y[1];
2252 pixelBit[5] = y[2];
2253 pixelBit[6] = x[3];
2254 break;
2255 case 2:
2256 pixelBit[0] = x[0];
2257 pixelBit[1] = x[1];
2258 pixelBit[2] = y[0];
2259 pixelBit[3] = y[1];
2260 pixelBit[4] = y[2];
2261 pixelBit[5] = x[2];
2262 break;
2263 case 3:
2264 pixelBit[0] = x[0];
2265 pixelBit[1] = y[0];
2266 pixelBit[2] = y[1];
2267 pixelBit[3] = x[1];
2268 pixelBit[4] = x[2];
2269 break;
2270 case 4:
2271 pixelBit[0] = y[0];
2272 pixelBit[1] = y[1];
2273 pixelBit[2] = x[0];
2274 pixelBit[3] = x[1];
2275 break;
2276 default:
2277 ADDR_ASSERT_ALWAYS();
2278 ret = ADDR_INVALIDPARAMS;
2279 break;
2280 }
2281 }
2282 else if (IsDisplaySwizzle(rsrcType, swMode))
2283 {
2284 switch (elementBytesLog2)
2285 {
2286 case 0:
2287 pixelBit[0] = x[0];
2288 pixelBit[1] = x[1];
2289 pixelBit[2] = x[2];
2290 pixelBit[3] = y[1];
2291 pixelBit[4] = y[0];
2292 pixelBit[5] = y[2];
2293 pixelBit[6] = x[3];
2294 pixelBit[7] = y[3];
2295 break;
2296 case 1:
2297 pixelBit[0] = x[0];
2298 pixelBit[1] = x[1];
2299 pixelBit[2] = x[2];
2300 pixelBit[3] = y[0];
2301 pixelBit[4] = y[1];
2302 pixelBit[5] = y[2];
2303 pixelBit[6] = x[3];
2304 break;
2305 case 2:
2306 pixelBit[0] = x[0];
2307 pixelBit[1] = x[1];
2308 pixelBit[2] = y[0];
2309 pixelBit[3] = x[2];
2310 pixelBit[4] = y[1];
2311 pixelBit[5] = y[2];
2312 break;
2313 case 3:
2314 pixelBit[0] = x[0];
2315 pixelBit[1] = y[0];
2316 pixelBit[2] = x[1];
2317 pixelBit[3] = x[2];
2318 pixelBit[4] = y[1];
2319 break;
2320 case 4:
2321 pixelBit[0] = x[0];
2322 pixelBit[1] = y[0];
2323 pixelBit[2] = x[1];
2324 pixelBit[3] = y[1];
2325 break;
2326 default:
2327 ADDR_ASSERT_ALWAYS();
2328 ret = ADDR_INVALIDPARAMS;
2329 break;
2330 }
2331 }
2332 else if (IsRotateSwizzle(swMode))
2333 {
2334 switch (elementBytesLog2)
2335 {
2336 case 0:
2337 pixelBit[0] = y[0];
2338 pixelBit[1] = y[1];
2339 pixelBit[2] = y[2];
2340 pixelBit[3] = x[1];
2341 pixelBit[4] = x[0];
2342 pixelBit[5] = x[2];
2343 pixelBit[6] = x[3];
2344 pixelBit[7] = y[3];
2345 break;
2346 case 1:
2347 pixelBit[0] = y[0];
2348 pixelBit[1] = y[1];
2349 pixelBit[2] = y[2];
2350 pixelBit[3] = x[0];
2351 pixelBit[4] = x[1];
2352 pixelBit[5] = x[2];
2353 pixelBit[6] = x[3];
2354 break;
2355 case 2:
2356 pixelBit[0] = y[0];
2357 pixelBit[1] = y[1];
2358 pixelBit[2] = x[0];
2359 pixelBit[3] = y[2];
2360 pixelBit[4] = x[1];
2361 pixelBit[5] = x[2];
2362 break;
2363 case 3:
2364 pixelBit[0] = y[0];
2365 pixelBit[1] = x[0];
2366 pixelBit[2] = y[1];
2367 pixelBit[3] = x[1];
2368 pixelBit[4] = x[2];
2369 break;
2370 default:
2371 ADDR_ASSERT_ALWAYS();
2372 case 4:
2373 ret = ADDR_INVALIDPARAMS;
2374 break;
2375 }
2376 }
2377 else
2378 {
2379 ADDR_ASSERT_ALWAYS();
2380 ret = ADDR_INVALIDPARAMS;
2381 }
2382
2383 // Post validation
2384 if (ret == ADDR_OK)
2385 {
2386 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2387 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2388 (microBlockDim.w * (1 << elementBytesLog2)));
2389 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2390 }
2391
2392 return ret;
2393 }
2394
2395 /**
2396 ************************************************************************************************************************
2397 * Gfx9Lib::HwlComputeThinEquation
2398 *
2399 * @brief
2400 * Interface function stub of ComputeThinEquation
2401 *
2402 * @return
2403 * ADDR_E_RETURNCODE
2404 ************************************************************************************************************************
2405 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2406 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2407 AddrResourceType rsrcType,
2408 AddrSwizzleMode swMode,
2409 UINT_32 elementBytesLog2,
2410 ADDR_EQUATION* pEquation) const
2411 {
2412 ADDR_E_RETURNCODE ret = ADDR_OK;
2413
2414 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2415
2416 UINT_32 maxXorBits = blockSizeLog2;
2417 if (IsNonPrtXor(swMode))
2418 {
2419 // For non-prt-xor, maybe need to initialize some more bits for xor
2420 // The highest xor bit used in equation will be max the following 3 items:
2421 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2422 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2423 // 3. blockSizeLog2
2424
2425 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2426 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2427 GetPipeXorBits(blockSizeLog2) +
2428 2 * GetBankXorBits(blockSizeLog2));
2429 }
2430
2431 const UINT_32 maxBitsUsed = 14;
2432 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2433 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2434 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2435
2436 const UINT_32 extraXorBits = 16;
2437 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2438 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2439
2440 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2441 {
2442 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2443 InitChannel(1, 1, i, &y[i]);
2444 }
2445
2446 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2447
2448 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2449 {
2450 InitChannel(1, 0 , i, &pixelBit[i]);
2451 }
2452
2453 UINT_32 xIdx = 0;
2454 UINT_32 yIdx = 0;
2455 UINT_32 lowBits = 0;
2456
2457 if (IsZOrderSwizzle(swMode))
2458 {
2459 if (elementBytesLog2 <= 3)
2460 {
2461 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2462 {
2463 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2464 }
2465
2466 lowBits = 6;
2467 }
2468 else
2469 {
2470 ret = ADDR_INVALIDPARAMS;
2471 }
2472 }
2473 else
2474 {
2475 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2476
2477 if (ret == ADDR_OK)
2478 {
2479 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2480 xIdx = Log2(microBlockDim.w);
2481 yIdx = Log2(microBlockDim.h);
2482 lowBits = 8;
2483 }
2484 }
2485
2486 if (ret == ADDR_OK)
2487 {
2488 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2489 {
2490 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2491 }
2492
2493 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2494 {
2495 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2496 }
2497
2498 if (IsXor(swMode))
2499 {
2500 // Fill XOR bits
2501 UINT_32 pipeStart = m_pipeInterleaveLog2;
2502 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2503
2504 UINT_32 bankStart = pipeStart + pipeXorBits;
2505 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2506
2507 for (UINT_32 i = 0; i < pipeXorBits; i++)
2508 {
2509 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2510 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2511 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2512
2513 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2514 }
2515
2516 for (UINT_32 i = 0; i < bankXorBits; i++)
2517 {
2518 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2519 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2520 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2521
2522 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2523 }
2524
2525 if (IsPrt(swMode) == FALSE)
2526 {
2527 for (UINT_32 i = 0; i < pipeXorBits; i++)
2528 {
2529 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2530 }
2531
2532 for (UINT_32 i = 0; i < bankXorBits; i++)
2533 {
2534 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2535 }
2536 }
2537 }
2538
2539 pEquation->numBits = blockSizeLog2;
2540 }
2541
2542 return ret;
2543 }
2544
2545 /**
2546 ************************************************************************************************************************
2547 * Gfx9Lib::HwlComputeThickEquation
2548 *
2549 * @brief
2550 * Interface function stub of ComputeThickEquation
2551 *
2552 * @return
2553 * ADDR_E_RETURNCODE
2554 ************************************************************************************************************************
2555 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2556 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2557 AddrResourceType rsrcType,
2558 AddrSwizzleMode swMode,
2559 UINT_32 elementBytesLog2,
2560 ADDR_EQUATION* pEquation) const
2561 {
2562 ADDR_E_RETURNCODE ret = ADDR_OK;
2563
2564 ADDR_ASSERT(IsTex3d(rsrcType));
2565
2566 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2567
2568 UINT_32 maxXorBits = blockSizeLog2;
2569 if (IsNonPrtXor(swMode))
2570 {
2571 // For non-prt-xor, maybe need to initialize some more bits for xor
2572 // The highest xor bit used in equation will be max the following 3:
2573 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2574 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2575 // 3. blockSizeLog2
2576
2577 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2578 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2579 GetPipeXorBits(blockSizeLog2) +
2580 3 * GetBankXorBits(blockSizeLog2));
2581 }
2582
2583 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2584 {
2585 InitChannel(1, 0 , i, &pEquation->addr[i]);
2586 }
2587
2588 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2589
2590 const UINT_32 maxBitsUsed = 12;
2591 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2592 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2593 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2594 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2595
2596 const UINT_32 extraXorBits = 24;
2597 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2598 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2599
2600 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2601 {
2602 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2603 InitChannel(1, 1, i, &y[i]);
2604 InitChannel(1, 2, i, &z[i]);
2605 }
2606
2607 if (IsZOrderSwizzle(swMode))
2608 {
2609 switch (elementBytesLog2)
2610 {
2611 case 0:
2612 pixelBit[0] = x[0];
2613 pixelBit[1] = y[0];
2614 pixelBit[2] = x[1];
2615 pixelBit[3] = y[1];
2616 pixelBit[4] = z[0];
2617 pixelBit[5] = z[1];
2618 pixelBit[6] = x[2];
2619 pixelBit[7] = z[2];
2620 pixelBit[8] = y[2];
2621 pixelBit[9] = x[3];
2622 break;
2623 case 1:
2624 pixelBit[0] = x[0];
2625 pixelBit[1] = y[0];
2626 pixelBit[2] = x[1];
2627 pixelBit[3] = y[1];
2628 pixelBit[4] = z[0];
2629 pixelBit[5] = z[1];
2630 pixelBit[6] = z[2];
2631 pixelBit[7] = y[2];
2632 pixelBit[8] = x[2];
2633 break;
2634 case 2:
2635 pixelBit[0] = x[0];
2636 pixelBit[1] = y[0];
2637 pixelBit[2] = x[1];
2638 pixelBit[3] = z[0];
2639 pixelBit[4] = y[1];
2640 pixelBit[5] = z[1];
2641 pixelBit[6] = y[2];
2642 pixelBit[7] = x[2];
2643 break;
2644 case 3:
2645 pixelBit[0] = x[0];
2646 pixelBit[1] = y[0];
2647 pixelBit[2] = z[0];
2648 pixelBit[3] = x[1];
2649 pixelBit[4] = z[1];
2650 pixelBit[5] = y[1];
2651 pixelBit[6] = x[2];
2652 break;
2653 case 4:
2654 pixelBit[0] = x[0];
2655 pixelBit[1] = y[0];
2656 pixelBit[2] = z[0];
2657 pixelBit[3] = z[1];
2658 pixelBit[4] = y[1];
2659 pixelBit[5] = x[1];
2660 break;
2661 default:
2662 ADDR_ASSERT_ALWAYS();
2663 ret = ADDR_INVALIDPARAMS;
2664 break;
2665 }
2666 }
2667 else if (IsStandardSwizzle(rsrcType, swMode))
2668 {
2669 switch (elementBytesLog2)
2670 {
2671 case 0:
2672 pixelBit[0] = x[0];
2673 pixelBit[1] = x[1];
2674 pixelBit[2] = x[2];
2675 pixelBit[3] = x[3];
2676 pixelBit[4] = y[0];
2677 pixelBit[5] = y[1];
2678 pixelBit[6] = z[0];
2679 pixelBit[7] = z[1];
2680 pixelBit[8] = z[2];
2681 pixelBit[9] = y[2];
2682 break;
2683 case 1:
2684 pixelBit[0] = x[0];
2685 pixelBit[1] = x[1];
2686 pixelBit[2] = x[2];
2687 pixelBit[3] = y[0];
2688 pixelBit[4] = y[1];
2689 pixelBit[5] = z[0];
2690 pixelBit[6] = z[1];
2691 pixelBit[7] = z[2];
2692 pixelBit[8] = y[2];
2693 break;
2694 case 2:
2695 pixelBit[0] = x[0];
2696 pixelBit[1] = x[1];
2697 pixelBit[2] = y[0];
2698 pixelBit[3] = y[1];
2699 pixelBit[4] = z[0];
2700 pixelBit[5] = z[1];
2701 pixelBit[6] = y[2];
2702 pixelBit[7] = x[2];
2703 break;
2704 case 3:
2705 pixelBit[0] = x[0];
2706 pixelBit[1] = y[0];
2707 pixelBit[2] = y[1];
2708 pixelBit[3] = z[0];
2709 pixelBit[4] = z[1];
2710 pixelBit[5] = x[1];
2711 pixelBit[6] = x[2];
2712 break;
2713 case 4:
2714 pixelBit[0] = y[0];
2715 pixelBit[1] = y[1];
2716 pixelBit[2] = z[0];
2717 pixelBit[3] = z[1];
2718 pixelBit[4] = x[0];
2719 pixelBit[5] = x[1];
2720 break;
2721 default:
2722 ADDR_ASSERT_ALWAYS();
2723 ret = ADDR_INVALIDPARAMS;
2724 break;
2725 }
2726 }
2727 else
2728 {
2729 ADDR_ASSERT_ALWAYS();
2730 ret = ADDR_INVALIDPARAMS;
2731 }
2732
2733 if (ret == ADDR_OK)
2734 {
2735 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2736 UINT_32 xIdx = Log2(microBlockDim.w);
2737 UINT_32 yIdx = Log2(microBlockDim.h);
2738 UINT_32 zIdx = Log2(microBlockDim.d);
2739
2740 pixelBit = pEquation->addr;
2741
2742 const UINT_32 lowBits = 10;
2743 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2744 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2745
2746 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2747 {
2748 if ((i % 3) == 0)
2749 {
2750 pixelBit[i] = x[xIdx++];
2751 }
2752 else if ((i % 3) == 1)
2753 {
2754 pixelBit[i] = z[zIdx++];
2755 }
2756 else
2757 {
2758 pixelBit[i] = y[yIdx++];
2759 }
2760 }
2761
2762 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2763 {
2764 if ((i % 3) == 0)
2765 {
2766 xorExtra[i - blockSizeLog2] = x[xIdx++];
2767 }
2768 else if ((i % 3) == 1)
2769 {
2770 xorExtra[i - blockSizeLog2] = z[zIdx++];
2771 }
2772 else
2773 {
2774 xorExtra[i - blockSizeLog2] = y[yIdx++];
2775 }
2776 }
2777
2778 if (IsXor(swMode))
2779 {
2780 // Fill XOR bits
2781 UINT_32 pipeStart = m_pipeInterleaveLog2;
2782 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2783 for (UINT_32 i = 0; i < pipeXorBits; i++)
2784 {
2785 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2786 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2787 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2788
2789 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2790
2791 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2792 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2793 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2794
2795 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2796 }
2797
2798 UINT_32 bankStart = pipeStart + pipeXorBits;
2799 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2800 for (UINT_32 i = 0; i < bankXorBits; i++)
2801 {
2802 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2803 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2804 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2805
2806 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2807
2808 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2809 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2810 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2811
2812 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2813 }
2814 }
2815
2816 pEquation->numBits = blockSizeLog2;
2817 }
2818
2819 return ret;
2820 }
2821
2822 /**
2823 ************************************************************************************************************************
2824 * Gfx9Lib::IsValidDisplaySwizzleMode
2825 *
2826 * @brief
2827 * Check if a swizzle mode is supported by display engine
2828 *
2829 * @return
2830 * TRUE is swizzle mode is supported by display engine
2831 ************************************************************************************************************************
2832 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2833 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2834 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2835 {
2836 BOOL_32 support = FALSE;
2837
2838 const AddrResourceType resourceType = pIn->resourceType;
2839 (void)resourceType;
2840 const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2841
2842 if (m_settings.isDce12)
2843 {
2844 switch (swizzleMode)
2845 {
2846 case ADDR_SW_256B_D:
2847 case ADDR_SW_256B_R:
2848 support = (pIn->bpp == 32);
2849 break;
2850
2851 case ADDR_SW_LINEAR:
2852 case ADDR_SW_4KB_D:
2853 case ADDR_SW_4KB_R:
2854 case ADDR_SW_64KB_D:
2855 case ADDR_SW_64KB_R:
2856 case ADDR_SW_VAR_D:
2857 case ADDR_SW_VAR_R:
2858 case ADDR_SW_4KB_D_X:
2859 case ADDR_SW_4KB_R_X:
2860 case ADDR_SW_64KB_D_X:
2861 case ADDR_SW_64KB_R_X:
2862 case ADDR_SW_VAR_D_X:
2863 case ADDR_SW_VAR_R_X:
2864 support = (pIn->bpp <= 64);
2865 break;
2866
2867 default:
2868 break;
2869 }
2870 }
2871 else if (m_settings.isDcn1)
2872 {
2873 switch (swizzleMode)
2874 {
2875 case ADDR_SW_4KB_D:
2876 case ADDR_SW_64KB_D:
2877 case ADDR_SW_VAR_D:
2878 case ADDR_SW_64KB_D_T:
2879 case ADDR_SW_4KB_D_X:
2880 case ADDR_SW_64KB_D_X:
2881 case ADDR_SW_VAR_D_X:
2882 support = (pIn->bpp == 64);
2883 break;
2884
2885 case ADDR_SW_LINEAR:
2886 case ADDR_SW_4KB_S:
2887 case ADDR_SW_64KB_S:
2888 case ADDR_SW_VAR_S:
2889 case ADDR_SW_64KB_S_T:
2890 case ADDR_SW_4KB_S_X:
2891 case ADDR_SW_64KB_S_X:
2892 case ADDR_SW_VAR_S_X:
2893 support = (pIn->bpp <= 64);
2894 break;
2895
2896 default:
2897 break;
2898 }
2899 }
2900 else
2901 {
2902 ADDR_NOT_IMPLEMENTED();
2903 }
2904
2905 return support;
2906 }
2907
2908 /**
2909 ************************************************************************************************************************
2910 * Gfx9Lib::HwlComputePipeBankXor
2911 *
2912 * @brief
2913 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2914 *
2915 * @return
2916 * PipeBankXor value
2917 ************************************************************************************************************************
2918 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2919 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2920 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2921 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
2922 {
2923 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2924 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2925 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2926
2927 UINT_32 pipeXor = 0;
2928 UINT_32 bankXor = 0;
2929
2930 const UINT_32 bankMask = (1 << bankBits) - 1;
2931 const UINT_32 index = pIn->surfIndex & bankMask;
2932
2933 const UINT_32 bpp = pIn->flags.fmask ?
2934 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2935 if (bankBits == 4)
2936 {
2937 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2938 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2939
2940 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2941 }
2942 else if (bankBits > 0)
2943 {
2944 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
2945 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
2946 bankXor = (index * bankIncrease) & bankMask;
2947 }
2948
2949 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
2950
2951 return ADDR_OK;
2952 }
2953
2954 /**
2955 ************************************************************************************************************************
2956 * Gfx9Lib::HwlComputeSlicePipeBankXor
2957 *
2958 * @brief
2959 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2960 *
2961 * @return
2962 * PipeBankXor value
2963 ************************************************************************************************************************
2964 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2965 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
2966 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
2967 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
2968 {
2969 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2970 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
2971 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
2972
2973 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2974 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2975
2976 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
2977
2978 return ADDR_OK;
2979 }
2980
2981 /**
2982 ************************************************************************************************************************
2983 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2984 *
2985 * @brief
2986 * Compute sub resource offset to support swizzle pattern
2987 *
2988 * @return
2989 * Offset
2990 ************************************************************************************************************************
2991 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2992 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2993 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
2994 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
2995 {
2996 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2997
2998 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2999 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3000 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3001 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3002 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3003 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3004
3005 pOut->offset = pIn->slice * pIn->sliceSize +
3006 pIn->macroBlockOffset +
3007 (pIn->mipTailOffset ^ pipeBankXor) -
3008 static_cast<UINT_64>(pipeBankXor);
3009 return ADDR_OK;
3010 }
3011
3012 /**
3013 ************************************************************************************************************************
3014 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3015 *
3016 * @brief
3017 * Compute surface info sanity check
3018 *
3019 * @return
3020 * Offset
3021 ************************************************************************************************************************
3022 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3023 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3024 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3025 {
3026 BOOL_32 invalid = FALSE;
3027
3028 if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3029 {
3030 invalid = TRUE;
3031 }
3032 else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) ||
3033 (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
3034 {
3035 invalid = TRUE;
3036 }
3037
3038 BOOL_32 mipmap = (pIn->numMipLevels > 1);
3039 BOOL_32 msaa = (pIn->numFrags > 1);
3040
3041 ADDR2_SURFACE_FLAGS flags = pIn->flags;
3042 BOOL_32 zbuffer = (flags.depth || flags.stencil);
3043 BOOL_32 color = flags.color;
3044 BOOL_32 display = flags.display || flags.rotated;
3045
3046 AddrResourceType rsrcType = pIn->resourceType;
3047 BOOL_32 tex3d = IsTex3d(rsrcType);
3048 AddrSwizzleMode swizzle = pIn->swizzleMode;
3049 BOOL_32 linear = IsLinear(swizzle);
3050 BOOL_32 blk256B = IsBlock256b(swizzle);
3051 BOOL_32 blkVar = IsBlockVariable(swizzle);
3052 BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3053 BOOL_32 prt = flags.prt;
3054 BOOL_32 stereo = flags.qbStereo;
3055
3056 if (invalid == FALSE)
3057 {
3058 if ((pIn->numFrags > 1) &&
3059 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3060 {
3061 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3062 invalid = TRUE;
3063 }
3064 }
3065
3066 if (invalid == FALSE)
3067 {
3068 switch (rsrcType)
3069 {
3070 case ADDR_RSRC_TEX_1D:
3071 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
3072 break;
3073 case ADDR_RSRC_TEX_2D:
3074 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
3075 break;
3076 case ADDR_RSRC_TEX_3D:
3077 invalid = msaa || zbuffer || display || stereo;
3078 break;
3079 default:
3080 invalid = TRUE;
3081 break;
3082 }
3083 }
3084
3085 if (invalid == FALSE)
3086 {
3087 if (display)
3088 {
3089 invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3090 }
3091 }
3092
3093 if (invalid == FALSE)
3094 {
3095 if (linear)
3096 {
3097 invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
3098 zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3099 }
3100 else
3101 {
3102 if (blk256B || blkVar || isNonPrtXor)
3103 {
3104 invalid = prt;
3105 if (blk256B)
3106 {
3107 invalid = invalid || zbuffer || tex3d || mipmap || msaa;
3108 }
3109 }
3110
3111 if (invalid == FALSE)
3112 {
3113 if (IsZOrderSwizzle(swizzle))
3114 {
3115 invalid = color && msaa;
3116 }
3117 else if (IsStandardSwizzle(rsrcType, swizzle))
3118 {
3119 invalid = zbuffer;
3120 }
3121 else if (IsDisplaySwizzle(rsrcType, swizzle))
3122 {
3123 invalid = zbuffer;
3124 }
3125 else if (IsRotateSwizzle(swizzle))
3126 {
3127 invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3128 }
3129 else
3130 {
3131 ADDR_ASSERT(!"invalid swizzle mode");
3132 invalid = TRUE;
3133 }
3134 }
3135 }
3136 }
3137
3138 ADDR_ASSERT(invalid == FALSE);
3139
3140 return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3141 }
3142
3143 /**
3144 ************************************************************************************************************************
3145 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3146 *
3147 * @brief
3148 * Internal function to get suggested surface information for cliet to use
3149 *
3150 * @return
3151 * ADDR_E_RETURNCODE
3152 ************************************************************************************************************************
3153 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3154 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3155 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3156 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3157 {
3158 // Macro define resource block type
3159 enum AddrBlockType
3160 {
3161 AddrBlockMicro = 0, // Resource uses 256B block
3162 AddrBlock4KB = 1, // Resource uses 4KB block
3163 AddrBlock64KB = 2, // Resource uses 64KB block
3164 AddrBlockVar = 3, // Resource uses var blcok
3165 AddrBlockLinear = 4, // Resource uses linear swizzle mode
3166
3167 AddrBlockMaxTiledType = AddrBlock64KB + 1,
3168 };
3169
3170 enum AddrBlockSet
3171 {
3172 AddrBlockSetMicro = 1 << AddrBlockMicro,
3173 AddrBlockSetMacro4KB = 1 << AddrBlock4KB,
3174 AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
3175 AddrBlockSetVar = 1 << AddrBlockVar,
3176 AddrBlockSetLinear = 1 << AddrBlockLinear,
3177
3178 AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
3179 };
3180
3181 enum AddrSwSet
3182 {
3183 AddrSwSetZ = 1 << ADDR_SW_Z,
3184 AddrSwSetS = 1 << ADDR_SW_S,
3185 AddrSwSetD = 1 << ADDR_SW_D,
3186 AddrSwSetR = 1 << ADDR_SW_R,
3187
3188 AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
3189 };
3190
3191 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3192 ElemLib* pElemLib = GetElemLib();
3193
3194 // Set format to INVALID will skip this conversion
3195 UINT_32 expandX = 1;
3196 UINT_32 expandY = 1;
3197 UINT_32 bpp = pIn->bpp;
3198 UINT_32 width = pIn->width;
3199 UINT_32 height = pIn->height;
3200
3201 if (pIn->format != ADDR_FMT_INVALID)
3202 {
3203 // Don't care for this case
3204 ElemMode elemMode = ADDR_UNCOMPRESSED;
3205
3206 // Get compression/expansion factors and element mode which indicates compression/expansion
3207 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3208 &elemMode,
3209 &expandX,
3210 &expandY);
3211
3212 UINT_32 basePitch = 0;
3213 GetElemLib()->AdjustSurfaceInfo(elemMode,
3214 expandX,
3215 expandY,
3216 &bpp,
3217 &basePitch,
3218 &width,
3219 &height);
3220 }
3221
3222 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3223 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3224 UINT_32 slice = Max(pIn->numSlices, 1u);
3225 UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3226 UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
3227
3228 if (pIn->flags.fmask)
3229 {
3230 bpp = GetFmaskBpp(numSamples, numFrags);
3231 numFrags = 1;
3232 numSamples = 1;
3233 pOut->resourceType = ADDR_RSRC_TEX_2D;
3234 }
3235 else
3236 {
3237 // The output may get changed for volume(3D) texture resource in future
3238 pOut->resourceType = pIn->resourceType;
3239 }
3240
3241 if (bpp < 8)
3242 {
3243 ADDR_ASSERT_ALWAYS();
3244
3245 returnCode = ADDR_INVALIDPARAMS;
3246 }
3247 else if (IsTex1d(pOut->resourceType))
3248 {
3249 pOut->swizzleMode = ADDR_SW_LINEAR;
3250 pOut->validBlockSet.value = AddrBlockSetLinear;
3251 pOut->canXor = FALSE;
3252 }
3253 else
3254 {
3255 ADDR2_BLOCK_SET blockSet;
3256 blockSet.value = 0;
3257
3258 ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet;
3259 addrPreferredSwSet.value = AddrSwSetS;
3260 addrValidSwSet = addrPreferredSwSet;
3261 clientPreferredSwSet = pIn->preferredSwSet;
3262
3263 if (clientPreferredSwSet.value == 0)
3264 {
3265 clientPreferredSwSet.value = AddrSwSetAll;
3266 }
3267
3268 // prt Xor and non-xor will have less height align requirement for stereo surface
3269 BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
3270 BOOL_32 displayResource = FALSE;
3271
3272 pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
3273
3274 // Filter out improper swType and blockSet by HW restriction
3275 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3276 {
3277 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3278 blockSet.value = AddrBlockSetMacro;
3279 addrPreferredSwSet.value = AddrSwSetZ;
3280 addrValidSwSet.value = AddrSwSetZ;
3281
3282 if (pIn->flags.depth && pIn->flags.texture)
3283 {
3284 if (((bpp == 16) && (numFrags >= 4)) ||
3285 ((bpp == 32) && (numFrags >= 2)))
3286 {
3287 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3288 // equation from wrong address within memory range a tile covered and use the
3289 // garbage data for compressed Z reading which finally leads to corruption.
3290 pOut->canXor = FALSE;
3291 prtXor = FALSE;
3292 }
3293 }
3294 }
3295 else if (ElemLib::IsBlockCompressed(pIn->format))
3296 {
3297 // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
3298 // Not sure under what circumstances "_D" would be appropriate as these formats
3299 // are not displayable.
3300 blockSet.value = AddrBlockSetMacro;
3301
3302 // This isn't to be used as texture and caller doesn't allow macro tiled.
3303 if ((pIn->flags.texture == FALSE) &&
3304 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
3305 {
3306 blockSet.value |= AddrBlockSetLinear;
3307 }
3308
3309 addrPreferredSwSet.value = AddrSwSetD;
3310 addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
3311 }
3312 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3313 {
3314 // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.
3315 // Its notclear under what circumstances the D or R modes would be appropriate
3316 // since these formats are not displayable.
3317 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3318
3319 addrPreferredSwSet.value = AddrSwSetS;
3320 addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
3321 }
3322 else if (IsTex3d(pOut->resourceType))
3323 {
3324 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3325
3326 if (pIn->flags.prt)
3327 {
3328 // PRT cannot use SW_D which gives an unexpected block dimension
3329 addrPreferredSwSet.value = AddrSwSetZ;
3330 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
3331 }
3332 else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
3333 {
3334 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3335 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3336 addrPreferredSwSet.value = AddrSwSetZ;
3337 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
3338 }
3339 else if (pIn->flags.color)
3340 {
3341 addrPreferredSwSet.value = AddrSwSetD;
3342 addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD;
3343 }
3344 else
3345 {
3346 addrPreferredSwSet.value = AddrSwSetZ;
3347 addrValidSwSet.value = AddrSwSetZ | AddrSwSetD;
3348 if (bpp != 128)
3349 {
3350 addrValidSwSet.value |= AddrSwSetS;
3351 }
3352 }
3353 }
3354 else
3355 {
3356 addrPreferredSwSet.value = ((pIn->flags.display == TRUE) ||
3357 (pIn->flags.overlay == TRUE) ||
3358 (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS;
3359
3360 addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
3361
3362 if (numMipLevels > 1)
3363 {
3364 ADDR_ASSERT(numFrags == 1);
3365 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3366 }
3367 else if ((numFrags > 1) || (numSamples > 1))
3368 {
3369 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3370 blockSet.value = AddrBlockSetMacro;
3371 }
3372 else
3373 {
3374 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3375 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
3376
3377 displayResource = pIn->flags.rotated || pIn->flags.display;
3378
3379 if (displayResource)
3380 {
3381 addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD;
3382
3383 if (pIn->bpp > 64)
3384 {
3385 blockSet.value = 0;
3386 }
3387 else if (m_settings.isDce12)
3388 {
3389 if (pIn->bpp != 32)
3390 {
3391 blockSet.micro = FALSE;
3392 }
3393
3394 // DCE12 does not support display surface to be _T swizzle mode
3395 prtXor = FALSE;
3396
3397 addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
3398 }
3399 else if (m_settings.isDcn1)
3400 {
3401 // _R is not supported by Dcn1
3402 if (pIn->bpp == 64)
3403 {
3404 addrPreferredSwSet.value = AddrSwSetD;
3405 addrValidSwSet.value = AddrSwSetD;
3406 }
3407 else
3408 {
3409 addrPreferredSwSet.value = AddrSwSetS;
3410 addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
3411 }
3412
3413 blockSet.micro = FALSE;
3414 }
3415 else
3416 {
3417 ADDR_NOT_IMPLEMENTED();
3418 returnCode = ADDR_NOTSUPPORTED;
3419 }
3420 }
3421 }
3422 }
3423
3424 ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value);
3425
3426 pOut->clientPreferredSwSet = clientPreferredSwSet;
3427
3428 // Clamp client preferred set to valid set
3429 clientPreferredSwSet.value &= addrValidSwSet.value;
3430
3431 pOut->validSwTypeSet = addrValidSwSet;
3432
3433 if (clientPreferredSwSet.value == 0)
3434 {
3435 // Client asks for an invalid swizzle type...
3436 ADDR_ASSERT_ALWAYS();
3437 returnCode = ADDR_INVALIDPARAMS;
3438 }
3439 else
3440 {
3441 if (IsPow2(clientPreferredSwSet.value))
3442 {
3443 // Only one swizzle type left, use it directly
3444 addrPreferredSwSet.value = clientPreferredSwSet.value;
3445 }
3446 else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0)
3447 {
3448 // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred
3449 if (clientPreferredSwSet.sw_D)
3450 {
3451 addrPreferredSwSet.value = AddrSwSetD;
3452 }
3453 else if (clientPreferredSwSet.sw_Z)
3454 {
3455 addrPreferredSwSet.value = AddrSwSetZ;
3456 }
3457 else if (clientPreferredSwSet.sw_R)
3458 {
3459 addrPreferredSwSet.value = AddrSwSetR;
3460 }
3461 else
3462 {
3463 ADDR_ASSERT(clientPreferredSwSet.sw_S);
3464 addrPreferredSwSet.value = AddrSwSetS;
3465 }
3466 }
3467
3468 if ((numFrags > 1) &&
3469 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3470 {
3471 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3472 blockSet.macro4KB = FALSE;
3473 }
3474
3475 if (pIn->flags.prt)
3476 {
3477 blockSet.value &= AddrBlockSetMacro64KB;
3478 }
3479
3480 // Apply customized forbidden setting
3481 blockSet.value &= ~pIn->forbiddenBlock.value;
3482
3483 if (pIn->maxAlign > 0)
3484 {
3485 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3486 {
3487 blockSet.macro64KB = FALSE;
3488 }
3489
3490 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3491 {
3492 blockSet.macro4KB = FALSE;
3493 }
3494
3495 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3496 {
3497 blockSet.micro = FALSE;
3498 }
3499 }
3500
3501 Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3502 Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3503 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3504
3505 if (blockSet.micro)
3506 {
3507 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
3508 &blkAlign[AddrBlockMicro].h,
3509 &blkAlign[AddrBlockMicro].d,
3510 bpp,
3511 numFrags,
3512 pOut->resourceType,
3513 ADDR_SW_256B);
3514
3515 if (returnCode == ADDR_OK)
3516 {
3517 if (displayResource)
3518 {
3519 blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
3520 }
3521 else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
3522 (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
3523 {
3524 // If one 256B block can contain the surface, don't bother bigger block type
3525 blockSet.macro4KB = FALSE;
3526 blockSet.macro64KB = FALSE;
3527 blockSet.var = FALSE;
3528 }
3529
3530 padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
3531 slice, &paddedDim[AddrBlockMicro]);
3532 }
3533 }
3534
3535 if ((returnCode == ADDR_OK) && blockSet.macro4KB)
3536 {
3537 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
3538 &blkAlign[AddrBlock4KB].h,
3539 &blkAlign[AddrBlock4KB].d,
3540 bpp,
3541 numFrags,
3542 pOut->resourceType,
3543 ADDR_SW_4KB);
3544
3545 if (returnCode == ADDR_OK)
3546 {
3547 if (displayResource)
3548 {
3549 blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
3550 }
3551
3552 padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
3553 slice, &paddedDim[AddrBlock4KB]);
3554
3555 ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
3556 }
3557 }
3558
3559 if ((returnCode == ADDR_OK) && blockSet.macro64KB)
3560 {
3561 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
3562 &blkAlign[AddrBlock64KB].h,
3563 &blkAlign[AddrBlock64KB].d,
3564 bpp,
3565 numFrags,
3566 pOut->resourceType,
3567 ADDR_SW_64KB);
3568
3569 if (returnCode == ADDR_OK)
3570 {
3571 if (displayResource)
3572 {
3573 blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
3574 }
3575
3576 padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
3577 slice, &paddedDim[AddrBlock64KB]);
3578
3579 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
3580 ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
3581 }
3582 }
3583
3584 if (returnCode == ADDR_OK)
3585 {
3586 UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
3587
3588 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3589 {
3590 padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
3591 }
3592
3593 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3594 if (pIn->flags.minimizeAlign)
3595 {
3596 // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3597 // block, filter out 64KB block from candidate list
3598 if (blockSet.macro64KB &&
3599 ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
3600 (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
3601 {
3602 blockSet.macro64KB = FALSE;
3603 }
3604
3605 // If padded size of 4KB block is larger than padded size of 256B block,
3606 // filter out 4KB block from candidate list
3607 if (blockSet.macro4KB &&
3608 blockSet.micro &&
3609 (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
3610 {
3611 blockSet.macro4KB = FALSE;
3612 }
3613 }
3614 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3615 else if (pIn->flags.opt4space)
3616 {
3617 UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
3618 (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
3619
3620 threshold += threshold >> 1;
3621
3622 if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
3623 {
3624 blockSet.macro64KB = FALSE;
3625 }
3626
3627 if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
3628 {
3629 blockSet.macro4KB = FALSE;
3630 }
3631 }
3632 else
3633 {
3634 if (blockSet.macro64KB &&
3635 (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
3636 ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
3637 {
3638 // If 64KB block waste more than half memory on padding, filter it out from
3639 // candidate list when it is not the only choice left
3640 blockSet.macro64KB = FALSE;
3641 }
3642 }
3643
3644 if (blockSet.value == 0)
3645 {
3646 // Bad things happen, client will not get any useful information from AddrLib.
3647 // Maybe we should fill in some output earlier instead of outputing nothing?
3648 ADDR_ASSERT_ALWAYS();
3649 returnCode = ADDR_INVALIDPARAMS;
3650 }
3651 else
3652 {
3653 pOut->validBlockSet = blockSet;
3654 pOut->canXor = pOut->canXor &&
3655 (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
3656
3657 if (blockSet.macro64KB || blockSet.macro4KB)
3658 {
3659 if (addrPreferredSwSet.value == AddrSwSetZ)
3660 {
3661 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
3662 }
3663 else if (addrPreferredSwSet.value == AddrSwSetS)
3664 {
3665 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
3666 }
3667 else if (addrPreferredSwSet.value == AddrSwSetD)
3668 {
3669 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
3670 }
3671 else
3672 {
3673 ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
3674 pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
3675 }
3676
3677 if (prtXor && blockSet.macro64KB)
3678 {
3679 // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3680 const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
3681 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
3682 }
3683 else if (pOut->canXor)
3684 {
3685 // Client wants XOR and this is allowed, return XOR version swizzle mode
3686 const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
3687 pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
3688 }
3689 }
3690 else if (blockSet.micro)
3691 {
3692 if (addrPreferredSwSet.value == AddrSwSetS)
3693 {
3694 pOut->swizzleMode = ADDR_SW_256B_S;
3695 }
3696 else if (addrPreferredSwSet.value == AddrSwSetD)
3697 {
3698 pOut->swizzleMode = ADDR_SW_256B_D;
3699 }
3700 else
3701 {
3702 ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
3703 pOut->swizzleMode = ADDR_SW_256B_R;
3704 }
3705 }
3706 else if (blockSet.linear)
3707 {
3708 // Fall into this branch doesn't mean linear is suitable, only no other choices!
3709 pOut->swizzleMode = ADDR_SW_LINEAR;
3710 }
3711 else
3712 {
3713 ADDR_ASSERT(blockSet.var);
3714
3715 // Designer consider VAR swizzle mode is usless for most cases
3716 ADDR_UNHANDLED_CASE();
3717
3718 returnCode = ADDR_NOTSUPPORTED;
3719 }
3720
3721 #if DEBUG
3722 // Post sanity check, at least AddrLib should accept the output generated by its own
3723 if (pOut->swizzleMode != ADDR_SW_LINEAR)
3724 {
3725 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3726 localIn.flags = pIn->flags;
3727 localIn.swizzleMode = pOut->swizzleMode;
3728 localIn.resourceType = pOut->resourceType;
3729 localIn.format = pIn->format;
3730 localIn.bpp = bpp;
3731 localIn.width = width;
3732 localIn.height = height;
3733 localIn.numSlices = slice;
3734 localIn.numMipLevels = numMipLevels;
3735 localIn.numSamples = numSamples;
3736 localIn.numFrags = numFrags;
3737
3738 HwlComputeSurfaceInfoSanityCheck(&localIn);
3739
3740 }
3741 #endif
3742 }
3743 }
3744 }
3745 }
3746
3747 return returnCode;
3748 }
3749
3750 /**
3751 ************************************************************************************************************************
3752 * Gfx9Lib::ComputeStereoInfo
3753 *
3754 * @brief
3755 * Compute height alignment and right eye pipeBankXor for stereo surface
3756 *
3757 * @return
3758 * Error code
3759 *
3760 ************************************************************************************************************************
3761 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3762 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3763 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3764 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3765 UINT_32* pHeightAlign
3766 ) const
3767 {
3768 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3769
3770 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3771
3772 if (eqIndex < m_numEquations)
3773 {
3774 if (IsXor(pIn->swizzleMode))
3775 {
3776 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3777 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
3778 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
3779 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
3780 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3781 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
3782
3783 ADDR_ASSERT(maxYCoordBlock256 ==
3784 GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3785
3786 const UINT_32 maxYCoordInBaseEquation =
3787 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3788
3789 ADDR_ASSERT(maxYCoordInBaseEquation ==
3790 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3791
3792 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3793
3794 ADDR_ASSERT(maxYCoordInPipeXor ==
3795 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3796
3797 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3798 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3799
3800 ADDR_ASSERT(maxYCoordInBankXor ==
3801 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3802
3803 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3804
3805 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3806 {
3807 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3808
3809 if (pOut->pStereoInfo != NULL)
3810 {
3811 pOut->pStereoInfo->rightSwizzle = 0;
3812
3813 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3814 {
3815 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3816 {
3817 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3818 }
3819
3820 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3821 {
3822 pOut->pStereoInfo->rightSwizzle |=
3823 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3824 }
3825
3826 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3827 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3828 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3829 }
3830 }
3831 }
3832 }
3833 }
3834 else
3835 {
3836 ADDR_ASSERT_ALWAYS();
3837 returnCode = ADDR_ERROR;
3838 }
3839
3840 return returnCode;
3841 }
3842
3843 /**
3844 ************************************************************************************************************************
3845 * Gfx9Lib::HwlComputeSurfaceInfoTiled
3846 *
3847 * @brief
3848 * Internal function to calculate alignment for tiled surface
3849 *
3850 * @return
3851 * ADDR_E_RETURNCODE
3852 ************************************************************************************************************************
3853 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3854 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3855 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3856 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3857 ) const
3858 {
3859 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3860 &pOut->blockHeight,
3861 &pOut->blockSlices,
3862 pIn->bpp,
3863 pIn->numFrags,
3864 pIn->resourceType,
3865 pIn->swizzleMode);
3866
3867 if (returnCode == ADDR_OK)
3868 {
3869 UINT_32 pitchAlignInElement = pOut->blockWidth;
3870
3871 if ((IsTex2d(pIn->resourceType) == TRUE) &&
3872 (pIn->flags.display || pIn->flags.rotated) &&
3873 (pIn->numMipLevels <= 1) &&
3874 (pIn->numSamples <= 1) &&
3875 (pIn->numFrags <= 1))
3876 {
3877 // Display engine needs pitch align to be at least 32 pixels.
3878 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3879 }
3880
3881 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3882
3883 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3884 {
3885 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3886 {
3887 returnCode = ADDR_INVALIDPARAMS;
3888 }
3889 else if (pIn->pitchInElement < pOut->pitch)
3890 {
3891 returnCode = ADDR_INVALIDPARAMS;
3892 }
3893 else
3894 {
3895 pOut->pitch = pIn->pitchInElement;
3896 }
3897 }
3898
3899 UINT_32 heightAlign = 0;
3900
3901 if (pIn->flags.qbStereo)
3902 {
3903 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3904 }
3905
3906 if (returnCode == ADDR_OK)
3907 {
3908 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3909
3910 if (heightAlign > 1)
3911 {
3912 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3913 }
3914
3915 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3916
3917 pOut->epitchIsHeight = FALSE;
3918 pOut->mipChainInTail = FALSE;
3919 pOut->firstMipIdInTail = pIn->numMipLevels;
3920
3921 pOut->mipChainPitch = pOut->pitch;
3922 pOut->mipChainHeight = pOut->height;
3923 pOut->mipChainSlice = pOut->numSlices;
3924
3925 if (pIn->numMipLevels > 1)
3926 {
3927 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3928 pIn->swizzleMode,
3929 pIn->bpp,
3930 pIn->width,
3931 pIn->height,
3932 pIn->numSlices,
3933 pOut->blockWidth,
3934 pOut->blockHeight,
3935 pOut->blockSlices,
3936 pIn->numMipLevels,
3937 pOut->pMipInfo);
3938
3939 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
3940
3941 if (endingMipId == 0)
3942 {
3943 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3944 pIn->swizzleMode,
3945 pOut->blockWidth,
3946 pOut->blockHeight,
3947 pOut->blockSlices);
3948
3949 pOut->epitchIsHeight = TRUE;
3950 pOut->pitch = tailMaxDim.w;
3951 pOut->height = tailMaxDim.h;
3952 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
3953 tailMaxDim.d : pIn->numSlices;
3954 pOut->mipChainInTail = TRUE;
3955 }
3956 else
3957 {
3958 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
3959 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3960
3961 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3962 pIn->swizzleMode,
3963 mip0WidthInBlk,
3964 mip0HeightInBlk,
3965 pOut->numSlices / pOut->blockSlices);
3966 if (majorMode == ADDR_MAJOR_Y)
3967 {
3968 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3969
3970 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
3971 {
3972 mip1WidthInBlk++;
3973 }
3974
3975 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3976
3977 pOut->epitchIsHeight = FALSE;
3978 }
3979 else
3980 {
3981 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3982
3983 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
3984 {
3985 mip1HeightInBlk++;
3986 }
3987
3988 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3989
3990 pOut->epitchIsHeight = TRUE;
3991 }
3992 }
3993
3994 if (pOut->pMipInfo != NULL)
3995 {
3996 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3997
3998 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3999 {
4000 Dim3d mipStartPos = {0};
4001 UINT_32 mipTailOffsetInBytes = 0;
4002
4003 mipStartPos = GetMipStartPos(pIn->resourceType,
4004 pIn->swizzleMode,
4005 pOut->pitch,
4006 pOut->height,
4007 pOut->numSlices,
4008 pOut->blockWidth,
4009 pOut->blockHeight,
4010 pOut->blockSlices,
4011 i,
4012 elementBytesLog2,
4013 &mipTailOffsetInBytes);
4014
4015 UINT_32 pitchInBlock =
4016 pOut->mipChainPitch / pOut->blockWidth;
4017 UINT_32 sliceInBlock =
4018 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4019 UINT_64 blockIndex =
4020 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4021 UINT_64 macroBlockOffset =
4022 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4023
4024 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4025 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4026 }
4027 }
4028 }
4029 else if (pOut->pMipInfo != NULL)
4030 {
4031 pOut->pMipInfo[0].pitch = pOut->pitch;
4032 pOut->pMipInfo[0].height = pOut->height;
4033 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4034 pOut->pMipInfo[0].offset = 0;
4035 }
4036
4037 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4038 (pIn->bpp >> 3) * pIn->numFrags;
4039 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4040 pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
4041
4042 if (pIn->flags.prt)
4043 {
4044 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4045 }
4046 }
4047 }
4048
4049 return returnCode;
4050 }
4051
4052 /**
4053 ************************************************************************************************************************
4054 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4055 *
4056 * @brief
4057 * Internal function to calculate alignment for linear surface
4058 *
4059 * @return
4060 * ADDR_E_RETURNCODE
4061 ************************************************************************************************************************
4062 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4063 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4064 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4065 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4066 ) const
4067 {
4068 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4069 UINT_32 pitch = 0;
4070 UINT_32 actualHeight = 0;
4071 UINT_32 elementBytes = pIn->bpp >> 3;
4072 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4073
4074 if (IsTex1d(pIn->resourceType))
4075 {
4076 if (pIn->height > 1)
4077 {
4078 returnCode = ADDR_INVALIDPARAMS;
4079 }
4080 else
4081 {
4082 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4083
4084 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4085 actualHeight = pIn->numMipLevels;
4086
4087 if (pIn->flags.prt == FALSE)
4088 {
4089 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4090 &pitch, &actualHeight);
4091 }
4092
4093 if (returnCode == ADDR_OK)
4094 {
4095 if (pOut->pMipInfo != NULL)
4096 {
4097 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4098 {
4099 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4100 pOut->pMipInfo[i].pitch = pitch;
4101 pOut->pMipInfo[i].height = 1;
4102 pOut->pMipInfo[i].depth = 1;
4103 }
4104 }
4105 }
4106 }
4107 }
4108 else
4109 {
4110 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4111 }
4112
4113 if ((pitch == 0) || (actualHeight == 0))
4114 {
4115 returnCode = ADDR_INVALIDPARAMS;
4116 }
4117
4118 if (returnCode == ADDR_OK)
4119 {
4120 pOut->pitch = pitch;
4121 pOut->height = pIn->height;
4122 pOut->numSlices = pIn->numSlices;
4123 pOut->mipChainPitch = pitch;
4124 pOut->mipChainHeight = actualHeight;
4125 pOut->mipChainSlice = pOut->numSlices;
4126 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4127 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4128 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4129 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4130 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4131 pOut->blockHeight = 1;
4132 pOut->blockSlices = 1;
4133 }
4134
4135 // Post calculation validate
4136 ADDR_ASSERT(pOut->sliceSize > 0);
4137
4138 return returnCode;
4139 }
4140
4141 /**
4142 ************************************************************************************************************************
4143 * Gfx9Lib::GetMipChainInfo
4144 *
4145 * @brief
4146 * Internal function to get out information about mip chain
4147 *
4148 * @return
4149 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4150 ************************************************************************************************************************
4151 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4152 UINT_32 Gfx9Lib::GetMipChainInfo(
4153 AddrResourceType resourceType,
4154 AddrSwizzleMode swizzleMode,
4155 UINT_32 bpp,
4156 UINT_32 mip0Width,
4157 UINT_32 mip0Height,
4158 UINT_32 mip0Depth,
4159 UINT_32 blockWidth,
4160 UINT_32 blockHeight,
4161 UINT_32 blockDepth,
4162 UINT_32 numMipLevel,
4163 ADDR2_MIP_INFO* pMipInfo) const
4164 {
4165 const Dim3d tailMaxDim =
4166 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4167
4168 UINT_32 mipPitch = mip0Width;
4169 UINT_32 mipHeight = mip0Height;
4170 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4171 UINT_32 offset = 0;
4172 UINT_32 firstMipIdInTail = numMipLevel;
4173 BOOL_32 inTail = FALSE;
4174 BOOL_32 finalDim = FALSE;
4175 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4176 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4177
4178 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4179 {
4180 if (inTail)
4181 {
4182 if (finalDim == FALSE)
4183 {
4184 UINT_32 mipSize;
4185
4186 if (is3dThick)
4187 {
4188 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4189 }
4190 else
4191 {
4192 mipSize = mipPitch * mipHeight * (bpp >> 3);
4193 }
4194
4195 if (mipSize <= 256)
4196 {
4197 UINT_32 index = Log2(bpp >> 3);
4198
4199 if (is3dThick)
4200 {
4201 mipPitch = Block256_3dZ[index].w;
4202 mipHeight = Block256_3dZ[index].h;
4203 mipDepth = Block256_3dZ[index].d;
4204 }
4205 else
4206 {
4207 mipPitch = Block256_2d[index].w;
4208 mipHeight = Block256_2d[index].h;
4209 }
4210
4211 finalDim = TRUE;
4212 }
4213 }
4214 }
4215 else
4216 {
4217 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4218 mipPitch, mipHeight, mipDepth);
4219
4220 if (inTail)
4221 {
4222 firstMipIdInTail = mipId;
4223 mipPitch = tailMaxDim.w;
4224 mipHeight = tailMaxDim.h;
4225
4226 if (is3dThick)
4227 {
4228 mipDepth = tailMaxDim.d;
4229 }
4230 }
4231 else
4232 {
4233 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4234 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4235
4236 if (is3dThick)
4237 {
4238 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4239 }
4240 }
4241 }
4242
4243 if (pMipInfo != NULL)
4244 {
4245 pMipInfo[mipId].pitch = mipPitch;
4246 pMipInfo[mipId].height = mipHeight;
4247 pMipInfo[mipId].depth = mipDepth;
4248 pMipInfo[mipId].offset = offset;
4249 }
4250
4251 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4252
4253 if (finalDim)
4254 {
4255 if (is3dThin)
4256 {
4257 mipDepth = Max(mipDepth >> 1, 1u);
4258 }
4259 }
4260 else
4261 {
4262 mipPitch = Max(mipPitch >> 1, 1u);
4263 mipHeight = Max(mipHeight >> 1, 1u);
4264
4265 if (is3dThick || is3dThin)
4266 {
4267 mipDepth = Max(mipDepth >> 1, 1u);
4268 }
4269 }
4270 }
4271
4272 return firstMipIdInTail;
4273 }
4274
4275 /**
4276 ************************************************************************************************************************
4277 * Gfx9Lib::GetMetaMiptailInfo
4278 *
4279 * @brief
4280 * Get mip tail coordinate information.
4281 *
4282 * @return
4283 * N/A
4284 ************************************************************************************************************************
4285 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4286 VOID Gfx9Lib::GetMetaMiptailInfo(
4287 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4288 Dim3d mipCoord, ///< [in] mip tail base coord
4289 UINT_32 numMipInTail, ///< [in] number of mips in tail
4290 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4291 ) const
4292 {
4293 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4294 UINT_32 mipWidth = pMetaBlkDim->w;
4295 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4296 UINT_32 mipDepth = pMetaBlkDim->d;
4297 UINT_32 minInc;
4298
4299 if (isThick)
4300 {
4301 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4302 }
4303 else if (pMetaBlkDim->h >= 1024)
4304 {
4305 minInc = 256;
4306 }
4307 else if (pMetaBlkDim->h == 512)
4308 {
4309 minInc = 128;
4310 }
4311 else
4312 {
4313 minInc = 64;
4314 }
4315
4316 UINT_32 blk32MipId = 0xFFFFFFFF;
4317
4318 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4319 {
4320 pInfo[mip].inMiptail = TRUE;
4321 pInfo[mip].startX = mipCoord.w;
4322 pInfo[mip].startY = mipCoord.h;
4323 pInfo[mip].startZ = mipCoord.d;
4324 pInfo[mip].width = mipWidth;
4325 pInfo[mip].height = mipHeight;
4326 pInfo[mip].depth = mipDepth;
4327
4328 if (mipWidth <= 32)
4329 {
4330 if (blk32MipId == 0xFFFFFFFF)
4331 {
4332 blk32MipId = mip;
4333 }
4334
4335 mipCoord.w = pInfo[blk32MipId].startX;
4336 mipCoord.h = pInfo[blk32MipId].startY;
4337 mipCoord.d = pInfo[blk32MipId].startZ;
4338
4339 switch (mip - blk32MipId)
4340 {
4341 case 0:
4342 mipCoord.w += 32; // 16x16
4343 break;
4344 case 1:
4345 mipCoord.h += 32; // 8x8
4346 break;
4347 case 2:
4348 mipCoord.h += 32; // 4x4
4349 mipCoord.w += 16;
4350 break;
4351 case 3:
4352 mipCoord.h += 32; // 2x2
4353 mipCoord.w += 32;
4354 break;
4355 case 4:
4356 mipCoord.h += 32; // 1x1
4357 mipCoord.w += 48;
4358 break;
4359 // The following are for BC/ASTC formats
4360 case 5:
4361 mipCoord.h += 48; // 1/2 x 1/2
4362 break;
4363 case 6:
4364 mipCoord.h += 48; // 1/4 x 1/4
4365 mipCoord.w += 16;
4366 break;
4367 case 7:
4368 mipCoord.h += 48; // 1/8 x 1/8
4369 mipCoord.w += 32;
4370 break;
4371 case 8:
4372 mipCoord.h += 48; // 1/16 x 1/16
4373 mipCoord.w += 48;
4374 break;
4375 default:
4376 ADDR_ASSERT_ALWAYS();
4377 break;
4378 }
4379
4380 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4381 mipHeight = mipWidth;
4382
4383 if (isThick)
4384 {
4385 mipDepth = mipWidth;
4386 }
4387 }
4388 else
4389 {
4390 if (mipWidth <= minInc)
4391 {
4392 // if we're below the minimal increment...
4393 if (isThick)
4394 {
4395 // For 3d, just go in z direction
4396 mipCoord.d += mipDepth;
4397 }
4398 else
4399 {
4400 // For 2d, first go across, then down
4401 if ((mipWidth * 2) == minInc)
4402 {
4403 // if we're 2 mips below, that's when we go back in x, and down in y
4404 mipCoord.w -= minInc;
4405 mipCoord.h += minInc;
4406 }
4407 else
4408 {
4409 // otherwise, just go across in x
4410 mipCoord.w += minInc;
4411 }
4412 }
4413 }
4414 else
4415 {
4416 // On even mip, go down, otherwise, go across
4417 if (mip & 1)
4418 {
4419 mipCoord.w += mipWidth;
4420 }
4421 else
4422 {
4423 mipCoord.h += mipHeight;
4424 }
4425 }
4426 // Divide the width by 2
4427 mipWidth >>= 1;
4428 // After the first mip in tail, the mip is always a square
4429 mipHeight = mipWidth;
4430 // ...or for 3d, a cube
4431 if (isThick)
4432 {
4433 mipDepth = mipWidth;
4434 }
4435 }
4436 }
4437 }
4438
4439 /**
4440 ************************************************************************************************************************
4441 * Gfx9Lib::GetMipStartPos
4442 *
4443 * @brief
4444 * Internal function to get out information about mip logical start position
4445 *
4446 * @return
4447 * logical start position in macro block width/heith/depth of one mip level within one slice
4448 ************************************************************************************************************************
4449 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4450 Dim3d Gfx9Lib::GetMipStartPos(
4451 AddrResourceType resourceType,
4452 AddrSwizzleMode swizzleMode,
4453 UINT_32 width,
4454 UINT_32 height,
4455 UINT_32 depth,
4456 UINT_32 blockWidth,
4457 UINT_32 blockHeight,
4458 UINT_32 blockDepth,
4459 UINT_32 mipId,
4460 UINT_32 log2ElementBytes,
4461 UINT_32* pMipTailBytesOffset) const
4462 {
4463 Dim3d mipStartPos = {0};
4464 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4465
4466 // Report mip in tail if Mip0 is already in mip tail
4467 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4468 UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode);
4469 UINT_32 mipIndexInTail = mipId;
4470
4471 if (inMipTail == FALSE)
4472 {
4473 // Mip 0 dimension, unit in block
4474 UINT_32 mipWidthInBlk = width / blockWidth;
4475 UINT_32 mipHeightInBlk = height / blockHeight;
4476 UINT_32 mipDepthInBlk = depth / blockDepth;
4477 AddrMajorMode majorMode = GetMajorMode(resourceType,
4478 swizzleMode,
4479 mipWidthInBlk,
4480 mipHeightInBlk,
4481 mipDepthInBlk);
4482
4483 UINT_32 endingMip = mipId + 1;
4484
4485 for (UINT_32 i = 1; i <= mipId; i++)
4486 {
4487 if ((i == 1) || (i == 3))
4488 {
4489 if (majorMode == ADDR_MAJOR_Y)
4490 {
4491 mipStartPos.w += mipWidthInBlk;
4492 }
4493 else
4494 {
4495 mipStartPos.h += mipHeightInBlk;
4496 }
4497 }
4498 else
4499 {
4500 if (majorMode == ADDR_MAJOR_X)
4501 {
4502 mipStartPos.w += mipWidthInBlk;
4503 }
4504 else if (majorMode == ADDR_MAJOR_Y)
4505 {
4506 mipStartPos.h += mipHeightInBlk;
4507 }
4508 else
4509 {
4510 mipStartPos.d += mipDepthInBlk;
4511 }
4512 }
4513
4514 BOOL_32 inTail = FALSE;
4515
4516 if (IsThick(resourceType, swizzleMode))
4517 {
4518 UINT_32 dim = log2blkSize % 3;
4519
4520 if (dim == 0)
4521 {
4522 inTail =
4523 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4524 }
4525 else if (dim == 1)
4526 {
4527 inTail =
4528 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4529 }
4530 else
4531 {
4532 inTail =
4533 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4534 }
4535 }
4536 else
4537 {
4538 if (log2blkSize & 1)
4539 {
4540 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4541 }
4542 else
4543 {
4544 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4545 }
4546 }
4547
4548 if (inTail)
4549 {
4550 endingMip = i;
4551 break;
4552 }
4553
4554 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4555 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4556 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4557 }
4558
4559 if (mipId >= endingMip)
4560 {
4561 inMipTail = TRUE;
4562 mipIndexInTail = mipId - endingMip;
4563 }
4564 }
4565
4566 if (inMipTail)
4567 {
4568 UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4569 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4570 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4571 }
4572
4573 return mipStartPos;
4574 }
4575
4576 /**
4577 ************************************************************************************************************************
4578 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4579 *
4580 * @brief
4581 * Internal function to calculate address from coord for tiled swizzle surface
4582 *
4583 * @return
4584 * ADDR_E_RETURNCODE
4585 ************************************************************************************************************************
4586 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4587 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4588 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4589 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4590 ) const
4591 {
4592 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4593 localIn.swizzleMode = pIn->swizzleMode;
4594 localIn.flags = pIn->flags;
4595 localIn.resourceType = pIn->resourceType;
4596 localIn.bpp = pIn->bpp;
4597 localIn.width = Max(pIn->unalignedWidth, 1u);
4598 localIn.height = Max(pIn->unalignedHeight, 1u);
4599 localIn.numSlices = Max(pIn->numSlices, 1u);
4600 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4601 localIn.numSamples = Max(pIn->numSamples, 1u);
4602 localIn.numFrags = Max(pIn->numFrags, 1u);
4603 if (localIn.numMipLevels <= 1)
4604 {
4605 localIn.pitchInElement = pIn->pitchInElement;
4606 }
4607
4608 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4609 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4610
4611 BOOL_32 valid = (returnCode == ADDR_OK) &&
4612 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4613 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4614 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4615
4616 if (valid)
4617 {
4618 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4619 Dim3d mipStartPos = {0};
4620 UINT_32 mipTailBytesOffset = 0;
4621
4622 if (pIn->numMipLevels > 1)
4623 {
4624 // Mip-map chain cannot be MSAA surface
4625 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4626
4627 mipStartPos = GetMipStartPos(pIn->resourceType,
4628 pIn->swizzleMode,
4629 localOut.pitch,
4630 localOut.height,
4631 localOut.numSlices,
4632 localOut.blockWidth,
4633 localOut.blockHeight,
4634 localOut.blockSlices,
4635 pIn->mipId,
4636 log2ElementBytes,
4637 &mipTailBytesOffset);
4638 }
4639
4640 UINT_32 interleaveOffset = 0;
4641 UINT_32 pipeBits = 0;
4642 UINT_32 pipeXor = 0;
4643 UINT_32 bankBits = 0;
4644 UINT_32 bankXor = 0;
4645
4646 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4647 {
4648 UINT_32 blockOffset = 0;
4649 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4650
4651 if (IsZOrderSwizzle(pIn->swizzleMode))
4652 {
4653 // Morton generation
4654 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4655 {
4656 UINT_32 totalLowBits = 6 - log2ElementBytes;
4657 UINT_32 mortBits = totalLowBits / 2;
4658 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4659 // Are 9 bits enough?
4660 UINT_32 highBitsValue =
4661 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4662 blockOffset = lowBitsValue | highBitsValue;
4663 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4664 }
4665 else
4666 {
4667 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4668 }
4669
4670 // Fill LSBs with sample bits
4671 if (pIn->numSamples > 1)
4672 {
4673 blockOffset *= pIn->numSamples;
4674 blockOffset |= pIn->sample;
4675 }
4676
4677 // Shift according to BytesPP
4678 blockOffset <<= log2ElementBytes;
4679 }
4680 else
4681 {
4682 // Micro block offset
4683 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4684 blockOffset = microBlockOffset;
4685
4686 // Micro block dimension
4687 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4688 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4689 // Morton generation, does 12 bit enough?
4690 blockOffset |=
4691 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4692
4693 // Sample bits start location
4694 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4695 // Join sample bits information to the highest Macro block bits
4696 if (IsNonPrtXor(pIn->swizzleMode))
4697 {
4698 // Non-prt-Xor : xor highest Macro block bits with sample bits
4699 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4700 }
4701 else
4702 {
4703 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4704 // after this op, the blockOffset only contains log2 Macro block size bits
4705 blockOffset %= (1 << sampleStart);
4706 blockOffset |= (pIn->sample << sampleStart);
4707 ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4708 }
4709 }
4710
4711 if (IsXor(pIn->swizzleMode))
4712 {
4713 // Mask off bits above Macro block bits to keep page synonyms working for prt
4714 if (IsPrt(pIn->swizzleMode))
4715 {
4716 blockOffset &= ((1 << log2blkSize) - 1);
4717 }
4718
4719 // Preserve offset inside pipe interleave
4720 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4721 blockOffset >>= m_pipeInterleaveLog2;
4722
4723 // Pipe/Se xor bits
4724 pipeBits = GetPipeXorBits(log2blkSize);
4725 // Pipe xor
4726 pipeXor = FoldXor2d(blockOffset, pipeBits);
4727 blockOffset >>= pipeBits;
4728
4729 // Bank xor bits
4730 bankBits = GetBankXorBits(log2blkSize);
4731 // Bank Xor
4732 bankXor = FoldXor2d(blockOffset, bankBits);
4733 blockOffset >>= bankBits;
4734
4735 // Put all the part back together
4736 blockOffset <<= bankBits;
4737 blockOffset |= bankXor;
4738 blockOffset <<= pipeBits;
4739 blockOffset |= pipeXor;
4740 blockOffset <<= m_pipeInterleaveLog2;
4741 blockOffset |= interleaveOffset;
4742 }
4743
4744 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4745 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4746
4747 blockOffset |= mipTailBytesOffset;
4748
4749 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4750 {
4751 // Apply slice xor if not MSAA/PRT
4752 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4753 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4754 (m_pipeInterleaveLog2 + pipeBits));
4755 }
4756
4757 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4758 bankBits, pipeBits, &blockOffset);
4759
4760 blockOffset %= (1 << log2blkSize);
4761
4762 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4763 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4764 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4765 UINT_32 macroBlockIndex =
4766 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4767 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4768 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4769
4770 UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
4771 GetBlockSizeLog2(pIn->swizzleMode));
4772
4773 pOut->addr = blockOffset | macroBlockOffset;
4774 }
4775 else
4776 {
4777 UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4778
4779 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4780
4781 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4782 (pIn->y / microBlockDim.h),
4783 (pIn->slice / microBlockDim.d),
4784 8);
4785
4786 blockOffset <<= 10;
4787 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4788
4789 if (IsXor(pIn->swizzleMode))
4790 {
4791 // Mask off bits above Macro block bits to keep page synonyms working for prt
4792 if (IsPrt(pIn->swizzleMode))
4793 {
4794 blockOffset &= ((1 << log2blkSize) - 1);
4795 }
4796
4797 // Preserve offset inside pipe interleave
4798 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4799 blockOffset >>= m_pipeInterleaveLog2;
4800
4801 // Pipe/Se xor bits
4802 pipeBits = GetPipeXorBits(log2blkSize);
4803 // Pipe xor
4804 pipeXor = FoldXor3d(blockOffset, pipeBits);
4805 blockOffset >>= pipeBits;
4806
4807 // Bank xor bits
4808 bankBits = GetBankXorBits(log2blkSize);
4809 // Bank Xor
4810 bankXor = FoldXor3d(blockOffset, bankBits);
4811 blockOffset >>= bankBits;
4812
4813 // Put all the part back together
4814 blockOffset <<= bankBits;
4815 blockOffset |= bankXor;
4816 blockOffset <<= pipeBits;
4817 blockOffset |= pipeXor;
4818 blockOffset <<= m_pipeInterleaveLog2;
4819 blockOffset |= interleaveOffset;
4820 }
4821
4822 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4823 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4824 blockOffset |= mipTailBytesOffset;
4825
4826 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4827 bankBits, pipeBits, &blockOffset);
4828
4829 blockOffset %= (1 << log2blkSize);
4830
4831 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
4832 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4833 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4834
4835 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4836 UINT_32 sliceSizeInBlock =
4837 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4838 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4839
4840 pOut->addr = blockOffset | (blockIndex << log2blkSize);
4841 }
4842 }
4843 else
4844 {
4845 returnCode = ADDR_INVALIDPARAMS;
4846 }
4847
4848 return returnCode;
4849 }
4850
4851 /**
4852 ************************************************************************************************************************
4853 * Gfx9Lib::ComputeSurfaceInfoLinear
4854 *
4855 * @brief
4856 * Internal function to calculate padding for linear swizzle 2D/3D surface
4857 *
4858 * @return
4859 * N/A
4860 ************************************************************************************************************************
4861 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const4862 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4863 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
4864 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
4865 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
4866 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
4867 ) const
4868 {
4869 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4870
4871 UINT_32 elementBytes = pIn->bpp >> 3;
4872 UINT_32 pitchAlignInElement = 0;
4873
4874 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4875 {
4876 ADDR_ASSERT(pIn->numMipLevels <= 1);
4877 ADDR_ASSERT(pIn->numSlices <= 1);
4878 pitchAlignInElement = 1;
4879 }
4880 else
4881 {
4882 pitchAlignInElement = (256 / elementBytes);
4883 }
4884
4885 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
4886 UINT_32 slice0PaddedHeight = pIn->height;
4887
4888 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4889 &mipChainWidth, &slice0PaddedHeight);
4890
4891 if (returnCode == ADDR_OK)
4892 {
4893 UINT_32 mipChainHeight = 0;
4894 UINT_32 mipHeight = pIn->height;
4895
4896 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4897 {
4898 if (pMipInfo != NULL)
4899 {
4900 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4901 pMipInfo[i].pitch = mipChainWidth;
4902 pMipInfo[i].height = mipHeight;
4903 pMipInfo[i].depth = 1;
4904 }
4905
4906 mipChainHeight += mipHeight;
4907 mipHeight = RoundHalf(mipHeight);
4908 mipHeight = Max(mipHeight, 1u);
4909 }
4910
4911 *pMipmap0PaddedWidth = mipChainWidth;
4912 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4913 }
4914
4915 return returnCode;
4916 }
4917
4918 } // V2
4919 } // Addr
4920