1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx10addrlib.cpp
12 * @brief Contains the implementation for the Gfx10Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx10addrlib.h"
17 #include "addrcommon.h"
18 #include "gfx10_gb_reg.h"
19
20 #include "amdgpu_asic_addr.h"
21
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 * Gfx10HwlInit
30 *
31 * @brief
32 * Creates a Gfx10Lib object.
33 *
34 * @return
35 * Returns a Gfx10Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx10HwlInit(const Client * pClient)38 Addr::Lib* Gfx10HwlInit(const Client* pClient)
39 {
40 return V2::Gfx10Lib::CreateObj(pClient);
41 }
42
43 namespace V2
44 {
45
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 // Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49
50 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
51 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
52 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
53 {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S
54 {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
55 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
56
57 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
58 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
59 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
60 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
61
62 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
63 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
64 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
65 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
66
67 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
68 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
69 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
70 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
71
72 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
73 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
74 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
75 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
76
77 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
78 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X
79 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X
80 {{0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_4KB_R_X
81
82 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
83 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
84 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
85 {{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_64KB_R_X
86
87 {{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_VAR_Z_X
88 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
89 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
90 {{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_VAR_R_X
91 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
92 };
93
94 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
95
96 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
97 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
98
99 /**
100 ************************************************************************************************************************
101 * Gfx10Lib::Gfx10Lib
102 *
103 * @brief
104 * Constructor
105 *
106 ************************************************************************************************************************
107 */
Gfx10Lib(const Client * pClient)108 Gfx10Lib::Gfx10Lib(const Client* pClient)
109 :
110 Lib(pClient),
111 m_numPkrLog2(0),
112 m_numSaLog2(0),
113 m_colorBaseIndex(0),
114 m_xmaskBaseIndex(0),
115 m_htileBaseIndex(0),
116 m_dccBaseIndex(0)
117 {
118 memset(&m_settings, 0, sizeof(m_settings));
119 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
120 }
121
122 /**
123 ************************************************************************************************************************
124 * Gfx10Lib::~Gfx10Lib
125 *
126 * @brief
127 * Destructor
128 ************************************************************************************************************************
129 */
~Gfx10Lib()130 Gfx10Lib::~Gfx10Lib()
131 {
132 }
133
134 /**
135 ************************************************************************************************************************
136 * Gfx10Lib::HwlComputeHtileInfo
137 *
138 * @brief
139 * Interface function stub of AddrComputeHtileInfo
140 *
141 * @return
142 * ADDR_E_RETURNCODE
143 ************************************************************************************************************************
144 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const145 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
146 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
147 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
148 ) const
149 {
150 ADDR_E_RETURNCODE ret = ADDR_OK;
151
152 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
153 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
154 (pIn->hTileFlags.pipeAligned != TRUE))
155 {
156 ret = ADDR_INVALIDPARAMS;
157 }
158 else
159 {
160 Dim3d metaBlk = {};
161 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
162 ADDR_RSRC_TEX_2D,
163 pIn->swizzleMode,
164 0,
165 0,
166 TRUE,
167 &metaBlk);
168
169 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
170 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
171 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
172 pOut->metaBlkWidth = metaBlk.w;
173 pOut->metaBlkHeight = metaBlk.h;
174
175 if (pIn->numMipLevels > 1)
176 {
177 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
178
179 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
180
181 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
182 {
183 UINT_32 mipWidth, mipHeight;
184
185 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
186
187 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
188 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
189
190 const UINT_32 pitchInM = mipWidth / metaBlk.w;
191 const UINT_32 heightInM = mipHeight / metaBlk.h;
192 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
193
194 if (pOut->pMipInfo != NULL)
195 {
196 pOut->pMipInfo[i].inMiptail = FALSE;
197 pOut->pMipInfo[i].offset = offset;
198 pOut->pMipInfo[i].sliceSize = mipSliceSize;
199 }
200
201 offset += mipSliceSize;
202 }
203
204 pOut->sliceSize = offset;
205 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
206 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
207
208 if (pOut->pMipInfo != NULL)
209 {
210 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
211 {
212 pOut->pMipInfo[i].inMiptail = TRUE;
213 pOut->pMipInfo[i].offset = 0;
214 pOut->pMipInfo[i].sliceSize = 0;
215 }
216
217 if (pIn->firstMipIdInTail != pIn->numMipLevels)
218 {
219 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
220 }
221 }
222 }
223 else
224 {
225 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
226 const UINT_32 heightInM = pOut->height / metaBlk.h;
227
228 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
229 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
230 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
231
232 if (pOut->pMipInfo != NULL)
233 {
234 pOut->pMipInfo[0].inMiptail = FALSE;
235 pOut->pMipInfo[0].offset = 0;
236 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
237 }
238 }
239
240 // Get the HTILE address equation (copied from HtileAddrFromCoord).
241 // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
242 const UINT_32 index = m_xmaskBaseIndex;
243 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
244
245 ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
246 pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
247 }
248
249 return ret;
250 }
251
252 /**
253 ************************************************************************************************************************
254 * Gfx10Lib::HwlComputeCmaskInfo
255 *
256 * @brief
257 * Interface function stub of AddrComputeCmaskInfo
258 *
259 * @return
260 * ADDR_E_RETURNCODE
261 ************************************************************************************************************************
262 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const263 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
264 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
265 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
266 ) const
267 {
268 ADDR_E_RETURNCODE ret = ADDR_OK;
269
270 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
271 (pIn->cMaskFlags.pipeAligned != TRUE) ||
272 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
273 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
274 {
275 ret = ADDR_INVALIDPARAMS;
276 }
277 else
278 {
279 Dim3d metaBlk = {};
280 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
281 ADDR_RSRC_TEX_2D,
282 pIn->swizzleMode,
283 0,
284 0,
285 TRUE,
286 &metaBlk);
287
288 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
289 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
290 pOut->baseAlign = metaBlkSize;
291 pOut->metaBlkWidth = metaBlk.w;
292 pOut->metaBlkHeight = metaBlk.h;
293
294 if (pIn->numMipLevels > 1)
295 {
296 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
297
298 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
299
300 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
301 {
302 UINT_32 mipWidth, mipHeight;
303
304 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
305
306 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
307 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
308
309 const UINT_32 pitchInM = mipWidth / metaBlk.w;
310 const UINT_32 heightInM = mipHeight / metaBlk.h;
311
312 if (pOut->pMipInfo != NULL)
313 {
314 pOut->pMipInfo[i].inMiptail = FALSE;
315 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
316 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
317 }
318
319 metaBlkPerSlice += pitchInM * heightInM;
320 }
321
322 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
323
324 if (pOut->pMipInfo != NULL)
325 {
326 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
327 {
328 pOut->pMipInfo[i].inMiptail = TRUE;
329 pOut->pMipInfo[i].offset = 0;
330 pOut->pMipInfo[i].sliceSize = 0;
331 }
332
333 if (pIn->firstMipIdInTail != pIn->numMipLevels)
334 {
335 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
336 }
337 }
338 }
339 else
340 {
341 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
342 const UINT_32 heightInM = pOut->height / metaBlk.h;
343
344 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
345
346 if (pOut->pMipInfo != NULL)
347 {
348 pOut->pMipInfo[0].inMiptail = FALSE;
349 pOut->pMipInfo[0].offset = 0;
350 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
351 }
352 }
353
354 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
355 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
356
357 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
358 const UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
359 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
360 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
361 const UINT_8* patIdxTable =
362 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
363 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
364
365 ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
366 pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
367 }
368
369 return ret;
370 }
371
372 /**
373 ************************************************************************************************************************
374 * Gfx10Lib::HwlComputeDccInfo
375 *
376 * @brief
377 * Interface function to compute DCC key info
378 *
379 * @return
380 * ADDR_E_RETURNCODE
381 ************************************************************************************************************************
382 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const383 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
384 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
385 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
386 ) const
387 {
388 ADDR_E_RETURNCODE ret = ADDR_OK;
389
390 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
391 {
392 // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
393 // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
394 ret = ADDR_INVALIDPARAMS;
395 }
396 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
397 {
398 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
399 ret = ADDR_INVALIDPARAMS;
400 }
401 else
402 {
403 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
404
405 {
406 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
407 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
408
409 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
410
411 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
412 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
413 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
414 }
415
416 if (ret == ADDR_OK)
417 {
418 Dim3d metaBlk = {};
419 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
420 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
421 pIn->resourceType,
422 pIn->swizzleMode,
423 elemLog2,
424 numFragLog2,
425 pIn->dccKeyFlags.pipeAligned,
426 &metaBlk);
427
428 pOut->dccRamBaseAlign = metaBlkSize;
429 pOut->metaBlkWidth = metaBlk.w;
430 pOut->metaBlkHeight = metaBlk.h;
431 pOut->metaBlkDepth = metaBlk.d;
432 pOut->metaBlkSize = metaBlkSize;
433
434 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
435 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
436 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
437
438 if (pIn->numMipLevels > 1)
439 {
440 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
441
442 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
443
444 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
445 {
446 UINT_32 mipWidth, mipHeight;
447
448 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
449
450 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
451 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
452
453 const UINT_32 pitchInM = mipWidth / metaBlk.w;
454 const UINT_32 heightInM = mipHeight / metaBlk.h;
455 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
456
457 if (pOut->pMipInfo != NULL)
458 {
459 pOut->pMipInfo[i].inMiptail = FALSE;
460 pOut->pMipInfo[i].offset = offset;
461 pOut->pMipInfo[i].sliceSize = mipSliceSize;
462 }
463
464 offset += mipSliceSize;
465 }
466
467 pOut->dccRamSliceSize = offset;
468 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
469 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
470
471 if (pOut->pMipInfo != NULL)
472 {
473 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
474 {
475 pOut->pMipInfo[i].inMiptail = TRUE;
476 pOut->pMipInfo[i].offset = 0;
477 pOut->pMipInfo[i].sliceSize = 0;
478 }
479
480 if (pIn->firstMipIdInTail != pIn->numMipLevels)
481 {
482 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
483 }
484 }
485 }
486 else
487 {
488 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
489 const UINT_32 heightInM = pOut->height / metaBlk.h;
490
491 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
492 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
493 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
494
495 if (pOut->pMipInfo != NULL)
496 {
497 pOut->pMipInfo[0].inMiptail = FALSE;
498 pOut->pMipInfo[0].offset = 0;
499 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
500 }
501 }
502
503 // Get the DCC address equation (copied from DccAddrFromCoord)
504 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
505 const UINT_32 numPipeLog2 = m_pipesLog2;
506 UINT_32 index = m_dccBaseIndex + elemLog2;
507 const UINT_8* patIdxTable;
508
509 if (m_settings.supportRbPlus)
510 {
511 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
512
513 if (pIn->dccKeyFlags.pipeAligned)
514 {
515 index += MaxNumOfBpp;
516
517 if (m_numPkrLog2 < 2)
518 {
519 index += m_pipesLog2 * MaxNumOfBpp;
520 }
521 else
522 {
523 // 4 groups for "m_numPkrLog2 < 2" case
524 index += 4 * MaxNumOfBpp;
525
526 const UINT_32 dccPipePerPkr = 3;
527
528 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
529 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
530 }
531 }
532 }
533 else
534 {
535 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
536
537 if (pIn->dccKeyFlags.pipeAligned)
538 {
539 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
540 }
541 else
542 {
543 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
544 }
545 }
546
547 ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
548 pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
549 }
550 }
551
552 return ret;
553 }
554
555 /**
556 ************************************************************************************************************************
557 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
558 *
559 * @brief
560 * Interface function stub of AddrComputeCmaskAddrFromCoord
561 *
562 * @return
563 * ADDR_E_RETURNCODE
564 ************************************************************************************************************************
565 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)566 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
567 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
568 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
569 {
570 // Only support pipe aligned CMask
571 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
572
573 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
574 input.size = sizeof(input);
575 input.cMaskFlags = pIn->cMaskFlags;
576 input.colorFlags = pIn->colorFlags;
577 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
578 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
579 input.numSlices = Max(pIn->numSlices, 1u);
580 input.swizzleMode = pIn->swizzleMode;
581 input.resourceType = pIn->resourceType;
582
583 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
584 output.size = sizeof(output);
585
586 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
587
588 if (returnCode == ADDR_OK)
589 {
590 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
591 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
592 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
593 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
594 const UINT_8* patIdxTable =
595 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
596 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
597
598 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
599 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
600 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
601 blkSizeLog2 + 1, // +1 for nibble offset
602 pIn->x,
603 pIn->y,
604 pIn->slice,
605 0);
606 const UINT_32 xb = pIn->x / output.metaBlkWidth;
607 const UINT_32 yb = pIn->y / output.metaBlkHeight;
608 const UINT_32 pb = output.pitch / output.metaBlkWidth;
609 const UINT_32 blkIndex = (yb * pb) + xb;
610 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
611
612 pOut->addr = (output.sliceSize * pIn->slice) +
613 (blkIndex * (1 << blkSizeLog2)) +
614 ((blkOffset >> 1) ^ pipeXor);
615 pOut->bitPosition = (blkOffset & 1) << 2;
616 }
617
618 return returnCode;
619 }
620
621 /**
622 ************************************************************************************************************************
623 * Gfx10Lib::HwlComputeHtileAddrFromCoord
624 *
625 * @brief
626 * Interface function stub of AddrComputeHtileAddrFromCoord
627 *
628 * @return
629 * ADDR_E_RETURNCODE
630 ************************************************************************************************************************
631 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)632 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
633 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
634 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
635 {
636 ADDR_E_RETURNCODE returnCode = ADDR_OK;
637
638 if (pIn->numMipLevels > 1)
639 {
640 returnCode = ADDR_NOTIMPLEMENTED;
641 }
642 else
643 {
644 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
645 input.size = sizeof(input);
646 input.hTileFlags = pIn->hTileFlags;
647 input.depthFlags = pIn->depthflags;
648 input.swizzleMode = pIn->swizzleMode;
649 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
650 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
651 input.numSlices = Max(pIn->numSlices, 1u);
652 input.numMipLevels = 1;
653
654 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
655 output.size = sizeof(output);
656
657 returnCode = ComputeHtileInfo(&input, &output);
658
659 if (returnCode == ADDR_OK)
660 {
661 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
662 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
663 const UINT_32 index = m_htileBaseIndex + numSampleLog2;
664 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
665
666 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
667 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
668 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
669 blkSizeLog2 + 1, // +1 for nibble offset
670 pIn->x,
671 pIn->y,
672 pIn->slice,
673 0);
674 const UINT_32 xb = pIn->x / output.metaBlkWidth;
675 const UINT_32 yb = pIn->y / output.metaBlkHeight;
676 const UINT_32 pb = output.pitch / output.metaBlkWidth;
677 const UINT_32 blkIndex = (yb * pb) + xb;
678 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
679
680 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
681 (blkIndex * (1 << blkSizeLog2)) +
682 ((blkOffset >> 1) ^ pipeXor);
683 }
684 }
685
686 return returnCode;
687 }
688
689 /**
690 ************************************************************************************************************************
691 * Gfx10Lib::HwlComputeHtileCoordFromAddr
692 *
693 * @brief
694 * Interface function stub of AddrComputeHtileCoordFromAddr
695 *
696 * @return
697 * ADDR_E_RETURNCODE
698 ************************************************************************************************************************
699 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)700 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
701 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
702 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
703 {
704 ADDR_NOT_IMPLEMENTED();
705
706 return ADDR_OK;
707 }
708
709 /**
710 ************************************************************************************************************************
711 * Gfx10Lib::HwlSupportComputeDccAddrFromCoord
712 *
713 * @brief
714 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
715 *
716 * @return
717 * ADDR_E_RETURNCODE
718 ************************************************************************************************************************
719 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)720 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
721 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
722 {
723 ADDR_E_RETURNCODE returnCode = ADDR_OK;
724
725 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
726 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
727 (pIn->dccKeyFlags.linear == TRUE) ||
728 (pIn->numFrags > 1) ||
729 (pIn->numMipLevels > 1) ||
730 (pIn->mipId > 0))
731 {
732 returnCode = ADDR_NOTSUPPORTED;
733 }
734 else if ((pIn->pitch == 0) ||
735 (pIn->metaBlkWidth == 0) ||
736 (pIn->metaBlkHeight == 0) ||
737 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
738 {
739 returnCode = ADDR_NOTSUPPORTED;
740 }
741
742 return returnCode;
743 }
744
745 /**
746 ************************************************************************************************************************
747 * Gfx10Lib::HwlComputeDccAddrFromCoord
748 *
749 * @brief
750 * Interface function stub of AddrComputeDccAddrFromCoord
751 *
752 * @return
753 * N/A
754 ************************************************************************************************************************
755 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)756 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
757 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
758 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
759 {
760 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
761 const UINT_32 numPipeLog2 = m_pipesLog2;
762 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
763 UINT_32 index = m_dccBaseIndex + elemLog2;
764 const UINT_8* patIdxTable;
765
766 if (m_settings.supportRbPlus)
767 {
768 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
769
770 if (pIn->dccKeyFlags.pipeAligned)
771 {
772 index += MaxNumOfBpp;
773
774 if (m_numPkrLog2 < 2)
775 {
776 index += m_pipesLog2 * MaxNumOfBpp;
777 }
778 else
779 {
780 // 4 groups for "m_numPkrLog2 < 2" case
781 index += 4 * MaxNumOfBpp;
782
783 const UINT_32 dccPipePerPkr = 3;
784
785 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
786 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
787 }
788 }
789 }
790 else
791 {
792 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
793
794 if (pIn->dccKeyFlags.pipeAligned)
795 {
796 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
797 }
798 else
799 {
800 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
801 }
802 }
803
804 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
805 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
806 const UINT_32 blkOffset =
807 ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
808 blkSizeLog2 + 1, // +1 for nibble offset
809 pIn->x,
810 pIn->y,
811 pIn->slice,
812 0);
813 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
814 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
815 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
816 const UINT_32 blkIndex = (yb * pb) + xb;
817 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
818
819 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
820 (blkIndex * (1 << blkSizeLog2)) +
821 ((blkOffset >> 1) ^ pipeXor);
822 }
823
824 /**
825 ************************************************************************************************************************
826 * Gfx10Lib::HwlInitGlobalParams
827 *
828 * @brief
829 * Initializes global parameters
830 *
831 * @return
832 * TRUE if all settings are valid
833 *
834 ************************************************************************************************************************
835 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)836 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
837 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
838 {
839 BOOL_32 valid = TRUE;
840 GB_ADDR_CONFIG_GFX10 gbAddrConfig;
841
842 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
843
844 // These values are copied from CModel code
845 switch (gbAddrConfig.bits.NUM_PIPES)
846 {
847 case ADDR_CONFIG_1_PIPE:
848 m_pipes = 1;
849 m_pipesLog2 = 0;
850 break;
851 case ADDR_CONFIG_2_PIPE:
852 m_pipes = 2;
853 m_pipesLog2 = 1;
854 break;
855 case ADDR_CONFIG_4_PIPE:
856 m_pipes = 4;
857 m_pipesLog2 = 2;
858 break;
859 case ADDR_CONFIG_8_PIPE:
860 m_pipes = 8;
861 m_pipesLog2 = 3;
862 break;
863 case ADDR_CONFIG_16_PIPE:
864 m_pipes = 16;
865 m_pipesLog2 = 4;
866 break;
867 case ADDR_CONFIG_32_PIPE:
868 m_pipes = 32;
869 m_pipesLog2 = 5;
870 break;
871 case ADDR_CONFIG_64_PIPE:
872 m_pipes = 64;
873 m_pipesLog2 = 6;
874 break;
875 default:
876 ADDR_ASSERT_ALWAYS();
877 valid = FALSE;
878 break;
879 }
880
881 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
882 {
883 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
884 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
885 m_pipeInterleaveLog2 = 8;
886 break;
887 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
888 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
889 m_pipeInterleaveLog2 = 9;
890 break;
891 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
892 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
893 m_pipeInterleaveLog2 = 10;
894 break;
895 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
896 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
897 m_pipeInterleaveLog2 = 11;
898 break;
899 default:
900 ADDR_ASSERT_ALWAYS();
901 valid = FALSE;
902 break;
903 }
904
905 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
906 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
907 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
908 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
909
910 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
911 {
912 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
913 m_maxCompFrag = 1;
914 m_maxCompFragLog2 = 0;
915 break;
916 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
917 m_maxCompFrag = 2;
918 m_maxCompFragLog2 = 1;
919 break;
920 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
921 m_maxCompFrag = 4;
922 m_maxCompFragLog2 = 2;
923 break;
924 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
925 m_maxCompFrag = 8;
926 m_maxCompFragLog2 = 3;
927 break;
928 default:
929 ADDR_ASSERT_ALWAYS();
930 valid = FALSE;
931 break;
932 }
933
934 {
935 // Skip unaligned case
936 m_xmaskBaseIndex += MaxNumOfBppCMask;
937 m_htileBaseIndex += MaxNumOfAA;
938
939 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask;
940 m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
941 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
942
943 if (m_settings.supportRbPlus)
944 {
945 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
946 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
947
948 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
949
950 ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
951 sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
952
953 if (m_numPkrLog2 >= 2)
954 {
955 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
956 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask;
957 m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
958 }
959 }
960 else
961 {
962 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
963 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
964 1;
965
966 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
967 ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) ==
968 (numPipeType + 1) * MaxNumOfBppCMask);
969 }
970 }
971
972 if (m_settings.supportRbPlus)
973 {
974 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
975 // corresponding SW_64KB_* mode
976 m_blockVarSizeLog2 = m_pipesLog2 + 14;
977 }
978
979 if (valid)
980 {
981 InitEquationTable();
982 }
983
984 return valid;
985 }
986
987 /**
988 ************************************************************************************************************************
989 * Gfx10Lib::HwlConvertChipFamily
990 *
991 * @brief
992 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
993 * @return
994 * ChipFamily
995 ************************************************************************************************************************
996 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)997 ChipFamily Gfx10Lib::HwlConvertChipFamily(
998 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
999 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1000 {
1001 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1002
1003 m_settings.dccUnsup3DSwDis = 1;
1004 m_settings.dsMipmapHtileFix = 1;
1005
1006 switch (chipFamily)
1007 {
1008 case FAMILY_NV:
1009 if (ASICREV_IS_NAVI10_P(chipRevision))
1010 {
1011 m_settings.dsMipmapHtileFix = 0;
1012 m_settings.isDcn20 = 1;
1013 }
1014
1015 if (ASICREV_IS_NAVI12_P(chipRevision))
1016 {
1017 m_settings.isDcn20 = 1;
1018 }
1019
1020 if (ASICREV_IS_NAVI14_M(chipRevision))
1021 {
1022 m_settings.isDcn20 = 1;
1023 }
1024
1025 if (ASICREV_IS_NAVI21_M(chipRevision))
1026 {
1027 m_settings.supportRbPlus = 1;
1028 m_settings.dccUnsup3DSwDis = 0;
1029 }
1030
1031 if (ASICREV_IS_NAVI22_P(chipRevision))
1032 {
1033 m_settings.supportRbPlus = 1;
1034 m_settings.dccUnsup3DSwDis = 0;
1035 }
1036
1037 if (ASICREV_IS_NAVI23_P(chipRevision))
1038 {
1039 m_settings.supportRbPlus = 1;
1040 m_settings.dccUnsup3DSwDis = 0;
1041 }
1042
1043 if (ASICREV_IS_NAVI24_P(chipRevision))
1044 {
1045 m_settings.supportRbPlus = 1;
1046 m_settings.dccUnsup3DSwDis = 0;
1047 }
1048 break;
1049
1050 case FAMILY_VGH:
1051 if (ASICREV_IS_VANGOGH(chipRevision))
1052 {
1053 m_settings.supportRbPlus = 1;
1054 m_settings.dccUnsup3DSwDis = 0;
1055 }
1056 else
1057 {
1058 ADDR_ASSERT(!"Unknown chip revision");
1059 }
1060 break;
1061
1062 case FAMILY_RMB:
1063 if (ASICREV_IS_REMBRANDT(chipRevision))
1064 {
1065 m_settings.supportRbPlus = 1;
1066 m_settings.dccUnsup3DSwDis = 0;
1067 }
1068 else
1069 {
1070 ADDR_ASSERT(!"Unknown chip revision");
1071 }
1072 break;
1073 case FAMILY_RPL:
1074 if (ASICREV_IS_RAPHAEL(chipRevision))
1075 {
1076 m_settings.supportRbPlus = 1;
1077 m_settings.dccUnsup3DSwDis = 0;
1078 }
1079 break;
1080 case FAMILY_MDN:
1081 if (ASICREV_IS_MENDOCINO(chipRevision))
1082 {
1083 m_settings.supportRbPlus = 1;
1084 m_settings.dccUnsup3DSwDis = 0;
1085 }
1086 else
1087 {
1088 ADDR_ASSERT(!"Unknown chip revision");
1089 }
1090 break;
1091 default:
1092 ADDR_ASSERT(!"Unknown chip family");
1093 break;
1094 }
1095
1096 m_configFlags.use32bppFor422Fmt = TRUE;
1097
1098 return family;
1099 }
1100
1101 /**
1102 ************************************************************************************************************************
1103 * Gfx10Lib::GetBlk256SizeLog2
1104 *
1105 * @brief
1106 * Get block 256 size
1107 *
1108 * @return
1109 * N/A
1110 ************************************************************************************************************************
1111 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1112 void Gfx10Lib::GetBlk256SizeLog2(
1113 AddrResourceType resourceType, ///< [in] Resource type
1114 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1115 UINT_32 elemLog2, ///< [in] element size log2
1116 UINT_32 numSamplesLog2, ///< [in] number of samples
1117 Dim3d* pBlock ///< [out] block size
1118 ) const
1119 {
1120 if (IsThin(resourceType, swizzleMode))
1121 {
1122 UINT_32 blockBits = 8 - elemLog2;
1123
1124 if (IsZOrderSwizzle(swizzleMode))
1125 {
1126 blockBits -= numSamplesLog2;
1127 }
1128
1129 pBlock->w = (blockBits >> 1) + (blockBits & 1);
1130 pBlock->h = (blockBits >> 1);
1131 pBlock->d = 0;
1132 }
1133 else
1134 {
1135 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1136
1137 UINT_32 blockBits = 8 - elemLog2;
1138
1139 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1140 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1141 pBlock->h = (blockBits / 3);
1142 }
1143 }
1144
1145 /**
1146 ************************************************************************************************************************
1147 * Gfx10Lib::GetCompressedBlockSizeLog2
1148 *
1149 * @brief
1150 * Get compress block size
1151 *
1152 * @return
1153 * N/A
1154 ************************************************************************************************************************
1155 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1156 void Gfx10Lib::GetCompressedBlockSizeLog2(
1157 Gfx10DataType dataType, ///< [in] Data type
1158 AddrResourceType resourceType, ///< [in] Resource type
1159 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1160 UINT_32 elemLog2, ///< [in] element size log2
1161 UINT_32 numSamplesLog2, ///< [in] number of samples
1162 Dim3d* pBlock ///< [out] block size
1163 ) const
1164 {
1165 if (dataType == Gfx10DataColor)
1166 {
1167 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1168 }
1169 else
1170 {
1171 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1172 pBlock->w = 3;
1173 pBlock->h = 3;
1174 pBlock->d = 0;
1175 }
1176 }
1177
1178 /**
1179 ************************************************************************************************************************
1180 * Gfx10Lib::GetMetaOverlapLog2
1181 *
1182 * @brief
1183 * Get meta block overlap
1184 *
1185 * @return
1186 * N/A
1187 ************************************************************************************************************************
1188 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1189 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1190 Gfx10DataType dataType, ///< [in] Data type
1191 AddrResourceType resourceType, ///< [in] Resource type
1192 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1193 UINT_32 elemLog2, ///< [in] element size log2
1194 UINT_32 numSamplesLog2 ///< [in] number of samples
1195 ) const
1196 {
1197 Dim3d compBlock;
1198 Dim3d microBlock;
1199
1200 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1201 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
1202
1203 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1204 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1205 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1206 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1207 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1208
1209 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1210 {
1211 overlap++;
1212 }
1213
1214 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1215 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1216 {
1217 overlap--;
1218 }
1219 overlap = Max(overlap, 0);
1220 return overlap;
1221 }
1222
1223 /**
1224 ************************************************************************************************************************
1225 * Gfx10Lib::Get3DMetaOverlapLog2
1226 *
1227 * @brief
1228 * Get 3d meta block overlap
1229 *
1230 * @return
1231 * N/A
1232 ************************************************************************************************************************
1233 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1234 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1235 AddrResourceType resourceType, ///< [in] Resource type
1236 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1237 UINT_32 elemLog2 ///< [in] element size log2
1238 ) const
1239 {
1240 Dim3d microBlock;
1241 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
1242
1243 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1244
1245 if (m_settings.supportRbPlus)
1246 {
1247 overlap++;
1248 }
1249
1250 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1251 {
1252 overlap = 0;
1253 }
1254 return overlap;
1255 }
1256
1257 /**
1258 ************************************************************************************************************************
1259 * Gfx10Lib::GetPipeRotateAmount
1260 *
1261 * @brief
1262 * Get pipe rotate amount
1263 *
1264 * @return
1265 * Pipe rotate amount
1266 ************************************************************************************************************************
1267 */
1268
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1269 INT_32 Gfx10Lib::GetPipeRotateAmount(
1270 AddrResourceType resourceType, ///< [in] Resource type
1271 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1272 ) const
1273 {
1274 INT_32 amount = 0;
1275
1276 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1277 {
1278 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1279 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1280 }
1281
1282 return amount;
1283 }
1284
1285 /**
1286 ************************************************************************************************************************
1287 * Gfx10Lib::GetMetaBlkSize
1288 *
1289 * @brief
1290 * Get metadata block size
1291 *
1292 * @return
1293 * Meta block size
1294 ************************************************************************************************************************
1295 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1296 UINT_32 Gfx10Lib::GetMetaBlkSize(
1297 Gfx10DataType dataType, ///< [in] Data type
1298 AddrResourceType resourceType, ///< [in] Resource type
1299 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1300 UINT_32 elemLog2, ///< [in] element size log2
1301 UINT_32 numSamplesLog2, ///< [in] number of samples
1302 BOOL_32 pipeAlign, ///< [in] pipe align
1303 Dim3d* pBlock ///< [out] block size
1304 ) const
1305 {
1306 INT_32 metablkSizeLog2;
1307
1308 {
1309 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1310 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1311 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1312 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1313 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1314 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1315 INT_32 numPipesLog2 = m_pipesLog2;
1316
1317 if (IsThin(resourceType, swizzleMode))
1318 {
1319 if ((pipeAlign == FALSE) ||
1320 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1321 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1322 {
1323 if (pipeAlign)
1324 {
1325 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1326 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1327 }
1328 else
1329 {
1330 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1331 }
1332 }
1333 else
1334 {
1335 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1336 {
1337 numPipesLog2++;
1338 }
1339
1340 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1341
1342 if (numPipesLog2 >= 4)
1343 {
1344 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1345
1346 // In 16Bpe 8xaa, we have an extra overlap bit
1347 if ((pipeRotateLog2 > 0) &&
1348 (elemLog2 == 4) &&
1349 (numSamplesLog2 == 3) &&
1350 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1351 {
1352 overlapLog2++;
1353 }
1354
1355 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1356 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1357
1358 if (m_settings.supportRbPlus &&
1359 IsRtOptSwizzle(swizzleMode) &&
1360 (numPipesLog2 == 6) &&
1361 (numSamplesLog2 == 3) &&
1362 (m_maxCompFragLog2 == 3) &&
1363 (metablkSizeLog2 < 15))
1364 {
1365 metablkSizeLog2 = 15;
1366 }
1367 }
1368 else
1369 {
1370 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1371 }
1372
1373 if (dataType == Gfx10DataDepthStencil)
1374 {
1375 // For htile surfaces, pad meta block size to 2K * num_pipes
1376 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1377 }
1378
1379 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1380
1381 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1382 {
1383 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1384
1385 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1386 }
1387 }
1388
1389 const INT_32 metablkBitsLog2 =
1390 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1391 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1392 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1393 pBlock->d = 1;
1394 }
1395 else
1396 {
1397 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1398
1399 if (pipeAlign)
1400 {
1401 if (m_settings.supportRbPlus &&
1402 (m_pipesLog2 == m_numSaLog2 + 1) &&
1403 (m_pipesLog2 > 1) &&
1404 IsRbAligned(resourceType, swizzleMode))
1405 {
1406 numPipesLog2++;
1407 }
1408
1409 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1410
1411 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1412 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1413 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1414 }
1415 else
1416 {
1417 metablkSizeLog2 = 12;
1418 }
1419
1420 const INT_32 metablkBitsLog2 =
1421 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1422 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1423 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1424 pBlock->d = 1 << (metablkBitsLog2 / 3);
1425 }
1426 }
1427
1428 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1429 }
1430
1431 /**
1432 ************************************************************************************************************************
1433 * Gfx10Lib::ConvertSwizzlePatternToEquation
1434 *
1435 * @brief
1436 * Convert swizzle pattern to equation.
1437 *
1438 * @return
1439 * N/A
1440 ************************************************************************************************************************
1441 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1442 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1443 UINT_32 elemLog2, ///< [in] element bytes log2
1444 AddrResourceType rsrcType, ///< [in] resource type
1445 AddrSwizzleMode swMode, ///< [in] swizzle mode
1446 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1447 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1448 const
1449 {
1450 // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
1451 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1452 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1453
1454 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1455 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1456 memset(pEquation, 0, sizeof(ADDR_EQUATION));
1457 pEquation->numBits = blockSizeLog2;
1458 pEquation->numBitComponents = pPatInfo->maxItemCount;
1459 pEquation->stackedDepthSlices = FALSE;
1460
1461 for (UINT_32 i = 0; i < elemLog2; i++)
1462 {
1463 pEquation->addr[i].channel = 0;
1464 pEquation->addr[i].valid = 1;
1465 pEquation->addr[i].index = i;
1466 }
1467
1468 if (IsXor(swMode) == FALSE)
1469 {
1470 // Use simplified logic when we only have one bit-component
1471 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1472 {
1473 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1474
1475 if (pSwizzle[i].x != 0)
1476 {
1477 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1478
1479 pEquation->addr[i].channel = 0;
1480 pEquation->addr[i].valid = 1;
1481 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1482 }
1483 else if (pSwizzle[i].y != 0)
1484 {
1485 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1486
1487 pEquation->addr[i].channel = 1;
1488 pEquation->addr[i].valid = 1;
1489 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1490 }
1491 else
1492 {
1493 ADDR_ASSERT(pSwizzle[i].z != 0);
1494 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1495
1496 pEquation->addr[i].channel = 2;
1497 pEquation->addr[i].valid = 1;
1498 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1499 }
1500 }
1501 }
1502 else
1503 {
1504 Dim3d dim;
1505 ComputeBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, rsrcType, swMode);
1506
1507 const UINT_32 blkXLog2 = Log2(dim.w);
1508 const UINT_32 blkYLog2 = Log2(dim.h);
1509 const UINT_32 blkZLog2 = Log2(dim.d);
1510 const UINT_32 blkXMask = dim.w - 1;
1511 const UINT_32 blkYMask = dim.h - 1;
1512 const UINT_32 blkZMask = dim.d - 1;
1513
1514 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1515 memcpy(&swizzle, pSwizzle, sizeof(swizzle));
1516 UINT_32 xMask = 0;
1517 UINT_32 yMask = 0;
1518 UINT_32 zMask = 0;
1519
1520 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1521 {
1522 for (UINT_32 bitComp = 0; bitComp < ADDR_MAX_EQUATION_COMP; bitComp++)
1523 {
1524 if (swizzle[i].value == 0)
1525 {
1526 ADDR_ASSERT(bitComp != 0); // Bits above element size must have at least one addr-bit
1527 ADDR_ASSERT(bitComp <= pPatInfo->maxItemCount);
1528 break;
1529 }
1530
1531 if (swizzle[i].x != 0)
1532 {
1533 const UINT_32 xLog2 = BitScanForward(swizzle[i].x);
1534 swizzle[i].x = UnsetLeastBit(swizzle[i].x);
1535 xMask |= (1 << xLog2);
1536
1537 pEquation->comps[bitComp][i].channel = 0;
1538 pEquation->comps[bitComp][i].valid = 1;
1539 pEquation->comps[bitComp][i].index = xLog2 + elemLog2;
1540 }
1541 else if (swizzle[i].y != 0)
1542 {
1543 const UINT_32 yLog2 = BitScanForward(swizzle[i].y);
1544 swizzle[i].y = UnsetLeastBit(swizzle[i].y);
1545 yMask |= (1 << yLog2);
1546
1547 pEquation->comps[bitComp][i].channel = 1;
1548 pEquation->comps[bitComp][i].valid = 1;
1549 pEquation->comps[bitComp][i].index = yLog2;
1550 }
1551 else if (swizzle[i].z != 0)
1552 {
1553 const UINT_32 zLog2 = BitScanForward(swizzle[i].z);
1554 swizzle[i].z = UnsetLeastBit(swizzle[i].z);
1555 zMask |= (1 << zLog2);
1556
1557 pEquation->comps[bitComp][i].channel = 2;
1558 pEquation->comps[bitComp][i].valid = 1;
1559 pEquation->comps[bitComp][i].index = zLog2;
1560 }
1561 else
1562 {
1563 // This function doesn't handle MSAA (must update block dims, here, and consumers)
1564 ADDR_ASSERT_ALWAYS();
1565 }
1566 }
1567 ADDR_ASSERT(swizzle[i].value == 0); // We missed an xor? Are there too many?
1568 }
1569
1570 // We missed an address bit for coords inside the block?
1571 // That means two coords will land on the same addr, which is bad.
1572 ADDR_ASSERT(((xMask & blkXMask) == blkXMask) &&
1573 ((yMask & blkYMask) == blkYMask) &&
1574 ((zMask & blkZMask) == blkZMask));
1575 // We're sourcing from outside our block? That won't fly for PRTs, which need to be movable.
1576 // Non-xor modes can also be used for 2D PRTs but they're handled in the simplified logic above.
1577 ADDR_ASSERT((IsPrt(swMode) == false) ||
1578 ((xMask == blkXMask) &&
1579 (yMask == blkYMask) &&
1580 (zMask == blkZMask)));
1581 }
1582 }
1583
1584 /**
1585 ************************************************************************************************************************
1586 * Gfx10Lib::InitEquationTable
1587 *
1588 * @brief
1589 * Initialize Equation table.
1590 *
1591 * @return
1592 * N/A
1593 ************************************************************************************************************************
1594 */
InitEquationTable()1595 VOID Gfx10Lib::InitEquationTable()
1596 {
1597 memset(m_equationTable, 0, sizeof(m_equationTable));
1598
1599 // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1600 // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1601 // computing 2D resources.
1602 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1603 {
1604 // Add offset. Start iterating from ADDR_RSRC_TEX_2D
1605 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1606
1607 // Iterate through the maximum number of swizzlemodes a type can hold
1608 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1609 {
1610 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1611
1612 // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
1613 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1614 {
1615 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1616 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
1617 // overwriting the choice.
1618 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1619
1620 if (pPatInfo != NULL)
1621 {
1622 ADDR_ASSERT(IsValidSwMode(swMode));
1623 ADDR_EQUATION equation = {};
1624
1625 // Passing in pPatInfo to get the addr equation
1626 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1627
1628 equationIndex = m_numEquations;
1629 ADDR_ASSERT(equationIndex < EquationTableSize);
1630 // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
1631 m_equationTable[equationIndex] = equation;
1632 // Increment m_numEquations
1633 m_numEquations++;
1634 }
1635 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
1636 // iteration in this nested for-loop
1637 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1638 }
1639 }
1640 }
1641 }
1642
1643 /**
1644 ************************************************************************************************************************
1645 * Gfx10Lib::HwlGetEquationIndex
1646 *
1647 * @brief
1648 * Interface function stub of GetEquationIndex
1649 *
1650 * @return
1651 * ADDR_E_RETURNCODE
1652 ************************************************************************************************************************
1653 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1654 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1655 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1656 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1657 ) const
1658 {
1659 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1660
1661 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1662 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1663 {
1664 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1665 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1666 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1667
1668 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1669 }
1670
1671 if (pOut->pMipInfo != NULL)
1672 {
1673 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1674 {
1675 pOut->pMipInfo[i].equationIndex = equationIdx;
1676 }
1677 }
1678
1679 return equationIdx;
1680 }
1681
1682 /**
1683 ************************************************************************************************************************
1684 * Gfx10Lib::GetValidDisplaySwizzleModes
1685 *
1686 * @brief
1687 * Get valid swizzle modes mask for displayable surface
1688 *
1689 * @return
1690 * Valid swizzle modes mask for displayable surface
1691 ************************************************************************************************************************
1692 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1693 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
1694 UINT_32 bpp
1695 ) const
1696 {
1697 UINT_32 swModeMask = 0;
1698
1699 if (bpp <= 64)
1700 {
1701 if (m_settings.isDcn20)
1702 {
1703 swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
1704 }
1705 else
1706 {
1707 swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
1708 }
1709 }
1710
1711 return swModeMask;
1712 }
1713
1714 /**
1715 ************************************************************************************************************************
1716 * Gfx10Lib::IsValidDisplaySwizzleMode
1717 *
1718 * @brief
1719 * Check if a swizzle mode is supported by display engine
1720 *
1721 * @return
1722 * TRUE is swizzle mode is supported by display engine
1723 ************************************************************************************************************************
1724 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const1725 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
1726 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
1727 ) const
1728 {
1729 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1730
1731 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
1732 }
1733
1734 /**
1735 ************************************************************************************************************************
1736 * Gfx10Lib::GetMaxNumMipsInTail
1737 *
1738 * @brief
1739 * Return max number of mips in tails
1740 *
1741 * @return
1742 * Max number of mips in tails
1743 ************************************************************************************************************************
1744 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const1745 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
1746 UINT_32 blockSizeLog2, ///< block size log2
1747 BOOL_32 isThin ///< is thin or thick
1748 ) const
1749 {
1750 UINT_32 effectiveLog2 = blockSizeLog2;
1751
1752 if (isThin == FALSE)
1753 {
1754 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1755 }
1756
1757 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1758 }
1759
1760 /**
1761 ************************************************************************************************************************
1762 * Gfx10Lib::HwlComputePipeBankXor
1763 *
1764 * @brief
1765 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1766 *
1767 * @return
1768 * PipeBankXor value
1769 ************************************************************************************************************************
1770 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1771 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
1772 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1773 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1774 ) const
1775 {
1776 if (IsNonPrtXor(pIn->swizzleMode))
1777 {
1778 const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
1779
1780 // No pipe xor...
1781 const UINT_32 pipeXor = 0;
1782 UINT_32 bankXor = 0;
1783
1784 const UINT_32 XorPatternLen = 8;
1785 static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1};
1786 static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1};
1787 static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7};
1788 static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14};
1789 static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
1790
1791 switch (bankBits)
1792 {
1793 case 1:
1794 case 2:
1795 case 3:
1796 case 4:
1797 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
1798 break;
1799 default:
1800 // valid bank bits should be 0~4
1801 ADDR_ASSERT_ALWAYS();
1802 case 0:
1803 break;
1804 }
1805
1806 pOut->pipeBankXor = bankXor | pipeXor;
1807 }
1808 else
1809 {
1810 pOut->pipeBankXor = 0;
1811 }
1812
1813 return ADDR_OK;
1814 }
1815
1816 /**
1817 ************************************************************************************************************************
1818 * Gfx10Lib::HwlComputeSlicePipeBankXor
1819 *
1820 * @brief
1821 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1822 *
1823 * @return
1824 * PipeBankXor value
1825 ************************************************************************************************************************
1826 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1827 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
1828 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1829 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1830 ) const
1831 {
1832 if (IsNonPrtXor(pIn->swizzleMode))
1833 {
1834 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
1835 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
1836 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
1837
1838 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
1839
1840 if (pIn->bpe != 0)
1841 {
1842 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1843 pIn->resourceType,
1844 Log2(pIn->bpe >> 3),
1845 1);
1846
1847 if (pPatInfo != NULL)
1848 {
1849 ADDR_BIT_SETTING fullSwizzlePattern[20];
1850 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1851
1852 const UINT_32 pipeBankXorOffset =
1853 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
1854 blockBits,
1855 0,
1856 0,
1857 pIn->slice,
1858 0);
1859
1860 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1861
1862 // Should have no bit set under pipe interleave
1863 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1864
1865 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
1866 ADDR_ASSERT(pipeBankXor == pipeXor);
1867
1868 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1869 }
1870 }
1871 }
1872 else
1873 {
1874 pOut->pipeBankXor = 0;
1875 }
1876
1877 return ADDR_OK;
1878 }
1879
1880 /**
1881 ************************************************************************************************************************
1882 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1883 *
1884 * @brief
1885 * Compute sub resource offset to support swizzle pattern
1886 *
1887 * @return
1888 * Offset
1889 ************************************************************************************************************************
1890 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1891 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1892 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
1893 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
1894 ) const
1895 {
1896 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
1897
1898 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1899
1900 return ADDR_OK;
1901 }
1902
1903 /**
1904 ************************************************************************************************************************
1905 * Gfx10Lib::HwlComputeNonBlockCompressedView
1906 *
1907 * @brief
1908 * Compute non-block-compressed view for a given mipmap level/slice.
1909 *
1910 * @return
1911 * ADDR_E_RETURNCODE
1912 ************************************************************************************************************************
1913 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1914 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
1915 const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
1916 ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
1917 ) const
1918 {
1919 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1920
1921 if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
1922 {
1923 // Only thin swizzle mode can have a NonBC view...
1924 returnCode = ADDR_INVALIDPARAMS;
1925 }
1926 else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1927 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1928 {
1929 // Only support BC1~BC7, ASTC, or ETC2 for now...
1930 returnCode = ADDR_NOTSUPPORTED;
1931 }
1932 else
1933 {
1934 UINT_32 bcWidth, bcHeight;
1935 UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1936
1937 ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1938 infoIn.flags = pIn->flags;
1939 infoIn.swizzleMode = pIn->swizzleMode;
1940 infoIn.resourceType = pIn->resourceType;
1941 infoIn.bpp = bpp;
1942 infoIn.width = RoundUpQuotient(pIn->width, bcWidth);
1943 infoIn.height = RoundUpQuotient(pIn->height, bcHeight);
1944 infoIn.numSlices = pIn->numSlices;
1945 infoIn.numMipLevels = pIn->numMipLevels;
1946 infoIn.numSamples = 1;
1947 infoIn.numFrags = 1;
1948
1949 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
1950 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
1951
1952 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1953 infoOut.pMipInfo = mipInfo;
1954
1955 const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
1956
1957 if (tiled)
1958 {
1959 returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
1960 }
1961 else
1962 {
1963 returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
1964 }
1965
1966 if (returnCode == ADDR_OK)
1967 {
1968 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1969 subOffIn.swizzleMode = infoIn.swizzleMode;
1970 subOffIn.resourceType = infoIn.resourceType;
1971 subOffIn.slice = pIn->slice;
1972 subOffIn.sliceSize = infoOut.sliceSize;
1973 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1974 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
1975
1976 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1977
1978 // For any mipmap level, move nonBc view base address by offset
1979 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1980 pOut->offset = subOffOut.offset;
1981
1982 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1983 slicePbXorIn.bpe = infoIn.bpp;
1984 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
1985 slicePbXorIn.resourceType = infoIn.resourceType;
1986 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
1987 slicePbXorIn.slice = pIn->slice;
1988
1989 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
1990
1991 // For any mipmap level, nonBc view should use computed pbXor
1992 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
1993 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
1994
1995 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
1996 const UINT_32 requestMipWidth = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
1997 const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
1998
1999 if (inTail)
2000 {
2001 // For mipmap level that is in mip tail block, hack a lot of things...
2002 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2003 // are fit in tail block:
2004
2005 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2006 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2007
2008 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2009 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2010
2011 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2012 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2013
2014 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2015 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2016 }
2017 // This check should cover at least mipId == 0
2018 else if (requestMipWidth << pIn->mipId == infoIn.width)
2019 {
2020 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2021 // - only one mipmap level and mipId = 0
2022 pOut->mipId = 0;
2023 pOut->numMipLevels = 1;
2024
2025 // (mip0) width = requestMipWidth
2026 pOut->unalignedWidth = requestMipWidth;
2027
2028 // (mip0) height = requestMipHeight
2029 pOut->unalignedHeight = requestMipHeight;
2030 }
2031 else
2032 {
2033 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2034 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2035 // because single mip view may have different pitch value than original (multiple) mip view...
2036 // A simple case would be:
2037 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2038 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2039 // mip0 width = 0x101/mip1 width = 0x80
2040 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2041 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2042
2043 // - 2 levels and mipId = 1
2044 pOut->mipId = 1;
2045 pOut->numMipLevels = 2;
2046
2047 const UINT_32 upperMipWidth = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2048 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2049
2050 const BOOL_32 needToAvoidInTail =
2051 tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2052 TRUE : FALSE;
2053
2054 const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2055 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2056
2057 const BOOL_32 needExtraWidth =
2058 ((upperMipWidth < requestMipWidth * 2) ||
2059 ((upperMipWidth == requestMipWidth * 2) &&
2060 ((needToAvoidInTail == TRUE) ||
2061 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2062
2063 const BOOL_32 needExtraHeight =
2064 ((upperMipHeight < requestMipHeight * 2) ||
2065 ((upperMipHeight == requestMipHeight * 2) &&
2066 ((needToAvoidInTail == TRUE) ||
2067 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2068
2069 // (mip0) width = requestLastMipLevelWidth
2070 pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0);
2071
2072 // (mip0) height = requestLastMipLevelHeight
2073 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2074 }
2075
2076 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2077 ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2078 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2079 ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2080 }
2081 }
2082
2083 return returnCode;
2084 }
2085
2086 /**
2087 ************************************************************************************************************************
2088 * Gfx10Lib::ValidateNonSwModeParams
2089 *
2090 * @brief
2091 * Validate compute surface info params except swizzle mode
2092 *
2093 * @return
2094 * TRUE if parameters are valid, FALSE otherwise
2095 ************************************************************************************************************************
2096 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2097 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2098 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2099 {
2100 BOOL_32 valid = TRUE;
2101
2102 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2103 {
2104 ADDR_ASSERT_ALWAYS();
2105 valid = FALSE;
2106 }
2107
2108 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2109 {
2110 ADDR_ASSERT_ALWAYS();
2111 valid = FALSE;
2112 }
2113
2114 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2115 const AddrResourceType rsrcType = pIn->resourceType;
2116 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2117 const BOOL_32 msaa = (pIn->numFrags > 1);
2118 const BOOL_32 display = flags.display;
2119 const BOOL_32 tex3d = IsTex3d(rsrcType);
2120 const BOOL_32 tex2d = IsTex2d(rsrcType);
2121 const BOOL_32 tex1d = IsTex1d(rsrcType);
2122 const BOOL_32 stereo = flags.qbStereo;
2123
2124 // Resource type check
2125 if (tex1d)
2126 {
2127 if (msaa || display || stereo)
2128 {
2129 ADDR_ASSERT_ALWAYS();
2130 valid = FALSE;
2131 }
2132 }
2133 else if (tex2d)
2134 {
2135 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2136 {
2137 ADDR_ASSERT_ALWAYS();
2138 valid = FALSE;
2139 }
2140 }
2141 else if (tex3d)
2142 {
2143 if (msaa || display || stereo)
2144 {
2145 ADDR_ASSERT_ALWAYS();
2146 valid = FALSE;
2147 }
2148 }
2149 else
2150 {
2151 ADDR_ASSERT_ALWAYS();
2152 valid = FALSE;
2153 }
2154
2155 return valid;
2156 }
2157
2158 /**
2159 ************************************************************************************************************************
2160 * Gfx10Lib::ValidateSwModeParams
2161 *
2162 * @brief
2163 * Validate compute surface info related to swizzle mode
2164 *
2165 * @return
2166 * TRUE if parameters are valid, FALSE otherwise
2167 ************************************************************************************************************************
2168 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2169 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2170 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2171 {
2172 BOOL_32 valid = TRUE;
2173
2174 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2175 {
2176 ADDR_ASSERT_ALWAYS();
2177 valid = FALSE;
2178 }
2179 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2180 {
2181 {
2182 ADDR_ASSERT_ALWAYS();
2183 valid = FALSE;
2184 }
2185 }
2186
2187 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2188 const AddrResourceType rsrcType = pIn->resourceType;
2189 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2190 const BOOL_32 msaa = (pIn->numFrags > 1);
2191 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2192 const BOOL_32 color = flags.color;
2193 const BOOL_32 display = flags.display;
2194 const BOOL_32 tex3d = IsTex3d(rsrcType);
2195 const BOOL_32 tex2d = IsTex2d(rsrcType);
2196 const BOOL_32 tex1d = IsTex1d(rsrcType);
2197 const BOOL_32 thin3d = flags.view3dAs2dArray;
2198 const BOOL_32 linear = IsLinear(swizzle);
2199 const BOOL_32 blk256B = IsBlock256b(swizzle);
2200 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2201 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2202 const BOOL_32 prt = flags.prt;
2203 const BOOL_32 fmask = flags.fmask;
2204
2205 // Misc check
2206 if ((pIn->numFrags > 1) &&
2207 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2208 {
2209 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2210 ADDR_ASSERT_ALWAYS();
2211 valid = FALSE;
2212 }
2213
2214 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2215 {
2216 ADDR_ASSERT_ALWAYS();
2217 valid = FALSE;
2218 }
2219
2220 if ((pIn->bpp == 96) && (linear == FALSE))
2221 {
2222 ADDR_ASSERT_ALWAYS();
2223 valid = FALSE;
2224 }
2225
2226 const UINT_32 swizzleMask = 1 << swizzle;
2227
2228 // Resource type check
2229 if (tex1d)
2230 {
2231 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2232 {
2233 ADDR_ASSERT_ALWAYS();
2234 valid = FALSE;
2235 }
2236 }
2237 else if (tex2d)
2238 {
2239 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2240 {
2241 {
2242 ADDR_ASSERT_ALWAYS();
2243 valid = FALSE;
2244 }
2245 }
2246 else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2247 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2248 {
2249 ADDR_ASSERT_ALWAYS();
2250 valid = FALSE;
2251 }
2252 }
2253 else if (tex3d)
2254 {
2255 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2256 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2257 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2258 {
2259 ADDR_ASSERT_ALWAYS();
2260 valid = FALSE;
2261 }
2262 }
2263
2264 // Swizzle type check
2265 if (linear)
2266 {
2267 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2268 {
2269 ADDR_ASSERT_ALWAYS();
2270 valid = FALSE;
2271 }
2272 }
2273 else if (IsZOrderSwizzle(swizzle))
2274 {
2275 if ((pIn->bpp > 64) ||
2276 (msaa && (color || (pIn->bpp > 32))) ||
2277 ElemLib::IsBlockCompressed(pIn->format) ||
2278 ElemLib::IsMacroPixelPacked(pIn->format))
2279 {
2280 ADDR_ASSERT_ALWAYS();
2281 valid = FALSE;
2282 }
2283 }
2284 else if (IsStandardSwizzle(rsrcType, swizzle))
2285 {
2286 if (zbuffer || msaa)
2287 {
2288 ADDR_ASSERT_ALWAYS();
2289 valid = FALSE;
2290 }
2291 }
2292 else if (IsDisplaySwizzle(rsrcType, swizzle))
2293 {
2294 if (zbuffer || msaa)
2295 {
2296 ADDR_ASSERT_ALWAYS();
2297 valid = FALSE;
2298 }
2299 }
2300 else if (IsRtOptSwizzle(swizzle))
2301 {
2302 if (zbuffer)
2303 {
2304 ADDR_ASSERT_ALWAYS();
2305 valid = FALSE;
2306 }
2307 }
2308 else
2309 {
2310 {
2311 ADDR_ASSERT_ALWAYS();
2312 valid = FALSE;
2313 }
2314 }
2315
2316 // Block type check
2317 if (blk256B)
2318 {
2319 if (zbuffer || tex3d || msaa)
2320 {
2321 ADDR_ASSERT_ALWAYS();
2322 valid = FALSE;
2323 }
2324 }
2325 else if (blkVar)
2326 {
2327 if (m_blockVarSizeLog2 == 0)
2328 {
2329 ADDR_ASSERT_ALWAYS();
2330 valid = FALSE;
2331 }
2332 }
2333
2334 return valid;
2335 }
2336
2337 /**
2338 ************************************************************************************************************************
2339 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2340 *
2341 * @brief
2342 * Compute surface info sanity check
2343 *
2344 * @return
2345 * Offset
2346 ************************************************************************************************************************
2347 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2348 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2349 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2350 ) const
2351 {
2352 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2353 }
2354
2355 /**
2356 ************************************************************************************************************************
2357 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2358 *
2359 * @brief
2360 * Internal function to get suggested surface information for client to use
2361 *
2362 * @return
2363 * ADDR_E_RETURNCODE
2364 ************************************************************************************************************************
2365 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2366 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2367 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2368 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2369 ) const
2370 {
2371 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2372
2373 if (pIn->flags.fmask)
2374 {
2375 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2376 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2377
2378 if (forbid64KbBlockType && forbidVarBlockType)
2379 {
2380 // Invalid combination...
2381 ADDR_ASSERT_ALWAYS();
2382 returnCode = ADDR_INVALIDPARAMS;
2383 }
2384 else
2385 {
2386 pOut->resourceType = ADDR_RSRC_TEX_2D;
2387 pOut->validBlockSet.value = 0;
2388 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2389 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2390 pOut->validSwModeSet.value = 0;
2391 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2392 pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2393 pOut->canXor = TRUE;
2394 pOut->validSwTypeSet.value = AddrSwSetZ;
2395 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2396
2397 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2398
2399 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2400 {
2401 const UINT_8 maxFmaskSwizzleModeType = 2;
2402 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2403 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2404 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2405 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2406 const UINT_32 width = Max(pIn->width, 1u);
2407 const UINT_32 height = Max(pIn->height, 1u);
2408 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2409
2410 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2411 Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};
2412 Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};
2413 UINT_64 padSize[maxFmaskSwizzleModeType] = {};
2414
2415 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2416 {
2417 ComputeBlockDimensionForSurf(&blkDim[i].w,
2418 &blkDim[i].h,
2419 &blkDim[i].d,
2420 fmaskBpp,
2421 1,
2422 pOut->resourceType,
2423 swMode[i]);
2424
2425 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2426 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2427 }
2428
2429 if (Addr2BlockTypeWithinMemoryBudget(padSize[0],
2430 padSize[1],
2431 ratioLow,
2432 ratioHi,
2433 pIn->memoryBudget,
2434 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2435 {
2436 use64KbBlockType = FALSE;
2437 }
2438 }
2439 else if (forbidVarBlockType)
2440 {
2441 use64KbBlockType = TRUE;
2442 }
2443
2444 if (use64KbBlockType)
2445 {
2446 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2447 }
2448 else
2449 {
2450 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2451 }
2452 }
2453 }
2454 else
2455 {
2456 UINT_32 bpp = pIn->bpp;
2457 UINT_32 width = Max(pIn->width, 1u);
2458 UINT_32 height = Max(pIn->height, 1u);
2459
2460 // Set format to INVALID will skip this conversion
2461 if (pIn->format != ADDR_FMT_INVALID)
2462 {
2463 ElemMode elemMode = ADDR_UNCOMPRESSED;
2464 UINT_32 expandX, expandY;
2465
2466 // Get compression/expansion factors and element mode which indicates compression/expansion
2467 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2468 &elemMode,
2469 &expandX,
2470 &expandY);
2471
2472 UINT_32 basePitch = 0;
2473 GetElemLib()->AdjustSurfaceInfo(elemMode,
2474 expandX,
2475 expandY,
2476 &bpp,
2477 &basePitch,
2478 &width,
2479 &height);
2480 }
2481
2482 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2483 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2484 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2485 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2486 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2487
2488 // Pre sanity check on non swizzle mode parameters
2489 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2490 localIn.flags = pIn->flags;
2491 localIn.resourceType = pIn->resourceType;
2492 localIn.format = pIn->format;
2493 localIn.bpp = bpp;
2494 localIn.width = width;
2495 localIn.height = height;
2496 localIn.numSlices = numSlices;
2497 localIn.numMipLevels = numMipLevels;
2498 localIn.numSamples = numSamples;
2499 localIn.numFrags = numFrags;
2500
2501 if (ValidateNonSwModeParams(&localIn))
2502 {
2503 // Forbid swizzle mode(s) by client setting
2504 ADDR2_SWMODE_SET allowedSwModeSet = {};
2505 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2506 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2507 allowedSwModeSet.value |=
2508 pIn->forbiddenBlock.macroThin4KB ? 0 :
2509 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2510 allowedSwModeSet.value |=
2511 pIn->forbiddenBlock.macroThick4KB ? 0 :
2512 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2513 allowedSwModeSet.value |=
2514 pIn->forbiddenBlock.macroThin64KB ? 0 :
2515 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2516 allowedSwModeSet.value |=
2517 pIn->forbiddenBlock.macroThick64KB ? 0 :
2518 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2519 allowedSwModeSet.value |=
2520 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2521
2522 if (pIn->preferredSwSet.value != 0)
2523 {
2524 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2525 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2526 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2527 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2528 }
2529
2530 if (pIn->noXor)
2531 {
2532 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2533 }
2534
2535 if (pIn->maxAlign > 0)
2536 {
2537 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2538 {
2539 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2540 }
2541
2542 if (pIn->maxAlign < Size64K)
2543 {
2544 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2545 }
2546
2547 if (pIn->maxAlign < Size4K)
2548 {
2549 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2550 }
2551
2552 if (pIn->maxAlign < Size256)
2553 {
2554 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2555 }
2556 }
2557
2558 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2559 switch (pIn->resourceType)
2560 {
2561 case ADDR_RSRC_TEX_1D:
2562 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2563 break;
2564
2565 case ADDR_RSRC_TEX_2D:
2566 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2567 break;
2568
2569 case ADDR_RSRC_TEX_3D:
2570 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2571
2572 if (pIn->flags.view3dAs2dArray)
2573 {
2574 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2575 }
2576 break;
2577
2578 default:
2579 ADDR_ASSERT_ALWAYS();
2580 allowedSwModeSet.value = 0;
2581 break;
2582 }
2583
2584 if (ElemLib::IsBlockCompressed(pIn->format) ||
2585 ElemLib::IsMacroPixelPacked(pIn->format) ||
2586 (bpp > 64) ||
2587 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2588 {
2589 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2590 }
2591
2592 if (pIn->format == ADDR_FMT_32_32_32)
2593 {
2594 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2595 }
2596
2597 if (msaa)
2598 {
2599 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2600 }
2601
2602 if (pIn->flags.depth || pIn->flags.stencil)
2603 {
2604 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2605 }
2606
2607 if (pIn->flags.display)
2608 {
2609 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2610 }
2611
2612 if (allowedSwModeSet.value != 0)
2613 {
2614 #if DEBUG
2615 // Post sanity check, at least AddrLib should accept the output generated by its own
2616 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2617
2618 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2619 {
2620 if (validateSwModeSet & 1)
2621 {
2622 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2623 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2624 }
2625
2626 validateSwModeSet >>= 1;
2627 }
2628 #endif
2629
2630 pOut->resourceType = pIn->resourceType;
2631 pOut->validSwModeSet = allowedSwModeSet;
2632 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2633 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2634 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2635
2636 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2637
2638 if (pOut->clientPreferredSwSet.value == 0)
2639 {
2640 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2641 }
2642
2643 // Apply optional restrictions
2644 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2645 {
2646 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2647 {
2648 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2649 // the GL2 in VAR mode, so it should be avoided.
2650 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2651 }
2652 else
2653 {
2654 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2655 // But we have to suffer from low performance because there is no other choice...
2656 ADDR_ASSERT_ALWAYS();
2657 }
2658 }
2659
2660 if (pIn->flags.needEquation)
2661 {
2662 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
2663 ADDR_MAX_LEGACY_EQUATION_COMP;
2664 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
2665 }
2666
2667 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2668 {
2669 pOut->swizzleMode = ADDR_SW_LINEAR;
2670 }
2671 else
2672 {
2673 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
2674
2675 if ((height > 1) && (computeMinSize == FALSE))
2676 {
2677 // Always ignore linear swizzle mode if:
2678 // 1. This is a (2D/3D) resource with height > 1
2679 // 2. Client doesn't require computing minimize size
2680 allowedSwModeSet.swLinear = 0;
2681 }
2682
2683 // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
2684 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2685
2686 // Determine block size if there are 2 or more block type candidates
2687 if (IsPow2(allowedBlockSet.value) == FALSE)
2688 {
2689 // Tracks a valid SwizzleMode for each valid block type
2690 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
2691
2692 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
2693
2694 if (m_blockVarSizeLog2 != 0)
2695 {
2696 swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
2697 }
2698
2699 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2700 {
2701 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
2702 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2703 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2704 }
2705 else
2706 {
2707 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
2708 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
2709 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2710 }
2711
2712 // Tracks the size of each valid swizzle mode's surface in bytes
2713 UINT_64 padSize[AddrBlockMaxTiledType] = {};
2714
2715 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
2716 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
2717 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2718 UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use
2719 UINT_64 minSize = 0; // Tracks the minimum acceptable block type
2720
2721 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
2722
2723 // Iterate through all block types
2724 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
2725 {
2726 if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2727 {
2728 localIn.swizzleMode = swMode[i];
2729
2730 if (localIn.swizzleMode == ADDR_SW_LINEAR)
2731 {
2732 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
2733 }
2734 else
2735 {
2736 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
2737 }
2738
2739 if (returnCode == ADDR_OK)
2740 {
2741 padSize[i] = localOut.surfSize;
2742
2743 if (minSize == 0)
2744 {
2745 minSize = padSize[i];
2746 minSizeBlk = i;
2747 }
2748 else
2749 {
2750 // Checks if the block type is within the memory budget but favors larger blocks
2751 if (Addr2BlockTypeWithinMemoryBudget(
2752 minSize,
2753 padSize[i],
2754 ratioLow,
2755 ratioHi,
2756 0.0,
2757 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
2758 {
2759 minSize = padSize[i];
2760 minSizeBlk = i;
2761 }
2762 }
2763 }
2764 else
2765 {
2766 ADDR_ASSERT_ALWAYS();
2767 break;
2768 }
2769 }
2770 }
2771
2772 if (pIn->memoryBudget > 1.0)
2773 {
2774 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
2775 // smaller-block type again in coming loop
2776 switch (minSizeBlk)
2777 {
2778 case AddrBlockThick64KB:
2779 allowedBlockSet.macroThin64KB = 0;
2780 case AddrBlockThinVar:
2781 case AddrBlockThin64KB:
2782 allowedBlockSet.macroThick4KB = 0;
2783 case AddrBlockThick4KB:
2784 allowedBlockSet.macroThin4KB = 0;
2785 case AddrBlockThin4KB:
2786 allowedBlockSet.micro = 0;
2787 case AddrBlockMicro:
2788 allowedBlockSet.linear = 0;
2789 case AddrBlockLinear:
2790 break;
2791
2792 default:
2793 ADDR_ASSERT_ALWAYS();
2794 break;
2795 }
2796
2797 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2798 {
2799 if ((i != minSizeBlk) &&
2800 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2801 {
2802 if (Addr2BlockTypeWithinMemoryBudget(
2803 minSize,
2804 padSize[i],
2805 0,
2806 0,
2807 pIn->memoryBudget,
2808 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
2809 {
2810 // Clear the block type if the memory waste is unacceptable
2811 allowedBlockSet.value &= ~(1u << (i - 1));
2812 }
2813 }
2814 }
2815
2816 // Remove VAR block type if bigger block type is allowed
2817 if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
2818 {
2819 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
2820 {
2821 allowedBlockSet.var = 0;
2822 }
2823 }
2824
2825 // Remove linear block type if 2 or more block types are allowed
2826 if (IsPow2(allowedBlockSet.value) == FALSE)
2827 {
2828 allowedBlockSet.linear = 0;
2829 }
2830
2831 // Select the biggest allowed block type
2832 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
2833
2834 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
2835 {
2836 minSizeBlk = AddrBlockLinear;
2837 }
2838 }
2839
2840 switch (minSizeBlk)
2841 {
2842 case AddrBlockLinear:
2843 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2844 break;
2845
2846 case AddrBlockMicro:
2847 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2848 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2849 break;
2850
2851 case AddrBlockThin4KB:
2852 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2853 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2854 break;
2855
2856 case AddrBlockThick4KB:
2857 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2858 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2859 break;
2860
2861 case AddrBlockThin64KB:
2862 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2863 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2864 break;
2865
2866 case AddrBlockThick64KB:
2867 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2868 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2869 break;
2870
2871 case AddrBlockThinVar:
2872 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2873 break;
2874
2875 default:
2876 ADDR_ASSERT_ALWAYS();
2877 allowedSwModeSet.value = 0;
2878 break;
2879 }
2880 }
2881
2882 // Block type should be determined.
2883 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2884
2885 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2886
2887 // Determine swizzle type if there are 2 or more swizzle type candidates
2888 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
2889 {
2890 if (ElemLib::IsBlockCompressed(pIn->format))
2891 {
2892 if (allowedSwSet.sw_D)
2893 {
2894 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2895 }
2896 else if (allowedSwSet.sw_S)
2897 {
2898 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2899 }
2900 else
2901 {
2902 ADDR_ASSERT(allowedSwSet.sw_R);
2903 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2904 }
2905 }
2906 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2907 {
2908 if (allowedSwSet.sw_S)
2909 {
2910 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2911 }
2912 else if (allowedSwSet.sw_D)
2913 {
2914 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2915 }
2916 else
2917 {
2918 ADDR_ASSERT(allowedSwSet.sw_R);
2919 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2920 }
2921 }
2922 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2923 {
2924 if (pIn->flags.color &&
2925 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2926 allowedSwSet.sw_D)
2927 {
2928 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2929 }
2930 else if (allowedSwSet.sw_S)
2931 {
2932 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2933 }
2934 else if (allowedSwSet.sw_R)
2935 {
2936 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2937 }
2938 else
2939 {
2940 ADDR_ASSERT(allowedSwSet.sw_Z);
2941 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2942 }
2943 }
2944 else
2945 {
2946 if (allowedSwSet.sw_R)
2947 {
2948 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2949 }
2950 else if (allowedSwSet.sw_D)
2951 {
2952 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2953 }
2954 else if (allowedSwSet.sw_S)
2955 {
2956 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2957 }
2958 else
2959 {
2960 ADDR_ASSERT(allowedSwSet.sw_Z);
2961 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2962 }
2963 }
2964
2965 // Swizzle type should be determined.
2966 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2967 }
2968
2969 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2970 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2971 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2972 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2973 }
2974 }
2975 else
2976 {
2977 // Invalid combination...
2978 ADDR_ASSERT_ALWAYS();
2979 returnCode = ADDR_INVALIDPARAMS;
2980 }
2981 }
2982 else
2983 {
2984 // Invalid combination...
2985 ADDR_ASSERT_ALWAYS();
2986 returnCode = ADDR_INVALIDPARAMS;
2987 }
2988 }
2989
2990 return returnCode;
2991 }
2992
2993 /**
2994 ************************************************************************************************************************
2995 * Gfx10Lib::ComputeStereoInfo
2996 *
2997 * @brief
2998 * Compute height alignment and right eye pipeBankXor for stereo surface
2999 *
3000 * @return
3001 * Error code
3002 *
3003 ************************************************************************************************************************
3004 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3005 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3006 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
3007 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
3008 UINT_32* pRightXor ///< Right eye xor
3009 ) const
3010 {
3011 ADDR_E_RETURNCODE ret = ADDR_OK;
3012
3013 *pRightXor = 0;
3014
3015 if (IsNonPrtXor(pIn->swizzleMode))
3016 {
3017 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3018 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3019 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3020 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3021 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3022
3023 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3024 {
3025 UINT_32 yMax = 0;
3026 UINT_32 yPosMask = 0;
3027
3028 // First get "max y bit"
3029 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3030 {
3031 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3032
3033 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3034 (m_equationTable[eqIndex].addr[i].index > yMax))
3035 {
3036 yMax = m_equationTable[eqIndex].addr[i].index;
3037 }
3038
3039 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3040 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3041 (m_equationTable[eqIndex].xor1[i].index > yMax))
3042 {
3043 yMax = m_equationTable[eqIndex].xor1[i].index;
3044 }
3045
3046 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3047 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3048 (m_equationTable[eqIndex].xor2[i].index > yMax))
3049 {
3050 yMax = m_equationTable[eqIndex].xor2[i].index;
3051 }
3052 }
3053
3054 // Then loop again for populating a position mask of "max Y bit"
3055 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3056 {
3057 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3058 (m_equationTable[eqIndex].addr[i].index == yMax))
3059 {
3060 yPosMask |= 1u << i;
3061 }
3062 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3063 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3064 (m_equationTable[eqIndex].xor1[i].index == yMax))
3065 {
3066 yPosMask |= 1u << i;
3067 }
3068 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3069 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3070 (m_equationTable[eqIndex].xor2[i].index == yMax))
3071 {
3072 yPosMask |= 1u << i;
3073 }
3074 }
3075
3076 const UINT_32 additionalAlign = 1 << yMax;
3077
3078 if (additionalAlign >= *pAlignY)
3079 {
3080 *pAlignY = additionalAlign;
3081
3082 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3083
3084 if ((alignedHeight >> yMax) & 1)
3085 {
3086 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3087 }
3088 }
3089 }
3090 else
3091 {
3092 ret = ADDR_INVALIDPARAMS;
3093 }
3094 }
3095
3096 return ret;
3097 }
3098
3099 /**
3100 ************************************************************************************************************************
3101 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3102 *
3103 * @brief
3104 * Internal function to calculate alignment for tiled surface
3105 *
3106 * @return
3107 * ADDR_E_RETURNCODE
3108 ************************************************************************************************************************
3109 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3110 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3111 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3112 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3113 ) const
3114 {
3115 ADDR_E_RETURNCODE ret;
3116
3117 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3118 pOut->mipChainPitch = 0;
3119 pOut->mipChainHeight = 0;
3120 pOut->mipChainSlice = 0;
3121 pOut->epitchIsHeight = FALSE;
3122
3123 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3124 pOut->mipChainInTail = FALSE;
3125 pOut->firstMipIdInTail = pIn->numMipLevels;
3126
3127 if (IsBlock256b(pIn->swizzleMode))
3128 {
3129 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3130 }
3131 else
3132 {
3133 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3134 }
3135
3136 return ret;
3137 }
3138
3139 /**
3140 ************************************************************************************************************************
3141 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3142 *
3143 * @brief
3144 * Internal function to calculate alignment for micro tiled surface
3145 *
3146 * @return
3147 * ADDR_E_RETURNCODE
3148 ************************************************************************************************************************
3149 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3150 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3151 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3152 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3153 ) const
3154 {
3155 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3156 &pOut->blockHeight,
3157 &pOut->blockSlices,
3158 pIn->bpp,
3159 pIn->numFrags,
3160 pIn->resourceType,
3161 pIn->swizzleMode);
3162
3163 if (ret == ADDR_OK)
3164 {
3165 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3166
3167 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3168 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3169 pOut->numSlices = pIn->numSlices;
3170 pOut->baseAlign = blockSize;
3171
3172 if (pIn->numMipLevels > 1)
3173 {
3174 const UINT_32 mip0Width = pIn->width;
3175 const UINT_32 mip0Height = pIn->height;
3176 UINT_64 mipSliceSize = 0;
3177
3178 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3179 {
3180 UINT_32 mipWidth, mipHeight;
3181
3182 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3183
3184 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3185 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3186
3187 if (pOut->pMipInfo != NULL)
3188 {
3189 pOut->pMipInfo[i].pitch = mipActualWidth;
3190 pOut->pMipInfo[i].height = mipActualHeight;
3191 pOut->pMipInfo[i].depth = 1;
3192 pOut->pMipInfo[i].offset = mipSliceSize;
3193 pOut->pMipInfo[i].mipTailOffset = 0;
3194 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3195 }
3196
3197 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3198 }
3199
3200 pOut->sliceSize = mipSliceSize;
3201 pOut->surfSize = mipSliceSize * pOut->numSlices;
3202 }
3203 else
3204 {
3205 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3206 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3207
3208 if (pOut->pMipInfo != NULL)
3209 {
3210 pOut->pMipInfo[0].pitch = pOut->pitch;
3211 pOut->pMipInfo[0].height = pOut->height;
3212 pOut->pMipInfo[0].depth = 1;
3213 pOut->pMipInfo[0].offset = 0;
3214 pOut->pMipInfo[0].mipTailOffset = 0;
3215 pOut->pMipInfo[0].macroBlockOffset = 0;
3216 }
3217 }
3218
3219 }
3220
3221 return ret;
3222 }
3223
3224 /**
3225 ************************************************************************************************************************
3226 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3227 *
3228 * @brief
3229 * Internal function to calculate alignment for macro tiled surface
3230 *
3231 * @return
3232 * ADDR_E_RETURNCODE
3233 ************************************************************************************************************************
3234 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3235 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3236 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3237 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3238 ) const
3239 {
3240 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3241 &pOut->blockHeight,
3242 &pOut->blockSlices,
3243 pIn->bpp,
3244 pIn->numFrags,
3245 pIn->resourceType,
3246 pIn->swizzleMode);
3247
3248 if (returnCode == ADDR_OK)
3249 {
3250 UINT_32 heightAlign = pOut->blockHeight;
3251
3252 if (pIn->flags.qbStereo)
3253 {
3254 UINT_32 rightXor = 0;
3255
3256 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3257
3258 if (returnCode == ADDR_OK)
3259 {
3260 pOut->pStereoInfo->rightSwizzle = rightXor;
3261 }
3262 }
3263
3264 if (returnCode == ADDR_OK)
3265 {
3266 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3267 const UINT_32 blockSize = 1 << blockSizeLog2;
3268
3269 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3270 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3271 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3272 pOut->baseAlign = blockSize;
3273
3274 if (pIn->numMipLevels > 1)
3275 {
3276 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3277 pIn->swizzleMode,
3278 pOut->blockWidth,
3279 pOut->blockHeight,
3280 pOut->blockSlices);
3281 const UINT_32 mip0Width = pIn->width;
3282 const UINT_32 mip0Height = pIn->height;
3283 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3284 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3285 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3286 const UINT_32 index = Log2(pIn->bpp >> 3);
3287 UINT_32 firstMipInTail = pIn->numMipLevels;
3288 UINT_64 mipChainSliceSize = 0;
3289 UINT_64 mipSize[MaxMipLevels];
3290 UINT_64 mipSliceSize[MaxMipLevels];
3291
3292 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3293 Dim3d fixedTailMaxDim = tailMaxDim;
3294
3295 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3296 {
3297 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3298 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3299 }
3300
3301 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3302 {
3303 UINT_32 mipWidth, mipHeight, mipDepth;
3304
3305 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3306
3307 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3308 {
3309 firstMipInTail = i;
3310 mipChainSliceSize += blockSize / pOut->blockSlices;
3311 break;
3312 }
3313 else
3314 {
3315 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3316 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3317 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3318 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3319
3320 mipSize[i] = sliceSize * depth;
3321 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3322 mipChainSliceSize += sliceSize;
3323
3324 if (pOut->pMipInfo != NULL)
3325 {
3326 pOut->pMipInfo[i].pitch = pitch;
3327 pOut->pMipInfo[i].height = height;
3328 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3329 }
3330 }
3331 }
3332
3333 pOut->sliceSize = mipChainSliceSize;
3334 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3335 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3336 pOut->firstMipIdInTail = firstMipInTail;
3337
3338 if (pOut->pMipInfo != NULL)
3339 {
3340 UINT_64 offset = 0;
3341 UINT_64 macroBlkOffset = 0;
3342 UINT_32 tailMaxDepth = 0;
3343
3344 if (firstMipInTail != pIn->numMipLevels)
3345 {
3346 UINT_32 mipWidth, mipHeight;
3347
3348 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3349 &mipWidth, &mipHeight, &tailMaxDepth);
3350
3351 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3352 macroBlkOffset = blockSize;
3353 }
3354
3355 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3356 {
3357 pOut->pMipInfo[i].offset = offset;
3358 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3359 pOut->pMipInfo[i].mipTailOffset = 0;
3360
3361 offset += mipSize[i];
3362 macroBlkOffset += mipSliceSize[i];
3363 }
3364
3365 UINT_32 pitch = tailMaxDim.w;
3366 UINT_32 height = tailMaxDim.h;
3367 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3368
3369 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3370
3371 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3372 {
3373 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3374 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3375
3376 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3377 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3378 pOut->pMipInfo[i].macroBlockOffset = 0;
3379
3380 pOut->pMipInfo[i].pitch = pitch;
3381 pOut->pMipInfo[i].height = height;
3382 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3383
3384 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3385 ((mipOffset >> 10) & 2) |
3386 ((mipOffset >> 11) & 4) |
3387 ((mipOffset >> 12) & 8) |
3388 ((mipOffset >> 13) & 16) |
3389 ((mipOffset >> 14) & 32);
3390 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3391 ((mipOffset >> 9) & 2) |
3392 ((mipOffset >> 10) & 4) |
3393 ((mipOffset >> 11) & 8) |
3394 ((mipOffset >> 12) & 16) |
3395 ((mipOffset >> 13) & 32);
3396
3397 if (blockSizeLog2 & 1)
3398 {
3399 const UINT_32 temp = mipX;
3400 mipX = mipY;
3401 mipY = temp;
3402
3403 if (index & 1)
3404 {
3405 mipY = (mipY << 1) | (mipX & 1);
3406 mipX = mipX >> 1;
3407 }
3408 }
3409
3410 if (isThin)
3411 {
3412 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3413 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3414 pOut->pMipInfo[i].mipTailCoordZ = 0;
3415
3416 pitch = Max(pitch >> 1, Block256_2d[index].w);
3417 height = Max(height >> 1, Block256_2d[index].h);
3418 }
3419 else
3420 {
3421 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3422 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3423 pOut->pMipInfo[i].mipTailCoordZ = 0;
3424
3425 pitch = Max(pitch >> 1, Block256_3d[index].w);
3426 height = Max(height >> 1, Block256_3d[index].h);
3427 }
3428 }
3429 }
3430 }
3431 else
3432 {
3433 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3434 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3435
3436 if (pOut->pMipInfo != NULL)
3437 {
3438 pOut->pMipInfo[0].pitch = pOut->pitch;
3439 pOut->pMipInfo[0].height = pOut->height;
3440 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3441 pOut->pMipInfo[0].offset = 0;
3442 pOut->pMipInfo[0].mipTailOffset = 0;
3443 pOut->pMipInfo[0].macroBlockOffset = 0;
3444 pOut->pMipInfo[0].mipTailCoordX = 0;
3445 pOut->pMipInfo[0].mipTailCoordY = 0;
3446 pOut->pMipInfo[0].mipTailCoordZ = 0;
3447 }
3448 }
3449 }
3450 }
3451
3452 return returnCode;
3453 }
3454
3455 /**
3456 ************************************************************************************************************************
3457 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3458 *
3459 * @brief
3460 * Internal function to calculate address from coord for tiled swizzle surface
3461 *
3462 * @return
3463 * ADDR_E_RETURNCODE
3464 ************************************************************************************************************************
3465 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3466 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3467 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3468 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3469 ) const
3470 {
3471 ADDR_E_RETURNCODE ret;
3472
3473 if (IsBlock256b(pIn->swizzleMode))
3474 {
3475 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3476 }
3477 else
3478 {
3479 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3480 }
3481
3482 return ret;
3483 }
3484
3485 /**
3486 ************************************************************************************************************************
3487 * Gfx10Lib::ComputeOffsetFromEquation
3488 *
3489 * @brief
3490 * Compute offset from equation
3491 *
3492 * @return
3493 * Offset
3494 ************************************************************************************************************************
3495 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3496 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3497 const ADDR_EQUATION* pEq, ///< Equation
3498 UINT_32 x, ///< x coord in bytes
3499 UINT_32 y, ///< y coord in pixel
3500 UINT_32 z ///< z coord in slice
3501 ) const
3502 {
3503 UINT_32 offset = 0;
3504
3505 for (UINT_32 i = 0; i < pEq->numBits; i++)
3506 {
3507 UINT_32 v = 0;
3508
3509 for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
3510 {
3511 if (pEq->comps[c][i].valid)
3512 {
3513 if (pEq->comps[c][i].channel == 0)
3514 {
3515 v ^= (x >> pEq->comps[c][i].index) & 1;
3516 }
3517 else if (pEq->comps[c][i].channel == 1)
3518 {
3519 v ^= (y >> pEq->comps[c][i].index) & 1;
3520 }
3521 else
3522 {
3523 ADDR_ASSERT(pEq->comps[c][i].channel == 2);
3524 v ^= (z >> pEq->comps[c][i].index) & 1;
3525 }
3526 }
3527 }
3528
3529 offset |= (v << i);
3530 }
3531
3532 return offset;
3533 }
3534
3535 /**
3536 ************************************************************************************************************************
3537 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3538 *
3539 * @brief
3540 * Compute offset from swizzle pattern
3541 *
3542 * @return
3543 * Offset
3544 ************************************************************************************************************************
3545 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3546 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3547 const UINT_64* pPattern, ///< Swizzle pattern
3548 UINT_32 numBits, ///< Number of bits in pattern
3549 UINT_32 x, ///< x coord in pixel
3550 UINT_32 y, ///< y coord in pixel
3551 UINT_32 z, ///< z coord in slice
3552 UINT_32 s ///< sample id
3553 ) const
3554 {
3555 UINT_32 offset = 0;
3556 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3557
3558 for (UINT_32 i = 0; i < numBits; i++)
3559 {
3560 UINT_32 v = 0;
3561
3562 if (pSwizzlePattern[i].x != 0)
3563 {
3564 UINT_16 mask = pSwizzlePattern[i].x;
3565 UINT_32 xBits = x;
3566
3567 while (mask != 0)
3568 {
3569 if (mask & 1)
3570 {
3571 v ^= xBits & 1;
3572 }
3573
3574 xBits >>= 1;
3575 mask >>= 1;
3576 }
3577 }
3578
3579 if (pSwizzlePattern[i].y != 0)
3580 {
3581 UINT_16 mask = pSwizzlePattern[i].y;
3582 UINT_32 yBits = y;
3583
3584 while (mask != 0)
3585 {
3586 if (mask & 1)
3587 {
3588 v ^= yBits & 1;
3589 }
3590
3591 yBits >>= 1;
3592 mask >>= 1;
3593 }
3594 }
3595
3596 if (pSwizzlePattern[i].z != 0)
3597 {
3598 UINT_16 mask = pSwizzlePattern[i].z;
3599 UINT_32 zBits = z;
3600
3601 while (mask != 0)
3602 {
3603 if (mask & 1)
3604 {
3605 v ^= zBits & 1;
3606 }
3607
3608 zBits >>= 1;
3609 mask >>= 1;
3610 }
3611 }
3612
3613 if (pSwizzlePattern[i].s != 0)
3614 {
3615 UINT_16 mask = pSwizzlePattern[i].s;
3616 UINT_32 sBits = s;
3617
3618 while (mask != 0)
3619 {
3620 if (mask & 1)
3621 {
3622 v ^= sBits & 1;
3623 }
3624
3625 sBits >>= 1;
3626 mask >>= 1;
3627 }
3628 }
3629
3630 offset |= (v << i);
3631 }
3632
3633 return offset;
3634 }
3635
3636 /**
3637 ************************************************************************************************************************
3638 * Gfx10Lib::GetSwizzlePatternInfo
3639 *
3640 * @brief
3641 * Get swizzle pattern
3642 *
3643 * @return
3644 * Swizzle pattern information
3645 ************************************************************************************************************************
3646 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3647 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
3648 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3649 AddrResourceType resourceType, ///< Resource type
3650 UINT_32 elemLog2, ///< Element size in bytes log2
3651 UINT_32 numFrag ///< Number of fragment
3652 ) const
3653 {
3654 // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from
3655 // the right location
3656 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3657 const ADDR_SW_PATINFO* patInfo = NULL;
3658 const UINT_32 swizzleMask = 1 << swizzleMode;
3659
3660 if (IsBlockVariable(swizzleMode))
3661 {
3662 if (m_blockVarSizeLog2 != 0)
3663 {
3664 ADDR_ASSERT(m_settings.supportRbPlus);
3665
3666 if (IsRtOptSwizzle(swizzleMode))
3667 {
3668 if (numFrag == 1)
3669 {
3670 patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
3671 }
3672 else if (numFrag == 2)
3673 {
3674 patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
3675 }
3676 else if (numFrag == 4)
3677 {
3678 patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
3679 }
3680 else
3681 {
3682 ADDR_ASSERT(numFrag == 8);
3683 patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
3684 }
3685 }
3686 else if (IsZOrderSwizzle(swizzleMode))
3687 {
3688 if (numFrag == 1)
3689 {
3690 patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
3691 }
3692 else if (numFrag == 2)
3693 {
3694 patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
3695 }
3696 else if (numFrag == 4)
3697 {
3698 patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
3699 }
3700 else
3701 {
3702 ADDR_ASSERT(numFrag == 8);
3703 patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
3704 }
3705 }
3706 }
3707 }
3708 else if (IsLinear(swizzleMode) == FALSE)
3709 {
3710 if (resourceType == ADDR_RSRC_TEX_3D)
3711 {
3712 ADDR_ASSERT(numFrag == 1);
3713
3714 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
3715 {
3716 if (IsRtOptSwizzle(swizzleMode))
3717 {
3718 if (swizzleMode == ADDR_SW_4KB_R_X)
3719 {
3720 patInfo = NULL;
3721 }
3722 else
3723 {
3724 patInfo = m_settings.supportRbPlus ?
3725 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3726 }
3727 }
3728 else if (IsZOrderSwizzle(swizzleMode))
3729 {
3730 patInfo = m_settings.supportRbPlus ?
3731 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3732 }
3733 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3734 {
3735 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3736 patInfo = m_settings.supportRbPlus ?
3737 GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
3738 }
3739 else
3740 {
3741 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3742
3743 if (IsBlock4kb(swizzleMode))
3744 {
3745 if (swizzleMode == ADDR_SW_4KB_S)
3746 {
3747 patInfo = m_settings.supportRbPlus ?
3748 GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
3749 }
3750 else
3751 {
3752 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3753 patInfo = m_settings.supportRbPlus ?
3754 GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
3755 }
3756 }
3757 else
3758 {
3759 if (swizzleMode == ADDR_SW_64KB_S)
3760 {
3761 patInfo = m_settings.supportRbPlus ?
3762 GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
3763 }
3764 else if (swizzleMode == ADDR_SW_64KB_S_X)
3765 {
3766 patInfo = m_settings.supportRbPlus ?
3767 GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
3768 }
3769 else
3770 {
3771 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3772 patInfo = m_settings.supportRbPlus ?
3773 GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
3774 }
3775 }
3776 }
3777 }
3778 }
3779 else
3780 {
3781 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
3782 {
3783 if (IsBlock256b(swizzleMode))
3784 {
3785 if (swizzleMode == ADDR_SW_256B_S)
3786 {
3787 patInfo = m_settings.supportRbPlus ?
3788 GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
3789 }
3790 else
3791 {
3792 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3793 patInfo = m_settings.supportRbPlus ?
3794 GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
3795 }
3796 }
3797 else if (IsBlock4kb(swizzleMode))
3798 {
3799 if (IsStandardSwizzle(resourceType, swizzleMode))
3800 {
3801 if (swizzleMode == ADDR_SW_4KB_S)
3802 {
3803 patInfo = m_settings.supportRbPlus ?
3804 GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
3805 }
3806 else
3807 {
3808 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3809 patInfo = m_settings.supportRbPlus ?
3810 GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
3811 }
3812 }
3813 else
3814 {
3815 if (swizzleMode == ADDR_SW_4KB_D)
3816 {
3817 patInfo = m_settings.supportRbPlus ?
3818 GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
3819 }
3820 else if (swizzleMode == ADDR_SW_4KB_R_X)
3821 {
3822 patInfo = NULL;
3823 }
3824 else
3825 {
3826 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3827 patInfo = m_settings.supportRbPlus ?
3828 GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
3829 }
3830 }
3831 }
3832 else
3833 {
3834 if (IsRtOptSwizzle(swizzleMode))
3835 {
3836 if (numFrag == 1)
3837 {
3838 patInfo = m_settings.supportRbPlus ?
3839 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3840 }
3841 else if (numFrag == 2)
3842 {
3843 patInfo = m_settings.supportRbPlus ?
3844 GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
3845 }
3846 else if (numFrag == 4)
3847 {
3848 patInfo = m_settings.supportRbPlus ?
3849 GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
3850 }
3851 else
3852 {
3853 ADDR_ASSERT(numFrag == 8);
3854 patInfo = m_settings.supportRbPlus ?
3855 GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
3856 }
3857 }
3858 else if (IsZOrderSwizzle(swizzleMode))
3859 {
3860 if (numFrag == 1)
3861 {
3862 patInfo = m_settings.supportRbPlus ?
3863 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3864 }
3865 else if (numFrag == 2)
3866 {
3867 patInfo = m_settings.supportRbPlus ?
3868 GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
3869 }
3870 else if (numFrag == 4)
3871 {
3872 patInfo = m_settings.supportRbPlus ?
3873 GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
3874 }
3875 else
3876 {
3877 ADDR_ASSERT(numFrag == 8);
3878 patInfo = m_settings.supportRbPlus ?
3879 GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
3880 }
3881 }
3882 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3883 {
3884 if (swizzleMode == ADDR_SW_64KB_D)
3885 {
3886 patInfo = m_settings.supportRbPlus ?
3887 GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
3888 }
3889 else if (swizzleMode == ADDR_SW_64KB_D_X)
3890 {
3891 patInfo = m_settings.supportRbPlus ?
3892 GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
3893 }
3894 else
3895 {
3896 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3897 patInfo = m_settings.supportRbPlus ?
3898 GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
3899 }
3900 }
3901 else
3902 {
3903 if (swizzleMode == ADDR_SW_64KB_S)
3904 {
3905 patInfo = m_settings.supportRbPlus ?
3906 GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
3907 }
3908 else if (swizzleMode == ADDR_SW_64KB_S_X)
3909 {
3910 patInfo = m_settings.supportRbPlus ?
3911 GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
3912 }
3913 else
3914 {
3915 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3916 patInfo = m_settings.supportRbPlus ?
3917 GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
3918 }
3919 }
3920 }
3921 }
3922 }
3923 }
3924
3925 return (patInfo != NULL) ? &patInfo[index] : NULL;
3926 }
3927
3928 /**
3929 ************************************************************************************************************************
3930 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3931 *
3932 * @brief
3933 * Internal function to calculate address from coord for micro tiled swizzle surface
3934 *
3935 * @return
3936 * ADDR_E_RETURNCODE
3937 ************************************************************************************************************************
3938 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3939 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3940 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3941 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3942 ) const
3943 {
3944 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3945 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3946 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3947 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3948
3949 localIn.swizzleMode = pIn->swizzleMode;
3950 localIn.flags = pIn->flags;
3951 localIn.resourceType = pIn->resourceType;
3952 localIn.bpp = pIn->bpp;
3953 localIn.width = Max(pIn->unalignedWidth, 1u);
3954 localIn.height = Max(pIn->unalignedHeight, 1u);
3955 localIn.numSlices = Max(pIn->numSlices, 1u);
3956 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3957 localIn.numSamples = Max(pIn->numSamples, 1u);
3958 localIn.numFrags = Max(pIn->numFrags, 1u);
3959 localOut.pMipInfo = mipInfo;
3960
3961 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3962
3963 if (ret == ADDR_OK)
3964 {
3965 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3966 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3967 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3968 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3969
3970 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3971 {
3972 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3973 const UINT_32 yb = pIn->y / localOut.blockHeight;
3974 const UINT_32 xb = pIn->x / localOut.blockWidth;
3975 const UINT_32 blockIndex = yb * pb + xb;
3976 const UINT_32 blockSize = 256;
3977 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3978 pIn->x << elemLog2,
3979 pIn->y,
3980 0);
3981 pOut->addr = localOut.sliceSize * pIn->slice +
3982 mipInfo[pIn->mipId].macroBlockOffset +
3983 (blockIndex * blockSize) +
3984 blk256Offset;
3985 }
3986 else
3987 {
3988 ret = ADDR_INVALIDPARAMS;
3989 }
3990 }
3991
3992 return ret;
3993 }
3994
3995 /**
3996 ************************************************************************************************************************
3997 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
3998 *
3999 * @brief
4000 * Internal function to calculate address from coord for macro tiled swizzle surface
4001 *
4002 * @return
4003 * ADDR_E_RETURNCODE
4004 ************************************************************************************************************************
4005 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4006 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4007 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4008 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4009 ) const
4010 {
4011 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4012 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4013 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4014 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4015
4016 localIn.swizzleMode = pIn->swizzleMode;
4017 localIn.flags = pIn->flags;
4018 localIn.resourceType = pIn->resourceType;
4019 localIn.bpp = pIn->bpp;
4020 localIn.width = Max(pIn->unalignedWidth, 1u);
4021 localIn.height = Max(pIn->unalignedHeight, 1u);
4022 localIn.numSlices = Max(pIn->numSlices, 1u);
4023 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4024 localIn.numSamples = Max(pIn->numSamples, 1u);
4025 localIn.numFrags = Max(pIn->numFrags, 1u);
4026 localOut.pMipInfo = mipInfo;
4027
4028 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4029
4030 if (ret == ADDR_OK)
4031 {
4032 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4033 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4034 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4035 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4036 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4037 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4038 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4039
4040 if (localIn.numFrags > 1)
4041 {
4042 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4043 pIn->resourceType,
4044 elemLog2,
4045 localIn.numFrags);
4046
4047 if (pPatInfo != NULL)
4048 {
4049 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4050 const UINT_32 yb = pIn->y / localOut.blockHeight;
4051 const UINT_32 xb = pIn->x / localOut.blockWidth;
4052 const UINT_64 blkIdx = yb * pb + xb;
4053
4054 ADDR_BIT_SETTING fullSwizzlePattern[20];
4055 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4056
4057 const UINT_32 blkOffset =
4058 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4059 blkSizeLog2,
4060 pIn->x,
4061 pIn->y,
4062 pIn->slice,
4063 pIn->sample);
4064
4065 pOut->addr = (localOut.sliceSize * pIn->slice) +
4066 (blkIdx << blkSizeLog2) +
4067 (blkOffset ^ pipeBankXor);
4068 }
4069 else
4070 {
4071 ret = ADDR_INVALIDPARAMS;
4072 }
4073 }
4074 else
4075 {
4076 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4077 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4078 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4079
4080 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4081 {
4082 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4083 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4084 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4085 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4086 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4087 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4088 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4089 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4090 const UINT_32 yb = pIn->y / localOut.blockHeight;
4091 const UINT_32 xb = pIn->x / localOut.blockWidth;
4092 const UINT_64 blkIdx = yb * pb + xb;
4093 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4094 x << elemLog2,
4095 y,
4096 z);
4097 pOut->addr = sliceSize * sliceId +
4098 mipInfo[pIn->mipId].macroBlockOffset +
4099 (blkIdx << blkSizeLog2) +
4100 (blkOffset ^ pipeBankXor);
4101 }
4102 else
4103 {
4104 ret = ADDR_INVALIDPARAMS;
4105 }
4106 }
4107 }
4108
4109 return ret;
4110 }
4111
4112 /**
4113 ************************************************************************************************************************
4114 * Gfx10Lib::HwlComputeMaxBaseAlignments
4115 *
4116 * @brief
4117 * Gets maximum alignments
4118 * @return
4119 * maximum alignments
4120 ************************************************************************************************************************
4121 */
HwlComputeMaxBaseAlignments() const4122 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4123 {
4124 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4125 }
4126
4127 /**
4128 ************************************************************************************************************************
4129 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4130 *
4131 * @brief
4132 * Gets maximum alignments for metadata
4133 * @return
4134 * maximum alignments for metadata
4135 ************************************************************************************************************************
4136 */
HwlComputeMaxMetaBaseAlignments() const4137 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4138 {
4139 Dim3d metaBlk;
4140
4141 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4142 {
4143 ADDR_SW_64KB_Z_X,
4144 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4145 };
4146
4147 UINT_32 maxBaseAlignHtile = 0;
4148 UINT_32 maxBaseAlignCmask = 0;
4149
4150 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4151 {
4152 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4153 {
4154 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4155 {
4156 // Max base alignment for Htile
4157 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4158 ADDR_RSRC_TEX_2D,
4159 ValidSwizzleModeForXmask[swIdx],
4160 bppLog2,
4161 numFragLog2,
4162 TRUE,
4163 &metaBlk);
4164
4165 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4166 }
4167 }
4168
4169 // Max base alignment for Cmask
4170 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4171 ADDR_RSRC_TEX_2D,
4172 ValidSwizzleModeForXmask[swIdx],
4173 0,
4174 0,
4175 TRUE,
4176 &metaBlk);
4177
4178 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4179 }
4180
4181 // Max base alignment for 2D Dcc
4182 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4183 {
4184 ADDR_SW_64KB_S_X,
4185 ADDR_SW_64KB_D_X,
4186 ADDR_SW_64KB_R_X,
4187 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4188 };
4189
4190 UINT_32 maxBaseAlignDcc2D = 0;
4191
4192 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4193 {
4194 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4195 {
4196 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4197 {
4198 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4199 ADDR_RSRC_TEX_2D,
4200 ValidSwizzleModeForDcc2D[swIdx],
4201 bppLog2,
4202 numFragLog2,
4203 TRUE,
4204 &metaBlk);
4205
4206 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4207 }
4208 }
4209 }
4210
4211 // Max base alignment for 3D Dcc
4212 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4213 {
4214 ADDR_SW_64KB_Z_X,
4215 ADDR_SW_64KB_S_X,
4216 ADDR_SW_64KB_D_X,
4217 ADDR_SW_64KB_R_X,
4218 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4219 };
4220
4221 UINT_32 maxBaseAlignDcc3D = 0;
4222
4223 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4224 {
4225 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4226 {
4227 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4228 ADDR_RSRC_TEX_3D,
4229 ValidSwizzleModeForDcc3D[swIdx],
4230 bppLog2,
4231 0,
4232 TRUE,
4233 &metaBlk);
4234
4235 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4236 }
4237 }
4238
4239 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4240 }
4241
4242 /**
4243 ************************************************************************************************************************
4244 * Gfx10Lib::GetMetaElementSizeLog2
4245 *
4246 * @brief
4247 * Gets meta data element size log2
4248 * @return
4249 * Meta data element size log2
4250 ************************************************************************************************************************
4251 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4252 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4253 Gfx10DataType dataType) ///< Data surface type
4254 {
4255 INT_32 elemSizeLog2 = 0;
4256
4257 if (dataType == Gfx10DataColor)
4258 {
4259 elemSizeLog2 = 0;
4260 }
4261 else if (dataType == Gfx10DataDepthStencil)
4262 {
4263 elemSizeLog2 = 2;
4264 }
4265 else
4266 {
4267 ADDR_ASSERT(dataType == Gfx10DataFmask);
4268 elemSizeLog2 = -1;
4269 }
4270
4271 return elemSizeLog2;
4272 }
4273
4274 /**
4275 ************************************************************************************************************************
4276 * Gfx10Lib::GetMetaCacheSizeLog2
4277 *
4278 * @brief
4279 * Gets meta data cache line size log2
4280 * @return
4281 * Meta data cache line size log2
4282 ************************************************************************************************************************
4283 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4284 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4285 Gfx10DataType dataType) ///< Data surface type
4286 {
4287 INT_32 cacheSizeLog2 = 0;
4288
4289 if (dataType == Gfx10DataColor)
4290 {
4291 cacheSizeLog2 = 6;
4292 }
4293 else if (dataType == Gfx10DataDepthStencil)
4294 {
4295 cacheSizeLog2 = 8;
4296 }
4297 else
4298 {
4299 ADDR_ASSERT(dataType == Gfx10DataFmask);
4300 cacheSizeLog2 = 8;
4301 }
4302 return cacheSizeLog2;
4303 }
4304
4305 /**
4306 ************************************************************************************************************************
4307 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4308 *
4309 * @brief
4310 * Internal function to calculate alignment for linear surface
4311 *
4312 * @return
4313 * ADDR_E_RETURNCODE
4314 ************************************************************************************************************************
4315 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4316 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4317 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4318 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4319 ) const
4320 {
4321 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4322
4323 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4324 {
4325 returnCode = ADDR_INVALIDPARAMS;
4326 }
4327 else
4328 {
4329 const UINT_32 elementBytes = pIn->bpp >> 3;
4330 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4331 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4332 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4333 UINT_32 actualHeight = pIn->height;
4334 UINT_64 sliceSize = 0;
4335
4336 if (pIn->numMipLevels > 1)
4337 {
4338 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4339 {
4340 UINT_32 mipWidth, mipHeight;
4341
4342 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4343
4344 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4345
4346 if (pOut->pMipInfo != NULL)
4347 {
4348 pOut->pMipInfo[i].pitch = mipActualWidth;
4349 pOut->pMipInfo[i].height = mipHeight;
4350 pOut->pMipInfo[i].depth = mipDepth;
4351 pOut->pMipInfo[i].offset = sliceSize;
4352 pOut->pMipInfo[i].mipTailOffset = 0;
4353 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4354 }
4355
4356 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4357 }
4358 }
4359 else
4360 {
4361 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4362
4363 if (returnCode == ADDR_OK)
4364 {
4365 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4366
4367 if (pOut->pMipInfo != NULL)
4368 {
4369 pOut->pMipInfo[0].pitch = pitch;
4370 pOut->pMipInfo[0].height = actualHeight;
4371 pOut->pMipInfo[0].depth = mipDepth;
4372 pOut->pMipInfo[0].offset = 0;
4373 pOut->pMipInfo[0].mipTailOffset = 0;
4374 pOut->pMipInfo[0].macroBlockOffset = 0;
4375 }
4376 }
4377 }
4378
4379 if (returnCode == ADDR_OK)
4380 {
4381 pOut->pitch = pitch;
4382 pOut->height = actualHeight;
4383 pOut->numSlices = pIn->numSlices;
4384 pOut->sliceSize = sliceSize;
4385 pOut->surfSize = sliceSize * pOut->numSlices;
4386 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4387 pOut->blockWidth = pitchAlign;
4388 pOut->blockHeight = 1;
4389 pOut->blockSlices = 1;
4390
4391 // Following members are useless on GFX10
4392 pOut->mipChainPitch = 0;
4393 pOut->mipChainHeight = 0;
4394 pOut->mipChainSlice = 0;
4395 pOut->epitchIsHeight = FALSE;
4396
4397 // Post calculation validate
4398 ADDR_ASSERT(pOut->sliceSize > 0);
4399 }
4400 }
4401
4402 return returnCode;
4403 }
4404
4405 } // V2
4406 } // Addr
4407