1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2024 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx10addrlib.cpp
12 * @brief Contains the implementation for the Gfx10Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx10addrlib.h"
17 #include "addrcommon.h"
18 #include "addrswizzler.h"
19 #include "gfx10_gb_reg.h"
20
21 #include "amdgpu_asic_addr.h"
22
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
25
26 namespace Addr
27 {
28 /**
29 ************************************************************************************************************************
30 * Gfx10HwlInit
31 *
32 * @brief
33 * Creates a Gfx10Lib object.
34 *
35 * @return
36 * Returns a Gfx10Lib object pointer.
37 ************************************************************************************************************************
38 */
Gfx10HwlInit(const Client * pClient)39 Addr::Lib* Gfx10HwlInit(const Client* pClient)
40 {
41 return V2::Gfx10Lib::CreateObj(pClient);
42 }
43
44 namespace V2
45 {
46
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 // Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50
51 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
52 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
53 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
54 {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S
55 {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
56 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
57
58 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
59 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
60 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
61 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
62
63 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
64 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
65 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
66 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
67
68 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
69 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
70 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
71 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
72
73 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
74 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
75 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
76 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
77
78 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
79 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X
80 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X
81 {{0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_4KB_R_X
82
83 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
84 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
85 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
86 {{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_64KB_R_X
87
88 {{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_VAR_Z_X
89 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
90 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
91 {{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_VAR_R_X
92 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
93 };
94
95 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
96
97 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
98 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
99
100 /**
101 ************************************************************************************************************************
102 * Gfx10Lib::Gfx10Lib
103 *
104 * @brief
105 * Constructor
106 *
107 ************************************************************************************************************************
108 */
Gfx10Lib(const Client * pClient)109 Gfx10Lib::Gfx10Lib(const Client* pClient)
110 :
111 Lib(pClient),
112 m_numPkrLog2(0),
113 m_numSaLog2(0),
114 m_colorBaseIndex(0),
115 m_xmaskBaseIndex(0),
116 m_htileBaseIndex(0),
117 m_dccBaseIndex(0)
118 {
119 memset(&m_settings, 0, sizeof(m_settings));
120 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
121 }
122
123 /**
124 ************************************************************************************************************************
125 * Gfx10Lib::~Gfx10Lib
126 *
127 * @brief
128 * Destructor
129 ************************************************************************************************************************
130 */
~Gfx10Lib()131 Gfx10Lib::~Gfx10Lib()
132 {
133 }
134
135 /**
136 ************************************************************************************************************************
137 * Gfx10Lib::HwlComputeHtileInfo
138 *
139 * @brief
140 * Interface function stub of AddrComputeHtileInfo
141 *
142 * @return
143 * ADDR_E_RETURNCODE
144 ************************************************************************************************************************
145 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const146 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
147 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
148 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
149 ) const
150 {
151 ADDR_E_RETURNCODE ret = ADDR_OK;
152
153 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
154 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
155 (pIn->hTileFlags.pipeAligned != TRUE))
156 {
157 ret = ADDR_INVALIDPARAMS;
158 }
159 else
160 {
161 Dim3d metaBlk = {};
162 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
163 ADDR_RSRC_TEX_2D,
164 pIn->swizzleMode,
165 0,
166 0,
167 TRUE,
168 &metaBlk);
169
170 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
171 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
172 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
173 pOut->metaBlkWidth = metaBlk.w;
174 pOut->metaBlkHeight = metaBlk.h;
175
176 if (pIn->numMipLevels > 1)
177 {
178 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
179
180 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
181
182 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
183 {
184 UINT_32 mipWidth, mipHeight;
185
186 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
187
188 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
189 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
190
191 const UINT_32 pitchInM = mipWidth / metaBlk.w;
192 const UINT_32 heightInM = mipHeight / metaBlk.h;
193 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
194
195 if (pOut->pMipInfo != NULL)
196 {
197 pOut->pMipInfo[i].inMiptail = FALSE;
198 pOut->pMipInfo[i].offset = offset;
199 pOut->pMipInfo[i].sliceSize = mipSliceSize;
200 }
201
202 offset += mipSliceSize;
203 }
204
205 pOut->sliceSize = offset;
206 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
207 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
208
209 if (pOut->pMipInfo != NULL)
210 {
211 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
212 {
213 pOut->pMipInfo[i].inMiptail = TRUE;
214 pOut->pMipInfo[i].offset = 0;
215 pOut->pMipInfo[i].sliceSize = 0;
216 }
217
218 if (pIn->firstMipIdInTail != pIn->numMipLevels)
219 {
220 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
221 }
222 }
223 }
224 else
225 {
226 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
227 const UINT_32 heightInM = pOut->height / metaBlk.h;
228
229 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
230 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
231 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
232
233 if (pOut->pMipInfo != NULL)
234 {
235 pOut->pMipInfo[0].inMiptail = FALSE;
236 pOut->pMipInfo[0].offset = 0;
237 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
238 }
239 }
240
241 // Get the HTILE address equation (copied from HtileAddrFromCoord).
242 // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
243 const UINT_32 index = m_xmaskBaseIndex;
244 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
245
246 ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
247 pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
248 }
249
250 return ret;
251 }
252
253 /**
254 ************************************************************************************************************************
255 * Gfx10Lib::HwlComputeCmaskInfo
256 *
257 * @brief
258 * Interface function stub of AddrComputeCmaskInfo
259 *
260 * @return
261 * ADDR_E_RETURNCODE
262 ************************************************************************************************************************
263 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const264 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
265 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
266 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
267 ) const
268 {
269 ADDR_E_RETURNCODE ret = ADDR_OK;
270
271 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
272 (pIn->cMaskFlags.pipeAligned != TRUE) ||
273 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
274 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
275 {
276 ret = ADDR_INVALIDPARAMS;
277 }
278 else
279 {
280 Dim3d metaBlk = {};
281 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
282 ADDR_RSRC_TEX_2D,
283 pIn->swizzleMode,
284 0,
285 0,
286 TRUE,
287 &metaBlk);
288
289 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
290 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
291 pOut->baseAlign = metaBlkSize;
292 pOut->metaBlkWidth = metaBlk.w;
293 pOut->metaBlkHeight = metaBlk.h;
294
295 if (pIn->numMipLevels > 1)
296 {
297 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
298
299 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
300
301 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
302 {
303 UINT_32 mipWidth, mipHeight;
304
305 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
306
307 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
308 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
309
310 const UINT_32 pitchInM = mipWidth / metaBlk.w;
311 const UINT_32 heightInM = mipHeight / metaBlk.h;
312
313 if (pOut->pMipInfo != NULL)
314 {
315 pOut->pMipInfo[i].inMiptail = FALSE;
316 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
317 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
318 }
319
320 metaBlkPerSlice += pitchInM * heightInM;
321 }
322
323 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
324
325 if (pOut->pMipInfo != NULL)
326 {
327 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
328 {
329 pOut->pMipInfo[i].inMiptail = TRUE;
330 pOut->pMipInfo[i].offset = 0;
331 pOut->pMipInfo[i].sliceSize = 0;
332 }
333
334 if (pIn->firstMipIdInTail != pIn->numMipLevels)
335 {
336 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
337 }
338 }
339 }
340 else
341 {
342 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
343 const UINT_32 heightInM = pOut->height / metaBlk.h;
344
345 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
346
347 if (pOut->pMipInfo != NULL)
348 {
349 pOut->pMipInfo[0].inMiptail = FALSE;
350 pOut->pMipInfo[0].offset = 0;
351 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
352 }
353 }
354
355 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
356 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
357
358 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
359 const UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
360 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
361 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
362 const UINT_8* patIdxTable =
363 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
364 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
365
366 ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
367 pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
368 }
369
370 return ret;
371 }
372
373 /**
374 ************************************************************************************************************************
375 * Gfx10Lib::HwlComputeDccInfo
376 *
377 * @brief
378 * Interface function to compute DCC key info
379 *
380 * @return
381 * ADDR_E_RETURNCODE
382 ************************************************************************************************************************
383 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const384 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
385 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
386 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
387 ) const
388 {
389 ADDR_E_RETURNCODE ret = ADDR_OK;
390
391 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
392 {
393 // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
394 // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
395 ret = ADDR_INVALIDPARAMS;
396 }
397 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
398 {
399 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
400 ret = ADDR_INVALIDPARAMS;
401 }
402 else
403 {
404 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
405 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
406 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
407
408 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
409
410 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
411 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
412 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
413
414 if (ret == ADDR_OK)
415 {
416 Dim3d metaBlk = {};
417 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
418 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
419 pIn->resourceType,
420 pIn->swizzleMode,
421 elemLog2,
422 numFragLog2,
423 pIn->dccKeyFlags.pipeAligned,
424 &metaBlk);
425
426 pOut->dccRamBaseAlign = metaBlkSize;
427 pOut->metaBlkWidth = metaBlk.w;
428 pOut->metaBlkHeight = metaBlk.h;
429 pOut->metaBlkDepth = metaBlk.d;
430 pOut->metaBlkSize = metaBlkSize;
431
432 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
433 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
434 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
435
436 if (pIn->numMipLevels > 1)
437 {
438 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
439
440 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
441
442 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
443 {
444 UINT_32 mipWidth, mipHeight;
445
446 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
447
448 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
449 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
450
451 const UINT_32 pitchInM = mipWidth / metaBlk.w;
452 const UINT_32 heightInM = mipHeight / metaBlk.h;
453 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
454
455 if (pOut->pMipInfo != NULL)
456 {
457 pOut->pMipInfo[i].inMiptail = FALSE;
458 pOut->pMipInfo[i].offset = offset;
459 pOut->pMipInfo[i].sliceSize = mipSliceSize;
460 }
461
462 offset += mipSliceSize;
463 }
464
465 pOut->dccRamSliceSize = offset;
466 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
467 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
468
469 if (pOut->pMipInfo != NULL)
470 {
471 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
472 {
473 pOut->pMipInfo[i].inMiptail = TRUE;
474 pOut->pMipInfo[i].offset = 0;
475 pOut->pMipInfo[i].sliceSize = 0;
476 }
477
478 if (pIn->firstMipIdInTail != pIn->numMipLevels)
479 {
480 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
481 }
482 }
483 }
484 else
485 {
486 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
487 const UINT_32 heightInM = pOut->height / metaBlk.h;
488
489 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
490 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
491 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
492
493 if (pOut->pMipInfo != NULL)
494 {
495 pOut->pMipInfo[0].inMiptail = FALSE;
496 pOut->pMipInfo[0].offset = 0;
497 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
498 }
499 }
500
501 // Get the DCC address equation (copied from DccAddrFromCoord)
502 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
503 const UINT_32 numPipeLog2 = m_pipesLog2;
504 UINT_32 index = m_dccBaseIndex + elemLog2;
505 const UINT_8* patIdxTable;
506
507 if (m_settings.supportRbPlus)
508 {
509 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
510
511 if (pIn->dccKeyFlags.pipeAligned)
512 {
513 index += MaxNumOfBpp;
514
515 if (m_numPkrLog2 < 2)
516 {
517 index += m_pipesLog2 * MaxNumOfBpp;
518 }
519 else
520 {
521 // 4 groups for "m_numPkrLog2 < 2" case
522 index += 4 * MaxNumOfBpp;
523
524 const UINT_32 dccPipePerPkr = 3;
525
526 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
527 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
528 }
529 }
530 }
531 else
532 {
533 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
534
535 if (pIn->dccKeyFlags.pipeAligned)
536 {
537 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
538 }
539 else
540 {
541 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
542 }
543 }
544
545 ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
546 pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
547 }
548 }
549
550 return ret;
551 }
552
553 /**
554 ************************************************************************************************************************
555 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
556 *
557 * @brief
558 * Interface function stub of AddrComputeCmaskAddrFromCoord
559 *
560 * @return
561 * ADDR_E_RETURNCODE
562 ************************************************************************************************************************
563 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)564 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
565 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
566 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
567 {
568 // Only support pipe aligned CMask
569 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
570
571 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
572 input.size = sizeof(input);
573 input.cMaskFlags = pIn->cMaskFlags;
574 input.colorFlags = pIn->colorFlags;
575 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
576 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
577 input.numSlices = Max(pIn->numSlices, 1u);
578 input.swizzleMode = pIn->swizzleMode;
579 input.resourceType = pIn->resourceType;
580
581 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
582 output.size = sizeof(output);
583
584 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
585
586 if (returnCode == ADDR_OK)
587 {
588 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
589 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
590 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
591 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
592 const UINT_8* patIdxTable =
593 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
594 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
595
596 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
597 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
598 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
599 blkSizeLog2 + 1, // +1 for nibble offset
600 pIn->x,
601 pIn->y,
602 pIn->slice,
603 0);
604 const UINT_32 xb = pIn->x / output.metaBlkWidth;
605 const UINT_32 yb = pIn->y / output.metaBlkHeight;
606 const UINT_32 pb = output.pitch / output.metaBlkWidth;
607 const UINT_32 blkIndex = (yb * pb) + xb;
608 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
609
610 pOut->addr = (output.sliceSize * pIn->slice) +
611 (blkIndex * (1 << blkSizeLog2)) +
612 ((blkOffset >> 1) ^ pipeXor);
613 pOut->bitPosition = (blkOffset & 1) << 2;
614 }
615
616 return returnCode;
617 }
618
619 /**
620 ************************************************************************************************************************
621 * Gfx10Lib::HwlComputeHtileAddrFromCoord
622 *
623 * @brief
624 * Interface function stub of AddrComputeHtileAddrFromCoord
625 *
626 * @return
627 * ADDR_E_RETURNCODE
628 ************************************************************************************************************************
629 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)630 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
631 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
632 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
633 {
634 ADDR_E_RETURNCODE returnCode = ADDR_OK;
635
636 if (pIn->numMipLevels > 1)
637 {
638 returnCode = ADDR_NOTIMPLEMENTED;
639 }
640 else
641 {
642 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
643 input.size = sizeof(input);
644 input.hTileFlags = pIn->hTileFlags;
645 input.depthFlags = pIn->depthflags;
646 input.swizzleMode = pIn->swizzleMode;
647 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
648 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
649 input.numSlices = Max(pIn->numSlices, 1u);
650 input.numMipLevels = 1;
651
652 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
653 output.size = sizeof(output);
654
655 returnCode = ComputeHtileInfo(&input, &output);
656
657 if (returnCode == ADDR_OK)
658 {
659 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
660 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
661 const UINT_32 index = m_htileBaseIndex + numSampleLog2;
662 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
663
664 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
665 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
666 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
667 blkSizeLog2 + 1, // +1 for nibble offset
668 pIn->x,
669 pIn->y,
670 pIn->slice,
671 0);
672 const UINT_32 xb = pIn->x / output.metaBlkWidth;
673 const UINT_32 yb = pIn->y / output.metaBlkHeight;
674 const UINT_32 pb = output.pitch / output.metaBlkWidth;
675 const UINT_32 blkIndex = (yb * pb) + xb;
676 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
677
678 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
679 (blkIndex * (1 << blkSizeLog2)) +
680 ((blkOffset >> 1) ^ pipeXor);
681 }
682 }
683
684 return returnCode;
685 }
686
687 /**
688 ************************************************************************************************************************
689 * Gfx10Lib::HwlComputeHtileCoordFromAddr
690 *
691 * @brief
692 * Interface function stub of AddrComputeHtileCoordFromAddr
693 *
694 * @return
695 * ADDR_E_RETURNCODE
696 ************************************************************************************************************************
697 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)698 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
699 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
700 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
701 {
702 ADDR_NOT_IMPLEMENTED();
703
704 return ADDR_OK;
705 }
706
707 /**
708 ************************************************************************************************************************
709 * Gfx10Lib::HwlSupportComputeDccAddrFromCoord
710 *
711 * @brief
712 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
713 *
714 * @return
715 * ADDR_E_RETURNCODE
716 ************************************************************************************************************************
717 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)718 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
719 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
720 {
721 ADDR_E_RETURNCODE returnCode = ADDR_OK;
722
723 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
724 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
725 (pIn->dccKeyFlags.linear == TRUE) ||
726 (pIn->numFrags > 1) ||
727 (pIn->numMipLevels > 1) ||
728 (pIn->mipId > 0))
729 {
730 returnCode = ADDR_NOTSUPPORTED;
731 }
732 else if ((pIn->pitch == 0) ||
733 (pIn->metaBlkWidth == 0) ||
734 (pIn->metaBlkHeight == 0) ||
735 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
736 {
737 returnCode = ADDR_NOTSUPPORTED;
738 }
739
740 return returnCode;
741 }
742
743 /**
744 ************************************************************************************************************************
745 * Gfx10Lib::HwlComputeDccAddrFromCoord
746 *
747 * @brief
748 * Interface function stub of AddrComputeDccAddrFromCoord
749 *
750 * @return
751 * N/A
752 ************************************************************************************************************************
753 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)754 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
755 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
756 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
757 {
758 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
759 const UINT_32 numPipeLog2 = m_pipesLog2;
760 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
761 UINT_32 index = m_dccBaseIndex + elemLog2;
762 const UINT_8* patIdxTable;
763
764 if (m_settings.supportRbPlus)
765 {
766 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
767
768 if (pIn->dccKeyFlags.pipeAligned)
769 {
770 index += MaxNumOfBpp;
771
772 if (m_numPkrLog2 < 2)
773 {
774 index += m_pipesLog2 * MaxNumOfBpp;
775 }
776 else
777 {
778 // 4 groups for "m_numPkrLog2 < 2" case
779 index += 4 * MaxNumOfBpp;
780
781 const UINT_32 dccPipePerPkr = 3;
782
783 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
784 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
785 }
786 }
787 }
788 else
789 {
790 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
791
792 if (pIn->dccKeyFlags.pipeAligned)
793 {
794 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
795 }
796 else
797 {
798 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
799 }
800 }
801
802 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
803 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
804 const UINT_32 blkOffset =
805 ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
806 blkSizeLog2 + 1, // +1 for nibble offset
807 pIn->x,
808 pIn->y,
809 pIn->slice,
810 0);
811 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
812 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
813 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
814 const UINT_32 blkIndex = (yb * pb) + xb;
815 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
816
817 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
818 (blkIndex * (1 << blkSizeLog2)) +
819 ((blkOffset >> 1) ^ pipeXor);
820 }
821
822 /**
823 ************************************************************************************************************************
824 * Gfx10Lib::HwlInitGlobalParams
825 *
826 * @brief
827 * Initializes global parameters
828 *
829 * @return
830 * TRUE if all settings are valid
831 *
832 ************************************************************************************************************************
833 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)834 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
835 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
836 {
837 BOOL_32 valid = TRUE;
838 GB_ADDR_CONFIG_GFX10 gbAddrConfig;
839
840 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
841
842 // These values are copied from CModel code
843 switch (gbAddrConfig.bits.NUM_PIPES)
844 {
845 case ADDR_CONFIG_1_PIPE:
846 m_pipes = 1;
847 m_pipesLog2 = 0;
848 break;
849 case ADDR_CONFIG_2_PIPE:
850 m_pipes = 2;
851 m_pipesLog2 = 1;
852 break;
853 case ADDR_CONFIG_4_PIPE:
854 m_pipes = 4;
855 m_pipesLog2 = 2;
856 break;
857 case ADDR_CONFIG_8_PIPE:
858 m_pipes = 8;
859 m_pipesLog2 = 3;
860 break;
861 case ADDR_CONFIG_16_PIPE:
862 m_pipes = 16;
863 m_pipesLog2 = 4;
864 break;
865 case ADDR_CONFIG_32_PIPE:
866 m_pipes = 32;
867 m_pipesLog2 = 5;
868 break;
869 case ADDR_CONFIG_64_PIPE:
870 m_pipes = 64;
871 m_pipesLog2 = 6;
872 break;
873 default:
874 ADDR_ASSERT_ALWAYS();
875 valid = FALSE;
876 break;
877 }
878
879 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
880 {
881 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
882 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
883 m_pipeInterleaveLog2 = 8;
884 break;
885 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
886 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
887 m_pipeInterleaveLog2 = 9;
888 break;
889 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
890 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
891 m_pipeInterleaveLog2 = 10;
892 break;
893 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
894 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
895 m_pipeInterleaveLog2 = 11;
896 break;
897 default:
898 ADDR_ASSERT_ALWAYS();
899 valid = FALSE;
900 break;
901 }
902
903 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
904 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
905 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
906 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
907
908 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
909 {
910 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
911 m_maxCompFrag = 1;
912 m_maxCompFragLog2 = 0;
913 break;
914 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
915 m_maxCompFrag = 2;
916 m_maxCompFragLog2 = 1;
917 break;
918 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
919 m_maxCompFrag = 4;
920 m_maxCompFragLog2 = 2;
921 break;
922 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
923 m_maxCompFrag = 8;
924 m_maxCompFragLog2 = 3;
925 break;
926 default:
927 ADDR_ASSERT_ALWAYS();
928 valid = FALSE;
929 break;
930 }
931
932 // Skip unaligned case
933 m_xmaskBaseIndex += MaxNumOfBppCMask;
934 m_htileBaseIndex += MaxNumOfAA;
935
936 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask;
937 m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
938 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
939
940 if (m_settings.supportRbPlus)
941 {
942 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
943 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
944
945 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
946
947 ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
948 sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
949
950 if (m_numPkrLog2 >= 2)
951 {
952 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
953 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask;
954 m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
955 }
956 }
957 else
958 {
959 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
960 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
961 1;
962
963 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
964 ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) ==
965 (numPipeType + 1) * MaxNumOfBppCMask);
966 }
967
968 if (m_settings.supportRbPlus)
969 {
970 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
971 // corresponding SW_64KB_* mode
972 m_blockVarSizeLog2 = m_pipesLog2 + 14;
973 }
974
975 if (valid)
976 {
977 InitEquationTable();
978 }
979
980 return valid;
981 }
982
983 /**
984 ************************************************************************************************************************
985 * Gfx10Lib::HwlConvertChipFamily
986 *
987 * @brief
988 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
989 * @return
990 * ChipFamily
991 ************************************************************************************************************************
992 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)993 ChipFamily Gfx10Lib::HwlConvertChipFamily(
994 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
995 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
996 {
997 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
998
999 m_settings.dccUnsup3DSwDis = 1;
1000 m_settings.dsMipmapHtileFix = 1;
1001
1002 switch (chipFamily)
1003 {
1004 case FAMILY_NV:
1005 if (ASICREV_IS_NAVI10_P(chipRevision))
1006 {
1007 m_settings.dsMipmapHtileFix = 0;
1008 m_settings.isDcn20 = 1;
1009 }
1010
1011 if (ASICREV_IS_NAVI12_P(chipRevision))
1012 {
1013 m_settings.isDcn20 = 1;
1014 }
1015
1016 if (ASICREV_IS_NAVI14_M(chipRevision))
1017 {
1018 m_settings.isDcn20 = 1;
1019 }
1020
1021 if (ASICREV_IS_NAVI21_M(chipRevision))
1022 {
1023 m_settings.supportRbPlus = 1;
1024 m_settings.dccUnsup3DSwDis = 0;
1025 }
1026
1027 if (ASICREV_IS_NAVI22_P(chipRevision))
1028 {
1029 m_settings.supportRbPlus = 1;
1030 m_settings.dccUnsup3DSwDis = 0;
1031 }
1032
1033 if (ASICREV_IS_NAVI23_P(chipRevision))
1034 {
1035 m_settings.supportRbPlus = 1;
1036 m_settings.dccUnsup3DSwDis = 0;
1037 }
1038
1039 if (ASICREV_IS_NAVI24_P(chipRevision))
1040 {
1041 m_settings.supportRbPlus = 1;
1042 m_settings.dccUnsup3DSwDis = 0;
1043 }
1044 break;
1045
1046 case FAMILY_VGH:
1047 if (ASICREV_IS_VANGOGH(chipRevision))
1048 {
1049 m_settings.supportRbPlus = 1;
1050 m_settings.dccUnsup3DSwDis = 0;
1051 }
1052 else
1053 {
1054 ADDR_ASSERT(!"Unknown chip revision");
1055 }
1056
1057 break;
1058 case FAMILY_RMB:
1059 if (ASICREV_IS_REMBRANDT(chipRevision))
1060 {
1061 m_settings.supportRbPlus = 1;
1062 m_settings.dccUnsup3DSwDis = 0;
1063 }
1064 else
1065 {
1066 ADDR_ASSERT(!"Unknown chip revision");
1067 }
1068
1069 break;
1070 case FAMILY_RPL:
1071 if (ASICREV_IS_RAPHAEL(chipRevision))
1072 {
1073 m_settings.supportRbPlus = 1;
1074 m_settings.dccUnsup3DSwDis = 0;
1075 }
1076 break;
1077 case FAMILY_MDN:
1078 if (ASICREV_IS_MENDOCINO(chipRevision))
1079 {
1080 m_settings.supportRbPlus = 1;
1081 m_settings.dccUnsup3DSwDis = 0;
1082 }
1083 else
1084 {
1085 ADDR_ASSERT(!"Unknown chip revision");
1086 }
1087 break;
1088 default:
1089 ADDR_ASSERT(!"Unknown chip family");
1090 break;
1091 }
1092
1093 m_configFlags.use32bppFor422Fmt = TRUE;
1094
1095 return family;
1096 }
1097
1098 /**
1099 ************************************************************************************************************************
1100 * Gfx10Lib::GetBlk256SizeLog2
1101 *
1102 * @brief
1103 * Get block 256 size
1104 *
1105 * @return
1106 * N/A
1107 ************************************************************************************************************************
1108 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1109 void Gfx10Lib::GetBlk256SizeLog2(
1110 AddrResourceType resourceType, ///< [in] Resource type
1111 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1112 UINT_32 elemLog2, ///< [in] element size log2
1113 UINT_32 numSamplesLog2, ///< [in] number of samples
1114 Dim3d* pBlock ///< [out] block size
1115 ) const
1116 {
1117 if (IsThin(resourceType, swizzleMode))
1118 {
1119 UINT_32 blockBits = 8 - elemLog2;
1120
1121 if (IsZOrderSwizzle(swizzleMode))
1122 {
1123 blockBits -= numSamplesLog2;
1124 }
1125
1126 pBlock->w = (blockBits >> 1) + (blockBits & 1);
1127 pBlock->h = (blockBits >> 1);
1128 pBlock->d = 0;
1129 }
1130 else
1131 {
1132 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1133
1134 UINT_32 blockBits = 8 - elemLog2;
1135
1136 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1137 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1138 pBlock->h = (blockBits / 3);
1139 }
1140 }
1141
1142 /**
1143 ************************************************************************************************************************
1144 * Gfx10Lib::GetCompressedBlockSizeLog2
1145 *
1146 * @brief
1147 * Get compress block size
1148 *
1149 * @return
1150 * N/A
1151 ************************************************************************************************************************
1152 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1153 void Gfx10Lib::GetCompressedBlockSizeLog2(
1154 Gfx10DataType dataType, ///< [in] Data type
1155 AddrResourceType resourceType, ///< [in] Resource type
1156 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1157 UINT_32 elemLog2, ///< [in] element size log2
1158 UINT_32 numSamplesLog2, ///< [in] number of samples
1159 Dim3d* pBlock ///< [out] block size
1160 ) const
1161 {
1162 if (dataType == Gfx10DataColor)
1163 {
1164 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1165 }
1166 else
1167 {
1168 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1169 pBlock->w = 3;
1170 pBlock->h = 3;
1171 pBlock->d = 0;
1172 }
1173 }
1174
1175 /**
1176 ************************************************************************************************************************
1177 * Gfx10Lib::GetMetaOverlapLog2
1178 *
1179 * @brief
1180 * Get meta block overlap
1181 *
1182 * @return
1183 * N/A
1184 ************************************************************************************************************************
1185 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1186 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1187 Gfx10DataType dataType, ///< [in] Data type
1188 AddrResourceType resourceType, ///< [in] Resource type
1189 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1190 UINT_32 elemLog2, ///< [in] element size log2
1191 UINT_32 numSamplesLog2 ///< [in] number of samples
1192 ) const
1193 {
1194 Dim3d compBlock;
1195 Dim3d microBlock;
1196
1197 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1198 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
1199
1200 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1201 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1202 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1203 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1204 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1205
1206 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1207 {
1208 overlap++;
1209 }
1210
1211 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1212 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1213 {
1214 overlap--;
1215 }
1216 overlap = Max(overlap, 0);
1217 return overlap;
1218 }
1219
1220 /**
1221 ************************************************************************************************************************
1222 * Gfx10Lib::Get3DMetaOverlapLog2
1223 *
1224 * @brief
1225 * Get 3d meta block overlap
1226 *
1227 * @return
1228 * N/A
1229 ************************************************************************************************************************
1230 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1231 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1232 AddrResourceType resourceType, ///< [in] Resource type
1233 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1234 UINT_32 elemLog2 ///< [in] element size log2
1235 ) const
1236 {
1237 Dim3d microBlock;
1238 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
1239
1240 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1241
1242 if (m_settings.supportRbPlus)
1243 {
1244 overlap++;
1245 }
1246
1247 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1248 {
1249 overlap = 0;
1250 }
1251 return overlap;
1252 }
1253
1254 /**
1255 ************************************************************************************************************************
1256 * Gfx10Lib::GetPipeRotateAmount
1257 *
1258 * @brief
1259 * Get pipe rotate amount
1260 *
1261 * @return
1262 * Pipe rotate amount
1263 ************************************************************************************************************************
1264 */
1265
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1266 INT_32 Gfx10Lib::GetPipeRotateAmount(
1267 AddrResourceType resourceType, ///< [in] Resource type
1268 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1269 ) const
1270 {
1271 INT_32 amount = 0;
1272
1273 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1274 {
1275 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1276 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1277 }
1278
1279 return amount;
1280 }
1281
1282 /**
1283 ************************************************************************************************************************
1284 * Gfx10Lib::GetMetaBlkSize
1285 *
1286 * @brief
1287 * Get metadata block size
1288 *
1289 * @return
1290 * Meta block size
1291 ************************************************************************************************************************
1292 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1293 UINT_32 Gfx10Lib::GetMetaBlkSize(
1294 Gfx10DataType dataType, ///< [in] Data type
1295 AddrResourceType resourceType, ///< [in] Resource type
1296 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1297 UINT_32 elemLog2, ///< [in] element size log2
1298 UINT_32 numSamplesLog2, ///< [in] number of samples
1299 BOOL_32 pipeAlign, ///< [in] pipe align
1300 Dim3d* pBlock ///< [out] block size
1301 ) const
1302 {
1303 INT_32 metablkSizeLog2;
1304
1305 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1306 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1307 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1308 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1309 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1310 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1311 INT_32 numPipesLog2 = m_pipesLog2;
1312
1313 if (IsThin(resourceType, swizzleMode))
1314 {
1315 if ((pipeAlign == FALSE) ||
1316 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1317 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1318 {
1319 if (pipeAlign)
1320 {
1321 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1322 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1323 }
1324 else
1325 {
1326 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1327 }
1328 }
1329 else
1330 {
1331 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1332 {
1333 numPipesLog2++;
1334 }
1335
1336 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1337
1338 if (numPipesLog2 >= 4)
1339 {
1340 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1341
1342 // In 16Bpe 8xaa, we have an extra overlap bit
1343 if ((pipeRotateLog2 > 0) &&
1344 (elemLog2 == 4) &&
1345 (numSamplesLog2 == 3) &&
1346 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1347 {
1348 overlapLog2++;
1349 }
1350
1351 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1352 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1353
1354 if (m_settings.supportRbPlus &&
1355 IsRtOptSwizzle(swizzleMode) &&
1356 (numPipesLog2 == 6) &&
1357 (numSamplesLog2 == 3) &&
1358 (m_maxCompFragLog2 == 3) &&
1359 (metablkSizeLog2 < 15))
1360 {
1361 metablkSizeLog2 = 15;
1362 }
1363 }
1364 else
1365 {
1366 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1367 }
1368
1369 if (dataType == Gfx10DataDepthStencil)
1370 {
1371 // For htile surfaces, pad meta block size to 2K * num_pipes
1372 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1373 }
1374
1375 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1376
1377 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1378 {
1379 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1380
1381 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1382 }
1383 }
1384
1385 const INT_32 metablkBitsLog2 =
1386 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1387 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1388 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1389 pBlock->d = 1;
1390 }
1391 else
1392 {
1393 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1394
1395 if (pipeAlign)
1396 {
1397 if (m_settings.supportRbPlus &&
1398 (m_pipesLog2 == m_numSaLog2 + 1) &&
1399 (m_pipesLog2 > 1) &&
1400 IsRbAligned(resourceType, swizzleMode))
1401 {
1402 numPipesLog2++;
1403 }
1404
1405 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1406
1407 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1408 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1409 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1410 }
1411 else
1412 {
1413 metablkSizeLog2 = 12;
1414 }
1415
1416 const INT_32 metablkBitsLog2 =
1417 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1418 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1419 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1420 pBlock->d = 1 << (metablkBitsLog2 / 3);
1421 }
1422
1423 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1424 }
1425
/**
************************************************************************************************************************
*   Gfx10Lib::ConvertSwizzlePatternToEquation
*
*   @brief
*       Convert swizzle pattern to equation.
*
*       For every address bit i of the block, the equation stores a primary component in addr[i] and up to two
*       extra XOR components in xor1[i] / xor2[i]. Components use channel 0 for x, 1 for y and 2 for z; x indices
*       are offset by elemLog2. Three cases are handled: non-XOR swizzle modes, XOR modes on thin resources, and
*       XOR modes on all remaining (thick) resources.
*
*   @return
*       N/A (result is written to *pEquation)
************************************************************************************************************************
*/
VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
    UINT_32                elemLog2,  ///< [in] element bytes log2
    AddrResourceType       rsrcType,  ///< [in] resource type
    AddrSwizzleMode        swMode,    ///< [in] swizzle mode
    const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
    ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
    const
{
    // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
    ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
    GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

    const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
    const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
    // Start from an all-zero equation so every component not written below stays invalid.
    memset(pEquation, 0, sizeof(ADDR_EQUATION));
    pEquation->numBits            = blockSizeLog2;
    pEquation->numBitComponents   = pPatInfo->maxItemCount;
    pEquation->stackedDepthSlices = FALSE;

    // The lowest elemLog2 address bits address bytes inside one element: x channel, bit i.
    for (UINT_32 i = 0; i < elemLog2; i++)
    {
        pEquation->addr[i].channel = 0;
        pEquation->addr[i].valid   = 1;
        pEquation->addr[i].index   = i;
    }

    if (IsXor(swMode) == FALSE)
    {
        // Non-XOR modes: every address bit is driven by exactly one coordinate bit.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            ADDR_ASSERT(IsPow2(pSwizzle[i].value));

            if (pSwizzle[i].x != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));

                pEquation->addr[i].channel = 0;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
            }
            else if (pSwizzle[i].y != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));

                pEquation->addr[i].channel = 1;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].y);
            }
            else
            {
                ADDR_ASSERT(pSwizzle[i].z != 0);
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                pEquation->addr[i].channel = 2;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].z);
            }

            pEquation->xor1[i].value = 0;
            pEquation->xor2[i].value = 0;
        }
    }
    else if (IsThin(rsrcType, swMode))
    {
        // XOR mode, thin resource.
        Dim3d dim;
        ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);

        const UINT_32 blkXLog2 = Log2(dim.w);
        const UINT_32 blkYLog2 = Log2(dim.h);
        const UINT_32 blkXMask = dim.w - 1;
        const UINT_32 blkYMask = dim.h - 1;

        // swizzle[] holds, per address bit, the in-block coordinate bits that still have to be placed.
        // xMask / yMask collect the coordinate bits already used as a primary (addr) component.
        // bMask has bit i set once address bit i is fully resolved.
        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // First pass: place single-component bits directly and peel off out-of-block x/y (and any z) components
        // of multi-component bits into the xor slots.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].y != 0);
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                // A z component, if present, always goes to xor2.
                if (pSwizzle[i].z != 0)
                {
                    ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                    pEquation->xor2[i].channel = 2;
                    pEquation->xor2[i].valid   = 1;
                    pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
                }

                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                // x bits at or above the block width cannot be primary components of this block.
                const UINT_32 xHi = swizzle[i].x & (~blkXMask);

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                // Same for y bits at or above the block height.
                const UINT_32 yHi = swizzle[i].y & (~blkYMask);

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    // xor1 is already taken when a high x bit was found above.
                    if (xHi == 0)
                    {
                        ADDR_ASSERT(pEquation->xor1[i].value == 0);
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                // Nothing in-block left to place: this address bit is done.
                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        // All bits below the pipe interleave are expected to be resolved by the first pass.
        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Iterate until every address bit is resolved. Each sweep either promotes a bit whose remaining pattern
        // is a single coordinate bit to a primary component, or moves coordinate bits that are already primary
        // elsewhere (present in xMask / yMask) into the xor slots.
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].y != 0);
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        // Coordinate bits of this pattern that are already primary components of other bits.
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;

                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        // Drop what was just placed; the remainder is revisited on a later sweep.
                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                    }
                }
            }
        }

        // Every in-block x and y bit must have ended up as exactly one primary component.
        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
    }
    else
    {
        // XOR mode, non-thin resource: same scheme as above extended with the z dimension. Block dimensions come
        // from the 4K table when blockSizeLog2 is 12 and from the 64K table otherwise.
        const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
        const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
        const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
        const UINT_32 blkXMask = (1 << blkXLog2) - 1;
        const UINT_32 blkYMask = (1 << blkYLog2) - 1;
        const UINT_32 blkZMask = (1 << blkZLog2) - 1;

        // Same bookkeeping as in the thin case, plus zMask for the z coordinate bits.
        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          zMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // First pass: place single-component bits directly and peel off out-of-block components of
        // multi-component bits into the xor slots.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else if (pSwizzle[i].y != 0)
                {
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].z != 0);
                    ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
                    zMask |= pSwizzle[i].z;

                    pEquation->addr[i].channel = 2;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].z);

                    ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = pSwizzle[i].z;
                swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                // Components at or above the block dimensions cannot be primary components of this block.
                const UINT_32 xHi = swizzle[i].x & (~blkXMask);
                const UINT_32 yHi = swizzle[i].y & (~blkYMask);
                const UINT_32 zHi = swizzle[i].z & (~blkZMask);

                // Only two xor slots exist, so at most two of the three may carry a high component.
                ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    // Use xor1 if it is still free, otherwise fall back to xor2.
                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                if (zHi != 0)
                {
                    ADDR_ASSERT(IsPow2(zHi));

                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 2;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(zHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 2;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(zHi);
                    }

                    swizzle[i].z &= blkZMask;
                }

                // Nothing in-block left to place: this address bit is done.
                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        // All bits below the pipe interleave are expected to be resolved by the first pass.
        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Iterate until every address bit is resolved (see the thin case for the scheme).
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else if (swizzle[i].y != 0)
                        {
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].z != 0);
                            ADDR_ASSERT((zMask & swizzle[i].z) == 0);
                            zMask |= swizzle[i].z;

                            pEquation->addr[i].channel = 2;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].z);

                            ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        // Coordinate bits of this pattern that are already primary components of other bits.
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;
                        const UINT_32 z = swizzle[i].z & zMask;

                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        if (z != 0)
                        {
                            ADDR_ASSERT(IsPow2(z));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 2;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(z);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 2;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(z);
                            }
                        }

                        // Drop what was just placed; the remainder is revisited on a later sweep.
                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                        swizzle[i].z &= ~z;
                    }
                }
            }
        }

        // Every in-block x, y and z bit must have ended up as exactly one primary component.
        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
    }
}
1965
1966 /**
1967 ************************************************************************************************************************
1968 * Gfx10Lib::InitEquationTable
1969 *
1970 * @brief
1971 * Initialize Equation table.
1972 *
1973 * @return
1974 * N/A
1975 ************************************************************************************************************************
1976 */
InitEquationTable()1977 VOID Gfx10Lib::InitEquationTable()
1978 {
1979 memset(m_equationTable, 0, sizeof(m_equationTable));
1980
1981 // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1982 // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1983 // computing 2D resources.
1984 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1985 {
1986 // Add offset. Start iterating from ADDR_RSRC_TEX_2D
1987 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1988
1989 // Iterate through the maximum number of swizzlemodes a type can hold
1990 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1991 {
1992 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1993
1994 // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
1995 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1996 {
1997 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1998 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
1999 // overwriting the choice.
2000 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
2001
2002 if (pPatInfo != NULL)
2003 {
2004 ADDR_ASSERT(IsValidSwMode(swMode));
2005 if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
2006 {
2007 ADDR_EQUATION equation = {};
2008
2009 // Passing in pPatInfo to get the addr equation
2010 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2011
2012 equationIndex = m_numEquations;
2013 ADDR_ASSERT(equationIndex < EquationTableSize);
2014 // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
2015 m_equationTable[equationIndex] = equation;
2016 // Increment m_numEquations
2017 m_numEquations++;
2018 }
2019 else // There is no equationIndex
2020 {
2021 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2022 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2023 ADDR_ASSERT(rsrcTypeIdx == 1);
2024 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2025 ADDR_ASSERT(m_settings.supportRbPlus == 1);
2026 }
2027 }
2028 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
2029 // iteration in this nested for-loop
2030 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2031 }
2032 }
2033 }
2034 }
2035
2036 /**
2037 ************************************************************************************************************************
2038 * Gfx10Lib::HwlGetEquationIndex
2039 *
2040 * @brief
2041 * Interface function stub of GetEquationIndex
2042 *
2043 * @return
2044 * ADDR_E_RETURNCODE
2045 ************************************************************************************************************************
2046 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2047 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2048 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2049 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2050 ) const
2051 {
2052 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2053
2054 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2055 (pIn->resourceType == ADDR_RSRC_TEX_3D))
2056 {
2057 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2058 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
2059 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2060
2061 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2062 }
2063
2064 if (pOut->pMipInfo != NULL)
2065 {
2066 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2067 {
2068 pOut->pMipInfo[i].equationIndex = equationIdx;
2069 }
2070 }
2071
2072 return equationIdx;
2073 }
2074
2075 /**
2076 ************************************************************************************************************************
2077 * Gfx10Lib::GetValidDisplaySwizzleModes
2078 *
2079 * @brief
2080 * Get valid swizzle modes mask for displayable surface
2081 *
2082 * @return
2083 * Valid swizzle modes mask for displayable surface
2084 ************************************************************************************************************************
2085 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2086 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2087 UINT_32 bpp
2088 ) const
2089 {
2090 UINT_32 swModeMask = 0;
2091
2092 if (bpp <= 64)
2093 {
2094 if (m_settings.isDcn20)
2095 {
2096 swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2097 }
2098 else
2099 {
2100 swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2101 }
2102 }
2103
2104 return swModeMask;
2105 }
2106
2107 /**
2108 ************************************************************************************************************************
2109 * Gfx10Lib::IsValidDisplaySwizzleMode
2110 *
2111 * @brief
2112 * Check if a swizzle mode is supported by display engine
2113 *
2114 * @return
2115 * TRUE is swizzle mode is supported by display engine
2116 ************************************************************************************************************************
2117 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2118 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2119 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2120 ) const
2121 {
2122 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2123
2124 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2125 }
2126
2127 /**
2128 ************************************************************************************************************************
2129 * Gfx10Lib::GetMaxNumMipsInTail
2130 *
2131 * @brief
2132 * Return max number of mips in tails
2133 *
2134 * @return
2135 * Max number of mips in tails
2136 ************************************************************************************************************************
2137 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2138 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2139 UINT_32 blockSizeLog2, ///< block size log2
2140 BOOL_32 isThin ///< is thin or thick
2141 ) const
2142 {
2143 UINT_32 effectiveLog2 = blockSizeLog2;
2144
2145 if (isThin == FALSE)
2146 {
2147 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2148 }
2149
2150 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2151 }
2152
2153 /**
2154 ************************************************************************************************************************
2155 * Gfx10Lib::HwlComputePipeBankXor
2156 *
2157 * @brief
2158 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2159 *
2160 * @return
2161 * PipeBankXor value
2162 ************************************************************************************************************************
2163 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2164 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2165 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2166 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2167 ) const
2168 {
2169 if (IsNonPrtXor(pIn->swizzleMode))
2170 {
2171 const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2172
2173 // No pipe xor...
2174 const UINT_32 pipeXor = 0;
2175 UINT_32 bankXor = 0;
2176
2177 const UINT_32 XorPatternLen = 8;
2178 static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1};
2179 static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1};
2180 static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7};
2181 static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14};
2182 static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2183
2184 switch (bankBits)
2185 {
2186 case 1:
2187 case 2:
2188 case 3:
2189 case 4:
2190 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2191 break;
2192 default:
2193 // valid bank bits should be 0~4
2194 ADDR_ASSERT_ALWAYS();
2195 case 0:
2196 break;
2197 }
2198
2199 pOut->pipeBankXor = bankXor | pipeXor;
2200 }
2201 else
2202 {
2203 pOut->pipeBankXor = 0;
2204 }
2205
2206 return ADDR_OK;
2207 }
2208
2209 /**
2210 ************************************************************************************************************************
2211 * Gfx10Lib::HwlComputeSlicePipeBankXor
2212 *
2213 * @brief
2214 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2215 *
2216 * @return
2217 * PipeBankXor value
2218 ************************************************************************************************************************
2219 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2220 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2221 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2222 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2223 ) const
2224 {
2225 if (IsNonPrtXor(pIn->swizzleMode))
2226 {
2227 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2228 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2229 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2230
2231 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2232
2233 if (pIn->bpe != 0)
2234 {
2235 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2236 pIn->resourceType,
2237 Log2(pIn->bpe >> 3),
2238 1);
2239
2240 if (pPatInfo != NULL)
2241 {
2242 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
2243 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2244
2245 const UINT_32 pipeBankXorOffset =
2246 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2247 blockBits,
2248 0,
2249 0,
2250 pIn->slice,
2251 0);
2252
2253 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2254
2255 // Should have no bit set under pipe interleave
2256 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2257
2258 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2259 }
2260 }
2261 }
2262 else
2263 {
2264 pOut->pipeBankXor = 0;
2265 }
2266
2267 return ADDR_OK;
2268 }
2269
2270 /**
2271 ************************************************************************************************************************
2272 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2273 *
2274 * @brief
2275 * Compute sub resource offset to support swizzle pattern
2276 *
2277 * @return
2278 * Offset
2279 ************************************************************************************************************************
2280 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2281 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2282 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2283 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2284 ) const
2285 {
2286 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2287
2288 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2289
2290 return ADDR_OK;
2291 }
2292
/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeNonBlockCompressedView
*
*   @brief
*       Compute non-block-compressed view for a given mipmap level/slice. On success this fills pOut->offset,
*       pOut->pipeBankXor, pOut->mipId, pOut->numMipLevels, pOut->unalignedWidth and pOut->unalignedHeight.
*
*   @return
*       ADDR_INVALIDPARAMS if the resource type/swizzle mode is not thin, ADDR_NOTSUPPORTED if the format lies
*       outside both accepted format ranges, otherwise the return code of the surface info computation
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
    {
        // Only thin swizzle mode can have a NonBC view...
        returnCode = ADDR_INVALIDPARAMS;
    }
    else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
             ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
    {
        // Only support BC1~BC7, ASTC, or ETC2 for now...
        returnCode = ADDR_NOTSUPPORTED;
    }
    else
    {
        // bcWidth/bcHeight are filled in by GetBitsPerPixel; they are used below as the divisors that convert
        // pixel dimensions into element dimensions (presumably the compressed block size - confirm in ElemLib).
        UINT_32 bcWidth, bcHeight;
        UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);

        // Describe the same surface in element units, single-sampled, and recompute its layout
        ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
        infoIn.flags        = pIn->flags;
        infoIn.swizzleMode  = pIn->swizzleMode;
        infoIn.resourceType = pIn->resourceType;
        infoIn.bpp          = bpp;
        infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
        infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
        infoIn.numSlices    = pIn->numSlices;
        infoIn.numMipLevels = pIn->numMipLevels;
        infoIn.numSamples   = 1;
        infoIn.numFrags     = 1;

        // NOTE(review): the mip count is only bounded by this assert, and mipInfo[pIn->mipId] is indexed below
        // without any further range check on pIn->mipId.
        ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
        ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
        infoOut.pMipInfo = mipInfo;

        const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;

        if (tiled)
        {
            returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
        }
        else
        {
            returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
        }

        if (returnCode == ADDR_OK)
        {
            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
            subOffIn.swizzleMode      = infoIn.swizzleMode;
            subOffIn.resourceType     = infoIn.resourceType;
            subOffIn.slice            = pIn->slice;
            subOffIn.sliceSize        = infoOut.sliceSize;
            subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
            subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;

            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};

            // For any mipmap level, move nonBc view base address by offset
            HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
            pOut->offset = subOffOut.offset;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
            slicePbXorIn.bpe             = infoIn.bpp;
            slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
            slicePbXorIn.resourceType    = infoIn.resourceType;
            slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
            slicePbXorIn.slice           = pIn->slice;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};

            // For any mipmap level, nonBc view should use computed pbXor
            HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
            pOut->pipeBankXor = slicePbXorOut.pipeBankXor;

            // Linear surfaces are never treated as being in a mip tail.
            // requestMipWidth/Height: size of the requested mip level in elements, derived from the pixel size
            // of that level (at least 1 pixel).
            const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
            const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
            const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);

            if (inTail)
            {
                // For mipmap level that is in mip tail block, hack a lot of things...
                // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
                // are fit in tail block:

                // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
                pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;

                // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
                pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);

                // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);

                // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
            }
            // This check should cover at least mipId == 0
            else if (requestMipWidth << pIn->mipId == infoIn.width)
            {
                // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
                // - only one mipmap level and mipId = 0
                pOut->mipId        = 0;
                pOut->numMipLevels = 1;

                // (mip0) width = requestMipWidth
                pOut->unalignedWidth = requestMipWidth;

                // (mip0) height = requestMipHeight
                pOut->unalignedHeight = requestMipHeight;
            }
            else
            {
                // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
                // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
                // because single mip view may have different pitch value than original (multiple) mip view...
                // A simple case would be:
                // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
                // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
                //   mip0 width = 0x101/mip1 width = 0x80
                // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
                // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.

                // - 2 levels and mipId = 1
                pOut->mipId        = 1;
                pOut->numMipLevels = 2;

                // Element size of the level directly above the requested one.
                // The (mipId - 1) shift relies on mipId == 0 having been handled by the branch above.
                const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
                const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);

                // TRUE when the requested level is small enough to meet the same dimension limits the in-tail
                // branch clamps to (half a block wide, one block high); an extra element is then added below.
                const BOOL_32 needToAvoidInTail =
                    tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
                    TRUE : FALSE;

                // Block-aligned size of the requested level as derived from the original (element-unit) mip chain
                const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
                const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);

                // An extra element is needed when the upper level is smaller than twice the requested level, or
                // exactly twice it while either the tail must be avoided or the original chain's aligned size is
                // larger than the aligned size of the requested level alone.
                const BOOL_32 needExtraWidth =
                    ((upperMipWidth < requestMipWidth * 2) ||
                     ((upperMipWidth == requestMipWidth * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;

                const BOOL_32 needExtraHeight =
                    ((upperMipHeight < requestMipHeight * 2) ||
                     ((upperMipHeight == requestMipHeight * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;

                // (mip0) width = upper mip level width, plus one extra element if needed
                pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);

                // (mip0) height = upper mip level height, plus one extra element if needed
                pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
            }

            // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
            ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
            // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
            ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
        }
    }

    return returnCode;
}
2475
2476 /**
2477 ************************************************************************************************************************
2478 * Gfx10Lib::ValidateNonSwModeParams
2479 *
2480 * @brief
2481 * Validate compute surface info params except swizzle mode
2482 *
2483 * @return
2484 * TRUE if parameters are valid, FALSE otherwise
2485 ************************************************************************************************************************
2486 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2487 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2488 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2489 {
2490 BOOL_32 valid = TRUE;
2491
2492 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2493 {
2494 ADDR_ASSERT_ALWAYS();
2495 valid = FALSE;
2496 }
2497
2498 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2499 {
2500 ADDR_ASSERT_ALWAYS();
2501 valid = FALSE;
2502 }
2503
2504 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2505 const AddrResourceType rsrcType = pIn->resourceType;
2506 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2507 const BOOL_32 msaa = (pIn->numFrags > 1);
2508 const BOOL_32 display = flags.display;
2509 const BOOL_32 tex3d = IsTex3d(rsrcType);
2510 const BOOL_32 tex2d = IsTex2d(rsrcType);
2511 const BOOL_32 tex1d = IsTex1d(rsrcType);
2512 const BOOL_32 stereo = flags.qbStereo;
2513
2514 // Resource type check
2515 if (tex1d)
2516 {
2517 if (msaa || display || stereo)
2518 {
2519 ADDR_ASSERT_ALWAYS();
2520 valid = FALSE;
2521 }
2522 }
2523 else if (tex2d)
2524 {
2525 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2526 {
2527 ADDR_ASSERT_ALWAYS();
2528 valid = FALSE;
2529 }
2530 }
2531 else if (tex3d)
2532 {
2533 if (msaa || display || stereo)
2534 {
2535 ADDR_ASSERT_ALWAYS();
2536 valid = FALSE;
2537 }
2538 }
2539 else
2540 {
2541 ADDR_ASSERT_ALWAYS();
2542 valid = FALSE;
2543 }
2544
2545 return valid;
2546 }
2547
2548 /**
2549 ************************************************************************************************************************
2550 * Gfx10Lib::ValidateSwModeParams
2551 *
2552 * @brief
2553 * Validate compute surface info related to swizzle mode
2554 *
2555 * @return
2556 * TRUE if parameters are valid, FALSE otherwise
2557 ************************************************************************************************************************
2558 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2559 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2560 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2561 {
2562 BOOL_32 valid = TRUE;
2563
2564 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2565 {
2566 ADDR_ASSERT_ALWAYS();
2567 valid = FALSE;
2568 }
2569 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2570 {
2571 ADDR_ASSERT_ALWAYS();
2572 valid = FALSE;
2573 }
2574
2575 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2576 const AddrResourceType rsrcType = pIn->resourceType;
2577 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2578 const BOOL_32 msaa = (pIn->numFrags > 1);
2579 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2580 const BOOL_32 color = flags.color;
2581 const BOOL_32 display = flags.display;
2582 const BOOL_32 tex3d = IsTex3d(rsrcType);
2583 const BOOL_32 tex2d = IsTex2d(rsrcType);
2584 const BOOL_32 tex1d = IsTex1d(rsrcType);
2585 const BOOL_32 thin3d = flags.view3dAs2dArray;
2586 const BOOL_32 linear = IsLinear(swizzle);
2587 const BOOL_32 blk256B = IsBlock256b(swizzle);
2588 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2589 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2590 const BOOL_32 prt = flags.prt;
2591 const BOOL_32 fmask = flags.fmask;
2592
2593 // Misc check
2594 if ((pIn->numFrags > 1) &&
2595 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2596 {
2597 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2598 ADDR_ASSERT_ALWAYS();
2599 valid = FALSE;
2600 }
2601
2602 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2603 {
2604 ADDR_ASSERT_ALWAYS();
2605 valid = FALSE;
2606 }
2607
2608 if ((pIn->bpp == 96) && (linear == FALSE))
2609 {
2610 ADDR_ASSERT_ALWAYS();
2611 valid = FALSE;
2612 }
2613
2614 const UINT_32 swizzleMask = 1 << swizzle;
2615
2616 // Resource type check
2617 if (tex1d)
2618 {
2619 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2620 {
2621 ADDR_ASSERT_ALWAYS();
2622 valid = FALSE;
2623 }
2624 }
2625 else if (tex2d)
2626 {
2627 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2628 {
2629 ADDR_ASSERT_ALWAYS();
2630 valid = FALSE;
2631 }
2632 else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2633 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2634 {
2635 ADDR_ASSERT_ALWAYS();
2636 valid = FALSE;
2637 }
2638 }
2639 else if (tex3d)
2640 {
2641 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2642 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2643 (thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0)))
2644 {
2645 ADDR_ASSERT_ALWAYS();
2646 valid = FALSE;
2647 }
2648 }
2649
2650 // Swizzle type check
2651 if (linear)
2652 {
2653 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2654 {
2655 ADDR_ASSERT_ALWAYS();
2656 valid = FALSE;
2657 }
2658 }
2659 else if (IsZOrderSwizzle(swizzle))
2660 {
2661 if ((pIn->bpp > 64) ||
2662 (msaa && (color || (pIn->bpp > 32))) ||
2663 ElemLib::IsBlockCompressed(pIn->format) ||
2664 ElemLib::IsMacroPixelPacked(pIn->format))
2665 {
2666 ADDR_ASSERT_ALWAYS();
2667 valid = FALSE;
2668 }
2669 }
2670 else if (IsStandardSwizzle(rsrcType, swizzle))
2671 {
2672 if (zbuffer || msaa)
2673 {
2674 ADDR_ASSERT_ALWAYS();
2675 valid = FALSE;
2676 }
2677 }
2678 else if (IsDisplaySwizzle(rsrcType, swizzle))
2679 {
2680 if (zbuffer || msaa)
2681 {
2682 ADDR_ASSERT_ALWAYS();
2683 valid = FALSE;
2684 }
2685 }
2686 else if (IsRtOptSwizzle(swizzle))
2687 {
2688 if (zbuffer)
2689 {
2690 ADDR_ASSERT_ALWAYS();
2691 valid = FALSE;
2692 }
2693 }
2694 else
2695 {
2696 ADDR_ASSERT_ALWAYS();
2697 valid = FALSE;
2698 }
2699
2700 // Block type check
2701 if (blk256B)
2702 {
2703 if (zbuffer || tex3d || msaa)
2704 {
2705 ADDR_ASSERT_ALWAYS();
2706 valid = FALSE;
2707 }
2708 }
2709 else if (blkVar)
2710 {
2711 if (m_blockVarSizeLog2 == 0)
2712 {
2713 ADDR_ASSERT_ALWAYS();
2714 valid = FALSE;
2715 }
2716 }
2717
2718 return valid;
2719 }
2720
2721 /**
2722 ************************************************************************************************************************
2723 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2724 *
2725 * @brief
2726 * Compute surface info sanity check
2727 *
2728 * @return
2729 * Offset
2730 ************************************************************************************************************************
2731 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2732 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2733 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2734 ) const
2735 {
2736 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2737 }
2738
2739 /**
2740 ************************************************************************************************************************
2741 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2742 *
2743 * @brief
2744 * Internal function to get suggested surface information for client to use
2745 *
2746 * @return
2747 * ADDR_E_RETURNCODE
2748 ************************************************************************************************************************
2749 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2750 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2751 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2752 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2753 ) const
2754 {
2755 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2756
2757 if (pIn->flags.fmask)
2758 {
2759 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2760 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2761
2762 if (forbid64KbBlockType && forbidVarBlockType)
2763 {
2764 // Invalid combination...
2765 ADDR_ASSERT_ALWAYS();
2766 returnCode = ADDR_INVALIDPARAMS;
2767 }
2768 else
2769 {
2770 pOut->resourceType = ADDR_RSRC_TEX_2D;
2771 pOut->validBlockSet.value = 0;
2772 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2773 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2774 pOut->validSwModeSet.value = 0;
2775 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2776 pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2777 pOut->canXor = TRUE;
2778 pOut->validSwTypeSet.value = AddrSwSetZ;
2779 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2780
2781 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2782
2783 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2784 {
2785 const UINT_8 maxFmaskSwizzleModeType = 2;
2786 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2787 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2788 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2789 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2790 const UINT_32 width = Max(pIn->width, 1u);
2791 const UINT_32 height = Max(pIn->height, 1u);
2792 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2793
2794 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2795 Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};
2796 Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};
2797 UINT_64 padSize[maxFmaskSwizzleModeType] = {};
2798
2799 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2800 {
2801 ComputeBlockDimensionForSurf(&blkDim[i].w,
2802 &blkDim[i].h,
2803 &blkDim[i].d,
2804 fmaskBpp,
2805 1,
2806 pOut->resourceType,
2807 swMode[i]);
2808
2809 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2810 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2811 }
2812
2813 if (Addr2BlockTypeWithinMemoryBudget(padSize[0],
2814 padSize[1],
2815 ratioLow,
2816 ratioHi,
2817 pIn->memoryBudget,
2818 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2819 {
2820 use64KbBlockType = FALSE;
2821 }
2822 }
2823 else if (forbidVarBlockType)
2824 {
2825 use64KbBlockType = TRUE;
2826 }
2827
2828 if (use64KbBlockType)
2829 {
2830 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2831 }
2832 else
2833 {
2834 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2835 }
2836 }
2837 }
2838 else
2839 {
2840 UINT_32 bpp = pIn->bpp;
2841 UINT_32 width = Max(pIn->width, 1u);
2842 UINT_32 height = Max(pIn->height, 1u);
2843
2844 // Set format to INVALID will skip this conversion
2845 if (pIn->format != ADDR_FMT_INVALID)
2846 {
2847 ElemMode elemMode = ADDR_UNCOMPRESSED;
2848 UINT_32 expandX, expandY;
2849
2850 // Get compression/expansion factors and element mode which indicates compression/expansion
2851 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2852 &elemMode,
2853 &expandX,
2854 &expandY);
2855
2856 UINT_32 basePitch = 0;
2857 GetElemLib()->AdjustSurfaceInfo(elemMode,
2858 expandX,
2859 expandY,
2860 &bpp,
2861 &basePitch,
2862 &width,
2863 &height);
2864 }
2865
2866 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2867 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2868 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2869 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2870 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2871
2872 // Pre sanity check on non swizzle mode parameters
2873 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2874 localIn.flags = pIn->flags;
2875 localIn.resourceType = pIn->resourceType;
2876 localIn.format = pIn->format;
2877 localIn.bpp = bpp;
2878 localIn.width = width;
2879 localIn.height = height;
2880 localIn.numSlices = numSlices;
2881 localIn.numMipLevels = numMipLevels;
2882 localIn.numSamples = numSamples;
2883 localIn.numFrags = numFrags;
2884
2885 if (ValidateNonSwModeParams(&localIn))
2886 {
2887 // Forbid swizzle mode(s) by client setting
2888 ADDR2_SWMODE_SET allowedSwModeSet = {};
2889 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2890 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2891 allowedSwModeSet.value |=
2892 pIn->forbiddenBlock.macroThin4KB ? 0 :
2893 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2894 allowedSwModeSet.value |=
2895 pIn->forbiddenBlock.macroThick4KB ? 0 :
2896 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2897 allowedSwModeSet.value |=
2898 pIn->forbiddenBlock.macroThin64KB ? 0 :
2899 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2900 allowedSwModeSet.value |=
2901 pIn->forbiddenBlock.macroThick64KB ? 0 :
2902 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2903 allowedSwModeSet.value |=
2904 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2905
2906 if (pIn->preferredSwSet.value != 0)
2907 {
2908 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2909 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2910 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2911 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2912 }
2913
2914 if (pIn->noXor)
2915 {
2916 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2917 }
2918
2919 if (pIn->maxAlign > 0)
2920 {
2921 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2922 {
2923 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2924 }
2925
2926 if (pIn->maxAlign < Size64K)
2927 {
2928 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2929 }
2930
2931 if (pIn->maxAlign < Size4K)
2932 {
2933 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2934 }
2935
2936 if (pIn->maxAlign < Size256)
2937 {
2938 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2939 }
2940 }
2941
2942 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2943 switch (pIn->resourceType)
2944 {
2945 case ADDR_RSRC_TEX_1D:
2946 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2947 break;
2948
2949 case ADDR_RSRC_TEX_2D:
2950 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2951 break;
2952
2953 case ADDR_RSRC_TEX_3D:
2954 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2955
2956 if (pIn->flags.view3dAs2dArray)
2957 {
2958 // SW_LINEAR can be used for 3D thin images, including BCn image format.
2959 allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
2960 }
2961 break;
2962
2963 default:
2964 ADDR_ASSERT_ALWAYS();
2965 allowedSwModeSet.value = 0;
2966 break;
2967 }
2968
2969 if (ElemLib::IsBlockCompressed(pIn->format) ||
2970 ElemLib::IsMacroPixelPacked(pIn->format) ||
2971 (bpp > 64) ||
2972 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2973 {
2974 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2975 }
2976
2977 if (pIn->format == ADDR_FMT_32_32_32)
2978 {
2979 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2980 }
2981
2982 if (msaa)
2983 {
2984 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2985 }
2986
2987 if (pIn->flags.depth || pIn->flags.stencil)
2988 {
2989 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2990 }
2991
2992 if (pIn->flags.display)
2993 {
2994 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2995 }
2996
2997 if (allowedSwModeSet.value != 0)
2998 {
2999 #if DEBUG
3000 // Post sanity check, at least AddrLib should accept the output generated by its own
3001 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3002
3003 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3004 {
3005 if (validateSwModeSet & 1)
3006 {
3007 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3008 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3009 }
3010
3011 validateSwModeSet >>= 1;
3012 }
3013 #endif
3014
3015 pOut->resourceType = pIn->resourceType;
3016 pOut->validSwModeSet = allowedSwModeSet;
3017 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3018 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3019 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3020
3021 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3022
3023 if (pOut->clientPreferredSwSet.value == 0)
3024 {
3025 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3026 }
3027
3028 // Apply optional restrictions
3029 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3030 {
3031 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3032 {
3033 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3034 // the GL2 in VAR mode, so it should be avoided.
3035 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3036 }
3037 else
3038 {
3039 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3040 // But we have to suffer from low performance because there is no other choice...
3041 ADDR_ASSERT_ALWAYS();
3042 }
3043 }
3044
3045 if (pIn->flags.needEquation)
3046 {
3047 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
3048 ADDR_MAX_LEGACY_EQUATION_COMP;
3049 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3050 }
3051
3052 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3053 {
3054 pOut->swizzleMode = ADDR_SW_LINEAR;
3055 }
3056 else
3057 {
3058 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3059
3060 if ((height > 1) && (computeMinSize == FALSE))
3061 {
3062 // Always ignore linear swizzle mode if:
3063 // 1. This is a (2D/3D) resource with height > 1
3064 // 2. Client doesn't require computing minimize size
3065 allowedSwModeSet.swLinear = 0;
3066 }
3067
3068 // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
3069 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3070
3071 // Determine block size if there are 2 or more block type candidates
3072 if (IsPow2(allowedBlockSet.value) == FALSE)
3073 {
3074 // Tracks a valid SwizzleMode for each valid block type
3075 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3076
3077 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3078
3079 if (m_blockVarSizeLog2 != 0)
3080 {
3081 swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3082 }
3083
3084 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3085 {
3086 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3087 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
3088 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3089 }
3090 else
3091 {
3092 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
3093 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
3094 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3095 }
3096
3097 // Tracks the size of each valid swizzle mode's surface in bytes
3098 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3099
3100 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3101 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3102 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3103 UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use
3104 UINT_64 minSize = 0; // Tracks the minimum acceptable block type
3105
3106 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3107
3108 // Iterate through all block types
3109 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3110 {
3111 if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3112 {
3113 localIn.swizzleMode = swMode[i];
3114
3115 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3116 {
3117 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3118 }
3119 else
3120 {
3121 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3122 }
3123
3124 if (returnCode == ADDR_OK)
3125 {
3126 padSize[i] = localOut.surfSize;
3127
3128 if (minSize == 0)
3129 {
3130 minSize = padSize[i];
3131 minSizeBlk = i;
3132 }
3133 else
3134 {
3135 // Checks if the block type is within the memory budget but favors larger blocks
3136 if (Addr2BlockTypeWithinMemoryBudget(
3137 minSize,
3138 padSize[i],
3139 ratioLow,
3140 ratioHi,
3141 0.0,
3142 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3143 {
3144 minSize = padSize[i];
3145 minSizeBlk = i;
3146 }
3147 }
3148 }
3149 else
3150 {
3151 ADDR_ASSERT_ALWAYS();
3152 break;
3153 }
3154 }
3155 }
3156
3157 if (pIn->memoryBudget > 1.0)
3158 {
3159 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3160 // smaller-block type again in coming loop
3161 switch (minSizeBlk)
3162 {
3163 case AddrBlockThick64KB:
3164 allowedBlockSet.macroThin64KB = 0;
3165 case AddrBlockThinVar:
3166 case AddrBlockThin64KB:
3167 allowedBlockSet.macroThick4KB = 0;
3168 case AddrBlockThick4KB:
3169 allowedBlockSet.macroThin4KB = 0;
3170 case AddrBlockThin4KB:
3171 allowedBlockSet.micro = 0;
3172 case AddrBlockMicro:
3173 allowedBlockSet.linear = 0;
3174 case AddrBlockLinear:
3175 break;
3176
3177 default:
3178 ADDR_ASSERT_ALWAYS();
3179 break;
3180 }
3181
3182 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3183 {
3184 if ((i != minSizeBlk) &&
3185 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3186 {
3187 if (Addr2BlockTypeWithinMemoryBudget(
3188 minSize,
3189 padSize[i],
3190 0,
3191 0,
3192 pIn->memoryBudget,
3193 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3194 {
3195 // Clear the block type if the memory waste is unacceptable
3196 allowedBlockSet.value &= ~(1u << (i - 1));
3197 }
3198 }
3199 }
3200
3201 // Remove VAR block type if bigger block type is allowed
3202 if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3203 {
3204 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3205 {
3206 allowedBlockSet.var = 0;
3207 }
3208 }
3209
3210 // Remove linear block type if 2 or more block types are allowed
3211 if (IsPow2(allowedBlockSet.value) == FALSE)
3212 {
3213 allowedBlockSet.linear = 0;
3214 }
3215
3216 // Select the biggest allowed block type
3217 minSizeBlk = Log2(allowedBlockSet.value) + 1;
3218
3219 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3220 {
3221 minSizeBlk = AddrBlockLinear;
3222 }
3223 }
3224
3225 switch (minSizeBlk)
3226 {
3227 case AddrBlockLinear:
3228 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3229 break;
3230
3231 case AddrBlockMicro:
3232 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3233 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3234 break;
3235
3236 case AddrBlockThin4KB:
3237 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3238 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3239 break;
3240
3241 case AddrBlockThick4KB:
3242 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3243 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3244 break;
3245
3246 case AddrBlockThin64KB:
3247 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3248 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3249 break;
3250
3251 case AddrBlockThick64KB:
3252 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3253 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3254 break;
3255
3256 case AddrBlockThinVar:
3257 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3258 break;
3259
3260 default:
3261 ADDR_ASSERT_ALWAYS();
3262 allowedSwModeSet.value = 0;
3263 break;
3264 }
3265 }
3266
3267 // Block type should be determined.
3268 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3269
3270 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3271
3272 // Determine swizzle type if there are 2 or more swizzle type candidates
3273 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3274 {
3275 if (ElemLib::IsBlockCompressed(pIn->format))
3276 {
3277 if (allowedSwSet.sw_D)
3278 {
3279 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3280 }
3281 else if (allowedSwSet.sw_S)
3282 {
3283 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3284 }
3285 else
3286 {
3287 ADDR_ASSERT(allowedSwSet.sw_R);
3288 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3289 }
3290 }
3291 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3292 {
3293 if (allowedSwSet.sw_S)
3294 {
3295 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3296 }
3297 else if (allowedSwSet.sw_D)
3298 {
3299 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3300 }
3301 else
3302 {
3303 ADDR_ASSERT(allowedSwSet.sw_R);
3304 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3305 }
3306 }
3307 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3308 {
3309 if (pIn->flags.color &&
3310 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3311 allowedSwSet.sw_D)
3312 {
3313 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3314 }
3315 else if (allowedSwSet.sw_S)
3316 {
3317 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3318 }
3319 else if (allowedSwSet.sw_R)
3320 {
3321 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3322 }
3323 else
3324 {
3325 ADDR_ASSERT(allowedSwSet.sw_Z);
3326 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3327 }
3328 }
3329 else
3330 {
3331 if (allowedSwSet.sw_R)
3332 {
3333 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3334 }
3335 else if (allowedSwSet.sw_D)
3336 {
3337 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3338 }
3339 else if (allowedSwSet.sw_S)
3340 {
3341 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3342 }
3343 else
3344 {
3345 ADDR_ASSERT(allowedSwSet.sw_Z);
3346 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3347 }
3348 }
3349
3350 // Swizzle type should be determined.
3351 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3352 }
3353
3354 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3355 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3356 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3357 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2(allowedSwModeSet.value));
3358 }
3359 }
3360 else
3361 {
3362 // Invalid combination...
3363 ADDR_ASSERT_ALWAYS();
3364 returnCode = ADDR_INVALIDPARAMS;
3365 }
3366 }
3367 else
3368 {
3369 // Invalid combination...
3370 ADDR_ASSERT_ALWAYS();
3371 returnCode = ADDR_INVALIDPARAMS;
3372 }
3373 }
3374
3375 return returnCode;
3376 }
3377
3378 /**
3379 ************************************************************************************************************************
3380 * Gfx10Lib::HwlGetPossibleSwizzleModes
3381 *
3382 * @brief
3383 * Returns a list of swizzle modes that are valid from the hardware's perspective for the client to choose from
3384 *
3385 * @return
3386 * ADDR_E_RETURNCODE
3387 ************************************************************************************************************************
3388 */
HwlGetPossibleSwizzleModes(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3389 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPossibleSwizzleModes(
3390 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
3391 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
3392 ) const
3393 {
3394 ADDR_E_RETURNCODE returnCode = ADDR_OK;
3395 UINT_32 bpp = pIn->bpp;
3396 UINT_32 width = Max(pIn->width, 1u);
3397 UINT_32 height = Max(pIn->height, 1u);
3398
3399 // Set format to INVALID will skip this conversion
3400 if (pIn->format != ADDR_FMT_INVALID)
3401 {
3402 ElemMode elemMode = ADDR_UNCOMPRESSED;
3403 UINT_32 expandX, expandY;
3404
3405 // Get compression/expansion factors and element mode which indicates compression/expansion
3406 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
3407 &elemMode,
3408 &expandX,
3409 &expandY);
3410
3411 UINT_32 basePitch = 0;
3412 GetElemLib()->AdjustSurfaceInfo(elemMode,
3413 expandX,
3414 expandY,
3415 &bpp,
3416 &basePitch,
3417 &width,
3418 &height);
3419 }
3420
3421 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3422 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3423 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
3424 const BOOL_32 msaa = numSamples > 1;
3425
3426 // Pre sanity check on non swizzle mode parameters
3427 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3428 localIn.flags = pIn->flags;
3429 localIn.resourceType = pIn->resourceType;
3430 localIn.format = pIn->format;
3431 localIn.bpp = bpp;
3432 localIn.width = width;
3433 localIn.height = height;
3434 localIn.numSlices = numSlices;
3435 localIn.numMipLevels = numMipLevels;
3436 localIn.numSamples = numSamples;
3437 localIn.numFrags = numSamples;
3438
3439 if (ValidateNonSwModeParams(&localIn))
3440 {
3441 // Forbid swizzle mode(s) by client setting
3442 ADDR2_SWMODE_SET allowedSwModeSet = {};
3443 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
3444 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
3445 allowedSwModeSet.value |=
3446 pIn->forbiddenBlock.macroThin4KB ? 0 :
3447 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
3448 allowedSwModeSet.value |=
3449 pIn->forbiddenBlock.macroThick4KB ? 0 :
3450 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
3451 allowedSwModeSet.value |=
3452 pIn->forbiddenBlock.macroThin64KB ? 0 :
3453 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
3454 allowedSwModeSet.value |=
3455 pIn->forbiddenBlock.macroThick64KB ? 0 :
3456 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
3457 allowedSwModeSet.value |=
3458 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
3459
3460 if (pIn->preferredSwSet.value != 0)
3461 {
3462 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
3463 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
3464 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
3465 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
3466 }
3467
3468 if (pIn->noXor)
3469 {
3470 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
3471 }
3472
3473 if (pIn->maxAlign > 0)
3474 {
3475 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
3476 {
3477 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3478 }
3479
3480 if (pIn->maxAlign < Size64K)
3481 {
3482 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
3483 }
3484
3485 if (pIn->maxAlign < Size4K)
3486 {
3487 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
3488 }
3489
3490 if (pIn->maxAlign < Size256)
3491 {
3492 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
3493 }
3494 }
3495
3496 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3497 switch (pIn->resourceType)
3498 {
3499 case ADDR_RSRC_TEX_1D:
3500 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
3501 break;
3502
3503 case ADDR_RSRC_TEX_2D:
3504 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
3505 break;
3506
3507 case ADDR_RSRC_TEX_3D:
3508 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
3509
3510 if (pIn->flags.view3dAs2dArray)
3511 {
3512 // SW_LINEAR can be used for 3D thin images, including BCn image format.
3513 allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
3514 }
3515 break;
3516
3517 default:
3518 ADDR_ASSERT_ALWAYS();
3519 allowedSwModeSet.value = 0;
3520 break;
3521 }
3522
3523 if (ElemLib::IsBlockCompressed(pIn->format) ||
3524 ElemLib::IsMacroPixelPacked(pIn->format) ||
3525 (bpp > 64) ||
3526 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3527 {
3528 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
3529 }
3530
3531 if (pIn->format == ADDR_FMT_32_32_32)
3532 {
3533 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3534 }
3535
3536 if (msaa)
3537 {
3538 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
3539 }
3540
3541 if (pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask)
3542 {
3543 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3544 }
3545
3546 if (pIn->flags.display)
3547 {
3548 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3549 }
3550
3551 if (pIn->flags.needEquation)
3552 {
3553 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
3554 ADDR_MAX_LEGACY_EQUATION_COMP;
3555 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3556 }
3557
3558 if (pIn->flags.requireMetadata)
3559 {
3560 // Linear images can never be compressed
3561 allowedSwModeSet.value &= ~Gfx10LinearSwModeMask;
3562 if (pIn->flags.color)
3563 {
3564 // 256B formats must not be pipe-aligned (can't use in CB)
3565 allowedSwModeSet.value &= ~(Gfx10Blk256BSwModeMask);
3566 // D/S formats must not be pipe-aligned
3567 allowedSwModeSet.value &= ~(Gfx10DisplaySwModeMask | Gfx10StandardSwModeMask);
3568 }
3569 }
3570
3571 if (allowedSwModeSet.value != 0)
3572 {
3573 #if DEBUG
3574 // Post sanity check, at least AddrLib should accept the output generated by its own
3575 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3576
3577 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3578 {
3579 if (validateSwModeSet & 1)
3580 {
3581 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3582 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3583 }
3584
3585 validateSwModeSet >>= 1;
3586 }
3587 #endif
3588
3589 pOut->resourceType = pIn->resourceType;
3590 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3591
3592 if (pOut->clientPreferredSwSet.value == 0)
3593 {
3594 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3595 }
3596
3597 pOut->validSwModeSet = allowedSwModeSet;
3598 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3599 }
3600 else
3601 {
3602 // Invalid combination...
3603 ADDR_ASSERT_ALWAYS();
3604 returnCode = ADDR_INVALIDPARAMS;
3605 }
3606 }
3607 else
3608 {
3609 // Invalid combination...
3610 ADDR_ASSERT_ALWAYS();
3611 returnCode = ADDR_INVALIDPARAMS;
3612 }
3613
3614 return returnCode;
3615 }
3616
3617 /**
3618 ************************************************************************************************************************
3619 * Gfx10Lib::ComputeStereoInfo
3620 *
3621 * @brief
3622 * Compute height alignment and right eye pipeBankXor for stereo surface
3623 *
3624 * @return
3625 * Error code
3626 *
3627 ************************************************************************************************************************
3628 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3629 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3630 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
3631 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
3632 UINT_32* pRightXor ///< Right eye xor
3633 ) const
3634 {
3635 ADDR_E_RETURNCODE ret = ADDR_OK;
3636
3637 *pRightXor = 0;
3638
3639 if (IsNonPrtXor(pIn->swizzleMode))
3640 {
3641 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3642 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3643 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3644 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3645 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3646
3647 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3648 {
3649 UINT_32 yMax = 0;
3650 UINT_32 yPosMask = 0;
3651
3652 // First get "max y bit"
3653 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3654 {
3655 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3656
3657 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3658 (m_equationTable[eqIndex].addr[i].index > yMax))
3659 {
3660 yMax = m_equationTable[eqIndex].addr[i].index;
3661 }
3662
3663 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3664 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3665 (m_equationTable[eqIndex].xor1[i].index > yMax))
3666 {
3667 yMax = m_equationTable[eqIndex].xor1[i].index;
3668 }
3669
3670 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3671 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3672 (m_equationTable[eqIndex].xor2[i].index > yMax))
3673 {
3674 yMax = m_equationTable[eqIndex].xor2[i].index;
3675 }
3676 }
3677
3678 // Then loop again for populating a position mask of "max Y bit"
3679 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3680 {
3681 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3682 (m_equationTable[eqIndex].addr[i].index == yMax))
3683 {
3684 yPosMask |= 1u << i;
3685 }
3686 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3687 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3688 (m_equationTable[eqIndex].xor1[i].index == yMax))
3689 {
3690 yPosMask |= 1u << i;
3691 }
3692 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3693 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3694 (m_equationTable[eqIndex].xor2[i].index == yMax))
3695 {
3696 yPosMask |= 1u << i;
3697 }
3698 }
3699
3700 const UINT_32 additionalAlign = 1 << yMax;
3701
3702 if (additionalAlign >= *pAlignY)
3703 {
3704 *pAlignY = additionalAlign;
3705
3706 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3707
3708 if ((alignedHeight >> yMax) & 1)
3709 {
3710 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3711 }
3712 }
3713 }
3714 else
3715 {
3716 ret = ADDR_INVALIDPARAMS;
3717 }
3718 }
3719
3720 return ret;
3721 }
3722
3723 /**
3724 ************************************************************************************************************************
3725 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3726 *
3727 * @brief
3728 * Internal function to calculate alignment for tiled surface
3729 *
3730 * @return
3731 * ADDR_E_RETURNCODE
3732 ************************************************************************************************************************
3733 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3734 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3735 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3736 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3737 ) const
3738 {
3739 ADDR_E_RETURNCODE ret;
3740
3741 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3742 pOut->mipChainPitch = 0;
3743 pOut->mipChainHeight = 0;
3744 pOut->mipChainSlice = 0;
3745 pOut->epitchIsHeight = FALSE;
3746
3747 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3748 pOut->mipChainInTail = FALSE;
3749 pOut->firstMipIdInTail = pIn->numMipLevels;
3750
3751 if (IsBlock256b(pIn->swizzleMode))
3752 {
3753 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3754 }
3755 else
3756 {
3757 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3758 }
3759
3760 return ret;
3761 }
3762
3763 /**
3764 ************************************************************************************************************************
3765 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3766 *
3767 * @brief
3768 * Internal function to calculate alignment for micro tiled surface
3769 *
3770 * @return
3771 * ADDR_E_RETURNCODE
3772 ************************************************************************************************************************
3773 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3774 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3775 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3776 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3777 ) const
3778 {
3779 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3780 &pOut->blockHeight,
3781 &pOut->blockSlices,
3782 pIn->bpp,
3783 pIn->numFrags,
3784 pIn->resourceType,
3785 pIn->swizzleMode);
3786
3787 if (ret == ADDR_OK)
3788 {
3789 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3790
3791 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3792 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3793 pOut->numSlices = pIn->numSlices;
3794 pOut->baseAlign = blockSize;
3795
3796 if (pIn->numMipLevels > 1)
3797 {
3798 const UINT_32 mip0Width = pIn->width;
3799 const UINT_32 mip0Height = pIn->height;
3800 UINT_64 mipSliceSize = 0;
3801
3802 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3803 {
3804 UINT_32 mipWidth, mipHeight;
3805
3806 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3807
3808 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3809 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3810
3811 if (pOut->pMipInfo != NULL)
3812 {
3813 pOut->pMipInfo[i].pitch = mipActualWidth;
3814 pOut->pMipInfo[i].height = mipActualHeight;
3815 pOut->pMipInfo[i].depth = 1;
3816 pOut->pMipInfo[i].offset = mipSliceSize;
3817 pOut->pMipInfo[i].mipTailOffset = 0;
3818 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3819 }
3820
3821 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3822 }
3823
3824 pOut->sliceSize = mipSliceSize;
3825 pOut->surfSize = mipSliceSize * pOut->numSlices;
3826 }
3827 else
3828 {
3829 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3830 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3831
3832 if (pOut->pMipInfo != NULL)
3833 {
3834 pOut->pMipInfo[0].pitch = pOut->pitch;
3835 pOut->pMipInfo[0].height = pOut->height;
3836 pOut->pMipInfo[0].depth = 1;
3837 pOut->pMipInfo[0].offset = 0;
3838 pOut->pMipInfo[0].mipTailOffset = 0;
3839 pOut->pMipInfo[0].macroBlockOffset = 0;
3840 }
3841 }
3842
3843 }
3844
3845 return ret;
3846 }
3847
3848 /**
3849 ************************************************************************************************************************
3850 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3851 *
3852 * @brief
3853 * Internal function to calculate alignment for macro tiled surface
3854 *
3855 * @return
3856 * ADDR_E_RETURNCODE
3857 ************************************************************************************************************************
3858 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3859 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3860 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3861 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3862 ) const
3863 {
3864 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3865 &pOut->blockHeight,
3866 &pOut->blockSlices,
3867 pIn->bpp,
3868 pIn->numFrags,
3869 pIn->resourceType,
3870 pIn->swizzleMode);
3871
3872 if (returnCode == ADDR_OK)
3873 {
3874 UINT_32 heightAlign = pOut->blockHeight;
3875
3876 if (pIn->flags.qbStereo)
3877 {
3878 UINT_32 rightXor = 0;
3879
3880 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3881
3882 if (returnCode == ADDR_OK)
3883 {
3884 pOut->pStereoInfo->rightSwizzle = rightXor;
3885 }
3886 }
3887
3888 if (returnCode == ADDR_OK)
3889 {
3890 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3891 const UINT_32 blockSize = 1 << blockSizeLog2;
3892
3893 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3894 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3895 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3896 pOut->baseAlign = blockSize;
3897
3898 if (pIn->numMipLevels > 1)
3899 {
3900 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3901 pIn->swizzleMode,
3902 pOut->blockWidth,
3903 pOut->blockHeight,
3904 pOut->blockSlices);
3905 const UINT_32 mip0Width = pIn->width;
3906 const UINT_32 mip0Height = pIn->height;
3907 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3908 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3909 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3910 const UINT_32 index = Log2(pIn->bpp >> 3);
3911 UINT_32 firstMipInTail = pIn->numMipLevels;
3912 UINT_64 mipChainSliceSize = 0;
3913 UINT_64 mipSize[MaxMipLevels];
3914 UINT_64 mipSliceSize[MaxMipLevels];
3915
3916 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3917 Dim3d fixedTailMaxDim = tailMaxDim;
3918
3919 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3920 {
3921 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3922 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3923 }
3924
3925 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3926 {
3927 UINT_32 mipWidth, mipHeight, mipDepth;
3928
3929 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3930
3931 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3932 {
3933 firstMipInTail = i;
3934 mipChainSliceSize += blockSize / pOut->blockSlices;
3935 break;
3936 }
3937 else
3938 {
3939 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3940 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3941 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3942 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3943
3944 mipSize[i] = sliceSize * depth;
3945 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3946 mipChainSliceSize += sliceSize;
3947
3948 if (pOut->pMipInfo != NULL)
3949 {
3950 pOut->pMipInfo[i].pitch = pitch;
3951 pOut->pMipInfo[i].height = height;
3952 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3953 }
3954 }
3955 }
3956
3957 pOut->sliceSize = mipChainSliceSize;
3958 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3959 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3960 pOut->firstMipIdInTail = firstMipInTail;
3961
3962 if (pOut->pMipInfo != NULL)
3963 {
3964 UINT_64 offset = 0;
3965 UINT_64 macroBlkOffset = 0;
3966 UINT_32 tailMaxDepth = 0;
3967
3968 if (firstMipInTail != pIn->numMipLevels)
3969 {
3970 UINT_32 mipWidth, mipHeight;
3971
3972 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3973 &mipWidth, &mipHeight, &tailMaxDepth);
3974
3975 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3976 macroBlkOffset = blockSize;
3977 }
3978
3979 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3980 {
3981 pOut->pMipInfo[i].offset = offset;
3982 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3983 pOut->pMipInfo[i].mipTailOffset = 0;
3984
3985 offset += mipSize[i];
3986 macroBlkOffset += mipSliceSize[i];
3987 }
3988
3989 UINT_32 pitch = tailMaxDim.w;
3990 UINT_32 height = tailMaxDim.h;
3991 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3992
3993 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3994
3995 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3996 {
3997 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3998 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3999
4000 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
4001 pOut->pMipInfo[i].mipTailOffset = mipOffset;
4002 pOut->pMipInfo[i].macroBlockOffset = 0;
4003
4004 pOut->pMipInfo[i].pitch = pitch;
4005 pOut->pMipInfo[i].height = height;
4006 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
4007
4008 UINT_32 mipX = ((mipOffset >> 9) & 1) |
4009 ((mipOffset >> 10) & 2) |
4010 ((mipOffset >> 11) & 4) |
4011 ((mipOffset >> 12) & 8) |
4012 ((mipOffset >> 13) & 16) |
4013 ((mipOffset >> 14) & 32);
4014 UINT_32 mipY = ((mipOffset >> 8) & 1) |
4015 ((mipOffset >> 9) & 2) |
4016 ((mipOffset >> 10) & 4) |
4017 ((mipOffset >> 11) & 8) |
4018 ((mipOffset >> 12) & 16) |
4019 ((mipOffset >> 13) & 32);
4020
4021 if (blockSizeLog2 & 1)
4022 {
4023 const UINT_32 temp = mipX;
4024 mipX = mipY;
4025 mipY = temp;
4026
4027 if (index & 1)
4028 {
4029 mipY = (mipY << 1) | (mipX & 1);
4030 mipX = mipX >> 1;
4031 }
4032 }
4033
4034 if (isThin)
4035 {
4036 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
4037 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
4038 pOut->pMipInfo[i].mipTailCoordZ = 0;
4039
4040 pitch = Max(pitch >> 1, Block256_2d[index].w);
4041 height = Max(height >> 1, Block256_2d[index].h);
4042 }
4043 else
4044 {
4045 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
4046 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
4047 pOut->pMipInfo[i].mipTailCoordZ = 0;
4048
4049 pitch = Max(pitch >> 1, Block256_3d[index].w);
4050 height = Max(height >> 1, Block256_3d[index].h);
4051 }
4052 }
4053 }
4054 }
4055 else
4056 {
4057 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
4058 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4059
4060 if (pOut->pMipInfo != NULL)
4061 {
4062 pOut->pMipInfo[0].pitch = pOut->pitch;
4063 pOut->pMipInfo[0].height = pOut->height;
4064 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
4065 pOut->pMipInfo[0].offset = 0;
4066 pOut->pMipInfo[0].mipTailOffset = 0;
4067 pOut->pMipInfo[0].macroBlockOffset = 0;
4068 pOut->pMipInfo[0].mipTailCoordX = 0;
4069 pOut->pMipInfo[0].mipTailCoordY = 0;
4070 pOut->pMipInfo[0].mipTailCoordZ = 0;
4071 }
4072 }
4073 }
4074 }
4075
4076 return returnCode;
4077 }
4078
4079 /**
4080 ************************************************************************************************************************
4081 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
4082 *
4083 * @brief
4084 * Internal function to calculate address from coord for tiled swizzle surface
4085 *
4086 * @return
4087 * ADDR_E_RETURNCODE
4088 ************************************************************************************************************************
4089 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4090 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
4091 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4092 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4093 ) const
4094 {
4095 ADDR_E_RETURNCODE ret;
4096
4097 if (IsBlock256b(pIn->swizzleMode))
4098 {
4099 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
4100 }
4101 else
4102 {
4103 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
4104 }
4105
4106 return ret;
4107 }
4108
4109 /**
4110 ************************************************************************************************************************
4111 * Gfx10Lib::HwlCopyMemToSurface
4112 *
4113 * @brief
4114 * Copy multiple regions from memory to a non-linear surface.
4115 *
4116 * @return
4117 * Error or success.
4118 ************************************************************************************************************************
4119 */
HwlCopyMemToSurface(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const4120 ADDR_E_RETURNCODE Gfx10Lib::HwlCopyMemToSurface(
4121 const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
4122 const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
4123 UINT_32 regionCount
4124 ) const
4125 {
4126 // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
4127 // optimized for a particular micro-swizzle mode if available.
4128 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4129 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4130 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
4131 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4132 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4133
4134 if (pIn->numSamples > 1)
4135 {
4136 // TODO: MSAA
4137 returnCode = ADDR_NOTIMPLEMENTED;
4138 }
4139 if (IsBlockVariable(pIn->swizzleMode))
4140 {
4141 // TODO: larger LUTs for worst-case var swizzle.
4142 returnCode = ADDR_NOTIMPLEMENTED;
4143 }
4144
4145 localIn.size = sizeof(localIn);
4146 localIn.flags = pIn->flags;
4147 localIn.swizzleMode = pIn->swizzleMode;
4148 localIn.resourceType = pIn->resourceType;
4149 localIn.format = pIn->format;
4150 localIn.bpp = pIn->bpp;
4151 localIn.width = Max(pIn->unAlignedDims.width, 1u);
4152 localIn.height = Max(pIn->unAlignedDims.height, 1u);
4153 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
4154 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4155 localIn.numSamples = Max(pIn->numSamples, 1u);
4156
4157 localOut.size = sizeof(localOut);
4158 localOut.pMipInfo = mipInfo;
4159
4160 if (returnCode == ADDR_OK)
4161 {
4162 returnCode = ComputeSurfaceInfo(&localIn, &localOut);
4163 }
4164 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4165 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4166 pIn->resourceType,
4167 Log2(pIn->bpp >> 3),
4168 pIn->numSamples);
4169
4170 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
4171 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4172 ADDR_EXTENT3D blockExtent = {
4173 localOut.blockWidth,
4174 localOut.blockHeight,
4175 localOut.blockSlices
4176 };
4177
4178 LutAddresser addresser = LutAddresser();
4179 addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
4180 UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyMemImgFunc();
4181 if (pfnCopyUnaligned == nullptr)
4182 {
4183 ADDR_ASSERT_ALWAYS();
4184 returnCode = ADDR_INVALIDPARAMS;
4185 }
4186
4187 if (returnCode == ADDR_OK)
4188 {
4189 for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
4190 {
4191 const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
4192 const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
4193 UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
4194 UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
4195
4196 UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
4197 UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
4198 UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
4199
4200 for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
4201 {
4202 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
4203 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
4204 // for unaligned copies.
4205 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
4206 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
4207
4208 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
4209 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
4210
4211 ADDR_COORD2D sliceOrigin = { xStart, yStart };
4212 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
4213
4214 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
4215 VoidPtrInc(pCurRegion->pMem, memOffset),
4216 pCurRegion->memRowPitch,
4217 yBlks,
4218 sliceOrigin,
4219 sliceExtent,
4220 sliceXor,
4221 addresser);
4222 }
4223 }
4224 }
4225 return returnCode;
4226 }
4227
4228 /**
4229 ************************************************************************************************************************
4230 * Gfx10Lib::HwlCopySurfaceToMem
4231 *
4232 * @brief
4233 * Copy multiple regions from a non-linear surface to memory.
4234 *
4235 * @return
4236 * Error or success.
4237 ************************************************************************************************************************
4238 */
HwlCopySurfaceToMem(const ADDR2_COPY_MEMSURFACE_INPUT * pIn,const ADDR2_COPY_MEMSURFACE_REGION * pRegions,UINT_32 regionCount) const4239 ADDR_E_RETURNCODE Gfx10Lib::HwlCopySurfaceToMem(
4240 const ADDR2_COPY_MEMSURFACE_INPUT* pIn,
4241 const ADDR2_COPY_MEMSURFACE_REGION* pRegions,
4242 UINT_32 regionCount
4243 ) const
4244 {
4245 // Copy memory to tiled surface. We will use the 'swizzler' object to dispatch to a version of the copy routine
4246 // optimized for a particular micro-swizzle mode if available.
4247 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4248 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4249 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {{0}};
4250 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4251 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4252
4253 if (pIn->numSamples > 1)
4254 {
4255 // TODO: MSAA
4256 returnCode = ADDR_NOTIMPLEMENTED;
4257 }
4258 if (IsBlockVariable(pIn->swizzleMode))
4259 {
4260 // TODO: larger LUTs for worst-case var swizzle.
4261 returnCode = ADDR_NOTIMPLEMENTED;
4262 }
4263
4264 localIn.size = sizeof(localIn);
4265 localIn.flags = pIn->flags;
4266 localIn.swizzleMode = pIn->swizzleMode;
4267 localIn.resourceType = pIn->resourceType;
4268 localIn.format = pIn->format;
4269 localIn.bpp = pIn->bpp;
4270 localIn.width = Max(pIn->unAlignedDims.width, 1u);
4271 localIn.height = Max(pIn->unAlignedDims.height, 1u);
4272 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
4273 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4274 localIn.numSamples = Max(pIn->numSamples, 1u);
4275
4276 localOut.size = sizeof(localOut);
4277 localOut.pMipInfo = mipInfo;
4278
4279 if (returnCode == ADDR_OK)
4280 {
4281 returnCode = ComputeSurfaceInfo(&localIn, &localOut);
4282 }
4283 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4284 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4285 pIn->resourceType,
4286 Log2(pIn->bpp >> 3),
4287 pIn->numSamples);
4288
4289 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT] = {};
4290 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4291 ADDR_EXTENT3D blockExtent = {
4292 localOut.blockWidth,
4293 localOut.blockHeight,
4294 localOut.blockSlices
4295 };
4296
4297 LutAddresser addresser = LutAddresser();
4298 addresser.Init(fullSwizzlePattern, ADDR_MAX_EQUATION_BIT, blockExtent, blkSizeLog2);
4299 UnalignedCopyMemImgFunc pfnCopyUnaligned = addresser.GetCopyImgMemFunc();
4300 if (pfnCopyUnaligned == nullptr)
4301 {
4302 ADDR_ASSERT_ALWAYS();
4303 returnCode = ADDR_INVALIDPARAMS;
4304 }
4305
4306 if (returnCode == ADDR_OK)
4307 {
4308 for (UINT_32 regionIdx = 0; regionIdx < regionCount; regionIdx++)
4309 {
4310 const ADDR2_COPY_MEMSURFACE_REGION* pCurRegion = &pRegions[regionIdx];
4311 const ADDR2_MIP_INFO* pMipInfo = &mipInfo[pCurRegion->mipId];
4312 UINT_64 mipOffset = pIn->singleSubres ? 0 : pMipInfo->macroBlockOffset;
4313 UINT_32 yBlks = pMipInfo->pitch / localOut.blockWidth;
4314
4315 UINT_32 xStart = pCurRegion->x + pMipInfo->mipTailCoordX;
4316 UINT_32 yStart = pCurRegion->y + pMipInfo->mipTailCoordY;
4317 UINT_32 sliceStart = pCurRegion->slice + pMipInfo->mipTailCoordZ;
4318
4319 for (UINT_32 slice = sliceStart; slice < (sliceStart + pCurRegion->copyDims.depth); slice++)
4320 {
4321 // The copy functions take the base address of the hardware slice, not the logical slice. Those are
4322 // not the same thing in 3D swizzles. Logical slices within 3D swizzles are handled by sliceXor
4323 // for unaligned copies.
4324 UINT_32 sliceBlkStart = PowTwoAlignDown(slice, localOut.blockSlices);
4325 UINT_32 sliceXor = pIn->pbXor ^ addresser.GetAddressZ(slice);
4326
4327 UINT_64 memOffset = ((slice - pCurRegion->slice) * pCurRegion->memSlicePitch);
4328 UINT_64 imgOffset = mipOffset + (sliceBlkStart * localOut.sliceSize);
4329
4330 ADDR_COORD2D sliceOrigin = { xStart, yStart };
4331 ADDR_EXTENT2D sliceExtent = { pCurRegion->copyDims.width, pCurRegion->copyDims.height };
4332
4333 pfnCopyUnaligned(VoidPtrInc(pIn->pMappedSurface, imgOffset),
4334 VoidPtrInc(pCurRegion->pMem, memOffset),
4335 pCurRegion->memRowPitch,
4336 yBlks,
4337 sliceOrigin,
4338 sliceExtent,
4339 sliceXor,
4340 addresser);
4341 }
4342 }
4343 }
4344 return returnCode;
4345 }
4346
4347 /**
4348 ************************************************************************************************************************
4349 * Gfx10Lib::ComputeOffsetFromEquation
4350 *
4351 * @brief
4352 * Compute offset from equation
4353 *
4354 * @return
4355 * Offset
4356 ************************************************************************************************************************
4357 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const4358 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
4359 const ADDR_EQUATION* pEq, ///< Equation
4360 UINT_32 x, ///< x coord in bytes
4361 UINT_32 y, ///< y coord in pixel
4362 UINT_32 z ///< z coord in slice
4363 ) const
4364 {
4365 UINT_32 offset = 0;
4366
4367 for (UINT_32 i = 0; i < pEq->numBits; i++)
4368 {
4369 UINT_32 v = 0;
4370
4371 for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
4372 {
4373 if (pEq->comps[c][i].valid)
4374 {
4375 if (pEq->comps[c][i].channel == 0)
4376 {
4377 v ^= (x >> pEq->comps[c][i].index) & 1;
4378 }
4379 else if (pEq->comps[c][i].channel == 1)
4380 {
4381 v ^= (y >> pEq->comps[c][i].index) & 1;
4382 }
4383 else
4384 {
4385 ADDR_ASSERT(pEq->comps[c][i].channel == 2);
4386 v ^= (z >> pEq->comps[c][i].index) & 1;
4387 }
4388 }
4389 }
4390
4391 offset |= (v << i);
4392 }
4393
4394 return offset;
4395 }
4396
4397 /**
4398 ************************************************************************************************************************
4399 * Gfx10Lib::GetSwizzlePatternInfo
4400 *
4401 * @brief
4402 * Get swizzle pattern
4403 *
4404 * @return
4405 * Swizzle pattern information
4406 ************************************************************************************************************************
4407 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4408 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4409 AddrSwizzleMode swizzleMode, ///< Swizzle mode
4410 AddrResourceType resourceType, ///< Resource type
4411 UINT_32 elemLog2, ///< Element size in bytes log2
4412 UINT_32 numFrag ///< Number of fragment
4413 ) const
4414 {
4415 // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from
4416 // the right location
4417 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4418 const ADDR_SW_PATINFO* patInfo = NULL;
4419 const UINT_32 swizzleMask = 1 << swizzleMode;
4420
4421 if (IsBlockVariable(swizzleMode))
4422 {
4423 if (m_blockVarSizeLog2 != 0)
4424 {
4425 ADDR_ASSERT(m_settings.supportRbPlus);
4426
4427 if (IsRtOptSwizzle(swizzleMode))
4428 {
4429 if (numFrag == 1)
4430 {
4431 patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4432 }
4433 else if (numFrag == 2)
4434 {
4435 patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4436 }
4437 else if (numFrag == 4)
4438 {
4439 patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4440 }
4441 else
4442 {
4443 ADDR_ASSERT(numFrag == 8);
4444 patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4445 }
4446 }
4447 else if (IsZOrderSwizzle(swizzleMode))
4448 {
4449 if (numFrag == 1)
4450 {
4451 patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4452 }
4453 else if (numFrag == 2)
4454 {
4455 patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4456 }
4457 else if (numFrag == 4)
4458 {
4459 patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4460 }
4461 else
4462 {
4463 ADDR_ASSERT(numFrag == 8);
4464 patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4465 }
4466 }
4467 }
4468 }
4469 else if (IsLinear(swizzleMode) == FALSE)
4470 {
4471 if (resourceType == ADDR_RSRC_TEX_3D)
4472 {
4473 ADDR_ASSERT(numFrag == 1);
4474
4475 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4476 {
4477 if (IsRtOptSwizzle(swizzleMode))
4478 {
4479 if (swizzleMode == ADDR_SW_4KB_R_X)
4480 {
4481 patInfo = NULL;
4482 }
4483 else
4484 {
4485 patInfo = m_settings.supportRbPlus ?
4486 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4487 }
4488 }
4489 else if (IsZOrderSwizzle(swizzleMode))
4490 {
4491 patInfo = m_settings.supportRbPlus ?
4492 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4493 }
4494 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4495 {
4496 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4497 patInfo = m_settings.supportRbPlus ?
4498 GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4499 }
4500 else
4501 {
4502 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4503
4504 if (IsBlock4kb(swizzleMode))
4505 {
4506 if (swizzleMode == ADDR_SW_4KB_S)
4507 {
4508 patInfo = m_settings.supportRbPlus ?
4509 GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4510 }
4511 else
4512 {
4513 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4514 patInfo = m_settings.supportRbPlus ?
4515 GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4516 }
4517 }
4518 else
4519 {
4520 if (swizzleMode == ADDR_SW_64KB_S)
4521 {
4522 patInfo = m_settings.supportRbPlus ?
4523 GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4524 }
4525 else if (swizzleMode == ADDR_SW_64KB_S_X)
4526 {
4527 patInfo = m_settings.supportRbPlus ?
4528 GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4529 }
4530 else
4531 {
4532 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4533 patInfo = m_settings.supportRbPlus ?
4534 GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4535 }
4536 }
4537 }
4538 }
4539 }
4540 else
4541 {
4542 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4543 {
4544 if (IsBlock256b(swizzleMode))
4545 {
4546 if (swizzleMode == ADDR_SW_256B_S)
4547 {
4548 patInfo = m_settings.supportRbPlus ?
4549 GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4550 }
4551 else
4552 {
4553 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4554 patInfo = m_settings.supportRbPlus ?
4555 GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4556 }
4557 }
4558 else if (IsBlock4kb(swizzleMode))
4559 {
4560 if (IsStandardSwizzle(resourceType, swizzleMode))
4561 {
4562 if (swizzleMode == ADDR_SW_4KB_S)
4563 {
4564 patInfo = m_settings.supportRbPlus ?
4565 GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4566 }
4567 else
4568 {
4569 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4570 patInfo = m_settings.supportRbPlus ?
4571 GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4572 }
4573 }
4574 else
4575 {
4576 if (swizzleMode == ADDR_SW_4KB_D)
4577 {
4578 patInfo = m_settings.supportRbPlus ?
4579 GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4580 }
4581 else if (swizzleMode == ADDR_SW_4KB_R_X)
4582 {
4583 patInfo = NULL;
4584 }
4585 else
4586 {
4587 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4588 patInfo = m_settings.supportRbPlus ?
4589 GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4590 }
4591 }
4592 }
4593 else
4594 {
4595 if (IsRtOptSwizzle(swizzleMode))
4596 {
4597 if (numFrag == 1)
4598 {
4599 patInfo = m_settings.supportRbPlus ?
4600 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4601 }
4602 else if (numFrag == 2)
4603 {
4604 patInfo = m_settings.supportRbPlus ?
4605 GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4606 }
4607 else if (numFrag == 4)
4608 {
4609 patInfo = m_settings.supportRbPlus ?
4610 GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4611 }
4612 else
4613 {
4614 ADDR_ASSERT(numFrag == 8);
4615 patInfo = m_settings.supportRbPlus ?
4616 GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4617 }
4618 }
4619 else if (IsZOrderSwizzle(swizzleMode))
4620 {
4621 if (numFrag == 1)
4622 {
4623 patInfo = m_settings.supportRbPlus ?
4624 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4625 }
4626 else if (numFrag == 2)
4627 {
4628 patInfo = m_settings.supportRbPlus ?
4629 GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4630 }
4631 else if (numFrag == 4)
4632 {
4633 patInfo = m_settings.supportRbPlus ?
4634 GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4635 }
4636 else
4637 {
4638 ADDR_ASSERT(numFrag == 8);
4639 patInfo = m_settings.supportRbPlus ?
4640 GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4641 }
4642 }
4643 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4644 {
4645 if (swizzleMode == ADDR_SW_64KB_D)
4646 {
4647 patInfo = m_settings.supportRbPlus ?
4648 GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4649 }
4650 else if (swizzleMode == ADDR_SW_64KB_D_X)
4651 {
4652 patInfo = m_settings.supportRbPlus ?
4653 GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4654 }
4655 else
4656 {
4657 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4658 patInfo = m_settings.supportRbPlus ?
4659 GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4660 }
4661 }
4662 else
4663 {
4664 if (swizzleMode == ADDR_SW_64KB_S)
4665 {
4666 patInfo = m_settings.supportRbPlus ?
4667 GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4668 }
4669 else if (swizzleMode == ADDR_SW_64KB_S_X)
4670 {
4671 patInfo = m_settings.supportRbPlus ?
4672 GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4673 }
4674 else
4675 {
4676 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4677 patInfo = m_settings.supportRbPlus ?
4678 GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4679 }
4680 }
4681 }
4682 }
4683 }
4684 }
4685
4686 return (patInfo != NULL) ? &patInfo[index] : NULL;
4687 }
4688
4689 /**
4690 ************************************************************************************************************************
4691 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4692 *
4693 * @brief
4694 * Internal function to calculate address from coord for micro tiled swizzle surface
4695 *
4696 * @return
4697 * ADDR_E_RETURNCODE
4698 ************************************************************************************************************************
4699 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4700 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4701 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4702 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4703 ) const
4704 {
4705 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4706 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4707 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4708 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4709
4710 localIn.swizzleMode = pIn->swizzleMode;
4711 localIn.flags = pIn->flags;
4712 localIn.resourceType = pIn->resourceType;
4713 localIn.bpp = pIn->bpp;
4714 localIn.width = Max(pIn->unalignedWidth, 1u);
4715 localIn.height = Max(pIn->unalignedHeight, 1u);
4716 localIn.numSlices = Max(pIn->numSlices, 1u);
4717 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4718 localIn.numSamples = Max(pIn->numSamples, 1u);
4719 localIn.numFrags = Max(pIn->numFrags, 1u);
4720 localOut.pMipInfo = mipInfo;
4721
4722 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4723
4724 if (ret == ADDR_OK)
4725 {
4726 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4727 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4728 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4729 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
4730
4731 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4732 {
4733 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4734 const UINT_32 yb = pIn->y / localOut.blockHeight;
4735 const UINT_32 xb = pIn->x / localOut.blockWidth;
4736 const UINT_32 blockIndex = yb * pb + xb;
4737 const UINT_32 blockSize = 256;
4738 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4739 pIn->x << elemLog2,
4740 pIn->y,
4741 0);
4742 pOut->addr = localOut.sliceSize * pIn->slice +
4743 mipInfo[pIn->mipId].macroBlockOffset +
4744 (blockIndex * blockSize) +
4745 blk256Offset;
4746 }
4747 else
4748 {
4749 ret = ADDR_INVALIDPARAMS;
4750 }
4751 }
4752
4753 return ret;
4754 }
4755
4756 /**
4757 ************************************************************************************************************************
4758 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4759 *
4760 * @brief
4761 * Internal function to calculate address from coord for macro tiled swizzle surface
4762 *
4763 * @return
4764 * ADDR_E_RETURNCODE
4765 ************************************************************************************************************************
4766 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4767 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4768 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4769 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4770 ) const
4771 {
4772 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4773 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4774 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4775 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4776
4777 localIn.swizzleMode = pIn->swizzleMode;
4778 localIn.flags = pIn->flags;
4779 localIn.resourceType = pIn->resourceType;
4780 localIn.bpp = pIn->bpp;
4781 localIn.width = Max(pIn->unalignedWidth, 1u);
4782 localIn.height = Max(pIn->unalignedHeight, 1u);
4783 localIn.numSlices = Max(pIn->numSlices, 1u);
4784 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4785 localIn.numSamples = Max(pIn->numSamples, 1u);
4786 localIn.numFrags = Max(pIn->numFrags, 1u);
4787 localOut.pMipInfo = mipInfo;
4788
4789 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4790
4791 if (ret == ADDR_OK)
4792 {
4793 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4794 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4795 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4796 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4797 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4798 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4799 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4800
4801 if (localIn.numFrags > 1)
4802 {
4803 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4804 pIn->resourceType,
4805 elemLog2,
4806 localIn.numFrags);
4807
4808 if (pPatInfo != NULL)
4809 {
4810 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4811 const UINT_32 yb = pIn->y / localOut.blockHeight;
4812 const UINT_32 xb = pIn->x / localOut.blockWidth;
4813 const UINT_64 blkIdx = yb * pb + xb;
4814
4815 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
4816 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4817
4818 const UINT_32 blkOffset =
4819 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4820 blkSizeLog2,
4821 pIn->x,
4822 pIn->y,
4823 pIn->slice,
4824 pIn->sample);
4825
4826 pOut->addr = (localOut.sliceSize * pIn->slice) +
4827 (blkIdx << blkSizeLog2) +
4828 (blkOffset ^ pipeBankXor);
4829 }
4830 else
4831 {
4832 ret = ADDR_INVALIDPARAMS;
4833 }
4834 }
4835 else
4836 {
4837 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4838 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4839 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4840
4841 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4842 {
4843 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4844 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4845 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4846 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4847 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4848 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4849 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4850 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4851 const UINT_32 yb = pIn->y / localOut.blockHeight;
4852 const UINT_32 xb = pIn->x / localOut.blockWidth;
4853 const UINT_64 blkIdx = yb * pb + xb;
4854 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4855 x << elemLog2,
4856 y,
4857 z);
4858 pOut->addr = sliceSize * sliceId +
4859 mipInfo[pIn->mipId].macroBlockOffset +
4860 (blkIdx << blkSizeLog2) +
4861 (blkOffset ^ pipeBankXor);
4862 }
4863 else
4864 {
4865 ret = ADDR_INVALIDPARAMS;
4866 }
4867 }
4868 }
4869
4870 return ret;
4871 }
4872
4873 /**
4874 ************************************************************************************************************************
4875 * Gfx10Lib::HwlComputeMaxBaseAlignments
4876 *
4877 * @brief
4878 * Gets maximum alignments
4879 * @return
4880 * maximum alignments
4881 ************************************************************************************************************************
4882 */
HwlComputeMaxBaseAlignments() const4883 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4884 {
4885 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4886 }
4887
4888 /**
4889 ************************************************************************************************************************
4890 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4891 *
4892 * @brief
4893 * Gets maximum alignments for metadata
4894 * @return
4895 * maximum alignments for metadata
4896 ************************************************************************************************************************
4897 */
HwlComputeMaxMetaBaseAlignments() const4898 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4899 {
4900 Dim3d metaBlk;
4901
4902 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4903 {
4904 ADDR_SW_64KB_Z_X,
4905 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4906 };
4907
4908 UINT_32 maxBaseAlignHtile = 0;
4909 UINT_32 maxBaseAlignCmask = 0;
4910
4911 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4912 {
4913 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4914 {
4915 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4916 {
4917 // Max base alignment for Htile
4918 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4919 ADDR_RSRC_TEX_2D,
4920 ValidSwizzleModeForXmask[swIdx],
4921 bppLog2,
4922 numFragLog2,
4923 TRUE,
4924 &metaBlk);
4925
4926 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4927 }
4928 }
4929
4930 // Max base alignment for Cmask
4931 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4932 ADDR_RSRC_TEX_2D,
4933 ValidSwizzleModeForXmask[swIdx],
4934 0,
4935 0,
4936 TRUE,
4937 &metaBlk);
4938
4939 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4940 }
4941
4942 // Max base alignment for 2D Dcc
4943 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4944 {
4945 ADDR_SW_64KB_S_X,
4946 ADDR_SW_64KB_D_X,
4947 ADDR_SW_64KB_R_X,
4948 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4949 };
4950
4951 UINT_32 maxBaseAlignDcc2D = 0;
4952
4953 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4954 {
4955 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4956 {
4957 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4958 {
4959 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4960 ADDR_RSRC_TEX_2D,
4961 ValidSwizzleModeForDcc2D[swIdx],
4962 bppLog2,
4963 numFragLog2,
4964 TRUE,
4965 &metaBlk);
4966
4967 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4968 }
4969 }
4970 }
4971
4972 // Max base alignment for 3D Dcc
4973 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4974 {
4975 ADDR_SW_64KB_Z_X,
4976 ADDR_SW_64KB_S_X,
4977 ADDR_SW_64KB_D_X,
4978 ADDR_SW_64KB_R_X,
4979 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4980 };
4981
4982 UINT_32 maxBaseAlignDcc3D = 0;
4983
4984 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4985 {
4986 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4987 {
4988 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4989 ADDR_RSRC_TEX_3D,
4990 ValidSwizzleModeForDcc3D[swIdx],
4991 bppLog2,
4992 0,
4993 TRUE,
4994 &metaBlk);
4995
4996 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4997 }
4998 }
4999
5000 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
5001 }
5002
5003 /**
5004 ************************************************************************************************************************
5005 * Gfx10Lib::GetMetaElementSizeLog2
5006 *
5007 * @brief
5008 * Gets meta data element size log2
5009 * @return
5010 * Meta data element size log2
5011 ************************************************************************************************************************
5012 */
GetMetaElementSizeLog2(Gfx10DataType dataType)5013 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
5014 Gfx10DataType dataType) ///< Data surface type
5015 {
5016 INT_32 elemSizeLog2 = 0;
5017
5018 if (dataType == Gfx10DataColor)
5019 {
5020 elemSizeLog2 = 0;
5021 }
5022 else if (dataType == Gfx10DataDepthStencil)
5023 {
5024 elemSizeLog2 = 2;
5025 }
5026 else
5027 {
5028 ADDR_ASSERT(dataType == Gfx10DataFmask);
5029 elemSizeLog2 = -1;
5030 }
5031
5032 return elemSizeLog2;
5033 }
5034
5035 /**
5036 ************************************************************************************************************************
5037 * Gfx10Lib::GetMetaCacheSizeLog2
5038 *
5039 * @brief
5040 * Gets meta data cache line size log2
5041 * @return
5042 * Meta data cache line size log2
5043 ************************************************************************************************************************
5044 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)5045 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
5046 Gfx10DataType dataType) ///< Data surface type
5047 {
5048 INT_32 cacheSizeLog2 = 0;
5049
5050 if (dataType == Gfx10DataColor)
5051 {
5052 cacheSizeLog2 = 6;
5053 }
5054 else if (dataType == Gfx10DataDepthStencil)
5055 {
5056 cacheSizeLog2 = 8;
5057 }
5058 else
5059 {
5060 ADDR_ASSERT(dataType == Gfx10DataFmask);
5061 cacheSizeLog2 = 8;
5062 }
5063 return cacheSizeLog2;
5064 }
5065
5066 /**
5067 ************************************************************************************************************************
5068 * Gfx10Lib::HwlComputeSurfaceInfoLinear
5069 *
5070 * @brief
5071 * Internal function to calculate alignment for linear surface
5072 *
5073 * @return
5074 * ADDR_E_RETURNCODE
5075 ************************************************************************************************************************
5076 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const5077 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
5078 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
5079 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
5080 ) const
5081 {
5082 ADDR_E_RETURNCODE returnCode = ADDR_OK;
5083
5084 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
5085 {
5086 returnCode = ADDR_INVALIDPARAMS;
5087 }
5088 else
5089 {
5090 const UINT_32 elementBytes = pIn->bpp >> 3;
5091 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
5092 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5093 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
5094 UINT_32 actualHeight = pIn->height;
5095 UINT_64 sliceSize = 0;
5096
5097 if (pIn->numMipLevels > 1)
5098 {
5099 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
5100 {
5101 UINT_32 mipWidth, mipHeight;
5102
5103 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
5104
5105 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
5106
5107 if (pOut->pMipInfo != NULL)
5108 {
5109 pOut->pMipInfo[i].pitch = mipActualWidth;
5110 pOut->pMipInfo[i].height = mipHeight;
5111 pOut->pMipInfo[i].depth = mipDepth;
5112 pOut->pMipInfo[i].offset = sliceSize;
5113 pOut->pMipInfo[i].mipTailOffset = 0;
5114 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
5115 }
5116
5117 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
5118 }
5119 }
5120 else
5121 {
5122 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
5123
5124 if (returnCode == ADDR_OK)
5125 {
5126 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
5127
5128 if (pOut->pMipInfo != NULL)
5129 {
5130 pOut->pMipInfo[0].pitch = pitch;
5131 pOut->pMipInfo[0].height = actualHeight;
5132 pOut->pMipInfo[0].depth = mipDepth;
5133 pOut->pMipInfo[0].offset = 0;
5134 pOut->pMipInfo[0].mipTailOffset = 0;
5135 pOut->pMipInfo[0].macroBlockOffset = 0;
5136 }
5137 }
5138 }
5139
5140 if (returnCode == ADDR_OK)
5141 {
5142 pOut->pitch = pitch;
5143 pOut->height = actualHeight;
5144 pOut->numSlices = pIn->numSlices;
5145 pOut->sliceSize = sliceSize;
5146 pOut->surfSize = sliceSize * pOut->numSlices;
5147 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
5148 pOut->blockWidth = pitchAlign;
5149 pOut->blockHeight = 1;
5150 pOut->blockSlices = 1;
5151
5152 // Following members are useless on GFX10
5153 pOut->mipChainPitch = 0;
5154 pOut->mipChainHeight = 0;
5155 pOut->mipChainSlice = 0;
5156 pOut->epitchIsHeight = FALSE;
5157
5158 // Post calculation validate
5159 ADDR_ASSERT(pOut->sliceSize > 0);
5160 }
5161 }
5162
5163 return returnCode;
5164 }
5165
5166 } // V2
5167 } // Addr
5168