1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE
23 *
24 ***********************************************************************************************************************/
25
26 /**
27 ************************************************************************************************************************
28 * @file gfx10addrlib.cpp
* @brief Contains the implementation for the Gfx10Lib class.
30 ************************************************************************************************************************
31 */
32
33 #include "gfx10addrlib.h"
34 #include "gfx10_gb_reg.h"
35
36 #include "amdgpu_asic_addr.h"
37
38 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40
41 namespace Addr
42 {
43 /**
44 ************************************************************************************************************************
45 * Gfx10HwlInit
46 *
47 * @brief
*   Creates a Gfx10Lib object.
49 *
50 * @return
*   Returns a Gfx10Lib object pointer.
52 ************************************************************************************************************************
53 */
Gfx10HwlInit(const Client * pClient)54 Addr::Lib* Gfx10HwlInit(const Client* pClient)
55 {
56 return V2::Gfx10Lib::CreateObj(pClient);
57 }
58
59 namespace V2
60 {
61
62 ////////////////////////////////////////////////////////////////////////////////////////////////////
63 // Static Const Member
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65
// Per-swizzle-mode flag table with ADDR_SW_MAX_TYPE rows. Each row is labelled with the swizzle mode it
// describes; rows marked "Reserved" are all-zero. The column order is given by the header line inside the
// initializer. The Gfx10Lib constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
    {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
    {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S
    {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved

    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
    {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved

    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
    {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved

    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved

    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
    {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved

    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X
    {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X
    {{0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_4KB_R_X

    {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
    {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
    {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
    {{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_64KB_R_X

    {{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_VAR_Z_X
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_VAR_R_X
    {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
};

// Five {w, h, d} entries. HwlComputeDccInfo() indexes this with Log2(bpp >> 3) to obtain the DCC compress block
// dimensions for thick surfaces.
const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};

// Five {w, h, d} entries each. NOTE(review): the names suggest log2 block dimensions for 64KB and 4KB 3D blocks,
// one entry per element size; they are not referenced in this part of the file - confirm against their users.
const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
114
115 /**
116 ************************************************************************************************************************
117 * Gfx10Lib::Gfx10Lib
118 *
119 * @brief
120 * Constructor
121 *
122 ************************************************************************************************************************
123 */
Gfx10Lib(const Client * pClient)124 Gfx10Lib::Gfx10Lib(const Client* pClient)
125 :
126 Lib(pClient),
127 m_numPkrLog2(0),
128 m_numSaLog2(0),
129 m_colorBaseIndex(0),
130 m_xmaskBaseIndex(0),
131 m_dccBaseIndex(0)
132 {
133 memset(&m_settings, 0, sizeof(m_settings));
134 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
135 }
136
137 /**
138 ************************************************************************************************************************
139 * Gfx10Lib::~Gfx10Lib
140 *
141 * @brief
142 * Destructor
143 ************************************************************************************************************************
144 */
~Gfx10Lib()145 Gfx10Lib::~Gfx10Lib()
146 {
147 }
148
149 /**
150 ************************************************************************************************************************
151 * Gfx10Lib::HwlComputeHtileInfo
152 *
153 * @brief
*   Interface function stub of AddrComputeHtileInfo
155 *
156 * @return
157 * ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const160 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
163 ) const
164 {
165 ADDR_E_RETURNCODE ret = ADDR_OK;
166
167 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
168 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
169 (pIn->hTileFlags.pipeAligned != TRUE))
170 {
171 ret = ADDR_INVALIDPARAMS;
172 }
173 else
174 {
175 Dim3d metaBlk = {};
176 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
177 ADDR_RSRC_TEX_2D,
178 pIn->swizzleMode,
179 0,
180 0,
181 TRUE,
182 &metaBlk);
183
184 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
185 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
186 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
187 pOut->metaBlkWidth = metaBlk.w;
188 pOut->metaBlkHeight = metaBlk.h;
189
190 if (pIn->numMipLevels > 1)
191 {
192 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
193
194 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
195
196 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
197 {
198 UINT_32 mipWidth, mipHeight;
199
200 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
201
202 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
203 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
204
205 const UINT_32 pitchInM = mipWidth / metaBlk.w;
206 const UINT_32 heightInM = mipHeight / metaBlk.h;
207 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
208
209 if (pOut->pMipInfo != NULL)
210 {
211 pOut->pMipInfo[i].inMiptail = FALSE;
212 pOut->pMipInfo[i].offset = offset;
213 pOut->pMipInfo[i].sliceSize = mipSliceSize;
214 }
215
216 offset += mipSliceSize;
217 }
218
219 pOut->sliceSize = offset;
220 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
221 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
222
223 if (pOut->pMipInfo != NULL)
224 {
225 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
226 {
227 pOut->pMipInfo[i].inMiptail = TRUE;
228 pOut->pMipInfo[i].offset = 0;
229 pOut->pMipInfo[i].sliceSize = 0;
230 }
231
232 if (pIn->firstMipIdInTail != pIn->numMipLevels)
233 {
234 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
235 }
236 }
237 }
238 else
239 {
240 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
241 const UINT_32 heightInM = pOut->height / metaBlk.h;
242
243 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
244 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
245 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
246
247 if (pOut->pMipInfo != NULL)
248 {
249 pOut->pMipInfo[0].inMiptail = FALSE;
250 pOut->pMipInfo[0].offset = 0;
251 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
252 }
253 }
254
255 // Get the HTILE address equation (copied from HtileAddrFromCoord).
256 // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
257 const UINT_32 index = m_xmaskBaseIndex;
258 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
259
260 ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
261 pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
262 }
263
264 return ret;
265 }
266
267 /**
268 ************************************************************************************************************************
269 * Gfx10Lib::HwlComputeCmaskInfo
270 *
271 * @brief
272 * Interface function stub of AddrComputeCmaskInfo
273 *
274 * @return
275 * ADDR_E_RETURNCODE
276 ************************************************************************************************************************
277 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const278 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
279 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
280 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
281 ) const
282 {
283 ADDR_E_RETURNCODE ret = ADDR_OK;
284
285 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
286 (pIn->cMaskFlags.pipeAligned != TRUE) ||
287 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
288 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
289 {
290 ret = ADDR_INVALIDPARAMS;
291 }
292 else
293 {
294 Dim3d metaBlk = {};
295 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
296 ADDR_RSRC_TEX_2D,
297 pIn->swizzleMode,
298 0,
299 0,
300 TRUE,
301 &metaBlk);
302
303 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
304 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
305 pOut->baseAlign = metaBlkSize;
306 pOut->metaBlkWidth = metaBlk.w;
307 pOut->metaBlkHeight = metaBlk.h;
308
309 if (pIn->numMipLevels > 1)
310 {
311 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
312
313 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
314
315 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
316 {
317 UINT_32 mipWidth, mipHeight;
318
319 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
320
321 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
322 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
323
324 const UINT_32 pitchInM = mipWidth / metaBlk.w;
325 const UINT_32 heightInM = mipHeight / metaBlk.h;
326
327 if (pOut->pMipInfo != NULL)
328 {
329 pOut->pMipInfo[i].inMiptail = FALSE;
330 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
331 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
332 }
333
334 metaBlkPerSlice += pitchInM * heightInM;
335 }
336
337 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
338
339 if (pOut->pMipInfo != NULL)
340 {
341 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
342 {
343 pOut->pMipInfo[i].inMiptail = TRUE;
344 pOut->pMipInfo[i].offset = 0;
345 pOut->pMipInfo[i].sliceSize = 0;
346 }
347
348 if (pIn->firstMipIdInTail != pIn->numMipLevels)
349 {
350 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
351 }
352 }
353 }
354 else
355 {
356 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
357 const UINT_32 heightInM = pOut->height / metaBlk.h;
358
359 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
360
361 if (pOut->pMipInfo != NULL)
362 {
363 pOut->pMipInfo[0].inMiptail = FALSE;
364 pOut->pMipInfo[0].offset = 0;
365 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
366 }
367 }
368
369 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
370 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
371
372 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
373 const UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
374 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
375 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
376 const UINT_8* patIdxTable =
377 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
378 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
379
380 ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
381 pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
382 }
383
384 return ret;
385 }
386
387 /**
388 ************************************************************************************************************************
389 * Gfx10Lib::HwlComputeDccInfo
390 *
391 * @brief
392 * Interface function to compute DCC key info
393 *
394 * @return
395 * ADDR_E_RETURNCODE
396 ************************************************************************************************************************
397 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const398 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
399 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
400 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
401 ) const
402 {
403 ADDR_E_RETURNCODE ret = ADDR_OK;
404
405 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
406 {
407 // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
408 // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
409 ret = ADDR_INVALIDPARAMS;
410 }
411 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
412 {
413 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
414 ret = ADDR_INVALIDPARAMS;
415 }
416 else
417 {
418 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
419
420 {
421 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
422 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
423
424 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
425
426 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
427 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
428 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
429 }
430
431 if (ret == ADDR_OK)
432 {
433 Dim3d metaBlk = {};
434 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
435 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
436 pIn->resourceType,
437 pIn->swizzleMode,
438 elemLog2,
439 numFragLog2,
440 pIn->dccKeyFlags.pipeAligned,
441 &metaBlk);
442
443 pOut->dccRamBaseAlign = metaBlkSize;
444 pOut->metaBlkWidth = metaBlk.w;
445 pOut->metaBlkHeight = metaBlk.h;
446 pOut->metaBlkDepth = metaBlk.d;
447 pOut->metaBlkSize = metaBlkSize;
448
449 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
450 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
451 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
452
453 if (pIn->numMipLevels > 1)
454 {
455 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
456
457 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
458
459 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
460 {
461 UINT_32 mipWidth, mipHeight;
462
463 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
464
465 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
466 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
467
468 const UINT_32 pitchInM = mipWidth / metaBlk.w;
469 const UINT_32 heightInM = mipHeight / metaBlk.h;
470 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
471
472 if (pOut->pMipInfo != NULL)
473 {
474 pOut->pMipInfo[i].inMiptail = FALSE;
475 pOut->pMipInfo[i].offset = offset;
476 pOut->pMipInfo[i].sliceSize = mipSliceSize;
477 }
478
479 offset += mipSliceSize;
480 }
481
482 pOut->dccRamSliceSize = offset;
483 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
484 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
485
486 if (pOut->pMipInfo != NULL)
487 {
488 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
489 {
490 pOut->pMipInfo[i].inMiptail = TRUE;
491 pOut->pMipInfo[i].offset = 0;
492 pOut->pMipInfo[i].sliceSize = 0;
493 }
494
495 if (pIn->firstMipIdInTail != pIn->numMipLevels)
496 {
497 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
498 }
499 }
500 }
501 else
502 {
503 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
504 const UINT_32 heightInM = pOut->height / metaBlk.h;
505
506 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
507 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
508 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
509
510 if (pOut->pMipInfo != NULL)
511 {
512 pOut->pMipInfo[0].inMiptail = FALSE;
513 pOut->pMipInfo[0].offset = 0;
514 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
515 }
516 }
517
518 // Get the DCC address equation (copied from DccAddrFromCoord)
519 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
520 const UINT_32 numPipeLog2 = m_pipesLog2;
521 UINT_32 index = m_dccBaseIndex + elemLog2;
522 const UINT_8* patIdxTable;
523
524 if (m_settings.supportRbPlus)
525 {
526 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
527
528 if (pIn->dccKeyFlags.pipeAligned)
529 {
530 index += MaxNumOfBpp;
531
532 if (m_numPkrLog2 < 2)
533 {
534 index += m_pipesLog2 * MaxNumOfBpp;
535 }
536 else
537 {
538 // 4 groups for "m_numPkrLog2 < 2" case
539 index += 4 * MaxNumOfBpp;
540
541 const UINT_32 dccPipePerPkr = 3;
542
543 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
544 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
545 }
546 }
547 }
548 else
549 {
550 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
551
552 if (pIn->dccKeyFlags.pipeAligned)
553 {
554 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
555 }
556 else
557 {
558 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
559 }
560 }
561
562 ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
563 pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
564 }
565 }
566
567 return ret;
568 }
569
570 /**
571 ************************************************************************************************************************
572 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
573 *
574 * @brief
575 * Interface function stub of AddrComputeCmaskAddrFromCoord
576 *
577 * @return
578 * ADDR_E_RETURNCODE
579 ************************************************************************************************************************
580 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)581 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
582 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
583 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
584 {
585 // Only support pipe aligned CMask
586 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
587
588 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
589 input.size = sizeof(input);
590 input.cMaskFlags = pIn->cMaskFlags;
591 input.colorFlags = pIn->colorFlags;
592 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
593 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
594 input.numSlices = Max(pIn->numSlices, 1u);
595 input.swizzleMode = pIn->swizzleMode;
596 input.resourceType = pIn->resourceType;
597
598 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
599 output.size = sizeof(output);
600
601 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
602
603 if (returnCode == ADDR_OK)
604 {
605 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
606 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
607 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
608 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
609 const UINT_8* patIdxTable =
610 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
611 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
612
613 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
614 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
615 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
616 blkSizeLog2 + 1, // +1 for nibble offset
617 pIn->x,
618 pIn->y,
619 pIn->slice,
620 0);
621 const UINT_32 xb = pIn->x / output.metaBlkWidth;
622 const UINT_32 yb = pIn->y / output.metaBlkHeight;
623 const UINT_32 pb = output.pitch / output.metaBlkWidth;
624 const UINT_32 blkIndex = (yb * pb) + xb;
625 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
626
627 pOut->addr = (output.sliceSize * pIn->slice) +
628 (blkIndex * (1 << blkSizeLog2)) +
629 ((blkOffset >> 1) ^ pipeXor);
630 pOut->bitPosition = (blkOffset & 1) << 2;
631 }
632
633 return returnCode;
634 }
635
636 /**
637 ************************************************************************************************************************
638 * Gfx10Lib::HwlComputeHtileAddrFromCoord
639 *
640 * @brief
641 * Interface function stub of AddrComputeHtileAddrFromCoord
642 *
643 * @return
644 * ADDR_E_RETURNCODE
645 ************************************************************************************************************************
646 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)647 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
648 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
649 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
650 {
651 ADDR_E_RETURNCODE returnCode = ADDR_OK;
652
653 if (pIn->numMipLevels > 1)
654 {
655 returnCode = ADDR_NOTIMPLEMENTED;
656 }
657 else
658 {
659 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
660 input.size = sizeof(input);
661 input.hTileFlags = pIn->hTileFlags;
662 input.depthFlags = pIn->depthflags;
663 input.swizzleMode = pIn->swizzleMode;
664 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
665 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
666 input.numSlices = Max(pIn->numSlices, 1u);
667 input.numMipLevels = 1;
668
669 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
670 output.size = sizeof(output);
671
672 returnCode = ComputeHtileInfo(&input, &output);
673
674 if (returnCode == ADDR_OK)
675 {
676 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
677 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
678 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;
679 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
680
681 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
682 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
683 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
684 blkSizeLog2 + 1, // +1 for nibble offset
685 pIn->x,
686 pIn->y,
687 pIn->slice,
688 0);
689 const UINT_32 xb = pIn->x / output.metaBlkWidth;
690 const UINT_32 yb = pIn->y / output.metaBlkHeight;
691 const UINT_32 pb = output.pitch / output.metaBlkWidth;
692 const UINT_32 blkIndex = (yb * pb) + xb;
693 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
694
695 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
696 (blkIndex * (1 << blkSizeLog2)) +
697 ((blkOffset >> 1) ^ pipeXor);
698 }
699 }
700
701 return returnCode;
702 }
703
704 /**
705 ************************************************************************************************************************
706 * Gfx10Lib::HwlComputeHtileCoordFromAddr
707 *
708 * @brief
709 * Interface function stub of AddrComputeHtileCoordFromAddr
710 *
711 * @return
712 * ADDR_E_RETURNCODE
713 ************************************************************************************************************************
714 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)715 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
716 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
717 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
718 {
719 ADDR_NOT_IMPLEMENTED();
720
721 return ADDR_OK;
722 }
723
724 /**
725 ************************************************************************************************************************
726 * Gfx10Lib::HwlSupportComputeDccAddrFromCoord
727 *
728 * @brief
729 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
730 *
731 * @return
732 * ADDR_E_RETURNCODE
733 ************************************************************************************************************************
734 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)735 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
736 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
737 {
738 ADDR_E_RETURNCODE returnCode = ADDR_OK;
739
740 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
741 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
742 (pIn->dccKeyFlags.linear == TRUE) ||
743 (pIn->numFrags > 1) ||
744 (pIn->numMipLevels > 1) ||
745 (pIn->mipId > 0))
746 {
747 returnCode = ADDR_NOTSUPPORTED;
748 }
749 else if ((pIn->pitch == 0) ||
750 (pIn->metaBlkWidth == 0) ||
751 (pIn->metaBlkHeight == 0) ||
752 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
753 {
754 returnCode = ADDR_NOTSUPPORTED;
755 }
756
757 return returnCode;
758 }
759
760 /**
761 ************************************************************************************************************************
762 * Gfx10Lib::HwlComputeDccAddrFromCoord
763 *
764 * @brief
765 * Interface function stub of AddrComputeDccAddrFromCoord
766 *
767 * @return
768 * N/A
769 ************************************************************************************************************************
770 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)771 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
772 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
773 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
774 {
775 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
776 const UINT_32 numPipeLog2 = m_pipesLog2;
777 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
778 UINT_32 index = m_dccBaseIndex + elemLog2;
779 const UINT_8* patIdxTable;
780
781 if (m_settings.supportRbPlus)
782 {
783 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
784
785 if (pIn->dccKeyFlags.pipeAligned)
786 {
787 index += MaxNumOfBpp;
788
789 if (m_numPkrLog2 < 2)
790 {
791 index += m_pipesLog2 * MaxNumOfBpp;
792 }
793 else
794 {
795 // 4 groups for "m_numPkrLog2 < 2" case
796 index += 4 * MaxNumOfBpp;
797
798 const UINT_32 dccPipePerPkr = 3;
799
800 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
801 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
802 }
803 }
804 }
805 else
806 {
807 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
808
809 if (pIn->dccKeyFlags.pipeAligned)
810 {
811 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
812 }
813 else
814 {
815 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
816 }
817 }
818
819 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
820 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
821 const UINT_32 blkOffset =
822 ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
823 blkSizeLog2 + 1, // +1 for nibble offset
824 pIn->x,
825 pIn->y,
826 pIn->slice,
827 0);
828 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
829 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
830 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
831 const UINT_32 blkIndex = (yb * pb) + xb;
832 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
833
834 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
835 (blkIndex * (1 << blkSizeLog2)) +
836 ((blkOffset >> 1) ^ pipeXor);
837 }
838
839 /**
840 ************************************************************************************************************************
841 * Gfx10Lib::HwlInitGlobalParams
842 *
843 * @brief
844 * Initializes global parameters
845 *
846 * @return
847 * TRUE if all settings are valid
848 *
849 ************************************************************************************************************************
850 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)851 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
852 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
853 {
854 BOOL_32 valid = TRUE;
855 GB_ADDR_CONFIG_GFX10 gbAddrConfig;
856
857 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
858
859 // These values are copied from CModel code
860 switch (gbAddrConfig.bits.NUM_PIPES)
861 {
862 case ADDR_CONFIG_1_PIPE:
863 m_pipes = 1;
864 m_pipesLog2 = 0;
865 break;
866 case ADDR_CONFIG_2_PIPE:
867 m_pipes = 2;
868 m_pipesLog2 = 1;
869 break;
870 case ADDR_CONFIG_4_PIPE:
871 m_pipes = 4;
872 m_pipesLog2 = 2;
873 break;
874 case ADDR_CONFIG_8_PIPE:
875 m_pipes = 8;
876 m_pipesLog2 = 3;
877 break;
878 case ADDR_CONFIG_16_PIPE:
879 m_pipes = 16;
880 m_pipesLog2 = 4;
881 break;
882 case ADDR_CONFIG_32_PIPE:
883 m_pipes = 32;
884 m_pipesLog2 = 5;
885 break;
886 case ADDR_CONFIG_64_PIPE:
887 m_pipes = 64;
888 m_pipesLog2 = 6;
889 break;
890 default:
891 ADDR_ASSERT_ALWAYS();
892 valid = FALSE;
893 break;
894 }
895
896 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
897 {
898 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
899 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
900 m_pipeInterleaveLog2 = 8;
901 break;
902 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
903 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
904 m_pipeInterleaveLog2 = 9;
905 break;
906 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
907 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
908 m_pipeInterleaveLog2 = 10;
909 break;
910 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
911 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
912 m_pipeInterleaveLog2 = 11;
913 break;
914 default:
915 ADDR_ASSERT_ALWAYS();
916 valid = FALSE;
917 break;
918 }
919
920 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
921 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
922 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
923 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
924
925 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
926 {
927 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
928 m_maxCompFrag = 1;
929 m_maxCompFragLog2 = 0;
930 break;
931 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
932 m_maxCompFrag = 2;
933 m_maxCompFragLog2 = 1;
934 break;
935 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
936 m_maxCompFrag = 4;
937 m_maxCompFragLog2 = 2;
938 break;
939 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
940 m_maxCompFrag = 8;
941 m_maxCompFragLog2 = 3;
942 break;
943 default:
944 ADDR_ASSERT_ALWAYS();
945 valid = FALSE;
946 break;
947 }
948
949 {
950 // Skip unaligned case
951 m_xmaskBaseIndex += MaxNumOfAA;
952
953 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
954 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
955
956 if (m_settings.supportRbPlus)
957 {
958 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
959 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
960
961 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
962
963 ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
964 sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
965
966 if (m_numPkrLog2 >= 2)
967 {
968 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
969 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
970 }
971 }
972 else
973 {
974 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
975 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
976 1;
977
978 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
979
980 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
981 sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
982 }
983 }
984
985 if (m_settings.supportRbPlus)
986 {
987 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
988 // corresponding SW_64KB_* mode
989 m_blockVarSizeLog2 = m_pipesLog2 + 14;
990 }
991
992 if (valid)
993 {
994 InitEquationTable();
995 }
996
997 return valid;
998 }
999
1000 /**
1001 ************************************************************************************************************************
1002 * Gfx10Lib::HwlConvertChipFamily
1003 *
1004 * @brief
1005 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1006 * @return
1007 * ChipFamily
1008 ************************************************************************************************************************
1009 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1010 ChipFamily Gfx10Lib::HwlConvertChipFamily(
1011 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
1012 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1013 {
1014 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1015
1016 m_settings.dccUnsup3DSwDis = 1;
1017 m_settings.dsMipmapHtileFix = 1;
1018
1019 switch (chipFamily)
1020 {
1021 case FAMILY_NV:
1022 if (ASICREV_IS_NAVI10_P(chipRevision))
1023 {
1024 m_settings.dsMipmapHtileFix = 0;
1025 m_settings.isDcn20 = 1;
1026 }
1027
1028 if (ASICREV_IS_NAVI12_P(chipRevision))
1029 {
1030 m_settings.isDcn20 = 1;
1031 }
1032
1033 if (ASICREV_IS_NAVI14_M(chipRevision))
1034 {
1035 m_settings.isDcn20 = 1;
1036 }
1037
1038 if (ASICREV_IS_NAVI21_M(chipRevision))
1039 {
1040 m_settings.supportRbPlus = 1;
1041 m_settings.dccUnsup3DSwDis = 0;
1042 }
1043
1044 if (ASICREV_IS_NAVI22_P(chipRevision))
1045 {
1046 m_settings.supportRbPlus = 1;
1047 m_settings.dccUnsup3DSwDis = 0;
1048 }
1049
1050 if (ASICREV_IS_NAVI23_P(chipRevision))
1051 {
1052 m_settings.supportRbPlus = 1;
1053 m_settings.dccUnsup3DSwDis = 0;
1054 }
1055
1056 if (ASICREV_IS_NAVI24_P(chipRevision))
1057 {
1058 m_settings.supportRbPlus = 1;
1059 m_settings.dccUnsup3DSwDis = 0;
1060 }
1061 break;
1062
1063 case FAMILY_VGH:
1064 if (ASICREV_IS_VANGOGH(chipRevision))
1065 {
1066 m_settings.supportRbPlus = 1;
1067 m_settings.dccUnsup3DSwDis = 0;
1068 }
1069 else
1070 {
1071 ADDR_ASSERT(!"Unknown chip revision");
1072 }
1073 break;
1074
1075 case FAMILY_RMB:
1076 if (ASICREV_IS_REMBRANDT(chipRevision))
1077 {
1078 m_settings.supportRbPlus = 1;
1079 m_settings.dccUnsup3DSwDis = 0;
1080 }
1081 else
1082 {
1083 ADDR_ASSERT(!"Unknown chip revision");
1084 }
1085 break;
1086 case FAMILY_GC_10_3_6:
1087 if (ASICREV_IS_GFX1036(chipRevision))
1088 {
1089 m_settings.supportRbPlus = 1;
1090 m_settings.dccUnsup3DSwDis = 0;
1091 }
1092 break;
1093 case FAMILY_GC_10_3_7:
1094 if (ASICREV_IS_GFX1037(chipRevision))
1095 {
1096 m_settings.supportRbPlus = 1;
1097 m_settings.dccUnsup3DSwDis = 0;
1098 }
1099 else
1100 {
1101 ADDR_ASSERT(!"Unknown chip revision");
1102 }
1103 break;
1104 default:
1105 ADDR_ASSERT(!"Unknown chip family");
1106 break;
1107 }
1108
1109 m_configFlags.use32bppFor422Fmt = TRUE;
1110
1111 return family;
1112 }
1113
1114 /**
1115 ************************************************************************************************************************
1116 * Gfx10Lib::GetBlk256SizeLog2
1117 *
1118 * @brief
1119 * Get block 256 size
1120 *
1121 * @return
1122 * N/A
1123 ************************************************************************************************************************
1124 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1125 void Gfx10Lib::GetBlk256SizeLog2(
1126 AddrResourceType resourceType, ///< [in] Resource type
1127 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1128 UINT_32 elemLog2, ///< [in] element size log2
1129 UINT_32 numSamplesLog2, ///< [in] number of samples
1130 Dim3d* pBlock ///< [out] block size
1131 ) const
1132 {
1133 if (IsThin(resourceType, swizzleMode))
1134 {
1135 UINT_32 blockBits = 8 - elemLog2;
1136
1137 if (IsZOrderSwizzle(swizzleMode))
1138 {
1139 blockBits -= numSamplesLog2;
1140 }
1141
1142 pBlock->w = (blockBits >> 1) + (blockBits & 1);
1143 pBlock->h = (blockBits >> 1);
1144 pBlock->d = 0;
1145 }
1146 else
1147 {
1148 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1149
1150 UINT_32 blockBits = 8 - elemLog2;
1151
1152 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1153 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1154 pBlock->h = (blockBits / 3);
1155 }
1156 }
1157
1158 /**
1159 ************************************************************************************************************************
1160 * Gfx10Lib::GetCompressedBlockSizeLog2
1161 *
1162 * @brief
1163 * Get compress block size
1164 *
1165 * @return
1166 * N/A
1167 ************************************************************************************************************************
1168 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1169 void Gfx10Lib::GetCompressedBlockSizeLog2(
1170 Gfx10DataType dataType, ///< [in] Data type
1171 AddrResourceType resourceType, ///< [in] Resource type
1172 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1173 UINT_32 elemLog2, ///< [in] element size log2
1174 UINT_32 numSamplesLog2, ///< [in] number of samples
1175 Dim3d* pBlock ///< [out] block size
1176 ) const
1177 {
1178 if (dataType == Gfx10DataColor)
1179 {
1180 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1181 }
1182 else
1183 {
1184 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1185 pBlock->w = 3;
1186 pBlock->h = 3;
1187 pBlock->d = 0;
1188 }
1189 }
1190
1191 /**
1192 ************************************************************************************************************************
1193 * Gfx10Lib::GetMetaOverlapLog2
1194 *
1195 * @brief
1196 * Get meta block overlap
1197 *
1198 * @return
1199 * N/A
1200 ************************************************************************************************************************
1201 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1202 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1203 Gfx10DataType dataType, ///< [in] Data type
1204 AddrResourceType resourceType, ///< [in] Resource type
1205 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1206 UINT_32 elemLog2, ///< [in] element size log2
1207 UINT_32 numSamplesLog2 ///< [in] number of samples
1208 ) const
1209 {
1210 Dim3d compBlock;
1211 Dim3d microBlock;
1212
1213 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1214 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
1215
1216 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1217 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1218 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1219 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1220 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1221
1222 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1223 {
1224 overlap++;
1225 }
1226
1227 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1228 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1229 {
1230 overlap--;
1231 }
1232 overlap = Max(overlap, 0);
1233 return overlap;
1234 }
1235
1236 /**
1237 ************************************************************************************************************************
1238 * Gfx10Lib::Get3DMetaOverlapLog2
1239 *
1240 * @brief
1241 * Get 3d meta block overlap
1242 *
1243 * @return
1244 * N/A
1245 ************************************************************************************************************************
1246 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1247 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1248 AddrResourceType resourceType, ///< [in] Resource type
1249 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1250 UINT_32 elemLog2 ///< [in] element size log2
1251 ) const
1252 {
1253 Dim3d microBlock;
1254 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
1255
1256 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1257
1258 if (m_settings.supportRbPlus)
1259 {
1260 overlap++;
1261 }
1262
1263 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1264 {
1265 overlap = 0;
1266 }
1267 return overlap;
1268 }
1269
1270 /**
1271 ************************************************************************************************************************
1272 * Gfx10Lib::GetPipeRotateAmount
1273 *
1274 * @brief
1275 * Get pipe rotate amount
1276 *
1277 * @return
1278 * Pipe rotate amount
1279 ************************************************************************************************************************
1280 */
1281
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1282 INT_32 Gfx10Lib::GetPipeRotateAmount(
1283 AddrResourceType resourceType, ///< [in] Resource type
1284 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1285 ) const
1286 {
1287 INT_32 amount = 0;
1288
1289 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1290 {
1291 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1292 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1293 }
1294
1295 return amount;
1296 }
1297
1298 /**
1299 ************************************************************************************************************************
1300 * Gfx10Lib::GetMetaBlkSize
1301 *
1302 * @brief
1303 * Get metadata block size
1304 *
1305 * @return
1306 * Meta block size
1307 ************************************************************************************************************************
1308 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1309 UINT_32 Gfx10Lib::GetMetaBlkSize(
1310 Gfx10DataType dataType, ///< [in] Data type
1311 AddrResourceType resourceType, ///< [in] Resource type
1312 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1313 UINT_32 elemLog2, ///< [in] element size log2
1314 UINT_32 numSamplesLog2, ///< [in] number of samples
1315 BOOL_32 pipeAlign, ///< [in] pipe align
1316 Dim3d* pBlock ///< [out] block size
1317 ) const
1318 {
1319 INT_32 metablkSizeLog2;
1320
1321 {
1322 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1323 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1324 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1325 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1326 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1327 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1328 INT_32 numPipesLog2 = m_pipesLog2;
1329
1330 if (IsThin(resourceType, swizzleMode))
1331 {
1332 if ((pipeAlign == FALSE) ||
1333 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1334 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1335 {
1336 if (pipeAlign)
1337 {
1338 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1339 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1340 }
1341 else
1342 {
1343 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1344 }
1345 }
1346 else
1347 {
1348 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1349 {
1350 numPipesLog2++;
1351 }
1352
1353 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1354
1355 if (numPipesLog2 >= 4)
1356 {
1357 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1358
1359 // In 16Bpe 8xaa, we have an extra overlap bit
1360 if ((pipeRotateLog2 > 0) &&
1361 (elemLog2 == 4) &&
1362 (numSamplesLog2 == 3) &&
1363 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1364 {
1365 overlapLog2++;
1366 }
1367
1368 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1369 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1370
1371 if (m_settings.supportRbPlus &&
1372 IsRtOptSwizzle(swizzleMode) &&
1373 (numPipesLog2 == 6) &&
1374 (numSamplesLog2 == 3) &&
1375 (m_maxCompFragLog2 == 3) &&
1376 (metablkSizeLog2 < 15))
1377 {
1378 metablkSizeLog2 = 15;
1379 }
1380 }
1381 else
1382 {
1383 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1384 }
1385
1386 if (dataType == Gfx10DataDepthStencil)
1387 {
1388 // For htile surfaces, pad meta block size to 2K * num_pipes
1389 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1390 }
1391
1392 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1393
1394 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1395 {
1396 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1397
1398 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1399 }
1400 }
1401
1402 const INT_32 metablkBitsLog2 =
1403 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1404 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1405 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1406 pBlock->d = 1;
1407 }
1408 else
1409 {
1410 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1411
1412 if (pipeAlign)
1413 {
1414 if (m_settings.supportRbPlus &&
1415 (m_pipesLog2 == m_numSaLog2 + 1) &&
1416 (m_pipesLog2 > 1) &&
1417 IsRbAligned(resourceType, swizzleMode))
1418 {
1419 numPipesLog2++;
1420 }
1421
1422 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1423
1424 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1425 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1426 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1427 }
1428 else
1429 {
1430 metablkSizeLog2 = 12;
1431 }
1432
1433 const INT_32 metablkBitsLog2 =
1434 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1435 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1436 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1437 pBlock->d = 1 << (metablkBitsLog2 / 3);
1438 }
1439 }
1440
1441 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1442 }
1443
1444 /**
1445 ************************************************************************************************************************
1446 * Gfx10Lib::ConvertSwizzlePatternToEquation
1447 *
1448 * @brief
1449 * Convert swizzle pattern to equation.
1450 *
1451 * @return
1452 * N/A
1453 ************************************************************************************************************************
1454 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1455 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1456 UINT_32 elemLog2, ///< [in] element bytes log2
1457 AddrResourceType rsrcType, ///< [in] resource type
1458 AddrSwizzleMode swMode, ///< [in] swizzle mode
1459 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1460 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1461 const
1462 {
1463 // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
1464 ADDR_BIT_SETTING fullSwizzlePattern[20];
1465 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1466
1467 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1468 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1469 pEquation->numBits = blockSizeLog2;
1470 pEquation->stackedDepthSlices = FALSE;
1471
1472 for (UINT_32 i = 0; i < elemLog2; i++)
1473 {
1474 pEquation->addr[i].channel = 0;
1475 pEquation->addr[i].valid = 1;
1476 pEquation->addr[i].index = i;
1477 }
1478
1479 if (IsXor(swMode) == FALSE)
1480 {
1481 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1482 {
1483 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1484
1485 if (pSwizzle[i].x != 0)
1486 {
1487 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1488
1489 pEquation->addr[i].channel = 0;
1490 pEquation->addr[i].valid = 1;
1491 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1492 }
1493 else if (pSwizzle[i].y != 0)
1494 {
1495 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1496
1497 pEquation->addr[i].channel = 1;
1498 pEquation->addr[i].valid = 1;
1499 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1500 }
1501 else
1502 {
1503 ADDR_ASSERT(pSwizzle[i].z != 0);
1504 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1505
1506 pEquation->addr[i].channel = 2;
1507 pEquation->addr[i].valid = 1;
1508 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1509 }
1510
1511 pEquation->xor1[i].value = 0;
1512 pEquation->xor2[i].value = 0;
1513 }
1514 }
1515 else if (IsThin(rsrcType, swMode))
1516 {
1517 Dim3d dim;
1518 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1519
1520 const UINT_32 blkXLog2 = Log2(dim.w);
1521 const UINT_32 blkYLog2 = Log2(dim.h);
1522 const UINT_32 blkXMask = dim.w - 1;
1523 const UINT_32 blkYMask = dim.h - 1;
1524
1525 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1526 UINT_32 xMask = 0;
1527 UINT_32 yMask = 0;
1528 UINT_32 bMask = (1 << elemLog2) - 1;
1529
1530 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1531 {
1532 if (IsPow2(pSwizzle[i].value))
1533 {
1534 if (pSwizzle[i].x != 0)
1535 {
1536 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1537 xMask |= pSwizzle[i].x;
1538
1539 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1540
1541 ADDR_ASSERT(xLog2 < blkXLog2);
1542
1543 pEquation->addr[i].channel = 0;
1544 pEquation->addr[i].valid = 1;
1545 pEquation->addr[i].index = xLog2 + elemLog2;
1546 }
1547 else
1548 {
1549 ADDR_ASSERT(pSwizzle[i].y != 0);
1550 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1551 yMask |= pSwizzle[i].y;
1552
1553 pEquation->addr[i].channel = 1;
1554 pEquation->addr[i].valid = 1;
1555 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1556
1557 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1558 }
1559
1560 swizzle[i].value = 0;
1561 bMask |= 1 << i;
1562 }
1563 else
1564 {
1565 if (pSwizzle[i].z != 0)
1566 {
1567 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1568
1569 pEquation->xor2[i].channel = 2;
1570 pEquation->xor2[i].valid = 1;
1571 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1572 }
1573
1574 swizzle[i].x = pSwizzle[i].x;
1575 swizzle[i].y = pSwizzle[i].y;
1576 swizzle[i].z = swizzle[i].s = 0;
1577
1578 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1579
1580 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1581
1582 if (xHi != 0)
1583 {
1584 ADDR_ASSERT(IsPow2(xHi));
1585 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1586
1587 pEquation->xor1[i].channel = 0;
1588 pEquation->xor1[i].valid = 1;
1589 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1590
1591 swizzle[i].x &= blkXMask;
1592 }
1593
1594 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1595
1596 if (yHi != 0)
1597 {
1598 ADDR_ASSERT(IsPow2(yHi));
1599
1600 if (xHi == 0)
1601 {
1602 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1603 pEquation->xor1[i].channel = 1;
1604 pEquation->xor1[i].valid = 1;
1605 pEquation->xor1[i].index = Log2(yHi);
1606 }
1607 else
1608 {
1609 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1610 pEquation->xor2[i].channel = 1;
1611 pEquation->xor2[i].valid = 1;
1612 pEquation->xor2[i].index = Log2(yHi);
1613 }
1614
1615 swizzle[i].y &= blkYMask;
1616 }
1617
1618 if (swizzle[i].value == 0)
1619 {
1620 bMask |= 1 << i;
1621 }
1622 }
1623 }
1624
1625 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1626 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1627
1628 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1629
1630 while (bMask != blockMask)
1631 {
1632 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1633 {
1634 if ((bMask & (1 << i)) == 0)
1635 {
1636 if (IsPow2(swizzle[i].value))
1637 {
1638 if (swizzle[i].x != 0)
1639 {
1640 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1641 xMask |= swizzle[i].x;
1642
1643 const UINT_32 xLog2 = Log2(swizzle[i].x);
1644
1645 ADDR_ASSERT(xLog2 < blkXLog2);
1646
1647 pEquation->addr[i].channel = 0;
1648 pEquation->addr[i].valid = 1;
1649 pEquation->addr[i].index = xLog2 + elemLog2;
1650 }
1651 else
1652 {
1653 ADDR_ASSERT(swizzle[i].y != 0);
1654 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1655 yMask |= swizzle[i].y;
1656
1657 pEquation->addr[i].channel = 1;
1658 pEquation->addr[i].valid = 1;
1659 pEquation->addr[i].index = Log2(swizzle[i].y);
1660
1661 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1662 }
1663
1664 swizzle[i].value = 0;
1665 bMask |= 1 << i;
1666 }
1667 else
1668 {
1669 const UINT_32 x = swizzle[i].x & xMask;
1670 const UINT_32 y = swizzle[i].y & yMask;
1671
1672 if (x != 0)
1673 {
1674 ADDR_ASSERT(IsPow2(x));
1675
1676 if (pEquation->xor1[i].value == 0)
1677 {
1678 pEquation->xor1[i].channel = 0;
1679 pEquation->xor1[i].valid = 1;
1680 pEquation->xor1[i].index = Log2(x) + elemLog2;
1681 }
1682 else
1683 {
1684 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1685 pEquation->xor2[i].channel = 0;
1686 pEquation->xor2[i].valid = 1;
1687 pEquation->xor2[i].index = Log2(x) + elemLog2;
1688 }
1689 }
1690
1691 if (y != 0)
1692 {
1693 ADDR_ASSERT(IsPow2(y));
1694
1695 if (pEquation->xor1[i].value == 0)
1696 {
1697 pEquation->xor1[i].channel = 1;
1698 pEquation->xor1[i].valid = 1;
1699 pEquation->xor1[i].index = Log2(y);
1700 }
1701 else
1702 {
1703 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1704 pEquation->xor2[i].channel = 1;
1705 pEquation->xor2[i].valid = 1;
1706 pEquation->xor2[i].index = Log2(y);
1707 }
1708 }
1709
1710 swizzle[i].x &= ~x;
1711 swizzle[i].y &= ~y;
1712 }
1713 }
1714 }
1715 }
1716
1717 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1718 }
1719 else
1720 {
1721 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1722 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1723 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1724 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1725 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1726 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1727
1728 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1729 UINT_32 xMask = 0;
1730 UINT_32 yMask = 0;
1731 UINT_32 zMask = 0;
1732 UINT_32 bMask = (1 << elemLog2) - 1;
1733
1734 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1735 {
1736 if (IsPow2(pSwizzle[i].value))
1737 {
1738 if (pSwizzle[i].x != 0)
1739 {
1740 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1741 xMask |= pSwizzle[i].x;
1742
1743 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1744
1745 ADDR_ASSERT(xLog2 < blkXLog2);
1746
1747 pEquation->addr[i].channel = 0;
1748 pEquation->addr[i].valid = 1;
1749 pEquation->addr[i].index = xLog2 + elemLog2;
1750 }
1751 else if (pSwizzle[i].y != 0)
1752 {
1753 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1754 yMask |= pSwizzle[i].y;
1755
1756 pEquation->addr[i].channel = 1;
1757 pEquation->addr[i].valid = 1;
1758 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1759
1760 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1761 }
1762 else
1763 {
1764 ADDR_ASSERT(pSwizzle[i].z != 0);
1765 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1766 zMask |= pSwizzle[i].z;
1767
1768 pEquation->addr[i].channel = 2;
1769 pEquation->addr[i].valid = 1;
1770 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1771
1772 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1773 }
1774
1775 swizzle[i].value = 0;
1776 bMask |= 1 << i;
1777 }
1778 else
1779 {
1780 swizzle[i].x = pSwizzle[i].x;
1781 swizzle[i].y = pSwizzle[i].y;
1782 swizzle[i].z = pSwizzle[i].z;
1783 swizzle[i].s = 0;
1784
1785 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1786
1787 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1788 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1789 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1790
1791 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1792
1793 if (xHi != 0)
1794 {
1795 ADDR_ASSERT(IsPow2(xHi));
1796 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1797
1798 pEquation->xor1[i].channel = 0;
1799 pEquation->xor1[i].valid = 1;
1800 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1801
1802 swizzle[i].x &= blkXMask;
1803 }
1804
1805 if (yHi != 0)
1806 {
1807 ADDR_ASSERT(IsPow2(yHi));
1808
1809 if (pEquation->xor1[i].value == 0)
1810 {
1811 pEquation->xor1[i].channel = 1;
1812 pEquation->xor1[i].valid = 1;
1813 pEquation->xor1[i].index = Log2(yHi);
1814 }
1815 else
1816 {
1817 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1818 pEquation->xor2[i].channel = 1;
1819 pEquation->xor2[i].valid = 1;
1820 pEquation->xor2[i].index = Log2(yHi);
1821 }
1822
1823 swizzle[i].y &= blkYMask;
1824 }
1825
1826 if (zHi != 0)
1827 {
1828 ADDR_ASSERT(IsPow2(zHi));
1829
1830 if (pEquation->xor1[i].value == 0)
1831 {
1832 pEquation->xor1[i].channel = 2;
1833 pEquation->xor1[i].valid = 1;
1834 pEquation->xor1[i].index = Log2(zHi);
1835 }
1836 else
1837 {
1838 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1839 pEquation->xor2[i].channel = 2;
1840 pEquation->xor2[i].valid = 1;
1841 pEquation->xor2[i].index = Log2(zHi);
1842 }
1843
1844 swizzle[i].z &= blkZMask;
1845 }
1846
1847 if (swizzle[i].value == 0)
1848 {
1849 bMask |= 1 << i;
1850 }
1851 }
1852 }
1853
1854 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1855 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1856
1857 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1858
1859 while (bMask != blockMask)
1860 {
1861 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1862 {
1863 if ((bMask & (1 << i)) == 0)
1864 {
1865 if (IsPow2(swizzle[i].value))
1866 {
1867 if (swizzle[i].x != 0)
1868 {
1869 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1870 xMask |= swizzle[i].x;
1871
1872 const UINT_32 xLog2 = Log2(swizzle[i].x);
1873
1874 ADDR_ASSERT(xLog2 < blkXLog2);
1875
1876 pEquation->addr[i].channel = 0;
1877 pEquation->addr[i].valid = 1;
1878 pEquation->addr[i].index = xLog2 + elemLog2;
1879 }
1880 else if (swizzle[i].y != 0)
1881 {
1882 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1883 yMask |= swizzle[i].y;
1884
1885 pEquation->addr[i].channel = 1;
1886 pEquation->addr[i].valid = 1;
1887 pEquation->addr[i].index = Log2(swizzle[i].y);
1888
1889 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1890 }
1891 else
1892 {
1893 ADDR_ASSERT(swizzle[i].z != 0);
1894 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1895 zMask |= swizzle[i].z;
1896
1897 pEquation->addr[i].channel = 2;
1898 pEquation->addr[i].valid = 1;
1899 pEquation->addr[i].index = Log2(swizzle[i].z);
1900
1901 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1902 }
1903
1904 swizzle[i].value = 0;
1905 bMask |= 1 << i;
1906 }
1907 else
1908 {
1909 const UINT_32 x = swizzle[i].x & xMask;
1910 const UINT_32 y = swizzle[i].y & yMask;
1911 const UINT_32 z = swizzle[i].z & zMask;
1912
1913 if (x != 0)
1914 {
1915 ADDR_ASSERT(IsPow2(x));
1916
1917 if (pEquation->xor1[i].value == 0)
1918 {
1919 pEquation->xor1[i].channel = 0;
1920 pEquation->xor1[i].valid = 1;
1921 pEquation->xor1[i].index = Log2(x) + elemLog2;
1922 }
1923 else
1924 {
1925 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1926 pEquation->xor2[i].channel = 0;
1927 pEquation->xor2[i].valid = 1;
1928 pEquation->xor2[i].index = Log2(x) + elemLog2;
1929 }
1930 }
1931
1932 if (y != 0)
1933 {
1934 ADDR_ASSERT(IsPow2(y));
1935
1936 if (pEquation->xor1[i].value == 0)
1937 {
1938 pEquation->xor1[i].channel = 1;
1939 pEquation->xor1[i].valid = 1;
1940 pEquation->xor1[i].index = Log2(y);
1941 }
1942 else
1943 {
1944 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1945 pEquation->xor2[i].channel = 1;
1946 pEquation->xor2[i].valid = 1;
1947 pEquation->xor2[i].index = Log2(y);
1948 }
1949 }
1950
1951 if (z != 0)
1952 {
1953 ADDR_ASSERT(IsPow2(z));
1954
1955 if (pEquation->xor1[i].value == 0)
1956 {
1957 pEquation->xor1[i].channel = 2;
1958 pEquation->xor1[i].valid = 1;
1959 pEquation->xor1[i].index = Log2(z);
1960 }
1961 else
1962 {
1963 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1964 pEquation->xor2[i].channel = 2;
1965 pEquation->xor2[i].valid = 1;
1966 pEquation->xor2[i].index = Log2(z);
1967 }
1968 }
1969
1970 swizzle[i].x &= ~x;
1971 swizzle[i].y &= ~y;
1972 swizzle[i].z &= ~z;
1973 }
1974 }
1975 }
1976 }
1977
1978 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1979 }
1980 }
1981
1982 /**
1983 ************************************************************************************************************************
1984 * Gfx10Lib::InitEquationTable
1985 *
1986 * @brief
1987 * Initialize Equation table.
1988 *
1989 * @return
1990 * N/A
1991 ************************************************************************************************************************
1992 */
InitEquationTable()1993 VOID Gfx10Lib::InitEquationTable()
1994 {
1995 memset(m_equationTable, 0, sizeof(m_equationTable));
1996
1997 // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1998 // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1999 // computing 2D resources.
2000 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2001 {
2002 // Add offset. Start iterating from ADDR_RSRC_TEX_2D
2003 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2004
2005 // Iterate through the maximum number of swizzlemodes a type can hold
2006 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2007 {
2008 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2009
2010 // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
2011 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
2012 {
2013 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2014 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
2015 // overwriting the choice.
2016 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
2017
2018 if (pPatInfo != NULL)
2019 {
2020 ADDR_ASSERT(IsValidSwMode(swMode));
2021
2022 if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
2023 {
2024 ADDR_EQUATION equation = {};
2025
2026 // Passing in pPatInfo to get the addr equation
2027 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2028
2029 equationIndex = m_numEquations;
2030 ADDR_ASSERT(equationIndex < EquationTableSize);
2031 // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
2032 m_equationTable[equationIndex] = equation;
2033 // Increment m_numEquations
2034 m_numEquations++;
2035 }
2036 else // There is no equationIndex
2037 {
2038 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2039 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2040 ADDR_ASSERT(rsrcTypeIdx == 1);
2041 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2042 ADDR_ASSERT(m_settings.supportRbPlus == 1);
2043 }
2044 }
2045 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
2046 // iteration in this nested for-loop
2047 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2048 }
2049 }
2050 }
2051 }
2052
2053 /**
2054 ************************************************************************************************************************
2055 * Gfx10Lib::HwlGetEquationIndex
2056 *
2057 * @brief
2058 * Interface function stub of GetEquationIndex
2059 *
2060 * @return
*       Equation index for the surface, or ADDR_INVALID_EQUATION_INDEX if no equation applies
2062 ************************************************************************************************************************
2063 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2064 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2065 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2066 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2067 ) const
2068 {
2069 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2070
2071 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2072 (pIn->resourceType == ADDR_RSRC_TEX_3D))
2073 {
2074 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2075 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
2076 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2077
2078 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2079 }
2080
2081 if (pOut->pMipInfo != NULL)
2082 {
2083 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2084 {
2085 pOut->pMipInfo[i].equationIndex = equationIdx;
2086 }
2087 }
2088
2089 return equationIdx;
2090 }
2091
2092 /**
2093 ************************************************************************************************************************
2094 * Gfx10Lib::GetValidDisplaySwizzleModes
2095 *
2096 * @brief
2097 * Get valid swizzle modes mask for displayable surface
2098 *
2099 * @return
2100 * Valid swizzle modes mask for displayable surface
2101 ************************************************************************************************************************
2102 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2103 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2104 UINT_32 bpp
2105 ) const
2106 {
2107 UINT_32 swModeMask = 0;
2108
2109 if (bpp <= 64)
2110 {
2111 if (m_settings.isDcn20)
2112 {
2113 swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2114 }
2115 else
2116 {
2117 swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2118 }
2119 }
2120
2121 return swModeMask;
2122 }
2123
2124 /**
2125 ************************************************************************************************************************
2126 * Gfx10Lib::IsValidDisplaySwizzleMode
2127 *
2128 * @brief
2129 * Check if a swizzle mode is supported by display engine
2130 *
2131 * @return
*       TRUE if swizzle mode is supported by display engine
2133 ************************************************************************************************************************
2134 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2135 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2136 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2137 ) const
2138 {
2139 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2140
2141 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2142 }
2143
2144 /**
2145 ************************************************************************************************************************
2146 * Gfx10Lib::GetMaxNumMipsInTail
2147 *
2148 * @brief
2149 * Return max number of mips in tails
2150 *
2151 * @return
2152 * Max number of mips in tails
2153 ************************************************************************************************************************
2154 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2155 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2156 UINT_32 blockSizeLog2, ///< block size log2
2157 BOOL_32 isThin ///< is thin or thick
2158 ) const
2159 {
2160 UINT_32 effectiveLog2 = blockSizeLog2;
2161
2162 if (isThin == FALSE)
2163 {
2164 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2165 }
2166
2167 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2168 }
2169
2170 /**
2171 ************************************************************************************************************************
2172 * Gfx10Lib::HwlComputePipeBankXor
2173 *
2174 * @brief
2175 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2176 *
2177 * @return
*       ADDR_OK; the PipeBankXor value is returned in pOut->pipeBankXor
2179 ************************************************************************************************************************
2180 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2181 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2182 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2183 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2184 ) const
2185 {
2186 if (IsNonPrtXor(pIn->swizzleMode))
2187 {
2188 const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2189
2190 // No pipe xor...
2191 const UINT_32 pipeXor = 0;
2192 UINT_32 bankXor = 0;
2193
2194 const UINT_32 XorPatternLen = 8;
2195 static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1};
2196 static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1};
2197 static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7};
2198 static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14};
2199 static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2200
2201 switch (bankBits)
2202 {
2203 case 1:
2204 case 2:
2205 case 3:
2206 case 4:
2207 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2208 break;
2209 default:
2210 // valid bank bits should be 0~4
2211 ADDR_ASSERT_ALWAYS();
2212 case 0:
2213 break;
2214 }
2215
2216 pOut->pipeBankXor = bankXor | pipeXor;
2217 }
2218 else
2219 {
2220 pOut->pipeBankXor = 0;
2221 }
2222
2223 return ADDR_OK;
2224 }
2225
2226 /**
2227 ************************************************************************************************************************
2228 * Gfx10Lib::HwlComputeSlicePipeBankXor
2229 *
2230 * @brief
2231 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2232 *
2233 * @return
*       ADDR_OK; the slice PipeBankXor value is returned in pOut->pipeBankXor
2235 ************************************************************************************************************************
2236 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2237 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2238 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2239 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2240 ) const
2241 {
2242 if (IsNonPrtXor(pIn->swizzleMode))
2243 {
2244 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2245 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2246 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2247
2248 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2249
2250 if (pIn->bpe != 0)
2251 {
2252 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2253 pIn->resourceType,
2254 Log2(pIn->bpe >> 3),
2255 1);
2256
2257 if (pPatInfo != NULL)
2258 {
2259 ADDR_BIT_SETTING fullSwizzlePattern[20];
2260 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2261
2262 const UINT_32 pipeBankXorOffset =
2263 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2264 blockBits,
2265 0,
2266 0,
2267 pIn->slice,
2268 0);
2269
2270 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2271
2272 // Should have no bit set under pipe interleave
2273 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2274
2275 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
2276 ADDR_ASSERT(pipeBankXor == pipeXor);
2277
2278 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2279 }
2280 }
2281 }
2282 else
2283 {
2284 pOut->pipeBankXor = 0;
2285 }
2286
2287 return ADDR_OK;
2288 }
2289
2290 /**
2291 ************************************************************************************************************************
2292 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2293 *
2294 * @brief
2295 * Compute sub resource offset to support swizzle pattern
2296 *
2297 * @return
*       ADDR_OK; the sub resource offset is returned in pOut->offset
2299 ************************************************************************************************************************
2300 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2301 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2302 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2303 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2304 ) const
2305 {
2306 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2307
2308 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2309
2310 return ADDR_OK;
2311 }
2312
2313 /**
2314 ************************************************************************************************************************
2315 * Gfx10Lib::HwlComputeNonBlockCompressedView
2316 *
2317 * @brief
2318 * Compute non-block-compressed view for a given mipmap level/slice.
2319 *
2320 * @return
2321 * ADDR_E_RETURNCODE
2322 ************************************************************************************************************************
2323 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const2324 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2325 const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
2326 ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
2327 ) const
2328 {
2329 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2330
2331 if (pIn->resourceType != ADDR_RSRC_TEX_2D)
2332 {
2333 // Only 2D resource can have a NonBC view...
2334 returnCode = ADDR_INVALIDPARAMS;
2335 }
2336 else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
2337 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2338 {
2339 // Only support BC1~BC7, ASTC, or ETC2 for now...
2340 returnCode = ADDR_NOTSUPPORTED;
2341 }
2342 else
2343 {
2344 UINT_32 bcWidth, bcHeight;
2345 UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2346
2347 ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2348 infoIn.flags = pIn->flags;
2349 infoIn.swizzleMode = pIn->swizzleMode;
2350 infoIn.resourceType = pIn->resourceType;
2351 infoIn.bpp = bpp;
2352 infoIn.width = RoundUpQuotient(pIn->width, bcWidth);
2353 infoIn.height = RoundUpQuotient(pIn->height, bcHeight);
2354 infoIn.numSlices = pIn->numSlices;
2355 infoIn.numMipLevels = pIn->numMipLevels;
2356 infoIn.numSamples = 1;
2357 infoIn.numFrags = 1;
2358
2359 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2360
2361 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2362 infoOut.pMipInfo = mipInfo;
2363
2364 const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2365
2366 if (tiled)
2367 {
2368 returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2369 }
2370 else
2371 {
2372 returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2373 }
2374
2375 if (returnCode == ADDR_OK)
2376 {
2377 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2378 subOffIn.swizzleMode = infoIn.swizzleMode;
2379 subOffIn.resourceType = infoIn.resourceType;
2380 subOffIn.slice = pIn->slice;
2381 subOffIn.sliceSize = infoOut.sliceSize;
2382 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2383 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
2384
2385 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2386
2387 // For any mipmap level, move nonBc view base address by offset
2388 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2389 pOut->offset = subOffOut.offset;
2390
2391 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2392 slicePbXorIn.bpe = infoIn.bpp;
2393 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
2394 slicePbXorIn.resourceType = infoIn.resourceType;
2395 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2396 slicePbXorIn.slice = pIn->slice;
2397
2398 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2399
2400 // For any mipmap level, nonBc view should use computed pbXor
2401 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2402 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2403
2404 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2405 const UINT_32 requestMipWidth = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2406 const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2407
2408 if (inTail)
2409 {
2410 // For mipmap level that is in mip tail block, hack a lot of things...
2411 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2412 // are fit in tail block:
2413
2414 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2415 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2416
2417 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2418 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2419
2420 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2421 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2422
2423 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2424 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2425 }
2426 // This check should cover at least mipId == 0
2427 else if (requestMipWidth << pIn->mipId == infoIn.width)
2428 {
2429 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2430 // - only one mipmap level and mipId = 0
2431 pOut->mipId = 0;
2432 pOut->numMipLevels = 1;
2433
2434 // (mip0) width = requestMipWidth
2435 pOut->unalignedWidth = requestMipWidth;
2436
2437 // (mip0) height = requestMipHeight
2438 pOut->unalignedHeight = requestMipHeight;
2439 }
2440 else
2441 {
2442 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2443 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2444 // because single mip view may have different pitch value than original (multiple) mip view...
2445 // A simple case would be:
2446 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2447 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2448 // mip0 width = 0x101/mip1 width = 0x80
2449 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2450 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2451
2452 // - 2 levels and mipId = 1
2453 pOut->mipId = 1;
2454 pOut->numMipLevels = 2;
2455
2456 const UINT_32 upperMipWidth = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2457 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2458
2459 const BOOL_32 needToAvoidInTail =
2460 tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2461 TRUE : FALSE;
2462
2463 const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2464 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2465
2466 const BOOL_32 needExtraWidth =
2467 ((upperMipWidth < requestMipWidth * 2) ||
2468 ((upperMipWidth == requestMipWidth * 2) &&
2469 ((needToAvoidInTail == TRUE) ||
2470 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2471
2472 const BOOL_32 needExtraHeight =
2473 ((upperMipHeight < requestMipHeight * 2) ||
2474 ((upperMipHeight == requestMipHeight * 2) &&
2475 ((needToAvoidInTail == TRUE) ||
2476 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2477
2478 // (mip0) width = requestLastMipLevelWidth
2479 pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0);
2480
2481 // (mip0) height = requestLastMipLevelHeight
2482 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2483 }
2484
2485 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2486 ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2487 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2488 ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2489 }
2490 }
2491
2492 return returnCode;
2493 }
2494
2495 /**
2496 ************************************************************************************************************************
2497 * Gfx10Lib::ValidateNonSwModeParams
2498 *
2499 * @brief
2500 * Validate compute surface info params except swizzle mode
2501 *
2502 * @return
2503 * TRUE if parameters are valid, FALSE otherwise
2504 ************************************************************************************************************************
2505 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2506 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2507 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2508 {
2509 BOOL_32 valid = TRUE;
2510
2511 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2512 {
2513 ADDR_ASSERT_ALWAYS();
2514 valid = FALSE;
2515 }
2516
2517 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2518 {
2519 ADDR_ASSERT_ALWAYS();
2520 valid = FALSE;
2521 }
2522
2523 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2524 const AddrResourceType rsrcType = pIn->resourceType;
2525 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2526 const BOOL_32 msaa = (pIn->numFrags > 1);
2527 const BOOL_32 display = flags.display;
2528 const BOOL_32 tex3d = IsTex3d(rsrcType);
2529 const BOOL_32 tex2d = IsTex2d(rsrcType);
2530 const BOOL_32 tex1d = IsTex1d(rsrcType);
2531 const BOOL_32 stereo = flags.qbStereo;
2532
2533 // Resource type check
2534 if (tex1d)
2535 {
2536 if (msaa || display || stereo)
2537 {
2538 ADDR_ASSERT_ALWAYS();
2539 valid = FALSE;
2540 }
2541 }
2542 else if (tex2d)
2543 {
2544 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2545 {
2546 ADDR_ASSERT_ALWAYS();
2547 valid = FALSE;
2548 }
2549 }
2550 else if (tex3d)
2551 {
2552 if (msaa || display || stereo)
2553 {
2554 ADDR_ASSERT_ALWAYS();
2555 valid = FALSE;
2556 }
2557 }
2558 else
2559 {
2560 ADDR_ASSERT_ALWAYS();
2561 valid = FALSE;
2562 }
2563
2564 return valid;
2565 }
2566
2567 /**
2568 ************************************************************************************************************************
2569 * Gfx10Lib::ValidateSwModeParams
2570 *
2571 * @brief
2572 * Validate compute surface info related to swizzle mode
2573 *
2574 * @return
2575 * TRUE if parameters are valid, FALSE otherwise
2576 ************************************************************************************************************************
2577 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2578 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2579 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2580 {
2581 BOOL_32 valid = TRUE;
2582
2583 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2584 {
2585 ADDR_ASSERT_ALWAYS();
2586 valid = FALSE;
2587 }
2588 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2589 {
2590 {
2591 ADDR_ASSERT_ALWAYS();
2592 valid = FALSE;
2593 }
2594 }
2595
2596 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2597 const AddrResourceType rsrcType = pIn->resourceType;
2598 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2599 const BOOL_32 msaa = (pIn->numFrags > 1);
2600 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2601 const BOOL_32 color = flags.color;
2602 const BOOL_32 display = flags.display;
2603 const BOOL_32 tex3d = IsTex3d(rsrcType);
2604 const BOOL_32 tex2d = IsTex2d(rsrcType);
2605 const BOOL_32 tex1d = IsTex1d(rsrcType);
2606 const BOOL_32 thin3d = flags.view3dAs2dArray;
2607 const BOOL_32 linear = IsLinear(swizzle);
2608 const BOOL_32 blk256B = IsBlock256b(swizzle);
2609 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2610 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2611 const BOOL_32 prt = flags.prt;
2612 const BOOL_32 fmask = flags.fmask;
2613
2614 // Misc check
2615 if ((pIn->numFrags > 1) &&
2616 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2617 {
2618 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2619 ADDR_ASSERT_ALWAYS();
2620 valid = FALSE;
2621 }
2622
2623 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2624 {
2625 ADDR_ASSERT_ALWAYS();
2626 valid = FALSE;
2627 }
2628
2629 if ((pIn->bpp == 96) && (linear == FALSE))
2630 {
2631 ADDR_ASSERT_ALWAYS();
2632 valid = FALSE;
2633 }
2634
2635 const UINT_32 swizzleMask = 1 << swizzle;
2636
2637 // Resource type check
2638 if (tex1d)
2639 {
2640 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2641 {
2642 ADDR_ASSERT_ALWAYS();
2643 valid = FALSE;
2644 }
2645 }
2646 else if (tex2d)
2647 {
2648 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2649 {
2650 {
2651 ADDR_ASSERT_ALWAYS();
2652 valid = FALSE;
2653 }
2654 }
2655 else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2656 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2657 {
2658 ADDR_ASSERT_ALWAYS();
2659 valid = FALSE;
2660 }
2661 }
2662 else if (tex3d)
2663 {
2664 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2665 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2666 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2667 {
2668 ADDR_ASSERT_ALWAYS();
2669 valid = FALSE;
2670 }
2671 }
2672
2673 // Swizzle type check
2674 if (linear)
2675 {
2676 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2677 {
2678 ADDR_ASSERT_ALWAYS();
2679 valid = FALSE;
2680 }
2681 }
2682 else if (IsZOrderSwizzle(swizzle))
2683 {
2684 if ((pIn->bpp > 64) ||
2685 (msaa && (color || (pIn->bpp > 32))) ||
2686 ElemLib::IsBlockCompressed(pIn->format) ||
2687 ElemLib::IsMacroPixelPacked(pIn->format))
2688 {
2689 ADDR_ASSERT_ALWAYS();
2690 valid = FALSE;
2691 }
2692 }
2693 else if (IsStandardSwizzle(rsrcType, swizzle))
2694 {
2695 if (zbuffer || msaa)
2696 {
2697 ADDR_ASSERT_ALWAYS();
2698 valid = FALSE;
2699 }
2700 }
2701 else if (IsDisplaySwizzle(rsrcType, swizzle))
2702 {
2703 if (zbuffer || msaa)
2704 {
2705 ADDR_ASSERT_ALWAYS();
2706 valid = FALSE;
2707 }
2708 }
2709 else if (IsRtOptSwizzle(swizzle))
2710 {
2711 if (zbuffer)
2712 {
2713 ADDR_ASSERT_ALWAYS();
2714 valid = FALSE;
2715 }
2716 }
2717 else
2718 {
2719 {
2720 ADDR_ASSERT_ALWAYS();
2721 valid = FALSE;
2722 }
2723 }
2724
2725 // Block type check
2726 if (blk256B)
2727 {
2728 if (zbuffer || tex3d || msaa)
2729 {
2730 ADDR_ASSERT_ALWAYS();
2731 valid = FALSE;
2732 }
2733 }
2734 else if (blkVar)
2735 {
2736 if (m_blockVarSizeLog2 == 0)
2737 {
2738 ADDR_ASSERT_ALWAYS();
2739 valid = FALSE;
2740 }
2741 }
2742
2743 return valid;
2744 }
2745
2746 /**
2747 ************************************************************************************************************************
2748 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2749 *
2750 * @brief
2751 * Compute surface info sanity check
2752 *
2753 * @return
*       ADDR_OK if all parameters are valid, ADDR_INVALIDPARAMS otherwise
2755 ************************************************************************************************************************
2756 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2757 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2758 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2759 ) const
2760 {
2761 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2762 }
2763
2764 /**
2765 ************************************************************************************************************************
2766 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2767 *
2768 * @brief
2769 * Internal function to get suggested surface information for client to use
2770 *
2771 * @return
2772 * ADDR_E_RETURNCODE
2773 ************************************************************************************************************************
2774 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2775 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2776 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2777 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2778 ) const
2779 {
2780 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2781
2782 if (pIn->flags.fmask)
2783 {
2784 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2785 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2786
2787 if (forbid64KbBlockType && forbidVarBlockType)
2788 {
2789 // Invalid combination...
2790 ADDR_ASSERT_ALWAYS();
2791 returnCode = ADDR_INVALIDPARAMS;
2792 }
2793 else
2794 {
2795 pOut->resourceType = ADDR_RSRC_TEX_2D;
2796 pOut->validBlockSet.value = 0;
2797 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2798 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2799 pOut->validSwModeSet.value = 0;
2800 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2801 pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2802 pOut->canXor = TRUE;
2803 pOut->validSwTypeSet.value = AddrSwSetZ;
2804 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2805
2806 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2807
2808 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2809 {
2810 const UINT_8 maxFmaskSwizzleModeType = 2;
2811 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2812 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2813 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2814 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2815 const UINT_32 width = Max(pIn->width, 1u);
2816 const UINT_32 height = Max(pIn->height, 1u);
2817 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2818
2819 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2820 Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};
2821 Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};
2822 UINT_64 padSize[maxFmaskSwizzleModeType] = {};
2823
2824 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2825 {
2826 ComputeBlockDimensionForSurf(&blkDim[i].w,
2827 &blkDim[i].h,
2828 &blkDim[i].d,
2829 fmaskBpp,
2830 1,
2831 pOut->resourceType,
2832 swMode[i]);
2833
2834 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2835 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2836 }
2837
2838 if (BlockTypeWithinMemoryBudget(padSize[0],
2839 padSize[1],
2840 ratioLow,
2841 ratioHi,
2842 pIn->memoryBudget,
2843 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2844 {
2845 use64KbBlockType = FALSE;
2846 }
2847 }
2848 else if (forbidVarBlockType)
2849 {
2850 use64KbBlockType = TRUE;
2851 }
2852
2853 if (use64KbBlockType)
2854 {
2855 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2856 }
2857 else
2858 {
2859 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2860 }
2861 }
2862 }
2863 else
2864 {
2865 UINT_32 bpp = pIn->bpp;
2866 UINT_32 width = Max(pIn->width, 1u);
2867 UINT_32 height = Max(pIn->height, 1u);
2868
2869 // Set format to INVALID will skip this conversion
2870 if (pIn->format != ADDR_FMT_INVALID)
2871 {
2872 ElemMode elemMode = ADDR_UNCOMPRESSED;
2873 UINT_32 expandX, expandY;
2874
2875 // Get compression/expansion factors and element mode which indicates compression/expansion
2876 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2877 &elemMode,
2878 &expandX,
2879 &expandY);
2880
2881 UINT_32 basePitch = 0;
2882 GetElemLib()->AdjustSurfaceInfo(elemMode,
2883 expandX,
2884 expandY,
2885 &bpp,
2886 &basePitch,
2887 &width,
2888 &height);
2889 }
2890
2891 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2892 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2893 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2894 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2895 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2896
2897 // Pre sanity check on non swizzle mode parameters
2898 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2899 localIn.flags = pIn->flags;
2900 localIn.resourceType = pIn->resourceType;
2901 localIn.format = pIn->format;
2902 localIn.bpp = bpp;
2903 localIn.width = width;
2904 localIn.height = height;
2905 localIn.numSlices = numSlices;
2906 localIn.numMipLevels = numMipLevels;
2907 localIn.numSamples = numSamples;
2908 localIn.numFrags = numFrags;
2909
2910 if (ValidateNonSwModeParams(&localIn))
2911 {
2912 // Forbid swizzle mode(s) by client setting
2913 ADDR2_SWMODE_SET allowedSwModeSet = {};
2914 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2915 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2916 allowedSwModeSet.value |=
2917 pIn->forbiddenBlock.macroThin4KB ? 0 :
2918 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2919 allowedSwModeSet.value |=
2920 pIn->forbiddenBlock.macroThick4KB ? 0 :
2921 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2922 allowedSwModeSet.value |=
2923 pIn->forbiddenBlock.macroThin64KB ? 0 :
2924 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2925 allowedSwModeSet.value |=
2926 pIn->forbiddenBlock.macroThick64KB ? 0 :
2927 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2928 allowedSwModeSet.value |=
2929 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2930
2931 if (pIn->preferredSwSet.value != 0)
2932 {
2933 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2934 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2935 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2936 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2937 }
2938
2939 if (pIn->noXor)
2940 {
2941 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2942 }
2943
2944 if (pIn->maxAlign > 0)
2945 {
2946 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2947 {
2948 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2949 }
2950
2951 if (pIn->maxAlign < Size64K)
2952 {
2953 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2954 }
2955
2956 if (pIn->maxAlign < Size4K)
2957 {
2958 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2959 }
2960
2961 if (pIn->maxAlign < Size256)
2962 {
2963 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2964 }
2965 }
2966
2967 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2968 switch (pIn->resourceType)
2969 {
2970 case ADDR_RSRC_TEX_1D:
2971 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2972 break;
2973
2974 case ADDR_RSRC_TEX_2D:
2975 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2976 break;
2977
2978 case ADDR_RSRC_TEX_3D:
2979 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2980
2981 if (pIn->flags.view3dAs2dArray)
2982 {
2983 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2984 }
2985 break;
2986
2987 default:
2988 ADDR_ASSERT_ALWAYS();
2989 allowedSwModeSet.value = 0;
2990 break;
2991 }
2992
2993 if (ElemLib::IsBlockCompressed(pIn->format) ||
2994 ElemLib::IsMacroPixelPacked(pIn->format) ||
2995 (bpp > 64) ||
2996 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2997 {
2998 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2999 }
3000
3001 if (pIn->format == ADDR_FMT_32_32_32)
3002 {
3003 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3004 }
3005
3006 if (msaa)
3007 {
3008 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
3009 }
3010
3011 if (pIn->flags.depth || pIn->flags.stencil)
3012 {
3013 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3014 }
3015
3016 if (pIn->flags.display)
3017 {
3018 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3019 }
3020
3021
3022 if (allowedSwModeSet.value != 0)
3023 {
3024 #if DEBUG
3025 // Post sanity check, at least AddrLib should accept the output generated by its own
3026 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3027
3028 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3029 {
3030 if (validateSwModeSet & 1)
3031 {
3032 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3033 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3034 }
3035
3036 validateSwModeSet >>= 1;
3037 }
3038 #endif
3039
3040 pOut->resourceType = pIn->resourceType;
3041 pOut->validSwModeSet = allowedSwModeSet;
3042 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3043 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3044 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3045
3046 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3047
3048 if (pOut->clientPreferredSwSet.value == 0)
3049 {
3050 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3051 }
3052
3053 // Apply optional restrictions
3054 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3055 {
3056 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3057 {
3058 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3059 // the GL2 in VAR mode, so it should be avoided.
3060 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3061 }
3062 else
3063 {
3064 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3065 // But we have to suffer from low performance because there is no other choice...
3066 ADDR_ASSERT_ALWAYS();
3067 }
3068 }
3069
3070 if (pIn->flags.needEquation)
3071 {
3072 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3073 }
3074
3075 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3076 {
3077 pOut->swizzleMode = ADDR_SW_LINEAR;
3078 }
3079 else
3080 {
3081 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3082
3083 if ((height > 1) && (computeMinSize == FALSE))
3084 {
3085 // Always ignore linear swizzle mode if:
3086 // 1. This is a (2D/3D) resource with height > 1
3087 // 2. Client doesn't require computing minimize size
3088 allowedSwModeSet.swLinear = 0;
3089 }
3090
3091 // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
3092 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3093
3094 // Determine block size if there are 2 or more block type candidates
3095 if (IsPow2(allowedBlockSet.value) == FALSE)
3096 {
3097 // Tracks a valid SwizzleMode for each valid block type
3098 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3099
3100 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3101
3102 if (m_blockVarSizeLog2 != 0)
3103 {
3104 swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3105 }
3106
3107 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3108 {
3109 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3110 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
3111 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3112 }
3113 else
3114 {
3115 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
3116 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
3117 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3118 }
3119
3120 // Tracks the size of each valid swizzle mode's surface in bytes
3121 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3122
3123 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3124 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3125 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3126 UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use
3127 UINT_64 minSize = 0; // Tracks the minimum acceptable block type
3128
3129 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3130
3131 // Iterate through all block types
3132 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3133 {
3134 if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3135 {
3136 localIn.swizzleMode = swMode[i];
3137
3138 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3139 {
3140 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3141 }
3142 else
3143 {
3144 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3145 }
3146
3147 if (returnCode == ADDR_OK)
3148 {
3149 padSize[i] = localOut.surfSize;
3150
3151 if (minSize == 0)
3152 {
3153 minSize = padSize[i];
3154 minSizeBlk = i;
3155 }
3156 else
3157 {
3158 // Checks if the block type is within the memory budget but favors larger blocks
3159 if (BlockTypeWithinMemoryBudget(
3160 minSize,
3161 padSize[i],
3162 ratioLow,
3163 ratioHi,
3164 0.0,
3165 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3166 {
3167 minSize = padSize[i];
3168 minSizeBlk = i;
3169 }
3170 }
3171 }
3172 else
3173 {
3174 ADDR_ASSERT_ALWAYS();
3175 break;
3176 }
3177 }
3178 }
3179
3180 if (pIn->memoryBudget > 1.0)
3181 {
3182 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3183 // smaller-block type again in coming loop
3184 switch (minSizeBlk)
3185 {
3186 case AddrBlockThick64KB:
3187 allowedBlockSet.macroThin64KB = 0;
3188 case AddrBlockThinVar:
3189 case AddrBlockThin64KB:
3190 allowedBlockSet.macroThick4KB = 0;
3191 case AddrBlockThick4KB:
3192 allowedBlockSet.macroThin4KB = 0;
3193 case AddrBlockThin4KB:
3194 allowedBlockSet.micro = 0;
3195 case AddrBlockMicro:
3196 allowedBlockSet.linear = 0;
3197 case AddrBlockLinear:
3198 break;
3199
3200 default:
3201 ADDR_ASSERT_ALWAYS();
3202 break;
3203 }
3204
3205 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3206 {
3207 if ((i != minSizeBlk) &&
3208 IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3209 {
3210 if (BlockTypeWithinMemoryBudget(
3211 minSize,
3212 padSize[i],
3213 0,
3214 0,
3215 pIn->memoryBudget,
3216 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3217 {
3218 // Clear the block type if the memory waste is unacceptable
3219 allowedBlockSet.value &= ~(1u << (i - 1));
3220 }
3221 }
3222 }
3223
3224 // Remove VAR block type if bigger block type is allowed
3225 if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3226 {
3227 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3228 {
3229 allowedBlockSet.var = 0;
3230 }
3231 }
3232
3233 // Remove linear block type if 2 or more block types are allowed
3234 if (IsPow2(allowedBlockSet.value) == FALSE)
3235 {
3236 allowedBlockSet.linear = 0;
3237 }
3238
3239 // Select the biggest allowed block type
3240 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3241
3242 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3243 {
3244 minSizeBlk = AddrBlockLinear;
3245 }
3246 }
3247
3248 switch (minSizeBlk)
3249 {
3250 case AddrBlockLinear:
3251 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3252 break;
3253
3254 case AddrBlockMicro:
3255 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3256 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3257 break;
3258
3259 case AddrBlockThin4KB:
3260 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3261 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3262 break;
3263
3264 case AddrBlockThick4KB:
3265 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3266 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3267 break;
3268
3269 case AddrBlockThin64KB:
3270 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3271 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3272 break;
3273
3274 case AddrBlockThick64KB:
3275 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3276 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3277 break;
3278
3279 case AddrBlockThinVar:
3280 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3281 break;
3282
3283 default:
3284 ADDR_ASSERT_ALWAYS();
3285 allowedSwModeSet.value = 0;
3286 break;
3287 }
3288 }
3289
3290 // Block type should be determined.
3291 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3292
3293 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3294
3295 // Determine swizzle type if there are 2 or more swizzle type candidates
3296 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3297 {
3298 if (ElemLib::IsBlockCompressed(pIn->format))
3299 {
3300 if (allowedSwSet.sw_D)
3301 {
3302 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3303 }
3304 else if (allowedSwSet.sw_S)
3305 {
3306 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3307 }
3308 else
3309 {
3310 ADDR_ASSERT(allowedSwSet.sw_R);
3311 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3312 }
3313 }
3314 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3315 {
3316 if (allowedSwSet.sw_S)
3317 {
3318 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3319 }
3320 else if (allowedSwSet.sw_D)
3321 {
3322 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3323 }
3324 else
3325 {
3326 ADDR_ASSERT(allowedSwSet.sw_R);
3327 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3328 }
3329 }
3330 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3331 {
3332 if (pIn->flags.color &&
3333 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3334 allowedSwSet.sw_D)
3335 {
3336 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3337 }
3338 else if (allowedSwSet.sw_S)
3339 {
3340 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3341 }
3342 else if (allowedSwSet.sw_R)
3343 {
3344 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3345 }
3346 else
3347 {
3348 ADDR_ASSERT(allowedSwSet.sw_Z);
3349 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3350 }
3351 }
3352 else
3353 {
3354 if (allowedSwSet.sw_R)
3355 {
3356 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3357 }
3358 else if (allowedSwSet.sw_D)
3359 {
3360 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3361 }
3362 else if (allowedSwSet.sw_S)
3363 {
3364 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3365 }
3366 else
3367 {
3368 ADDR_ASSERT(allowedSwSet.sw_Z);
3369 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3370 }
3371 }
3372
3373 // Swizzle type should be determined.
3374 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3375 }
3376
3377 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3378 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3379 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3380 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3381 }
3382 }
3383 else
3384 {
3385 // Invalid combination...
3386 ADDR_ASSERT_ALWAYS();
3387 returnCode = ADDR_INVALIDPARAMS;
3388 }
3389 }
3390 else
3391 {
3392 // Invalid combination...
3393 ADDR_ASSERT_ALWAYS();
3394 returnCode = ADDR_INVALIDPARAMS;
3395 }
3396 }
3397
3398 return returnCode;
3399 }
3400
3401 /**
3402 ************************************************************************************************************************
3403 * Gfx10Lib::ComputeStereoInfo
3404 *
3405 * @brief
3406 * Compute height alignment and right eye pipeBankXor for stereo surface
3407 *
3408 * @return
3409 * Error code
3410 *
3411 ************************************************************************************************************************
3412 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3413 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3414 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
3415 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
3416 UINT_32* pRightXor ///< Right eye xor
3417 ) const
3418 {
3419 ADDR_E_RETURNCODE ret = ADDR_OK;
3420
3421 *pRightXor = 0;
3422
3423 if (IsNonPrtXor(pIn->swizzleMode))
3424 {
3425 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3426 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3427 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3428 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3429 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3430
3431 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3432 {
3433 UINT_32 yMax = 0;
3434 UINT_32 yPosMask = 0;
3435
3436 // First get "max y bit"
3437 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3438 {
3439 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3440
3441 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3442 (m_equationTable[eqIndex].addr[i].index > yMax))
3443 {
3444 yMax = m_equationTable[eqIndex].addr[i].index;
3445 }
3446
3447 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3448 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3449 (m_equationTable[eqIndex].xor1[i].index > yMax))
3450 {
3451 yMax = m_equationTable[eqIndex].xor1[i].index;
3452 }
3453
3454 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3455 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3456 (m_equationTable[eqIndex].xor2[i].index > yMax))
3457 {
3458 yMax = m_equationTable[eqIndex].xor2[i].index;
3459 }
3460 }
3461
3462 // Then loop again for populating a position mask of "max Y bit"
3463 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3464 {
3465 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3466 (m_equationTable[eqIndex].addr[i].index == yMax))
3467 {
3468 yPosMask |= 1u << i;
3469 }
3470 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3471 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3472 (m_equationTable[eqIndex].xor1[i].index == yMax))
3473 {
3474 yPosMask |= 1u << i;
3475 }
3476 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3477 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3478 (m_equationTable[eqIndex].xor2[i].index == yMax))
3479 {
3480 yPosMask |= 1u << i;
3481 }
3482 }
3483
3484 const UINT_32 additionalAlign = 1 << yMax;
3485
3486 if (additionalAlign >= *pAlignY)
3487 {
3488 *pAlignY = additionalAlign;
3489
3490 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3491
3492 if ((alignedHeight >> yMax) & 1)
3493 {
3494 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3495 }
3496 }
3497 }
3498 else
3499 {
3500 ret = ADDR_INVALIDPARAMS;
3501 }
3502 }
3503
3504 return ret;
3505 }
3506
3507 /**
3508 ************************************************************************************************************************
3509 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3510 *
3511 * @brief
3512 * Internal function to calculate alignment for tiled surface
3513 *
3514 * @return
3515 * ADDR_E_RETURNCODE
3516 ************************************************************************************************************************
3517 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3518 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3519 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3520 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3521 ) const
3522 {
3523 ADDR_E_RETURNCODE ret;
3524
3525 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3526 pOut->mipChainPitch = 0;
3527 pOut->mipChainHeight = 0;
3528 pOut->mipChainSlice = 0;
3529 pOut->epitchIsHeight = FALSE;
3530
3531 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3532 pOut->mipChainInTail = FALSE;
3533 pOut->firstMipIdInTail = pIn->numMipLevels;
3534
3535 if (IsBlock256b(pIn->swizzleMode))
3536 {
3537 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3538 }
3539 else
3540 {
3541 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3542 }
3543
3544 return ret;
3545 }
3546
3547 /**
3548 ************************************************************************************************************************
3549 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3550 *
3551 * @brief
3552 * Internal function to calculate alignment for micro tiled surface
3553 *
3554 * @return
3555 * ADDR_E_RETURNCODE
3556 ************************************************************************************************************************
3557 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3558 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3559 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3560 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3561 ) const
3562 {
3563 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3564 &pOut->blockHeight,
3565 &pOut->blockSlices,
3566 pIn->bpp,
3567 pIn->numFrags,
3568 pIn->resourceType,
3569 pIn->swizzleMode);
3570
3571 if (ret == ADDR_OK)
3572 {
3573 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3574
3575 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3576 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3577 pOut->numSlices = pIn->numSlices;
3578 pOut->baseAlign = blockSize;
3579
3580 if (pIn->numMipLevels > 1)
3581 {
3582 const UINT_32 mip0Width = pIn->width;
3583 const UINT_32 mip0Height = pIn->height;
3584 UINT_64 mipSliceSize = 0;
3585
3586 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3587 {
3588 UINT_32 mipWidth, mipHeight;
3589
3590 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3591
3592 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3593 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3594
3595 if (pOut->pMipInfo != NULL)
3596 {
3597 pOut->pMipInfo[i].pitch = mipActualWidth;
3598 pOut->pMipInfo[i].height = mipActualHeight;
3599 pOut->pMipInfo[i].depth = 1;
3600 pOut->pMipInfo[i].offset = mipSliceSize;
3601 pOut->pMipInfo[i].mipTailOffset = 0;
3602 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3603 }
3604
3605 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3606 }
3607
3608 pOut->sliceSize = mipSliceSize;
3609 pOut->surfSize = mipSliceSize * pOut->numSlices;
3610 }
3611 else
3612 {
3613 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3614 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3615
3616 if (pOut->pMipInfo != NULL)
3617 {
3618 pOut->pMipInfo[0].pitch = pOut->pitch;
3619 pOut->pMipInfo[0].height = pOut->height;
3620 pOut->pMipInfo[0].depth = 1;
3621 pOut->pMipInfo[0].offset = 0;
3622 pOut->pMipInfo[0].mipTailOffset = 0;
3623 pOut->pMipInfo[0].macroBlockOffset = 0;
3624 }
3625 }
3626
3627 }
3628
3629 return ret;
3630 }
3631
3632 /**
3633 ************************************************************************************************************************
3634 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3635 *
3636 * @brief
3637 * Internal function to calculate alignment for macro tiled surface
3638 *
3639 * @return
3640 * ADDR_E_RETURNCODE
3641 ************************************************************************************************************************
3642 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3643 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3644 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3645 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3646 ) const
3647 {
3648 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3649 &pOut->blockHeight,
3650 &pOut->blockSlices,
3651 pIn->bpp,
3652 pIn->numFrags,
3653 pIn->resourceType,
3654 pIn->swizzleMode);
3655
3656 if (returnCode == ADDR_OK)
3657 {
3658 UINT_32 heightAlign = pOut->blockHeight;
3659
3660 if (pIn->flags.qbStereo)
3661 {
3662 UINT_32 rightXor = 0;
3663
3664 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3665
3666 if (returnCode == ADDR_OK)
3667 {
3668 pOut->pStereoInfo->rightSwizzle = rightXor;
3669 }
3670 }
3671
3672 if (returnCode == ADDR_OK)
3673 {
3674 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3675 const UINT_32 blockSize = 1 << blockSizeLog2;
3676
3677 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3678 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3679 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3680 pOut->baseAlign = blockSize;
3681
3682 if (pIn->numMipLevels > 1)
3683 {
3684 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3685 pIn->swizzleMode,
3686 pOut->blockWidth,
3687 pOut->blockHeight,
3688 pOut->blockSlices);
3689 const UINT_32 mip0Width = pIn->width;
3690 const UINT_32 mip0Height = pIn->height;
3691 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3692 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3693 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3694 const UINT_32 index = Log2(pIn->bpp >> 3);
3695 UINT_32 firstMipInTail = pIn->numMipLevels;
3696 UINT_64 mipChainSliceSize = 0;
3697 UINT_64 mipSize[MaxMipLevels];
3698 UINT_64 mipSliceSize[MaxMipLevels];
3699
3700 Dim3d fixedTailMaxDim = tailMaxDim;
3701
3702 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3703 {
3704 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3705 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3706 }
3707
3708 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3709 {
3710 UINT_32 mipWidth, mipHeight, mipDepth;
3711
3712 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3713
3714 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3715 {
3716 firstMipInTail = i;
3717 mipChainSliceSize += blockSize / pOut->blockSlices;
3718 break;
3719 }
3720 else
3721 {
3722 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3723 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3724 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3725 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3726
3727 mipSize[i] = sliceSize * depth;
3728 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3729 mipChainSliceSize += sliceSize;
3730
3731 if (pOut->pMipInfo != NULL)
3732 {
3733 pOut->pMipInfo[i].pitch = pitch;
3734 pOut->pMipInfo[i].height = height;
3735 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3736 }
3737 }
3738 }
3739
3740 pOut->sliceSize = mipChainSliceSize;
3741 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3742 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3743 pOut->firstMipIdInTail = firstMipInTail;
3744
3745 if (pOut->pMipInfo != NULL)
3746 {
3747 UINT_64 offset = 0;
3748 UINT_64 macroBlkOffset = 0;
3749 UINT_32 tailMaxDepth = 0;
3750
3751 if (firstMipInTail != pIn->numMipLevels)
3752 {
3753 UINT_32 mipWidth, mipHeight;
3754
3755 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3756 &mipWidth, &mipHeight, &tailMaxDepth);
3757
3758 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3759 macroBlkOffset = blockSize;
3760 }
3761
3762 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3763 {
3764 pOut->pMipInfo[i].offset = offset;
3765 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3766 pOut->pMipInfo[i].mipTailOffset = 0;
3767
3768 offset += mipSize[i];
3769 macroBlkOffset += mipSliceSize[i];
3770 }
3771
3772 UINT_32 pitch = tailMaxDim.w;
3773 UINT_32 height = tailMaxDim.h;
3774 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3775
3776 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3777
3778 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3779 {
3780 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3781 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3782
3783 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3784 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3785 pOut->pMipInfo[i].macroBlockOffset = 0;
3786
3787 pOut->pMipInfo[i].pitch = pitch;
3788 pOut->pMipInfo[i].height = height;
3789 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3790
3791 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3792 ((mipOffset >> 10) & 2) |
3793 ((mipOffset >> 11) & 4) |
3794 ((mipOffset >> 12) & 8) |
3795 ((mipOffset >> 13) & 16) |
3796 ((mipOffset >> 14) & 32);
3797 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3798 ((mipOffset >> 9) & 2) |
3799 ((mipOffset >> 10) & 4) |
3800 ((mipOffset >> 11) & 8) |
3801 ((mipOffset >> 12) & 16) |
3802 ((mipOffset >> 13) & 32);
3803
3804 if (blockSizeLog2 & 1)
3805 {
3806 const UINT_32 temp = mipX;
3807 mipX = mipY;
3808 mipY = temp;
3809
3810 if (index & 1)
3811 {
3812 mipY = (mipY << 1) | (mipX & 1);
3813 mipX = mipX >> 1;
3814 }
3815 }
3816
3817 if (isThin)
3818 {
3819 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3820 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3821 pOut->pMipInfo[i].mipTailCoordZ = 0;
3822
3823 pitch = Max(pitch >> 1, Block256_2d[index].w);
3824 height = Max(height >> 1, Block256_2d[index].h);
3825 }
3826 else
3827 {
3828 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3829 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3830 pOut->pMipInfo[i].mipTailCoordZ = 0;
3831
3832 pitch = Max(pitch >> 1, Block256_3d[index].w);
3833 height = Max(height >> 1, Block256_3d[index].h);
3834 }
3835 }
3836 }
3837 }
3838 else
3839 {
3840 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3841 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3842
3843 if (pOut->pMipInfo != NULL)
3844 {
3845 pOut->pMipInfo[0].pitch = pOut->pitch;
3846 pOut->pMipInfo[0].height = pOut->height;
3847 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3848 pOut->pMipInfo[0].offset = 0;
3849 pOut->pMipInfo[0].mipTailOffset = 0;
3850 pOut->pMipInfo[0].macroBlockOffset = 0;
3851 pOut->pMipInfo[0].mipTailCoordX = 0;
3852 pOut->pMipInfo[0].mipTailCoordY = 0;
3853 pOut->pMipInfo[0].mipTailCoordZ = 0;
3854 }
3855 }
3856 }
3857 }
3858
3859 return returnCode;
3860 }
3861
3862 /**
3863 ************************************************************************************************************************
3864 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3865 *
3866 * @brief
3867 * Internal function to calculate address from coord for tiled swizzle surface
3868 *
3869 * @return
3870 * ADDR_E_RETURNCODE
3871 ************************************************************************************************************************
3872 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3873 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3874 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3875 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3876 ) const
3877 {
3878 ADDR_E_RETURNCODE ret;
3879
3880 if (IsBlock256b(pIn->swizzleMode))
3881 {
3882 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3883 }
3884 else
3885 {
3886 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3887 }
3888
3889 return ret;
3890 }
3891
3892 /**
3893 ************************************************************************************************************************
3894 * Gfx10Lib::ComputeOffsetFromEquation
3895 *
3896 * @brief
3897 * Compute offset from equation
3898 *
3899 * @return
3900 * Offset
3901 ************************************************************************************************************************
3902 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3903 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3904 const ADDR_EQUATION* pEq, ///< Equation
3905 UINT_32 x, ///< x coord in bytes
3906 UINT_32 y, ///< y coord in pixel
3907 UINT_32 z ///< z coord in slice
3908 ) const
3909 {
3910 UINT_32 offset = 0;
3911
3912 for (UINT_32 i = 0; i < pEq->numBits; i++)
3913 {
3914 UINT_32 v = 0;
3915
3916 if (pEq->addr[i].valid)
3917 {
3918 if (pEq->addr[i].channel == 0)
3919 {
3920 v ^= (x >> pEq->addr[i].index) & 1;
3921 }
3922 else if (pEq->addr[i].channel == 1)
3923 {
3924 v ^= (y >> pEq->addr[i].index) & 1;
3925 }
3926 else
3927 {
3928 ADDR_ASSERT(pEq->addr[i].channel == 2);
3929 v ^= (z >> pEq->addr[i].index) & 1;
3930 }
3931 }
3932
3933 if (pEq->xor1[i].valid)
3934 {
3935 if (pEq->xor1[i].channel == 0)
3936 {
3937 v ^= (x >> pEq->xor1[i].index) & 1;
3938 }
3939 else if (pEq->xor1[i].channel == 1)
3940 {
3941 v ^= (y >> pEq->xor1[i].index) & 1;
3942 }
3943 else
3944 {
3945 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3946 v ^= (z >> pEq->xor1[i].index) & 1;
3947 }
3948 }
3949
3950 if (pEq->xor2[i].valid)
3951 {
3952 if (pEq->xor2[i].channel == 0)
3953 {
3954 v ^= (x >> pEq->xor2[i].index) & 1;
3955 }
3956 else if (pEq->xor2[i].channel == 1)
3957 {
3958 v ^= (y >> pEq->xor2[i].index) & 1;
3959 }
3960 else
3961 {
3962 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3963 v ^= (z >> pEq->xor2[i].index) & 1;
3964 }
3965 }
3966
3967 offset |= (v << i);
3968 }
3969
3970 return offset;
3971 }
3972
3973 /**
3974 ************************************************************************************************************************
3975 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3976 *
3977 * @brief
3978 * Compute offset from swizzle pattern
3979 *
3980 * @return
3981 * Offset
3982 ************************************************************************************************************************
3983 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3984 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3985 const UINT_64* pPattern, ///< Swizzle pattern
3986 UINT_32 numBits, ///< Number of bits in pattern
3987 UINT_32 x, ///< x coord in pixel
3988 UINT_32 y, ///< y coord in pixel
3989 UINT_32 z, ///< z coord in slice
3990 UINT_32 s ///< sample id
3991 ) const
3992 {
3993 UINT_32 offset = 0;
3994 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3995
3996 for (UINT_32 i = 0; i < numBits; i++)
3997 {
3998 UINT_32 v = 0;
3999
4000 if (pSwizzlePattern[i].x != 0)
4001 {
4002 UINT_16 mask = pSwizzlePattern[i].x;
4003 UINT_32 xBits = x;
4004
4005 while (mask != 0)
4006 {
4007 if (mask & 1)
4008 {
4009 v ^= xBits & 1;
4010 }
4011
4012 xBits >>= 1;
4013 mask >>= 1;
4014 }
4015 }
4016
4017 if (pSwizzlePattern[i].y != 0)
4018 {
4019 UINT_16 mask = pSwizzlePattern[i].y;
4020 UINT_32 yBits = y;
4021
4022 while (mask != 0)
4023 {
4024 if (mask & 1)
4025 {
4026 v ^= yBits & 1;
4027 }
4028
4029 yBits >>= 1;
4030 mask >>= 1;
4031 }
4032 }
4033
4034 if (pSwizzlePattern[i].z != 0)
4035 {
4036 UINT_16 mask = pSwizzlePattern[i].z;
4037 UINT_32 zBits = z;
4038
4039 while (mask != 0)
4040 {
4041 if (mask & 1)
4042 {
4043 v ^= zBits & 1;
4044 }
4045
4046 zBits >>= 1;
4047 mask >>= 1;
4048 }
4049 }
4050
4051 if (pSwizzlePattern[i].s != 0)
4052 {
4053 UINT_16 mask = pSwizzlePattern[i].s;
4054 UINT_32 sBits = s;
4055
4056 while (mask != 0)
4057 {
4058 if (mask & 1)
4059 {
4060 v ^= sBits & 1;
4061 }
4062
4063 sBits >>= 1;
4064 mask >>= 1;
4065 }
4066 }
4067
4068 offset |= (v << i);
4069 }
4070
4071 return offset;
4072 }
4073
4074 /**
4075 ************************************************************************************************************************
4076 * Gfx10Lib::GetSwizzlePatternInfo
4077 *
4078 * @brief
4079 * Get swizzle pattern
4080 *
4081 * @return
4082 * Swizzle pattern information
4083 ************************************************************************************************************************
4084 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4085 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4086 AddrSwizzleMode swizzleMode, ///< Swizzle mode
4087 AddrResourceType resourceType, ///< Resource type
4088 UINT_32 elemLog2, ///< Element size in bytes log2
4089 UINT_32 numFrag ///< Number of fragment
4090 ) const
4091 {
4092 // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from
4093 // the right location
4094 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4095 const ADDR_SW_PATINFO* patInfo = NULL;
4096 const UINT_32 swizzleMask = 1 << swizzleMode;
4097
4098 if (IsBlockVariable(swizzleMode))
4099 {
4100 if (m_blockVarSizeLog2 != 0)
4101 {
4102 ADDR_ASSERT(m_settings.supportRbPlus);
4103
4104 if (IsRtOptSwizzle(swizzleMode))
4105 {
4106 if (numFrag == 1)
4107 {
4108 patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4109 }
4110 else if (numFrag == 2)
4111 {
4112 patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4113 }
4114 else if (numFrag == 4)
4115 {
4116 patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4117 }
4118 else
4119 {
4120 ADDR_ASSERT(numFrag == 8);
4121 patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4122 }
4123 }
4124 else if (IsZOrderSwizzle(swizzleMode))
4125 {
4126 if (numFrag == 1)
4127 {
4128 patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4129 }
4130 else if (numFrag == 2)
4131 {
4132 patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4133 }
4134 else if (numFrag == 4)
4135 {
4136 patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4137 }
4138 else
4139 {
4140 ADDR_ASSERT(numFrag == 8);
4141 patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4142 }
4143 }
4144 }
4145 }
4146 else if (IsLinear(swizzleMode) == FALSE)
4147 {
4148 if (resourceType == ADDR_RSRC_TEX_3D)
4149 {
4150 ADDR_ASSERT(numFrag == 1);
4151
4152 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4153 {
4154 if (IsRtOptSwizzle(swizzleMode))
4155 {
4156 if (swizzleMode == ADDR_SW_4KB_R_X)
4157 {
4158 patInfo = NULL;
4159 }
4160 else
4161 {
4162 patInfo = m_settings.supportRbPlus ?
4163 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4164 }
4165 }
4166 else if (IsZOrderSwizzle(swizzleMode))
4167 {
4168 patInfo = m_settings.supportRbPlus ?
4169 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4170 }
4171 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4172 {
4173 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4174 patInfo = m_settings.supportRbPlus ?
4175 GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4176 }
4177 else
4178 {
4179 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4180
4181 if (IsBlock4kb(swizzleMode))
4182 {
4183 if (swizzleMode == ADDR_SW_4KB_S)
4184 {
4185 patInfo = m_settings.supportRbPlus ?
4186 GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4187 }
4188 else
4189 {
4190 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4191 patInfo = m_settings.supportRbPlus ?
4192 GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4193 }
4194 }
4195 else
4196 {
4197 if (swizzleMode == ADDR_SW_64KB_S)
4198 {
4199 patInfo = m_settings.supportRbPlus ?
4200 GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4201 }
4202 else if (swizzleMode == ADDR_SW_64KB_S_X)
4203 {
4204 patInfo = m_settings.supportRbPlus ?
4205 GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4206 }
4207 else
4208 {
4209 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4210 patInfo = m_settings.supportRbPlus ?
4211 GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4212 }
4213 }
4214 }
4215 }
4216 }
4217 else
4218 {
4219 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4220 {
4221 if (IsBlock256b(swizzleMode))
4222 {
4223 if (swizzleMode == ADDR_SW_256B_S)
4224 {
4225 patInfo = m_settings.supportRbPlus ?
4226 GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4227 }
4228 else
4229 {
4230 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4231 patInfo = m_settings.supportRbPlus ?
4232 GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4233 }
4234 }
4235 else if (IsBlock4kb(swizzleMode))
4236 {
4237 if (IsStandardSwizzle(resourceType, swizzleMode))
4238 {
4239 if (swizzleMode == ADDR_SW_4KB_S)
4240 {
4241 patInfo = m_settings.supportRbPlus ?
4242 GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4243 }
4244 else
4245 {
4246 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4247 patInfo = m_settings.supportRbPlus ?
4248 GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4249 }
4250 }
4251 else
4252 {
4253 if (swizzleMode == ADDR_SW_4KB_D)
4254 {
4255 patInfo = m_settings.supportRbPlus ?
4256 GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4257 }
4258 else if (swizzleMode == ADDR_SW_4KB_R_X)
4259 {
4260 patInfo = NULL;
4261 }
4262 else
4263 {
4264 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4265 patInfo = m_settings.supportRbPlus ?
4266 GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4267 }
4268 }
4269 }
4270 else
4271 {
4272 if (IsRtOptSwizzle(swizzleMode))
4273 {
4274 if (numFrag == 1)
4275 {
4276 patInfo = m_settings.supportRbPlus ?
4277 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4278 }
4279 else if (numFrag == 2)
4280 {
4281 patInfo = m_settings.supportRbPlus ?
4282 GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4283 }
4284 else if (numFrag == 4)
4285 {
4286 patInfo = m_settings.supportRbPlus ?
4287 GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4288 }
4289 else
4290 {
4291 ADDR_ASSERT(numFrag == 8);
4292 patInfo = m_settings.supportRbPlus ?
4293 GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4294 }
4295 }
4296 else if (IsZOrderSwizzle(swizzleMode))
4297 {
4298 if (numFrag == 1)
4299 {
4300 patInfo = m_settings.supportRbPlus ?
4301 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4302 }
4303 else if (numFrag == 2)
4304 {
4305 patInfo = m_settings.supportRbPlus ?
4306 GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4307 }
4308 else if (numFrag == 4)
4309 {
4310 patInfo = m_settings.supportRbPlus ?
4311 GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4312 }
4313 else
4314 {
4315 ADDR_ASSERT(numFrag == 8);
4316 patInfo = m_settings.supportRbPlus ?
4317 GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4318 }
4319 }
4320 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4321 {
4322 if (swizzleMode == ADDR_SW_64KB_D)
4323 {
4324 patInfo = m_settings.supportRbPlus ?
4325 GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4326 }
4327 else if (swizzleMode == ADDR_SW_64KB_D_X)
4328 {
4329 patInfo = m_settings.supportRbPlus ?
4330 GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4331 }
4332 else
4333 {
4334 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4335 patInfo = m_settings.supportRbPlus ?
4336 GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4337 }
4338 }
4339 else
4340 {
4341 if (swizzleMode == ADDR_SW_64KB_S)
4342 {
4343 patInfo = m_settings.supportRbPlus ?
4344 GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4345 }
4346 else if (swizzleMode == ADDR_SW_64KB_S_X)
4347 {
4348 patInfo = m_settings.supportRbPlus ?
4349 GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4350 }
4351 else
4352 {
4353 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4354 patInfo = m_settings.supportRbPlus ?
4355 GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4356 }
4357 }
4358 }
4359 }
4360 }
4361 }
4362
4363 return (patInfo != NULL) ? &patInfo[index] : NULL;
4364 }
4365
4366 /**
4367 ************************************************************************************************************************
4368 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4369 *
4370 * @brief
4371 * Internal function to calculate address from coord for micro tiled swizzle surface
4372 *
4373 * @return
4374 * ADDR_E_RETURNCODE
4375 ************************************************************************************************************************
4376 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4377 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4378 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4379 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4380 ) const
4381 {
4382 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4383 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4384 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4385
4386 localIn.swizzleMode = pIn->swizzleMode;
4387 localIn.flags = pIn->flags;
4388 localIn.resourceType = pIn->resourceType;
4389 localIn.bpp = pIn->bpp;
4390 localIn.width = Max(pIn->unalignedWidth, 1u);
4391 localIn.height = Max(pIn->unalignedHeight, 1u);
4392 localIn.numSlices = Max(pIn->numSlices, 1u);
4393 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4394 localIn.numSamples = Max(pIn->numSamples, 1u);
4395 localIn.numFrags = Max(pIn->numFrags, 1u);
4396 localOut.pMipInfo = mipInfo;
4397
4398 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4399
4400 if (ret == ADDR_OK)
4401 {
4402 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4403 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4404 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4405 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
4406
4407 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4408 {
4409 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4410 const UINT_32 yb = pIn->y / localOut.blockHeight;
4411 const UINT_32 xb = pIn->x / localOut.blockWidth;
4412 const UINT_32 blockIndex = yb * pb + xb;
4413 const UINT_32 blockSize = 256;
4414 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4415 pIn->x << elemLog2,
4416 pIn->y,
4417 0);
4418 pOut->addr = localOut.sliceSize * pIn->slice +
4419 mipInfo[pIn->mipId].macroBlockOffset +
4420 (blockIndex * blockSize) +
4421 blk256Offset;
4422 }
4423 else
4424 {
4425 ret = ADDR_INVALIDPARAMS;
4426 }
4427 }
4428
4429 return ret;
4430 }
4431
4432 /**
4433 ************************************************************************************************************************
4434 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4435 *
4436 * @brief
4437 * Internal function to calculate address from coord for macro tiled swizzle surface
4438 *
4439 * @return
4440 * ADDR_E_RETURNCODE
4441 ************************************************************************************************************************
4442 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4443 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4444 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4445 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4446 ) const
4447 {
4448 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4449 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4450 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4451
4452 localIn.swizzleMode = pIn->swizzleMode;
4453 localIn.flags = pIn->flags;
4454 localIn.resourceType = pIn->resourceType;
4455 localIn.bpp = pIn->bpp;
4456 localIn.width = Max(pIn->unalignedWidth, 1u);
4457 localIn.height = Max(pIn->unalignedHeight, 1u);
4458 localIn.numSlices = Max(pIn->numSlices, 1u);
4459 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4460 localIn.numSamples = Max(pIn->numSamples, 1u);
4461 localIn.numFrags = Max(pIn->numFrags, 1u);
4462 localOut.pMipInfo = mipInfo;
4463
4464 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4465
4466 if (ret == ADDR_OK)
4467 {
4468 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4469 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4470 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4471 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4472 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4473 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4474 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4475
4476 if (localIn.numFrags > 1)
4477 {
4478 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4479 pIn->resourceType,
4480 elemLog2,
4481 localIn.numFrags);
4482
4483 if (pPatInfo != NULL)
4484 {
4485 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4486 const UINT_32 yb = pIn->y / localOut.blockHeight;
4487 const UINT_32 xb = pIn->x / localOut.blockWidth;
4488 const UINT_64 blkIdx = yb * pb + xb;
4489
4490 ADDR_BIT_SETTING fullSwizzlePattern[20];
4491 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4492
4493 const UINT_32 blkOffset =
4494 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4495 blkSizeLog2,
4496 pIn->x,
4497 pIn->y,
4498 pIn->slice,
4499 pIn->sample);
4500
4501 pOut->addr = (localOut.sliceSize * pIn->slice) +
4502 (blkIdx << blkSizeLog2) +
4503 (blkOffset ^ pipeBankXor);
4504 }
4505 else
4506 {
4507 ret = ADDR_INVALIDPARAMS;
4508 }
4509 }
4510 else
4511 {
4512 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4513 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4514 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4515
4516 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4517 {
4518 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4519 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4520 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4521 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4522 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4523 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4524 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4525 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4526 const UINT_32 yb = pIn->y / localOut.blockHeight;
4527 const UINT_32 xb = pIn->x / localOut.blockWidth;
4528 const UINT_64 blkIdx = yb * pb + xb;
4529 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4530 x << elemLog2,
4531 y,
4532 z);
4533 pOut->addr = sliceSize * sliceId +
4534 mipInfo[pIn->mipId].macroBlockOffset +
4535 (blkIdx << blkSizeLog2) +
4536 (blkOffset ^ pipeBankXor);
4537 }
4538 else
4539 {
4540 ret = ADDR_INVALIDPARAMS;
4541 }
4542 }
4543 }
4544
4545 return ret;
4546 }
4547
4548 /**
4549 ************************************************************************************************************************
4550 * Gfx10Lib::HwlComputeMaxBaseAlignments
4551 *
4552 * @brief
4553 * Gets maximum alignments
4554 * @return
4555 * maximum alignments
4556 ************************************************************************************************************************
4557 */
HwlComputeMaxBaseAlignments() const4558 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4559 {
4560 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4561 }
4562
4563 /**
4564 ************************************************************************************************************************
4565 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4566 *
4567 * @brief
4568 * Gets maximum alignments for metadata
4569 * @return
4570 * maximum alignments for metadata
4571 ************************************************************************************************************************
4572 */
HwlComputeMaxMetaBaseAlignments() const4573 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4574 {
4575 Dim3d metaBlk;
4576
4577 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4578 {
4579 ADDR_SW_64KB_Z_X,
4580 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4581 };
4582
4583 UINT_32 maxBaseAlignHtile = 0;
4584 UINT_32 maxBaseAlignCmask = 0;
4585
4586 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4587 {
4588 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4589 {
4590 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4591 {
4592 // Max base alignment for Htile
4593 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4594 ADDR_RSRC_TEX_2D,
4595 ValidSwizzleModeForXmask[swIdx],
4596 bppLog2,
4597 numFragLog2,
4598 TRUE,
4599 &metaBlk);
4600
4601 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4602 }
4603 }
4604
4605 // Max base alignment for Cmask
4606 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4607 ADDR_RSRC_TEX_2D,
4608 ValidSwizzleModeForXmask[swIdx],
4609 0,
4610 0,
4611 TRUE,
4612 &metaBlk);
4613
4614 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4615 }
4616
4617 // Max base alignment for 2D Dcc
4618 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4619 {
4620 ADDR_SW_64KB_S_X,
4621 ADDR_SW_64KB_D_X,
4622 ADDR_SW_64KB_R_X,
4623 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4624 };
4625
4626 UINT_32 maxBaseAlignDcc2D = 0;
4627
4628 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4629 {
4630 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4631 {
4632 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4633 {
4634 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4635 ADDR_RSRC_TEX_2D,
4636 ValidSwizzleModeForDcc2D[swIdx],
4637 bppLog2,
4638 numFragLog2,
4639 TRUE,
4640 &metaBlk);
4641
4642 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4643 }
4644 }
4645 }
4646
4647 // Max base alignment for 3D Dcc
4648 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4649 {
4650 ADDR_SW_64KB_Z_X,
4651 ADDR_SW_64KB_S_X,
4652 ADDR_SW_64KB_D_X,
4653 ADDR_SW_64KB_R_X,
4654 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4655 };
4656
4657 UINT_32 maxBaseAlignDcc3D = 0;
4658
4659 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4660 {
4661 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4662 {
4663 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4664 ADDR_RSRC_TEX_3D,
4665 ValidSwizzleModeForDcc3D[swIdx],
4666 bppLog2,
4667 0,
4668 TRUE,
4669 &metaBlk);
4670
4671 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4672 }
4673 }
4674
4675 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4676 }
4677
4678 /**
4679 ************************************************************************************************************************
4680 * Gfx10Lib::GetMetaElementSizeLog2
4681 *
4682 * @brief
4683 * Gets meta data element size log2
4684 * @return
4685 * Meta data element size log2
4686 ************************************************************************************************************************
4687 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4688 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4689 Gfx10DataType dataType) ///< Data surface type
4690 {
4691 INT_32 elemSizeLog2 = 0;
4692
4693 if (dataType == Gfx10DataColor)
4694 {
4695 elemSizeLog2 = 0;
4696 }
4697 else if (dataType == Gfx10DataDepthStencil)
4698 {
4699 elemSizeLog2 = 2;
4700 }
4701 else
4702 {
4703 ADDR_ASSERT(dataType == Gfx10DataFmask);
4704 elemSizeLog2 = -1;
4705 }
4706
4707 return elemSizeLog2;
4708 }
4709
4710 /**
4711 ************************************************************************************************************************
4712 * Gfx10Lib::GetMetaCacheSizeLog2
4713 *
4714 * @brief
4715 * Gets meta data cache line size log2
4716 * @return
4717 * Meta data cache line size log2
4718 ************************************************************************************************************************
4719 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4720 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4721 Gfx10DataType dataType) ///< Data surface type
4722 {
4723 INT_32 cacheSizeLog2 = 0;
4724
4725 if (dataType == Gfx10DataColor)
4726 {
4727 cacheSizeLog2 = 6;
4728 }
4729 else if (dataType == Gfx10DataDepthStencil)
4730 {
4731 cacheSizeLog2 = 8;
4732 }
4733 else
4734 {
4735 ADDR_ASSERT(dataType == Gfx10DataFmask);
4736 cacheSizeLog2 = 8;
4737 }
4738 return cacheSizeLog2;
4739 }
4740
4741 /**
4742 ************************************************************************************************************************
4743 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4744 *
4745 * @brief
4746 * Internal function to calculate alignment for linear surface
4747 *
4748 * @return
4749 * ADDR_E_RETURNCODE
4750 ************************************************************************************************************************
4751 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4752 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4753 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4754 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4755 ) const
4756 {
4757 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4758
4759 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4760 {
4761 returnCode = ADDR_INVALIDPARAMS;
4762 }
4763 else
4764 {
4765 const UINT_32 elementBytes = pIn->bpp >> 3;
4766 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4767 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4768 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4769 UINT_32 actualHeight = pIn->height;
4770 UINT_64 sliceSize = 0;
4771
4772 if (pIn->numMipLevels > 1)
4773 {
4774 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4775 {
4776 UINT_32 mipWidth, mipHeight;
4777
4778 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4779
4780 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4781
4782 if (pOut->pMipInfo != NULL)
4783 {
4784 pOut->pMipInfo[i].pitch = mipActualWidth;
4785 pOut->pMipInfo[i].height = mipHeight;
4786 pOut->pMipInfo[i].depth = mipDepth;
4787 pOut->pMipInfo[i].offset = sliceSize;
4788 pOut->pMipInfo[i].mipTailOffset = 0;
4789 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4790 }
4791
4792 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4793 }
4794 }
4795 else
4796 {
4797 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4798
4799 if (returnCode == ADDR_OK)
4800 {
4801 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4802
4803 if (pOut->pMipInfo != NULL)
4804 {
4805 pOut->pMipInfo[0].pitch = pitch;
4806 pOut->pMipInfo[0].height = actualHeight;
4807 pOut->pMipInfo[0].depth = mipDepth;
4808 pOut->pMipInfo[0].offset = 0;
4809 pOut->pMipInfo[0].mipTailOffset = 0;
4810 pOut->pMipInfo[0].macroBlockOffset = 0;
4811 }
4812 }
4813 }
4814
4815 if (returnCode == ADDR_OK)
4816 {
4817 pOut->pitch = pitch;
4818 pOut->height = actualHeight;
4819 pOut->numSlices = pIn->numSlices;
4820 pOut->sliceSize = sliceSize;
4821 pOut->surfSize = sliceSize * pOut->numSlices;
4822 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4823 pOut->blockWidth = pitchAlign;
4824 pOut->blockHeight = 1;
4825 pOut->blockSlices = 1;
4826
4827 // Following members are useless on GFX10
4828 pOut->mipChainPitch = 0;
4829 pOut->mipChainHeight = 0;
4830 pOut->mipChainSlice = 0;
4831 pOut->epitchIsHeight = FALSE;
4832
4833 // Post calculation validate
4834 ADDR_ASSERT(pOut->sliceSize > 0);
4835 }
4836 }
4837
4838 return returnCode;
4839 }
4840
4841 } // V2
4842 } // Addr
4843