1 /*
2 * Copyright © 2007-2019 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
/**
************************************************************************************************************************
* @file  gfx10addrlib.cpp
* @brief Contains the implementation of the Gfx10Lib class.
************************************************************************************************************************
*/
33
34 #include "gfx10addrlib.h"
35 #include "gfx10_gb_reg.h"
36
37 #include "amdgpu_asic_addr.h"
38
39 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42 namespace Addr
43 {
44 /**
45 ************************************************************************************************************************
46 * Gfx10HwlInit
47 *
48 * @brief
49 * Creates an Gfx10Lib object.
50 *
51 * @return
52 * Returns an Gfx10Lib object pointer.
53 ************************************************************************************************************************
54 */
Gfx10HwlInit(const Client * pClient)55 Addr::Lib* Gfx10HwlInit(const Client* pClient)
56 {
57 return V2::Gfx10Lib::CreateObj(pClient);
58 }
59
60 namespace V2
61 {
62
63 ////////////////////////////////////////////////////////////////////////////////////////////////////
64 // Static Const Member
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66
67 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
68 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
69 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR
70 {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S
71 {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D
72 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
73
74 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
75 {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S
76 {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D
77 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
78
79 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
80 {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S
81 {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D
82 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
83
84 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
85 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
86 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
87 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
88
89 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
90 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T
91 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T
92 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
93
94 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
95 {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X
96 {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X
97 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
98
99 {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X
100 {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X
101 {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X
102 {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X
103
104 {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X
105 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
106 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved
107 {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X
108 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL
109 };
110
111 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
112
113 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
114 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
115
116 /**
117 ************************************************************************************************************************
118 * Gfx10Lib::Gfx10Lib
119 *
120 * @brief
121 * Constructor
122 *
123 ************************************************************************************************************************
124 */
Gfx10Lib(const Client * pClient)125 Gfx10Lib::Gfx10Lib(const Client* pClient)
126 :
127 Lib(pClient),
128 m_numPkrLog2(0),
129 m_numSaLog2(0),
130 m_colorBaseIndex(0),
131 m_xmaskBaseIndex(0),
132 m_dccBaseIndex(0)
133 {
134 memset(&m_settings, 0, sizeof(m_settings));
135 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
136 }
137
138 /**
139 ************************************************************************************************************************
140 * Gfx10Lib::~Gfx10Lib
141 *
142 * @brief
143 * Destructor
144 ************************************************************************************************************************
145 */
~Gfx10Lib()146 Gfx10Lib::~Gfx10Lib()
147 {
148 }
149
150 /**
151 ************************************************************************************************************************
152 * Gfx10Lib::HwlComputeHtileInfo
153 *
154 * @brief
155 * Interface function stub of AddrComputeHtilenfo
156 *
157 * @return
158 * ADDR_E_RETURNCODE
159 ************************************************************************************************************************
160 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const161 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
162 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
163 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
164 ) const
165 {
166 ADDR_E_RETURNCODE ret = ADDR_OK;
167
168 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
169 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
170 (pIn->hTileFlags.pipeAligned != TRUE))
171 {
172 ret = ADDR_INVALIDPARAMS;
173 }
174 else
175 {
176 Dim3d metaBlk = {0};
177 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
178 ADDR_RSRC_TEX_2D,
179 pIn->swizzleMode,
180 0,
181 0,
182 TRUE,
183 &metaBlk);
184
185 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
186 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
187 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
188 pOut->metaBlkWidth = metaBlk.w;
189 pOut->metaBlkHeight = metaBlk.h;
190
191 if (pIn->numMipLevels > 1)
192 {
193 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
194
195 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
196
197 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
198 {
199 UINT_32 mipWidth, mipHeight;
200
201 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
202
203 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
204 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
205
206 const UINT_32 pitchInM = mipWidth / metaBlk.w;
207 const UINT_32 heightInM = mipHeight / metaBlk.h;
208 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
209
210 if (pOut->pMipInfo != NULL)
211 {
212 pOut->pMipInfo[i].inMiptail = FALSE;
213 pOut->pMipInfo[i].offset = offset;
214 pOut->pMipInfo[i].sliceSize = mipSliceSize;
215 }
216
217 offset += mipSliceSize;
218 }
219
220 pOut->sliceSize = offset;
221 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
222 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
223
224 if (pOut->pMipInfo != NULL)
225 {
226 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
227 {
228 pOut->pMipInfo[i].inMiptail = TRUE;
229 pOut->pMipInfo[i].offset = 0;
230 pOut->pMipInfo[i].sliceSize = 0;
231 }
232
233 if (pIn->firstMipIdInTail != pIn->numMipLevels)
234 {
235 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
236 }
237 }
238 }
239 else
240 {
241 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
242 const UINT_32 heightInM = pOut->height / metaBlk.h;
243
244 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
245 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
246 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
247
248 if (pOut->pMipInfo != NULL)
249 {
250 pOut->pMipInfo[0].inMiptail = FALSE;
251 pOut->pMipInfo[0].offset = 0;
252 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
253 }
254 }
255 }
256
257 return ret;
258 }
259
260 /**
261 ************************************************************************************************************************
262 * Gfx10Lib::HwlComputeCmaskInfo
263 *
264 * @brief
265 * Interface function stub of AddrComputeCmaskInfo
266 *
267 * @return
268 * ADDR_E_RETURNCODE
269 ************************************************************************************************************************
270 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const271 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
272 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
273 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
274 ) const
275 {
276 ADDR_E_RETURNCODE ret = ADDR_OK;
277
278 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
279 (pIn->cMaskFlags.pipeAligned != TRUE) ||
280 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
281 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
282 {
283 ret = ADDR_INVALIDPARAMS;
284 }
285 else
286 {
287 Dim3d metaBlk = {0};
288 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
289 ADDR_RSRC_TEX_2D,
290 pIn->swizzleMode,
291 0,
292 0,
293 TRUE,
294 &metaBlk);
295
296 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
297 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
298 pOut->baseAlign = metaBlkSize;
299 pOut->metaBlkWidth = metaBlk.w;
300 pOut->metaBlkHeight = metaBlk.h;
301
302 if (pIn->numMipLevels > 1)
303 {
304 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
305
306 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
307
308 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
309 {
310 UINT_32 mipWidth, mipHeight;
311
312 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
313
314 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
315 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
316
317 const UINT_32 pitchInM = mipWidth / metaBlk.w;
318 const UINT_32 heightInM = mipHeight / metaBlk.h;
319
320 if (pOut->pMipInfo != NULL)
321 {
322 pOut->pMipInfo[i].inMiptail = FALSE;
323 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
324 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
325 }
326
327 metaBlkPerSlice += pitchInM * heightInM;
328 }
329
330 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
331
332 if (pOut->pMipInfo != NULL)
333 {
334 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
335 {
336 pOut->pMipInfo[i].inMiptail = TRUE;
337 pOut->pMipInfo[i].offset = 0;
338 pOut->pMipInfo[i].sliceSize = 0;
339 }
340
341 if (pIn->firstMipIdInTail != pIn->numMipLevels)
342 {
343 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
344 }
345 }
346 }
347 else
348 {
349 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
350 const UINT_32 heightInM = pOut->height / metaBlk.h;
351
352 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
353
354 if (pOut->pMipInfo != NULL)
355 {
356 pOut->pMipInfo[0].inMiptail = FALSE;
357 pOut->pMipInfo[0].offset = 0;
358 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
359 }
360 }
361
362 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
363 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
364 }
365
366 return ret;
367 }
368
369 /**
370 ************************************************************************************************************************
371 * Gfx10Lib::HwlComputeDccInfo
372 *
373 * @brief
374 * Interface function to compute DCC key info
375 *
376 * @return
377 * ADDR_E_RETURNCODE
378 ************************************************************************************************************************
379 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const380 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
381 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
382 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
383 ) const
384 {
385 ADDR_E_RETURNCODE ret = ADDR_OK;
386
387 if (pIn->swizzleMode != ADDR_SW_64KB_Z_X && pIn->swizzleMode != ADDR_SW_64KB_R_X)
388 {
389 // Hardware does not support DCC for this swizzle mode.
390 ret = ADDR_INVALIDPARAMS;
391 }
392 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
393 {
394 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
395 ret = ADDR_INVALIDPARAMS;
396 }
397 else
398 {
399 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
400
401 {
402 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
403 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
404
405 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
406
407 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
408 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
409 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
410 }
411
412 if (ret == ADDR_OK)
413 {
414 Dim3d metaBlk = {0};
415 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
416 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
417 pIn->resourceType,
418 pIn->swizzleMode,
419 elemLog2,
420 numFragLog2,
421 pIn->dccKeyFlags.pipeAligned,
422 &metaBlk);
423
424 pOut->dccRamBaseAlign = metaBlkSize;
425 pOut->metaBlkWidth = metaBlk.w;
426 pOut->metaBlkHeight = metaBlk.h;
427 pOut->metaBlkDepth = metaBlk.d;
428 pOut->metaBlkSize = metaBlkSize;
429
430 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
431 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
432 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
433
434 if (pIn->numMipLevels > 1)
435 {
436 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
437
438 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
439
440 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
441 {
442 UINT_32 mipWidth, mipHeight;
443
444 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
445
446 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
447 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
448
449 const UINT_32 pitchInM = mipWidth / metaBlk.w;
450 const UINT_32 heightInM = mipHeight / metaBlk.h;
451 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
452
453 if (pOut->pMipInfo != NULL)
454 {
455 pOut->pMipInfo[i].inMiptail = FALSE;
456 pOut->pMipInfo[i].offset = offset;
457 pOut->pMipInfo[i].sliceSize = mipSliceSize;
458 }
459
460 offset += mipSliceSize;
461 }
462
463 pOut->dccRamSliceSize = offset;
464 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
465 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
466
467 if (pOut->pMipInfo != NULL)
468 {
469 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
470 {
471 pOut->pMipInfo[i].inMiptail = TRUE;
472 pOut->pMipInfo[i].offset = 0;
473 pOut->pMipInfo[i].sliceSize = 0;
474 }
475
476 if (pIn->firstMipIdInTail != pIn->numMipLevels)
477 {
478 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
479 }
480 }
481 }
482 else
483 {
484 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
485 const UINT_32 heightInM = pOut->height / metaBlk.h;
486
487 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
488 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
489 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
490
491 if (pOut->pMipInfo != NULL)
492 {
493 pOut->pMipInfo[0].inMiptail = FALSE;
494 pOut->pMipInfo[0].offset = 0;
495 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
496 }
497 }
498 }
499 }
500
501 return ret;
502 }
503
504 /**
505 ************************************************************************************************************************
506 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
507 *
508 * @brief
509 * Interface function stub of AddrComputeCmaskAddrFromCoord
510 *
511 * @return
512 * ADDR_E_RETURNCODE
513 ************************************************************************************************************************
514 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)515 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
516 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
517 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
518 {
519 // Only support pipe aligned CMask
520 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
521
522 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
523 input.size = sizeof(input);
524 input.cMaskFlags = pIn->cMaskFlags;
525 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
526 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
527 input.numSlices = Max(pIn->numSlices, 1u);
528 input.swizzleMode = pIn->swizzleMode;
529 input.resourceType = pIn->resourceType;
530
531 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
532 output.size = sizeof(output);
533
534 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
535
536 if (returnCode == ADDR_OK)
537 {
538 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
539 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
540 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
541 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
542 const UINT_8* patIdxTable =
543 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
544 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
545
546
547 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
548 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
549 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
550 blkSizeLog2 + 1, // +1 for nibble offset
551 pIn->x,
552 pIn->y,
553 pIn->slice,
554 0);
555 const UINT_32 xb = pIn->x / output.metaBlkWidth;
556 const UINT_32 yb = pIn->y / output.metaBlkHeight;
557 const UINT_32 pb = output.pitch / output.metaBlkWidth;
558 const UINT_32 blkIndex = (yb * pb) + xb;
559 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
560
561 pOut->addr = (output.sliceSize * pIn->slice) +
562 (blkIndex * (1 << blkSizeLog2)) +
563 ((blkOffset >> 1) ^ pipeXor);
564 pOut->bitPosition = (blkOffset & 1) << 2;
565 }
566
567 return returnCode;
568 }
569
570 /**
571 ************************************************************************************************************************
572 * Gfx10Lib::HwlComputeHtileAddrFromCoord
573 *
574 * @brief
575 * Interface function stub of AddrComputeHtileAddrFromCoord
576 *
577 * @return
578 * ADDR_E_RETURNCODE
579 ************************************************************************************************************************
580 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)581 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
582 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
583 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
584 {
585 ADDR_E_RETURNCODE returnCode = ADDR_OK;
586
587 if (pIn->numMipLevels > 1)
588 {
589 returnCode = ADDR_NOTIMPLEMENTED;
590 }
591 else
592 {
593 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
594 input.size = sizeof(input);
595 input.hTileFlags = pIn->hTileFlags;
596 input.depthFlags = pIn->depthflags;
597 input.swizzleMode = pIn->swizzleMode;
598 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
599 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
600 input.numSlices = Max(pIn->numSlices, 1u);
601 input.numMipLevels = 1;
602
603 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
604 output.size = sizeof(output);
605
606 returnCode = ComputeHtileInfo(&input, &output);
607
608 if (returnCode == ADDR_OK)
609 {
610 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
611 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
612 const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;
613 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
614
615
616 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
617 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
618 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
619 blkSizeLog2 + 1, // +1 for nibble offset
620 pIn->x,
621 pIn->y,
622 pIn->slice,
623 0);
624 const UINT_32 xb = pIn->x / output.metaBlkWidth;
625 const UINT_32 yb = pIn->y / output.metaBlkHeight;
626 const UINT_32 pb = output.pitch / output.metaBlkWidth;
627 const UINT_32 blkIndex = (yb * pb) + xb;
628 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
629
630 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
631 (blkIndex * (1 << blkSizeLog2)) +
632 ((blkOffset >> 1) ^ pipeXor);
633 }
634 }
635
636 return returnCode;
637 }
638
639 /**
640 ************************************************************************************************************************
641 * Gfx10Lib::HwlComputeHtileCoordFromAddr
642 *
643 * @brief
644 * Interface function stub of AddrComputeHtileCoordFromAddr
645 *
646 * @return
647 * ADDR_E_RETURNCODE
648 ************************************************************************************************************************
649 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)650 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
651 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
652 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
653 {
654 ADDR_NOT_IMPLEMENTED();
655
656 return ADDR_OK;
657 }
658
659 /**
660 ************************************************************************************************************************
661 * Gfx10Lib::HwlComputeDccAddrFromCoord
662 *
663 * @brief
664 * Interface function stub of AddrComputeDccAddrFromCoord
665 *
666 * @return
667 * ADDR_E_RETURNCODE
668 ************************************************************************************************************************
669 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)670 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
671 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
672 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
673 {
674 ADDR_E_RETURNCODE returnCode = ADDR_OK;
675
676 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
677 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
678 (pIn->dccKeyFlags.linear == TRUE) ||
679 (pIn->numFrags > 1) ||
680 (pIn->numMipLevels > 1) ||
681 (pIn->mipId > 0))
682 {
683 returnCode = ADDR_NOTSUPPORTED;
684 }
685 else
686 {
687 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
688 const UINT_32 numPipeLog2 = m_pipesLog2;
689 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
690 UINT_32 index = m_dccBaseIndex + elemLog2;
691 const UINT_8* patIdxTable;
692
693 if (m_settings.supportRbPlus)
694 {
695 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
696
697 if (pIn->dccKeyFlags.pipeAligned)
698 {
699 index += MaxNumOfBpp;
700
701 if (m_numPkrLog2 < 2)
702 {
703 index += m_pipesLog2 * MaxNumOfBpp;
704 }
705 else
706 {
707 // 4 groups for "m_numPkrLog2 < 2" case
708 index += 4 * MaxNumOfBpp;
709
710 const UINT_32 dccPipePerPkr = 3;
711
712 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
713 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
714 }
715 }
716 }
717 else
718 {
719 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
720
721 if (pIn->dccKeyFlags.pipeAligned)
722 {
723 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
724 }
725 else
726 {
727 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
728 }
729 }
730
731 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
732 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
733 const UINT_32 blkOffset =
734 ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
735 blkSizeLog2 + 1, // +1 for nibble offset
736 pIn->x,
737 pIn->y,
738 pIn->slice,
739 0);
740 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
741 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
742 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
743 const UINT_32 blkIndex = (yb * pb) + xb;
744 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
745
746 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
747 (blkIndex * (1 << blkSizeLog2)) +
748 ((blkOffset >> 1) ^ pipeXor);
749 }
750
751 return returnCode;
752 }
753
754 /**
755 ************************************************************************************************************************
756 * Gfx10Lib::HwlInitGlobalParams
757 *
758 * @brief
759 * Initializes global parameters
760 *
761 * @return
762 * TRUE if all settings are valid
763 *
764 ************************************************************************************************************************
765 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)766 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
767 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
768 {
769 BOOL_32 valid = TRUE;
770 GB_ADDR_CONFIG_gfx10 gbAddrConfig;
771
772 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
773
774 // These values are copied from CModel code
775 switch (gbAddrConfig.bits.NUM_PIPES)
776 {
777 case ADDR_CONFIG_1_PIPE:
778 m_pipes = 1;
779 m_pipesLog2 = 0;
780 break;
781 case ADDR_CONFIG_2_PIPE:
782 m_pipes = 2;
783 m_pipesLog2 = 1;
784 break;
785 case ADDR_CONFIG_4_PIPE:
786 m_pipes = 4;
787 m_pipesLog2 = 2;
788 break;
789 case ADDR_CONFIG_8_PIPE:
790 m_pipes = 8;
791 m_pipesLog2 = 3;
792 break;
793 case ADDR_CONFIG_16_PIPE:
794 m_pipes = 16;
795 m_pipesLog2 = 4;
796 break;
797 case ADDR_CONFIG_32_PIPE:
798 m_pipes = 32;
799 m_pipesLog2 = 5;
800 break;
801 case ADDR_CONFIG_64_PIPE:
802 m_pipes = 64;
803 m_pipesLog2 = 6;
804 break;
805 default:
806 ADDR_ASSERT_ALWAYS();
807 valid = FALSE;
808 break;
809 }
810
811 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
812 {
813 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
814 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
815 m_pipeInterleaveLog2 = 8;
816 break;
817 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
818 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
819 m_pipeInterleaveLog2 = 9;
820 break;
821 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
822 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
823 m_pipeInterleaveLog2 = 10;
824 break;
825 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
826 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
827 m_pipeInterleaveLog2 = 11;
828 break;
829 default:
830 ADDR_ASSERT_ALWAYS();
831 valid = FALSE;
832 break;
833 }
834
835 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
836 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
837 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
838 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
839
840 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
841 {
842 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
843 m_maxCompFrag = 1;
844 m_maxCompFragLog2 = 0;
845 break;
846 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
847 m_maxCompFrag = 2;
848 m_maxCompFragLog2 = 1;
849 break;
850 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
851 m_maxCompFrag = 4;
852 m_maxCompFragLog2 = 2;
853 break;
854 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
855 m_maxCompFrag = 8;
856 m_maxCompFragLog2 = 3;
857 break;
858 default:
859 ADDR_ASSERT_ALWAYS();
860 valid = FALSE;
861 break;
862 }
863
864 {
865 // Skip unaligned case
866 m_xmaskBaseIndex += MaxNumOfAA;
867
868 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
869 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
870
871 if (m_settings.supportRbPlus)
872 {
873 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
874 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
875
876 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
877
878 ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
879 sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
880
881 if (m_numPkrLog2 >= 2)
882 {
883 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
884 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
885 }
886 }
887 else
888 {
889 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
890 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
891 1;
892
893 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
894
895 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
896 sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
897 }
898 }
899
900 if (m_settings.supportRbPlus)
901 {
902 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
903 // corresponding SW_64KB_* mode
904 m_blockVarSizeLog2 = m_pipesLog2 + 14;
905 }
906
907
908 if (valid)
909 {
910 InitEquationTable();
911 }
912
913 return valid;
914 }
915
916 /**
917 ************************************************************************************************************************
918 * Gfx10Lib::HwlConvertChipFamily
919 *
920 * @brief
921 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
922 * @return
923 * ChipFamily
924 ************************************************************************************************************************
925 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)926 ChipFamily Gfx10Lib::HwlConvertChipFamily(
927 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
928 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
929 {
930 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
931
932 m_settings.dccUnsup3DSwDis = 1;
933 m_settings.dsMipmapHtileFix = 1;
934
935 switch (chipFamily)
936 {
937 case FAMILY_NV:
938 if (ASICREV_IS_NAVI10_P(chipRevision))
939 {
940 m_settings.dsMipmapHtileFix = 0;
941 m_settings.isDcn20 = 1;
942 }
943
944 if (ASICREV_IS_NAVI12_P(chipRevision))
945 {
946 m_settings.isDcn20 = 1;
947 }
948
949 if (ASICREV_IS_NAVI14_M(chipRevision))
950 {
951 m_settings.isDcn20 = 1;
952 }
953
954 if (ASICREV_IS_SIENNA_CICHLID(chipRevision))
955 {
956 m_settings.supportRbPlus = 1;
957 m_settings.dccUnsup3DSwDis = 0;
958 }
959
960 if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))
961 {
962 m_settings.supportRbPlus = 1;
963 m_settings.dccUnsup3DSwDis = 0;
964 }
965
966 if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))
967 {
968 m_settings.supportRbPlus = 1;
969 m_settings.dccUnsup3DSwDis = 0;
970 }
971 break;
972
973 case FAMILY_VGH:
974 if (ASICREV_IS_VANGOGH(chipRevision))
975 {
976 m_settings.supportRbPlus = 1;
977 m_settings.dccUnsup3DSwDis = 0;
978 }
979 else
980 {
981 ADDR_ASSERT(!"Unknown chip revision");
982 }
983 break;
984
985 default:
986 ADDR_ASSERT(!"Unknown chip family");
987 break;
988 }
989
990 m_configFlags.use32bppFor422Fmt = TRUE;
991
992 return family;
993 }
994
995 /**
996 ************************************************************************************************************************
997 * Gfx10Lib::GetBlk256SizeLog2
998 *
999 * @brief
1000 * Get block 256 size
1001 *
1002 * @return
1003 * N/A
1004 ************************************************************************************************************************
1005 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1006 void Gfx10Lib::GetBlk256SizeLog2(
1007 AddrResourceType resourceType, ///< [in] Resource type
1008 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1009 UINT_32 elemLog2, ///< [in] element size log2
1010 UINT_32 numSamplesLog2, ///< [in] number of samples
1011 Dim3d* pBlock ///< [out] block size
1012 ) const
1013 {
1014 if (IsThin(resourceType, swizzleMode))
1015 {
1016 UINT_32 blockBits = 8 - elemLog2;
1017
1018 if (IsZOrderSwizzle(swizzleMode))
1019 {
1020 blockBits -= numSamplesLog2;
1021 }
1022
1023 pBlock->w = (blockBits >> 1) + (blockBits & 1);
1024 pBlock->h = (blockBits >> 1);
1025 pBlock->d = 0;
1026 }
1027 else
1028 {
1029 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1030
1031 UINT_32 blockBits = 8 - elemLog2;
1032
1033 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1034 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1035 pBlock->h = (blockBits / 3);
1036 }
1037 }
1038
1039 /**
1040 ************************************************************************************************************************
1041 * Gfx10Lib::GetCompressedBlockSizeLog2
1042 *
1043 * @brief
1044 * Get compress block size
1045 *
1046 * @return
1047 * N/A
1048 ************************************************************************************************************************
1049 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1050 void Gfx10Lib::GetCompressedBlockSizeLog2(
1051 Gfx10DataType dataType, ///< [in] Data type
1052 AddrResourceType resourceType, ///< [in] Resource type
1053 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1054 UINT_32 elemLog2, ///< [in] element size log2
1055 UINT_32 numSamplesLog2, ///< [in] number of samples
1056 Dim3d* pBlock ///< [out] block size
1057 ) const
1058 {
1059 if (dataType == Gfx10DataColor)
1060 {
1061 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1062 }
1063 else
1064 {
1065 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1066 pBlock->w = 3;
1067 pBlock->h = 3;
1068 pBlock->d = 0;
1069 }
1070 }
1071
1072 /**
1073 ************************************************************************************************************************
1074 * Gfx10Lib::GetMetaOverlapLog2
1075 *
1076 * @brief
1077 * Get meta block overlap
1078 *
1079 * @return
1080 * N/A
1081 ************************************************************************************************************************
1082 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1083 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1084 Gfx10DataType dataType, ///< [in] Data type
1085 AddrResourceType resourceType, ///< [in] Resource type
1086 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1087 UINT_32 elemLog2, ///< [in] element size log2
1088 UINT_32 numSamplesLog2 ///< [in] number of samples
1089 ) const
1090 {
1091 Dim3d compBlock;
1092 Dim3d microBlock;
1093
1094 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1095 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
1096
1097 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1098 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1099 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1100 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1101 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1102
1103 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1104 {
1105 overlap++;
1106 }
1107
1108 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1109 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1110 {
1111 overlap--;
1112 }
1113 overlap = Max(overlap, 0);
1114 return overlap;
1115 }
1116
1117 /**
1118 ************************************************************************************************************************
1119 * Gfx10Lib::Get3DMetaOverlapLog2
1120 *
1121 * @brief
1122 * Get 3d meta block overlap
1123 *
1124 * @return
1125 * N/A
1126 ************************************************************************************************************************
1127 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1128 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1129 AddrResourceType resourceType, ///< [in] Resource type
1130 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1131 UINT_32 elemLog2 ///< [in] element size log2
1132 ) const
1133 {
1134 Dim3d microBlock;
1135 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
1136
1137 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1138
1139 if (m_settings.supportRbPlus)
1140 {
1141 overlap++;
1142 }
1143
1144 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1145 {
1146 overlap = 0;
1147 }
1148 return overlap;
1149 }
1150
1151 /**
1152 ************************************************************************************************************************
1153 * Gfx10Lib::GetPipeRotateAmount
1154 *
1155 * @brief
1156 * Get pipe rotate amount
1157 *
1158 * @return
1159 * Pipe rotate amount
1160 ************************************************************************************************************************
1161 */
1162
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1163 INT_32 Gfx10Lib::GetPipeRotateAmount(
1164 AddrResourceType resourceType, ///< [in] Resource type
1165 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1166 ) const
1167 {
1168 INT_32 amount = 0;
1169
1170 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1171 {
1172 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1173 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1174 }
1175
1176 return amount;
1177 }
1178
1179 /**
1180 ************************************************************************************************************************
1181 * Gfx10Lib::GetMetaBlkSize
1182 *
1183 * @brief
1184 * Get metadata block size
1185 *
1186 * @return
1187 * Meta block size
1188 ************************************************************************************************************************
1189 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1190 UINT_32 Gfx10Lib::GetMetaBlkSize(
1191 Gfx10DataType dataType, ///< [in] Data type
1192 AddrResourceType resourceType, ///< [in] Resource type
1193 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1194 UINT_32 elemLog2, ///< [in] element size log2
1195 UINT_32 numSamplesLog2, ///< [in] number of samples
1196 BOOL_32 pipeAlign, ///< [in] pipe align
1197 Dim3d* pBlock ///< [out] block size
1198 ) const
1199 {
1200 INT_32 metablkSizeLog2;
1201
1202 {
1203 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1204 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1205 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1206 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1207 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1208 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1209 INT_32 numPipesLog2 = m_pipesLog2;
1210
1211 if (IsThin(resourceType, swizzleMode))
1212 {
1213 if ((pipeAlign == FALSE) ||
1214 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1215 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1216 {
1217 if (pipeAlign)
1218 {
1219 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1220 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1221 }
1222 else
1223 {
1224 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1225 }
1226 }
1227 else
1228 {
1229 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1230 {
1231 numPipesLog2++;
1232 }
1233
1234 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1235
1236 if (numPipesLog2 >= 4)
1237 {
1238 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1239
1240 // In 16Bpe 8xaa, we have an extra overlap bit
1241 if ((pipeRotateLog2 > 0) &&
1242 (elemLog2 == 4) &&
1243 (numSamplesLog2 == 3) &&
1244 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1245 {
1246 overlapLog2++;
1247 }
1248
1249 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1250 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1251
1252 if (m_settings.supportRbPlus &&
1253 IsRtOptSwizzle(swizzleMode) &&
1254 (numPipesLog2 == 6) &&
1255 (numSamplesLog2 == 3) &&
1256 (m_maxCompFragLog2 == 3) &&
1257 (metablkSizeLog2 < 15))
1258 {
1259 metablkSizeLog2 = 15;
1260 }
1261 }
1262 else
1263 {
1264 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1265 }
1266
1267 if (dataType == Gfx10DataDepthStencil)
1268 {
1269 // For htile surfaces, pad meta block size to 2K * num_pipes
1270 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1271 }
1272
1273 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1274
1275 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1276 {
1277 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1278
1279 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1280 }
1281 }
1282
1283 const INT_32 metablkBitsLog2 =
1284 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1285 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1286 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1287 pBlock->d = 1;
1288 }
1289 else
1290 {
1291 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1292
1293 if (pipeAlign)
1294 {
1295 if (m_settings.supportRbPlus &&
1296 (m_pipesLog2 == m_numSaLog2 + 1) &&
1297 (m_pipesLog2 > 1) &&
1298 IsRbAligned(resourceType, swizzleMode))
1299 {
1300 numPipesLog2++;
1301 }
1302
1303 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1304
1305 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1306 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1307 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1308 }
1309 else
1310 {
1311 metablkSizeLog2 = 12;
1312 }
1313
1314 const INT_32 metablkBitsLog2 =
1315 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1316 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1317 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1318 pBlock->d = 1 << (metablkBitsLog2 / 3);
1319 }
1320 }
1321
1322 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1323 }
1324
1325 /**
1326 ************************************************************************************************************************
1327 * Gfx10Lib::ConvertSwizzlePatternToEquation
1328 *
1329 * @brief
1330 * Convert swizzle pattern to equation.
1331 *
1332 * @return
1333 * N/A
1334 ************************************************************************************************************************
1335 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1336 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1337 UINT_32 elemLog2, ///< [in] element bytes log2
1338 AddrResourceType rsrcType, ///< [in] resource type
1339 AddrSwizzleMode swMode, ///< [in] swizzle mode
1340 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1341 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1342 const
1343 {
1344 ADDR_BIT_SETTING fullSwizzlePattern[20];
1345 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1346
1347 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1348 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1349
1350 pEquation->numBits = blockSizeLog2;
1351 pEquation->stackedDepthSlices = FALSE;
1352
1353 for (UINT_32 i = 0; i < elemLog2; i++)
1354 {
1355 pEquation->addr[i].channel = 0;
1356 pEquation->addr[i].valid = 1;
1357 pEquation->addr[i].index = i;
1358 }
1359
1360 if (IsXor(swMode) == FALSE)
1361 {
1362 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1363 {
1364 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1365
1366 if (pSwizzle[i].x != 0)
1367 {
1368 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1369
1370 pEquation->addr[i].channel = 0;
1371 pEquation->addr[i].valid = 1;
1372 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1373 }
1374 else if (pSwizzle[i].y != 0)
1375 {
1376 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1377
1378 pEquation->addr[i].channel = 1;
1379 pEquation->addr[i].valid = 1;
1380 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1381 }
1382 else
1383 {
1384 ADDR_ASSERT(pSwizzle[i].z != 0);
1385 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1386
1387 pEquation->addr[i].channel = 2;
1388 pEquation->addr[i].valid = 1;
1389 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1390 }
1391
1392 pEquation->xor1[i].value = 0;
1393 pEquation->xor2[i].value = 0;
1394 }
1395 }
1396 else if (IsThin(rsrcType, swMode))
1397 {
1398 Dim3d dim;
1399 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1400
1401 const UINT_32 blkXLog2 = Log2(dim.w);
1402 const UINT_32 blkYLog2 = Log2(dim.h);
1403 const UINT_32 blkXMask = dim.w - 1;
1404 const UINT_32 blkYMask = dim.h - 1;
1405
1406 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1407 UINT_32 xMask = 0;
1408 UINT_32 yMask = 0;
1409 UINT_32 bMask = (1 << elemLog2) - 1;
1410
1411 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1412 {
1413 if (IsPow2(pSwizzle[i].value))
1414 {
1415 if (pSwizzle[i].x != 0)
1416 {
1417 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1418 xMask |= pSwizzle[i].x;
1419
1420 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1421
1422 ADDR_ASSERT(xLog2 < blkXLog2);
1423
1424 pEquation->addr[i].channel = 0;
1425 pEquation->addr[i].valid = 1;
1426 pEquation->addr[i].index = xLog2 + elemLog2;
1427 }
1428 else
1429 {
1430 ADDR_ASSERT(pSwizzle[i].y != 0);
1431 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1432 yMask |= pSwizzle[i].y;
1433
1434 pEquation->addr[i].channel = 1;
1435 pEquation->addr[i].valid = 1;
1436 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1437
1438 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1439 }
1440
1441 swizzle[i].value = 0;
1442 bMask |= 1 << i;
1443 }
1444 else
1445 {
1446 if (pSwizzle[i].z != 0)
1447 {
1448 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1449
1450 pEquation->xor2[i].channel = 2;
1451 pEquation->xor2[i].valid = 1;
1452 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1453 }
1454
1455 swizzle[i].x = pSwizzle[i].x;
1456 swizzle[i].y = pSwizzle[i].y;
1457 swizzle[i].z = swizzle[i].s = 0;
1458
1459 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1460
1461 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1462
1463 if (xHi != 0)
1464 {
1465 ADDR_ASSERT(IsPow2(xHi));
1466 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1467
1468 pEquation->xor1[i].channel = 0;
1469 pEquation->xor1[i].valid = 1;
1470 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1471
1472 swizzle[i].x &= blkXMask;
1473 }
1474
1475 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1476
1477 if (yHi != 0)
1478 {
1479 ADDR_ASSERT(IsPow2(yHi));
1480
1481 if (xHi == 0)
1482 {
1483 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1484 pEquation->xor1[i].channel = 1;
1485 pEquation->xor1[i].valid = 1;
1486 pEquation->xor1[i].index = Log2(yHi);
1487 }
1488 else
1489 {
1490 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1491 pEquation->xor2[i].channel = 1;
1492 pEquation->xor2[i].valid = 1;
1493 pEquation->xor2[i].index = Log2(yHi);
1494 }
1495
1496 swizzle[i].y &= blkYMask;
1497 }
1498
1499 if (swizzle[i].value == 0)
1500 {
1501 bMask |= 1 << i;
1502 }
1503 }
1504 }
1505
1506 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1507 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1508
1509 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1510
1511 while (bMask != blockMask)
1512 {
1513 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1514 {
1515 if ((bMask & (1 << i)) == 0)
1516 {
1517 if (IsPow2(swizzle[i].value))
1518 {
1519 if (swizzle[i].x != 0)
1520 {
1521 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1522 xMask |= swizzle[i].x;
1523
1524 const UINT_32 xLog2 = Log2(swizzle[i].x);
1525
1526 ADDR_ASSERT(xLog2 < blkXLog2);
1527
1528 pEquation->addr[i].channel = 0;
1529 pEquation->addr[i].valid = 1;
1530 pEquation->addr[i].index = xLog2 + elemLog2;
1531 }
1532 else
1533 {
1534 ADDR_ASSERT(swizzle[i].y != 0);
1535 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1536 yMask |= swizzle[i].y;
1537
1538 pEquation->addr[i].channel = 1;
1539 pEquation->addr[i].valid = 1;
1540 pEquation->addr[i].index = Log2(swizzle[i].y);
1541
1542 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1543 }
1544
1545 swizzle[i].value = 0;
1546 bMask |= 1 << i;
1547 }
1548 else
1549 {
1550 const UINT_32 x = swizzle[i].x & xMask;
1551 const UINT_32 y = swizzle[i].y & yMask;
1552
1553 if (x != 0)
1554 {
1555 ADDR_ASSERT(IsPow2(x));
1556
1557 if (pEquation->xor1[i].value == 0)
1558 {
1559 pEquation->xor1[i].channel = 0;
1560 pEquation->xor1[i].valid = 1;
1561 pEquation->xor1[i].index = Log2(x) + elemLog2;
1562 }
1563 else
1564 {
1565 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1566 pEquation->xor2[i].channel = 0;
1567 pEquation->xor2[i].valid = 1;
1568 pEquation->xor2[i].index = Log2(x) + elemLog2;
1569 }
1570 }
1571
1572 if (y != 0)
1573 {
1574 ADDR_ASSERT(IsPow2(y));
1575
1576 if (pEquation->xor1[i].value == 0)
1577 {
1578 pEquation->xor1[i].channel = 1;
1579 pEquation->xor1[i].valid = 1;
1580 pEquation->xor1[i].index = Log2(y);
1581 }
1582 else
1583 {
1584 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1585 pEquation->xor2[i].channel = 1;
1586 pEquation->xor2[i].valid = 1;
1587 pEquation->xor2[i].index = Log2(y);
1588 }
1589 }
1590
1591 swizzle[i].x &= ~x;
1592 swizzle[i].y &= ~y;
1593 }
1594 }
1595 }
1596 }
1597
1598 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1599 }
1600 else
1601 {
1602 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1603 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1604 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1605 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1606 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1607 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1608
1609 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1610 UINT_32 xMask = 0;
1611 UINT_32 yMask = 0;
1612 UINT_32 zMask = 0;
1613 UINT_32 bMask = (1 << elemLog2) - 1;
1614
1615 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1616 {
1617 if (IsPow2(pSwizzle[i].value))
1618 {
1619 if (pSwizzle[i].x != 0)
1620 {
1621 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1622 xMask |= pSwizzle[i].x;
1623
1624 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1625
1626 ADDR_ASSERT(xLog2 < blkXLog2);
1627
1628 pEquation->addr[i].channel = 0;
1629 pEquation->addr[i].valid = 1;
1630 pEquation->addr[i].index = xLog2 + elemLog2;
1631 }
1632 else if (pSwizzle[i].y != 0)
1633 {
1634 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1635 yMask |= pSwizzle[i].y;
1636
1637 pEquation->addr[i].channel = 1;
1638 pEquation->addr[i].valid = 1;
1639 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1640
1641 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1642 }
1643 else
1644 {
1645 ADDR_ASSERT(pSwizzle[i].z != 0);
1646 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1647 zMask |= pSwizzle[i].z;
1648
1649 pEquation->addr[i].channel = 2;
1650 pEquation->addr[i].valid = 1;
1651 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1652
1653 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1654 }
1655
1656 swizzle[i].value = 0;
1657 bMask |= 1 << i;
1658 }
1659 else
1660 {
1661 swizzle[i].x = pSwizzle[i].x;
1662 swizzle[i].y = pSwizzle[i].y;
1663 swizzle[i].z = pSwizzle[i].z;
1664 swizzle[i].s = 0;
1665
1666 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1667
1668 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1669 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1670 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1671
1672 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1673
1674 if (xHi != 0)
1675 {
1676 ADDR_ASSERT(IsPow2(xHi));
1677 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1678
1679 pEquation->xor1[i].channel = 0;
1680 pEquation->xor1[i].valid = 1;
1681 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1682
1683 swizzle[i].x &= blkXMask;
1684 }
1685
1686 if (yHi != 0)
1687 {
1688 ADDR_ASSERT(IsPow2(yHi));
1689
1690 if (pEquation->xor1[i].value == 0)
1691 {
1692 pEquation->xor1[i].channel = 1;
1693 pEquation->xor1[i].valid = 1;
1694 pEquation->xor1[i].index = Log2(yHi);
1695 }
1696 else
1697 {
1698 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1699 pEquation->xor2[i].channel = 1;
1700 pEquation->xor2[i].valid = 1;
1701 pEquation->xor2[i].index = Log2(yHi);
1702 }
1703
1704 swizzle[i].y &= blkYMask;
1705 }
1706
1707 if (zHi != 0)
1708 {
1709 ADDR_ASSERT(IsPow2(zHi));
1710
1711 if (pEquation->xor1[i].value == 0)
1712 {
1713 pEquation->xor1[i].channel = 2;
1714 pEquation->xor1[i].valid = 1;
1715 pEquation->xor1[i].index = Log2(zHi);
1716 }
1717 else
1718 {
1719 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1720 pEquation->xor2[i].channel = 2;
1721 pEquation->xor2[i].valid = 1;
1722 pEquation->xor2[i].index = Log2(zHi);
1723 }
1724
1725 swizzle[i].z &= blkZMask;
1726 }
1727
1728 if (swizzle[i].value == 0)
1729 {
1730 bMask |= 1 << i;
1731 }
1732 }
1733 }
1734
1735 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1736 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1737
1738 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1739
1740 while (bMask != blockMask)
1741 {
1742 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1743 {
1744 if ((bMask & (1 << i)) == 0)
1745 {
1746 if (IsPow2(swizzle[i].value))
1747 {
1748 if (swizzle[i].x != 0)
1749 {
1750 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1751 xMask |= swizzle[i].x;
1752
1753 const UINT_32 xLog2 = Log2(swizzle[i].x);
1754
1755 ADDR_ASSERT(xLog2 < blkXLog2);
1756
1757 pEquation->addr[i].channel = 0;
1758 pEquation->addr[i].valid = 1;
1759 pEquation->addr[i].index = xLog2 + elemLog2;
1760 }
1761 else if (swizzle[i].y != 0)
1762 {
1763 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1764 yMask |= swizzle[i].y;
1765
1766 pEquation->addr[i].channel = 1;
1767 pEquation->addr[i].valid = 1;
1768 pEquation->addr[i].index = Log2(swizzle[i].y);
1769
1770 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1771 }
1772 else
1773 {
1774 ADDR_ASSERT(swizzle[i].z != 0);
1775 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1776 zMask |= swizzle[i].z;
1777
1778 pEquation->addr[i].channel = 2;
1779 pEquation->addr[i].valid = 1;
1780 pEquation->addr[i].index = Log2(swizzle[i].z);
1781
1782 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1783 }
1784
1785 swizzle[i].value = 0;
1786 bMask |= 1 << i;
1787 }
1788 else
1789 {
1790 const UINT_32 x = swizzle[i].x & xMask;
1791 const UINT_32 y = swizzle[i].y & yMask;
1792 const UINT_32 z = swizzle[i].z & zMask;
1793
1794 if (x != 0)
1795 {
1796 ADDR_ASSERT(IsPow2(x));
1797
1798 if (pEquation->xor1[i].value == 0)
1799 {
1800 pEquation->xor1[i].channel = 0;
1801 pEquation->xor1[i].valid = 1;
1802 pEquation->xor1[i].index = Log2(x) + elemLog2;
1803 }
1804 else
1805 {
1806 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1807 pEquation->xor2[i].channel = 0;
1808 pEquation->xor2[i].valid = 1;
1809 pEquation->xor2[i].index = Log2(x) + elemLog2;
1810 }
1811 }
1812
1813 if (y != 0)
1814 {
1815 ADDR_ASSERT(IsPow2(y));
1816
1817 if (pEquation->xor1[i].value == 0)
1818 {
1819 pEquation->xor1[i].channel = 1;
1820 pEquation->xor1[i].valid = 1;
1821 pEquation->xor1[i].index = Log2(y);
1822 }
1823 else
1824 {
1825 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1826 pEquation->xor2[i].channel = 1;
1827 pEquation->xor2[i].valid = 1;
1828 pEquation->xor2[i].index = Log2(y);
1829 }
1830 }
1831
1832 if (z != 0)
1833 {
1834 ADDR_ASSERT(IsPow2(z));
1835
1836 if (pEquation->xor1[i].value == 0)
1837 {
1838 pEquation->xor1[i].channel = 2;
1839 pEquation->xor1[i].valid = 1;
1840 pEquation->xor1[i].index = Log2(z);
1841 }
1842 else
1843 {
1844 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1845 pEquation->xor2[i].channel = 2;
1846 pEquation->xor2[i].valid = 1;
1847 pEquation->xor2[i].index = Log2(z);
1848 }
1849 }
1850
1851 swizzle[i].x &= ~x;
1852 swizzle[i].y &= ~y;
1853 swizzle[i].z &= ~z;
1854 }
1855 }
1856 }
1857 }
1858
1859 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1860 }
1861 }
1862
1863 /**
1864 ************************************************************************************************************************
1865 * Gfx10Lib::InitEquationTable
1866 *
1867 * @brief
1868 * Initialize Equation table.
1869 *
1870 * @return
1871 * N/A
1872 ************************************************************************************************************************
1873 */
InitEquationTable()1874 VOID Gfx10Lib::InitEquationTable()
1875 {
1876 memset(m_equationTable, 0, sizeof(m_equationTable));
1877
1878 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1879 {
1880 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1881
1882 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1883 {
1884 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1885
1886 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1887 {
1888 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1889 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1890
1891 if (pPatInfo != NULL)
1892 {
1893 ADDR_ASSERT(IsValidSwMode(swMode));
1894
1895 if (pPatInfo->maxItemCount <= 3)
1896 {
1897 ADDR_EQUATION equation = {0};
1898
1899 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1900
1901 equationIndex = m_numEquations;
1902 ADDR_ASSERT(equationIndex < EquationTableSize);
1903
1904 m_equationTable[equationIndex] = equation;
1905
1906 m_numEquations++;
1907 }
1908 else
1909 {
1910 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
1911 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1912 ADDR_ASSERT(rsrcTypeIdx == 1);
1913 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1914 ADDR_ASSERT(m_settings.supportRbPlus == 1);
1915 }
1916 }
1917
1918 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1919 }
1920 }
1921 }
1922 }
1923
1924 /**
1925 ************************************************************************************************************************
1926 * Gfx10Lib::HwlGetEquationIndex
1927 *
1928 * @brief
1929 * Interface function stub of GetEquationIndex
1930 *
1931 * @return
1932 * ADDR_E_RETURNCODE
1933 ************************************************************************************************************************
1934 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1935 UINT_32 Gfx10Lib::HwlGetEquationIndex(
1936 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
1937 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
1938 ) const
1939 {
1940 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1941
1942 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1943 (pIn->resourceType == ADDR_RSRC_TEX_3D))
1944 {
1945 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1946 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
1947 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1948
1949 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1950 }
1951
1952 if (pOut->pMipInfo != NULL)
1953 {
1954 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1955 {
1956 pOut->pMipInfo[i].equationIndex = equationIdx;
1957 }
1958 }
1959
1960 return equationIdx;
1961 }
1962
1963 /**
1964 ************************************************************************************************************************
1965 * Gfx10Lib::GetValidDisplaySwizzleModes
1966 *
1967 * @brief
1968 * Get valid swizzle modes mask for displayable surface
1969 *
1970 * @return
1971 * Valid swizzle modes mask for displayable surface
1972 ************************************************************************************************************************
1973 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1974 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
1975 UINT_32 bpp
1976 ) const
1977 {
1978 UINT_32 swModeMask = 0;
1979
1980 if (bpp <= 64)
1981 {
1982 if (m_settings.isDcn20)
1983 {
1984 swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
1985 }
1986 else
1987 {
1988 swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
1989 }
1990 }
1991
1992 return swModeMask;
1993 }
1994
1995 /**
1996 ************************************************************************************************************************
1997 * Gfx10Lib::IsValidDisplaySwizzleMode
1998 *
1999 * @brief
2000 * Check if a swizzle mode is supported by display engine
2001 *
2002 * @return
2003 * TRUE is swizzle mode is supported by display engine
2004 ************************************************************************************************************************
2005 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2006 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2007 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2008 ) const
2009 {
2010 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2011
2012 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2013 }
2014
2015 /**
2016 ************************************************************************************************************************
2017 * Gfx10Lib::GetMaxNumMipsInTail
2018 *
2019 * @brief
2020 * Return max number of mips in tails
2021 *
2022 * @return
2023 * Max number of mips in tails
2024 ************************************************************************************************************************
2025 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2026 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2027 UINT_32 blockSizeLog2, ///< block size log2
2028 BOOL_32 isThin ///< is thin or thick
2029 ) const
2030 {
2031 UINT_32 effectiveLog2 = blockSizeLog2;
2032
2033 if (isThin == FALSE)
2034 {
2035 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2036 }
2037
2038 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2039 }
2040
2041 /**
2042 ************************************************************************************************************************
2043 * Gfx10Lib::HwlComputePipeBankXor
2044 *
2045 * @brief
2046 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2047 *
2048 * @return
2049 * PipeBankXor value
2050 ************************************************************************************************************************
2051 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2052 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2053 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2054 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2055 ) const
2056 {
2057 if (IsNonPrtXor(pIn->swizzleMode))
2058 {
2059 const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2060
2061 // No pipe xor...
2062 const UINT_32 pipeXor = 0;
2063 UINT_32 bankXor = 0;
2064
2065 const UINT_32 XorPatternLen = 8;
2066 static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1};
2067 static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1};
2068 static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7};
2069 static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14};
2070 static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2071
2072 switch (bankBits)
2073 {
2074 case 1:
2075 case 2:
2076 case 3:
2077 case 4:
2078 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2079 break;
2080 default:
2081 // valid bank bits should be 0~4
2082 ADDR_ASSERT_ALWAYS();
2083 case 0:
2084 break;
2085 }
2086
2087 pOut->pipeBankXor = bankXor | pipeXor;
2088 }
2089 else
2090 {
2091 pOut->pipeBankXor = 0;
2092 }
2093
2094 return ADDR_OK;
2095 }
2096
2097 /**
2098 ************************************************************************************************************************
2099 * Gfx10Lib::HwlComputeSlicePipeBankXor
2100 *
2101 * @brief
2102 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2103 *
2104 * @return
2105 * PipeBankXor value
2106 ************************************************************************************************************************
2107 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2108 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2109 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2110 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2111 ) const
2112 {
2113 if (IsNonPrtXor(pIn->swizzleMode))
2114 {
2115 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2116 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2117 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2118
2119 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2120
2121 if (pIn->bpe != 0)
2122 {
2123 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2124 pIn->resourceType,
2125 Log2(pIn->bpe >> 3),
2126 1);
2127
2128 if (pPatInfo != NULL)
2129 {
2130 ADDR_BIT_SETTING fullSwizzlePattern[20];
2131 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2132
2133 const UINT_32 pipeBankXorOffset =
2134 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2135 blockBits,
2136 0,
2137 0,
2138 pIn->slice,
2139 0);
2140
2141 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2142
2143 // Should have no bit set under pipe interleave
2144 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2145
2146 // This assertion firing means old approach doesn't calculate a correct sliceXor value...
2147 ADDR_ASSERT(pipeBankXor == pipeXor);
2148
2149 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2150 }
2151 }
2152 }
2153 else
2154 {
2155 pOut->pipeBankXor = 0;
2156 }
2157
2158 return ADDR_OK;
2159 }
2160
2161 /**
2162 ************************************************************************************************************************
2163 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2164 *
2165 * @brief
2166 * Compute sub resource offset to support swizzle pattern
2167 *
2168 * @return
2169 * Offset
2170 ************************************************************************************************************************
2171 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2172 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2173 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2174 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2175 ) const
2176 {
2177 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2178
2179 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2180
2181 return ADDR_OK;
2182 }
2183
2184 /**
2185 ************************************************************************************************************************
2186 * Gfx10Lib::ValidateNonSwModeParams
2187 *
2188 * @brief
2189 * Validate compute surface info params except swizzle mode
2190 *
2191 * @return
2192 * TRUE if parameters are valid, FALSE otherwise
2193 ************************************************************************************************************************
2194 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2195 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2196 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2197 {
2198 BOOL_32 valid = TRUE;
2199
2200 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2201 {
2202 ADDR_ASSERT_ALWAYS();
2203 valid = FALSE;
2204 }
2205
2206 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2207 {
2208 ADDR_ASSERT_ALWAYS();
2209 valid = FALSE;
2210 }
2211
2212 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2213 const AddrResourceType rsrcType = pIn->resourceType;
2214 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2215 const BOOL_32 msaa = (pIn->numFrags > 1);
2216 const BOOL_32 display = flags.display;
2217 const BOOL_32 tex3d = IsTex3d(rsrcType);
2218 const BOOL_32 tex2d = IsTex2d(rsrcType);
2219 const BOOL_32 tex1d = IsTex1d(rsrcType);
2220 const BOOL_32 stereo = flags.qbStereo;
2221
2222
2223 // Resource type check
2224 if (tex1d)
2225 {
2226 if (msaa || display || stereo)
2227 {
2228 ADDR_ASSERT_ALWAYS();
2229 valid = FALSE;
2230 }
2231 }
2232 else if (tex2d)
2233 {
2234 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2235 {
2236 ADDR_ASSERT_ALWAYS();
2237 valid = FALSE;
2238 }
2239 }
2240 else if (tex3d)
2241 {
2242 if (msaa || display || stereo)
2243 {
2244 ADDR_ASSERT_ALWAYS();
2245 valid = FALSE;
2246 }
2247 }
2248 else
2249 {
2250 ADDR_ASSERT_ALWAYS();
2251 valid = FALSE;
2252 }
2253
2254 return valid;
2255 }
2256
2257 /**
2258 ************************************************************************************************************************
2259 * Gfx10Lib::ValidateSwModeParams
2260 *
2261 * @brief
2262 * Validate compute surface info related to swizzle mode
2263 *
2264 * @return
2265 * TRUE if parameters are valid, FALSE otherwise
2266 ************************************************************************************************************************
2267 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2268 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2269 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2270 {
2271 BOOL_32 valid = TRUE;
2272
2273 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2274 {
2275 ADDR_ASSERT_ALWAYS();
2276 valid = FALSE;
2277 }
2278 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2279 {
2280 {
2281 ADDR_ASSERT_ALWAYS();
2282 valid = FALSE;
2283 }
2284 }
2285
2286 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2287 const AddrResourceType rsrcType = pIn->resourceType;
2288 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2289 const BOOL_32 msaa = (pIn->numFrags > 1);
2290 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2291 const BOOL_32 color = flags.color;
2292 const BOOL_32 display = flags.display;
2293 const BOOL_32 tex3d = IsTex3d(rsrcType);
2294 const BOOL_32 tex2d = IsTex2d(rsrcType);
2295 const BOOL_32 tex1d = IsTex1d(rsrcType);
2296 const BOOL_32 thin3d = flags.view3dAs2dArray;
2297 const BOOL_32 linear = IsLinear(swizzle);
2298 const BOOL_32 blk256B = IsBlock256b(swizzle);
2299 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2300 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2301 const BOOL_32 prt = flags.prt;
2302 const BOOL_32 fmask = flags.fmask;
2303
2304 // Misc check
2305 if ((pIn->numFrags > 1) &&
2306 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2307 {
2308 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2309 ADDR_ASSERT_ALWAYS();
2310 valid = FALSE;
2311 }
2312
2313 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2314 {
2315 ADDR_ASSERT_ALWAYS();
2316 valid = FALSE;
2317 }
2318
2319 if ((pIn->bpp == 96) && (linear == FALSE))
2320 {
2321 ADDR_ASSERT_ALWAYS();
2322 valid = FALSE;
2323 }
2324
2325 const UINT_32 swizzleMask = 1 << swizzle;
2326
2327 // Resource type check
2328 if (tex1d)
2329 {
2330 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2331 {
2332 ADDR_ASSERT_ALWAYS();
2333 valid = FALSE;
2334 }
2335 }
2336 else if (tex2d)
2337 {
2338 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2339 {
2340 {
2341 ADDR_ASSERT_ALWAYS();
2342 valid = FALSE;
2343 }
2344 }
2345 else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2346 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2347 {
2348 ADDR_ASSERT_ALWAYS();
2349 valid = FALSE;
2350 }
2351
2352 }
2353 else if (tex3d)
2354 {
2355 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2356 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2357 (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
2358 {
2359 ADDR_ASSERT_ALWAYS();
2360 valid = FALSE;
2361 }
2362 }
2363
2364 // Swizzle type check
2365 if (linear)
2366 {
2367 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2368 {
2369 ADDR_ASSERT_ALWAYS();
2370 valid = FALSE;
2371 }
2372 }
2373 else if (IsZOrderSwizzle(swizzle))
2374 {
2375 if ((pIn->bpp > 64) ||
2376 (msaa && (color || (pIn->bpp > 32))) ||
2377 ElemLib::IsBlockCompressed(pIn->format) ||
2378 ElemLib::IsMacroPixelPacked(pIn->format))
2379 {
2380 ADDR_ASSERT_ALWAYS();
2381 valid = FALSE;
2382 }
2383 }
2384 else if (IsStandardSwizzle(rsrcType, swizzle))
2385 {
2386 if (zbuffer || msaa)
2387 {
2388 ADDR_ASSERT_ALWAYS();
2389 valid = FALSE;
2390 }
2391 }
2392 else if (IsDisplaySwizzle(rsrcType, swizzle))
2393 {
2394 if (zbuffer || msaa)
2395 {
2396 ADDR_ASSERT_ALWAYS();
2397 valid = FALSE;
2398 }
2399 }
2400 else if (IsRtOptSwizzle(swizzle))
2401 {
2402 if (zbuffer)
2403 {
2404 ADDR_ASSERT_ALWAYS();
2405 valid = FALSE;
2406 }
2407 }
2408 else
2409 {
2410 {
2411 ADDR_ASSERT_ALWAYS();
2412 valid = FALSE;
2413 }
2414 }
2415
2416 // Block type check
2417 if (blk256B)
2418 {
2419 if (zbuffer || tex3d || msaa)
2420 {
2421 ADDR_ASSERT_ALWAYS();
2422 valid = FALSE;
2423 }
2424 }
2425 else if (blkVar)
2426 {
2427 if (m_blockVarSizeLog2 == 0)
2428 {
2429 ADDR_ASSERT_ALWAYS();
2430 valid = FALSE;
2431 }
2432 }
2433
2434 return valid;
2435 }
2436
2437 /**
2438 ************************************************************************************************************************
2439 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2440 *
2441 * @brief
2442 * Compute surface info sanity check
2443 *
2444 * @return
2445 * Offset
2446 ************************************************************************************************************************
2447 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2448 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2449 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2450 ) const
2451 {
2452 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2453 }
2454
2455 /**
2456 ************************************************************************************************************************
2457 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2458 *
2459 * @brief
2460 * Internal function to get suggested surface information for cliet to use
2461 *
2462 * @return
2463 * ADDR_E_RETURNCODE
2464 ************************************************************************************************************************
2465 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2466 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2467 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2468 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2469 ) const
2470 {
2471 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2472
2473 if (pIn->flags.fmask)
2474 {
2475 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2476 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2477
2478 if (forbid64KbBlockType && forbidVarBlockType)
2479 {
2480 // Invalid combination...
2481 ADDR_ASSERT_ALWAYS();
2482 returnCode = ADDR_INVALIDPARAMS;
2483 }
2484 else
2485 {
2486 pOut->resourceType = ADDR_RSRC_TEX_2D;
2487 pOut->validBlockSet.value = 0;
2488 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2489 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2490 pOut->validSwModeSet.value = 0;
2491 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2492 pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2493 pOut->canXor = TRUE;
2494 pOut->validSwTypeSet.value = AddrSwSetZ;
2495 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2496
2497 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2498
2499 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2500 {
2501 const UINT_8 maxFmaskSwizzleModeType = 2;
2502 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2503 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2504 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2505 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2506 const UINT_32 width = Max(pIn->width, 1u);
2507 const UINT_32 height = Max(pIn->height, 1u);
2508 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2509
2510 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2511 Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};
2512 Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};
2513 UINT_64 padSize[maxFmaskSwizzleModeType] = {0};
2514
2515 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2516 {
2517 ComputeBlockDimensionForSurf(&blkDim[i].w,
2518 &blkDim[i].h,
2519 &blkDim[i].d,
2520 fmaskBpp,
2521 1,
2522 pOut->resourceType,
2523 swMode[i]);
2524
2525 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2526 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2527 }
2528
2529 if (GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0]))
2530 {
2531 if ((padSize[1] * ratioHi) <= (padSize[0] * ratioLow))
2532 {
2533 use64KbBlockType = FALSE;
2534 }
2535 }
2536 else
2537 {
2538 if ((padSize[1] * ratioLow) < (padSize[0] * ratioHi))
2539 {
2540 use64KbBlockType = FALSE;
2541 }
2542 }
2543 }
2544 else if (forbidVarBlockType)
2545 {
2546 use64KbBlockType = TRUE;
2547 }
2548
2549 if (use64KbBlockType)
2550 {
2551 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2552 }
2553 else
2554 {
2555 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2556 }
2557 }
2558 }
2559 else
2560 {
2561 UINT_32 bpp = pIn->bpp;
2562 UINT_32 width = Max(pIn->width, 1u);
2563 UINT_32 height = Max(pIn->height, 1u);
2564
2565 // Set format to INVALID will skip this conversion
2566 if (pIn->format != ADDR_FMT_INVALID)
2567 {
2568 ElemMode elemMode = ADDR_UNCOMPRESSED;
2569 UINT_32 expandX, expandY;
2570
2571 // Get compression/expansion factors and element mode which indicates compression/expansion
2572 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2573 &elemMode,
2574 &expandX,
2575 &expandY);
2576
2577 UINT_32 basePitch = 0;
2578 GetElemLib()->AdjustSurfaceInfo(elemMode,
2579 expandX,
2580 expandY,
2581 &bpp,
2582 &basePitch,
2583 &width,
2584 &height);
2585 }
2586
2587 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2588 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2589 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2590 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2591 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2592
2593 // Pre sanity check on non swizzle mode parameters
2594 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
2595 localIn.flags = pIn->flags;
2596 localIn.resourceType = pIn->resourceType;
2597 localIn.format = pIn->format;
2598 localIn.bpp = bpp;
2599 localIn.width = width;
2600 localIn.height = height;
2601 localIn.numSlices = numSlices;
2602 localIn.numMipLevels = numMipLevels;
2603 localIn.numSamples = numSamples;
2604 localIn.numFrags = numFrags;
2605
2606 if (ValidateNonSwModeParams(&localIn))
2607 {
2608 // Forbid swizzle mode(s) by client setting
2609 ADDR2_SWMODE_SET allowedSwModeSet = {0};
2610 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2611 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2612 allowedSwModeSet.value |=
2613 pIn->forbiddenBlock.macroThin4KB ? 0 :
2614 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2615 allowedSwModeSet.value |=
2616 pIn->forbiddenBlock.macroThick4KB ? 0 :
2617 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2618 allowedSwModeSet.value |=
2619 pIn->forbiddenBlock.macroThin64KB ? 0 :
2620 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2621 allowedSwModeSet.value |=
2622 pIn->forbiddenBlock.macroThick64KB ? 0 :
2623 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2624 allowedSwModeSet.value |=
2625 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2626
2627 if (pIn->preferredSwSet.value != 0)
2628 {
2629 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2630 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2631 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2632 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2633 }
2634
2635 if (pIn->noXor)
2636 {
2637 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2638 }
2639
2640 if (pIn->maxAlign > 0)
2641 {
2642 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2643 {
2644 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2645 }
2646
2647 if (pIn->maxAlign < Size64K)
2648 {
2649 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2650 }
2651
2652 if (pIn->maxAlign < Size4K)
2653 {
2654 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2655 }
2656
2657 if (pIn->maxAlign < Size256)
2658 {
2659 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2660 }
2661 }
2662
2663 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2664 switch (pIn->resourceType)
2665 {
2666 case ADDR_RSRC_TEX_1D:
2667 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2668 break;
2669
2670 case ADDR_RSRC_TEX_2D:
2671 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2672
2673 break;
2674
2675 case ADDR_RSRC_TEX_3D:
2676 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2677
2678 if (pIn->flags.view3dAs2dArray)
2679 {
2680 allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2681 }
2682 break;
2683
2684 default:
2685 ADDR_ASSERT_ALWAYS();
2686 allowedSwModeSet.value = 0;
2687 break;
2688 }
2689
2690 if (ElemLib::IsBlockCompressed(pIn->format) ||
2691 ElemLib::IsMacroPixelPacked(pIn->format) ||
2692 (bpp > 64) ||
2693 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2694 {
2695 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2696 }
2697
2698 if (pIn->format == ADDR_FMT_32_32_32)
2699 {
2700 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2701 }
2702
2703 if (msaa)
2704 {
2705 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2706 }
2707
2708 if (pIn->flags.depth || pIn->flags.stencil)
2709 {
2710 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2711 }
2712
2713 if (pIn->flags.display)
2714 {
2715 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2716 }
2717
2718 if (allowedSwModeSet.value != 0)
2719 {
2720 #if DEBUG
2721 // Post sanity check, at least AddrLib should accept the output generated by its own
2722 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2723
2724 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2725 {
2726 if (validateSwModeSet & 1)
2727 {
2728 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2729 ADDR_ASSERT(ValidateSwModeParams(&localIn));
2730 }
2731
2732 validateSwModeSet >>= 1;
2733 }
2734 #endif
2735
2736 pOut->resourceType = pIn->resourceType;
2737 pOut->validSwModeSet = allowedSwModeSet;
2738 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
2739 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2740 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
2741
2742 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2743
2744 if (pOut->clientPreferredSwSet.value == 0)
2745 {
2746 pOut->clientPreferredSwSet.value = AddrSwSetAll;
2747 }
2748
2749 // Apply optional restrictions
2750 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
2751 {
2752 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
2753 {
2754 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
2755 // the GL2 in VAR mode, so it should be avoided.
2756 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2757 }
2758 else
2759 {
2760 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
2761 // But we have to suffer from low performance because there is no other choice...
2762 ADDR_ASSERT_ALWAYS();
2763 }
2764 }
2765
2766 if (pIn->flags.needEquation)
2767 {
2768 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
2769 }
2770
2771 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
2772 {
2773 pOut->swizzleMode = ADDR_SW_LINEAR;
2774 }
2775 else
2776 {
2777 // Always ignore linear swizzle mode if there is other choice.
2778 allowedSwModeSet.swLinear = 0;
2779
2780 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
2781
2782 // Determine block size if there are 2 or more block type candidates
2783 if (IsPow2(allowedBlockSet.value) == FALSE)
2784 {
2785 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_LINEAR};
2786
2787 if (m_blockVarSizeLog2 != 0)
2788 {
2789 swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
2790 }
2791
2792 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2793 {
2794 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
2795 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
2796 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
2797 }
2798 else
2799 {
2800 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
2801 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
2802 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
2803 }
2804
2805 Dim3d blkDim[AddrBlockMaxTiledType] = {0};
2806 Dim3d padDim[AddrBlockMaxTiledType] = {0};
2807 UINT_64 padSize[AddrBlockMaxTiledType] = {0};
2808
2809 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2810 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2811 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2812 UINT_32 minSizeBlk = AddrBlockMicro;
2813 UINT_64 minSize = 0;
2814
2815 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2816 {
2817 if (allowedBlockSet.value & (1 << i))
2818 {
2819 ComputeBlockDimensionForSurf(&blkDim[i].w,
2820 &blkDim[i].h,
2821 &blkDim[i].d,
2822 bpp,
2823 numFrags,
2824 pOut->resourceType,
2825 swMode[i]);
2826
2827 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2828 padSize[i] = PowTwoAlign(padSize[i] * numFrags, sizeAlignInElement);
2829
2830 if (minSize == 0)
2831 {
2832 minSize = padSize[i];
2833 minSizeBlk = i;
2834 }
2835 else
2836 {
2837 // Due to the fact that VAR block size = 16KB * m_pipes, it is possible that VAR
2838 // block size < 64KB. And ratio[Hi/Low] logic implicitly requires iterating from
2839 // smaller block type to bigger block type. So we have to correct comparing logic
2840 // according to the size of existing "minimun block" and size of coming/comparing
2841 // block. The new logic can also be useful to any future change about AddrBlockType.
2842 if (GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk]))
2843 {
2844 if ((padSize[i] * ratioHi) <= (minSize * ratioLow))
2845 {
2846 minSize = padSize[i];
2847 minSizeBlk = i;
2848 }
2849 }
2850 else
2851 {
2852 if ((padSize[i] * ratioLow) < (minSize * ratioHi))
2853 {
2854 minSize = padSize[i];
2855 minSizeBlk = i;
2856 }
2857 }
2858 }
2859 }
2860 }
2861
2862 if ((allowedBlockSet.micro == TRUE) &&
2863 (width <= blkDim[AddrBlockMicro].w) &&
2864 (height <= blkDim[AddrBlockMicro].h))
2865 {
2866 minSizeBlk = AddrBlockMicro;
2867 }
2868
2869 if (minSizeBlk == AddrBlockMicro)
2870 {
2871 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2872 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
2873 }
2874 else if (minSizeBlk == AddrBlockThick4KB)
2875 {
2876 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2877 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
2878 }
2879 else if (minSizeBlk == AddrBlockThin4KB)
2880 {
2881 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2882 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
2883 }
2884 else if (minSizeBlk == AddrBlockThick64KB)
2885 {
2886 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2887 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
2888 }
2889 else if (minSizeBlk == AddrBlockThin64KB)
2890 {
2891 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2892 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
2893 }
2894 else
2895 {
2896 ADDR_ASSERT(minSizeBlk == AddrBlockThinVar);
2897 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
2898 }
2899 }
2900
2901 // Block type should be determined.
2902 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
2903
2904 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
2905
2906 // Determine swizzle type if there are 2 or more swizzle type candidates
2907 if (IsPow2(allowedSwSet.value) == FALSE)
2908 {
2909 if (ElemLib::IsBlockCompressed(pIn->format))
2910 {
2911 if (allowedSwSet.sw_D)
2912 {
2913 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2914 }
2915 else if (allowedSwSet.sw_S)
2916 {
2917 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2918 }
2919 else
2920 {
2921 ADDR_ASSERT(allowedSwSet.sw_R);
2922 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2923 }
2924 }
2925 else if (ElemLib::IsMacroPixelPacked(pIn->format))
2926 {
2927 if (allowedSwSet.sw_S)
2928 {
2929 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2930 }
2931 else if (allowedSwSet.sw_D)
2932 {
2933 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2934 }
2935 else
2936 {
2937 ADDR_ASSERT(allowedSwSet.sw_R);
2938 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2939 }
2940 }
2941 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2942 {
2943 if (pIn->flags.color &&
2944 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
2945 allowedSwSet.sw_D)
2946 {
2947 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2948 }
2949 else if (allowedSwSet.sw_S)
2950 {
2951 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2952 }
2953 else if (allowedSwSet.sw_R)
2954 {
2955 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2956 }
2957 else
2958 {
2959 ADDR_ASSERT(allowedSwSet.sw_Z);
2960 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2961 }
2962 }
2963 else
2964 {
2965 if (allowedSwSet.sw_R)
2966 {
2967 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
2968 }
2969 else if (allowedSwSet.sw_D)
2970 {
2971 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
2972 }
2973 else if (allowedSwSet.sw_S)
2974 {
2975 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
2976 }
2977 else
2978 {
2979 ADDR_ASSERT(allowedSwSet.sw_Z);
2980 allowedSwModeSet.value &= Gfx10ZSwModeMask;
2981 }
2982 }
2983 }
2984
2985 // Swizzle type should be determined.
2986 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
2987
2988 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2989 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2990 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2991 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2992 }
2993 }
2994 else
2995 {
2996 // Invalid combination...
2997 ADDR_ASSERT_ALWAYS();
2998 returnCode = ADDR_INVALIDPARAMS;
2999 }
3000 }
3001 else
3002 {
3003 // Invalid combination...
3004 ADDR_ASSERT_ALWAYS();
3005 returnCode = ADDR_INVALIDPARAMS;
3006 }
3007 }
3008
3009 return returnCode;
3010 }
3011
3012 /**
3013 ************************************************************************************************************************
3014 * Gfx10Lib::ComputeStereoInfo
3015 *
3016 * @brief
3017 * Compute height alignment and right eye pipeBankXor for stereo surface
3018 *
3019 * @return
3020 * Error code
3021 *
3022 ************************************************************************************************************************
3023 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 blkHeight,UINT_32 * pAlignY,UINT_32 * pRightXor) const3024 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3025 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
3026 UINT_32 blkHeight, ///< Block height
3027 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
3028 UINT_32* pRightXor ///< Right eye xor
3029 ) const
3030 {
3031 ADDR_E_RETURNCODE ret = ADDR_OK;
3032
3033 *pAlignY = 1;
3034 *pRightXor = 0;
3035
3036 if (IsNonPrtXor(pIn->swizzleMode))
3037 {
3038 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3039 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3040 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3041 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3042 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3043
3044 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3045 {
3046 UINT_32 yMax = 0;
3047 UINT_32 yPos = 0;
3048
3049 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3050 {
3051 if (m_equationTable[eqIndex].xor1[i].value == 0)
3052 {
3053 break;
3054 }
3055
3056 ADDR_ASSERT(m_equationTable[eqIndex].xor1[i].valid == 1);
3057
3058 if ((m_equationTable[eqIndex].xor1[i].channel == 1) &&
3059 (m_equationTable[eqIndex].xor1[i].index > yMax))
3060 {
3061 yMax = m_equationTable[eqIndex].xor1[i].index;
3062 yPos = i;
3063 }
3064 }
3065
3066 const UINT_32 additionalAlign = 1 << yMax;
3067
3068 if (additionalAlign >= blkHeight)
3069 {
3070 *pAlignY *= (additionalAlign / blkHeight);
3071
3072 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3073
3074 if ((alignedHeight >> yMax) & 1)
3075 {
3076 *pRightXor = 1 << (yPos - m_pipeInterleaveLog2);
3077 }
3078 }
3079 }
3080 else
3081 {
3082 ret = ADDR_INVALIDPARAMS;
3083 }
3084 }
3085
3086 return ret;
3087 }
3088
3089 /**
3090 ************************************************************************************************************************
3091 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3092 *
3093 * @brief
3094 * Internal function to calculate alignment for tiled surface
3095 *
3096 * @return
3097 * ADDR_E_RETURNCODE
3098 ************************************************************************************************************************
3099 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3100 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3101 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3102 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3103 ) const
3104 {
3105 ADDR_E_RETURNCODE ret;
3106
3107 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3108 pOut->mipChainPitch = 0;
3109 pOut->mipChainHeight = 0;
3110 pOut->mipChainSlice = 0;
3111 pOut->epitchIsHeight = FALSE;
3112
3113 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3114 pOut->mipChainInTail = FALSE;
3115 pOut->firstMipIdInTail = pIn->numMipLevels;
3116
3117 if (IsBlock256b(pIn->swizzleMode))
3118 {
3119 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3120 }
3121 else
3122 {
3123 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3124 }
3125
3126 return ret;
3127 }
3128
3129
3130 /**
3131 ************************************************************************************************************************
3132 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3133 *
3134 * @brief
3135 * Internal function to calculate alignment for micro tiled surface
3136 *
3137 * @return
3138 * ADDR_E_RETURNCODE
3139 ************************************************************************************************************************
3140 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3141 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3142 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3143 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3144 ) const
3145 {
3146 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3147 &pOut->blockHeight,
3148 &pOut->blockSlices,
3149 pIn->bpp,
3150 pIn->numFrags,
3151 pIn->resourceType,
3152 pIn->swizzleMode);
3153
3154 if (ret == ADDR_OK)
3155 {
3156 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3157
3158 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3159 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3160 pOut->numSlices = pIn->numSlices;
3161 pOut->baseAlign = blockSize;
3162
3163 if (pIn->numMipLevels > 1)
3164 {
3165 const UINT_32 mip0Width = pIn->width;
3166 const UINT_32 mip0Height = pIn->height;
3167 UINT_64 mipSliceSize = 0;
3168
3169 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3170 {
3171 UINT_32 mipWidth, mipHeight;
3172
3173 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3174
3175 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3176 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3177
3178 if (pOut->pMipInfo != NULL)
3179 {
3180 pOut->pMipInfo[i].pitch = mipActualWidth;
3181 pOut->pMipInfo[i].height = mipActualHeight;
3182 pOut->pMipInfo[i].depth = 1;
3183 pOut->pMipInfo[i].offset = mipSliceSize;
3184 pOut->pMipInfo[i].mipTailOffset = 0;
3185 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3186 }
3187
3188 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3189 }
3190
3191 pOut->sliceSize = mipSliceSize;
3192 pOut->surfSize = mipSliceSize * pOut->numSlices;
3193 }
3194 else
3195 {
3196 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3197 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3198
3199 if (pOut->pMipInfo != NULL)
3200 {
3201 pOut->pMipInfo[0].pitch = pOut->pitch;
3202 pOut->pMipInfo[0].height = pOut->height;
3203 pOut->pMipInfo[0].depth = 1;
3204 pOut->pMipInfo[0].offset = 0;
3205 pOut->pMipInfo[0].mipTailOffset = 0;
3206 pOut->pMipInfo[0].macroBlockOffset = 0;
3207 }
3208 }
3209
3210 }
3211
3212 return ret;
3213 }
3214
3215 /**
3216 ************************************************************************************************************************
3217 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3218 *
3219 * @brief
3220 * Internal function to calculate alignment for macro tiled surface
3221 *
3222 * @return
3223 * ADDR_E_RETURNCODE
3224 ************************************************************************************************************************
3225 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3226 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3227 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3228 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3229 ) const
3230 {
3231 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3232 &pOut->blockHeight,
3233 &pOut->blockSlices,
3234 pIn->bpp,
3235 pIn->numFrags,
3236 pIn->resourceType,
3237 pIn->swizzleMode);
3238
3239 if (returnCode == ADDR_OK)
3240 {
3241 UINT_32 heightAlign = pOut->blockHeight;
3242
3243 if (pIn->flags.qbStereo)
3244 {
3245 UINT_32 rightXor = 0;
3246 UINT_32 alignY = 1;
3247
3248 returnCode = ComputeStereoInfo(pIn, heightAlign, &alignY, &rightXor);
3249
3250 if (returnCode == ADDR_OK)
3251 {
3252 pOut->pStereoInfo->rightSwizzle = rightXor;
3253
3254 heightAlign *= alignY;
3255 }
3256 }
3257
3258 if (returnCode == ADDR_OK)
3259 {
3260 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3261 const UINT_32 blockSize = 1 << blockSizeLog2;
3262
3263 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3264 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3265 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3266 pOut->baseAlign = blockSize;
3267
3268 if (pIn->numMipLevels > 1)
3269 {
3270 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3271 pIn->swizzleMode,
3272 pOut->blockWidth,
3273 pOut->blockHeight,
3274 pOut->blockSlices);
3275 const UINT_32 mip0Width = pIn->width;
3276 const UINT_32 mip0Height = pIn->height;
3277 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3278 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3279 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3280 const UINT_32 index = Log2(pIn->bpp >> 3);
3281 UINT_32 firstMipInTail = pIn->numMipLevels;
3282 UINT_64 mipChainSliceSize = 0;
3283 UINT_64 mipSize[MaxMipLevels];
3284 UINT_64 mipSliceSize[MaxMipLevels];
3285
3286 Dim3d fixedTailMaxDim = tailMaxDim;
3287
3288 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3289 {
3290 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3291 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3292 }
3293
3294 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3295 {
3296 UINT_32 mipWidth, mipHeight, mipDepth;
3297
3298 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3299
3300 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3301 {
3302 firstMipInTail = i;
3303 mipChainSliceSize += blockSize / pOut->blockSlices;
3304 break;
3305 }
3306 else
3307 {
3308 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3309 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3310 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3311 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3312
3313 mipSize[i] = sliceSize * depth;
3314 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3315 mipChainSliceSize += sliceSize;
3316
3317 if (pOut->pMipInfo != NULL)
3318 {
3319 pOut->pMipInfo[i].pitch = pitch;
3320 pOut->pMipInfo[i].height = height;
3321 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3322 }
3323 }
3324 }
3325
3326 pOut->sliceSize = mipChainSliceSize;
3327 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3328 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3329 pOut->firstMipIdInTail = firstMipInTail;
3330
3331 if (pOut->pMipInfo != NULL)
3332 {
3333 UINT_64 offset = 0;
3334 UINT_64 macroBlkOffset = 0;
3335 UINT_32 tailMaxDepth = 0;
3336
3337 if (firstMipInTail != pIn->numMipLevels)
3338 {
3339 UINT_32 mipWidth, mipHeight;
3340
3341 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3342 &mipWidth, &mipHeight, &tailMaxDepth);
3343
3344 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3345 macroBlkOffset = blockSize;
3346 }
3347
3348 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3349 {
3350 pOut->pMipInfo[i].offset = offset;
3351 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3352 pOut->pMipInfo[i].mipTailOffset = 0;
3353
3354 offset += mipSize[i];
3355 macroBlkOffset += mipSliceSize[i];
3356 }
3357
3358 UINT_32 pitch = tailMaxDim.w;
3359 UINT_32 height = tailMaxDim.h;
3360 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3361
3362 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3363
3364 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3365 {
3366 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3367 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3368
3369 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3370 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3371 pOut->pMipInfo[i].macroBlockOffset = 0;
3372
3373 pOut->pMipInfo[i].pitch = pitch;
3374 pOut->pMipInfo[i].height = height;
3375 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3376
3377 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3378 ((mipOffset >> 10) & 2) |
3379 ((mipOffset >> 11) & 4) |
3380 ((mipOffset >> 12) & 8) |
3381 ((mipOffset >> 13) & 16) |
3382 ((mipOffset >> 14) & 32);
3383 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3384 ((mipOffset >> 9) & 2) |
3385 ((mipOffset >> 10) & 4) |
3386 ((mipOffset >> 11) & 8) |
3387 ((mipOffset >> 12) & 16) |
3388 ((mipOffset >> 13) & 32);
3389
3390 if (blockSizeLog2 & 1)
3391 {
3392 const UINT_32 temp = mipX;
3393 mipX = mipY;
3394 mipY = temp;
3395
3396 if (index & 1)
3397 {
3398 mipY = (mipY << 1) | (mipX & 1);
3399 mipX = mipX >> 1;
3400 }
3401 }
3402
3403 if (isThin)
3404 {
3405 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3406 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3407 pOut->pMipInfo[i].mipTailCoordZ = 0;
3408
3409 pitch = Max(pitch >> 1, Block256_2d[index].w);
3410 height = Max(height >> 1, Block256_2d[index].h);
3411 }
3412 else
3413 {
3414 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3415 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3416 pOut->pMipInfo[i].mipTailCoordZ = 0;
3417
3418 pitch = Max(pitch >> 1, Block256_3d[index].w);
3419 height = Max(height >> 1, Block256_3d[index].h);
3420 }
3421 }
3422 }
3423 }
3424 else
3425 {
3426 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3427 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3428
3429 if (pOut->pMipInfo != NULL)
3430 {
3431 pOut->pMipInfo[0].pitch = pOut->pitch;
3432 pOut->pMipInfo[0].height = pOut->height;
3433 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3434 pOut->pMipInfo[0].offset = 0;
3435 pOut->pMipInfo[0].mipTailOffset = 0;
3436 pOut->pMipInfo[0].macroBlockOffset = 0;
3437 pOut->pMipInfo[0].mipTailCoordX = 0;
3438 pOut->pMipInfo[0].mipTailCoordY = 0;
3439 pOut->pMipInfo[0].mipTailCoordZ = 0;
3440 }
3441 }
3442 }
3443 }
3444
3445 return returnCode;
3446 }
3447
3448 /**
3449 ************************************************************************************************************************
3450 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3451 *
3452 * @brief
3453 * Internal function to calculate address from coord for tiled swizzle surface
3454 *
3455 * @return
3456 * ADDR_E_RETURNCODE
3457 ************************************************************************************************************************
3458 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3459 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3460 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3461 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3462 ) const
3463 {
3464 ADDR_E_RETURNCODE ret;
3465
3466 if (IsBlock256b(pIn->swizzleMode))
3467 {
3468 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3469 }
3470 else
3471 {
3472 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3473 }
3474
3475 return ret;
3476 }
3477
3478 /**
3479 ************************************************************************************************************************
3480 * Gfx10Lib::ComputeOffsetFromEquation
3481 *
3482 * @brief
3483 * Compute offset from equation
3484 *
3485 * @return
3486 * Offset
3487 ************************************************************************************************************************
3488 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3489 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3490 const ADDR_EQUATION* pEq, ///< Equation
3491 UINT_32 x, ///< x coord in bytes
3492 UINT_32 y, ///< y coord in pixel
3493 UINT_32 z ///< z coord in slice
3494 ) const
3495 {
3496 UINT_32 offset = 0;
3497
3498 for (UINT_32 i = 0; i < pEq->numBits; i++)
3499 {
3500 UINT_32 v = 0;
3501
3502 if (pEq->addr[i].valid)
3503 {
3504 if (pEq->addr[i].channel == 0)
3505 {
3506 v ^= (x >> pEq->addr[i].index) & 1;
3507 }
3508 else if (pEq->addr[i].channel == 1)
3509 {
3510 v ^= (y >> pEq->addr[i].index) & 1;
3511 }
3512 else
3513 {
3514 ADDR_ASSERT(pEq->addr[i].channel == 2);
3515 v ^= (z >> pEq->addr[i].index) & 1;
3516 }
3517 }
3518
3519 if (pEq->xor1[i].valid)
3520 {
3521 if (pEq->xor1[i].channel == 0)
3522 {
3523 v ^= (x >> pEq->xor1[i].index) & 1;
3524 }
3525 else if (pEq->xor1[i].channel == 1)
3526 {
3527 v ^= (y >> pEq->xor1[i].index) & 1;
3528 }
3529 else
3530 {
3531 ADDR_ASSERT(pEq->xor1[i].channel == 2);
3532 v ^= (z >> pEq->xor1[i].index) & 1;
3533 }
3534 }
3535
3536 if (pEq->xor2[i].valid)
3537 {
3538 if (pEq->xor2[i].channel == 0)
3539 {
3540 v ^= (x >> pEq->xor2[i].index) & 1;
3541 }
3542 else if (pEq->xor2[i].channel == 1)
3543 {
3544 v ^= (y >> pEq->xor2[i].index) & 1;
3545 }
3546 else
3547 {
3548 ADDR_ASSERT(pEq->xor2[i].channel == 2);
3549 v ^= (z >> pEq->xor2[i].index) & 1;
3550 }
3551 }
3552
3553 offset |= (v << i);
3554 }
3555
3556 return offset;
3557 }
3558
3559 /**
3560 ************************************************************************************************************************
3561 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3562 *
3563 * @brief
3564 * Compute offset from swizzle pattern
3565 *
3566 * @return
3567 * Offset
3568 ************************************************************************************************************************
3569 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3570 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3571 const UINT_64* pPattern, ///< Swizzle pattern
3572 UINT_32 numBits, ///< Number of bits in pattern
3573 UINT_32 x, ///< x coord in pixel
3574 UINT_32 y, ///< y coord in pixel
3575 UINT_32 z, ///< z coord in slice
3576 UINT_32 s ///< sample id
3577 ) const
3578 {
3579 UINT_32 offset = 0;
3580 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3581
3582 for (UINT_32 i = 0; i < numBits; i++)
3583 {
3584 UINT_32 v = 0;
3585
3586 if (pSwizzlePattern[i].x != 0)
3587 {
3588 UINT_16 mask = pSwizzlePattern[i].x;
3589 UINT_32 xBits = x;
3590
3591 while (mask != 0)
3592 {
3593 if (mask & 1)
3594 {
3595 v ^= xBits & 1;
3596 }
3597
3598 xBits >>= 1;
3599 mask >>= 1;
3600 }
3601 }
3602
3603 if (pSwizzlePattern[i].y != 0)
3604 {
3605 UINT_16 mask = pSwizzlePattern[i].y;
3606 UINT_32 yBits = y;
3607
3608 while (mask != 0)
3609 {
3610 if (mask & 1)
3611 {
3612 v ^= yBits & 1;
3613 }
3614
3615 yBits >>= 1;
3616 mask >>= 1;
3617 }
3618 }
3619
3620 if (pSwizzlePattern[i].z != 0)
3621 {
3622 UINT_16 mask = pSwizzlePattern[i].z;
3623 UINT_32 zBits = z;
3624
3625 while (mask != 0)
3626 {
3627 if (mask & 1)
3628 {
3629 v ^= zBits & 1;
3630 }
3631
3632 zBits >>= 1;
3633 mask >>= 1;
3634 }
3635 }
3636
3637 if (pSwizzlePattern[i].s != 0)
3638 {
3639 UINT_16 mask = pSwizzlePattern[i].s;
3640 UINT_32 sBits = s;
3641
3642 while (mask != 0)
3643 {
3644 if (mask & 1)
3645 {
3646 v ^= sBits & 1;
3647 }
3648
3649 sBits >>= 1;
3650 mask >>= 1;
3651 }
3652 }
3653
3654 offset |= (v << i);
3655 }
3656
3657 return offset;
3658 }
3659
3660 /**
3661 ************************************************************************************************************************
3662 * Gfx10Lib::GetSwizzlePatternInfo
3663 *
3664 * @brief
3665 * Get swizzle pattern
3666 *
3667 * @return
3668 * Swizzle pattern information
3669 ************************************************************************************************************************
3670 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3671 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
3672 AddrSwizzleMode swizzleMode, ///< Swizzle mode
3673 AddrResourceType resourceType, ///< Resource type
3674 UINT_32 elemLog2, ///< Element size in bytes log2
3675 UINT_32 numFrag ///< Number of fragment
3676 ) const
3677 {
3678 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3679 const ADDR_SW_PATINFO* patInfo = NULL;
3680 const UINT_32 swizzleMask = 1 << swizzleMode;
3681
3682 if (IsBlockVariable(swizzleMode))
3683 {
3684 if (m_blockVarSizeLog2 != 0)
3685 {
3686 ADDR_ASSERT(m_settings.supportRbPlus);
3687
3688 if (IsRtOptSwizzle(swizzleMode))
3689 {
3690 if (numFrag == 1)
3691 {
3692 patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
3693 }
3694 else if (numFrag == 2)
3695 {
3696 patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
3697 }
3698 else if (numFrag == 4)
3699 {
3700 patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
3701 }
3702 else
3703 {
3704 ADDR_ASSERT(numFrag == 8);
3705 patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
3706 }
3707 }
3708 else if (IsZOrderSwizzle(swizzleMode))
3709 {
3710 if (numFrag == 1)
3711 {
3712 patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
3713 }
3714 else if (numFrag == 2)
3715 {
3716 patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
3717 }
3718 else if (numFrag == 4)
3719 {
3720 patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
3721 }
3722 else
3723 {
3724 ADDR_ASSERT(numFrag == 8);
3725 patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
3726 }
3727 }
3728 }
3729 }
3730 else if (IsLinear(swizzleMode) == FALSE)
3731 {
3732 if (resourceType == ADDR_RSRC_TEX_3D)
3733 {
3734 ADDR_ASSERT(numFrag == 1);
3735
3736 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
3737 {
3738 if (IsRtOptSwizzle(swizzleMode))
3739 {
3740 patInfo = m_settings.supportRbPlus ?
3741 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3742 }
3743 else if (IsZOrderSwizzle(swizzleMode))
3744 {
3745 patInfo = m_settings.supportRbPlus ?
3746 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3747 }
3748 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3749 {
3750 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3751 patInfo = m_settings.supportRbPlus ?
3752 GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
3753 }
3754 else
3755 {
3756 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3757
3758 if (IsBlock4kb(swizzleMode))
3759 {
3760 if (swizzleMode == ADDR_SW_4KB_S)
3761 {
3762 patInfo = m_settings.supportRbPlus ?
3763 GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
3764 }
3765 else
3766 {
3767 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3768 patInfo = m_settings.supportRbPlus ?
3769 GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
3770 }
3771 }
3772 else
3773 {
3774 if (swizzleMode == ADDR_SW_64KB_S)
3775 {
3776 patInfo = m_settings.supportRbPlus ?
3777 GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
3778 }
3779 else if (swizzleMode == ADDR_SW_64KB_S_X)
3780 {
3781 patInfo = m_settings.supportRbPlus ?
3782 GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
3783 }
3784 else
3785 {
3786 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3787 patInfo = m_settings.supportRbPlus ?
3788 GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
3789 }
3790 }
3791 }
3792 }
3793 }
3794 else
3795 {
3796 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
3797 {
3798 if (IsBlock256b(swizzleMode))
3799 {
3800 if (swizzleMode == ADDR_SW_256B_S)
3801 {
3802 patInfo = m_settings.supportRbPlus ?
3803 GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
3804 }
3805 else
3806 {
3807 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3808 patInfo = m_settings.supportRbPlus ?
3809 GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
3810 }
3811 }
3812 else if (IsBlock4kb(swizzleMode))
3813 {
3814 if (IsStandardSwizzle(resourceType, swizzleMode))
3815 {
3816 if (swizzleMode == ADDR_SW_4KB_S)
3817 {
3818 patInfo = m_settings.supportRbPlus ?
3819 GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
3820 }
3821 else
3822 {
3823 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
3824 patInfo = m_settings.supportRbPlus ?
3825 GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
3826 }
3827 }
3828 else
3829 {
3830 if (swizzleMode == ADDR_SW_4KB_D)
3831 {
3832 patInfo = m_settings.supportRbPlus ?
3833 GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
3834 }
3835 else
3836 {
3837 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
3838 patInfo = m_settings.supportRbPlus ?
3839 GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
3840 }
3841 }
3842 }
3843 else
3844 {
3845 if (IsRtOptSwizzle(swizzleMode))
3846 {
3847 if (numFrag == 1)
3848 {
3849 patInfo = m_settings.supportRbPlus ?
3850 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
3851 }
3852 else if (numFrag == 2)
3853 {
3854 patInfo = m_settings.supportRbPlus ?
3855 GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
3856 }
3857 else if (numFrag == 4)
3858 {
3859 patInfo = m_settings.supportRbPlus ?
3860 GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
3861 }
3862 else
3863 {
3864 ADDR_ASSERT(numFrag == 8);
3865 patInfo = m_settings.supportRbPlus ?
3866 GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
3867 }
3868 }
3869 else if (IsZOrderSwizzle(swizzleMode))
3870 {
3871 if (numFrag == 1)
3872 {
3873 patInfo = m_settings.supportRbPlus ?
3874 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
3875 }
3876 else if (numFrag == 2)
3877 {
3878 patInfo = m_settings.supportRbPlus ?
3879 GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
3880 }
3881 else if (numFrag == 4)
3882 {
3883 patInfo = m_settings.supportRbPlus ?
3884 GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
3885 }
3886 else
3887 {
3888 ADDR_ASSERT(numFrag == 8);
3889 patInfo = m_settings.supportRbPlus ?
3890 GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
3891 }
3892 }
3893 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3894 {
3895 if (swizzleMode == ADDR_SW_64KB_D)
3896 {
3897 patInfo = m_settings.supportRbPlus ?
3898 GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
3899 }
3900 else if (swizzleMode == ADDR_SW_64KB_D_X)
3901 {
3902 patInfo = m_settings.supportRbPlus ?
3903 GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
3904 }
3905 else
3906 {
3907 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
3908 patInfo = m_settings.supportRbPlus ?
3909 GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
3910 }
3911 }
3912 else
3913 {
3914 if (swizzleMode == ADDR_SW_64KB_S)
3915 {
3916 patInfo = m_settings.supportRbPlus ?
3917 GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
3918 }
3919 else if (swizzleMode == ADDR_SW_64KB_S_X)
3920 {
3921 patInfo = m_settings.supportRbPlus ?
3922 GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
3923 }
3924 else
3925 {
3926 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
3927 patInfo = m_settings.supportRbPlus ?
3928 GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
3929 }
3930 }
3931 }
3932 }
3933 }
3934 }
3935
3936 return (patInfo != NULL) ? &patInfo[index] : NULL;
3937 }
3938
3939
3940 /**
3941 ************************************************************************************************************************
3942 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
3943 *
3944 * @brief
3945 * Internal function to calculate address from coord for micro tiled swizzle surface
3946 *
3947 * @return
3948 * ADDR_E_RETURNCODE
3949 ************************************************************************************************************************
3950 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3951 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
3952 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3953 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3954 ) const
3955 {
3956 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3957 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
3958 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
3959
3960 localIn.swizzleMode = pIn->swizzleMode;
3961 localIn.flags = pIn->flags;
3962 localIn.resourceType = pIn->resourceType;
3963 localIn.bpp = pIn->bpp;
3964 localIn.width = Max(pIn->unalignedWidth, 1u);
3965 localIn.height = Max(pIn->unalignedHeight, 1u);
3966 localIn.numSlices = Max(pIn->numSlices, 1u);
3967 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
3968 localIn.numSamples = Max(pIn->numSamples, 1u);
3969 localIn.numFrags = Max(pIn->numFrags, 1u);
3970 localOut.pMipInfo = mipInfo;
3971
3972 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
3973
3974 if (ret == ADDR_OK)
3975 {
3976 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3977 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3978 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3979 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3980
3981 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3982 {
3983 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
3984 const UINT_32 yb = pIn->y / localOut.blockHeight;
3985 const UINT_32 xb = pIn->x / localOut.blockWidth;
3986 const UINT_32 blockIndex = yb * pb + xb;
3987 const UINT_32 blockSize = 256;
3988 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
3989 pIn->x << elemLog2,
3990 pIn->y,
3991 0);
3992 pOut->addr = localOut.sliceSize * pIn->slice +
3993 mipInfo[pIn->mipId].macroBlockOffset +
3994 (blockIndex * blockSize) +
3995 blk256Offset;
3996 }
3997 else
3998 {
3999 ret = ADDR_INVALIDPARAMS;
4000 }
4001 }
4002
4003 return ret;
4004 }
4005
4006 /**
4007 ************************************************************************************************************************
4008 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4009 *
4010 * @brief
4011 * Internal function to calculate address from coord for macro tiled swizzle surface
4012 *
4013 * @return
4014 * ADDR_E_RETURNCODE
4015 ************************************************************************************************************************
4016 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4017 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4018 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4019 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4020 ) const
4021 {
4022 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4023 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4024 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4025
4026 localIn.swizzleMode = pIn->swizzleMode;
4027 localIn.flags = pIn->flags;
4028 localIn.resourceType = pIn->resourceType;
4029 localIn.bpp = pIn->bpp;
4030 localIn.width = Max(pIn->unalignedWidth, 1u);
4031 localIn.height = Max(pIn->unalignedHeight, 1u);
4032 localIn.numSlices = Max(pIn->numSlices, 1u);
4033 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4034 localIn.numSamples = Max(pIn->numSamples, 1u);
4035 localIn.numFrags = Max(pIn->numFrags, 1u);
4036 localOut.pMipInfo = mipInfo;
4037
4038 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4039
4040 if (ret == ADDR_OK)
4041 {
4042 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4043 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4044 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4045 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4046 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4047 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4048 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4049
4050 if (localIn.numFrags > 1)
4051 {
4052 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4053 pIn->resourceType,
4054 elemLog2,
4055 localIn.numFrags);
4056
4057 if (pPatInfo != NULL)
4058 {
4059 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4060 const UINT_32 yb = pIn->y / localOut.blockHeight;
4061 const UINT_32 xb = pIn->x / localOut.blockWidth;
4062 const UINT_64 blkIdx = yb * pb + xb;
4063
4064 ADDR_BIT_SETTING fullSwizzlePattern[20];
4065 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4066
4067 const UINT_32 blkOffset =
4068 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4069 blkSizeLog2,
4070 pIn->x,
4071 pIn->y,
4072 pIn->slice,
4073 pIn->sample);
4074
4075 pOut->addr = (localOut.sliceSize * pIn->slice) +
4076 (blkIdx << blkSizeLog2) +
4077 (blkOffset ^ pipeBankXor);
4078 }
4079 else
4080 {
4081 ret = ADDR_INVALIDPARAMS;
4082 }
4083 }
4084 else
4085 {
4086 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4087 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4088 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4089
4090 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4091 {
4092 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4093 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4094 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4095 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4096 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4097 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4098 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4099 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4100 const UINT_32 yb = pIn->y / localOut.blockHeight;
4101 const UINT_32 xb = pIn->x / localOut.blockWidth;
4102 const UINT_64 blkIdx = yb * pb + xb;
4103 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4104 x << elemLog2,
4105 y,
4106 z);
4107 pOut->addr = sliceSize * sliceId +
4108 mipInfo[pIn->mipId].macroBlockOffset +
4109 (blkIdx << blkSizeLog2) +
4110 (blkOffset ^ pipeBankXor);
4111 }
4112 else
4113 {
4114 ret = ADDR_INVALIDPARAMS;
4115 }
4116 }
4117 }
4118
4119 return ret;
4120 }
4121
4122 /**
4123 ************************************************************************************************************************
4124 * Gfx10Lib::HwlComputeMaxBaseAlignments
4125 *
4126 * @brief
4127 * Gets maximum alignments
4128 * @return
4129 * maximum alignments
4130 ************************************************************************************************************************
4131 */
HwlComputeMaxBaseAlignments() const4132 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4133 {
4134 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4135 }
4136
4137 /**
4138 ************************************************************************************************************************
4139 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4140 *
4141 * @brief
4142 * Gets maximum alignments for metadata
4143 * @return
4144 * maximum alignments for metadata
4145 ************************************************************************************************************************
4146 */
HwlComputeMaxMetaBaseAlignments() const4147 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4148 {
4149 Dim3d metaBlk;
4150
4151 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4152 {
4153 ADDR_SW_64KB_Z_X,
4154 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4155 };
4156
4157 UINT_32 maxBaseAlignHtile = 0;
4158 UINT_32 maxBaseAlignCmask = 0;
4159
4160 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4161 {
4162 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4163 {
4164 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4165 {
4166 // Max base alignment for Htile
4167 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4168 ADDR_RSRC_TEX_2D,
4169 ValidSwizzleModeForXmask[swIdx],
4170 bppLog2,
4171 numFragLog2,
4172 TRUE,
4173 &metaBlk);
4174
4175 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4176 }
4177 }
4178
4179 // Max base alignment for Cmask
4180 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4181 ADDR_RSRC_TEX_2D,
4182 ValidSwizzleModeForXmask[swIdx],
4183 0,
4184 0,
4185 TRUE,
4186 &metaBlk);
4187
4188 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4189 }
4190
4191 // Max base alignment for 2D Dcc
4192 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4193 {
4194 ADDR_SW_64KB_S_X,
4195 ADDR_SW_64KB_D_X,
4196 ADDR_SW_64KB_R_X,
4197 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4198 };
4199
4200 UINT_32 maxBaseAlignDcc2D = 0;
4201
4202 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4203 {
4204 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4205 {
4206 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4207 {
4208 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4209 ADDR_RSRC_TEX_2D,
4210 ValidSwizzleModeForDcc2D[swIdx],
4211 bppLog2,
4212 numFragLog2,
4213 TRUE,
4214 &metaBlk);
4215
4216 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4217 }
4218 }
4219 }
4220
4221 // Max base alignment for 3D Dcc
4222 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4223 {
4224 ADDR_SW_64KB_Z_X,
4225 ADDR_SW_64KB_S_X,
4226 ADDR_SW_64KB_D_X,
4227 ADDR_SW_64KB_R_X,
4228 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4229 };
4230
4231 UINT_32 maxBaseAlignDcc3D = 0;
4232
4233 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4234 {
4235 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4236 {
4237 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4238 ADDR_RSRC_TEX_3D,
4239 ValidSwizzleModeForDcc3D[swIdx],
4240 bppLog2,
4241 0,
4242 TRUE,
4243 &metaBlk);
4244
4245 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4246 }
4247 }
4248
4249 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4250 }
4251
4252 /**
4253 ************************************************************************************************************************
4254 * Gfx10Lib::GetMetaElementSizeLog2
4255 *
4256 * @brief
4257 * Gets meta data element size log2
4258 * @return
4259 * Meta data element size log2
4260 ************************************************************************************************************************
4261 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4262 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4263 Gfx10DataType dataType) ///< Data surface type
4264 {
4265 INT_32 elemSizeLog2 = 0;
4266
4267 if (dataType == Gfx10DataColor)
4268 {
4269 elemSizeLog2 = 0;
4270 }
4271 else if (dataType == Gfx10DataDepthStencil)
4272 {
4273 elemSizeLog2 = 2;
4274 }
4275 else
4276 {
4277 ADDR_ASSERT(dataType == Gfx10DataFmask);
4278 elemSizeLog2 = -1;
4279 }
4280
4281 return elemSizeLog2;
4282 }
4283
4284 /**
4285 ************************************************************************************************************************
4286 * Gfx10Lib::GetMetaCacheSizeLog2
4287 *
4288 * @brief
4289 * Gets meta data cache line size log2
4290 * @return
4291 * Meta data cache line size log2
4292 ************************************************************************************************************************
4293 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4294 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4295 Gfx10DataType dataType) ///< Data surface type
4296 {
4297 INT_32 cacheSizeLog2 = 0;
4298
4299 if (dataType == Gfx10DataColor)
4300 {
4301 cacheSizeLog2 = 6;
4302 }
4303 else if (dataType == Gfx10DataDepthStencil)
4304 {
4305 cacheSizeLog2 = 8;
4306 }
4307 else
4308 {
4309 ADDR_ASSERT(dataType == Gfx10DataFmask);
4310 cacheSizeLog2 = 8;
4311 }
4312 return cacheSizeLog2;
4313 }
4314
4315 /**
4316 ************************************************************************************************************************
4317 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4318 *
4319 * @brief
4320 * Internal function to calculate alignment for linear surface
4321 *
4322 * @return
4323 * ADDR_E_RETURNCODE
4324 ************************************************************************************************************************
4325 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4326 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4327 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4328 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4329 ) const
4330 {
4331 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4332
4333 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4334 {
4335 returnCode = ADDR_INVALIDPARAMS;
4336 }
4337 else
4338 {
4339 const UINT_32 elementBytes = pIn->bpp >> 3;
4340 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4341 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4342 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4343 UINT_32 actualHeight = pIn->height;
4344 UINT_64 sliceSize = 0;
4345
4346 if (pIn->numMipLevels > 1)
4347 {
4348 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4349 {
4350 UINT_32 mipWidth, mipHeight;
4351
4352 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4353
4354 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4355
4356 if (pOut->pMipInfo != NULL)
4357 {
4358 pOut->pMipInfo[i].pitch = mipActualWidth;
4359 pOut->pMipInfo[i].height = mipHeight;
4360 pOut->pMipInfo[i].depth = mipDepth;
4361 pOut->pMipInfo[i].offset = sliceSize;
4362 pOut->pMipInfo[i].mipTailOffset = 0;
4363 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4364 }
4365
4366 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4367 }
4368 }
4369 else
4370 {
4371 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4372
4373 if (returnCode == ADDR_OK)
4374 {
4375 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4376
4377 if (pOut->pMipInfo != NULL)
4378 {
4379 pOut->pMipInfo[0].pitch = pitch;
4380 pOut->pMipInfo[0].height = actualHeight;
4381 pOut->pMipInfo[0].depth = mipDepth;
4382 pOut->pMipInfo[0].offset = 0;
4383 pOut->pMipInfo[0].mipTailOffset = 0;
4384 pOut->pMipInfo[0].macroBlockOffset = 0;
4385 }
4386 }
4387 }
4388
4389 if (returnCode == ADDR_OK)
4390 {
4391 pOut->pitch = pitch;
4392 pOut->height = actualHeight;
4393 pOut->numSlices = pIn->numSlices;
4394 pOut->sliceSize = sliceSize;
4395 pOut->surfSize = sliceSize * pOut->numSlices;
4396 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4397 pOut->blockWidth = pitchAlign;
4398 pOut->blockHeight = 1;
4399 pOut->blockSlices = 1;
4400
4401 // Following members are useless on GFX10
4402 pOut->mipChainPitch = 0;
4403 pOut->mipChainHeight = 0;
4404 pOut->mipChainSlice = 0;
4405 pOut->epitchIsHeight = FALSE;
4406
4407 // Post calculation validate
4408 ADDR_ASSERT(pOut->sliceSize > 0);
4409 }
4410 }
4411
4412 return returnCode;
4413 }
4414
4415 } // V2
4416 } // Addr
4417