1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE
23 *
24 ***********************************************************************************************************************/
25
26 /**
27 ************************************************************************************************************************
28 * @file gfx9addrlib.cpp
* @brief Contains the implementation for the Gfx9Lib class.
30 ************************************************************************************************************************
31 */
32
33 #include "gfx9addrlib.h"
34
35 #include "gfx9_gb_reg.h"
36
37 #include "amdgpu_asic_addr.h"
38
39 ////////////////////////////////////////////////////////////////////////////////////////////////////
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41
42 namespace Addr
43 {
44
45 /**
46 ************************************************************************************************************************
47 * Gfx9HwlInit
48 *
49 * @brief
* Creates a Gfx9Lib object.
51 *
52 * @return
* Returns a Gfx9Lib object pointer.
54 ************************************************************************************************************************
55 */
Gfx9HwlInit(const Client * pClient)56 Addr::Lib* Gfx9HwlInit(const Client* pClient)
57 {
58 return V2::Gfx9Lib::CreateObj(pClient);
59 }
60
61 namespace V2
62 {
63
64 ////////////////////////////////////////////////////////////////////////////////////////////////////
65 // Static Const Member
66 ////////////////////////////////////////////////////////////////////////////////////////////////////
67
// Per-swizzle-mode flag table. There is one row for each of the ADDR_SW_MAX_TYPE entries; the
// trailing comment on each row names the swizzle mode the row describes (or marks it reserved).
// Every row holds 13 flag values in the column order given by the header comment on the first
// line of the initializer. The constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
    {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
    {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S
    {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
    {{0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_256B_R

    {{0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_Z
    {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
    {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
    {{0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_4KB_R

    {{0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_Z
    {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
    {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
    {{0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_64KB_R

    // All-zero rows: reserved slots with no flag set
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved

    // The _T rows set both the XOR and the T columns
    {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_Z_T
    {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
    {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
    {{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}}, // ADDR_SW_64KB_R_T

    // The _x / _X rows set the XOR column only
    {{0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_Z_x
    {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_x
    {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_x
    {{0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}}, // ADDR_SW_4KB_R_x

    {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
    {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
    {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
    {{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}}, // ADDR_SW_64KB_R_X

    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
    {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
};
111
112 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
113
114 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
115
116 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
117
118 /**
119 ************************************************************************************************************************
120 * Gfx9Lib::Gfx9Lib
121 *
122 * @brief
123 * Constructor
124 *
125 ************************************************************************************************************************
126 */
Gfx9Lib(const Client * pClient)127 Gfx9Lib::Gfx9Lib(const Client* pClient)
128 :
129 Lib(pClient)
130 {
131 memset(&m_settings, 0, sizeof(m_settings));
132 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
133 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
134 m_metaEqOverrideIndex = 0;
135 }
136
137 /**
138 ************************************************************************************************************************
139 * Gfx9Lib::~Gfx9Lib
140 *
141 * @brief
142 * Destructor
143 ************************************************************************************************************************
144 */
~Gfx9Lib()145 Gfx9Lib::~Gfx9Lib()
146 {
147 }
148
149 /**
150 ************************************************************************************************************************
151 * Gfx9Lib::HwlComputeHtileInfo
152 *
153 * @brief
* Interface function stub of AddrComputeHtileInfo
155 *
156 * @return
157 * ADDR_E_RETURNCODE
158 ************************************************************************************************************************
159 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const160 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
161 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
162 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
163 ) const
164 {
165 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
166 pIn->swizzleMode);
167
168 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
169
170 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
171
172 if ((numPipeTotal == 1) && (numRbTotal == 1))
173 {
174 numCompressBlkPerMetaBlkLog2 = 10;
175 }
176 else
177 {
178 if (m_settings.applyAliasFix)
179 {
180 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
181 }
182 else
183 {
184 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
185 }
186 }
187
188 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
189
190 Dim3d metaBlkDim = {8, 8, 1};
191 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
192 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
193 UINT_32 heightAmp = totalAmpBits - widthAmp;
194 metaBlkDim.w <<= widthAmp;
195 metaBlkDim.h <<= heightAmp;
196
197 #if DEBUG
198 Dim3d metaBlkDimDbg = {8, 8, 1};
199 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
200 {
201 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
202 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
203 {
204 metaBlkDimDbg.h <<= 1;
205 }
206 else
207 {
208 metaBlkDimDbg.w <<= 1;
209 }
210 }
211 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
212 #endif
213
214 UINT_32 numMetaBlkX;
215 UINT_32 numMetaBlkY;
216 UINT_32 numMetaBlkZ;
217
218 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
219 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
220 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
221
222 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
223 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
224
225 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
226 {
227 align *= (numPipeTotal >> 1);
228 }
229
230 align = Max(align, metaBlkSize);
231
232 if (m_settings.metaBaseAlignFix)
233 {
234 align = Max(align, GetBlockSize(pIn->swizzleMode));
235 }
236
237 if (m_settings.htileAlignFix)
238 {
239 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
240 const INT_32 htileCachelineSizeLog2 = 11;
241 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
242
243 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
244
245 align <<= rbMaskPadding;
246 }
247
248 pOut->pitch = numMetaBlkX * metaBlkDim.w;
249 pOut->height = numMetaBlkY * metaBlkDim.h;
250 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
251
252 pOut->metaBlkWidth = metaBlkDim.w;
253 pOut->metaBlkHeight = metaBlkDim.h;
254 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
255
256 pOut->baseAlign = align;
257 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
258
259 return ADDR_OK;
260 }
261
262 /**
263 ************************************************************************************************************************
264 * Gfx9Lib::HwlComputeCmaskInfo
265 *
266 * @brief
267 * Interface function stub of AddrComputeCmaskInfo
268 *
269 * @return
270 * ADDR_E_RETURNCODE
271 ************************************************************************************************************************
272 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const273 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
274 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
275 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
276 ) const
277 {
278 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
279
280 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
281 pIn->swizzleMode);
282
283 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
284
285 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
286
287 if ((numPipeTotal == 1) && (numRbTotal == 1))
288 {
289 numCompressBlkPerMetaBlkLog2 = 13;
290 }
291 else
292 {
293 if (m_settings.applyAliasFix)
294 {
295 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
296 }
297 else
298 {
299 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
300 }
301
302 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
303 }
304
305 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
306
307 Dim2d metaBlkDim = {8, 8};
308 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
309 UINT_32 heightAmp = totalAmpBits >> 1;
310 UINT_32 widthAmp = totalAmpBits - heightAmp;
311 metaBlkDim.w <<= widthAmp;
312 metaBlkDim.h <<= heightAmp;
313
314 #if DEBUG
315 Dim2d metaBlkDimDbg = {8, 8};
316 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
317 {
318 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
319 {
320 metaBlkDimDbg.h <<= 1;
321 }
322 else
323 {
324 metaBlkDimDbg.w <<= 1;
325 }
326 }
327 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
328 #endif
329
330 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
331 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
332 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
333
334 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
335
336 if (m_settings.metaBaseAlignFix)
337 {
338 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
339 }
340
341 pOut->pitch = numMetaBlkX * metaBlkDim.w;
342 pOut->height = numMetaBlkY * metaBlkDim.h;
343 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
344 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
345 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
346
347 pOut->metaBlkWidth = metaBlkDim.w;
348 pOut->metaBlkHeight = metaBlkDim.h;
349
350 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
351
352 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
353 UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
354 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
355 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
356 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
357
358 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
359 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
360 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
361
362 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
363
364 // Generate the CMASK address equation.
365 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
366 bool checked = false;
367 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
368 CoordTerm &bit = (*eq)[b];
369
370 unsigned c;
371 for (c = 0; c < bit.getsize(); c++) {
372 Coordinate &coord = bit[c];
373 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
374 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
375 }
376 for (; c < 5; c++)
377 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
378 }
379
380 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
381 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
382 CoordTerm &prev = (*eq)[b - 1];
383 CoordTerm &cur = (*eq)[b];
384
385 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
386 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
387 prev[0].getord() + 1 == cur[0].getord())
388 pOut->equation.gfx9.num_bits = b;
389 else
390 break;
391 }
392
393 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
394 pIn->swizzleMode);
395
396 return ADDR_OK;
397 }
398
399 /**
400 ************************************************************************************************************************
401 * Gfx9Lib::GetMetaMipInfo
402 *
403 * @brief
404 * Get meta mip info
405 *
406 * @return
407 * N/A
408 ************************************************************************************************************************
409 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const410 VOID Gfx9Lib::GetMetaMipInfo(
411 UINT_32 numMipLevels, ///< [in] number of mip levels
412 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
413 BOOL_32 dataThick, ///< [in] data surface is thick
414 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
415 UINT_32 mip0Width, ///< [in] mip0 width
416 UINT_32 mip0Height, ///< [in] mip0 height
417 UINT_32 mip0Depth, ///< [in] mip0 depth
418 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
419 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
420 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
421 const
422 {
423 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
424 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
425 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
426 UINT_32 tailWidth = pMetaBlkDim->w;
427 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
428 UINT_32 tailDepth = pMetaBlkDim->d;
429 BOOL_32 inTail = FALSE;
430 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
431
432 if (numMipLevels > 1)
433 {
434 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
435 {
436 // Z major
437 major = ADDR_MAJOR_Z;
438 }
439 else if (numMetaBlkX >= numMetaBlkY)
440 {
441 // X major
442 major = ADDR_MAJOR_X;
443 }
444 else
445 {
446 // Y major
447 major = ADDR_MAJOR_Y;
448 }
449
450 inTail = ((mip0Width <= tailWidth) &&
451 (mip0Height <= tailHeight) &&
452 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
453
454 if (inTail == FALSE)
455 {
456 UINT_32 orderLimit;
457 UINT_32 *pMipDim;
458 UINT_32 *pOrderDim;
459
460 if (major == ADDR_MAJOR_Z)
461 {
462 // Z major
463 pMipDim = &numMetaBlkY;
464 pOrderDim = &numMetaBlkZ;
465 orderLimit = 4;
466 }
467 else if (major == ADDR_MAJOR_X)
468 {
469 // X major
470 pMipDim = &numMetaBlkY;
471 pOrderDim = &numMetaBlkX;
472 orderLimit = 4;
473 }
474 else
475 {
476 // Y major
477 pMipDim = &numMetaBlkX;
478 pOrderDim = &numMetaBlkY;
479 orderLimit = 2;
480 }
481
482 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
483 {
484 *pMipDim += 2;
485 }
486 else
487 {
488 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
489 }
490 }
491 }
492
493 if (pInfo != NULL)
494 {
495 UINT_32 mipWidth = mip0Width;
496 UINT_32 mipHeight = mip0Height;
497 UINT_32 mipDepth = mip0Depth;
498 Dim3d mipCoord = {0};
499
500 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
501 {
502 if (inTail)
503 {
504 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
505 pMetaBlkDim);
506 break;
507 }
508 else
509 {
510 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
511 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
512 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
513
514 pInfo[mip].inMiptail = FALSE;
515 pInfo[mip].startX = mipCoord.w;
516 pInfo[mip].startY = mipCoord.h;
517 pInfo[mip].startZ = mipCoord.d;
518 pInfo[mip].width = mipWidth;
519 pInfo[mip].height = mipHeight;
520 pInfo[mip].depth = dataThick ? mipDepth : 1;
521
522 if ((mip >= 3) || (mip & 1))
523 {
524 switch (major)
525 {
526 case ADDR_MAJOR_X:
527 mipCoord.w += mipWidth;
528 break;
529 case ADDR_MAJOR_Y:
530 mipCoord.h += mipHeight;
531 break;
532 case ADDR_MAJOR_Z:
533 mipCoord.d += mipDepth;
534 break;
535 default:
536 break;
537 }
538 }
539 else
540 {
541 switch (major)
542 {
543 case ADDR_MAJOR_X:
544 mipCoord.h += mipHeight;
545 break;
546 case ADDR_MAJOR_Y:
547 mipCoord.w += mipWidth;
548 break;
549 case ADDR_MAJOR_Z:
550 mipCoord.h += mipHeight;
551 break;
552 default:
553 break;
554 }
555 }
556
557 mipWidth = Max(mipWidth >> 1, 1u);
558 mipHeight = Max(mipHeight >> 1, 1u);
559 mipDepth = Max(mipDepth >> 1, 1u);
560
561 inTail = ((mipWidth <= tailWidth) &&
562 (mipHeight <= tailHeight) &&
563 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
564 }
565 }
566 }
567
568 *pNumMetaBlkX = numMetaBlkX;
569 *pNumMetaBlkY = numMetaBlkY;
570 *pNumMetaBlkZ = numMetaBlkZ;
571 }
572
573 /**
574 ************************************************************************************************************************
575 * Gfx9Lib::HwlComputeDccInfo
576 *
577 * @brief
578 * Interface function to compute DCC key info
579 *
580 * @return
581 * ADDR_E_RETURNCODE
582 ************************************************************************************************************************
583 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const584 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
585 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
586 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
587 ) const
588 {
589 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
590 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
591 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
592
593 if (dataLinear)
594 {
595 metaLinear = TRUE;
596 }
597 else if (metaLinear == TRUE)
598 {
599 pipeAligned = FALSE;
600 }
601
602 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
603
604 if (metaLinear)
605 {
606 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
607 ADDR_ASSERT_ALWAYS();
608
609 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
610 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
611 }
612 else
613 {
614 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
615
616 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
617
618 UINT_32 numFrags = Max(pIn->numFrags, 1u);
619 UINT_32 numSlices = Max(pIn->numSlices, 1u);
620
621 minMetaBlkSize /= numFrags;
622
623 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
624
625 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
626
627 if ((numPipeTotal > 1) || (numRbTotal > 1))
628 {
629 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
630
631 numCompressBlkPerMetaBlk =
632 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
633
634 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
635 {
636 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
637 }
638 }
639
640 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
641 Dim3d metaBlkDim = compressBlkDim;
642
643 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
644 {
645 if ((metaBlkDim.h < metaBlkDim.w) ||
646 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
647 {
648 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
649 {
650 metaBlkDim.h <<= 1;
651 }
652 else
653 {
654 metaBlkDim.d <<= 1;
655 }
656 }
657 else
658 {
659 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
660 {
661 metaBlkDim.w <<= 1;
662 }
663 else
664 {
665 metaBlkDim.d <<= 1;
666 }
667 }
668 }
669
670 UINT_32 numMetaBlkX;
671 UINT_32 numMetaBlkY;
672 UINT_32 numMetaBlkZ;
673
674 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
675 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
676 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
677
678 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
679
680 if (numFrags > m_maxCompFrag)
681 {
682 sizeAlign *= (numFrags / m_maxCompFrag);
683 }
684
685 if (m_settings.metaBaseAlignFix)
686 {
687 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
688 }
689
690 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
691 numCompressBlkPerMetaBlk * numFrags;
692 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
693 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
694
695 pOut->pitch = numMetaBlkX * metaBlkDim.w;
696 pOut->height = numMetaBlkY * metaBlkDim.h;
697 pOut->depth = numMetaBlkZ * metaBlkDim.d;
698
699 pOut->compressBlkWidth = compressBlkDim.w;
700 pOut->compressBlkHeight = compressBlkDim.h;
701 pOut->compressBlkDepth = compressBlkDim.d;
702
703 pOut->metaBlkWidth = metaBlkDim.w;
704 pOut->metaBlkHeight = metaBlkDim.h;
705 pOut->metaBlkDepth = metaBlkDim.d;
706 pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
707
708 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
709 pOut->fastClearSizePerSlice =
710 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
711
712 // Get the DCC address equation (copied from DccAddrFromCoord)
713 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
714 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
715 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
716 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
717 UINT_32 metaBlkDepthLog2 = Log2(pOut->metaBlkDepth);
718 UINT_32 compBlkWidthLog2 = Log2(pOut->compressBlkWidth);
719 UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
720 UINT_32 compBlkDepthLog2 = Log2(pOut->compressBlkDepth);
721
722 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
723 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
724 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
725 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
726
727 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
728
729 // Generate the DCC address equation.
730 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
731 bool checked = false;
732 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
733 CoordTerm &bit = (*eq)[b];
734
735 unsigned c;
736 for (c = 0; c < bit.getsize(); c++) {
737 Coordinate &coord = bit[c];
738 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
739 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
740 }
741 for (; c < 5; c++)
742 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
743 }
744
745 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
746 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
747 CoordTerm &prev = (*eq)[b - 1];
748 CoordTerm &cur = (*eq)[b];
749
750 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
751 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
752 prev[0].getord() + 1 == cur[0].getord())
753 pOut->equation.gfx9.num_bits = b;
754 else
755 break;
756 }
757
758 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
759 pIn->swizzleMode);
760 }
761
762 return ADDR_OK;
763 }
764
765 /**
766 ************************************************************************************************************************
767 * Gfx9Lib::HwlComputeMaxBaseAlignments
768 *
769 * @brief
770 * Gets maximum alignments
771 * @return
772 * maximum alignments
773 ************************************************************************************************************************
774 */
HwlComputeMaxBaseAlignments() const775 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
776 {
777 return Size64K;
778 }
779
780 /**
781 ************************************************************************************************************************
782 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
783 *
784 * @brief
785 * Gets maximum alignments for metadata
786 * @return
787 * maximum alignments for metadata
788 ************************************************************************************************************************
789 */
HwlComputeMaxMetaBaseAlignments() const790 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
791 {
792 // Max base alignment for Htile
793 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
794 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
795
796 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
797 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
798 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
799 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
800
801 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
802
803 if (maxNumPipeTotal > 2)
804 {
805 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
806 }
807
808 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
809
810 if (m_settings.metaBaseAlignFix)
811 {
812 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
813 }
814
815 if (m_settings.htileAlignFix)
816 {
817 maxBaseAlignHtile *= maxNumPipeTotal;
818 }
819
820 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
821
822 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
823 UINT_32 maxBaseAlignDcc3D = 65536;
824
825 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
826 {
827 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
828 }
829
830 // Max base alignment for Msaa Dcc
831 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
832
833 if (m_settings.metaBaseAlignFix)
834 {
835 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
836 }
837
838 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
839 }
840
841 /**
842 ************************************************************************************************************************
843 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
844 *
845 * @brief
846 * Interface function stub of AddrComputeCmaskAddrFromCoord
847 *
848 * @return
849 * ADDR_E_RETURNCODE
850 ************************************************************************************************************************
851 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)852 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
853 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
854 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
855 {
856 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
857 input.size = sizeof(input);
858 input.cMaskFlags = pIn->cMaskFlags;
859 input.colorFlags = pIn->colorFlags;
860 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
861 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
862 input.numSlices = Max(pIn->numSlices, 1u);
863 input.swizzleMode = pIn->swizzleMode;
864 input.resourceType = pIn->resourceType;
865
866 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
867 output.size = sizeof(output);
868
869 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
870
871 if (returnCode == ADDR_OK)
872 {
873 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
874 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
875 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
876 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
877
878 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
879 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
880 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
881
882 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
883
884 UINT_32 xb = pIn->x / output.metaBlkWidth;
885 UINT_32 yb = pIn->y / output.metaBlkHeight;
886 UINT_32 zb = pIn->slice;
887
888 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
889 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
890 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
891
892 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
893 UINT_64 address = pMetaEq->solve(coords);
894
895 pOut->addr = address >> 1;
896 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
897
898
899 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
900 pIn->swizzleMode);
901
902 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
903
904 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
905 }
906
907 return returnCode;
908 }
909
910 /**
911 ************************************************************************************************************************
912 * Gfx9Lib::HwlComputeHtileAddrFromCoord
913 *
914 * @brief
915 * Interface function stub of AddrComputeHtileAddrFromCoord
916 *
917 * @return
918 * ADDR_E_RETURNCODE
919 ************************************************************************************************************************
920 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)921 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
922 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
923 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
924 {
925 ADDR_E_RETURNCODE returnCode = ADDR_OK;
926
927 if (pIn->numMipLevels > 1)
928 {
929 returnCode = ADDR_NOTIMPLEMENTED;
930 }
931 else
932 {
933 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
934 input.size = sizeof(input);
935 input.hTileFlags = pIn->hTileFlags;
936 input.depthFlags = pIn->depthflags;
937 input.swizzleMode = pIn->swizzleMode;
938 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
939 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
940 input.numSlices = Max(pIn->numSlices, 1u);
941 input.numMipLevels = Max(pIn->numMipLevels, 1u);
942
943 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
944 output.size = sizeof(output);
945
946 returnCode = ComputeHtileInfo(&input, &output);
947
948 if (returnCode == ADDR_OK)
949 {
950 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
951 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
952 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
953 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
954
955 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
956 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
957 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
958
959 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
960
961 UINT_32 xb = pIn->x / output.metaBlkWidth;
962 UINT_32 yb = pIn->y / output.metaBlkHeight;
963 UINT_32 zb = pIn->slice;
964
965 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
966 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
967 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
968
969 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
970 UINT_64 address = pMetaEq->solve(coords);
971
972 pOut->addr = address >> 1;
973
974 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
975 pIn->swizzleMode);
976
977 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
978
979 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
980 }
981 }
982
983 return returnCode;
984 }
985
986 /**
987 ************************************************************************************************************************
988 * Gfx9Lib::HwlComputeHtileCoordFromAddr
989 *
990 * @brief
991 * Interface function stub of AddrComputeHtileCoordFromAddr
992 *
993 * @return
994 * ADDR_E_RETURNCODE
995 ************************************************************************************************************************
996 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)997 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
998 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
999 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
1000 {
1001 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1002
1003 if (pIn->numMipLevels > 1)
1004 {
1005 returnCode = ADDR_NOTIMPLEMENTED;
1006 }
1007 else
1008 {
1009 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
1010 input.size = sizeof(input);
1011 input.hTileFlags = pIn->hTileFlags;
1012 input.swizzleMode = pIn->swizzleMode;
1013 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
1014 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
1015 input.numSlices = Max(pIn->numSlices, 1u);
1016 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1017
1018 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1019 output.size = sizeof(output);
1020
1021 returnCode = ComputeHtileInfo(&input, &output);
1022
1023 if (returnCode == ADDR_OK)
1024 {
1025 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1026 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1027 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1028 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
1029
1030 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1031 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1032 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1033
1034 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1035
1036 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1037 pIn->swizzleMode);
1038
1039 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1040
1041 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1042
1043 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1044 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1045
1046 UINT_32 coords[NUM_DIMS];
1047 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1048
1049 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1050 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1051 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1052 }
1053 }
1054
1055 return returnCode;
1056 }
1057
1058 /**
1059 ************************************************************************************************************************
1060 * Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1061 *
1062 * @brief
1063 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1064 *
1065 * @return
1066 * ADDR_E_RETURNCODE
1067 ************************************************************************************************************************
1068 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1069 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1070 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1071 {
1072 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1073
1074 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1075 {
1076 returnCode = ADDR_NOTSUPPORTED;
1077 }
1078 else if ((pIn->pitch == 0) ||
1079 (pIn->height == 0) ||
1080 (pIn->compressBlkWidth == 0) ||
1081 (pIn->compressBlkHeight == 0) ||
1082 (pIn->compressBlkDepth == 0) ||
1083 (pIn->metaBlkWidth == 0) ||
1084 (pIn->metaBlkHeight == 0) ||
1085 (pIn->metaBlkDepth == 0) ||
1086 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1087 {
1088 returnCode = ADDR_NOTSUPPORTED;
1089 }
1090
1091 return returnCode;
1092 }
1093
1094 /**
1095 ************************************************************************************************************************
1096 * Gfx9Lib::HwlComputeDccAddrFromCoord
1097 *
1098 * @brief
1099 * Interface function stub of AddrComputeDccAddrFromCoord
1100 *
1101 * @return
1102 * N/A
1103 ************************************************************************************************************************
1104 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1105 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1106 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
1107 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1108 {
1109 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1110 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1111 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
1112 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1113 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
1114 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
1115 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1116 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
1117
1118 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1119 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1120 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1121 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1122
1123 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1124
1125 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1126 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1127 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1128
1129 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1130 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1131 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1132
1133 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1134 UINT_64 address = pMetaEq->solve(coords);
1135
1136 pOut->addr = address >> 1;
1137
1138 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1139 pIn->swizzleMode);
1140
1141 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1142
1143 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1144 }
1145
1146 /**
1147 ************************************************************************************************************************
1148 * Gfx9Lib::HwlInitGlobalParams
1149 *
1150 * @brief
1151 * Initializes global parameters
1152 *
1153 * @return
1154 * TRUE if all settings are valid
1155 *
1156 ************************************************************************************************************************
1157 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1158 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1159 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1160 {
1161 BOOL_32 valid = TRUE;
1162
1163 if (m_settings.isArcticIsland)
1164 {
1165 GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1166
1167 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1168
1169 // These values are copied from CModel code
1170 switch (gbAddrConfig.bits.NUM_PIPES)
1171 {
1172 case ADDR_CONFIG_1_PIPE:
1173 m_pipes = 1;
1174 m_pipesLog2 = 0;
1175 break;
1176 case ADDR_CONFIG_2_PIPE:
1177 m_pipes = 2;
1178 m_pipesLog2 = 1;
1179 break;
1180 case ADDR_CONFIG_4_PIPE:
1181 m_pipes = 4;
1182 m_pipesLog2 = 2;
1183 break;
1184 case ADDR_CONFIG_8_PIPE:
1185 m_pipes = 8;
1186 m_pipesLog2 = 3;
1187 break;
1188 case ADDR_CONFIG_16_PIPE:
1189 m_pipes = 16;
1190 m_pipesLog2 = 4;
1191 break;
1192 case ADDR_CONFIG_32_PIPE:
1193 m_pipes = 32;
1194 m_pipesLog2 = 5;
1195 break;
1196 default:
1197 ADDR_ASSERT_ALWAYS();
1198 break;
1199 }
1200
1201 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1202 {
1203 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1204 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1205 m_pipeInterleaveLog2 = 8;
1206 break;
1207 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1208 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1209 m_pipeInterleaveLog2 = 9;
1210 break;
1211 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1212 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1213 m_pipeInterleaveLog2 = 10;
1214 break;
1215 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1216 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1217 m_pipeInterleaveLog2 = 11;
1218 break;
1219 default:
1220 ADDR_ASSERT_ALWAYS();
1221 break;
1222 }
1223
1224 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1225 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1226 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1227
1228 switch (gbAddrConfig.bits.NUM_BANKS)
1229 {
1230 case ADDR_CONFIG_1_BANK:
1231 m_banks = 1;
1232 m_banksLog2 = 0;
1233 break;
1234 case ADDR_CONFIG_2_BANK:
1235 m_banks = 2;
1236 m_banksLog2 = 1;
1237 break;
1238 case ADDR_CONFIG_4_BANK:
1239 m_banks = 4;
1240 m_banksLog2 = 2;
1241 break;
1242 case ADDR_CONFIG_8_BANK:
1243 m_banks = 8;
1244 m_banksLog2 = 3;
1245 break;
1246 case ADDR_CONFIG_16_BANK:
1247 m_banks = 16;
1248 m_banksLog2 = 4;
1249 break;
1250 default:
1251 ADDR_ASSERT_ALWAYS();
1252 break;
1253 }
1254
1255 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1256 {
1257 case ADDR_CONFIG_1_SHADER_ENGINE:
1258 m_se = 1;
1259 m_seLog2 = 0;
1260 break;
1261 case ADDR_CONFIG_2_SHADER_ENGINE:
1262 m_se = 2;
1263 m_seLog2 = 1;
1264 break;
1265 case ADDR_CONFIG_4_SHADER_ENGINE:
1266 m_se = 4;
1267 m_seLog2 = 2;
1268 break;
1269 case ADDR_CONFIG_8_SHADER_ENGINE:
1270 m_se = 8;
1271 m_seLog2 = 3;
1272 break;
1273 default:
1274 ADDR_ASSERT_ALWAYS();
1275 break;
1276 }
1277
1278 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1279 {
1280 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1281 m_rbPerSe = 1;
1282 m_rbPerSeLog2 = 0;
1283 break;
1284 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1285 m_rbPerSe = 2;
1286 m_rbPerSeLog2 = 1;
1287 break;
1288 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1289 m_rbPerSe = 4;
1290 m_rbPerSeLog2 = 2;
1291 break;
1292 default:
1293 ADDR_ASSERT_ALWAYS();
1294 break;
1295 }
1296
1297 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1298 {
1299 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1300 m_maxCompFrag = 1;
1301 m_maxCompFragLog2 = 0;
1302 break;
1303 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1304 m_maxCompFrag = 2;
1305 m_maxCompFragLog2 = 1;
1306 break;
1307 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1308 m_maxCompFrag = 4;
1309 m_maxCompFragLog2 = 2;
1310 break;
1311 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1312 m_maxCompFrag = 8;
1313 m_maxCompFragLog2 = 3;
1314 break;
1315 default:
1316 ADDR_ASSERT_ALWAYS();
1317 break;
1318 }
1319
1320 if ((m_rbPerSeLog2 == 1) &&
1321 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1322 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1323 {
1324 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1325
1326 ADDR_ASSERT(m_settings.isRaven == FALSE);
1327
1328 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1329
1330 if (m_settings.isVega12)
1331 {
1332 m_settings.htileCacheRbConflict = 1;
1333 }
1334 }
1335
1336 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1337 m_blockVarSizeLog2 = 0;
1338 }
1339 else
1340 {
1341 valid = FALSE;
1342 ADDR_NOT_IMPLEMENTED();
1343 }
1344
1345 if (valid)
1346 {
1347 InitEquationTable();
1348 }
1349
1350 return valid;
1351 }
1352
1353 /**
1354 ************************************************************************************************************************
1355 * Gfx9Lib::HwlConvertChipFamily
1356 *
1357 * @brief
1358 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1359 * @return
1360 * ChipFamily
1361 ************************************************************************************************************************
1362 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1363 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1364 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1365 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1366 {
1367 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1368
1369 switch (uChipFamily)
1370 {
1371 case FAMILY_AI:
1372 m_settings.isArcticIsland = 1;
1373 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1374 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1375 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1376 m_settings.isDce12 = 1;
1377
1378 if (m_settings.isVega10 == 0)
1379 {
1380 m_settings.htileAlignFix = 1;
1381 m_settings.applyAliasFix = 1;
1382 }
1383
1384 m_settings.metaBaseAlignFix = 1;
1385
1386 m_settings.depthPipeXorDisable = 1;
1387 break;
1388 case FAMILY_RV:
1389 m_settings.isArcticIsland = 1;
1390
1391 if (ASICREV_IS_RAVEN(uChipRevision))
1392 {
1393 m_settings.isRaven = 1;
1394
1395 m_settings.depthPipeXorDisable = 1;
1396 }
1397
1398 if (ASICREV_IS_RAVEN2(uChipRevision))
1399 {
1400 m_settings.isRaven = 1;
1401 }
1402
1403 if (m_settings.isRaven == 0)
1404 {
1405 m_settings.htileAlignFix = 1;
1406 m_settings.applyAliasFix = 1;
1407 }
1408
1409 m_settings.isDcn1 = m_settings.isRaven;
1410
1411 if (ASICREV_IS_RENOIR(uChipRevision))
1412 {
1413 m_settings.isRaven = 1;
1414 m_settings.isDcn2 = 1;
1415 }
1416
1417 m_settings.metaBaseAlignFix = 1;
1418 break;
1419
1420 default:
1421 ADDR_ASSERT(!"No Chip found");
1422 break;
1423 }
1424
1425 return family;
1426 }
1427
1428 /**
1429 ************************************************************************************************************************
1430 * Gfx9Lib::InitRbEquation
1431 *
1432 * @brief
1433 * Init RB equation
1434 * @return
1435 * N/A
1436 ************************************************************************************************************************
1437 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1438 VOID Gfx9Lib::GetRbEquation(
1439 CoordEq* pRbEq, ///< [out] rb equation
1440 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1441 UINT_32 numSeLog2) ///< [in] number of shader engine
1442 const
1443 {
1444 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1445 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1446 Coordinate cx(DIM_X, rbRegion);
1447 Coordinate cy(DIM_Y, rbRegion);
1448
1449 UINT_32 start = 0;
1450 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1451
1452 // Clear the rb equation
1453 pRbEq->resize(0);
1454 pRbEq->resize(numRbTotalLog2);
1455
1456 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1457 {
1458 // Special case when more than 1 SE, and 2 RB per SE
1459 (*pRbEq)[0].add(cx);
1460 (*pRbEq)[0].add(cy);
1461 cx++;
1462 cy++;
1463
1464 if (m_settings.applyAliasFix == false)
1465 {
1466 (*pRbEq)[0].add(cy);
1467 }
1468
1469 (*pRbEq)[0].add(cy);
1470 start++;
1471 }
1472
1473 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1474
1475 for (UINT_32 i = 0; i < numBits; i++)
1476 {
1477 UINT_32 idx =
1478 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1479
1480 if ((i % 2) == 1)
1481 {
1482 (*pRbEq)[idx].add(cx);
1483 cx++;
1484 }
1485 else
1486 {
1487 (*pRbEq)[idx].add(cy);
1488 cy++;
1489 }
1490 }
1491 }
1492
1493 /**
1494 ************************************************************************************************************************
1495 * Gfx9Lib::GetDataEquation
1496 *
1497 * @brief
1498 * Get data equation for fmask and Z
1499 * @return
1500 * N/A
1501 ************************************************************************************************************************
1502 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1503 VOID Gfx9Lib::GetDataEquation(
1504 CoordEq* pDataEq, ///< [out] data surface equation
1505 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1506 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1507 AddrResourceType resourceType, ///< [in] data surface resource type
1508 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1509 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1510 const
1511 {
1512 Coordinate cx(DIM_X, 0);
1513 Coordinate cy(DIM_Y, 0);
1514 Coordinate cz(DIM_Z, 0);
1515 Coordinate cs(DIM_S, 0);
1516
1517 // Clear the equation
1518 pDataEq->resize(0);
1519 pDataEq->resize(27);
1520
1521 if (dataSurfaceType == Gfx9DataColor)
1522 {
1523 if (IsLinear(swizzleMode))
1524 {
1525 Coordinate cm(DIM_M, 0);
1526
1527 pDataEq->resize(49);
1528
1529 for (UINT_32 i = 0; i < 49; i++)
1530 {
1531 (*pDataEq)[i].add(cm);
1532 cm++;
1533 }
1534 }
1535 else if (IsThick(resourceType, swizzleMode))
1536 {
1537 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1538 UINT_32 i;
1539 if (IsStandardSwizzle(resourceType, swizzleMode))
1540 {
1541 // Standard 3d swizzle
1542 // Fill in bottom x bits
1543 for (i = elementBytesLog2; i < 4; i++)
1544 {
1545 (*pDataEq)[i].add(cx);
1546 cx++;
1547 }
1548 // Fill in 2 bits of y and then z
1549 for (i = 4; i < 6; i++)
1550 {
1551 (*pDataEq)[i].add(cy);
1552 cy++;
1553 }
1554 for (i = 6; i < 8; i++)
1555 {
1556 (*pDataEq)[i].add(cz);
1557 cz++;
1558 }
1559 if (elementBytesLog2 < 2)
1560 {
1561 // fill in z & y bit
1562 (*pDataEq)[8].add(cz);
1563 (*pDataEq)[9].add(cy);
1564 cz++;
1565 cy++;
1566 }
1567 else if (elementBytesLog2 == 2)
1568 {
1569 // fill in y and x bit
1570 (*pDataEq)[8].add(cy);
1571 (*pDataEq)[9].add(cx);
1572 cy++;
1573 cx++;
1574 }
1575 else
1576 {
1577 // fill in 2 x bits
1578 (*pDataEq)[8].add(cx);
1579 cx++;
1580 (*pDataEq)[9].add(cx);
1581 cx++;
1582 }
1583 }
1584 else
1585 {
1586 // Z 3d swizzle
1587 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1588 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1589 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1590 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1591 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1592 {
1593 (*pDataEq)[i].add(cz);
1594 cz++;
1595 }
1596 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1597 {
1598 // add an x and z
1599 (*pDataEq)[6].add(cx);
1600 (*pDataEq)[7].add(cz);
1601 cx++;
1602 cz++;
1603 }
1604 else if (elementBytesLog2 == 2)
1605 {
1606 // add a y and z
1607 (*pDataEq)[6].add(cy);
1608 (*pDataEq)[7].add(cz);
1609 cy++;
1610 cz++;
1611 }
1612 // add y and x
1613 (*pDataEq)[8].add(cy);
1614 (*pDataEq)[9].add(cx);
1615 cy++;
1616 cx++;
1617 }
1618 // Fill in bit 10 and up
1619 pDataEq->mort3d( cz, cy, cx, 10 );
1620 }
1621 else if (IsThin(resourceType, swizzleMode))
1622 {
1623 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1624 // Color 2D
1625 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1626 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1627 UINT_32 i;
1628 // Fill in bottom x bits
1629 for (i = elementBytesLog2; i < 4; i++)
1630 {
1631 (*pDataEq)[i].add(cx);
1632 cx++;
1633 }
1634 // Fill in bottom y bits
1635 for (i = 4; i < 4 + microYBits; i++)
1636 {
1637 (*pDataEq)[i].add(cy);
1638 cy++;
1639 }
1640 // Fill in last of the micro_x bits
1641 for (i = 4 + microYBits; i < 8; i++)
1642 {
1643 (*pDataEq)[i].add(cx);
1644 cx++;
1645 }
1646 // Fill in x/y bits below sample split
1647 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1648 // Fill in sample bits
1649 for (i = 0; i < numSamplesLog2; i++)
1650 {
1651 cs.set(DIM_S, i);
1652 (*pDataEq)[tileSplitStart + i].add(cs);
1653 }
1654 // Fill in x/y bits above sample split
1655 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1656 {
1657 pDataEq->mort2d(cx, cy, blockSizeLog2);
1658 }
1659 else
1660 {
1661 pDataEq->mort2d(cy, cx, blockSizeLog2);
1662 }
1663 }
1664 else
1665 {
1666 ADDR_ASSERT_ALWAYS();
1667 }
1668 }
1669 else
1670 {
1671 // Fmask or depth
1672 UINT_32 sampleStart = elementBytesLog2;
1673 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1674 UINT_32 ymajStart = 6 + numSamplesLog2;
1675
1676 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1677 {
1678 cs.set(DIM_S, s);
1679 (*pDataEq)[sampleStart + s].add(cs);
1680 }
1681
1682 // Put in the x-major order pixel bits
1683 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1684 // Put in the y-major order pixel bits
1685 pDataEq->mort2d(cy, cx, ymajStart);
1686 }
1687 }
1688
1689 /**
1690 ************************************************************************************************************************
1691 * Gfx9Lib::GetPipeEquation
1692 *
1693 * @brief
1694 * Get pipe equation
1695 * @return
1696 * N/A
1697 ************************************************************************************************************************
1698 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1699 VOID Gfx9Lib::GetPipeEquation(
1700 CoordEq* pPipeEq, ///< [out] pipe equation
1701 CoordEq* pDataEq, ///< [in] data equation
1702 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1703 UINT_32 numPipeLog2, ///< [in] number of pipes
1704 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1705 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1706 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1707 AddrResourceType resourceType ///< [in] data surface resource type
1708 ) const
1709 {
1710 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1711 CoordEq dataEq;
1712
1713 pDataEq->copy(dataEq);
1714
1715 if (dataSurfaceType == Gfx9DataColor)
1716 {
1717 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1718 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1719 }
1720
1721 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1722
1723 // This section should only apply to z/stencil, maybe fmask
1724 // If the pipe bit is below the comp block size,
1725 // then keep moving up the address until we find a bit that is above
1726 UINT_32 pipeStart = 0;
1727
1728 if (dataSurfaceType != Gfx9DataColor)
1729 {
1730 Coordinate tileMin(DIM_X, 3);
1731
1732 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1733 {
1734 pipeStart++;
1735 }
1736
1737 // if pipe is 0, then the first pipe bit is above the comp block size,
1738 // so we don't need to do anything
1739 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1740 // we will get the same pipe equation
1741 if (pipeStart != 0)
1742 {
1743 for (UINT_32 i = 0; i < numPipeLog2; i++)
1744 {
1745 // Copy the jth bit above pipe interleave to the current pipe equation bit
1746 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1747 }
1748 }
1749 }
1750
1751 if (IsPrt(swizzleMode))
1752 {
1753 // Clear out bits above the block size if prt's are enabled
1754 dataEq.resize(blockSizeLog2);
1755 dataEq.resize(48);
1756 }
1757
1758 if (IsXor(swizzleMode))
1759 {
1760 CoordEq xorMask;
1761
1762 if (IsThick(resourceType, swizzleMode))
1763 {
1764 CoordEq xorMask2;
1765
1766 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1767
1768 xorMask.resize(numPipeLog2);
1769
1770 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1771 {
1772 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1773 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1774 }
1775 }
1776 else
1777 {
1778 // Xor in the bits above the pipe+gpu bits
1779 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1780
1781 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1782 {
1783 Coordinate co;
1784 CoordEq xorMask2;
1785 // if 1xaa and not prt, then xor in the z bits
1786 xorMask2.resize(0);
1787 xorMask2.resize(numPipeLog2);
1788 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1789 {
1790 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1791 xorMask2[pipeIdx].add(co);
1792 }
1793
1794 pPipeEq->xorin(xorMask2);
1795 }
1796 }
1797
1798 xorMask.reverse();
1799 pPipeEq->xorin(xorMask);
1800 }
1801 }
1802 /**
1803 ************************************************************************************************************************
1804 * Gfx9Lib::GetMetaEquation
1805 *
1806 * @brief
1807 * Get meta equation for cmask/htile/DCC
1808 * @return
1809 * Pointer to a calculated meta equation
1810 ************************************************************************************************************************
1811 */
GetMetaEquation(const MetaEqParams & metaEqParams)1812 const CoordEq* Gfx9Lib::GetMetaEquation(
1813 const MetaEqParams& metaEqParams)
1814 {
1815 UINT_32 cachedMetaEqIndex;
1816
1817 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1818 {
1819 if (memcmp(&metaEqParams,
1820 &m_cachedMetaEqKey[cachedMetaEqIndex],
1821 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1822 {
1823 break;
1824 }
1825 }
1826
1827 CoordEq* pMetaEq = NULL;
1828
1829 if (cachedMetaEqIndex < MaxCachedMetaEq)
1830 {
1831 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1832 }
1833 else
1834 {
1835 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1836
1837 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1838
1839 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1840
1841 GenMetaEquation(pMetaEq,
1842 metaEqParams.maxMip,
1843 metaEqParams.elementBytesLog2,
1844 metaEqParams.numSamplesLog2,
1845 metaEqParams.metaFlag,
1846 metaEqParams.dataSurfaceType,
1847 metaEqParams.swizzleMode,
1848 metaEqParams.resourceType,
1849 metaEqParams.metaBlkWidthLog2,
1850 metaEqParams.metaBlkHeightLog2,
1851 metaEqParams.metaBlkDepthLog2,
1852 metaEqParams.compBlkWidthLog2,
1853 metaEqParams.compBlkHeightLog2,
1854 metaEqParams.compBlkDepthLog2);
1855 }
1856
1857 return pMetaEq;
1858 }
1859
1860 /**
1861 ************************************************************************************************************************
1862 * Gfx9Lib::GenMetaEquation
1863 *
1864 * @brief
1865 * Get meta equation for cmask/htile/DCC
1866 * @return
1867 * N/A
1868 ************************************************************************************************************************
1869 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1870 VOID Gfx9Lib::GenMetaEquation(
1871 CoordEq* pMetaEq, ///< [out] meta equation
1872 UINT_32 maxMip, ///< [in] max mip Id
1873 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1874 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1875 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1876 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1877 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1878 AddrResourceType resourceType, ///< [in] data surface resource type
1879 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1880 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1881 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1882 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1883 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1884 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1885 const
1886 {
1887 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1888 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1889
1890 // Get the correct data address and rb equation
1891 CoordEq dataEq;
1892 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1893 elementBytesLog2, numSamplesLog2);
1894
1895 // Get pipe and rb equations
1896 CoordEq pipeEquation;
1897 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1898 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1899 numPipeTotalLog2 = pipeEquation.getsize();
1900
1901 if (metaFlag.linear)
1902 {
1903 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1904 ADDR_ASSERT_ALWAYS();
1905
1906 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1907
1908 dataEq.copy(*pMetaEq);
1909
1910 if (IsLinear(swizzleMode))
1911 {
1912 if (metaFlag.pipeAligned)
1913 {
1914 // Remove the pipe bits
1915 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1916 pMetaEq->shift(-shift, pipeInterleaveLog2);
1917 }
1918 // Divide by comp block size, which for linear (which is always color) is 256 B
1919 pMetaEq->shift(-8);
1920
1921 if (metaFlag.pipeAligned)
1922 {
1923 // Put pipe bits back in
1924 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1925
1926 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1927 {
1928 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1929 }
1930 }
1931 }
1932
1933 pMetaEq->shift(1);
1934 }
1935 else
1936 {
1937 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1938 UINT_32 compFragLog2 =
1939 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1940 maxCompFragLog2 : numSamplesLog2;
1941
1942 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1943
1944 // Make sure the metaaddr is cleared
1945 pMetaEq->resize(0);
1946 pMetaEq->resize(27);
1947
1948 if (IsThick(resourceType, swizzleMode))
1949 {
1950 Coordinate cx(DIM_X, 0);
1951 Coordinate cy(DIM_Y, 0);
1952 Coordinate cz(DIM_Z, 0);
1953
1954 if (maxMip > 0)
1955 {
1956 pMetaEq->mort3d(cy, cx, cz);
1957 }
1958 else
1959 {
1960 pMetaEq->mort3d(cx, cy, cz);
1961 }
1962 }
1963 else
1964 {
1965 Coordinate cx(DIM_X, 0);
1966 Coordinate cy(DIM_Y, 0);
1967 Coordinate cs;
1968
1969 if (maxMip > 0)
1970 {
1971 pMetaEq->mort2d(cy, cx, compFragLog2);
1972 }
1973 else
1974 {
1975 pMetaEq->mort2d(cx, cy, compFragLog2);
1976 }
1977
1978 //------------------------------------------------------------------------------------------------------------------------
1979 // Put the compressible fragments at the lsb
1980 // the uncompressible frags will be at the msb of the micro address
1981 //------------------------------------------------------------------------------------------------------------------------
1982 for (UINT_32 s = 0; s < compFragLog2; s++)
1983 {
1984 cs.set(DIM_S, s);
1985 (*pMetaEq)[s].add(cs);
1986 }
1987 }
1988
1989 // Keep a copy of the pipe equations
1990 CoordEq origPipeEquation;
1991 pipeEquation.copy(origPipeEquation);
1992
1993 Coordinate co;
1994 // filter out everything under the compressed block size
1995 co.set(DIM_X, compBlkWidthLog2);
1996 pMetaEq->Filter('<', co, 0, DIM_X);
1997 co.set(DIM_Y, compBlkHeightLog2);
1998 pMetaEq->Filter('<', co, 0, DIM_Y);
1999 co.set(DIM_Z, compBlkDepthLog2);
2000 pMetaEq->Filter('<', co, 0, DIM_Z);
2001
2002 // For non-color, filter out sample bits
2003 if (dataSurfaceType != Gfx9DataColor)
2004 {
2005 co.set(DIM_X, 0);
2006 pMetaEq->Filter('<', co, 0, DIM_S);
2007 }
2008
2009 // filter out everything above the metablock size
2010 co.set(DIM_X, metaBlkWidthLog2 - 1);
2011 pMetaEq->Filter('>', co, 0, DIM_X);
2012 co.set(DIM_Y, metaBlkHeightLog2 - 1);
2013 pMetaEq->Filter('>', co, 0, DIM_Y);
2014 co.set(DIM_Z, metaBlkDepthLog2 - 1);
2015 pMetaEq->Filter('>', co, 0, DIM_Z);
2016
2017 // filter out everything above the metablock size for the channel bits
2018 co.set(DIM_X, metaBlkWidthLog2 - 1);
2019 pipeEquation.Filter('>', co, 0, DIM_X);
2020 co.set(DIM_Y, metaBlkHeightLog2 - 1);
2021 pipeEquation.Filter('>', co, 0, DIM_Y);
2022 co.set(DIM_Z, metaBlkDepthLog2 - 1);
2023 pipeEquation.Filter('>', co, 0, DIM_Z);
2024
2025 // Make sure we still have the same number of channel bits
2026 if (pipeEquation.getsize() != numPipeTotalLog2)
2027 {
2028 ADDR_ASSERT_ALWAYS();
2029 }
2030
2031 // Loop through all channel and rb bits,
2032 // and make sure these components exist in the metadata address
2033 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2034 {
2035 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2036 {
2037 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2038 {
2039 ADDR_ASSERT_ALWAYS();
2040 }
2041 }
2042 }
2043
2044 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
2045 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2046 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2047 CoordEq origRbEquation;
2048
2049 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2050
2051 CoordEq rbEquation = origRbEquation;
2052
2053 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2054 {
2055 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2056 {
2057 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2058 {
2059 ADDR_ASSERT_ALWAYS();
2060 }
2061 }
2062 }
2063
2064 if (m_settings.applyAliasFix)
2065 {
2066 co.set(DIM_Z, -1);
2067 }
2068
2069 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2070 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2071 {
2072 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2073 {
2074 BOOL_32 isRbEquationInPipeEquation = FALSE;
2075
2076 if (m_settings.applyAliasFix)
2077 {
2078 CoordTerm filteredPipeEq;
2079 filteredPipeEq = pipeEquation[j];
2080
2081 filteredPipeEq.Filter('>', co, 0, DIM_Z);
2082
2083 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2084 }
2085 else
2086 {
2087 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2088 }
2089
2090 if (isRbEquationInPipeEquation)
2091 {
2092 rbEquation[i].Clear();
2093 }
2094 }
2095 }
2096
2097 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2098
2099 // Loop through each bit of the channel, get the smallest coordinate,
2100 // and remove it from the metaaddr, and rb_equation
2101 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2102 {
2103 pipeEquation[i].getsmallest(co);
2104
2105 UINT_32 old_size = pMetaEq->getsize();
2106 pMetaEq->Filter('=', co);
2107 UINT_32 new_size = pMetaEq->getsize();
2108 if (new_size != old_size-1)
2109 {
2110 ADDR_ASSERT_ALWAYS();
2111 }
2112 pipeEquation.remove(co);
2113 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2114 {
2115 if (rbEquation[j].remove(co))
2116 {
2117 // if we actually removed something from this bit, then add the remaining
2118 // channel bits, as these can be removed for this bit
2119 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2120 {
2121 if (pipeEquation[i][k] != co)
2122 {
2123 rbEquation[j].add(pipeEquation[i][k]);
2124 rbAppendedWithPipeBits[j] = true;
2125 }
2126 }
2127 }
2128 }
2129 }
2130
2131 // Loop through the rb bits and see what remain;
2132 // filter out the smallest coordinate if it remains
2133 UINT_32 rbBitsLeft = 0;
2134 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2135 {
2136 BOOL_32 isRbEqAppended = FALSE;
2137
2138 if (m_settings.applyAliasFix)
2139 {
2140 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2141 }
2142 else
2143 {
2144 isRbEqAppended = (rbEquation[i].getsize() > 0);
2145 }
2146
2147 if (isRbEqAppended)
2148 {
2149 rbBitsLeft++;
2150 rbEquation[i].getsmallest(co);
2151 UINT_32 old_size = pMetaEq->getsize();
2152 pMetaEq->Filter('=', co);
2153 UINT_32 new_size = pMetaEq->getsize();
2154 if (new_size != old_size - 1)
2155 {
2156 // assert warning
2157 }
2158 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2159 {
2160 if (rbEquation[j].remove(co))
2161 {
2162 // if we actually removed something from this bit, then add the remaining
2163 // rb bits, as these can be removed for this bit
2164 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2165 {
2166 if (rbEquation[i][k] != co)
2167 {
2168 rbEquation[j].add(rbEquation[i][k]);
2169 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2170 }
2171 }
2172 }
2173 }
2174 }
2175 }
2176
2177 // capture the size of the metaaddr
2178 UINT_32 metaSize = pMetaEq->getsize();
2179 // resize to 49 bits...make this a nibble address
2180 pMetaEq->resize(49);
2181 // Concatenate the macro address above the current address
2182 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2183 {
2184 co.set(DIM_M, j);
2185 (*pMetaEq)[i].add(co);
2186 }
2187
2188 // Multiply by meta element size (in nibbles)
2189 if (dataSurfaceType == Gfx9DataColor)
2190 {
2191 pMetaEq->shift(1);
2192 }
2193 else if (dataSurfaceType == Gfx9DataDepthStencil)
2194 {
2195 pMetaEq->shift(3);
2196 }
2197
2198 //------------------------------------------------------------------------------------------
2199 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2200 // Shift up from pipe interleave number of channel
2201 // and rb bits left, and uncompressed fragments
2202 //------------------------------------------------------------------------------------------
2203
2204 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2205
2206 // Put in the channel bits
2207 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2208 {
2209 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2210 }
2211
2212 // Put in remaining rb bits
2213 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2214 {
2215 BOOL_32 isRbEqAppended = FALSE;
2216
2217 if (m_settings.applyAliasFix)
2218 {
2219 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2220 }
2221 else
2222 {
2223 isRbEqAppended = (rbEquation[i].getsize() > 0);
2224 }
2225
2226 if (isRbEqAppended)
2227 {
2228 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2229 // Mark any rb bit we add in to the rb mask
2230 j++;
2231 }
2232 }
2233
2234 //------------------------------------------------------------------------------------------
2235 // Put in the uncompressed fragment bits
2236 //------------------------------------------------------------------------------------------
2237 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2238 {
2239 co.set(DIM_S, compFragLog2 + i);
2240 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2241 }
2242 }
2243 }
2244
2245 /**
2246 ************************************************************************************************************************
2247 * Gfx9Lib::IsEquationSupported
2248 *
2249 * @brief
2250 * Check if equation is supported for given swizzle mode and resource type.
2251 *
2252 * @return
2253 * TRUE if supported
2254 ************************************************************************************************************************
2255 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2256 BOOL_32 Gfx9Lib::IsEquationSupported(
2257 AddrResourceType rsrcType,
2258 AddrSwizzleMode swMode,
2259 UINT_32 elementBytesLog2) const
2260 {
2261 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2262 (IsValidSwMode(swMode) == TRUE) &&
2263 (IsLinear(swMode) == FALSE) &&
2264 (((IsTex2d(rsrcType) == TRUE) &&
2265 ((elementBytesLog2 < 4) ||
2266 ((IsRotateSwizzle(swMode) == FALSE) &&
2267 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2268 ((IsTex3d(rsrcType) == TRUE) &&
2269 (IsRotateSwizzle(swMode) == FALSE) &&
2270 (IsBlock256b(swMode) == FALSE)));
2271
2272 return supported;
2273 }
2274
2275 /**
2276 ************************************************************************************************************************
2277 * Gfx9Lib::InitEquationTable
2278 *
2279 * @brief
2280 * Initialize Equation table.
2281 *
2282 * @return
2283 * N/A
2284 ************************************************************************************************************************
2285 */
InitEquationTable()2286 VOID Gfx9Lib::InitEquationTable()
2287 {
2288 memset(m_equationTable, 0, sizeof(m_equationTable));
2289
2290 // Loop all possible resource type (2D/3D)
2291 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2292 {
2293 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2294
2295 // Loop all possible swizzle mode
2296 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2297 {
2298 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2299
2300 // Loop all possible bpp
2301 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2302 {
2303 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2304
2305 // Check if the input is supported
2306 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2307 {
2308 ADDR_EQUATION equation;
2309 ADDR_E_RETURNCODE retCode;
2310
2311 memset(&equation, 0, sizeof(ADDR_EQUATION));
2312
2313 // Generate the equation
2314 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2315 {
2316 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2317 }
2318 else if (IsThin(rsrcType, swMode))
2319 {
2320 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2321 }
2322 else
2323 {
2324 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2325 }
2326
2327 // Only fill the equation into the table if the return code is ADDR_OK,
2328 // otherwise if the return code is not ADDR_OK, it indicates this is not
2329 // a valid input, we do nothing but just fill invalid equation index
2330 // into the lookup table.
2331 if (retCode == ADDR_OK)
2332 {
2333 equationIndex = m_numEquations;
2334 ADDR_ASSERT(equationIndex < EquationTableSize);
2335
2336 m_equationTable[equationIndex] = equation;
2337
2338 m_numEquations++;
2339 }
2340 else
2341 {
2342 ADDR_ASSERT_ALWAYS();
2343 }
2344 }
2345
2346 // Fill the index into the lookup table, if the combination is not supported
2347 // fill the invalid equation index
2348 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2349 }
2350 }
2351 }
2352 }
2353
2354 /**
2355 ************************************************************************************************************************
2356 * Gfx9Lib::HwlGetEquationIndex
2357 *
2358 * @brief
2359 * Interface function stub of GetEquationIndex
2360 *
2361 * @return
2362 * ADDR_E_RETURNCODE
2363 ************************************************************************************************************************
2364 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2365 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2366 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2367 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2368 ) const
2369 {
2370 AddrResourceType rsrcType = pIn->resourceType;
2371 AddrSwizzleMode swMode = pIn->swizzleMode;
2372 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2373 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2374
2375 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2376 {
2377 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2378 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2379
2380 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2381 }
2382
2383 if (pOut->pMipInfo != NULL)
2384 {
2385 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2386 {
2387 pOut->pMipInfo[i].equationIndex = index;
2388 }
2389 }
2390
2391 return index;
2392 }
2393
2394 /**
2395 ************************************************************************************************************************
2396 * Gfx9Lib::HwlComputeBlock256Equation
2397 *
2398 * @brief
2399 * Interface function stub of ComputeBlock256Equation
2400 *
2401 * @return
2402 * ADDR_E_RETURNCODE
2403 ************************************************************************************************************************
2404 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2405 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2406 AddrResourceType rsrcType,
2407 AddrSwizzleMode swMode,
2408 UINT_32 elementBytesLog2,
2409 ADDR_EQUATION* pEquation) const
2410 {
2411 ADDR_E_RETURNCODE ret = ADDR_OK;
2412
2413 pEquation->numBits = 8;
2414
2415 UINT_32 i = 0;
2416 for (; i < elementBytesLog2; i++)
2417 {
2418 InitChannel(1, 0 , i, &pEquation->addr[i]);
2419 }
2420
2421 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2422
2423 const UINT_32 maxBitsUsed = 4;
2424 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2425 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2426
2427 for (i = 0; i < maxBitsUsed; i++)
2428 {
2429 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2430 InitChannel(1, 1, i, &y[i]);
2431 }
2432
2433 if (IsStandardSwizzle(rsrcType, swMode))
2434 {
2435 switch (elementBytesLog2)
2436 {
2437 case 0:
2438 pixelBit[0] = x[0];
2439 pixelBit[1] = x[1];
2440 pixelBit[2] = x[2];
2441 pixelBit[3] = x[3];
2442 pixelBit[4] = y[0];
2443 pixelBit[5] = y[1];
2444 pixelBit[6] = y[2];
2445 pixelBit[7] = y[3];
2446 break;
2447 case 1:
2448 pixelBit[0] = x[0];
2449 pixelBit[1] = x[1];
2450 pixelBit[2] = x[2];
2451 pixelBit[3] = y[0];
2452 pixelBit[4] = y[1];
2453 pixelBit[5] = y[2];
2454 pixelBit[6] = x[3];
2455 break;
2456 case 2:
2457 pixelBit[0] = x[0];
2458 pixelBit[1] = x[1];
2459 pixelBit[2] = y[0];
2460 pixelBit[3] = y[1];
2461 pixelBit[4] = y[2];
2462 pixelBit[5] = x[2];
2463 break;
2464 case 3:
2465 pixelBit[0] = x[0];
2466 pixelBit[1] = y[0];
2467 pixelBit[2] = y[1];
2468 pixelBit[3] = x[1];
2469 pixelBit[4] = x[2];
2470 break;
2471 case 4:
2472 pixelBit[0] = y[0];
2473 pixelBit[1] = y[1];
2474 pixelBit[2] = x[0];
2475 pixelBit[3] = x[1];
2476 break;
2477 default:
2478 ADDR_ASSERT_ALWAYS();
2479 ret = ADDR_INVALIDPARAMS;
2480 break;
2481 }
2482 }
2483 else if (IsDisplaySwizzle(rsrcType, swMode))
2484 {
2485 switch (elementBytesLog2)
2486 {
2487 case 0:
2488 pixelBit[0] = x[0];
2489 pixelBit[1] = x[1];
2490 pixelBit[2] = x[2];
2491 pixelBit[3] = y[1];
2492 pixelBit[4] = y[0];
2493 pixelBit[5] = y[2];
2494 pixelBit[6] = x[3];
2495 pixelBit[7] = y[3];
2496 break;
2497 case 1:
2498 pixelBit[0] = x[0];
2499 pixelBit[1] = x[1];
2500 pixelBit[2] = x[2];
2501 pixelBit[3] = y[0];
2502 pixelBit[4] = y[1];
2503 pixelBit[5] = y[2];
2504 pixelBit[6] = x[3];
2505 break;
2506 case 2:
2507 pixelBit[0] = x[0];
2508 pixelBit[1] = x[1];
2509 pixelBit[2] = y[0];
2510 pixelBit[3] = x[2];
2511 pixelBit[4] = y[1];
2512 pixelBit[5] = y[2];
2513 break;
2514 case 3:
2515 pixelBit[0] = x[0];
2516 pixelBit[1] = y[0];
2517 pixelBit[2] = x[1];
2518 pixelBit[3] = x[2];
2519 pixelBit[4] = y[1];
2520 break;
2521 case 4:
2522 pixelBit[0] = x[0];
2523 pixelBit[1] = y[0];
2524 pixelBit[2] = x[1];
2525 pixelBit[3] = y[1];
2526 break;
2527 default:
2528 ADDR_ASSERT_ALWAYS();
2529 ret = ADDR_INVALIDPARAMS;
2530 break;
2531 }
2532 }
2533 else if (IsRotateSwizzle(swMode))
2534 {
2535 switch (elementBytesLog2)
2536 {
2537 case 0:
2538 pixelBit[0] = y[0];
2539 pixelBit[1] = y[1];
2540 pixelBit[2] = y[2];
2541 pixelBit[3] = x[1];
2542 pixelBit[4] = x[0];
2543 pixelBit[5] = x[2];
2544 pixelBit[6] = x[3];
2545 pixelBit[7] = y[3];
2546 break;
2547 case 1:
2548 pixelBit[0] = y[0];
2549 pixelBit[1] = y[1];
2550 pixelBit[2] = y[2];
2551 pixelBit[3] = x[0];
2552 pixelBit[4] = x[1];
2553 pixelBit[5] = x[2];
2554 pixelBit[6] = x[3];
2555 break;
2556 case 2:
2557 pixelBit[0] = y[0];
2558 pixelBit[1] = y[1];
2559 pixelBit[2] = x[0];
2560 pixelBit[3] = y[2];
2561 pixelBit[4] = x[1];
2562 pixelBit[5] = x[2];
2563 break;
2564 case 3:
2565 pixelBit[0] = y[0];
2566 pixelBit[1] = x[0];
2567 pixelBit[2] = y[1];
2568 pixelBit[3] = x[1];
2569 pixelBit[4] = x[2];
2570 break;
2571 default:
2572 ADDR_ASSERT_ALWAYS();
2573 case 4:
2574 ret = ADDR_INVALIDPARAMS;
2575 break;
2576 }
2577 }
2578 else
2579 {
2580 ADDR_ASSERT_ALWAYS();
2581 ret = ADDR_INVALIDPARAMS;
2582 }
2583
2584 // Post validation
2585 if (ret == ADDR_OK)
2586 {
2587 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2588 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2589 (microBlockDim.w * (1 << elementBytesLog2)));
2590 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2591 }
2592
2593 return ret;
2594 }
2595
2596 /**
2597 ************************************************************************************************************************
2598 * Gfx9Lib::HwlComputeThinEquation
2599 *
2600 * @brief
2601 * Interface function stub of ComputeThinEquation
2602 *
2603 * @return
2604 * ADDR_E_RETURNCODE
2605 ************************************************************************************************************************
2606 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2607 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2608 AddrResourceType rsrcType,
2609 AddrSwizzleMode swMode,
2610 UINT_32 elementBytesLog2,
2611 ADDR_EQUATION* pEquation) const
2612 {
2613 ADDR_E_RETURNCODE ret = ADDR_OK;
2614
2615 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2616
2617 UINT_32 maxXorBits = blockSizeLog2;
2618 if (IsNonPrtXor(swMode))
2619 {
2620 // For non-prt-xor, maybe need to initialize some more bits for xor
2621 // The highest xor bit used in equation will be max the following 3 items:
2622 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2623 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2624 // 3. blockSizeLog2
2625
2626 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2627 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2628 GetPipeXorBits(blockSizeLog2) +
2629 2 * GetBankXorBits(blockSizeLog2));
2630 }
2631
2632 const UINT_32 maxBitsUsed = 14;
2633 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2634 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2635 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2636
2637 const UINT_32 extraXorBits = 16;
2638 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2639 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2640
2641 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2642 {
2643 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2644 InitChannel(1, 1, i, &y[i]);
2645 }
2646
2647 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2648
2649 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2650 {
2651 InitChannel(1, 0 , i, &pixelBit[i]);
2652 }
2653
2654 UINT_32 xIdx = 0;
2655 UINT_32 yIdx = 0;
2656 UINT_32 lowBits = 0;
2657
2658 if (IsZOrderSwizzle(swMode))
2659 {
2660 if (elementBytesLog2 <= 3)
2661 {
2662 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2663 {
2664 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2665 }
2666
2667 lowBits = 6;
2668 }
2669 else
2670 {
2671 ret = ADDR_INVALIDPARAMS;
2672 }
2673 }
2674 else
2675 {
2676 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2677
2678 if (ret == ADDR_OK)
2679 {
2680 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2681 xIdx = Log2(microBlockDim.w);
2682 yIdx = Log2(microBlockDim.h);
2683 lowBits = 8;
2684 }
2685 }
2686
2687 if (ret == ADDR_OK)
2688 {
2689 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2690 {
2691 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2692 }
2693
2694 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2695 {
2696 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2697 }
2698
2699 if (IsXor(swMode))
2700 {
2701 // Fill XOR bits
2702 UINT_32 pipeStart = m_pipeInterleaveLog2;
2703 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2704
2705 UINT_32 bankStart = pipeStart + pipeXorBits;
2706 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2707
2708 for (UINT_32 i = 0; i < pipeXorBits; i++)
2709 {
2710 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2711 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2712 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2713
2714 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2715 }
2716
2717 for (UINT_32 i = 0; i < bankXorBits; i++)
2718 {
2719 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2720 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2721 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2722
2723 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2724 }
2725
2726 if (IsPrt(swMode) == FALSE)
2727 {
2728 for (UINT_32 i = 0; i < pipeXorBits; i++)
2729 {
2730 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2731 }
2732
2733 for (UINT_32 i = 0; i < bankXorBits; i++)
2734 {
2735 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2736 }
2737 }
2738 }
2739
2740 pEquation->numBits = blockSizeLog2;
2741 }
2742
2743 return ret;
2744 }
2745
2746 /**
2747 ************************************************************************************************************************
2748 * Gfx9Lib::HwlComputeThickEquation
2749 *
2750 * @brief
2751 * Interface function stub of ComputeThickEquation
2752 *
2753 * @return
2754 * ADDR_E_RETURNCODE
2755 ************************************************************************************************************************
2756 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2757 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2758 AddrResourceType rsrcType,
2759 AddrSwizzleMode swMode,
2760 UINT_32 elementBytesLog2,
2761 ADDR_EQUATION* pEquation) const
2762 {
2763 ADDR_E_RETURNCODE ret = ADDR_OK;
2764
2765 ADDR_ASSERT(IsTex3d(rsrcType));
2766
2767 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2768
2769 UINT_32 maxXorBits = blockSizeLog2;
2770 if (IsNonPrtXor(swMode))
2771 {
2772 // For non-prt-xor, maybe need to initialize some more bits for xor
2773 // The highest xor bit used in equation will be max the following 3:
2774 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2775 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2776 // 3. blockSizeLog2
2777
2778 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2779 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2780 GetPipeXorBits(blockSizeLog2) +
2781 3 * GetBankXorBits(blockSizeLog2));
2782 }
2783
2784 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2785 {
2786 InitChannel(1, 0 , i, &pEquation->addr[i]);
2787 }
2788
2789 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2790
2791 const UINT_32 maxBitsUsed = 12;
2792 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2793 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2794 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2795 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2796
2797 const UINT_32 extraXorBits = 24;
2798 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2799 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2800
2801 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2802 {
2803 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2804 InitChannel(1, 1, i, &y[i]);
2805 InitChannel(1, 2, i, &z[i]);
2806 }
2807
2808 if (IsZOrderSwizzle(swMode))
2809 {
2810 switch (elementBytesLog2)
2811 {
2812 case 0:
2813 pixelBit[0] = x[0];
2814 pixelBit[1] = y[0];
2815 pixelBit[2] = x[1];
2816 pixelBit[3] = y[1];
2817 pixelBit[4] = z[0];
2818 pixelBit[5] = z[1];
2819 pixelBit[6] = x[2];
2820 pixelBit[7] = z[2];
2821 pixelBit[8] = y[2];
2822 pixelBit[9] = x[3];
2823 break;
2824 case 1:
2825 pixelBit[0] = x[0];
2826 pixelBit[1] = y[0];
2827 pixelBit[2] = x[1];
2828 pixelBit[3] = y[1];
2829 pixelBit[4] = z[0];
2830 pixelBit[5] = z[1];
2831 pixelBit[6] = z[2];
2832 pixelBit[7] = y[2];
2833 pixelBit[8] = x[2];
2834 break;
2835 case 2:
2836 pixelBit[0] = x[0];
2837 pixelBit[1] = y[0];
2838 pixelBit[2] = x[1];
2839 pixelBit[3] = z[0];
2840 pixelBit[4] = y[1];
2841 pixelBit[5] = z[1];
2842 pixelBit[6] = y[2];
2843 pixelBit[7] = x[2];
2844 break;
2845 case 3:
2846 pixelBit[0] = x[0];
2847 pixelBit[1] = y[0];
2848 pixelBit[2] = z[0];
2849 pixelBit[3] = x[1];
2850 pixelBit[4] = z[1];
2851 pixelBit[5] = y[1];
2852 pixelBit[6] = x[2];
2853 break;
2854 case 4:
2855 pixelBit[0] = x[0];
2856 pixelBit[1] = y[0];
2857 pixelBit[2] = z[0];
2858 pixelBit[3] = z[1];
2859 pixelBit[4] = y[1];
2860 pixelBit[5] = x[1];
2861 break;
2862 default:
2863 ADDR_ASSERT_ALWAYS();
2864 ret = ADDR_INVALIDPARAMS;
2865 break;
2866 }
2867 }
2868 else if (IsStandardSwizzle(rsrcType, swMode))
2869 {
2870 switch (elementBytesLog2)
2871 {
2872 case 0:
2873 pixelBit[0] = x[0];
2874 pixelBit[1] = x[1];
2875 pixelBit[2] = x[2];
2876 pixelBit[3] = x[3];
2877 pixelBit[4] = y[0];
2878 pixelBit[5] = y[1];
2879 pixelBit[6] = z[0];
2880 pixelBit[7] = z[1];
2881 pixelBit[8] = z[2];
2882 pixelBit[9] = y[2];
2883 break;
2884 case 1:
2885 pixelBit[0] = x[0];
2886 pixelBit[1] = x[1];
2887 pixelBit[2] = x[2];
2888 pixelBit[3] = y[0];
2889 pixelBit[4] = y[1];
2890 pixelBit[5] = z[0];
2891 pixelBit[6] = z[1];
2892 pixelBit[7] = z[2];
2893 pixelBit[8] = y[2];
2894 break;
2895 case 2:
2896 pixelBit[0] = x[0];
2897 pixelBit[1] = x[1];
2898 pixelBit[2] = y[0];
2899 pixelBit[3] = y[1];
2900 pixelBit[4] = z[0];
2901 pixelBit[5] = z[1];
2902 pixelBit[6] = y[2];
2903 pixelBit[7] = x[2];
2904 break;
2905 case 3:
2906 pixelBit[0] = x[0];
2907 pixelBit[1] = y[0];
2908 pixelBit[2] = y[1];
2909 pixelBit[3] = z[0];
2910 pixelBit[4] = z[1];
2911 pixelBit[5] = x[1];
2912 pixelBit[6] = x[2];
2913 break;
2914 case 4:
2915 pixelBit[0] = y[0];
2916 pixelBit[1] = y[1];
2917 pixelBit[2] = z[0];
2918 pixelBit[3] = z[1];
2919 pixelBit[4] = x[0];
2920 pixelBit[5] = x[1];
2921 break;
2922 default:
2923 ADDR_ASSERT_ALWAYS();
2924 ret = ADDR_INVALIDPARAMS;
2925 break;
2926 }
2927 }
2928 else
2929 {
2930 ADDR_ASSERT_ALWAYS();
2931 ret = ADDR_INVALIDPARAMS;
2932 }
2933
2934 if (ret == ADDR_OK)
2935 {
2936 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2937 UINT_32 xIdx = Log2(microBlockDim.w);
2938 UINT_32 yIdx = Log2(microBlockDim.h);
2939 UINT_32 zIdx = Log2(microBlockDim.d);
2940
2941 pixelBit = pEquation->addr;
2942
2943 const UINT_32 lowBits = 10;
2944 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2945 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2946
2947 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2948 {
2949 if ((i % 3) == 0)
2950 {
2951 pixelBit[i] = x[xIdx++];
2952 }
2953 else if ((i % 3) == 1)
2954 {
2955 pixelBit[i] = z[zIdx++];
2956 }
2957 else
2958 {
2959 pixelBit[i] = y[yIdx++];
2960 }
2961 }
2962
2963 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2964 {
2965 if ((i % 3) == 0)
2966 {
2967 xorExtra[i - blockSizeLog2] = x[xIdx++];
2968 }
2969 else if ((i % 3) == 1)
2970 {
2971 xorExtra[i - blockSizeLog2] = z[zIdx++];
2972 }
2973 else
2974 {
2975 xorExtra[i - blockSizeLog2] = y[yIdx++];
2976 }
2977 }
2978
2979 if (IsXor(swMode))
2980 {
2981 // Fill XOR bits
2982 UINT_32 pipeStart = m_pipeInterleaveLog2;
2983 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2984 for (UINT_32 i = 0; i < pipeXorBits; i++)
2985 {
2986 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2987 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2988 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2989
2990 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2991
2992 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2993 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2994 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2995
2996 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2997 }
2998
2999 UINT_32 bankStart = pipeStart + pipeXorBits;
3000 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
3001 for (UINT_32 i = 0; i < bankXorBits; i++)
3002 {
3003 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
3004 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
3005 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
3006
3007 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
3008
3009 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
3010 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
3011 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
3012
3013 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
3014 }
3015 }
3016
3017 pEquation->numBits = blockSizeLog2;
3018 }
3019
3020 return ret;
3021 }
3022
3023 /**
3024 ************************************************************************************************************************
3025 * Gfx9Lib::IsValidDisplaySwizzleMode
3026 *
3027 * @brief
3028 * Check if a swizzle mode is supported by display engine
3029 *
3030 * @return
3031 * TRUE is swizzle mode is supported by display engine
3032 ************************************************************************************************************************
3033 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3034 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3035 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3036 {
3037 BOOL_32 support = FALSE;
3038
3039 const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3040
3041 if (m_settings.isDce12)
3042 {
3043 if (pIn->bpp == 32)
3044 {
3045 support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3046 }
3047 else if (pIn->bpp <= 64)
3048 {
3049 support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3050 }
3051 }
3052 else if (m_settings.isDcn1)
3053 {
3054 if (pIn->bpp < 64)
3055 {
3056 support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3057 }
3058 else if (pIn->bpp == 64)
3059 {
3060 support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3061 }
3062 }
3063 else if (m_settings.isDcn2)
3064 {
3065 if (pIn->bpp < 64)
3066 {
3067 support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3068 }
3069 else if (pIn->bpp == 64)
3070 {
3071 support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3072 }
3073 }
3074 else
3075 {
3076 ADDR_NOT_IMPLEMENTED();
3077 }
3078
3079 return support;
3080 }
3081
3082 /**
3083 ************************************************************************************************************************
3084 * Gfx9Lib::HwlComputePipeBankXor
3085 *
3086 * @brief
3087 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3088 *
3089 * @return
3090 * PipeBankXor value
3091 ************************************************************************************************************************
3092 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3093 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3094 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3095 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3096 {
3097 if (IsXor(pIn->swizzleMode))
3098 {
3099 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3100 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3101 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3102
3103 UINT_32 pipeXor = 0;
3104 UINT_32 bankXor = 0;
3105
3106 const UINT_32 bankMask = (1 << bankBits) - 1;
3107 const UINT_32 index = pIn->surfIndex & bankMask;
3108
3109 const UINT_32 bpp = pIn->flags.fmask ?
3110 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3111 if (bankBits == 4)
3112 {
3113 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3114 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3115
3116 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3117 }
3118 else if (bankBits > 0)
3119 {
3120 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3121 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3122 bankXor = (index * bankIncrease) & bankMask;
3123 }
3124
3125 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3126 }
3127 else
3128 {
3129 pOut->pipeBankXor = 0;
3130 }
3131
3132 return ADDR_OK;
3133 }
3134
3135 /**
3136 ************************************************************************************************************************
3137 * Gfx9Lib::HwlComputeSlicePipeBankXor
3138 *
3139 * @brief
3140 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3141 *
3142 * @return
3143 * PipeBankXor value
3144 ************************************************************************************************************************
3145 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3146 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3147 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3148 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3149 {
3150 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3151 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3152 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3153
3154 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3155 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3156
3157 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3158
3159 return ADDR_OK;
3160 }
3161
3162 /**
3163 ************************************************************************************************************************
3164 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3165 *
3166 * @brief
3167 * Compute sub resource offset to support swizzle pattern
3168 *
3169 * @return
3170 * Offset
3171 ************************************************************************************************************************
3172 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3173 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3174 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3175 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3176 {
3177 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3178
3179 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3180 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3181 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3182 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3183 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3184 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3185
3186 pOut->offset = pIn->slice * pIn->sliceSize +
3187 pIn->macroBlockOffset +
3188 (pIn->mipTailOffset ^ pipeBankXor) -
3189 static_cast<UINT_64>(pipeBankXor);
3190 return ADDR_OK;
3191 }
3192
3193 /**
3194 ************************************************************************************************************************
3195 * Gfx9Lib::ValidateNonSwModeParams
3196 *
3197 * @brief
3198 * Validate compute surface info params except swizzle mode
3199 *
3200 * @return
3201 * TRUE if parameters are valid, FALSE otherwise
3202 ************************************************************************************************************************
3203 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3204 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3205 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3206 {
3207 BOOL_32 valid = TRUE;
3208
3209 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3210 {
3211 ADDR_ASSERT_ALWAYS();
3212 valid = FALSE;
3213 }
3214
3215 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3216 {
3217 ADDR_ASSERT_ALWAYS();
3218 valid = FALSE;
3219 }
3220
3221 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3222 const BOOL_32 msaa = (pIn->numFrags > 1);
3223 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3224
3225 const AddrResourceType rsrcType = pIn->resourceType;
3226 const BOOL_32 tex3d = IsTex3d(rsrcType);
3227 const BOOL_32 tex2d = IsTex2d(rsrcType);
3228 const BOOL_32 tex1d = IsTex1d(rsrcType);
3229
3230 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3231 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3232 const BOOL_32 display = flags.display || flags.rotated;
3233 const BOOL_32 stereo = flags.qbStereo;
3234 const BOOL_32 fmask = flags.fmask;
3235
3236 // Resource type check
3237 if (tex1d)
3238 {
3239 if (msaa || zbuffer || display || stereo || isBc || fmask)
3240 {
3241 ADDR_ASSERT_ALWAYS();
3242 valid = FALSE;
3243 }
3244 }
3245 else if (tex2d)
3246 {
3247 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3248 {
3249 ADDR_ASSERT_ALWAYS();
3250 valid = FALSE;
3251 }
3252 }
3253 else if (tex3d)
3254 {
3255 if (msaa || zbuffer || display || stereo || fmask)
3256 {
3257 ADDR_ASSERT_ALWAYS();
3258 valid = FALSE;
3259 }
3260 }
3261 else
3262 {
3263 ADDR_ASSERT_ALWAYS();
3264 valid = FALSE;
3265 }
3266
3267 return valid;
3268 }
3269
3270 /**
3271 ************************************************************************************************************************
3272 * Gfx9Lib::ValidateSwModeParams
3273 *
3274 * @brief
3275 * Validate compute surface info related to swizzle mode
3276 *
3277 * @return
3278 * TRUE if parameters are valid, FALSE otherwise
3279 ************************************************************************************************************************
3280 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3281 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3282 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3283 {
3284 BOOL_32 valid = TRUE;
3285
3286 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3287 {
3288 ADDR_ASSERT_ALWAYS();
3289 valid = FALSE;
3290 }
3291
3292 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3293 const BOOL_32 msaa = (pIn->numFrags > 1);
3294 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3295 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3296
3297 const AddrResourceType rsrcType = pIn->resourceType;
3298 const BOOL_32 tex3d = IsTex3d(rsrcType);
3299 const BOOL_32 tex2d = IsTex2d(rsrcType);
3300 const BOOL_32 tex1d = IsTex1d(rsrcType);
3301
3302 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3303 const BOOL_32 linear = IsLinear(swizzle);
3304 const BOOL_32 blk256B = IsBlock256b(swizzle);
3305 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3306
3307 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3308 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3309 const BOOL_32 color = flags.color;
3310 const BOOL_32 texture = flags.texture;
3311 const BOOL_32 display = flags.display || flags.rotated;
3312 const BOOL_32 prt = flags.prt;
3313 const BOOL_32 fmask = flags.fmask;
3314
3315 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3316 const BOOL_32 zMaxMip = tex3d && mipmap &&
3317 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3318
3319 // Misc check
3320 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3321 {
3322 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3323 ADDR_ASSERT_ALWAYS();
3324 valid = FALSE;
3325 }
3326
3327 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3328 {
3329 ADDR_ASSERT_ALWAYS();
3330 valid = FALSE;
3331 }
3332
3333 if ((pIn->bpp == 96) && (linear == FALSE))
3334 {
3335 ADDR_ASSERT_ALWAYS();
3336 valid = FALSE;
3337 }
3338
3339 if (prt && isNonPrtXor)
3340 {
3341 ADDR_ASSERT_ALWAYS();
3342 valid = FALSE;
3343 }
3344
3345 // Resource type check
3346 if (tex1d)
3347 {
3348 if (linear == FALSE)
3349 {
3350 ADDR_ASSERT_ALWAYS();
3351 valid = FALSE;
3352 }
3353 }
3354
3355 // Swizzle type check
3356 if (linear)
3357 {
3358 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3359 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3360 {
3361 ADDR_ASSERT_ALWAYS();
3362 valid = FALSE;
3363 }
3364 }
3365 else if (IsZOrderSwizzle(swizzle))
3366 {
3367 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3368 {
3369 ADDR_ASSERT_ALWAYS();
3370 valid = FALSE;
3371 }
3372 }
3373 else if (IsStandardSwizzle(swizzle))
3374 {
3375 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3376 {
3377 ADDR_ASSERT_ALWAYS();
3378 valid = FALSE;
3379 }
3380 }
3381 else if (IsDisplaySwizzle(swizzle))
3382 {
3383 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3384 {
3385 ADDR_ASSERT_ALWAYS();
3386 valid = FALSE;
3387 }
3388 }
3389 else if (IsRotateSwizzle(swizzle))
3390 {
3391 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3392 {
3393 ADDR_ASSERT_ALWAYS();
3394 valid = FALSE;
3395 }
3396 }
3397 else
3398 {
3399 ADDR_ASSERT_ALWAYS();
3400 valid = FALSE;
3401 }
3402
3403 // Block type check
3404 if (blk256B)
3405 {
3406 if (prt || zbuffer || tex3d || mipmap || msaa)
3407 {
3408 ADDR_ASSERT_ALWAYS();
3409 valid = FALSE;
3410 }
3411 }
3412
3413 return valid;
3414 }
3415
3416 /**
3417 ************************************************************************************************************************
3418 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3419 *
3420 * @brief
3421 * Compute surface info sanity check
3422 *
3423 * @return
3424 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3425 ************************************************************************************************************************
3426 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3427 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3428 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3429 {
3430 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3431 }
3432
3433 /**
3434 ************************************************************************************************************************
3435 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3436 *
3437 * @brief
3438 * Internal function to get suggested surface information for cliet to use
3439 *
3440 * @return
3441 * ADDR_E_RETURNCODE
3442 ************************************************************************************************************************
3443 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3444 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3445 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3446 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3447 {
3448 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3449 ElemLib* pElemLib = GetElemLib();
3450
3451 UINT_32 bpp = pIn->bpp;
3452 UINT_32 width = Max(pIn->width, 1u);
3453 UINT_32 height = Max(pIn->height, 1u);
3454 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3455 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3456
3457 if (pIn->flags.fmask)
3458 {
3459 bpp = GetFmaskBpp(numSamples, numFrags);
3460 numFrags = 1;
3461 numSamples = 1;
3462 pOut->resourceType = ADDR_RSRC_TEX_2D;
3463 }
3464 else
3465 {
3466 // Set format to INVALID will skip this conversion
3467 if (pIn->format != ADDR_FMT_INVALID)
3468 {
3469 UINT_32 expandX, expandY;
3470
3471 // Don't care for this case
3472 ElemMode elemMode = ADDR_UNCOMPRESSED;
3473
3474 // Get compression/expansion factors and element mode which indicates compression/expansion
3475 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3476 &elemMode,
3477 &expandX,
3478 &expandY);
3479
3480 UINT_32 basePitch = 0;
3481 GetElemLib()->AdjustSurfaceInfo(elemMode,
3482 expandX,
3483 expandY,
3484 &bpp,
3485 &basePitch,
3486 &width,
3487 &height);
3488 }
3489
3490 // The output may get changed for volume(3D) texture resource in future
3491 pOut->resourceType = pIn->resourceType;
3492 }
3493
3494 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3495 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3496 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3497 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3498
3499 // Pre sanity check on non swizzle mode parameters
3500 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3501 localIn.flags = pIn->flags;
3502 localIn.resourceType = pOut->resourceType;
3503 localIn.format = pIn->format;
3504 localIn.bpp = bpp;
3505 localIn.width = width;
3506 localIn.height = height;
3507 localIn.numSlices = numSlices;
3508 localIn.numMipLevels = numMipLevels;
3509 localIn.numSamples = numSamples;
3510 localIn.numFrags = numFrags;
3511
3512 if (ValidateNonSwModeParams(&localIn))
3513 {
3514 // Forbid swizzle mode(s) by client setting
3515 ADDR2_SWMODE_SET allowedSwModeSet = {};
3516 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3517 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3518 allowedSwModeSet.value |=
3519 pIn->forbiddenBlock.macroThin4KB ? 0 :
3520 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3521 allowedSwModeSet.value |=
3522 pIn->forbiddenBlock.macroThick4KB ? 0 :
3523 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3524 allowedSwModeSet.value |=
3525 pIn->forbiddenBlock.macroThin64KB ? 0 :
3526 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3527 allowedSwModeSet.value |=
3528 pIn->forbiddenBlock.macroThick64KB ? 0 :
3529 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3530
3531 if (pIn->preferredSwSet.value != 0)
3532 {
3533 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3534 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3535 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3536 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3537 }
3538
3539 if (pIn->noXor)
3540 {
3541 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3542 }
3543
3544 if (pIn->maxAlign > 0)
3545 {
3546 if (pIn->maxAlign < Size64K)
3547 {
3548 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3549 }
3550
3551 if (pIn->maxAlign < Size4K)
3552 {
3553 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3554 }
3555
3556 if (pIn->maxAlign < Size256)
3557 {
3558 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3559 }
3560 }
3561
3562 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3563 switch (pOut->resourceType)
3564 {
3565 case ADDR_RSRC_TEX_1D:
3566 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3567 break;
3568
3569 case ADDR_RSRC_TEX_2D:
3570 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3571
3572 if (bpp > 64)
3573 {
3574 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3575 }
3576 break;
3577
3578 case ADDR_RSRC_TEX_3D:
3579 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3580
3581 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3582 {
3583 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3584 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3585 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3586 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3587 }
3588
3589 if ((bpp == 128) && pIn->flags.color)
3590 {
3591 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3592 }
3593
3594 if (pIn->flags.view3dAs2dArray)
3595 {
3596 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3597 }
3598 break;
3599
3600 default:
3601 ADDR_ASSERT_ALWAYS();
3602 allowedSwModeSet.value = 0;
3603 break;
3604 }
3605
3606 if (pIn->format == ADDR_FMT_32_32_32)
3607 {
3608 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3609 }
3610
3611 if (ElemLib::IsBlockCompressed(pIn->format))
3612 {
3613 if (pIn->flags.texture)
3614 {
3615 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3616 }
3617 else
3618 {
3619 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3620 }
3621 }
3622
3623 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3624 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3625 {
3626 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3627 }
3628
3629 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3630 {
3631 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3632
3633 if (pIn->flags.noMetadata == FALSE)
3634 {
3635 if (pIn->flags.depth &&
3636 pIn->flags.texture &&
3637 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3638 {
3639 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3640 // equation from wrong address within memory range a tile covered and use the
3641 // garbage data for compressed Z reading which finally leads to corruption.
3642 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3643 }
3644
3645 if (m_settings.htileCacheRbConflict &&
3646 (pIn->flags.depth || pIn->flags.stencil) &&
3647 (numSlices > 1) &&
3648 (pIn->flags.metaRbUnaligned == FALSE) &&
3649 (pIn->flags.metaPipeUnaligned == FALSE))
3650 {
3651 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3652 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3653 }
3654 }
3655 }
3656
3657 if (msaa)
3658 {
3659 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3660 }
3661
3662 if ((numFrags > 1) &&
3663 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3664 {
3665 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3666 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3667 }
3668
3669 if (numMipLevels > 1)
3670 {
3671 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3672 }
3673
3674 if (displayRsrc)
3675 {
3676 if (m_settings.isDce12)
3677 {
3678 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3679 }
3680 else if (m_settings.isDcn1)
3681 {
3682 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3683 }
3684 else if (m_settings.isDcn2)
3685 {
3686 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3687 }
3688 else
3689 {
3690 ADDR_NOT_IMPLEMENTED();
3691 }
3692 }
3693
3694 if (allowedSwModeSet.value != 0)
3695 {
3696 #if DEBUG
3697 // Post sanity check, at least AddrLib should accept the output generated by its own
3698 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3699
3700 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3701 {
3702 if (validateSwModeSet & 1)
3703 {
3704 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3705 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3706 }
3707
3708 validateSwModeSet >>= 1;
3709 }
3710 #endif
3711
3712 pOut->validSwModeSet = allowedSwModeSet;
3713 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3714 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3715 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3716
3717 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3718
3719 if (pOut->clientPreferredSwSet.value == 0)
3720 {
3721 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3722 }
3723
3724 // Apply optional restrictions
3725 if (pIn->flags.needEquation)
3726 {
3727 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3728 }
3729
3730 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3731 {
3732 pOut->swizzleMode = ADDR_SW_LINEAR;
3733 }
3734 else
3735 {
3736 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3737
3738 if ((height > 1) && (computeMinSize == FALSE))
3739 {
3740 // Always ignore linear swizzle mode if:
3741 // 1. This is a (2D/3D) resource with height > 1
3742 // 2. Client doesn't require computing minimize size
3743 allowedSwModeSet.swLinear = 0;
3744 }
3745
3746 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3747
3748 // Determine block size if there are 2 or more block type candidates
3749 if (IsPow2(allowedBlockSet.value) == FALSE)
3750 {
3751 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3752
3753 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3754 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3755 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3756 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3757
3758 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3759 {
3760 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3761 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3762 }
3763
3764 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3765
3766 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3767 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3768 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3769 UINT_32 minSizeBlk = AddrBlockMicro;
3770 UINT_64 minSize = 0;
3771
3772 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3773
3774 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3775 {
3776 if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3777 {
3778 localIn.swizzleMode = swMode[i];
3779
3780 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3781 {
3782 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3783 }
3784 else
3785 {
3786 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3787 }
3788
3789 if (returnCode == ADDR_OK)
3790 {
3791 padSize[i] = localOut.surfSize;
3792
3793 if ((minSize == 0) ||
3794 BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3795 {
3796 minSize = padSize[i];
3797 minSizeBlk = i;
3798 }
3799 }
3800 else
3801 {
3802 ADDR_ASSERT_ALWAYS();
3803 break;
3804 }
3805 }
3806 }
3807
3808 if (pIn->memoryBudget > 1.0)
3809 {
3810 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3811 // smaller-block type again in coming loop
3812 switch (minSizeBlk)
3813 {
3814 case AddrBlockThick64KB:
3815 allowedBlockSet.macroThin64KB = 0;
3816 case AddrBlockThin64KB:
3817 allowedBlockSet.macroThick4KB = 0;
3818 case AddrBlockThick4KB:
3819 allowedBlockSet.macroThin4KB = 0;
3820 case AddrBlockThin4KB:
3821 allowedBlockSet.micro = 0;
3822 case AddrBlockMicro:
3823 allowedBlockSet.linear = 0;
3824 case AddrBlockLinear:
3825 break;
3826
3827 default:
3828 ADDR_ASSERT_ALWAYS();
3829 break;
3830 }
3831
3832 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3833 {
3834 if ((i != minSizeBlk) &&
3835 IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3836 {
3837 if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3838 {
3839 // Clear the block type if the memory waste is unacceptable
3840 allowedBlockSet.value &= ~(1u << (i - 1));
3841 }
3842 }
3843 }
3844
3845 // Remove linear block type if 2 or more block types are allowed
3846 if (IsPow2(allowedBlockSet.value) == FALSE)
3847 {
3848 allowedBlockSet.linear = 0;
3849 }
3850
3851 // Select the biggest allowed block type
3852 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3853
3854 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3855 {
3856 minSizeBlk = AddrBlockLinear;
3857 }
3858 }
3859
3860 switch (minSizeBlk)
3861 {
3862 case AddrBlockLinear:
3863 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3864 break;
3865
3866 case AddrBlockMicro:
3867 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3868 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3869 break;
3870
3871 case AddrBlockThin4KB:
3872 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3873 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3874 break;
3875
3876 case AddrBlockThick4KB:
3877 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3878 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3879 break;
3880
3881 case AddrBlockThin64KB:
3882 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3883 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3884 break;
3885
3886 case AddrBlockThick64KB:
3887 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3888 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3889 break;
3890
3891 default:
3892 ADDR_ASSERT_ALWAYS();
3893 allowedSwModeSet.value = 0;
3894 break;
3895 }
3896 }
3897
3898 // Block type should be determined.
3899 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3900
3901 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3902
3903 // Determine swizzle type if there are 2 or more swizzle type candidates
3904 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3905 {
3906 if (ElemLib::IsBlockCompressed(pIn->format))
3907 {
3908 if (allowedSwSet.sw_D)
3909 {
3910 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3911 }
3912 else
3913 {
3914 ADDR_ASSERT(allowedSwSet.sw_S);
3915 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3916 }
3917 }
3918 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3919 {
3920 if (allowedSwSet.sw_S)
3921 {
3922 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3923 }
3924 else if (allowedSwSet.sw_D)
3925 {
3926 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3927 }
3928 else
3929 {
3930 ADDR_ASSERT(allowedSwSet.sw_R);
3931 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3932 }
3933 }
3934 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3935 {
3936 if (pIn->flags.color && allowedSwSet.sw_D)
3937 {
3938 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3939 }
3940 else if (allowedSwSet.sw_Z)
3941 {
3942 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3943 }
3944 else
3945 {
3946 ADDR_ASSERT(allowedSwSet.sw_S);
3947 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3948 }
3949 }
3950 else
3951 {
3952 if (pIn->flags.rotated && allowedSwSet.sw_R)
3953 {
3954 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3955 }
3956 else if (allowedSwSet.sw_D)
3957 {
3958 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3959 }
3960 else if (allowedSwSet.sw_S)
3961 {
3962 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3963 }
3964 else
3965 {
3966 ADDR_ASSERT(allowedSwSet.sw_Z);
3967 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3968 }
3969 }
3970
3971 // Swizzle type should be determined.
3972 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3973 }
3974
3975 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3976 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3977 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3978 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3979 }
3980
3981 returnCode = ADDR_OK;
3982 }
3983 else
3984 {
3985 // Invalid combination...
3986 ADDR_ASSERT_ALWAYS();
3987 }
3988 }
3989 else
3990 {
3991 // Invalid combination...
3992 ADDR_ASSERT_ALWAYS();
3993 }
3994
3995 return returnCode;
3996 }
3997
3998 /**
3999 ************************************************************************************************************************
4000 * Gfx9Lib::ComputeStereoInfo
4001 *
4002 * @brief
4003 * Compute height alignment and right eye pipeBankXor for stereo surface
4004 *
4005 * @return
4006 * Error code
4007 *
4008 ************************************************************************************************************************
4009 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const4010 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
4011 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
4012 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
4013 UINT_32* pHeightAlign
4014 ) const
4015 {
4016 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4017
4018 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4019
4020 if (eqIndex < m_numEquations)
4021 {
4022 if (IsXor(pIn->swizzleMode))
4023 {
4024 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4025 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
4026 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
4027 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
4028 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4029 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
4030
4031 ADDR_ASSERT(maxYCoordBlock256 ==
4032 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4033
4034 const UINT_32 maxYCoordInBaseEquation =
4035 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4036
4037 ADDR_ASSERT(maxYCoordInBaseEquation ==
4038 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4039
4040 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4041
4042 ADDR_ASSERT(maxYCoordInPipeXor ==
4043 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4044
4045 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4046 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4047
4048 ADDR_ASSERT(maxYCoordInBankXor ==
4049 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4050
4051 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4052
4053 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4054 {
4055 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4056
4057 if (pOut->pStereoInfo != NULL)
4058 {
4059 pOut->pStereoInfo->rightSwizzle = 0;
4060
4061 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4062 {
4063 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4064 {
4065 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4066 }
4067
4068 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4069 {
4070 pOut->pStereoInfo->rightSwizzle |=
4071 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4072 }
4073
4074 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4075 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4076 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4077 }
4078 }
4079 }
4080 }
4081 }
4082 else
4083 {
4084 ADDR_ASSERT_ALWAYS();
4085 returnCode = ADDR_ERROR;
4086 }
4087
4088 return returnCode;
4089 }
4090
4091 /**
4092 ************************************************************************************************************************
4093 * Gfx9Lib::HwlComputeSurfaceInfoTiled
4094 *
4095 * @brief
4096 * Internal function to calculate alignment for tiled surface
4097 *
4098 * @return
4099 * ADDR_E_RETURNCODE
4100 ************************************************************************************************************************
4101 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4102 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4103 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4104 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4105 ) const
4106 {
4107 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4108 &pOut->blockHeight,
4109 &pOut->blockSlices,
4110 pIn->bpp,
4111 pIn->numFrags,
4112 pIn->resourceType,
4113 pIn->swizzleMode);
4114
4115 if (returnCode == ADDR_OK)
4116 {
4117 UINT_32 pitchAlignInElement = pOut->blockWidth;
4118
4119 if ((IsTex2d(pIn->resourceType) == TRUE) &&
4120 (pIn->flags.display || pIn->flags.rotated) &&
4121 (pIn->numMipLevels <= 1) &&
4122 (pIn->numSamples <= 1) &&
4123 (pIn->numFrags <= 1))
4124 {
4125 // Display engine needs pitch align to be at least 32 pixels.
4126 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4127 }
4128
4129 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4130
4131 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4132 {
4133 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4134 {
4135 returnCode = ADDR_INVALIDPARAMS;
4136 }
4137 else if (pIn->pitchInElement < pOut->pitch)
4138 {
4139 returnCode = ADDR_INVALIDPARAMS;
4140 }
4141 else
4142 {
4143 pOut->pitch = pIn->pitchInElement;
4144 }
4145 }
4146
4147 UINT_32 heightAlign = 0;
4148
4149 if (pIn->flags.qbStereo)
4150 {
4151 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4152 }
4153
4154 if (returnCode == ADDR_OK)
4155 {
4156 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4157
4158 if (heightAlign > 1)
4159 {
4160 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4161 }
4162
4163 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4164
4165 pOut->epitchIsHeight = FALSE;
4166 pOut->mipChainInTail = FALSE;
4167 pOut->firstMipIdInTail = pIn->numMipLevels;
4168
4169 pOut->mipChainPitch = pOut->pitch;
4170 pOut->mipChainHeight = pOut->height;
4171 pOut->mipChainSlice = pOut->numSlices;
4172
4173 if (pIn->numMipLevels > 1)
4174 {
4175 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4176 pIn->swizzleMode,
4177 pIn->bpp,
4178 pIn->width,
4179 pIn->height,
4180 pIn->numSlices,
4181 pOut->blockWidth,
4182 pOut->blockHeight,
4183 pOut->blockSlices,
4184 pIn->numMipLevels,
4185 pOut->pMipInfo);
4186
4187 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4188
4189 if (endingMipId == 0)
4190 {
4191 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4192 pIn->swizzleMode,
4193 pOut->blockWidth,
4194 pOut->blockHeight,
4195 pOut->blockSlices);
4196
4197 pOut->epitchIsHeight = TRUE;
4198 pOut->pitch = tailMaxDim.w;
4199 pOut->height = tailMaxDim.h;
4200 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4201 tailMaxDim.d : pIn->numSlices;
4202 pOut->mipChainInTail = TRUE;
4203 }
4204 else
4205 {
4206 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4207 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4208
4209 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4210 pIn->swizzleMode,
4211 mip0WidthInBlk,
4212 mip0HeightInBlk,
4213 pOut->numSlices / pOut->blockSlices);
4214 if (majorMode == ADDR_MAJOR_Y)
4215 {
4216 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4217
4218 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4219 {
4220 mip1WidthInBlk++;
4221 }
4222
4223 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4224
4225 pOut->epitchIsHeight = FALSE;
4226 }
4227 else
4228 {
4229 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4230
4231 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4232 {
4233 mip1HeightInBlk++;
4234 }
4235
4236 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4237
4238 pOut->epitchIsHeight = TRUE;
4239 }
4240 }
4241
4242 if (pOut->pMipInfo != NULL)
4243 {
4244 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4245
4246 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4247 {
4248 Dim3d mipStartPos = {0};
4249 UINT_32 mipTailOffsetInBytes = 0;
4250
4251 mipStartPos = GetMipStartPos(pIn->resourceType,
4252 pIn->swizzleMode,
4253 pOut->pitch,
4254 pOut->height,
4255 pOut->numSlices,
4256 pOut->blockWidth,
4257 pOut->blockHeight,
4258 pOut->blockSlices,
4259 i,
4260 elementBytesLog2,
4261 &mipTailOffsetInBytes);
4262
4263 UINT_32 pitchInBlock =
4264 pOut->mipChainPitch / pOut->blockWidth;
4265 UINT_32 sliceInBlock =
4266 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4267 UINT_64 blockIndex =
4268 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4269 UINT_64 macroBlockOffset =
4270 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4271
4272 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4273 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4274 }
4275 }
4276 }
4277 else if (pOut->pMipInfo != NULL)
4278 {
4279 pOut->pMipInfo[0].pitch = pOut->pitch;
4280 pOut->pMipInfo[0].height = pOut->height;
4281 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4282 pOut->pMipInfo[0].offset = 0;
4283 }
4284
4285 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4286 (pIn->bpp >> 3) * pIn->numFrags;
4287 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4288 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4289
4290 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4291 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4292 (pIn->flags.texture == TRUE) &&
4293 (pIn->flags.noMetadata == FALSE) &&
4294 (pIn->flags.metaPipeUnaligned == FALSE))
4295 {
4296 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4297 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4298 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4299 // them, which may cause invalid metadata to be fetched.
4300 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4301 }
4302
4303 if (pIn->flags.prt)
4304 {
4305 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4306 }
4307 }
4308 }
4309
4310 return returnCode;
4311 }
4312
4313 /**
4314 ************************************************************************************************************************
4315 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4316 *
4317 * @brief
4318 * Internal function to calculate alignment for linear surface
4319 *
4320 * @return
4321 * ADDR_E_RETURNCODE
4322 ************************************************************************************************************************
4323 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4324 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4325 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4326 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4327 ) const
4328 {
4329 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4330 UINT_32 pitch = 0;
4331 UINT_32 actualHeight = 0;
4332 UINT_32 elementBytes = pIn->bpp >> 3;
4333 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4334
4335 if (IsTex1d(pIn->resourceType))
4336 {
4337 if (pIn->height > 1)
4338 {
4339 returnCode = ADDR_INVALIDPARAMS;
4340 }
4341 else
4342 {
4343 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4344
4345 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4346 actualHeight = pIn->numMipLevels;
4347
4348 if (pIn->flags.prt == FALSE)
4349 {
4350 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4351 &pitch, &actualHeight);
4352 }
4353
4354 if (returnCode == ADDR_OK)
4355 {
4356 if (pOut->pMipInfo != NULL)
4357 {
4358 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4359 {
4360 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4361 pOut->pMipInfo[i].pitch = pitch;
4362 pOut->pMipInfo[i].height = 1;
4363 pOut->pMipInfo[i].depth = 1;
4364 }
4365 }
4366 }
4367 }
4368 }
4369 else
4370 {
4371 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4372 }
4373
4374 if ((pitch == 0) || (actualHeight == 0))
4375 {
4376 returnCode = ADDR_INVALIDPARAMS;
4377 }
4378
4379 if (returnCode == ADDR_OK)
4380 {
4381 pOut->pitch = pitch;
4382 pOut->height = pIn->height;
4383 pOut->numSlices = pIn->numSlices;
4384 pOut->mipChainPitch = pitch;
4385 pOut->mipChainHeight = actualHeight;
4386 pOut->mipChainSlice = pOut->numSlices;
4387 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4388 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4389 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4390 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4391 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4392 pOut->blockHeight = 1;
4393 pOut->blockSlices = 1;
4394 }
4395
4396 // Post calculation validate
4397 ADDR_ASSERT(pOut->sliceSize > 0);
4398
4399 return returnCode;
4400 }
4401
4402 /**
4403 ************************************************************************************************************************
4404 * Gfx9Lib::GetMipChainInfo
4405 *
4406 * @brief
4407 * Internal function to get out information about mip chain
4408 *
4409 * @return
4410 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4411 ************************************************************************************************************************
4412 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4413 UINT_32 Gfx9Lib::GetMipChainInfo(
4414 AddrResourceType resourceType,
4415 AddrSwizzleMode swizzleMode,
4416 UINT_32 bpp,
4417 UINT_32 mip0Width,
4418 UINT_32 mip0Height,
4419 UINT_32 mip0Depth,
4420 UINT_32 blockWidth,
4421 UINT_32 blockHeight,
4422 UINT_32 blockDepth,
4423 UINT_32 numMipLevel,
4424 ADDR2_MIP_INFO* pMipInfo) const
4425 {
4426 const Dim3d tailMaxDim =
4427 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4428
4429 UINT_32 mipPitch = mip0Width;
4430 UINT_32 mipHeight = mip0Height;
4431 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4432 UINT_32 offset = 0;
4433 UINT_32 firstMipIdInTail = numMipLevel;
4434 BOOL_32 inTail = FALSE;
4435 BOOL_32 finalDim = FALSE;
4436 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4437 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4438
4439 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4440 {
4441 if (inTail)
4442 {
4443 if (finalDim == FALSE)
4444 {
4445 UINT_32 mipSize;
4446
4447 if (is3dThick)
4448 {
4449 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4450 }
4451 else
4452 {
4453 mipSize = mipPitch * mipHeight * (bpp >> 3);
4454 }
4455
4456 if (mipSize <= 256)
4457 {
4458 UINT_32 index = Log2(bpp >> 3);
4459
4460 if (is3dThick)
4461 {
4462 mipPitch = Block256_3dZ[index].w;
4463 mipHeight = Block256_3dZ[index].h;
4464 mipDepth = Block256_3dZ[index].d;
4465 }
4466 else
4467 {
4468 mipPitch = Block256_2d[index].w;
4469 mipHeight = Block256_2d[index].h;
4470 }
4471
4472 finalDim = TRUE;
4473 }
4474 }
4475 }
4476 else
4477 {
4478 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4479 mipPitch, mipHeight, mipDepth);
4480
4481 if (inTail)
4482 {
4483 firstMipIdInTail = mipId;
4484 mipPitch = tailMaxDim.w;
4485 mipHeight = tailMaxDim.h;
4486
4487 if (is3dThick)
4488 {
4489 mipDepth = tailMaxDim.d;
4490 }
4491 }
4492 else
4493 {
4494 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4495 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4496
4497 if (is3dThick)
4498 {
4499 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4500 }
4501 }
4502 }
4503
4504 if (pMipInfo != NULL)
4505 {
4506 pMipInfo[mipId].pitch = mipPitch;
4507 pMipInfo[mipId].height = mipHeight;
4508 pMipInfo[mipId].depth = mipDepth;
4509 pMipInfo[mipId].offset = offset;
4510 }
4511
4512 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4513
4514 if (finalDim)
4515 {
4516 if (is3dThin)
4517 {
4518 mipDepth = Max(mipDepth >> 1, 1u);
4519 }
4520 }
4521 else
4522 {
4523 mipPitch = Max(mipPitch >> 1, 1u);
4524 mipHeight = Max(mipHeight >> 1, 1u);
4525
4526 if (is3dThick || is3dThin)
4527 {
4528 mipDepth = Max(mipDepth >> 1, 1u);
4529 }
4530 }
4531 }
4532
4533 return firstMipIdInTail;
4534 }
4535
4536 /**
4537 ************************************************************************************************************************
4538 * Gfx9Lib::GetMetaMiptailInfo
4539 *
4540 * @brief
4541 * Get mip tail coordinate information.
4542 *
4543 * @return
4544 * N/A
4545 ************************************************************************************************************************
4546 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4547 VOID Gfx9Lib::GetMetaMiptailInfo(
4548 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4549 Dim3d mipCoord, ///< [in] mip tail base coord
4550 UINT_32 numMipInTail, ///< [in] number of mips in tail
4551 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4552 ) const
4553 {
4554 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4555 UINT_32 mipWidth = pMetaBlkDim->w;
4556 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4557 UINT_32 mipDepth = pMetaBlkDim->d;
4558 UINT_32 minInc;
4559
4560 if (isThick)
4561 {
4562 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4563 }
4564 else if (pMetaBlkDim->h >= 1024)
4565 {
4566 minInc = 256;
4567 }
4568 else if (pMetaBlkDim->h == 512)
4569 {
4570 minInc = 128;
4571 }
4572 else
4573 {
4574 minInc = 64;
4575 }
4576
4577 UINT_32 blk32MipId = 0xFFFFFFFF;
4578
4579 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4580 {
4581 pInfo[mip].inMiptail = TRUE;
4582 pInfo[mip].startX = mipCoord.w;
4583 pInfo[mip].startY = mipCoord.h;
4584 pInfo[mip].startZ = mipCoord.d;
4585 pInfo[mip].width = mipWidth;
4586 pInfo[mip].height = mipHeight;
4587 pInfo[mip].depth = mipDepth;
4588
4589 if (mipWidth <= 32)
4590 {
4591 if (blk32MipId == 0xFFFFFFFF)
4592 {
4593 blk32MipId = mip;
4594 }
4595
4596 mipCoord.w = pInfo[blk32MipId].startX;
4597 mipCoord.h = pInfo[blk32MipId].startY;
4598 mipCoord.d = pInfo[blk32MipId].startZ;
4599
4600 switch (mip - blk32MipId)
4601 {
4602 case 0:
4603 mipCoord.w += 32; // 16x16
4604 break;
4605 case 1:
4606 mipCoord.h += 32; // 8x8
4607 break;
4608 case 2:
4609 mipCoord.h += 32; // 4x4
4610 mipCoord.w += 16;
4611 break;
4612 case 3:
4613 mipCoord.h += 32; // 2x2
4614 mipCoord.w += 32;
4615 break;
4616 case 4:
4617 mipCoord.h += 32; // 1x1
4618 mipCoord.w += 48;
4619 break;
4620 // The following are for BC/ASTC formats
4621 case 5:
4622 mipCoord.h += 48; // 1/2 x 1/2
4623 break;
4624 case 6:
4625 mipCoord.h += 48; // 1/4 x 1/4
4626 mipCoord.w += 16;
4627 break;
4628 case 7:
4629 mipCoord.h += 48; // 1/8 x 1/8
4630 mipCoord.w += 32;
4631 break;
4632 case 8:
4633 mipCoord.h += 48; // 1/16 x 1/16
4634 mipCoord.w += 48;
4635 break;
4636 default:
4637 ADDR_ASSERT_ALWAYS();
4638 break;
4639 }
4640
4641 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4642 mipHeight = mipWidth;
4643
4644 if (isThick)
4645 {
4646 mipDepth = mipWidth;
4647 }
4648 }
4649 else
4650 {
4651 if (mipWidth <= minInc)
4652 {
4653 // if we're below the minimal increment...
4654 if (isThick)
4655 {
4656 // For 3d, just go in z direction
4657 mipCoord.d += mipDepth;
4658 }
4659 else
4660 {
4661 // For 2d, first go across, then down
4662 if ((mipWidth * 2) == minInc)
4663 {
4664 // if we're 2 mips below, that's when we go back in x, and down in y
4665 mipCoord.w -= minInc;
4666 mipCoord.h += minInc;
4667 }
4668 else
4669 {
4670 // otherwise, just go across in x
4671 mipCoord.w += minInc;
4672 }
4673 }
4674 }
4675 else
4676 {
4677 // On even mip, go down, otherwise, go across
4678 if (mip & 1)
4679 {
4680 mipCoord.w += mipWidth;
4681 }
4682 else
4683 {
4684 mipCoord.h += mipHeight;
4685 }
4686 }
4687 // Divide the width by 2
4688 mipWidth >>= 1;
4689 // After the first mip in tail, the mip is always a square
4690 mipHeight = mipWidth;
4691 // ...or for 3d, a cube
4692 if (isThick)
4693 {
4694 mipDepth = mipWidth;
4695 }
4696 }
4697 }
4698 }
4699
4700 /**
4701 ************************************************************************************************************************
4702 * Gfx9Lib::GetMipStartPos
4703 *
4704 * @brief
4705 * Internal function to get out information about mip logical start position
4706 *
4707 * @return
4708 * logical start position in macro block width/heith/depth of one mip level within one slice
4709 ************************************************************************************************************************
4710 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4711 Dim3d Gfx9Lib::GetMipStartPos(
4712 AddrResourceType resourceType,
4713 AddrSwizzleMode swizzleMode,
4714 UINT_32 width,
4715 UINT_32 height,
4716 UINT_32 depth,
4717 UINT_32 blockWidth,
4718 UINT_32 blockHeight,
4719 UINT_32 blockDepth,
4720 UINT_32 mipId,
4721 UINT_32 log2ElementBytes,
4722 UINT_32* pMipTailBytesOffset) const
4723 {
4724 Dim3d mipStartPos = {0};
4725 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4726
4727 // Report mip in tail if Mip0 is already in mip tail
4728 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4729 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4730 UINT_32 mipIndexInTail = mipId;
4731
4732 if (inMipTail == FALSE)
4733 {
4734 // Mip 0 dimension, unit in block
4735 UINT_32 mipWidthInBlk = width / blockWidth;
4736 UINT_32 mipHeightInBlk = height / blockHeight;
4737 UINT_32 mipDepthInBlk = depth / blockDepth;
4738 AddrMajorMode majorMode = GetMajorMode(resourceType,
4739 swizzleMode,
4740 mipWidthInBlk,
4741 mipHeightInBlk,
4742 mipDepthInBlk);
4743
4744 UINT_32 endingMip = mipId + 1;
4745
4746 for (UINT_32 i = 1; i <= mipId; i++)
4747 {
4748 if ((i == 1) || (i == 3))
4749 {
4750 if (majorMode == ADDR_MAJOR_Y)
4751 {
4752 mipStartPos.w += mipWidthInBlk;
4753 }
4754 else
4755 {
4756 mipStartPos.h += mipHeightInBlk;
4757 }
4758 }
4759 else
4760 {
4761 if (majorMode == ADDR_MAJOR_X)
4762 {
4763 mipStartPos.w += mipWidthInBlk;
4764 }
4765 else if (majorMode == ADDR_MAJOR_Y)
4766 {
4767 mipStartPos.h += mipHeightInBlk;
4768 }
4769 else
4770 {
4771 mipStartPos.d += mipDepthInBlk;
4772 }
4773 }
4774
4775 BOOL_32 inTail = FALSE;
4776
4777 if (IsThick(resourceType, swizzleMode))
4778 {
4779 UINT_32 dim = log2BlkSize % 3;
4780
4781 if (dim == 0)
4782 {
4783 inTail =
4784 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4785 }
4786 else if (dim == 1)
4787 {
4788 inTail =
4789 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4790 }
4791 else
4792 {
4793 inTail =
4794 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4795 }
4796 }
4797 else
4798 {
4799 if (log2BlkSize & 1)
4800 {
4801 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4802 }
4803 else
4804 {
4805 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4806 }
4807 }
4808
4809 if (inTail)
4810 {
4811 endingMip = i;
4812 break;
4813 }
4814
4815 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4816 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4817 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4818 }
4819
4820 if (mipId >= endingMip)
4821 {
4822 inMipTail = TRUE;
4823 mipIndexInTail = mipId - endingMip;
4824 }
4825 }
4826
4827 if (inMipTail)
4828 {
4829 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4830 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4831 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4832 }
4833
4834 return mipStartPos;
4835 }
4836
4837 /**
4838 ************************************************************************************************************************
4839 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4840 *
4841 * @brief
4842 * Internal function to calculate address from coord for tiled swizzle surface
4843 *
4844 * @return
4845 * ADDR_E_RETURNCODE
4846 ************************************************************************************************************************
4847 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4848 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4849 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4850 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4851 ) const
4852 {
4853 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4854 localIn.swizzleMode = pIn->swizzleMode;
4855 localIn.flags = pIn->flags;
4856 localIn.resourceType = pIn->resourceType;
4857 localIn.bpp = pIn->bpp;
4858 localIn.width = Max(pIn->unalignedWidth, 1u);
4859 localIn.height = Max(pIn->unalignedHeight, 1u);
4860 localIn.numSlices = Max(pIn->numSlices, 1u);
4861 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4862 localIn.numSamples = Max(pIn->numSamples, 1u);
4863 localIn.numFrags = Max(pIn->numFrags, 1u);
4864 if (localIn.numMipLevels <= 1)
4865 {
4866 localIn.pitchInElement = pIn->pitchInElement;
4867 }
4868
4869 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4870 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4871
4872 BOOL_32 valid = (returnCode == ADDR_OK) &&
4873 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4874 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4875 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4876
4877 if (valid)
4878 {
4879 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4880 Dim3d mipStartPos = {0};
4881 UINT_32 mipTailBytesOffset = 0;
4882
4883 if (pIn->numMipLevels > 1)
4884 {
4885 // Mip-map chain cannot be MSAA surface
4886 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4887
4888 mipStartPos = GetMipStartPos(pIn->resourceType,
4889 pIn->swizzleMode,
4890 localOut.pitch,
4891 localOut.height,
4892 localOut.numSlices,
4893 localOut.blockWidth,
4894 localOut.blockHeight,
4895 localOut.blockSlices,
4896 pIn->mipId,
4897 log2ElementBytes,
4898 &mipTailBytesOffset);
4899 }
4900
4901 UINT_32 interleaveOffset = 0;
4902 UINT_32 pipeBits = 0;
4903 UINT_32 pipeXor = 0;
4904 UINT_32 bankBits = 0;
4905 UINT_32 bankXor = 0;
4906
4907 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4908 {
4909 UINT_32 blockOffset = 0;
4910 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4911
4912 if (IsZOrderSwizzle(pIn->swizzleMode))
4913 {
4914 // Morton generation
4915 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4916 {
4917 UINT_32 totalLowBits = 6 - log2ElementBytes;
4918 UINT_32 mortBits = totalLowBits / 2;
4919 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4920 // Are 9 bits enough?
4921 UINT_32 highBitsValue =
4922 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4923 blockOffset = lowBitsValue | highBitsValue;
4924 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4925 }
4926 else
4927 {
4928 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4929 }
4930
4931 // Fill LSBs with sample bits
4932 if (pIn->numSamples > 1)
4933 {
4934 blockOffset *= pIn->numSamples;
4935 blockOffset |= pIn->sample;
4936 }
4937
4938 // Shift according to BytesPP
4939 blockOffset <<= log2ElementBytes;
4940 }
4941 else
4942 {
4943 // Micro block offset
4944 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4945 blockOffset = microBlockOffset;
4946
4947 // Micro block dimension
4948 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4949 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4950 // Morton generation, does 12 bit enough?
4951 blockOffset |=
4952 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4953
4954 // Sample bits start location
4955 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4956 // Join sample bits information to the highest Macro block bits
4957 if (IsNonPrtXor(pIn->swizzleMode))
4958 {
4959 // Non-prt-Xor : xor highest Macro block bits with sample bits
4960 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4961 }
4962 else
4963 {
4964 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4965 // after this op, the blockOffset only contains log2 Macro block size bits
4966 blockOffset %= (1 << sampleStart);
4967 blockOffset |= (pIn->sample << sampleStart);
4968 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4969 }
4970 }
4971
4972 if (IsXor(pIn->swizzleMode))
4973 {
4974 // Mask off bits above Macro block bits to keep page synonyms working for prt
4975 if (IsPrt(pIn->swizzleMode))
4976 {
4977 blockOffset &= ((1 << log2BlkSize) - 1);
4978 }
4979
4980 // Preserve offset inside pipe interleave
4981 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4982 blockOffset >>= m_pipeInterleaveLog2;
4983
4984 // Pipe/Se xor bits
4985 pipeBits = GetPipeXorBits(log2BlkSize);
4986 // Pipe xor
4987 pipeXor = FoldXor2d(blockOffset, pipeBits);
4988 blockOffset >>= pipeBits;
4989
4990 // Bank xor bits
4991 bankBits = GetBankXorBits(log2BlkSize);
4992 // Bank Xor
4993 bankXor = FoldXor2d(blockOffset, bankBits);
4994 blockOffset >>= bankBits;
4995
4996 // Put all the part back together
4997 blockOffset <<= bankBits;
4998 blockOffset |= bankXor;
4999 blockOffset <<= pipeBits;
5000 blockOffset |= pipeXor;
5001 blockOffset <<= m_pipeInterleaveLog2;
5002 blockOffset |= interleaveOffset;
5003 }
5004
5005 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5006 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5007
5008 blockOffset |= mipTailBytesOffset;
5009
5010 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
5011 {
5012 // Apply slice xor if not MSAA/PRT
5013 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
5014 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5015 (m_pipeInterleaveLog2 + pipeBits));
5016 }
5017
5018 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5019 bankBits, pipeBits, &blockOffset);
5020
5021 blockOffset %= (1 << log2BlkSize);
5022
5023 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5024 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5025 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5026 UINT_64 macroBlockIndex =
5027 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5028 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5029 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5030
5031 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5032 }
5033 else
5034 {
5035 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5036
5037 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5038
5039 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5040 (pIn->y / microBlockDim.h),
5041 (pIn->slice / microBlockDim.d),
5042 8);
5043
5044 blockOffset <<= 10;
5045 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5046
5047 if (IsXor(pIn->swizzleMode))
5048 {
5049 // Mask off bits above Macro block bits to keep page synonyms working for prt
5050 if (IsPrt(pIn->swizzleMode))
5051 {
5052 blockOffset &= ((1 << log2BlkSize) - 1);
5053 }
5054
5055 // Preserve offset inside pipe interleave
5056 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5057 blockOffset >>= m_pipeInterleaveLog2;
5058
5059 // Pipe/Se xor bits
5060 pipeBits = GetPipeXorBits(log2BlkSize);
5061 // Pipe xor
5062 pipeXor = FoldXor3d(blockOffset, pipeBits);
5063 blockOffset >>= pipeBits;
5064
5065 // Bank xor bits
5066 bankBits = GetBankXorBits(log2BlkSize);
5067 // Bank Xor
5068 bankXor = FoldXor3d(blockOffset, bankBits);
5069 blockOffset >>= bankBits;
5070
5071 // Put all the part back together
5072 blockOffset <<= bankBits;
5073 blockOffset |= bankXor;
5074 blockOffset <<= pipeBits;
5075 blockOffset |= pipeXor;
5076 blockOffset <<= m_pipeInterleaveLog2;
5077 blockOffset |= interleaveOffset;
5078 }
5079
5080 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5081 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5082 blockOffset |= mipTailBytesOffset;
5083
5084 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5085 bankBits, pipeBits, &blockOffset);
5086
5087 blockOffset %= (1 << log2BlkSize);
5088
5089 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
5090 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5091 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5092
5093 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5094 UINT_32 sliceSizeInBlock =
5095 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5096 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5097
5098 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5099 }
5100 }
5101 else
5102 {
5103 returnCode = ADDR_INVALIDPARAMS;
5104 }
5105
5106 return returnCode;
5107 }
5108
5109 /**
5110 ************************************************************************************************************************
5111 * Gfx9Lib::ComputeSurfaceInfoLinear
5112 *
5113 * @brief
5114 * Internal function to calculate padding for linear swizzle 2D/3D surface
5115 *
5116 * @return
5117 * N/A
5118 ************************************************************************************************************************
5119 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5120 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5121 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
5122 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
5123 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
5124 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
5125 ) const
5126 {
5127 ADDR_E_RETURNCODE returnCode = ADDR_OK;
5128
5129 UINT_32 elementBytes = pIn->bpp >> 3;
5130 UINT_32 pitchAlignInElement = 0;
5131
5132 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5133 {
5134 ADDR_ASSERT(pIn->numMipLevels <= 1);
5135 ADDR_ASSERT(pIn->numSlices <= 1);
5136 pitchAlignInElement = 1;
5137 }
5138 else
5139 {
5140 pitchAlignInElement = (256 / elementBytes);
5141 }
5142
5143 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
5144 UINT_32 slice0PaddedHeight = pIn->height;
5145
5146 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5147 &mipChainWidth, &slice0PaddedHeight);
5148
5149 if (returnCode == ADDR_OK)
5150 {
5151 UINT_32 mipChainHeight = 0;
5152 UINT_32 mipHeight = pIn->height;
5153 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5154
5155 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5156 {
5157 if (pMipInfo != NULL)
5158 {
5159 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5160 pMipInfo[i].pitch = mipChainWidth;
5161 pMipInfo[i].height = mipHeight;
5162 pMipInfo[i].depth = mipDepth;
5163 }
5164
5165 mipChainHeight += mipHeight;
5166 mipHeight = RoundHalf(mipHeight);
5167 mipHeight = Max(mipHeight, 1u);
5168 }
5169
5170 *pMipmap0PaddedWidth = mipChainWidth;
5171 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5172 }
5173
5174 return returnCode;
5175 }
5176
5177 /**
5178 ************************************************************************************************************************
5179 * Gfx9Lib::ComputeThinBlockDimension
5180 *
5181 * @brief
5182 * Internal function to get thin block width/height/depth in element from surface input params.
5183 *
5184 * @return
5185 * N/A
5186 ************************************************************************************************************************
5187 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5188 VOID Gfx9Lib::ComputeThinBlockDimension(
5189 UINT_32* pWidth,
5190 UINT_32* pHeight,
5191 UINT_32* pDepth,
5192 UINT_32 bpp,
5193 UINT_32 numSamples,
5194 AddrResourceType resourceType,
5195 AddrSwizzleMode swizzleMode) const
5196 {
5197 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5198
5199 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5200 const UINT_32 eleBytes = bpp >> 3;
5201 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5202 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5203 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5204 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5205
5206 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5207
5208 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5209 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5210 *pDepth = 1;
5211
5212 if (numSamples > 1)
5213 {
5214 const UINT_32 log2sample = Log2(numSamples);
5215 const UINT_32 q = log2sample >> 1;
5216 const UINT_32 r = log2sample & 1;
5217
5218 if (log2BlkSize & 1)
5219 {
5220 *pWidth >>= q;
5221 *pHeight >>= (q + r);
5222 }
5223 else
5224 {
5225 *pWidth >>= (q + r);
5226 *pHeight >>= q;
5227 }
5228 }
5229 }
5230
5231 } // V2
5232 } // Addr
5233