• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file TilingFunctions.h
24 *
25 * @brief Tiling functions.
26 *
27 ******************************************************************************/
28 #pragma once
29 
#include "core/state.h"
#include "core/format_traits.h"
#include "memory/tilingtraits.h"
#include "memory/SurfaceState.h"

#include <algorithm>
#include <cstring>
36 
// Upper bound on the number of mip levels.
// NOTE(review): presumably sizes per-lod tables such as lodOffsets - confirm against SurfaceState.h.
#define MAX_NUM_LOD 15

// Rounds x up to the next multiple of a. The arithmetic treats (a) - 1 as a
// low-bit mask, so a is expected to be a power of two.
// Written as "subtract the remainder" instead of "& ~((a) - 1)" on purpose:
// with a uint32 alignment and a 64-bit value, the ~'ed mask would stay 32-bit
// and clear the upper half of the value.
// Both arguments are evaluated more than once; do not pass expressions with
// side effects.
#define GFX_ALIGN(x, a) \
    (((x) + ((a) - 1)) - (((x) + ((a) - 1)) & ((a) - 1)))
40 
41 //////////////////////////////////////////////////////////////////////////
42 /// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
43 //////////////////////////////////////////////////////////////////////////
44 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
45 struct SimdTile
46 {
47     // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
48     float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
49 
50     //////////////////////////////////////////////////////////////////////////
51     /// @brief Retrieve color from simd.
52     /// @param index - linear index to color within simd.
53     /// @param outputColor - output color
GetSwizzledColorSimdTile54     INLINE void GetSwizzledColor(
55         uint32_t index,
56         float outputColor[4])
57     {
58         // SOA pattern for 2x2 is a subset of 4x2.
59         //   0 1 4 5
60         //   2 3 6 7
61         // The offset converts pattern to linear
62 #if (SIMD_TILE_X_DIM == 4)
63         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
64 #elif (SIMD_TILE_X_DIM == 2)
65         static const uint32_t offset[] = { 0, 1, 2, 3 };
66 #endif
67 
68         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
69         {
70             outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
71         }
72     }
73 
74     //////////////////////////////////////////////////////////////////////////
75     /// @brief Retrieve color from simd.
76     /// @param index - linear index to color within simd.
77     /// @param outputColor - output color
SetSwizzledColorSimdTile78     INLINE void SetSwizzledColor(
79         uint32_t index,
80         const float src[4])
81     {
82         // SOA pattern for 2x2 is a subset of 4x2.
83         //   0 1 4 5
84         //   2 3 6 7
85         // The offset converts pattern to linear
86 #if (SIMD_TILE_X_DIM == 4)
87         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
88 #elif (SIMD_TILE_X_DIM == 2)
89         static const uint32_t offset[] = { 0, 1, 2, 3 };
90 #endif
91 
92         // Only loop over the components needed for destination.
93         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
94         {
95             this->color[i][offset[index]] = src[i];
96         }
97     }
98 };
99 
100 template<>
101 struct SimdTile <R8_UINT,R8_UINT>
102 {
103     // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
104     uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
105 
106     //////////////////////////////////////////////////////////////////////////
107     /// @brief Retrieve color from simd.
108     /// @param index - linear index to color within simd.
109     /// @param outputColor - output color
110     INLINE void GetSwizzledColor(
111         uint32_t index,
112         float outputColor[4])
113     {
114         // SOA pattern for 2x2 is a subset of 4x2.
115         //   0 1 4 5
116         //   2 3 6 7
117         // The offset converts pattern to linear
118 #if (SIMD_TILE_X_DIM == 4)
119         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
120 #elif (SIMD_TILE_X_DIM == 2)
121         static const uint32_t offset[] = { 0, 1, 2, 3 };
122 #endif
123 
124         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
125         {
126             uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
127             outputColor[i] = *(float*)&src;
128         }
129     }
130 
131     //////////////////////////////////////////////////////////////////////////
132     /// @brief Retrieve color from simd.
133     /// @param index - linear index to color within simd.
134     /// @param outputColor - output color
135     INLINE void SetSwizzledColor(
136         uint32_t index,
137         const float src[4])
138     {
139         // SOA pattern for 2x2 is a subset of 4x2.
140         //   0 1 4 5
141         //   2 3 6 7
142         // The offset converts pattern to linear
143 #if (SIMD_TILE_X_DIM == 4)
144         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
145 #elif (SIMD_TILE_X_DIM == 2)
146         static const uint32_t offset[] = { 0, 1, 2, 3 };
147 #endif
148 
149         // Only loop over the components needed for destination.
150         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
151         {
152             this->color[i][offset[index]] = *(uint8_t*)&src[i];
153         }
154     }
155 };
156 
157 //////////////////////////////////////////////////////////////////////////
158 /// SimdTile 8x2 for AVX-512
159 //////////////////////////////////////////////////////////////////////////
160 
161 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
162 struct SimdTile_16
163 {
164     // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
165     float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD16_WIDTH];
166 
167     //////////////////////////////////////////////////////////////////////////
168     /// @brief Retrieve color from simd.
169     /// @param index - linear index to color within simd.
170     /// @param outputColor - output color
171     INLINE void GetSwizzledColor(
172         uint32_t index,
173         float outputColor[4])
174     {
175         // SOA pattern for 8x2..
176         //   0 1 4 5 8 9 C D
177         //   2 3 6 7 A B E F
178         // The offset converts pattern to linear
179         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
180 
181         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
182         {
183             outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
184         }
185     }
186 
187     //////////////////////////////////////////////////////////////////////////
188     /// @brief Retrieve color from simd.
189     /// @param index - linear index to color within simd.
190     /// @param outputColor - output color
191     INLINE void SetSwizzledColor(
192         uint32_t index,
193         const float src[4])
194     {
195         // SOA pattern for 8x2..
196         //   0 1 4 5 8 9 C D
197         //   2 3 6 7 A B E F
198         // The offset converts pattern to linear
199         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
200 
201         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
202         {
203             this->color[i][offset[index]] = src[i];
204         }
205     }
206 };
207 
208 template<>
209 struct SimdTile_16 <R8_UINT, R8_UINT>
210 {
211     // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
212     uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD16_WIDTH];
213 
214     //////////////////////////////////////////////////////////////////////////
215     /// @brief Retrieve color from simd.
216     /// @param index - linear index to color within simd.
217     /// @param outputColor - output color
218     INLINE void GetSwizzledColor(
219         uint32_t index,
220         float outputColor[4])
221     {
222         // SOA pattern for 8x2..
223         //   0 1 4 5 8 9 C D
224         //   2 3 6 7 A B E F
225         // The offset converts pattern to linear
226         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
227 
228         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
229         {
230             uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
231             outputColor[i] = *(float*)&src;
232         }
233     }
234 
235     //////////////////////////////////////////////////////////////////////////
236     /// @brief Retrieve color from simd.
237     /// @param index - linear index to color within simd.
238     /// @param outputColor - output color
239     INLINE void SetSwizzledColor(
240         uint32_t index,
241         const float src[4])
242     {
243         // SOA pattern for 8x2..
244         //   0 1 4 5 8 9 C D
245         //   2 3 6 7 A B E F
246         // The offset converts pattern to linear
247         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
248 
249         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
250         {
251             this->color[i][offset[index]] = *(uint8_t*)&src[i];
252         }
253     }
254 };
255 
256 //////////////////////////////////////////////////////////////////////////
257 /// @brief Computes lod offset for 1D surface at specified lod.
258 /// @param baseWidth - width of basemip (mip 0).
259 /// @param hAlign - horizontal alignment per miip, in texels
260 /// @param lod - lod index
261 /// @param offset - output offset.
262 INLINE void ComputeLODOffset1D(
263     const SWR_FORMAT_INFO& info,
264     uint32_t baseWidth,
265     uint32_t hAlign,
266     uint32_t lod,
267     uint32_t &offset)
268 {
269     if (lod == 0)
270     {
271         offset = 0;
272     }
273     else
274     {
275         uint32_t curWidth = baseWidth;
276         // @note hAlign is already in blocks for compressed formats so upconvert
277         //       so that we have the desired alignment post-divide.
278         if (info.isBC)
279         {
280             hAlign *= info.bcWidth;
281         }
282 
283         offset = GFX_ALIGN(curWidth, hAlign);
284         for (uint32_t l = 1; l < lod; ++l)
285         {
286             curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
287             offset += GFX_ALIGN(curWidth, hAlign);
288         }
289 
290         if (info.isSubsampled || info.isBC)
291         {
292             offset /= info.bcWidth;
293         }
294     }
295 }
296 
297 //////////////////////////////////////////////////////////////////////////
298 /// @brief Computes x lod offset for 2D surface at specified lod.
299 /// @param baseWidth - width of basemip (mip 0).
300 /// @param hAlign - horizontal alignment per mip, in texels
301 /// @param lod - lod index
302 /// @param offset - output offset.
303 INLINE void ComputeLODOffsetX(
304     const SWR_FORMAT_INFO& info,
305     uint32_t baseWidth,
306     uint32_t hAlign,
307     uint32_t lod,
308     uint32_t &offset)
309 {
310     if (lod < 2)
311     {
312         offset = 0;
313     }
314     else
315     {
316         uint32_t curWidth = baseWidth;
317         // @note hAlign is already in blocks for compressed formats so upconvert
318         //       so that we have the desired alignment post-divide.
319         if (info.isBC)
320         {
321             hAlign *= info.bcWidth;
322         }
323 
324         curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
325         curWidth = GFX_ALIGN(curWidth, hAlign);
326 
327         if (info.isSubsampled || info.isBC)
328         {
329             curWidth /= info.bcWidth;
330         }
331 
332         offset = curWidth;
333     }
334 }
335 
336 //////////////////////////////////////////////////////////////////////////
337 /// @brief Computes y lod offset for 2D surface at specified lod.
338 /// @param baseWidth - width of basemip (mip 0).
339 /// @param vAlign - vertical alignment per mip, in rows
340 /// @param lod - lod index
341 /// @param offset - output offset.
342 INLINE void ComputeLODOffsetY(
343     const SWR_FORMAT_INFO& info,
344     uint32_t baseHeight,
345     uint32_t vAlign,
346     uint32_t lod,
347     uint32_t &offset)
348 {
349     if (lod == 0)
350     {
351         offset = 0;
352     }
353     else
354     {
355         offset = 0;
356         uint32_t mipHeight = baseHeight;
357 
358         // @note vAlign is already in blocks for compressed formats so upconvert
359         //       so that we have the desired alignment post-divide.
360         if (info.isBC)
361         {
362             vAlign *= info.bcHeight;
363         }
364 
365         for (uint32_t l = 1; l <= lod; ++l)
366         {
367             uint32_t alignedMipHeight = GFX_ALIGN(mipHeight, vAlign);
368             offset += ((l != 2) ? alignedMipHeight : 0);
369             mipHeight = std::max<uint32_t>(mipHeight >> 1, 1U);
370         }
371 
372         if (info.isBC)
373         {
374             offset /= info.bcHeight;
375         }
376     }
377 }
378 
379 //////////////////////////////////////////////////////////////////////////
380 /// @brief Computes 1D surface offset
381 /// @param x - offset from start of array slice at given lod.
382 /// @param array - array slice index
383 /// @param lod - lod index
384 /// @param pState - surface state
385 /// @param xOffsetBytes - output offset in bytes.
386 template<bool UseCachedOffsets>
387 INLINE void ComputeSurfaceOffset1D(
388     uint32_t x,
389     uint32_t array,
390     uint32_t lod,
391     const SWR_SURFACE_STATE *pState,
392     uint32_t &xOffsetBytes)
393 {
394     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
395     uint32_t lodOffset;
396 
397     if (UseCachedOffsets)
398     {
399         lodOffset = pState->lodOffsets[0][lod];
400     }
401     else
402     {
403         ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
404     }
405 
406     xOffsetBytes = (array * pState->qpitch + lodOffset + x) * info.Bpp;
407 }
408 
409 //////////////////////////////////////////////////////////////////////////
410 /// @brief Adjusts the array slice for legacy TileY MSAA
411 /// @param pState - surface state
412 /// @param array - array slice index
413 /// @param sampleNum - requested sample
414 INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
415 {
416     /// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
417     if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
418         pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
419        pState->bInterleavedSamples)
420     {
421         uint32_t newX, newY, newSampleX, newSampleY;
422         switch(pState->numSamples)
423         {
424         case 1:
425             newX = x;
426             newY = y;
427             newSampleX = newSampleY = 0;
428             break;
429         case 2:
430         {
431             assert(pState->type == SURFACE_2D);
432             static const uint32_t xMask = 0xFFFFFFFD;
433             static const uint32_t sampleMaskX = 0x1;
434             newX = pdep_u32(x, xMask);
435             newY = y;
436             newSampleX = pext_u32(sampleNum, sampleMaskX);
437             newSampleY = 0;
438         }
439             break;
440         case 4:
441         {
442             assert(pState->type == SURFACE_2D);
443             static const uint32_t mask = 0xFFFFFFFD;
444             static const uint32_t sampleMaskX = 0x1;
445             static const uint32_t sampleMaskY = 0x2;
446             newX = pdep_u32(x, mask);
447             newY = pdep_u32(y, mask);
448             newSampleX = pext_u32(sampleNum, sampleMaskX);
449             newSampleY = pext_u32(sampleNum, sampleMaskY);
450         }
451             break;
452         case 8:
453         {
454             assert(pState->type == SURFACE_2D);
455             static const uint32_t xMask = 0xFFFFFFF9;
456             static const uint32_t yMask = 0xFFFFFFFD;
457             static const uint32_t sampleMaskX = 0x5;
458             static const uint32_t sampleMaskY = 0x2;
459             newX = pdep_u32(x, xMask);
460             newY = pdep_u32(y, yMask);
461             newSampleX = pext_u32(sampleNum, sampleMaskX);
462             newSampleY = pext_u32(sampleNum, sampleMaskY);
463         }
464             break;
465         case 16:
466         {
467             assert(pState->type == SURFACE_2D);
468             static const uint32_t mask = 0xFFFFFFF9;
469             static const uint32_t sampleMaskX = 0x5;
470             static const uint32_t sampleMaskY = 0xA;
471             newX = pdep_u32(x, mask);
472             newY = pdep_u32(y, mask);
473             newSampleX = pext_u32(sampleNum, sampleMaskX);
474             newSampleY = pext_u32(sampleNum, sampleMaskY);
475         }
476             break;
477         default:
478             assert(0 && "Unsupported sample count");
479             newX = newY = 0;
480             newSampleX = newSampleY = 0;
481             break;
482         }
483         x = newX | (newSampleX << 1);
484         y = newY | (newSampleY << 1);
485     }
486     else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
487             pState->tileMode == SWR_TILE_NONE)
488     {
489         uint32_t sampleShift;
490         switch(pState->numSamples)
491         {
492         case 1:
493             assert(sampleNum == 0);
494             sampleShift = 0;
495             break;
496         case 2:
497             assert(pState->type == SURFACE_2D);
498             sampleShift = 1;
499             break;
500         case 4:
501             assert(pState->type == SURFACE_2D);
502             sampleShift = 2;
503             break;
504         case 8:
505             assert(pState->type == SURFACE_2D);
506             sampleShift = 3;
507             break;
508         case 16:
509             assert(pState->type == SURFACE_2D);
510             sampleShift = 4;
511             break;
512         default:
513             assert(0 && "Unsupported sample count");
514             sampleShift = 0;
515             break;
516         }
517         arrayIndex = (arrayIndex << sampleShift) | sampleNum;
518     }
519 }
520 
521 //////////////////////////////////////////////////////////////////////////
522 /// @brief Computes 2D surface offset
523 /// @param x - horizontal offset from start of array slice and lod.
524 /// @param y - vertical offset from start of array slice and lod.
525 /// @param array - array slice index
526 /// @param lod - lod index
527 /// @param pState - surface state
528 /// @param xOffsetBytes - output x offset in bytes.
529 /// @param yOffsetRows - output y offset in bytes.
530 template<bool UseCachedOffsets>
531 INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
532 {
533     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
534     uint32_t lodOffsetX, lodOffsetY;
535 
536     if (UseCachedOffsets)
537     {
538         lodOffsetX = pState->lodOffsets[0][lod];
539         lodOffsetY = pState->lodOffsets[1][lod];
540     }
541     else
542     {
543         ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
544         ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
545     }
546 
547     AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
548     xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
549     yOffsetRows = (array * pState->qpitch) + lodOffsetY + y + pState->yOffset;
550 }
551 
552 //////////////////////////////////////////////////////////////////////////
553 /// @brief Computes 3D surface offset
554 /// @param x - horizontal offset from start of array slice and lod.
555 /// @param y - vertical offset from start of array slice and lod.
556 /// @param z - depth offset from start of array slice and lod.
557 /// @param lod - lod index
558 /// @param pState - surface state
559 /// @param xOffsetBytes - output x offset in bytes.
560 /// @param yOffsetRows - output y offset in rows.
561 /// @param zOffsetSlices - output y offset in slices.
562 template<bool UseCachedOffsets>
563 INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
564 {
565     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
566     uint32_t lodOffsetX, lodOffsetY;
567 
568     if (UseCachedOffsets)
569     {
570         lodOffsetX = pState->lodOffsets[0][lod];
571         lodOffsetY = pState->lodOffsets[1][lod];
572     }
573     else
574     {
575         ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
576         ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
577     }
578 
579     xOffsetBytes = (x + lodOffsetX) * info.Bpp;
580     yOffsetRows = lodOffsetY + y;
581     zOffsetSlices = z;
582 }
583 
584 //////////////////////////////////////////////////////////////////////////
585 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
586 ///        and returns final surface address
587 /// @param xOffsetBytes - x offset from base of surface in bytes
588 /// @param yOffsetRows - y offset from base of surface in rows
589 /// @param pState - pointer to the surface state
590 template<typename TTraits>
591 INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
592 {
593     return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
594 }
595 
596 //////////////////////////////////////////////////////////////////////////
597 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
598 ///        and returns final surface address
599 /// @param xOffsetBytes - x offset from base of surface in bytes
600 /// @param yOffsetRows - y offset from base of surface in rows
601 /// @param pState - pointer to the surface state
602 template<typename TTraits>
603 INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
604 {
605     return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
606 }
607 
608 //////////////////////////////////////////////////////////////////////////
609 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
610 ///        and returns final surface address
611 /// @param xOffsetBytes - x offset from base of surface in bytes
612 /// @param yOffsetRows - y offset from base of surface in rows
613 /// @param pState - pointer to the surface state
614 INLINE
615 uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
616 {
617     switch (pState->tileMode)
618     {
619     case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
620     case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
621     case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
622     case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
623     case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
624     default: SWR_INVALID("Unsupported tiling mode");
625     }
626     return 0;
627 }
628 
629 //////////////////////////////////////////////////////////////////////////
630 /// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
631 ///        and returns final surface address
632 /// @param xOffsetBytes - x offset from base of surface in bytes
633 /// @param yOffsetRows - y offset from base of surface in rows
634 /// @param zOffsetSlices - z offset from base of surface in slices
635 /// @param pState - pointer to the surface state
636 INLINE
637 uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
638 {
639     switch (pState->tileMode)
640     {
641     case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
642     case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
643     case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
644     default: SWR_INVALID("Unsupported tiling mode");
645     }
646     return 0;
647 }
648 
649 template<bool UseCachedOffsets>
650 INLINE
651 uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
652 {
653     uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
654     switch (pState->type)
655     {
656     case SURFACE_BUFFER:
657     case SURFACE_STRUCTURED_BUFFER:
658         offsetX = x * pState->pitch;
659         return offsetX;
660         break;
661     case SURFACE_1D:
662         ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
663         return TileSwizzle2D(offsetX, 0, pState);
664         break;
665     case SURFACE_2D:
666         ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
667         return TileSwizzle2D(offsetX, offsetY, pState);
668     case SURFACE_3D:
669         ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
670         return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
671         break;
672     case SURFACE_CUBE:
673         ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
674         return TileSwizzle2D(offsetX, offsetY, pState);
675         break;
676     default: SWR_INVALID("Unsupported format");
677     }
678 
679     return 0;
680 }
681 
682 typedef void*(*PFN_COMPUTESURFADDR)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, const SWR_SURFACE_STATE*);
683 
684 //////////////////////////////////////////////////////////////////////////
685 /// @brief Computes surface address at the given location and lod
686 /// @param x - x location in pixels
687 /// @param y - y location in rows
688 /// @param z - z location for 3D surfaces
689 /// @param array - array slice for 1D and 2D surfaces
690 /// @param lod - level of detail
691 /// @param pState - pointer to the surface state
692 template<bool UseCachedOffsets, bool IsRead>
693 INLINE
694 void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
695 {
696     return (void*)(pState->xpBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState));
697 }
698