1 /* 2 ************************************************************************************************************************ 3 * 4 * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. 5 * 6 ***********************************************************************************************************************/ 7 /** 8 **************************************************************************************************** 9 * @file addrswizzler.cpp 10 * @brief Contains code for efficient CPU swizzling. 11 **************************************************************************************************** 12 */ 13 #ifndef __ADDR_SWIZZLER_H__ 14 #define __ADDR_SWIZZLER_H__ 15 16 #include "addrlib.h" 17 #include "addrcommon.h" 18 19 namespace Addr 20 { 21 22 // Forward decl 23 class LutAddresser; 24 25 typedef void (*UnalignedCopyMemImgFunc)( 26 void* pImgBlockSliceStart, // Block corresponding to beginning of slice 27 void* pBuf, // Pointer to data starting from the copy origin. 28 size_t bufStrideY, // Stride of each row in pBuf 29 UINT_32 imageBlocksY, // Width of the image slice, in blocks. 30 ADDR_COORD2D origin, // Absolute origin, in elements 31 ADDR_EXTENT2D extent, // Size to copy, in elements 32 UINT_32 sliceXor, // Includes pipeBankXor and z XOR 33 const LutAddresser& addresser); 34 35 // This class calculates and holds up to four lookup tables (x/y/z/s) which can be used to cheaply calculate the 36 // position of a pixel within a block at the cost of some precomputation and memory usage. 37 // 38 // This works for all equations and does something like this: 39 // offset = blockAddr ^ XLut[x & xMask] ^ YLut[Y & ymask]... 40 class LutAddresser 41 { 42 public: 43 constexpr static UINT_32 MaxLutSize = 2100; // Sized to fit the largest non-VAR LUT size 44 45 LutAddresser(); 46 47 void Init(const ADDR_BIT_SETTING* pEq, UINT_32 eqSize, ADDR_EXTENT3D blockSize, UINT_8 blkBits); 48 49 // Does a full calculation to get the offset within a block. Takes an *absolute* coordinate, 50 // not the coordinate within the block. 51 UINT_32 GetBlockOffset( 52 UINT_32 x, 53 UINT_32 y, 54 UINT_32 z, 55 UINT_32 s = 0, 56 UINT_32 pipeBankXor = 0) 57 { 58 return GetAddressX(x) ^ GetAddressY(y) ^ GetAddressZ(z) ^ GetAddressS(s) ^ pipeBankXor; 59 } 60 61 // Get the block size GetBlockBits()62 UINT_32 GetBlockBits() const { return m_blockBits; } GetBlockXBits()63 UINT_32 GetBlockXBits() const { return Log2(m_blockSize.width); } GetBlockYBits()64 UINT_32 GetBlockYBits() const { return Log2(m_blockSize.height); } GetBlockZBits()65 UINT_32 GetBlockZBits() const { return Log2(m_blockSize.depth); } 66 67 // "Fast single channel" functions to get the part that each channel contributes to be XORd together. GetAddressX(UINT_32 x)68 UINT_32 GetAddressX(UINT_32 x) const { return m_pXLut[x & m_xLutMask];} GetAddressY(UINT_32 y)69 UINT_32 GetAddressY(UINT_32 y) const { return m_pYLut[y & m_yLutMask];} GetAddressZ(UINT_32 z)70 UINT_32 GetAddressZ(UINT_32 z) const { return m_pZLut[z & m_zLutMask];} GetAddressS(UINT_32 s)71 UINT_32 GetAddressS(UINT_32 s) const { return m_pSLut[s & m_sLutMask];} 72 73 // Get a function that can copy a single 2D slice of an image with this swizzle. 74 UnalignedCopyMemImgFunc GetCopyMemImgFunc() const; 75 UnalignedCopyMemImgFunc GetCopyImgMemFunc() const; 76 private: 77 // Calculate general properties of the swizzle equations 78 void InitSwizzleProps(); 79 // Fills a LUT for each channel. 80 void InitLuts(); 81 // Evaluate coordinate without LUTs 82 UINT_32 EvalEquation(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s); 83 84 // Pointers within m_lutData corresponding to where each LUT starts 85 // m_lutData[0] always has a value of 0 and thus can be considered an empty 1-entry LUT for "don't care" channels 86 UINT_32* m_pXLut; 87 UINT_32* m_pYLut; 88 UINT_32* m_pZLut; 89 UINT_32* m_pSLut; 90 91 // Size of each LUT, minus 1 to form a mask. A mask of 0 is valid for an empty LUT. 92 UINT_32 m_xLutMask; 93 UINT_32 m_yLutMask; 94 UINT_32 m_zLutMask; 95 UINT_32 m_sLutMask; 96 97 // Number of bits in the block (aka Log2(blkSize)) 98 UINT_32 m_blockBits; 99 100 // The block size 101 ADDR_EXTENT3D m_blockSize; 102 103 // Number of 'x' bits at the bottom of the equation. Must be a pow2 and at least 1. 104 // This will be used as a simple optimization to batch together operations on adjacent x pixels. 105 UINT_32 m_maxExpandX; 106 107 // BPE for this equation. 108 UINT_32 m_bpeLog2; 109 110 // The full equation 111 ADDR_BIT_SETTING m_bit[ADDR_MAX_EQUATION_BIT]; 112 113 // Backing store for the LUT tables. 114 UINT_32 m_lutData[MaxLutSize]; 115 }; 116 117 } 118 119 #endif // __ADDR_SWIZZLER_H__