1 /*! 2 * \copy 3 * Copyright (c) 2013, Cisco Systems 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 * 31 */ 32 33 #ifndef MC_H 34 #define MC_H 35 36 #include "typedefs.h" 37 38 typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 39 int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight); 40 41 typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 42 int32_t iWidth, int32_t iHeight); 43 typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, 44 int32_t, int32_t); 45 46 typedef struct TagMcFunc { 47 PWelsLumaHalfpelMcFunc pfLumaHalfpelHor; 48 PWelsLumaHalfpelMcFunc pfLumaHalfpelVer; 49 PWelsLumaHalfpelMcFunc pfLumaHalfpelCen; 50 PWelsMcFunc pMcChromaFunc; 51 52 PWelsMcFunc pMcLumaFunc; 53 PWelsSampleAveragingFunc pfSampleAveraging; 54 } SMcFunc; 55 56 namespace WelsCommon { 57 58 void InitMcFunc (SMcFunc* pMcFunc, uint32_t iCpu); 59 60 } // namespace WelsCommon 61 62 63 #if defined(__cplusplus) 64 extern "C" { 65 #endif//__cplusplus 66 67 #if defined(HAVE_NEON) 68 void McCopyWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 69 70 void McCopyWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 71 72 void McCopyWidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 73 74 void McChromaWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 75 int32_t* pWeights, int32_t iHeight); 76 77 void McChromaWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 78 int32_t* pWeights, int32_t iHeight); 79 80 void PixelAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight); 81 void PixelAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight); 82 void PixelAvgWidthEq4_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight); 83 84 void McHorVer01WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 85 int32_t iHeight); 86 void McHorVer01WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 87 int32_t iHeight); 88 void McHorVer01WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 89 int32_t iHeight); 90 void McHorVer03WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 91 int32_t iHeight); 92 void McHorVer03WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 93 int32_t iHeight); 94 void McHorVer03WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 95 int32_t iHeight); 96 97 void McHorVer10WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 98 int32_t iHeight); 99 void McHorVer10WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 100 int32_t iHeight); 101 void McHorVer10WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 102 int32_t iHeight); 103 void McHorVer30WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 104 int32_t iHeight); 105 void McHorVer30WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 106 int32_t iHeight); 107 void McHorVer30WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 108 int32_t iHeight); 109 110 //horizontal filter to gain half sample, that is (2, 0) location in quarter sample 111 void McHorVer20WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 112 int32_t iHeight); 113 void McHorVer20WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 114 int32_t iHeight); 115 void McHorVer20WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 116 int32_t iHeight); 117 118 //vertical filter to gain half sample, that is (0, 2) location in quarter sample 119 void McHorVer02WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 120 int32_t iHeight); 121 void McHorVer02WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 122 int32_t iHeight); 123 void McHorVer02WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 124 int32_t iHeight); 125 126 //horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample 127 void McHorVer22WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 128 int32_t iHeight); 129 void McHorVer22WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 130 int32_t iHeight); 131 void McHorVer22WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 132 int32_t iHeight); 133 134 void PixStrideAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, 135 const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); 136 void PixStrideAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, 137 const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); 138 139 void McHorVer20Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 140 int32_t iHeight);// width+1 141 void McHorVer20Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 142 int32_t iHeight);// width+1 143 void McHorVer20Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 144 int32_t iHeight);// width+1 145 146 void McHorVer02Height17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 147 int32_t iHeight);// height+1 148 void McHorVer02Height9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 149 int32_t iHeight);// height+1 150 void McHorVer02Height5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 151 int32_t iHeight);// height+1 152 153 void McHorVer22Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 154 int32_t iHeight);//width+1&&height+1 155 void McHorVer22Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 156 int32_t iHeight);//width+1&&height+1 157 void McHorVer22Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 158 int32_t iHeight);//width+1&&height+1 159 #endif 160 161 #if defined(HAVE_NEON_AARCH64) 162 void McCopyWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 163 int32_t iHeight); 164 void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 165 int32_t iHeight); 166 void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 167 int32_t iHeight); 168 void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 169 int32_t* pWeights, int32_t iHeight); 170 void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 171 int32_t* pWeights, int32_t iHeight); 172 void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 173 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 174 void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 175 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 176 void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 177 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 178 void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 179 int32_t iHeight); 180 void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 181 int32_t iHeight); 182 void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 183 int32_t iHeight); 184 void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 185 int32_t iHeight); 186 void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 187 int32_t iHeight); 188 void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 189 int32_t iHeight); 190 void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 191 int32_t iHeight); 192 void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 193 int32_t iHeight); 194 void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 195 int32_t iHeight); 196 void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 197 int32_t iHeight); 198 void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 199 int32_t iHeight); 200 void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 201 int32_t iHeight); 202 //horizontal filter to gain half sample, that is (2, 0) location in quarter sample 203 void McHorVer20WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 204 int32_t iHeight); 205 void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 206 int32_t iHeight); 207 void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 208 int32_t iHeight); 209 //vertical filter to gain half sample, that is (0, 2) location in quarter sample 210 void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 211 int32_t iHeight); 212 void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 213 int32_t iHeight); 214 void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 215 int32_t iHeight); 216 //horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample 217 void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 218 int32_t iHeight); 219 void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 220 int32_t iHeight); 221 void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 222 int32_t iHeight); 223 void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, 224 const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); 225 void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, 226 const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); 227 void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 228 int32_t iHeight);// width+1 229 void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 230 int32_t iHeight);// width+1 231 void McHorVer20Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 232 int32_t iHeight);// width+1 233 void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 234 int32_t iHeight);// height+1 235 void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 236 int32_t iHeight);// height+1 237 void McHorVer02Height5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 238 int32_t iHeight);// height+1 239 void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 240 int32_t iHeight);//width+1&&height+1 241 void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 242 int32_t iHeight);//width+1&&height+1 243 void McHorVer22Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 244 int32_t iHeight);//width+1&&height+1 245 #endif 246 247 #if defined(X86_ASM) 248 //***************************************************************************// 249 // MMXEXT definition // 250 //***************************************************************************// 251 void McHorVer20WidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 252 int32_t iHeight); 253 void McChromaWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 254 const uint8_t* kpABCD, int32_t iHeight); 255 void McCopyWidthEq8_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 256 int32_t iHeight); 257 void PixelAvgWidthEq4_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 258 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 259 void PixelAvgWidthEq8_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 260 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 261 262 //***************************************************************************// 263 // SSE2 definition // 264 //***************************************************************************// 265 void McChromaWidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 266 const uint8_t* kpABCD, int32_t iHeight); 267 void McCopyWidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 268 int32_t iHeight); 269 void McHorVer20WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 270 int32_t iHeight); 271 void McHorVer20WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 272 int32_t iHeight); 273 void McHorVer02WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 274 int32_t iHeight); 275 void McHorVer22Width8HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 276 int32_t iHeight); 277 void McHorVer22Width8VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, 278 int32_t iWidth, int32_t iHeight); 279 void McHorVer22Width8VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, 280 int32_t iWidth, int32_t iHeight); 281 282 void PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 283 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 284 285 void McHorVer20Width9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 286 int32_t iWidth, 287 int32_t iHeight); 288 void McHorVer20Width5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 289 int32_t iWidth, int32_t iHeight); 290 291 void McHorVer02Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 292 int32_t iWidth, 293 int32_t iHeight); 294 void McHorVer02Height5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 295 int32_t iWidth, int32_t iHeight); 296 297 void McHorVer22HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride, 298 int32_t iWidth, 299 int32_t iHeight); 300 void McHorVer22Width5HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride, 301 int32_t iWidth, int32_t iHeight); 302 void McHorVer22Width4VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, 303 int32_t iWidth, int32_t iHeight); 304 void McHorVer22Width4VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, 305 int32_t iWidth, int32_t iHeight); 306 307 //***************************************************************************// 308 // SSE3 definition // 309 //***************************************************************************// 310 void McCopyWidthEq16_sse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 311 int32_t iHeight); 312 313 //***************************************************************************// 314 // SSSE3 definition // 315 //***************************************************************************// 316 void McChromaWidthEq8_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 317 const uint8_t* kpABCD, int32_t iHeight); 318 void McHorVer02_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 319 int32_t iWidth, int32_t iHeight); 320 void McHorVer02Width4S16ToU8_ssse3 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 321 void McHorVer02Width5S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride, 322 uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 323 void McHorVer02WidthGe8S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride, 324 uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); 325 void McHorVer20_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 326 int32_t iWidth, int32_t iHeight); 327 void McHorVer20Width4U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); 328 void McHorVer20Width5Or9Or17_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, 329 uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); 330 void McHorVer20Width8U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, 331 int16_t* pDst, int32_t iDstStride, int32_t iHeight); 332 void McHorVer20Width9Or17U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, 333 int16_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); 334 335 //***************************************************************************// 336 // AVX2 definition // 337 //***************************************************************************// 338 #ifdef HAVE_AVX2 339 void McHorVer02_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 340 int32_t iWidth, int32_t iHeight); 341 void McHorVer02Width4S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 342 void McHorVer02Width5S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 343 void McHorVer02Width8S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 344 void McHorVer02Width9S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 345 void McHorVer02Width16Or17S16ToU8_avx2 (const int16_t* pSrc, int32_t iSrcStride, 346 uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); 347 void McHorVer20_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 348 int32_t iWidth, int32_t iHeight); 349 void McHorVer20Width5Or9Or17_avx2 (const uint8_t* pSrc, int32_t iSrcStride, 350 uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); 351 void McHorVer20Width4U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); 352 void McHorVer20Width8U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); 353 void McHorVer20Width16U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); 354 void McHorVer20Width17U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight); 355 #endif //HAVE_AVX2 356 357 #endif //X86_ASM 358 359 //***************************************************************************// 360 // LSX definition // 361 //***************************************************************************// 362 #if defined(HAVE_LSX) 363 void McCopyWidthEq4_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 364 void McCopyWidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 365 void McCopyWidthEq16_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight); 366 367 void McChromaWidthEq4_lsx (const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride, 368 const uint8_t *pABCD, int32_t iHeight); 369 void McChromaWidthEq8_lsx (const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride, 370 const uint8_t *pABCD, int32_t iHeight); 371 void PixelAvgWidthEq4_lsx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 372 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 373 void PixelAvgWidthEq8_lsx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 374 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 375 void PixelAvgWidthEq16_lsx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, 376 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); 377 void McHorVer02WidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 378 int32_t iHeight); 379 void McHorVer02WidthEq16_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 380 int32_t iHeight); 381 void McHorVer20WidthEq4_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 382 int32_t iHeight); 383 void McHorVer20WidthEq5_lsx (const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride, 384 int32_t iHeight); 385 void McHorVer20WidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 386 int32_t iHeight); 387 void McHorVer20WidthEq9_lsx (const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride, 388 int32_t iHeight); 389 void McHorVer20WidthEq17_lsx (const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride, 390 int iHeight); 391 void McHorVer20WidthEq16_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 392 int32_t iHeight); 393 void McHorVer22WidthEq5_lsx(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride, 394 int32_t iHeight); 395 void McHorVer22WidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 396 int32_t iHeight); 397 void McHorVer22WidthEq9_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 398 int32_t iHeight); 399 void McHorVer22WidthEq17_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, 400 int32_t iHeight); 401 #endif//HAVE_LSX 402 403 #if defined(__cplusplus) 404 } 405 #endif//__cplusplus 406 407 #endif//MC_H 408