/*!
 * \copy
 *     Copyright (c)  2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */
32 
33 #ifndef MC_H
34 #define MC_H
35 
36 #include "typedefs.h"
37 
/*!
 * \brief Function-pointer type for a general motion-compensation routine.
 *
 * SMcFunc uses this type for both pMcLumaFunc and pMcChromaFunc.
 *
 * \param pSrc        source pixels; never written through this pointer
 * \param iSrcStride  stride of pSrc (presumably bytes per row -- confirm with callers)
 * \param pDst        destination pixels
 * \param iDstStride  stride of pDst
 * \param iMvX        horizontal motion-vector component (units are not stated in this header)
 * \param iMvY        vertical motion-vector component
 * \param iWidth      block width
 * \param iHeight     block height
 */
typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
                             uint8_t* pDst, int32_t iDstStride,
                             int16_t iMvX, int16_t iMvY,
                             int32_t iWidth, int32_t iHeight);
40 
/*!
 * \brief Function-pointer type for a luma half-pel interpolation routine.
 *
 * SMcFunc uses this type for pfLumaHalfpelHor, pfLumaHalfpelVer and
 * pfLumaHalfpelCen. Unlike PWelsMcFunc it takes no motion vector.
 *
 * \param pSrc        source pixels; never written through this pointer
 * \param iSrcStride  stride of pSrc (presumably bytes per row -- confirm with callers)
 * \param pDst        destination pixels
 * \param iDstStride  stride of pDst
 * \param iWidth      block width
 * \param iHeight     block height
 */
typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
                                        uint8_t* pDst, int32_t iDstStride,
                                        int32_t iWidth, int32_t iHeight);
/*!
 * \brief Function-pointer type for averaging two source blocks into a destination.
 *
 * SMcFunc uses this type for pfSampleAveraging. The parameter names follow
 * the PixelAvgWidthEq* prototypes declared later in this header; the original
 * typedef left them unnamed.
 *
 * \param pDst         destination pixels
 * \param iDstStride   stride of pDst
 * \param pSrcA        first source; never written through this pointer
 * \param iSrcAStride  stride of pSrcA
 * \param pSrcB        second source; never written through this pointer
 * \param iSrcBStride  stride of pSrcB
 * \param iWidth       block width (assumed by analogy with PWelsMcFunc -- confirm with callers)
 * \param iHeight      block height (same assumption)
 */
typedef void (*PWelsSampleAveragingFunc) (uint8_t* pDst, int32_t iDstStride,
                                          const uint8_t* pSrcA, int32_t iSrcAStride,
                                          const uint8_t* pSrcB, int32_t iSrcBStride,
                                          int32_t iWidth, int32_t iHeight);
45 
46 typedef struct TagMcFunc {
47   PWelsLumaHalfpelMcFunc      pfLumaHalfpelHor;
48   PWelsLumaHalfpelMcFunc      pfLumaHalfpelVer;
49   PWelsLumaHalfpelMcFunc      pfLumaHalfpelCen;
50   PWelsMcFunc                 pMcChromaFunc;
51 
52   PWelsMcFunc                 pMcLumaFunc;
53   PWelsSampleAveragingFunc    pfSampleAveraging;
54 } SMcFunc;
55 
56 namespace WelsCommon {
57 
58 void InitMcFunc (SMcFunc* pMcFunc, uint32_t iCpu);
59 
60 } // namespace WelsCommon
61 
62 
63 #if defined(__cplusplus)
64 extern "C" {
65 #endif//__cplusplus
66 
67 #if defined(HAVE_NEON)
// ARM NEON block-copy and chroma motion-compensation routines.
// Only the prototypes are visible here. Throughout, pSrc/iSrcStride describe the
// source buffer, pDst/iDstStride the destination buffer, and iHeight presumably
// the number of rows; "WidthEqN" in a name presumably fixes the block width to N.
void McCopyWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                          int32_t iHeight);
void McCopyWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                          int32_t iHeight);
void McCopyWidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                           int32_t iHeight);

// pWeights: interpolation weights; the element count and layout are not stated here.
// NOTE(review): the x86 and LSX variants take `const uint8_t*` weights instead -- confirm
// against the assembly before unifying.
void McChromaWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            int32_t* pWeights, int32_t iHeight);
void McChromaWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            int32_t* pWeights, int32_t iHeight);
79 
// ARM NEON averaging of two source blocks into pDst.
// NOTE(review): unlike the other PixelAvgWidthEq* variants in this header, these take
// non-const sources and no source strides -- how the rows of pSrcA/pSrcB are laid out
// is not visible here; confirm against the assembly.
void PixelAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
void PixelAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
void PixelAvgWidthEq4_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
83 
// ARM NEON luma interpolation for the 01, 03, 10 and 30 sample positions.
// The two digits in "McHorVerXY" presumably give the (x, y) offset in quarter-sample
// units, matching the "(2, 0) location" wording used for McHorVer20 in this header.
void McHorVer01WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer01WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer01WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer03WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer03WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer03WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);

void McHorVer10WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer10WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer10WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer30WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer30WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer30WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
109 
// Horizontal filter producing the half sample, i.e. position (2, 0) in quarter-sample units.
void McHorVer20WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer20WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer20WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);

// Vertical filter producing the half sample, i.e. position (0, 2) in quarter-sample units.
void McHorVer02WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer02WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer02WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);

// Horizontal and vertical filter producing the half sample, i.e. position (2, 2) in quarter-sample units.
void McHorVer22WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer22WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer22WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
133 
// ARM NEON averaging of two source blocks, each with its own stride, into pDst.
void PixStrideAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
                                 const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
void PixStrideAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
                                const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
138 
// ARM NEON half-sample filters for blocks one sample larger than the usual 16/8/4.
// The trailing "width+1" / "height+1" notes say which dimension is extended
// (presumably 17 = 16+1, 9 = 8+1, 5 = 4+1).
void McHorVer20Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);// width+1
void McHorVer20Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            int32_t iHeight);// width+1
void McHorVer20Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            int32_t iHeight);// width+1

void McHorVer02Height17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);// height+1
void McHorVer02Height9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);// height+1
void McHorVer02Height5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);// height+1

void McHorVer22Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);//width+1&&height+1
void McHorVer22Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            int32_t iHeight);//width+1&&height+1
void McHorVer22Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            int32_t iHeight);//width+1&&height+1
159 #endif
160 
161 #if defined(HAVE_NEON_AARCH64)
// AArch64 NEON block-copy and chroma motion-compensation routines.
// Only the prototypes are visible here. pSrc/iSrcStride describe the source buffer,
// pDst/iDstStride the destination buffer, and iHeight presumably the number of rows;
// "WidthEqN" in a name presumably fixes the block width to N.
void McCopyWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                  int32_t iHeight);
void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                  int32_t iHeight);
void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                   int32_t iHeight);

// pWeights: interpolation weights; the element count and layout are not stated here.
void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                    int32_t* pWeights, int32_t iHeight);
void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                    int32_t* pWeights, int32_t iHeight);
// AArch64 NEON averaging of two strided source blocks into pDst.
void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                                     const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                                    const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                                    const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
// AArch64 NEON luma interpolation for the 01, 03, 10 and 30 sample positions.
// The two digits in "McHorVerXY" presumably give the (x, y) offset in quarter-sample
// units, matching the "(2, 0) location" wording used for McHorVer20 in this header.
void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iHeight);
void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iHeight);
void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);

void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iHeight);
void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iHeight);
void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
// Horizontal filter producing the half sample, i.e. position (2, 0) in quarter-sample units.
void McHorVer20WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iHeight);
void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);

// Vertical filter producing the half sample, i.e. position (0, 2) in quarter-sample units.
void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iHeight);
void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);

// Horizontal and vertical filter producing the half sample, i.e. position (2, 2) in quarter-sample units.
void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iHeight);
void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);
// AArch64 NEON averaging of two source blocks, each with its own stride, into pDst.
void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
                                         const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
                                        const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
// AArch64 NEON half-sample filters for blocks one sample larger than the usual 16/8/4.
// The trailing "width+1" / "height+1" notes say which dimension is extended
// (presumably 17 = 16+1, 9 = 8+1, 5 = 4+1).
void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                     int32_t iHeight);// width+1
void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                    int32_t iHeight);// width+1
void McHorVer20Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                    int32_t iHeight);// width+1

void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight);// height+1
void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                     int32_t iHeight);// height+1
void McHorVer02Height5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                     int32_t iHeight);// height+1

void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                     int32_t iHeight);//width+1&&height+1
void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                    int32_t iHeight);//width+1&&height+1
void McHorVer22Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                    int32_t iHeight);//width+1&&height+1
245 #endif
246 
247 #if defined(X86_ASM)
//***************************************************************************//
//                       MMXEXT definition                                   //
//***************************************************************************//
// Only the prototypes are visible here. pSrc/iSrcStride describe the source buffer,
// pDst/iDstStride the destination buffer, and iHeight presumably the number of rows.
void McHorVer20WidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
// kpABCD: read-only byte table of interpolation weights (presumably the four
// weights A, B, C, D suggested by the name -- confirm against the assembly).
void McChromaWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                           const uint8_t* kpABCD, int32_t iHeight);
void McCopyWidthEq8_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                         int32_t iHeight);
// Average of two strided source blocks written to pDst.
void PixelAvgWidthEq4_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                           const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq8_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                           const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
261 
//***************************************************************************//
//                       SSE2 definition                                     //
//***************************************************************************//
// kpABCD: read-only byte table of interpolation weights (layout not stated here).
void McChromaWidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            const uint8_t* kpABCD, int32_t iHeight);
void McCopyWidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                           int32_t iHeight);
void McHorVer20WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer20WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                               int32_t iHeight);
void McHorVer02WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
// The (2, 2) position is split into a "HorFirst" pass and a "VerLast" pass; the VerLast
// routines read an intermediate buffer (pTap/iTapStride). "Align"/"UnAlign" presumably
// refer to the alignment required of a buffer -- confirm which one against the assembly.
void McHorVer22Width8HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                    int32_t iHeight);
void McHorVer22Width8VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
                                        int32_t iWidth, int32_t iHeight);
void McHorVer22Width8VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
                                          int32_t iWidth, int32_t iHeight);

// Average of two strided source blocks written to pDst.
void PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                             const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
284 
// SSE2 half-sample filters for the extended block sizes (5, 9 or 17 wide/high).
// Unlike the fixed-width routines, these also take iWidth.
void McHorVer20Width9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                int32_t iWidth, int32_t iHeight);
void McHorVer20Width5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                            int32_t iWidth, int32_t iHeight);

void McHorVer02Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                 int32_t iWidth, int32_t iHeight);
void McHorVer02Height5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iWidth, int32_t iHeight);

// Two-pass (2, 2) filtering: "HorFirst" writes an intermediate buffer (pTap/iTapStride)
// that the "VerLast" routines then read to produce pDst.
void McHorVer22HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride,
                              int32_t iWidth, int32_t iHeight);
void McHorVer22Width5HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride,
                                    int32_t iWidth, int32_t iHeight);
void McHorVer22Width4VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
                                        int32_t iWidth, int32_t iHeight);
void McHorVer22Width4VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
                                          int32_t iWidth, int32_t iHeight);
306 
//***************************************************************************//
//                       SSE3 definition                                     //
//***************************************************************************//
// SSE3 variant of the 16-wide block copy; same parameters as McCopyWidthEq16_sse2.
void McCopyWidthEq16_sse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                           int32_t iHeight);
312 
//***************************************************************************//
//                       SSSE3 definition                                    //
//***************************************************************************//
// kpABCD: read-only byte table of interpolation weights (layout not stated here).
void McChromaWidthEq8_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             const uint8_t* kpABCD, int32_t iHeight);

// Vertical (0, 2) filters. The "S16ToU8" variants read int16_t samples and write
// uint8_t pixels; the Width4 variant takes no source stride.
void McHorVer02_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                       int32_t iWidth, int32_t iHeight);
void McHorVer02Width4S16ToU8_ssse3 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width5S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride,
                                    uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02WidthGe8S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride,
                                      uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);

// Horizontal (2, 0) filters. The "U8ToS16" variants read uint8_t pixels and write
// int16_t samples; the Width4 variant takes no destination stride.
void McHorVer20_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                       int32_t iWidth, int32_t iHeight);
void McHorVer20Width4U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width5Or9Or17_ssse3 (const uint8_t* pSrc, int32_t iSrcStride,
                                    uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
void McHorVer20Width8U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride,
                                    int16_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer20Width9Or17U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride,
                                        int16_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
334 
//***************************************************************************//
//                       AVX2 definition                                     //
//***************************************************************************//
338 #ifdef HAVE_AVX2
// Vertical (0, 2) filters. The "S16ToU8" variants read int16_t samples and write
// uint8_t pixels; only the Width16Or17 variant takes a source stride and iWidth.
void McHorVer02_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                      int32_t iWidth, int32_t iHeight);
void McHorVer02Width4S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width5S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width8S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width9S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width16Or17S16ToU8_avx2 (const int16_t* pSrc, int32_t iSrcStride,
                                        uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);

// Horizontal (2, 0) filters. The "U8ToS16" variants read uint8_t pixels and write
// int16_t samples; none of them takes a destination stride.
void McHorVer20_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                      int32_t iWidth, int32_t iHeight);
void McHorVer20Width5Or9Or17_avx2 (const uint8_t* pSrc, int32_t iSrcStride,
                                   uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
void McHorVer20Width4U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width8U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width16U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width17U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
355 #endif //HAVE_AVX2
356 
357 #endif //X86_ASM
358 
//***************************************************************************//
//                       LSX definition                                      //
//***************************************************************************//
362 #if defined(HAVE_LSX)
// LSX block-copy, chroma and averaging routines. Only the prototypes are visible
// here. pSrc/iSrcStride describe the source buffer, pDst/iDstStride the destination
// buffer, and iHeight presumably the number of rows.
void McCopyWidthEq4_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                         int32_t iHeight);
void McCopyWidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                         int32_t iHeight);
void McCopyWidthEq16_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                          int32_t iHeight);

// pABCD: read-only byte table of interpolation weights (layout not stated here).
void McChromaWidthEq4_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                           const uint8_t* pABCD, int32_t iHeight);
void McChromaWidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                           const uint8_t* pABCD, int32_t iHeight);

// Average of two strided source blocks written to pDst.
void PixelAvgWidthEq4_lsx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                           const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq8_lsx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                           const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq16_lsx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                            const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
// LSX half-sample luma filters. The two digits in "McHorVerXY" presumably give the
// (x, y) offset in quarter-sample units, as described for the other back ends in
// this header. Every routine takes the same five parameters; iHeight is spelled
// int32_t throughout.

// Vertical filter, position (0, 2).
void McHorVer02WidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer02WidthEq16_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);

// Horizontal filter, position (2, 0).
void McHorVer20WidthEq4_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer20WidthEq5_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer20WidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer20WidthEq9_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer20WidthEq17_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
void McHorVer20WidthEq16_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);

// Horizontal and vertical filter, position (2, 2).
void McHorVer22WidthEq5_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer22WidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer22WidthEq9_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                             int32_t iHeight);
void McHorVer22WidthEq17_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                              int32_t iHeight);
401 #endif//HAVE_LSX
402 
403 #if defined(__cplusplus)
404 }
405 #endif//__cplusplus
406 
407 #endif//MC_H
408