• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <string.h>  // For memset.
14 
15 #include "libyuv/basic_types.h"
16 
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21 
22 // memset for vin is meant to clear the source buffer so that
23 // SIMD that reads full multiple of 16 bytes will not trigger msan errors.
24 // memset is not needed for production, as the garbage values are processed but
25 // not used, although there may be edge cases for subsampling.
26 // The size of the buffer is based on the largest read, which can be inferred
27 // by the source type (e.g. ARGB) and the mask (last parameter), or by examining
28 // the source code for how much the source pointers are advanced.
29 
// Subsampled size: width divided by (1 << shift), rounded up, so a width that
// is not an exact multiple still counts its final partial sample.
#define SS(width, shift) (((width) + ((1 << (shift)) - 1)) >> (shift))
32 
// Any 4 planes to 1.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD.
// The kernel is only ever called with a multiple of MASK + 1: first on the
// aligned front of the row, then once more on zero-filled scratch rows that
// hold the leftover pixels. Only the valid part of that second result is
// copied to dst_ptr.
// U and V are addressed at (pixel offset >> UVSHIFT), the destination at
// (pixel offset >> DUVSHIFT) * BPP bytes.
#define ANY41(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)               \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               int width) {                                                  \
    SIMD_ALIGNED(uint8_t vin[64 * 4]);                                       \
    SIMD_ALIGNED(uint8_t vout[64]);                                          \
    uint8_t* const tail_y = vin;                                             \
    uint8_t* const tail_u = vin + 64;                                        \
    uint8_t* const tail_v = vin + 128;                                       \
    uint8_t* const tail_a = vin + 192;                                       \
    const int tail = width & (MASK); /* leftover pixels */                   \
    const int body = width - tail;   /* multiple of MASK + 1 */              \
    const int tail_uv = SS(tail, UVSHIFT);                                   \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (body > 0) {                                                          \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, body);                   \
    }                                                                        \
    memcpy(tail_y, y_buf + body, tail);                                      \
    memcpy(tail_u, u_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(tail_v, v_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(tail_a, a_buf + body, tail);                                      \
    ANY_SIMD(tail_y, tail_u, tail_v, tail_a, vout, (MASK) + 1);              \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, vout,                         \
           SS(tail, DUVSHIFT) * BPP);                                        \
  }
53 
// MergeARGBRow "Any" wrappers generated with ANY41. All pass UVSHIFT = 0,
// DUVSHIFT = 0 and BPP = 4; MASK is 7 for SSE2 and 15 for AVX2 and NEON.
54 #ifdef HAS_MERGEARGBROW_SSE2
55 ANY41(MergeARGBRow_Any_SSE2, MergeARGBRow_SSE2, 0, 0, 4, 7)
56 #endif
57 #ifdef HAS_MERGEARGBROW_AVX2
58 ANY41(MergeARGBRow_Any_AVX2, MergeARGBRow_AVX2, 0, 0, 4, 15)
59 #endif
60 #ifdef HAS_MERGEARGBROW_NEON
61 ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
62 #endif
63 
// Any 4 planes to 1 with yuvconstants.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD.
// The kernel is only ever called with a multiple of MASK + 1: first on the
// aligned front of the row, then once more on zero-filled scratch rows that
// hold the leftover pixels. Only the valid part of that second result is
// copied to dst_ptr.
// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)              \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                   \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {         \
    SIMD_ALIGNED(uint8_t vin[64 * 4]);                                       \
    SIMD_ALIGNED(uint8_t vout[64]);                                          \
    uint8_t* const tail_y = vin;                                             \
    uint8_t* const tail_u = vin + 64;                                        \
    uint8_t* const tail_v = vin + 128;                                       \
    uint8_t* const tail_a = vin + 192;                                       \
    const int tail = width & (MASK); /* leftover pixels */                   \
    const int body = width - tail;   /* multiple of MASK + 1 */              \
    const int tail_uv = SS(tail, UVSHIFT);                                   \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (body > 0) {                                                          \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body);     \
    }                                                                        \
    memcpy(tail_y, y_buf + body, tail);                                      \
    memcpy(tail_u, u_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(tail_v, v_buf + (body >> UVSHIFT), tail_uv);                      \
    memcpy(tail_a, a_buf + body, tail);                                      \
    if (width & 1) {                                                         \
      /* Odd width: repeat the last U and V sample one slot further. */      \
      tail_u[tail_uv] = tail_u[tail_uv - 1];                                 \
      tail_v[tail_uv] = tail_v[tail_uv - 1];                                 \
    }                                                                        \
    ANY_SIMD(tail_y, tail_u, tail_v, tail_a, vout, yuvconstants,             \
             (MASK) + 1);                                                    \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, vout,                         \
           SS(tail, DUVSHIFT) * BPP);                                        \
  }
91 
// I444Alpha/I422Alpha to ARGB "Any" wrappers generated with ANY41C.
// The I444 variants pass UVSHIFT = 0 and the I422 variants UVSHIFT = 1; all
// pass DUVSHIFT = 0 and BPP = 4. MASK (7 or 15) differs per kernel.
92 #ifdef HAS_I444ALPHATOARGBROW_SSSE3
93 ANY41C(I444AlphaToARGBRow_Any_SSSE3, I444AlphaToARGBRow_SSSE3, 0, 0, 4, 7)
94 #endif
95 #ifdef HAS_I444ALPHATOARGBROW_AVX2
96 ANY41C(I444AlphaToARGBRow_Any_AVX2, I444AlphaToARGBRow_AVX2, 0, 0, 4, 15)
97 #endif
98 #ifdef HAS_I422ALPHATOARGBROW_SSSE3
99 ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
100 #endif
101 #ifdef HAS_I422ALPHATOARGBROW_AVX2
102 ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
103 #endif
104 #ifdef HAS_I444ALPHATOARGBROW_NEON
105 ANY41C(I444AlphaToARGBRow_Any_NEON, I444AlphaToARGBRow_NEON, 0, 0, 4, 7)
106 #endif
107 #ifdef HAS_I422ALPHATOARGBROW_NEON
108 ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
109 #endif
110 #ifdef HAS_I444ALPHATOARGBROW_MSA
111 ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
112 #endif
113 #ifdef HAS_I422ALPHATOARGBROW_MSA
114 ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
115 #endif
116 #ifdef HAS_I422ALPHATOARGBROW_LSX
117 ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15)
118 #endif
119 #ifdef HAS_I422ALPHATOARGBROW_LASX
120 ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
121 #endif
// All ANY41C wrappers are generated; retire the generator macro.
122 #undef ANY41C
123 
// Any 4 planes to 1 plane of 8 bit with yuvconstants.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD.
// Source planes hold elements of type T (SBPP bytes each). The kernel is only
// ever called with a multiple of MASK + 1: first on the aligned front of the
// row, then once more on zero-filled scratch rows (16 elements per plane) that
// hold the leftover pixels. Only the valid part of that second result is
// copied to dst_ptr.
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants,      \
               int width) {                                                    \
    SIMD_ALIGNED(T vin[16 * 4]);                                               \
    SIMD_ALIGNED(uint8_t vout[64]);                                            \
    T* const tail_y = vin;                                                     \
    T* const tail_u = vin + 16;                                                \
    T* const tail_v = vin + 32;                                                \
    T* const tail_a = vin + 48;                                                \
    const int tail = width & (MASK); /* leftover pixels */                     \
    const int body = width - tail;   /* multiple of MASK + 1 */                \
    const int tail_uv = SS(tail, UVSHIFT);                                     \
    memset(vin, 0, sizeof(vin)); /* for msan */                                \
    if (body > 0) {                                                            \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body);       \
    }                                                                          \
    memcpy(tail_y, y_buf + body, tail * SBPP);                                 \
    memcpy(tail_u, u_buf + (body >> UVSHIFT), tail_uv * SBPP);                 \
    memcpy(tail_v, v_buf + (body >> UVSHIFT), tail_uv * SBPP);                 \
    memcpy(tail_a, a_buf + body, tail * SBPP);                                 \
    ANY_SIMD(tail_y, tail_u, tail_v, tail_a, vout, yuvconstants,               \
             (MASK) + 1);                                                      \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, vout,                           \
           SS(tail, DUVSHIFT) * BPP);                                          \
  }
144 
// I210Alpha/I410Alpha to ARGB "Any" wrappers generated with ANY41CT.
// All use uint16_t source planes (SBPP = 2), DUVSHIFT = 0 and BPP = 4.
// The I210 variants pass UVSHIFT = 1, the I410 variants UVSHIFT = 0;
// MASK is 7 for SSSE3 and 15 for AVX2.
145 #ifdef HAS_I210ALPHATOARGBROW_SSSE3
146 ANY41CT(I210AlphaToARGBRow_Any_SSSE3,
147         I210AlphaToARGBRow_SSSE3,
148         1,
149         0,
150         uint16_t,
151         2,
152         4,
153         7)
154 #endif
155 
156 #ifdef HAS_I210ALPHATOARGBROW_AVX2
157 ANY41CT(I210AlphaToARGBRow_Any_AVX2,
158         I210AlphaToARGBRow_AVX2,
159         1,
160         0,
161         uint16_t,
162         2,
163         4,
164         15)
165 #endif
166 
167 #ifdef HAS_I410ALPHATOARGBROW_SSSE3
168 ANY41CT(I410AlphaToARGBRow_Any_SSSE3,
169         I410AlphaToARGBRow_SSSE3,
170         0,
171         0,
172         uint16_t,
173         2,
174         4,
175         7)
176 #endif
177 
178 #ifdef HAS_I410ALPHATOARGBROW_AVX2
179 ANY41CT(I410AlphaToARGBRow_Any_AVX2,
180         I410AlphaToARGBRow_AVX2,
181         0,
182         0,
183         uint16_t,
184         2,
185         4,
186         15)
187 #endif
188 
// All ANY41CT wrappers are generated; retire the generator macro.
189 #undef ANY41CT
190 
// Any 4 planes to 1 plane with parameter.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD
// and forwards the extra depth argument unchanged. Sources are STYPE elements
// of SBPP bytes, the destination is DTYPE with BPP bytes per pixel. The kernel
// is only ever called with a multiple of MASK + 1: first on the aligned front
// of the row, then once more on zero-filled scratch rows (16 elements per
// plane) that hold the leftover pixels. Only the valid part of that second
// result is copied to dst_ptr.
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK)          \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
    SIMD_ALIGNED(STYPE vin[16 * 4]);                                       \
    SIMD_ALIGNED(DTYPE vout[64]);                                          \
    STYPE* const tmp_r = vin;                                              \
    STYPE* const tmp_g = vin + 16;                                         \
    STYPE* const tmp_b = vin + 32;                                         \
    STYPE* const tmp_a = vin + 48;                                         \
    const int tail = width & (MASK); /* leftover pixels */                 \
    const int body = width - tail;   /* multiple of MASK + 1 */            \
    memset(vin, 0, sizeof(vin)); /* for msan */                            \
    if (body > 0) {                                                        \
      ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, body);          \
    }                                                                      \
    memcpy(tmp_r, r_buf + body, tail * SBPP);                              \
    memcpy(tmp_g, g_buf + body, tail * SBPP);                              \
    memcpy(tmp_b, b_buf + body, tail * SBPP);                              \
    memcpy(tmp_a, a_buf + body, tail * SBPP);                              \
    ANY_SIMD(tmp_r, tmp_g, tmp_b, tmp_a, vout, depth, (MASK) + 1);         \
    /* BPP is in bytes, so the destination is offset as a byte pointer. */ \
    memcpy((uint8_t*)dst_ptr + body * BPP, vout, tail * BPP);              \
  }
210 
// MergeAR64 and MergeARGB16To8 "Any" wrappers generated with ANY41PT.
// All read uint16_t planes (SBPP = 2). MergeAR64 writes uint16_t with BPP = 8,
// MergeARGB16To8 writes uint8_t with BPP = 4. MASK is 15 for AVX2, 7 for NEON.
211 #ifdef HAS_MERGEAR64ROW_AVX2
212 ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
213 #endif
214 
215 #ifdef HAS_MERGEAR64ROW_NEON
216 ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
217 #endif
218 
219 #ifdef HAS_MERGEARGB16TO8ROW_AVX2
220 ANY41PT(MergeARGB16To8Row_Any_AVX2,
221         MergeARGB16To8Row_AVX2,
222         uint16_t,
223         2,
224         uint8_t,
225         4,
226         15)
227 #endif
228 
229 #ifdef HAS_MERGEARGB16TO8ROW_NEON
230 ANY41PT(MergeARGB16To8Row_Any_NEON,
231         MergeARGB16To8Row_NEON,
232         uint16_t,
233         2,
234         uint8_t,
235         4,
236         7)
237 #endif
238 
// All ANY41PT wrappers are generated; retire the generator macro.
239 #undef ANY41PT
240 
// Any 3 planes to 1.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD.
// The kernel is only ever called with a multiple of MASK + 1: first on the
// aligned front of the row, then once more on zero-filled scratch rows that
// hold the leftover pixels. Only the valid part of that second result is
// copied to dst_ptr.
// U and V are addressed at (pixel offset >> UVSHIFT), the destination at
// (pixel offset >> DUVSHIFT) * BPP bytes.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)            \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                \
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) {       \
    SIMD_ALIGNED(uint8_t vin[64 * 3]);                                    \
    SIMD_ALIGNED(uint8_t vout[64]);                                       \
    uint8_t* const tail_y = vin;                                          \
    uint8_t* const tail_u = vin + 64;                                     \
    uint8_t* const tail_v = vin + 128;                                    \
    const int tail = width & (MASK); /* leftover pixels */                \
    const int body = width - tail;   /* multiple of MASK + 1 */           \
    const int tail_uv = SS(tail, UVSHIFT);                                \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
    if (body > 0) {                                                       \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, body);                       \
    }                                                                     \
    memcpy(tail_y, y_buf + body, tail);                                   \
    memcpy(tail_u, u_buf + (body >> UVSHIFT), tail_uv);                   \
    memcpy(tail_v, v_buf + (body >> UVSHIFT), tail_uv);                   \
    ANY_SIMD(tail_y, tail_u, tail_v, vout, (MASK) + 1);                   \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, vout,                      \
           SS(tail, DUVSHIFT) * BPP);                                     \
  }
259 
// "Any" wrappers generated with ANY31; the arguments after the kernel name are
// UVSHIFT, DUVSHIFT, BPP, MASK.
260 // Merge functions.
261 #ifdef HAS_MERGERGBROW_SSSE3
262 ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
263 #endif
264 #ifdef HAS_MERGERGBROW_NEON
265 ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
266 #endif
267 #ifdef HAS_MERGEXRGBROW_SSE2
268 ANY31(MergeXRGBRow_Any_SSE2, MergeXRGBRow_SSE2, 0, 0, 4, 7)
269 #endif
270 #ifdef HAS_MERGEXRGBROW_AVX2
271 ANY31(MergeXRGBRow_Any_AVX2, MergeXRGBRow_AVX2, 0, 0, 4, 15)
272 #endif
273 #ifdef HAS_MERGEXRGBROW_NEON
274 ANY31(MergeXRGBRow_Any_NEON, MergeXRGBRow_NEON, 0, 0, 4, 15)
275 #endif
// I422 to YUY2/UYVY: UVSHIFT = DUVSHIFT = 1, so BPP = 4 is the byte count of
// one 2-pixel output unit.
276 #ifdef HAS_I422TOYUY2ROW_SSE2
277 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
278 ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
279 #endif
280 #ifdef HAS_I422TOYUY2ROW_AVX2
281 ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
282 ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
283 #endif
284 #ifdef HAS_I422TOYUY2ROW_NEON
285 ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
286 #endif
287 #ifdef HAS_I422TOYUY2ROW_MSA
288 ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
289 #endif
290 #ifdef HAS_I422TOYUY2ROW_LSX
291 ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15)
292 #endif
293 #ifdef HAS_I422TOYUY2ROW_LASX
294 ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31)
295 #endif
296 #ifdef HAS_I422TOUYVYROW_NEON
297 ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
298 #endif
299 #ifdef HAS_I422TOUYVYROW_MSA
300 ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
301 #endif
302 #ifdef HAS_I422TOUYVYROW_LSX
303 ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15)
304 #endif
305 #ifdef HAS_I422TOUYVYROW_LASX
306 ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31)
307 #endif
// BlendPlaneRow: no subsampling (both shifts 0) and 1 output byte per pixel.
308 #ifdef HAS_BLENDPLANEROW_AVX2
309 ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
310 #endif
311 #ifdef HAS_BLENDPLANEROW_SSSE3
312 ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
313 #endif
// All ANY31 wrappers are generated; retire the generator macro.
314 #undef ANY31
315 
// Any 3 planes to 1 with yuvconstants.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD.
// The kernel is only ever called with a multiple of MASK + 1: first on the
// aligned front of the row, then once more on zero-filled scratch rows
// (128 bytes per plane) that hold the leftover pixels. Only the valid part of
// that second result is copied to dst_ptr.
// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)           \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf,                \
               const uint8_t* v_buf, uint8_t* dst_ptr,                    \
               const struct YuvConstants* yuvconstants, int width) {      \
    SIMD_ALIGNED(uint8_t vin[128 * 3]);                                   \
    SIMD_ALIGNED(uint8_t vout[128]);                                      \
    uint8_t* const tail_y = vin;                                          \
    uint8_t* const tail_u = vin + 128;                                    \
    uint8_t* const tail_v = vin + 256;                                    \
    const int tail = width & (MASK); /* leftover pixels */                \
    const int body = width - tail;   /* multiple of MASK + 1 */           \
    const int tail_uv = SS(tail, UVSHIFT);                                \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
    if (body > 0) {                                                       \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);         \
    }                                                                     \
    memcpy(tail_y, y_buf + body, tail);                                   \
    memcpy(tail_u, u_buf + (body >> UVSHIFT), tail_uv);                   \
    memcpy(tail_v, v_buf + (body >> UVSHIFT), tail_uv);                   \
    if (width & 1) {                                                      \
      /* Odd width: repeat the last U and V sample one slot further. */   \
      tail_u[tail_uv] = tail_u[tail_uv - 1];                              \
      tail_v[tail_uv] = tail_v[tail_uv - 1];                              \
    }                                                                     \
    ANY_SIMD(tail_y, tail_u, tail_v, vout, yuvconstants, (MASK) + 1);     \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, vout,                      \
           SS(tail, DUVSHIFT) * BPP);                                     \
  }
341 
// YUV to RGB "Any" wrappers generated with ANY31C; the arguments after the
// kernel name are UVSHIFT, DUVSHIFT, BPP, MASK. The I422 variants pass
// UVSHIFT = 1 and the I444 variants UVSHIFT = 0; DUVSHIFT is always 0.
// BPP is 4, 3 or 2 depending on the destination format.
342 #ifdef HAS_I422TOARGBROW_SSSE3
343 ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
344 #endif
345 #ifdef HAS_I422TORGBAROW_SSSE3
346 ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
347 #endif
348 #ifdef HAS_I422TOARGB4444ROW_SSSE3
349 ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
350 #endif
351 #ifdef HAS_I422TOARGB1555ROW_SSSE3
352 ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
353 #endif
354 #ifdef HAS_I422TORGB565ROW_SSSE3
355 ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
356 #endif
357 #ifdef HAS_I422TORGB24ROW_SSSE3
358 ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
359 #endif
360 #ifdef HAS_I422TOAR30ROW_SSSE3
361 ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
362 #endif
363 #ifdef HAS_I422TOAR30ROW_AVX2
364 ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
365 #endif
366 #ifdef HAS_I444TOARGBROW_SSSE3
367 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
368 #endif
369 #ifdef HAS_I444TORGB24ROW_SSSE3
370 ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
371 #endif
372 #ifdef HAS_I422TORGB24ROW_AVX2
373 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
374 #endif
375 #ifdef HAS_I422TOARGBROW_AVX2
376 ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
377 #endif
378 #ifdef HAS_I422TOARGBROW_AVX512BW
379 ANY31C(I422ToARGBRow_Any_AVX512BW, I422ToARGBRow_AVX512BW, 1, 0, 4, 31)
380 #endif
381 #ifdef HAS_I422TORGBAROW_AVX2
382 ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
383 #endif
384 #ifdef HAS_I444TOARGBROW_AVX2
385 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
386 #endif
387 #ifdef HAS_I444TORGB24ROW_AVX2
388 ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31)
389 #endif
390 #ifdef HAS_I422TOARGB4444ROW_AVX2
391 ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
392 #endif
393 #ifdef HAS_I422TOARGB1555ROW_AVX2
394 ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
395 #endif
396 #ifdef HAS_I422TORGB565ROW_AVX2
397 ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
398 #endif
399 #ifdef HAS_I444TORGB24ROW_NEON
400 ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7)
401 #endif
// From here on a single HAS_I422TOARGBROW_<arch> guard enables a whole group
// of wrappers for that architecture.
402 #ifdef HAS_I422TOARGBROW_NEON
403 ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
404 ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
405 ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
406 ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
407 ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
408 ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
409 ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
410 #endif
411 #ifdef HAS_I422TOARGBROW_MSA
412 ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
413 ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
414 ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
415 ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
416 ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
417 ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
418 ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
419 #endif
420 #ifdef HAS_I422TOARGBROW_LSX
421 ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15)
422 ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15)
423 ANY31C(I422ToRGB24Row_Any_LSX, I422ToRGB24Row_LSX, 1, 0, 3, 15)
424 ANY31C(I422ToRGB565Row_Any_LSX, I422ToRGB565Row_LSX, 1, 0, 2, 15)
425 ANY31C(I422ToARGB4444Row_Any_LSX, I422ToARGB4444Row_LSX, 1, 0, 2, 15)
426 ANY31C(I422ToARGB1555Row_Any_LSX, I422ToARGB1555Row_LSX, 1, 0, 2, 15)
427 #endif
428 #ifdef HAS_I422TOARGBROW_LASX
429 ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31)
430 ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31)
431 ANY31C(I422ToRGB24Row_Any_LASX, I422ToRGB24Row_LASX, 1, 0, 3, 31)
432 ANY31C(I422ToRGB565Row_Any_LASX, I422ToRGB565Row_LASX, 1, 0, 2, 31)
433 ANY31C(I422ToARGB4444Row_Any_LASX, I422ToARGB4444Row_LASX, 1, 0, 2, 31)
434 ANY31C(I422ToARGB1555Row_Any_LASX, I422ToARGB1555Row_LASX, 1, 0, 2, 31)
435 #endif
436 #ifdef HAS_I444TOARGBROW_LSX
437 ANY31C(I444ToARGBRow_Any_LSX, I444ToARGBRow_LSX, 0, 0, 4, 15)
438 #endif
// All ANY31C wrappers are generated; retire the generator macro.
439 #undef ANY31C
440 
// Any 3 planes of 16 bit to 1 with yuvconstants.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD.
// Source planes hold elements of type T (SBPP bytes each). The kernel is only
// ever called with a multiple of MASK + 1: first on the aligned front of the
// row, then once more on zero-filled scratch rows (16 elements per plane) that
// hold the leftover pixels. Only the valid part of that second result is
// copied to dst_ptr.
// TODO(fbarchard): consider sharing this code with ANY31C
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf,            \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
               int width) {                                               \
    SIMD_ALIGNED(T vin[16 * 3]);                                          \
    SIMD_ALIGNED(uint8_t vout[64]);                                       \
    T* const tail_y = vin;                                                \
    T* const tail_u = vin + 16;                                           \
    T* const tail_v = vin + 32;                                           \
    const int tail = width & (MASK); /* leftover pixels */                \
    const int body = width - tail;   /* multiple of MASK + 1 */           \
    const int tail_uv = SS(tail, UVSHIFT);                                \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                  \
    if (body > 0) {                                                       \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);         \
    }                                                                     \
    memcpy(tail_y, y_buf + body, tail * SBPP);                            \
    memcpy(tail_u, u_buf + (body >> UVSHIFT), tail_uv * SBPP);            \
    memcpy(tail_v, v_buf + (body >> UVSHIFT), tail_uv * SBPP);            \
    ANY_SIMD(tail_y, tail_u, tail_v, vout, yuvconstants, (MASK) + 1);     \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, vout,                      \
           SS(tail, DUVSHIFT) * BPP);                                     \
  }
461 
// I210/I410/I212 to ARGB and AR30 "Any" wrappers generated with ANY31CT.
// All read uint16_t planes (SBPP = 2) and pass DUVSHIFT = 0, BPP = 4.
// The I210 and I212 variants pass UVSHIFT = 1, the I410 variants UVSHIFT = 0;
// MASK is 7 for SSSE3 and 15 for AVX2.
462 #ifdef HAS_I210TOAR30ROW_SSSE3
463 ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
464 #endif
465 #ifdef HAS_I210TOARGBROW_SSSE3
466 ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
467 #endif
468 #ifdef HAS_I210TOARGBROW_AVX2
469 ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
470 #endif
471 #ifdef HAS_I210TOAR30ROW_AVX2
472 ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
473 #endif
474 #ifdef HAS_I410TOAR30ROW_SSSE3
475 ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
476 #endif
477 #ifdef HAS_I410TOARGBROW_SSSE3
478 ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
479 #endif
480 #ifdef HAS_I410TOARGBROW_AVX2
481 ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
482 #endif
483 #ifdef HAS_I410TOAR30ROW_AVX2
484 ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
485 #endif
486 #ifdef HAS_I212TOAR30ROW_SSSE3
487 ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
488 #endif
489 #ifdef HAS_I212TOARGBROW_SSSE3
490 ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
491 #endif
492 #ifdef HAS_I212TOARGBROW_AVX2
493 ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
494 #endif
495 #ifdef HAS_I212TOAR30ROW_AVX2
496 ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
497 #endif
// All ANY31CT wrappers are generated; retire the generator macro.
498 #undef ANY31CT
499 
// Any 3 planes to 1 plane with parameter.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD
// and forwards the extra depth argument unchanged. Sources are STYPE elements
// of SBPP bytes, the destination is DTYPE with BPP bytes per pixel. The kernel
// is only ever called with a multiple of MASK + 1: first on the aligned front
// of the row, then once more on zero-filled scratch rows (16 elements per
// plane) that hold the leftover pixels. Only the valid part of that second
// result is copied to dst_ptr.
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK)          \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               DTYPE* dst_ptr, int depth, int width) {                     \
    SIMD_ALIGNED(STYPE vin[16 * 3]);                                       \
    SIMD_ALIGNED(DTYPE vout[64]);                                          \
    STYPE* const tmp_r = vin;                                              \
    STYPE* const tmp_g = vin + 16;                                         \
    STYPE* const tmp_b = vin + 32;                                         \
    const int tail = width & (MASK); /* leftover pixels */                 \
    const int body = width - tail;   /* multiple of MASK + 1 */            \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                   \
    if (body > 0) {                                                        \
      ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, body);                 \
    }                                                                      \
    memcpy(tmp_r, r_buf + body, tail * SBPP);                              \
    memcpy(tmp_g, g_buf + body, tail * SBPP);                              \
    memcpy(tmp_b, b_buf + body, tail * SBPP);                              \
    ANY_SIMD(tmp_r, tmp_g, tmp_b, vout, depth, (MASK) + 1);                \
    /* BPP is in bytes, so the destination is offset as a byte pointer. */ \
    memcpy((uint8_t*)dst_ptr + body * BPP, vout, tail * BPP);              \
  }
518 
// MergeXR30, MergeXR64 and MergeXRGB16To8 "Any" wrappers generated with
// ANY31PT. All read uint16_t planes (SBPP = 2). The XR30 and XRGB16To8
// variants write uint8_t with BPP = 4, the XR64 variants write uint16_t with
// BPP = 8. MASK differs per kernel (3, 7 or 15).
519 #ifdef HAS_MERGEXR30ROW_AVX2
520 ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
521 #endif
522 
523 #ifdef HAS_MERGEXR30ROW_NEON
524 ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
525 ANY31PT(MergeXR30Row_10_Any_NEON,
526         MergeXR30Row_10_NEON,
527         uint16_t,
528         2,
529         uint8_t,
530         4,
531         3)
532 #endif
533 
534 #ifdef HAS_MERGEXR64ROW_AVX2
535 ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
536 #endif
537 
538 #ifdef HAS_MERGEXR64ROW_NEON
539 ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
540 #endif
541 
542 #ifdef HAS_MERGEXRGB16TO8ROW_AVX2
543 ANY31PT(MergeXRGB16To8Row_Any_AVX2,
544         MergeXRGB16To8Row_AVX2,
545         uint16_t,
546         2,
547         uint8_t,
548         4,
549         15)
550 #endif
551 
552 #ifdef HAS_MERGEXRGB16TO8ROW_NEON
553 ANY31PT(MergeXRGB16To8Row_Any_NEON,
554         MergeXRGB16To8Row_NEON,
555         uint16_t,
556         2,
557         uint8_t,
558         4,
559         7)
560 #endif
561 
// All ANY31PT wrappers are generated; retire the generator macro.
562 #undef ANY31PT
563 
// Any 2 planes to 1.
// Generates NAMEANY, a wrapper that accepts any width for the kernel ANY_SIMD.
// SBPP and SBPP2 are the bytes per unit of the first and second source, BPP
// the bytes per unit of the destination; the second source is addressed at
// (offset >> UVSHIFT) units. The kernel is only ever called with a multiple of
// MASK + 1: first on the aligned front of the row, then once more on
// zero-filled scratch rows (128 bytes per source) that hold the leftover.
// Only the valid part of that second result is copied to dst_ptr.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               int width) {                                                   \
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                       \
    SIMD_ALIGNED(uint8_t vout[128]);                                          \
    uint8_t* const tail_y = vin;                                              \
    uint8_t* const tail_uv = vin + 128;                                       \
    const int tail = width & (MASK); /* leftover units */                     \
    const int body = width - tail;   /* multiple of MASK + 1 */               \
    memset(vin, 0, sizeof(vin)); /* for msan */                               \
    if (body > 0) {                                                           \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, body);                                 \
    }                                                                         \
    memcpy(tail_y, y_buf + body * SBPP, tail * SBPP);                         \
    memcpy(tail_uv, uv_buf + (body >> UVSHIFT) * SBPP2,                       \
           SS(tail, UVSHIFT) * SBPP2);                                        \
    ANY_SIMD(tail_y, tail_uv, vout, (MASK) + 1);                              \
    memcpy(dst_ptr + body * BPP, vout, tail * BPP);                           \
  }
582 
// "Any" wrappers generated with ANY21; the arguments after the kernel name are
// UVSHIFT, SBPP, SBPP2, BPP, MASK.
583 // Merge functions.
584 #ifdef HAS_MERGEUVROW_SSE2
585 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
586 #endif
587 #ifdef HAS_MERGEUVROW_AVX2
588 ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 15)
589 #endif
590 #ifdef HAS_MERGEUVROW_AVX512BW
591 ANY21(MergeUVRow_Any_AVX512BW, MergeUVRow_AVX512BW, 0, 1, 1, 2, 31)
592 #endif
593 #ifdef HAS_MERGEUVROW_NEON
594 ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
595 #endif
596 #ifdef HAS_MERGEUVROW_MSA
597 ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
598 #endif
599 #ifdef HAS_MERGEUVROW_LSX
600 ANY21(MergeUVRow_Any_LSX, MergeUVRow_LSX, 0, 1, 1, 2, 15)
601 #endif
// NV21ToYUV24: the second source is read at half the offset (UVSHIFT = 1)
// with 2 bytes per unit; 3 bytes are written per pixel.
602 #ifdef HAS_NV21TOYUV24ROW_NEON
603 ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
604 #endif
605 #ifdef HAS_NV21TOYUV24ROW_SSSE3
606 ANY21(NV21ToYUV24Row_Any_SSSE3, NV21ToYUV24Row_SSSE3, 1, 1, 2, 3, 15)
607 #endif
608 #ifdef HAS_NV21TOYUV24ROW_AVX2
609 ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
610 #endif
611 // Math functions.
// Both sources and the destination use 4 bytes per pixel.
612 #ifdef HAS_ARGBMULTIPLYROW_SSE2
613 ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
614 #endif
615 #ifdef HAS_ARGBADDROW_SSE2
616 ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
617 #endif
618 #ifdef HAS_ARGBSUBTRACTROW_SSE2
619 ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
620 #endif
621 #ifdef HAS_ARGBMULTIPLYROW_AVX2
622 ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
623 #endif
624 #ifdef HAS_ARGBADDROW_AVX2
625 ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
626 #endif
627 #ifdef HAS_ARGBSUBTRACTROW_AVX2
628 ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
629 #endif
630 #ifdef HAS_ARGBMULTIPLYROW_NEON
631 ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
632 #endif
633 #ifdef HAS_ARGBADDROW_NEON
634 ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
635 #endif
636 #ifdef HAS_ARGBSUBTRACTROW_NEON
637 ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
638 #endif
639 #ifdef HAS_ARGBMULTIPLYROW_MSA
640 ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
641 #endif
642 #ifdef HAS_ARGBMULTIPLYROW_LSX
643 ANY21(ARGBMultiplyRow_Any_LSX, ARGBMultiplyRow_LSX, 0, 4, 4, 4, 3)
644 #endif
645 #ifdef HAS_ARGBMULTIPLYROW_LASX
646 ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7)
647 #endif
648 #ifdef HAS_ARGBADDROW_MSA
649 ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
650 #endif
651 #ifdef HAS_ARGBADDROW_LSX
652 ANY21(ARGBAddRow_Any_LSX, ARGBAddRow_LSX, 0, 4, 4, 4, 3)
653 #endif
654 #ifdef HAS_ARGBADDROW_LASX
655 ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7)
656 #endif
657 #ifdef HAS_ARGBSUBTRACTROW_MSA
658 ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
659 #endif
660 #ifdef HAS_ARGBSUBTRACTROW_LSX
661 ANY21(ARGBSubtractRow_Any_LSX, ARGBSubtractRow_LSX, 0, 4, 4, 4, 3)
662 #endif
663 #ifdef HAS_ARGBSUBTRACTROW_LASX
664 ANY21(ARGBSubtractRow_Any_LASX, ARGBSubtractRow_LASX, 0, 4, 4, 4, 7)
665 #endif
// Sobel wrappers: two 1-byte-per-pixel sources. SobelRow and SobelXYRow write
// 4 bytes per pixel, SobelToPlaneRow writes 1.
666 #ifdef HAS_SOBELROW_SSE2
667 ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
668 #endif
669 #ifdef HAS_SOBELROW_NEON
670 ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
671 #endif
672 #ifdef HAS_SOBELROW_MSA
673 ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
674 #endif
675 #ifdef HAS_SOBELROW_LSX
676 ANY21(SobelRow_Any_LSX, SobelRow_LSX, 0, 1, 1, 4, 15)
677 #endif
678 #ifdef HAS_SOBELTOPLANEROW_SSE2
679 ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
680 #endif
681 #ifdef HAS_SOBELTOPLANEROW_NEON
682 ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
683 #endif
684 #ifdef HAS_SOBELTOPLANEROW_MSA
685 ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
686 #endif
687 #ifdef HAS_SOBELTOPLANEROW_LSX
688 ANY21(SobelToPlaneRow_Any_LSX, SobelToPlaneRow_LSX, 0, 1, 1, 1, 31)
689 #endif
690 #ifdef HAS_SOBELXYROW_SSE2
691 ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
692 #endif
693 #ifdef HAS_SOBELXYROW_NEON
694 ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
695 #endif
696 #ifdef HAS_SOBELXYROW_MSA
697 ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
698 #endif
699 #ifdef HAS_SOBELXYROW_LSX
700 ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15)
701 #endif
// All ANY21 wrappers are generated; retire the generator macro.
702 #undef ANY21
703 
// Any 2 planes to 1 with stride
// width is measured in source pixels. 4 bytes contains 2 pixels
//
// The two source rows are src_yuy2 and src_yuy2 + stride_yuy2.
//   SBPP  source bytes per pixel pair.
//   BPP   destination bytes per pixel pair.
//   MASK  pixel pairs per kernel block, minus one.
// Whole blocks are converted in place; the remaining pairs are staged through
// zero-filled, aligned temporaries so the kernel can run one full block.
// Each staging row is 64 bytes: the widest tail used with this macro
// (MASK = 15, SBPP = 4 -> 60 bytes) must fit inside its own row without
// spilling into the second row or past the end of vin.
// Requires (MASK + 1) * SBPP <= 64 and (MASK + 1) * BPP <= 32.
#define ANY21S(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_uv, \
               int width) {                                               \
    SIMD_ALIGNED(uint8_t vin[64 * 2]);                                    \
    SIMD_ALIGNED(uint8_t vout[32]);                                       \
    memset(vin, 0, sizeof(vin)); /* for msan */                           \
    int awidth = (width + 1) / 2; /* pixel pairs, rounded up */           \
    int r = awidth & MASK;        /* pairs left for the staged tail */    \
    int n = awidth & ~MASK;       /* pairs handled in place */            \
    if (n > 0) {                                                          \
      ANY_SIMD(src_yuy2, stride_yuy2, dst_uv, n * 2);                     \
    }                                                                     \
    memcpy(vin, src_yuy2 + n * SBPP, r * SBPP);                           \
    memcpy(vin + 64, src_yuy2 + stride_yuy2 + n * SBPP, r * SBPP);        \
    /* The kernel takes pixels, so one block is (MASK + 1) pairs * 2. */  \
    ANY_SIMD(vin, 64, vout, (MASK + 1) * 2);                              \
    memcpy(dst_uv + n * BPP, vout, r * BPP);                              \
  }
723 
// YUY2ToNVUVRow wrappers. ANY21S arguments after the kernel name are
// SBPP = 4, BPP = 2 and MASK; MASK is applied to the pixel-pair count
// ((width + 1) / 2), so MASK = 7 and 15 mean blocks of 8 and 16 pairs.
// NOTE(review): no #undef ANY21S follows in this part of the file, unlike the
// other ANY* families here - confirm whether that is intentional.
724 #ifdef HAS_YUY2TONVUVROW_NEON
725 ANY21S(YUY2ToNVUVRow_Any_NEON, YUY2ToNVUVRow_NEON, 4, 2, 7)
726 #endif
727 #ifdef HAS_YUY2TONVUVROW_SSE2
728 ANY21S(YUY2ToNVUVRow_Any_SSE2, YUY2ToNVUVRow_SSE2, 4, 2, 7)
729 #endif
730 #ifdef HAS_YUY2TONVUVROW_AVX2
731 ANY21S(YUY2ToNVUVRow_Any_AVX2, YUY2ToNVUVRow_AVX2, 4, 2, 15)
732 #endif
733 
// Any 2 planes to 1 with yuvconstants
//   UVSHIFT  horizontal subsampling shift of the second (uv) plane.
//   SBPP     bytes per pixel in y_buf.
//   SBPP2    bytes per sample group in uv_buf.
//   BPP      bytes per pixel in dst_ptr.
//   MASK     kernel block size in pixels, minus one.
// Whole blocks go straight through ANY_SIMD; the leftover pixels are copied
// into zeroed, aligned scratch rows, converted as one full block, and only the
// valid bytes are copied out.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)            \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {          \
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                       \
    SIMD_ALIGNED(uint8_t vout[128]);                                          \
    const int body = width & ~MASK; /* pixels converted in place */           \
    const int tail = width & MASK;  /* pixels converted via scratch */        \
    uint8_t* const tail_y = vin;                                              \
    uint8_t* const tail_uv = vin + 128;                                       \
    memset(vin, 0, sizeof(vin)); /* for msan */                               \
    if (body > 0) {                                                           \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, body);                   \
    }                                                                         \
    memcpy(tail_y, y_buf + body * SBPP, tail * SBPP);                         \
    memcpy(tail_uv, uv_buf + (body >> UVSHIFT) * SBPP2,                       \
           SS(tail, UVSHIFT) * SBPP2);                                        \
    ANY_SIMD(tail_y, tail_uv, vout, yuvconstants, MASK + 1);                  \
    memcpy(dst_ptr + body * BPP, vout, tail * BPP);                           \
  }
752 
753 // Biplanar to RGB.
// ANY21C arguments after the kernel name: UVSHIFT, SBPP, SBPP2, BPP, MASK.
// Every wrapper in this group uses UVSHIFT = 1, SBPP = 1 and SBPP2 = 2; BPP is
// 4, 3 or 2 depending on the destination, and MASK + 1 is the width handed to
// the kernel for the staged tail.
754 #ifdef HAS_NV12TOARGBROW_SSSE3
755 ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
756 #endif
757 #ifdef HAS_NV12TOARGBROW_AVX2
758 ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
759 #endif
760 #ifdef HAS_NV12TOARGBROW_NEON
761 ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
762 #endif
763 #ifdef HAS_NV12TOARGBROW_MSA
764 ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
765 #endif
766 #ifdef HAS_NV12TOARGBROW_LSX
767 ANY21C(NV12ToARGBRow_Any_LSX, NV12ToARGBRow_LSX, 1, 1, 2, 4, 7)
768 #endif
769 #ifdef HAS_NV12TOARGBROW_LASX
770 ANY21C(NV12ToARGBRow_Any_LASX, NV12ToARGBRow_LASX, 1, 1, 2, 4, 15)
771 #endif
772 #ifdef HAS_NV21TOARGBROW_SSSE3
773 ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
774 #endif
775 #ifdef HAS_NV21TOARGBROW_AVX2
776 ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
777 #endif
778 #ifdef HAS_NV21TOARGBROW_NEON
779 ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
780 #endif
781 #ifdef HAS_NV21TOARGBROW_MSA
782 ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
783 #endif
784 #ifdef HAS_NV21TOARGBROW_LSX
785 ANY21C(NV21ToARGBRow_Any_LSX, NV21ToARGBRow_LSX, 1, 1, 2, 4, 7)
786 #endif
787 #ifdef HAS_NV21TOARGBROW_LASX
788 ANY21C(NV21ToARGBRow_Any_LASX, NV21ToARGBRow_LASX, 1, 1, 2, 4, 15)
789 #endif
// 3 bytes per destination pixel.
790 #ifdef HAS_NV12TORGB24ROW_NEON
791 ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
792 #endif
793 #ifdef HAS_NV21TORGB24ROW_NEON
794 ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
795 #endif
796 #ifdef HAS_NV12TORGB24ROW_SSSE3
797 ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
798 #endif
799 #ifdef HAS_NV21TORGB24ROW_SSSE3
800 ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
801 #endif
802 #ifdef HAS_NV12TORGB24ROW_AVX2
803 ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
804 #endif
805 #ifdef HAS_NV21TORGB24ROW_AVX2
806 ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
807 #endif
// 2 bytes per destination pixel.
808 #ifdef HAS_NV12TORGB565ROW_SSSE3
809 ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
810 #endif
811 #ifdef HAS_NV12TORGB565ROW_AVX2
812 ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
813 #endif
814 #ifdef HAS_NV12TORGB565ROW_NEON
815 ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
816 #endif
817 #ifdef HAS_NV12TORGB565ROW_MSA
818 ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
819 #endif
820 #ifdef HAS_NV12TORGB565ROW_LSX
821 ANY21C(NV12ToRGB565Row_Any_LSX, NV12ToRGB565Row_LSX, 1, 1, 2, 2, 7)
822 #endif
823 #ifdef HAS_NV12TORGB565ROW_LASX
824 ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15)
825 #endif
// ANY21C is retired here so the name cannot be used by later sections.
826 #undef ANY21C
827 
// Any 2 planes of 16 bit to 1 with yuvconstants
//   UVSHIFT   horizontal subsampling shift of the interleaved uv plane.
//   DUVSHIFT  shift applied to the pixel index when addressing dst_ptr.
//   T         sample type of both source planes.
//   SBPP      bytes per source sample.
//   BPP       bytes per destination pixel.
//   MASK      kernel block size in pixels, minus one.
// Whole blocks are converted in place; the leftover pixels are staged through
// zero-filled temporaries so the kernel can run on one full block.
// vin layout: 16 Y samples, then up to 32 interleaved UV samples. With
// UVSHIFT = 0 and MASK = 15 the kernel reads 16 UV pairs (32 samples) from
// vin + 16, so 16 * 3 samples are required; 16 * 2 would be overrun by both
// the tail memcpy and the kernel read.
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)     \
  void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr,             \
               const struct YuvConstants* yuvconstants, int width) {          \
    SIMD_ALIGNED(T vin[16 * 3]);                                              \
    SIMD_ALIGNED(uint8_t vout[64]);                                           \
    memset(vin, 0, sizeof(vin)); /* for msan */                               \
    int r = width & MASK;  /* pixels left for the staged tail */              \
    int n = width & ~MASK; /* pixels handled in place */                      \
    if (n > 0) {                                                              \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                      \
    }                                                                         \
    memcpy(vin, y_buf + n, r * SBPP);                                         \
    /* Two samples (U and V) per chroma position. */                          \
    memcpy(vin + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
    ANY_SIMD(vin, vin + 16, vout, yuvconstants, MASK + 1);                    \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP);     \
  }
845 
// ANY21CT arguments after the kernel name: UVSHIFT, DUVSHIFT, T, SBPP, BPP,
// MASK. All wrappers here use uint16_t samples (SBPP = 2), BPP = 4 and
// DUVSHIFT = 0. The P210 wrappers use UVSHIFT = 1, the P410 wrappers
// UVSHIFT = 0.
846 #ifdef HAS_P210TOAR30ROW_SSSE3
847 ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
848 #endif
849 #ifdef HAS_P210TOARGBROW_SSSE3
850 ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
851 #endif
852 #ifdef HAS_P210TOARGBROW_AVX2
853 ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
854 #endif
855 #ifdef HAS_P210TOAR30ROW_AVX2
856 ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
857 #endif
858 #ifdef HAS_P410TOAR30ROW_SSSE3
859 ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
860 #endif
861 #ifdef HAS_P410TOARGBROW_SSSE3
862 ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
863 #endif
864 #ifdef HAS_P410TOARGBROW_AVX2
865 ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
866 #endif
867 #ifdef HAS_P410TOAR30ROW_AVX2
868 ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
869 #endif
870 
// ANY21CT is retired here so the name cannot be used by later sections.
871 #undef ANY21CT
872 
// Any 2 16 bit planes with parameter to 1
//   T     sample type of both sources and of the destination.
//   BPP   bytes per source sample.
//   MASK  kernel block size in samples, minus one.
// The destination holds two samples per source position. Whole blocks are
// merged in place; the leftover samples are staged through zero-filled,
// aligned scratch rows and merged as one full block. `depth` is passed to the
// kernel unchanged.
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK)                     \
  void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
               int width) {                                          \
    SIMD_ALIGNED(T vin[16 * 2]);                                     \
    SIMD_ALIGNED(T vout[16]);                                        \
    const int body = width & ~MASK; /* samples merged in place */    \
    const int tail = width & MASK;  /* samples merged via scratch */ \
    T* const tail_u = vin;                                           \
    T* const tail_v = vin + 16;                                      \
    memset(vin, 0, sizeof(vin)); /* for msan */                      \
    if (body > 0) {                                                  \
      ANY_SIMD(src_u, src_v, dst_uv, depth, body);                   \
    }                                                                \
    memcpy(tail_u, src_u + body, tail * BPP);                        \
    memcpy(tail_v, src_v + body, tail * BPP);                        \
    ANY_SIMD(tail_u, tail_v, vout, depth, MASK + 1);                 \
    memcpy(dst_uv + body * 2, vout, tail * BPP * 2);                 \
  }
890 
// MergeUVRow_16 wrappers. ANY21PT arguments after the kernel name are the
// sample type (uint16_t), BPP = 2 bytes per sample, and MASK = 7 (blocks of 8
// samples).
#ifdef HAS_MERGEUVROW_16_AVX2
ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 7)
#endif
#ifdef HAS_MERGEUVROW_16_NEON
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
#endif

// Retire the macro instantiated above. ANY21CT was already undefined after its
// own instantiations, so undefining it again here left ANY21PT defined.
#undef ANY21PT
899 
// Any 1 to 1.
//   UVSHIFT  shift applied to the pixel index when addressing the source
//            (1 when one SBPP-sized source unit covers two pixels).
//   SBPP     bytes per source unit.
//   BPP      bytes per destination pixel.
//   MASK     kernel block size in pixels, minus one.
// Whole blocks are converted in place. The leftover pixels are copied into a
// zeroed, aligned scratch row, converted as one full block into a second
// scratch row, and only the valid bytes are copied to the destination.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)               \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
    const int body = width & ~MASK; /* pixels converted in place */      \
    const int tail = width & MASK;  /* pixels converted via scratch */   \
    const uint8_t* const tail_src = src_ptr + (body >> UVSHIFT) * SBPP;  \
    uint8_t* const tail_dst = dst_ptr + body * BPP;                      \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
    if (body > 0) {                                                      \
      ANY_SIMD(src_ptr, dst_ptr, body);                                  \
    }                                                                    \
    memcpy(vin, tail_src, SS(tail, UVSHIFT) * SBPP);                     \
    ANY_SIMD(vin, vout, MASK + 1);                                       \
    memcpy(tail_dst, vout, tail * BPP);                                  \
  }
915 
// CopyRow wrappers. ANY11 arguments after the kernel name are UVSHIFT, SBPP,
// BPP, MASK; here source and destination are both 1 byte per pixel and the
// masks give tail blocks of 64 (AVX) or 32 (SSE2, NEON) bytes.
916 #ifdef HAS_COPYROW_AVX
917 ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
918 #endif
919 #ifdef HAS_COPYROW_SSE2
920 ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
921 #endif
922 #ifdef HAS_COPYROW_NEON
923 ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
924 #endif
// ANY11 wrappers for packed pixel format conversions. Arguments after the
// kernel name are UVSHIFT (0 throughout this group), SBPP (source bytes per
// pixel), BPP (destination bytes per pixel) and MASK (block size minus one).
// Several wrappers share a single feature guard: for example the SSE2
// RGB565/ARGB1555/ARGB4444 wrappers are compiled under
// HAS_ARGBTORGB24ROW_SSSE3, and the AVX2 ARGB1555 wrapper under
// HAS_ARGBTOARGB4444ROW_AVX2.
925 #if defined(HAS_ARGBTORGB24ROW_SSSE3)
926 ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
927 ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
928 ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
929 ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
930 ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
931 #endif
932 #if defined(HAS_ARGBTORGB24ROW_AVX2)
933 ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
934 #endif
935 #if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
936 ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31)
937 #endif
938 #if defined(HAS_ARGBTORAWROW_AVX2)
939 ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31)
940 #endif
941 #if defined(HAS_ARGBTORGB565ROW_AVX2)
942 ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
943 #endif
944 #if defined(HAS_ARGBTOARGB4444ROW_AVX2)
945 ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
946 ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
947 #endif
948 #if defined(HAS_ABGRTOAR30ROW_SSSE3)
949 ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
950 #endif
951 #if defined(HAS_ARGBTOAR30ROW_SSSE3)
952 ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
953 #endif
954 #if defined(HAS_ABGRTOAR30ROW_AVX2)
955 ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
956 #endif
957 #if defined(HAS_ARGBTOAR30ROW_AVX2)
958 ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
959 #endif
960 #if defined(HAS_J400TOARGBROW_SSE2)
961 ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
962 #endif
963 #if defined(HAS_J400TOARGBROW_AVX2)
964 ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
965 #endif
966 #if defined(HAS_RGB24TOARGBROW_SSSE3)
967 ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
968 ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
969 ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
970 ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
971 ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
972 #endif
973 #if defined(HAS_RAWTORGBAROW_SSSE3)
974 ANY11(RAWToRGBARow_Any_SSSE3, RAWToRGBARow_SSSE3, 0, 3, 4, 15)
975 #endif
976 #if defined(HAS_RAWTORGB24ROW_SSSE3)
977 ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
978 #endif
979 #if defined(HAS_RGB565TOARGBROW_AVX2)
980 ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
981 #endif
982 #if defined(HAS_ARGB1555TOARGBROW_AVX2)
983 ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
984 #endif
985 #if defined(HAS_ARGB4444TOARGBROW_AVX2)
986 ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
987 #endif
// NEON, MSA, LSX and LASX variants. The J400ToARGB NEON and MSA wrappers are
// compiled under the corresponding HAS_ARGBTORGB24ROW_* guard.
988 #if defined(HAS_ARGBTORGB24ROW_NEON)
989 ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 15)
990 ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
991 ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
992 ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
993 ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
994 ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
995 #endif
996 #if defined(HAS_ARGBTORGB24ROW_MSA)
997 ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
998 ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
999 ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
1000 ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
1001 ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
1002 ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
1003 #endif
1004 #if defined(HAS_ARGBTORGB24ROW_LSX)
1005 ANY11(ARGBToRGB24Row_Any_LSX, ARGBToRGB24Row_LSX, 0, 4, 3, 15)
1006 ANY11(ARGBToRAWRow_Any_LSX, ARGBToRAWRow_LSX, 0, 4, 3, 15)
1007 ANY11(ARGBToRGB565Row_Any_LSX, ARGBToRGB565Row_LSX, 0, 4, 2, 7)
1008 ANY11(ARGBToARGB1555Row_Any_LSX, ARGBToARGB1555Row_LSX, 0, 4, 2, 7)
1009 ANY11(ARGBToARGB4444Row_Any_LSX, ARGBToARGB4444Row_LSX, 0, 4, 2, 7)
1010 #endif
1011 #if defined(HAS_ARGBTORGB24ROW_LASX)
1012 ANY11(ARGBToRGB24Row_Any_LASX, ARGBToRGB24Row_LASX, 0, 4, 3, 31)
1013 ANY11(ARGBToRAWRow_Any_LASX, ARGBToRAWRow_LASX, 0, 4, 3, 31)
1014 ANY11(ARGBToRGB565Row_Any_LASX, ARGBToRGB565Row_LASX, 0, 4, 2, 15)
1015 ANY11(ARGBToARGB1555Row_Any_LASX, ARGBToARGB1555Row_LASX, 0, 4, 2, 15)
1016 ANY11(ARGBToARGB4444Row_Any_LASX, ARGBToARGB4444Row_LASX, 0, 4, 2, 15)
1017 #endif
1018 #if defined(HAS_J400TOARGBROW_LSX)
1019 ANY11(J400ToARGBRow_Any_LSX, J400ToARGBRow_LSX, 0, 1, 4, 15)
1020 #endif
1021 #if defined(HAS_RAWTORGB24ROW_NEON)
1022 ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
1023 #endif
1024 #if defined(HAS_RAWTORGB24ROW_MSA)
1025 ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
1026 #endif
1027 #if defined(HAS_RAWTORGB24ROW_LSX)
1028 ANY11(RAWToRGB24Row_Any_LSX, RAWToRGB24Row_LSX, 0, 3, 3, 15)
1029 #endif
// ANY11 wrappers whose destination is one byte per pixel (BPP = 1).
// Arguments after the kernel name are UVSHIFT, SBPP, BPP, MASK.
// Most YUY2/UYVY entries use UVSHIFT = 1 with SBPP = 4, i.e. the source is
// addressed in 4-byte units that each cover two pixels, so an odd tail copies
// the whole last unit. UYVYToYRow_Any_AVX2 instead uses UVSHIFT = 0 with
// SBPP = 2, which copies exactly 2 bytes per tail pixel.
1030 #ifdef HAS_ARGBTOYROW_AVX2
1031 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
1032 #endif
1033 #ifdef HAS_ABGRTOYROW_AVX2
1034 ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
1035 #endif
1036 #ifdef HAS_ARGBTOYJROW_AVX2
1037 ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
1038 #endif
1039 #ifdef HAS_ABGRTOYJROW_AVX2
1040 ANY11(ABGRToYJRow_Any_AVX2, ABGRToYJRow_AVX2, 0, 4, 1, 31)
1041 #endif
1042 #ifdef HAS_RGBATOYJROW_AVX2
1043 ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
1044 #endif
1045 #ifdef HAS_UYVYTOYROW_AVX2
1046 ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
1047 #endif
1048 #ifdef HAS_YUY2TOYROW_AVX2
1049 ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
1050 #endif
1051 #ifdef HAS_ARGBTOYROW_SSSE3
1052 ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
1053 #endif
1054 #ifdef HAS_BGRATOYROW_SSSE3
1055 ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
1056 ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
1057 ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
1058 #endif
1059 #ifdef HAS_YUY2TOYROW_SSE2
1060 ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
1061 ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
1062 #endif
1063 #ifdef HAS_ARGBTOYJROW_SSSE3
1064 ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
1065 #endif
1066 #ifdef HAS_ABGRTOYJROW_SSSE3
1067 ANY11(ABGRToYJRow_Any_SSSE3, ABGRToYJRow_SSSE3, 0, 4, 1, 15)
1068 #endif
1069 #ifdef HAS_RGBATOYJROW_SSSE3
1070 ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
1071 #endif
1072 #ifdef HAS_ARGBTOYROW_NEON
1073 ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
1074 #endif
1075 #ifdef HAS_ARGBTOYROW_MSA
1076 ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
1077 #endif
1078 #ifdef HAS_ARGBTOYROW_LSX
1079 ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
1080 #endif
1081 #ifdef HAS_ARGBTOYROW_LASX
1082 ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
1083 #endif
1084 #ifdef HAS_ARGBTOYJROW_NEON
1085 ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 15)
1086 #endif
1087 #ifdef HAS_ABGRTOYJROW_NEON
1088 ANY11(ABGRToYJRow_Any_NEON, ABGRToYJRow_NEON, 0, 4, 1, 15)
1089 #endif
1090 #ifdef HAS_RGBATOYJROW_NEON
1091 ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15)
1092 #endif
1093 #ifdef HAS_ARGBTOYJROW_MSA
1094 ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
1095 #endif
1096 #ifdef HAS_ARGBTOYJROW_LSX
1097 ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
1098 #endif
1099 #ifdef HAS_RGBATOYJROW_LSX
1100 ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
1101 #endif
1102 #ifdef HAS_ABGRTOYJROW_LSX
1103 ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
1104 #endif
1105 #ifdef HAS_RGBATOYJROW_LASX
1106 ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
1107 #endif
1108 #ifdef HAS_ARGBTOYJROW_LASX
1109 ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
1110 #endif
1111 #ifdef HAS_ABGRTOYJROW_LASX
1112 ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
1113 #endif
1114 #ifdef HAS_BGRATOYROW_NEON
1115 ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
1116 #endif
1117 #ifdef HAS_BGRATOYROW_MSA
1118 ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
1119 #endif
1120 #ifdef HAS_BGRATOYROW_LSX
1121 ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
1122 #endif
1123 #ifdef HAS_BGRATOYROW_LASX
1124 ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
1125 #endif
1126 #ifdef HAS_ABGRTOYROW_NEON
1127 ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
1128 #endif
// NOTE(review): ABGRToYRow_Any_MSA uses MASK = 7 while the other MSA *ToYRow
// wrappers in this group use 15 - confirm against the kernel's block size.
1129 #ifdef HAS_ABGRTOYROW_MSA
1130 ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
1131 #endif
1132 #ifdef HAS_ABGRTOYROW_LSX
1133 ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
1134 #endif
1135 #ifdef HAS_ABGRTOYROW_LASX
1136 ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
1137 #endif
1138 #ifdef HAS_RGBATOYROW_NEON
1139 ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
1140 #endif
1141 #ifdef HAS_RGBATOYROW_MSA
1142 ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
1143 #endif
1144 #ifdef HAS_RGBATOYROW_LSX
1145 ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
1146 #endif
1147 #ifdef HAS_RGBATOYROW_LASX
1148 ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
1149 #endif
// 3-byte source pixels.
1150 #ifdef HAS_RGB24TOYROW_NEON
1151 ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
1152 #endif
1153 #ifdef HAS_RGB24TOYJROW_AVX2
1154 ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31)
1155 #endif
1156 #ifdef HAS_RGB24TOYJROW_SSSE3
1157 ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15)
1158 #endif
1159 #ifdef HAS_RGB24TOYJROW_NEON
1160 ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 15)
1161 #endif
1162 #ifdef HAS_RGB24TOYROW_MSA
1163 ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
1164 #endif
1165 #ifdef HAS_RGB24TOYROW_LSX
1166 ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
1167 #endif
1168 #ifdef HAS_RGB24TOYJROW_LSX
1169 ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
1170 #endif
1171 #ifdef HAS_RGB24TOYJROW_LASX
1172 ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
1173 #endif
1174 #ifdef HAS_RGB24TOYROW_LASX
1175 ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
1176 #endif
1177 #ifdef HAS_RAWTOYROW_NEON
1178 ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 15)
1179 #endif
1180 #ifdef HAS_RAWTOYJROW_AVX2
1181 ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31)
1182 #endif
1183 #ifdef HAS_RAWTOYJROW_SSSE3
1184 ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15)
1185 #endif
1186 #ifdef HAS_RAWTOYJROW_NEON
1187 ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 15)
1188 #endif
1189 #ifdef HAS_RAWTOYROW_MSA
1190 ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
1191 #endif
1192 #ifdef HAS_RAWTOYROW_LSX
1193 ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
1194 #endif
1195 #ifdef HAS_RAWTOYROW_LASX
1196 ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
1197 #endif
1198 #ifdef HAS_RAWTOYJROW_LSX
1199 ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
1200 #endif
1201 #ifdef HAS_RAWTOYJROW_LASX
1202 ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
1203 #endif
// 2-byte source pixels.
1204 #ifdef HAS_RGB565TOYROW_NEON
1205 ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
1206 #endif
1207 #ifdef HAS_RGB565TOYROW_MSA
1208 ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
1209 #endif
1210 #ifdef HAS_RGB565TOYROW_LSX
1211 ANY11(RGB565ToYRow_Any_LSX, RGB565ToYRow_LSX, 0, 2, 1, 15)
1212 #endif
1213 #ifdef HAS_RGB565TOYROW_LASX
1214 ANY11(RGB565ToYRow_Any_LASX, RGB565ToYRow_LASX, 0, 2, 1, 31)
1215 #endif
1216 #ifdef HAS_ARGB1555TOYROW_NEON
1217 ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
1218 #endif
1219 #ifdef HAS_ARGB1555TOYROW_MSA
1220 ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
1221 #endif
1222 #ifdef HAS_ARGB1555TOYROW_LSX
1223 ANY11(ARGB1555ToYRow_Any_LSX, ARGB1555ToYRow_LSX, 0, 2, 1, 15)
1224 #endif
1225 #ifdef HAS_ARGB1555TOYROW_LASX
1226 ANY11(ARGB1555ToYRow_Any_LASX, ARGB1555ToYRow_LASX, 0, 2, 1, 31)
1227 #endif
1228 #ifdef HAS_ARGB4444TOYROW_NEON
1229 ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
1230 #endif
// Source addressed in 4-byte units covering two pixels (UVSHIFT = 1).
1231 #ifdef HAS_YUY2TOYROW_NEON
1232 ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
1233 #endif
1234 #ifdef HAS_UYVYTOYROW_NEON
1235 ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
1236 #endif
1237 #ifdef HAS_YUY2TOYROW_MSA
1238 ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
1239 #endif
1240 #ifdef HAS_YUY2TOYROW_LSX
1241 ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15)
1242 #endif
1243 #ifdef HAS_YUY2TOYROW_LASX
1244 ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31)
1245 #endif
1246 #ifdef HAS_UYVYTOYROW_MSA
1247 ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
1248 #endif
1249 #ifdef HAS_UYVYTOYROW_LSX
1250 ANY11(UYVYToYRow_Any_LSX, UYVYToYRow_LSX, 1, 4, 1, 15)
1251 #endif
1252 #ifdef HAS_UYVYTOYROW_LASX
1253 ANY11(UYVYToYRow_Any_LASX, UYVYToYRow_LASX, 1, 4, 1, 31)
1254 #endif
1255 #ifdef HAS_AYUVTOYROW_NEON
1256 ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
1257 #endif
// SwapUVRow wrappers: 2 bytes per element on both source and destination
// (SBPP = BPP = 2), with blocks of 16 (SSSE3, NEON) or 32 (AVX2) elements.
1258 #ifdef HAS_SWAPUVROW_SSSE3
1259 ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
1260 #endif
1261 #ifdef HAS_SWAPUVROW_AVX2
1262 ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
1263 #endif
1264 #ifdef HAS_SWAPUVROW_NEON
1265 ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
1266 #endif
// ANY11 wrappers that expand 3- or 2-byte source pixels to 4-byte destination
// pixels (BPP = 4). Arguments after the kernel name are UVSHIFT, SBPP, BPP,
// MASK. The largest tail here (MASK = 31, BPP = 4) writes 31 * 4 = 124 bytes,
// which fits the 128-byte staging buffers declared by ANY11.
1267 #ifdef HAS_RGB24TOARGBROW_NEON
1268 ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
1269 #endif
1270 #ifdef HAS_RGB24TOARGBROW_MSA
1271 ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
1272 #endif
1273 #ifdef HAS_RGB24TOARGBROW_LSX
1274 ANY11(RGB24ToARGBRow_Any_LSX, RGB24ToARGBRow_LSX, 0, 3, 4, 15)
1275 #endif
1276 #ifdef HAS_RGB24TOARGBROW_LASX
1277 ANY11(RGB24ToARGBRow_Any_LASX, RGB24ToARGBRow_LASX, 0, 3, 4, 31)
1278 #endif
1279 #ifdef HAS_RAWTOARGBROW_NEON
1280 ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
1281 #endif
1282 #ifdef HAS_RAWTORGBAROW_NEON
1283 ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7)
1284 #endif
1285 #ifdef HAS_RAWTOARGBROW_MSA
1286 ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
1287 #endif
1288 #ifdef HAS_RAWTOARGBROW_LSX
1289 ANY11(RAWToARGBRow_Any_LSX, RAWToARGBRow_LSX, 0, 3, 4, 15)
1290 #endif
1291 #ifdef HAS_RAWTOARGBROW_LASX
1292 ANY11(RAWToARGBRow_Any_LASX, RAWToARGBRow_LASX, 0, 3, 4, 31)
1293 #endif
1294 #ifdef HAS_RGB565TOARGBROW_NEON
1295 ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
1296 #endif
1297 #ifdef HAS_RGB565TOARGBROW_MSA
1298 ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
1299 #endif
1300 #ifdef HAS_RGB565TOARGBROW_LSX
1301 ANY11(RGB565ToARGBRow_Any_LSX, RGB565ToARGBRow_LSX, 0, 2, 4, 15)
1302 #endif
1303 #ifdef HAS_RGB565TOARGBROW_LASX
1304 ANY11(RGB565ToARGBRow_Any_LASX, RGB565ToARGBRow_LASX, 0, 2, 4, 31)
1305 #endif
1306 #ifdef HAS_ARGB1555TOARGBROW_NEON
1307 ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
1308 #endif
1309 #ifdef HAS_ARGB1555TOARGBROW_MSA
1310 ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
1311 #endif
1312 #ifdef HAS_ARGB1555TOARGBROW_LSX
1313 ANY11(ARGB1555ToARGBRow_Any_LSX, ARGB1555ToARGBRow_LSX, 0, 2, 4, 15)
1314 #endif
1315 #ifdef HAS_ARGB1555TOARGBROW_LASX
1316 ANY11(ARGB1555ToARGBRow_Any_LASX, ARGB1555ToARGBRow_LASX, 0, 2, 4, 31)
1317 #endif
1318 #ifdef HAS_ARGB4444TOARGBROW_NEON
1319 ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
1320 #endif
1321 #ifdef HAS_ARGB4444TOARGBROW_MSA
1322 ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
1323 #endif
1324 #ifdef HAS_ARGB4444TOARGBROW_LSX
1325 ANY11(ARGB4444ToARGBRow_Any_LSX, ARGB4444ToARGBRow_LSX, 0, 2, 4, 15)
1326 #endif
1327 #ifdef HAS_ARGB4444TOARGBROW_LASX
1328 ANY11(ARGB4444ToARGBRow_Any_LASX, ARGB4444ToARGBRow_LASX, 0, 2, 4, 31)
1329 #endif
// Attenuate/Unattenuate wrappers: 4 bytes per pixel on both sides
// (SBPP = BPP = 4). Arguments after the kernel name are UVSHIFT, SBPP, BPP,
// MASK.
1330 #ifdef HAS_ARGBATTENUATEROW_SSSE3
1331 ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
1332 #endif
1333 #ifdef HAS_ARGBUNATTENUATEROW_SSE2
1334 ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
1335 #endif
1336 #ifdef HAS_ARGBATTENUATEROW_AVX2
1337 ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
1338 #endif
1339 #ifdef HAS_ARGBUNATTENUATEROW_AVX2
1340 ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
1341 #endif
1342 #ifdef HAS_ARGBATTENUATEROW_NEON
1343 ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
1344 #endif
1345 #ifdef HAS_ARGBATTENUATEROW_MSA
1346 ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
1347 #endif
1348 #ifdef HAS_ARGBATTENUATEROW_LSX
1349 ANY11(ARGBAttenuateRow_Any_LSX, ARGBAttenuateRow_LSX, 0, 4, 4, 7)
1350 #endif
1351 #ifdef HAS_ARGBATTENUATEROW_LASX
1352 ANY11(ARGBAttenuateRow_Any_LASX, ARGBAttenuateRow_LASX, 0, 4, 4, 15)
1353 #endif
// ExtractAlpha wrappers: 4-byte source pixels to 1 destination byte per pixel.
1354 #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
1355 ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
1356 #endif
1357 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
1358 ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
1359 #endif
1360 #ifdef HAS_ARGBEXTRACTALPHAROW_NEON
1361 ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
1362 #endif
1363 #ifdef HAS_ARGBEXTRACTALPHAROW_MSA
1364 ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
1365 #endif
1366 #ifdef HAS_ARGBEXTRACTALPHAROW_LSX
1367 ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15)
1368 #endif
// ANY11 is retired here so the name cannot be used by later sections.
1369 #undef ANY11
1370 
// Any 1 to 1 blended.  Destination is read, modify, write.
//   UVSHIFT  shift applied to the pixel index when addressing the source.
//   SBPP     bytes per source unit.
//   BPP      bytes per destination pixel.
//   MASK     kernel block size in pixels, minus one.
// Because the kernel reads the destination, the tail's existing destination
// bytes are loaded into the output scratch row before the kernel runs on it.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)              \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {    \
    SIMD_ALIGNED(uint8_t vin[64]);                                       \
    SIMD_ALIGNED(uint8_t vout[64]);                                      \
    const int body = width & ~MASK; /* pixels processed in place */      \
    const int tail = width & MASK;  /* pixels processed via scratch */   \
    uint8_t* const tail_dst = dst_ptr + body * BPP;                      \
    memset(vin, 0, sizeof(vin));   /* for msan */                        \
    memset(vout, 0, sizeof(vout)); /* for msan */                        \
    if (body > 0) {                                                      \
      ANY_SIMD(src_ptr, dst_ptr, body);                                  \
    }                                                                    \
    memcpy(vin, src_ptr + (body >> UVSHIFT) * SBPP,                      \
           SS(tail, UVSHIFT) * SBPP);                                    \
    memcpy(vout, tail_dst, tail * BPP);                                  \
    ANY_SIMD(vin, vout, MASK + 1);                                       \
    memcpy(tail_dst, vout, tail * BPP);                                  \
  }
1388 
// ANY11B arguments after the kernel name: UVSHIFT, SBPP, BPP, MASK.
// ARGBCopyAlphaRow reads 4-byte source pixels, ARGBCopyYToAlphaRow reads
// 1-byte source pixels; both update a 4-byte-per-pixel destination.
1389 #ifdef HAS_ARGBCOPYALPHAROW_AVX2
1390 ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
1391 #endif
1392 #ifdef HAS_ARGBCOPYALPHAROW_SSE2
1393 ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
1394 #endif
1395 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
1396 ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
1397 #endif
1398 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
1399 ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
1400 #endif
// ANY11B is retired here so the name cannot be used by later sections.
1401 #undef ANY11B
1402 
// Any 1 to 1 with parameter.
//   T     type of the extra argument handed to the kernel unchanged.
//   SBPP  bytes per source pixel.
//   BPP   bytes per destination pixel.
//   MASK  kernel block size in pixels, minus one.
// Whole blocks are converted in place; the leftover pixels go through zeroed,
// aligned scratch rows so the kernel can run on one full block.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
    SIMD_ALIGNED(uint8_t vin[64]);                                             \
    SIMD_ALIGNED(uint8_t vout[64]);                                            \
    const int body = width & ~MASK; /* pixels converted in place */            \
    const int tail = width & MASK;  /* pixels converted via scratch */         \
    const uint8_t* const tail_src = src_ptr + body * SBPP;                     \
    uint8_t* const tail_dst = dst_ptr + body * BPP;                            \
    memset(vin, 0, sizeof(vin)); /* for msan */                                \
    if (body > 0) {                                                            \
      ANY_SIMD(src_ptr, dst_ptr, param, body);                                 \
    }                                                                          \
    memcpy(vin, tail_src, tail * SBPP);                                        \
    ANY_SIMD(vin, vout, param, MASK + 1);                                      \
    memcpy(tail_dst, vout, tail * BPP);                                        \
  }
1418 
// Any-width wrappers generated with ANY11P.  The arguments after the kernel
// name are: parameter type, source bytes per pixel, destination bytes per
// pixel, and the kernel's pixel-count mask (block size - 1).

// I400ToARGBRow: parameter is a const struct YuvConstants*.
#if defined(HAS_I400TOARGBROW_SSE2)
ANY11P(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, const struct YuvConstants*,
       1, 4, 7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11P(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, const struct YuvConstants*,
       1, 4, 15)
#endif
#if defined(HAS_I400TOARGBROW_NEON)
ANY11P(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, const struct YuvConstants*,
       1, 4, 7)
#endif
#if defined(HAS_I400TOARGBROW_MSA)
ANY11P(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, const struct YuvConstants*,
       1, 4, 15)
#endif
#if defined(HAS_I400TOARGBROW_LSX)
ANY11P(I400ToARGBRow_Any_LSX, I400ToARGBRow_LSX, const struct YuvConstants*,
       1, 4, 15)
#endif

// ARGBToRGB565DitherRow: parameter is a const uint32_t.
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
       const uint32_t, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
       const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
       const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
ANY11P(ARGBToRGB565DitherRow_Any_MSA, ARGBToRGB565DitherRow_MSA,
       const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
ANY11P(ARGBToRGB565DitherRow_Any_LSX, ARGBToRGB565DitherRow_LSX,
       const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
ANY11P(ARGBToRGB565DitherRow_Any_LASX, ARGBToRGB565DitherRow_LASX,
       const uint32_t, 4, 2, 15)
#endif

// ARGBShuffleRow: parameter is a const uint8_t*.
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_LSX
ANY11P(ARGBShuffleRow_Any_LSX, ARGBShuffleRow_LSX, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_LASX
ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
#endif
// ANY11P is not needed past this point (one #undef is sufficient).
#undef ANY11P
1528 
// Any 1 to 1 with type.
// Same remainder scheme as the byte-based wrappers, but the source and
// destination pointers are typed (STYPE / DTYPE).  SBPP / BPP are bytes per
// pixel, so all offset arithmetic is done on byte pointers.  The scratch
// buffers are sized for exactly one kernel block of MASK + 1 pixels.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)  \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
    const int rem = width & MASK;                                 \
    const int full = width & ~MASK;                               \
    SIMD_ALIGNED(uint8_t vin[(MASK + 1) * SBPP]);                 \
    SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]);                 \
    memset(vin, 0, sizeof(vin)); /* for msan */                   \
    if (full > 0) {                                               \
      ANY_SIMD(src_ptr, dst_ptr, full);                           \
    }                                                             \
    memcpy(vin, (uint8_t*)(src_ptr) + full * SBPP, rem * SBPP);   \
    ANY_SIMD((STYPE*)vin, (DTYPE*)vout, MASK + 1);                \
    memcpy((uint8_t*)(dst_ptr) + full * BPP, vout, rem * BPP);    \
  }
1544 
// Any-width wrappers for the 8 bit ARGB <-> 16 bit AR64/AB64 kernels.
// Each wrapper is guarded by the feature macro of the kernel it wraps, so a
// wrapper is never emitted for a kernel that is not available.

#ifdef HAS_ARGBTOAR64ROW_SSSE3
ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif

#ifdef HAS_ARGBTOAB64ROW_SSSE3
ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif

#ifdef HAS_AR64TOARGBROW_SSSE3
ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif

// Guard matches the wrapped kernel (was HAS_ARGBTOAR64ROW_SSSE3).
#ifdef HAS_AB64TOARGBROW_SSSE3
ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif

#ifdef HAS_ARGBTOAR64ROW_AVX2
ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_ARGBTOAB64ROW_AVX2
ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_AR64TOARGBROW_AVX2
ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif

// Guard matches the wrapped kernel (was HAS_ARGBTOAR64ROW_AVX2).
#ifdef HAS_AB64TOARGBROW_AVX2
ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif

#ifdef HAS_ARGBTOAR64ROW_NEON
ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_ARGBTOAB64ROW_NEON
ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif

#ifdef HAS_AR64TOARGBROW_NEON
ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif

// Guard matches the wrapped kernel (was HAS_ARGBTOAR64ROW_NEON).
#ifdef HAS_AB64TOARGBROW_NEON
ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif

#undef ANY11T
1594 
// Any 1 to 1 with an int scale parameter and typed (8 or 16 bit) elements.
// src_ptr / dst_ptr advance in elements of STYPE / DTYPE, while SBPP / BPP
// are the byte sizes of one source / destination element as used by the
// memcpy calls.  The scratch rows hold 32 elements each.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK)             \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
    const int rem = width & MASK;                                            \
    const int full = width & ~MASK;                                          \
    SIMD_ALIGNED(STYPE vin[32]);                                             \
    SIMD_ALIGNED(DTYPE vout[32]);                                            \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (full > 0) {                                                          \
      ANY_SIMD(src_ptr, dst_ptr, scale, full);                               \
    }                                                                        \
    memcpy(vin, src_ptr + full, rem * SBPP);                                 \
    ANY_SIMD(vin, vout, scale, MASK + 1);                                    \
    memcpy(dst_ptr + full, vout, rem * BPP);                                 \
  }
1610 
// Any-width wrappers for the scaled 8/16 bit conversion kernels.  Arguments
// after the kernel name: source element bytes, destination element bytes,
// source type, destination type, pixel-count mask.
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
ANY11C(Convert16To8Row_Any_SSSE3, Convert16To8Row_SSSE3, 2, 1, uint16_t,
       uint8_t, 15)
#endif
#if defined(HAS_CONVERT16TO8ROW_AVX2)
ANY11C(Convert16To8Row_Any_AVX2, Convert16To8Row_AVX2, 2, 1, uint16_t,
       uint8_t, 31)
#endif
#if defined(HAS_CONVERT16TO8ROW_NEON)
ANY11C(Convert16To8Row_Any_NEON, Convert16To8Row_NEON, 2, 1, uint16_t,
       uint8_t, 15)
#endif
#if defined(HAS_CONVERT8TO16ROW_SSE2)
ANY11C(Convert8To16Row_Any_SSE2, Convert8To16Row_SSE2, 1, 2, uint8_t,
       uint16_t, 15)
#endif
#if defined(HAS_CONVERT8TO16ROW_AVX2)
ANY11C(Convert8To16Row_Any_AVX2, Convert8To16Row_AVX2, 1, 2, uint8_t,
       uint16_t, 31)
#endif
#if defined(HAS_MULTIPLYROW_16_AVX2)
ANY11C(MultiplyRow_16_Any_AVX2, MultiplyRow_16_AVX2, 2, 2, uint16_t,
       uint16_t, 31)
#endif
#if defined(HAS_MULTIPLYROW_16_NEON)
ANY11C(MultiplyRow_16_Any_NEON, MultiplyRow_16_NEON, 2, 2, uint16_t,
       uint16_t, 15)
#endif
#if defined(HAS_DIVIDEROW_16_AVX2)
ANY11C(DivideRow_16_Any_AVX2, DivideRow_16_AVX2, 2, 2, uint16_t, uint16_t, 31)
#endif
#if defined(HAS_DIVIDEROW_16_NEON)
ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
#endif
// Released here; the name ANY11C is defined again later in the file.
#undef ANY11C
1681 
// Any 1 to 1 with a float parameter and typed elements.
// ST / T are the source / destination element types; pointers advance in
// elements.  SBPP / BPP are the byte sizes of one source / destination
// element and must match sizeof(ST) / sizeof(T) for the tail copies to move
// whole elements.  The scratch rows hold 32 elements each.
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK)             \
  void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
    const int rem = width & MASK;                                       \
    const int full = width & ~MASK;                                     \
    SIMD_ALIGNED(ST vin[32]);                                           \
    SIMD_ALIGNED(T vout[32]);                                           \
    memset(vin, 0, sizeof(vin)); /* for msan */                         \
    if (full > 0) {                                                     \
      ANY_SIMD(src_ptr, dst_ptr, param, full);                          \
    }                                                                   \
    memcpy(vin, src_ptr + full, rem * SBPP);                            \
    ANY_SIMD(vin, vout, param, MASK + 1);                               \
    memcpy(dst_ptr + full, vout, rem * BPP);                            \
  }
1697 
// Any-width wrappers generated with ANY11P16.  Arguments after the kernel
// name: source type, destination type, source element bytes, destination
// element bytes, pixel-count mask.  The byte sizes feed memcpy directly, so
// they have to equal sizeof() of the corresponding type.
#ifdef HAS_HALFFLOATROW_SSE2
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7)
#endif
#ifdef HAS_HALFFLOATROW_AVX2
ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15)
#endif
#ifdef HAS_HALFFLOATROW_F16C
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15)
ANY11P16(HalfFloat1Row_Any_F16C,
         HalfFloat1Row_F16C,
         uint16_t,
         uint16_t,
         2,
         2,
         15)
#endif
#ifdef HAS_HALFFLOATROW_NEON
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7)
ANY11P16(HalfFloat1Row_Any_NEON,
         HalfFloat1Row_NEON,
         uint16_t,
         uint16_t,
         2,
         2,
         7)
#endif
#ifdef HAS_HALFFLOATROW_MSA
ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
#endif
#ifdef HAS_BYTETOFLOATROW_NEON
// Destination elements are 4-byte floats, so the tail copy must move 4 bytes
// per element (3 would truncate the last floats of the row).
ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 4, 7)
#endif
#ifdef HAS_HALFFLOATROW_LSX
ANY11P16(HalfFloatRow_Any_LSX, HalfFloatRow_LSX, uint16_t, uint16_t, 2, 2, 31)
#endif
#undef ANY11P16
1734 
// Any 1 to 1 with yuvconstants.
// The source may be horizontally subsampled: UVSHIFT is the log2 of the
// number of pixels that share one SBPP-byte source group, so source offsets
// use (pixels >> UVSHIFT) and the tail copy rounds up with SS().  The tail
// is converted through 128-byte scratch rows as one block of MASK + 1 pixels.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)              \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr,                 \
               const struct YuvConstants* yuvconstants, int width) {     \
    const int rem = width & MASK;                                        \
    const int full = width & ~MASK;                                      \
    SIMD_ALIGNED(uint8_t vin[128]);                                      \
    SIMD_ALIGNED(uint8_t vout[128]);                                     \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */                 \
    if (full > 0) {                                                      \
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, full);                    \
    }                                                                    \
    memcpy(vin, src_ptr + (full >> UVSHIFT) * SBPP,                      \
           SS(rem, UVSHIFT) * SBPP);                                     \
    ANY_SIMD(vin, vout, yuvconstants, MASK + 1);                         \
    memcpy(dst_ptr + full * BPP, vout, rem * BPP);                       \
  }
1751 
// Any-width wrappers for the packed YUY2 / UYVY to ARGB kernels.  Each
// feature macro enables both the YUY2 and the UYVY wrapper.
#ifdef HAS_YUY2TOARGBROW_SSSE3
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif  // HAS_YUY2TOARGBROW_SSSE3
#ifdef HAS_YUY2TOARGBROW_AVX2
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif  // HAS_YUY2TOARGBROW_AVX2
#ifdef HAS_YUY2TOARGBROW_NEON
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif  // HAS_YUY2TOARGBROW_NEON
#ifdef HAS_YUY2TOARGBROW_MSA
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#endif  // HAS_YUY2TOARGBROW_MSA
#ifdef HAS_YUY2TOARGBROW_LSX
ANY11C(YUY2ToARGBRow_Any_LSX, YUY2ToARGBRow_LSX, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
#endif  // HAS_YUY2TOARGBROW_LSX
#undef ANY11C
1773 
// Any 1 to 1 interpolate.  Takes 2 rows of source via stride.
// The tail of the first row is staged at vin[0] and, only when
// source_y_fraction is non-zero, the tail of the second row at vin[64]; the
// kernel is then run on the scratch rows with a stride of 64 elements.
// SBPP / BPP are elements per pixel of the source (TS) / destination (TD).
#define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK)           \
  void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
               int width, int source_y_fraction) {                   \
    const int rem = width & MASK;                                    \
    const int full = width & ~MASK;                                  \
    const size_t rem_bytes = rem * SBPP * sizeof(TS);                \
    SIMD_ALIGNED(TS vin[64 * 2]);                                    \
    SIMD_ALIGNED(TD vout[64]);                                       \
    memset(vin, 0, sizeof(vin)); /* for msan */                      \
    if (full > 0) {                                                  \
      ANY_SIMD(dst_ptr, src_ptr, src_stride, full,                   \
               source_y_fraction);                                   \
    }                                                                \
    memcpy(vin, src_ptr + full * SBPP, rem_bytes);                   \
    if (source_y_fraction) {                                         \
      memcpy(vin + 64, src_ptr + src_stride + full * SBPP,           \
             rem_bytes);                                             \
    }                                                                \
    ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction);            \
    memcpy(dst_ptr + full * BPP, vout, rem * BPP * sizeof(TD));      \
  }
1794 
// Any-width wrappers for the row interpolation kernels.  Arguments after
// the kernel name: destination type, source type, source elements per
// pixel, destination elements per pixel, pixel-count mask.
#if defined(HAS_INTERPOLATEROW_AVX2)
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31)
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, uint8_t, uint8_t, 1, 1,
       15)
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15)
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, uint8_t, 1, 1, 31)
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, uint8_t, 1, 1, 31)
#endif

// 16 bit source and destination.
#if defined(HAS_INTERPOLATEROW_16_NEON)
ANY11I(InterpolateRow_16_Any_NEON, InterpolateRow_16_NEON, uint16_t, uint16_t,
       1, 1, 7)
#endif
#undef ANY11I
1827 
// Any 1 to 1 interpolate with scale param.
// Identical staging to the plain interpolate wrapper (first row tail at
// vin[0], second row tail at vin[64] only when source_y_fraction is
// non-zero, scratch stride 64), with an extra int scale forwarded to the
// kernel ahead of the width.
#define ANY11IS(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK)                \
  void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride,       \
               int scale, int width, int source_y_fraction) {              \
    const int rem = width & MASK;                                          \
    const int full = width & ~MASK;                                        \
    const size_t rem_bytes = rem * SBPP * sizeof(TS);                      \
    SIMD_ALIGNED(TS vin[64 * 2]);                                          \
    SIMD_ALIGNED(TD vout[64]);                                             \
    memset(vin, 0, sizeof(vin)); /* for msan */                            \
    if (full > 0) {                                                        \
      ANY_SIMD(dst_ptr, src_ptr, src_stride, scale, full,                  \
               source_y_fraction);                                         \
    }                                                                      \
    memcpy(vin, src_ptr + full * SBPP, rem_bytes);                         \
    if (source_y_fraction) {                                               \
      memcpy(vin + 64, src_ptr + src_stride + full * SBPP, rem_bytes);     \
    }                                                                      \
    ANY_SIMD(vout, vin, 64, scale, MASK + 1, source_y_fraction);           \
    memcpy(dst_ptr + full * BPP, vout, rem * BPP * sizeof(TD));            \
  }
1848 
// Any-width wrappers for the 16 bit to 8 bit interpolation kernels.
#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
ANY11IS(InterpolateRow_16To8_Any_NEON, InterpolateRow_16To8_NEON, uint8_t,
        uint16_t, 1, 1, 7)
#endif
#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
ANY11IS(InterpolateRow_16To8_Any_AVX2, InterpolateRow_16To8_AVX2, uint8_t,
        uint16_t, 1, 1, 31)
#endif

#undef ANY11IS
1869 
// Any 1 to 1 mirror.
// Because the row is reversed, the aligned block is taken from the END of
// the source (starting rem pixels in) and written to the START of the
// destination.  The first rem source pixels are mirrored through a zeroed
// scratch row of MASK + 1 pixels; they land in the last rem pixels of the
// scratch output, which are then copied to the end of the destination.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                          \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    const int rem = width & MASK;                                     \
    const int full = width & ~MASK;                                   \
    SIMD_ALIGNED(uint8_t vin[64]);                                    \
    SIMD_ALIGNED(uint8_t vout[64]);                                   \
    memset(vin, 0, sizeof(vin)); /* for msan */                       \
    if (full > 0) {                                                   \
      ANY_SIMD(src_ptr + rem * BPP, dst_ptr, full);                   \
    }                                                                 \
    memcpy(vin, src_ptr, rem * BPP);                                  \
    ANY_SIMD(vin, vout, MASK + 1);                                    \
    memcpy(dst_ptr + full * BPP, vout + (MASK + 1 - rem) * BPP,       \
           rem * BPP);                                                \
  }
1885 
// Any-width wrappers for the mirror kernels.  Arguments after the kernel
// name: bytes per pixel and pixel-count mask.

// 1 byte per pixel.
#if defined(HAS_MIRRORROW_AVX2)
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#if defined(HAS_MIRRORROW_SSSE3)
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#if defined(HAS_MIRRORROW_NEON)
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
#endif
#if defined(HAS_MIRRORROW_MSA)
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
#if defined(HAS_MIRRORROW_LSX)
ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31)
#endif
#if defined(HAS_MIRRORROW_LASX)
ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63)
#endif

// 2 bytes per pixel.
#if defined(HAS_MIRRORUVROW_AVX2)
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#if defined(HAS_MIRRORUVROW_SSSE3)
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#if defined(HAS_MIRRORUVROW_NEON)
ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#endif
#if defined(HAS_MIRRORUVROW_MSA)
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
#endif
#if defined(HAS_MIRRORUVROW_LSX)
ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7)
#endif
#if defined(HAS_MIRRORUVROW_LASX)
ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15)
#endif

// 4 bytes per pixel.
#if defined(HAS_ARGBMIRRORROW_AVX2)
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
#if defined(HAS_ARGBMIRRORROW_LSX)
ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7)
#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15)
#endif

// 3 bytes per pixel.
#if defined(HAS_RGB24MIRRORROW_SSSE3)
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
#endif
#if defined(HAS_RGB24MIRRORROW_NEON)
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
#endif
#undef ANY11M
1947 
// Any 1 plane. (memset)
// Fill wrapper: there is no source row, so no input scratch buffer is
// needed.  The aligned part is filled in place; the tail is produced by
// filling one block of MASK + 1 pixels into scratch and copying out only the
// (width & MASK) pixels that belong to the row.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK)        \
  void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
    const int rem = width & MASK;                    \
    const int full = width & ~MASK;                  \
    SIMD_ALIGNED(uint8_t vout[64]);                  \
    if (full > 0) {                                  \
      ANY_SIMD(dst_ptr, v32, full);                  \
    }                                                \
    ANY_SIMD(vout, v32, MASK + 1);                   \
    memcpy(dst_ptr + full * BPP, vout, rem * BPP);   \
  }
1960 
// Any-width wrappers for the fill kernels.  Arguments after the kernel
// name: value type, bytes per pixel, pixel-count mask.
#if defined(HAS_SETROW_X86)
ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
#endif
#if defined(HAS_SETROW_NEON)
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
#endif
#if defined(HAS_SETROW_LSX)
ANY1(SetRow_Any_LSX, SetRow_LSX, uint8_t, 1, 15)
#endif
#if defined(HAS_ARGBSETROW_NEON)
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
#endif
#if defined(HAS_ARGBSETROW_MSA)
ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#endif
#if defined(HAS_ARGBSETROW_LSX)
ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3)
#endif
#undef ANY1
1980 
// Any 1 to 2.  Outputs UV planes.
// UVSHIFT is the source subsampling shift (source offsets use
// pixels >> UVSHIFT, BPP bytes per group) and DUVSHIFT the destination
// subsampling shift.  The tail is converted through scratch: U output goes
// to vout[0], V output to vout[128], and SS(tail, DUVSHIFT) bytes of each
// are copied to the destination planes.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK)         \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
               int width) {                                            \
    const int rem = width & MASK;                                      \
    const int full = width & ~MASK;                                    \
    SIMD_ALIGNED(uint8_t vin[128]);                                    \
    SIMD_ALIGNED(uint8_t vout[128 * 2]);                               \
    memset(vin, 0, sizeof(vin)); /* for msan */                        \
    if (full > 0) {                                                    \
      ANY_SIMD(src_ptr, dst_u, dst_v, full);                           \
    }                                                                  \
    memcpy(vin, src_ptr + (full >> UVSHIFT) * BPP,                     \
           SS(rem, UVSHIFT) * BPP);                                    \
    ANY_SIMD(vin, vout, vout + 128, MASK + 1);                         \
    memcpy(dst_u + (full >> DUVSHIFT), vout, SS(rem, DUVSHIFT));       \
    memcpy(dst_v + (full >> DUVSHIFT), vout + 128, SS(rem, DUVSHIFT)); \
  }
1998 
// Any-width wrappers for kernels that write separate U and V planes.
// Arguments after the kernel name: source subsample shift, source bytes per
// group, destination subsample shift, pixel-count mask.
#if defined(HAS_SPLITUVROW_SSE2)
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#if defined(HAS_SPLITUVROW_AVX2)
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
#if defined(HAS_SPLITUVROW_NEON)
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#if defined(HAS_SPLITUVROW_MSA)
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif
#if defined(HAS_SPLITUVROW_LSX)
ANY12(SplitUVRow_Any_LSX, SplitUVRow_LSX, 0, 2, 0, 31)
#endif
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif
#if defined(HAS_YUY2TOUV422ROW_AVX2)
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif
#if defined(HAS_YUY2TOUV422ROW_SSE2)
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif
// NOTE(review): in the groups below the ARGBToUV444Row wrappers are enabled
// by the YUY2ToUV422Row feature macro rather than an ARGBToUV444Row one --
// confirm this coupling is intended.
#if defined(HAS_YUY2TOUV422ROW_NEON)
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#endif
#if defined(HAS_YUY2TOUV422ROW_MSA)
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
#if defined(HAS_YUY2TOUV422ROW_LSX)
ANY12(ARGBToUV444Row_Any_LSX, ARGBToUV444Row_LSX, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_LSX, UYVYToUV422Row_LSX, 1, 4, 1, 15)
#endif
#if defined(HAS_YUY2TOUV422ROW_LASX)
ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31)
ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31)
#endif
#undef ANY12
2046 
// Any 1 interleaved plane to 2 planes, typed elements, with depth parameter.
// src_uv holds 2 elements of type T per pixel; dst_u / dst_v each receive 1
// element per pixel.  BPP is the byte size of one element.  The tail is
// split through scratch rows of 16 pixels (U at vout[0], V at vout[16]).
#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK)                            \
  void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
    const int rem = width & MASK;                                           \
    const int full = width & ~MASK;                                         \
    SIMD_ALIGNED(T vin[16 * 2]);                                            \
    SIMD_ALIGNED(T vout[16 * 2]);                                           \
    memset(vin, 0, sizeof(vin)); /* for msan */                             \
    if (full > 0) {                                                         \
      ANY_SIMD(src_uv, dst_u, dst_v, depth, full);                          \
    }                                                                       \
    memcpy(vin, src_uv + full * 2, rem * BPP * 2);                          \
    ANY_SIMD(vin, vout, vout + 16, depth, MASK + 1);                        \
    memcpy(dst_u + full, vout, rem * BPP);                                  \
    memcpy(dst_v + full, vout + 16, rem * BPP);                             \
  }
2063 
// Any-width wrappers for the 16 bit UV split kernels.
#ifdef HAS_SPLITUVROW_16_AVX2
ANY12PT(SplitUVRow_16_Any_AVX2, SplitUVRow_16_AVX2, uint16_t, 2, 15)
#endif

#ifdef HAS_SPLITUVROW_16_NEON
ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
#endif

// Release the macro that was actually used above.
#undef ANY12PT
2073 
// Any 1 to 3.  Outputs RGB planes.
// The aligned part of the row is split in place by ANY_SIMD.  The tail is
// copied into a zeroed scratch row, split as one block of MASK + 1 pixels,
// and the valid bytes of each plane are copied out.  The output planes sit
// 16 bytes apart in vout, so MASK must not exceed 15.  The input scratch is
// sized from BPP: a fixed 48 bytes only fits 3 byte pixels and overflows
// when a 4 byte per pixel kernel is used with MASK 15.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK)                            \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
               uint8_t* dst_b, int width) {                            \
    SIMD_ALIGNED(uint8_t vin[16 * BPP]); /* 16 pixels */               \
    SIMD_ALIGNED(uint8_t vout[16 * 3]); /* 3 planes */                 \
    memset(vin, 0, sizeof(vin)); /* for msan */                        \
    int r = width & MASK;                                              \
    int n = width & ~MASK;                                             \
    if (n > 0) {                                                       \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n);                       \
    }                                                                  \
    memcpy(vin, src_ptr + n * BPP, r * BPP);                           \
    ANY_SIMD(vin, vout, vout + 16, vout + 32, MASK + 1);               \
    memcpy(dst_r + n, vout, r);                                        \
    memcpy(dst_g + n, vout + 16, r);                                   \
    memcpy(dst_b + n, vout + 32, r);                                   \
  }
2092 
// Any-width wrappers for kernels that split packed pixels into 3 planes.
// Arguments after the kernel name: source bytes per pixel, pixel-count mask.

// 3 bytes per source pixel.
#if defined(HAS_SPLITRGBROW_SSSE3)
ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
#endif
#if defined(HAS_SPLITRGBROW_NEON)
ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
#endif

// 4 bytes per source pixel.
#if defined(HAS_SPLITXRGBROW_SSE2)
ANY13(SplitXRGBRow_Any_SSE2, SplitXRGBRow_SSE2, 4, 7)
#endif
#if defined(HAS_SPLITXRGBROW_SSSE3)
ANY13(SplitXRGBRow_Any_SSSE3, SplitXRGBRow_SSSE3, 4, 7)
#endif
#if defined(HAS_SPLITXRGBROW_AVX2)
ANY13(SplitXRGBRow_Any_AVX2, SplitXRGBRow_AVX2, 4, 15)
#endif
#if defined(HAS_SPLITXRGBROW_NEON)
ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
#endif
2111 
// Any 1 to 4.  Outputs ARGB planes.
// The aligned part of the row is split in place by ANY_SIMD.  The tail is
// copied into a zeroed 64-byte scratch row, split as one block of MASK + 1
// pixels into four 16-byte output slots, and the valid bytes of each slot
// are copied to the matching destination plane.
#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK)                            \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
               uint8_t* dst_b, uint8_t* dst_a, int width) {            \
    const int rem = width & MASK;                                      \
    const int full = width & ~MASK;                                    \
    SIMD_ALIGNED(uint8_t vin[16 * 4]);                                 \
    SIMD_ALIGNED(uint8_t vout[16 * 4]);                                \
    memset(vin, 0, sizeof(vin)); /* for msan */                        \
    if (full > 0) {                                                    \
      ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, full);             \
    }                                                                  \
    memcpy(vin, src_ptr + full * BPP, rem * BPP);                      \
    ANY_SIMD(vin, vout, vout + 16, vout + 32, vout + 48, MASK + 1);    \
    memcpy(dst_r + full, vout, rem);                                   \
    memcpy(dst_g + full, vout + 16, rem);                              \
    memcpy(dst_b + full, vout + 32, rem);                              \
    memcpy(dst_a + full, vout + 48, rem);                              \
  }
2131 
// Any-width wrappers for kernels that split 4 byte pixels into 4 planes.
#if defined(HAS_SPLITARGBROW_SSE2)
ANY14(SplitARGBRow_Any_SSE2, SplitARGBRow_SSE2, 4, 7)
#endif
#if defined(HAS_SPLITARGBROW_SSSE3)
ANY14(SplitARGBRow_Any_SSSE3, SplitARGBRow_SSSE3, 4, 7)
#endif
#if defined(HAS_SPLITARGBROW_AVX2)
ANY14(SplitARGBRow_Any_AVX2, SplitARGBRow_AVX2, 4, 15)
#endif
#if defined(HAS_SPLITARGBROW_NEON)
ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
#endif
2144 
// Any 1 to 2 with source stride (2 rows of source).  Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
// The tails of both source rows are staged in scratch (row 0 at vin[0],
// row 1 at vin[128]) and the kernel is run on them with a stride of 128.
// For an odd width with an unsubsampled source (UVSHIFT == 0) the last
// pixel of each row is duplicated so the final 2x2 average has a partner.
// Output is always half width: SS(tail, 1) bytes per plane.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u,       \
               uint8_t* dst_v, int width) {                                  \
    const int rem = width & MASK;                                            \
    const int full = width & ~MASK;                                          \
    const int src_off = (full >> UVSHIFT) * BPP;                             \
    const int rem_bytes = SS(rem, UVSHIFT) * BPP;                            \
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                      \
    SIMD_ALIGNED(uint8_t vout[128 * 2]);                                     \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    if (full > 0) {                                                          \
      ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, full);                     \
    }                                                                        \
    memcpy(vin, src_ptr + src_off, rem_bytes);                               \
    memcpy(vin + 128, src_ptr + src_stride + src_off, rem_bytes);            \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
      memcpy(vin + rem_bytes, vin + rem_bytes - BPP, BPP);                   \
      memcpy(vin + 128 + rem_bytes, vin + 128 + rem_bytes - BPP, BPP);       \
    }                                                                        \
    ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1);                          \
    memcpy(dst_u + (full >> 1), vout, SS(rem, 1));                           \
    memcpy(dst_v + (full >> 1), vout + 128, SS(rem, 1));                     \
  }
2171 
// ANY12S wrappers for the AVX2, SSSE3 and SSE2 kernels.
#if defined(HAS_ARGBTOUVROW_AVX2)
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#if defined(HAS_ABGRTOUVROW_AVX2)
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVJROW_AVX2)
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
#if defined(HAS_ABGRTOUVJROW_AVX2)
ANY12S(ABGRToUVJRow_Any_AVX2, ABGRToUVJRow_AVX2, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVJROW_SSSE3)
ANY12S(ABGRToUVJRow_Any_SSSE3, ABGRToUVJRow_SSSE3, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#if defined(HAS_YUY2TOUVROW_AVX2)
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
#endif
#if defined(HAS_YUY2TOUVROW_SSE2)
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
#endif
// ANY12S wrappers for 4-byte-per-pixel sources (ARGB, BGRA, ABGR, RGBA) on
// the NEON, MSA, LSX and LASX kernels.
#if defined(HAS_ARGBTOUVROW_NEON)
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVROW_MSA)
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVROW_LSX)
ANY12S(ARGBToUVRow_Any_LSX, ARGBToUVRow_LSX, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVROW_LASX)
ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVJROW_NEON)
ANY12S(ABGRToUVJRow_Any_NEON, ABGRToUVJRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVJROW_MSA)
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
#if defined(HAS_ARGBTOUVJROW_LSX)
ANY12S(ARGBToUVJRow_Any_LSX, ARGBToUVJRow_LSX, 0, 4, 15)
#endif
#if defined(HAS_ARGBTOUVJROW_LASX)
ANY12S(ARGBToUVJRow_Any_LASX, ARGBToUVJRow_LASX, 0, 4, 31)
#endif
#if defined(HAS_BGRATOUVROW_NEON)
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_BGRATOUVROW_MSA)
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15)
#endif
#if defined(HAS_BGRATOUVROW_LSX)
ANY12S(BGRAToUVRow_Any_LSX, BGRAToUVRow_LSX, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVROW_MSA)
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15)
#endif
#if defined(HAS_ABGRTOUVROW_LSX)
ANY12S(ABGRToUVRow_Any_LSX, ABGRToUVRow_LSX, 0, 4, 15)
#endif
#if defined(HAS_RGBATOUVROW_NEON)
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif
#if defined(HAS_RGBATOUVROW_MSA)
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15)
#endif
#if defined(HAS_RGBATOUVROW_LSX)
ANY12S(RGBAToUVRow_Any_LSX, RGBAToUVRow_LSX, 0, 4, 15)
#endif
// ANY12S wrappers for 3-byte-per-pixel sources (RGB24, RAW).
#if defined(HAS_RGB24TOUVROW_NEON)
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#if defined(HAS_RGB24TOUVJROW_NEON)
ANY12S(RGB24ToUVJRow_Any_NEON, RGB24ToUVJRow_NEON, 0, 3, 15)
#endif
#if defined(HAS_RGB24TOUVROW_MSA)
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif
#if defined(HAS_RGB24TOUVROW_LSX)
ANY12S(RGB24ToUVRow_Any_LSX, RGB24ToUVRow_LSX, 0, 3, 15)
#endif
#if defined(HAS_RGB24TOUVROW_LASX)
ANY12S(RGB24ToUVRow_Any_LASX, RGB24ToUVRow_LASX, 0, 3, 31)
#endif
#if defined(HAS_RAWTOUVROW_NEON)
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#if defined(HAS_RAWTOUVJROW_NEON)
ANY12S(RAWToUVJRow_Any_NEON, RAWToUVJRow_NEON, 0, 3, 15)
#endif
#if defined(HAS_RAWTOUVROW_MSA)
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif
#if defined(HAS_RAWTOUVROW_LSX)
ANY12S(RAWToUVRow_Any_LSX, RAWToUVRow_LSX, 0, 3, 15)
#endif
#if defined(HAS_RAWTOUVROW_LASX)
ANY12S(RAWToUVRow_Any_LASX, RAWToUVRow_LASX, 0, 3, 31)
#endif
// ANY12S wrappers for 2-byte-per-pixel sources (RGB565, ARGB1555, ARGB4444).
#if defined(HAS_RGB565TOUVROW_NEON)
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#if defined(HAS_RGB565TOUVROW_MSA)
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif
#if defined(HAS_RGB565TOUVROW_LSX)
ANY12S(RGB565ToUVRow_Any_LSX, RGB565ToUVRow_LSX, 0, 2, 15)
#endif
#if defined(HAS_RGB565TOUVROW_LASX)
ANY12S(RGB565ToUVRow_Any_LASX, RGB565ToUVRow_LASX, 0, 2, 31)
#endif
#if defined(HAS_ARGB1555TOUVROW_NEON)
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#if defined(HAS_ARGB1555TOUVROW_MSA)
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif
#if defined(HAS_ARGB1555TOUVROW_LSX)
ANY12S(ARGB1555ToUVRow_Any_LSX, ARGB1555ToUVRow_LSX, 0, 2, 15)
#endif
#if defined(HAS_ARGB1555TOUVROW_LASX)
ANY12S(ARGB1555ToUVRow_Any_LASX, ARGB1555ToUVRow_LASX, 0, 2, 31)
#endif
#if defined(HAS_ARGB4444TOUVROW_NEON)
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
// ANY12S wrappers for YUY2 and UYVY sources (UVSHIFT 1: 4 source bytes per
// pair of pixels), followed by retiring the generator macro.
#if defined(HAS_YUY2TOUVROW_NEON)
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#if defined(HAS_UYVYTOUVROW_NEON)
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#if defined(HAS_YUY2TOUVROW_MSA)
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
#if defined(HAS_YUY2TOUVROW_LSX)
ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15)
#endif
#if defined(HAS_YUY2TOUVROW_LASX)
ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31)
#endif
#if defined(HAS_UYVYTOUVROW_MSA)
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
#if defined(HAS_UYVYTOUVROW_LSX)
ANY12S(UYVYToUVRow_Any_LSX, UYVYToUVRow_LSX, 1, 4, 15)
#endif
#if defined(HAS_UYVYTOUVROW_LASX)
ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
#endif
#undef ANY12S
2340 
// Any 1 to 1 with source stride (2 rows of source).  Outputs UV plane.
// 128 byte row allows for 32 avx ARGB pixels.
//
// Defines NAMEANY(src_ptr, src_stride, dst_vu, width), which accepts any
// width by splitting the row in two parts:
//   n = width & ~MASK  leading pixels, passed straight to ANY_SIMD.
//   r = width & MASK   trailing pixels, copied from both source rows (src_ptr
//                      and src_ptr + src_stride) into a zeroed scratch buffer,
//                      run through ANY_SIMD as one block of MASK + 1 pixels;
//                      SS(r, 1) * 2 bytes of interleaved output are then
//                      copied to dst_vu at byte offset (n >> 1) * 2.
// BPP is the number of source bytes per (1 << UVSHIFT) pixels.  When UVSHIFT
// is 0 and width is odd, the last pixel is duplicated in the scratch buffer
// so the final subsample is computed from a complete pair.
// When r is 0 there is nothing left to convert, so the scratch pass (memset,
// copies and the extra ANY_SIMD call) is skipped.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                        \
  void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu,      \
               int width) {                                                  \
    SIMD_ALIGNED(uint8_t vin[128 * 2]);                                      \
    SIMD_ALIGNED(uint8_t vout[128]);                                         \
    int r = width & MASK;                                                    \
    int n = width & ~MASK;                                                   \
    if (n > 0) {                                                             \
      ANY_SIMD(src_ptr, src_stride, dst_vu, n);                              \
    }                                                                        \
    if (r == 0) {                                                            \
      return; /* no remainder: skip the scratch-buffer pass */               \
    }                                                                        \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);       \
    memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP,           \
           SS(r, UVSHIFT) * BPP);                                            \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
      memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP,   \
             BPP);                                                           \
      memcpy(vin + 128 + SS(r, UVSHIFT) * BPP,                               \
             vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP);                   \
    }                                                                        \
    ANY_SIMD(vin, 128, vout, MASK + 1);                                      \
    memcpy(dst_vu + (n >> 1) * 2, vout, SS(r, 1) * 2);                       \
  }
2366 
// AYUV to interleaved UV / VU: any-width NEON wrappers, then retire ANY11S.
#if defined(HAS_AYUVTOVUROW_NEON)
ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S
2372 
// Any-width wrapper for detiling one row of elements of type T.
//
// Defines NAMEANY(src, src_tile_stride, dst, width).  The leading
// n = width & ~MASK elements go straight to ANY_SIMD.  The trailing
// r = width & MASK elements are read from src + (n / 16) * src_tile_stride
// (pointer arithmetic in units of T) into a zeroed 16-element scratch buffer,
// run through ANY_SIMD as one block of MASK + 1 elements, and the first
// r * BPP bytes of the result are copied to dst + n.  BPP is the byte size
// used for those copies (1 for uint8_t, 2 for uint16_t in the instantiations
// below).
// When r is 0 there is nothing left to detile, so the scratch pass is skipped.
#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK)                           \
  void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \
    SIMD_ALIGNED(T vin[16]);                                                 \
    SIMD_ALIGNED(T vout[16]);                                                \
    int r = width & MASK;                                                    \
    int n = width & ~MASK;                                                   \
    if (n > 0) {                                                             \
      ANY_SIMD(src, src_tile_stride, dst, n);                                \
    }                                                                        \
    if (r == 0) {                                                            \
      return; /* no remainder: skip the scratch-buffer pass */               \
    }                                                                        \
    memset(vin, 0, sizeof(vin)); /* for msan */                              \
    memcpy(vin, src + (n / 16) * src_tile_stride, r * BPP);                  \
    ANY_SIMD(vin, src_tile_stride, vout, MASK + 1);                          \
    memcpy(dst + n, vout, r * BPP);                                          \
  }
2387 
// DetileRow (uint8_t) and DetileRow_16 (uint16_t) any-width wrappers.
#if defined(HAS_DETILEROW_NEON)
ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15)
#endif
#if defined(HAS_DETILEROW_SSE2)
ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15)
#endif
#if defined(HAS_DETILEROW_16_NEON)
ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15)
#endif
#if defined(HAS_DETILEROW_16_SSE2)
ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15)
#endif
#if defined(HAS_DETILEROW_16_AVX)
ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15)
#endif
2403 
// DetileSplitUVRow width is in bytes
//
// Defines NAMEANY(src_uv, src_tile_stride, dst_u, dst_v, width).  The leading
// n = width & ~MASK bytes go straight to ANY_SIMD.  The trailing
// r = width & MASK bytes are read from src_uv + (n / 16) * src_tile_stride
// into a zeroed 16-byte scratch buffer and passed to ANY_SIMD with width r;
// (r + 1) / 2 bytes of each half of the 8 + 8 byte scratch output are then
// copied to dst_u + n / 2 and dst_v + n / 2.
// When r is 0 the wrapper returns after the direct call, so ANY_SIMD is never
// invoked with a zero width on the scratch buffer.
#define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK)                \
  void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride, \
               uint8_t* dst_u, uint8_t* dst_v, int width) {      \
    SIMD_ALIGNED(uint8_t vin[16]);                               \
    SIMD_ALIGNED(uint8_t vout[8 * 2]);                           \
    int r = width & MASK;                                        \
    int n = width & ~MASK;                                       \
    if (n > 0) {                                                 \
      ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, n);        \
    }                                                            \
    if (r == 0) {                                                \
      return; /* no remainder: no zero-width kernel call */      \
    }                                                            \
    memset(vin, 0, sizeof(vin)); /* for msan */                  \
    memcpy(vin, src_uv + (n / 16) * src_tile_stride, r);         \
    ANY_SIMD(vin, src_tile_stride, vout, vout + 8, r);           \
    memcpy(dst_u + n / 2, vout, (r + 1) / 2);                    \
    memcpy(dst_v + n / 2, vout + 8, (r + 1) / 2);                \
  }
2421 
// DetileSplitUVRow any-width wrappers.
#if defined(HAS_DETILESPLITUVROW_NEON)
ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15)
#endif
#if defined(HAS_DETILESPLITUVROW_SSSE3)
ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15)
#endif
2428 
// Any-width wrapper that merges a tiled Y row and a tiled UV row into dst_yuy2.
//
// Defines NAMEANY(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride,
// dst_yuy2, width).  The leading n = width & ~MASK pixels go straight to
// ANY_SIMD.  For the trailing r = width & MASK pixels, r bytes are read from
// src_y + (n / 16) * src_y_tile_stride and r bytes from
// src_uv + (n / 16) * src_uv_tile_stride into the two 16-byte halves of a
// zeroed scratch buffer, ANY_SIMD is run on them with width r, and 2 * r
// output bytes are copied to dst_yuy2 + 2 * n.
// When r is 0 the wrapper returns after the direct call, so ANY_SIMD is never
// invoked with a zero width on the scratch buffer.
#define ANYDETILEMERGE(NAMEANY, ANY_SIMD, MASK)                                \
  void NAMEANY(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,              \
               const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,            \
               uint8_t* dst_yuy2, int width) {                                 \
    SIMD_ALIGNED(uint8_t vin[16 * 2]);                                         \
    SIMD_ALIGNED(uint8_t vout[16 * 2]);                                        \
    int r = width & MASK;                                                      \
    int n = width & ~MASK;                                                     \
    if (n > 0) {                                                               \
      ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, \
               n);                                                             \
    }                                                                          \
    if (r == 0) {                                                              \
      return; /* no remainder: no zero-width kernel call */                    \
    }                                                                          \
    memset(vin, 0, sizeof(vin)); /* for msan */                                \
    memcpy(vin, src_y + (n / 16) * src_y_tile_stride, r);                      \
    memcpy(vin + 16, src_uv + (n / 16) * src_uv_tile_stride, r);               \
    ANY_SIMD(vin, src_y_tile_stride, vin + 16, src_uv_tile_stride, vout, r);   \
    memcpy(dst_yuy2 + 2 * n, vout, 2 * r);                                     \
  }
2447 
// DetileToYUY2 any-width wrappers.
#if defined(HAS_DETILETOYUY2_NEON)
ANYDETILEMERGE(DetileToYUY2_Any_NEON, DetileToYUY2_NEON, 15)
#endif

#if defined(HAS_DETILETOYUY2_SSE2)
ANYDETILEMERGE(DetileToYUY2_Any_SSE2, DetileToYUY2_SSE2, 15)
#endif
2455 
2456 #ifdef __cplusplus
2457 }  // extern "C"
2458 }  // namespace libyuv
2459 #endif
2460