1 /*
2 * quarterpel DSP functions
3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stddef.h>
24 #include <stdint.h>
25
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/pixels.h"
31 #include "libavcodec/qpeldsp.h"
32 #include "fpel.h"
33
34 void ff_put_pixels8_l2_mmxext(uint8_t *dst,
35 const uint8_t *src1, const uint8_t *src2,
36 int dstStride, int src1Stride, int h);
37 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
38 const uint8_t *src1, const uint8_t *src2,
39 int dstStride, int src1Stride, int h);
40 void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
41 const uint8_t *src1, const uint8_t *src2,
42 int dstStride, int src1Stride, int h);
43 void ff_put_pixels16_l2_mmxext(uint8_t *dst,
44 const uint8_t *src1, const uint8_t *src2,
45 int dstStride, int src1Stride, int h);
46 void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
47 const uint8_t *src1, const uint8_t *src2,
48 int dstStride, int src1Stride, int h);
49 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
50 const uint8_t *src1, const uint8_t *src2,
51 int dstStride, int src1Stride, int h);
52 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
53 int dstStride, int srcStride, int h);
54 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
55 int dstStride, int srcStride, int h);
56 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
57 const uint8_t *src,
58 int dstStride, int srcStride,
59 int h);
60 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
61 int dstStride, int srcStride, int h);
62 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
63 int dstStride, int srcStride, int h);
64 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
65 const uint8_t *src,
66 int dstStride, int srcStride,
67 int h);
68 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
69 int dstStride, int srcStride);
70 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
71 int dstStride, int srcStride);
72 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
73 const uint8_t *src,
74 int dstStride, int srcStride);
75 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
76 int dstStride, int srcStride);
77 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
78 int dstStride, int srcStride);
79 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
80 const uint8_t *src,
81 int dstStride, int srcStride);
82 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
83 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
84
85 #if HAVE_X86ASM
86
87 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
88 #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
89
90 #define QPEL_OP(OPNAME, RND, MMX) \
91 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
92 const uint8_t *src, \
93 ptrdiff_t stride) \
94 { \
95 ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
96 } \
97 \
98 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
99 const uint8_t *src, \
100 ptrdiff_t stride) \
101 { \
102 uint64_t temp[8]; \
103 uint8_t *const half = (uint8_t *) temp; \
104 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
105 stride, 8); \
106 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
107 stride, stride, 8); \
108 } \
109 \
110 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
111 const uint8_t *src, \
112 ptrdiff_t stride) \
113 { \
114 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
115 stride, 8); \
116 } \
117 \
118 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
119 const uint8_t *src, \
120 ptrdiff_t stride) \
121 { \
122 uint64_t temp[8]; \
123 uint8_t *const half = (uint8_t *) temp; \
124 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
125 stride, 8); \
126 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
127 stride, 8); \
128 } \
129 \
130 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
131 const uint8_t *src, \
132 ptrdiff_t stride) \
133 { \
134 uint64_t temp[8]; \
135 uint8_t *const half = (uint8_t *) temp; \
136 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
137 8, stride); \
138 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
139 stride, stride, 8); \
140 } \
141 \
142 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
143 const uint8_t *src, \
144 ptrdiff_t stride) \
145 { \
146 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
147 stride, stride); \
148 } \
149 \
150 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
151 const uint8_t *src, \
152 ptrdiff_t stride) \
153 { \
154 uint64_t temp[8]; \
155 uint8_t *const half = (uint8_t *) temp; \
156 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
157 8, stride); \
158 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
159 stride, 8); \
160 } \
161 \
162 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
163 const uint8_t *src, \
164 ptrdiff_t stride) \
165 { \
166 uint64_t half[8 + 9]; \
167 uint8_t *const halfH = (uint8_t *) half + 64; \
168 uint8_t *const halfHV = (uint8_t *) half; \
169 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
170 stride, 9); \
171 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
172 stride, 9); \
173 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
174 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
175 stride, 8, 8); \
176 } \
177 \
178 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
179 const uint8_t *src, \
180 ptrdiff_t stride) \
181 { \
182 uint64_t half[8 + 9]; \
183 uint8_t *const halfH = (uint8_t *) half + 64; \
184 uint8_t *const halfHV = (uint8_t *) half; \
185 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
186 stride, 9); \
187 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
188 stride, 9); \
189 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
190 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
191 stride, 8, 8); \
192 } \
193 \
194 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
195 const uint8_t *src, \
196 ptrdiff_t stride) \
197 { \
198 uint64_t half[8 + 9]; \
199 uint8_t *const halfH = (uint8_t *) half + 64; \
200 uint8_t *const halfHV = (uint8_t *) half; \
201 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
202 stride, 9); \
203 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
204 stride, 9); \
205 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
206 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
207 stride, 8, 8); \
208 } \
209 \
210 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
211 const uint8_t *src, \
212 ptrdiff_t stride) \
213 { \
214 uint64_t half[8 + 9]; \
215 uint8_t *const halfH = (uint8_t *) half + 64; \
216 uint8_t *const halfHV = (uint8_t *) half; \
217 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
218 stride, 9); \
219 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
220 stride, 9); \
221 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
222 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
223 stride, 8, 8); \
224 } \
225 \
226 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
227 const uint8_t *src, \
228 ptrdiff_t stride) \
229 { \
230 uint64_t half[8 + 9]; \
231 uint8_t *const halfH = (uint8_t *) half + 64; \
232 uint8_t *const halfHV = (uint8_t *) half; \
233 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
234 stride, 9); \
235 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
236 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
237 stride, 8, 8); \
238 } \
239 \
240 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
241 const uint8_t *src, \
242 ptrdiff_t stride) \
243 { \
244 uint64_t half[8 + 9]; \
245 uint8_t *const halfH = (uint8_t *) half + 64; \
246 uint8_t *const halfHV = (uint8_t *) half; \
247 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
248 stride, 9); \
249 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
250 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
251 stride, 8, 8); \
252 } \
253 \
254 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
255 const uint8_t *src, \
256 ptrdiff_t stride) \
257 { \
258 uint64_t half[8 + 9]; \
259 uint8_t *const halfH = (uint8_t *) half; \
260 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
261 stride, 9); \
262 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
263 8, stride, 9); \
264 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
265 stride, 8); \
266 } \
267 \
268 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
269 const uint8_t *src, \
270 ptrdiff_t stride) \
271 { \
272 uint64_t half[8 + 9]; \
273 uint8_t *const halfH = (uint8_t *) half; \
274 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
275 stride, 9); \
276 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
277 stride, 9); \
278 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
279 stride, 8); \
280 } \
281 \
282 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
283 const uint8_t *src, \
284 ptrdiff_t stride) \
285 { \
286 uint64_t half[9]; \
287 uint8_t *const halfH = (uint8_t *) half; \
288 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
289 stride, 9); \
290 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
291 stride, 8); \
292 } \
293 \
294 static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
295 const uint8_t *src, \
296 ptrdiff_t stride) \
297 { \
298 ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
299 } \
300 \
301 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
302 const uint8_t *src, \
303 ptrdiff_t stride) \
304 { \
305 uint64_t temp[32]; \
306 uint8_t *const half = (uint8_t *) temp; \
307 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
308 stride, 16); \
309 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
310 stride, 16); \
311 } \
312 \
313 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
314 const uint8_t *src, \
315 ptrdiff_t stride) \
316 { \
317 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
318 stride, stride, 16);\
319 } \
320 \
321 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
322 const uint8_t *src, \
323 ptrdiff_t stride) \
324 { \
325 uint64_t temp[32]; \
326 uint8_t *const half = (uint8_t*) temp; \
327 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
328 stride, 16); \
329 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
330 stride, stride, 16); \
331 } \
332 \
333 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
334 const uint8_t *src, \
335 ptrdiff_t stride) \
336 { \
337 uint64_t temp[32]; \
338 uint8_t *const half = (uint8_t *) temp; \
339 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
340 stride); \
341 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
342 stride, 16); \
343 } \
344 \
345 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
346 const uint8_t *src, \
347 ptrdiff_t stride) \
348 { \
349 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
350 stride, stride); \
351 } \
352 \
353 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
354 const uint8_t *src, \
355 ptrdiff_t stride) \
356 { \
357 uint64_t temp[32]; \
358 uint8_t *const half = (uint8_t *) temp; \
359 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
360 stride); \
361 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
362 stride, stride, 16); \
363 } \
364 \
365 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
366 const uint8_t *src, \
367 ptrdiff_t stride) \
368 { \
369 uint64_t half[16 * 2 + 17 * 2]; \
370 uint8_t *const halfH = (uint8_t *) half + 256; \
371 uint8_t *const halfHV = (uint8_t *) half; \
372 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
373 stride, 17); \
374 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
375 stride, 17); \
376 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
377 16, 16); \
378 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
379 stride, 16, 16); \
380 } \
381 \
382 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
383 const uint8_t *src, \
384 ptrdiff_t stride) \
385 { \
386 uint64_t half[16 * 2 + 17 * 2]; \
387 uint8_t *const halfH = (uint8_t *) half + 256; \
388 uint8_t *const halfHV = (uint8_t *) half; \
389 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
390 stride, 17); \
391 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
392 stride, 17); \
393 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
394 16, 16); \
395 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
396 stride, 16, 16); \
397 } \
398 \
399 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
400 const uint8_t *src, \
401 ptrdiff_t stride) \
402 { \
403 uint64_t half[16 * 2 + 17 * 2]; \
404 uint8_t *const halfH = (uint8_t *) half + 256; \
405 uint8_t *const halfHV = (uint8_t *) half; \
406 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
407 stride, 17); \
408 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
409 stride, 17); \
410 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
411 16, 16); \
412 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
413 stride, 16, 16); \
414 } \
415 \
416 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
417 const uint8_t *src, \
418 ptrdiff_t stride) \
419 { \
420 uint64_t half[16 * 2 + 17 * 2]; \
421 uint8_t *const halfH = (uint8_t *) half + 256; \
422 uint8_t *const halfHV = (uint8_t *) half; \
423 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
424 stride, 17); \
425 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
426 stride, 17); \
427 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
428 16, 16); \
429 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
430 stride, 16, 16); \
431 } \
432 \
433 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
434 const uint8_t *src, \
435 ptrdiff_t stride) \
436 { \
437 uint64_t half[16 * 2 + 17 * 2]; \
438 uint8_t *const halfH = (uint8_t *) half + 256; \
439 uint8_t *const halfHV = (uint8_t *) half; \
440 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
441 stride, 17); \
442 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
443 16, 16); \
444 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
445 stride, 16, 16); \
446 } \
447 \
448 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
449 const uint8_t *src, \
450 ptrdiff_t stride) \
451 { \
452 uint64_t half[16 * 2 + 17 * 2]; \
453 uint8_t *const halfH = (uint8_t *) half + 256; \
454 uint8_t *const halfHV = (uint8_t *) half; \
455 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
456 stride, 17); \
457 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
458 16, 16); \
459 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
460 stride, 16, 16); \
461 } \
462 \
463 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
464 const uint8_t *src, \
465 ptrdiff_t stride) \
466 { \
467 uint64_t half[17 * 2]; \
468 uint8_t *const halfH = (uint8_t *) half; \
469 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
470 stride, 17); \
471 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
472 stride, 17); \
473 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
474 stride, 16); \
475 } \
476 \
477 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
478 const uint8_t *src, \
479 ptrdiff_t stride) \
480 { \
481 uint64_t half[17 * 2]; \
482 uint8_t *const halfH = (uint8_t *) half; \
483 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
484 stride, 17); \
485 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
486 stride, 17); \
487 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
488 stride, 16); \
489 } \
490 \
491 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
492 const uint8_t *src, \
493 ptrdiff_t stride) \
494 { \
495 uint64_t half[17 * 2]; \
496 uint8_t *const halfH = (uint8_t *) half; \
497 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
498 stride, 17); \
499 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
500 stride, 16); \
501 }
502
QPEL_OP(put_,_,mmxext)503 QPEL_OP(put_, _, mmxext)
504 QPEL_OP(avg_, _, mmxext)
505 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
506
507 #endif /* HAVE_X86ASM */
508
509 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
510 do { \
511 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
512 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
513 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
514 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
515 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
516 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
517 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
518 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
519 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
520 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
521 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
522 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
523 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
524 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
525 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
526 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
527 } while (0)
528
529 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
530 {
531 int cpu_flags = av_get_cpu_flags();
532
533 if (X86_MMXEXT(cpu_flags)) {
534 #if HAVE_MMXEXT_EXTERNAL
535 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
536 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
537
538 SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
539 SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
540 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
541 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
542 #endif /* HAVE_MMXEXT_EXTERNAL */
543 }
544 }
545