;******************************************************************************
;* SIMD-optimized fullpel functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

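; PAVGB_MMX %1 (mem), %2 (reg), %3 (tmp), %4 (mask of 0xFE bytes)
; Emulates pavgb for CPUs without MMXEXT: the rounded-up byte-wise average of
; %1 and %2 ends up in %2 (via SWAP), using avg(a,b) = (a|b) - ((a^b)>>1);
; the 0xFE mask keeps psrlq from shifting bits between neighbouring bytes.
; Roughly equivalent C for a single byte (illustration only):
;   avg = (a | b) - (((a ^ b) & 0xFE) >> 1);   /* == (a + b + 1) >> 1 */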
%macro PAVGB_MMX 4
    LOAD   %3, %1
    por    %3, %2
    pxor   %2, %1
    pand   %2, %4
    psrlq  %2, 1
    psubb  %3, %2
    SWAP   %2, %3
%endmacro

; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
;                        ptrdiff_t line_size, int h)
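; Rough C equivalent of one instantiation (illustration only; SIZE stands for
; the block width baked into the function name, 4/8/16):
;   for (i = 0; i < h; i++) {
;       for (j = 0; j < SIZE; j++)
;           block[j] = avg ? (block[j] + pixels[j] + 1) >> 1 : pixels[j];
;       block  += line_size;
;       pixels += line_size;
;   }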
%macro OP_PIXELS 2
%if %2 == mmsize/2
%define LOAD movh
%define SAVE movh
%define LEN  mmsize
%else
%define LOAD movu
%define SAVE mova
%define LEN  %2
%endif
cglobal %1_pixels%2, 4,5,4
    lea          r4, [r2*3]        ; r4 = 3 * line_size
%ifidn %1, avg
%if notcpuflag(mmxext)
    pcmpeqd      m6, m6
    paddb        m6, m6            ; m6 = 0xFE in every byte (mask for PAVGB_MMX)
%endif
%endif
.loop:
%assign %%i 0
%rep LEN/mmsize
    LOAD         m0, [r1 + %%i]
    LOAD         m1, [r1+r2 + %%i]
    LOAD         m2, [r1+r2*2 + %%i]
    LOAD         m3, [r1+r4 + %%i]
%ifidn %1, avg
%if notcpuflag(mmxext)
    PAVGB_MMX    [r0 + %%i], m0, m4, m6
    PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
    PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
    PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
%else
    pavgb        m0, [r0 + %%i]
    pavgb        m1, [r0+r2 + %%i]
    pavgb        m2, [r0+r2*2 + %%i]
    pavgb        m3, [r0+r4 + %%i]
%endif
%endif
    SAVE       [r0 + %%i], m0
    SAVE    [r0+r2 + %%i], m1
    SAVE  [r0+r2*2 + %%i], m2
    SAVE    [r0+r4 + %%i], m3
%assign %%i %%i+mmsize
%endrep
    sub         r3d, 4             ; h -= 4 (4 rows per iteration)
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    jne       .loop
    RET
%endmacro
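
; Each OP_PIXELS invocation below emits one exported function via cglobal,
; e.g. ff_put_pixels8_mmx or ff_avg_pixels16_sse2, named after the operation,
; the block width and the instruction set selected by INIT_MMX/INIT_XMM.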

INIT_MMX mmx
OP_PIXELS put, 4
OP_PIXELS avg, 4
OP_PIXELS put, 8
OP_PIXELS avg, 8
OP_PIXELS put, 16
OP_PIXELS avg, 16

INIT_MMX mmxext
OP_PIXELS avg, 4
OP_PIXELS avg, 8
OP_PIXELS avg, 16

INIT_XMM sse2
OP_PIXELS put, 16
OP_PIXELS avg, 16