;*****************************************************************************
;* SIMD-optimized pixel operations
;*****************************************************************************
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************

24%include "libavutil/x86/x86util.asm"
25
26SECTION .text
27
28INIT_MMX mmx
29; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride)
30cglobal get_pixels, 3,4
31    add          r0, 128
32    mov          r3, -128
33    pxor         m7, m7
34.loop:
35    mova         m0, [r1]
36    mova         m2, [r1+r2]
37    mova         m1, m0
38    mova         m3, m2
39    punpcklbw    m0, m7
40    punpckhbw    m1, m7
41    punpcklbw    m2, m7
42    punpckhbw    m3, m7
43    mova [r0+r3+ 0], m0
44    mova [r0+r3+ 8], m1
45    mova [r0+r3+16], m2
46    mova [r0+r3+24], m3
47    lea          r1, [r1+r2*2]
48    add          r3, 32
49    js .loop
50    REP_RET
51
52INIT_XMM sse2
53cglobal get_pixels, 3, 4, 5
54    lea          r3, [r2*3]
55    pxor         m4, m4
56    movh         m0, [r1]
57    movh         m1, [r1+r2]
58    movh         m2, [r1+r2*2]
59    movh         m3, [r1+r3]
60    lea          r1, [r1+r2*4]
61    punpcklbw    m0, m4
62    punpcklbw    m1, m4
63    punpcklbw    m2, m4
64    punpcklbw    m3, m4
65    mova       [r0], m0
66    mova  [r0+0x10], m1
67    mova  [r0+0x20], m2
68    mova  [r0+0x30], m3
69    movh         m0, [r1]
70    movh         m1, [r1+r2*1]
71    movh         m2, [r1+r2*2]
72    movh         m3, [r1+r3]
73    punpcklbw    m0, m4
74    punpcklbw    m1, m4
75    punpcklbw    m2, m4
76    punpcklbw    m3, m4
77    mova  [r0+0x40], m0
78    mova  [r0+0x50], m1
79    mova  [r0+0x60], m2
80    mova  [r0+0x70], m3
81    RET
82
83; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
84;                         ptrdiff_t stride);
85%macro DIFF_PIXELS 0
86cglobal diff_pixels, 4,5,5
87    pxor         m4, m4
88    add          r0,  128
89    mov          r4, -128
90.loop:
91    movq         m0, [r1]
92    movq         m2, [r2]
93%if mmsize == 8
94    movq         m1, m0
95    movq         m3, m2
96    punpcklbw    m0, m4
97    punpckhbw    m1, m4
98    punpcklbw    m2, m4
99    punpckhbw    m3, m4
100%else
101    movq         m1, [r1+r3]
102    movq         m3, [r2+r3]
103    punpcklbw    m0, m4
104    punpcklbw    m1, m4
105    punpcklbw    m2, m4
106    punpcklbw    m3, m4
107%endif
108    psubw        m0, m2
109    psubw        m1, m3
110    mova  [r0+r4+0], m0
111    mova  [r0+r4+mmsize], m1
112%if mmsize == 8
113    add          r1, r3
114    add          r2, r3
115%else
116    lea          r1, [r1+r3*2]
117    lea          r2, [r2+r3*2]
118%endif
119    add          r4, 2 * mmsize
120    jne .loop
121    RET
122%endmacro
123
124INIT_MMX mmx
125DIFF_PIXELS
126
127INIT_XMM sse2
128DIFF_PIXELS
129