• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;*****************************************************************************
2;* SIMD-optimized pixel operations
3;*****************************************************************************
4;* Copyright (c) 2000, 2001 Fabrice Bellard
5;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6;*
7;* This file is part of FFmpeg.
8;*
9;* FFmpeg is free software; you can redistribute it and/or
10;* modify it under the terms of the GNU Lesser General Public
11;* License as published by the Free Software Foundation; either
12;* version 2.1 of the License, or (at your option) any later version.
13;*
14;* FFmpeg is distributed in the hope that it will be useful,
15;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17;* Lesser General Public License for more details.
18;*
19;* You should have received a copy of the GNU Lesser General Public
20;* License along with FFmpeg; if not, write to the Free Software
21;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22;*****************************************************************************
23
24%include "libavutil/x86/x86util.asm"
25
26SECTION .text
27
; get_pixels (SSE2): widen an 8x8 block of bytes to 16-bit words.
;
; Register arguments (x86inc numbering):
;   r0 / blockq  - destination; receives 8 rows of 8 words (128 bytes), written
;                  with aligned 16-byte stores, so it must be 16-byte aligned
;   r1 / pixelsq - source; 8 bytes are read from each of 8 rows
;   r2 / strideq - distance in bytes between consecutive source rows
; Scratch: stride3q (r3) = 3 * stride, m0-m3 = row data, m4 = zero.
; NOTE(review): presumably matches
;   void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
; confirm against the C-side declaration.
INIT_XMM sse2
cglobal get_pixels, 3, 4, 5, block, pixels, stride, stride3
    pxor              m4, m4                    ; zero, interleaved in as the high byte of each word
    lea         stride3q, [strideq*3]           ; addressing has no *3 scale, so precompute it

    ; rows 0-3: load 8 bytes per row, zero-extend to words, store
    movh              m0, [pixelsq]
    movh              m1, [pixelsq+strideq]
    movh              m2, [pixelsq+strideq*2]
    movh              m3, [pixelsq+stride3q]
    lea          pixelsq, [pixelsq+strideq*4]   ; step the source down to row 4
    punpcklbw         m0, m4
    punpcklbw         m1, m4
    punpcklbw         m2, m4
    punpcklbw         m3, m4
    mova   [blockq], m0
    mova   [blockq+1*mmsize], m1
    mova   [blockq+2*mmsize], m2
    mova   [blockq+3*mmsize], m3

    ; rows 4-7: same pattern relative to the advanced source pointer
    movh              m0, [pixelsq]
    movh              m1, [pixelsq+strideq]
    movh              m2, [pixelsq+strideq*2]
    movh              m3, [pixelsq+stride3q]
    punpcklbw         m0, m4
    punpcklbw         m1, m4
    punpcklbw         m2, m4
    punpcklbw         m3, m4
    mova   [blockq+4*mmsize], m0
    mova   [blockq+5*mmsize], m1
    mova   [blockq+6*mmsize], m2
    mova   [blockq+7*mmsize], m3
    RET
58
; void ff_diff_pixels(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                     ptrdiff_t stride);
;
; Stores the per-pixel difference s1 - s2 of two 8x8 byte blocks as 16-bit
; words: 8 rows of 8 words (128 bytes) written to block with aligned 16-byte
; stores, so block must be 16-byte aligned. s1 and s2 share the same stride.
;
; Scratch: offsetq (r4) = negative byte offset from the end of block,
;          m0/m1 = s1 rows, m2/m3 = s2 rows, m4 = zero.
INIT_XMM sse2
cglobal diff_pixels, 4, 5, 5, block, s1, s2, stride, offset
    pxor              m4, m4                    ; zero, interleaved in as the high byte of each word
    add           blockq, 8*mmsize              ; point at the end of the 128-byte output ...
    mov          offsetq, -8*mmsize             ; ... and index it with an offset counting up to 0
.loop:
    ; Two rows per iteration: 8 bytes from each source, current row and next row.
    movh              m0, [s1q]
    movh              m2, [s2q]
    movh              m1, [s1q+strideq]
    movh              m3, [s2q+strideq]
    punpcklbw         m0, m4
    punpcklbw         m1, m4
    punpcklbw         m2, m4
    punpcklbw         m3, m4
    psubw             m0, m2                    ; row n:   s1 - s2
    psubw             m1, m3                    ; row n+1: s1 - s2
    mova   [blockq+offsetq], m0
    mova   [blockq+offsetq+mmsize], m1
    lea              s1q, [s1q+strideq*2]       ; lea leaves the flags alone
    lea              s2q, [s2q+strideq*2]
    add          offsetq, 2*mmsize              ; reaches zero (ZF set) after the 4th pass
    jnz .loop
    RET
84