/*
 * SIMD-optimized pixel operations
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stddef.h>
#include <stdint.h>

#include "libavutil/attributes.h"
#include "libavcodec/pixblockdsp.h"
#include "asm.h"

/**
 * Copy an 8-row block of 8 pixels each into a block of 16-bit coefficients.
 *
 * Each iteration loads one 64-bit quadword (8 source bytes) from the current
 * row and stores it as two unpacked quadwords (8 int16_t values).
 *
 * @param block  destination; 64 consecutive int16_t values are written
 * @param pixels source; 8 bytes are read from each of 8 rows
 * @param stride byte distance between the starts of consecutive source rows
 *
 * NOTE(review): ldq()/stq()/unpkbw() come from asm.h.  They are presumably an
 * aligned 64-bit load, an aligned 64-bit store and the MVI "unpack bytes to
 * words" operation (widening the low four bytes to four 16-bit lanes);
 * confirm there, including any alignment requirement on both pointers.
 */
static void get_pixels_mvi(int16_t *restrict block,
                           const uint8_t *restrict pixels, ptrdiff_t stride)
{
    int h = 8;

    do {
        uint64_t p;

        p = ldq(pixels);
        /* Low four source bytes first, then the high four. */
        stq(unpkbw(p),       block);
        stq(unpkbw(p >> 32), block + 4);

        pixels += stride;
        block += 8;
    } while (--h);
}

/**
 * Store the difference of two 8-row blocks of 8 pixels each as 16-bit values.
 *
 * Both sources are read one 64-bit quadword per row and subtracted as whole
 * quadwords; the per-byte results are then repaired and widened before being
 * stored.
 *
 * @param block  destination; 64 consecutive int16_t values are written
 * @param s1     minuend rows; 8 bytes are read from each of 8 rows
 * @param s2     subtrahend rows; 8 bytes are read from each of 8 rows
 * @param stride byte distance between consecutive rows, applied to both
 *               s1 and s2
 *
 * NOTE(review): cmpbge()/zap()/unpkbw()/ldq()/stq() come from asm.h.  The
 * comments below assume the usual Alpha semantics: cmpbge() yields one bit
 * per byte lane, set where the byte of x is >= the byte of y (unsigned), and
 * zap() clears the byte lanes selected by that bit mask.  Confirm there.
 */
static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                            ptrdiff_t stride)
{
    int h = 8;
    uint64_t mask = 0x4040;

    /* Replicate the constant into every byte: 0x4040404040404040. */
    mask |= mask << 16;
    mask |= mask << 32;
    do {
        uint64_t x, y, c, d, a;
        uint64_t signs;

        x = ldq(s1);
        y = ldq(s2);
        c = cmpbge(x, y);
        /* One 64-bit subtraction: a lane with x < y borrows from the lane
         * above it, which is undone below. */
        d = x - y;
        a = zap(mask, c);       /* We use 0x4040404040404040 here...  */
        /* 4 * 0x40 == 0x100, i.e. +1 in the next higher byte lane. */
        d += 4 * a;             /* ...so we can use s4addq here.      */
        /* All-ones in every lane that zap() leaves untouched. */
        signs = zap(-1, c);

        /* Low byte of each 16-bit lane is the difference byte, high byte
         * comes from the matching lane of signs. */
        stq(unpkbw(d)       | (unpkbw(signs)       << 8), block);
        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);

        s1 += stride;
        s2 += stride;
        block += 8;
    } while (--h);
}

ff_pixblockdsp_init_alpha(PixblockDSPContext * c,AVCodecContext * avctx,unsigned high_bit_depth)71 av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
72                                        unsigned high_bit_depth)
73 {
74     if (amask(AMASK_MVI) == 0) {
75         if (!high_bit_depth)
76             c->get_pixels = get_pixels_mvi;
77         c->diff_pixels = diff_pixels_mvi;
78     }
79 }
80