1 /*
2 * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <arm_neon.h>
12
13 #include "./vpx_dsp_rtcd.h"
14 #include "vpx/vpx_integer.h"
15
// Copies a block of 16-bit samples from src to dst, one row at a time.
//
// src_stride and dst_stride are applied directly to the uint16_t pointers,
// i.e. they are measured in samples, not bytes.
//
// The width only selects how many samples are moved per row:
//   w < 8        ->  4 samples
//   w == 8       ->  8 samples
//   8 < w < 32   -> 16 samples
//   w == 32      -> 32 samples
//   otherwise    -> 64 samples
//
// Rows are handled in batches (2 rows for the 4/8-sample paths, 4 rows for
// the wider ones) inside do/while loops, so at least one full batch is always
// copied and the row count is effectively rounded up to a whole number of
// batches.
// NOTE(review): presumably callers only pass heights that are multiples of
// the batch size - confirm against the call sites.
//
// filter_x, filter_x_stride, filter_y, filter_y_stride and bd are not used.
void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
                                   uint16_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int filter_x_stride,
                                   const int16_t *filter_y, int filter_y_stride,
                                   int w, int h, int bd) {
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  if (w < 8) {
    // 4 samples per row, 2 rows per batch.
    do {
      for (row = 0; row < 2; ++row) {
        const uint16x4_t px = vld1_u16(src);
        vst1_u16(dst, px);
        src += src_stride;
        dst += dst_stride;
      }
      h -= 2;
    } while (h > 0);
    return;
  }

  if (w == 8) {
    // 8 samples per row, 2 rows per batch.
    do {
      for (row = 0; row < 2; ++row) {
        const uint16x8_t px = vld1q_u16(src);
        vst1q_u16(dst, px);
        src += src_stride;
        dst += dst_stride;
      }
      h -= 2;
    } while (h > 0);
    return;
  }

  if (w < 32) {
    // 16 samples per row, 4 rows per batch. The 2-way de-interleaving load
    // is undone by the matching interleaving store, so the net effect is a
    // straight copy.
    do {
      for (row = 0; row < 4; ++row) {
        const uint16x8x2_t px = vld2q_u16(src);
        vst2q_u16(dst, px);
        src += src_stride;
        dst += dst_stride;
      }
      h -= 4;
    } while (h > 0);
    return;
  }

  if (w == 32) {
    // 32 samples per row, 4 rows per batch (4-way load/store pair).
    do {
      for (row = 0; row < 4; ++row) {
        const uint16x8x4_t px = vld4q_u16(src);
        vst4q_u16(dst, px);
        src += src_stride;
        dst += dst_stride;
      }
      h -= 4;
    } while (h > 0);
    return;
  }

  // 64 samples per row, 4 rows per batch. Each row is moved as two 32-sample
  // halves; the first half is stored before the second half is loaded.
  do {
    for (row = 0; row < 4; ++row) {
      const uint16x8x4_t first_half = vld4q_u16(src);
      vst4q_u16(dst, first_half);
      {
        const uint16x8x4_t second_half = vld4q_u16(src + 32);
        vst4q_u16(dst + 32, second_half);
      }
      src += src_stride;
      dst += dst_stride;
    }
    h -= 4;
  } while (h > 0);
}
101