/*!
 * \copy
 *     Copyright (c)  2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifdef HAVE_NEON
#include "arm_arch_common_macro.S"

// SQR_ADD_16BYTES arg0, arg1, arg2
//   arg0, arg1 : D registers, 8 unsigned bytes each (16 bytes in total)
//   arg2       : Q register holding four u32 accumulators
// Squares every byte of arg0 and arg1 (u8 * u8 -> u16) and pairwise-adds
// the sixteen 16-bit squares into the four 32-bit lanes of arg2.
// Clobbers q3 and q8, so neither may be passed as (or alias) an operand.
.macro SQR_ADD_16BYTES arg0, arg1, arg2
    vmull.u8 q3, \arg0, \arg0       // q3 = 8 x u16 squares of arg0
    vmull.u8 q8, \arg1, \arg1       // q8 = 8 x u16 squares of arg1
    vpadal.u16 \arg2, q3            // arg2 += adjacent pairs of q3
    vpadal.u16 \arg2, q8            // arg2 += adjacent pairs of q8
.endm


//-----------------------------------------------------------------------
// SampleVariance16x16_neon
//
// Walks 16 rows of 16 bytes from two buffers ("ref" and "src") and
// accumulates over all 256 byte positions:
//     sum     = SUM |ref - src|        sqr     = SUM |ref - src|^2
//     sum_cur = SUM src                sqr_cur = SUM src^2
// It then stores two 16-bit results back to back at the output pointer:
//     out[0] = (sqr     >> 8) - (sum     >> 8)^2
//     out[1] = (sqr_cur >> 8) - (sum_cur >> 8)^2
// Each result is computed in 32 bits; only the low halfword is stored.
//
// In:    r0 = ref row pointer, r1 = byte step added to r0 after each row
//        r2 = src row pointer, r3 = byte step added to r2 after each row
//        first stack word at entry = output pointer
//        (presumably the 5th C argument under AAPCS -- confirm against
//        the C prototype, which is not visible in this file)
// Out:   two halfwords written at the output pointer
// Clobb: r0, r2 (advanced by 16 rows), q0-q3, q8-q15, condition flags.
//        r4 is saved on entry and restored before return.
//-----------------------------------------------------------------------
WELS_ASM_FUNC_BEGIN SampleVariance16x16_neon
    stmdb sp!, {r4}                 // r4 is used as loop counter / out pointer

    // Row 0 is handled outside the loop: its results initialise the
    // accumulators directly, so no registers need to be zeroed first.
    vld1.8   {q15}, [r0], r1 // load 16 ref bytes, step r0 to the next row
    vld1.8   {q14}, [r2], r3 // load 16 src bytes, step r2 to the next row


    vabd.u8  q13, q14, q15          // q13 = |src - ref| per byte
    vmull.u8 q12, d27, d27          // u16 squares of the upper 8 differences
    vmull.u8 q11, d26, d26          // u16 squares of the lower 8 differences
    vaddl.u16 q12, d24, d25         // fold the upper squares into 4 x u32
    vpadal.u16 q12, q11     // q12 = sqr accumulators (4 x u32)

    vaddl.u8 q13, d26, d27 // q13 = sum accumulators (8 x u16)

    vaddl.u8 q10, d28, d29 // q10 = sum_cur accumulators (8 x u16)

    vmull.u8 q9,  d29, d29          // u16 squares of the upper 8 src bytes
    vmull.u8 q8,  d28, d28          // u16 squares of the lower 8 src bytes
    vaddl.u16 q9, d18, d19       // fold the upper squares into 4 x u32
    vpadal.u16 q9, q8               // q9 = sqr_cur accumulators (4 x u32)

    mov r4, #15                     // 15 rows remain after row 0
pixel_var_16x16_loop0:

    vld1.8 {q0}, [r0], r1 // load 16 ref bytes, step r0 to the next row
    vld1.8 {q1}, [r2], r3 // load 16 src bytes, step r2 to the next row

    vabd.u8 q2, q0, q1              // q2 = |ref - src| per byte

    // q10 accumulates sum_cur; each u16 lane takes two bytes per row,
    // so 16 rows add at most 32 * 255 and cannot overflow.
    vpadal.u8 q10, q1

    // q12 accumulates sqr (squared differences)
    SQR_ADD_16BYTES d4, d5, q12

    // q13 accumulates sum (absolute differences)
    vpadal.u8 q13, q2

    // Decrement early: the NEON instructions expanded below do not touch
    // the condition flags, so the result is still valid at the bne.
    subs r4, #1

    // q9 accumulates sqr_cur (squared src bytes)
    SQR_ADD_16BYTES d2, d3, q9

    bne pixel_var_16x16_loop0

    // Horizontal reduction of the four accumulators.
    vadd.u16 d0, d26, d27 // sum:     8 x u16 -> 4 x u16
    vadd.u16 d1, d20, d21 // sum_cur: 8 x u16 -> 4 x u16
    vpaddl.u16 q0, q0               // widen: d0 = 2 x u32 sum, d1 = 2 x u32 sum_cur
    vadd.u32 d2, d24, d25 // sqr:     4 x u32 -> 2 x u32
    vadd.u32 d3, d18, d19 // sqr_cur: 4 x u32 -> 2 x u32
    vpadd.u32 d0, d0, d1            // d0 = [sum, sum_cur]
    vpadd.u32 d1, d2, d3            // d1 = [sqr, sqr_cur]

    ldr       r4, [sp, #4]          // output pointer; +4 skips the saved r4 slot

    vshr.u32  q0, q0, #8            // all four totals >> 8
    vmul.u32  d0, d0                // d0 = [(sum>>8)^2, (sum_cur>>8)^2]
    vsub.u32  d0, d1, d0            // d0 = [sqr>>8, sqr_cur>>8] - d0
    vmovl.u32 q0, d0                // move the two u32 results into d0 and d1
    vst2.16  {d0[0], d1[0]}, [r4]   // store the low halfword of each, back to back

    ldmia sp!, {r4}

WELS_ASM_FUNC_END

#endif /* HAVE_NEON */
