/*!
 * \copy
 *     Copyright (c)  2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
30 * 31 */ 32 33#ifdef HAVE_NEON 34#include "arm_arch_common_macro.S" 35 36.macro SQR_ADD_16BYTES arg0, arg1, arg2 37 vmull.u8 q3, \arg0, \arg0 38 vmull.u8 q8, \arg1, \arg1 39 vpadal.u16 \arg2, q3 40 vpadal.u16 \arg2, q8 41.endm 42 43 44WELS_ASM_FUNC_BEGIN SampleVariance16x16_neon 45 stmdb sp!, {r4} 46 47 vld1.8 {q15}, [r0], r1 //save the ref data (16bytes) 48 vld1.8 {q14}, [r2], r3 //save the src data (16bytes) 49 50 51 vabd.u8 q13, q14, q15 52 vmull.u8 q12, d27, d27 53 vmull.u8 q11, d26, d26 54 vaddl.u16 q12, d24, d25 55 vpadal.u16 q12, q11 //sqr 56 57 vaddl.u8 q13, d26, d27 //sum 58 59 vaddl.u8 q10, d28, d29 //sum_cur 60 61 vmull.u8 q9, d29, d29 62 vmull.u8 q8, d28, d28 63 vaddl.u16 q9, d18, d19 //sqr_cur 64 vpadal.u16 q9, q8 65 66 mov r4, #15 67pixel_var_16x16_loop0: 68 69 vld1.8 {q0}, [r0], r1 //save the ref data (16bytes) 70 vld1.8 {q1}, [r2], r3 //save the src data (16bytes) 71 72 vabd.u8 q2, q0, q1 73 74 //q10 save sum_cur 75 vpadal.u8 q10, q1 76 77 //q12 save sqr 78 SQR_ADD_16BYTES d4, d5, q12 79 80 //q13 save sum 81 vpadal.u8 q13, q2 82 83 subs r4, #1 84 85 //q9 save sqr_cur 86 SQR_ADD_16BYTES d2, d3, q9 87 88 bne pixel_var_16x16_loop0 89 90 vadd.u16 d0, d26, d27 //sum 91 vadd.u16 d1, d20, d21 //sum_cur 92 vpaddl.u16 q0, q0 93 vadd.u32 d2, d24, d25 //sqr 94 vadd.u32 d3, d18, d19 //sqr_cur 95 vpadd.u32 d0, d0, d1 96 vpadd.u32 d1, d2, d3 97 98 ldr r4, [sp, #4] 99 100 vshr.u32 q0, q0, #8 101 vmul.u32 d0, d0 102 vsub.u32 d0, d1, d0 103 vmovl.u32 q0, d0 104 vst2.16 {d0[0], d1[0]}, [r4] 105 106 ldmia sp!, {r4} 107 108WELS_ASM_FUNC_END 109 110#endif 111