/*!
 * \copy
 *     Copyright (c)  2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifdef HAVE_NEON
#include "arm_arch_common_macro.S"


//----------------------------------------------------------------------------
// ExpandPictureLuma_neon
//
// Pads a picture plane in place by replicating its border pixels:
//   1. every row gets 32 copies of its first byte written in front of it and
//      32 copies of its last byte written behind it;
//   2. the first and last rows, including the margins written in step 1,
//      are then copied to the 32 rows above and the 32 rows below the plane.
//
// Register use on entry, as read by the code below (presumably the four
// integer arguments of the C caller; the prototype is not visible here):
//   r0 = address of the first byte of the first row
//   r1 = byte distance between the start of consecutive rows
//   r2 = number of bytes in a row
//   r3 = number of rows
//
// Requirements that follow from the loops (nothing is checked here):
//   - r3 must be non-zero: the row loop tests its counter only after
//     decrementing it.
//   - r2 + 64 must be a multiple of 16: the strip loop subtracts 16 per pass
//     and exits only when the counter is exactly zero.
//
// r4-r8 are saved on the stack and restored; q0 and q1 are overwritten.
// No return instruction appears in this body; presumably it is emitted by
// WELS_ASM_FUNC_END (defined in arm_arch_common_macro.S, not visible here).
//----------------------------------------------------------------------------
WELS_ASM_FUNC_BEGIN ExpandPictureLuma_neon
    // Preserve the scratch registers; they are restored before the end.
    stmdb sp!, {r4-r8}
    //Save the dst
    // r7 walks the first byte of each row, r8 counts the rows still to do.
    mov r7, r0
    mov r8, r3

    // r4 walks the last byte of each row (row start + row bytes - 1).
    add r4, r7, r2
    sub r4, #1
    //For the left and right expand
_expand_picture_luma_loop2:
    // r5 = start of the left margin, 32 bytes in front of the row.
    // r6 = start of the right margin, the first byte behind the row.
    sub r5, r7, #32
    add r6, r4, #1

    // Load one byte and replicate it into all 16 lanes of q0 (first byte of
    // the row) and q1 (last byte of the row); both pointers then advance by
    // r1 to the next row.
    vld1.8 {d0[], d1[]}, [r7], r1
    vld1.8 {d2[], d3[]}, [r4], r1

    // Two 16-byte stores per side fill each 32-byte margin; the first store
    // of each pair writes back the pointer advanced by 16.
    vst1.8 {q0}, [r5]!
    vst1.8 {q0}, [r5]
    vst1.8 {q1}, [r6]!
    vst1.8 {q1}, [r6]
    subs r8, #1
    bne _expand_picture_luma_loop2

    //for the top and bottom expand
    // r2 = bytes to cover per row: the row plus both 32-byte margins.
    // r0 = first row, starting at its left margin.
    // r4 = r0 + r1 * r3 - r1 = last row, starting at its left margin.
    add r2, #64
    sub r0, #32
    mla r4, r1, r3, r0
    sub r4, r1
_expand_picture_luma_loop0:
    // Each pass handles one strip that is 16 bytes wide.
    // r5 = r0 - 32 * r1 = same column, 32 rows above the first row.
    // r6 = same column, first row below the last row.
    mov r5, #32
    mls r5, r5, r1, r0
    add r6, r4, r1
    // Fetch 16 bytes of the first row (q0) and of the last row (q1), and
    // advance both source pointers to the next strip.
    vld1.8 {q0}, [r0]!
    vld1.8 {q1}, [r4]!

    // r8 counts the 32 rows to write above and below.
    mov r8, #32
_expand_picture_luma_loop1:
    // Store the strip and step one row down on each side.
    vst1.8 {q0}, [r5], r1
    vst1.8 {q1}, [r6], r1
    subs r8, #1
    bne _expand_picture_luma_loop1

    // Next strip; the loop ends only when r2 reaches exactly zero.
    subs r2, #16
    bne _expand_picture_luma_loop0

    //vldreq.32 d0, [r0]

    // Restore the registers saved on entry.
    ldmia sp!, {r4-r8}
WELS_ASM_FUNC_END


//----------------------------------------------------------------------------
// ExpandPictureChroma_neon
//
// Same border replication as ExpandPictureLuma_neon, but with a pad of 16
// instead of 32: 16 bytes in front of and behind every row, then 16 rows
// above and below the plane.
//
// Register use on entry, as read by the code below (presumably the four
// integer arguments of the C caller; the prototype is not visible here):
//   r0 = address of the first byte of the first row
//   r1 = byte distance between the start of consecutive rows
//   r2 = number of bytes in a row
//   r3 = number of rows
//
// Requirements that follow from the loops (nothing is checked here):
//   - r3 must be non-zero: the row loop tests its counter only after
//     decrementing it.
//   - The padded row (r2 + 32 bytes) is processed in 16-byte strips. A
//     remainder of 8 bytes is handled by a separate 8-byte pass; for any
//     other non-zero remainder the trailing bytes are not written above or
//     below the plane.
//
// r4-r9 are saved on the stack and restored; q0 and q1 are overwritten.
// No return instruction appears in this body; presumably it is emitted by
// WELS_ASM_FUNC_END (defined in arm_arch_common_macro.S, not visible here).
//----------------------------------------------------------------------------
WELS_ASM_FUNC_BEGIN ExpandPictureChroma_neon
    // Preserve the scratch registers; they are restored before the end.
    stmdb sp!, {r4-r9}
    //Save the dst
    // r7 walks the first byte of each row, r8 counts the rows still to do.
    mov r7, r0
    mov r8, r3

    // r4 walks the last byte of each row (row start + row bytes - 1).
    add r4, r7, r2
    sub r4, #1
    //For the left and right expand
_expand_picture_chroma_loop2:
    // r5 = start of the left margin, 16 bytes in front of the row.
    // r6 = start of the right margin, the first byte behind the row.
    sub r5, r7, #16
    add r6, r4, #1

    // Load one byte and replicate it into all 16 lanes of q0 (first byte of
    // the row) and q1 (last byte of the row); both pointers then advance by
    // r1 to the next row.
    vld1.8 {d0[], d1[]}, [r7], r1
    vld1.8 {d2[], d3[]}, [r4], r1

    // One 16-byte store per side fills each margin.
    vst1.8 {q0}, [r5]
    vst1.8 {q1}, [r6]
    subs r8, #1
    bne _expand_picture_chroma_loop2

    //for the top and bottom expand
    // r2 = bytes to cover per row: the row plus both 16-byte margins.
    // r9 keeps that full count so the remainder can be examined later,
    // while r2 is rounded down to a multiple of 16 for the strip loop.
    add r2, #32
    mov r9, r2
    bic r2, #15
    // r0 = first row, starting at its left margin.
    // r4 = r0 + r1 * r3 - r1 = last row, starting at its left margin.
    sub r0, #16
    mla r4, r1, r3, r0
    sub r4, r1
_expand_picture_chroma_loop0:
    // Each pass handles one strip that is 16 bytes wide.
    // r5 = r0 - 16 * r1 = same column, 16 rows above the first row.
    // r6 = same column, first row below the last row.
    mov r5, #16
    mls r5, r5, r1, r0
    add r6, r4, r1
    // Fetch 16 bytes of the first row (q0) and of the last row (q1), and
    // advance both source pointers to the next strip.
    vld1.8 {q0}, [r0]!
    vld1.8 {q1}, [r4]!

    // r8 counts the 16 rows to write above and below.
    mov r8, #16
_expand_picture_chroma_loop1:
    // Store the strip and step one row down on each side.
    vst1.8 {q0}, [r5], r1
    vst1.8 {q1}, [r6], r1
    subs r8, #1
    bne _expand_picture_chroma_loop1

    // Next strip; r2 was rounded to a multiple of 16, so it reaches zero.
    subs r2, #16
    bne _expand_picture_chroma_loop0

    //vldreq.32 d0, [r0]

    // Remainder of the padded row after the 16-byte strips. Only a remainder
    // of exactly 8 bytes gets the extra pass below; anything else skips it.
    and r9, #15
    cmp r9, #8
    bne _expand_picture_chroma_end
    // Same addressing as the strip loop, with r0 and r4 now pointing at the
    // 8 trailing bytes of the first and last rows.
    mov r5, #16
    mls r5, r5, r1, r0
    add r6, r4, r1
    // 8-byte loads into d0 and d2, the low halves of q0 and q1.
    vld1.8 {d0}, [r0]!
    vld1.8 {d2}, [r4]!
    mov r8, #16
_expand_picture_chroma_loop3:
    // Store the 8-byte strip and step one row down on each side.
    vst1.8 {d0}, [r5], r1
    vst1.8 {d2}, [r6], r1
    subs r8, #1
    bne _expand_picture_chroma_loop3
_expand_picture_chroma_end:

    // Restore the registers saved on entry.
    ldmia sp!, {r4-r9}
WELS_ASM_FUNC_END

#endif