/*!
 * \copy
 *     Copyright (c) 2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
*
*/

#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"

//void ExpandPictureLuma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
//
// Pads a picture plane in place.
//   Pass 1: for every row, the first pixel is replicated into the 32 bytes
//           before the row and the last pixel into the 32 bytes after it.
//   Pass 2: the padded first row is copied into the 32 rows above it and the
//           padded last row into the 32 rows below it, 16 columns at a time.
//
// In:    x0 = pDst (first pixel of the plane), w1 = kiStride,
//        w2 = kiPicW, w3 = kiPicH
// Out:   none
// Clobb: x0, x2, x4-x8, x10, v0-v3, flags; x1-x3 are rewritten by
//        SIGN_EXTENSION (macro from the included file, presumably a
//        32 to 64 bit sign extension - TODO confirm)
// Pre:   kiPicW + 64 must be a multiple of 16. The column counter of pass 2
//        is decremented by 16 and tested for exactly zero, so any other
//        width never terminates.
//        Returns without touching memory when kiPicW <= 0 or kiPicH <= 0.
WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureLuma_AArch64_neon
    // An empty plane has nothing to pad. Without this check a zero height
    // wraps the row counter below zero and pass 1 never stops.
    cmp     w2, #0
    b.le    _expand_picture_luma_end
    cmp     w3, #0
    b.le    _expand_picture_luma_end

    SIGN_EXTENSION x1,w1
    SIGN_EXTENSION x2,w2
    SIGN_EXTENSION x3,w3
    mov     x7, x0                          // x7 = first pixel of current row
    mov     x8, x3                          // x8 = rows left in pass 1
    add     x4, x7, x2
    sub     x4, x4, #1                      // x4 = last pixel of current row
    mov     x10, #16                        // column step used by pass 2
    //For the left and right expand
_expand_picture_luma_loop2:
    sub     x5, x7, #32                     // x5 = start of left border
    add     x6, x4, #1                      // x6 = start of right border
    ld1r    {v0.16b}, [x7], x1              // splat first pixel, go to next row
    ld1r    {v2.16b}, [x4], x1              // splat last pixel, go to next row
    mov     v1.16b, v0.16b
    mov     v3.16b, v2.16b
    st2     {v0.16b, v1.16b}, [x5]          // both regs equal: 32 identical bytes
    st2     {v2.16b, v3.16b}, [x6]
    sub     x8, x8, #1
    cbnz    x8, _expand_picture_luma_loop2
    //for the top and bottom expand
    add     x2, x2, #64                     // x2 = bytes per padded row
    sub     x0, x0, #32                     // x0 = start of padded first row
    madd    x4, x1, x3, x0
    sub     x4, x4, x1                      // x4 = start of padded last row
_expand_picture_luma_loop0:
    sub     x5, x0, x1, lsl #5              // x5 = 32 rows above the first row
    add     x6, x4, x1                      // x6 = row just below the last row
    ld1     {v0.16b}, [x0], x10             // 16 columns of the first row
    ld1     {v1.16b}, [x4], x10             // 16 columns of the last row
    mov     x8, #32                         // rows to fill above and below
_expand_picture_luma_loop1:
    st1     {v0.16b}, [x5], x1
    st1     {v1.16b}, [x6], x1
    sub     x8, x8, #1
    cbnz    x8, _expand_picture_luma_loop1

    sub     x2, x2, #16
    cbnz    x2, _expand_picture_luma_loop0
_expand_picture_luma_end:
WELS_ASM_AARCH64_FUNC_END

//void ExpandPictureChroma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
//                                       const int32_t kiPicH);
//
// Same two passes as the luma routine, with a 16 byte / 16 row border.
//   Pass 1: replicate the first and last pixel of every row into the 16
//           bytes before and after the row.
//   Pass 2: copy the padded first and last rows into the 16 rows above and
//           below, 16 columns at a time, plus one 8 column step when
//           (kiPicW + 32) leaves a remainder of exactly 8.
//
// In:    x0 = pDst (first pixel of the plane), w1 = kiStride,
//        w2 = kiPicW, w3 = kiPicH
// Out:   none
// Clobb: x0, x2, x4-x11, v0, v1, flags; x1-x3 are rewritten by
//        SIGN_EXTENSION (macro from the included file, presumably a
//        32 to 64 bit sign extension - TODO confirm)
// Pre:   (kiPicW + 32) modulo 16 should be 0 or 8. Any other remainder
//        leaves the trailing columns of the top and bottom border unwritten.
//        Returns without touching memory when kiPicW <= 0 or kiPicH <= 0.
WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureChroma_AArch64_neon
    // An empty plane has nothing to pad. Without this check a zero height
    // wraps the row counter below zero and pass 1 never stops.
    cmp     w2, #0
    b.le    _expand_picture_chroma_end
    cmp     w3, #0
    b.le    _expand_picture_chroma_end

    //Save the dst
    SIGN_EXTENSION x1,w1
    SIGN_EXTENSION x2,w2
    SIGN_EXTENSION x3,w3
    mov     x7, x0                          // x7 = first pixel of current row
    mov     x8, x3                          // x8 = rows left in pass 1
    mov     x10, #16                        // column step used by pass 2
    add     x4, x7, x2
    sub     x4, x4, #1                      // x4 = last pixel of current row
    //For the left and right expand
_expand_picture_chroma_loop2:
    sub     x5, x7, #16                     // x5 = start of left border
    add     x6, x4, #1                      // x6 = start of right border

    ld1r    {v0.16b}, [x7], x1              // splat first pixel, go to next row
    ld1r    {v1.16b}, [x4], x1              // splat last pixel, go to next row

    st1     {v0.16b}, [x5]
    st1     {v1.16b}, [x6]
    sub     x8, x8, #1
    cbnz    x8, _expand_picture_chroma_loop2

    //for the top and bottom expand
    add     x2, x2, #32                     // x2 = bytes per padded row
    mov     x9, x2                          // x9 keeps the unrounded width
    mov     x11, #15
    bic     x2, x2, x11                     // x2 = width rounded down to 16
    sub     x0, x0, #16                     // x0 = start of padded first row
    madd    x4, x1, x3, x0
    sub     x4, x4, x1                      // x4 = start of padded last row
_expand_picture_chroma_loop0:
    sub     x5, x0, x1, lsl #4              // x5 = 16 rows above the first row
    add     x6, x4, x1                      // x6 = row just below the last row
    ld1     {v0.16b}, [x0], x10             // 16 columns of the first row
    ld1     {v1.16b}, [x4], x10             // 16 columns of the last row

    mov     x8, #16                         // rows to fill above and below
_expand_picture_chroma_loop1:
    st1     {v0.16b}, [x5], x1
    st1     {v1.16b}, [x6], x1
    sub     x8, x8, #1
    cbnz    x8, _expand_picture_chroma_loop1

    sub     x2, x2, #16
    cbnz    x2, _expand_picture_chroma_loop0

    // Tail: only a remainder of exactly 8 columns is handled here.
    and     x9, x9, #15
    sub     x9, x9, #8
    cbnz    x9, _expand_picture_chroma_end
    sub     x5, x0, x1, lsl #4              // x5 = 16 rows above the first row
    add     x6, x4, x1                      // x6 = row just below the last row
    ld1     {v0.8b}, [x0]                   // last 8 columns of the first row
    ld1     {v1.8b}, [x4]                   // last 8 columns of the last row

    mov     x8, #16
_expand_picture_chroma_loop3:
    st1     {v0.8b}, [x5], x1
    st1     {v1.8b}, [x6], x1
    sub     x8, x8, #1
    cbnz    x8, _expand_picture_chroma_loop3
_expand_picture_chroma_end:

WELS_ASM_AARCH64_FUNC_END
#endif
