/*!
 * \copy
 * Copyright (c) 2013, Cisco Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"

// ---------------------------------------------------------------------------
// Four-row transfer macros
//
// All eight macros share one operand layout:
//   row0..row3 : NEON registers, one per row
//   ptr        : X register holding the current row address
//   stride     : X register holding the byte step between rows
//
// Each macro expands to four ld1 or st1 instructions using post-index
// addressing, so ptr has advanced by four strides when the macro is done.
// Arguments are passed positionally by every caller in this file.
// ---------------------------------------------------------------------------

// Load four 8-byte rows as a single 64-bit element into lane 0 of each
// register. Not referenced by the routines in this file.
.macro LOAD_ALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    ld1     {\row0\().d}[0], [\ptr], \stride
    ld1     {\row1\().d}[0], [\ptr], \stride
    ld1     {\row2\().d}[0], [\ptr], \stride
    ld1     {\row3\().d}[0], [\ptr], \stride
.endm

// Store lane 0 (64 bits) of four registers as four 8-byte rows.
// Not referenced by the routines in this file.
.macro STORE_ALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    st1     {\row0\().d}[0], [\ptr], \stride
    st1     {\row1\().d}[0], [\ptr], \stride
    st1     {\row2\().d}[0], [\ptr], \stride
    st1     {\row3\().d}[0], [\ptr], \stride
.endm

// Load four 8-byte rows using byte elements (8b arrangement).
.macro LOAD_UNALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    ld1     {\row0\().8b}, [\ptr], \stride
    ld1     {\row1\().8b}, [\ptr], \stride
    ld1     {\row2\().8b}, [\ptr], \stride
    ld1     {\row3\().8b}, [\ptr], \stride
.endm

// Store four 8-byte rows using byte elements (8b arrangement).
.macro STORE_UNALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    st1     {\row0\().8b}, [\ptr], \stride
    st1     {\row1\().8b}, [\ptr], \stride
    st1     {\row2\().8b}, [\ptr], \stride
    st1     {\row3\().8b}, [\ptr], \stride
.endm

// Load four 16-byte rows using 64-bit elements (2d arrangement).
.macro LOAD16_ALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    ld1     {\row0\().2d}, [\ptr], \stride
    ld1     {\row1\().2d}, [\ptr], \stride
    ld1     {\row2\().2d}, [\ptr], \stride
    ld1     {\row3\().2d}, [\ptr], \stride
.endm

// Store four 16-byte rows using 64-bit elements (2d arrangement).
.macro STORE16_ALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    st1     {\row0\().2d}, [\ptr], \stride
    st1     {\row1\().2d}, [\ptr], \stride
    st1     {\row2\().2d}, [\ptr], \stride
    st1     {\row3\().2d}, [\ptr], \stride
.endm

// Load four 16-byte rows using byte elements (16b arrangement).
.macro LOAD16_UNALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    ld1     {\row0\().16b}, [\ptr], \stride
    ld1     {\row1\().16b}, [\ptr], \stride
    ld1     {\row2\().16b}, [\ptr], \stride
    ld1     {\row3\().16b}, [\ptr], \stride
.endm

// Store four 16-byte rows using byte elements (16b arrangement).
.macro STORE16_UNALIGNED_DATA_WITH_STRIDE row0, row1, row2, row3, ptr, stride
    st1     {\row0\().16b}, [\ptr], \stride
    st1     {\row1\().16b}, [\ptr], \stride
    st1     {\row2\().16b}, [\ptr], \stride
    st1     {\row3\().16b}, [\ptr], \stride
.endm

// ---------------------------------------------------------------------------
// Block copy routines
//
// Register roles shared by every routine below:
//   x0 = destination pointer      w1 = destination stride (32-bit)
//   x2 = source pointer           w3 = source stride (32-bit)
// Both strides are passed through SIGN_EXTENSION before being used as
// post-index steps. That macro, like WELS_ASM_AARCH64_FUNC_BEGIN/END, is
// defined in arm_arch64_common_macro.S and is not shown here; by its name it
// sign-extends wN into xN -- confirm against that file.
// x0 and x2 are advanced past the copied rows. Only v0-v7 and v16-v19 are
// ever written.
// ---------------------------------------------------------------------------

//void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
// Copies 8 rows of 8 bytes. Vector registers written: v0-v7.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x8_AArch64_neon
    SIGN_EXTENSION x1, w1
    SIGN_EXTENSION x3, w3

    // rows 0-3
    LOAD_UNALIGNED_DATA_WITH_STRIDE     v0, v1, v2, v3, x2, x3
    STORE_UNALIGNED_DATA_WITH_STRIDE    v0, v1, v2, v3, x0, x1

    // rows 4-7
    LOAD_UNALIGNED_DATA_WITH_STRIDE     v4, v5, v6, v7, x2, x3
    STORE_UNALIGNED_DATA_WITH_STRIDE    v4, v5, v6, v7, x0, x1
WELS_ASM_AARCH64_FUNC_END


// WelsCopy16x16_AArch64_neon
// Copies 16 rows of 16 bytes with the 2d (64-bit element) load/store macros.
// Argument registers are used exactly as in WelsCopy8x8_AArch64_neon, so the
// C prototype is presumably the same -- confirm against the C header.
// Vector registers written: v0-v3, v16-v19.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x16_AArch64_neon
    SIGN_EXTENSION x1, w1
    SIGN_EXTENSION x3, w3

    // rows 0-3
    LOAD16_ALIGNED_DATA_WITH_STRIDE     v0, v1, v2, v3, x2, x3
    STORE16_ALIGNED_DATA_WITH_STRIDE    v0, v1, v2, v3, x0, x1

    // rows 4-7
    LOAD16_ALIGNED_DATA_WITH_STRIDE     v16, v17, v18, v19, x2, x3
    STORE16_ALIGNED_DATA_WITH_STRIDE    v16, v17, v18, v19, x0, x1

    // rows 8-11
    LOAD16_ALIGNED_DATA_WITH_STRIDE     v0, v1, v2, v3, x2, x3
    STORE16_ALIGNED_DATA_WITH_STRIDE    v0, v1, v2, v3, x0, x1

    // rows 12-15
    LOAD16_ALIGNED_DATA_WITH_STRIDE     v16, v17, v18, v19, x2, x3
    STORE16_ALIGNED_DATA_WITH_STRIDE    v16, v17, v18, v19, x0, x1
WELS_ASM_AARCH64_FUNC_END


// WelsCopy16x16NotAligned_AArch64_neon
// Copies 16 rows of 16 bytes with the 16b (byte element) load/store macros.
// Argument registers are used exactly as in WelsCopy8x8_AArch64_neon, so the
// C prototype is presumably the same -- confirm against the C header.
// Vector registers written: v0-v3, v16-v19.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x16NotAligned_AArch64_neon
    SIGN_EXTENSION x1, w1
    SIGN_EXTENSION x3, w3

    // rows 0-3
    LOAD16_UNALIGNED_DATA_WITH_STRIDE   v0, v1, v2, v3, x2, x3
    STORE16_UNALIGNED_DATA_WITH_STRIDE  v0, v1, v2, v3, x0, x1

    // rows 4-7
    LOAD16_UNALIGNED_DATA_WITH_STRIDE   v16, v17, v18, v19, x2, x3
    STORE16_UNALIGNED_DATA_WITH_STRIDE  v16, v17, v18, v19, x0, x1

    // rows 8-11
    LOAD16_UNALIGNED_DATA_WITH_STRIDE   v0, v1, v2, v3, x2, x3
    STORE16_UNALIGNED_DATA_WITH_STRIDE  v0, v1, v2, v3, x0, x1

    // rows 12-15
    LOAD16_UNALIGNED_DATA_WITH_STRIDE   v16, v17, v18, v19, x2, x3
    STORE16_UNALIGNED_DATA_WITH_STRIDE  v16, v17, v18, v19, x0, x1
WELS_ASM_AARCH64_FUNC_END


// WelsCopy16x8NotAligned_AArch64_neon
// Copies 8 rows of 16 bytes with the 16b (byte element) load/store macros.
// Argument registers are used exactly as in WelsCopy8x8_AArch64_neon, so the
// C prototype is presumably the same -- confirm against the C header.
// Vector registers written: v0-v3, v16-v19.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x8NotAligned_AArch64_neon
    SIGN_EXTENSION x1, w1
    SIGN_EXTENSION x3, w3

    // rows 0-3
    LOAD16_UNALIGNED_DATA_WITH_STRIDE   v0, v1, v2, v3, x2, x3
    STORE16_UNALIGNED_DATA_WITH_STRIDE  v0, v1, v2, v3, x0, x1

    // rows 4-7
    LOAD16_UNALIGNED_DATA_WITH_STRIDE   v16, v17, v18, v19, x2, x3
    STORE16_UNALIGNED_DATA_WITH_STRIDE  v16, v17, v18, v19, x0, x1
WELS_ASM_AARCH64_FUNC_END


// WelsCopy8x16_AArch64_neon
// Copies 16 rows of 8 bytes with the 8b (byte element) load/store macros.
// Argument registers are used exactly as in WelsCopy8x8_AArch64_neon, so the
// C prototype is presumably the same -- confirm against the C header.
// Vector registers written: v0-v7.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x16_AArch64_neon
    SIGN_EXTENSION x1, w1
    SIGN_EXTENSION x3, w3

    // rows 0-3
    LOAD_UNALIGNED_DATA_WITH_STRIDE     v0, v1, v2, v3, x2, x3
    STORE_UNALIGNED_DATA_WITH_STRIDE    v0, v1, v2, v3, x0, x1

    // rows 4-7
    LOAD_UNALIGNED_DATA_WITH_STRIDE     v4, v5, v6, v7, x2, x3
    STORE_UNALIGNED_DATA_WITH_STRIDE    v4, v5, v6, v7, x0, x1

    // rows 8-11
    LOAD_UNALIGNED_DATA_WITH_STRIDE     v0, v1, v2, v3, x2, x3
    STORE_UNALIGNED_DATA_WITH_STRIDE    v0, v1, v2, v3, x0, x1

    // rows 12-15
    LOAD_UNALIGNED_DATA_WITH_STRIDE     v4, v5, v6, v7, x2, x3
    STORE_UNALIGNED_DATA_WITH_STRIDE    v4, v5, v6, v7, x0, x1
WELS_ASM_AARCH64_FUNC_END

#endif