@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@ *******************************************************************************
@ * @file
@ *  ih264_mem_fns_neon.s
@ *
@ * @brief
@ *  Contains function definitions for memory manipulation
@ *
@ * @author
@ *  Naveen SR
@ *
@ * @par List of Functions:
@ *  - ih264_memcpy_mul_8_a9q()
@ *  - ih264_memcpy_a9q()
@ *  - ih264_memset_mul_8_a9q()
@ *  - ih264_memset_a9q()
@ *  - ih264_memset_16bit_mul_8_a9q()
@ *  - ih264_memset_16bit_a9q()
@ *
@ * @remarks
@ *  None
@ *
@ *******************************************************************************
@*

@**
@*******************************************************************************
@*
@* @brief
@*   memcpy of a 1d array
@*
@* @par Description:
@*   Does memcpy of 8-bit data from source to destination for 8, 16 or 32 bytes
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] num_bytes
@*  number of bytes to copy
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*
@void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 *pu1_src,
@                        UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes
@
@ Copies num_bytes bytes from pu1_src to pu1_dst, 8 bytes per iteration.
@ Precondition: num_bytes is a non-zero multiple of 8. The counter is tested
@ only after an 8-byte copy and the loop exits only when it reaches exactly
@ zero, so this routine must not be called with 0 or a non-multiple of 8.
@ Modifies r0, r1, r2, d0 and the condition flags.

.text
.p2align 2


    .global ih264_memcpy_mul_8_a9q

ih264_memcpy_mul_8_a9q:

loop_neon_memcpy_mul_8:
    @ Copy 8 bytes; both pointers are post-incremented by the transfer size
    vld1.8      d0, [r1]!
    vst1.8      d0, [r0]!

    subs        r2, r2, #8
    bne         loop_neon_memcpy_mul_8
    bx          lr



@*******************************************************************************
@*
@void ih264_memcpy(UWORD8 *pu1_dst,
@                  UWORD8 *pu1_src,
@                  UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes
@
@ Copies num_bytes bytes from pu1_src to pu1_dst: 8 bytes at a time with NEON
@ while at least 8 bytes remain, then the remaining 0..7 bytes one at a time.
@ num_bytes == 0 returns immediately without touching memory.
@ The counter is compared with signed conditions (blt/bge).
@ Modifies r0, r1, r2, r3, d0 and the condition flags.



    .global ih264_memcpy_a9q

ih264_memcpy_a9q:
    subs        r2, r2, #8              @ r2 = num_bytes - 8
    blt         memcpy_tail             @ fewer than 8 bytes: no NEON pass
loop_neon_memcpy:
    @ Copy 8 bytes
    vld1.8      d0, [r1]!
    vst1.8      d0, [r0]!

    subs        r2, r2, #8
    bge         loop_neon_memcpy        @ another full 8 bytes still pending

memcpy_tail:
    @ r2 is (bytes left - 8) here, i.e. -8..-1; undo the bias.
    adds        r2, r2, #8              @ r2 = bytes left (0..7), Z set if none
    bxeq        lr                      @ nothing left to copy

loop_memcpy:
    @ Copy the trailing bytes one at a time
    ldrb        r3, [r1], #1
    strb        r3, [r0], #1
    subs        r2, r2, #1
    bne         loop_memcpy
    bx          lr




@void ih264_memset_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 value,
@                        UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value, 8 bytes per
@ iteration.
@ Precondition: num_bytes is a non-zero multiple of 8 (the loop exits only
@ when the counter reaches exactly zero, and stores before testing it).
@ Modifies r0, r2, d0 and the condition flags.





    .global ih264_memset_mul_8_a9q

ih264_memset_mul_8_a9q:

@ Assumptions: numbytes is either 8, 16 or 32
    vdup.8      d0, r1                  @ replicate the byte into all 8 lanes
loop_memset_mul_8:
    @ Memset 8 bytes
    vst1.8      d0, [r0]!

    subs        r2, r2, #8
    bne         loop_memset_mul_8

    bx          lr




@void ih264_memset(UWORD8 *pu1_dst,
@                  UWORD8 value,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value: 8 bytes at a
@ time with NEON while at least 8 bytes remain, then the remaining 0..7 bytes
@ one at a time. num_bytes == 0 returns immediately without touching memory.
@ NOTE(review): the prototype above documents num_bytes as UWORD8 while the
@ sibling routines use UWORD32; the code uses all of r2 - confirm against the
@ C declaration.
@ Modifies r0, r2, d0 and the condition flags.



    .global ih264_memset_a9q

ih264_memset_a9q:
    subs        r2, r2, #8              @ r2 = num_bytes - 8
    blt         memset_tail             @ fewer than 8 bytes: no NEON pass
    vdup.8      d0, r1                  @ replicate the byte into all 8 lanes
loop_neon_memset:
    @ Memset 8 bytes
    vst1.8      d0, [r0]!

    subs        r2, r2, #8
    bge         loop_neon_memset        @ another full 8 bytes still pending

memset_tail:
    @ r2 is (bytes left - 8) here, i.e. -8..-1; undo the bias.
    adds        r2, r2, #8              @ r2 = bytes left (0..7), Z set if none
    bxeq        lr                      @ nothing left to set

loop_memset:
    @ Set the trailing bytes one at a time
    strb        r1, [r0], #1
    subs        r2, r2, #1
    bne         loop_memset
    bx          lr




@void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                                  UWORD16 value,
@                                  UWORD32 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words
@
@ Fills num_words 16-bit elements at pu2_dst with the low halfword of value,
@ 8 elements (16 bytes) per iteration.
@ Precondition: num_words is a non-zero multiple of 8 (the loop exits only
@ when the counter reaches exactly zero, and stores before testing it).
@ Modifies r0, r2, d0 and the condition flags.





    .global ih264_memset_16bit_mul_8_a9q

ih264_memset_16bit_mul_8_a9q:

@ Assumptions: num_words is either 8, 16 or 32

    vdup.16     d0, r1                  @ replicate the halfword into all 4 lanes
loop_memset_16bit_mul_8:
    @ Memset 8 words: d0 holds 4 halfwords, so store it twice
    vst1.16     d0, [r0]!
    vst1.16     d0, [r0]!

    subs        r2, r2, #8
    bne         loop_memset_16bit_mul_8

    bx          lr




@void ih264_memset_16bit(UWORD16 *pu2_dst,
@                       UWORD16 value,
@                       UWORD32 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words
@
@ Fills num_words 16-bit elements at pu2_dst with the low halfword of value:
@ 8 elements at a time with NEON while at least 8 remain, then the remaining
@ 0..7 elements one at a time. num_words == 0 returns immediately without
@ touching memory.
@ Modifies r0, r2, d0 and the condition flags.



    .global ih264_memset_16bit_a9q

ih264_memset_16bit_a9q:
    subs        r2, r2, #8              @ r2 = num_words - 8
    blt         memset_16bit_tail       @ fewer than 8 words: no NEON pass
    vdup.16     d0, r1                  @ replicate the halfword into all 4 lanes
loop_neon_memset_16bit:
    @ Memset 8 words: d0 holds 4 halfwords, so store it twice
    vst1.16     d0, [r0]!
    vst1.16     d0, [r0]!

    subs        r2, r2, #8
    bge         loop_neon_memset_16bit  @ another full 8 words still pending

memset_16bit_tail:
    @ r2 is (words left - 8) here, i.e. -8..-1; undo the bias.
    adds        r2, r2, #8              @ r2 = words left (0..7), Z set if none
    bxeq        lr                      @ nothing left to set

loop_memset_16bit:
    @ Set the trailing words one at a time
    strh        r1, [r0], #2
    subs        r2, r2, #1
    bne         loop_memset_16bit
    bx          lr



