//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
// *******************************************************************************
// * @file
// *  ih264_mem_fns_neon.s
// *
// * @brief
// *  Contains function definitions for memory manipulation
// *
// * @author
// *  Naveen SR
// *
// * @par List of Functions:
// *  - ih264_memcpy_av8()
// *  - ih264_memcpy_mul_8_av8()
// *  - ih264_memset_av8()
// *  - ih264_memset_mul_8_av8()
// *  - ih264_memset_16bit_mul_8_av8()
// *  - ih264_memset_16bit_av8()
// *
// * @remarks
// *  None
// *
// *******************************************************************************
//*/

.text
.p2align 2
.include "ih264_neon_macros.s"
///**
//*******************************************************************************
//*
//* @brief
//*  memcpy of a 1d array
//*
//* @par Description:
//*  Does memcpy of 8bit data from source to destination for 8, 16 or 32
//*  number of bytes
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number
//*  of bytes to copy
//*
//* @returns
//*  Nothing (void)
//*
//* @remarks
//*  num_bytes must be a non-zero multiple of 8: the loop copies 8 bytes per
//*  iteration and stops only when the remaining count reaches exactly zero.
//*
//*******************************************************************************
//*/
//void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 *pu1_src,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => *pu1_src
//  x2 => num_bytes
//
//  Clobbers: x0, x1, x2, v0, condition flags
//  NOTE(review): the count is consumed as the full 64-bit x2; this assumes
//  the caller passes it zero-extended -- confirm against the C prototype.

    .global ih264_memcpy_mul_8_av8

ih264_memcpy_mul_8_av8:

.Lmemcpy_mul_8_loop:
    ld1     {v0.8b}, [x1], #8           // v0 = next 8 source bytes, src += 8
    st1     {v0.8b}, [x0], #8           // store them, dst += 8

    subs    x2, x2, #8                  // 8 fewer bytes remaining
    bne     .Lmemcpy_mul_8_loop         // repeat until exactly zero remain
    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memcpy of a 1d array of arbitrary length
//*
//* @par Description:
//*  Copies 8 bytes per iteration through v0 while at least 8 bytes remain,
//*  then copies the remaining 0..7 bytes one at a time.
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy; 0 is allowed and copies nothing
//*
//* @returns
//*  Nothing (void)
//*
//*******************************************************************************
//*/
//void ih264_memcpy(UWORD8 *pu1_dst,
//                  UWORD8 *pu1_src,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => *pu1_src
//  x2 => num_bytes
//
//  Clobbers: x0, x1, x2, x3, v0, condition flags
//  NOTE(review): the count is consumed as the full 64-bit x2; this assumes
//  the caller passes it zero-extended -- confirm against the C prototype.

    .global ih264_memcpy_av8

ih264_memcpy_av8:
    subs    x2, x2, #8                  // x2 = num_bytes - 8
    blt     .Lmemcpy_tail               // fewer than 8 bytes: skip the NEON loop

.Lmemcpy_neon_loop:
    ld1     {v0.8b}, [x1], #8           // copy one 8-byte group
    st1     {v0.8b}, [x0], #8

    subs    x2, x2, #8                  // x2 = (bytes remaining) - 8
    bge     .Lmemcpy_neon_loop          // another full group is available

.Lmemcpy_tail:
    add     x2, x2, #8                  // undo the bias: x2 = bytes remaining (0..7)
    cbz     x2, .Lmemcpy_done           // nothing left; also covers num_bytes == 0

.Lmemcpy_byte_loop:
    ldrb    w3, [x1], #1                // ldrb zero-extends into w3; no further
    strb    w3, [x0], #1                // extension is needed before the store
    subs    x2, x2, #1
    bne     .Lmemcpy_byte_loop

.Lmemcpy_done:
    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array whose length is a multiple of 8
//*
//* @par Description:
//*  Replicates the low 8 bits of value across v0 and stores 8 bytes per
//*  iteration.
//*
//* @remarks
//*  Assumption: num_bytes is either 8, 16 or 32. The loop stops only when the
//*  remaining count reaches exactly zero, so num_bytes must be a non-zero
//*  multiple of 8.
//*
//*******************************************************************************
//*/
//void ih264_memset_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 value,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => value
//  x2 => num_bytes
//
//  Clobbers: x0, x2, v0, condition flags

    .global ih264_memset_mul_8_av8

ih264_memset_mul_8_av8:
    dup     v0.8b, w1                   // v0 = value replicated into 8 byte lanes

.Lmemset_mul_8_loop:
    st1     {v0.8b}, [x0], #8           // set 8 bytes, dst += 8

    subs    x2, x2, #8
    bne     .Lmemset_mul_8_loop         // repeat until exactly zero remain

    ret


//void ih264_memset(UWORD8 *pu1_dst,
//                  UWORD8 value,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => value
//  x2 => num_bytes
//
//  Sets num_bytes bytes at pu1_dst to the low 8 bits of value: 8 bytes per
//  iteration through v0 while at least 8 bytes remain, then the remaining
//  0..7 bytes one at a time. num_bytes == 0 is allowed and writes nothing.
//
//  Clobbers: x0, x2, v0, condition flags
//  NOTE(review): the count is consumed as the full 64-bit x2; this assumes
//  the caller passes it zero-extended -- confirm against the C prototype.

    .global ih264_memset_av8

ih264_memset_av8:
    subs    x2, x2, #8                  // x2 = num_bytes - 8
    blt     .Lmemset_tail               // fewer than 8 bytes: skip the NEON loop
    dup     v0.8b, w1                   // v0 = value replicated into 8 byte lanes

.Lmemset_neon_loop:
    st1     {v0.8b}, [x0], #8           // set 8 bytes, dst += 8

    subs    x2, x2, #8                  // x2 = (bytes remaining) - 8
    bge     .Lmemset_neon_loop          // another full group is available

.Lmemset_tail:
    add     x2, x2, #8                  // undo the bias: x2 = bytes remaining (0..7)
    cbz     x2, .Lmemset_done           // nothing left; also covers num_bytes == 0

.Lmemset_byte_loop:
    strb    w1, [x0], #1                // strb stores only the low byte of w1
    subs    x2, x2, #1
    bne     .Lmemset_byte_loop

.Lmemset_done:
    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 16-bit array whose length is a multiple of 8 words
//*
//* @par Description:
//*  Replicates the low 16 bits of value across v0 and stores 8 halfwords
//*  (16 bytes) per iteration.
//*
//* @remarks
//*  Assumption: num_words is either 8, 16 or 32. The loop stops only when the
//*  remaining count reaches exactly zero, so num_words must be a non-zero
//*  multiple of 8.
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
//                              UWORD16 value,
//                              UWORD8 num_words)
//**************Variables Vs Registers*************************
//  x0 => *pu2_dst
//  x1 => value
//  x2 => num_words
//
//  Clobbers: x0, x2, v0, condition flags

    .global ih264_memset_16bit_mul_8_av8

ih264_memset_16bit_mul_8_av8:
    dup     v0.8h, w1                   // v0 = value replicated into 8 halfword lanes

.Lmemset_16bit_mul_8_loop:
    st1     {v0.8h}, [x0], #16          // set 8 words in one store, dst += 16 bytes

    subs    x2, x2, #8                  // 8 fewer words remaining
    bne     .Lmemset_16bit_mul_8_loop   // repeat until exactly zero remain

    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 16-bit array of arbitrary length
//*
//* @par Description:
//*  Sets num_words halfwords at pu2_dst to the low 16 bits of value: 8 words
//*  per iteration through v0 while at least 8 words remain, then the
//*  remaining 0..7 words one at a time. num_words == 0 is allowed and writes
//*  nothing.
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit(UWORD16 *pu2_dst,
//                        UWORD16 value,
//                        UWORD8 num_words)
//**************Variables Vs Registers*************************
//  x0 => *pu2_dst
//  x1 => value
//  x2 => num_words
//
//  Clobbers: x0, x2, v0, condition flags
//  NOTE(review): the count is consumed as the full 64-bit x2; this assumes
//  the caller passes it zero-extended -- confirm against the C prototype.

    .global ih264_memset_16bit_av8

ih264_memset_16bit_av8:
    subs    x2, x2, #8                  // x2 = num_words - 8
    blt     .Lmemset_16bit_tail         // fewer than 8 words: skip the NEON loop
    dup     v0.8h, w1                   // v0 = value replicated into 8 halfword lanes

.Lmemset_16bit_neon_loop:
    st1     {v0.8h}, [x0], #16          // set 8 words in one store, dst += 16 bytes

    subs    x2, x2, #8                  // x2 = (words remaining) - 8
    bge     .Lmemset_16bit_neon_loop    // another full group is available

.Lmemset_16bit_tail:
    add     x2, x2, #8                  // undo the bias: x2 = words remaining (0..7)
    cbz     x2, .Lmemset_16bit_done     // nothing left; also covers num_words == 0

.Lmemset_16bit_word_loop:
    strh    w1, [x0], #2                // strh stores only the low halfword of w1
    subs    x2, x2, #1
    bne     .Lmemset_16bit_word_loop

.Lmemset_16bit_done:
    ret