/*
 * Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
 * to endorse or promote products derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

    .syntax unified
    .arch armv7-a
    .fpu neon
    .globl memset                       @ -- Begin function memset
    .p2align 2
    .type memset,%function

/*
 * memset: fill a block of memory with a single byte value.
 *
 * In:    r0 = address of the first byte to fill
 *        r1 = char; its low byte is replicated into every lane of q0
 *        r2 = count, handled as an unsigned number of bytes
 * Out:   r0 = original address (r0 is never written)
 * Regs:  r4 is the running store pointer and is saved/restored on the stack;
 *        r2, r3, d0-d7 (q0-q3) and the condition flags are overwritten.
 *
 * Method:
 *   1. Store single bytes until the pointer is 8-byte aligned
 *      (or until count is exhausted, whichever comes first).
 *   2. Store 64 bytes per iteration from d0-d7 while at least 64 bytes remain.
 *   3. Finish with at most one 32-, 16-, 8- and 4-byte store, then up to
 *      three single-byte stores.
 */
memset:
    .fnstart

    push    {r4}
    cmp     r2, #0
    beq     .Lreturn                    @ count == 0: nothing to store
    vdup.8  q0, r1                      @ every byte of q0 = fill char
    mov     r4, r0                      @ r4 = store pointer; r0 stays intact for the return

    /* Step 1: byte stores up to the next 8-byte boundary. */
    ands    r3, r0, #7                  @ r3 = address % 8
    beq     .Lbulk_64                   @ already 8-byte aligned
    rsb     r3, r3, #8                  @ r3 = 8 - (address % 8) = bytes to the boundary
    cmp     r2, r3
    movlo   r3, r2                      @ r3 = min(r3, count), unsigned
    sub     r2, r2, r3                  @ r2 = bytes left after the head

.Lalign_loop:                           @ r3 is in 1..7 on entry
    strb    r1, [r4], #1
    subs    r3, r3, #1
    bne     .Lalign_loop

    /* Step 2: 64 bytes per iteration using d0-d7. */
.Lbulk_64:
    vmov    q1, q0                      @ q1/q2 are also needed by the 32-byte step
    vmov    q2, q0
    cmp     r2, #64
    blo     .Ltail_32
    vmov    q3, q0
    sub     r2, r2, #64                 @ bias: r2 = remaining - 64 (>= 0 here)
.Lloop_64:
    vstmia  r4!, {d0-d7}
    subs    r2, r2, #64
    /*
     * Unsigned test: no borrow means at least 64 bytes are still left, so
     * the loop also consumes the final block when exactly 64 bytes remain,
     * and counts with the top bit set are not mistaken for negative values.
     */
    bhs     .Lloop_64
    add     r2, r2, #64                 @ undo the bias: r2 = remaining bytes (< 64)

    /* Step 3: remainder below 64 bytes, one power-of-two chunk at a time. */
.Ltail_32:
    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #32
    blo     .Ltail_16
    sub     r2, r2, #32
    vstmia  r4!, {d0-d3}

.Ltail_16:
    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #16
    blo     .Ltail_8
    sub     r2, r2, #16
    vstmia  r4!, {d0-d1}

.Ltail_8:
    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #8
    blo     .Ltail_4
    sub     r2, r2, #8
    vstmia  r4!, {d0}

.Ltail_4:
    cmp     r2, #0
    beq     .Lreturn
    cmp     r2, #4
    blo     .Ltail_bytes
    sub     r2, r2, #4
    vst1.32 {d0[0]}, [r4]!

.Ltail_bytes:
    cmp     r2, #0
    beq     .Lreturn
.Lbyte_loop:                            @ r2 >= 1 on entry
    strb    r1, [r4], #1
    subs    r2, r2, #1
    bne     .Lbyte_loop

.Lreturn:
    pop     {r4}
    bx      lr
.Lfunc_end:
    .size memset, .Lfunc_end - memset
    .cantunwind
    .fnend                              @ -- End function