1/* 2 * Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 7 * 1. Redistributions of source code must retain the above copyright notice, this list of 8 * conditions and the following disclaimer. 9 * 10 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 11 * of conditions and the following disclaimer in the documentation and/or other materials 12 * provided with the distribution. 13 * 14 * 3. Neither the name of the copyright holder nor the names of its contributors may be used 15 * to endorse or promote products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 20 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 .syntax unified 32 .arch armv7-a 33 .fpu neon 34 35#define FUNCTION(x) \ 36.globl x; \ 37.p2align 2; \ 38.type x,%function; \ 39x: 40 41#if defined(LOSCFG_KERNEL_LMS) 42FUNCTION(__memset) 43#else 44FUNCTION(memset) 45#endif 46 @ r0 = address 47 @ r1 = char 48 @ r2 = count 49 @ returns original address in r0 50 .fnstart 51 52 push {r4} 53 cmp r2, #0 54 beq Lreturn 55 vdup.8 q0, r1 56 mov r4, r0 @ r4 = r0 = address 57 58L64_byte_alignment: 59 ands r3, r0, #7 60 beq L64_byte_aligned 61 rsb r3, r3, #8 @ r3 = unalignedCnt = 8 - (address % 7) 62 cmp r2, r3 63 movlo r3, r2 64 sub r2, r2, r3 65 66Lloop1: 67 strb r1, [r4], #1 68 subs r3, r3, #1 69 bgt Lloop1 70 71/** 72 * Set 64 bytes each time, and use floating-point registers to improve efficiency. 73 */ 74L64_byte_aligned: 75 vmov q1, q0 76 vmov q2, q0 77 cmp r2, #64 78 blo L32_byte_aligned 79 vmov q3, q0 80 sub r2, r2, #64 81Lloop2: 82 vstmia r4!, {d0 - d7} 83 subs r2, r2, #64 84 bgt Lloop2 85 86/** 87 * The dichotomy handles the case of less than 64 bytes, 88 * and the front will subtract 64 more, and you need to make it up at this time. 89 */ 90 add r2, r2, #64 91L32_byte_aligned: 92 cmp r2, #0 93 beq Lreturn 94 cmp r2, #32 95 blo L16_byte_aligned 96 sub r2, r2, #32 97 vstmia r4!, {d0 - d3} 98 99L16_byte_aligned: 100 cmp r2, #0 101 beq Lreturn 102 cmp r2, #16 103 blo L8_byte_aligned 104 sub r2, r2, #16 105 vstmia r4!, {d0 - d1} 106 107L8_byte_aligned: 108 cmp r2, #0 109 beq Lreturn 110 cmp r2, #8 111 blo L4_byte_aligned 112 sub r2, r2, #8 113 vstmia r4!, {d0} 114 115L4_byte_aligned: 116 cmp r2, #0 117 beq Lreturn 118 cmp r2, #4 119 blo Lless_4_byte 120 sub r2, r2, #4 121 vst1.32 {d0[0]}, [r4]! 122 123Lless_4_byte: 124 cmp r2, #0 125 beq Lreturn 126 strb r1, [r4], #1 127 sub r2, r2, #1 128 b Lless_4_byte 129 130Lreturn: 131 pop {r4} 132 bx lr 133Lfunc_end: 134#if defined(LOSCFG_KERNEL_LMS) 135 .size __memset, Lfunc_end - __memset 136#else 137 .size memset, Lfunc_end - memset 138#endif 139 .cantunwind 140 .fnend @ -- End function 141