• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this list of
8 *    conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
11 *    of conditions and the following disclaimer in the documentation and/or other materials
12 *    provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
15 *    to endorse or promote products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31    .syntax     unified
32    .arch       armv7-a
33    .fpu        neon
34    .globl      memset          @ -- Begin function memset
35    .p2align    2
36    .type       memset,%function
37memset:
38    @ r0 = address
39    @ r1 = char
40    @ r2 = count
41    @ returns original address in r0
42    .fnstart
43
44    push    {r4}
45    cmp     r2, #0
46    beq     Lreturn
47    vdup.8  q0, r1
48    mov     r4, r0              @ r4 = r0 = address
49
50L64_byte_alignment:
51    ands    r3, r0, #7
52    beq     L64_byte_aligned
53    rsb     r3, r3, #8          @ r3 = unalignedCnt = 8 - (address % 7)
54    cmp     r2, r3
55    movlo   r3, r2
56    sub     r2, r2, r3
57
58Lloop1:
59    strb    r1, [r4], #1
60    subs    r3, r3, #1
61    bgt     Lloop1
62
63/**
64 * Set 64 bytes each time, and use floating-point registers to improve efficiency.
65 */
66L64_byte_aligned:
67    vmov    q1, q0
68    vmov    q2, q0
69    cmp     r2, #64
70    blo     L32_byte_aligned
71    vmov    q3, q0
72    sub     r2, r2, #64
73Lloop2:
74    vstmia  r4!, {d0 - d7}
75    subs    r2, r2, #64
76    bgt     Lloop2
77
78/**
79 * The dichotomy handles the case of less than 64 bytes,
80 * and the front will subtract 64 more, and you need to make it up at this time.
81 */
82    add     r2, r2, #64
83L32_byte_aligned:
84    cmp     r2, #0
85    beq     Lreturn
86    cmp     r2, #32
87    blo     L16_byte_aligned
88    sub     r2, r2, #32
89    vstmia  r4!, {d0 - d3}
90
91L16_byte_aligned:
92    cmp     r2, #0
93    beq     Lreturn
94    cmp     r2, #16
95    blo     L8_byte_aligned
96    sub     r2, r2, #16
97    vstmia  r4!, {d0 - d1}
98
99L8_byte_aligned:
100    cmp     r2, #0
101    beq     Lreturn
102    cmp     r2, #8
103    blo     L4_byte_aligned
104    sub     r2, r2, #8
105    vstmia  r4!, {d0}
106
107L4_byte_aligned:
108    cmp     r2, #0
109    beq     Lreturn
110    cmp     r2, #4
111    blo     Lless_4_byte
112    sub     r2, r2, #4
113    vst1.32 {d0[0]}, [r4]!
114
115Lless_4_byte:
116    cmp     r2, #0
117    beq     Lreturn
118    strb    r1, [r4], #1
119    sub     r2, r2, #1
120    b       Lless_4_byte
121
122Lreturn:
123    pop     {r4}
124    bx      lr
125Lfunc_end:
126    .size memset, Lfunc_end - memset
127    .cantunwind
128    .fnend                      @ -- End function
129