• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (c) 2022-2022 Huawei Device Co., Ltd. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice, this list of
8 *    conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
11 *    of conditions and the following disclaimer in the documentation and/or other materials
12 *    provided with the distribution.
13 *
14 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
15 *    to endorse or promote products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
#ifdef  __ARM_FEATURE_MVE

/**
 * int memcmp(const void *str1, const void *str2, size_t count)
 *
 * Syntax: GNU as, ARM unified syntax (run through the C preprocessor).
 *
 * In:
 *   r0 = str1
 *   r1 = str2
 *   r2 = count (bytes still to be compared; decremented as data is consumed)
 * Out:
 *   r0 = 0 when the first `count` bytes are equal, otherwise
 *        (*p1 - *p2) of the first differing byte pair, both bytes
 *        zero-extended (loaded with ldrb).
 * Scratch:
 *   r3-r7, flags, s0-s15 (q0-q3). r4-r8 and lr are saved and restored here.
 *
 * Strategy: compare 32-byte blocks through the FP/vector register file, then
 * 16, 8 and 4 byte blocks, then single bytes. When a block differs, the
 * pointers are rewound and the mismatch is narrowed by halves
 * (32 -> 16 -> 8 -> 4) before a final byte scan locates it.
 */
    .syntax     unified
    .globl      memcmp          @ -- Begin function memcmp
    .p2align    2
    .type       memcmp,%function
memcmp:
    .fnstart
    /*
     * r8 is saved but never used below.
     * NOTE(review): presumably pushed so that six registers keep sp 8-byte
     * aligned; confirm before trimming the list.
     */
    push    {r4, r5, r6, r7, r8, lr}
    pld     [r0, #0]
    pld     [r1, #0]

    /**
     * if ((str1 == str2) || (count == 0)) return 0;
     */
    cmp     r0, r1
    it      ne
    cmpne   r2, #0
    beq     .Lreturn_0

    /**
     * Quick exit when the very first byte already differs.
     * The pointers are not advanced; the byte is compared again below.
     */
    ldrb    r3, [r0]            @ r3 = *str1
    ldrb    r4, [r1]            @ r4 = *str2
    pld     [r0, #64]
    pld     [r1, #64]
    cmp     r3, r4
    itt     ne
    subne   r0, r3, r4
    bne     .Lreturn

#ifdef LOSCFG_ARCH_UNALIGNED_EXC
    /**
     * If either pointer is not word aligned, compare byte by byte.
     * This must use the loop that is bounded by the remaining count (r2);
     * the mismatch scan further down has no bound and is only valid once a
     * difference is known to exist.
     *
     * NOTE(review): without LOSCFG_ARCH_UNALIGNED_EXC the vldmia/ldr loads
     * below run on unaligned addresses as well; confirm the target allows it.
     */
    orr     r5, r0, r1
    tst     r5, #3
    bne     .Lcmp_bytes
#endif

/**
 * Compare 32 bytes per iteration using the FP/vector registers.
 */
.Lcmp_32:
    cmp     r2, #32
    blo     .Lcmp_16
    sub     r2, r2, #32
    vldmia.32   r0!, {s0 - s7}      @ q0:q1 = 32 bytes of str1
    vldmia.32   r1!, {s8 - s15}     @ q2:q3 = 32 bytes of str2
    vsub.i8     q0, q0, q2          @ q0: byte differences of the first 16 bytes
    vsub.i8     q1, q1, q3          @ q1: byte differences of the last 16 bytes
    pld     [r0, #64]
    pld     [r1, #64]

    /**
     * The 32 bytes are equal iff (q0 | q1) is all zero.
     */
    vorr    q2, q0, q1
    vmov    r3, r4, d4
    orr     r5, r3, r4
    vmov    r3, r4, d5
    orr     r6, r3, r4
    orrs    r5, r5, r6
    beq     .Lcmp_32

/**
 * A byte inside the last 32-byte block differs.
 * Rewind both pointers to the start of the block and pick the 16-byte half
 * that holds the first difference: q0 (d0, d1) covers the first half,
 * q1 (d2, d3) the second half.
 */
.Ldiff_32:
    sub     r0, r0, #32
    sub     r1, r1, #32
    vmov    r3, r4, d0
    vmov    r5, r6, d1
    orr     r7, r3, r4
    orr     r7, r7, r5
    orrs    r7, r7, r6
    bne     .Ldiff_16
    add     r0, r0, #16             @ first half equal: step into the second half
    add     r1, r1, #16
    vmov    r3, r4, d2
    vmov    r5, r6, d3

/**
 * In: r0/r1 point at a 16-byte block known to contain a difference,
 *     r3:r4 = byte differences of bytes 0-7, r5:r6 = those of bytes 8-15.
 */
.Ldiff_16:
    orrs    r7, r3, r4
    bne     .Ldiff_8
    add     r0, r0, #8              @ bytes 0-7 equal: continue with bytes 8-15
    add     r1, r1, #8
    mov     r3, r5                  @ only the low word is examined from here on

/**
 * In: r0/r1 point at an 8-byte block known to contain a difference,
 *     r3 = byte differences of bytes 0-3.
 */
.Ldiff_8:
    cmp     r3, #0
    itt     eq
    addeq   r0, r0, #4              @ bytes 0-3 equal: the difference is in bytes 4-7
    addeq   r1, r1, #4

/**
 * Unbounded byte scan. Only entered when a differing byte is known to exist
 * within the next 4 bytes, so it terminates without consulting r2.
 */
.Ldiff_scan:
    ldrb    r5, [r0], #1
    ldrb    r6, [r1], #1
    subs    r5, r5, r6
    beq     .Ldiff_scan
    mov     r0, r5
    b       .Lreturn

/**
 * Fewer than 32 bytes remain: handle them by halving (16, 8, 4, then bytes).
 */
.Lcmp_16:
    cmp     r2, #16
    blo     .Lcmp_8
    sub     r2, r2, #16
    vldmia.32   r0!, {s0 - s3}
    vldmia.32   r1!, {s8 - s11}
    vsub.i8     q0, q0, q2
    pld     [r0, #64]
    pld     [r1, #64]

    vmov    r3, r4, d0
    vmov    r5, r6, d1
    orr     r7, r3, r4
    orr     r7, r7, r5
    orrs    r7, r7, r6
    beq     .Lcmp_8

    sub     r0, r0, #16             @ rewind to the start of the differing block
    sub     r1, r1, #16
    b       .Ldiff_16

.Lcmp_8:
    cmp     r2, #8
    blo     .Lcmp_4
    sub     r2, r2, #8
    vldmia.32   r0!, {s0 - s1}
    vldmia.32   r1!, {s8 - s9}
    vsub.i8     q0, q0, q2          @ only d0 is meaningful; d1 holds stale data

    vmov    r3, r4, d0
    orrs    r7, r3, r4
    beq     .Lcmp_4
    sub     r0, r0, #8
    sub     r1, r1, #8
    b       .Ldiff_8

.Lcmp_4:
    cmp     r2, #4
    blo     .Lcmp_bytes
    sub     r2, r2, #4
    ldr     r3, [r0], #4
    ldr     r4, [r1], #4
    cmp     r3, r4
    beq     .Lcmp_bytes
    sub     r0, r0, #4
    sub     r1, r1, #4
    b       .Ldiff_scan

/**
 * Bounded byte loop: compares at most r2 bytes and returns 0 when they are
 * all equal. Used for the tail and for the unaligned case.
 */
.Lcmp_bytes:
    cmp     r2, #0
    beq     .Lreturn_0
    sub     r2, r2, #1
    ldrb    r3, [r0], #1
    ldrb    r4, [r1], #1
    subs    r5, r3, r4
    beq     .Lcmp_bytes
    mov     r0, r5
    b       .Lreturn

.Lreturn_0:
    mov     r0, #0
.Lreturn:
    pop     {r4, r5, r6, r7, r8, pc}
.Lfunc_end:
    .size memcmp, .Lfunc_end - memcmp
    .cantunwind
    .fnend                      @ -- End function

#else
#error 'MVE is not enabled in compile options'
#endif
254