/*
 * Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
 *    to endorse or promote products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * int memcmp(const void *str1, const void *str2, size_t count)
 *
 * Target: ARMv7-A, ARM (32-bit) instruction set, NEON, GNU as unified syntax.
 *
 * In:    r0 = str1, r1 = str2, r2 = count (number of bytes to compare)
 * Out:   r0 = 0 when the two pointers are identical, when count is 0, or when
 *             all count bytes match; otherwise (byte of str1) - (byte of str2)
 *             at the first mismatch, with both bytes zero-extended.
 * Uses:  r3 as scratch; r4-r7 as scratch, saved on entry and restored on
 *        return; d0-d7 (q0-q3) and the condition flags are overwritten.
 * Stack: 20 bytes (r4-r7, lr). The routine makes no calls.
 *
 * Strategy: compare in blocks of 32, then 16, 8 and 4 bytes, then byte by
 * byte. Blocks are compared by subtracting them lane-wise; a non-zero result
 * means some byte in the block differs. Once a differing block is known, it
 * is narrowed down by halves and the first differing byte is located with a
 * byte loop.
 */
    .syntax     unified
    .arch       armv7-a
    .fpu        neon
    .globl      memcmp          @ -- Begin function memcmp
    .p2align    2
    .type       memcmp,%function
    .code       32              @memcmp
memcmp:
    .fnstart
    push    {r4, r5, r6, r7, lr}
    pld     [r0, #0]
    pld     [r1, #0]
    /**
     * if ((str1 == str2) || (count == 0)) return 0;
     */
    cmp     r0, r1
    cmpne   r2, #0
    beq     .Lreturn_0
    /**
     * Early exit when the very first byte already differs. The pointers and
     * the count are left untouched, so the block compares below start again
     * from byte 0.
     */
    ldrb    r3, [r0]                @ r3 = *str1
    ldrb    r4, [r1]                @ r4 = *str2
    pld     [r0, #64]
    pld     [r1, #64]
    cmp     r3, r4
    subne   r0, r3, r4
    bne     .Lreturn

/**
 * Compare 32 bytes per iteration using the NEON registers.
 * r2 = bytes still to compare.
 */
.L32_byte_cmp:
    cmp     r2, #32
    blo     .L16_byte_cmp
    sub     r2, r2, #32
    vld1.8      {d0 - d3}, [r0]!
    vld1.8      {d4 - d7}, [r1]!
    vsub.i8     q0, q0, q2          @ q0 (d0, d1): byte differences of bytes 0..15
    vsub.i8     q1, q1, q3          @ q1 (d2, d3): byte differences of bytes 16..31
    pld     [r0, #64]
    pld     [r1, #64]

    vorr    d4, d0, d1              @ d4 != 0 <=> bytes 0..15 contain a difference
    vorr    d5, d2, d3              @ d5 != 0 <=> bytes 16..31 contain a difference
    vorr    d6, d4, d5              @ d6 != 0 <=> the 32-byte block contains a difference
    vmov    r3, r4, d6
    orrs    r5, r3, r4              @ Z set: whole block equal, keep looping
    beq     .L32_byte_cmp

/**
 * The 32-byte block just compared contains at least one differing byte.
 * Rewind both pointers to the start of the block and pick the 16-byte half
 * that holds the first difference.
 */
.L32_byte_diff:
    vmov    r3, r4, d4
    orrs    r3, r3, r4              @ Z set: bytes 0..15 are equal
    sub     r0, r0, #32             @ plain sub: leaves the flags from orrs intact
    sub     r1, r1, #32
    addeq   r0, r0, #16             @ first half equal: skip to the second half
    addeq   r1, r1, #16
    beq     .L16_byte_diff_back
    vmov    r3, r4, d0
    vmov    r5, r6, d1
    b       .L16_byte_diff

.L16_byte_diff_back:
    vmov    r3, r4, d2
    vmov    r5, r6, d3

/**
 * On entry r0/r1 point at a 16-byte window that contains a difference,
 * r3:r4 hold the differences of its bytes 0..7 and r5:r6 those of bytes 8..15.
 */
.L16_byte_diff:
    orrs    r7, r3, r4              @ Z clear: the difference is in bytes 0..7
    bne     .L8_byte_diff
    add     r0, r0, #8              @ bytes 0..7 equal: move on to bytes 8..15
    add     r1, r1, #8
    mov     r3, r5
    mov     r4, r6

/**
 * On entry r0/r1 point at an 8-byte window that contains a difference,
 * r3 holds the differences of its bytes 0..3 and r4 those of bytes 4..7.
 */
.L8_byte_diff:
    cmp     r3, #0
    addeq   r0, r0, #4              @ bytes 0..3 equal: the difference is in bytes 4..7
    addeq   r1, r1, #4

/**
 * Byte loop over a window already known to contain a difference; it carries
 * no counter of its own and ends at the first differing byte.
 */
.L4_byte_diff:
    ldrb    r5, [r0], #1
    ldrb    r6, [r1], #1
    subs    r5, r5, r6
    beq     .L4_byte_diff
    mov     r0, r5
    b       .Lreturn

/**
 * Fewer than 32 bytes remain: try 16, then 8, then 4 bytes, each at most once.
 */
.L16_byte_cmp:
    cmp     r2, #16
    blo     .L8_byte_cmp
    sub     r2, r2, #16
    vld1.8      {d0 - d1}, [r0]!
    vld1.8      {d4 - d5}, [r1]!
    vsub.i8     q0, q0, q2
    pld     [r0, #64]
    pld     [r1, #64]

    vorr    d4, d0, d1
    vmov    r3, r4, d4
    orrs    r3, r3, r4              @ Z set: these 16 bytes are equal
    beq     .L8_byte_cmp
    sub     r0, r0, #16             @ rewind to the start of the differing window
    sub     r1, r1, #16
    vmov    r3, r4, d0
    vmov    r5, r6, d1
    b       .L16_byte_diff

.L8_byte_cmp:
    cmp     r2, #8
    blo     .L4_byte_cmp
    sub     r2, r2, #8
    vld1.8      {d0}, [r0]!
    vld1.8      {d4}, [r1]!
    vsub.i8     d0, d0, d4

    vmov    r3, r4, d0
    orrs    r7, r3, r4              @ Z set: these 8 bytes are equal
    beq     .L4_byte_cmp
    sub     r0, r0, #8
    sub     r1, r1, #8
    b       .L8_byte_diff

    /**
     * NOTE(review): the word loads below use the pointers as they are, so this
     * path presumably relies on unaligned word access being permitted on the
     * target - confirm.
     */
.L4_byte_cmp:
    cmp     r2, #4
    blo     .Lless_4_byte_cmp
    sub     r2, r2, #4
    ldr     r3, [r0], #4
    ldr     r4, [r1], #4
    cmp     r3, r4
    beq     .Lless_4_byte_cmp
    sub     r0, r0, #4
    sub     r1, r1, #4
    b       .L4_byte_diff

/**
 * Compare the remaining r2 bytes one at a time.
 */
.Lless_4_byte_cmp:
    cmp     r2, #0
    beq     .Lreturn_0
    sub     r2, r2, #1
    ldrb    r3, [r0], #1
    ldrb    r4, [r1], #1
    subs    r5, r3, r4
    beq     .Lless_4_byte_cmp
    mov     r0, r5
    b       .Lreturn

.Lreturn_0:
    mov     r0, #0
.Lreturn:
    pop     {r4, r5, r6, r7, pc}
.Lfunc_end:
    .size memcmp, .Lfunc_end - memcmp
    .cantunwind
    .fnend                      @ -- End function