1/* 2 * Copyright (c) 2021-2021 Huawei Device Co., Ltd. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without modification, 5 * are permitted provided that the following conditions are met: 6 * 7 * 1. Redistributions of source code must retain the above copyright notice, this list of 8 * conditions and the following disclaimer. 9 * 10 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 11 * of conditions and the following disclaimer in the documentation and/or other materials 12 * provided with the distribution. 13 * 14 * 3. Neither the name of the copyright holder nor the names of its contributors may be used 15 * to endorse or promote products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 20 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 .syntax unified 32 .arch armv7-a 33 .fpu neon 34 .globl memcmp @ -- Begin function memcmp 35 .p2align 2 36 .type memcmp,%function 37 .code 32 @memcmp 38memcmp: 39 @ r0 = str1 40 @ r1 = str2 41 @ r2 = count 42 .fnstart 43 push {r4, r5, r6, r7, lr} 44 pld [r0, #0] 45 pld [r1, #0] 46 /** 47 * if (str1 == str2) || (n == 0) return; 48 */ 49 cmp r0, r1 50 cmpne r2, #0 51 beq Lreturn_0 52 /** 53 * Determine whether the first byte is different. 54 */ 55 ldrb r3, [r0] @ r3 = *str1 56 ldrb r4, [r1] @ r4 = *str2 57 pld [r0, #64] 58 pld [r1, #64] 59 cmp r3, r4 60 subne r0, r3, r4 61 bne Lreturn 62 63/** 64 * Comparing 32 bytes each time, using floating-point registers to improve efficiency. 65 */ 66L32_byte_cmp: 67 cmp r2, #32 68 blo L16_byte_cmp 69 sub r2, r2, #32 70 vld1.8 {d0 - d3}, [r0]! 71 vld1.8 {d4 - d7}, [r1]! 72 vsub.i8 q0, q0, q2 @ q0: Difference of the first 16 bytes 73 vsub.i8 q1, q1, q3 @ q1: Difference of the last 16 bytes 74 pld [r0, #64] 75 pld [r1, #64] 76 77 vorr d4, d0, d1 @ d4: Save the result of calculating whether the first 16 bytes are equal. 78 vorr d5, d2, d3 @ d5: Save the result of calculating whether the last 16 bytes are equal. 79 vorr d6, d4, d5 @ d6: Save the result of 32 bytes calculation whether they are equal. 80 vmov r3, r4, d6 81 orr r5, r3, r4 82 cmp r5, #0 83 beq L32_byte_cmp 84 85/** 86 * Going to the diff branch shows that a certain byte must be different at this time. 87 * We use r3 to indicate whether the first half of the multibytes are equal, 88 * and r4 to indicate whether the second half of the multibytes are equal. 89 */ 90L32_byte_diff: 91 vmov r3, r4, d4 92 orr r3, r3, r4 93 /** 94 * Adjust the two pointers back. 95 */ 96 sub r0, #32 97 sub r1, #32 98 cmp r3, #0 99 addeq r0, #16 100 addeq r1, #16 101 beq L16_byte_diff_back 102 vmov r3, r4, d0 103 vmov r5, r6, d1 104 b L16_byte_diff 105 106L16_byte_diff_back: 107 vmov r3, r4, d2 108 vmov r5, r6, d3 109 110L16_byte_diff: 111 orr r7, r3, r4 112 cmp r7, #0 113 addeq r0, #8 114 addeq r1, #8 115 beq L8_byte_diff_back 116 b L8_byte_diff 117 118L8_byte_diff_back: 119 mov r3, r5 120 mov r4, r6 121 122L8_byte_diff: 123 cmp r3, #0 124 addeq r0, #4 125 addeq r1, #4 126 beq L4_byte_diff 127 128L4_byte_diff: 129 ldrb r5, [r0], #1 130 ldrb r6, [r1], #1 131 subs r5, r5, r6 132 beq L4_byte_diff 133 mov r0, r5 134 b Lreturn 135 136/** 137 * The dichotomy handles the case of less than 32 bytes. 138 */ 139L16_byte_cmp: 140 cmp r2, #16 141 blo L8_byte_cmp 142 sub r2, r2, #16 143 vld1.8 {d0 - d1}, [r0]! 144 vld1.8 {d4 - d5}, [r1]! 145 vsub.i8 q0, q0, q2 146 pld [r0, #64] 147 pld [r1, #64] 148 149 vorr d4, d0, d1 150 vmov r3, r4, d4 151 orr r3, r3, r4 152 cmp r3, #0 153 beq L8_byte_cmp 154 sub r0, #16 155 sub r1, #16 156 vmov r3, r4, d0 157 vmov r5, r6, d1 158 b L16_byte_diff 159 160L8_byte_cmp: 161 cmp r2, #8 162 blo L4_byte_cmp 163 sub r2, r2, #8 164 vld1.8 {d0}, [r0]! 165 vld1.8 {d4}, [r1]! 166 vsub.i8 d0, d0, d4 167 168 vmov r3, r4, d0 169 orr r7, r3, r4 170 cmp r7, #0 171 beq L4_byte_cmp 172 sub r0, #8 173 sub r1, #8 174 b L8_byte_diff 175 176L4_byte_cmp: 177 cmp r2, #4 178 blo Lless_4_byte_cmp 179 sub r2, r2, #4 180 ldr r3, [r0], #4 181 ldr r4, [r1], #4 182 cmp r3, r4 183 beq Lless_4_byte_cmp 184 sub r0, #4 185 sub r1, #4 186 b L4_byte_diff 187 188Lless_4_byte_cmp: 189 cmp r2, #0 190 beq Lreturn_0 191 sub r2, r2, #1 192 ldrb r3, [r0], #1 193 ldrb r4, [r1], #1 194 sub r5, r3, r4 195 cmp r5, #0 196 movne r0, r5 197 bne Lreturn 198 b Lless_4_byte_cmp 199 200Lreturn_0: 201 mov r0, #0 202Lreturn: 203 pop {r4, r5, r6, r7, pc} 204Lfunc_end: 205 .size memcmp, Lfunc_end - memcmp 206 .cantunwind 207 .fnend @ -- End function