/*
 * Copyright (c) 2022-2022 Huawei Device Co., Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
 *    to endorse or promote products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 30 31#ifdef __ARM_FEATURE_MVE 32 33 .syntax unified 34 .globl memcmp @ -- Begin function memcmp 35 .p2align 2 36 .type memcmp,%function 37memcmp: 38 @ r0 = str1 39 @ r1 = str2 40 @ r2 = count 41 .fnstart 42 push {r4, r5, r6, r7, r8, lr} 43 pld [r0, #0] 44 pld [r1, #0] 45 46 /** 47 * if (str1 == str2) || (n == 0) return; 48 */ 49 cmp r0, r1 50 it ne 51 cmpne r2, #0 52 beq Lreturn_0 53 54 /** 55 * Determine whether the first byte is different. 56 */ 57 ldrb r3, [r0] @ r3 = *str1 58 ldrb r4, [r1] @ r4 = *str2 59 pld [r0, #64] 60 pld [r1, #64] 61 cmp r3, r4 62 itt ne 63 subne r0, r3, r4 64 bne Lreturn 65 66#ifdef LOSCFG_ARCH_UNALIGNED_EXC 67 /** 68 * Check address alignment. 69 */ 70 and r5, r0, #3 71 and r6, r1, #3 72 orrs r5, r6 73 bgt Lunaligned_cmp 74#endif 75 76/** 77 * Comparing 32 bytes each time, using floating-point registers to improve efficiency. 78 */ 79L32_byte_cmp: 80 cmp r2, #32 81 blo L16_byte_cmp 82 sub r2, r2, #32 83 vldmia.32 r0!, {s0 - s7} 84 vldmia.32 r1!, {s8 - s15} 85 vsub.i8 q0, q0, q2 @ q0: Difference of the first 16 bytes 86 vsub.i8 q1, q1, q3 @ q1: Difference of the last 16 bytes 87 pld [r0, #64] 88 pld [r1, #64] 89 90 /** 91 * Swap d1 and d2 so that after bitwise OR, 92 * d4 represents the result of the first half of the comparison, 93 * d5 represents the result of the second half of the comparison. 94 */ 95 vmov d6, d1 96 vmov d1, d2 97 vmov d2, d6 98 vorr q2, q0, q1 99 100 /** 101 * Determine whether q2 is zero. 102 */ 103 vmov r3, r4, d4 104 orr r5, r3, r4 105 vmov r3, r4, d5 106 orr r6, r3, r4 107 orr r5, r5, r6 108 cmp r5, #0 109 beq L32_byte_cmp 110 111L32_byte_diff_pre: 112 /** 113 * Restore the swapped d1 and d2. 114 */ 115 vmov d6, d1 116 vmov d1, d2 117 vmov d2, d6 118 119/** 120 * Going to the diff branch shows that a certain byte must be different at this time. 121 * We use r3 to indicate whether the first half of the multibytes are equal, 122 * and r4 to indicate whether the second half of the multibytes are equal. 
123 */ 124L32_byte_diff: 125 vmov r3, r4, d4 126 orr r3, r3, r4 127 /** 128 * Adjust the two pointers back. 129 */ 130 sub r0, #32 131 sub r1, #32 132 cmp r3, #0 133 ittt eq 134 addeq r0, #16 135 addeq r1, #16 136 beq L16_byte_diff_back 137 vmov r3, r4, d0 138 vmov r5, r6, d1 139 b L16_byte_diff 140 141L16_byte_diff_back: 142 vmov r3, r4, d2 143 vmov r5, r6, d3 144 145L16_byte_diff: 146 orr r7, r3, r4 147 cmp r7, #0 148 ittt eq 149 addeq r0, #8 150 addeq r1, #8 151 beq L8_byte_diff_back 152 b L8_byte_diff 153 154L8_byte_diff_back: 155 mov r3, r5 156 mov r4, r6 157 158L8_byte_diff: 159 cmp r3, #0 160 ittt eq 161 addeq r0, #4 162 addeq r1, #4 163 beq L4_byte_diff 164 165Lunaligned_cmp: 166L4_byte_diff: 167 ldrb r5, [r0], #1 168 ldrb r6, [r1], #1 169 subs r5, r5, r6 170 beq L4_byte_diff 171 mov r0, r5 172 b Lreturn 173 174/** 175 * The dichotomy handles the case of less than 32 bytes. 176 */ 177L16_byte_cmp: 178 cmp r2, #16 179 blo L8_byte_cmp 180 sub r2, r2, #16 181 vldmia.32 r0!, {s0 - s3} 182 vldmia.32 r1!, {s8 - s11} 183 vsub.i8 q0, q0, q2 184 pld [r0, #64] 185 pld [r1, #64] 186 187 vmov r3, r4, d0 188 orr r5, r3, r4 189 vmov r3, r4, d1 190 orr r6, r3, r4 191 orr r5, r5, r6 192 cmp r5, #0 193 beq L8_byte_cmp 194 195 sub r0, #16 196 sub r1, #16 197 vmov r3, r4, d0 198 vmov r5, r6, d1 199 b L16_byte_diff 200 201L8_byte_cmp: 202 cmp r2, #8 203 blo L4_byte_cmp 204 sub r2, r2, #8 205 vldmia.32 r0!, {s0 - s1} 206 vldmia.32 r1!, {s8 - s9} 207 vsub.i8 q0, q0, q2 208 209 vmov r3, r4, d0 210 orr r7, r3, r4 211 cmp r7, #0 212 beq L4_byte_cmp 213 sub r0, #8 214 sub r1, #8 215 b L8_byte_diff 216 217L4_byte_cmp: 218 cmp r2, #4 219 blo Lless_4_byte_cmp 220 sub r2, r2, #4 221 ldr r3, [r0], #4 222 ldr r4, [r1], #4 223 cmp r3, r4 224 beq Lless_4_byte_cmp 225 sub r0, #4 226 sub r1, #4 227 b L4_byte_diff 228 229Lless_4_byte_cmp: 230 cmp r2, #0 231 beq Lreturn_0 232 sub r2, r2, #1 233 ldrb r3, [r0], #1 234 ldrb r4, [r1], #1 235 sub r5, r3, r4 236 cmp r5, #0 237 itt ne 238 movne r0, r5 
239 bne Lreturn 240 b Lless_4_byte_cmp 241 242Lreturn_0: 243 mov r0, #0 244Lreturn: 245 pop {r4, r5, r6, r7, r8, pc} 246Lfunc_end: 247 .size memcmp, Lfunc_end - memcmp 248 .cantunwind 249 .fnend @ -- End function 250 251#else 252#error 'MVE is not enabled in compile options' 253#endif 254