/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */


/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "../asmdefs.h"

#define REP8_01		0x0101010101010101
#define REP8_7f		0x7f7f7f7f7f7f7f7f

#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */


ENTRY (__strcmp_aarch64_mte)
	PTR_ARG (0)
	PTR_ARG (1)
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	sub	result, data1, data2
	ret

END (__strcmp_aarch64_mte)
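
/* Illustrative note (an addition for clarity, not part of the original
   routine): the NUL-detection check described in the comment above ENTRY
   can be modelled in C roughly as

	has_nul = (x - 0x0101010101010101) & ~(x | 0x7f7f7f7f7f7f7f7f);

   Worked example, assuming a little-endian load of the bytes
   'a' 'b' 'b' 0x00 0xff 0xff 0xff 0xff, i.e. x = 0xffffffff00626261:

	x - REP8_01    = 0xfefefefdff616160
	~(x | REP8_7f) = 0x0000000080808080
	has_nul        = 0x0000000080000000

   Bit 7 of byte 3 is set, marking the NUL terminator at offset 3.  The
   0xff bytes after it cannot produce false positives because their top
   bits are cleared by ~(x | 0x7f...).  On big-endian targets the data is
   byte-reversed before this check, as noted above.  */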