/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Incoming arguments and the return value (result aliases srcin).  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* Scalar working registers.  */
#define chunk		x2	/* 16-byte aligned address being examined.  */
#define synd		x1	/* 64-bit syndrome; overwrites chrin (w1).  */
#define shift		x3	/* srcin * 4, used to skip leading bytes.  */

/* Vector working registers.  */
#define vchr		v0	/* Requested character in every byte lane.  */
#define vdata		v1	/* Current 16 bytes of the string.  */
#define qdata		q1	/* Same register as vdata, for LDR.  */
#define vnul		v2	/* Per-byte NUL mask, later the merged mask.  */
#define vmatch		v3	/* Per-byte "equals character" mask.  */
#define vmask		v4	/* 0x33 in every byte, selector for BIT/BIF.  */
#define vred		v5	/* 128 -> 64 bit reduction of the masks.  */
#define dred		d5	/* Low 64 bits of vred.  */

/* How it works.

   Every 16-byte chunk is condensed into a 64-bit syndrome that holds one
   nibble per byte.  In the little-endian layout, bits 0-1 of a nibble are
   set when that byte equals the requested character and bits 2-3 are set
   when the byte is NUL (in the main loop they are also set for a match).
   The number of zero bits below the first set bit is therefore 4 * index
   when the first event is a match, and 4 * index + 2 when the string ended
   first.  Testing bit 1 of that count tells the two cases apart.  */

ENTRY (__strchr_aarch64_mte)
	PTR_ARG (0)	/* Macro from asmdefs.h; presumably conditions the
			   pointer in x0 - confirm against that header.  */

	/* First chunk: the aligned 16 bytes that contain srcin.  */
	bic	chunk, srcin, #15
	dup	vchr.16b, chrin
	ld1	{vdata.16b}, [chunk]
	movi	vmask.16b, #0x33
	cmeq	vnul.16b, vdata.16b, #0
	cmeq	vmatch.16b, vdata.16b, vchr.16b
	/* Take bits 0,1,4,5 of each byte from the match mask and keep the
	   remaining bits from the NUL mask.  */
	bit	vnul.16b, vmatch.16b, vmask.16b
	lsl	shift, srcin, #2
	shrn	vred.8b, vnul.8h, #4		/* 128->64 */
	fmov	synd, dred
	/* Discard the nibbles of the bytes that lie before srcin.  A
	   register-specified LSR uses the shift amount modulo 64, so this
	   drops 4 * (srcin & 15) bits.  */
	lsr	synd, synd, shift
	cbz	synd, L(loop)

	/* RBIT followed by CLZ counts the trailing zero bits.  */
	rbit	synd, synd
	clz	synd, synd
	/* The count is a multiple of 4 when the character was seen first;
	   bit 1 is set when the terminating NUL came first.  */
	tst	synd, #2
	add	result, srcin, synd, lsr #2
	csel	result, xzr, result, ne		/* NUL first: return 0.  */
	ret

	/* Main loop: two chunks per iteration.  CMHS gives "match >= data",
	   which is true for a matching byte (0xff >= anything) and for a NUL
	   byte (0 >= 0), so one mask covers both stop conditions.  UMAXP
	   folds that mask to 64 bits so it can be tested in a scalar
	   register.  */
	.p2align 4
L(loop):
	ldr	qdata, [chunk, #16]
	cmeq	vmatch.16b, vdata.16b, vchr.16b
	cmhs	vnul.16b, vmatch.16b, vdata.16b
	umaxp	vred.16b, vnul.16b, vnul.16b
	fmov	synd, dred
	cbnz	synd, L(end)
	ldr	qdata, [chunk, #32]!		/* Pre-index: chunk += 32.  */
	cmeq	vmatch.16b, vdata.16b, vchr.16b
	cmhs	vnul.16b, vmatch.16b, vdata.16b
	umaxp	vred.16b, vnul.16b, vnul.16b
	fmov	synd, dred
	cbz	synd, L(loop)
	/* Here chunk already addresses the chunk that stopped the loop.
	   Step back so that both exits reach L(end) with chunk + 16 being
	   the address of that chunk.  */
	sub	chunk, chunk, #16
L(end):

	/* Build the nibble syndrome for the final chunk.  The big-endian
	   build merges with BIF and omits RBIT, so CLZ runs on the value
	   as-is; presumably this compensates for the lane order of LDR Q
	   on big-endian - confirm.  Both variants execute the same
	   SHRN/FMOV pair in the same position.  */
#ifdef __AARCH64EB__
	bif	vnul.16b, vmatch.16b, vmask.16b
#else
	bit	vnul.16b, vmatch.16b, vmask.16b
#endif
	shrn	vred.8b, vnul.8h, #4		/* 128->64 */
	fmov	synd, dred
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	add	chunk, chunk, #16
	clz	synd, synd
	/* A count that is a multiple of 4 means the character was found;
	   bit 1 set means the string ended without a match.  */
	tst	synd, #2
	add	result, chunk, synd, lsr #2
	csel	result, xzr, result, ne		/* No match: return 0.  */
	ret

END (__strchr_aarch64_mte)
