/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 *
 * C equivalent: char *strchr (const char *s, int c);
 * Returns a pointer to the first occurrence of (char) c in s, or to the
 * terminating NUL if c is NUL, or NULL if c does not occur.
 *
 * All vector loads are 16-byte aligned (the first load is aligned down
 * with `bic`), so no load crosses a 16-byte granule beyond the one that
 * holds string bytes — this is what makes the routine MTE compatible.
 */

#include "../asmdefs.h"

/* AAPCS64: x0/w1 in, x0 out; only caller-saved registers are used.  */
#define srcin		x0	/* in: pointer to the string */
#define chrin		w1	/* in: character to search for (low byte) */
#define result		x0	/* out: match pointer, or NULL */

#define src		x2	/* 16-byte-aligned read cursor */
#define tmp1		x1	/* 64-bit syndrome / scratch */
#define wtmp2		w3	/* staging for mask immediates */
#define tmp3		x3	/* syndrome shift for the unaligned start */

#define vrepchr		v0	/* chrin replicated into all 16 byte lanes */
#define vdata		v1	/* current 16-byte chunk of the string */
#define qdata		q1	/* q-view of vdata for the post-indexed ldr */
#define vhas_nul	v2	/* per-byte NUL test (later: merged syndrome) */
#define vhas_chr	v3	/* per-byte character-match test */
#define vrepmask	v4	/* 0x3003 per halfword: where to insert chr bits */
#define vrepmask2	v5	/* 0xf00f per halfword: keep 4 syndrome bits/byte */
#define vend		v6	/* pairwise-reduced syndrome vector */
#define dend		d6	/* low 64 bits of vend = the syndrome */

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-1 are set if the relevant byte matched the
   requested character, bits 2-3 are set if the byte is NUL (or matched), and
   bits 4-7 are not used and must be zero if none of bits 0-3 are set.  Odd
   bytes set bits 4-7 so that adjacent bytes can be merged.  Since the bits
   in the syndrome reflect the order in which things occur in the original
   string, counting trailing zeros identifies exactly which byte matched.  */

ENTRY (__strchr_aarch64_mte)
	PTR_ARG (0)
	/* Align the load address down to 16 so the first ld1 never crosses
	   an MTE tag granule; bytes before srcin are masked off below.  */
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]
	mov	wtmp2, 0x3003
	dup	vrepmask.8h, wtmp2
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	mov	wtmp2, 0xf00f
	dup	vrepmask2.8h, wtmp2

	/* Insert the chr-match bits into the NUL-match vector where vrepmask
	   is set, then keep one nibble per byte (low nibble of even bytes,
	   high nibble of odd bytes) so addp folds two bytes into one while
	   preserving per-byte order in the 64-bit syndrome.  */
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	/* 4 syndrome bits per string byte; lsr uses the shift mod 64, so this
	   is effectively (srcin & 15) * 4.  */
	lsl	tmp3, srcin, 2
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */

	fmov	tmp1, dend
	/* Discard syndrome bits belonging to bytes before the real start.  */
	lsr	tmp1, tmp1, tmp3
	cbz	tmp1, L(loop)

	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	/* Tmp1 is an even multiple of 2 if the target character was
	   found first.  Otherwise we've found the end of string.  */
	tst	tmp1, 2
	add	result, srcin, tmp1, lsr 2
	csel	result, result, xzr, eq		/* NULL if NUL came first */
	ret

	.p2align 4
L(loop):
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* vhas_chr lanes are 0x00 or 0xff, so unsigned >= sets a lane when
	   the byte matched (0xff >= anything) or the byte is 0 (x >= 0):
	   one instruction tests "match or NUL".  */
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

	/* Something was found: rebuild the ordered 4-bits-per-byte syndrome
	   as in the entry path, then locate the first event.  */
#ifdef __AARCH64EB__
	/* Big-endian lane order is byte-swapped, so the complementary mask
	   selection (bif vs bit) applies and the syndrome already reads
	   most-significant-first — no rbit needed before clz.  */
	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	tmp1, dend
#else
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	tmp1, dend
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	/* Tmp1 is an even multiple of 2 if the target character was
	   found first.  Otherwise we've found the end of string.  */
	tst	tmp1, 2
	add	result, src, tmp1, lsr 2
	csel	result, result, xzr, eq		/* NULL if NUL came first */
	ret

END (__strchr_aarch64_mte)