/*
 * strchrnul - find a character or nul in a string
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/*
 * char *strchrnul (const char *s, int c)
 *
 * In:   x0 = s, w1 = c (only the low byte is used, via dup .16b)
 * Out:  x0 = address of the first byte equal to c, or of the terminating
 *	      NUL when c does not occur before it
 * Uses: x1-x3, v0, v1, v3, v4 as scratch; no stack.
 *
 * Every load is issued from a 16-byte aligned address, so a load never
 * straddles a 16-byte boundary.
 */

/* Argument and return registers.  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* Scalar scratch.  synd reuses x1: chrin is consumed by the very first
   dup, before synd is written for the first time.  */
#define chunk		x2	/* 16-byte aligned address being examined */
#define synd		x1	/* 64-bit syndrome moved out of the vector unit */
#define shift		x3	/* entry only: srcin << 2 (lsr uses the low 6 bits) */

/* Vector scratch.  */
#define vchr_rep	v0	/* search byte replicated to all 16 lanes */
#define vchunk		v1	/* the 16 bytes currently loaded */
#define qchunk		q1	/* Q view of vchunk, for ldr */
#define vhas_nul	v2	/* not referenced by any instruction below */
#define vmatch		v3	/* 0xff in each lane holding c or NUL */
#define vsynd		v4	/* syndrome narrowed to the low 64 bits */
#define dsynd		d4	/* D view of vsynd, for fmov */

/*
   How a hit is located:

   cmeq marks the lanes equal to the search byte with 0xff.  The following
   cmhs (mask >= data, unsigned) keeps those lanes, since 0xff >= anything,
   and additionally sets the lanes whose data byte is zero, since 0 >= 0.
   One vector therefore flags "search byte or NUL".

   shrn by 4 over the .8h view then narrows that 128-bit mask to 64 bits,
   leaving one nibble per source byte in string order.  The bit index of
   the first set nibble, divided by four (the "lsr 2" in the final add),
   is the byte offset of the hit within the chunk.  */

ENTRY (__strchrnul_aarch64_mte)
	/* PTR_ARG comes from asmdefs.h; presumably it normalises pointer
	   argument 0 for ILP32 builds -- confirm against that header.  */
	PTR_ARG (0)

	/* Entry chunk: round the pointer down to 16 bytes and test the whole
	   aligned chunk, including any bytes that precede the string.  */
	bic	chunk, srcin, 15
	dup	vchr_rep.16b, chrin
	ld1	{vchunk.16b}, [chunk]
	cmeq	vmatch.16b, vchunk.16b, vchr_rep.16b
	cmhs	vmatch.16b, vmatch.16b, vchunk.16b
	lsl	shift, srcin, 2			/* 4 syndrome bits per byte */
	shrn	vsynd.8b, vmatch.8h, 4		/* 16 byte flags -> 16 nibbles */
	fmov	synd, dsynd
	lsr	synd, synd, shift		/* drop nibbles of bytes before srcin */
	cbz	synd, L(scan_loop)

	/* Hit in the entry chunk.  After the shift, bit 0 corresponds to the
	   byte at srcin, so the offset is relative to srcin itself.  */
	rbit	synd, synd
	clz	synd, synd
	add	result, srcin, synd, lsr 2
	ret

	.p2align 4
	/* Main loop, two chunks per iteration.  umaxp folds the 128-bit flag
	   vector into 64 bits, which is sufficient for a "was there any hit"
	   test; the exact position is only computed once, at L(found).  */
L(scan_loop):
	ldr	qchunk, [chunk, 16]		/* first chunk; pointer not advanced */
	cmeq	vmatch.16b, vchunk.16b, vchr_rep.16b
	cmhs	vmatch.16b, vmatch.16b, vchunk.16b
	umaxp	vsynd.16b, vmatch.16b, vmatch.16b
	fmov	synd, dsynd
	cbnz	synd, L(found)
	ldr	qchunk, [chunk, 32]!		/* second chunk; chunk += 32 */
	cmeq	vmatch.16b, vchunk.16b, vchr_rep.16b
	cmhs	vmatch.16b, vmatch.16b, vchunk.16b
	umaxp	vsynd.16b, vmatch.16b, vmatch.16b
	fmov	synd, dsynd
	cbz	synd, L(scan_loop)
	/* The second-chunk hit is at chunk, whereas a first-chunk hit arrives
	   at L(found) with the hit at chunk + 16.  Back up by 16 so that the
	   shared "add 16" below is right for both.  */
	sub	chunk, chunk, 16
L(found):
	shrn	vsynd.8b, vmatch.8h, 4		/* 16 byte flags -> 16 nibbles */
	add	chunk, chunk, 16		/* chunk = base of the chunk with the hit */
	fmov	synd, dsynd
	/* Little-endian: the first byte sits in the lowest nibble, so reverse
	   the bits before counting leading zeros.  Big-endian builds skip the
	   rbit: the chunk was fetched with ldr, whose register byte order is
	   endian-dependent, unlike the ld1 used for the entry chunk.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd
	add	result, chunk, synd, lsr 2
	ret

END (__strchrnul_aarch64_mte)
