/*
 * strlen - calculate the length of a string.
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "../asmdefs.h"

/* Register roles.  srcin and result deliberately alias x0: the argument
   stays live until the final length is written over it.  */
#define srcin		x0
#define result		x0

#define src		x1
#define synd		x2
#define tmp		x3
#define wtmp		w3
#define shift		x4

#define data		q0
#define vdata		v0
#define vhas_nul	v1
#define vrepmask	v2
#define vend		v3
#define dend		d3

/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  Every byte is compared against NUL, giving 0xff for a NUL byte
   and 0x00 otherwise.  The result is masked with 0x0f in even byte lanes and
   0xf0 in odd byte lanes, so for even bytes only bits 0-3 can be set and for
   odd bytes only bits 4-7 can be set.  Adding adjacent byte pairs then merges
   two source bytes into one syndrome byte without any carry.  Since the bits
   in the syndrome reflect the order in which bytes occur in the original
   string, counting trailing zeros and dividing by four identifies exactly
   which byte was the first NUL.  */

/* __strlen_aarch64_mte

   In:       x0 (srcin)  = address of the first byte of the string.
   Out:      x0 (result) = number of bytes before the first NUL byte.
   Scratch:  x1-x4, v0-v3 are overwritten by the code below.

   Every memory access is a 16-byte load from a 16-byte aligned address
   (the start address is rounded down, then advanced in steps of 16), so a
   load never straddles an aligned 16-byte block.

   NOTE(review): ENTRY, END, L and PTR_ARG come from asmdefs.h, which is not
   shown here; anything they expand to is outside this description.  */

ENTRY (__strlen_aarch64_mte)
	PTR_ARG (0)
	bic	src, srcin, 15		/* src = srcin rounded down to 16.  */
	mov	wtmp, 0xf00f
	/* First chunk: the aligned block containing srcin.  It may include
	   bytes that precede the string; they are discarded by the shift
	   below.  NOTE(review): ld1 with byte elements is expected to put the
	   byte at src+i in lane i on either endianness, which would explain
	   why the rbit on this path is unconditional - confirm against the
	   Arm ARM.  */
	ld1	{vdata.16b}, [src]
	dup	vrepmask.8h, wtmp	/* 0x0f in even lanes, 0xf0 in odd.  */
	cmeq	vhas_nul.16b, vdata.16b, 0	/* 0xff where the byte is NUL.  */
	/* Four syndrome bits per byte, so the shift count is 4 * srcin.  Only
	   4 * (srcin & 15) is wanted; this relies on the register form of lsr
	   using the shift amount modulo 64.  */
	lsl	shift, srcin, 2
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift	/* Drop nibbles of bytes before srcin.  */
	cbz	synd, L(loop)		/* No NUL in the first chunk.  */

	/* NUL found in the first chunk: trailing zero count / 4 is the
	   length, because bit 0 now corresponds to the byte at srcin.  */
	rbit	synd, synd
	clz	result, synd
	lsr	result, result, 2
	ret

	.p2align 5
L(loop):
	ldr	data, [src, 16]!	/* src += 16, then load 16 bytes.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* Pairwise maximum of the compare result with itself: the low 64
	   bits are nonzero exactly when some lane matched.  This is only a
	   yes/no test; the precise syndrome is built after the loop.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)

	/* A NUL is in the chunk at src.  Build the 4-bit-per-byte syndrome
	   as described in the core algorithm comment.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	sub	result, src, srcin	/* Bytes from srcin to this chunk.  */
	fmov	synd, dend
	/* Little-endian builds reverse the syndrome so that clz counts from
	   the first byte of the chunk.  Big-endian builds skip the reversal;
	   NOTE(review): presumably because a big-endian ldr of a q register
	   already leaves the lowest-addressed byte in the most significant
	   lane - confirm against the Arm ARM.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	tmp, synd		/* 4 * index of the NUL in the chunk.  */
	add	result, result, tmp, lsr 2
	ret

END (__strlen_aarch64_mte)
