/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include <private/bionic_asm.h>

#define L(l) .L ## l

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

/* int memcmp(const void *src1, const void *src2, size_t limit)
 *
 * In:      x0 = src1, x1 = src2, x2 = limit (byte count, always treated
 *          as an unsigned quantity).
 * Out:     w0 = 0 when the buffers match.  Paths that compare whole words
 *          return -1 or 1; the byte loop used for the final 1-3 bytes of
 *          inputs shorter than 8 bytes returns the difference of the
 *          first mismatching bytes.
 * Writes:  x0-x7 and the NZCV flags.  The stack is not touched.
 *
 * Small inputs of less than 8 bytes are handled separately.  This allows the
 * main code to be sped up using unaligned loads since there are now at least
 * 8 bytes to be compared.  If the first 8 bytes are equal, align src1.
 * This ensures each iteration does at most one unaligned access even if both
 * src1 and src2 are unaligned, and mutually aligned inputs behave as if
 * aligned.  After the main loop, process the last 16 bytes using unaligned
 * accesses.  */

ENTRY(memcmp)
/* NOTE(review): this alignment directive follows the ENTRY label; confirm
   against the ENTRY macro that no padding ends up after the symbol.  */
.p2align 6
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Limit >= 8, so check first 8 bytes using unaligned loads.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit becomes count - 16.  The count is unsigned, so the test must
	   be unsigned too: a signed "greater than" would send a count with
	   the top bit set down the short path below, where the wrapped
	   offset addresses bytes outside the requested range.  */
	subs	limit, limit, 8
	b.hi	L(more16)

	/* 8 <= count <= 16: limit is in [-8..0], so this reloads the final
	   8 bytes of each buffer, overlapping the ones already checked.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.
	   (limit is count - 32 here, hence the comparison against 96.)  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* Chain the conditions: only while bytes remain (hi) compare the low
	   words, and only if those match compare the high words.  A failed
	   condition forces NZCV to 0, which reads as "not equal".  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop ended on a mismatch or because limit ran out; find out
	   which by re-checking both word pairs.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is <= 0
	   here, so the pointers step back and the final 16 bytes of each
	   buffer are reloaded.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.  */
L(return):
#ifndef __AARCH64EB__
	/* Little-endian: byte-reverse so the lowest-addressed byte becomes
	   the most significant one in the unsigned compare below.  */
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	cset	result, ne		/* 0 if equal, otherwise 1.  */
	cneg	result, result, lo	/* Negate when data1 < data2 (unsigned).  */
	ret

	.p2align 4
	/* Compare fewer than 8 bytes (count 0..7).  Limit is count - 8,
	   i.e. [-8..-1].  */
L(less8):
	adds	limit, limit, 4
	b.lo	L(less4)		/* No carry out: count < 4.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	adds	limit, limit, 4		/* limit = bytes still to compare (0..3).  */
	b.eq	L(ret_eq)		/* Nothing left; Z is set so result is 0.  */
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret

END(memcmp)