1// Copyright (c) 2010-2011, Linaro Limited 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions 6// are met: 7// 8// * Redistributions of source code must retain the above copyright 9// notice, this list of conditions and the following disclaimer. 10// 11// * Redistributions in binary form must reproduce the above copyright 12// notice, this list of conditions and the following disclaimer in the 13// documentation and/or other materials provided with the distribution. 14// 15// * Neither the name of Linaro Limited nor the names of its 16// contributors may be used to endorse or promote products derived 17// from this software without specific prior written permission. 18// 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30// 31 32// 33// Written by Dave Gilbert <david.gilbert@linaro.org> 34// 35// This memchr routine is optimised on a Cortex-A9 and should work on 36// all ARMv7 processors. It has a fast past for short sizes, and has 37// an optimised path for large data sets; the worst case is finding the 38// match early in a large data set. 39// 40 41 42// 2011-02-07 david.gilbert@linaro.org 43// Extracted from local git a5b438d861 44// 2011-07-14 david.gilbert@linaro.org 45// Import endianness fix from local git ea786f1b 46// 2011-12-07 david.gilbert@linaro.org 47// Removed unneeded cbz from align loop 48 49// this lets us check a flag in a 00/ff byte easily in either endianness 50#define CHARTSTMASK(c) 1<<(c*8) 51 52 .text 53 .thumb 54 .syntax unified 55 56 .type ASM_PFX(InternalMemScanMem8), %function 57ASM_GLOBAL ASM_PFX(InternalMemScanMem8) 58ASM_PFX(InternalMemScanMem8): 59 // r0 = start of memory to scan 60 // r1 = length 61 // r2 = character to look for 62 // returns r0 = pointer to character or NULL if not found 63 uxtb r2, r2 // Don't think we can trust the caller to actually pass a char 64 65 cmp r1, #16 // If it's short don't bother with anything clever 66 blt 20f 67 68 tst r0, #7 // If it's already aligned skip the next bit 69 beq 10f 70 71 // Work up to an aligned point 725: 73 ldrb r3, [r0],#1 74 subs r1, r1, #1 75 cmp r3, r2 76 beq 50f // If it matches exit found 77 tst r0, #7 78 bne 5b // If not aligned yet then do next byte 79 8010: 81 // At this point, we are aligned, we know we have at least 8 bytes to work with 82 push {r4-r7} 83 orr r2, r2, r2, lsl #8 // expand the match word across to all bytes 84 orr r2, r2, r2, lsl #16 85 bic r4, r1, #7 // Number of double words to work with 86 mvns r7, #0 // all F's 87 movs r3, #0 88 8915: 90 ldmia r0!, {r5,r6} 91 subs r4, r4, #8 92 eor r5, r5, r2 // Get it so that r5,r6 have 00's where the bytes match the target 93 eor r6, r6, r2 94 uadd8 r5, r5, r7 // Parallel add 0xff - sets the GE bits for anything that wasn't 0 95 sel r5, r3, r7 // bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION 96 uadd8 r6, r6, r7 // Parallel add 0xff - sets the GE bits for anything that wasn't 0 97 sel r6, r5, r7 // chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION 98 cbnz r6, 60f 99 bne 15b // (Flags from the subs above) If not run out of bytes then go around again 100 101 pop {r4-r7} 102 and r2, r2, #0xff // Get r2 back to a single character from the expansion above 103 and r1, r1, #7 // Leave the count remaining as the number after the double words have been done 104 10520: 106 cbz r1, 40f // 0 length or hit the end already then not found 107 10821: // Post aligned section, or just a short call 109 ldrb r3, [r0], #1 110 subs r1, r1, #1 111 eor r3, r3, r2 // r3 = 0 if match - doesn't break flags from sub 112 cbz r3, 50f 113 bne 21b // on r1 flags 114 11540: 116 movs r0, #0 // not found 117 bx lr 118 11950: 120 subs r0, r0, #1 // found 121 bx lr 122 12360: // We're here because the fast path found a hit - now we have to track down exactly which word it was 124 // r0 points to the start of the double word after the one that was tested 125 // r5 has the 00/ff pattern for the first word, r6 has the chained value 126 subs r0, r0, #3 127 cmp r5, #0 128 it eq 129 moveq.n r5, r6 // the end is in the 2nd word 130 it ne 131 subne.n r0, r0, #4 // or 2nd byte of 1st word 132 133 // r0 currently points to the 3rd byte of the word containing the hit 134 tst r5, #CHARTSTMASK(0) // 1st character 135 bne 61f 136 adds r0, r0, #1 137 tst r5, #CHARTSTMASK(1) // 2nd character 138 bne 61f 139 adds r0, r0 ,#1 140 tst r5, #(3 << 15) // 2nd & 3rd character 141 // If not the 3rd must be the last one 142 it eq 143 addeq.n r0, r0, #1 144 14561: 146 pop {r4-r7} 147 subs r0, r0, #1 148 bx lr 149