/*
 * Copyright (C) 2017 The Android Open Source Project
 * All rights reserved.
 *
 * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

/*
 * Prefetch tuning for the 128-byte bulk loop: each iteration issues PLDs
 * PLDOFFS * PLDSIZE bytes ahead of the current source pointer.  The final
 * PLDOFFS blocks (or all of them, when there are no more than PLDOFFS) are
 * copied by a second loop that issues no PLDs.
 */
#define PLDOFFS (16)
#define PLDSIZE (128) /* L2 cache line size */

        .code 32

/*
 * __memcpy_chk: length-checked front end for memcpy.
 *
 * In:   r0, r1 = passed through untouched to memcpy
 *       r2     = byte count
 *       r3     = upper bound that r2 is checked against
 *                (presumably the destination object size - confirm against
 *                the callers)
 *
 * If r2 <= r3 (unsigned) control is transferred to memcpy with a plain
 * branch, so memcpy returns directly to the original caller.  Otherwise
 * __memcpy_chk_fail is called.
 */
ENTRY(__memcpy_chk)
        cmp             r2, r3
        bls             memcpy

        // Preserve lr for backtrace.
        push            {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // NOTE(review): nothing follows this call inside the function, so
        // __memcpy_chk_fail is assumed never to return - confirm.
        bl              __memcpy_chk_fail
END(__memcpy_chk)

/*
 * memcpy: copy r2 bytes from [r1] to [r0] using NEON loads/stores.
 *
 * In:       r0 = destination pointer
 *           r1 = source pointer
 *           r2 = byte count
 * Out:      r0 = destination pointer as passed in (saved on entry,
 *                restored at .Lneon_exit)
 * Clobbers: r1, r2, r3, r12, q0-q3, q8-q11, flags
 *           (q4-q7 alias d8-d15, which are callee-saved under AAPCS, and
 *           are not touched)
 * Stack:    4 bytes (saved r0)
 *
 * The byte count is unsigned, so every branch on it uses an unsigned
 * condition (lo / ls).  Signed conditions would treat a count with bit 31
 * set as negative and fall into the "< 4 bytes" tail.
 *
 * Structure:
 *   count >= 128 : 128-byte blocks (with, then without, prefetch)
 *   count >=  32 : 32-byte blocks
 *   count >=  16 : one 16-byte block
 *   remainder    : 8 / 4 / 2 / 1 bytes selected by bits 3..0 of the count
 */
ENTRY(memcpy)
        push            {r0}                    // keep dst for the return value
        .cfi_def_cfa_offset 4
        .cfi_rel_offset r0, 0

        /* Dispatch on the byte count. */
        cmp             r2, #4
        blo             .Lneon_lt4
        cmp             r2, #16
        blo             .Lneon_lt16
        cmp             r2, #32
        blo             .Lneon_16
        cmp             r2, #128
        blo             .Lneon_copy_32_a

        /* Copy blocks of 128-bytes (word-aligned) at a time. */
        /* Code below is optimized for PLDSIZE=128 only. */
        mov             r12, r2, lsr #7         // r12 = number of 128-byte blocks (>= 1)
        cmp             r12, #PLDOFFS
        bls             .Lneon_copy_128_loop_nopld
        sub             r12, #PLDOFFS           // blocks to copy with prefetch
        pld             [r1, #(PLDOFFS-1)*PLDSIZE]

        /* r12 = 128-byte blocks left in this loop, always > 0 here. */
.Lneon_copy_128_loop_outer:
        pld             [r1, #(PLDOFFS*PLDSIZE)]
        pld             [r1, #(PLDOFFS)*(PLDSIZE)+64]
        vld1.32         {q0, q1}, [r1]!
        vld1.32         {q2, q3}, [r1]!
        vld1.32         {q8, q9}, [r1]!
        vld1.32         {q10, q11}, [r1]!
        subs            r12, r12, #1            // flags consumed by the bne below
        vst1.32         {q0, q1}, [r0]!
        vst1.32         {q2, q3}, [r0]!
        vst1.32         {q8, q9}, [r0]!
        vst1.32         {q10, q11}, [r0]!
        bne             .Lneon_copy_128_loop_outer
        mov             r12, #PLDOFFS           // remaining blocks, copied without PLD

        /* r12 = 128-byte blocks left, always > 0 here; no prefetching. */
.Lneon_copy_128_loop_nopld:
        vld1.32         {q0, q1}, [r1]!
        vld1.32         {q2, q3}, [r1]!
        vld1.32         {q8, q9}, [r1]!
        vld1.32         {q10, q11}, [r1]!
        subs            r12, r12, #1            // flags consumed by the bne below
        vst1.32         {q0, q1}, [r0]!
        vst1.32         {q2, q3}, [r0]!
        vst1.32         {q8, q9}, [r0]!
        vst1.32         {q10, q11}, [r0]!
        bne             .Lneon_copy_128_loop_nopld

        ands            r2, r2, #0x7f           // r2 = bytes left after the 128-byte blocks
        beq             .Lneon_exit
        cmp             r2, #32
        blo             .Lneon_16
        // NOTE(review): the purpose of this nop is not evident from the code
        // (possibly padding for the loop below) - confirm before removing.
        nop

        /* Copy blocks of 32-bytes (word aligned) at a time. */
        /* r2 >= 32 on entry, so r12 below is >= 1. */
.Lneon_copy_32_a:
        mov             r12, r2, lsr #5         // r12 = number of 32-byte blocks
.Lneon_copy_32_loop_a:
        vld1.32         {q0,q1}, [r1]!
        subs            r12, r12, #1            // flags consumed by the bne below
        vst1.32         {q0,q1}, [r0]!
        bne             .Lneon_copy_32_loop_a
        ands            r2, r2, #0x1f           // r2 = bytes left after the 32-byte blocks
        beq             .Lneon_exit

        /* r2 is in 1..31 here: copy one 16-byte block if it fits. */
.Lneon_16:
        subs            r2, r2, #16
        blo             .Lneon_lt16             // fewer than 16 bytes: nothing copied here
        vld1.32         {q8}, [r1]!
        vst1.32         {q8}, [r0]!
        beq             .Lneon_exit             // Z still from the subs: count was exactly 16

        /*
         * Fewer than 16 bytes remain.  Only bits 3..0 of r2 are examined
         * from here on, so it does not matter that r2 may have wrapped
         * below zero in the subs above (subtracting 16 leaves bits 3..0
         * unchanged).
         */
.Lneon_lt16:
        movs            r12, r2, lsl #29        // C = bit 3 (8 bytes), N = bit 2 (4 bytes)
        bcc             .Lneon_skip8
        ldr             r3, [r1], #4
        ldr             r12, [r1], #4
        str             r3, [r0], #4
        str             r12, [r0], #4
.Lneon_skip8:
        bpl             .Lneon_lt4              // N still from the movs above
        ldr             r3, [r1], #4
        str             r3, [r0], #4
.Lneon_lt4:
        movs            r2, r2, lsl #31         // C = bit 1 (2 bytes), N = bit 0 (1 byte)
        bcc             .Lneon_lt2
        ldrh            r3, [r1], #2
        strh            r3, [r0], #2
.Lneon_lt2:
        bpl             .Lneon_exit             // N still from the movs above
        ldrb            r12, [r1]
        strb            r12, [r0]
.Lneon_exit:
        pop             {r0}                    // return the original destination
        bx              lr

END(memcpy)