• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *  * Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 *  * Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in
14 *    the documentation and/or other materials provided with the
15 *    distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
24 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
27 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <private/bionic_asm.h>
32
// Prefetch tuning constants used by memcpy's 128-byte block loop.
//
// PLDOFFS: prefetch distance, counted in PLDSIZE-byte blocks. The prefetching
//          loop issues pld at r1 + PLDOFFS*PLDSIZE (and 64 bytes beyond that),
//          and the final PLDOFFS blocks of a copy are moved by a separate loop
//          that issues no pld.
// PLDSIZE: bytes per prefetch block. The block loop is written for the value
//          128 only (it always moves eight q registers per iteration).
33#define PLDOFFS (16)
34#define PLDSIZE (128) /* L2 cache line size */
35
// Assemble the code that follows as 32-bit ARM instructions.
36        .code 32
// __memcpy_chk: size-checked front end for memcpy.
//
// In:  r2 = byte count
//      r3 = limit the count is checked against (presumably the destination
//           object size, per the usual _chk convention -- confirm against
//           callers)
//      r0, r1 are not touched here and reach memcpy unchanged.
//
// If r2 <= r3 (unsigned compare, bls) control is transferred to memcpy with a
// plain branch, so lr still holds the original caller's return address and
// memcpy returns straight to that caller.
// Otherwise lr is pushed and __memcpy_chk_fail is called. No instruction
// follows the bl, so that routine is expected not to return (it is defined
// outside this file -- confirm).
37ENTRY(__memcpy_chk)
38        cmp         r2, r3
39        bls         memcpy
40
41        // Preserve lr for backtrace.
42        push        {lr}
43        .cfi_def_cfa_offset 4
44        .cfi_rel_offset lr, 0
45
46        bl          __memcpy_chk_fail
47END(__memcpy_chk)
48
// memcpy: copy r2 bytes from [r1] to [r0] using NEON for the bulk of the data.
//
// In:      r0 = destination, r1 = source, r2 = byte count
// Out:     r0 = original destination (saved on entry, restored at .Lneon_exit)
// Clobbers r1, r2, r3, r12, q0-q3, q8-q11 and the condition flags.
//          q4-q7 are never used -- presumably because d8-d15 are callee-saved
//          under the ARM procedure call standard; confirm.
// Stack:   one word (the saved r0).
//
// Structure: the count is classified once at entry and then copied in
// progressively smaller units:
//   >= 128 bytes : 128-byte blocks (eight q registers per iteration)
//   >= 32 bytes  : 32-byte blocks
//   >= 16 bytes  : one 16-byte block
//   <  16 bytes  : 8 / 4 / 2 / 1 bytes selected by bits 3..0 of the count
//
// No alignment adjustment of r0 or r1 is performed and the NEON accesses carry
// no alignment qualifier, so all accesses use whatever alignment the caller
// supplies. NOTE(review): this relies on unaligned ldr/ldrh/vld1 accesses being
// permitted on the target -- confirm.
49ENTRY(memcpy)
          // Keep the destination pointer so it can be returned in r0.
50        push            {r0}
51        .cfi_def_cfa_offset 4
52        .cfi_rel_offset r0, 0
          // Dispatch on the count. Each target label expects r2 to be in the
          // range tested here.
          // NOTE(review): blt is a signed comparison, so a count with bit 31
          // set takes the "< 4" path and only (count & 3) bytes are copied.
          // An unsigned branch (blo) would treat the count as size_t --
          // confirm whether such counts can reach this routine.
53        cmp             r2, #4
54        blt             .Lneon_lt4
55        cmp             r2, #16
56        blt             .Lneon_lt16
57        cmp             r2, #32
58        blt             .Lneon_16
59        cmp              r2, #128
60        blt              .Lneon_copy_32_a
61        /* Copy blocks of 128-bytes (word-aligned) at a time*/
62        /* Code below is optimized for PLDSIZE=128 only */
          // r12 = number of whole 128-byte blocks (>= 1 here because r2 >= 128).
63        mov             r12, r2, lsr #7
          // With PLDOFFS blocks or fewer, skip prefetching entirely.
64        cmp             r12, #PLDOFFS
65        ble             .Lneon_copy_128_loop_nopld
          // Otherwise run (blocks - PLDOFFS) iterations with prefetch; the
          // remaining PLDOFFS blocks are handled by the no-prefetch loop below.
66        sub             r12, #PLDOFFS
          // One-off prefetch of the block just before the steady-state
          // prefetch distance.
67        pld             [r1, #(PLDOFFS-1)*PLDSIZE]
68.Lneon_copy_128_loop_outer:
          // Prefetch both 64-byte halves of the block PLDOFFS blocks ahead of
          // the current source position.
69        pld             [r1, #(PLDOFFS*PLDSIZE)]
70        pld             [r1, #(PLDOFFS)*(PLDSIZE)+64]
          // Load 128 bytes (4 x 32), post-incrementing r1.
71        vld1.32         {q0, q1}, [r1]!
72        vld1.32         {q2, q3}, [r1]!
73        vld1.32         {q8, q9}, [r1]!
74        vld1.32         {q10, q11}, [r1]!
          // Decrement the block counter between the loads and the stores; the
          // vst1 instructions do not change the flags, so bne below still sees
          // the result of this subs.
75        subs            r12, r12, #1
          // Store the same 128 bytes, post-incrementing r0.
76        vst1.32         {q0, q1}, [r0]!
77        vst1.32         {q2, q3}, [r0]!
78        vst1.32         {q8, q9}, [r0]!
79        vst1.32         {q10, q11}, [r0]!
80        bne             .Lneon_copy_128_loop_outer
          // Exactly PLDOFFS blocks remain; copy them without prefetch.
81        mov             r12, #PLDOFFS
82.Lneon_copy_128_loop_nopld:
          // Same 128-byte load/store sequence as above, without pld.
          // Entered with r12 >= 1: the loop tests the counter only after the
          // first copy.
83        vld1.32         {q0, q1}, [r1]!
84        vld1.32         {q2, q3}, [r1]!
85        vld1.32         {q8, q9}, [r1]!
86        vld1.32         {q10, q11}, [r1]!
87        subs            r12, r12, #1
88        vst1.32         {q0, q1}, [r0]!
89        vst1.32         {q2, q3}, [r0]!
90        vst1.32         {q8, q9}, [r0]!
91        vst1.32         {q10, q11}, [r0]!
92        bne             .Lneon_copy_128_loop_nopld
          // r2 = bytes left after the 128-byte blocks (0..127).
93        ands            r2, r2, #0x7f
94        beq             .Lneon_exit
          // Fewer than 32 bytes left: skip the 32-byte loop, which must be
          // entered with at least one full block.
95        cmp             r2, #32
96        blt             .Lneon_16
          // NOTE(review): the purpose of this nop is not evident from this
          // file (possibly padding/alignment of the loop below) -- confirm.
97        nop
98        /* Copy blocks of 32-bytes (word aligned) at a time*/
99.Lneon_copy_32_a:
          // r12 = number of whole 32-byte blocks. Both entries to this label
          // guarantee r2 >= 32, so r12 >= 1 and the test-at-bottom loop is safe.
100        mov             r12, r2, lsr #5
101.Lneon_copy_32_loop_a:
102        vld1.32         {q0,q1}, [r1]!
103        subs            r12, r12, #1
104        vst1.32         {q0,q1}, [r0]!
105        bne             .Lneon_copy_32_loop_a
          // r2 = bytes left after the 32-byte blocks (0..31).
106        ands            r2, r2, #0x1f
107        beq             .Lneon_exit
108.Lneon_16:
          // Here r2 < 32. Subtract 16: a negative result means fewer than 16
          // bytes remain. The subtraction leaves bits 3..0 of r2 unchanged,
          // which is all the code at .Lneon_lt16 examines.
109        subs            r2, r2, #16
110        blt             .Lneon_lt16
          // Copy one 16-byte block.
111        vld1.32         {q8}, [r1]!
112        vst1.32         {q8}, [r0]!
          // Flags are still those of the subs above: zero means exactly 16
          // bytes were left and the copy is complete.
113        beq             .Lneon_exit
114.Lneon_lt16:
          // Shift left by 29: bit 3 of r2 lands in the carry flag and bit 2
          // becomes the sign (N) flag. r12 only receives the throw-away result.
115        movs            r12, r2, lsl #29
          // Carry clear -> bit 3 clear -> no 8-byte chunk.
116        bcc             .Lneon_skip8
          // Copy 8 bytes as two words. ldr/str leave the flags alone, so N
          // (bit 2) is still valid at .Lneon_skip8.
117        ldr             r3, [r1], #4
118        ldr             r12, [r1], #4
119        str             r3, [r0], #4
120        str             r12, [r0], #4
121.Lneon_skip8:
          // N clear -> bit 2 clear -> no 4-byte chunk.
122        bpl             .Lneon_lt4
123        ldr             r3, [r1], #4
124        str             r3, [r0], #4
125.Lneon_lt4:
          // Shift left by 31: bit 1 of r2 lands in the carry flag and bit 0
          // becomes the sign (N) flag.
126        movs            r2, r2, lsl #31
          // Carry clear -> bit 1 clear -> no 2-byte chunk.
127        bcc             .Lneon_lt2
128        ldrh            r3, [r1], #2
129        strh            r3, [r0], #2
130.Lneon_lt2:
          // N clear -> bit 0 clear -> no final byte.
131        bpl             .Lneon_exit
132        ldrb            r12, [r1]
133        strb            r12, [r0]
134.Lneon_exit:
          // Restore the original destination pointer as the return value.
135        pop             {r0}
136        bx              lr
137
138END(memcpy)
139