• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (c) 2011 - 2013, ARM Ltd
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 *    products derived from this software without specific prior written
15 *    permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29
.text
.align 2                                // AArch64 GAS: 2^2 = 4-byte alignment


GCC_ASM_EXPORT(memcpy)


// Taken from Newlib BSD implementation.
//
// void *memcpy (void *dst, const void *src, size_t nb)
//
//   In:     x0 = dst, x1 = src, x2 = nb (byte count, unsigned)
//   Out:    x0 = dst (x0 is never written; x6 is the working dst pointer)
//   Clobb:  x1-x6, x8-x13, NZCV flags
//   Stack:  none (leaf routine, makes no calls)
//
// The source and destination regions must not overlap: the alignment
// fix-up below deliberately copies some bytes twice.
ASM_PFX(memcpy):
        // Copy dst to x6, so we can preserve return value.
        mov     x6, x0

        // nb is a size_t, so every length test below uses the unsigned
        // conditions (lo = borrow, hs = no borrow) on the flags left by
        // cmp/subs.  The pre-decremented counter in x2 is allowed to wrap
        // below zero; the carry flag, not the sign bit, says whether the
        // bytes just subtracted were really available.

        // Require at least 64 bytes to be worth aligning.
        cmp     x2, #64
        b.lo    qwordcopy

        // Compute offset to align destination to 16 bytes.
        neg     x3, x0
        and     x3, x3, #15             // x3 = (-dst) & 15 = bytes to next 16-byte boundary

        cbz     x3, blockcopy           // offset == 0 is likely

        // We know there is at least 64 bytes to be done, so we
        // do a 16 byte misaligned copy at first and then later do
        // all 16-byte aligned copies.  Some bytes will be copied
        // twice, but there's no harm in that since memcpy does not
        // guarantee correctness on overlap.

        sub     x2, x2, x3              // nb -= offset (cannot wrap: nb >= 64, offset <= 15)
        ldp     x4, x5, [x1]
        add     x1, x1, x3
        stp     x4, x5, [x6]
        add     x6, x6, x3

        // The destination pointer is now qword (16 byte) aligned.
        // (The src pointer might be.)

blockcopy:
        // Copy 64 bytes at a time.
        // Invariant inside the loop: x2 = (bytes remaining) - 64.
        subs    x2, x2, #64
        b.lo    3f                      // fewer than 64 bytes left
2:      subs    x2, x2, #64             // flags consumed by b.hs below; nothing in between sets flags
        ldp     x4, x5, [x1,#0]
        ldp     x8, x9, [x1,#16]
        ldp     x10,x11,[x1,#32]
        ldp     x12,x13,[x1,#48]
        add     x1, x1, #64
        stp     x4, x5, [x6,#0]
        stp     x8, x9, [x6,#16]
        stp     x10,x11,[x6,#32]
        stp     x12,x13,[x6,#48]
        add     x6, x6, #64
        b.hs    2b                      // another full 64-byte block is available

        // Unwind pre-decrement
3:      add     x2, x2, #64             // x2 = bytes remaining (0-63)

qwordcopy:
        // Copy 0-48 bytes, 16 bytes at a time.
        subs    x2, x2, #16
        b.lo    tailcopy                // fewer than 16 bytes left
2:      ldp     x4, x5, [x1],#16
        subs    x2, x2, #16             // post-indexed stp below leaves the flags intact
        stp     x4, x5, [x6],#16
        b.hs    2b

        // No need to unwind the pre-decrement, it would not change
        // the low 4 bits of the count. But how likely is it for the
        // byte count to be multiple of 16? Is it worth the overhead
        // of testing for x2 == -16?

tailcopy:
        // Copy trailing 0-15 bytes.
        // Only bits 3..0 of x2 are examined; each set bit selects one
        // power-of-two sized copy.
        tbz     x2, #3, 1f
        ldr     x4, [x1],#8             // copy 8 bytes
        str     x4, [x6],#8
1:
        tbz     x2, #2, 1f
        ldr     w4, [x1],#4             // copy 4 bytes
        str     w4, [x6],#4
1:
        tbz     x2, #1, 1f
        ldrh    w4, [x1],#2             // copy 2 bytes
        strh    w4, [x6],#2
1:
        tbz     x2, #0, return
        ldrb    w4, [x1]                // copy 1 byte
        strb    w4, [x6]

return:
        // This is the only return point of memcpy.
        ret
126