/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "asm.h"

// We do not need to check whether the addresses are in the
// kernel or virtual address spaces, since we only access them
// using user privileges.

.syntax unified
.arm

//---------------------------------------------------------------------------
// size_t _arm_user_copy(void *dst, const void *src, size_t len)
//
// Optimised copy derived from NetBSD's memcpy_arm.S, used to copy to/from
// user memory.  Any load or store that may fault carries a numeric local
// label (0: .. 36:); the __exc_table section at the bottom maps each such
// instruction to a fixup stub so that a fault aborts the copy cleanly.
//
// In:    r0 = dst, r1 = src, r2 = len
// Out:   r0 = 0 on success; on a fault, a (conservative) count of the
//        bytes that were NOT copied, computed by the .Lfix_return* stubs.
// Clobb: r3, r12, lr, flags; r4 (and r5) are saved/restored around the
//        bulk-copy loops.
// NOTE(review): the unaligned-source paths assemble words with lsr/lsl
// shifts that assume little-endian byte order — confirm for this target.
//---------------------------------------------------------------------------
FUNCTION(_arm_user_copy)
    /* Save the original dst/src/len (and lr): the fault fixup code pops
     * these to work out how many bytes were left uncopied. */
    stmdb   sp!, {r0, r1, r2, lr}

    subs    r2, r2, #4
    blt     .Lmemcpy_l4         /* less than 4 bytes */
    ands    r12, r0, #3
    bne     .Lmemcpy_destul     /* oh unaligned destination addr */
    ands    r12, r1, #3
    bne     .Lmemcpy_srcul      /* oh unaligned source addr */

.Lmemcpy_t8:
    /* We have aligned source and destination */
    subs    r2, r2, #8
    blt     .Lmemcpy_l12        /* less than 12 bytes (4 from above) */
    subs    r2, r2, #0x14
    blt     .Lmemcpy_l32        /* less than 32 bytes (12 from above) */
    stmdb   sp!, {r4}           /* borrow r4 */

    /* blat 32 bytes at a time */
    /* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
0:  ldmia   r1!, {r3, r4, r12, lr}
1:  stmia   r0!, {r3, r4, r12, lr}
2:  ldmia   r1!, {r3, r4, r12, lr}
3:  stmia   r0!, {r3, r4, r12, lr}
    subs    r2, r2, #0x20
    bge     .Lmemcpy_loop32

    cmn     r2, #0x10
4:  ldmiage r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
5:  stmiage r0!, {r3, r4, r12, lr}
    subge   r2, r2, #0x10
    ldmia   sp!, {r4}           /* return r4 */

.Lmemcpy_l32:
    adds    r2, r2, #0x14

    /* blat 12 bytes at a time */
.Lmemcpy_loop12:
6:  ldmiage r1!, {r3, r12, lr}
7:  stmiage r0!, {r3, r12, lr}
    subsge  r2, r2, #0x0c
    bge     .Lmemcpy_loop12

.Lmemcpy_l12:
    adds    r2, r2, #8
    blt     .Lmemcpy_l4

    subs    r2, r2, #4
8:  ldrlt   r3, [r1], #4        /* lt: exactly one word left */
9:  strlt   r3, [r0], #4
10: ldmiage r1!, {r3, r12}      /* ge: two words left */
11: stmiage r0!, {r3, r12}
    subge   r2, r2, #4

.Lmemcpy_l4:
    /* less than 4 bytes to go */
    adds    r2, r2, #4
    beq     .Lmemcpy_return
    /* copy the crud byte at a time: 1 always, 2nd if ge 2, 3rd if gt 2 */
    cmp     r2, #2
12: ldrb    r3, [r1], #1
13: strb    r3, [r0], #1
14: ldrbge  r3, [r1], #1
15: strbge  r3, [r0], #1
16: ldrbgt  r3, [r1], #1
17: strbgt  r3, [r0], #1

.Lmemcpy_return:
    ldmia   sp!, {r0, r1, r2, lr}
    mov     r0, #0              /* success: 0 bytes remaining */
    bx      lr

    /* erg - unaligned destination */
.Lmemcpy_destul:
    rsb     r12, r12, #4        /* r12 = bytes needed to align dst (1-3) */
    cmp     r12, #2

    /* align destination with byte copies */
18: ldrb    r3, [r1], #1
19: strb    r3, [r0], #1
20: ldrbge  r3, [r1], #1
21: strbge  r3, [r0], #1
22: ldrbgt  r3, [r1], #1
23: strbgt  r3, [r0], #1
    subs    r2, r2, r12
    blt     .Lmemcpy_l4         /* less the 4 bytes */

    ands    r12, r1, #3
    beq     .Lmemcpy_t8         /* we have an aligned source */

    /* erg - unaligned source */
    /* This is where it gets nasty ... */
.Lmemcpy_srcul:
    bic     r1, r1, #3          /* word-align src; lr holds the partial word */
24: ldr     lr, [r1], #4
    cmp     r12, #2
    bgt     .Lmemcpy_srcul3
    beq     .Lmemcpy_srcul2
    cmp     r2, #0x0c
    blt     .Lmemcpy_srcul1loop4
    sub     r2, r2, #0x0c
    stmdb   sp!, {r4, r5}       /* borrow r4/r5 */

    /* src is 1 byte past a word boundary: shift by 8/24 */
.Lmemcpy_srcul1loop16:
    mov     r3, lr, lsr #8
25: ldmia   r1!, {r4, r5, r12, lr}
    orr     r3, r3, r4, lsl #24
    mov     r4, r4, lsr #8
    orr     r4, r4, r5, lsl #24
    mov     r5, r5, lsr #8
    orr     r5, r5, r12, lsl #24
    mov     r12, r12, lsr #8
    orr     r12, r12, lr, lsl #24
26: stmia   r0!, {r3-r5, r12}
    subs    r2, r2, #0x10
    bge     .Lmemcpy_srcul1loop16
    ldmia   sp!, {r4, r5}
    adds    r2, r2, #0x0c
    blt     .Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
    mov     r12, lr, lsr #8
27: ldr     lr, [r1], #4
    orr     r12, r12, lr, lsl #24
28: str     r12, [r0], #4
    subs    r2, r2, #4
    bge     .Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
    sub     r1, r1, #3          /* rewind src to the true byte position */
    b       .Lmemcpy_l4

    /* src is 2 bytes past a word boundary: shift by 16/16 */
.Lmemcpy_srcul2:
    cmp     r2, #0x0c
    blt     .Lmemcpy_srcul2loop4
    sub     r2, r2, #0x0c
    stmdb   sp!, {r4, r5}       /* borrow r4/r5 */

.Lmemcpy_srcul2loop16:
    mov     r3, lr, lsr #16
29: ldmia   r1!, {r4, r5, r12, lr}
    orr     r3, r3, r4, lsl #16
    mov     r4, r4, lsr #16
    orr     r4, r4, r5, lsl #16
    mov     r5, r5, lsr #16
    orr     r5, r5, r12, lsl #16
    mov     r12, r12, lsr #16
    orr     r12, r12, lr, lsl #16
30: stmia   r0!, {r3-r5, r12}
    subs    r2, r2, #0x10
    bge     .Lmemcpy_srcul2loop16
    ldmia   sp!, {r4, r5}
    adds    r2, r2, #0x0c
    blt     .Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
    mov     r12, lr, lsr #16
31: ldr     lr, [r1], #4
    orr     r12, r12, lr, lsl #16
32: str     r12, [r0], #4
    subs    r2, r2, #4
    bge     .Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
    sub     r1, r1, #2          /* rewind src to the true byte position */
    b       .Lmemcpy_l4

    /* src is 3 bytes past a word boundary: shift by 24/8 */
.Lmemcpy_srcul3:
    cmp     r2, #0x0c
    blt     .Lmemcpy_srcul3loop4
    sub     r2, r2, #0x0c
    stmdb   sp!, {r4, r5}       /* borrow r4/r5 */

.Lmemcpy_srcul3loop16:
    mov     r3, lr, lsr #24
33: ldmia   r1!, {r4, r5, r12, lr}
    orr     r3, r3, r4, lsl #8
    mov     r4, r4, lsr #24
    orr     r4, r4, r5, lsl #8
    mov     r5, r5, lsr #24
    orr     r5, r5, r12, lsl #8
    mov     r12, r12, lsr #24
    orr     r12, r12, lr, lsl #8
34: stmia   r0!, {r3-r5, r12}
    subs    r2, r2, #0x10
    bge     .Lmemcpy_srcul3loop16
    ldmia   sp!, {r4, r5}
    adds    r2, r2, #0x0c
    blt     .Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
    mov     r12, lr, lsr #24
35: ldr     lr, [r1], #4
    orr     r12, r12, lr, lsl #8
36: str     r12, [r0], #4
    subs    r2, r2, #4
    bge     .Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
    sub     r1, r1, #1          /* rewind src to the true byte position */
    b       .Lmemcpy_l4

    /*
     * Fault fixups.  The exception handler dispatches here via the
     * __exc_table entries below.  NOTE(review): r2 appears to hold the
     * faulting address at this point — set externally by the handler
     * that walks __exc_table; confirm against that handler.
     * Each entry point first pops whatever scratch registers were
     * borrowed at the faulting instruction (r4, or r4+r5), then computes
     * len - (fault_addr - min(orig_dst, orig_src)), a conservative count
     * of the bytes not copied.
     */
.Lfix_return1:
    ldmia   sp!, {r4}           /* r4 was borrowed */
.Lfix_return2:
    ldmia   sp!, {r0, r1}       /* original dst, src */
    cmp     r0, r1
    bhs     .Lfix_return
    sub     r0, r2, r0          /* dst < src: progress = fault - dst */
    b       .Lfix_return_done
.Lfix_return:
    sub     r0, r2, r1          /* dst >= src: progress = fault - src */
.Lfix_return_done:
    ldmia   sp!, {r2, lr}       /* original len, return address */
    sub     r0, r2, r0          /* bytes remaining = len - progress */
    bx      lr
.Lfix_return3:
    ldmia   sp!, {r4, r5}       /* r4 and r5 were borrowed */
    b       .Lfix_return2

    /* (faulting instruction, fixup) pairs; "a" = allocatable, read-only */
.pushsection __exc_table, "a"
    .long   0b,  .Lfix_return1
    .long   1b,  .Lfix_return1
    .long   2b,  .Lfix_return1
    .long   3b,  .Lfix_return1
    .long   4b,  .Lfix_return1
    .long   5b,  .Lfix_return1
    .long   6b,  .Lfix_return2
    .long   7b,  .Lfix_return2
    .long   8b,  .Lfix_return2
    .long   9b,  .Lfix_return2
    .long   10b, .Lfix_return2
    .long   11b, .Lfix_return2
    .long   12b, .Lfix_return2
    .long   13b, .Lfix_return2
    .long   14b, .Lfix_return2
    .long   15b, .Lfix_return2
    .long   16b, .Lfix_return2
    .long   17b, .Lfix_return2
    .long   18b, .Lfix_return2
    .long   19b, .Lfix_return2
    .long   20b, .Lfix_return2
    .long   21b, .Lfix_return2
    .long   22b, .Lfix_return2
    .long   23b, .Lfix_return2
    .long   24b, .Lfix_return2
    .long   25b, .Lfix_return3
    .long   26b, .Lfix_return3
    .long   27b, .Lfix_return2
    .long   28b, .Lfix_return2
    .long   29b, .Lfix_return3
    .long   30b, .Lfix_return3
    .long   31b, .Lfix_return2
    .long   32b, .Lfix_return2
    .long   33b, .Lfix_return3
    .long   34b, .Lfix_return3
    .long   35b, .Lfix_return2
    .long   36b, .Lfix_return2
.popsection
303