/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "asm.h"

// We do not need to check whether the addresses are in the
// kernel or user address space, since we only access them
// using user privileges.
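// The sketch below illustrates the calling contract as it can be read off
// the code: a return value of 0 means the whole buffer was copied, and a
// non-zero value is the number of bytes that could not be copied because a
// fault occurred. The C code is only an illustrative, hypothetical caller;
// the helper name copy_to_user_buf() is not part of this file.
//
//	extern size_t _arm_user_copy(void *dst, const void *src, size_t len);
//
//	/* hypothetical caller: 0 on success, -1 if the user buffer
//	   was partially inaccessible */
//	static int copy_to_user_buf(void *udst, const void *ksrc, size_t len)
//	{
//		size_t not_copied = _arm_user_copy(udst, ksrc, len);
//		return not_copied == 0 ? 0 : -1;
//	}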

.syntax	unified
.arm

// size_t _arm_user_copy(void *dst, const void *src, size_t len)
FUNCTION(_arm_user_copy)
	/* save the original arguments and lr; the fault fixup handlers
	   below pop them when computing the return value */
	stmdb	sp!, {r0, r1, r2, lr}

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
0:	ldmia	r1!, {r3, r4, r12, lr}
1:	stmia	r0!, {r3, r4, r12, lr}
2:	ldmia	r1!, {r3, r4, r12, lr}
3:	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
4:	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
5:	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
6:	ldmiage	r1!, {r3, r12, lr}
7:	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
8:	ldrlt	r3, [r1], #4
9:	strlt	r3, [r0], #4
10:	ldmiage	r1!, {r3, r12}
11:	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	beq	.Lmemcpy_return
	/* copy the crud byte at a time */
	cmp	r2, #2
12:	ldrb	r3, [r1], #1
13:	strb	r3, [r0], #1
14:	ldrbge	r3, [r1], #1
15:	strbge	r3, [r0], #1
16:	ldrbgt	r3, [r1], #1
17:	strbgt	r3, [r0], #1

.Lmemcpy_return:
	ldmia	sp!, {r0, r1, r2, lr}
	mov	r0, #0			/* success: nothing left to copy */
	bx	lr

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
18:	ldrb	r3, [r1], #1
19:	strb	r3, [r0], #1
20:	ldrbge	r3, [r1], #1
21:	strbge	r3, [r0], #1
22:	ldrbgt	r3, [r1], #1
23:	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
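	/*
	 * The source is not word aligned.  The word containing the first
	 * wanted source byte is loaded once, and each output word is then
	 * built from the tail of the previous source word and the head of
	 * the next one with lsr/orr/lsl pairs.  The shift amounts below
	 * (8/24, 16/16, 24/8) correspond to source misalignments of 1, 2
	 * and 3 bytes; the shift directions assume little-endian byte
	 * order.  A C sketch of one step of the 1-byte-misaligned case
	 * (illustrative only, not part of this file):
	 *
	 *	uint32_t out = (prev >> 8) | (next << 24);
	 */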
.Lmemcpy_srcul:
	bic	r1, r1, #3
24:	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
	mov	r3, lr, lsr #8
25:	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
26:	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
	mov	r12, lr, lsr #8
27:	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
28:	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
	mov	r3, lr, lsr #16
29:	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
30:	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
	mov	r12, lr, lsr #16
31:	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
32:	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
	mov	r3, lr, lsr #24
33:	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
34:	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
	mov	r12, lr, lsr #24
35:	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
36:	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_l4

	/* fault fixups: restore any callee-saved registers, unwind the
	   stack and compute the return value for a faulted copy */
.Lfix_return1:
	ldmia	sp!, {r4}
.Lfix_return2:
	ldmia	sp!, {r0, r1}
	cmp	r0, r1
	bhs	.Lfix_return
	sub	r0, r2, r0
	b	.Lfix_return_done
.Lfix_return:
	sub	r0, r2, r1
.Lfix_return_done:
	ldmia	sp!, {r2, lr}
	sub	r0, r2, r0
	bx	lr
.Lfix_return3:
	ldmia	sp!, {r4, r5}
	b	.Lfix_return2

.pushsection __exc_table, "a"
	.long	0b, .Lfix_return1
	.long	1b, .Lfix_return1
	.long	2b, .Lfix_return1
	.long	3b, .Lfix_return1
	.long	4b, .Lfix_return1
	.long	5b, .Lfix_return1
	.long	6b, .Lfix_return2
	.long	7b, .Lfix_return2
	.long	8b, .Lfix_return2
	.long	9b, .Lfix_return2
	.long	10b, .Lfix_return2
	.long	11b, .Lfix_return2
	.long	12b, .Lfix_return2
	.long	13b, .Lfix_return2
	.long	14b, .Lfix_return2
	.long	15b, .Lfix_return2
	.long	16b, .Lfix_return2
	.long	17b, .Lfix_return2
	.long	18b, .Lfix_return2
	.long	19b, .Lfix_return2
	.long	20b, .Lfix_return2
	.long	21b, .Lfix_return2
	.long	22b, .Lfix_return2
	.long	23b, .Lfix_return2
	.long	24b, .Lfix_return2
	.long	25b, .Lfix_return3
	.long	26b, .Lfix_return3
	.long	27b, .Lfix_return2
	.long	28b, .Lfix_return2
	.long	29b, .Lfix_return3
	.long	30b, .Lfix_return3
	.long	31b, .Lfix_return2
	.long	32b, .Lfix_return2
	.long	33b, .Lfix_return3
	.long	34b, .Lfix_return3
	.long	35b, .Lfix_return2
	.long	36b, .Lfix_return2
.popsection
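// Each entry in the __exc_table section above pairs the address of a
// load/store that may fault on a user address with the address of the
// fixup routine that unwinds the copy.  A hypothetical C view of one
// entry (the struct and field names are assumptions, not part of this
// file or of the fault handler that consumes the table):
//
//	struct exc_table_entry {
//		unsigned long fault_insn;	/* address of the faulting load/store */
//		unsigned long fixup;		/* address to resume execution at */
//	};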