/* $OpenBSD: memcpy.S,v 1.1.1.1 2006/10/10 22:07:10 miod Exp $ */
/* $NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $ */

/*
 * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * Memory copy for SuperH.  One source file provides three entry points;
 * the one that is assembled is selected by a preprocessor macro:
 *
 *	MEMCOPY (default)	memcpy:   dst = r4, src = r5, len = r6
 *	MEMMOVE			memmove:  dst = r4, src = r5, len = r6
 *	BCOPY			bcopy:    src = r4, dst = r5, len = r6
 *
 * For memcpy/memmove the incoming dst is stashed in r3 (REG_DST0) and
 * returned in r0.  The bcopy build has no REG_DST0 and returns with a nop
 * in the rts delay slot.
 *
 * Strategy:
 *   - src == dst: return at once.
 *   - src >  dst (unsigned): copy forward from the first byte.
 *   - src <  dst: advance both pointers by len and copy backward
 *     (bcopy_overlap), so an overlapping destination is handled.
 *   - In either direction the transfer unit is picked from (src ^ dst) & 3:
 *       0         -> align src with a byte and/or word copy, then longwords
 *       2         -> align src to an even address, then words
 *       1 or 3    -> bytes only
 *     and any remainder is finished by the byte loop.
 *
 * Registers written: r0, r1, REG_SRC, REG_DST, REG_LEN, r3 (when REG_DST0
 * is defined), and the T bit.
 *
 * NOTE: bt/s, bf/s, bra and rts execute the instruction that follows them
 * (the delay slot) before the branch takes effect; plain bt/bf do not.
 * Several delay slots below deliberately hold the first instruction of
 * the next block, so the instruction order must not be changed.
 */

#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
#define MEMCOPY
#endif

#if defined(MEMCOPY) || defined(MEMMOVE)
#define	REG_DST0	r3
#define	REG_SRC		r5
#define	REG_DST		r4
#else
#define	REG_SRC		r4
#define	REG_DST		r5
#endif

#define	REG_LEN		r6

#if defined(MEMCOPY)
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#elif defined(BCOPY)
ENTRY(bcopy)
#endif
#ifdef REG_DST0
	mov	REG_DST,REG_DST0	/* keep original dst for the return value */
#endif
	cmp/eq	REG_DST,REG_SRC		/* if ( src == dst ) return; */
	bt/s	bcopy_return
	cmp/hi	REG_DST,REG_SRC		/* [delay slot] T = ( src > dst ) */
	bf/s	bcopy_overlap		/* src < dst: copy backward */

	mov	REG_SRC,r0		/* [delay slot] */
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3, reused by word_align */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	word_align

longword_align:
	tst	REG_LEN,REG_LEN		/* [delay slot] if ( len == 0 ) return; */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* [delay slot] */
	tst	#1,r0			/* if ( src & 1 ) */
	bt	1f
	mov.b	@REG_SRC+,r0		/*	*dst++ = *src++; */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	mov	REG_SRC,r0		/* [delay slot] */
	tst	#2,r0			/*      (src & 2) ) { */
	bt	1f
	mov.w	@REG_SRC+,r0		/*	*((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*	len -= 2; */
	mov.w	r0,@REG_DST
	add	#2,REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* [delay slot] T = ( len == 0 ) for no_align_delay */
2:
	mov.l	@REG_SRC+,r0		/*	*((unsigned long*)dst)++ = *((unsigned long*)src)++; */
	add	#-4,REG_LEN		/*	len -= 4; */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#4,REG_DST		/* [delay slot] } */

	bra	no_align_delay		/* finish the 0..3 trailing bytes */
	tst	REG_LEN,REG_LEN		/* [delay slot] T = ( len == 0 ) */


word_align:
	mov	r1,r0
	tst	#1,r0			/* (src ^ dst) odd: bytes only */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* [delay slot] if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0		/*	*dst++ = *src++; */
	add	#-1,REG_LEN
	mov.b	r0,@REG_DST
	add	#1,REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN		/* [delay slot] T = ( len == 0 ) for no_align_delay */
2:
	mov.w	@REG_SRC+,r0		/*	*((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN		/*	len -= 2; */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#2,REG_DST		/* [delay slot] } */


no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
no_align_delay:				/* entered with T = ( len == 0 ) already set */
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0		/*	*dst++ = *src++; */
	add	#-1,REG_LEN		/*	len--; */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST		/* [delay slot] } */
bcopy_return:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* [delay slot] return the original dst */
#else
	nop				/* [delay slot] */
#endif


	/*
	 * Backward copy, used when src < dst.  Both pointers are first moved
	 * one past the end of their buffers and then pre-decremented.
	 */
bcopy_overlap:
	add	REG_LEN,REG_SRC
	add	REG_LEN,REG_DST

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1			/* r1 = (src ^ dst) & 3, reused by ov_word_align */
	tst	r0,r0			/* (src ^ dst) & 3 */
	bf/s	ov_word_align

ov_longword_align:
	tst	REG_LEN,REG_LEN		/* [delay slot] if ( len == 0 ) return; */
	bt/s	bcopy_return


	mov	REG_SRC,r0		/* [delay slot] */
	tst	#1,r0			/* if ( src & 1 ) */
	bt	1f
	add	#-1,REG_SRC		/*	*--dst = *--src; */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN		/* if ( (len > 1) && */
	bf/s	1f
	mov	REG_SRC,r0		/* [delay slot] */
	tst	#2,r0			/*      (src & 2) ) { */
	bt	1f
	add	#-2,REG_SRC		/*	*--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN		/*	len -= 2; */
	mov.w	r0,@-REG_DST		/* } */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 3 ) { */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* [delay slot] T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0		/*	*--((unsigned long*)dst) = *--((unsigned long*)src); */
	add	#-4,REG_LEN		/*	len -= 4; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.l	r0,@-REG_DST		/* [delay slot] } */

	bra	ov_no_align_delay	/* finish the 0..3 leading bytes */
	tst	REG_LEN,REG_LEN		/* [delay slot] T = ( len == 0 ) */


ov_word_align:
	mov	r1,r0
	tst	#1,r0			/* (src ^ dst) odd: bytes only */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* [delay slot] if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0		/* if ( src & 1 ) */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*	*--dst = *--src; */
	add	#-1,REG_LEN
	mov.b	r0,@-REG_DST
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN		/* while ( len > 1 ) { */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN		/* [delay slot] T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0		/*	*--((unsigned short*)dst) = *--((unsigned short*)src); */
	add	#-2,REG_LEN		/*	len -= 2; */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.w	r0,@-REG_DST		/* [delay slot] } */


ov_no_align:
	tst	REG_LEN,REG_LEN		/* while ( len != 0 ) { */
ov_no_align_delay:			/* entered with T = ( len == 0 ) already set */
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0		/*	*--dst = *--src; */
	add	#-1,REG_LEN		/*	len--; */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST		/* [delay slot] } */
9:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0		/* [delay slot] return the original dst */
#else
	nop				/* [delay slot] */
#endif