/*	$OpenBSD: memset.S,v 1.1.1.1 2006/10/10 22:07:10 miod Exp $	*/
/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * void *memset(void *dst, int c, size_t len)	(default build)
 * void  bzero(void *dst, size_t len)		(built with -DBZERO)
 *
 * Syntax: GNU as for SuperH, run through the C preprocessor.
 *
 * Register roles (see the REG_* definitions below):
 *   memset: dst in r4, c in r5, len in r6; the original dst is kept in r3
 *           and copied to r0 as the return value.
 *   bzero:  dst in r4, len in r5; r2 holds the fill value (zero).
 *   r0 (REG_PTR) and r1 (REG_TMP1) are scratch in both builds.
 *   REG_PTR has to be r0: the code uses "tst #imm,r0", "mova" and the
 *   r0-relative store forms on it.
 *
 * Strategy, chosen by len:
 *    0..11  byte-at-a-time loop, filling from the end towards dst
 *   12..27  "small": computed jump into an unrolled table of stores
 *           (byte stores if dst is odd, 16-bit stores otherwise)
 *   28..    "large": replicate c into all four bytes, align both ends of
 *           the region, then fill 32 bytes per iteration and finish with
 *           32-bit stores
 *
 * Every bt/s, bf/s, bra, jmp and rts below is a delayed branch: the
 * instruction that follows it is its delay slot and executes whether or
 * not the branch is taken.  Do not reorder instructions around them, and
 * do not change the size of the two store tables without re-checking the
 * alignment notes next to the mova instructions.
 */

#define	REG_PTR				r0
#define	REG_TMP1			r1

#ifdef BZERO
# define	REG_C			r2
# define	REG_DST			r4
# define	REG_LEN			r5
#else
# define	REG_DST0		r3
# define	REG_DST			r4
# define	REG_C			r5
# define	REG_LEN			r6
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
	/*
	 * Delay slot of the "bt/s small" above: for bzero it is the
	 * "mov #0,REG_C" (so REG_C is already zero on entry to small);
	 * for memset it is the "tst" that follows.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done
	add	#1,REG_DST		/* REG_DST = dst + 1 (loop sentinel) */

	/*
	 * unroll 4 loops
	 *
	 * Each cmp/eq is evaluated before the following pre-decrement
	 * store, so T is set exactly when that store writes the first
	 * byte of the region (end pointer == dst + 1).
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/*
 * 12 <= len < 28.  If dst is odd, fall into the byte-store table below;
 * otherwise use the 16-bit table at small_aligned.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1
	/*
	 * Each mov.b below is 2 bytes long, so jumping to (1f - 2*len)
	 * executes exactly the last len stores of the table, which cover
	 * dst+len-1 down to dst.
	 */
	shll	REG_LEN			/* REG_LEN = len * 2 */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* delay slot: r0 = fill value */

	/*
	 * 32 two-byte instructions lie between this .align and label 1
	 * in either build, which keeps 1f 4-byte aligned for the mova.
	 */
	.align	2
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
#ifdef BZERO
	/* the final store sits in the rts delay slot */
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is aligned? */
	bt/s	1f
	add	#-1,r0			/* delay slot: r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte */
	mov	r0,REG_LEN		/* len is now even */
1:

	/*
	 * Each mov.w below is 2 bytes long and stores 2 bytes, so jumping
	 * to (1f - len) executes exactly len/2 stores covering dst+len-2
	 * down to dst.
	 */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* delay slot: r0 = fill value */

	/*
	 * 16 two-byte instructions lie between this .align and label 1
	 * in either build, which keeps 1f 4-byte aligned for the mova.
	 */
	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	/* the final store sits in the rts delay slot */
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif



/*
 * len >= 28.  Build a 32-bit fill pattern, then align the start (dst)
 * and the end (REG_PTR = dst + len) of the region before the word loops.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = ((dst & 3) == 0) */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = ((len & 3) == 0) */
	bf/s	unaligned_len
	/* delay slot of the bf/s above is the "mov #32" at aligned */

/*
 * Here REG_DST and REG_PTR are both 4-byte aligned and
 * REG_LEN = REG_PTR - REG_DST.  The region is filled from the end.
 */
aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > remaining len) */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)
9:

	/*
	 * fill left 4*n bytes
	 *
	 * Same scheme as the byte loop at the top: REG_DST is advanced
	 * by 4 so that cmp/eq, evaluated before each pre-decrement store,
	 * sets T exactly when that store writes the word at dst.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* return the original dst */
#endif


/*
 * dst is not 4-byte aligned: store up to one byte and one halfword at
 * the front so that REG_DST becomes 4-byte aligned, then fix up the end.
 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2                 */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* delay slot: T = !(dst & 2)   */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/*   re-test bit 1 of new dst   */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(u_int16_t*)dst++ = c;    */
	add	#2,REG_DST
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
/*
 * Entered from large when dst is aligned but len is not, and reached by
 * fall-through from unaligned_dst.  The "tst #1" below is also the delay
 * slot of the "bt/s 4f" above.  Stores up to one byte and one halfword
 * at the tail so that REG_PTR becomes 4-byte aligned.
 */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/*   delay slot: T = !(ptr & 2) */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(u_int16_t*)ptr = c;  */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* delay slot: len = ptr - dst  */
