/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 *
 * Copies len bytes from src to dst while accumulating a ones-complement
 * style sum (add-with-carry) of the copied data into sum.
 *
 * This file is a template: it does not define the following names and
 * expects them to be provided before this text is assembled.
 *
 *   FN_ENTRY / FN_EXIT       - open and close the function symbol
 *   save_regs / load_regs    - prologue / epilogue.  NOTE(review): the
 *                              code below never issues an explicit
 *                              return after load_regs, so load_regs is
 *                              presumably also the function return;
 *                              .Ldone additionally relies on the
 *                              original dst being found at [sp, #0]
 *                              after save_regs - confirm both in the
 *                              including file.
 *   load1b, load2b           - fetch 1 / 2 bytes from src
 *   load1l, load2l, load4l   - fetch 1 / 2 / 4 words from src
 *                              (presumably post-incrementing src -
 *                              confirm)
 *   put_byte_N, get_byte_N   - shifter operands selecting byte lane N
 *   push, pull               - shifter operations used to merge two
 *                              adjacent source words (presumably
 *                              endian-dependent lsl/lsr - confirm)
 *
 * Register usage inside the routine:
 *   r4-r8  data being copied
 *   ip     scratch: bytes left in whole 16-byte blocks, or the
 *          source misalignment (src & 3)
 *   C flag carry of the running checksum; it is kept live across
 *          most of the routine, which is why counters are updated
 *          with flag-preserving instructions.
 */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

		/*
		 * Zero length: nothing to copy, hand back the
		 * incoming sum unchanged.
		 */
.Lzero:		mov	r0, sum
		load_regs

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * Entered with "blne" from the main routine and left
		 * again through lr.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit		@ dst even: only a halfword to go

		load1b	ip
		sub	len, len, #1		@ plain sub: C must survive
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		moveq	pc, lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2		@ plain sub: C must survive
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 * (we arrive via "blo" after "cmp len, #8").
		 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6			@ any byte pairs left?
		beq	.Lless8_byteonly

		/* Copy byte pairs while len still has bit 1 or bit 2 set. */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1			@ one odd byte remaining?
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

		/*
		 * Function entry point.
		 */
FN_ENTRY
		save_regs

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = bytes in whole 16-byte blocks
		beq	2f

		/*
		 * 16 bytes per iteration.  The counter is updated with
		 * sub + teq rather than subs so that the checksum carry
		 * in C is not overwritten.
		 */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* Remaining whole words: bit 3 of len = 8 bytes, bit 2 = 4. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Trailing 1-3 bytes.  A whole word is fetched and the
		 * bytes that are wanted are picked out of it.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit			@ only one byte: r5 holds it
		adcs	sum, sum, r4, push #16	@ sum the two bytes being stored
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2	@ candidate third byte

		/*
		 * Common tail: r5 holds a possible final odd byte, which
		 * is stored and summed only when bit 0 of len is set.
		 */
.Lexit:		tst	len, #1
		strneb	r5, [dst], #1
		andne	r5, r5, #255
		adcnes	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
.Ldone:		adc	r0, sum, #0		@ fold the last carry into the result
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

		/*
		 * Source is not word aligned (dst is).  src is rounded
		 * down to a word boundary and every output word is built
		 * from two neighbouring source words; r4 carries the
		 * bytes left over from the previous source word.
		 *
		 * Fall-through case: src & 3 == 1.
		 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned
		mov	r4, r5, pull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		mov	r7, r7, pull #8
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #8		@ keep leftover bytes for next word
		sub	ip, ip, #16		@ sub + teq: C must survive
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #8
		/* Trailing 1-3 bytes are all taken from r4. */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

		/*
		 * src & 3 == 2.  The "cmp ip, #2" above compared equal
		 * and so left C set; "adds sum, sum, #0" clears it again
		 * before the adcs chain starts.
		 */
.Lsrc2_aligned:	mov	r4, r5, pull #16
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		mov	r7, r7, pull #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #16
		sub	ip, ip, #16		@ sub + teq: C must survive
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #16
		/*
		 * Trailing bytes: r4 supplies the first two; a third
		 * byte, if needed, is fetched separately with load1b.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

		/*
		 * src & 3 == 3.  As above, "adds sum, sum, #0" clears
		 * the C left set by "cmp ip, #2".
		 */
.Lsrc3_aligned:	mov	r4, r5, pull #24
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		mov	r7, r7, pull #24
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #24
		sub	ip, ip, #16		@ sub + teq: C must survive
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #24
		/*
		 * Trailing bytes: r4 supplies the first one; when two or
		 * more are needed the rest come from the next source word.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, push #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT