/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 * ecx	sum (32bit)
 * r8	src_err_ptr (int)
 * r9	dst_err_ptr (int)
 *
 * Output
 * eax	32bit folded sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

	.macro source
10:
	_ASM_EXTABLE(10b, .Lbad_source)
	.endm

	.macro dest
20:
	_ASM_EXTABLE(20b, .Lbad_dest)
	.endm

	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm


ENTRY(csum_partial_copy_generic)
	cmpl	$3*64, %edx
	jle	.Lignore

.Lignore:
	subq	$7*8, %rsp
	movq	%rbx, 2*8(%rsp)
	movq	%r12, 3*8(%rsp)
	movq	%r14, 4*8(%rsp)
	movq	%r13, 5*8(%rsp)
	movq	%r15, 6*8(%rsp)

	movq	%r8, (%rsp)
	movq	%r9, 1*8(%rsp)

	movl	%ecx, %eax
	movl	%edx, %ecx

	xorl	%r9d, %r9d
	movq	%rcx, %r12

	shrq	$6, %r12
	jz	.Lhandle_tail		/* < 64 */

	clc

	/* main loop: checksum and copy in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, r15: temp6, r14: temp7, r13: temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %r15
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%r15, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	decl	%r12d		/* dec does not touch CF; the carry chain survives */

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%r15, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

3:

	leaq	64(%rdi), %rdi	/* lea preserves flags */
	leaq	64(%rsi), %rsi

	jnz	.Lloop

	adcq	%r9, %rax

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl	%ecx, %r10d
	andl	$63, %ecx
	shrl	$3, %ecx
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi	/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx
	shrq	$32, %rax
	addl	%ebx, %eax
	adcl	%r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
	shrl	$1, %ecx
	jz	.Lhandle_1
	movl	$2, %edx
	xorl	%ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb	$1, %r10b
	jz	.Lende
	xorl	%ebx, %ebx
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax	/* carry */

.Lende:
	movq	2*8(%rsp), %rbx
	movq	3*8(%rsp), %r12
	movq	4*8(%rsp), %r14
	movq	5*8(%rsp), %r13
	movq	6*8(%rsp), %r15
	addq	$7*8, %rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq	(%rsp), %rax
	testq	%rax, %rax
	jz	.Lende
	movl	$-EFAULT, (%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp), %rax
	testq	%rax, %rax
	jz	.Lende
	movl	$-EFAULT, (%rax)
	jmp	.Lende
ENDPROC(csum_partial_copy_generic)
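
/*
 * Illustration (an editor's sketch, not part of the original source): the
 * header comment above leaves fault fixup to the C wrappers -- they must
 * check the error flags, zero the destination, and return a valid sum.
 * A minimal wrapper along those lines might look like the hypothetical C
 * sketch below; the function name is invented, and the argument order
 * follows the register contract documented at the top of this file
 * (src, dst, len, sum, src_err_ptr, dst_err_ptr).
 *
 *	__wsum copy_and_checksum_from_user(const void __user *src, void *dst,
 *					   int len, __wsum sum, int *errp)
 *	{
 *		*errp = 0;
 *		// dst_err_ptr is NULL: the kernel destination cannot fault
 *		sum = csum_partial_copy_generic((__force const void *)src,
 *						dst, len, sum, errp, NULL);
 *		if (unlikely(*errp))
 *			memset(dst, 0, len);	// zero dst on fault, per above
 *		return sum;
 *	}
 */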