/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 */

#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

/*
 * Syntax:  AT&T (GNU as), 32-bit x86, preprocessed with cpp.
 * Args:    all three arguments are read from the stack.  After the two
 *          pushes in the prologue they live at
 *              12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum
 * Return:  %eax = sum plus the data, accumulated as a 32-bit value with
 *          end-around carry.  No folding to 16 bits is done here.
 * Saved:   %esi and %ebx are pushed/popped; %ecx, %edx and the flags
 *          are clobbered.
 *
 * Register roles (both variants):
 *   %eax  running sum
 *   %esi  read pointer into buff
 *   %ecx  length / loop counter
 *   %edx  saved copy of the length for the tail handling
 *   %ebx  scratch
 *
 * The summing loops chain adcl instructions through the carry flag.
 * Only instructions that leave CF alone (mov, lea, dec, jcc) may sit
 * between two links of such a chain, so do not reorder them casually.
 */

.text
.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $2, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
	/* Consume one 16-bit word so that bit 1 of esi becomes clear. */
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# edx = remaining length, kept for the tails
	shrl $5, %ecx		# ecx = number of whole 32-byte chunks
	jz 2f
	testl %esi, %esi	# test always clears CF: start of the carry chain
	/* Main loop: 8 dwords per pass; dec keeps CF for the next pass. */
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax		# fold in the last carry of the chain
2:	movl %edx, %ecx
	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx		# This clears CF
	/* Dword tail loop: edx dwords, same carry-chain rules as above. */
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
	/*
	 * Byte tail: ecx & 3 bytes are left.  They are assembled in ecx as
	 *   1 byte : byte0
	 *   2 bytes: word0
	 *   3 bytes: (word0 << 16) | byte2
	 * and then added in one go.  The je below still tests the flags of
	 * the cmpl, because movw and leal do not modify flags.
	 */
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	popl %ebx
	popl %esi
	RET

#else

/* Version for PentiumII/PPro */

csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $2, %esi
	jnz 30f
	/*
	 * Main path.  The first (len & 0x7c) bytes are summed by jumping
	 * into the middle of the unrolled table at 40:, the rest in whole
	 * 128-byte passes.  esi is pre-advanced by (len & 0x7c) and the
	 * table uses negative offsets, so the entry point
	 *     45f - 3 * ((len & 0x7c) / 4)
	 * executes exactly the last (len & 0x7c) / 4 adcl instructions.
	 * The lea scales by 3, i.e. it assumes every add/adc in the table
	 * assembles to 3 bytes.  Do not insert anything between 40: and 45:.
	 */
10:
	movl %ecx, %edx		# edx = length, kept for the byte tail
	movl %ecx, %ebx
	andl $0x7c, %ebx	# bytes handled by the partial first pass
	shrl $7, %ecx		# number of full 128-byte passes
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# test always clears CF for the adcl chain
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b

	/*
	 * buff has bit 1 set.  With more than two bytes, consume one word
	 * at 20: and continue on the main path with len - 2.  Otherwise
	 * finish here.
	 */
30:	subl $2, %ecx
	ja 20b			# len > 2
	je 32f			# len == 2
	addl $2, %ecx		# len was < 2: restore it
	jz 80f			# len == 0: nothing to sum, do not touch buff
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# lea keeps CF; esi now 128 past the data read
	adcl $0, %eax
	dec %ecx
	jge 40b			# another full 128-byte pass pending
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	/*
	 * NOTE(review): the andl below loads a full dword and masks it, so
	 * it reads up to 3 bytes past the end of the data.  The original
	 * remark relies on esi being 4-byte aligned; the entry code only
	 * tests bit 1 of buff, so this presumably assumes buff is never at
	 * an odd address.  Confirm against callers.
	 */
	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	popl %ebx
	popl %esi
	RET

#endif
	EXPORT_SYMBOL(csum_partial)