; SPDX-License-Identifier: GPL-2.0-only
;
;  linux/arch/c6x/lib/csum_64plus.s
;
;  Port on Texas Instruments TMS320C6x architecture
;
;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
;
#include <linux/linkage.h>

;
;unsigned int csum_partial_copy(const char *src, char * dst,
;				int len, int sum)
;
; Copies len bytes from src to dst and, in the same pass, adds the
; copied data up as 16-bit quantities.  The running total is folded
; to 16 bits; the caller supplied sum is then added to the folded
; value (that last addition is not folded again).
;
; Order of work:
;   - len / 4 whole words through one of two SPLOOP loops
;     (LDW/STW when the low two bits of both pointers are zero,
;     LDNW/STNW otherwise);
;   - one half-word, moved as two bytes, when bit 1 of len is set;
;   - one byte when bit 0 of len is set.
;
; A4:	src
; B4:	dst
; A6:	len
; B6:	sum
; return csum in A4
;
; ILC is saved in B30 on entry and restored before returning.
; A9 (and B9 in the aligned loop) hold the running sum, A31 holds
; the caller supplied sum.
;

	.text
ENTRY(csum_partial_copy)
	MVC	.S2	ILC,B30		; save ILC, restored at exit

	MV	.D1X	B6,A31		; given csum
	ZERO	.D1	A9		; csum (a side)
||	ZERO	.D2	B9		; csum (b side)
||	SHRU	.S2X	A6,2,B5		; len / 4

	;; Check alignment and size
	AND	.S1	3,A4,A1		; low two bits of src
||	AND	.S2	3,B4,B0		; low two bits of dst
	OR	.L2X	B0,A1,B0	; non aligned condition
||	MVC	.S2	B5,ILC		; loop count = number of words
||	MVK	.D2	1,B2		; multiplier used by MPYU below
||	MV	.D1X	B5,A1		; words condition
  [!A1]	B	.S1	L8		; no whole word: go to the tail
   [B0]	BNOP	.S1	L6,5		; unaligned: use the LDNW/STNW loop

	SPLOOP		1

	;; Main loop for aligned words
	LDW	.D1T1	*A4++,A7
	NOP	4
	MV	.S2X	A7,B7		; copy of the word for the store
||	EXTU	.S1	A7,0,16,A16	; A16 = upper half-word of the word
	STW	.D2T2	B7,*B4++
||	MPYU	.M2	B7,B2,B8	; B8 = lower half-word (times 1)
||	ADD	.L1	A16,A9,A9
	NOP
	SPKERNEL	8,0
||	ADD	.L2	B8,B9,B9

	ZERO	.D1	A1		; makes the test at L6 skip the
					; unaligned loop
||	ADD	.L1X	A9,B9,A9	; add csum from a and b sides

L6:
  [!A1]	BNOP	.S1	L8,5

	;; Main loop for non-aligned words
	SPLOOP		2
||	MVK	.L1	1,A2		; multiplier used by MPYU below

	LDNW	.D1T1	*A4++,A7
	NOP		3

	NOP
	MV	.S2X	A7,B7		; copy of the word for the store
||	EXTU	.S1	A7,0,16,A16	; A16 = upper half-word of the word
||	MPYU	.M1	A7,A2,A8	; A8 = lower half-word (times 1)

	ADD	.L1	A16,A9,A9
	SPKERNEL	6,0
||	STNW	.D2T2	B7,*B4++
||	ADD	.L1	A8,A9,A9

L8:	AND	.S2X	2,A6,B5		; bit 1 of len: a half-word remains
	CMPGT	.L2	B5,0,B0
  [!B0]	BNOP	.S1	L82,4

	;; Manage half-word
	;; (copied as two single bytes; the byte that goes into bits
	;; 15..8 of the sum depends on the endianness)
	ZERO	.L1	A7
||	ZERO	.D1	A8

#ifdef CONFIG_CPU_BIG_ENDIAN

	LDBU	.D1T1	*A4++,A7
	LDBU	.D1T1	*A4++,A8
	NOP		3
	SHL	.S1	A7,8,A0		; first byte is the high byte
	ADD	.S1	A8,A9,A9
	STB	.D2T1	A7,*B4++
||	ADD	.S1	A0,A9,A9
	STB	.D2T1	A8,*B4++

#else

	LDBU	.D1T1	*A4++,A7
	LDBU	.D1T1	*A4++,A8
	NOP		3
	ADD	.S1	A7,A9,A9
	SHL	.S1	A8,8,A0		; second byte is the high byte

	STB	.D2T1	A7,*B4++
||	ADD	.S1	A0,A9,A9
	STB	.D2T1	A8,*B4++

#endif

	;; Handle the last byte, if any
L82:	AND	.S2X	1,A6,B0		; bit 0 of len: one byte remains
  [!B0]	BNOP	.S1	L9,5
||	ZERO	.L1	A7

L83:	LDBU	.D1T1	*A4++,A7
	NOP		4

	MV	.L2X	A7,B7

#ifdef CONFIG_CPU_BIG_ENDIAN

	STB	.D2T2	B7,*B4++
||	SHL	.S1	A7,8,A7		; lone byte counts as a high byte
	ADD	.S1	A7,A9,A9

#else

	STB	.D2T2	B7,*B4++
||	ADD	.S1	A7,A9,A9

#endif

	;; Fold the csum
	;; (repeat low 16 bits + high 16 bits until the high half is zero)
L9:	SHRU	.S2X	A9,16,B0
  [!B0]	BNOP	.S1	L10,5

L91:	SHRU	.S2X	A9,16,B4
||	EXTU	.S1	A9,16,16,A3
	ADD	.D1X	A3,B4,A9

	SHRU	.S1	A9,16,A0
   [A0]	BNOP	.S1	L91,5

L10:	ADD	.D1	A31,A9,A9	; add the given csum
	MV	.D1	A9,A4		; return value

	BNOP	.S2	B3,4
	MVC	.S2	B30,ILC		; restore ILC in the last delay slot
ENDPROC(csum_partial_copy)

;
;unsigned short
;ip_fast_csum(unsigned char *iph, unsigned int ihl)
;{
;	unsigned int checksum = 0;
;	unsigned short *tosum = (unsigned short *) iph;
;	int len;
;
;	len = ihl*4;
;
;	if (len <= 0)
;		return 0;
;
;	while(len) {
;		len -= 2;
;		checksum += *tosum++;
;	}
;	if (len & 1)
;		checksum += *(unsigned char*) tosum;
;
;	while(checksum >> 16)
;		checksum = (checksum & 0xffff) + (checksum >> 16);
;
;	return ~checksum;
;}
;
; A4:	iph
; B4:	ihl
; return checksum in A4
;
; The assembly below sums ihl * 2 half-words with LDHU, folds the
; total to 16 bits and returns the low 16 bits of its complement.
; ILC is saved in B30 on entry and restored before returning.
;
	.text

ENTRY(ip_fast_csum)
	ZERO	.D1	A5		; checksum = 0
||	MVC	.S2	ILC,B30		; save ILC, restored at exit
	SHL	.S2	B4,2,B0		; len = ihl * 4
	CMPGT	.L2	B0,0,B1
  [!B1]	BNOP	.S1	L15,4		; len <= 0: return 0
  [!B1]	ZERO	.D1	A3		; return value for that case

  [!B0]	B	.S1	L12
	SHRU	.S2	B0,1,B0		; number of half-words
	MVC	.S2	B0,ILC		; loop count
	NOP	3

	SPLOOP	1
	LDHU	.D1T1	*A4++,A3
	NOP	3
	NOP
	SPKERNEL	5,0
||	ADD	.L1	A3,A5,A5

	;; Fold: low 16 bits + high 16 bits until the high half is zero
L12:	SHRU	.S1	A5,16,A0
  [!A0]	BNOP	.S1	L14,5

L13:	SHRU	.S2X	A5,16,B4
	EXTU	.S1	A5,16,16,A3
	ADD	.D1X	A3,B4,A5
	SHRU	.S1	A5,16,A0
   [A0]	BNOP	.S1	L13,5

L14:	NOT	.D1	A5,A3		; complement ...
	EXTU	.S1	A3,16,16,A3	; ... keeping the low 16 bits only

L15:	BNOP	.S2	B3,3
	MVC	.S2	B30,ILC		; restore ILC (delay slot)
	MV	.D1	A3,A4		; return value (delay slot)
ENDPROC(ip_fast_csum)

;
;unsigned short
;do_csum(unsigned char *buff, unsigned int len)
;{
;	int odd, count;
;	unsigned int result = 0;
;
;	if (len <= 0)
;		goto out;
;	odd = 1 & (unsigned long) buff;
;	if (odd) {
;#ifdef __LITTLE_ENDIAN
;		result += (*buff << 8);
;#else
;		result = *buff;
;#endif
;		len--;
;		buff++;
;	}
;	count = len >> 1;		/* nr of 16-bit words.. */
;	if (count) {
;		if (2 & (unsigned long) buff) {
;			result += *(unsigned short *) buff;
;			count--;
;			len -= 2;
;			buff += 2;
;		}
;		count >>= 1;		/* nr of 32-bit words.. */
;		if (count) {
;			unsigned int carry = 0;
;			do {
;				unsigned int w = *(unsigned int *) buff;
;				count--;
;				buff += 4;
;				result += carry;
;				result += w;
;				carry = (w > result);
;			} while (count);
;			result += carry;
;			result = (result & 0xffff) + (result >> 16);
;		}
;		if (len & 2) {
;			result += *(unsigned short *) buff;
;			buff += 2;
;		}
;	}
;	if (len & 1)
;#ifdef __LITTLE_ENDIAN
;		result += *buff;
;#else
;		result += (*buff << 8);
;#endif
;	result = (result & 0xffff) + (result >> 16);
;	/* add up carry.. */
;	result = (result & 0xffff) + (result >> 16);
;	if (odd)
;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
;out:
;	return result;
;}
;
; A4:	buff
; B4:	len
; return checksum in A4
;
; Register use in the assembly below:
;   A1	result
;   A3	odd (bit 0 of the incoming buff)
;   A5	return address (copied from B3, which is then reused)
;   B3	remaining len
;   B0	count / scratch predicate
;

ENTRY(do_csum)
	CMPGT	.L2	B4,0,B0
  [!B0]	BNOP	.S1	L26,3		; len <= 0: return; the two packets
					; below still run in the delay slots
	EXTU	.S1	A4,31,31,A0	; A0 = buff & 1

	MV	.L1	A0,A3		; odd
||	MV	.S1X	B3,A5		; keep the return address
||	MV	.L2	B4,B3		; B3 = len
||	ZERO	.D1	A1		; result = 0

#ifdef CONFIG_CPU_BIG_ENDIAN
   [A0]	SUB	.L2	B3,1,B3		; len--
|| [A0]	LDBU	.D1T1	*A4++,A1	; result = *buff++
#else
  [!A0]	BNOP	.S1	L21,5
|| [A0]	LDBU	.D1T1	*A4++,A0
	SUB	.L2	B3,1,B3		; len--
||	SHL	.S1	A0,8,A1		; result = *buff << 8
L21:
#endif
	SHR	.S2	B3,1,B0		; count = len >> 1
  [!B0]	BNOP	.S1	L24,3
	MVK	.L1	2,A0
	AND	.L1	A4,A0,A0	; A0 = buff & 2

  [!A0]	BNOP	.S1	L22,5
|| [A0]	LDHU	.D1T1	*A4++,A0	; leading half-word
	SUB	.L2	B0,1,B0		; count--
||	SUB	.S2	B3,2,B3		; len -= 2
||	ADD	.L1	A0,A1,A1	; result += half-word
L22:
	SHR	.S2	B0,1,B0		; count = number of 32-bit words
||	ZERO	.L1	A0		; carry = 0

  [!B0]	BNOP	.S1	L23,5
|| [B0]	MVC	.S2	B0,ILC		; loop count

	;; Word loop: A1 = w, A2 = result, A0 = carry.
	;; NOTE(review): SPMASK is expected to keep the MV below out of
	;; the loop buffer so that A2 is seeded only once - confirm
	;; against the instruction set guide.
	SPLOOP	3
	SPMASK	L1
||	MV	.L1	A1,A2		; A2 = result so far
||	LDW	.D1T1	*A4++,A1	; w = *buff, buff += 4

	NOP	4
	ADD	.L1	A0,A1,A0	; carry + w
	ADD	.L1	A2,A0,A2	; result += carry + w

	SPKERNEL	1,2
||	CMPGTU	.L1	A1,A2,A0	; carry = (w > result)

	ADD	.L1	A0,A2,A6	; result += carry
	EXTU	.S1	A6,16,16,A7	; low 16 bits
	SHRU	.S2X	A6,16,B0	; high 16 bits
	NOP	1
	ADD	.L1X	A7,B0,A1	; folded result
L23:
	MVK	.L2	2,B0
	AND	.L2	B3,B0,B0	; len & 2
   [B0]	LDHU	.D1T1	*A4++,A0	; trailing half-word
	NOP	4
   [B0]	ADD	.L1	A0,A1,A1
L24:
	EXTU	.S2	B3,31,31,B0	; len & 1
#ifdef CONFIG_CPU_BIG_ENDIAN
  [!B0]	BNOP	.S1	L25,4
|| [B0]	LDBU	.D1T1	*A4,A0
	SHL	.S1	A0,8,A0		; trailing byte is a high byte
	ADD	.L1	A0,A1,A1
L25:
#else
   [B0]	LDBU	.D1T1	*A4,A0		; trailing byte is a low byte
	NOP	4
   [B0]	ADD	.L1	A0,A1,A1
#endif
	;; Fold to 16 bits, then add the carry of that fold back in
	EXTU	.S1	A1,16,16,A0
	SHRU	.S2X	A1,16,B0
	NOP	1
	ADD	.L1X	A0,B0,A0
	SHRU	.S1	A0,16,A1
	ADD	.L1	A0,A1,A0
	EXTU	.S1	A0,16,16,A1	; A1 = result, 16 bits
	EXTU	.S1	A1,16,24,A2	; A2 = (result >> 8) & 0xff

	EXTU	.S1	A1,24,16,A0	; A0 = (result & 0xff) << 8
||	MV	.L2X	A3,B0		; odd

   [B0]	OR	.L1	A0,A2,A1	; odd start: swap the two bytes
L26:
	NOP	1
	BNOP	.S2X	A5,4		; return through the saved address
	MV	.L1	A1,A4		; return value (delay slot)
ENDPROC(do_csum)

;__wsum csum_partial(const void *buff, int len, __wsum wsum)
;{
;	unsigned int sum = (__force unsigned int)wsum;
;	unsigned int result = do_csum(buff, len);
;
;	/* add in old sum, and carry.. */
;	result += sum;
;	if (sum > result)
;		result += 1;
;	return (__force __wsum)result;
;}
;
; The incoming sum is read from A6 and parked in A8, and the return
; address is parked in A9, across the call to do_csum (do_csum as
; written above uses neither register).
;
ENTRY(csum_partial)
	MV	.L1X	B3,A9		; keep the return address
||	CALLP	.S2	do_csum,B3
||	MV	.S1	A6,A8		; sum
	BNOP	.S2X	A9,2		; return; the three ADD/CMPGTU
					; below fill the delay slots
	ADD	.L1	A8,A4,A1	; result += sum
	CMPGTU	.L1	A8,A1,A0	; carry = (sum > result)
	ADD	.L1	A1,A0,A4	; result += carry
ENDPROC(csum_partial)

;unsigned short
;ip_compute_csum(unsigned char *buff, unsigned int len)
;
; Returns the low 16 bits of the complement of do_csum(buff, len).
;
; A4:	buff
; B4:	len
; return checksum in A4

ENTRY(ip_compute_csum)
	MV	.L1X	B3,A9		; keep the return address
||	CALLP	.S2	do_csum,B3
	BNOP	.S2X	A9,3		; return; NOT/CLR fill the delay slots
	NOT	.S1	A4,A4
	CLR	.S1	A4,16,31,A4	; clear bits 16..31
ENDPROC(ip_compute_csum)