/*
 * Compute 16-bit sum in ones' complement arithmetic (with end-around carry).
 * This sum is often used as a simple checksum in networking.
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

#include "networking.h"
#include "chksum_common.h"

always_inline
static inline uint32_t
slurp_head32(const void **pptr, uint32_t *nbytes)
{
    uint32_t sum = 0;
    Assert(*nbytes >= 4);
    uint32_t off = (uintptr_t) *pptr % 4;
    if (likely(off != 0))
    {
        /* Get rid of bytes 0..off-1 */
        const unsigned char *ptr32 = align_ptr(*pptr, 4);
        uint32_t mask = ~0U << (CHAR_BIT * off);
        sum = load32(ptr32) & mask;
        *pptr = ptr32 + 4;
        *nbytes -= 4 - off;
    }
    return sum;
}

/* Additional loop unrolling would help when not auto-vectorizing */
unsigned short
__chksum(const void *ptr, unsigned int nbytes)
{
    bool swap = false;
    uint64_t sum = 0;

    if (nbytes > 300)
    {
        /* 4-byte align pointer */
        swap = (uintptr_t) ptr & 1;
        sum = slurp_head32(&ptr, &nbytes);
    }
    /* Else benefit of aligning not worth the overhead */

    /* Sum all 16-byte chunks */
    const char *cptr = ptr;
    for (uint32_t nquads = nbytes / 16; nquads != 0; nquads--)
    {
        uint64_t h0 = load32(cptr + 0);
        uint64_t h1 = load32(cptr + 4);
        uint64_t h2 = load32(cptr + 8);
        uint64_t h3 = load32(cptr + 12);
        sum += h0 + h1 + h2 + h3;
        cptr += 16;
    }
    nbytes %= 16;
    Assert(nbytes < 16);

    /* Handle any trailing 4-byte chunks */
    while (nbytes >= 4)
    {
        sum += load32(cptr);
        cptr += 4;
        nbytes -= 4;
    }
    Assert(nbytes < 4);

    if (nbytes & 2)
    {
        sum += load16(cptr);
        cptr += 2;
    }

    if (nbytes & 1)
    {
        sum += *(uint8_t *)cptr;
    }

    return fold_and_swap(sum, swap);
}
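The helpers load16, load32, align_ptr, Assert, and fold_and_swap come from chksum_common.h and are not shown above. As a rough sketch only (fold16_example is an illustrative name, not the library's fold_and_swap), reducing the 64-bit accumulator to a 16-bit ones' complement sum with end-around carry can look like this:

#include <stdint.h>

/* Illustrative sketch only -- not the fold_and_swap() from chksum_common.h. */
static uint16_t
fold16_example(uint64_t sum)
{
    /* Fold 64 bits down to 32, adding the shifted-out carries back in
       (this re-addition of carries is the "end-around carry"). */
    sum = (sum & 0xffffffffu) + (sum >> 32);
    sum = (sum & 0xffffffffu) + (sum >> 32);
    /* Fold 32 bits down to 16 the same way. */
    sum = (sum & 0xffffu) + (sum >> 16);
    sum = (sum & 0xffffu) + (sum >> 16);
    return (uint16_t) sum;
}

The real fold_and_swap presumably also byte-swaps the folded result when swap is set, compensating for the lane swap introduced by an odd-aligned start pointer. Note that this routine returns the ones' complement sum itself; the RFC 1071 Internet checksum is the bitwise complement of that sum.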