/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

	.section .text
	.balign L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, size_t len)
#
# Sums the buffer as a sequence of 16-bit little-endian words using 32-bit
# one's-complement accumulation (every `add` is followed by `addc +0,d1` /
# chained `addc`s so the carry is folded back in — end-around carry), then
# compresses the 32-bit accumulator to a 16-bit result.
#
# Register use throughout:
#	a0 = current buffer position	d2 = bytes remaining
#	d1 = 32-bit accumulator		d0/e0/e1/e3 = load scratch
#
###############################################################################
	.globl do_csum
	.type do_csum,@function
do_csum:
	movm	[d2,d3],(sp)		# save callee registers; matching
					# restore is in the `ret` reg list
	mov	d0,(12,sp)		# stash original buff pointer; re-read
					# at the end to test odd alignment
					# (NOTE(review): (12,sp)/(16,sp) look
					# like the caller-frame argument home
					# slots above the movm save area and
					# return address — confirm vs ABI)
	mov	d1,(16,sp)
	mov	d1,d2			# count
	mov	d0,a0			# buff
	clr	d1			# accumulator

	cmp	+0,d2
	beq	do_csum_done		# return if zero-length buffer

	# 4-byte align the buffer pointer
	btst	+3,a0
	beq	do_csum_now_4b_aligned

	btst	+1,a0
	beq	do_csum_addr_not_odd
	movbu	(a0),d0			# odd start: consume one byte
	inc	a0
	asl	+8,d0			# it is the HIGH byte of its LE 16-bit
					# word; the final swaph undoes the
					# whole-sum byte rotation this causes
	add	d0,d1
	addc	+0,d1			# fold carry back in (end-around)
	add	-1,d2
do_csum_addr_not_odd:

	cmp	+2,d2
	bcs	do_csum_fewer_than_4	# < 2 bytes left — no halfword to take
	btst	+2,a0
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0		# consume one halfword to reach 4-byte
					# alignment
	add	d0,d1
	addc	+0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder	# 4-byte aligned remainder

	add	-32,d2			# bias count so `sub d3,d2` borrows
					# exactly when < 32 bytes remain
	mov	+32,d3			# loop stride constant

do_csum_loop:
	# 8 x 32-bit loads; a single carry chain (add, then addc...addc)
	# accumulates all eight words, then `addc +0` folds the final carry
	mov	(a0+),d0
	add	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	mov	(a0+),d0
	addc	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1			# end-around carry for the chain above

	sub	d3,d2
	bcc	do_csum_loop		# keep going while no borrow

	add	d3,d2			# undo the -32 bias: d2 = 0..31 left
	beq	do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0		# one 16-byte chunk, same chained
	add	d0,d1			# add/addc pattern as the main loop
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1
	add	-16,d2
	beq	do_csum_done

do_csum_fewer_than_16:
	# copy the remaining whole words
	# (d2 = 0..15 here; dispatch on how many whole 32-bit words remain,
	#  falling through so 3 words, 2 words, 1 word share the tail)
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words
	mov	(a0+),d0		# third-from-last word
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1

do_csum_fewer_than_4:
	and	+3,d2			# only the sub-word residue matters now
	beq	do_csum_done
	xor_cmp	d0,d0,+2,d2		# clear d0 AND compare d2 with 2 in one
					# insn: d0 must be 0 if no halfword is
					# loaded below
	bcs	do_csum_fewer_than_2
	movhu	(a0+),d0		# trailing halfword
do_csum_fewer_than_2:
	and	+1,d2
	beq	do_csum_add_last_bit
	movbu	(a0),d3			# trailing lone byte: low byte of its
	add	d3,d0			# LE 16-bit word, so added unshifted
do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1

do_csum_done:
	# compress the checksum down to 16 bits
	# d0 = (sum>>16) + (sum&0xffff) with end-around carry:
	# both halves are lined up in bits 31:16, added, and the +0xffff
	# (plus carry) propagates any fold carry into bit 16 before the shift
	mov	+0xffff0000,d2
	and	d1,d2			# d2 = high half, still in place
	asl	+16,d1			# d1 = low half moved to bits 31:16
	add	d2,d1,d0
	addc	+0xffff,d0
	lsr	+16,d0			# 16-bit folded result

	# flip the halves of the word result if the buffer was oddly aligned
	mov	(12,sp),d1		# original buff pointer saved on entry
	and	+1,d1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0			# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8		# restore d2/d3, pop their 8-byte save
					# area, return result in d0

do_csum_end:
	.size	do_csum, do_csum_end-do_csum