/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

        .section .text
        .balign	L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, size_t len)
#
###############################################################################
21	.globl	do_csum
22        .type	do_csum,@function
23do_csum:
24	movm	[d2,d3],(sp)
25	mov	d0,(12,sp)
26	mov	d1,(16,sp)
27	mov	d1,d2				# count
28	mov	d0,a0				# buff
29	clr	d1				# accumulator
30
31	cmp	+0,d2
32	beq	do_csum_done			# return if zero-length buffer
33
34	# 4-byte align the buffer pointer
35	btst	+3,a0
36	beq	do_csum_now_4b_aligned
37
38	btst	+1,a0
39	beq	do_csum_addr_not_odd
40	movbu	(a0),d0
41	inc	a0
42	asl	+8,d0
43	add	d0,d1
44	addc	+0,d1
45	add	-1,d2
46do_csum_addr_not_odd:
47
48	cmp	+2,d2
49	bcs	do_csum_fewer_than_4
50	btst	+2,a0
51	beq	do_csum_now_4b_aligned
52	movhu	(a0+),d0
53	add	d0,d1
54	addc	+0,d1
55	add	-2,d2
56	cmp	+4,d2
57	bcs	do_csum_fewer_than_4
58
59do_csum_now_4b_aligned:
60	# we want to checksum as much as we can in chunks of 32 bytes
61	cmp	+31,d2
62	bls	do_csum_remainder		# 4-byte aligned remainder
63
64	add	-32,d2
65	mov	+32,d3
66
67do_csum_loop:
68	mov	(a0+),d0
69	add	d0,d1
70	mov	(a0+),e0
71	addc	e0,d1
72	mov	(a0+),e1
73	addc	e1,d1
74	mov	(a0+),e3
75	addc	e3,d1
76	mov	(a0+),d0
77	addc	d0,d1
78	mov	(a0+),e0
79	addc	e0,d1
80	mov	(a0+),e1
81	addc	e1,d1
82	mov	(a0+),e3
83	addc	e3,d1
84	addc	+0,d1
85
86	sub	d3,d2
87	bcc	do_csum_loop
88
89	add	d3,d2
90	beq	do_csum_done
91
92do_csum_remainder:
93	# cut 16-31 bytes down to 0-15
94	cmp	+16,d2
95	bcs	do_csum_fewer_than_16
96	mov	(a0+),d0
97	add	d0,d1
98	mov	(a0+),e0
99	addc	e0,d1
100	mov	(a0+),e1
101	addc	e1,d1
102	mov	(a0+),e3
103	addc	e3,d1
104	addc	+0,d1
105	add	-16,d2
106	beq	do_csum_done
107
108do_csum_fewer_than_16:
109	# copy the remaining whole words
110	cmp	+4,d2
111	bcs	do_csum_fewer_than_4
112	cmp	+8,d2
113	bcs	do_csum_one_word
114	cmp	+12,d2
115	bcs	do_csum_two_words
116	mov	(a0+),d0
117	add	d0,d1
118	addc	+0,d1
119do_csum_two_words:
120	mov	(a0+),d0
121	add	d0,d1
122	addc	+0,d1
123do_csum_one_word:
124	mov	(a0+),d0
125	add	d0,d1
126	addc	+0,d1
127
128do_csum_fewer_than_4:
129	and	+3,d2
130	beq	do_csum_done
131	xor_cmp	d0,d0,+2,d2
132	bcs	do_csum_fewer_than_2
133	movhu	(a0+),d0
134do_csum_fewer_than_2:
135	and	+1,d2
136	beq	do_csum_add_last_bit
137	movbu	(a0),d3
138	add	d3,d0
139do_csum_add_last_bit:
140	add	d0,d1
141	addc	+0,d1
142
143do_csum_done:
144	# compress the checksum down to 16 bits
145	mov	+0xffff0000,d2
146	and	d1,d2
147	asl	+16,d1
148	add	d2,d1,d0
149	addc	+0xffff,d0
150	lsr	+16,d0
151
152	# flip the halves of the word result if the buffer was oddly aligned
153	mov	(12,sp),d1
154	and	+1,d1
155	beq	do_csum_not_oddly_aligned
156	swaph	d0,d0				# exchange bits 15:8 with 7:0
157
158do_csum_not_oddly_aligned:
159	ret	[d2,d3],8
160
161do_csum_end:
162	.size	do_csum, do_csum_end-do_csum
163