• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; SPDX-License-Identifier: GPL-2.0-only
2;
3;  linux/arch/c6x/lib/csum_64plus.s
4;
5;  Port to the Texas Instruments TMS320C6x architecture
6;
7;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
8;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
9;
10#include <linux/linkage.h>
11
12;
13;unsigned int csum_partial_copy(const char *src, char * dst,
14;				int len, int sum)
15;
16; A4:	src
17; B4:	dst
18; A6:	len
19; B6:	sum
20; return csum in A4
21;
22
23	.text
; csum_partial_copy: copies A6 (len) bytes from A4 (src) to B4 (dst) and
; accumulates the 16-bit halves of every copied word plus the 2-byte and
; 1-byte remainders.  The sum is folded to 16 bits, the caller's sum (B6)
; is then added without a further fold, and the result is returned in A4.
; ILC is saved in B30 on entry and written back on exit.
; Registers written besides the A4/B4 pointers: A0-A3, A7-A9, A16, A31,
; B0, B2, B5, B7-B9, B30 (B4 is also reused as scratch while folding).
24ENTRY(csum_partial_copy)
	;; Keep the caller's ILC in B30; it is written back at the very end.
25	MVC	.S2	ILC,B30
26
	;; A31 holds the caller's sum until L10; A9/B9 are the two running
	;; accumulators and B5 is the number of whole 32-bit words (len >> 2).
27	MV	.D1X	B6,A31		; given csum
28	ZERO	.D1	A9		; csum (a side)
29||	ZERO	.D2	B9		; csum (b side)
30||	SHRU	.S2X	A6,2,B5		; len / 4
31
32	;; Check alignment and size
	;; A1 = src & 3 and B0 = dst & 3; B0 is then OR-ed with A1 so it is
	;; non-zero when either pointer is not 4-byte aligned.  ILC and A1 both
	;; receive the word count; B2 = 1 is the multiplier used by MPYU below.
33	AND	.S1	3,A4,A1
34||	AND	.S2	3,B4,B0
35	OR	.L2X	B0,A1,B0	; non aligned condition
36||	MVC	.S2	B5,ILC
37||	MVK	.D2	1,B2
38||	MV	.D1X	B5,A1		; words condition
	;; No whole word to copy: go to the tail handling at L8.
39  [!A1]	B	.S1	L8
	;; Either pointer misaligned: use the LDNW/STNW loop behind L6.
40   [B0] BNOP	.S1	L6,5
41
42	SPLOOP		1
43
44	;; Main loop for aligned words
	;; Per word: load into A7, copy to B7 and store it to dst; add the
	;; upper halfword (A16 = A7 >> 16) into A9 and the lower halfword
	;; (B8 = low 16 bits of B7 times B2, with B2 = 1) into B9.
45	LDW	.D1T1	*A4++,A7
46	NOP	4
47	MV	.S2X	A7,B7
48||	EXTU	.S1	A7,0,16,A16
49	STW	.D2T2	B7,*B4++
50||	MPYU	.M2	B7,B2,B8
51||	ADD	.L1	A16,A9,A9
52	NOP
53	SPKERNEL	8,0
54||	ADD	.L2	B8,B9,B9
55
	;; Aligned loop done: merge both accumulators into A9 and clear A1 so
	;; that the test at L6 skips the non-aligned loop.
56	ZERO	.D1	A1
57||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides
58
59L6:
	;; A1 is the word count when arriving from the branch above and 0 when
	;; falling through from the aligned loop.
60  [!A1]	BNOP	.S1	L8,5
61
62	;; Main loop for non-aligned words
	;; Same per-word work as the aligned loop, but using LDNW/STNW and with
	;; both halfwords (A16 and A8) accumulated into A9; A2 = 1 is the MPYU
	;; multiplier here.
63	SPLOOP		2
64 ||	MVK	.L1	1,A2
65
66	LDNW	.D1T1	*A4++,A7
67	NOP		3
68
69	NOP
70	MV	.S2X	A7,B7
71 ||	EXTU	.S1	A7,0,16,A16
72 ||	MPYU	.M1	A7,A2,A8
73
74	ADD	.L1	A16,A9,A9
75	SPKERNEL	6,0
76 ||	STNW	.D2T2	B7,*B4++
77 ||	ADD	.L1	A8,A9,A9
78
	;; Tail: B0 = ((len & 2) > 0); skip to L82 when no halfword remains.
79L8:	AND	.S2X	2,A6,B5
80	CMPGT	.L2	B5,0,B0
81  [!B0]	BNOP	.S1	L82,4
82
83	;; Manage half-word
	;; NOTE(review): the BNOP above carries only 4 NOPs, so this ZERO
	;; packet presumably sits in the last branch delay slot and runs on
	;; both paths - confirm against the ISA guide.
84	ZERO	.L1	A7
85||	ZERO	.D1	A8
86
87#ifdef CONFIG_CPU_BIG_ENDIAN
88
	;; Big endian: the first byte (A7) is the high byte of the halfword,
	;; so A9 += (A7 << 8) + A8.  Both bytes are stored to dst one by one.
89	LDBU	.D1T1	*A4++,A7
90	LDBU	.D1T1	*A4++,A8
91	NOP		3
92	SHL	.S1	A7,8,A0
93	ADD	.S1	A8,A9,A9
94	STB	.D2T1	A7,*B4++
95||	ADD	.S1	A0,A9,A9
96	STB	.D2T1	A8,*B4++
97
98#else
99
	;; Little endian: the first byte (A7) is the low byte of the halfword,
	;; so A9 += A7 + (A8 << 8).  Both bytes are stored to dst one by one.
100	LDBU	.D1T1	*A4++,A7
101	LDBU	.D1T1	*A4++,A8
102	NOP		3
103	ADD	.S1	A7,A9,A9
104	SHL	.S1	A8,8,A0
105
106	STB	.D2T1	A7,*B4++
107||	ADD	.S1	A0,A9,A9
108	STB	.D2T1	A8,*B4++
109
110#endif
111
112	;; Manage eventually the last byte
	;; i.e. handle the trailing byte, if any: B0 = len & 1, and when it is
	;; zero there is nothing left to copy, so go fold the sum at L9.
113L82:	AND	.S2X	1,A6,B0
114  [!B0]	BNOP	.S1	L9,5
115
116||	ZERO	.L1	A7
117
	;; Copy the last byte through B7.  It is added to A9 shifted left by 8
	;; on big endian and unshifted on little endian.
118L83:	LDBU	.D1T1	*A4++,A7
119	NOP		4
120
121	MV	.L2X	A7,B7
122
123#ifdef CONFIG_CPU_BIG_ENDIAN
124
125	STB	.D2T2	B7,*B4++
126||	SHL	.S1	A7,8,A7
127	ADD	.S1	A7,A9,A9
128
129#else
130
131	STB	.D2T2	B7,*B4++
132||	ADD	.S1	A7,A9,A9
133
134#endif
135
136	;; Fold the csum
	;; Nothing to fold when A9 >> 16 is already zero.
137L9:	SHRU	.S2X	A9,16,B0
138  [!B0]	BNOP	.S1	L10,5
139
	;; A9 = (A9 & 0xffff) + (A9 >> 16), repeated while A9 >> 16 is non-zero.
	;; B4 (the dst pointer) is reused as scratch here; the copy is finished.
140L91:	SHRU	.S2X	A9,16,B4
141||	EXTU	.S1	A9,16,16,A3
142	ADD	.D1X	A3,B4,A9
143
144	SHRU	.S1	A9,16,A0
145   [A0]	BNOP	.S1	L91,5
146
	;; Add the caller's sum saved in A31 (the total is not folded again)
	;; and place the result in A4.
147L10:	ADD	.D1	A31,A9,A9
148	MV	.D1	A9,A4
149
	;; Return through B3.  The MVC restoring ILC from B30 follows a BNOP
	;; with 4 NOPs, so it presumably executes in the last delay slot.
150	BNOP	.S2	B3,4
151	MVC	.S2	B30,ILC
152ENDPROC(csum_partial_copy)
153
154;
155;unsigned short
156;ip_fast_csum(unsigned char *iph, unsigned int ihl)
157;{
158;	unsigned int checksum = 0;
159;	unsigned short *tosum = (unsigned short *) iph;
160;	int len;
161;
162;	len = ihl*4;
163;
164;	if (len <= 0)
165;		return 0;
166;
167;	while(len) {
168;		len -= 2;
169;		checksum += *tosum++;
170;	}
171;	if (len & 1)
172;		checksum += *(unsigned char*) tosum;
173;
174;	while(checksum >> 16)
175;		checksum = (checksum & 0xffff) + (checksum >> 16);
176;
177;	return ~checksum;
178;}
179;
180; A4:	iph
181; B4:	ihl
182; return checksum in A4
183;
184	.text
185
; ip_fast_csum: sums B4 (ihl) 32-bit words starting at A4 (iph) as unsigned
; 16-bit halfwords, folds the sum to 16 bits and returns the low 16 bits of
; its complement in A4.  Returns 0 when ihl * 4 <= 0.
; ILC is saved in B30 on entry and written back on exit.
; Registers written: A0, A3, A4, A5, B0, B1, B4, B30.
186ENTRY(ip_fast_csum)
	;; A5 is the running sum; B30 keeps the caller's ILC.
187	ZERO	.D1	A5
188 ||	MVC	.S2	ILC,B30
	;; B0 = ihl * 4 (length in bytes); B1 = (B0 > 0), signed compare.
189	SHL	.S2	B4,2,B0
190	CMPGT	.L2	B0,0,B1
	;; Non-positive length: leave through L15 with A3 = 0 as return value.
191  [!B1] BNOP	.S1	L15,4
192  [!B1]	ZERO	.D1	A3
193
	;; B0 becomes the halfword count (len >> 1) and is loaded into ILC.
	;; NOTE(review): the [!B0] branch to L12 tests for a zero length, which
	;; the B1 check above appears to have excluded already - confirm.
194  [!B0]	B	.S1	L12
195	SHRU	.S2	B0,1,B0
196	MVC	.S2	B0,ILC
197	NOP	3
198
	;; Pipelined loop: load one unsigned halfword per iteration into A3 and
	;; add it to A5.
199	SPLOOP	1
200	LDHU	.D1T1	*A4++,A3
201	NOP	3
202	NOP
203	SPKERNEL	5,0
204 ||	ADD	.L1	A3,A5,A5
205
	;; Skip the fold loop when the sum already fits in 16 bits.
206L12:	SHRU	.S1	A5,16,A0
207  [!A0]	BNOP	.S1	L14,5
208
	;; A5 = (A5 & 0xffff) + (A5 >> 16), repeated while A5 >> 16 is non-zero.
209L13:	SHRU	.S2X	A5,16,B4
210	EXTU	.S1	A5,16,16,A3
211	ADD	.D1X	A3,B4,A5
212	SHRU	.S1	A5,16,A0
213  [A0]	BNOP	.S1	L13,5
214
	;; A3 = ~A5, truncated to its low 16 bits.
215L14:	NOT	.D1	A5,A3
216	EXTU	.S1	A3,16,16,A3
217
	;; Return through B3.  The two instructions after the BNOP (3 NOPs)
	;; restore ILC and move the result into A4, presumably in the remaining
	;; branch delay slots.
218L15:	BNOP	.S2	B3,3
219	MVC	.S2	B30,ILC
220	MV	.D1	A3,A4
221ENDPROC(ip_fast_csum)
222
223;
224;unsigned short
225;do_csum(unsigned char *buff, unsigned int len)
226;{
227;	int odd, count;
228;	unsigned int result = 0;
229;
230;	if (len <= 0)
231;		goto out;
232;	odd = 1 & (unsigned long) buff;
233;	if (odd) {
234;#ifdef __LITTLE_ENDIAN
235;		result += (*buff << 8);
236;#else
237;		result = *buff;
238;#endif
239;		len--;
240;		buff++;
241;	}
242;	count = len >> 1;		/* nr of 16-bit words.. */
243;	if (count) {
244;		if (2 & (unsigned long) buff) {
245;			result += *(unsigned short *) buff;
246;			count--;
247;			len -= 2;
248;			buff += 2;
249;		}
250;		count >>= 1;		/* nr of 32-bit words.. */
251;		if (count) {
252;			unsigned int carry = 0;
253;			do {
254;				unsigned int w = *(unsigned int *) buff;
255;				count--;
256;				buff += 4;
257;				result += carry;
258;				result += w;
259;				carry = (w > result);
260;			} while (count);
261;			result += carry;
262;			result = (result & 0xffff) + (result >> 16);
263;		}
264;		if (len & 2) {
265;			result += *(unsigned short *) buff;
266;			buff += 2;
267;		}
268;	}
269;	if (len & 1)
270;#ifdef __LITTLE_ENDIAN
271;		result += *buff;
272;#else
273;		result += (*buff << 8);
274;#endif
275;	result = (result & 0xffff) + (result >> 16);
276;	/* add up carry.. */
277;	result = (result & 0xffff) + (result >> 16);
278;	if (odd)
279;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
280;out:
281;	return result;
282;}
283;
284; A4:	buff
285; B4:	len
286; return checksum in A4
287;
288
; do_csum: 16-bit folded sum of B4 (len) bytes starting at A4 (buff),
; returned in A4; returns 0 when len <= 0.  The steps follow the C
; reference in the comment block preceding this routine.
; Register roles: A1 = result, A3 = odd-start flag, A5 = return address
; (copied from B3), B3 = remaining length, B0 = halfword/word counts.
; Registers written: A0-A7, B0, B3 and ILC.
; NOTE(review): unlike csum_partial_copy and ip_fast_csum in this file, ILC
; is written here without being saved and restored - confirm this is
; acceptable for the callers.
289ENTRY(do_csum)
	;; B0 = (len > 0); if not, leave through L26.  A0 = buff & 1.
290	   CMPGT   .L2	   B4,0,B0
291   [!B0]   BNOP    .S1	   L26,3
292	   EXTU    .S1	   A4,31,31,A0
293
	;; A3 = odd flag, A5 = return address, B3 = len, A1 = result = 0.
	;; NOTE(review): the BNOP above carries only 3 NOPs, so the EXTU and
	;; this packet presumably also run on the len <= 0 path, which is what
	;; lets L26 return 0 through A5 - confirm against the ISA guide.
294	   MV	   .L1	   A0,A3
295||	   MV	   .S1X    B3,A5
296||	   MV	   .L2	   B4,B3
297||	   ZERO    .D1	   A1
298
299#ifdef CONFIG_CPU_BIG_ENDIAN
	;; Big endian, odd address: result = first byte, len--.
300   [A0]    SUB	   .L2	   B3,1,B3
301|| [A0]    LDBU    .D1T1   *A4++,A1
302#else
	;; Little endian, odd address: result = first byte << 8, len--.
	;; An even address skips to L21.
303   [!A0]   BNOP    .S1	   L21,5
304|| [A0]    LDBU    .D1T1   *A4++,A0
305	   SUB	   .L2	   B3,1,B3
306||	   SHL	   .S1	   A0,8,A1
307L21:
308#endif
	;; B0 = number of 16-bit halfwords left; with none, only a possible
	;; last byte remains (L24).  A0 = buff & 2.
309	   SHR	   .S2	   B3,1,B0
310   [!B0]   BNOP    .S1	   L24,3
311	   MVK	   .L1	   2,A0
312	   AND	   .L1	   A4,A0,A0
313
	;; Address not 4-byte aligned: consume one halfword first
	;; (count--, len -= 2, result += halfword); otherwise skip to L22.
314   [!A0]   BNOP    .S1	   L22,5
315|| [A0]    LDHU    .D1T1   *A4++,A0
316	   SUB	   .L2	   B0,1,B0
317||	   SUB	   .S2	   B3,2,B3
318||	   ADD	   .L1	   A0,A1,A1
319L22:
	;; B0 = number of 32-bit words; A0 = carry = 0.
320	   SHR	   .S2	   B0,1,B0
321||	   ZERO    .L1	   A0
322
	;; No whole word: skip to L23.  Otherwise ILC = word count.
323   [!B0]   BNOP    .S1	   L23,5
324|| [B0]    MVC	   .S2	   B0,ILC
325
	;; Pipelined word loop, as in the C reference: w = *buff++ (A1),
	;; A0 = carry + w, result (A2) += A0, carry (A0) = (w > result).
	;; The MV paired with SPMASK presumably seeds A2 with the running
	;; result (A1) once, before the first loaded word arrives - confirm.
326	   SPLOOP  3
327	   SPMASK  L1
328||	   MV	   .L1	   A1,A2
329||	   LDW	   .D1T1   *A4++,A1
330
331	   NOP	   4
332	   ADD	   .L1	   A0,A1,A0
333	   ADD	   .L1	   A2,A0,A2
334
335	   SPKERNEL 1,2
336||	   CMPGTU  .L1	   A1,A2,A0
337
	;; result += carry, then fold once: A1 = (A6 & 0xffff) + (A6 >> 16).
338	   ADD	   .L1	   A0,A2,A6
339	   EXTU    .S1	   A6,16,16,A7
340	   SHRU    .S2X    A6,16,B0
341	   NOP		   1
342	   ADD	   .L1X    A7,B0,A1
343L23:
	;; B0 = len & 2: add one more halfword if present.
344	   MVK	   .L2	   2,B0
345	   AND	   .L2	   B3,B0,B0
346   [B0]    LDHU    .D1T1   *A4++,A0
347	   NOP	   4
348   [B0]    ADD	   .L1	   A0,A1,A1
349L24:
	;; B0 = len & 1: add the last byte if present - shifted left by 8 on
	;; big endian, unshifted on little endian.
350	   EXTU    .S2	   B3,31,31,B0
351#ifdef CONFIG_CPU_BIG_ENDIAN
352   [!B0]   BNOP    .S1	   L25,4
353|| [B0]    LDBU    .D1T1   *A4,A0
354	   SHL	   .S1	   A0,8,A0
355	   ADD	   .L1	   A0,A1,A1
356L25:
357#else
358   [B0]    LDBU    .D1T1   *A4,A0
359	   NOP	   4
360   [B0]    ADD	   .L1	   A0,A1,A1
361#endif
	;; Fold twice: A0 = (A1 & 0xffff) + (A1 >> 16), then A0 += A0 >> 16,
	;; and keep the low 16 bits in A1.
362	   EXTU    .S1	   A1,16,16,A0
363	   SHRU    .S2X    A1,16,B0
364	   NOP	   1
365	   ADD	   .L1X    A0,B0,A0
366	   SHRU    .S1	   A0,16,A1
367	   ADD	   .L1	   A0,A1,A0
368	   EXTU    .S1	   A0,16,16,A1
	;; Byte-swap pieces of the 16-bit result: A2 = (A1 >> 8) & 0xff and
	;; A0 = (A1 & 0xff) << 8.  They replace A1 only when the start address
	;; was odd (flag saved in A3, tested through B0).
369	   EXTU    .S1	   A1,16,24,A2
370
371	   EXTU    .S1	   A1,24,16,A0
372||	   MV	   .L2X    A3,B0
373
374   [B0]    OR	   .L1	   A0,A2,A1
375L26:
	;; Exit: return to the address saved in A5.  The MV placing the result
	;; (A1) in A4 follows a BNOP with 4 NOPs, so it presumably executes in
	;; the last branch delay slot.
376	   NOP	   1
377	   BNOP    .S2X    A5,4
378	   MV	   .L1	   A1,A4
379ENDPROC(do_csum)
380
381;__wsum csum_partial(const void *buff, int len, __wsum wsum)
382;{
383;	unsigned int sum = (__force unsigned int)wsum;
384;	unsigned int result = do_csum(buff, len);
385;
386;	/* add in old sum, and carry.. */
387;	result += sum;
388;	if (sum > result)
389;		result += 1;
390;	return (__force __wsum)result;
391;}
392;
; csum_partial: A4 (buff) and B4 (len) are passed unchanged to do_csum; A6
; is the 32-bit sum to add in (wsum in the C prototype preceding this
; routine).  Returns in A4: do_csum's result + A6, plus 1 when that
; addition wrapped around.
; A8 (wsum) and A9 (return address) are expected to survive the call;
; do_csum as written in this file does not write either of them.
393ENTRY(csum_partial)
	;; CALLP takes B3 as its return-address operand, so the caller's B3 is
	;; copied to A9 in the same packet; wsum is kept in A8.
394	   MV	   .L1X    B3,A9
395||	   CALLP   .S2	   do_csum,B3
396||	   MV	   .S1	   A6,A8
	;; Return through A9.  The three instructions after the BNOP (2 NOPs)
	;; compute A1 = wsum + result, A0 = (wsum > A1) as the unsigned carry,
	;; and A4 = A1 + A0, presumably in the remaining branch delay slots.
397	   BNOP    .S2X    A9,2
398	   ADD	   .L1	   A8,A4,A1
399	   CMPGTU  .L1	   A8,A1,A0
400	   ADD	   .L1	   A1,A0,A4
401ENDPROC(csum_partial)
402
403;unsigned short
404;ip_compute_csum(unsigned char *buff, unsigned int len)
405;
406; A4:	buff
407; B4:	len
408; return checksum in A4
409
; ip_compute_csum: calls do_csum with A4 (buff) and B4 (len) unchanged and
; returns in A4 the complement of its result with bits 16-31 cleared.
; A9 (return address) is expected to survive the call; do_csum as written
; in this file does not write it.
410ENTRY(ip_compute_csum)
	;; CALLP takes B3 as its return-address operand, so the caller's B3 is
	;; copied to A9 in the same packet.
411	   MV	   .L1X    B3,A9
412||	   CALLP   .S2	   do_csum,B3
	;; Return through A9.  The two instructions after the BNOP (3 NOPs)
	;; invert A4 and clear its bits 16 to 31, presumably in the remaining
	;; branch delay slots.
413	   BNOP    .S2X    A9,3
414	   NOT	   .S1	   A4,A4
415	   CLR     .S1	   A4,16,31,A4
416ENDPROC(ip_compute_csum)
417