• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; SPDX-License-Identifier: GPL-2.0-only
2;
3;  linux/arch/c6x/lib/csum_64plus.s
4;
5;  Port on Texas Instruments TMS320C6x architecture
6;
7;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
8;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
9;
10#include <linux/linkage.h>
11
12;
13;unsigned int csum_partial_copy_nocheck(const char *src, char * dst,
14;					int len, int sum)
15;
16; A4:	src
17; B4:	dst
18; A6:	len
19; B6:	sum (listed in the prototype above, but B6 is never read by the code below)
20; return csum in A4
21;
22
23	.text
24ENTRY(csum_partial_copy_nocheck)
	;; Copy A6 bytes from *A4 to *B4 while adding the data up as 16-bit
	;; quantities.  The accumulators start at zero, the total is folded
	;; to 16 bits and handed back in A4.  B6 is never read here.
	;; Register use: A9/B9 = partial sums, B5 = word count, B30 = saved ILC.
25	MVC	.S2	ILC,B30		; save ILC in B30, restored just before returning
26
27	ZERO	.D1	A9		; csum (a side)
28||	ZERO	.D2	B9		; csum (b side)
29||	SHRU	.S2X	A6,2,B5		; len / 4
30
31	;; Check alignment and size
32	AND	.S1	3,A4,A1		; A1 = src & 3
33||	AND	.S2	3,B4,B0		; B0 = dst & 3
34	OR	.L2X	B0,A1,B0	; non aligned condition
35||	MVC	.S2	B5,ILC		; ILC = number of 32-bit words
36||	MVK	.D2	1,B2		; B2 = 1, multiplier used by MPYU in the aligned loop
37||	MV	.D1X	B5,A1		; words condition
38  [!A1]	B	.S1	L8		; no whole word: go straight to the tail handling
39   [B0] BNOP	.S1	L6,5		; src or dst not word aligned: use the LDNW/STNW loop
40
41	SPLOOP		1
42
43	;; Main loop for aligned words
44	LDW	.D1T1	*A4++,A7	; A7 = next source word
45	NOP	4			; wait for the loaded word
46	MV	.S2X	A7,B7		; B7 = copy of the word for the store
47||	EXTU	.S1	A7,0,16,A16	; A16 = upper half-word of the word
48	STW	.D2T2	B7,*B4++	; write the word to the destination
49||	MPYU	.M2	B7,B2,B8	; B8 = lower half-word of the word (times B2 = 1)
50||	ADD	.L1	A16,A9,A9	; a side accumulates the upper half-words
51	NOP
52	SPKERNEL	8,0
53||	ADD	.L2	B8,B9,B9	; b side accumulates the lower half-words
54
55	ZERO	.D1	A1		; A1 = 0 so that the test at L6 skips the non-aligned loop
56||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides
57
58L6:
59  [!A1]	BNOP	.S1	L8,5		; A1 == 0: words already handled, go to the tail
60
61	;; Main loop for non-aligned words
62	SPLOOP		2
63 ||	MVK	.L1	1,A2		; A2 = 1, multiplier used by MPYU below
64
65	LDNW	.D1T1	*A4++,A7	; A7 = next source word (non-aligned load)
66	NOP		3
67
68	NOP
69	MV	.S2X	A7,B7		; B7 = copy of the word for the store
70 ||	EXTU	.S1	A7,0,16,A16	; A16 = upper half-word of the word
71 ||	MPYU	.M1	A7,A2,A8	; A8 = lower half-word of the word (times A2 = 1)
72
73	ADD	.L1	A16,A9,A9	; add the upper half-word to the sum
74	SPKERNEL	6,0
75 ||	STNW	.D2T2	B7,*B4++	; write the word to the destination (non-aligned store)
76 ||	ADD	.L1	A8,A9,A9	; add the lower half-word to the sum
77
78L8:	AND	.S2X	2,A6,B5		; B5 = len & 2
79	CMPGT	.L2	B5,0,B0		; B0 = 1 when a trailing half-word is present
80  [!B0]	BNOP	.S1	L82,4		; none: continue with the last-byte test
81
82	;; Handle a trailing half-word (bit 1 of len set)
83	ZERO	.L1	A7
84||	ZERO	.D1	A8
85
86#ifdef CONFIG_CPU_BIG_ENDIAN
87
88	LDBU	.D1T1	*A4++,A7	; A7 = first byte
89	LDBU	.D1T1	*A4++,A8	; A8 = second byte
90	NOP		3
91	SHL	.S1	A7,8,A0		; first byte is the high byte of the half-word
92	ADD	.S1	A8,A9,A9	; add the second byte as the low byte
93	STB	.D2T1	A7,*B4++	; copy the first byte
94||	ADD	.S1	A0,A9,A9	; add the shifted first byte
95	STB	.D2T1	A8,*B4++	; copy the second byte
96
97#else
98
99	LDBU	.D1T1	*A4++,A7	; A7 = first byte
100	LDBU	.D1T1	*A4++,A8	; A8 = second byte
101	NOP		3
102	ADD	.S1	A7,A9,A9	; add the first byte as the low byte
103	SHL	.S1	A8,8,A0		; second byte is the high byte of the half-word
104
105	STB	.D2T1	A7,*B4++	; copy the first byte
106||	ADD	.S1	A0,A9,A9	; add the shifted second byte
107	STB	.D2T1	A8,*B4++	; copy the second byte
108
109#endif
110
111	;; Handle the last byte, if any
112L82:	AND	.S2X	1,A6,B0		; B0 = len & 1
113  [!B0]	BNOP	.S1	L9,5		; even length: nothing left, go fold the sum
114
115||	ZERO	.L1	A7
116
117L83:	LDBU	.D1T1	*A4++,A7	; A7 = last source byte
118	NOP		4
119
120	MV	.L2X	A7,B7		; B7 = copy of the byte for the store
121
122#ifdef CONFIG_CPU_BIG_ENDIAN
123
124	STB	.D2T2	B7,*B4++	; copy the byte
125||	SHL	.S1	A7,8,A7		; the byte counts as the high byte of a half-word
126	ADD	.S1	A7,A9,A9
127
128#else
129
130	STB	.D2T2	B7,*B4++	; copy the byte
131||	ADD	.S1	A7,A9,A9	; the byte counts as the low byte of a half-word
132
133#endif
134
135	;; Fold the csum
136L9:	SHRU	.S2X	A9,16,B0	; B0 = bits 16-31 of the sum
137  [!B0]	BNOP	.S1	L10,5		; already fits in 16 bits: no folding needed
138
139L91:	SHRU	.S2X	A9,16,B4	; B4 = upper half of the sum (dst pointer no longer needed)
140||	EXTU	.S1	A9,16,16,A3	; A3 = lower half of the sum
141	ADD	.D1X	A3,B4,A9	; sum = lower half + upper half
142
143	SHRU	.S1	A9,16,A0	; A0 = carry out of the lower 16 bits
144   [A0]	BNOP	.S1	L91,5		; fold again while bits 16-31 are non-zero
145
146L10:	MV	.D1	A9,A4		; result goes to A4
147
148	BNOP	.S2	B3,4		; branch to the address held in B3
149	MVC	.S2	B30,ILC		; restore the ILC value saved at entry
150ENDPROC(csum_partial_copy_nocheck)
151
152;
153;unsigned short
154;ip_fast_csum(unsigned char *iph, unsigned int ihl)
155;{
156;	unsigned int checksum = 0;
157;	unsigned short *tosum = (unsigned short *) iph;
158;	int len;
159;
160;	len = ihl*4;
161;
162;	if (len <= 0)
163;		return 0;
164;
165;	while(len) {
166;		len -= 2;
167;		checksum += *tosum++;
168;	}
169;	if (len & 1)
170;		checksum += *(unsigned char*) tosum;
171;
172;	while(checksum >> 16)
173;		checksum = (checksum & 0xffff) + (checksum >> 16);
174;
175;	return ~checksum;
176;}
177;
178; A4:	iph
179; B4:	ihl
180; return checksum in A4
181;
182	.text
183
184ENTRY(ip_fast_csum)
	;; Add up B4*4 bytes at *A4 as unsigned half-words, fold the total to
	;; 16 bits and hand back its 16-bit complement in A4.
	;; When B4*4 is not greater than zero the result is 0.
185	ZERO	.D1	A5		; A5 = running checksum
186 ||	MVC	.S2	ILC,B30		; save ILC in B30, restored before returning
187	SHL	.S2	B4,2,B0		; B0 = ihl * 4 = length in bytes
188	CMPGT	.L2	B0,0,B1		; B1 = 1 when the length is positive
189  [!B1] BNOP	.S1	L15,4		; length <= 0: leave early
190  [!B1]	ZERO	.D1	A3		; ... with a result of 0
191
192  [!B0]	B	.S1	L12		; zero length: skip the summing loop
193	SHRU	.S2	B0,1,B0		; B0 = number of half-words
194	MVC	.S2	B0,ILC		; ILC = number of half-words
195	NOP	3
196
197	SPLOOP	1
198	LDHU	.D1T1	*A4++,A3	; A3 = next half-word, zero extended
199	NOP	3
200	NOP
201	SPKERNEL	5,0
202 ||	ADD	.L1	A3,A5,A5	; checksum += half-word
203
204L12:	SHRU	.S1	A5,16,A0	; A0 = bits 16-31 of the checksum
205  [!A0]	BNOP	.S1	L14,5		; already fits in 16 bits: no folding needed
206
207L13:	SHRU	.S2X	A5,16,B4	; B4 = upper half of the checksum
208	EXTU	.S1	A5,16,16,A3	; A3 = lower half of the checksum
209	ADD	.D1X	A3,B4,A5	; checksum = lower half + upper half
210	SHRU	.S1	A5,16,A0	; A0 = carry out of the lower 16 bits
211  [A0]	BNOP	.S1	L13,5		; fold again while bits 16-31 are non-zero
212
213L14:	NOT	.D1	A5,A3		; A3 = complement of the folded checksum
214	EXTU	.S1	A3,16,16,A3	; keep only the lower 16 bits
215
216L15:	BNOP	.S2	B3,3		; branch to the address held in B3
217	MVC	.S2	B30,ILC		; restore the ILC value saved at entry
218	MV	.D1	A3,A4		; result goes to A4
219ENDPROC(ip_fast_csum)
220
221;
222;unsigned short
223;do_csum(unsigned char *buff, unsigned int len)
224;{
225;	int odd, count;
226;	unsigned int result = 0;
227;
228;	if (len <= 0)
229;		goto out;
230;	odd = 1 & (unsigned long) buff;
231;	if (odd) {
232;#ifdef __LITTLE_ENDIAN
233;		result += (*buff << 8);
234;#else
235;		result = *buff;
236;#endif
237;		len--;
238;		buff++;
239;	}
240;	count = len >> 1;		/* nr of 16-bit words.. */
241;	if (count) {
242;		if (2 & (unsigned long) buff) {
243;			result += *(unsigned short *) buff;
244;			count--;
245;			len -= 2;
246;			buff += 2;
247;		}
248;		count >>= 1;		/* nr of 32-bit words.. */
249;		if (count) {
250;			unsigned int carry = 0;
251;			do {
252;				unsigned int w = *(unsigned int *) buff;
253;				count--;
254;				buff += 4;
255;				result += carry;
256;				result += w;
257;				carry = (w > result);
258;			} while (count);
259;			result += carry;
260;			result = (result & 0xffff) + (result >> 16);
261;		}
262;		if (len & 2) {
263;			result += *(unsigned short *) buff;
264;			buff += 2;
265;		}
266;	}
267;	if (len & 1)
268;#ifdef __LITTLE_ENDIAN
269;		result += *buff;
270;#else
271;		result += (*buff << 8);
272;#endif
273;	result = (result & 0xffff) + (result >> 16);
274;	/* add up carry.. */
275;	result = (result & 0xffff) + (result >> 16);
276;	if (odd)
277;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
278;out:
279;	return result;
280;}
281;
282; A4:	buff
283; B4:	len
284; return checksum in A4
285;
286
287ENTRY(do_csum)
	;; Assembly version of the C routine shown in the comment above:
	;; sums B4 bytes at *A4 and returns the folded 16-bit result in A4.
	;; Register use: A1 = result, A3 = odd flag, A5 = saved B3,
	;; B3 = remaining length, B0 = counts and conditions.
	;; NOTE(review): ILC is written below without the save/restore through
	;; B30 that the other loop routines in this file perform - confirm that
	;; callers do not rely on ILC being preserved.
288	   CMPGT   .L2	   B4,0,B0	; B0 = 1 when len > 0
289   [!B0]   BNOP    .S1	   L26,3	; len <= 0: go to the exit
290	   EXTU    .S1	   A4,31,31,A0	; A0 = bit 0 of buff (odd start address)
291
	;; Presumably these still execute in the delay slots of the branch
	;; above, so A1 = 0 and A5 = B3 also hold on the early-exit path.
292	   MV	   .L1	   A0,A3	; A3 = odd, kept for the final byte swap
293||	   MV	   .S1X    B3,A5	; A5 = address to branch to at the end
294||	   MV	   .L2	   B4,B3	; B3 = working copy of len
295||	   ZERO    .D1	   A1		; result = 0
296
297#ifdef CONFIG_CPU_BIG_ENDIAN
298   [A0]    SUB	   .L2	   B3,1,B3	; odd start: len--
299|| [A0]    LDBU    .D1T1   *A4++,A1	; result = first byte
300#else
301   [!A0]   BNOP    .S1	   L21,5	; even start: nothing to do here
302|| [A0]    LDBU    .D1T1   *A4++,A0	; odd start: A0 = first byte
303	   SUB	   .L2	   B3,1,B3	; len--
304||	   SHL	   .S1	   A0,8,A1	; result = first byte << 8
305L21:
306#endif
307	   SHR	   .S2	   B3,1,B0	; B0 = count of 16-bit words
308   [!B0]   BNOP    .S1	   L24,3	; none: only a last byte may remain
309	   MVK	   .L1	   2,A0
310	   AND	   .L1	   A4,A0,A0	; A0 = buff & 2
311
312   [!A0]   BNOP    .S1	   L22,5	; buff already on a 4-byte boundary
313|| [A0]    LDHU    .D1T1   *A4++,A0	; otherwise take one half-word first
314	   SUB	   .L2	   B0,1,B0	; count--
315||	   SUB	   .S2	   B3,2,B3	; len -= 2
316||	   ADD	   .L1	   A0,A1,A1	; result += half-word
317L22:
318	   SHR	   .S2	   B0,1,B0	; B0 = count of 32-bit words
319||	   ZERO    .L1	   A0		; carry = 0
320
321   [!B0]   BNOP    .S1	   L23,5	; no whole word: skip the loop
322|| [B0]    MVC	   .S2	   B0,ILC	; ILC = number of 32-bit words
323
	;; Word loop: A2 = result, A1 = word just loaded, A0 = carry
324	   SPLOOP  3
325	   SPMASK  L1
326||	   MV	   .L1	   A1,A2	; A2 = result accumulated so far
327||	   LDW	   .D1T1   *A4++,A1	; A1 = w = next 32-bit word
328
329	   NOP	   4
330	   ADD	   .L1	   A0,A1,A0	; A0 = carry + w
331	   ADD	   .L1	   A2,A0,A2	; result += carry + w
332
333	   SPKERNEL 1,2
334||	   CMPGTU  .L1	   A1,A2,A0	; carry = (w > result), unsigned
335
336	   ADD	   .L1	   A0,A2,A6	; result += last carry
337	   EXTU    .S1	   A6,16,16,A7	; A7 = result & 0xffff
338	   SHRU    .S2X    A6,16,B0	; B0 = result >> 16
339	   NOP		   1
340	   ADD	   .L1X    A7,B0,A1	; result = lower half + upper half
341L23:
342	   MVK	   .L2	   2,B0
343	   AND	   .L2	   B3,B0,B0	; B0 = len & 2
344   [B0]    LDHU    .D1T1   *A4++,A0	; trailing half-word, if any
345	   NOP	   4
346   [B0]    ADD	   .L1	   A0,A1,A1	; result += trailing half-word
347L24:
348	   EXTU    .S2	   B3,31,31,B0	; B0 = len & 1
349#ifdef CONFIG_CPU_BIG_ENDIAN
350   [!B0]   BNOP    .S1	   L25,4	; no last byte
351|| [B0]    LDBU    .D1T1   *A4,A0	; A0 = last byte
352	   SHL	   .S1	   A0,8,A0	; last byte counts as the high byte
353	   ADD	   .L1	   A0,A1,A1	; result += last byte << 8
354L25:
355#else
356   [B0]    LDBU    .D1T1   *A4,A0	; A0 = last byte
357	   NOP	   4
358   [B0]    ADD	   .L1	   A0,A1,A1	; result += last byte
359#endif
360	   EXTU    .S1	   A1,16,16,A0	; A0 = result & 0xffff
361	   SHRU    .S2X    A1,16,B0	; B0 = result >> 16
362	   NOP	   1
363	   ADD	   .L1X    A0,B0,A0	; first fold: lower half + upper half
364	   SHRU    .S1	   A0,16,A1	; A1 = carry out of the first fold
365	   ADD	   .L1	   A0,A1,A0	; add up carry
366	   EXTU    .S1	   A0,16,16,A1	; A1 = folded 16-bit result
367	   EXTU    .S1	   A1,16,24,A2	; A2 = (result >> 8) & 0xff
368
369	   EXTU    .S1	   A1,24,16,A0	; A0 = (result & 0xff) << 8
370||	   MV	   .L2X    A3,B0	; B0 = odd flag saved at entry
371
372   [B0]    OR	   .L1	   A0,A2,A1	; odd start: swap the two result bytes
373L26:
374	   NOP	   1
375	   BNOP    .S2X    A5,4		; branch to the address saved from B3
376	   MV	   .L1	   A1,A4	; result goes to A4
377ENDPROC(do_csum)
378
379;__wsum csum_partial(const void *buff, int len, __wsum wsum)
380;{
381;	unsigned int sum = (__force unsigned int)wsum;
382;	unsigned int result = do_csum(buff, len);
383;
384;	/* add in old sum, and carry.. */
385;	result += sum;
386;	if (sum > result)
387;		result += 1;
388;	return (__force __wsum)result;
389;}
390;
391ENTRY(csum_partial)
	;; Calls do_csum with A4/B4 passed through unchanged, then adds the
	;; incoming sum from A6 plus the carry of that addition, as in the C
	;; version shown above.  Result in A4.
	;; A8 and A9 hold values across the call; the do_csum code in this
	;; file does not write either register.
392	   MV	   .L1X    B3,A9	; A9 = address to branch to at the end
393||	   CALLP   .S2	   do_csum,B3	; result = do_csum(buff, len), comes back in A4
394||	   MV	   .S1	   A6,A8	; A8 = sum (incoming wsum)
395	   BNOP    .S2X    A9,2		; branch to the saved address
396	   ADD	   .L1	   A8,A4,A1	; result += sum
397	   CMPGTU  .L1	   A8,A1,A0	; A0 = (sum > result), the carry of the addition
398	   ADD	   .L1	   A1,A0,A4	; A4 = result + carry
399ENDPROC(csum_partial)
400
401;unsigned short
402;ip_compute_csum(unsigned char *buff, unsigned int len)
403;
404; A4:	buff
405; B4:	len
406; return checksum in A4
407
408ENTRY(ip_compute_csum)
	;; Calls do_csum with A4/B4 passed through unchanged and hands back
	;; the complement of its result, limited to the lower 16 bits, in A4.
	;; A9 holds a value across the call; the do_csum code in this file
	;; does not write it.
409	   MV	   .L1X    B3,A9	; A9 = address to branch to at the end
410||	   CALLP   .S2	   do_csum,B3	; A4 = do_csum(buff, len)
411	   BNOP    .S2X    A9,3		; branch to the saved address
412	   NOT	   .S1	   A4,A4	; A4 = complement of the checksum
413	   CLR     .S1	   A4,16,31,A4	; clear bits 16-31, keeping a 16-bit value
414ENDPROC(ip_compute_csum)
415