// GHASH for x86: MMX (4-bit table) and PCLMULQDQ code paths, AT&T syntax.
// Assemble the remainder of the file only for 32-bit x86 targets.
#if defined(__i386__)
.file	"ghash-x86.S"
.text
// gcm_gmult_4bit_mmx(Xi, Htable)   -- cdecl, arguments on the stack
//
// Walks the 16-byte block at Xi one nibble at a time (byte 15 down to byte 0),
// using each nibble to select a 16-byte entry of Htable, and overwrites Xi
// with the accumulated result.  Every 4-bit right shift of the accumulator is
// compensated through the Lrem_4bit table.
//   4(%esp)  Xi      16-byte block, read and then rewritten in place
//   8(%esp)  Htable  16 entries of 16 bytes, indexed by nibble value
// %ebp/%ebx/%esi/%edi are saved and restored; %eax/%ecx/%edx and %mm0-%mm2
// are clobbered.  emms is executed before returning.
// NOTE(review): the GF(2^128) reading (Xi = Xi * H) is inferred from the
// symbol name and the embedded "GHASH" string -- confirm against the caller.
.globl	_gcm_gmult_4bit_mmx
.private_extern	_gcm_gmult_4bit_mmx
.align	4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	// Four registers were pushed, so the arguments now sit at 20/24(%esp).
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	// call/pop obtains the address of L000pic_point; the table address is
	// then formed relative to it (position independent).
	call	L000pic_point
L000pic_point:
	popl	%eax
	leal	Lrem_4bit-L000pic_point(%eax),%eax
	// Split byte 15: %ecx = low nibble * 16, %edx = high nibble * 16.
	// %ebp counts the remaining byte index (14 .. 0).
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	// Accumulator: %mm0 = offset-8 qword, %mm1 = offset-0 qword of the entry.
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	L001mmx_loop
.align	4,0x90
L001mmx_loop:
	// High-nibble step: shift the 128-bit accumulator right by 4 (the top
	// nibble of %mm0 comes from %mm1 via %mm2), fold the shifted-out nibble
	// through Lrem_4bit, and XOR in the entry selected by %edx.
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	// Fetch the next byte of Xi while the MMX work is in flight.
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	// The flags set by decl are consumed by the js below; the instructions
	// in between do not modify them.
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	L002mmx_break
	// Low-nibble step for the byte just fetched (index in %ecx).
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	L001mmx_loop
.align	4,0x90
L002mmx_break:
	// Byte 0 was fetched before the counter went negative: process its low
	// nibble (%ecx) and then its high nibble (%edx).
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	// %ebx takes the low dword of %mm0 before %mm2 is merged; %mm2 only
	// carries bits 60..63, so the low dword is already final.
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	// Split the two qwords into four dwords, byte-swap each, and store them
	// so that %mm1 lands in Xi[0..7] and %mm0 in Xi[8..15].
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
// gcm_ghash_4bit_mmx(Xi, Htable, inp, len)   -- cdecl, arguments on the stack
//
// For every 16-byte block of inp: XORs the block into the running value, then
// performs the table-driven multiply a byte at a time (byte 15 first) using
// copies of Htable built on the stack and the Lrem_8bit table.  The final
// value is written back to Xi.
//   4(%esp)   Xi      16-byte block, loaded once and stored once at the end
//   8(%esp)   Htable  16 entries of 16 bytes
//   12(%esp)  inp     input data
//   16(%esp)  len     byte count; inp+len is the loop end pointer
// The block loop is bottom-tested and exits on pointer equality with inp+len,
// so len has to be a non-zero multiple of 16.
// Uses pinsrw/pshufw on MMX registers.  %ebp/%ebx/%esi/%edi are preserved,
// %esp is restored from the frame, and emms is executed before returning.
//
// Stack frame after alignment (offsets from %esp; %esp+16 is 64-byte aligned):
//     0.. 15  one byte per entry: low byte of the offset-8 qword, shifted left 4
//    16..143  offset-8 qword of each Htable entry
//   144..271  offset-0 qword of each Htable entry
//   272..399  offset-8 qword of each entry after a 128-bit right shift by 4
//   400..527  offset-0 qword of each entry after that shift
//   528..539  bytes 0..11 of (running value ^ input block); bytes 12..15 are
//             kept in %edx
//   544  Xi pointer     548  current input pointer
//   552  inp + len      556  caller %esp
.globl	_gcm_ghash_4bit_mmx
.private_extern	_gcm_ghash_4bit_mmx
.align	4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	// Keep the incoming %esp so the frame can be dropped in one move later.
	movl	%esp,%ebp
	call	L003pic_point
L003pic_point:
	popl	%esi
	leal	Lrem_8bit-L003pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	// %edx becomes the end pointer inp + len.
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	// Bias the pointers by 128 so every table slot is reachable with a
	// signed 8-bit displacement: %ebx = Htable+128, %edi = frame+144,
	// %ebp = frame+400.
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	// Table build, fully unrolled and software-pipelined over the 16
	// entries.  For each entry: store the nibble byte at i(%esp), copy both
	// qwords to the %edi-based tables, and store the entry shifted right by
	// 4 (psrlq $4 / psllq $60 / por) to the %ebp-based tables.
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	// Load the running value: bytes 0..7 in %mm6, 8..11 in %ebx,
	// 12..15 in %edx.
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	4,0x90
L004outer:
	// XOR in the next input block and advance the input pointer.
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	// Each roll $8 brings the next byte (highest address first) into %dl.
	// Per byte: the low nibble selects an unshifted entry (16/144 tables),
	// the high nibble a pre-shifted entry (272/400 tables); the accumulator
	// %mm6:%mm7 moves right by 8 bits, and the byte that drops out, XORed
	// with the nibble byte at (%esp,idx), indexes Lrem_8bit.  The looked-up
	// words rotate through %mm2/%mm1/%mm0 and are XORed into %mm6 one step
	// later.
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	// Bytes 12..15 have all been extracted; continue with bytes 8..11.
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	// Bytes 8..11 extracted; continue with bytes 4..7.
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	// Bytes 4..7 extracted; continue with bytes 0..3.
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	// NOTE(review): byte 0 is already in %al, and %edx is overwritten by
	// the movd further down before anything reads it, so the value loaded
	// here is never used.
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	// Last nibble (high nibble of byte 0, in %edi): the accumulator moves
	// right by 4 instead of 8 and the unshifted tables are used.  The
	// pending Lrem_8bit words are aligned with shifts of 4 and 12 and an
	// insert into word 3 before being XORed into %mm6.
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	// Byte-reverse the result: swap bytes inside each word of %mm6, then
	// reverse its word order with pshufw $27; bswap the two dwords taken
	// from %mm7.
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	// Continue until the input pointer reaches inp + len.
	cmpl	552(%esp),%ecx
	jne	L004outer
	// Write the final value back to Xi and drop the frame.
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
// gcm_init_clmul(Htable, H)   -- cdecl, leaf; only %eax/%ecx/%edx and
// %xmm0-%xmm5 are touched
//
// Expands the 16-byte value at H into three 16-byte rows at Htable:
//   Htable+0   H with its 64-bit halves swapped, shifted left by one bit as a
//              128-bit quantity, and XORed with the constant at Lbswap+16
//              when the top bit of the original value was set
//   Htable+16  the carry-less square of row 0, reduced to 128 bits
//   Htable+32  low qword = hi^lo of row 0, high qword = hi^lo of row 1
//              (operands for the Karatsuba middle product)
//   4(%esp)  Htable  output, 48 bytes written (unaligned stores)
//   8(%esp)  H       input, 16 bytes (unaligned load)
// The raw .byte sequences are PCLMULQDQ/PALIGNR encodings; their decoded
// forms are given next to each one.
.globl	_gcm_init_clmul
.private_extern	_gcm_init_clmul
.align	4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	// Position-independent address of the Lbswap constants.
	call	L005pic
L005pic:
	popl	%ecx
	leal	Lbswap-L005pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	// Swap the two 64-bit halves.
	pshufd	$78,%xmm2,%xmm2
	// Broadcast the top dword so pcmpgtd can turn its sign bit into a mask.
	pshufd	$255,%xmm2,%xmm4
	// 128-bit left shift by one: shift each qword, then carry bit 63 of the
	// low qword into the high qword.
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	// Conditionally XOR in the constant stored 16 bytes past Lbswap.
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	// Square row 0 with three carry-less multiplies (Karatsuba).
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
	// pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,194,0
	// pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,202,17
	// pclmulqdq $0x00,%xmm4,%xmm3
.byte	102,15,58,68,220,0
	// Middle term: remove the low and high products, then split it across
	// the 256-bit result held in %xmm1:%xmm0.
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	// Reduce %xmm1:%xmm0 to 128 bits: a left-shift/XOR phase followed by a
	// right-shift/XOR phase; the result ends up in %xmm0.
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	// Store rows 0 and 1, and build row 2 from the hi^lo folds of both.
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
	// palignr $8,%xmm3,%xmm4
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%edx)
	ret
// gcm_gmult_clmul(Xi, Htable)   -- cdecl, leaf; only %eax/%ecx/%edx and
// %xmm0-%xmm5 are touched
//
// Byte-reverses the 16-byte block at Xi, carry-less multiplies it by the
// 16 bytes at Htable+0 (with the low qword of Htable+32 supplying the
// pre-folded operand for the middle product), reduces the product to 128
// bits, byte-reverses it again and stores it back to Xi.
//   4(%esp)  Xi      16-byte block, updated in place (unaligned access)
//   8(%esp)  Htable  rows at +0 and +32 are read
// The raw .byte sequences are PSHUFB/PCLMULQDQ encodings; their decoded
// forms are given next to each one.
.globl	_gcm_gmult_clmul
.private_extern	_gcm_gmult_clmul
.align	4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	// Position-independent address of the byte-reversal mask.
	call	L006pic
L006pic:
	popl	%ecx
	leal	Lbswap-L006pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
	// pshufb %xmm5,%xmm0
.byte	102,15,56,0,197
	movups	32(%edx),%xmm4
	// Three carry-less multiplies (Karatsuba): low*low, high*high, and
	// (hi^lo of Xi) times the folded operand in %xmm4.
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
	// pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,194,0
	// pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,202,17
	// pclmulqdq $0x00,%xmm4,%xmm3
.byte	102,15,58,68,220,0
	// Middle term: remove the low and high products, then split it across
	// the 256-bit result held in %xmm1:%xmm0.
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	// Reduce %xmm1:%xmm0 to 128 bits: a left-shift/XOR phase followed by a
	// right-shift/XOR phase; the result ends up in %xmm0.
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	// pshufb %xmm5,%xmm0   (back to the stored byte order)
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	ret
// gcm_ghash_clmul(Xi, Htable, inp, len)   -- cdecl, arguments on the stack
//
// Folds len bytes of inp into the 16-byte value at Xi using carry-less
// multiplication.  Blocks are consumed two per iteration in the main loop
// (rows +0 and +16 of Htable, with row +32 supplying the folded operands for
// the middle products); a single leftover block goes through L008odd_tail.
//   4(%esp)   Xi      16-byte block, updated in place (unaligned access)
//   8(%esp)   Htable  rows at +0, +16 and +32 are read
//   12(%esp)  inp     input data (unaligned loads)
//   16(%esp)  len     byte count
// NOTE(review): the branch structure only accounts for counts of 16, 32, 48,
// ... bytes; len is presumably required to be a non-zero multiple of 16 --
// confirm against the caller.
// %ebp/%ebx/%esi/%edi are saved and restored; %eax/%ecx/%edx and %xmm0-%xmm7
// are clobbered.  The raw .byte sequences are PSHUFB/PCLMULQDQ encodings;
// their decoded forms are given next to each one.
.globl	_gcm_ghash_clmul
.private_extern	_gcm_ghash_clmul
.align	4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	// Four registers were pushed, so the arguments start at 20(%esp).
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	// Position-independent address of the byte-reversal mask.
	call	L007pic
L007pic:
	popl	%ecx
	leal	Lbswap-L007pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
	// pshufb %xmm5,%xmm0
.byte	102,15,56,0,197
	// A count of exactly 16 bytes goes straight to the single-block tail.
	subl	$16,%ebx
	jz	L008odd_tail
	// Load and byte-reverse the first two blocks.  The first is XORed into
	// the running value; the second is multiplied by row 0 right away.
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
	// pshufb %xmm5,%xmm3
.byte	102,15,56,0,221
	// pshufb %xmm5,%xmm6
.byte	102,15,56,0,245
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
	// pclmulqdq $0x00,%xmm2,%xmm6
.byte	102,15,58,68,242,0
	// pclmulqdq $0x11,%xmm2,%xmm7
.byte	102,15,58,68,250,17
	// pclmulqdq $0x00,%xmm5,%xmm3
.byte	102,15,58,68,221,0
	// %xmm2 now holds row 1 (Htable+16) for the running value.
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	L009even_tail
	jmp	L010mod_loop
.align	5,0x90
L010mod_loop:
	// Multiply the running value by row 1, merge the products already
	// computed for the second block of the pair, reduce, and start on the
	// next pair of blocks, all interleaved.
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
	// pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,194,0
	// pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,202,17
	// pclmulqdq $0x10,%xmm5,%xmm4   (high qword of row 2)
.byte	102,15,58,68,229,16
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
	// pshufb %xmm5,%xmm7
.byte	102,15,56,0,253
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	// pshufb %xmm5,%xmm6
.byte	102,15,56,0,245
	// The first block of the next pair is XORed into the high half of the
	// unreduced product.
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	// pclmulqdq $0x00,%xmm2,%xmm6
.byte	102,15,58,68,242,0
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
	// pclmulqdq $0x11,%xmm2,%xmm7
.byte	102,15,58,68,250,17
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	// pclmulqdq $0x00,%xmm5,%xmm3
.byte	102,15,58,68,221,0
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	L010mod_loop
L009even_tail:
	// Finish the pair that is still in flight: multiply the running value
	// by row 1, merge the second block's products, and reduce.
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	// pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,194,0
	// pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,202,17
	// pclmulqdq $0x10,%xmm5,%xmm4
.byte	102,15,58,68,229,16
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	// The counter is exactly zero only when one more block remains; any
	// other value (the subtraction borrowed) means the input is used up.
	testl	%ebx,%ebx
	jnz	L011done
	movups	(%edx),%xmm2
L008odd_tail:
	// Single block: XOR it into the running value and multiply by row 0.
	movdqu	(%esi),%xmm3
	// pshufb %xmm5,%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
	// pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,194,0
	// pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,202,17
	// pclmulqdq $0x00,%xmm4,%xmm3
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
L011done:
	// Byte-reverse the running value back and store it to Xi.
	// pshufb %xmm5,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
// Constants for the *_clmul routines, aligned to 64 bytes so the movdqa and
// pand memory operands that read them are aligned.
//   +0   PSHUFB mask 15,14,...,0: reverses the byte order of a 16-byte vector
//   +16  16-byte constant with byte 0 = 1 and byte 15 = 194 (0xc2), ANDed
//        with the sign mask in _gcm_init_clmul
.align	6,0x90
Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
// 256 16-bit entries, read by _gcm_ghash_4bit_mmx with pinsrw at
// Lrem_8bit + 2*index, where index is the byte shifted out of the
// accumulator.
.align	6,0x90
Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
// 16 entries of 8 bytes (low dword zero, value in the high dword), read by
// _gcm_gmult_4bit_mmx at Lrem_4bit + 8*index, where index is the nibble
// shifted out of the accumulator.
.align	6,0x90
Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
// NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif
1060