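/*
 * GHASH routines for 32-bit x86, AT&T syntax (the embedded ident string at
 * the end of the file reads "GHASH for x86, CRYPTOGAMS by
 * <appro@openssl.org>").  Symbols carry a leading underscore and are marked
 * .private_extern.  The file is meant to be run through the C preprocessor.
 */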
/* Everything below is assembled only for 32-bit x86 targets. */
#if defined(__i386__)
.text
/*
 * _gcm_gmult_4bit_mmx -- GHASH multiply step driven by a 4-bit table, using
 * the MMX registers (purpose taken from the symbol name and the ident string
 * at the end of the file).
 *
 * ABI: 32-bit, both arguments on the stack (offsets are after the 4 pushes).
 *   20(%esp) -> %edi : 16-byte block.  Read from byte 15 down to byte 0 and
 *                      overwritten with the result.
 *   24(%esp) -> %esi : table addressed as nibble*16; each entry is read as
 *                      two quadwords at offsets 0 and 8.
 *   Presumably (Xi, Htable) in GHASH terms -- confirm against the C prototype.
 *
 * Register roles inside the loop:
 *   %eax  address of Lrem_4bit (obtained with the call/pop PIC idiom)
 *   %ebp  index of the next byte of the block, counting down from 14
 *   %ecx  low nibble of the current byte, already shifted left by 4
 *   %edx  high nibble of the current byte, left in bits 4..7 (byte & 0xF0)
 *   %ebx  low 4 bits of %mm0, used as index into Lrem_4bit
 *   %mm1:%mm0  128-bit accumulator; each step shifts it right by 4 bits,
 *              carrying the low nibble of %mm1 into the top of %mm0 via %mm2
 *
 * Saves and restores %ebp, %ebx, %esi, %edi; executes emms before returning.
 */
.globl	_gcm_gmult_4bit_mmx
.private_extern	_gcm_gmult_4bit_mmx
.align	4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	/* PIC: materialise the address of Lrem_4bit in %eax. */
	call	L000pic_point
L000pic_point:
	popl	%eax
	leal	Lrem_4bit-L000pic_point(%eax),%eax
	/* Prime the accumulator with the table entry for the low nibble of byte 15. */
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	L001mmx_loop
.align	4,0x90
L001mmx_loop:
	/* High nibble of the previous byte (%edx); also fetch the next byte. */
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	/* The sign flag set here is consumed by the js below; the MMX and mov
	   instructions in between leave EFLAGS untouched. */
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	L002mmx_break
	/* Low nibble of the byte just fetched (%ecx). */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	L001mmx_loop
.align	4,0x90
L002mmx_break:
	/* Byte 0 was fetched on the last pass: fold in its low, then high nibble. */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	/* Split %mm1:%mm0 into four dwords and byte-swap each one, so the
	   128-bit result is stored most-significant byte first. */
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * _gcm_ghash_4bit_mmx -- table-driven GHASH update over a run of 16-byte
 * blocks, using the MMX registers (purpose taken from the symbol name and
 * the ident string at the end of the file).
 *
 * ABI: 32-bit, four stack arguments (offsets are after the 4 pushes).
 *   20(%esp) -> %eax : 16-byte state; loaded once before the loop and
 *                      stored back after the last block
 *   24(%esp) -> %ebx : table of 16 entries, 16 bytes each (read only)
 *   28(%esp) -> %ecx : input pointer, advanced by 16 per iteration
 *   32(%esp) -> %edx : byte count; arg3 + arg4 is kept as the end pointer
 *   Presumably (Xi, Htable, inp, len) -- confirm against the C prototype.
 *
 * Precondition visible in the code: the outer loop is bottom-tested and
 * leaves only when the input pointer equals the end pointer exactly, so the
 * byte count has to be a non-zero multiple of 16.
 *
 * Stack frame, offsets from the realigned %esp:
 *     0.. 15  one byte per table entry: low nibble of the quadword at
 *             entry+8, moved up into bits 4..7
 *    16..143  copy of the entry+8 quadwords (8 bytes per entry)
 *   144..271  copy of the entry+0 quadwords
 *   272..399  (entry+8 >> 4) | (entry+0 << 60)
 *   400..527  entry+0 >> 4
 *   528..539  bytes 0..11 of (state XOR input block); bytes 12..15 stay in %edx
 *   544       arg1 (state pointer)
 *   548       current input pointer
 *   552       end pointer
 *   556       caller %esp, restored before the pops
 *
 * %esi holds the address of Lrem_8bit for the whole routine.
 * Saves and restores %ebp, %ebx, %esi, %edi; executes emms before returning.
 */
.globl	_gcm_ghash_4bit_mmx
.private_extern	_gcm_ghash_4bit_mmx
.align	4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	/* Keep the caller stack pointer; %esp is realigned below. */
	movl	%esp,%ebp
	call	L003pic_point
L003pic_point:
	popl	%esi
	leal	Lrem_8bit-L003pic_point(%esi),%esi
	/* Carve out the frame described above, 64-byte aligned plus 16. */
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	/* Bias the table pointer by 128 so all 16 entries are reachable with
	   displacements in -128..+127. */
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	/*
	 * Build the four expanded tables and the 16-byte nibble table.  The
	 * per-entry work is interleaved across consecutive entries: for each
	 * entry, store both quadwords unchanged, store both shifted right by
	 * 4 bits (with the carry from the entry+0 half), and store the shifted
	 * low byte at (%esp)+index.
	 */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	/* Load the 16-byte state: bytes 0..7 in %mm6, 8..11 in %ebx, 12..15 in %edx. */
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	4,0x90
L004outer:
	/* XOR the next input block into the state and advance the input pointer. */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	/*
	 * Fully unrolled walk over the 16 bytes, byte 15 first.  Each roll
	 * brings the next byte into %dl; %al takes its low nibble and
	 * %ebp/%edi (alternating) its high nibble.  Per byte the accumulator
	 * %mm6:%mm7 moves right by 8 bits, the byte that falls out selects a
	 * 16-bit Lrem_8bit entry which pinsrw places in word 2 of
	 * %mm0/%mm1/%mm2 (rotating), and that correction is XORed into %mm6
	 * a few instructions later.
	 */
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Bytes 12..15 are consumed; continue with bytes 8..11. */
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Continue with bytes 4..7. */
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Continue with bytes 0..3. */
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Byte 0 is already in %al.  The value loaded here is not consumed:
	   %edx is overwritten by the movd from %mm7 further down before any
	   read.  Kept to leave the instruction stream unchanged. */
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	/* Last half-step: shift by 4 bits only, fold in the high nibble of
	   byte 0 from the unshifted tables, then apply the pending
	   Lrem_8bit corrections. */
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	/* Byte-reverse the result: psllw/psrlw/por swaps the bytes inside each
	   word of %mm6 and pshufw $27 reverses the word order; the two dwords
	   taken from %mm7 are reversed with bswap. */
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	/* Loop until the input pointer reaches the end pointer. */
	cmpl	552(%esp),%ecx
	jne	L004outer
	/* Store the state back, drop the frame and return. */
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * _gcm_init_clmul -- precompute the 48-byte key table used by the
 * carry-less-multiply GHASH routines in this file.
 *
 * ABI: 32-bit, two stack arguments, no registers saved (only %eax, %ecx,
 * %edx and %xmm0-%xmm5 are written).
 *   4(%esp) -> %edx : output, 48 bytes written at offsets 0, 16 and 32
 *   8(%esp) -> %eax : input, 16 bytes read
 *   Presumably (Htable, H) -- confirm against the C prototype.
 *
 * Output layout as written below:
 *    0..15  the input with its quadwords swapped, shifted left by one bit
 *           across 128 bits and conditionally XORed with the constant at
 *           Lbswap+16
 *   16..31  that value multiplied by itself and reduced
 *   32..47  palignr of the two "high ^ low" quadword folds, one per value
 *           above, as consumed by the Karatsuba middle product
 *
 * PCLMULQDQ and PALIGNR are emitted as raw .byte sequences; the decoded
 * instruction is given next to each one.
 */
.globl	_gcm_init_clmul
.private_extern	_gcm_init_clmul
.align	4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	/* PIC: %ecx = address of Lbswap. */
	call	L005pic
L005pic:
	popl	%ecx
	leal	Lbswap-L005pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	/* Broadcast the top dword so pcmpgtd can turn its sign bit into an
	   all-ones / all-zeros mask in %xmm5. */
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	/* 128-bit shift left by one: per-quadword shift plus carry of bit 63
	   of the low quadword into bit 0 of the high quadword. */
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	/* XOR in the constant only when the shifted-out bit was set. */
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	/* Square the value: three carry-less multiplies (Karatsuba). */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Reduce the 256-bit product in %xmm1:%xmm0 back to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	/* Fold high^low of both values and write the three table entries. */
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,32(%edx)
	ret
/*
 * _gcm_gmult_clmul -- multiply a 16-byte value in place by the key held in
 * the precomputed table, using carry-less multiplication.
 *
 * ABI: 32-bit, two stack arguments, no registers saved (only %eax, %ecx,
 * %edx and %xmm0-%xmm5 are written).
 *   4(%esp) -> %eax : 16-byte value, read and overwritten with the product
 *   8(%esp) -> %edx : table; 16 bytes at offset 0 and 16 bytes at offset 32
 *                     are read
 *   Presumably (Xi, Htable) with the layout produced by _gcm_init_clmul --
 *   confirm against the C prototype.
 *
 * The value is byte-reversed with the Lbswap mask on load and again before
 * the store.  PSHUFB and PCLMULQDQ are emitted as raw .byte sequences; the
 * decoded instruction is given next to each one.
 */
.globl	_gcm_gmult_clmul
.private_extern	_gcm_gmult_clmul
.align	4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	/* PIC: %ecx = address of Lbswap. */
	call	L006pic
L006pic:
	popl	%ecx
	leal	Lbswap-L006pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	/* Aligned load: relies on the alignment directive in front of Lbswap. */
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movups	32(%edx),%xmm4
	/* Three carry-less multiplies (Karatsuba); result in %xmm1:%xmm0. */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Reduce the 256-bit product back to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	ret
/*
 * _gcm_ghash_clmul -- GHASH update over a run of 16-byte blocks using
 * carry-less multiplication, two blocks per main-loop iteration.
 *
 * ABI: 32-bit, four stack arguments (offsets are after the 4 pushes).
 *   20(%esp) -> %eax : 16-byte state, read at entry and written at exit
 *   24(%esp) -> %edx : table; 16-byte entries at offsets 0, 16 and 32 are read
 *   28(%esp) -> %esi : input pointer
 *   32(%esp) -> %ebx : byte count
 *   Presumably (Xi, Htable, inp, len) with the table produced by
 *   _gcm_init_clmul -- confirm against the C prototype.
 *
 * Control flow on the byte count (%ebx is biased by -16 at entry):
 *   count == 16           -> L008odd_tail only
 *   otherwise             -> load two blocks, then L010mod_loop while more
 *                            than 32 bytes remain, then L009even_tail
 *   one block left over   -> falls from L009even_tail into L008odd_tail
 * A count of zero is not special-cased: the initial subtraction wraps and the
 * two-block path is entered, so callers have to pass a non-zero multiple
 * of 16.
 *
 * %xmm0 carries the byte-reversed state; %xmm5 alternates between the Lbswap
 * mask and the table entry at offset 32.  %ebp and %edi are saved and
 * restored but not otherwise used.  PSHUFB and PCLMULQDQ are emitted as raw
 * .byte sequences; the decoded instruction is given next to each one.
 */
.globl	_gcm_ghash_clmul
.private_extern	_gcm_ghash_clmul
.align	4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	/* PIC: %ecx = address of Lbswap. */
	call	L007pic
L007pic:
	popl	%ecx
	leal	Lbswap-L007pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	subl	$16,%ebx
	jz	L008odd_tail
	/* At least two blocks: load and byte-reverse both, start the products
	   for the second block while the first is XORed into the state. */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	L009even_tail
	jmp	L010mod_loop
.align	5,0x90
L010mod_loop:
	/* Multiply the state by the entry at offset 16 and combine with the
	   pending products of the previous second block, while loading and
	   starting the next pair of blocks. */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7 */
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	L010mod_loop
L009even_tail:
	/* Finish the pair whose second-block products are still pending. */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	/* %ebx == 0 here means exactly one block is left; anything else means
	   the input is exhausted. */
	testl	%ebx,%ebx
	jnz	L011done
	movups	(%edx),%xmm2
L008odd_tail:
	/* Single block: XOR into the state, multiply by the entry at offset 0
	   and reduce. */
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
L011done:
	/* Byte-reverse the state back and store it. */
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * Constant tables shared by the routines above.  They sit in the same
 * section as the code and are reached through the call/pop PIC idiom.
 * NOTE(review): the alignment directives are assumed to take a power-of-two
 * exponent (so 6 means 64 bytes); the movdqa loads from Lbswap depend on at
 * least 16-byte alignment -- confirm for the target assembler.
 */
.align	6,0x90
/*
 * Lbswap+0  : 16-byte PSHUFB mask 15,14,...,0, i.e. full byte reversal.
 * Lbswap+16 : 16-byte constant with 0x01 in byte 0 and 0xC2 (194) in
 *             byte 15; ANDed with a mask and XORed in by _gcm_init_clmul.
 */
Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	6,0x90
/*
 * Lrem_8bit: 256 entries of 16 bits, indexed by a byte value as
 * (%esi,%reg,2) in _gcm_ghash_4bit_mmx.
 */
Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	6,0x90
/*
 * Lrem_4bit: 16 entries of 8 bytes, each written as a pair of .long values
 * with the first one zero; indexed by a 4-bit value as (%eax,%ebx,8) in
 * _gcm_gmult_4bit_mmx.
 */
Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* NUL-terminated ASCII ident: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif