• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#if defined(__i386__)
2.text
/*
 * gcm_gmult_4bit_mmx  (i386, cdecl, MMX)
 *
 * Stack arguments (offsets are after the four pushes below):
 *   20(%esp) -> %edi : 16-byte block, read and overwritten in place
 *   24(%esp) -> %esi : table of sixteen 16-byte entries, addressed as
 *                      nibble*16; the qword at +0 goes to the high half
 *                      of the accumulator, the qword at +8 to the low half
 * (presumably Xi and Htable of GHASH -- confirm against the C prototype.)
 *
 * The 16 bytes at (%edi) are consumed from offset 15 down to offset 0,
 * one nibble at a time.  The 128-bit accumulator lives in %mm1:%mm0
 * (high:low).  For each nibble the accumulator is shifted right by 4,
 * the 4 bits shifted out select an 8-byte entry of .Lrem_4bit that is
 * XORed into the high half, and the table entry for the nibble is
 * XORed in.  The result is byte-swapped per 32-bit word and stored back.
 *
 * Saves and restores %ebp, %ebx, %esi, %edi.
 * Clobbers %eax, %ecx, %edx, %mm0-%mm2, EFLAGS; ends with emms.
 */
.globl	gcm_gmult_4bit_mmx
.hidden	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	/* PIC: obtain the run-time address of .Lrem_4bit in %eax. */
	call	.L000pic_point
.L000pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L000pic_point(%eax),%eax
	/* Start with byte 15: %ecx = low nibble * 16, %edx = high nibble * 16. */
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp	/* index of the next byte to fetch */
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L001mmx_loop
.align	16
.L001mmx_loop:
	/* First half: shift by 4, fold remainder, add entry for high nibble (%edx). */
	psrlq	$4,%mm0
	andl	$15,%ebx	/* the 4 bits about to be shifted out */
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl	/* fetch next input byte */
	psllq	$60,%mm2	/* low nibble of high half moves into the low half */
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L002mmx_break	/* byte 0 has just been fetched: finish outside the loop */
	/* Second half: same step for the low nibble of the new byte (%ecx). */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L001mmx_loop
.align	16
.L002mmx_break:
	/* Last byte: low nibble (%ecx) ... */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	/* ... then high nibble (%edx). */
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	/* Split the accumulator into four dwords, byte-swap each, store back. */
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * gcm_ghash_4bit_mmx  (i386, cdecl, MMX plus pinsrw/pshufw)
 *
 * Stack arguments (offsets are after the four pushes below):
 *   20(%esp) -> %eax : 16-byte state, read at entry and written at exit
 *   24(%esp) -> %ebx : table of sixteen 16-byte entries
 *   28(%esp) -> %ecx : input pointer
 *   32(%esp) -> %edx : input length in bytes
 * (presumably Xi, Htable, inp, len of GHASH -- confirm against the C
 * prototype.)
 *
 * The outer loop consumes 16 input bytes per pass and exits only when
 * the input pointer is exactly equal to inp+len, so len has to be a
 * non-zero multiple of 16.
 *
 * pinsrw and pshufw on MMX registers are not part of baseline MMX; the
 * caller is expected to have checked for them (SSE or the AMD MMX
 * extensions).
 *
 * Stack frame, relative to the re-aligned %esp (64-byte aligned minus 16):
 *     0..15   one byte per table entry: low byte of (dword at entry+8) << 4
 *    16..143  qword at entry+8 of each entry          (low halves)
 *   144..271  qword at entry+0 of each entry          (high halves)
 *   272..399  low halves of the 128-bit entries shifted right by 4
 *   400..527  high halves of the 128-bit entries shifted right by 4
 *   528..535  spill of %mm6 (first 8 state bytes XOR input)
 *   536       spill of %ebx (state dword at +8 XOR input)
 *   544       saved state pointer (arg 0)
 *   548       current input pointer
 *   552       end-of-input pointer (inp + len)
 *   556       caller %esp, restored before the pops
 *
 * Saves and restores %ebp, %ebx, %esi, %edi.
 * Clobbers %eax, %ecx, %edx, %mm0-%mm7, EFLAGS; ends with emms.
 */
.globl	gcm_ghash_4bit_mmx
.hidden	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp	/* remember caller %esp; stored at 556(%esp) below */
	/* PIC: obtain the run-time address of .Lrem_8bit in %esi. */
	call	.L003pic_point
.L003pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L003pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx	/* %edx = inp + len */
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx	/* bias so all 16 entries are reachable with 8-bit displacements */
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	/*
	 * Build the stack tables.  Sixteen interleaved stanzas, one per table
	 * entry: copy both qwords, store the pair shifted right by 4 as a
	 * 128-bit value, and store the nibble byte at (%esp + entry index).
	 */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	/* Load the 16-byte state: bytes 0..7 in %mm6, dword 8 in %ebx, dword 12 in %edx. */
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L004outer:
	/* XOR the next 16 input bytes into the state and advance the pointer. */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	/*
	 * Byte-at-a-time multiply.  %edx supplies state bytes through
	 * roll $8 / movb %dl,%al; it is reloaded from 536, 532 and 528(%esp)
	 * as each dword is used up.  Per byte: the low nibble (%eax) indexes
	 * the tables at 16/144(%esp), the high nibble (%ebp or %edi,
	 * alternating) indexes the shifted tables at 272/400(%esp).  The
	 * accumulator %mm6:%mm7 (high:low) is shifted right by 8 each step;
	 * the byte shifted out, XORed with the nibble byte at (%esp,idx,1),
	 * indexes .Lrem_8bit and is merged through %mm0/%mm1/%mm2 in rotation.
	 */
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx	/* next state dword (offset 8) */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx	/* next state dword (offset 4) */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx	/* last state dword (offset 0) */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Value loaded here is never consumed: %edx is overwritten by movd below. */
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	/* Tail: the final high nibble is a 4-bit step using the unshifted tables. */
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx	/* reload input pointer for the loop test */
	movd	%mm7,%ebx
	/* Byte-reverse the high half in %mm6 (swap bytes in each word, then reverse words). */
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	.L004outer
	/* Store the state back, restore caller %esp, and return. */
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
/*
 * gcm_init_clmul  (i386, cdecl, SSE2 + SSSE3 palignr + PCLMULQDQ)
 *
 * Stack arguments:
 *   4(%esp) -> %edx : output buffer, 48 bytes are written
 *   8(%esp) -> %eax : 16-byte input, read with an unaligned load
 * (presumably Htable and the hash key H -- confirm against the C
 * prototype.)
 *
 * Steps, as performed below:
 *   1. Swap the two 64-bit halves of the input and shift the 128-bit
 *      value left by one bit; if the top bit was set, XOR in the
 *      constant stored at .Lbswap+16.
 *   2. Multiply that value by itself with three carry-less multiplies
 *      (Karatsuba) and fold the 256-bit product back to 128 bits.
 *   3. Store the value at +0, its square at +16, and at +32 a qword
 *      pair holding (hi XOR lo) of each of the two.
 *
 * The PCLMULQDQ and PALIGNR instructions are emitted as raw .byte
 * sequences; the decoded form is given next to each one.
 *
 * Leaf function, no callee-saved register is touched.
 * Clobbers %eax, %ecx, %edx, %xmm0-%xmm5, EFLAGS.
 */
.globl	gcm_init_clmul
.hidden	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	/* PIC: obtain the run-time address of .Lbswap in %ecx. */
	call	.L005pic
.L005pic:
	popl	%ecx
	leal	.Lbswap-.L005pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2	/* swap 64-bit halves */
	pshufd	$255,%xmm2,%xmm4	/* broadcast the top dword */
	/* 128-bit left shift by 1: shift both qwords, carry bit 63 into bit 64. */
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5	/* all-ones when the top bit was set */
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5	/* conditional constant from .Lbswap+16 */
	pxor	%xmm5,%xmm2
	/* Square: Karatsuba with lo*lo, hi*hi and (lo^hi)*(lo^hi). */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	/* Split the middle term across the low (%xmm0) and high (%xmm1) halves. */
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/*
	 * Fold the 256-bit product %xmm1:%xmm0 down to 128 bits in %xmm0
	 * (shift/XOR sequence; presumably reduction modulo the GHASH
	 * polynomial -- confirm against the algorithm description).
	 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	/* Emit the three 16-byte table entries. */
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
/*
 * gcm_gmult_clmul  (i386, cdecl, SSE2 + SSSE3 pshufb + PCLMULQDQ)
 *
 * Stack arguments:
 *   4(%esp) -> %eax : 16-byte block, read and overwritten in place
 *   8(%esp) -> %edx : table; the 16 bytes at +0 and the 16 bytes at +32
 *                     are read
 * (presumably Xi and the Htable produced by gcm_init_clmul -- confirm
 * against the C prototype.)
 *
 * The block is byte-reversed with the .Lbswap mask, multiplied by the
 * entry at (%edx) using three carry-less multiplies (Karatsuba; the
 * entry at 32(%edx) supplies the second operand of the middle product),
 * folded back to 128 bits, byte-reversed again and stored.
 *
 * The PCLMULQDQ and PSHUFB instructions are emitted as raw .byte
 * sequences; the decoded form is given next to each one.
 *
 * Leaf function, no callee-saved register is touched.
 * Clobbers %eax, %ecx, %edx, %xmm0-%xmm5, EFLAGS.
 */
.globl	gcm_gmult_clmul
.hidden	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	/* PIC: obtain the run-time address of .Lbswap in %ecx. */
	call	.L006pic
.L006pic:
	popl	%ecx
	leal	.Lbswap-.L006pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5	/* byte-reversal mask; aligned load, .Lbswap is .align 64 */
	movups	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3	/* lo ^ hi of the block */
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	/* Split the middle term across the low (%xmm0) and high (%xmm1) halves. */
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Fold the 256-bit product %xmm1:%xmm0 down to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * gcm_ghash_clmul  (i386, cdecl, SSE2 + SSSE3 pshufb + PCLMULQDQ)
 *
 * Stack arguments (offsets are after the four pushes below):
 *   20(%esp) -> %eax : 16-byte state, read at entry and written at exit
 *   24(%esp) -> %edx : table; 16-byte entries at +0, +16 and +32 are read
 *   28(%esp) -> %esi : input pointer
 *   32(%esp) -> %ebx : input length in bytes
 * (presumably Xi, the Htable produced by gcm_init_clmul, inp and len --
 * confirm against the C prototype.)
 *
 * Length handling, as coded: exactly 16 bytes goes straight to the
 * single-block path (.L008odd_tail); otherwise blocks are consumed two
 * at a time, and one trailing block, if any, is handled by the
 * single-block path.  The branches assume len is a non-zero multiple
 * of 16; any other value reads past the end of the input.
 *
 * The PCLMULQDQ and PSHUFB instructions are emitted as raw .byte
 * sequences; the decoded form is given next to each one.
 *
 * Saves and restores %ebp, %ebx, %esi, %edi.
 * Clobbers %eax, %ecx, %edx, %xmm0-%xmm7, EFLAGS.
 */
.globl	gcm_ghash_clmul
.hidden	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	/* PIC: obtain the run-time address of .Lbswap in %ecx. */
	call	.L007pic
.L007pic:
	popl	%ecx
	leal	.Lbswap-.L007pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5	/* byte-reversal mask */
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	subl	$16,%ebx
	jz	.L008odd_tail	/* exactly one block */
	/* Prime the pipeline with the first two input blocks. */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	movdqu	32(%edx),%xmm5	/* mask register is reused for the entry at +32 */
	pxor	%xmm3,%xmm0	/* state ^= first block */
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
	/* Second block times the entry at (%edx): three partial products. */
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L009even_tail	/* at most one more block after this pair */
	jmp	.L010mod_loop
.align	32
.L010mod_loop:
	/*
	 * Per pass: multiply the state by the entry at 16(%edx), combine it
	 * with the pending products of the previous second block, fold to
	 * 128 bits, and start the products for the next pair of blocks.
	 */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5	/* reload byte-reversal mask */
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7 */
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	pxor	%xmm7,%xmm1	/* fold the next first block into the high half */
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L010mod_loop
.L009even_tail:
	/* Finish the pending pair: state times the entry at 16(%edx), plus the stored products. */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movdqa	(%ecx),%xmm5	/* reload byte-reversal mask */
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	/* Fold the 256-bit product %xmm1:%xmm0 down to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L011done	/* non-zero remainder counter: no trailing block */
	movups	(%edx),%xmm2
.L008odd_tail:
	/* Single block: state ^= block, then state times the entry at (%edx). */
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Fold the 256-bit product %xmm1:%xmm0 down to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L011done:
	/* Byte-reverse the state back and store it. */
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
/*
 * Constant tables.  They are emitted after the code without a section
 * switch and are addressed PC-relative by the routines in this file.
 */
.align	64
.Lbswap:
/* +0 : pshufb mask that reverses the 16 bytes of an XMM register. */
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
/* +16: constant XORed in by gcm_init_clmul (pand 16(%ecx)) when the top bit of the key is set. */
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	64
.Lrem_8bit:
/*
 * 256 entries of 16 bits, indexed as (%esi,idx,2) by the pinsrw
 * instructions in gcm_ghash_4bit_mmx, where idx is the byte shifted
 * out of the accumulator.
 */
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	64
.Lrem_4bit:
/*
 * 16 entries of 8 bytes (two .long each, low dword always 0), indexed
 * as (%eax,idx,8) by gcm_gmult_4bit_mmx, where idx is the nibble
 * shifted out of the accumulator.
 */
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
1068#endif
1069