#if defined(__i386__)
.file	"ghash-x86.S"
.text
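/*
 * GHASH for x86, CRYPTOGAMS by <appro@openssl.org> (per the ASCII tag at
 * the end of this file).  This is the GF(2^128) multiply-by-H primitive
 * used by GCM, in two flavours: a table-driven MMX path
 * (gcm_gmult_4bit_mmx / gcm_ghash_4bit_mmx) and a carry-less-multiply
 * path (gcm_init_clmul / gcm_gmult_clmul / gcm_ghash_clmul).
 */

/*
 * gcm_gmult_4bit_mmx(Xi, Htable): multiply the 16-byte hash value Xi by
 * the key-dependent table Htable in GF(2^128), one 4-bit nibble at a
 * time, folding reductions in via .Lrem_4bit.  Judging by the stack
 * accesses below, 20(%esp) is Xi and 24(%esp) is Htable (cdecl).
 */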
.globl	gcm_gmult_4bit_mmx
.hidden	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	call	.L000pic_point
.L000pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L000pic_point(%eax),%eax
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L001mmx_loop
.align	16
.L001mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L002mmx_break
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L001mmx_loop
.align	16
.L002mmx_break:
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
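/*
 * gcm_ghash_4bit_mmx(Xi, Htable, inp, len): fold len bytes (a multiple of
 * 16) at inp into the hash value Xi.  Judging by the loads below,
 * 20(%esp) = Xi, 24(%esp) = Htable, 28(%esp) = inp, 32(%esp) = len.
 * Despite the "_4bit" name, this path unpacks Htable into larger working
 * tables on a 64-byte-aligned stack frame and walks each block a byte at
 * a time, reducing via the .Lrem_8bit table.
 */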
.globl	gcm_ghash_4bit_mmx
.hidden	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp
	call	.L003pic_point
.L003pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L003pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
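/*
 * Unpack Htable into the stack frame: the low and high 64-bit halves of
 * each of the 16 entries are copied, and a nibble-shifted variant
 * precomputed, into the working tables at 16(%esp), 144(%esp), 272(%esp)
 * and 400(%esp), with per-entry correction bytes at 0..15(%esp), so the
 * main loop can index them directly.
 */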
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
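/*
 * Main loop: each iteration of .L004outer xors the next 16-byte input
 * block into Xi and then multiplies the result by H, consuming Xi one
 * byte at a time through the stack tables built above and reducing via
 * the .Lrem_8bit table addressed by %esi.
 */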
.align	16
.L004outer:
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	.L004outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
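/*
 * gcm_init_clmul(Htable, H): precompute the table used by the CLMUL
 * functions.  Judging by the stores below, the bit-reflection-adjusted
 * key H is written at (%edx), H^2 at 16(%edx), and the xor of their
 * halves (used for Karatsuba-style multiplication) at 32(%edx).  The
 * ".byte 102,15,58,68,..." sequences are hand-encoded pclmulqdq
 * instructions (0x66,0x0F,0x3A,0x44); likewise 102,15,56,0 is pshufb and
 * 102,15,58,15 is palignr, spelled out as raw bytes presumably so the
 * file assembles with toolchains that lack these mnemonics.
 */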
.globl	gcm_init_clmul
.hidden	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	call	.L005pic
.L005pic:
	popl	%ecx
	leal	.Lbswap-.L005pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
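/*
 * gcm_gmult_clmul(Xi, Htable): multiply Xi by H using pclmulqdq, with the
 * byte order fixed up via the .Lbswap mask before and after.
 */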
.globl	gcm_gmult_clmul
.hidden	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	call	.L006pic
.L006pic:
	popl	%ecx
	leal	.Lbswap-.L006pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
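/*
 * gcm_ghash_clmul(Xi, Htable, inp, len): the bulk CLMUL path.  Blocks are
 * folded into Xi two at a time in .L010mod_loop (using H and H^2 from
 * Htable), with .L009even_tail / .L008odd_tail handling the remainder.
 */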
.globl	gcm_ghash_clmul
.hidden	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	call	.L007pic
.L007pic:
	popl	%ecx
	leal	.Lbswap-.L007pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197
	subl	$16,%ebx
	jz	.L008odd_tail
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,221,0
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L009even_tail
	jmp	.L010mod_loop
.align	32
.L010mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L010mod_loop
.L009even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L011done
	movups	(%edx),%xmm2
.L008odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L011done:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
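/*
 * Data: .Lbswap holds the 16-byte byte-reversal mask used with pshufb,
 * followed by the GHASH reduction constant (0xc2...01).  .Lrem_8bit and
 * .Lrem_4bit are the reduction tables used by the MMX paths.  The
 * trailing .byte lines spell the ASCII tag
 * "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".
 */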
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif