; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
; When BORINGSSL_PREFIX is defined, pull in the symbol-prefix include first.
%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
; Choose the code section declaration that matches the output object format.
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
; Not Yasm: the @feat.00 symbol has to be defined here explicitly.
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif
; ---------------------------------------------------------------------------
; _gcm_gmult_4bit_mmx(arg1, arg2)        32-bit x86, arguments on the stack
;
;   arg1 -> edi : 16-byte block, read and then overwritten with the result
;   arg2 -> esi : table of 16-byte entries, addressed by nibble*16 (0..240)
;
; The block is consumed from byte 15 down to byte 0, one nibble per step.
; The 128-bit accumulator lives in mm1 (upper qword) : mm0 (lower qword).
; Every step shifts it right by four bits, folds the four bits that fell
; off the bottom back into mm1 through L$rem_4bit, and XORs in the table
; entry selected by the next nibble.  The final value is byte-swapped per
; dword and stored back over the block.
;
; Preserves ebp, ebx, esi, edi; eax, ecx, edx are scratch.  MMX state is
; released with emms before returning.
; NOTE(review): presumably arg1 is the GHASH state and arg2 the 16-entry
; key table -- confirm against the C prototype.
; ---------------------------------------------------------------------------
global	_gcm_gmult_4bit_mmx
align	16
_gcm_gmult_4bit_mmx:
L$_gcm_gmult_4bit_mmx_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	edi,DWORD [20+esp]	; arg1 (four pushes + return address)
	mov	esi,DWORD [24+esp]	; arg2
	; Position-independent address of L$rem_4bit: call/pop yields the
	; run-time address of L$000pic_point, lea adds the label distance.
	call	L$000pic_point
L$000pic_point:
	pop	eax
	lea	eax,[(L$rem_4bit-L$000pic_point)+eax]
	movzx	ebx,BYTE [15+edi]	; start with the last byte of the block
	xor	ecx,ecx
	mov	edx,ebx
	mov	cl,dl
	mov	ebp,14			; index of the next byte to fetch
	shl	cl,4			; ecx = low nibble * 16
	and	edx,240			; edx = high nibble * 16
	movq	mm0,[8+ecx*1+esi]
	movq	mm1,[ecx*1+esi]
	movd	ebx,mm0			; low bits, about to be shifted out
	jmp	NEAR L$001mmx_loop
align	16
L$001mmx_loop:
	; First half: high nibble of the current byte (offset in edx).
	psrlq	mm0,4
	and	ebx,15			; the four bits leaving the accumulator
	movq	mm2,mm1
	psrlq	mm1,4
	pxor	mm0,[8+edx*1+esi]
	mov	cl,BYTE [ebp*1+edi]	; fetch the next byte
	psllq	mm2,60			; low nibble of mm1 -> top of mm0
	pxor	mm1,[ebx*8+eax]		; L$rem_4bit[ebx]
	dec	ebp			; sets SF once byte 0 has been fetched
	movd	ebx,mm0
	pxor	mm1,[edx*1+esi]
	mov	edx,ecx
	pxor	mm0,mm2
	; The MMX ops and mov above leave the flags from dec untouched.
	js	NEAR L$002mmx_break
	; Second half: low nibble of the byte just fetched (offset in ecx).
	shl	cl,4
	and	ebx,15
	psrlq	mm0,4
	and	edx,240
	movq	mm2,mm1
	psrlq	mm1,4
	pxor	mm0,[8+ecx*1+esi]
	psllq	mm2,60
	pxor	mm1,[ebx*8+eax]
	movd	ebx,mm0
	pxor	mm1,[ecx*1+esi]
	pxor	mm0,mm2
	jmp	NEAR L$001mmx_loop
align	16
L$002mmx_break:
	; Byte 0 is in cl/edx: handle its low nibble, then its high nibble.
	shl	cl,4
	and	ebx,15
	psrlq	mm0,4
	and	edx,240
	movq	mm2,mm1
	psrlq	mm1,4
	pxor	mm0,[8+ecx*1+esi]
	psllq	mm2,60
	pxor	mm1,[ebx*8+eax]
	movd	ebx,mm0
	pxor	mm1,[ecx*1+esi]
	pxor	mm0,mm2
	psrlq	mm0,4
	and	ebx,15
	movq	mm2,mm1
	psrlq	mm1,4
	pxor	mm0,[8+edx*1+esi]
	psllq	mm2,60
	pxor	mm1,[ebx*8+eax]
	movd	ebx,mm0
	pxor	mm1,[edx*1+esi]
	pxor	mm0,mm2
	; Split mm1:mm0 into four dwords, byte-swap each, and store them so
	; that mm1's high dword lands at offset 0 and mm0's low dword at 12.
	psrlq	mm0,32
	movd	edx,mm1
	psrlq	mm1,32
	movd	ecx,mm0
	movd	ebp,mm1
	bswap	ebx
	bswap	edx
	bswap	ecx
	bswap	ebp
	emms
	mov	DWORD [12+edi],ebx
	mov	DWORD [4+edi],edx
	mov	DWORD [8+edi],ecx
	mov	DWORD [edi],ebp
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; ---------------------------------------------------------------------------
; _gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4)   32-bit x86, stack arguments
;
;   arg1 -> eax : 16-byte state, read at entry and written at exit
;   arg2 -> ebx : 256-byte table (16 entries of 16 bytes), read only
;   arg3 -> ecx : input pointer, consumed 16 bytes per outer iteration
;   arg4 -> edx : byte count; arg3+arg4 is the end pointer
;
; The outer loop processes one 16-byte block and only then compares the
; cursor with the end pointer for equality, so the byte count is evidently
; expected to be a non-zero multiple of 16.
;
; Stack frame (offsets from the realigned esp; esp+16 is 64-byte aligned):
;     0..15    byte i  = low byte of (dword at arg2+16*i+8) << 4
;    16..143   qword i = copy of the qword at arg2+16*i+8
;   144..271   qword i = copy of the qword at arg2+16*i
;   272..399   qword i = low  qword of (entry i as 128 bits) >> 4
;   400..527   qword i = high qword of (entry i as 128 bits) >> 4
;   528..539   state XOR input, bytes 0..11 of the current block
;   544        arg1          548  input cursor
;   552        end pointer   556  caller's esp
;
; esi holds the address of L$rem_8bit for the whole function.
; Preserves ebp, ebx, esi, edi; releases MMX state with emms.
; NOTE(review): presumably arg1 is the GHASH state and arg2 the key table;
; confirm against the C prototype.
; ---------------------------------------------------------------------------
global	_gcm_ghash_4bit_mmx
align	16
_gcm_ghash_4bit_mmx:
L$_gcm_ghash_4bit_mmx_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	eax,DWORD [20+esp]
	mov	ebx,DWORD [24+esp]
	mov	ecx,DWORD [28+esp]
	mov	edx,DWORD [32+esp]
	mov	ebp,esp			; remember esp before realigning
	; Position-independent address of L$rem_8bit.
	call	L$003pic_point
L$003pic_point:
	pop	esi
	lea	esi,[(L$rem_8bit-L$003pic_point)+esi]
	sub	esp,544
	and	esp,-64
	sub	esp,16
	add	edx,ecx			; edx = end of input
	mov	DWORD [544+esp],eax
	mov	DWORD [552+esp],edx
	mov	DWORD [556+esp],ebp
	; Build the on-stack tables described in the header.  ebx is biased
	; by 128 so all 256 table bytes are reachable with 8-bit
	; displacements; edi and ebp are biased the same way over the
	; destination arrays.
	add	ebx,128
	lea	edi,[144+esp]
	lea	ebp,[400+esp]
	mov	edx,DWORD [ebx-120]
	movq	mm0,[ebx-120]
	movq	mm3,[ebx-128]
	shl	edx,4
	mov	BYTE [esp],dl
	mov	edx,DWORD [ebx-104]
	movq	mm2,[ebx-104]
	movq	mm5,[ebx-112]
	movq	[edi-128],mm0
	psrlq	mm0,4
	movq	[edi],mm3
	movq	mm7,mm3
	psrlq	mm3,4
	shl	edx,4
	mov	BYTE [1+esp],dl
	mov	edx,DWORD [ebx-88]
	movq	mm1,[ebx-88]
	psllq	mm7,60
	movq	mm4,[ebx-96]
	por	mm0,mm7
	movq	[edi-120],mm2
	psrlq	mm2,4
	movq	[8+edi],mm5
	movq	mm6,mm5
	movq	[ebp-128],mm0
	psrlq	mm5,4
	movq	[ebp],mm3
	shl	edx,4
	mov	BYTE [2+esp],dl
	mov	edx,DWORD [ebx-72]
	movq	mm0,[ebx-72]
	psllq	mm6,60
	movq	mm3,[ebx-80]
	por	mm2,mm6
	movq	[edi-112],mm1
	psrlq	mm1,4
	movq	[16+edi],mm4
	movq	mm7,mm4
	movq	[ebp-120],mm2
	psrlq	mm4,4
	movq	[8+ebp],mm5
	shl	edx,4
	mov	BYTE [3+esp],dl
	mov	edx,DWORD [ebx-56]
	movq	mm2,[ebx-56]
	psllq	mm7,60
	movq	mm5,[ebx-64]
	por	mm1,mm7
	movq	[edi-104],mm0
	psrlq	mm0,4
	movq	[24+edi],mm3
	movq	mm6,mm3
	movq	[ebp-112],mm1
	psrlq	mm3,4
	movq	[16+ebp],mm4
	shl	edx,4
	mov	BYTE [4+esp],dl
	mov	edx,DWORD [ebx-40]
	movq	mm1,[ebx-40]
	psllq	mm6,60
	movq	mm4,[ebx-48]
	por	mm0,mm6
	movq	[edi-96],mm2
	psrlq	mm2,4
	movq	[32+edi],mm5
	movq	mm7,mm5
	movq	[ebp-104],mm0
	psrlq	mm5,4
	movq	[24+ebp],mm3
	shl	edx,4
	mov	BYTE [5+esp],dl
	mov	edx,DWORD [ebx-24]
	movq	mm0,[ebx-24]
	psllq	mm7,60
	movq	mm3,[ebx-32]
	por	mm2,mm7
	movq	[edi-88],mm1
	psrlq	mm1,4
	movq	[40+edi],mm4
	movq	mm6,mm4
	movq	[ebp-96],mm2
	psrlq	mm4,4
	movq	[32+ebp],mm5
	shl	edx,4
	mov	BYTE [6+esp],dl
	mov	edx,DWORD [ebx-8]
	movq	mm2,[ebx-8]
	psllq	mm6,60
	movq	mm5,[ebx-16]
	por	mm1,mm6
	movq	[edi-80],mm0
	psrlq	mm0,4
	movq	[48+edi],mm3
	movq	mm7,mm3
	movq	[ebp-88],mm1
	psrlq	mm3,4
	movq	[40+ebp],mm4
	shl	edx,4
	mov	BYTE [7+esp],dl
	mov	edx,DWORD [8+ebx]
	movq	mm1,[8+ebx]
	psllq	mm7,60
	movq	mm4,[ebx]
	por	mm0,mm7
	movq	[edi-72],mm2
	psrlq	mm2,4
	movq	[56+edi],mm5
	movq	mm6,mm5
	movq	[ebp-80],mm0
	psrlq	mm5,4
	movq	[48+ebp],mm3
	shl	edx,4
	mov	BYTE [8+esp],dl
	mov	edx,DWORD [24+ebx]
	movq	mm0,[24+ebx]
	psllq	mm6,60
	movq	mm3,[16+ebx]
	por	mm2,mm6
	movq	[edi-64],mm1
	psrlq	mm1,4
	movq	[64+edi],mm4
	movq	mm7,mm4
	movq	[ebp-72],mm2
	psrlq	mm4,4
	movq	[56+ebp],mm5
	shl	edx,4
	mov	BYTE [9+esp],dl
	mov	edx,DWORD [40+ebx]
	movq	mm2,[40+ebx]
	psllq	mm7,60
	movq	mm5,[32+ebx]
	por	mm1,mm7
	movq	[edi-56],mm0
	psrlq	mm0,4
	movq	[72+edi],mm3
	movq	mm6,mm3
	movq	[ebp-64],mm1
	psrlq	mm3,4
	movq	[64+ebp],mm4
	shl	edx,4
	mov	BYTE [10+esp],dl
	mov	edx,DWORD [56+ebx]
	movq	mm1,[56+ebx]
	psllq	mm6,60
	movq	mm4,[48+ebx]
	por	mm0,mm6
	movq	[edi-48],mm2
	psrlq	mm2,4
	movq	[80+edi],mm5
	movq	mm7,mm5
	movq	[ebp-56],mm0
	psrlq	mm5,4
	movq	[72+ebp],mm3
	shl	edx,4
	mov	BYTE [11+esp],dl
	mov	edx,DWORD [72+ebx]
	movq	mm0,[72+ebx]
	psllq	mm7,60
	movq	mm3,[64+ebx]
	por	mm2,mm7
	movq	[edi-40],mm1
	psrlq	mm1,4
	movq	[88+edi],mm4
	movq	mm6,mm4
	movq	[ebp-48],mm2
	psrlq	mm4,4
	movq	[80+ebp],mm5
	shl	edx,4
	mov	BYTE [12+esp],dl
	mov	edx,DWORD [88+ebx]
	movq	mm2,[88+ebx]
	psllq	mm6,60
	movq	mm5,[80+ebx]
	por	mm1,mm6
	movq	[edi-32],mm0
	psrlq	mm0,4
	movq	[96+edi],mm3
	movq	mm7,mm3
	movq	[ebp-40],mm1
	psrlq	mm3,4
	movq	[88+ebp],mm4
	shl	edx,4
	mov	BYTE [13+esp],dl
	mov	edx,DWORD [104+ebx]
	movq	mm1,[104+ebx]
	psllq	mm7,60
	movq	mm4,[96+ebx]
	por	mm0,mm7
	movq	[edi-24],mm2
	psrlq	mm2,4
	movq	[104+edi],mm5
	movq	mm6,mm5
	movq	[ebp-32],mm0
	psrlq	mm5,4
	movq	[96+ebp],mm3
	shl	edx,4
	mov	BYTE [14+esp],dl
	mov	edx,DWORD [120+ebx]
	movq	mm0,[120+ebx]
	psllq	mm6,60
	movq	mm3,[112+ebx]
	por	mm2,mm6
	movq	[edi-16],mm1
	psrlq	mm1,4
	movq	[112+edi],mm4
	movq	mm7,mm4
	movq	[ebp-24],mm2
	psrlq	mm4,4
	movq	[104+ebp],mm5
	shl	edx,4
	mov	BYTE [15+esp],dl
	psllq	mm7,60
	por	mm1,mm7
	movq	[edi-8],mm0
	psrlq	mm0,4
	movq	[120+edi],mm3
	movq	mm6,mm3
	movq	[ebp-16],mm1
	psrlq	mm3,4
	movq	[112+ebp],mm4
	psllq	mm6,60
	por	mm0,mm6
	movq	[ebp-8],mm0
	movq	[120+ebp],mm3
	; Load the running state: mm6 = bytes 0..7, ebx = bytes 8..11,
	; edx = bytes 12..15.
	movq	mm6,[eax]
	mov	ebx,DWORD [8+eax]
	mov	edx,DWORD [12+eax]
align	16
L$004outer:
	; XOR the next 16 input bytes into the state and spill bytes 0..11
	; so they can be reloaded dword by dword further down.
	xor	edx,DWORD [12+ecx]
	xor	ebx,DWORD [8+ecx]
	pxor	mm6,[ecx]
	lea	ecx,[16+ecx]
	mov	DWORD [536+esp],ebx
	movq	[528+esp],mm6
	mov	DWORD [548+esp],ecx
	; Bytes are taken from offset 15 downward: rol edx,8 brings the next
	; byte into dl.  For each byte, al&15 and al>>4 index the on-stack
	; tables, the accumulator mm6:mm7 is shifted right by eight bits, and
	; the byte shifted out selects a word from L$rem_8bit (esi) that is
	; inserted with pinsrw and folded in one step later.
	xor	eax,eax
	rol	edx,8
	mov	al,dl
	mov	ebp,eax
	and	al,15
	shr	ebp,4
	pxor	mm0,mm0
	rol	edx,8
	pxor	mm1,mm1
	pxor	mm2,mm2
	movq	mm7,[16+eax*8+esp]
	movq	mm6,[144+eax*8+esp]
	mov	al,dl
	movd	ebx,mm7
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	shr	edi,4
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	mov	al,dl
	movd	ecx,mm7
	movzx	ebx,bl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	ebp,eax
	psrlq	mm6,8
	pxor	mm7,[272+edi*8+esp]
	and	al,15
	psllq	mm3,56
	shr	ebp,4
	pinsrw	mm2,WORD [ebx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+edi*8+esp]
	xor	cl,BYTE [edi*1+esp]
	mov	al,dl
	mov	edx,DWORD [536+esp]	; next dword: bytes 8..11
	movd	ebx,mm7
	movzx	ecx,cl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm2
	shr	edi,4
	pinsrw	mm1,WORD [ecx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	mov	al,dl
	movd	ecx,mm7
	movzx	ebx,bl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	ebp,eax
	psrlq	mm6,8
	pxor	mm7,[272+edi*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm1
	shr	ebp,4
	pinsrw	mm0,WORD [ebx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+edi*8+esp]
	xor	cl,BYTE [edi*1+esp]
	mov	al,dl
	movd	ebx,mm7
	movzx	ecx,cl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm0
	shr	edi,4
	pinsrw	mm2,WORD [ecx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	mov	al,dl
	movd	ecx,mm7
	movzx	ebx,bl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	ebp,eax
	psrlq	mm6,8
	pxor	mm7,[272+edi*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm2
	shr	ebp,4
	pinsrw	mm1,WORD [ebx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+edi*8+esp]
	xor	cl,BYTE [edi*1+esp]
	mov	al,dl
	mov	edx,DWORD [532+esp]	; next dword: bytes 4..7
	movd	ebx,mm7
	movzx	ecx,cl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm1
	shr	edi,4
	pinsrw	mm0,WORD [ecx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	mov	al,dl
	movd	ecx,mm7
	movzx	ebx,bl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	ebp,eax
	psrlq	mm6,8
	pxor	mm7,[272+edi*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm0
	shr	ebp,4
	pinsrw	mm2,WORD [ebx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+edi*8+esp]
	xor	cl,BYTE [edi*1+esp]
	mov	al,dl
	movd	ebx,mm7
	movzx	ecx,cl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm2
	shr	edi,4
	pinsrw	mm1,WORD [ecx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	mov	al,dl
	movd	ecx,mm7
	movzx	ebx,bl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	ebp,eax
	psrlq	mm6,8
	pxor	mm7,[272+edi*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm1
	shr	ebp,4
	pinsrw	mm0,WORD [ebx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+edi*8+esp]
	xor	cl,BYTE [edi*1+esp]
	mov	al,dl
	mov	edx,DWORD [528+esp]	; next dword: bytes 0..3
	movd	ebx,mm7
	movzx	ecx,cl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm0
	shr	edi,4
	pinsrw	mm2,WORD [ecx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	mov	al,dl
	movd	ecx,mm7
	movzx	ebx,bl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	ebp,eax
	psrlq	mm6,8
	pxor	mm7,[272+edi*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm2
	shr	ebp,4
	pinsrw	mm1,WORD [ebx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+edi*8+esp]
	xor	cl,BYTE [edi*1+esp]
	mov	al,dl
	movd	ebx,mm7
	movzx	ecx,cl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm1
	shr	edi,4
	pinsrw	mm0,WORD [ecx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	mov	al,dl
	movd	ecx,mm7
	movzx	ebx,bl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	ebp,eax
	psrlq	mm6,8
	pxor	mm7,[272+edi*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm0
	shr	ebp,4
	pinsrw	mm2,WORD [ebx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	rol	edx,8
	pxor	mm6,[144+eax*8+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+edi*8+esp]
	xor	cl,BYTE [edi*1+esp]
	mov	al,dl
	; The value loaded here is never consumed: edx is not rotated again
	; and is overwritten by "movd edx,mm7" below.
	mov	edx,DWORD [524+esp]
	movd	ebx,mm7
	movzx	ecx,cl
	psrlq	mm7,8
	movq	mm3,mm6
	mov	edi,eax
	psrlq	mm6,8
	pxor	mm7,[272+ebp*8+esp]
	and	al,15
	psllq	mm3,56
	pxor	mm6,mm2
	shr	edi,4
	pinsrw	mm1,WORD [ecx*2+esi],2
	pxor	mm7,[16+eax*8+esp]
	pxor	mm6,[144+eax*8+esp]
	xor	bl,BYTE [ebp*1+esp]
	pxor	mm7,mm3
	pxor	mm6,[400+ebp*8+esp]
	movzx	ebx,bl
	; Final nibble (edi): the accumulator moves by four bits here rather
	; than eight, and the pending L$rem_8bit words are shifted into place
	; before being folded in.
	pxor	mm2,mm2
	psllq	mm1,4
	movd	ecx,mm7
	psrlq	mm7,4
	movq	mm3,mm6
	psrlq	mm6,4
	shl	ecx,4
	pxor	mm7,[16+edi*8+esp]
	psllq	mm3,60
	movzx	ecx,cl
	pxor	mm7,mm3
	pxor	mm6,[144+edi*8+esp]
	pinsrw	mm0,WORD [ebx*2+esi],2
	pxor	mm6,mm1
	movd	edx,mm7
	pinsrw	mm2,WORD [ecx*2+esi],3
	psllq	mm0,12
	pxor	mm6,mm0
	psrlq	mm7,32
	pxor	mm6,mm2
	mov	ecx,DWORD [548+esp]	; reload the input cursor
	movd	ebx,mm7
	; Put the result back into state byte order: the two psllw/psrlw
	; shifts swap the bytes within each word of mm6, pshufw 27 reverses
	; the word order, and bswap reverses edx and ebx.
	movq	mm3,mm6
	psllw	mm6,8
	psrlw	mm3,8
	por	mm6,mm3
	bswap	edx
	pshufw	mm6,mm6,27
	bswap	ebx
	cmp	ecx,DWORD [552+esp]	; reached the end pointer?
	jne	NEAR L$004outer
	; Write the state back through arg1 and restore the caller's stack.
	mov	eax,DWORD [544+esp]
	mov	DWORD [12+eax],edx
	mov	DWORD [8+eax],ebx
	movq	[eax],mm6
	mov	esp,DWORD [556+esp]
	emms
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; ---------------------------------------------------------------------------
; _gcm_init_clmul(arg1, arg2)            32-bit x86, arguments on the stack
;
;   arg1 -> edx : output, 48 bytes written at offsets 0, 16 and 32
;   arg2 -> eax : 16-byte input, read with an unaligned load
;
; Steps, as visible in the code:
;   1. Swap the two qwords of the input, shift the 128-bit value left by
;      one bit, and if the top bit was set XOR in the constant stored in
;      the second row of L$bswap.  Call the result V (xmm2).
;   2. Multiply V by itself with three carry-less multiplies and reduce the
;      256-bit product to 128 bits.  Call the result W (xmm0).
;   3. Store V at [edx], W at [16+edx], and at [32+edx] a qword pair made
;      of (V.lo ^ V.hi) in the low half and (W.lo ^ W.hi) in the high half.
;
; The db lines are hand-encoded instructions; each is decoded in its
; trailing comment.  No callee-saved register is touched; eax, ecx, edx and
; xmm0-xmm5 are clobbered.
; NOTE(review): presumably arg2 is the GHASH key H and the output is the
; table consumed by the other *_clmul routines -- confirm with the caller.
; ---------------------------------------------------------------------------
global	_gcm_init_clmul
align	16
_gcm_init_clmul:
L$_gcm_init_clmul_begin:
	mov	edx,DWORD [4+esp]
	mov	eax,DWORD [8+esp]
	; Position-independent address of L$bswap.
	call	L$005pic
L$005pic:
	pop	ecx
	lea	ecx,[(L$bswap-L$005pic)+ecx]
	movdqu	xmm2,[eax]
	pshufd	xmm2,xmm2,78		; swap the two qwords
	pshufd	xmm4,xmm2,255		; broadcast the top dword
	movdqa	xmm3,xmm2
	psllq	xmm2,1
	pxor	xmm5,xmm5
	psrlq	xmm3,63
	pcmpgtd	xmm5,xmm4		; all-ones when the top bit was set
	pslldq	xmm3,8			; carry from low qword into high qword
	por	xmm2,xmm3
	pand	xmm5,[16+ecx]		; second row of L$bswap, masked
	pxor	xmm2,xmm5
	; Multiply xmm2 by itself: low*low, high*high, and the product of the
	; folded halves (lo^hi), from which the middle term is derived.
	movdqa	xmm0,xmm2
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pshufd	xmm4,xmm2,78
	pxor	xmm3,xmm0
	pxor	xmm4,xmm2
db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
	xorps	xmm3,xmm0
	xorps	xmm3,xmm1
	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4
	; Reduce xmm1:xmm0 to 128 bits in xmm0 (shift-and-XOR sequence).
	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3
	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	; Emit the three table rows.
	pshufd	xmm3,xmm2,78
	pshufd	xmm4,xmm0,78
	pxor	xmm3,xmm2
	movdqu	[edx],xmm2
	pxor	xmm4,xmm0
	movdqu	[16+edx],xmm0
db	102,15,58,15,227,8	; palignr xmm4,xmm3,8
	movdqu	[32+edx],xmm4
	ret
; ---------------------------------------------------------------------------
; _gcm_gmult_clmul(arg1, arg2)           32-bit x86, arguments on the stack
;
;   arg1 -> eax : 16-byte block, read and then overwritten with the result
;   arg2 -> edx : table; the rows at [edx] and [32+edx] are read
;
; Byte-reverses the block with the mask in the first row of L$bswap,
; carry-less multiplies it by the row at [edx] (the low qword of the row at
; [32+edx] supplies the folded operand for the middle product), reduces
; the 256-bit product to 128 bits, byte-reverses again and stores it.
;
; The db lines are hand-encoded instructions; each is decoded in its
; trailing comment.  No callee-saved register is touched; eax, ecx, edx and
; xmm0-xmm5 are clobbered.
; NOTE(review): presumably arg2 is the table written by _gcm_init_clmul --
; confirm with the caller.
; ---------------------------------------------------------------------------
global	_gcm_gmult_clmul
align	16
_gcm_gmult_clmul:
L$_gcm_gmult_clmul_begin:
	mov	eax,DWORD [4+esp]
	mov	edx,DWORD [8+esp]
	; Position-independent address of L$bswap.
	call	L$006pic
L$006pic:
	pop	ecx
	lea	ecx,[(L$bswap-L$006pic)+ecx]
	movdqu	xmm0,[eax]
	movdqa	xmm5,[ecx]		; byte-reversal shuffle mask
	movups	xmm2,[edx]
db	102,15,56,0,197		; pshufb xmm0,xmm5
	movups	xmm4,[32+edx]
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pxor	xmm3,xmm0		; xmm3 = lo ^ hi of the block
db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
	xorps	xmm3,xmm0
	xorps	xmm3,xmm1
	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4
	; Reduce xmm1:xmm0 to 128 bits in xmm0 (shift-and-XOR sequence).
	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3
	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
db	102,15,56,0,197		; pshufb xmm0,xmm5
	movdqu	[eax],xmm0
	ret
; ---------------------------------------------------------------------------
; _gcm_ghash_clmul(arg1, arg2, arg3, arg4)   32-bit x86, stack arguments
;
;   arg1 -> eax : 16-byte state, read at entry and written at exit
;   arg2 -> edx : table; rows at [edx], [16+edx] and [32+edx] are read
;   arg3 -> esi : input pointer
;   arg4 -> ebx : byte count
;
; Control flow on the byte count:
;   * exactly 16            -> L$008odd_tail only (one block)
;   * otherwise two blocks are loaded up front; L$010mod_loop then handles
;     32 bytes per iteration while more than 32 bytes remain,
;     L$009even_tail finishes the pair that is still pending, and if
;     exactly one block is left afterwards (ebx == 0 at that point) control
;     falls through into L$008odd_tail.
; The code never tests for a zero count or a count that is not a multiple
; of 16, so callers evidently must not pass one.
;
; xmm5 alternates between the byte-reversal mask (first row of L$bswap)
; and the table row at [32+edx]; it is reloaded wherever its role changes.
; The db lines are hand-encoded instructions; each is decoded in its
; trailing comment.
;
; Preserves ebp, ebx, esi, edi; clobbers eax, ecx, edx and xmm0-xmm7.
; NOTE(review): presumably arg2 is the table written by _gcm_init_clmul --
; confirm with the caller.
; ---------------------------------------------------------------------------
global	_gcm_ghash_clmul
align	16
_gcm_ghash_clmul:
L$_gcm_ghash_clmul_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	eax,DWORD [20+esp]
	mov	edx,DWORD [24+esp]
	mov	esi,DWORD [28+esp]
	mov	ebx,DWORD [32+esp]
	; Position-independent address of L$bswap.
	call	L$007pic
L$007pic:
	pop	ecx
	lea	ecx,[(L$bswap-L$007pic)+ecx]
	movdqu	xmm0,[eax]
	movdqa	xmm5,[ecx]		; byte-reversal shuffle mask
	movdqu	xmm2,[edx]
db	102,15,56,0,197		; pshufb xmm0,xmm5
	sub	ebx,16
	jz	NEAR L$008odd_tail	; a single block in total
	; Load two blocks; the first is XORed into the state, and the three
	; partial products of the second with [edx] are started right away.
	movdqu	xmm3,[esi]
	movdqu	xmm6,[16+esi]
db	102,15,56,0,221		; pshufb xmm3,xmm5
db	102,15,56,0,245		; pshufb xmm6,xmm5
	movdqu	xmm5,[32+edx]
	pxor	xmm0,xmm3
	pshufd	xmm3,xmm6,78
	movdqa	xmm7,xmm6
	pxor	xmm3,xmm6
	lea	esi,[32+esi]
db	102,15,58,68,242,0	; pclmulqdq xmm6,xmm2,0x00
db	102,15,58,68,250,17	; pclmulqdq xmm7,xmm2,0x11
db	102,15,58,68,221,0	; pclmulqdq xmm3,xmm5,0x00
	movups	xmm2,[16+edx]
	nop
	sub	ebx,32
	jbe	NEAR L$009even_tail	; fewer than two further blocks
	jmp	NEAR L$010mod_loop
align	32
L$010mod_loop:
	; Multiply the state by [16+edx], merge it with the pending products,
	; reduce, and interleave the loads and products for the next pair.
	pshufd	xmm4,xmm0,78
	movdqa	xmm1,xmm0
	pxor	xmm4,xmm0
	nop
db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
db	102,15,58,68,229,16	; pclmulqdq xmm4,xmm5,0x10
	movups	xmm2,[edx]
	xorps	xmm0,xmm6
	movdqa	xmm5,[ecx]		; back to the byte-reversal mask
	xorps	xmm1,xmm7
	movdqu	xmm7,[esi]
	pxor	xmm3,xmm0
	movdqu	xmm6,[16+esi]
	pxor	xmm3,xmm1
db	102,15,56,0,253		; pshufb xmm7,xmm5
	pxor	xmm4,xmm3
	movdqa	xmm3,xmm4
	psrldq	xmm4,8
	pslldq	xmm3,8
	pxor	xmm1,xmm4
	pxor	xmm0,xmm3
db	102,15,56,0,245		; pshufb xmm6,xmm5
	pxor	xmm1,xmm7
	movdqa	xmm7,xmm6
	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
db	102,15,58,68,242,0	; pclmulqdq xmm6,xmm2,0x00
	movups	xmm5,[32+edx]
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3
	pshufd	xmm3,xmm7,78
	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm3,xmm7
	pxor	xmm1,xmm4
db	102,15,58,68,250,17	; pclmulqdq xmm7,xmm2,0x11
	movups	xmm2,[16+edx]
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
db	102,15,58,68,221,0	; pclmulqdq xmm3,xmm5,0x00
	lea	esi,[32+esi]
	sub	ebx,32
	ja	NEAR L$010mod_loop
L$009even_tail:
	; Finish the pair whose partial products sit in xmm6, xmm7 and xmm3.
	pshufd	xmm4,xmm0,78
	movdqa	xmm1,xmm0
	pxor	xmm4,xmm0
db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
db	102,15,58,68,229,16	; pclmulqdq xmm4,xmm5,0x10
	movdqa	xmm5,[ecx]		; back to the byte-reversal mask
	xorps	xmm0,xmm6
	xorps	xmm1,xmm7
	pxor	xmm3,xmm0
	pxor	xmm3,xmm1
	pxor	xmm4,xmm3
	movdqa	xmm3,xmm4
	psrldq	xmm4,8
	pslldq	xmm3,8
	pxor	xmm1,xmm4
	pxor	xmm0,xmm3
	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3
	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	test	ebx,ebx
	jnz	NEAR L$011done		; nothing left after the pair
	movups	xmm2,[edx]		; one block left: use the row at [edx]
L$008odd_tail:
	; Single block: XOR it into the state and multiply by xmm2.
	movdqu	xmm3,[esi]
db	102,15,56,0,221		; pshufb xmm3,xmm5
	pxor	xmm0,xmm3
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pshufd	xmm4,xmm2,78
	pxor	xmm3,xmm0
	pxor	xmm4,xmm2
db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
	xorps	xmm3,xmm0
	xorps	xmm3,xmm1
	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4
	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3
	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
L$011done:
db	102,15,56,0,197		; pshufb xmm0,xmm5
	movdqu	[eax],xmm0
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; ---------------------------------------------------------------------------
; Read-only constants.  All three tables are reached position-independently
; by the routines in this file (call/pop plus a label difference).
; ---------------------------------------------------------------------------
align	64
; L$bswap, row 0: shuffle mask 15..0, loaded into xmm5 and applied with
; pshufb to reverse the byte order of a 16-byte block.
; Row 1 ([16+L$bswap]): 128-bit constant ANDed with a mask and XORed in by
; _gcm_init_clmul.
L$bswap:
db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
db	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
align	64
; L$rem_8bit: 256 words, indexed as [byte*2] by _gcm_ghash_4bit_mmx.
L$rem_8bit:
dw	0,450,900,582,1800,1738,1164,1358
dw	3600,4050,3476,3158,2328,2266,2716,2910
dw	7200,7650,8100,7782,6952,6890,6316,6510
dw	4656,5106,4532,4214,5432,5370,5820,6014
dw	14400,14722,15300,14854,16200,16010,15564,15630
dw	13904,14226,13780,13334,12632,12442,13020,13086
dw	9312,9634,10212,9766,9064,8874,8428,8494
dw	10864,11186,10740,10294,11640,11450,12028,12094
dw	28800,28994,29444,29382,30600,30282,29708,30158
dw	32400,32594,32020,31958,31128,30810,31260,31710
dw	27808,28002,28452,28390,27560,27242,26668,27118
dw	25264,25458,24884,24822,26040,25722,26172,26622
dw	18624,18690,19268,19078,20424,19978,19532,19854
dw	18128,18194,17748,17558,16856,16410,16988,17310
dw	21728,21794,22372,22182,21480,21034,20588,20910
dw	23280,23346,22900,22710,24056,23610,24188,24510
dw	57600,57538,57988,58182,58888,59338,58764,58446
dw	61200,61138,60564,60758,59416,59866,60316,59998
dw	64800,64738,65188,65382,64040,64490,63916,63598
dw	62256,62194,61620,61814,62520,62970,63420,63102
dw	55616,55426,56004,56070,56904,57226,56780,56334
dw	55120,54930,54484,54550,53336,53658,54236,53790
dw	50528,50338,50916,50982,49768,50090,49644,49198
dw	52080,51890,51444,51510,52344,52666,53244,52798
dw	37248,36930,37380,37830,38536,38730,38156,38094
dw	40848,40530,39956,40406,39064,39258,39708,39646
dw	36256,35938,36388,36838,35496,35690,35116,35054
dw	33712,33394,32820,33270,33976,34170,34620,34558
dw	43456,43010,43588,43910,44744,44810,44364,44174
dw	42960,42514,42068,42390,41176,41242,41820,41630
dw	46560,46114,46692,47014,45800,45866,45420,45230
dw	48112,47666,47220,47542,48376,48442,49020,48830
align	64
; L$rem_4bit: 16 qwords (each written as a dword pair whose low dword is
; zero), indexed as [nibble*8] by _gcm_gmult_4bit_mmx.
L$rem_4bit:
dd	0,0,0,471859200,0,943718400,0,610271232
dd	0,1887436800,0,1822425088,0,1220542464,0,1423966208
dd	0,3774873600,0,4246732800,0,3644850176,0,3311403008
dd	0,2441084928,0,2376073216,0,2847932416,0,3051356160
; NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
db	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
db	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
db	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
db	0