; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Optional symbol prefixing: when BORINGSSL_PREFIX is defined, include
; boringssl_prefix_symbols_nasm.inc (presumably symbol-renaming macros -
; confirm against that file).
%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
; Everything up to the matching %else near the end of the file is assembled
; only when the output format is win32.
%ifidn __OUTPUT_FORMAT__, win32
; Section selection by output format. Inside the win32-only guard above,
; only the %elifidn branch below can match.
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): presumably the win32 "@feat.00" safe-SEH marker symbol
; described in the NASM manual - confirm.
$@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif
;-----------------------------------------------------------------------
; _ChaCha20_ctr32 -- ChaCha20 keystream XOR, scalar 32-bit x86 code with
; a dispatch to the SSSE3 code at L$ssse3_shortcut.
;
; Stack arguments as read by this code (offsets are valid after the four
; register pushes below; the C-level meaning is an assumption - confirm
; against the C prototype):
;   [20+esp]  output pointer       (64 bytes written per full block)
;   [24+esp]  input pointer        (XORed with the keystream)
;   [28+esp]  byte count           (0 => return immediately)
;   [32+esp]  pointer to 8 dwords  (copied to state words 4..11)
;   [36+esp]  pointer to 4 dwords  (copied to state words 12..15; word 12
;                                   is incremented once per block)
;
; Saves and restores ebp, ebx, esi, edi. eax, ecx, edx are overwritten.
;
; Frame of the scalar path after "sub esp,132":
;   [0..60+esp]        working state (some words live in registers)
;   [80..108+esp]      copy of the 8 dwords from the [32+esp] pointer
;   [112..124+esp]     copy of the 4 dwords from the [36+esp] pointer
;   [128+esp]          spilled round-loop counter
;   [152/156/160+esp]  caller's output pointer / input pointer / length
;                      slots, rewritten at the top of each outer iteration
;-----------------------------------------------------------------------
global	_ChaCha20_ctr32
align	16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	; Nothing to do when the byte count is zero.
	xor	eax,eax
	cmp	eax,DWORD [28+esp]
	je	NEAR L$000no_data
	; call/pop leaves the address of L$pic_point in eax; the SSSE3 code
	; uses it to locate L$ssse3_data.
	call	L$pic_point
L$pic_point:
	pop	eax
	; Take the SSSE3 path only when bit 24 of capability dword 0 and bit 9
	; of capability dword 1 are both set.
	; NOTE(review): presumably the FXSR and SSSE3 feature bits - confirm.
	lea	ebp,[_OPENSSL_ia32cap_P]
	test	DWORD [ebp],16777216
	jz	NEAR L$001x86
	test	DWORD [4+ebp],512
	jz	NEAR L$001x86
	jmp	NEAR L$ssse3_shortcut
L$001x86:
	; Scalar path: copy the 8 + 4 input dwords into the frame.
	mov	esi,DWORD [32+esp]
	mov	edi,DWORD [36+esp]
	sub	esp,132
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	; Pre-decrement word 12: L$002entry adds 1 before every block, so the
	; first block uses the caller's value.
	sub	eax,1
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$002entry
align	16
L$003outer_loop:
	; Write back the advanced input pointer, output pointer and the
	; remaining length for the next block.
	mov	DWORD [156+esp],ebx
	mov	DWORD [152+esp],eax
	mov	DWORD [160+esp],ecx
L$002entry:
	; Build the working state for one block. Words 0..3 are the constants
	; 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 ("expand 32-byte k").
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	; Advance word 12 for this block and remember it.
	add	edx,1
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	; ebx = iteration count for the round loop (spilled to [128+esp]).
	mov	ebx,10
	jmp	NEAR L$004loop
align	16
L$004loop:
	; Ten iterations. Each iteration contains eight add / xor / rotate-by-
	; 16,12,8,7 sequences over the 16-word state; loads and stores of
	; neighbouring sequences are interleaved, so the instruction order
	; must not be changed.
	add	eax,ebp
	mov	DWORD [128+esp],ebx
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$004loop
	; Rounds done. ebx = remaining length; start adding the input state
	; back into the words that are still in registers.
	mov	ebx,DWORD [160+esp]
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	; Fewer than 64 bytes left: finish through the byte-wise tail.
	cmp	ebx,64
	jb	NEAR L$005tail
	; Full block: ebx = input pointer, eax = output pointer. Add the rest
	; of the input state, XOR with 64 input bytes and store 64 bytes.
	mov	ebx,DWORD [156+esp]
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax
	mov	eax,DWORD [152+esp]
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	lea	ebx,[64+ebx]
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	; Word 0 is stored last, after the input word at the same offset has
	; already been read.
	mov	DWORD [eax],ebp
	lea	eax,[64+eax]
	; ecx = remaining length; loop while bytes remain.
	sub	ecx,64
	jnz	NEAR L$003outer_loop
	jmp	NEAR L$006done
L$005tail:
	; Partial block: materialise all 16 keystream words at [0..60+esp].
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]
	mov	DWORD [44+esp],esi
	xor	esi,esi
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$007tail_loop:
	; ebp = input pointer, ecx = output pointer, esi = byte index,
	; ebx = bytes remaining (non-zero on entry).
	mov	al,BYTE [ebp*1+esi]
	mov	dl,BYTE [esi*1+esp]
	lea	esi,[1+esi]
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al
	dec	ebx
	jnz	NEAR L$007tail_loop
L$006done:
	add	esp,132
L$000no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _ChaCha20_ssse3 -- SSE2/SSSE3 ChaCha20 keystream XOR.
;
; Two ways in:
;   * L$ssse3_shortcut, jumped to from _ChaCha20_ctr32 with four
;     registers already pushed, a non-zero byte count and
;     eax = address of L$pic_point.
;   * The global entry below, which now establishes the same conditions
;     itself (zero-length check + eax) before falling into the shortcut.
;     Previously eax was left uninitialised on this path although the
;     code uses it as the base of L$ssse3_data, and a zero byte count
;     reached the tail loop. This file is generated; the durable fix
;     belongs in the generating Perl script.
;
; Stack arguments (offsets valid after the four pushes; C-level meaning
; is an assumption - confirm against the C prototype):
;   [20+esp] output pointer, [24+esp] input pointer, [28+esp] byte count,
;   [32+esp] pointer to 32 bytes (state words 4..11),
;   [36+esp] pointer to 16 bytes (state words 12..15).
;
; Saves and restores ebp, ebx, esi, edi. eax, ecx, edx and xmm0-xmm7 are
; overwritten.
;
; Frame (esp is rounded down to a multiple of 64):
;   [512+esp]  caller's esp, restored at L$011done
;   [516+esp]  saved [32+esp] argument, [520+esp] saved [36+esp] argument
;   ebp = esp+384: input state, one 16-byte vector per word, [ebp-128 ..
;                  ebp+127] (four-block path)
;   ebx = esp+128: working state, same layout (four-block path)
;   [0..63+esp]:   input state / keystream block (one-block path)
;-----------------------------------------------------------------------
global	_ChaCha20_ssse3
align	16
_ChaCha20_ssse3:
L$_ChaCha20_ssse3_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	; Direct entry only: return at once for a zero byte count, mirroring
	; the check in _ChaCha20_ctr32 (L$000no_data pops the four registers).
	cmp	DWORD [28+esp],0
	je	NEAR L$000no_data
	; Direct entry only: make eax = address of L$pic_point, which is what
	; the shortcut path below expects.
	call	L$ssse3_pic_point
L$ssse3_pic_point:
	pop	eax
	lea	eax,[(L$pic_point-L$ssse3_pic_point)+eax]
L$ssse3_shortcut:
	; edi = output, esi = input, ecx = byte count,
	; edx = pointer to words 4..11, ebx = pointer to words 12..15.
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	mov	ecx,DWORD [28+esp]
	mov	edx,DWORD [32+esp]
	mov	ebx,DWORD [36+esp]
	mov	ebp,esp
	sub	esp,524
	and	esp,-64
	mov	DWORD [512+esp],ebp
	; eax = address of L$ssse3_data.
	lea	eax,[(L$ssse3_data-L$pic_point)+eax]
	movdqu	xmm3,[ebx]
	; Fewer than 256 bytes: use the one-block-at-a-time path.
	cmp	ecx,256
	jb	NEAR L$0081x
	; Four-block path. Broadcast every state word across the four lanes
	; of its own vector; lane i of word 12 gets +i from [48+eax]. The
	; psubd by [64+eax] pre-compensates the paddd at the top of
	; L$009outer_loop.
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	sub	ecx,256
	lea	ebp,[384+esp]
	movdqu	xmm7,[edx]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	paddd	xmm0,[48+eax]
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	; Bias both data pointers by 128; the loads and stores below use
	; offsets -128, -64, 0 and +64 from them.
	lea	esi,[128+esi]
	lea	edi,[128+edi]
	jmp	NEAR L$009outer_loop
align	16
L$009outer_loop:
	; Copy the input state into the working area and advance the word-12
	; lanes by [64+eax] (4 per lane). Words consumed first are loaded
	; straight into xmm0, xmm3, xmm4, xmm5 and xmm6.
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	paddd	xmm4,[64+eax]
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	; edx = iteration count for the round loop.
	mov	edx,10
	nop
align	16
L$010loop:
	; Ten iterations of vectorised add / xor / rotate sequences. Rotates
	; by 16 and 8 use pshufb with the masks at [eax] and [16+eax];
	; rotates by 12 and 7 use pslld/psrld/por. Loads and stores of
	; neighbouring sequences are interleaved - do not reorder.
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$010loop
	; Rounds done: flush the words still held in registers.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	; Output, four state words at a time. For each group: add the input
	; state, transpose the 4x4 dword matrix with punpck* so each register
	; holds 16 consecutive bytes of one block, XOR with the input at
	; offsets -128/-64/0/+64 (the same 16-byte row of the four blocks)
	; and store. Group 1: words 0..3.
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	; Group 2: words 4..7.
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	; Group 3: words 8..11.
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	; Group 4: words 12..15.
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	; 3*16 + 208 = 256: the pointers end up one 256-byte chunk further.
	lea	esi,[208+esi]
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	; Continue while at least 256 more bytes remain (no borrow).
	sub	ecx,256
	jnc	NEAR L$009outer_loop
	; ecx = leftover byte count (0..255); done if nothing is left.
	add	ecx,256
	jz	NEAR L$011done
	; Switch to the one-block path: undo the pointer bias and rebuild
	; the word 12..15 row in xmm3 - dword 0 is lane 0 of the current
	; word-12 vector plus [96+eax] (4), dwords 1..3 come from the
	; caller's 16-byte block masked with [112+eax].
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	movd	xmm2,DWORD [64+ebp]
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]
	pand	xmm3,[112+eax]
	por	xmm3,xmm2
L$0081x:
	; One-block path. xmm0 = constants, xmm1/xmm2 = words 4..7 / 8..11,
	; xmm3 = words 12..15; xmm6/xmm7 = rotate-16 / rotate-8 pshufb masks.
	; The input state is kept at [0..63+esp].
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	; This dword store is overwritten by the movdqa to [48+esp] below.
	mov	DWORD [48+esp],ebp
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$012loop1x
align	16
L$013outer1x:
	; Next block: reload the input state and add [80+eax] (1,0,0,0) to
	; the word 12..15 row.
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$012loop1x
align	16
L$012loop1x:
	; Ten iterations, two round halves each. The db sequences encode
	; pshufb xmm3,xmm6 (...,222) and pshufb xmm3,xmm7 (...,223). The
	; pshufd triples rotate the rows between the two halves and back.
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$012loop1x
	; Add the input state back to obtain the keystream block.
	paddd	xmm0,[esp]
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	; Fewer than 64 bytes left: finish through the byte-wise tail.
	cmp	ecx,64
	jb	NEAR L$014tail
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$013outer1x
	jmp	NEAR L$011done
L$014tail:
	; Spill the keystream block to [0..63+esp] and XOR byte by byte.
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp
L$015tail_loop:
	; ebp = byte index, ecx = bytes remaining (non-zero on entry).
	mov	al,BYTE [ebp*1+esp]
	mov	dl,BYTE [ebp*1+esi]
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al
	dec	ecx
	jnz	NEAR L$015tail_loop
L$011done:
	; Restore the caller's esp saved at function setup.
	mov	esp,DWORD [512+esp]
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; Constant pool for the SSSE3 code, addressed as [offset+eax].
align	64
L$ssse3_data:
; +0: pshufb mask that rotates every dword left by 16 bits.
db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; +16: pshufb mask that rotates every dword left by 8 bits.
db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; +32: state words 0..3: 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
;      (the bytes spell "expand 32-byte k").
dd	1634760805,857760878,2036477234,1797285236
; +48: per-lane offsets added to word 12 when the four-block path is set up.
dd	0,1,2,3
; +64: per-lane step added to word 12 at the top of each 256-byte iteration.
dd	4,4,4,4
; +80: step added to the word 12..15 row between blocks on the one-block path.
dd	1,0,0,0
; +96: added to word 12 when leaving the four-block path for the one-block path.
dd	4,0,0,0
; +112: AND mask that clears dword 0 and keeps dwords 1..3.
dd	0,-1,-1,-1
align	64
; ASCII "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db	114,103,62,0
; 16-byte common symbol; _ChaCha20_ctr32 tests bits in its first two dwords.
segment	.bss
common	_OPENSSL_ia32cap_P 16
; Any output format other than win32: emit a lone ret instead of the code above.
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif
