# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
13
14
15chacha20_poly1305_constants:
16
17.p2align	6
18L$chacha20_consts:
19.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
20.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
21L$rol8:
22.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
23.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
24L$rol16:
25.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
26.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
27L$avx2_init:
28.long	0,0,0,0
29L$sse_inc:
30.long	1,0,0,0
31L$avx2_inc:
32.long	2,0,0,0,2,0,0,0
33L$clamp:
34.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
35.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
36.p2align	4
37L$and_masks:
38.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
39.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
45.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
46.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
47.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
48.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
49.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
50.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
51.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
52.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
53.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
54
55
56.p2align	6
57poly_hash_ad_internal:
58
59
60	xorq	%r10,%r10
61	xorq	%r11,%r11
62	xorq	%r12,%r12
63	cmpq	$13,%r8
64	jne	L$hash_ad_loop
65L$poly_fast_tls_ad:
66
67	movq	(%rcx),%r10
68	movq	5(%rcx),%r11
69	shrq	$24,%r11
70	movq	$1,%r12
71	movq	0+0+0(%rbp),%rax
72	movq	%rax,%r15
73	mulq	%r10
74	movq	%rax,%r13
75	movq	%rdx,%r14
76	movq	0+0+0(%rbp),%rax
77	mulq	%r11
78	imulq	%r12,%r15
79	addq	%rax,%r14
80	adcq	%rdx,%r15
81	movq	8+0+0(%rbp),%rax
82	movq	%rax,%r9
83	mulq	%r10
84	addq	%rax,%r14
85	adcq	$0,%rdx
86	movq	%rdx,%r10
87	movq	8+0+0(%rbp),%rax
88	mulq	%r11
89	addq	%rax,%r15
90	adcq	$0,%rdx
91	imulq	%r12,%r9
92	addq	%r10,%r15
93	adcq	%rdx,%r9
94	movq	%r13,%r10
95	movq	%r14,%r11
96	movq	%r15,%r12
97	andq	$3,%r12
98	movq	%r15,%r13
99	andq	$-4,%r13
100	movq	%r9,%r14
101	shrdq	$2,%r9,%r15
102	shrq	$2,%r9
103	addq	%r13,%r15
104	adcq	%r14,%r9
105	addq	%r15,%r10
106	adcq	%r9,%r11
107	adcq	$0,%r12
108
109	.byte	0xf3,0xc3
110L$hash_ad_loop:
111
112	cmpq	$16,%r8
113	jb	L$hash_ad_tail
114	addq	0+0(%rcx),%r10
115	adcq	8+0(%rcx),%r11
116	adcq	$1,%r12
117	movq	0+0+0(%rbp),%rax
118	movq	%rax,%r15
119	mulq	%r10
120	movq	%rax,%r13
121	movq	%rdx,%r14
122	movq	0+0+0(%rbp),%rax
123	mulq	%r11
124	imulq	%r12,%r15
125	addq	%rax,%r14
126	adcq	%rdx,%r15
127	movq	8+0+0(%rbp),%rax
128	movq	%rax,%r9
129	mulq	%r10
130	addq	%rax,%r14
131	adcq	$0,%rdx
132	movq	%rdx,%r10
133	movq	8+0+0(%rbp),%rax
134	mulq	%r11
135	addq	%rax,%r15
136	adcq	$0,%rdx
137	imulq	%r12,%r9
138	addq	%r10,%r15
139	adcq	%rdx,%r9
140	movq	%r13,%r10
141	movq	%r14,%r11
142	movq	%r15,%r12
143	andq	$3,%r12
144	movq	%r15,%r13
145	andq	$-4,%r13
146	movq	%r9,%r14
147	shrdq	$2,%r9,%r15
148	shrq	$2,%r9
149	addq	%r13,%r15
150	adcq	%r14,%r9
151	addq	%r15,%r10
152	adcq	%r9,%r11
153	adcq	$0,%r12
154
155	leaq	16(%rcx),%rcx
156	subq	$16,%r8
157	jmp	L$hash_ad_loop
158L$hash_ad_tail:
159	cmpq	$0,%r8
160	je	L$hash_ad_done
161
162	xorq	%r13,%r13
163	xorq	%r14,%r14
164	xorq	%r15,%r15
165	addq	%r8,%rcx
166L$hash_ad_tail_loop:
167	shldq	$8,%r13,%r14
168	shlq	$8,%r13
169	movzbq	-1(%rcx),%r15
170	xorq	%r15,%r13
171	decq	%rcx
172	decq	%r8
173	jne	L$hash_ad_tail_loop
174
175	addq	%r13,%r10
176	adcq	%r14,%r11
177	adcq	$1,%r12
178	movq	0+0+0(%rbp),%rax
179	movq	%rax,%r15
180	mulq	%r10
181	movq	%rax,%r13
182	movq	%rdx,%r14
183	movq	0+0+0(%rbp),%rax
184	mulq	%r11
185	imulq	%r12,%r15
186	addq	%rax,%r14
187	adcq	%rdx,%r15
188	movq	8+0+0(%rbp),%rax
189	movq	%rax,%r9
190	mulq	%r10
191	addq	%rax,%r14
192	adcq	$0,%rdx
193	movq	%rdx,%r10
194	movq	8+0+0(%rbp),%rax
195	mulq	%r11
196	addq	%rax,%r15
197	adcq	$0,%rdx
198	imulq	%r12,%r9
199	addq	%r10,%r15
200	adcq	%rdx,%r9
201	movq	%r13,%r10
202	movq	%r14,%r11
203	movq	%r15,%r12
204	andq	$3,%r12
205	movq	%r15,%r13
206	andq	$-4,%r13
207	movq	%r9,%r14
208	shrdq	$2,%r9,%r15
209	shrq	$2,%r9
210	addq	%r13,%r15
211	adcq	%r14,%r9
212	addq	%r15,%r10
213	adcq	%r9,%r11
214	adcq	$0,%r12
215
216
217L$hash_ad_done:
218	.byte	0xf3,0xc3
219
220
221
222.globl	_chacha20_poly1305_open
223.private_extern _chacha20_poly1305_open
224
225.p2align	6
226_chacha20_poly1305_open:
227
228	pushq	%rbp
229
230	pushq	%rbx
231
232	pushq	%r12
233
234	pushq	%r13
235
236	pushq	%r14
237
238	pushq	%r15
239
240
241
242	pushq	%r9
243
244	subq	$288 + 0 + 32,%rsp
245
246
247	leaq	32(%rsp),%rbp
248	andq	$-32,%rbp
249
250	movq	%rdx,%rbx
251	movq	%r8,0+0+32(%rbp)
252	movq	%rbx,8+0+32(%rbp)
253
254	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
255	andl	$288,%eax
256	xorl	$288,%eax
257	jz	chacha20_poly1305_open_avx2
258
259	cmpq	$128,%rbx
260	jbe	L$open_sse_128
261
262	movdqa	L$chacha20_consts(%rip),%xmm0
263	movdqu	0(%r9),%xmm4
264	movdqu	16(%r9),%xmm8
265	movdqu	32(%r9),%xmm12
266
267	movdqa	%xmm12,%xmm7
268
269	movdqa	%xmm4,0+48(%rbp)
270	movdqa	%xmm8,0+64(%rbp)
271	movdqa	%xmm12,0+96(%rbp)
272	movq	$10,%r10
273L$open_sse_init_rounds:
274	paddd	%xmm4,%xmm0
275	pxor	%xmm0,%xmm12
276	pshufb	L$rol16(%rip),%xmm12
277	paddd	%xmm12,%xmm8
278	pxor	%xmm8,%xmm4
279	movdqa	%xmm4,%xmm3
280	pslld	$12,%xmm3
281	psrld	$20,%xmm4
282	pxor	%xmm3,%xmm4
283	paddd	%xmm4,%xmm0
284	pxor	%xmm0,%xmm12
285	pshufb	L$rol8(%rip),%xmm12
286	paddd	%xmm12,%xmm8
287	pxor	%xmm8,%xmm4
288	movdqa	%xmm4,%xmm3
289	pslld	$7,%xmm3
290	psrld	$25,%xmm4
291	pxor	%xmm3,%xmm4
292.byte	102,15,58,15,228,4
293.byte	102,69,15,58,15,192,8
294.byte	102,69,15,58,15,228,12
295	paddd	%xmm4,%xmm0
296	pxor	%xmm0,%xmm12
297	pshufb	L$rol16(%rip),%xmm12
298	paddd	%xmm12,%xmm8
299	pxor	%xmm8,%xmm4
300	movdqa	%xmm4,%xmm3
301	pslld	$12,%xmm3
302	psrld	$20,%xmm4
303	pxor	%xmm3,%xmm4
304	paddd	%xmm4,%xmm0
305	pxor	%xmm0,%xmm12
306	pshufb	L$rol8(%rip),%xmm12
307	paddd	%xmm12,%xmm8
308	pxor	%xmm8,%xmm4
309	movdqa	%xmm4,%xmm3
310	pslld	$7,%xmm3
311	psrld	$25,%xmm4
312	pxor	%xmm3,%xmm4
313.byte	102,15,58,15,228,12
314.byte	102,69,15,58,15,192,8
315.byte	102,69,15,58,15,228,4
316
317	decq	%r10
318	jne	L$open_sse_init_rounds
319
320	paddd	L$chacha20_consts(%rip),%xmm0
321	paddd	0+48(%rbp),%xmm4
322
323	pand	L$clamp(%rip),%xmm0
324	movdqa	%xmm0,0+0(%rbp)
325	movdqa	%xmm4,0+16(%rbp)
326
327	movq	%r8,%r8
328	call	poly_hash_ad_internal
329L$open_sse_main_loop:
330	cmpq	$256,%rbx
331	jb	L$open_sse_tail
332
333	movdqa	L$chacha20_consts(%rip),%xmm0
334	movdqa	0+48(%rbp),%xmm4
335	movdqa	0+64(%rbp),%xmm8
336	movdqa	%xmm0,%xmm1
337	movdqa	%xmm4,%xmm5
338	movdqa	%xmm8,%xmm9
339	movdqa	%xmm0,%xmm2
340	movdqa	%xmm4,%xmm6
341	movdqa	%xmm8,%xmm10
342	movdqa	%xmm0,%xmm3
343	movdqa	%xmm4,%xmm7
344	movdqa	%xmm8,%xmm11
345	movdqa	0+96(%rbp),%xmm15
346	paddd	L$sse_inc(%rip),%xmm15
347	movdqa	%xmm15,%xmm14
348	paddd	L$sse_inc(%rip),%xmm14
349	movdqa	%xmm14,%xmm13
350	paddd	L$sse_inc(%rip),%xmm13
351	movdqa	%xmm13,%xmm12
352	paddd	L$sse_inc(%rip),%xmm12
353	movdqa	%xmm12,0+96(%rbp)
354	movdqa	%xmm13,0+112(%rbp)
355	movdqa	%xmm14,0+128(%rbp)
356	movdqa	%xmm15,0+144(%rbp)
357
358
359
360	movq	$4,%rcx
361	movq	%rsi,%r8
362L$open_sse_main_loop_rounds:
363	movdqa	%xmm8,0+80(%rbp)
364	movdqa	L$rol16(%rip),%xmm8
365	paddd	%xmm7,%xmm3
366	paddd	%xmm6,%xmm2
367	paddd	%xmm5,%xmm1
368	paddd	%xmm4,%xmm0
369	pxor	%xmm3,%xmm15
370	pxor	%xmm2,%xmm14
371	pxor	%xmm1,%xmm13
372	pxor	%xmm0,%xmm12
373.byte	102,69,15,56,0,248
374.byte	102,69,15,56,0,240
375.byte	102,69,15,56,0,232
376.byte	102,69,15,56,0,224
377	movdqa	0+80(%rbp),%xmm8
378	paddd	%xmm15,%xmm11
379	paddd	%xmm14,%xmm10
380	paddd	%xmm13,%xmm9
381	paddd	%xmm12,%xmm8
382	pxor	%xmm11,%xmm7
383	addq	0+0(%r8),%r10
384	adcq	8+0(%r8),%r11
385	adcq	$1,%r12
386
387	leaq	16(%r8),%r8
388	pxor	%xmm10,%xmm6
389	pxor	%xmm9,%xmm5
390	pxor	%xmm8,%xmm4
391	movdqa	%xmm8,0+80(%rbp)
392	movdqa	%xmm7,%xmm8
393	psrld	$20,%xmm8
394	pslld	$32-20,%xmm7
395	pxor	%xmm8,%xmm7
396	movdqa	%xmm6,%xmm8
397	psrld	$20,%xmm8
398	pslld	$32-20,%xmm6
399	pxor	%xmm8,%xmm6
400	movdqa	%xmm5,%xmm8
401	psrld	$20,%xmm8
402	pslld	$32-20,%xmm5
403	pxor	%xmm8,%xmm5
404	movdqa	%xmm4,%xmm8
405	psrld	$20,%xmm8
406	pslld	$32-20,%xmm4
407	pxor	%xmm8,%xmm4
408	movq	0+0+0(%rbp),%rax
409	movq	%rax,%r15
410	mulq	%r10
411	movq	%rax,%r13
412	movq	%rdx,%r14
413	movq	0+0+0(%rbp),%rax
414	mulq	%r11
415	imulq	%r12,%r15
416	addq	%rax,%r14
417	adcq	%rdx,%r15
418	movdqa	L$rol8(%rip),%xmm8
419	paddd	%xmm7,%xmm3
420	paddd	%xmm6,%xmm2
421	paddd	%xmm5,%xmm1
422	paddd	%xmm4,%xmm0
423	pxor	%xmm3,%xmm15
424	pxor	%xmm2,%xmm14
425	pxor	%xmm1,%xmm13
426	pxor	%xmm0,%xmm12
427.byte	102,69,15,56,0,248
428.byte	102,69,15,56,0,240
429.byte	102,69,15,56,0,232
430.byte	102,69,15,56,0,224
431	movdqa	0+80(%rbp),%xmm8
432	paddd	%xmm15,%xmm11
433	paddd	%xmm14,%xmm10
434	paddd	%xmm13,%xmm9
435	paddd	%xmm12,%xmm8
436	pxor	%xmm11,%xmm7
437	pxor	%xmm10,%xmm6
438	movq	8+0+0(%rbp),%rax
439	movq	%rax,%r9
440	mulq	%r10
441	addq	%rax,%r14
442	adcq	$0,%rdx
443	movq	%rdx,%r10
444	movq	8+0+0(%rbp),%rax
445	mulq	%r11
446	addq	%rax,%r15
447	adcq	$0,%rdx
448	pxor	%xmm9,%xmm5
449	pxor	%xmm8,%xmm4
450	movdqa	%xmm8,0+80(%rbp)
451	movdqa	%xmm7,%xmm8
452	psrld	$25,%xmm8
453	pslld	$32-25,%xmm7
454	pxor	%xmm8,%xmm7
455	movdqa	%xmm6,%xmm8
456	psrld	$25,%xmm8
457	pslld	$32-25,%xmm6
458	pxor	%xmm8,%xmm6
459	movdqa	%xmm5,%xmm8
460	psrld	$25,%xmm8
461	pslld	$32-25,%xmm5
462	pxor	%xmm8,%xmm5
463	movdqa	%xmm4,%xmm8
464	psrld	$25,%xmm8
465	pslld	$32-25,%xmm4
466	pxor	%xmm8,%xmm4
467	movdqa	0+80(%rbp),%xmm8
468	imulq	%r12,%r9
469	addq	%r10,%r15
470	adcq	%rdx,%r9
471.byte	102,15,58,15,255,4
472.byte	102,69,15,58,15,219,8
473.byte	102,69,15,58,15,255,12
474.byte	102,15,58,15,246,4
475.byte	102,69,15,58,15,210,8
476.byte	102,69,15,58,15,246,12
477.byte	102,15,58,15,237,4
478.byte	102,69,15,58,15,201,8
479.byte	102,69,15,58,15,237,12
480.byte	102,15,58,15,228,4
481.byte	102,69,15,58,15,192,8
482.byte	102,69,15,58,15,228,12
483	movdqa	%xmm8,0+80(%rbp)
484	movdqa	L$rol16(%rip),%xmm8
485	paddd	%xmm7,%xmm3
486	paddd	%xmm6,%xmm2
487	paddd	%xmm5,%xmm1
488	paddd	%xmm4,%xmm0
489	pxor	%xmm3,%xmm15
490	pxor	%xmm2,%xmm14
491	movq	%r13,%r10
492	movq	%r14,%r11
493	movq	%r15,%r12
494	andq	$3,%r12
495	movq	%r15,%r13
496	andq	$-4,%r13
497	movq	%r9,%r14
498	shrdq	$2,%r9,%r15
499	shrq	$2,%r9
500	addq	%r13,%r15
501	adcq	%r14,%r9
502	addq	%r15,%r10
503	adcq	%r9,%r11
504	adcq	$0,%r12
505	pxor	%xmm1,%xmm13
506	pxor	%xmm0,%xmm12
507.byte	102,69,15,56,0,248
508.byte	102,69,15,56,0,240
509.byte	102,69,15,56,0,232
510.byte	102,69,15,56,0,224
511	movdqa	0+80(%rbp),%xmm8
512	paddd	%xmm15,%xmm11
513	paddd	%xmm14,%xmm10
514	paddd	%xmm13,%xmm9
515	paddd	%xmm12,%xmm8
516	pxor	%xmm11,%xmm7
517	pxor	%xmm10,%xmm6
518	pxor	%xmm9,%xmm5
519	pxor	%xmm8,%xmm4
520	movdqa	%xmm8,0+80(%rbp)
521	movdqa	%xmm7,%xmm8
522	psrld	$20,%xmm8
523	pslld	$32-20,%xmm7
524	pxor	%xmm8,%xmm7
525	movdqa	%xmm6,%xmm8
526	psrld	$20,%xmm8
527	pslld	$32-20,%xmm6
528	pxor	%xmm8,%xmm6
529	movdqa	%xmm5,%xmm8
530	psrld	$20,%xmm8
531	pslld	$32-20,%xmm5
532	pxor	%xmm8,%xmm5
533	movdqa	%xmm4,%xmm8
534	psrld	$20,%xmm8
535	pslld	$32-20,%xmm4
536	pxor	%xmm8,%xmm4
537	movdqa	L$rol8(%rip),%xmm8
538	paddd	%xmm7,%xmm3
539	paddd	%xmm6,%xmm2
540	paddd	%xmm5,%xmm1
541	paddd	%xmm4,%xmm0
542	pxor	%xmm3,%xmm15
543	pxor	%xmm2,%xmm14
544	pxor	%xmm1,%xmm13
545	pxor	%xmm0,%xmm12
546.byte	102,69,15,56,0,248
547.byte	102,69,15,56,0,240
548.byte	102,69,15,56,0,232
549.byte	102,69,15,56,0,224
550	movdqa	0+80(%rbp),%xmm8
551	paddd	%xmm15,%xmm11
552	paddd	%xmm14,%xmm10
553	paddd	%xmm13,%xmm9
554	paddd	%xmm12,%xmm8
555	pxor	%xmm11,%xmm7
556	pxor	%xmm10,%xmm6
557	pxor	%xmm9,%xmm5
558	pxor	%xmm8,%xmm4
559	movdqa	%xmm8,0+80(%rbp)
560	movdqa	%xmm7,%xmm8
561	psrld	$25,%xmm8
562	pslld	$32-25,%xmm7
563	pxor	%xmm8,%xmm7
564	movdqa	%xmm6,%xmm8
565	psrld	$25,%xmm8
566	pslld	$32-25,%xmm6
567	pxor	%xmm8,%xmm6
568	movdqa	%xmm5,%xmm8
569	psrld	$25,%xmm8
570	pslld	$32-25,%xmm5
571	pxor	%xmm8,%xmm5
572	movdqa	%xmm4,%xmm8
573	psrld	$25,%xmm8
574	pslld	$32-25,%xmm4
575	pxor	%xmm8,%xmm4
576	movdqa	0+80(%rbp),%xmm8
577.byte	102,15,58,15,255,12
578.byte	102,69,15,58,15,219,8
579.byte	102,69,15,58,15,255,4
580.byte	102,15,58,15,246,12
581.byte	102,69,15,58,15,210,8
582.byte	102,69,15,58,15,246,4
583.byte	102,15,58,15,237,12
584.byte	102,69,15,58,15,201,8
585.byte	102,69,15,58,15,237,4
586.byte	102,15,58,15,228,12
587.byte	102,69,15,58,15,192,8
588.byte	102,69,15,58,15,228,4
589
590	decq	%rcx
591	jge	L$open_sse_main_loop_rounds
592	addq	0+0(%r8),%r10
593	adcq	8+0(%r8),%r11
594	adcq	$1,%r12
595	movq	0+0+0(%rbp),%rax
596	movq	%rax,%r15
597	mulq	%r10
598	movq	%rax,%r13
599	movq	%rdx,%r14
600	movq	0+0+0(%rbp),%rax
601	mulq	%r11
602	imulq	%r12,%r15
603	addq	%rax,%r14
604	adcq	%rdx,%r15
605	movq	8+0+0(%rbp),%rax
606	movq	%rax,%r9
607	mulq	%r10
608	addq	%rax,%r14
609	adcq	$0,%rdx
610	movq	%rdx,%r10
611	movq	8+0+0(%rbp),%rax
612	mulq	%r11
613	addq	%rax,%r15
614	adcq	$0,%rdx
615	imulq	%r12,%r9
616	addq	%r10,%r15
617	adcq	%rdx,%r9
618	movq	%r13,%r10
619	movq	%r14,%r11
620	movq	%r15,%r12
621	andq	$3,%r12
622	movq	%r15,%r13
623	andq	$-4,%r13
624	movq	%r9,%r14
625	shrdq	$2,%r9,%r15
626	shrq	$2,%r9
627	addq	%r13,%r15
628	adcq	%r14,%r9
629	addq	%r15,%r10
630	adcq	%r9,%r11
631	adcq	$0,%r12
632
633	leaq	16(%r8),%r8
634	cmpq	$-6,%rcx
635	jg	L$open_sse_main_loop_rounds
636	paddd	L$chacha20_consts(%rip),%xmm3
637	paddd	0+48(%rbp),%xmm7
638	paddd	0+64(%rbp),%xmm11
639	paddd	0+144(%rbp),%xmm15
640	paddd	L$chacha20_consts(%rip),%xmm2
641	paddd	0+48(%rbp),%xmm6
642	paddd	0+64(%rbp),%xmm10
643	paddd	0+128(%rbp),%xmm14
644	paddd	L$chacha20_consts(%rip),%xmm1
645	paddd	0+48(%rbp),%xmm5
646	paddd	0+64(%rbp),%xmm9
647	paddd	0+112(%rbp),%xmm13
648	paddd	L$chacha20_consts(%rip),%xmm0
649	paddd	0+48(%rbp),%xmm4
650	paddd	0+64(%rbp),%xmm8
651	paddd	0+96(%rbp),%xmm12
652	movdqa	%xmm12,0+80(%rbp)
653	movdqu	0 + 0(%rsi),%xmm12
654	pxor	%xmm3,%xmm12
655	movdqu	%xmm12,0 + 0(%rdi)
656	movdqu	16 + 0(%rsi),%xmm12
657	pxor	%xmm7,%xmm12
658	movdqu	%xmm12,16 + 0(%rdi)
659	movdqu	32 + 0(%rsi),%xmm12
660	pxor	%xmm11,%xmm12
661	movdqu	%xmm12,32 + 0(%rdi)
662	movdqu	48 + 0(%rsi),%xmm12
663	pxor	%xmm15,%xmm12
664	movdqu	%xmm12,48 + 0(%rdi)
665	movdqu	0 + 64(%rsi),%xmm3
666	movdqu	16 + 64(%rsi),%xmm7
667	movdqu	32 + 64(%rsi),%xmm11
668	movdqu	48 + 64(%rsi),%xmm15
669	pxor	%xmm3,%xmm2
670	pxor	%xmm7,%xmm6
671	pxor	%xmm11,%xmm10
672	pxor	%xmm14,%xmm15
673	movdqu	%xmm2,0 + 64(%rdi)
674	movdqu	%xmm6,16 + 64(%rdi)
675	movdqu	%xmm10,32 + 64(%rdi)
676	movdqu	%xmm15,48 + 64(%rdi)
677	movdqu	0 + 128(%rsi),%xmm3
678	movdqu	16 + 128(%rsi),%xmm7
679	movdqu	32 + 128(%rsi),%xmm11
680	movdqu	48 + 128(%rsi),%xmm15
681	pxor	%xmm3,%xmm1
682	pxor	%xmm7,%xmm5
683	pxor	%xmm11,%xmm9
684	pxor	%xmm13,%xmm15
685	movdqu	%xmm1,0 + 128(%rdi)
686	movdqu	%xmm5,16 + 128(%rdi)
687	movdqu	%xmm9,32 + 128(%rdi)
688	movdqu	%xmm15,48 + 128(%rdi)
689	movdqu	0 + 192(%rsi),%xmm3
690	movdqu	16 + 192(%rsi),%xmm7
691	movdqu	32 + 192(%rsi),%xmm11
692	movdqu	48 + 192(%rsi),%xmm15
693	pxor	%xmm3,%xmm0
694	pxor	%xmm7,%xmm4
695	pxor	%xmm11,%xmm8
696	pxor	0+80(%rbp),%xmm15
697	movdqu	%xmm0,0 + 192(%rdi)
698	movdqu	%xmm4,16 + 192(%rdi)
699	movdqu	%xmm8,32 + 192(%rdi)
700	movdqu	%xmm15,48 + 192(%rdi)
701
702	leaq	256(%rsi),%rsi
703	leaq	256(%rdi),%rdi
704	subq	$256,%rbx
705	jmp	L$open_sse_main_loop
706L$open_sse_tail:
707
708	testq	%rbx,%rbx
709	jz	L$open_sse_finalize
710	cmpq	$192,%rbx
711	ja	L$open_sse_tail_256
712	cmpq	$128,%rbx
713	ja	L$open_sse_tail_192
714	cmpq	$64,%rbx
715	ja	L$open_sse_tail_128
716	movdqa	L$chacha20_consts(%rip),%xmm0
717	movdqa	0+48(%rbp),%xmm4
718	movdqa	0+64(%rbp),%xmm8
719	movdqa	0+96(%rbp),%xmm12
720	paddd	L$sse_inc(%rip),%xmm12
721	movdqa	%xmm12,0+96(%rbp)
722
723	xorq	%r8,%r8
724	movq	%rbx,%rcx
725	cmpq	$16,%rcx
726	jb	L$open_sse_tail_64_rounds
727L$open_sse_tail_64_rounds_and_x1hash:
728	addq	0+0(%rsi,%r8,1),%r10
729	adcq	8+0(%rsi,%r8,1),%r11
730	adcq	$1,%r12
731	movq	0+0+0(%rbp),%rax
732	movq	%rax,%r15
733	mulq	%r10
734	movq	%rax,%r13
735	movq	%rdx,%r14
736	movq	0+0+0(%rbp),%rax
737	mulq	%r11
738	imulq	%r12,%r15
739	addq	%rax,%r14
740	adcq	%rdx,%r15
741	movq	8+0+0(%rbp),%rax
742	movq	%rax,%r9
743	mulq	%r10
744	addq	%rax,%r14
745	adcq	$0,%rdx
746	movq	%rdx,%r10
747	movq	8+0+0(%rbp),%rax
748	mulq	%r11
749	addq	%rax,%r15
750	adcq	$0,%rdx
751	imulq	%r12,%r9
752	addq	%r10,%r15
753	adcq	%rdx,%r9
754	movq	%r13,%r10
755	movq	%r14,%r11
756	movq	%r15,%r12
757	andq	$3,%r12
758	movq	%r15,%r13
759	andq	$-4,%r13
760	movq	%r9,%r14
761	shrdq	$2,%r9,%r15
762	shrq	$2,%r9
763	addq	%r13,%r15
764	adcq	%r14,%r9
765	addq	%r15,%r10
766	adcq	%r9,%r11
767	adcq	$0,%r12
768
769	subq	$16,%rcx
770L$open_sse_tail_64_rounds:
771	addq	$16,%r8
772	paddd	%xmm4,%xmm0
773	pxor	%xmm0,%xmm12
774	pshufb	L$rol16(%rip),%xmm12
775	paddd	%xmm12,%xmm8
776	pxor	%xmm8,%xmm4
777	movdqa	%xmm4,%xmm3
778	pslld	$12,%xmm3
779	psrld	$20,%xmm4
780	pxor	%xmm3,%xmm4
781	paddd	%xmm4,%xmm0
782	pxor	%xmm0,%xmm12
783	pshufb	L$rol8(%rip),%xmm12
784	paddd	%xmm12,%xmm8
785	pxor	%xmm8,%xmm4
786	movdqa	%xmm4,%xmm3
787	pslld	$7,%xmm3
788	psrld	$25,%xmm4
789	pxor	%xmm3,%xmm4
790.byte	102,15,58,15,228,4
791.byte	102,69,15,58,15,192,8
792.byte	102,69,15,58,15,228,12
793	paddd	%xmm4,%xmm0
794	pxor	%xmm0,%xmm12
795	pshufb	L$rol16(%rip),%xmm12
796	paddd	%xmm12,%xmm8
797	pxor	%xmm8,%xmm4
798	movdqa	%xmm4,%xmm3
799	pslld	$12,%xmm3
800	psrld	$20,%xmm4
801	pxor	%xmm3,%xmm4
802	paddd	%xmm4,%xmm0
803	pxor	%xmm0,%xmm12
804	pshufb	L$rol8(%rip),%xmm12
805	paddd	%xmm12,%xmm8
806	pxor	%xmm8,%xmm4
807	movdqa	%xmm4,%xmm3
808	pslld	$7,%xmm3
809	psrld	$25,%xmm4
810	pxor	%xmm3,%xmm4
811.byte	102,15,58,15,228,12
812.byte	102,69,15,58,15,192,8
813.byte	102,69,15,58,15,228,4
814
815	cmpq	$16,%rcx
816	jae	L$open_sse_tail_64_rounds_and_x1hash
817	cmpq	$160,%r8
818	jne	L$open_sse_tail_64_rounds
819	paddd	L$chacha20_consts(%rip),%xmm0
820	paddd	0+48(%rbp),%xmm4
821	paddd	0+64(%rbp),%xmm8
822	paddd	0+96(%rbp),%xmm12
823
824	jmp	L$open_sse_tail_64_dec_loop
825
826L$open_sse_tail_128:
827	movdqa	L$chacha20_consts(%rip),%xmm0
828	movdqa	0+48(%rbp),%xmm4
829	movdqa	0+64(%rbp),%xmm8
830	movdqa	%xmm0,%xmm1
831	movdqa	%xmm4,%xmm5
832	movdqa	%xmm8,%xmm9
833	movdqa	0+96(%rbp),%xmm13
834	paddd	L$sse_inc(%rip),%xmm13
835	movdqa	%xmm13,%xmm12
836	paddd	L$sse_inc(%rip),%xmm12
837	movdqa	%xmm12,0+96(%rbp)
838	movdqa	%xmm13,0+112(%rbp)
839
840	movq	%rbx,%rcx
841	andq	$-16,%rcx
842	xorq	%r8,%r8
843L$open_sse_tail_128_rounds_and_x1hash:
844	addq	0+0(%rsi,%r8,1),%r10
845	adcq	8+0(%rsi,%r8,1),%r11
846	adcq	$1,%r12
847	movq	0+0+0(%rbp),%rax
848	movq	%rax,%r15
849	mulq	%r10
850	movq	%rax,%r13
851	movq	%rdx,%r14
852	movq	0+0+0(%rbp),%rax
853	mulq	%r11
854	imulq	%r12,%r15
855	addq	%rax,%r14
856	adcq	%rdx,%r15
857	movq	8+0+0(%rbp),%rax
858	movq	%rax,%r9
859	mulq	%r10
860	addq	%rax,%r14
861	adcq	$0,%rdx
862	movq	%rdx,%r10
863	movq	8+0+0(%rbp),%rax
864	mulq	%r11
865	addq	%rax,%r15
866	adcq	$0,%rdx
867	imulq	%r12,%r9
868	addq	%r10,%r15
869	adcq	%rdx,%r9
870	movq	%r13,%r10
871	movq	%r14,%r11
872	movq	%r15,%r12
873	andq	$3,%r12
874	movq	%r15,%r13
875	andq	$-4,%r13
876	movq	%r9,%r14
877	shrdq	$2,%r9,%r15
878	shrq	$2,%r9
879	addq	%r13,%r15
880	adcq	%r14,%r9
881	addq	%r15,%r10
882	adcq	%r9,%r11
883	adcq	$0,%r12
884
885L$open_sse_tail_128_rounds:
886	addq	$16,%r8
887	paddd	%xmm4,%xmm0
888	pxor	%xmm0,%xmm12
889	pshufb	L$rol16(%rip),%xmm12
890	paddd	%xmm12,%xmm8
891	pxor	%xmm8,%xmm4
892	movdqa	%xmm4,%xmm3
893	pslld	$12,%xmm3
894	psrld	$20,%xmm4
895	pxor	%xmm3,%xmm4
896	paddd	%xmm4,%xmm0
897	pxor	%xmm0,%xmm12
898	pshufb	L$rol8(%rip),%xmm12
899	paddd	%xmm12,%xmm8
900	pxor	%xmm8,%xmm4
901	movdqa	%xmm4,%xmm3
902	pslld	$7,%xmm3
903	psrld	$25,%xmm4
904	pxor	%xmm3,%xmm4
905.byte	102,15,58,15,228,4
906.byte	102,69,15,58,15,192,8
907.byte	102,69,15,58,15,228,12
908	paddd	%xmm5,%xmm1
909	pxor	%xmm1,%xmm13
910	pshufb	L$rol16(%rip),%xmm13
911	paddd	%xmm13,%xmm9
912	pxor	%xmm9,%xmm5
913	movdqa	%xmm5,%xmm3
914	pslld	$12,%xmm3
915	psrld	$20,%xmm5
916	pxor	%xmm3,%xmm5
917	paddd	%xmm5,%xmm1
918	pxor	%xmm1,%xmm13
919	pshufb	L$rol8(%rip),%xmm13
920	paddd	%xmm13,%xmm9
921	pxor	%xmm9,%xmm5
922	movdqa	%xmm5,%xmm3
923	pslld	$7,%xmm3
924	psrld	$25,%xmm5
925	pxor	%xmm3,%xmm5
926.byte	102,15,58,15,237,4
927.byte	102,69,15,58,15,201,8
928.byte	102,69,15,58,15,237,12
929	paddd	%xmm4,%xmm0
930	pxor	%xmm0,%xmm12
931	pshufb	L$rol16(%rip),%xmm12
932	paddd	%xmm12,%xmm8
933	pxor	%xmm8,%xmm4
934	movdqa	%xmm4,%xmm3
935	pslld	$12,%xmm3
936	psrld	$20,%xmm4
937	pxor	%xmm3,%xmm4
938	paddd	%xmm4,%xmm0
939	pxor	%xmm0,%xmm12
940	pshufb	L$rol8(%rip),%xmm12
941	paddd	%xmm12,%xmm8
942	pxor	%xmm8,%xmm4
943	movdqa	%xmm4,%xmm3
944	pslld	$7,%xmm3
945	psrld	$25,%xmm4
946	pxor	%xmm3,%xmm4
947.byte	102,15,58,15,228,12
948.byte	102,69,15,58,15,192,8
949.byte	102,69,15,58,15,228,4
950	paddd	%xmm5,%xmm1
951	pxor	%xmm1,%xmm13
952	pshufb	L$rol16(%rip),%xmm13
953	paddd	%xmm13,%xmm9
954	pxor	%xmm9,%xmm5
955	movdqa	%xmm5,%xmm3
956	pslld	$12,%xmm3
957	psrld	$20,%xmm5
958	pxor	%xmm3,%xmm5
959	paddd	%xmm5,%xmm1
960	pxor	%xmm1,%xmm13
961	pshufb	L$rol8(%rip),%xmm13
962	paddd	%xmm13,%xmm9
963	pxor	%xmm9,%xmm5
964	movdqa	%xmm5,%xmm3
965	pslld	$7,%xmm3
966	psrld	$25,%xmm5
967	pxor	%xmm3,%xmm5
968.byte	102,15,58,15,237,12
969.byte	102,69,15,58,15,201,8
970.byte	102,69,15,58,15,237,4
971
972	cmpq	%rcx,%r8
973	jb	L$open_sse_tail_128_rounds_and_x1hash
974	cmpq	$160,%r8
975	jne	L$open_sse_tail_128_rounds
976	paddd	L$chacha20_consts(%rip),%xmm1
977	paddd	0+48(%rbp),%xmm5
978	paddd	0+64(%rbp),%xmm9
979	paddd	0+112(%rbp),%xmm13
980	paddd	L$chacha20_consts(%rip),%xmm0
981	paddd	0+48(%rbp),%xmm4
982	paddd	0+64(%rbp),%xmm8
983	paddd	0+96(%rbp),%xmm12
984	movdqu	0 + 0(%rsi),%xmm3
985	movdqu	16 + 0(%rsi),%xmm7
986	movdqu	32 + 0(%rsi),%xmm11
987	movdqu	48 + 0(%rsi),%xmm15
988	pxor	%xmm3,%xmm1
989	pxor	%xmm7,%xmm5
990	pxor	%xmm11,%xmm9
991	pxor	%xmm13,%xmm15
992	movdqu	%xmm1,0 + 0(%rdi)
993	movdqu	%xmm5,16 + 0(%rdi)
994	movdqu	%xmm9,32 + 0(%rdi)
995	movdqu	%xmm15,48 + 0(%rdi)
996
997	subq	$64,%rbx
998	leaq	64(%rsi),%rsi
999	leaq	64(%rdi),%rdi
1000	jmp	L$open_sse_tail_64_dec_loop
1001
1002L$open_sse_tail_192:
1003	movdqa	L$chacha20_consts(%rip),%xmm0
1004	movdqa	0+48(%rbp),%xmm4
1005	movdqa	0+64(%rbp),%xmm8
1006	movdqa	%xmm0,%xmm1
1007	movdqa	%xmm4,%xmm5
1008	movdqa	%xmm8,%xmm9
1009	movdqa	%xmm0,%xmm2
1010	movdqa	%xmm4,%xmm6
1011	movdqa	%xmm8,%xmm10
1012	movdqa	0+96(%rbp),%xmm14
1013	paddd	L$sse_inc(%rip),%xmm14
1014	movdqa	%xmm14,%xmm13
1015	paddd	L$sse_inc(%rip),%xmm13
1016	movdqa	%xmm13,%xmm12
1017	paddd	L$sse_inc(%rip),%xmm12
1018	movdqa	%xmm12,0+96(%rbp)
1019	movdqa	%xmm13,0+112(%rbp)
1020	movdqa	%xmm14,0+128(%rbp)
1021
1022	movq	%rbx,%rcx
1023	movq	$160,%r8
1024	cmpq	$160,%rcx
1025	cmovgq	%r8,%rcx
1026	andq	$-16,%rcx
1027	xorq	%r8,%r8
1028L$open_sse_tail_192_rounds_and_x1hash:
1029	addq	0+0(%rsi,%r8,1),%r10
1030	adcq	8+0(%rsi,%r8,1),%r11
1031	adcq	$1,%r12
1032	movq	0+0+0(%rbp),%rax
1033	movq	%rax,%r15
1034	mulq	%r10
1035	movq	%rax,%r13
1036	movq	%rdx,%r14
1037	movq	0+0+0(%rbp),%rax
1038	mulq	%r11
1039	imulq	%r12,%r15
1040	addq	%rax,%r14
1041	adcq	%rdx,%r15
1042	movq	8+0+0(%rbp),%rax
1043	movq	%rax,%r9
1044	mulq	%r10
1045	addq	%rax,%r14
1046	adcq	$0,%rdx
1047	movq	%rdx,%r10
1048	movq	8+0+0(%rbp),%rax
1049	mulq	%r11
1050	addq	%rax,%r15
1051	adcq	$0,%rdx
1052	imulq	%r12,%r9
1053	addq	%r10,%r15
1054	adcq	%rdx,%r9
1055	movq	%r13,%r10
1056	movq	%r14,%r11
1057	movq	%r15,%r12
1058	andq	$3,%r12
1059	movq	%r15,%r13
1060	andq	$-4,%r13
1061	movq	%r9,%r14
1062	shrdq	$2,%r9,%r15
1063	shrq	$2,%r9
1064	addq	%r13,%r15
1065	adcq	%r14,%r9
1066	addq	%r15,%r10
1067	adcq	%r9,%r11
1068	adcq	$0,%r12
1069
1070L$open_sse_tail_192_rounds:
1071	addq	$16,%r8
1072	paddd	%xmm4,%xmm0
1073	pxor	%xmm0,%xmm12
1074	pshufb	L$rol16(%rip),%xmm12
1075	paddd	%xmm12,%xmm8
1076	pxor	%xmm8,%xmm4
1077	movdqa	%xmm4,%xmm3
1078	pslld	$12,%xmm3
1079	psrld	$20,%xmm4
1080	pxor	%xmm3,%xmm4
1081	paddd	%xmm4,%xmm0
1082	pxor	%xmm0,%xmm12
1083	pshufb	L$rol8(%rip),%xmm12
1084	paddd	%xmm12,%xmm8
1085	pxor	%xmm8,%xmm4
1086	movdqa	%xmm4,%xmm3
1087	pslld	$7,%xmm3
1088	psrld	$25,%xmm4
1089	pxor	%xmm3,%xmm4
1090.byte	102,15,58,15,228,4
1091.byte	102,69,15,58,15,192,8
1092.byte	102,69,15,58,15,228,12
1093	paddd	%xmm5,%xmm1
1094	pxor	%xmm1,%xmm13
1095	pshufb	L$rol16(%rip),%xmm13
1096	paddd	%xmm13,%xmm9
1097	pxor	%xmm9,%xmm5
1098	movdqa	%xmm5,%xmm3
1099	pslld	$12,%xmm3
1100	psrld	$20,%xmm5
1101	pxor	%xmm3,%xmm5
1102	paddd	%xmm5,%xmm1
1103	pxor	%xmm1,%xmm13
1104	pshufb	L$rol8(%rip),%xmm13
1105	paddd	%xmm13,%xmm9
1106	pxor	%xmm9,%xmm5
1107	movdqa	%xmm5,%xmm3
1108	pslld	$7,%xmm3
1109	psrld	$25,%xmm5
1110	pxor	%xmm3,%xmm5
1111.byte	102,15,58,15,237,4
1112.byte	102,69,15,58,15,201,8
1113.byte	102,69,15,58,15,237,12
1114	paddd	%xmm6,%xmm2
1115	pxor	%xmm2,%xmm14
1116	pshufb	L$rol16(%rip),%xmm14
1117	paddd	%xmm14,%xmm10
1118	pxor	%xmm10,%xmm6
1119	movdqa	%xmm6,%xmm3
1120	pslld	$12,%xmm3
1121	psrld	$20,%xmm6
1122	pxor	%xmm3,%xmm6
1123	paddd	%xmm6,%xmm2
1124	pxor	%xmm2,%xmm14
1125	pshufb	L$rol8(%rip),%xmm14
1126	paddd	%xmm14,%xmm10
1127	pxor	%xmm10,%xmm6
1128	movdqa	%xmm6,%xmm3
1129	pslld	$7,%xmm3
1130	psrld	$25,%xmm6
1131	pxor	%xmm3,%xmm6
1132.byte	102,15,58,15,246,4
1133.byte	102,69,15,58,15,210,8
1134.byte	102,69,15,58,15,246,12
1135	paddd	%xmm4,%xmm0
1136	pxor	%xmm0,%xmm12
1137	pshufb	L$rol16(%rip),%xmm12
1138	paddd	%xmm12,%xmm8
1139	pxor	%xmm8,%xmm4
1140	movdqa	%xmm4,%xmm3
1141	pslld	$12,%xmm3
1142	psrld	$20,%xmm4
1143	pxor	%xmm3,%xmm4
1144	paddd	%xmm4,%xmm0
1145	pxor	%xmm0,%xmm12
1146	pshufb	L$rol8(%rip),%xmm12
1147	paddd	%xmm12,%xmm8
1148	pxor	%xmm8,%xmm4
1149	movdqa	%xmm4,%xmm3
1150	pslld	$7,%xmm3
1151	psrld	$25,%xmm4
1152	pxor	%xmm3,%xmm4
1153.byte	102,15,58,15,228,12
1154.byte	102,69,15,58,15,192,8
1155.byte	102,69,15,58,15,228,4
1156	paddd	%xmm5,%xmm1
1157	pxor	%xmm1,%xmm13
1158	pshufb	L$rol16(%rip),%xmm13
1159	paddd	%xmm13,%xmm9
1160	pxor	%xmm9,%xmm5
1161	movdqa	%xmm5,%xmm3
1162	pslld	$12,%xmm3
1163	psrld	$20,%xmm5
1164	pxor	%xmm3,%xmm5
1165	paddd	%xmm5,%xmm1
1166	pxor	%xmm1,%xmm13
1167	pshufb	L$rol8(%rip),%xmm13
1168	paddd	%xmm13,%xmm9
1169	pxor	%xmm9,%xmm5
1170	movdqa	%xmm5,%xmm3
1171	pslld	$7,%xmm3
1172	psrld	$25,%xmm5
1173	pxor	%xmm3,%xmm5
1174.byte	102,15,58,15,237,12
1175.byte	102,69,15,58,15,201,8
1176.byte	102,69,15,58,15,237,4
1177	paddd	%xmm6,%xmm2
1178	pxor	%xmm2,%xmm14
1179	pshufb	L$rol16(%rip),%xmm14
1180	paddd	%xmm14,%xmm10
1181	pxor	%xmm10,%xmm6
1182	movdqa	%xmm6,%xmm3
1183	pslld	$12,%xmm3
1184	psrld	$20,%xmm6
1185	pxor	%xmm3,%xmm6
1186	paddd	%xmm6,%xmm2
1187	pxor	%xmm2,%xmm14
1188	pshufb	L$rol8(%rip),%xmm14
1189	paddd	%xmm14,%xmm10
1190	pxor	%xmm10,%xmm6
1191	movdqa	%xmm6,%xmm3
1192	pslld	$7,%xmm3
1193	psrld	$25,%xmm6
1194	pxor	%xmm3,%xmm6
1195.byte	102,15,58,15,246,12
1196.byte	102,69,15,58,15,210,8
1197.byte	102,69,15,58,15,246,4
1198
1199	cmpq	%rcx,%r8
1200	jb	L$open_sse_tail_192_rounds_and_x1hash
1201	cmpq	$160,%r8
1202	jne	L$open_sse_tail_192_rounds
1203	cmpq	$176,%rbx
1204	jb	L$open_sse_tail_192_finish
1205	addq	0+160(%rsi),%r10
1206	adcq	8+160(%rsi),%r11
1207	adcq	$1,%r12
1208	movq	0+0+0(%rbp),%rax
1209	movq	%rax,%r15
1210	mulq	%r10
1211	movq	%rax,%r13
1212	movq	%rdx,%r14
1213	movq	0+0+0(%rbp),%rax
1214	mulq	%r11
1215	imulq	%r12,%r15
1216	addq	%rax,%r14
1217	adcq	%rdx,%r15
1218	movq	8+0+0(%rbp),%rax
1219	movq	%rax,%r9
1220	mulq	%r10
1221	addq	%rax,%r14
1222	adcq	$0,%rdx
1223	movq	%rdx,%r10
1224	movq	8+0+0(%rbp),%rax
1225	mulq	%r11
1226	addq	%rax,%r15
1227	adcq	$0,%rdx
1228	imulq	%r12,%r9
1229	addq	%r10,%r15
1230	adcq	%rdx,%r9
1231	movq	%r13,%r10
1232	movq	%r14,%r11
1233	movq	%r15,%r12
1234	andq	$3,%r12
1235	movq	%r15,%r13
1236	andq	$-4,%r13
1237	movq	%r9,%r14
1238	shrdq	$2,%r9,%r15
1239	shrq	$2,%r9
1240	addq	%r13,%r15
1241	adcq	%r14,%r9
1242	addq	%r15,%r10
1243	adcq	%r9,%r11
1244	adcq	$0,%r12
1245
1246	cmpq	$192,%rbx
1247	jb	L$open_sse_tail_192_finish
1248	addq	0+176(%rsi),%r10
1249	adcq	8+176(%rsi),%r11
1250	adcq	$1,%r12
1251	movq	0+0+0(%rbp),%rax
1252	movq	%rax,%r15
1253	mulq	%r10
1254	movq	%rax,%r13
1255	movq	%rdx,%r14
1256	movq	0+0+0(%rbp),%rax
1257	mulq	%r11
1258	imulq	%r12,%r15
1259	addq	%rax,%r14
1260	adcq	%rdx,%r15
1261	movq	8+0+0(%rbp),%rax
1262	movq	%rax,%r9
1263	mulq	%r10
1264	addq	%rax,%r14
1265	adcq	$0,%rdx
1266	movq	%rdx,%r10
1267	movq	8+0+0(%rbp),%rax
1268	mulq	%r11
1269	addq	%rax,%r15
1270	adcq	$0,%rdx
1271	imulq	%r12,%r9
1272	addq	%r10,%r15
1273	adcq	%rdx,%r9
1274	movq	%r13,%r10
1275	movq	%r14,%r11
1276	movq	%r15,%r12
1277	andq	$3,%r12
1278	movq	%r15,%r13
1279	andq	$-4,%r13
1280	movq	%r9,%r14
1281	shrdq	$2,%r9,%r15
1282	shrq	$2,%r9
1283	addq	%r13,%r15
1284	adcq	%r14,%r9
1285	addq	%r15,%r10
1286	adcq	%r9,%r11
1287	adcq	$0,%r12
1288
1289L$open_sse_tail_192_finish:
1290	paddd	L$chacha20_consts(%rip),%xmm2
1291	paddd	0+48(%rbp),%xmm6
1292	paddd	0+64(%rbp),%xmm10
1293	paddd	0+128(%rbp),%xmm14
1294	paddd	L$chacha20_consts(%rip),%xmm1
1295	paddd	0+48(%rbp),%xmm5
1296	paddd	0+64(%rbp),%xmm9
1297	paddd	0+112(%rbp),%xmm13
1298	paddd	L$chacha20_consts(%rip),%xmm0
1299	paddd	0+48(%rbp),%xmm4
1300	paddd	0+64(%rbp),%xmm8
1301	paddd	0+96(%rbp),%xmm12
1302	movdqu	0 + 0(%rsi),%xmm3
1303	movdqu	16 + 0(%rsi),%xmm7
1304	movdqu	32 + 0(%rsi),%xmm11
1305	movdqu	48 + 0(%rsi),%xmm15
1306	pxor	%xmm3,%xmm2
1307	pxor	%xmm7,%xmm6
1308	pxor	%xmm11,%xmm10
1309	pxor	%xmm14,%xmm15
1310	movdqu	%xmm2,0 + 0(%rdi)
1311	movdqu	%xmm6,16 + 0(%rdi)
1312	movdqu	%xmm10,32 + 0(%rdi)
1313	movdqu	%xmm15,48 + 0(%rdi)
1314	movdqu	0 + 64(%rsi),%xmm3
1315	movdqu	16 + 64(%rsi),%xmm7
1316	movdqu	32 + 64(%rsi),%xmm11
1317	movdqu	48 + 64(%rsi),%xmm15
1318	pxor	%xmm3,%xmm1
1319	pxor	%xmm7,%xmm5
1320	pxor	%xmm11,%xmm9
1321	pxor	%xmm13,%xmm15
1322	movdqu	%xmm1,0 + 64(%rdi)
1323	movdqu	%xmm5,16 + 64(%rdi)
1324	movdqu	%xmm9,32 + 64(%rdi)
1325	movdqu	%xmm15,48 + 64(%rdi)
1326
1327	subq	$128,%rbx
1328	leaq	128(%rsi),%rsi
1329	leaq	128(%rdi),%rdi
1330	jmp	L$open_sse_tail_64_dec_loop
1331
1332L$open_sse_tail_256:
1333	movdqa	L$chacha20_consts(%rip),%xmm0
1334	movdqa	0+48(%rbp),%xmm4
1335	movdqa	0+64(%rbp),%xmm8
1336	movdqa	%xmm0,%xmm1
1337	movdqa	%xmm4,%xmm5
1338	movdqa	%xmm8,%xmm9
1339	movdqa	%xmm0,%xmm2
1340	movdqa	%xmm4,%xmm6
1341	movdqa	%xmm8,%xmm10
1342	movdqa	%xmm0,%xmm3
1343	movdqa	%xmm4,%xmm7
1344	movdqa	%xmm8,%xmm11
1345	movdqa	0+96(%rbp),%xmm15
1346	paddd	L$sse_inc(%rip),%xmm15
1347	movdqa	%xmm15,%xmm14
1348	paddd	L$sse_inc(%rip),%xmm14
1349	movdqa	%xmm14,%xmm13
1350	paddd	L$sse_inc(%rip),%xmm13
1351	movdqa	%xmm13,%xmm12
1352	paddd	L$sse_inc(%rip),%xmm12
1353	movdqa	%xmm12,0+96(%rbp)
1354	movdqa	%xmm13,0+112(%rbp)
1355	movdqa	%xmm14,0+128(%rbp)
1356	movdqa	%xmm15,0+144(%rbp)
1357
1358	xorq	%r8,%r8
1359L$open_sse_tail_256_rounds_and_x1hash:
1360	addq	0+0(%rsi,%r8,1),%r10
1361	adcq	8+0(%rsi,%r8,1),%r11
1362	adcq	$1,%r12
1363	movdqa	%xmm11,0+80(%rbp)
1364	paddd	%xmm4,%xmm0
1365	pxor	%xmm0,%xmm12
1366	pshufb	L$rol16(%rip),%xmm12
1367	paddd	%xmm12,%xmm8
1368	pxor	%xmm8,%xmm4
1369	movdqa	%xmm4,%xmm11
1370	pslld	$12,%xmm11
1371	psrld	$20,%xmm4
1372	pxor	%xmm11,%xmm4
1373	paddd	%xmm4,%xmm0
1374	pxor	%xmm0,%xmm12
1375	pshufb	L$rol8(%rip),%xmm12
1376	paddd	%xmm12,%xmm8
1377	pxor	%xmm8,%xmm4
1378	movdqa	%xmm4,%xmm11
1379	pslld	$7,%xmm11
1380	psrld	$25,%xmm4
1381	pxor	%xmm11,%xmm4
1382.byte	102,15,58,15,228,4
1383.byte	102,69,15,58,15,192,8
1384.byte	102,69,15,58,15,228,12
1385	paddd	%xmm5,%xmm1
1386	pxor	%xmm1,%xmm13
1387	pshufb	L$rol16(%rip),%xmm13
1388	paddd	%xmm13,%xmm9
1389	pxor	%xmm9,%xmm5
1390	movdqa	%xmm5,%xmm11
1391	pslld	$12,%xmm11
1392	psrld	$20,%xmm5
1393	pxor	%xmm11,%xmm5
1394	paddd	%xmm5,%xmm1
1395	pxor	%xmm1,%xmm13
1396	pshufb	L$rol8(%rip),%xmm13
1397	paddd	%xmm13,%xmm9
1398	pxor	%xmm9,%xmm5
1399	movdqa	%xmm5,%xmm11
1400	pslld	$7,%xmm11
1401	psrld	$25,%xmm5
1402	pxor	%xmm11,%xmm5
1403.byte	102,15,58,15,237,4
1404.byte	102,69,15,58,15,201,8
1405.byte	102,69,15,58,15,237,12
1406	paddd	%xmm6,%xmm2
1407	pxor	%xmm2,%xmm14
1408	pshufb	L$rol16(%rip),%xmm14
1409	paddd	%xmm14,%xmm10
1410	pxor	%xmm10,%xmm6
1411	movdqa	%xmm6,%xmm11
1412	pslld	$12,%xmm11
1413	psrld	$20,%xmm6
1414	pxor	%xmm11,%xmm6
1415	paddd	%xmm6,%xmm2
1416	pxor	%xmm2,%xmm14
1417	pshufb	L$rol8(%rip),%xmm14
1418	paddd	%xmm14,%xmm10
1419	pxor	%xmm10,%xmm6
1420	movdqa	%xmm6,%xmm11
1421	pslld	$7,%xmm11
1422	psrld	$25,%xmm6
1423	pxor	%xmm11,%xmm6
1424.byte	102,15,58,15,246,4
1425.byte	102,69,15,58,15,210,8
1426.byte	102,69,15,58,15,246,12
1427	movdqa	0+80(%rbp),%xmm11
1428	movq	0+0+0(%rbp),%rax
1429	movq	%rax,%r15
1430	mulq	%r10
1431	movq	%rax,%r13
1432	movq	%rdx,%r14
1433	movq	0+0+0(%rbp),%rax
1434	mulq	%r11
1435	imulq	%r12,%r15
1436	addq	%rax,%r14
1437	adcq	%rdx,%r15
1438	movdqa	%xmm9,0+80(%rbp)
1439	paddd	%xmm7,%xmm3
1440	pxor	%xmm3,%xmm15
1441	pshufb	L$rol16(%rip),%xmm15
1442	paddd	%xmm15,%xmm11
1443	pxor	%xmm11,%xmm7
1444	movdqa	%xmm7,%xmm9
1445	pslld	$12,%xmm9
1446	psrld	$20,%xmm7
1447	pxor	%xmm9,%xmm7
1448	paddd	%xmm7,%xmm3
1449	pxor	%xmm3,%xmm15
1450	pshufb	L$rol8(%rip),%xmm15
1451	paddd	%xmm15,%xmm11
1452	pxor	%xmm11,%xmm7
1453	movdqa	%xmm7,%xmm9
1454	pslld	$7,%xmm9
1455	psrld	$25,%xmm7
1456	pxor	%xmm9,%xmm7
1457.byte	102,15,58,15,255,4
1458.byte	102,69,15,58,15,219,8
1459.byte	102,69,15,58,15,255,12
1460	movdqa	0+80(%rbp),%xmm9
1461	movq	8+0+0(%rbp),%rax
1462	movq	%rax,%r9
1463	mulq	%r10
1464	addq	%rax,%r14
1465	adcq	$0,%rdx
1466	movq	%rdx,%r10
1467	movq	8+0+0(%rbp),%rax
1468	mulq	%r11
1469	addq	%rax,%r15
1470	adcq	$0,%rdx
1471	movdqa	%xmm11,0+80(%rbp)
1472	paddd	%xmm4,%xmm0
1473	pxor	%xmm0,%xmm12
1474	pshufb	L$rol16(%rip),%xmm12
1475	paddd	%xmm12,%xmm8
1476	pxor	%xmm8,%xmm4
1477	movdqa	%xmm4,%xmm11
1478	pslld	$12,%xmm11
1479	psrld	$20,%xmm4
1480	pxor	%xmm11,%xmm4
1481	paddd	%xmm4,%xmm0
1482	pxor	%xmm0,%xmm12
1483	pshufb	L$rol8(%rip),%xmm12
1484	paddd	%xmm12,%xmm8
1485	pxor	%xmm8,%xmm4
1486	movdqa	%xmm4,%xmm11
1487	pslld	$7,%xmm11
1488	psrld	$25,%xmm4
1489	pxor	%xmm11,%xmm4
1490.byte	102,15,58,15,228,12
1491.byte	102,69,15,58,15,192,8
1492.byte	102,69,15,58,15,228,4
1493	paddd	%xmm5,%xmm1
1494	pxor	%xmm1,%xmm13
1495	pshufb	L$rol16(%rip),%xmm13
1496	paddd	%xmm13,%xmm9
1497	pxor	%xmm9,%xmm5
1498	movdqa	%xmm5,%xmm11
1499	pslld	$12,%xmm11
1500	psrld	$20,%xmm5
1501	pxor	%xmm11,%xmm5
1502	paddd	%xmm5,%xmm1
1503	pxor	%xmm1,%xmm13
1504	pshufb	L$rol8(%rip),%xmm13
1505	paddd	%xmm13,%xmm9
1506	pxor	%xmm9,%xmm5
1507	movdqa	%xmm5,%xmm11
1508	pslld	$7,%xmm11
1509	psrld	$25,%xmm5
1510	pxor	%xmm11,%xmm5
1511.byte	102,15,58,15,237,12
1512.byte	102,69,15,58,15,201,8
1513.byte	102,69,15,58,15,237,4
1514	imulq	%r12,%r9
1515	addq	%r10,%r15
1516	adcq	%rdx,%r9
1517	paddd	%xmm6,%xmm2
1518	pxor	%xmm2,%xmm14
1519	pshufb	L$rol16(%rip),%xmm14
1520	paddd	%xmm14,%xmm10
1521	pxor	%xmm10,%xmm6
1522	movdqa	%xmm6,%xmm11
1523	pslld	$12,%xmm11
1524	psrld	$20,%xmm6
1525	pxor	%xmm11,%xmm6
1526	paddd	%xmm6,%xmm2
1527	pxor	%xmm2,%xmm14
1528	pshufb	L$rol8(%rip),%xmm14
1529	paddd	%xmm14,%xmm10
1530	pxor	%xmm10,%xmm6
1531	movdqa	%xmm6,%xmm11
1532	pslld	$7,%xmm11
1533	psrld	$25,%xmm6
1534	pxor	%xmm11,%xmm6
1535.byte	102,15,58,15,246,12
1536.byte	102,69,15,58,15,210,8
1537.byte	102,69,15,58,15,246,4
1538	movdqa	0+80(%rbp),%xmm11
1539	movq	%r13,%r10
1540	movq	%r14,%r11
1541	movq	%r15,%r12
1542	andq	$3,%r12
1543	movq	%r15,%r13
1544	andq	$-4,%r13
1545	movq	%r9,%r14
1546	shrdq	$2,%r9,%r15
1547	shrq	$2,%r9
1548	addq	%r13,%r15
1549	adcq	%r14,%r9
1550	addq	%r15,%r10
1551	adcq	%r9,%r11
1552	adcq	$0,%r12
1553	movdqa	%xmm9,0+80(%rbp)
1554	paddd	%xmm7,%xmm3
1555	pxor	%xmm3,%xmm15
1556	pshufb	L$rol16(%rip),%xmm15
1557	paddd	%xmm15,%xmm11
1558	pxor	%xmm11,%xmm7
1559	movdqa	%xmm7,%xmm9
1560	pslld	$12,%xmm9
1561	psrld	$20,%xmm7
1562	pxor	%xmm9,%xmm7
1563	paddd	%xmm7,%xmm3
1564	pxor	%xmm3,%xmm15
1565	pshufb	L$rol8(%rip),%xmm15
1566	paddd	%xmm15,%xmm11
1567	pxor	%xmm11,%xmm7
1568	movdqa	%xmm7,%xmm9
1569	pslld	$7,%xmm9
1570	psrld	$25,%xmm7
1571	pxor	%xmm9,%xmm7
1572.byte	102,15,58,15,255,12
1573.byte	102,69,15,58,15,219,8
1574.byte	102,69,15,58,15,255,4
1575	movdqa	0+80(%rbp),%xmm9
1576
1577	addq	$16,%r8
1578	cmpq	$160,%r8
1579	jb	L$open_sse_tail_256_rounds_and_x1hash
1580
1581	movq	%rbx,%rcx
1582	andq	$-16,%rcx
1583L$open_sse_tail_256_hash:
1584	addq	0+0(%rsi,%r8,1),%r10
1585	adcq	8+0(%rsi,%r8,1),%r11
1586	adcq	$1,%r12
1587	movq	0+0+0(%rbp),%rax
1588	movq	%rax,%r15
1589	mulq	%r10
1590	movq	%rax,%r13
1591	movq	%rdx,%r14
1592	movq	0+0+0(%rbp),%rax
1593	mulq	%r11
1594	imulq	%r12,%r15
1595	addq	%rax,%r14
1596	adcq	%rdx,%r15
1597	movq	8+0+0(%rbp),%rax
1598	movq	%rax,%r9
1599	mulq	%r10
1600	addq	%rax,%r14
1601	adcq	$0,%rdx
1602	movq	%rdx,%r10
1603	movq	8+0+0(%rbp),%rax
1604	mulq	%r11
1605	addq	%rax,%r15
1606	adcq	$0,%rdx
1607	imulq	%r12,%r9
1608	addq	%r10,%r15
1609	adcq	%rdx,%r9
1610	movq	%r13,%r10
1611	movq	%r14,%r11
1612	movq	%r15,%r12
1613	andq	$3,%r12
1614	movq	%r15,%r13
1615	andq	$-4,%r13
1616	movq	%r9,%r14
1617	shrdq	$2,%r9,%r15
1618	shrq	$2,%r9
1619	addq	%r13,%r15
1620	adcq	%r14,%r9
1621	addq	%r15,%r10
1622	adcq	%r9,%r11
1623	adcq	$0,%r12
1624
1625	addq	$16,%r8
1626	cmpq	%rcx,%r8
1627	jb	L$open_sse_tail_256_hash
1628	paddd	L$chacha20_consts(%rip),%xmm3
1629	paddd	0+48(%rbp),%xmm7
1630	paddd	0+64(%rbp),%xmm11
1631	paddd	0+144(%rbp),%xmm15
1632	paddd	L$chacha20_consts(%rip),%xmm2
1633	paddd	0+48(%rbp),%xmm6
1634	paddd	0+64(%rbp),%xmm10
1635	paddd	0+128(%rbp),%xmm14
1636	paddd	L$chacha20_consts(%rip),%xmm1
1637	paddd	0+48(%rbp),%xmm5
1638	paddd	0+64(%rbp),%xmm9
1639	paddd	0+112(%rbp),%xmm13
1640	paddd	L$chacha20_consts(%rip),%xmm0
1641	paddd	0+48(%rbp),%xmm4
1642	paddd	0+64(%rbp),%xmm8
1643	paddd	0+96(%rbp),%xmm12
1644	movdqa	%xmm12,0+80(%rbp)
1645	movdqu	0 + 0(%rsi),%xmm12
1646	pxor	%xmm3,%xmm12
1647	movdqu	%xmm12,0 + 0(%rdi)
1648	movdqu	16 + 0(%rsi),%xmm12
1649	pxor	%xmm7,%xmm12
1650	movdqu	%xmm12,16 + 0(%rdi)
1651	movdqu	32 + 0(%rsi),%xmm12
1652	pxor	%xmm11,%xmm12
1653	movdqu	%xmm12,32 + 0(%rdi)
1654	movdqu	48 + 0(%rsi),%xmm12
1655	pxor	%xmm15,%xmm12
1656	movdqu	%xmm12,48 + 0(%rdi)
1657	movdqu	0 + 64(%rsi),%xmm3
1658	movdqu	16 + 64(%rsi),%xmm7
1659	movdqu	32 + 64(%rsi),%xmm11
1660	movdqu	48 + 64(%rsi),%xmm15
1661	pxor	%xmm3,%xmm2
1662	pxor	%xmm7,%xmm6
1663	pxor	%xmm11,%xmm10
1664	pxor	%xmm14,%xmm15
1665	movdqu	%xmm2,0 + 64(%rdi)
1666	movdqu	%xmm6,16 + 64(%rdi)
1667	movdqu	%xmm10,32 + 64(%rdi)
1668	movdqu	%xmm15,48 + 64(%rdi)
1669	movdqu	0 + 128(%rsi),%xmm3
1670	movdqu	16 + 128(%rsi),%xmm7
1671	movdqu	32 + 128(%rsi),%xmm11
1672	movdqu	48 + 128(%rsi),%xmm15
1673	pxor	%xmm3,%xmm1
1674	pxor	%xmm7,%xmm5
1675	pxor	%xmm11,%xmm9
1676	pxor	%xmm13,%xmm15
1677	movdqu	%xmm1,0 + 128(%rdi)
1678	movdqu	%xmm5,16 + 128(%rdi)
1679	movdqu	%xmm9,32 + 128(%rdi)
1680	movdqu	%xmm15,48 + 128(%rdi)
1681
1682	movdqa	0+80(%rbp),%xmm12
1683	subq	$192,%rbx
1684	leaq	192(%rsi),%rsi
1685	leaq	192(%rdi),%rdi
1686
1687
1688L$open_sse_tail_64_dec_loop:
1689	cmpq	$16,%rbx
1690	jb	L$open_sse_tail_16_init
1691	subq	$16,%rbx
1692	movdqu	(%rsi),%xmm3
1693	pxor	%xmm3,%xmm0
1694	movdqu	%xmm0,(%rdi)
1695	leaq	16(%rsi),%rsi
1696	leaq	16(%rdi),%rdi
1697	movdqa	%xmm4,%xmm0
1698	movdqa	%xmm8,%xmm4
1699	movdqa	%xmm12,%xmm8
1700	jmp	L$open_sse_tail_64_dec_loop
1701L$open_sse_tail_16_init:
1702	movdqa	%xmm0,%xmm1
1703
1704
1705L$open_sse_tail_16:
1706	testq	%rbx,%rbx
1707	jz	L$open_sse_finalize
1708
1709
1710
1711	pxor	%xmm3,%xmm3
1712	leaq	-1(%rsi,%rbx,1),%rsi
1713	movq	%rbx,%r8
1714L$open_sse_tail_16_compose:
1715	pslldq	$1,%xmm3
1716	pinsrb	$0,(%rsi),%xmm3
1717	subq	$1,%rsi
1718	subq	$1,%r8
1719	jnz	L$open_sse_tail_16_compose
1720
1721.byte	102,73,15,126,221
1722	pextrq	$1,%xmm3,%r14
1723
1724	pxor	%xmm1,%xmm3
1725
1726
1727L$open_sse_tail_16_extract:
1728	pextrb	$0,%xmm3,(%rdi)
1729	psrldq	$1,%xmm3
1730	addq	$1,%rdi
1731	subq	$1,%rbx
1732	jne	L$open_sse_tail_16_extract
1733
1734	addq	%r13,%r10
1735	adcq	%r14,%r11
1736	adcq	$1,%r12
1737	movq	0+0+0(%rbp),%rax
1738	movq	%rax,%r15
1739	mulq	%r10
1740	movq	%rax,%r13
1741	movq	%rdx,%r14
1742	movq	0+0+0(%rbp),%rax
1743	mulq	%r11
1744	imulq	%r12,%r15
1745	addq	%rax,%r14
1746	adcq	%rdx,%r15
1747	movq	8+0+0(%rbp),%rax
1748	movq	%rax,%r9
1749	mulq	%r10
1750	addq	%rax,%r14
1751	adcq	$0,%rdx
1752	movq	%rdx,%r10
1753	movq	8+0+0(%rbp),%rax
1754	mulq	%r11
1755	addq	%rax,%r15
1756	adcq	$0,%rdx
1757	imulq	%r12,%r9
1758	addq	%r10,%r15
1759	adcq	%rdx,%r9
1760	movq	%r13,%r10
1761	movq	%r14,%r11
1762	movq	%r15,%r12
1763	andq	$3,%r12
1764	movq	%r15,%r13
1765	andq	$-4,%r13
1766	movq	%r9,%r14
1767	shrdq	$2,%r9,%r15
1768	shrq	$2,%r9
1769	addq	%r13,%r15
1770	adcq	%r14,%r9
1771	addq	%r15,%r10
1772	adcq	%r9,%r11
1773	adcq	$0,%r12
1774
1775
1776L$open_sse_finalize:
1777	addq	0+0+32(%rbp),%r10
1778	adcq	8+0+32(%rbp),%r11
1779	adcq	$1,%r12
1780	movq	0+0+0(%rbp),%rax
1781	movq	%rax,%r15
1782	mulq	%r10
1783	movq	%rax,%r13
1784	movq	%rdx,%r14
1785	movq	0+0+0(%rbp),%rax
1786	mulq	%r11
1787	imulq	%r12,%r15
1788	addq	%rax,%r14
1789	adcq	%rdx,%r15
1790	movq	8+0+0(%rbp),%rax
1791	movq	%rax,%r9
1792	mulq	%r10
1793	addq	%rax,%r14
1794	adcq	$0,%rdx
1795	movq	%rdx,%r10
1796	movq	8+0+0(%rbp),%rax
1797	mulq	%r11
1798	addq	%rax,%r15
1799	adcq	$0,%rdx
1800	imulq	%r12,%r9
1801	addq	%r10,%r15
1802	adcq	%rdx,%r9
1803	movq	%r13,%r10
1804	movq	%r14,%r11
1805	movq	%r15,%r12
1806	andq	$3,%r12
1807	movq	%r15,%r13
1808	andq	$-4,%r13
1809	movq	%r9,%r14
1810	shrdq	$2,%r9,%r15
1811	shrq	$2,%r9
1812	addq	%r13,%r15
1813	adcq	%r14,%r9
1814	addq	%r15,%r10
1815	adcq	%r9,%r11
1816	adcq	$0,%r12
1817
1818
1819	movq	%r10,%r13
1820	movq	%r11,%r14
1821	movq	%r12,%r15
1822	subq	$-5,%r10
1823	sbbq	$-1,%r11
1824	sbbq	$3,%r12
1825	cmovcq	%r13,%r10
1826	cmovcq	%r14,%r11
1827	cmovcq	%r15,%r12
1828
1829	addq	0+0+16(%rbp),%r10
1830	adcq	8+0+16(%rbp),%r11
1831
1832
1833	addq	$288 + 0 + 32,%rsp
1834
1835
1836	popq	%r9
1837
1838	movq	%r10,(%r9)
1839	movq	%r11,8(%r9)
1840	popq	%r15
1841
1842	popq	%r14
1843
1844	popq	%r13
1845
1846	popq	%r12
1847
1848	popq	%rbx
1849
1850	popq	%rbp
1851
1852	.byte	0xf3,0xc3
1853
1854L$open_sse_128:
1855
1856	movdqu	L$chacha20_consts(%rip),%xmm0
1857	movdqa	%xmm0,%xmm1
1858	movdqa	%xmm0,%xmm2
1859	movdqu	0(%r9),%xmm4
1860	movdqa	%xmm4,%xmm5
1861	movdqa	%xmm4,%xmm6
1862	movdqu	16(%r9),%xmm8
1863	movdqa	%xmm8,%xmm9
1864	movdqa	%xmm8,%xmm10
1865	movdqu	32(%r9),%xmm12
1866	movdqa	%xmm12,%xmm13
1867	paddd	L$sse_inc(%rip),%xmm13
1868	movdqa	%xmm13,%xmm14
1869	paddd	L$sse_inc(%rip),%xmm14
1870	movdqa	%xmm4,%xmm7
1871	movdqa	%xmm8,%xmm11
1872	movdqa	%xmm13,%xmm15
1873	movq	$10,%r10
1874
1875L$open_sse_128_rounds:
1876	paddd	%xmm4,%xmm0
1877	pxor	%xmm0,%xmm12
1878	pshufb	L$rol16(%rip),%xmm12
1879	paddd	%xmm12,%xmm8
1880	pxor	%xmm8,%xmm4
1881	movdqa	%xmm4,%xmm3
1882	pslld	$12,%xmm3
1883	psrld	$20,%xmm4
1884	pxor	%xmm3,%xmm4
1885	paddd	%xmm4,%xmm0
1886	pxor	%xmm0,%xmm12
1887	pshufb	L$rol8(%rip),%xmm12
1888	paddd	%xmm12,%xmm8
1889	pxor	%xmm8,%xmm4
1890	movdqa	%xmm4,%xmm3
1891	pslld	$7,%xmm3
1892	psrld	$25,%xmm4
1893	pxor	%xmm3,%xmm4
1894.byte	102,15,58,15,228,4
1895.byte	102,69,15,58,15,192,8
1896.byte	102,69,15,58,15,228,12
1897	paddd	%xmm5,%xmm1
1898	pxor	%xmm1,%xmm13
1899	pshufb	L$rol16(%rip),%xmm13
1900	paddd	%xmm13,%xmm9
1901	pxor	%xmm9,%xmm5
1902	movdqa	%xmm5,%xmm3
1903	pslld	$12,%xmm3
1904	psrld	$20,%xmm5
1905	pxor	%xmm3,%xmm5
1906	paddd	%xmm5,%xmm1
1907	pxor	%xmm1,%xmm13
1908	pshufb	L$rol8(%rip),%xmm13
1909	paddd	%xmm13,%xmm9
1910	pxor	%xmm9,%xmm5
1911	movdqa	%xmm5,%xmm3
1912	pslld	$7,%xmm3
1913	psrld	$25,%xmm5
1914	pxor	%xmm3,%xmm5
1915.byte	102,15,58,15,237,4
1916.byte	102,69,15,58,15,201,8
1917.byte	102,69,15,58,15,237,12
1918	paddd	%xmm6,%xmm2
1919	pxor	%xmm2,%xmm14
1920	pshufb	L$rol16(%rip),%xmm14
1921	paddd	%xmm14,%xmm10
1922	pxor	%xmm10,%xmm6
1923	movdqa	%xmm6,%xmm3
1924	pslld	$12,%xmm3
1925	psrld	$20,%xmm6
1926	pxor	%xmm3,%xmm6
1927	paddd	%xmm6,%xmm2
1928	pxor	%xmm2,%xmm14
1929	pshufb	L$rol8(%rip),%xmm14
1930	paddd	%xmm14,%xmm10
1931	pxor	%xmm10,%xmm6
1932	movdqa	%xmm6,%xmm3
1933	pslld	$7,%xmm3
1934	psrld	$25,%xmm6
1935	pxor	%xmm3,%xmm6
1936.byte	102,15,58,15,246,4
1937.byte	102,69,15,58,15,210,8
1938.byte	102,69,15,58,15,246,12
1939	paddd	%xmm4,%xmm0
1940	pxor	%xmm0,%xmm12
1941	pshufb	L$rol16(%rip),%xmm12
1942	paddd	%xmm12,%xmm8
1943	pxor	%xmm8,%xmm4
1944	movdqa	%xmm4,%xmm3
1945	pslld	$12,%xmm3
1946	psrld	$20,%xmm4
1947	pxor	%xmm3,%xmm4
1948	paddd	%xmm4,%xmm0
1949	pxor	%xmm0,%xmm12
1950	pshufb	L$rol8(%rip),%xmm12
1951	paddd	%xmm12,%xmm8
1952	pxor	%xmm8,%xmm4
1953	movdqa	%xmm4,%xmm3
1954	pslld	$7,%xmm3
1955	psrld	$25,%xmm4
1956	pxor	%xmm3,%xmm4
1957.byte	102,15,58,15,228,12
1958.byte	102,69,15,58,15,192,8
1959.byte	102,69,15,58,15,228,4
1960	paddd	%xmm5,%xmm1
1961	pxor	%xmm1,%xmm13
1962	pshufb	L$rol16(%rip),%xmm13
1963	paddd	%xmm13,%xmm9
1964	pxor	%xmm9,%xmm5
1965	movdqa	%xmm5,%xmm3
1966	pslld	$12,%xmm3
1967	psrld	$20,%xmm5
1968	pxor	%xmm3,%xmm5
1969	paddd	%xmm5,%xmm1
1970	pxor	%xmm1,%xmm13
1971	pshufb	L$rol8(%rip),%xmm13
1972	paddd	%xmm13,%xmm9
1973	pxor	%xmm9,%xmm5
1974	movdqa	%xmm5,%xmm3
1975	pslld	$7,%xmm3
1976	psrld	$25,%xmm5
1977	pxor	%xmm3,%xmm5
1978.byte	102,15,58,15,237,12
1979.byte	102,69,15,58,15,201,8
1980.byte	102,69,15,58,15,237,4
1981	paddd	%xmm6,%xmm2
1982	pxor	%xmm2,%xmm14
1983	pshufb	L$rol16(%rip),%xmm14
1984	paddd	%xmm14,%xmm10
1985	pxor	%xmm10,%xmm6
1986	movdqa	%xmm6,%xmm3
1987	pslld	$12,%xmm3
1988	psrld	$20,%xmm6
1989	pxor	%xmm3,%xmm6
1990	paddd	%xmm6,%xmm2
1991	pxor	%xmm2,%xmm14
1992	pshufb	L$rol8(%rip),%xmm14
1993	paddd	%xmm14,%xmm10
1994	pxor	%xmm10,%xmm6
1995	movdqa	%xmm6,%xmm3
1996	pslld	$7,%xmm3
1997	psrld	$25,%xmm6
1998	pxor	%xmm3,%xmm6
1999.byte	102,15,58,15,246,12
2000.byte	102,69,15,58,15,210,8
2001.byte	102,69,15,58,15,246,4
2002
2003	decq	%r10
2004	jnz	L$open_sse_128_rounds
2005	paddd	L$chacha20_consts(%rip),%xmm0
2006	paddd	L$chacha20_consts(%rip),%xmm1
2007	paddd	L$chacha20_consts(%rip),%xmm2
2008	paddd	%xmm7,%xmm4
2009	paddd	%xmm7,%xmm5
2010	paddd	%xmm7,%xmm6
2011	paddd	%xmm11,%xmm9
2012	paddd	%xmm11,%xmm10
2013	paddd	%xmm15,%xmm13
2014	paddd	L$sse_inc(%rip),%xmm15
2015	paddd	%xmm15,%xmm14
2016
2017	pand	L$clamp(%rip),%xmm0
2018	movdqa	%xmm0,0+0(%rbp)
2019	movdqa	%xmm4,0+16(%rbp)
2020
2021	movq	%r8,%r8
2022	call	poly_hash_ad_internal
2023L$open_sse_128_xor_hash:
2024	cmpq	$16,%rbx
2025	jb	L$open_sse_tail_16
2026	subq	$16,%rbx
2027	addq	0+0(%rsi),%r10
2028	adcq	8+0(%rsi),%r11
2029	adcq	$1,%r12
2030
2031
2032	movdqu	0(%rsi),%xmm3
2033	pxor	%xmm3,%xmm1
2034	movdqu	%xmm1,0(%rdi)
2035	leaq	16(%rsi),%rsi
2036	leaq	16(%rdi),%rdi
2037	movq	0+0+0(%rbp),%rax
2038	movq	%rax,%r15
2039	mulq	%r10
2040	movq	%rax,%r13
2041	movq	%rdx,%r14
2042	movq	0+0+0(%rbp),%rax
2043	mulq	%r11
2044	imulq	%r12,%r15
2045	addq	%rax,%r14
2046	adcq	%rdx,%r15
2047	movq	8+0+0(%rbp),%rax
2048	movq	%rax,%r9
2049	mulq	%r10
2050	addq	%rax,%r14
2051	adcq	$0,%rdx
2052	movq	%rdx,%r10
2053	movq	8+0+0(%rbp),%rax
2054	mulq	%r11
2055	addq	%rax,%r15
2056	adcq	$0,%rdx
2057	imulq	%r12,%r9
2058	addq	%r10,%r15
2059	adcq	%rdx,%r9
2060	movq	%r13,%r10
2061	movq	%r14,%r11
2062	movq	%r15,%r12
2063	andq	$3,%r12
2064	movq	%r15,%r13
2065	andq	$-4,%r13
2066	movq	%r9,%r14
2067	shrdq	$2,%r9,%r15
2068	shrq	$2,%r9
2069	addq	%r13,%r15
2070	adcq	%r14,%r9
2071	addq	%r15,%r10
2072	adcq	%r9,%r11
2073	adcq	$0,%r12
2074
2075
2076	movdqa	%xmm5,%xmm1
2077	movdqa	%xmm9,%xmm5
2078	movdqa	%xmm13,%xmm9
2079	movdqa	%xmm2,%xmm13
2080	movdqa	%xmm6,%xmm2
2081	movdqa	%xmm10,%xmm6
2082	movdqa	%xmm14,%xmm10
2083	jmp	L$open_sse_128_xor_hash
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093.globl	_chacha20_poly1305_seal
2094.private_extern _chacha20_poly1305_seal
2095
2096.p2align	6
2097_chacha20_poly1305_seal:
2098
2099	pushq	%rbp
2100
2101	pushq	%rbx
2102
2103	pushq	%r12
2104
2105	pushq	%r13
2106
2107	pushq	%r14
2108
2109	pushq	%r15
2110
2111
2112
2113	pushq	%r9
2114
2115	subq	$288 + 0 + 32,%rsp
2116
2117	leaq	32(%rsp),%rbp
2118	andq	$-32,%rbp
2119
2120	movq	56(%r9),%rbx
2121	addq	%rdx,%rbx
2122	movq	%r8,0+0+32(%rbp)
2123	movq	%rbx,8+0+32(%rbp)
2124	movq	%rdx,%rbx
2125
2126	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
2127	andl	$288,%eax
2128	xorl	$288,%eax
2129	jz	chacha20_poly1305_seal_avx2
2130
2131	cmpq	$128,%rbx
2132	jbe	L$seal_sse_128
2133
2134	movdqa	L$chacha20_consts(%rip),%xmm0
2135	movdqu	0(%r9),%xmm4
2136	movdqu	16(%r9),%xmm8
2137	movdqu	32(%r9),%xmm12
2138
2139	movdqa	%xmm0,%xmm1
2140	movdqa	%xmm0,%xmm2
2141	movdqa	%xmm0,%xmm3
2142	movdqa	%xmm4,%xmm5
2143	movdqa	%xmm4,%xmm6
2144	movdqa	%xmm4,%xmm7
2145	movdqa	%xmm8,%xmm9
2146	movdqa	%xmm8,%xmm10
2147	movdqa	%xmm8,%xmm11
2148	movdqa	%xmm12,%xmm15
2149	paddd	L$sse_inc(%rip),%xmm12
2150	movdqa	%xmm12,%xmm14
2151	paddd	L$sse_inc(%rip),%xmm12
2152	movdqa	%xmm12,%xmm13
2153	paddd	L$sse_inc(%rip),%xmm12
2154
2155	movdqa	%xmm4,0+48(%rbp)
2156	movdqa	%xmm8,0+64(%rbp)
2157	movdqa	%xmm12,0+96(%rbp)
2158	movdqa	%xmm13,0+112(%rbp)
2159	movdqa	%xmm14,0+128(%rbp)
2160	movdqa	%xmm15,0+144(%rbp)
2161	movq	$10,%r10
2162L$seal_sse_init_rounds:
2163	movdqa	%xmm8,0+80(%rbp)
2164	movdqa	L$rol16(%rip),%xmm8
2165	paddd	%xmm7,%xmm3
2166	paddd	%xmm6,%xmm2
2167	paddd	%xmm5,%xmm1
2168	paddd	%xmm4,%xmm0
2169	pxor	%xmm3,%xmm15
2170	pxor	%xmm2,%xmm14
2171	pxor	%xmm1,%xmm13
2172	pxor	%xmm0,%xmm12
2173.byte	102,69,15,56,0,248
2174.byte	102,69,15,56,0,240
2175.byte	102,69,15,56,0,232
2176.byte	102,69,15,56,0,224
2177	movdqa	0+80(%rbp),%xmm8
2178	paddd	%xmm15,%xmm11
2179	paddd	%xmm14,%xmm10
2180	paddd	%xmm13,%xmm9
2181	paddd	%xmm12,%xmm8
2182	pxor	%xmm11,%xmm7
2183	pxor	%xmm10,%xmm6
2184	pxor	%xmm9,%xmm5
2185	pxor	%xmm8,%xmm4
2186	movdqa	%xmm8,0+80(%rbp)
2187	movdqa	%xmm7,%xmm8
2188	psrld	$20,%xmm8
2189	pslld	$32-20,%xmm7
2190	pxor	%xmm8,%xmm7
2191	movdqa	%xmm6,%xmm8
2192	psrld	$20,%xmm8
2193	pslld	$32-20,%xmm6
2194	pxor	%xmm8,%xmm6
2195	movdqa	%xmm5,%xmm8
2196	psrld	$20,%xmm8
2197	pslld	$32-20,%xmm5
2198	pxor	%xmm8,%xmm5
2199	movdqa	%xmm4,%xmm8
2200	psrld	$20,%xmm8
2201	pslld	$32-20,%xmm4
2202	pxor	%xmm8,%xmm4
2203	movdqa	L$rol8(%rip),%xmm8
2204	paddd	%xmm7,%xmm3
2205	paddd	%xmm6,%xmm2
2206	paddd	%xmm5,%xmm1
2207	paddd	%xmm4,%xmm0
2208	pxor	%xmm3,%xmm15
2209	pxor	%xmm2,%xmm14
2210	pxor	%xmm1,%xmm13
2211	pxor	%xmm0,%xmm12
2212.byte	102,69,15,56,0,248
2213.byte	102,69,15,56,0,240
2214.byte	102,69,15,56,0,232
2215.byte	102,69,15,56,0,224
2216	movdqa	0+80(%rbp),%xmm8
2217	paddd	%xmm15,%xmm11
2218	paddd	%xmm14,%xmm10
2219	paddd	%xmm13,%xmm9
2220	paddd	%xmm12,%xmm8
2221	pxor	%xmm11,%xmm7
2222	pxor	%xmm10,%xmm6
2223	pxor	%xmm9,%xmm5
2224	pxor	%xmm8,%xmm4
2225	movdqa	%xmm8,0+80(%rbp)
2226	movdqa	%xmm7,%xmm8
2227	psrld	$25,%xmm8
2228	pslld	$32-25,%xmm7
2229	pxor	%xmm8,%xmm7
2230	movdqa	%xmm6,%xmm8
2231	psrld	$25,%xmm8
2232	pslld	$32-25,%xmm6
2233	pxor	%xmm8,%xmm6
2234	movdqa	%xmm5,%xmm8
2235	psrld	$25,%xmm8
2236	pslld	$32-25,%xmm5
2237	pxor	%xmm8,%xmm5
2238	movdqa	%xmm4,%xmm8
2239	psrld	$25,%xmm8
2240	pslld	$32-25,%xmm4
2241	pxor	%xmm8,%xmm4
2242	movdqa	0+80(%rbp),%xmm8
2243.byte	102,15,58,15,255,4
2244.byte	102,69,15,58,15,219,8
2245.byte	102,69,15,58,15,255,12
2246.byte	102,15,58,15,246,4
2247.byte	102,69,15,58,15,210,8
2248.byte	102,69,15,58,15,246,12
2249.byte	102,15,58,15,237,4
2250.byte	102,69,15,58,15,201,8
2251.byte	102,69,15,58,15,237,12
2252.byte	102,15,58,15,228,4
2253.byte	102,69,15,58,15,192,8
2254.byte	102,69,15,58,15,228,12
2255	movdqa	%xmm8,0+80(%rbp)
2256	movdqa	L$rol16(%rip),%xmm8
2257	paddd	%xmm7,%xmm3
2258	paddd	%xmm6,%xmm2
2259	paddd	%xmm5,%xmm1
2260	paddd	%xmm4,%xmm0
2261	pxor	%xmm3,%xmm15
2262	pxor	%xmm2,%xmm14
2263	pxor	%xmm1,%xmm13
2264	pxor	%xmm0,%xmm12
2265.byte	102,69,15,56,0,248
2266.byte	102,69,15,56,0,240
2267.byte	102,69,15,56,0,232
2268.byte	102,69,15,56,0,224
2269	movdqa	0+80(%rbp),%xmm8
2270	paddd	%xmm15,%xmm11
2271	paddd	%xmm14,%xmm10
2272	paddd	%xmm13,%xmm9
2273	paddd	%xmm12,%xmm8
2274	pxor	%xmm11,%xmm7
2275	pxor	%xmm10,%xmm6
2276	pxor	%xmm9,%xmm5
2277	pxor	%xmm8,%xmm4
2278	movdqa	%xmm8,0+80(%rbp)
2279	movdqa	%xmm7,%xmm8
2280	psrld	$20,%xmm8
2281	pslld	$32-20,%xmm7
2282	pxor	%xmm8,%xmm7
2283	movdqa	%xmm6,%xmm8
2284	psrld	$20,%xmm8
2285	pslld	$32-20,%xmm6
2286	pxor	%xmm8,%xmm6
2287	movdqa	%xmm5,%xmm8
2288	psrld	$20,%xmm8
2289	pslld	$32-20,%xmm5
2290	pxor	%xmm8,%xmm5
2291	movdqa	%xmm4,%xmm8
2292	psrld	$20,%xmm8
2293	pslld	$32-20,%xmm4
2294	pxor	%xmm8,%xmm4
2295	movdqa	L$rol8(%rip),%xmm8
2296	paddd	%xmm7,%xmm3
2297	paddd	%xmm6,%xmm2
2298	paddd	%xmm5,%xmm1
2299	paddd	%xmm4,%xmm0
2300	pxor	%xmm3,%xmm15
2301	pxor	%xmm2,%xmm14
2302	pxor	%xmm1,%xmm13
2303	pxor	%xmm0,%xmm12
2304.byte	102,69,15,56,0,248
2305.byte	102,69,15,56,0,240
2306.byte	102,69,15,56,0,232
2307.byte	102,69,15,56,0,224
2308	movdqa	0+80(%rbp),%xmm8
2309	paddd	%xmm15,%xmm11
2310	paddd	%xmm14,%xmm10
2311	paddd	%xmm13,%xmm9
2312	paddd	%xmm12,%xmm8
2313	pxor	%xmm11,%xmm7
2314	pxor	%xmm10,%xmm6
2315	pxor	%xmm9,%xmm5
2316	pxor	%xmm8,%xmm4
2317	movdqa	%xmm8,0+80(%rbp)
2318	movdqa	%xmm7,%xmm8
2319	psrld	$25,%xmm8
2320	pslld	$32-25,%xmm7
2321	pxor	%xmm8,%xmm7
2322	movdqa	%xmm6,%xmm8
2323	psrld	$25,%xmm8
2324	pslld	$32-25,%xmm6
2325	pxor	%xmm8,%xmm6
2326	movdqa	%xmm5,%xmm8
2327	psrld	$25,%xmm8
2328	pslld	$32-25,%xmm5
2329	pxor	%xmm8,%xmm5
2330	movdqa	%xmm4,%xmm8
2331	psrld	$25,%xmm8
2332	pslld	$32-25,%xmm4
2333	pxor	%xmm8,%xmm4
2334	movdqa	0+80(%rbp),%xmm8
2335.byte	102,15,58,15,255,12
2336.byte	102,69,15,58,15,219,8
2337.byte	102,69,15,58,15,255,4
2338.byte	102,15,58,15,246,12
2339.byte	102,69,15,58,15,210,8
2340.byte	102,69,15,58,15,246,4
2341.byte	102,15,58,15,237,12
2342.byte	102,69,15,58,15,201,8
2343.byte	102,69,15,58,15,237,4
2344.byte	102,15,58,15,228,12
2345.byte	102,69,15,58,15,192,8
2346.byte	102,69,15,58,15,228,4
2347
2348	decq	%r10
2349	jnz	L$seal_sse_init_rounds
2350	paddd	L$chacha20_consts(%rip),%xmm3
2351	paddd	0+48(%rbp),%xmm7
2352	paddd	0+64(%rbp),%xmm11
2353	paddd	0+144(%rbp),%xmm15
2354	paddd	L$chacha20_consts(%rip),%xmm2
2355	paddd	0+48(%rbp),%xmm6
2356	paddd	0+64(%rbp),%xmm10
2357	paddd	0+128(%rbp),%xmm14
2358	paddd	L$chacha20_consts(%rip),%xmm1
2359	paddd	0+48(%rbp),%xmm5
2360	paddd	0+64(%rbp),%xmm9
2361	paddd	0+112(%rbp),%xmm13
2362	paddd	L$chacha20_consts(%rip),%xmm0
2363	paddd	0+48(%rbp),%xmm4
2364	paddd	0+64(%rbp),%xmm8
2365	paddd	0+96(%rbp),%xmm12
2366
2367
2368	pand	L$clamp(%rip),%xmm3
2369	movdqa	%xmm3,0+0(%rbp)
2370	movdqa	%xmm7,0+16(%rbp)
2371
2372	movq	%r8,%r8
2373	call	poly_hash_ad_internal
2374	movdqu	0 + 0(%rsi),%xmm3
2375	movdqu	16 + 0(%rsi),%xmm7
2376	movdqu	32 + 0(%rsi),%xmm11
2377	movdqu	48 + 0(%rsi),%xmm15
2378	pxor	%xmm3,%xmm2
2379	pxor	%xmm7,%xmm6
2380	pxor	%xmm11,%xmm10
2381	pxor	%xmm14,%xmm15
2382	movdqu	%xmm2,0 + 0(%rdi)
2383	movdqu	%xmm6,16 + 0(%rdi)
2384	movdqu	%xmm10,32 + 0(%rdi)
2385	movdqu	%xmm15,48 + 0(%rdi)
2386	movdqu	0 + 64(%rsi),%xmm3
2387	movdqu	16 + 64(%rsi),%xmm7
2388	movdqu	32 + 64(%rsi),%xmm11
2389	movdqu	48 + 64(%rsi),%xmm15
2390	pxor	%xmm3,%xmm1
2391	pxor	%xmm7,%xmm5
2392	pxor	%xmm11,%xmm9
2393	pxor	%xmm13,%xmm15
2394	movdqu	%xmm1,0 + 64(%rdi)
2395	movdqu	%xmm5,16 + 64(%rdi)
2396	movdqu	%xmm9,32 + 64(%rdi)
2397	movdqu	%xmm15,48 + 64(%rdi)
2398
2399	cmpq	$192,%rbx
2400	ja	L$seal_sse_main_init
2401	movq	$128,%rcx
2402	subq	$128,%rbx
2403	leaq	128(%rsi),%rsi
2404	jmp	L$seal_sse_128_tail_hash
2405L$seal_sse_main_init:
2406	movdqu	0 + 128(%rsi),%xmm3
2407	movdqu	16 + 128(%rsi),%xmm7
2408	movdqu	32 + 128(%rsi),%xmm11
2409	movdqu	48 + 128(%rsi),%xmm15
2410	pxor	%xmm3,%xmm0
2411	pxor	%xmm7,%xmm4
2412	pxor	%xmm11,%xmm8
2413	pxor	%xmm12,%xmm15
2414	movdqu	%xmm0,0 + 128(%rdi)
2415	movdqu	%xmm4,16 + 128(%rdi)
2416	movdqu	%xmm8,32 + 128(%rdi)
2417	movdqu	%xmm15,48 + 128(%rdi)
2418
2419	movq	$192,%rcx
2420	subq	$192,%rbx
2421	leaq	192(%rsi),%rsi
2422	movq	$2,%rcx
2423	movq	$8,%r8
2424	cmpq	$64,%rbx
2425	jbe	L$seal_sse_tail_64
2426	cmpq	$128,%rbx
2427	jbe	L$seal_sse_tail_128
2428	cmpq	$192,%rbx
2429	jbe	L$seal_sse_tail_192
2430
2431L$seal_sse_main_loop:
2432	movdqa	L$chacha20_consts(%rip),%xmm0
2433	movdqa	0+48(%rbp),%xmm4
2434	movdqa	0+64(%rbp),%xmm8
2435	movdqa	%xmm0,%xmm1
2436	movdqa	%xmm4,%xmm5
2437	movdqa	%xmm8,%xmm9
2438	movdqa	%xmm0,%xmm2
2439	movdqa	%xmm4,%xmm6
2440	movdqa	%xmm8,%xmm10
2441	movdqa	%xmm0,%xmm3
2442	movdqa	%xmm4,%xmm7
2443	movdqa	%xmm8,%xmm11
2444	movdqa	0+96(%rbp),%xmm15
2445	paddd	L$sse_inc(%rip),%xmm15
2446	movdqa	%xmm15,%xmm14
2447	paddd	L$sse_inc(%rip),%xmm14
2448	movdqa	%xmm14,%xmm13
2449	paddd	L$sse_inc(%rip),%xmm13
2450	movdqa	%xmm13,%xmm12
2451	paddd	L$sse_inc(%rip),%xmm12
2452	movdqa	%xmm12,0+96(%rbp)
2453	movdqa	%xmm13,0+112(%rbp)
2454	movdqa	%xmm14,0+128(%rbp)
2455	movdqa	%xmm15,0+144(%rbp)
2456
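# One iteration of this loop is a ChaCha20 double round (column + diagonal)
# over all four blocks with one Poly1305 block folded in:
# acc = (acc + msg + 2^128) * r mod 2^130-5, using schoolbook mulq and the
# shrd/and reduction.  The .byte runs are byte-encoded pshufb (16/8-bit
# rotates via L$rol16/L$rol8) and palignr (row rotation between the two
# halves of the double round).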
2457.p2align	5
2458L$seal_sse_main_rounds:
2459	movdqa	%xmm8,0+80(%rbp)
2460	movdqa	L$rol16(%rip),%xmm8
2461	paddd	%xmm7,%xmm3
2462	paddd	%xmm6,%xmm2
2463	paddd	%xmm5,%xmm1
2464	paddd	%xmm4,%xmm0
2465	pxor	%xmm3,%xmm15
2466	pxor	%xmm2,%xmm14
2467	pxor	%xmm1,%xmm13
2468	pxor	%xmm0,%xmm12
2469.byte	102,69,15,56,0,248
2470.byte	102,69,15,56,0,240
2471.byte	102,69,15,56,0,232
2472.byte	102,69,15,56,0,224
2473	movdqa	0+80(%rbp),%xmm8
2474	paddd	%xmm15,%xmm11
2475	paddd	%xmm14,%xmm10
2476	paddd	%xmm13,%xmm9
2477	paddd	%xmm12,%xmm8
2478	pxor	%xmm11,%xmm7
2479	addq	0+0(%rdi),%r10
2480	adcq	8+0(%rdi),%r11
2481	adcq	$1,%r12
2482	pxor	%xmm10,%xmm6
2483	pxor	%xmm9,%xmm5
2484	pxor	%xmm8,%xmm4
2485	movdqa	%xmm8,0+80(%rbp)
2486	movdqa	%xmm7,%xmm8
2487	psrld	$20,%xmm8
2488	pslld	$32-20,%xmm7
2489	pxor	%xmm8,%xmm7
2490	movdqa	%xmm6,%xmm8
2491	psrld	$20,%xmm8
2492	pslld	$32-20,%xmm6
2493	pxor	%xmm8,%xmm6
2494	movdqa	%xmm5,%xmm8
2495	psrld	$20,%xmm8
2496	pslld	$32-20,%xmm5
2497	pxor	%xmm8,%xmm5
2498	movdqa	%xmm4,%xmm8
2499	psrld	$20,%xmm8
2500	pslld	$32-20,%xmm4
2501	pxor	%xmm8,%xmm4
2502	movq	0+0+0(%rbp),%rax
2503	movq	%rax,%r15
2504	mulq	%r10
2505	movq	%rax,%r13
2506	movq	%rdx,%r14
2507	movq	0+0+0(%rbp),%rax
2508	mulq	%r11
2509	imulq	%r12,%r15
2510	addq	%rax,%r14
2511	adcq	%rdx,%r15
2512	movdqa	L$rol8(%rip),%xmm8
2513	paddd	%xmm7,%xmm3
2514	paddd	%xmm6,%xmm2
2515	paddd	%xmm5,%xmm1
2516	paddd	%xmm4,%xmm0
2517	pxor	%xmm3,%xmm15
2518	pxor	%xmm2,%xmm14
2519	pxor	%xmm1,%xmm13
2520	pxor	%xmm0,%xmm12
2521.byte	102,69,15,56,0,248
2522.byte	102,69,15,56,0,240
2523.byte	102,69,15,56,0,232
2524.byte	102,69,15,56,0,224
2525	movdqa	0+80(%rbp),%xmm8
2526	paddd	%xmm15,%xmm11
2527	paddd	%xmm14,%xmm10
2528	paddd	%xmm13,%xmm9
2529	paddd	%xmm12,%xmm8
2530	pxor	%xmm11,%xmm7
2531	pxor	%xmm10,%xmm6
2532	movq	8+0+0(%rbp),%rax
2533	movq	%rax,%r9
2534	mulq	%r10
2535	addq	%rax,%r14
2536	adcq	$0,%rdx
2537	movq	%rdx,%r10
2538	movq	8+0+0(%rbp),%rax
2539	mulq	%r11
2540	addq	%rax,%r15
2541	adcq	$0,%rdx
2542	pxor	%xmm9,%xmm5
2543	pxor	%xmm8,%xmm4
2544	movdqa	%xmm8,0+80(%rbp)
2545	movdqa	%xmm7,%xmm8
2546	psrld	$25,%xmm8
2547	pslld	$32-25,%xmm7
2548	pxor	%xmm8,%xmm7
2549	movdqa	%xmm6,%xmm8
2550	psrld	$25,%xmm8
2551	pslld	$32-25,%xmm6
2552	pxor	%xmm8,%xmm6
2553	movdqa	%xmm5,%xmm8
2554	psrld	$25,%xmm8
2555	pslld	$32-25,%xmm5
2556	pxor	%xmm8,%xmm5
2557	movdqa	%xmm4,%xmm8
2558	psrld	$25,%xmm8
2559	pslld	$32-25,%xmm4
2560	pxor	%xmm8,%xmm4
2561	movdqa	0+80(%rbp),%xmm8
2562	imulq	%r12,%r9
2563	addq	%r10,%r15
2564	adcq	%rdx,%r9
2565.byte	102,15,58,15,255,4
2566.byte	102,69,15,58,15,219,8
2567.byte	102,69,15,58,15,255,12
2568.byte	102,15,58,15,246,4
2569.byte	102,69,15,58,15,210,8
2570.byte	102,69,15,58,15,246,12
2571.byte	102,15,58,15,237,4
2572.byte	102,69,15,58,15,201,8
2573.byte	102,69,15,58,15,237,12
2574.byte	102,15,58,15,228,4
2575.byte	102,69,15,58,15,192,8
2576.byte	102,69,15,58,15,228,12
2577	movdqa	%xmm8,0+80(%rbp)
2578	movdqa	L$rol16(%rip),%xmm8
2579	paddd	%xmm7,%xmm3
2580	paddd	%xmm6,%xmm2
2581	paddd	%xmm5,%xmm1
2582	paddd	%xmm4,%xmm0
2583	pxor	%xmm3,%xmm15
2584	pxor	%xmm2,%xmm14
2585	movq	%r13,%r10
2586	movq	%r14,%r11
2587	movq	%r15,%r12
2588	andq	$3,%r12
2589	movq	%r15,%r13
2590	andq	$-4,%r13
2591	movq	%r9,%r14
2592	shrdq	$2,%r9,%r15
2593	shrq	$2,%r9
2594	addq	%r13,%r15
2595	adcq	%r14,%r9
2596	addq	%r15,%r10
2597	adcq	%r9,%r11
2598	adcq	$0,%r12
2599	pxor	%xmm1,%xmm13
2600	pxor	%xmm0,%xmm12
2601.byte	102,69,15,56,0,248
2602.byte	102,69,15,56,0,240
2603.byte	102,69,15,56,0,232
2604.byte	102,69,15,56,0,224
2605	movdqa	0+80(%rbp),%xmm8
2606	paddd	%xmm15,%xmm11
2607	paddd	%xmm14,%xmm10
2608	paddd	%xmm13,%xmm9
2609	paddd	%xmm12,%xmm8
2610	pxor	%xmm11,%xmm7
2611	pxor	%xmm10,%xmm6
2612	pxor	%xmm9,%xmm5
2613	pxor	%xmm8,%xmm4
2614	movdqa	%xmm8,0+80(%rbp)
2615	movdqa	%xmm7,%xmm8
2616	psrld	$20,%xmm8
2617	pslld	$32-20,%xmm7
2618	pxor	%xmm8,%xmm7
2619	movdqa	%xmm6,%xmm8
2620	psrld	$20,%xmm8
2621	pslld	$32-20,%xmm6
2622	pxor	%xmm8,%xmm6
2623	movdqa	%xmm5,%xmm8
2624	psrld	$20,%xmm8
2625	pslld	$32-20,%xmm5
2626	pxor	%xmm8,%xmm5
2627	movdqa	%xmm4,%xmm8
2628	psrld	$20,%xmm8
2629	pslld	$32-20,%xmm4
2630	pxor	%xmm8,%xmm4
2631	movdqa	L$rol8(%rip),%xmm8
2632	paddd	%xmm7,%xmm3
2633	paddd	%xmm6,%xmm2
2634	paddd	%xmm5,%xmm1
2635	paddd	%xmm4,%xmm0
2636	pxor	%xmm3,%xmm15
2637	pxor	%xmm2,%xmm14
2638	pxor	%xmm1,%xmm13
2639	pxor	%xmm0,%xmm12
2640.byte	102,69,15,56,0,248
2641.byte	102,69,15,56,0,240
2642.byte	102,69,15,56,0,232
2643.byte	102,69,15,56,0,224
2644	movdqa	0+80(%rbp),%xmm8
2645	paddd	%xmm15,%xmm11
2646	paddd	%xmm14,%xmm10
2647	paddd	%xmm13,%xmm9
2648	paddd	%xmm12,%xmm8
2649	pxor	%xmm11,%xmm7
2650	pxor	%xmm10,%xmm6
2651	pxor	%xmm9,%xmm5
2652	pxor	%xmm8,%xmm4
2653	movdqa	%xmm8,0+80(%rbp)
2654	movdqa	%xmm7,%xmm8
2655	psrld	$25,%xmm8
2656	pslld	$32-25,%xmm7
2657	pxor	%xmm8,%xmm7
2658	movdqa	%xmm6,%xmm8
2659	psrld	$25,%xmm8
2660	pslld	$32-25,%xmm6
2661	pxor	%xmm8,%xmm6
2662	movdqa	%xmm5,%xmm8
2663	psrld	$25,%xmm8
2664	pslld	$32-25,%xmm5
2665	pxor	%xmm8,%xmm5
2666	movdqa	%xmm4,%xmm8
2667	psrld	$25,%xmm8
2668	pslld	$32-25,%xmm4
2669	pxor	%xmm8,%xmm4
2670	movdqa	0+80(%rbp),%xmm8
2671.byte	102,15,58,15,255,12
2672.byte	102,69,15,58,15,219,8
2673.byte	102,69,15,58,15,255,4
2674.byte	102,15,58,15,246,12
2675.byte	102,69,15,58,15,210,8
2676.byte	102,69,15,58,15,246,4
2677.byte	102,15,58,15,237,12
2678.byte	102,69,15,58,15,201,8
2679.byte	102,69,15,58,15,237,4
2680.byte	102,15,58,15,228,12
2681.byte	102,69,15,58,15,192,8
2682.byte	102,69,15,58,15,228,4
2683
2684	leaq	16(%rdi),%rdi
2685	decq	%r8
2686	jge	L$seal_sse_main_rounds
2687	addq	0+0(%rdi),%r10
2688	adcq	8+0(%rdi),%r11
2689	adcq	$1,%r12
2690	movq	0+0+0(%rbp),%rax
2691	movq	%rax,%r15
2692	mulq	%r10
2693	movq	%rax,%r13
2694	movq	%rdx,%r14
2695	movq	0+0+0(%rbp),%rax
2696	mulq	%r11
2697	imulq	%r12,%r15
2698	addq	%rax,%r14
2699	adcq	%rdx,%r15
2700	movq	8+0+0(%rbp),%rax
2701	movq	%rax,%r9
2702	mulq	%r10
2703	addq	%rax,%r14
2704	adcq	$0,%rdx
2705	movq	%rdx,%r10
2706	movq	8+0+0(%rbp),%rax
2707	mulq	%r11
2708	addq	%rax,%r15
2709	adcq	$0,%rdx
2710	imulq	%r12,%r9
2711	addq	%r10,%r15
2712	adcq	%rdx,%r9
2713	movq	%r13,%r10
2714	movq	%r14,%r11
2715	movq	%r15,%r12
2716	andq	$3,%r12
2717	movq	%r15,%r13
2718	andq	$-4,%r13
2719	movq	%r9,%r14
2720	shrdq	$2,%r9,%r15
2721	shrq	$2,%r9
2722	addq	%r13,%r15
2723	adcq	%r14,%r9
2724	addq	%r15,%r10
2725	adcq	%r9,%r11
2726	adcq	$0,%r12
2727
2728	leaq	16(%rdi),%rdi
2729	decq	%rcx
2730	jg	L$seal_sse_main_rounds
2731	paddd	L$chacha20_consts(%rip),%xmm3
2732	paddd	0+48(%rbp),%xmm7
2733	paddd	0+64(%rbp),%xmm11
2734	paddd	0+144(%rbp),%xmm15
2735	paddd	L$chacha20_consts(%rip),%xmm2
2736	paddd	0+48(%rbp),%xmm6
2737	paddd	0+64(%rbp),%xmm10
2738	paddd	0+128(%rbp),%xmm14
2739	paddd	L$chacha20_consts(%rip),%xmm1
2740	paddd	0+48(%rbp),%xmm5
2741	paddd	0+64(%rbp),%xmm9
2742	paddd	0+112(%rbp),%xmm13
2743	paddd	L$chacha20_consts(%rip),%xmm0
2744	paddd	0+48(%rbp),%xmm4
2745	paddd	0+64(%rbp),%xmm8
2746	paddd	0+96(%rbp),%xmm12
2747
2748	movdqa	%xmm14,0+80(%rbp)
2749	movdqa	%xmm14,0+80(%rbp)
2750	movdqu	0 + 0(%rsi),%xmm14
2751	pxor	%xmm3,%xmm14
2752	movdqu	%xmm14,0 + 0(%rdi)
2753	movdqu	16 + 0(%rsi),%xmm14
2754	pxor	%xmm7,%xmm14
2755	movdqu	%xmm14,16 + 0(%rdi)
2756	movdqu	32 + 0(%rsi),%xmm14
2757	pxor	%xmm11,%xmm14
2758	movdqu	%xmm14,32 + 0(%rdi)
2759	movdqu	48 + 0(%rsi),%xmm14
2760	pxor	%xmm15,%xmm14
2761	movdqu	%xmm14,48 + 0(%rdi)
2762
2763	movdqa	0+80(%rbp),%xmm14
2764	movdqu	0 + 64(%rsi),%xmm3
2765	movdqu	16 + 64(%rsi),%xmm7
2766	movdqu	32 + 64(%rsi),%xmm11
2767	movdqu	48 + 64(%rsi),%xmm15
2768	pxor	%xmm3,%xmm2
2769	pxor	%xmm7,%xmm6
2770	pxor	%xmm11,%xmm10
2771	pxor	%xmm14,%xmm15
2772	movdqu	%xmm2,0 + 64(%rdi)
2773	movdqu	%xmm6,16 + 64(%rdi)
2774	movdqu	%xmm10,32 + 64(%rdi)
2775	movdqu	%xmm15,48 + 64(%rdi)
2776	movdqu	0 + 128(%rsi),%xmm3
2777	movdqu	16 + 128(%rsi),%xmm7
2778	movdqu	32 + 128(%rsi),%xmm11
2779	movdqu	48 + 128(%rsi),%xmm15
2780	pxor	%xmm3,%xmm1
2781	pxor	%xmm7,%xmm5
2782	pxor	%xmm11,%xmm9
2783	pxor	%xmm13,%xmm15
2784	movdqu	%xmm1,0 + 128(%rdi)
2785	movdqu	%xmm5,16 + 128(%rdi)
2786	movdqu	%xmm9,32 + 128(%rdi)
2787	movdqu	%xmm15,48 + 128(%rdi)
2788
2789	cmpq	$256,%rbx
2790	ja	L$seal_sse_main_loop_xor
2791
2792	movq	$192,%rcx
2793	subq	$192,%rbx
2794	leaq	192(%rsi),%rsi
2795	jmp	L$seal_sse_128_tail_hash
2796L$seal_sse_main_loop_xor:
2797	movdqu	0 + 192(%rsi),%xmm3
2798	movdqu	16 + 192(%rsi),%xmm7
2799	movdqu	32 + 192(%rsi),%xmm11
2800	movdqu	48 + 192(%rsi),%xmm15
2801	pxor	%xmm3,%xmm0
2802	pxor	%xmm7,%xmm4
2803	pxor	%xmm11,%xmm8
2804	pxor	%xmm12,%xmm15
2805	movdqu	%xmm0,0 + 192(%rdi)
2806	movdqu	%xmm4,16 + 192(%rdi)
2807	movdqu	%xmm8,32 + 192(%rdi)
2808	movdqu	%xmm15,48 + 192(%rdi)
2809
2810	leaq	256(%rsi),%rsi
2811	subq	$256,%rbx
2812	movq	$6,%rcx
2813	movq	$4,%r8
2814	cmpq	$192,%rbx
2815	jg	L$seal_sse_main_loop
2816	movq	%rbx,%rcx
2817	testq	%rbx,%rbx
2818	je	L$seal_sse_128_tail_hash
2819	movq	$6,%rcx
2820	cmpq	$128,%rbx
2821	ja	L$seal_sse_tail_192
2822	cmpq	$64,%rbx
2823	ja	L$seal_sse_tail_128
2824
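# At most 64 bytes of plaintext remain: generate one more ChaCha20 block while
# the interleaved Poly1305 code keeps hashing ciphertext written earlier.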
2825L$seal_sse_tail_64:
2826	movdqa	L$chacha20_consts(%rip),%xmm0
2827	movdqa	0+48(%rbp),%xmm4
2828	movdqa	0+64(%rbp),%xmm8
2829	movdqa	0+96(%rbp),%xmm12
2830	paddd	L$sse_inc(%rip),%xmm12
2831	movdqa	%xmm12,0+96(%rbp)
2832
2833L$seal_sse_tail_64_rounds_and_x2hash:
2834	addq	0+0(%rdi),%r10
2835	adcq	8+0(%rdi),%r11
2836	adcq	$1,%r12
2837	movq	0+0+0(%rbp),%rax
2838	movq	%rax,%r15
2839	mulq	%r10
2840	movq	%rax,%r13
2841	movq	%rdx,%r14
2842	movq	0+0+0(%rbp),%rax
2843	mulq	%r11
2844	imulq	%r12,%r15
2845	addq	%rax,%r14
2846	adcq	%rdx,%r15
2847	movq	8+0+0(%rbp),%rax
2848	movq	%rax,%r9
2849	mulq	%r10
2850	addq	%rax,%r14
2851	adcq	$0,%rdx
2852	movq	%rdx,%r10
2853	movq	8+0+0(%rbp),%rax
2854	mulq	%r11
2855	addq	%rax,%r15
2856	adcq	$0,%rdx
2857	imulq	%r12,%r9
2858	addq	%r10,%r15
2859	adcq	%rdx,%r9
2860	movq	%r13,%r10
2861	movq	%r14,%r11
2862	movq	%r15,%r12
2863	andq	$3,%r12
2864	movq	%r15,%r13
2865	andq	$-4,%r13
2866	movq	%r9,%r14
2867	shrdq	$2,%r9,%r15
2868	shrq	$2,%r9
2869	addq	%r13,%r15
2870	adcq	%r14,%r9
2871	addq	%r15,%r10
2872	adcq	%r9,%r11
2873	adcq	$0,%r12
2874
2875	leaq	16(%rdi),%rdi
2876L$seal_sse_tail_64_rounds_and_x1hash:
2877	paddd	%xmm4,%xmm0
2878	pxor	%xmm0,%xmm12
2879	pshufb	L$rol16(%rip),%xmm12
2880	paddd	%xmm12,%xmm8
2881	pxor	%xmm8,%xmm4
2882	movdqa	%xmm4,%xmm3
2883	pslld	$12,%xmm3
2884	psrld	$20,%xmm4
2885	pxor	%xmm3,%xmm4
2886	paddd	%xmm4,%xmm0
2887	pxor	%xmm0,%xmm12
2888	pshufb	L$rol8(%rip),%xmm12
2889	paddd	%xmm12,%xmm8
2890	pxor	%xmm8,%xmm4
2891	movdqa	%xmm4,%xmm3
2892	pslld	$7,%xmm3
2893	psrld	$25,%xmm4
2894	pxor	%xmm3,%xmm4
2895.byte	102,15,58,15,228,4
2896.byte	102,69,15,58,15,192,8
2897.byte	102,69,15,58,15,228,12
2898	paddd	%xmm4,%xmm0
2899	pxor	%xmm0,%xmm12
2900	pshufb	L$rol16(%rip),%xmm12
2901	paddd	%xmm12,%xmm8
2902	pxor	%xmm8,%xmm4
2903	movdqa	%xmm4,%xmm3
2904	pslld	$12,%xmm3
2905	psrld	$20,%xmm4
2906	pxor	%xmm3,%xmm4
2907	paddd	%xmm4,%xmm0
2908	pxor	%xmm0,%xmm12
2909	pshufb	L$rol8(%rip),%xmm12
2910	paddd	%xmm12,%xmm8
2911	pxor	%xmm8,%xmm4
2912	movdqa	%xmm4,%xmm3
2913	pslld	$7,%xmm3
2914	psrld	$25,%xmm4
2915	pxor	%xmm3,%xmm4
2916.byte	102,15,58,15,228,12
2917.byte	102,69,15,58,15,192,8
2918.byte	102,69,15,58,15,228,4
2919	addq	0+0(%rdi),%r10
2920	adcq	8+0(%rdi),%r11
2921	adcq	$1,%r12
2922	movq	0+0+0(%rbp),%rax
2923	movq	%rax,%r15
2924	mulq	%r10
2925	movq	%rax,%r13
2926	movq	%rdx,%r14
2927	movq	0+0+0(%rbp),%rax
2928	mulq	%r11
2929	imulq	%r12,%r15
2930	addq	%rax,%r14
2931	adcq	%rdx,%r15
2932	movq	8+0+0(%rbp),%rax
2933	movq	%rax,%r9
2934	mulq	%r10
2935	addq	%rax,%r14
2936	adcq	$0,%rdx
2937	movq	%rdx,%r10
2938	movq	8+0+0(%rbp),%rax
2939	mulq	%r11
2940	addq	%rax,%r15
2941	adcq	$0,%rdx
2942	imulq	%r12,%r9
2943	addq	%r10,%r15
2944	adcq	%rdx,%r9
2945	movq	%r13,%r10
2946	movq	%r14,%r11
2947	movq	%r15,%r12
2948	andq	$3,%r12
2949	movq	%r15,%r13
2950	andq	$-4,%r13
2951	movq	%r9,%r14
2952	shrdq	$2,%r9,%r15
2953	shrq	$2,%r9
2954	addq	%r13,%r15
2955	adcq	%r14,%r9
2956	addq	%r15,%r10
2957	adcq	%r9,%r11
2958	adcq	$0,%r12
2959
2960	leaq	16(%rdi),%rdi
2961	decq	%rcx
2962	jg	L$seal_sse_tail_64_rounds_and_x2hash
2963	decq	%r8
2964	jge	L$seal_sse_tail_64_rounds_and_x1hash
2965	paddd	L$chacha20_consts(%rip),%xmm0
2966	paddd	0+48(%rbp),%xmm4
2967	paddd	0+64(%rbp),%xmm8
2968	paddd	0+96(%rbp),%xmm12
2969
2970	jmp	L$seal_sse_128_tail_xor
2971
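# Between 65 and 128 bytes remain: two more ChaCha20 blocks, hashing as above.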
2972L$seal_sse_tail_128:
2973	movdqa	L$chacha20_consts(%rip),%xmm0
2974	movdqa	0+48(%rbp),%xmm4
2975	movdqa	0+64(%rbp),%xmm8
2976	movdqa	%xmm0,%xmm1
2977	movdqa	%xmm4,%xmm5
2978	movdqa	%xmm8,%xmm9
2979	movdqa	0+96(%rbp),%xmm13
2980	paddd	L$sse_inc(%rip),%xmm13
2981	movdqa	%xmm13,%xmm12
2982	paddd	L$sse_inc(%rip),%xmm12
2983	movdqa	%xmm12,0+96(%rbp)
2984	movdqa	%xmm13,0+112(%rbp)
2985
2986L$seal_sse_tail_128_rounds_and_x2hash:
2987	addq	0+0(%rdi),%r10
2988	adcq	8+0(%rdi),%r11
2989	adcq	$1,%r12
2990	movq	0+0+0(%rbp),%rax
2991	movq	%rax,%r15
2992	mulq	%r10
2993	movq	%rax,%r13
2994	movq	%rdx,%r14
2995	movq	0+0+0(%rbp),%rax
2996	mulq	%r11
2997	imulq	%r12,%r15
2998	addq	%rax,%r14
2999	adcq	%rdx,%r15
3000	movq	8+0+0(%rbp),%rax
3001	movq	%rax,%r9
3002	mulq	%r10
3003	addq	%rax,%r14
3004	adcq	$0,%rdx
3005	movq	%rdx,%r10
3006	movq	8+0+0(%rbp),%rax
3007	mulq	%r11
3008	addq	%rax,%r15
3009	adcq	$0,%rdx
3010	imulq	%r12,%r9
3011	addq	%r10,%r15
3012	adcq	%rdx,%r9
3013	movq	%r13,%r10
3014	movq	%r14,%r11
3015	movq	%r15,%r12
3016	andq	$3,%r12
3017	movq	%r15,%r13
3018	andq	$-4,%r13
3019	movq	%r9,%r14
3020	shrdq	$2,%r9,%r15
3021	shrq	$2,%r9
3022	addq	%r13,%r15
3023	adcq	%r14,%r9
3024	addq	%r15,%r10
3025	adcq	%r9,%r11
3026	adcq	$0,%r12
3027
3028	leaq	16(%rdi),%rdi
3029L$seal_sse_tail_128_rounds_and_x1hash:
3030	paddd	%xmm4,%xmm0
3031	pxor	%xmm0,%xmm12
3032	pshufb	L$rol16(%rip),%xmm12
3033	paddd	%xmm12,%xmm8
3034	pxor	%xmm8,%xmm4
3035	movdqa	%xmm4,%xmm3
3036	pslld	$12,%xmm3
3037	psrld	$20,%xmm4
3038	pxor	%xmm3,%xmm4
3039	paddd	%xmm4,%xmm0
3040	pxor	%xmm0,%xmm12
3041	pshufb	L$rol8(%rip),%xmm12
3042	paddd	%xmm12,%xmm8
3043	pxor	%xmm8,%xmm4
3044	movdqa	%xmm4,%xmm3
3045	pslld	$7,%xmm3
3046	psrld	$25,%xmm4
3047	pxor	%xmm3,%xmm4
3048.byte	102,15,58,15,228,4
3049.byte	102,69,15,58,15,192,8
3050.byte	102,69,15,58,15,228,12
3051	paddd	%xmm5,%xmm1
3052	pxor	%xmm1,%xmm13
3053	pshufb	L$rol16(%rip),%xmm13
3054	paddd	%xmm13,%xmm9
3055	pxor	%xmm9,%xmm5
3056	movdqa	%xmm5,%xmm3
3057	pslld	$12,%xmm3
3058	psrld	$20,%xmm5
3059	pxor	%xmm3,%xmm5
3060	paddd	%xmm5,%xmm1
3061	pxor	%xmm1,%xmm13
3062	pshufb	L$rol8(%rip),%xmm13
3063	paddd	%xmm13,%xmm9
3064	pxor	%xmm9,%xmm5
3065	movdqa	%xmm5,%xmm3
3066	pslld	$7,%xmm3
3067	psrld	$25,%xmm5
3068	pxor	%xmm3,%xmm5
3069.byte	102,15,58,15,237,4
3070.byte	102,69,15,58,15,201,8
3071.byte	102,69,15,58,15,237,12
3072	addq	0+0(%rdi),%r10
3073	adcq	8+0(%rdi),%r11
3074	adcq	$1,%r12
3075	movq	0+0+0(%rbp),%rax
3076	movq	%rax,%r15
3077	mulq	%r10
3078	movq	%rax,%r13
3079	movq	%rdx,%r14
3080	movq	0+0+0(%rbp),%rax
3081	mulq	%r11
3082	imulq	%r12,%r15
3083	addq	%rax,%r14
3084	adcq	%rdx,%r15
3085	movq	8+0+0(%rbp),%rax
3086	movq	%rax,%r9
3087	mulq	%r10
3088	addq	%rax,%r14
3089	adcq	$0,%rdx
3090	movq	%rdx,%r10
3091	movq	8+0+0(%rbp),%rax
3092	mulq	%r11
3093	addq	%rax,%r15
3094	adcq	$0,%rdx
3095	imulq	%r12,%r9
3096	addq	%r10,%r15
3097	adcq	%rdx,%r9
3098	movq	%r13,%r10
3099	movq	%r14,%r11
3100	movq	%r15,%r12
3101	andq	$3,%r12
3102	movq	%r15,%r13
3103	andq	$-4,%r13
3104	movq	%r9,%r14
3105	shrdq	$2,%r9,%r15
3106	shrq	$2,%r9
3107	addq	%r13,%r15
3108	adcq	%r14,%r9
3109	addq	%r15,%r10
3110	adcq	%r9,%r11
3111	adcq	$0,%r12
3112	paddd	%xmm4,%xmm0
3113	pxor	%xmm0,%xmm12
3114	pshufb	L$rol16(%rip),%xmm12
3115	paddd	%xmm12,%xmm8
3116	pxor	%xmm8,%xmm4
3117	movdqa	%xmm4,%xmm3
3118	pslld	$12,%xmm3
3119	psrld	$20,%xmm4
3120	pxor	%xmm3,%xmm4
3121	paddd	%xmm4,%xmm0
3122	pxor	%xmm0,%xmm12
3123	pshufb	L$rol8(%rip),%xmm12
3124	paddd	%xmm12,%xmm8
3125	pxor	%xmm8,%xmm4
3126	movdqa	%xmm4,%xmm3
3127	pslld	$7,%xmm3
3128	psrld	$25,%xmm4
3129	pxor	%xmm3,%xmm4
3130.byte	102,15,58,15,228,12
3131.byte	102,69,15,58,15,192,8
3132.byte	102,69,15,58,15,228,4
3133	paddd	%xmm5,%xmm1
3134	pxor	%xmm1,%xmm13
3135	pshufb	L$rol16(%rip),%xmm13
3136	paddd	%xmm13,%xmm9
3137	pxor	%xmm9,%xmm5
3138	movdqa	%xmm5,%xmm3
3139	pslld	$12,%xmm3
3140	psrld	$20,%xmm5
3141	pxor	%xmm3,%xmm5
3142	paddd	%xmm5,%xmm1
3143	pxor	%xmm1,%xmm13
3144	pshufb	L$rol8(%rip),%xmm13
3145	paddd	%xmm13,%xmm9
3146	pxor	%xmm9,%xmm5
3147	movdqa	%xmm5,%xmm3
3148	pslld	$7,%xmm3
3149	psrld	$25,%xmm5
3150	pxor	%xmm3,%xmm5
3151.byte	102,15,58,15,237,12
3152.byte	102,69,15,58,15,201,8
3153.byte	102,69,15,58,15,237,4
3154
3155	leaq	16(%rdi),%rdi
3156	decq	%rcx
3157	jg	L$seal_sse_tail_128_rounds_and_x2hash
3158	decq	%r8
3159	jge	L$seal_sse_tail_128_rounds_and_x1hash
3160	paddd	L$chacha20_consts(%rip),%xmm1
3161	paddd	0+48(%rbp),%xmm5
3162	paddd	0+64(%rbp),%xmm9
3163	paddd	0+112(%rbp),%xmm13
3164	paddd	L$chacha20_consts(%rip),%xmm0
3165	paddd	0+48(%rbp),%xmm4
3166	paddd	0+64(%rbp),%xmm8
3167	paddd	0+96(%rbp),%xmm12
3168	movdqu	0 + 0(%rsi),%xmm3
3169	movdqu	16 + 0(%rsi),%xmm7
3170	movdqu	32 + 0(%rsi),%xmm11
3171	movdqu	48 + 0(%rsi),%xmm15
3172	pxor	%xmm3,%xmm1
3173	pxor	%xmm7,%xmm5
3174	pxor	%xmm11,%xmm9
3175	pxor	%xmm13,%xmm15
3176	movdqu	%xmm1,0 + 0(%rdi)
3177	movdqu	%xmm5,16 + 0(%rdi)
3178	movdqu	%xmm9,32 + 0(%rdi)
3179	movdqu	%xmm15,48 + 0(%rdi)
3180
3181	movq	$64,%rcx
3182	subq	$64,%rbx
3183	leaq	64(%rsi),%rsi
3184	jmp	L$seal_sse_128_tail_hash
3185
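# Between 129 and 192 bytes remain: three more ChaCha20 blocks, hashing as above.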
3186L$seal_sse_tail_192:
3187	movdqa	L$chacha20_consts(%rip),%xmm0
3188	movdqa	0+48(%rbp),%xmm4
3189	movdqa	0+64(%rbp),%xmm8
3190	movdqa	%xmm0,%xmm1
3191	movdqa	%xmm4,%xmm5
3192	movdqa	%xmm8,%xmm9
3193	movdqa	%xmm0,%xmm2
3194	movdqa	%xmm4,%xmm6
3195	movdqa	%xmm8,%xmm10
3196	movdqa	0+96(%rbp),%xmm14
3197	paddd	L$sse_inc(%rip),%xmm14
3198	movdqa	%xmm14,%xmm13
3199	paddd	L$sse_inc(%rip),%xmm13
3200	movdqa	%xmm13,%xmm12
3201	paddd	L$sse_inc(%rip),%xmm12
3202	movdqa	%xmm12,0+96(%rbp)
3203	movdqa	%xmm13,0+112(%rbp)
3204	movdqa	%xmm14,0+128(%rbp)
3205
3206L$seal_sse_tail_192_rounds_and_x2hash:
3207	addq	0+0(%rdi),%r10
3208	adcq	8+0(%rdi),%r11
3209	adcq	$1,%r12
3210	movq	0+0+0(%rbp),%rax
3211	movq	%rax,%r15
3212	mulq	%r10
3213	movq	%rax,%r13
3214	movq	%rdx,%r14
3215	movq	0+0+0(%rbp),%rax
3216	mulq	%r11
3217	imulq	%r12,%r15
3218	addq	%rax,%r14
3219	adcq	%rdx,%r15
3220	movq	8+0+0(%rbp),%rax
3221	movq	%rax,%r9
3222	mulq	%r10
3223	addq	%rax,%r14
3224	adcq	$0,%rdx
3225	movq	%rdx,%r10
3226	movq	8+0+0(%rbp),%rax
3227	mulq	%r11
3228	addq	%rax,%r15
3229	adcq	$0,%rdx
3230	imulq	%r12,%r9
3231	addq	%r10,%r15
3232	adcq	%rdx,%r9
3233	movq	%r13,%r10
3234	movq	%r14,%r11
3235	movq	%r15,%r12
3236	andq	$3,%r12
3237	movq	%r15,%r13
3238	andq	$-4,%r13
3239	movq	%r9,%r14
3240	shrdq	$2,%r9,%r15
3241	shrq	$2,%r9
3242	addq	%r13,%r15
3243	adcq	%r14,%r9
3244	addq	%r15,%r10
3245	adcq	%r9,%r11
3246	adcq	$0,%r12
3247
3248	leaq	16(%rdi),%rdi
3249L$seal_sse_tail_192_rounds_and_x1hash:
3250	paddd	%xmm4,%xmm0
3251	pxor	%xmm0,%xmm12
3252	pshufb	L$rol16(%rip),%xmm12
3253	paddd	%xmm12,%xmm8
3254	pxor	%xmm8,%xmm4
3255	movdqa	%xmm4,%xmm3
3256	pslld	$12,%xmm3
3257	psrld	$20,%xmm4
3258	pxor	%xmm3,%xmm4
3259	paddd	%xmm4,%xmm0
3260	pxor	%xmm0,%xmm12
3261	pshufb	L$rol8(%rip),%xmm12
3262	paddd	%xmm12,%xmm8
3263	pxor	%xmm8,%xmm4
3264	movdqa	%xmm4,%xmm3
3265	pslld	$7,%xmm3
3266	psrld	$25,%xmm4
3267	pxor	%xmm3,%xmm4
3268.byte	102,15,58,15,228,4
3269.byte	102,69,15,58,15,192,8
3270.byte	102,69,15,58,15,228,12
3271	paddd	%xmm5,%xmm1
3272	pxor	%xmm1,%xmm13
3273	pshufb	L$rol16(%rip),%xmm13
3274	paddd	%xmm13,%xmm9
3275	pxor	%xmm9,%xmm5
3276	movdqa	%xmm5,%xmm3
3277	pslld	$12,%xmm3
3278	psrld	$20,%xmm5
3279	pxor	%xmm3,%xmm5
3280	paddd	%xmm5,%xmm1
3281	pxor	%xmm1,%xmm13
3282	pshufb	L$rol8(%rip),%xmm13
3283	paddd	%xmm13,%xmm9
3284	pxor	%xmm9,%xmm5
3285	movdqa	%xmm5,%xmm3
3286	pslld	$7,%xmm3
3287	psrld	$25,%xmm5
3288	pxor	%xmm3,%xmm5
3289.byte	102,15,58,15,237,4
3290.byte	102,69,15,58,15,201,8
3291.byte	102,69,15,58,15,237,12
3292	paddd	%xmm6,%xmm2
3293	pxor	%xmm2,%xmm14
3294	pshufb	L$rol16(%rip),%xmm14
3295	paddd	%xmm14,%xmm10
3296	pxor	%xmm10,%xmm6
3297	movdqa	%xmm6,%xmm3
3298	pslld	$12,%xmm3
3299	psrld	$20,%xmm6
3300	pxor	%xmm3,%xmm6
3301	paddd	%xmm6,%xmm2
3302	pxor	%xmm2,%xmm14
3303	pshufb	L$rol8(%rip),%xmm14
3304	paddd	%xmm14,%xmm10
3305	pxor	%xmm10,%xmm6
3306	movdqa	%xmm6,%xmm3
3307	pslld	$7,%xmm3
3308	psrld	$25,%xmm6
3309	pxor	%xmm3,%xmm6
3310.byte	102,15,58,15,246,4
3311.byte	102,69,15,58,15,210,8
3312.byte	102,69,15,58,15,246,12
3313	addq	0+0(%rdi),%r10
3314	adcq	8+0(%rdi),%r11
3315	adcq	$1,%r12
3316	movq	0+0+0(%rbp),%rax
3317	movq	%rax,%r15
3318	mulq	%r10
3319	movq	%rax,%r13
3320	movq	%rdx,%r14
3321	movq	0+0+0(%rbp),%rax
3322	mulq	%r11
3323	imulq	%r12,%r15
3324	addq	%rax,%r14
3325	adcq	%rdx,%r15
3326	movq	8+0+0(%rbp),%rax
3327	movq	%rax,%r9
3328	mulq	%r10
3329	addq	%rax,%r14
3330	adcq	$0,%rdx
3331	movq	%rdx,%r10
3332	movq	8+0+0(%rbp),%rax
3333	mulq	%r11
3334	addq	%rax,%r15
3335	adcq	$0,%rdx
3336	imulq	%r12,%r9
3337	addq	%r10,%r15
3338	adcq	%rdx,%r9
3339	movq	%r13,%r10
3340	movq	%r14,%r11
3341	movq	%r15,%r12
3342	andq	$3,%r12
3343	movq	%r15,%r13
3344	andq	$-4,%r13
3345	movq	%r9,%r14
3346	shrdq	$2,%r9,%r15
3347	shrq	$2,%r9
3348	addq	%r13,%r15
3349	adcq	%r14,%r9
3350	addq	%r15,%r10
3351	adcq	%r9,%r11
3352	adcq	$0,%r12
3353	paddd	%xmm4,%xmm0
3354	pxor	%xmm0,%xmm12
3355	pshufb	L$rol16(%rip),%xmm12
3356	paddd	%xmm12,%xmm8
3357	pxor	%xmm8,%xmm4
3358	movdqa	%xmm4,%xmm3
3359	pslld	$12,%xmm3
3360	psrld	$20,%xmm4
3361	pxor	%xmm3,%xmm4
3362	paddd	%xmm4,%xmm0
3363	pxor	%xmm0,%xmm12
3364	pshufb	L$rol8(%rip),%xmm12
3365	paddd	%xmm12,%xmm8
3366	pxor	%xmm8,%xmm4
3367	movdqa	%xmm4,%xmm3
3368	pslld	$7,%xmm3
3369	psrld	$25,%xmm4
3370	pxor	%xmm3,%xmm4
3371.byte	102,15,58,15,228,12
3372.byte	102,69,15,58,15,192,8
3373.byte	102,69,15,58,15,228,4
3374	paddd	%xmm5,%xmm1
3375	pxor	%xmm1,%xmm13
3376	pshufb	L$rol16(%rip),%xmm13
3377	paddd	%xmm13,%xmm9
3378	pxor	%xmm9,%xmm5
3379	movdqa	%xmm5,%xmm3
3380	pslld	$12,%xmm3
3381	psrld	$20,%xmm5
3382	pxor	%xmm3,%xmm5
3383	paddd	%xmm5,%xmm1
3384	pxor	%xmm1,%xmm13
3385	pshufb	L$rol8(%rip),%xmm13
3386	paddd	%xmm13,%xmm9
3387	pxor	%xmm9,%xmm5
3388	movdqa	%xmm5,%xmm3
3389	pslld	$7,%xmm3
3390	psrld	$25,%xmm5
3391	pxor	%xmm3,%xmm5
3392.byte	102,15,58,15,237,12
3393.byte	102,69,15,58,15,201,8
3394.byte	102,69,15,58,15,237,4
3395	paddd	%xmm6,%xmm2
3396	pxor	%xmm2,%xmm14
3397	pshufb	L$rol16(%rip),%xmm14
3398	paddd	%xmm14,%xmm10
3399	pxor	%xmm10,%xmm6
3400	movdqa	%xmm6,%xmm3
3401	pslld	$12,%xmm3
3402	psrld	$20,%xmm6
3403	pxor	%xmm3,%xmm6
3404	paddd	%xmm6,%xmm2
3405	pxor	%xmm2,%xmm14
3406	pshufb	L$rol8(%rip),%xmm14
3407	paddd	%xmm14,%xmm10
3408	pxor	%xmm10,%xmm6
3409	movdqa	%xmm6,%xmm3
3410	pslld	$7,%xmm3
3411	psrld	$25,%xmm6
3412	pxor	%xmm3,%xmm6
3413.byte	102,15,58,15,246,12
3414.byte	102,69,15,58,15,210,8
3415.byte	102,69,15,58,15,246,4
3416
3417	leaq	16(%rdi),%rdi
3418	decq	%rcx
3419	jg	L$seal_sse_tail_192_rounds_and_x2hash
3420	decq	%r8
3421	jge	L$seal_sse_tail_192_rounds_and_x1hash
3422	paddd	L$chacha20_consts(%rip),%xmm2
3423	paddd	0+48(%rbp),%xmm6
3424	paddd	0+64(%rbp),%xmm10
3425	paddd	0+128(%rbp),%xmm14
3426	paddd	L$chacha20_consts(%rip),%xmm1
3427	paddd	0+48(%rbp),%xmm5
3428	paddd	0+64(%rbp),%xmm9
3429	paddd	0+112(%rbp),%xmm13
3430	paddd	L$chacha20_consts(%rip),%xmm0
3431	paddd	0+48(%rbp),%xmm4
3432	paddd	0+64(%rbp),%xmm8
3433	paddd	0+96(%rbp),%xmm12
3434	movdqu	0 + 0(%rsi),%xmm3
3435	movdqu	16 + 0(%rsi),%xmm7
3436	movdqu	32 + 0(%rsi),%xmm11
3437	movdqu	48 + 0(%rsi),%xmm15
3438	pxor	%xmm3,%xmm2
3439	pxor	%xmm7,%xmm6
3440	pxor	%xmm11,%xmm10
3441	pxor	%xmm14,%xmm15
3442	movdqu	%xmm2,0 + 0(%rdi)
3443	movdqu	%xmm6,16 + 0(%rdi)
3444	movdqu	%xmm10,32 + 0(%rdi)
3445	movdqu	%xmm15,48 + 0(%rdi)
3446	movdqu	0 + 64(%rsi),%xmm3
3447	movdqu	16 + 64(%rsi),%xmm7
3448	movdqu	32 + 64(%rsi),%xmm11
3449	movdqu	48 + 64(%rsi),%xmm15
3450	pxor	%xmm3,%xmm1
3451	pxor	%xmm7,%xmm5
3452	pxor	%xmm11,%xmm9
3453	pxor	%xmm13,%xmm15
3454	movdqu	%xmm1,0 + 64(%rdi)
3455	movdqu	%xmm5,16 + 64(%rdi)
3456	movdqu	%xmm9,32 + 64(%rdi)
3457	movdqu	%xmm15,48 + 64(%rdi)
3458
3459	movq	$128,%rcx
3460	subq	$128,%rbx
3461	leaq	128(%rsi),%rsi
3462
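# Hash any ciphertext still outstanding (%rcx bytes, 16 at a time) before
# dealing with the final partial data.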
3463L$seal_sse_128_tail_hash:
3464	cmpq	$16,%rcx
3465	jb	L$seal_sse_128_tail_xor
3466	addq	0+0(%rdi),%r10
3467	adcq	8+0(%rdi),%r11
3468	adcq	$1,%r12
3469	movq	0+0+0(%rbp),%rax
3470	movq	%rax,%r15
3471	mulq	%r10
3472	movq	%rax,%r13
3473	movq	%rdx,%r14
3474	movq	0+0+0(%rbp),%rax
3475	mulq	%r11
3476	imulq	%r12,%r15
3477	addq	%rax,%r14
3478	adcq	%rdx,%r15
3479	movq	8+0+0(%rbp),%rax
3480	movq	%rax,%r9
3481	mulq	%r10
3482	addq	%rax,%r14
3483	adcq	$0,%rdx
3484	movq	%rdx,%r10
3485	movq	8+0+0(%rbp),%rax
3486	mulq	%r11
3487	addq	%rax,%r15
3488	adcq	$0,%rdx
3489	imulq	%r12,%r9
3490	addq	%r10,%r15
3491	adcq	%rdx,%r9
3492	movq	%r13,%r10
3493	movq	%r14,%r11
3494	movq	%r15,%r12
3495	andq	$3,%r12
3496	movq	%r15,%r13
3497	andq	$-4,%r13
3498	movq	%r9,%r14
3499	shrdq	$2,%r9,%r15
3500	shrq	$2,%r9
3501	addq	%r13,%r15
3502	adcq	%r14,%r9
3503	addq	%r15,%r10
3504	adcq	%r9,%r11
3505	adcq	$0,%r12
3506
3507	subq	$16,%rcx
3508	leaq	16(%rdi),%rdi
3509	jmp	L$seal_sse_128_tail_hash
3510
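# Encrypt the remaining input 16 bytes at a time with the keystream row held
# in xmm0, hashing each ciphertext block as it is written; the register
# shuffle at the bottom rotates the next keystream row into xmm0.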
3511L$seal_sse_128_tail_xor:
3512	cmpq	$16,%rbx
3513	jb	L$seal_sse_tail_16
3514	subq	$16,%rbx
3515
3516	movdqu	0(%rsi),%xmm3
3517	pxor	%xmm3,%xmm0
3518	movdqu	%xmm0,0(%rdi)
3519
3520	addq	0(%rdi),%r10
3521	adcq	8(%rdi),%r11
3522	adcq	$1,%r12
3523	leaq	16(%rsi),%rsi
3524	leaq	16(%rdi),%rdi
3525	movq	0+0+0(%rbp),%rax
3526	movq	%rax,%r15
3527	mulq	%r10
3528	movq	%rax,%r13
3529	movq	%rdx,%r14
3530	movq	0+0+0(%rbp),%rax
3531	mulq	%r11
3532	imulq	%r12,%r15
3533	addq	%rax,%r14
3534	adcq	%rdx,%r15
3535	movq	8+0+0(%rbp),%rax
3536	movq	%rax,%r9
3537	mulq	%r10
3538	addq	%rax,%r14
3539	adcq	$0,%rdx
3540	movq	%rdx,%r10
3541	movq	8+0+0(%rbp),%rax
3542	mulq	%r11
3543	addq	%rax,%r15
3544	adcq	$0,%rdx
3545	imulq	%r12,%r9
3546	addq	%r10,%r15
3547	adcq	%rdx,%r9
3548	movq	%r13,%r10
3549	movq	%r14,%r11
3550	movq	%r15,%r12
3551	andq	$3,%r12
3552	movq	%r15,%r13
3553	andq	$-4,%r13
3554	movq	%r9,%r14
3555	shrdq	$2,%r9,%r15
3556	shrq	$2,%r9
3557	addq	%r13,%r15
3558	adcq	%r14,%r9
3559	addq	%r15,%r10
3560	adcq	%r9,%r11
3561	adcq	$0,%r12
3562
3563
3564	movdqa	%xmm4,%xmm0
3565	movdqa	%xmm8,%xmm4
3566	movdqa	%xmm12,%xmm8
3567	movdqa	%xmm1,%xmm12
3568	movdqa	%xmm5,%xmm1
3569	movdqa	%xmm9,%xmm5
3570	movdqa	%xmm13,%xmm9
3571	jmp	L$seal_sse_128_tail_xor
3572
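# Fewer than 16 bytes of plaintext remain: gather them into xmm15 one byte at
# a time, encrypt with the last keystream row (xmm0) and write the ciphertext
# out byte by byte.  The block is not hashed yet; it may first be merged with
# "extra" input described by the caller's parameter block at 288+32(%rsp)
# (pointer at 48(%r9), length at 56(%r9)).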
3573L$seal_sse_tail_16:
3574	testq	%rbx,%rbx
3575	jz	L$process_blocks_of_extra_in
3576
3577	movq	%rbx,%r8
3578	movq	%rbx,%rcx
3579	leaq	-1(%rsi,%rbx,1),%rsi
3580	pxor	%xmm15,%xmm15
3581L$seal_sse_tail_16_compose:
3582	pslldq	$1,%xmm15
3583	pinsrb	$0,(%rsi),%xmm15
3584	leaq	-1(%rsi),%rsi
3585	decq	%rcx
3586	jne	L$seal_sse_tail_16_compose
3587
3588
3589	pxor	%xmm0,%xmm15
3590
3591
3592	movq	%rbx,%rcx
3593	movdqu	%xmm15,%xmm0
3594L$seal_sse_tail_16_extract:
3595	pextrb	$0,%xmm0,(%rdi)
3596	psrldq	$1,%xmm0
3597	addq	$1,%rdi
3598	subq	$1,%rcx
3599	jnz	L$seal_sse_tail_16_extract
3600
3601
3602
3603
3604
3605
3606
3607
3608	movq	288 + 0 + 32(%rsp),%r9
3609	movq	56(%r9),%r14
3610	movq	48(%r9),%r13
3611	testq	%r14,%r14
3612	jz	L$process_partial_block
3613
3614	movq	$16,%r15
3615	subq	%rbx,%r15
3616	cmpq	%r15,%r14
3617
3618	jge	L$load_extra_in
3619	movq	%r14,%r15
3620
3621L$load_extra_in:
3622
3623
3624	leaq	-1(%r13,%r15,1),%rsi
3625
3626
3627	addq	%r15,%r13
3628	subq	%r15,%r14
3629	movq	%r13,48(%r9)
3630	movq	%r14,56(%r9)
3631
3632
3633
3634	addq	%r15,%r8
3635
3636
3637	pxor	%xmm11,%xmm11
3638L$load_extra_load_loop:
3639	pslldq	$1,%xmm11
3640	pinsrb	$0,(%rsi),%xmm11
3641	leaq	-1(%rsi),%rsi
3642	subq	$1,%r15
3643	jnz	L$load_extra_load_loop
3644
3645
3646
3647
3648	movq	%rbx,%r15
3649
3650L$load_extra_shift_loop:
3651	pslldq	$1,%xmm11
3652	subq	$1,%r15
3653	jnz	L$load_extra_shift_loop
3654
3655
3656
3657
3658	leaq	L$and_masks(%rip),%r15
3659	shlq	$4,%rbx
3660	pand	-16(%r15,%rbx,1),%xmm15
3661
3662
3663	por	%xmm11,%xmm15
3664
3665
3666
3667.byte	102,77,15,126,253
3668	pextrq	$1,%xmm15,%r14
3669	addq	%r13,%r10
3670	adcq	%r14,%r11
3671	adcq	$1,%r12
3672	movq	0+0+0(%rbp),%rax
3673	movq	%rax,%r15
3674	mulq	%r10
3675	movq	%rax,%r13
3676	movq	%rdx,%r14
3677	movq	0+0+0(%rbp),%rax
3678	mulq	%r11
3679	imulq	%r12,%r15
3680	addq	%rax,%r14
3681	adcq	%rdx,%r15
3682	movq	8+0+0(%rbp),%rax
3683	movq	%rax,%r9
3684	mulq	%r10
3685	addq	%rax,%r14
3686	adcq	$0,%rdx
3687	movq	%rdx,%r10
3688	movq	8+0+0(%rbp),%rax
3689	mulq	%r11
3690	addq	%rax,%r15
3691	adcq	$0,%rdx
3692	imulq	%r12,%r9
3693	addq	%r10,%r15
3694	adcq	%rdx,%r9
3695	movq	%r13,%r10
3696	movq	%r14,%r11
3697	movq	%r15,%r12
3698	andq	$3,%r12
3699	movq	%r15,%r13
3700	andq	$-4,%r13
3701	movq	%r9,%r14
3702	shrdq	$2,%r9,%r15
3703	shrq	$2,%r9
3704	addq	%r13,%r15
3705	adcq	%r14,%r9
3706	addq	%r15,%r10
3707	adcq	%r9,%r11
3708	adcq	$0,%r12
3709
3710
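# No leftover ciphertext bytes: hash the extra input directly, 16 bytes at a
# time, then gather its trailing bytes into xmm15 and hash them as one final
# partial block (L$process_partial_block masks with L$and_masks).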
3711L$process_blocks_of_extra_in:
3712
3713	movq	288+32+0(%rsp),%r9
3714	movq	48(%r9),%rsi
3715	movq	56(%r9),%r8
3716	movq	%r8,%rcx
3717	shrq	$4,%r8
3718
3719L$process_extra_hash_loop:
3720	jz	process_extra_in_trailer
3721	addq	0+0(%rsi),%r10
3722	adcq	8+0(%rsi),%r11
3723	adcq	$1,%r12
3724	movq	0+0+0(%rbp),%rax
3725	movq	%rax,%r15
3726	mulq	%r10
3727	movq	%rax,%r13
3728	movq	%rdx,%r14
3729	movq	0+0+0(%rbp),%rax
3730	mulq	%r11
3731	imulq	%r12,%r15
3732	addq	%rax,%r14
3733	adcq	%rdx,%r15
3734	movq	8+0+0(%rbp),%rax
3735	movq	%rax,%r9
3736	mulq	%r10
3737	addq	%rax,%r14
3738	adcq	$0,%rdx
3739	movq	%rdx,%r10
3740	movq	8+0+0(%rbp),%rax
3741	mulq	%r11
3742	addq	%rax,%r15
3743	adcq	$0,%rdx
3744	imulq	%r12,%r9
3745	addq	%r10,%r15
3746	adcq	%rdx,%r9
3747	movq	%r13,%r10
3748	movq	%r14,%r11
3749	movq	%r15,%r12
3750	andq	$3,%r12
3751	movq	%r15,%r13
3752	andq	$-4,%r13
3753	movq	%r9,%r14
3754	shrdq	$2,%r9,%r15
3755	shrq	$2,%r9
3756	addq	%r13,%r15
3757	adcq	%r14,%r9
3758	addq	%r15,%r10
3759	adcq	%r9,%r11
3760	adcq	$0,%r12
3761
3762	leaq	16(%rsi),%rsi
3763	subq	$1,%r8
3764	jmp	L$process_extra_hash_loop
3765process_extra_in_trailer:
3766	andq	$15,%rcx
3767	movq	%rcx,%rbx
3768	jz	L$do_length_block
3769	leaq	-1(%rsi,%rcx,1),%rsi
3770
3771L$process_extra_in_trailer_load:
3772	pslldq	$1,%xmm15
3773	pinsrb	$0,(%rsi),%xmm15
3774	leaq	-1(%rsi),%rsi
3775	subq	$1,%rcx
3776	jnz	L$process_extra_in_trailer_load
3777
3778L$process_partial_block:
3779
3780	leaq	L$and_masks(%rip),%r15
3781	shlq	$4,%rbx
3782	pand	-16(%r15,%rbx,1),%xmm15
3783.byte	102,77,15,126,253
3784	pextrq	$1,%xmm15,%r14
3785	addq	%r13,%r10
3786	adcq	%r14,%r11
3787	adcq	$1,%r12
3788	movq	0+0+0(%rbp),%rax
3789	movq	%rax,%r15
3790	mulq	%r10
3791	movq	%rax,%r13
3792	movq	%rdx,%r14
3793	movq	0+0+0(%rbp),%rax
3794	mulq	%r11
3795	imulq	%r12,%r15
3796	addq	%rax,%r14
3797	adcq	%rdx,%r15
3798	movq	8+0+0(%rbp),%rax
3799	movq	%rax,%r9
3800	mulq	%r10
3801	addq	%rax,%r14
3802	adcq	$0,%rdx
3803	movq	%rdx,%r10
3804	movq	8+0+0(%rbp),%rax
3805	mulq	%r11
3806	addq	%rax,%r15
3807	adcq	$0,%rdx
3808	imulq	%r12,%r9
3809	addq	%r10,%r15
3810	adcq	%rdx,%r9
3811	movq	%r13,%r10
3812	movq	%r14,%r11
3813	movq	%r15,%r12
3814	andq	$3,%r12
3815	movq	%r15,%r13
3816	andq	$-4,%r13
3817	movq	%r9,%r14
3818	shrdq	$2,%r9,%r15
3819	shrq	$2,%r9
3820	addq	%r13,%r15
3821	adcq	%r14,%r9
3822	addq	%r15,%r10
3823	adcq	%r9,%r11
3824	adcq	$0,%r12
3825
3826
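# Final Poly1305 steps: absorb the block holding the AD and ciphertext lengths
# (kept at 32(%rbp)), reduce the accumulator to its canonical value modulo
# 2^130-5, add the key's s half from 16(%rbp), and store the 16-byte tag at
# the address popped into %r9 before restoring callee-saved registers.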
3827L$do_length_block:
3828	addq	0+0+32(%rbp),%r10
3829	adcq	8+0+32(%rbp),%r11
3830	adcq	$1,%r12
3831	movq	0+0+0(%rbp),%rax
3832	movq	%rax,%r15
3833	mulq	%r10
3834	movq	%rax,%r13
3835	movq	%rdx,%r14
3836	movq	0+0+0(%rbp),%rax
3837	mulq	%r11
3838	imulq	%r12,%r15
3839	addq	%rax,%r14
3840	adcq	%rdx,%r15
3841	movq	8+0+0(%rbp),%rax
3842	movq	%rax,%r9
3843	mulq	%r10
3844	addq	%rax,%r14
3845	adcq	$0,%rdx
3846	movq	%rdx,%r10
3847	movq	8+0+0(%rbp),%rax
3848	mulq	%r11
3849	addq	%rax,%r15
3850	adcq	$0,%rdx
3851	imulq	%r12,%r9
3852	addq	%r10,%r15
3853	adcq	%rdx,%r9
3854	movq	%r13,%r10
3855	movq	%r14,%r11
3856	movq	%r15,%r12
3857	andq	$3,%r12
3858	movq	%r15,%r13
3859	andq	$-4,%r13
3860	movq	%r9,%r14
3861	shrdq	$2,%r9,%r15
3862	shrq	$2,%r9
3863	addq	%r13,%r15
3864	adcq	%r14,%r9
3865	addq	%r15,%r10
3866	adcq	%r9,%r11
3867	adcq	$0,%r12
3868
3869
3870	movq	%r10,%r13
3871	movq	%r11,%r14
3872	movq	%r12,%r15
3873	subq	$-5,%r10
3874	sbbq	$-1,%r11
3875	sbbq	$3,%r12
3876	cmovcq	%r13,%r10
3877	cmovcq	%r14,%r11
3878	cmovcq	%r15,%r12
3879
3880	addq	0+0+16(%rbp),%r10
3881	adcq	8+0+16(%rbp),%r11
3882
3883
3884	addq	$288 + 0 + 32,%rsp
3885
3886
3887	popq	%r9
3888
3889	movq	%r10,(%r9)
3890	movq	%r11,8(%r9)
3891	popq	%r15
3892
3893	popq	%r14
3894
3895	popq	%r13
3896
3897	popq	%r12
3898
3899	popq	%rbx
3900
3901	popq	%rbp
3902
3903	.byte	0xf3,0xc3
3904
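# Short-input seal path (plaintext fits in the 128-byte tail code): generate
# three ChaCha20 blocks in xmm0-2; the clamped counter-0 block supplies the
# Poly1305 key, and the rest is handled by L$seal_sse_128_tail_xor above.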
3905L$seal_sse_128:
3906
3907	movdqu	L$chacha20_consts(%rip),%xmm0
3908	movdqa	%xmm0,%xmm1
3909	movdqa	%xmm0,%xmm2
3910	movdqu	0(%r9),%xmm4
3911	movdqa	%xmm4,%xmm5
3912	movdqa	%xmm4,%xmm6
3913	movdqu	16(%r9),%xmm8
3914	movdqa	%xmm8,%xmm9
3915	movdqa	%xmm8,%xmm10
3916	movdqu	32(%r9),%xmm14
3917	movdqa	%xmm14,%xmm12
3918	paddd	L$sse_inc(%rip),%xmm12
3919	movdqa	%xmm12,%xmm13
3920	paddd	L$sse_inc(%rip),%xmm13
3921	movdqa	%xmm4,%xmm7
3922	movdqa	%xmm8,%xmm11
3923	movdqa	%xmm12,%xmm15
3924	movq	$10,%r10
3925
3926L$seal_sse_128_rounds:
3927	paddd	%xmm4,%xmm0
3928	pxor	%xmm0,%xmm12
3929	pshufb	L$rol16(%rip),%xmm12
3930	paddd	%xmm12,%xmm8
3931	pxor	%xmm8,%xmm4
3932	movdqa	%xmm4,%xmm3
3933	pslld	$12,%xmm3
3934	psrld	$20,%xmm4
3935	pxor	%xmm3,%xmm4
3936	paddd	%xmm4,%xmm0
3937	pxor	%xmm0,%xmm12
3938	pshufb	L$rol8(%rip),%xmm12
3939	paddd	%xmm12,%xmm8
3940	pxor	%xmm8,%xmm4
3941	movdqa	%xmm4,%xmm3
3942	pslld	$7,%xmm3
3943	psrld	$25,%xmm4
3944	pxor	%xmm3,%xmm4
3945.byte	102,15,58,15,228,4
3946.byte	102,69,15,58,15,192,8
3947.byte	102,69,15,58,15,228,12
3948	paddd	%xmm5,%xmm1
3949	pxor	%xmm1,%xmm13
3950	pshufb	L$rol16(%rip),%xmm13
3951	paddd	%xmm13,%xmm9
3952	pxor	%xmm9,%xmm5
3953	movdqa	%xmm5,%xmm3
3954	pslld	$12,%xmm3
3955	psrld	$20,%xmm5
3956	pxor	%xmm3,%xmm5
3957	paddd	%xmm5,%xmm1
3958	pxor	%xmm1,%xmm13
3959	pshufb	L$rol8(%rip),%xmm13
3960	paddd	%xmm13,%xmm9
3961	pxor	%xmm9,%xmm5
3962	movdqa	%xmm5,%xmm3
3963	pslld	$7,%xmm3
3964	psrld	$25,%xmm5
3965	pxor	%xmm3,%xmm5
3966.byte	102,15,58,15,237,4
3967.byte	102,69,15,58,15,201,8
3968.byte	102,69,15,58,15,237,12
3969	paddd	%xmm6,%xmm2
3970	pxor	%xmm2,%xmm14
3971	pshufb	L$rol16(%rip),%xmm14
3972	paddd	%xmm14,%xmm10
3973	pxor	%xmm10,%xmm6
3974	movdqa	%xmm6,%xmm3
3975	pslld	$12,%xmm3
3976	psrld	$20,%xmm6
3977	pxor	%xmm3,%xmm6
3978	paddd	%xmm6,%xmm2
3979	pxor	%xmm2,%xmm14
3980	pshufb	L$rol8(%rip),%xmm14
3981	paddd	%xmm14,%xmm10
3982	pxor	%xmm10,%xmm6
3983	movdqa	%xmm6,%xmm3
3984	pslld	$7,%xmm3
3985	psrld	$25,%xmm6
3986	pxor	%xmm3,%xmm6
3987.byte	102,15,58,15,246,4
3988.byte	102,69,15,58,15,210,8
3989.byte	102,69,15,58,15,246,12
3990	paddd	%xmm4,%xmm0
3991	pxor	%xmm0,%xmm12
3992	pshufb	L$rol16(%rip),%xmm12
3993	paddd	%xmm12,%xmm8
3994	pxor	%xmm8,%xmm4
3995	movdqa	%xmm4,%xmm3
3996	pslld	$12,%xmm3
3997	psrld	$20,%xmm4
3998	pxor	%xmm3,%xmm4
3999	paddd	%xmm4,%xmm0
4000	pxor	%xmm0,%xmm12
4001	pshufb	L$rol8(%rip),%xmm12
4002	paddd	%xmm12,%xmm8
4003	pxor	%xmm8,%xmm4
4004	movdqa	%xmm4,%xmm3
4005	pslld	$7,%xmm3
4006	psrld	$25,%xmm4
4007	pxor	%xmm3,%xmm4
4008.byte	102,15,58,15,228,12
4009.byte	102,69,15,58,15,192,8
4010.byte	102,69,15,58,15,228,4
4011	paddd	%xmm5,%xmm1
4012	pxor	%xmm1,%xmm13
4013	pshufb	L$rol16(%rip),%xmm13
4014	paddd	%xmm13,%xmm9
4015	pxor	%xmm9,%xmm5
4016	movdqa	%xmm5,%xmm3
4017	pslld	$12,%xmm3
4018	psrld	$20,%xmm5
4019	pxor	%xmm3,%xmm5
4020	paddd	%xmm5,%xmm1
4021	pxor	%xmm1,%xmm13
4022	pshufb	L$rol8(%rip),%xmm13
4023	paddd	%xmm13,%xmm9
4024	pxor	%xmm9,%xmm5
4025	movdqa	%xmm5,%xmm3
4026	pslld	$7,%xmm3
4027	psrld	$25,%xmm5
4028	pxor	%xmm3,%xmm5
4029.byte	102,15,58,15,237,12
4030.byte	102,69,15,58,15,201,8
4031.byte	102,69,15,58,15,237,4
4032	paddd	%xmm6,%xmm2
4033	pxor	%xmm2,%xmm14
4034	pshufb	L$rol16(%rip),%xmm14
4035	paddd	%xmm14,%xmm10
4036	pxor	%xmm10,%xmm6
4037	movdqa	%xmm6,%xmm3
4038	pslld	$12,%xmm3
4039	psrld	$20,%xmm6
4040	pxor	%xmm3,%xmm6
4041	paddd	%xmm6,%xmm2
4042	pxor	%xmm2,%xmm14
4043	pshufb	L$rol8(%rip),%xmm14
4044	paddd	%xmm14,%xmm10
4045	pxor	%xmm10,%xmm6
4046	movdqa	%xmm6,%xmm3
4047	pslld	$7,%xmm3
4048	psrld	$25,%xmm6
4049	pxor	%xmm3,%xmm6
4050.byte	102,15,58,15,246,12
4051.byte	102,69,15,58,15,210,8
4052.byte	102,69,15,58,15,246,4
4053
4054	decq	%r10
4055	jnz	L$seal_sse_128_rounds
4056	paddd	L$chacha20_consts(%rip),%xmm0
4057	paddd	L$chacha20_consts(%rip),%xmm1
4058	paddd	L$chacha20_consts(%rip),%xmm2
4059	paddd	%xmm7,%xmm4
4060	paddd	%xmm7,%xmm5
4061	paddd	%xmm7,%xmm6
4062	paddd	%xmm11,%xmm8
4063	paddd	%xmm11,%xmm9
4064	paddd	%xmm15,%xmm12
4065	paddd	L$sse_inc(%rip),%xmm15
4066	paddd	%xmm15,%xmm13
4067
4068	pand	L$clamp(%rip),%xmm2
4069	movdqa	%xmm2,0+0(%rbp)
4070	movdqa	%xmm6,0+16(%rbp)
4071
4072	movq	%r8,%r8
4073	call	poly_hash_ad_internal
4074	jmp	L$seal_sse_128_tail_xor
4075
4076
4077
4078
4079
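# AVX2 "open" (decrypt) path.  Each ymm register carries two ChaCha20 states,
# so the wide loops below produce up to 8 blocks (512 bytes) of keystream per
# pass, with Poly1305 hashing of the ciphertext interleaved ahead of the xor.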
4080.p2align	6
4081chacha20_poly1305_open_avx2:
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094	vzeroupper
4095	vmovdqa	L$chacha20_consts(%rip),%ymm0
4096	vbroadcasti128	0(%r9),%ymm4
4097	vbroadcasti128	16(%r9),%ymm8
4098	vbroadcasti128	32(%r9),%ymm12
4099	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
4100	cmpq	$192,%rbx
4101	jbe	L$open_avx2_192
4102	cmpq	$320,%rbx
4103	jbe	L$open_avx2_320
4104
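# First 2-block state (lane counters offset by 0 and 1 via L$avx2_init): after
# 10 double rounds the low lane, clamped, becomes the Poly1305 r||s key and
# the high lane decrypts the first 64 bytes once they have been hashed.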
4105	vmovdqa	%ymm4,0+64(%rbp)
4106	vmovdqa	%ymm8,0+96(%rbp)
4107	vmovdqa	%ymm12,0+160(%rbp)
4108	movq	$10,%r10
4109L$open_avx2_init_rounds:
4110	vpaddd	%ymm4,%ymm0,%ymm0
4111	vpxor	%ymm0,%ymm12,%ymm12
4112	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4113	vpaddd	%ymm12,%ymm8,%ymm8
4114	vpxor	%ymm8,%ymm4,%ymm4
4115	vpsrld	$20,%ymm4,%ymm3
4116	vpslld	$12,%ymm4,%ymm4
4117	vpxor	%ymm3,%ymm4,%ymm4
4118	vpaddd	%ymm4,%ymm0,%ymm0
4119	vpxor	%ymm0,%ymm12,%ymm12
4120	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4121	vpaddd	%ymm12,%ymm8,%ymm8
4122	vpxor	%ymm8,%ymm4,%ymm4
4123	vpslld	$7,%ymm4,%ymm3
4124	vpsrld	$25,%ymm4,%ymm4
4125	vpxor	%ymm3,%ymm4,%ymm4
4126	vpalignr	$12,%ymm12,%ymm12,%ymm12
4127	vpalignr	$8,%ymm8,%ymm8,%ymm8
4128	vpalignr	$4,%ymm4,%ymm4,%ymm4
4129	vpaddd	%ymm4,%ymm0,%ymm0
4130	vpxor	%ymm0,%ymm12,%ymm12
4131	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4132	vpaddd	%ymm12,%ymm8,%ymm8
4133	vpxor	%ymm8,%ymm4,%ymm4
4134	vpsrld	$20,%ymm4,%ymm3
4135	vpslld	$12,%ymm4,%ymm4
4136	vpxor	%ymm3,%ymm4,%ymm4
4137	vpaddd	%ymm4,%ymm0,%ymm0
4138	vpxor	%ymm0,%ymm12,%ymm12
4139	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4140	vpaddd	%ymm12,%ymm8,%ymm8
4141	vpxor	%ymm8,%ymm4,%ymm4
4142	vpslld	$7,%ymm4,%ymm3
4143	vpsrld	$25,%ymm4,%ymm4
4144	vpxor	%ymm3,%ymm4,%ymm4
4145	vpalignr	$4,%ymm12,%ymm12,%ymm12
4146	vpalignr	$8,%ymm8,%ymm8,%ymm8
4147	vpalignr	$12,%ymm4,%ymm4,%ymm4
4148
4149	decq	%r10
4150	jne	L$open_avx2_init_rounds
4151	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4152	vpaddd	0+64(%rbp),%ymm4,%ymm4
4153	vpaddd	0+96(%rbp),%ymm8,%ymm8
4154	vpaddd	0+160(%rbp),%ymm12,%ymm12
4155
4156	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4157
4158	vpand	L$clamp(%rip),%ymm3,%ymm3
4159	vmovdqa	%ymm3,0+0(%rbp)
4160
4161	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
4162	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
4163
4164	movq	%r8,%r8
4165	call	poly_hash_ad_internal
4166
4167	xorq	%rcx,%rcx
4168L$open_avx2_init_hash:
4169	addq	0+0(%rsi,%rcx,1),%r10
4170	adcq	8+0(%rsi,%rcx,1),%r11
4171	adcq	$1,%r12
4172	movq	0+0+0(%rbp),%rax
4173	movq	%rax,%r15
4174	mulq	%r10
4175	movq	%rax,%r13
4176	movq	%rdx,%r14
4177	movq	0+0+0(%rbp),%rax
4178	mulq	%r11
4179	imulq	%r12,%r15
4180	addq	%rax,%r14
4181	adcq	%rdx,%r15
4182	movq	8+0+0(%rbp),%rax
4183	movq	%rax,%r9
4184	mulq	%r10
4185	addq	%rax,%r14
4186	adcq	$0,%rdx
4187	movq	%rdx,%r10
4188	movq	8+0+0(%rbp),%rax
4189	mulq	%r11
4190	addq	%rax,%r15
4191	adcq	$0,%rdx
4192	imulq	%r12,%r9
4193	addq	%r10,%r15
4194	adcq	%rdx,%r9
4195	movq	%r13,%r10
4196	movq	%r14,%r11
4197	movq	%r15,%r12
4198	andq	$3,%r12
4199	movq	%r15,%r13
4200	andq	$-4,%r13
4201	movq	%r9,%r14
4202	shrdq	$2,%r9,%r15
4203	shrq	$2,%r9
4204	addq	%r13,%r15
4205	adcq	%r14,%r9
4206	addq	%r15,%r10
4207	adcq	%r9,%r11
4208	adcq	$0,%r12
4209
4210	addq	$16,%rcx
4211	cmpq	$64,%rcx
4212	jne	L$open_avx2_init_hash
4213
4214	vpxor	0(%rsi),%ymm0,%ymm0
4215	vpxor	32(%rsi),%ymm4,%ymm4
4216
4217	vmovdqu	%ymm0,0(%rdi)
4218	vmovdqu	%ymm4,32(%rdi)
4219	leaq	64(%rsi),%rsi
4220	leaq	64(%rdi),%rdi
4221	subq	$64,%rbx
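# Main AVX2 open loop: needs at least 512 bytes of ciphertext.  Each pass of
# L$open_avx2_main_loop_rounds runs one ChaCha20 double round over all eight
# blocks and hashes three 16-byte ciphertext blocks with Poly1305 (mulx form);
# the final 32 bytes of the 512-byte chunk are hashed while the keystream is
# xored out.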
4222L$open_avx2_main_loop:
4223
4224	cmpq	$512,%rbx
4225	jb	L$open_avx2_main_loop_done
4226	vmovdqa	L$chacha20_consts(%rip),%ymm0
4227	vmovdqa	0+64(%rbp),%ymm4
4228	vmovdqa	0+96(%rbp),%ymm8
4229	vmovdqa	%ymm0,%ymm1
4230	vmovdqa	%ymm4,%ymm5
4231	vmovdqa	%ymm8,%ymm9
4232	vmovdqa	%ymm0,%ymm2
4233	vmovdqa	%ymm4,%ymm6
4234	vmovdqa	%ymm8,%ymm10
4235	vmovdqa	%ymm0,%ymm3
4236	vmovdqa	%ymm4,%ymm7
4237	vmovdqa	%ymm8,%ymm11
4238	vmovdqa	L$avx2_inc(%rip),%ymm12
4239	vpaddd	0+160(%rbp),%ymm12,%ymm15
4240	vpaddd	%ymm15,%ymm12,%ymm14
4241	vpaddd	%ymm14,%ymm12,%ymm13
4242	vpaddd	%ymm13,%ymm12,%ymm12
4243	vmovdqa	%ymm15,0+256(%rbp)
4244	vmovdqa	%ymm14,0+224(%rbp)
4245	vmovdqa	%ymm13,0+192(%rbp)
4246	vmovdqa	%ymm12,0+160(%rbp)
4247
4248	xorq	%rcx,%rcx
4249L$open_avx2_main_loop_rounds:
4250	addq	0+0(%rsi,%rcx,1),%r10
4251	adcq	8+0(%rsi,%rcx,1),%r11
4252	adcq	$1,%r12
4253	vmovdqa	%ymm8,0+128(%rbp)
4254	vmovdqa	L$rol16(%rip),%ymm8
4255	vpaddd	%ymm7,%ymm3,%ymm3
4256	vpaddd	%ymm6,%ymm2,%ymm2
4257	vpaddd	%ymm5,%ymm1,%ymm1
4258	vpaddd	%ymm4,%ymm0,%ymm0
4259	vpxor	%ymm3,%ymm15,%ymm15
4260	vpxor	%ymm2,%ymm14,%ymm14
4261	vpxor	%ymm1,%ymm13,%ymm13
4262	vpxor	%ymm0,%ymm12,%ymm12
4263	movq	0+0+0(%rbp),%rdx
4264	movq	%rdx,%r15
4265	mulxq	%r10,%r13,%r14
4266	mulxq	%r11,%rax,%rdx
4267	imulq	%r12,%r15
4268	addq	%rax,%r14
4269	adcq	%rdx,%r15
4270	vpshufb	%ymm8,%ymm15,%ymm15
4271	vpshufb	%ymm8,%ymm14,%ymm14
4272	vpshufb	%ymm8,%ymm13,%ymm13
4273	vpshufb	%ymm8,%ymm12,%ymm12
4274	vpaddd	%ymm15,%ymm11,%ymm11
4275	vpaddd	%ymm14,%ymm10,%ymm10
4276	vpaddd	%ymm13,%ymm9,%ymm9
4277	vpaddd	0+128(%rbp),%ymm12,%ymm8
4278	vpxor	%ymm11,%ymm7,%ymm7
4279	movq	8+0+0(%rbp),%rdx
4280	mulxq	%r10,%r10,%rax
4281	addq	%r10,%r14
4282	mulxq	%r11,%r11,%r9
4283	adcq	%r11,%r15
4284	adcq	$0,%r9
4285	imulq	%r12,%rdx
4286	vpxor	%ymm10,%ymm6,%ymm6
4287	vpxor	%ymm9,%ymm5,%ymm5
4288	vpxor	%ymm8,%ymm4,%ymm4
4289	vmovdqa	%ymm8,0+128(%rbp)
4290	vpsrld	$20,%ymm7,%ymm8
4291	vpslld	$32-20,%ymm7,%ymm7
4292	vpxor	%ymm8,%ymm7,%ymm7
4293	vpsrld	$20,%ymm6,%ymm8
4294	vpslld	$32-20,%ymm6,%ymm6
4295	vpxor	%ymm8,%ymm6,%ymm6
4296	vpsrld	$20,%ymm5,%ymm8
4297	vpslld	$32-20,%ymm5,%ymm5
4298	addq	%rax,%r15
4299	adcq	%rdx,%r9
4300	vpxor	%ymm8,%ymm5,%ymm5
4301	vpsrld	$20,%ymm4,%ymm8
4302	vpslld	$32-20,%ymm4,%ymm4
4303	vpxor	%ymm8,%ymm4,%ymm4
4304	vmovdqa	L$rol8(%rip),%ymm8
4305	vpaddd	%ymm7,%ymm3,%ymm3
4306	vpaddd	%ymm6,%ymm2,%ymm2
4307	vpaddd	%ymm5,%ymm1,%ymm1
4308	vpaddd	%ymm4,%ymm0,%ymm0
4309	vpxor	%ymm3,%ymm15,%ymm15
4310	movq	%r13,%r10
4311	movq	%r14,%r11
4312	movq	%r15,%r12
4313	andq	$3,%r12
4314	movq	%r15,%r13
4315	andq	$-4,%r13
4316	movq	%r9,%r14
4317	shrdq	$2,%r9,%r15
4318	shrq	$2,%r9
4319	addq	%r13,%r15
4320	adcq	%r14,%r9
4321	addq	%r15,%r10
4322	adcq	%r9,%r11
4323	adcq	$0,%r12
4324	vpxor	%ymm2,%ymm14,%ymm14
4325	vpxor	%ymm1,%ymm13,%ymm13
4326	vpxor	%ymm0,%ymm12,%ymm12
4327	vpshufb	%ymm8,%ymm15,%ymm15
4328	vpshufb	%ymm8,%ymm14,%ymm14
4329	vpshufb	%ymm8,%ymm13,%ymm13
4330	vpshufb	%ymm8,%ymm12,%ymm12
4331	vpaddd	%ymm15,%ymm11,%ymm11
4332	vpaddd	%ymm14,%ymm10,%ymm10
4333	addq	0+16(%rsi,%rcx,1),%r10
4334	adcq	8+16(%rsi,%rcx,1),%r11
4335	adcq	$1,%r12
4336	vpaddd	%ymm13,%ymm9,%ymm9
4337	vpaddd	0+128(%rbp),%ymm12,%ymm8
4338	vpxor	%ymm11,%ymm7,%ymm7
4339	vpxor	%ymm10,%ymm6,%ymm6
4340	vpxor	%ymm9,%ymm5,%ymm5
4341	vpxor	%ymm8,%ymm4,%ymm4
4342	vmovdqa	%ymm8,0+128(%rbp)
4343	vpsrld	$25,%ymm7,%ymm8
4344	movq	0+0+0(%rbp),%rdx
4345	movq	%rdx,%r15
4346	mulxq	%r10,%r13,%r14
4347	mulxq	%r11,%rax,%rdx
4348	imulq	%r12,%r15
4349	addq	%rax,%r14
4350	adcq	%rdx,%r15
4351	vpslld	$32-25,%ymm7,%ymm7
4352	vpxor	%ymm8,%ymm7,%ymm7
4353	vpsrld	$25,%ymm6,%ymm8
4354	vpslld	$32-25,%ymm6,%ymm6
4355	vpxor	%ymm8,%ymm6,%ymm6
4356	vpsrld	$25,%ymm5,%ymm8
4357	vpslld	$32-25,%ymm5,%ymm5
4358	vpxor	%ymm8,%ymm5,%ymm5
4359	vpsrld	$25,%ymm4,%ymm8
4360	vpslld	$32-25,%ymm4,%ymm4
4361	vpxor	%ymm8,%ymm4,%ymm4
4362	vmovdqa	0+128(%rbp),%ymm8
4363	vpalignr	$4,%ymm7,%ymm7,%ymm7
4364	vpalignr	$8,%ymm11,%ymm11,%ymm11
4365	vpalignr	$12,%ymm15,%ymm15,%ymm15
4366	vpalignr	$4,%ymm6,%ymm6,%ymm6
4367	vpalignr	$8,%ymm10,%ymm10,%ymm10
4368	vpalignr	$12,%ymm14,%ymm14,%ymm14
4369	movq	8+0+0(%rbp),%rdx
4370	mulxq	%r10,%r10,%rax
4371	addq	%r10,%r14
4372	mulxq	%r11,%r11,%r9
4373	adcq	%r11,%r15
4374	adcq	$0,%r9
4375	imulq	%r12,%rdx
4376	vpalignr	$4,%ymm5,%ymm5,%ymm5
4377	vpalignr	$8,%ymm9,%ymm9,%ymm9
4378	vpalignr	$12,%ymm13,%ymm13,%ymm13
4379	vpalignr	$4,%ymm4,%ymm4,%ymm4
4380	vpalignr	$8,%ymm8,%ymm8,%ymm8
4381	vpalignr	$12,%ymm12,%ymm12,%ymm12
4382	vmovdqa	%ymm8,0+128(%rbp)
4383	vmovdqa	L$rol16(%rip),%ymm8
4384	vpaddd	%ymm7,%ymm3,%ymm3
4385	vpaddd	%ymm6,%ymm2,%ymm2
4386	vpaddd	%ymm5,%ymm1,%ymm1
4387	vpaddd	%ymm4,%ymm0,%ymm0
4388	vpxor	%ymm3,%ymm15,%ymm15
4389	vpxor	%ymm2,%ymm14,%ymm14
4390	vpxor	%ymm1,%ymm13,%ymm13
4391	vpxor	%ymm0,%ymm12,%ymm12
4392	vpshufb	%ymm8,%ymm15,%ymm15
4393	vpshufb	%ymm8,%ymm14,%ymm14
4394	addq	%rax,%r15
4395	adcq	%rdx,%r9
4396	vpshufb	%ymm8,%ymm13,%ymm13
4397	vpshufb	%ymm8,%ymm12,%ymm12
4398	vpaddd	%ymm15,%ymm11,%ymm11
4399	vpaddd	%ymm14,%ymm10,%ymm10
4400	vpaddd	%ymm13,%ymm9,%ymm9
4401	vpaddd	0+128(%rbp),%ymm12,%ymm8
4402	vpxor	%ymm11,%ymm7,%ymm7
4403	vpxor	%ymm10,%ymm6,%ymm6
4404	vpxor	%ymm9,%ymm5,%ymm5
4405	movq	%r13,%r10
4406	movq	%r14,%r11
4407	movq	%r15,%r12
4408	andq	$3,%r12
4409	movq	%r15,%r13
4410	andq	$-4,%r13
4411	movq	%r9,%r14
4412	shrdq	$2,%r9,%r15
4413	shrq	$2,%r9
4414	addq	%r13,%r15
4415	adcq	%r14,%r9
4416	addq	%r15,%r10
4417	adcq	%r9,%r11
4418	adcq	$0,%r12
4419	vpxor	%ymm8,%ymm4,%ymm4
4420	vmovdqa	%ymm8,0+128(%rbp)
4421	vpsrld	$20,%ymm7,%ymm8
4422	vpslld	$32-20,%ymm7,%ymm7
4423	vpxor	%ymm8,%ymm7,%ymm7
4424	vpsrld	$20,%ymm6,%ymm8
4425	vpslld	$32-20,%ymm6,%ymm6
4426	vpxor	%ymm8,%ymm6,%ymm6
4427	addq	0+32(%rsi,%rcx,1),%r10
4428	adcq	8+32(%rsi,%rcx,1),%r11
4429	adcq	$1,%r12
4430
4431	leaq	48(%rcx),%rcx
4432	vpsrld	$20,%ymm5,%ymm8
4433	vpslld	$32-20,%ymm5,%ymm5
4434	vpxor	%ymm8,%ymm5,%ymm5
4435	vpsrld	$20,%ymm4,%ymm8
4436	vpslld	$32-20,%ymm4,%ymm4
4437	vpxor	%ymm8,%ymm4,%ymm4
4438	vmovdqa	L$rol8(%rip),%ymm8
4439	vpaddd	%ymm7,%ymm3,%ymm3
4440	vpaddd	%ymm6,%ymm2,%ymm2
4441	vpaddd	%ymm5,%ymm1,%ymm1
4442	vpaddd	%ymm4,%ymm0,%ymm0
4443	vpxor	%ymm3,%ymm15,%ymm15
4444	vpxor	%ymm2,%ymm14,%ymm14
4445	vpxor	%ymm1,%ymm13,%ymm13
4446	vpxor	%ymm0,%ymm12,%ymm12
4447	vpshufb	%ymm8,%ymm15,%ymm15
4448	vpshufb	%ymm8,%ymm14,%ymm14
4449	vpshufb	%ymm8,%ymm13,%ymm13
4450	movq	0+0+0(%rbp),%rdx
4451	movq	%rdx,%r15
4452	mulxq	%r10,%r13,%r14
4453	mulxq	%r11,%rax,%rdx
4454	imulq	%r12,%r15
4455	addq	%rax,%r14
4456	adcq	%rdx,%r15
4457	vpshufb	%ymm8,%ymm12,%ymm12
4458	vpaddd	%ymm15,%ymm11,%ymm11
4459	vpaddd	%ymm14,%ymm10,%ymm10
4460	vpaddd	%ymm13,%ymm9,%ymm9
4461	vpaddd	0+128(%rbp),%ymm12,%ymm8
4462	vpxor	%ymm11,%ymm7,%ymm7
4463	vpxor	%ymm10,%ymm6,%ymm6
4464	vpxor	%ymm9,%ymm5,%ymm5
4465	movq	8+0+0(%rbp),%rdx
4466	mulxq	%r10,%r10,%rax
4467	addq	%r10,%r14
4468	mulxq	%r11,%r11,%r9
4469	adcq	%r11,%r15
4470	adcq	$0,%r9
4471	imulq	%r12,%rdx
4472	vpxor	%ymm8,%ymm4,%ymm4
4473	vmovdqa	%ymm8,0+128(%rbp)
4474	vpsrld	$25,%ymm7,%ymm8
4475	vpslld	$32-25,%ymm7,%ymm7
4476	vpxor	%ymm8,%ymm7,%ymm7
4477	vpsrld	$25,%ymm6,%ymm8
4478	vpslld	$32-25,%ymm6,%ymm6
4479	vpxor	%ymm8,%ymm6,%ymm6
4480	addq	%rax,%r15
4481	adcq	%rdx,%r9
4482	vpsrld	$25,%ymm5,%ymm8
4483	vpslld	$32-25,%ymm5,%ymm5
4484	vpxor	%ymm8,%ymm5,%ymm5
4485	vpsrld	$25,%ymm4,%ymm8
4486	vpslld	$32-25,%ymm4,%ymm4
4487	vpxor	%ymm8,%ymm4,%ymm4
4488	vmovdqa	0+128(%rbp),%ymm8
4489	vpalignr	$12,%ymm7,%ymm7,%ymm7
4490	vpalignr	$8,%ymm11,%ymm11,%ymm11
4491	vpalignr	$4,%ymm15,%ymm15,%ymm15
4492	vpalignr	$12,%ymm6,%ymm6,%ymm6
4493	vpalignr	$8,%ymm10,%ymm10,%ymm10
4494	vpalignr	$4,%ymm14,%ymm14,%ymm14
4495	vpalignr	$12,%ymm5,%ymm5,%ymm5
4496	vpalignr	$8,%ymm9,%ymm9,%ymm9
4497	vpalignr	$4,%ymm13,%ymm13,%ymm13
4498	vpalignr	$12,%ymm4,%ymm4,%ymm4
4499	vpalignr	$8,%ymm8,%ymm8,%ymm8
4500	movq	%r13,%r10
4501	movq	%r14,%r11
4502	movq	%r15,%r12
4503	andq	$3,%r12
4504	movq	%r15,%r13
4505	andq	$-4,%r13
4506	movq	%r9,%r14
4507	shrdq	$2,%r9,%r15
4508	shrq	$2,%r9
4509	addq	%r13,%r15
4510	adcq	%r14,%r9
4511	addq	%r15,%r10
4512	adcq	%r9,%r11
4513	adcq	$0,%r12
4514	vpalignr	$4,%ymm12,%ymm12,%ymm12
4515
4516	cmpq	$60*8,%rcx
4517	jne	L$open_avx2_main_loop_rounds
4518	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
4519	vpaddd	0+64(%rbp),%ymm7,%ymm7
4520	vpaddd	0+96(%rbp),%ymm11,%ymm11
4521	vpaddd	0+256(%rbp),%ymm15,%ymm15
4522	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
4523	vpaddd	0+64(%rbp),%ymm6,%ymm6
4524	vpaddd	0+96(%rbp),%ymm10,%ymm10
4525	vpaddd	0+224(%rbp),%ymm14,%ymm14
4526	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
4527	vpaddd	0+64(%rbp),%ymm5,%ymm5
4528	vpaddd	0+96(%rbp),%ymm9,%ymm9
4529	vpaddd	0+192(%rbp),%ymm13,%ymm13
4530	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4531	vpaddd	0+64(%rbp),%ymm4,%ymm4
4532	vpaddd	0+96(%rbp),%ymm8,%ymm8
4533	vpaddd	0+160(%rbp),%ymm12,%ymm12
4534
4535	vmovdqa	%ymm0,0+128(%rbp)
4536	addq	0+60*8(%rsi),%r10
4537	adcq	8+60*8(%rsi),%r11
4538	adcq	$1,%r12
4539	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
4540	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
4541	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
4542	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
4543	vpxor	0+0(%rsi),%ymm0,%ymm0
4544	vpxor	32+0(%rsi),%ymm3,%ymm3
4545	vpxor	64+0(%rsi),%ymm7,%ymm7
4546	vpxor	96+0(%rsi),%ymm11,%ymm11
4547	vmovdqu	%ymm0,0+0(%rdi)
4548	vmovdqu	%ymm3,32+0(%rdi)
4549	vmovdqu	%ymm7,64+0(%rdi)
4550	vmovdqu	%ymm11,96+0(%rdi)
4551
4552	vmovdqa	0+128(%rbp),%ymm0
4553	movq	0+0+0(%rbp),%rax
4554	movq	%rax,%r15
4555	mulq	%r10
4556	movq	%rax,%r13
4557	movq	%rdx,%r14
4558	movq	0+0+0(%rbp),%rax
4559	mulq	%r11
4560	imulq	%r12,%r15
4561	addq	%rax,%r14
4562	adcq	%rdx,%r15
4563	movq	8+0+0(%rbp),%rax
4564	movq	%rax,%r9
4565	mulq	%r10
4566	addq	%rax,%r14
4567	adcq	$0,%rdx
4568	movq	%rdx,%r10
4569	movq	8+0+0(%rbp),%rax
4570	mulq	%r11
4571	addq	%rax,%r15
4572	adcq	$0,%rdx
4573	imulq	%r12,%r9
4574	addq	%r10,%r15
4575	adcq	%rdx,%r9
4576	movq	%r13,%r10
4577	movq	%r14,%r11
4578	movq	%r15,%r12
4579	andq	$3,%r12
4580	movq	%r15,%r13
4581	andq	$-4,%r13
4582	movq	%r9,%r14
4583	shrdq	$2,%r9,%r15
4584	shrq	$2,%r9
4585	addq	%r13,%r15
4586	adcq	%r14,%r9
4587	addq	%r15,%r10
4588	adcq	%r9,%r11
4589	adcq	$0,%r12
4590	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
4591	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
4592	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
4593	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
4594	vpxor	0+128(%rsi),%ymm3,%ymm3
4595	vpxor	32+128(%rsi),%ymm2,%ymm2
4596	vpxor	64+128(%rsi),%ymm6,%ymm6
4597	vpxor	96+128(%rsi),%ymm10,%ymm10
4598	vmovdqu	%ymm3,0+128(%rdi)
4599	vmovdqu	%ymm2,32+128(%rdi)
4600	vmovdqu	%ymm6,64+128(%rdi)
4601	vmovdqu	%ymm10,96+128(%rdi)
4602	addq	0+60*8+16(%rsi),%r10
4603	adcq	8+60*8+16(%rsi),%r11
4604	adcq	$1,%r12
4605	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
4606	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
4607	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
4608	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
4609	vpxor	0+256(%rsi),%ymm3,%ymm3
4610	vpxor	32+256(%rsi),%ymm1,%ymm1
4611	vpxor	64+256(%rsi),%ymm5,%ymm5
4612	vpxor	96+256(%rsi),%ymm9,%ymm9
4613	vmovdqu	%ymm3,0+256(%rdi)
4614	vmovdqu	%ymm1,32+256(%rdi)
4615	vmovdqu	%ymm5,64+256(%rdi)
4616	vmovdqu	%ymm9,96+256(%rdi)
4617	movq	0+0+0(%rbp),%rax
4618	movq	%rax,%r15
4619	mulq	%r10
4620	movq	%rax,%r13
4621	movq	%rdx,%r14
4622	movq	0+0+0(%rbp),%rax
4623	mulq	%r11
4624	imulq	%r12,%r15
4625	addq	%rax,%r14
4626	adcq	%rdx,%r15
4627	movq	8+0+0(%rbp),%rax
4628	movq	%rax,%r9
4629	mulq	%r10
4630	addq	%rax,%r14
4631	adcq	$0,%rdx
4632	movq	%rdx,%r10
4633	movq	8+0+0(%rbp),%rax
4634	mulq	%r11
4635	addq	%rax,%r15
4636	adcq	$0,%rdx
4637	imulq	%r12,%r9
4638	addq	%r10,%r15
4639	adcq	%rdx,%r9
4640	movq	%r13,%r10
4641	movq	%r14,%r11
4642	movq	%r15,%r12
4643	andq	$3,%r12
4644	movq	%r15,%r13
4645	andq	$-4,%r13
4646	movq	%r9,%r14
4647	shrdq	$2,%r9,%r15
4648	shrq	$2,%r9
4649	addq	%r13,%r15
4650	adcq	%r14,%r9
4651	addq	%r15,%r10
4652	adcq	%r9,%r11
4653	adcq	$0,%r12
4654	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4655	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
4656	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
4657	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
4658	vpxor	0+384(%rsi),%ymm3,%ymm3
4659	vpxor	32+384(%rsi),%ymm0,%ymm0
4660	vpxor	64+384(%rsi),%ymm4,%ymm4
4661	vpxor	96+384(%rsi),%ymm8,%ymm8
4662	vmovdqu	%ymm3,0+384(%rdi)
4663	vmovdqu	%ymm0,32+384(%rdi)
4664	vmovdqu	%ymm4,64+384(%rdi)
4665	vmovdqu	%ymm8,96+384(%rdi)
4666
4667	leaq	512(%rsi),%rsi
4668	leaq	512(%rdi),%rdi
4669	subq	$512,%rbx
4670	jmp	L$open_avx2_main_loop
4671L$open_avx2_main_loop_done:
4672	testq	%rbx,%rbx
4673	vzeroupper
4674	je	L$open_sse_finalize
4675
4676	cmpq	$384,%rbx
4677	ja	L$open_avx2_tail_512
4678	cmpq	$256,%rbx
4679	ja	L$open_avx2_tail_384
4680	cmpq	$128,%rbx
4681	ja	L$open_avx2_tail_256
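# At most 128 bytes remain: a single 2-block state; hash the 16-byte-aligned
# part of the leftover ciphertext while running the 10 double rounds.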
4682	vmovdqa	L$chacha20_consts(%rip),%ymm0
4683	vmovdqa	0+64(%rbp),%ymm4
4684	vmovdqa	0+96(%rbp),%ymm8
4685	vmovdqa	L$avx2_inc(%rip),%ymm12
4686	vpaddd	0+160(%rbp),%ymm12,%ymm12
4687	vmovdqa	%ymm12,0+160(%rbp)
4688
4689	xorq	%r8,%r8
4690	movq	%rbx,%rcx
4691	andq	$-16,%rcx
4692	testq	%rcx,%rcx
4693	je	L$open_avx2_tail_128_rounds
4694L$open_avx2_tail_128_rounds_and_x1hash:
4695	addq	0+0(%rsi,%r8,1),%r10
4696	adcq	8+0(%rsi,%r8,1),%r11
4697	adcq	$1,%r12
4698	movq	0+0+0(%rbp),%rax
4699	movq	%rax,%r15
4700	mulq	%r10
4701	movq	%rax,%r13
4702	movq	%rdx,%r14
4703	movq	0+0+0(%rbp),%rax
4704	mulq	%r11
4705	imulq	%r12,%r15
4706	addq	%rax,%r14
4707	adcq	%rdx,%r15
4708	movq	8+0+0(%rbp),%rax
4709	movq	%rax,%r9
4710	mulq	%r10
4711	addq	%rax,%r14
4712	adcq	$0,%rdx
4713	movq	%rdx,%r10
4714	movq	8+0+0(%rbp),%rax
4715	mulq	%r11
4716	addq	%rax,%r15
4717	adcq	$0,%rdx
4718	imulq	%r12,%r9
4719	addq	%r10,%r15
4720	adcq	%rdx,%r9
4721	movq	%r13,%r10
4722	movq	%r14,%r11
4723	movq	%r15,%r12
4724	andq	$3,%r12
4725	movq	%r15,%r13
4726	andq	$-4,%r13
4727	movq	%r9,%r14
4728	shrdq	$2,%r9,%r15
4729	shrq	$2,%r9
4730	addq	%r13,%r15
4731	adcq	%r14,%r9
4732	addq	%r15,%r10
4733	adcq	%r9,%r11
4734	adcq	$0,%r12
4735
4736L$open_avx2_tail_128_rounds:
4737	addq	$16,%r8
4738	vpaddd	%ymm4,%ymm0,%ymm0
4739	vpxor	%ymm0,%ymm12,%ymm12
4740	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4741	vpaddd	%ymm12,%ymm8,%ymm8
4742	vpxor	%ymm8,%ymm4,%ymm4
4743	vpsrld	$20,%ymm4,%ymm3
4744	vpslld	$12,%ymm4,%ymm4
4745	vpxor	%ymm3,%ymm4,%ymm4
4746	vpaddd	%ymm4,%ymm0,%ymm0
4747	vpxor	%ymm0,%ymm12,%ymm12
4748	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4749	vpaddd	%ymm12,%ymm8,%ymm8
4750	vpxor	%ymm8,%ymm4,%ymm4
4751	vpslld	$7,%ymm4,%ymm3
4752	vpsrld	$25,%ymm4,%ymm4
4753	vpxor	%ymm3,%ymm4,%ymm4
4754	vpalignr	$12,%ymm12,%ymm12,%ymm12
4755	vpalignr	$8,%ymm8,%ymm8,%ymm8
4756	vpalignr	$4,%ymm4,%ymm4,%ymm4
4757	vpaddd	%ymm4,%ymm0,%ymm0
4758	vpxor	%ymm0,%ymm12,%ymm12
4759	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4760	vpaddd	%ymm12,%ymm8,%ymm8
4761	vpxor	%ymm8,%ymm4,%ymm4
4762	vpsrld	$20,%ymm4,%ymm3
4763	vpslld	$12,%ymm4,%ymm4
4764	vpxor	%ymm3,%ymm4,%ymm4
4765	vpaddd	%ymm4,%ymm0,%ymm0
4766	vpxor	%ymm0,%ymm12,%ymm12
4767	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4768	vpaddd	%ymm12,%ymm8,%ymm8
4769	vpxor	%ymm8,%ymm4,%ymm4
4770	vpslld	$7,%ymm4,%ymm3
4771	vpsrld	$25,%ymm4,%ymm4
4772	vpxor	%ymm3,%ymm4,%ymm4
4773	vpalignr	$4,%ymm12,%ymm12,%ymm12
4774	vpalignr	$8,%ymm8,%ymm8,%ymm8
4775	vpalignr	$12,%ymm4,%ymm4,%ymm4
4776
4777	cmpq	%rcx,%r8
4778	jb	L$open_avx2_tail_128_rounds_and_x1hash
4779	cmpq	$160,%r8
4780	jne	L$open_avx2_tail_128_rounds
4781	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4782	vpaddd	0+64(%rbp),%ymm4,%ymm4
4783	vpaddd	0+96(%rbp),%ymm8,%ymm8
4784	vpaddd	0+160(%rbp),%ymm12,%ymm12
4785	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
4786	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
4787	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
4788	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
4789	vmovdqa	%ymm3,%ymm8
4790
4791	jmp	L$open_avx2_tail_128_xor
4792
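# Between 129 and 256 bytes remain: two 2-block states; hash part of the
# leftover ciphertext (one 16-byte block per double round) while the rounds
# run, then finish below.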
4793L$open_avx2_tail_256:
4794	vmovdqa	L$chacha20_consts(%rip),%ymm0
4795	vmovdqa	0+64(%rbp),%ymm4
4796	vmovdqa	0+96(%rbp),%ymm8
4797	vmovdqa	%ymm0,%ymm1
4798	vmovdqa	%ymm4,%ymm5
4799	vmovdqa	%ymm8,%ymm9
4800	vmovdqa	L$avx2_inc(%rip),%ymm12
4801	vpaddd	0+160(%rbp),%ymm12,%ymm13
4802	vpaddd	%ymm13,%ymm12,%ymm12
4803	vmovdqa	%ymm12,0+160(%rbp)
4804	vmovdqa	%ymm13,0+192(%rbp)
4805
4806	movq	%rbx,0+128(%rbp)
4807	movq	%rbx,%rcx
4808	subq	$128,%rcx
4809	shrq	$4,%rcx
4810	movq	$10,%r8
4811	cmpq	$10,%rcx
4812	cmovgq	%r8,%rcx
4813	movq	%rsi,%rbx
4814	xorq	%r8,%r8
4815L$open_avx2_tail_256_rounds_and_x1hash:
4816	addq	0+0(%rbx),%r10
4817	adcq	8+0(%rbx),%r11
4818	adcq	$1,%r12
4819	movq	0+0+0(%rbp),%rdx
4820	movq	%rdx,%r15
4821	mulxq	%r10,%r13,%r14
4822	mulxq	%r11,%rax,%rdx
4823	imulq	%r12,%r15
4824	addq	%rax,%r14
4825	adcq	%rdx,%r15
4826	movq	8+0+0(%rbp),%rdx
4827	mulxq	%r10,%r10,%rax
4828	addq	%r10,%r14
4829	mulxq	%r11,%r11,%r9
4830	adcq	%r11,%r15
4831	adcq	$0,%r9
4832	imulq	%r12,%rdx
4833	addq	%rax,%r15
4834	adcq	%rdx,%r9
4835	movq	%r13,%r10
4836	movq	%r14,%r11
4837	movq	%r15,%r12
4838	andq	$3,%r12
4839	movq	%r15,%r13
4840	andq	$-4,%r13
4841	movq	%r9,%r14
4842	shrdq	$2,%r9,%r15
4843	shrq	$2,%r9
4844	addq	%r13,%r15
4845	adcq	%r14,%r9
4846	addq	%r15,%r10
4847	adcq	%r9,%r11
4848	adcq	$0,%r12
4849
4850	leaq	16(%rbx),%rbx
4851L$open_avx2_tail_256_rounds:
4852	vpaddd	%ymm4,%ymm0,%ymm0
4853	vpxor	%ymm0,%ymm12,%ymm12
4854	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4855	vpaddd	%ymm12,%ymm8,%ymm8
4856	vpxor	%ymm8,%ymm4,%ymm4
4857	vpsrld	$20,%ymm4,%ymm3
4858	vpslld	$12,%ymm4,%ymm4
4859	vpxor	%ymm3,%ymm4,%ymm4
4860	vpaddd	%ymm4,%ymm0,%ymm0
4861	vpxor	%ymm0,%ymm12,%ymm12
4862	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4863	vpaddd	%ymm12,%ymm8,%ymm8
4864	vpxor	%ymm8,%ymm4,%ymm4
4865	vpslld	$7,%ymm4,%ymm3
4866	vpsrld	$25,%ymm4,%ymm4
4867	vpxor	%ymm3,%ymm4,%ymm4
4868	vpalignr	$12,%ymm12,%ymm12,%ymm12
4869	vpalignr	$8,%ymm8,%ymm8,%ymm8
4870	vpalignr	$4,%ymm4,%ymm4,%ymm4
4871	vpaddd	%ymm5,%ymm1,%ymm1
4872	vpxor	%ymm1,%ymm13,%ymm13
4873	vpshufb	L$rol16(%rip),%ymm13,%ymm13
4874	vpaddd	%ymm13,%ymm9,%ymm9
4875	vpxor	%ymm9,%ymm5,%ymm5
4876	vpsrld	$20,%ymm5,%ymm3
4877	vpslld	$12,%ymm5,%ymm5
4878	vpxor	%ymm3,%ymm5,%ymm5
4879	vpaddd	%ymm5,%ymm1,%ymm1
4880	vpxor	%ymm1,%ymm13,%ymm13
4881	vpshufb	L$rol8(%rip),%ymm13,%ymm13
4882	vpaddd	%ymm13,%ymm9,%ymm9
4883	vpxor	%ymm9,%ymm5,%ymm5
4884	vpslld	$7,%ymm5,%ymm3
4885	vpsrld	$25,%ymm5,%ymm5
4886	vpxor	%ymm3,%ymm5,%ymm5
4887	vpalignr	$12,%ymm13,%ymm13,%ymm13
4888	vpalignr	$8,%ymm9,%ymm9,%ymm9
4889	vpalignr	$4,%ymm5,%ymm5,%ymm5
4890
4891	incq	%r8
4892	vpaddd	%ymm4,%ymm0,%ymm0
4893	vpxor	%ymm0,%ymm12,%ymm12
4894	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4895	vpaddd	%ymm12,%ymm8,%ymm8
4896	vpxor	%ymm8,%ymm4,%ymm4
4897	vpsrld	$20,%ymm4,%ymm3
4898	vpslld	$12,%ymm4,%ymm4
4899	vpxor	%ymm3,%ymm4,%ymm4
4900	vpaddd	%ymm4,%ymm0,%ymm0
4901	vpxor	%ymm0,%ymm12,%ymm12
4902	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4903	vpaddd	%ymm12,%ymm8,%ymm8
4904	vpxor	%ymm8,%ymm4,%ymm4
4905	vpslld	$7,%ymm4,%ymm3
4906	vpsrld	$25,%ymm4,%ymm4
4907	vpxor	%ymm3,%ymm4,%ymm4
4908	vpalignr	$4,%ymm12,%ymm12,%ymm12
4909	vpalignr	$8,%ymm8,%ymm8,%ymm8
4910	vpalignr	$12,%ymm4,%ymm4,%ymm4
4911	vpaddd	%ymm5,%ymm1,%ymm1
4912	vpxor	%ymm1,%ymm13,%ymm13
4913	vpshufb	L$rol16(%rip),%ymm13,%ymm13
4914	vpaddd	%ymm13,%ymm9,%ymm9
4915	vpxor	%ymm9,%ymm5,%ymm5
4916	vpsrld	$20,%ymm5,%ymm3
4917	vpslld	$12,%ymm5,%ymm5
4918	vpxor	%ymm3,%ymm5,%ymm5
4919	vpaddd	%ymm5,%ymm1,%ymm1
4920	vpxor	%ymm1,%ymm13,%ymm13
4921	vpshufb	L$rol8(%rip),%ymm13,%ymm13
4922	vpaddd	%ymm13,%ymm9,%ymm9
4923	vpxor	%ymm9,%ymm5,%ymm5
4924	vpslld	$7,%ymm5,%ymm3
4925	vpsrld	$25,%ymm5,%ymm5
4926	vpxor	%ymm3,%ymm5,%ymm5
4927	vpalignr	$4,%ymm13,%ymm13,%ymm13
4928	vpalignr	$8,%ymm9,%ymm9,%ymm9
4929	vpalignr	$12,%ymm5,%ymm5,%ymm5
4930	vpaddd	%ymm6,%ymm2,%ymm2
4931	vpxor	%ymm2,%ymm14,%ymm14
4932	vpshufb	L$rol16(%rip),%ymm14,%ymm14
4933	vpaddd	%ymm14,%ymm10,%ymm10
4934	vpxor	%ymm10,%ymm6,%ymm6
4935	vpsrld	$20,%ymm6,%ymm3
4936	vpslld	$12,%ymm6,%ymm6
4937	vpxor	%ymm3,%ymm6,%ymm6
4938	vpaddd	%ymm6,%ymm2,%ymm2
4939	vpxor	%ymm2,%ymm14,%ymm14
4940	vpshufb	L$rol8(%rip),%ymm14,%ymm14
4941	vpaddd	%ymm14,%ymm10,%ymm10
4942	vpxor	%ymm10,%ymm6,%ymm6
4943	vpslld	$7,%ymm6,%ymm3
4944	vpsrld	$25,%ymm6,%ymm6
4945	vpxor	%ymm3,%ymm6,%ymm6
4946	vpalignr	$4,%ymm14,%ymm14,%ymm14
4947	vpalignr	$8,%ymm10,%ymm10,%ymm10
4948	vpalignr	$12,%ymm6,%ymm6,%ymm6
4949
4950	cmpq	%rcx,%r8
4951	jb	L$open_avx2_tail_256_rounds_and_x1hash
4952	cmpq	$10,%r8
4953	jne	L$open_avx2_tail_256_rounds
4954	movq	%rbx,%r8
4955	subq	%rsi,%rbx
4956	movq	%rbx,%rcx
4957	movq	0+128(%rbp),%rbx
4958L$open_avx2_tail_256_hash:
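# Hash any ciphertext still unprocessed, 16 bytes per pass, before the
# keystream is finalized and XORed below.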
4959	addq	$16,%rcx
4960	cmpq	%rbx,%rcx
4961	jg	L$open_avx2_tail_256_done
4962	addq	0+0(%r8),%r10
4963	adcq	8+0(%r8),%r11
4964	adcq	$1,%r12
4965	movq	0+0+0(%rbp),%rdx
4966	movq	%rdx,%r15
4967	mulxq	%r10,%r13,%r14
4968	mulxq	%r11,%rax,%rdx
4969	imulq	%r12,%r15
4970	addq	%rax,%r14
4971	adcq	%rdx,%r15
4972	movq	8+0+0(%rbp),%rdx
4973	mulxq	%r10,%r10,%rax
4974	addq	%r10,%r14
4975	mulxq	%r11,%r11,%r9
4976	adcq	%r11,%r15
4977	adcq	$0,%r9
4978	imulq	%r12,%rdx
4979	addq	%rax,%r15
4980	adcq	%rdx,%r9
4981	movq	%r13,%r10
4982	movq	%r14,%r11
4983	movq	%r15,%r12
4984	andq	$3,%r12
4985	movq	%r15,%r13
4986	andq	$-4,%r13
4987	movq	%r9,%r14
4988	shrdq	$2,%r9,%r15
4989	shrq	$2,%r9
4990	addq	%r13,%r15
4991	adcq	%r14,%r9
4992	addq	%r15,%r10
4993	adcq	%r9,%r11
4994	adcq	$0,%r12
4995
4996	leaq	16(%r8),%r8
4997	jmp	L$open_avx2_tail_256_hash
4998L$open_avx2_tail_256_done:
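# Add the saved input state back into both blocks, re-interleave the 128-bit
# lanes into sequential keystream, XOR the first 128 bytes, and hand the rest
# to the shared 128-byte XOR loop.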
4999	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
5000	vpaddd	0+64(%rbp),%ymm5,%ymm5
5001	vpaddd	0+96(%rbp),%ymm9,%ymm9
5002	vpaddd	0+192(%rbp),%ymm13,%ymm13
5003	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5004	vpaddd	0+64(%rbp),%ymm4,%ymm4
5005	vpaddd	0+96(%rbp),%ymm8,%ymm8
5006	vpaddd	0+160(%rbp),%ymm12,%ymm12
5007	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5008	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5009	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5010	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5011	vpxor	0+0(%rsi),%ymm3,%ymm3
5012	vpxor	32+0(%rsi),%ymm1,%ymm1
5013	vpxor	64+0(%rsi),%ymm5,%ymm5
5014	vpxor	96+0(%rsi),%ymm9,%ymm9
5015	vmovdqu	%ymm3,0+0(%rdi)
5016	vmovdqu	%ymm1,32+0(%rdi)
5017	vmovdqu	%ymm5,64+0(%rdi)
5018	vmovdqu	%ymm9,96+0(%rdi)
5019	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5020	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5021	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5022	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5023	vmovdqa	%ymm3,%ymm8
5024
5025	leaq	128(%rsi),%rsi
5026	leaq	128(%rdi),%rdi
5027	subq	$128,%rbx
5028	jmp	L$open_avx2_tail_128_xor
5029
5030L$open_avx2_tail_384:
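# Open tail for up to 384 remaining bytes: three parallel ChaCha20 states,
# with Poly1305 hashing of the leftover ciphertext interleaved into the rounds.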
5031	vmovdqa	L$chacha20_consts(%rip),%ymm0
5032	vmovdqa	0+64(%rbp),%ymm4
5033	vmovdqa	0+96(%rbp),%ymm8
5034	vmovdqa	%ymm0,%ymm1
5035	vmovdqa	%ymm4,%ymm5
5036	vmovdqa	%ymm8,%ymm9
5037	vmovdqa	%ymm0,%ymm2
5038	vmovdqa	%ymm4,%ymm6
5039	vmovdqa	%ymm8,%ymm10
5040	vmovdqa	L$avx2_inc(%rip),%ymm12
5041	vpaddd	0+160(%rbp),%ymm12,%ymm14
5042	vpaddd	%ymm14,%ymm12,%ymm13
5043	vpaddd	%ymm13,%ymm12,%ymm12
5044	vmovdqa	%ymm12,0+160(%rbp)
5045	vmovdqa	%ymm13,0+192(%rbp)
5046	vmovdqa	%ymm14,0+224(%rbp)
5047
5048	movq	%rbx,0+128(%rbp)
5049	movq	%rbx,%rcx
5050	subq	$256,%rcx
5051	shrq	$4,%rcx
5052	addq	$6,%rcx
5053	movq	$10,%r8
5054	cmpq	$10,%rcx
5055	cmovgq	%r8,%rcx
5056	movq	%rsi,%rbx
5057	xorq	%r8,%r8
5058L$open_avx2_tail_384_rounds_and_x2hash:
5059	addq	0+0(%rbx),%r10
5060	adcq	8+0(%rbx),%r11
5061	adcq	$1,%r12
5062	movq	0+0+0(%rbp),%rdx
5063	movq	%rdx,%r15
5064	mulxq	%r10,%r13,%r14
5065	mulxq	%r11,%rax,%rdx
5066	imulq	%r12,%r15
5067	addq	%rax,%r14
5068	adcq	%rdx,%r15
5069	movq	8+0+0(%rbp),%rdx
5070	mulxq	%r10,%r10,%rax
5071	addq	%r10,%r14
5072	mulxq	%r11,%r11,%r9
5073	adcq	%r11,%r15
5074	adcq	$0,%r9
5075	imulq	%r12,%rdx
5076	addq	%rax,%r15
5077	adcq	%rdx,%r9
5078	movq	%r13,%r10
5079	movq	%r14,%r11
5080	movq	%r15,%r12
5081	andq	$3,%r12
5082	movq	%r15,%r13
5083	andq	$-4,%r13
5084	movq	%r9,%r14
5085	shrdq	$2,%r9,%r15
5086	shrq	$2,%r9
5087	addq	%r13,%r15
5088	adcq	%r14,%r9
5089	addq	%r15,%r10
5090	adcq	%r9,%r11
5091	adcq	$0,%r12
5092
5093	leaq	16(%rbx),%rbx
5094L$open_avx2_tail_384_rounds_and_x1hash:
5095	vpaddd	%ymm6,%ymm2,%ymm2
5096	vpxor	%ymm2,%ymm14,%ymm14
5097	vpshufb	L$rol16(%rip),%ymm14,%ymm14
5098	vpaddd	%ymm14,%ymm10,%ymm10
5099	vpxor	%ymm10,%ymm6,%ymm6
5100	vpsrld	$20,%ymm6,%ymm3
5101	vpslld	$12,%ymm6,%ymm6
5102	vpxor	%ymm3,%ymm6,%ymm6
5103	vpaddd	%ymm6,%ymm2,%ymm2
5104	vpxor	%ymm2,%ymm14,%ymm14
5105	vpshufb	L$rol8(%rip),%ymm14,%ymm14
5106	vpaddd	%ymm14,%ymm10,%ymm10
5107	vpxor	%ymm10,%ymm6,%ymm6
5108	vpslld	$7,%ymm6,%ymm3
5109	vpsrld	$25,%ymm6,%ymm6
5110	vpxor	%ymm3,%ymm6,%ymm6
5111	vpalignr	$12,%ymm14,%ymm14,%ymm14
5112	vpalignr	$8,%ymm10,%ymm10,%ymm10
5113	vpalignr	$4,%ymm6,%ymm6,%ymm6
5114	vpaddd	%ymm5,%ymm1,%ymm1
5115	vpxor	%ymm1,%ymm13,%ymm13
5116	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5117	vpaddd	%ymm13,%ymm9,%ymm9
5118	vpxor	%ymm9,%ymm5,%ymm5
5119	vpsrld	$20,%ymm5,%ymm3
5120	vpslld	$12,%ymm5,%ymm5
5121	vpxor	%ymm3,%ymm5,%ymm5
5122	vpaddd	%ymm5,%ymm1,%ymm1
5123	vpxor	%ymm1,%ymm13,%ymm13
5124	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5125	vpaddd	%ymm13,%ymm9,%ymm9
5126	vpxor	%ymm9,%ymm5,%ymm5
5127	vpslld	$7,%ymm5,%ymm3
5128	vpsrld	$25,%ymm5,%ymm5
5129	vpxor	%ymm3,%ymm5,%ymm5
5130	vpalignr	$12,%ymm13,%ymm13,%ymm13
5131	vpalignr	$8,%ymm9,%ymm9,%ymm9
5132	vpalignr	$4,%ymm5,%ymm5,%ymm5
5133	vpaddd	%ymm4,%ymm0,%ymm0
5134	vpxor	%ymm0,%ymm12,%ymm12
5135	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5136	vpaddd	%ymm12,%ymm8,%ymm8
5137	vpxor	%ymm8,%ymm4,%ymm4
5138	vpsrld	$20,%ymm4,%ymm3
5139	vpslld	$12,%ymm4,%ymm4
5140	vpxor	%ymm3,%ymm4,%ymm4
5141	vpaddd	%ymm4,%ymm0,%ymm0
5142	vpxor	%ymm0,%ymm12,%ymm12
5143	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5144	vpaddd	%ymm12,%ymm8,%ymm8
5145	vpxor	%ymm8,%ymm4,%ymm4
5146	vpslld	$7,%ymm4,%ymm3
5147	vpsrld	$25,%ymm4,%ymm4
5148	vpxor	%ymm3,%ymm4,%ymm4
5149	vpalignr	$12,%ymm12,%ymm12,%ymm12
5150	vpalignr	$8,%ymm8,%ymm8,%ymm8
5151	vpalignr	$4,%ymm4,%ymm4,%ymm4
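# Same Poly1305 block step as above, but in the plain mulq form.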
5152	addq	0+0(%rbx),%r10
5153	adcq	8+0(%rbx),%r11
5154	adcq	$1,%r12
5155	movq	0+0+0(%rbp),%rax
5156	movq	%rax,%r15
5157	mulq	%r10
5158	movq	%rax,%r13
5159	movq	%rdx,%r14
5160	movq	0+0+0(%rbp),%rax
5161	mulq	%r11
5162	imulq	%r12,%r15
5163	addq	%rax,%r14
5164	adcq	%rdx,%r15
5165	movq	8+0+0(%rbp),%rax
5166	movq	%rax,%r9
5167	mulq	%r10
5168	addq	%rax,%r14
5169	adcq	$0,%rdx
5170	movq	%rdx,%r10
5171	movq	8+0+0(%rbp),%rax
5172	mulq	%r11
5173	addq	%rax,%r15
5174	adcq	$0,%rdx
5175	imulq	%r12,%r9
5176	addq	%r10,%r15
5177	adcq	%rdx,%r9
5178	movq	%r13,%r10
5179	movq	%r14,%r11
5180	movq	%r15,%r12
5181	andq	$3,%r12
5182	movq	%r15,%r13
5183	andq	$-4,%r13
5184	movq	%r9,%r14
5185	shrdq	$2,%r9,%r15
5186	shrq	$2,%r9
5187	addq	%r13,%r15
5188	adcq	%r14,%r9
5189	addq	%r15,%r10
5190	adcq	%r9,%r11
5191	adcq	$0,%r12
5192
5193	leaq	16(%rbx),%rbx
5194	incq	%r8
5195	vpaddd	%ymm6,%ymm2,%ymm2
5196	vpxor	%ymm2,%ymm14,%ymm14
5197	vpshufb	L$rol16(%rip),%ymm14,%ymm14
5198	vpaddd	%ymm14,%ymm10,%ymm10
5199	vpxor	%ymm10,%ymm6,%ymm6
5200	vpsrld	$20,%ymm6,%ymm3
5201	vpslld	$12,%ymm6,%ymm6
5202	vpxor	%ymm3,%ymm6,%ymm6
5203	vpaddd	%ymm6,%ymm2,%ymm2
5204	vpxor	%ymm2,%ymm14,%ymm14
5205	vpshufb	L$rol8(%rip),%ymm14,%ymm14
5206	vpaddd	%ymm14,%ymm10,%ymm10
5207	vpxor	%ymm10,%ymm6,%ymm6
5208	vpslld	$7,%ymm6,%ymm3
5209	vpsrld	$25,%ymm6,%ymm6
5210	vpxor	%ymm3,%ymm6,%ymm6
5211	vpalignr	$4,%ymm14,%ymm14,%ymm14
5212	vpalignr	$8,%ymm10,%ymm10,%ymm10
5213	vpalignr	$12,%ymm6,%ymm6,%ymm6
5214	vpaddd	%ymm5,%ymm1,%ymm1
5215	vpxor	%ymm1,%ymm13,%ymm13
5216	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5217	vpaddd	%ymm13,%ymm9,%ymm9
5218	vpxor	%ymm9,%ymm5,%ymm5
5219	vpsrld	$20,%ymm5,%ymm3
5220	vpslld	$12,%ymm5,%ymm5
5221	vpxor	%ymm3,%ymm5,%ymm5
5222	vpaddd	%ymm5,%ymm1,%ymm1
5223	vpxor	%ymm1,%ymm13,%ymm13
5224	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5225	vpaddd	%ymm13,%ymm9,%ymm9
5226	vpxor	%ymm9,%ymm5,%ymm5
5227	vpslld	$7,%ymm5,%ymm3
5228	vpsrld	$25,%ymm5,%ymm5
5229	vpxor	%ymm3,%ymm5,%ymm5
5230	vpalignr	$4,%ymm13,%ymm13,%ymm13
5231	vpalignr	$8,%ymm9,%ymm9,%ymm9
5232	vpalignr	$12,%ymm5,%ymm5,%ymm5
5233	vpaddd	%ymm4,%ymm0,%ymm0
5234	vpxor	%ymm0,%ymm12,%ymm12
5235	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5236	vpaddd	%ymm12,%ymm8,%ymm8
5237	vpxor	%ymm8,%ymm4,%ymm4
5238	vpsrld	$20,%ymm4,%ymm3
5239	vpslld	$12,%ymm4,%ymm4
5240	vpxor	%ymm3,%ymm4,%ymm4
5241	vpaddd	%ymm4,%ymm0,%ymm0
5242	vpxor	%ymm0,%ymm12,%ymm12
5243	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5244	vpaddd	%ymm12,%ymm8,%ymm8
5245	vpxor	%ymm8,%ymm4,%ymm4
5246	vpslld	$7,%ymm4,%ymm3
5247	vpsrld	$25,%ymm4,%ymm4
5248	vpxor	%ymm3,%ymm4,%ymm4
5249	vpalignr	$4,%ymm12,%ymm12,%ymm12
5250	vpalignr	$8,%ymm8,%ymm8,%ymm8
5251	vpalignr	$12,%ymm4,%ymm4,%ymm4
5252
5253	cmpq	%rcx,%r8
5254	jb	L$open_avx2_tail_384_rounds_and_x2hash
5255	cmpq	$10,%r8
5256	jne	L$open_avx2_tail_384_rounds_and_x1hash
5257	movq	%rbx,%r8
5258	subq	%rsi,%rbx
5259	movq	%rbx,%rcx
5260	movq	0+128(%rbp),%rbx
5261L$open_avx2_384_tail_hash:
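# Catch up on any ciphertext not yet hashed, 16 bytes per pass.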
5262	addq	$16,%rcx
5263	cmpq	%rbx,%rcx
5264	jg	L$open_avx2_384_tail_done
5265	addq	0+0(%r8),%r10
5266	adcq	8+0(%r8),%r11
5267	adcq	$1,%r12
5268	movq	0+0+0(%rbp),%rdx
5269	movq	%rdx,%r15
5270	mulxq	%r10,%r13,%r14
5271	mulxq	%r11,%rax,%rdx
5272	imulq	%r12,%r15
5273	addq	%rax,%r14
5274	adcq	%rdx,%r15
5275	movq	8+0+0(%rbp),%rdx
5276	mulxq	%r10,%r10,%rax
5277	addq	%r10,%r14
5278	mulxq	%r11,%r11,%r9
5279	adcq	%r11,%r15
5280	adcq	$0,%r9
5281	imulq	%r12,%rdx
5282	addq	%rax,%r15
5283	adcq	%rdx,%r9
5284	movq	%r13,%r10
5285	movq	%r14,%r11
5286	movq	%r15,%r12
5287	andq	$3,%r12
5288	movq	%r15,%r13
5289	andq	$-4,%r13
5290	movq	%r9,%r14
5291	shrdq	$2,%r9,%r15
5292	shrq	$2,%r9
5293	addq	%r13,%r15
5294	adcq	%r14,%r9
5295	addq	%r15,%r10
5296	adcq	%r9,%r11
5297	adcq	$0,%r12
5298
5299	leaq	16(%r8),%r8
5300	jmp	L$open_avx2_384_tail_hash
5301L$open_avx2_384_tail_done:
5302	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
5303	vpaddd	0+64(%rbp),%ymm6,%ymm6
5304	vpaddd	0+96(%rbp),%ymm10,%ymm10
5305	vpaddd	0+224(%rbp),%ymm14,%ymm14
5306	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
5307	vpaddd	0+64(%rbp),%ymm5,%ymm5
5308	vpaddd	0+96(%rbp),%ymm9,%ymm9
5309	vpaddd	0+192(%rbp),%ymm13,%ymm13
5310	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5311	vpaddd	0+64(%rbp),%ymm4,%ymm4
5312	vpaddd	0+96(%rbp),%ymm8,%ymm8
5313	vpaddd	0+160(%rbp),%ymm12,%ymm12
5314	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5315	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5316	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5317	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5318	vpxor	0+0(%rsi),%ymm3,%ymm3
5319	vpxor	32+0(%rsi),%ymm2,%ymm2
5320	vpxor	64+0(%rsi),%ymm6,%ymm6
5321	vpxor	96+0(%rsi),%ymm10,%ymm10
5322	vmovdqu	%ymm3,0+0(%rdi)
5323	vmovdqu	%ymm2,32+0(%rdi)
5324	vmovdqu	%ymm6,64+0(%rdi)
5325	vmovdqu	%ymm10,96+0(%rdi)
5326	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5327	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5328	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5329	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5330	vpxor	0+128(%rsi),%ymm3,%ymm3
5331	vpxor	32+128(%rsi),%ymm1,%ymm1
5332	vpxor	64+128(%rsi),%ymm5,%ymm5
5333	vpxor	96+128(%rsi),%ymm9,%ymm9
5334	vmovdqu	%ymm3,0+128(%rdi)
5335	vmovdqu	%ymm1,32+128(%rdi)
5336	vmovdqu	%ymm5,64+128(%rdi)
5337	vmovdqu	%ymm9,96+128(%rdi)
5338	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5339	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5340	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5341	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5342	vmovdqa	%ymm3,%ymm8
5343
5344	leaq	256(%rsi),%rsi
5345	leaq	256(%rdi),%rdi
5346	subq	$256,%rbx
5347	jmp	L$open_avx2_tail_128_xor
5348
5349L$open_avx2_tail_512:
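# Open tail for up to 512 remaining bytes: four parallel ChaCha20 states.
# ymm8 is spilled to 128(%rbp) during the rounds so a register is free for the
# rotation constants and shift temporaries.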
5350	vmovdqa	L$chacha20_consts(%rip),%ymm0
5351	vmovdqa	0+64(%rbp),%ymm4
5352	vmovdqa	0+96(%rbp),%ymm8
5353	vmovdqa	%ymm0,%ymm1
5354	vmovdqa	%ymm4,%ymm5
5355	vmovdqa	%ymm8,%ymm9
5356	vmovdqa	%ymm0,%ymm2
5357	vmovdqa	%ymm4,%ymm6
5358	vmovdqa	%ymm8,%ymm10
5359	vmovdqa	%ymm0,%ymm3
5360	vmovdqa	%ymm4,%ymm7
5361	vmovdqa	%ymm8,%ymm11
5362	vmovdqa	L$avx2_inc(%rip),%ymm12
5363	vpaddd	0+160(%rbp),%ymm12,%ymm15
5364	vpaddd	%ymm15,%ymm12,%ymm14
5365	vpaddd	%ymm14,%ymm12,%ymm13
5366	vpaddd	%ymm13,%ymm12,%ymm12
5367	vmovdqa	%ymm15,0+256(%rbp)
5368	vmovdqa	%ymm14,0+224(%rbp)
5369	vmovdqa	%ymm13,0+192(%rbp)
5370	vmovdqa	%ymm12,0+160(%rbp)
5371
5372	xorq	%rcx,%rcx
5373	movq	%rsi,%r8
5374L$open_avx2_tail_512_rounds_and_x2hash:
5375	addq	0+0(%r8),%r10
5376	adcq	8+0(%r8),%r11
5377	adcq	$1,%r12
5378	movq	0+0+0(%rbp),%rax
5379	movq	%rax,%r15
5380	mulq	%r10
5381	movq	%rax,%r13
5382	movq	%rdx,%r14
5383	movq	0+0+0(%rbp),%rax
5384	mulq	%r11
5385	imulq	%r12,%r15
5386	addq	%rax,%r14
5387	adcq	%rdx,%r15
5388	movq	8+0+0(%rbp),%rax
5389	movq	%rax,%r9
5390	mulq	%r10
5391	addq	%rax,%r14
5392	adcq	$0,%rdx
5393	movq	%rdx,%r10
5394	movq	8+0+0(%rbp),%rax
5395	mulq	%r11
5396	addq	%rax,%r15
5397	adcq	$0,%rdx
5398	imulq	%r12,%r9
5399	addq	%r10,%r15
5400	adcq	%rdx,%r9
5401	movq	%r13,%r10
5402	movq	%r14,%r11
5403	movq	%r15,%r12
5404	andq	$3,%r12
5405	movq	%r15,%r13
5406	andq	$-4,%r13
5407	movq	%r9,%r14
5408	shrdq	$2,%r9,%r15
5409	shrq	$2,%r9
5410	addq	%r13,%r15
5411	adcq	%r14,%r9
5412	addq	%r15,%r10
5413	adcq	%r9,%r11
5414	adcq	$0,%r12
5415
5416	leaq	16(%r8),%r8
5417L$open_avx2_tail_512_rounds_and_x1hash:
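# One interleaved iteration: a full ChaCha20 double round over all four states
# plus two Poly1305 steps (32 bytes of ciphertext hashed, %r8 advanced by 32).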
5418	vmovdqa	%ymm8,0+128(%rbp)
5419	vmovdqa	L$rol16(%rip),%ymm8
5420	vpaddd	%ymm7,%ymm3,%ymm3
5421	vpaddd	%ymm6,%ymm2,%ymm2
5422	vpaddd	%ymm5,%ymm1,%ymm1
5423	vpaddd	%ymm4,%ymm0,%ymm0
5424	vpxor	%ymm3,%ymm15,%ymm15
5425	vpxor	%ymm2,%ymm14,%ymm14
5426	vpxor	%ymm1,%ymm13,%ymm13
5427	vpxor	%ymm0,%ymm12,%ymm12
5428	vpshufb	%ymm8,%ymm15,%ymm15
5429	vpshufb	%ymm8,%ymm14,%ymm14
5430	vpshufb	%ymm8,%ymm13,%ymm13
5431	vpshufb	%ymm8,%ymm12,%ymm12
5432	vpaddd	%ymm15,%ymm11,%ymm11
5433	vpaddd	%ymm14,%ymm10,%ymm10
5434	vpaddd	%ymm13,%ymm9,%ymm9
5435	vpaddd	0+128(%rbp),%ymm12,%ymm8
5436	vpxor	%ymm11,%ymm7,%ymm7
5437	vpxor	%ymm10,%ymm6,%ymm6
5438	vpxor	%ymm9,%ymm5,%ymm5
5439	vpxor	%ymm8,%ymm4,%ymm4
5440	vmovdqa	%ymm8,0+128(%rbp)
5441	vpsrld	$20,%ymm7,%ymm8
5442	vpslld	$32-20,%ymm7,%ymm7
5443	vpxor	%ymm8,%ymm7,%ymm7
5444	vpsrld	$20,%ymm6,%ymm8
5445	vpslld	$32-20,%ymm6,%ymm6
5446	vpxor	%ymm8,%ymm6,%ymm6
5447	vpsrld	$20,%ymm5,%ymm8
5448	vpslld	$32-20,%ymm5,%ymm5
5449	vpxor	%ymm8,%ymm5,%ymm5
5450	vpsrld	$20,%ymm4,%ymm8
5451	vpslld	$32-20,%ymm4,%ymm4
5452	vpxor	%ymm8,%ymm4,%ymm4
5453	vmovdqa	L$rol8(%rip),%ymm8
5454	vpaddd	%ymm7,%ymm3,%ymm3
5455	addq	0+0(%r8),%r10
5456	adcq	8+0(%r8),%r11
5457	adcq	$1,%r12
5458	movq	0+0+0(%rbp),%rdx
5459	movq	%rdx,%r15
5460	mulxq	%r10,%r13,%r14
5461	mulxq	%r11,%rax,%rdx
5462	imulq	%r12,%r15
5463	addq	%rax,%r14
5464	adcq	%rdx,%r15
5465	movq	8+0+0(%rbp),%rdx
5466	mulxq	%r10,%r10,%rax
5467	addq	%r10,%r14
5468	mulxq	%r11,%r11,%r9
5469	adcq	%r11,%r15
5470	adcq	$0,%r9
5471	imulq	%r12,%rdx
5472	addq	%rax,%r15
5473	adcq	%rdx,%r9
5474	movq	%r13,%r10
5475	movq	%r14,%r11
5476	movq	%r15,%r12
5477	andq	$3,%r12
5478	movq	%r15,%r13
5479	andq	$-4,%r13
5480	movq	%r9,%r14
5481	shrdq	$2,%r9,%r15
5482	shrq	$2,%r9
5483	addq	%r13,%r15
5484	adcq	%r14,%r9
5485	addq	%r15,%r10
5486	adcq	%r9,%r11
5487	adcq	$0,%r12
5488	vpaddd	%ymm6,%ymm2,%ymm2
5489	vpaddd	%ymm5,%ymm1,%ymm1
5490	vpaddd	%ymm4,%ymm0,%ymm0
5491	vpxor	%ymm3,%ymm15,%ymm15
5492	vpxor	%ymm2,%ymm14,%ymm14
5493	vpxor	%ymm1,%ymm13,%ymm13
5494	vpxor	%ymm0,%ymm12,%ymm12
5495	vpshufb	%ymm8,%ymm15,%ymm15
5496	vpshufb	%ymm8,%ymm14,%ymm14
5497	vpshufb	%ymm8,%ymm13,%ymm13
5498	vpshufb	%ymm8,%ymm12,%ymm12
5499	vpaddd	%ymm15,%ymm11,%ymm11
5500	vpaddd	%ymm14,%ymm10,%ymm10
5501	vpaddd	%ymm13,%ymm9,%ymm9
5502	vpaddd	0+128(%rbp),%ymm12,%ymm8
5503	vpxor	%ymm11,%ymm7,%ymm7
5504	vpxor	%ymm10,%ymm6,%ymm6
5505	vpxor	%ymm9,%ymm5,%ymm5
5506	vpxor	%ymm8,%ymm4,%ymm4
5507	vmovdqa	%ymm8,0+128(%rbp)
5508	vpsrld	$25,%ymm7,%ymm8
5509	vpslld	$32-25,%ymm7,%ymm7
5510	vpxor	%ymm8,%ymm7,%ymm7
5511	vpsrld	$25,%ymm6,%ymm8
5512	vpslld	$32-25,%ymm6,%ymm6
5513	vpxor	%ymm8,%ymm6,%ymm6
5514	vpsrld	$25,%ymm5,%ymm8
5515	vpslld	$32-25,%ymm5,%ymm5
5516	vpxor	%ymm8,%ymm5,%ymm5
5517	vpsrld	$25,%ymm4,%ymm8
5518	vpslld	$32-25,%ymm4,%ymm4
5519	vpxor	%ymm8,%ymm4,%ymm4
5520	vmovdqa	0+128(%rbp),%ymm8
5521	vpalignr	$4,%ymm7,%ymm7,%ymm7
5522	vpalignr	$8,%ymm11,%ymm11,%ymm11
5523	vpalignr	$12,%ymm15,%ymm15,%ymm15
5524	vpalignr	$4,%ymm6,%ymm6,%ymm6
5525	vpalignr	$8,%ymm10,%ymm10,%ymm10
5526	vpalignr	$12,%ymm14,%ymm14,%ymm14
5527	vpalignr	$4,%ymm5,%ymm5,%ymm5
5528	vpalignr	$8,%ymm9,%ymm9,%ymm9
5529	vpalignr	$12,%ymm13,%ymm13,%ymm13
5530	vpalignr	$4,%ymm4,%ymm4,%ymm4
5531	vpalignr	$8,%ymm8,%ymm8,%ymm8
5532	vpalignr	$12,%ymm12,%ymm12,%ymm12
5533	vmovdqa	%ymm8,0+128(%rbp)
5534	vmovdqa	L$rol16(%rip),%ymm8
5535	vpaddd	%ymm7,%ymm3,%ymm3
5536	addq	0+16(%r8),%r10
5537	adcq	8+16(%r8),%r11
5538	adcq	$1,%r12
5539	movq	0+0+0(%rbp),%rdx
5540	movq	%rdx,%r15
5541	mulxq	%r10,%r13,%r14
5542	mulxq	%r11,%rax,%rdx
5543	imulq	%r12,%r15
5544	addq	%rax,%r14
5545	adcq	%rdx,%r15
5546	movq	8+0+0(%rbp),%rdx
5547	mulxq	%r10,%r10,%rax
5548	addq	%r10,%r14
5549	mulxq	%r11,%r11,%r9
5550	adcq	%r11,%r15
5551	adcq	$0,%r9
5552	imulq	%r12,%rdx
5553	addq	%rax,%r15
5554	adcq	%rdx,%r9
5555	movq	%r13,%r10
5556	movq	%r14,%r11
5557	movq	%r15,%r12
5558	andq	$3,%r12
5559	movq	%r15,%r13
5560	andq	$-4,%r13
5561	movq	%r9,%r14
5562	shrdq	$2,%r9,%r15
5563	shrq	$2,%r9
5564	addq	%r13,%r15
5565	adcq	%r14,%r9
5566	addq	%r15,%r10
5567	adcq	%r9,%r11
5568	adcq	$0,%r12
5569
5570	leaq	32(%r8),%r8
5571	vpaddd	%ymm6,%ymm2,%ymm2
5572	vpaddd	%ymm5,%ymm1,%ymm1
5573	vpaddd	%ymm4,%ymm0,%ymm0
5574	vpxor	%ymm3,%ymm15,%ymm15
5575	vpxor	%ymm2,%ymm14,%ymm14
5576	vpxor	%ymm1,%ymm13,%ymm13
5577	vpxor	%ymm0,%ymm12,%ymm12
5578	vpshufb	%ymm8,%ymm15,%ymm15
5579	vpshufb	%ymm8,%ymm14,%ymm14
5580	vpshufb	%ymm8,%ymm13,%ymm13
5581	vpshufb	%ymm8,%ymm12,%ymm12
5582	vpaddd	%ymm15,%ymm11,%ymm11
5583	vpaddd	%ymm14,%ymm10,%ymm10
5584	vpaddd	%ymm13,%ymm9,%ymm9
5585	vpaddd	0+128(%rbp),%ymm12,%ymm8
5586	vpxor	%ymm11,%ymm7,%ymm7
5587	vpxor	%ymm10,%ymm6,%ymm6
5588	vpxor	%ymm9,%ymm5,%ymm5
5589	vpxor	%ymm8,%ymm4,%ymm4
5590	vmovdqa	%ymm8,0+128(%rbp)
5591	vpsrld	$20,%ymm7,%ymm8
5592	vpslld	$32-20,%ymm7,%ymm7
5593	vpxor	%ymm8,%ymm7,%ymm7
5594	vpsrld	$20,%ymm6,%ymm8
5595	vpslld	$32-20,%ymm6,%ymm6
5596	vpxor	%ymm8,%ymm6,%ymm6
5597	vpsrld	$20,%ymm5,%ymm8
5598	vpslld	$32-20,%ymm5,%ymm5
5599	vpxor	%ymm8,%ymm5,%ymm5
5600	vpsrld	$20,%ymm4,%ymm8
5601	vpslld	$32-20,%ymm4,%ymm4
5602	vpxor	%ymm8,%ymm4,%ymm4
5603	vmovdqa	L$rol8(%rip),%ymm8
5604	vpaddd	%ymm7,%ymm3,%ymm3
5605	vpaddd	%ymm6,%ymm2,%ymm2
5606	vpaddd	%ymm5,%ymm1,%ymm1
5607	vpaddd	%ymm4,%ymm0,%ymm0
5608	vpxor	%ymm3,%ymm15,%ymm15
5609	vpxor	%ymm2,%ymm14,%ymm14
5610	vpxor	%ymm1,%ymm13,%ymm13
5611	vpxor	%ymm0,%ymm12,%ymm12
5612	vpshufb	%ymm8,%ymm15,%ymm15
5613	vpshufb	%ymm8,%ymm14,%ymm14
5614	vpshufb	%ymm8,%ymm13,%ymm13
5615	vpshufb	%ymm8,%ymm12,%ymm12
5616	vpaddd	%ymm15,%ymm11,%ymm11
5617	vpaddd	%ymm14,%ymm10,%ymm10
5618	vpaddd	%ymm13,%ymm9,%ymm9
5619	vpaddd	0+128(%rbp),%ymm12,%ymm8
5620	vpxor	%ymm11,%ymm7,%ymm7
5621	vpxor	%ymm10,%ymm6,%ymm6
5622	vpxor	%ymm9,%ymm5,%ymm5
5623	vpxor	%ymm8,%ymm4,%ymm4
5624	vmovdqa	%ymm8,0+128(%rbp)
5625	vpsrld	$25,%ymm7,%ymm8
5626	vpslld	$32-25,%ymm7,%ymm7
5627	vpxor	%ymm8,%ymm7,%ymm7
5628	vpsrld	$25,%ymm6,%ymm8
5629	vpslld	$32-25,%ymm6,%ymm6
5630	vpxor	%ymm8,%ymm6,%ymm6
5631	vpsrld	$25,%ymm5,%ymm8
5632	vpslld	$32-25,%ymm5,%ymm5
5633	vpxor	%ymm8,%ymm5,%ymm5
5634	vpsrld	$25,%ymm4,%ymm8
5635	vpslld	$32-25,%ymm4,%ymm4
5636	vpxor	%ymm8,%ymm4,%ymm4
5637	vmovdqa	0+128(%rbp),%ymm8
5638	vpalignr	$12,%ymm7,%ymm7,%ymm7
5639	vpalignr	$8,%ymm11,%ymm11,%ymm11
5640	vpalignr	$4,%ymm15,%ymm15,%ymm15
5641	vpalignr	$12,%ymm6,%ymm6,%ymm6
5642	vpalignr	$8,%ymm10,%ymm10,%ymm10
5643	vpalignr	$4,%ymm14,%ymm14,%ymm14
5644	vpalignr	$12,%ymm5,%ymm5,%ymm5
5645	vpalignr	$8,%ymm9,%ymm9,%ymm9
5646	vpalignr	$4,%ymm13,%ymm13,%ymm13
5647	vpalignr	$12,%ymm4,%ymm4,%ymm4
5648	vpalignr	$8,%ymm8,%ymm8,%ymm8
5649	vpalignr	$4,%ymm12,%ymm12,%ymm12
5650
5651	incq	%rcx
5652	cmpq	$4,%rcx
5653	jl	L$open_avx2_tail_512_rounds_and_x2hash
5654	cmpq	$10,%rcx
5655	jne	L$open_avx2_tail_512_rounds_and_x1hash
5656	movq	%rbx,%rcx
5657	subq	$384,%rcx
5658	andq	$-16,%rcx
5659L$open_avx2_tail_512_hash:
5660	testq	%rcx,%rcx
5661	je	L$open_avx2_tail_512_done
5662	addq	0+0(%r8),%r10
5663	adcq	8+0(%r8),%r11
5664	adcq	$1,%r12
5665	movq	0+0+0(%rbp),%rdx
5666	movq	%rdx,%r15
5667	mulxq	%r10,%r13,%r14
5668	mulxq	%r11,%rax,%rdx
5669	imulq	%r12,%r15
5670	addq	%rax,%r14
5671	adcq	%rdx,%r15
5672	movq	8+0+0(%rbp),%rdx
5673	mulxq	%r10,%r10,%rax
5674	addq	%r10,%r14
5675	mulxq	%r11,%r11,%r9
5676	adcq	%r11,%r15
5677	adcq	$0,%r9
5678	imulq	%r12,%rdx
5679	addq	%rax,%r15
5680	adcq	%rdx,%r9
5681	movq	%r13,%r10
5682	movq	%r14,%r11
5683	movq	%r15,%r12
5684	andq	$3,%r12
5685	movq	%r15,%r13
5686	andq	$-4,%r13
5687	movq	%r9,%r14
5688	shrdq	$2,%r9,%r15
5689	shrq	$2,%r9
5690	addq	%r13,%r15
5691	adcq	%r14,%r9
5692	addq	%r15,%r10
5693	adcq	%r9,%r11
5694	adcq	$0,%r12
5695
5696	leaq	16(%r8),%r8
5697	subq	$16,%rcx
5698	jmp	L$open_avx2_tail_512_hash
5699L$open_avx2_tail_512_done:
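# Add the input state back into all four blocks and XOR the first 384 bytes of
# ciphertext; the final part (at most 128 bytes) falls into the XOR loop below.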
5700	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
5701	vpaddd	0+64(%rbp),%ymm7,%ymm7
5702	vpaddd	0+96(%rbp),%ymm11,%ymm11
5703	vpaddd	0+256(%rbp),%ymm15,%ymm15
5704	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
5705	vpaddd	0+64(%rbp),%ymm6,%ymm6
5706	vpaddd	0+96(%rbp),%ymm10,%ymm10
5707	vpaddd	0+224(%rbp),%ymm14,%ymm14
5708	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
5709	vpaddd	0+64(%rbp),%ymm5,%ymm5
5710	vpaddd	0+96(%rbp),%ymm9,%ymm9
5711	vpaddd	0+192(%rbp),%ymm13,%ymm13
5712	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5713	vpaddd	0+64(%rbp),%ymm4,%ymm4
5714	vpaddd	0+96(%rbp),%ymm8,%ymm8
5715	vpaddd	0+160(%rbp),%ymm12,%ymm12
5716
5717	vmovdqa	%ymm0,0+128(%rbp)
5718	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
5719	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
5720	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
5721	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
5722	vpxor	0+0(%rsi),%ymm0,%ymm0
5723	vpxor	32+0(%rsi),%ymm3,%ymm3
5724	vpxor	64+0(%rsi),%ymm7,%ymm7
5725	vpxor	96+0(%rsi),%ymm11,%ymm11
5726	vmovdqu	%ymm0,0+0(%rdi)
5727	vmovdqu	%ymm3,32+0(%rdi)
5728	vmovdqu	%ymm7,64+0(%rdi)
5729	vmovdqu	%ymm11,96+0(%rdi)
5730
5731	vmovdqa	0+128(%rbp),%ymm0
5732	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5733	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5734	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5735	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5736	vpxor	0+128(%rsi),%ymm3,%ymm3
5737	vpxor	32+128(%rsi),%ymm2,%ymm2
5738	vpxor	64+128(%rsi),%ymm6,%ymm6
5739	vpxor	96+128(%rsi),%ymm10,%ymm10
5740	vmovdqu	%ymm3,0+128(%rdi)
5741	vmovdqu	%ymm2,32+128(%rdi)
5742	vmovdqu	%ymm6,64+128(%rdi)
5743	vmovdqu	%ymm10,96+128(%rdi)
5744	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5745	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5746	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5747	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5748	vpxor	0+256(%rsi),%ymm3,%ymm3
5749	vpxor	32+256(%rsi),%ymm1,%ymm1
5750	vpxor	64+256(%rsi),%ymm5,%ymm5
5751	vpxor	96+256(%rsi),%ymm9,%ymm9
5752	vmovdqu	%ymm3,0+256(%rdi)
5753	vmovdqu	%ymm1,32+256(%rdi)
5754	vmovdqu	%ymm5,64+256(%rdi)
5755	vmovdqu	%ymm9,96+256(%rdi)
5756	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5757	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5758	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5759	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5760	vmovdqa	%ymm3,%ymm8
5761
5762	leaq	384(%rsi),%rsi
5763	leaq	384(%rdi),%rdi
5764	subq	$384,%rbx
5765L$open_avx2_tail_128_xor:
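# Shared tail: XOR the remaining data with the queued keystream 32 bytes at a
# time, rotating ymm4/ymm8/ymm12 down into ymm0; anything under 32 bytes is
# handled 16 bytes at a time and the sub-16-byte remainder goes to the SSE tail.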
5766	cmpq	$32,%rbx
5767	jb	L$open_avx2_tail_32_xor
5768	subq	$32,%rbx
5769	vpxor	(%rsi),%ymm0,%ymm0
5770	vmovdqu	%ymm0,(%rdi)
5771	leaq	32(%rsi),%rsi
5772	leaq	32(%rdi),%rdi
5773	vmovdqa	%ymm4,%ymm0
5774	vmovdqa	%ymm8,%ymm4
5775	vmovdqa	%ymm12,%ymm8
5776	jmp	L$open_avx2_tail_128_xor
5777L$open_avx2_tail_32_xor:
5778	cmpq	$16,%rbx
5779	vmovdqa	%xmm0,%xmm1
5780	jb	L$open_avx2_exit
5781	subq	$16,%rbx
5782
5783	vpxor	(%rsi),%xmm0,%xmm1
5784	vmovdqu	%xmm1,(%rdi)
5785	leaq	16(%rsi),%rsi
5786	leaq	16(%rdi),%rdi
5787	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
5788	vmovdqa	%xmm0,%xmm1
5789L$open_avx2_exit:
5790	vzeroupper
5791	jmp	L$open_sse_tail_16
5792
5793L$open_avx2_192:
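# Short open path (inputs of at most 192 bytes): two ChaCha20 states, 10 double
# rounds; the first 32 bytes of keystream are clamped into the Poly1305 key at
# 0(%rbp) and the rest stays in registers for L$open_avx2_short.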
5794	vmovdqa	%ymm0,%ymm1
5795	vmovdqa	%ymm0,%ymm2
5796	vmovdqa	%ymm4,%ymm5
5797	vmovdqa	%ymm4,%ymm6
5798	vmovdqa	%ymm8,%ymm9
5799	vmovdqa	%ymm8,%ymm10
5800	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
5801	vmovdqa	%ymm12,%ymm11
5802	vmovdqa	%ymm13,%ymm15
5803	movq	$10,%r10
5804L$open_avx2_192_rounds:
5805	vpaddd	%ymm4,%ymm0,%ymm0
5806	vpxor	%ymm0,%ymm12,%ymm12
5807	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5808	vpaddd	%ymm12,%ymm8,%ymm8
5809	vpxor	%ymm8,%ymm4,%ymm4
5810	vpsrld	$20,%ymm4,%ymm3
5811	vpslld	$12,%ymm4,%ymm4
5812	vpxor	%ymm3,%ymm4,%ymm4
5813	vpaddd	%ymm4,%ymm0,%ymm0
5814	vpxor	%ymm0,%ymm12,%ymm12
5815	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5816	vpaddd	%ymm12,%ymm8,%ymm8
5817	vpxor	%ymm8,%ymm4,%ymm4
5818	vpslld	$7,%ymm4,%ymm3
5819	vpsrld	$25,%ymm4,%ymm4
5820	vpxor	%ymm3,%ymm4,%ymm4
5821	vpalignr	$12,%ymm12,%ymm12,%ymm12
5822	vpalignr	$8,%ymm8,%ymm8,%ymm8
5823	vpalignr	$4,%ymm4,%ymm4,%ymm4
5824	vpaddd	%ymm5,%ymm1,%ymm1
5825	vpxor	%ymm1,%ymm13,%ymm13
5826	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5827	vpaddd	%ymm13,%ymm9,%ymm9
5828	vpxor	%ymm9,%ymm5,%ymm5
5829	vpsrld	$20,%ymm5,%ymm3
5830	vpslld	$12,%ymm5,%ymm5
5831	vpxor	%ymm3,%ymm5,%ymm5
5832	vpaddd	%ymm5,%ymm1,%ymm1
5833	vpxor	%ymm1,%ymm13,%ymm13
5834	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5835	vpaddd	%ymm13,%ymm9,%ymm9
5836	vpxor	%ymm9,%ymm5,%ymm5
5837	vpslld	$7,%ymm5,%ymm3
5838	vpsrld	$25,%ymm5,%ymm5
5839	vpxor	%ymm3,%ymm5,%ymm5
5840	vpalignr	$12,%ymm13,%ymm13,%ymm13
5841	vpalignr	$8,%ymm9,%ymm9,%ymm9
5842	vpalignr	$4,%ymm5,%ymm5,%ymm5
5843	vpaddd	%ymm4,%ymm0,%ymm0
5844	vpxor	%ymm0,%ymm12,%ymm12
5845	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5846	vpaddd	%ymm12,%ymm8,%ymm8
5847	vpxor	%ymm8,%ymm4,%ymm4
5848	vpsrld	$20,%ymm4,%ymm3
5849	vpslld	$12,%ymm4,%ymm4
5850	vpxor	%ymm3,%ymm4,%ymm4
5851	vpaddd	%ymm4,%ymm0,%ymm0
5852	vpxor	%ymm0,%ymm12,%ymm12
5853	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5854	vpaddd	%ymm12,%ymm8,%ymm8
5855	vpxor	%ymm8,%ymm4,%ymm4
5856	vpslld	$7,%ymm4,%ymm3
5857	vpsrld	$25,%ymm4,%ymm4
5858	vpxor	%ymm3,%ymm4,%ymm4
5859	vpalignr	$4,%ymm12,%ymm12,%ymm12
5860	vpalignr	$8,%ymm8,%ymm8,%ymm8
5861	vpalignr	$12,%ymm4,%ymm4,%ymm4
5862	vpaddd	%ymm5,%ymm1,%ymm1
5863	vpxor	%ymm1,%ymm13,%ymm13
5864	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5865	vpaddd	%ymm13,%ymm9,%ymm9
5866	vpxor	%ymm9,%ymm5,%ymm5
5867	vpsrld	$20,%ymm5,%ymm3
5868	vpslld	$12,%ymm5,%ymm5
5869	vpxor	%ymm3,%ymm5,%ymm5
5870	vpaddd	%ymm5,%ymm1,%ymm1
5871	vpxor	%ymm1,%ymm13,%ymm13
5872	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5873	vpaddd	%ymm13,%ymm9,%ymm9
5874	vpxor	%ymm9,%ymm5,%ymm5
5875	vpslld	$7,%ymm5,%ymm3
5876	vpsrld	$25,%ymm5,%ymm5
5877	vpxor	%ymm3,%ymm5,%ymm5
5878	vpalignr	$4,%ymm13,%ymm13,%ymm13
5879	vpalignr	$8,%ymm9,%ymm9,%ymm9
5880	vpalignr	$12,%ymm5,%ymm5,%ymm5
5881
5882	decq	%r10
5883	jne	L$open_avx2_192_rounds
5884	vpaddd	%ymm2,%ymm0,%ymm0
5885	vpaddd	%ymm2,%ymm1,%ymm1
5886	vpaddd	%ymm6,%ymm4,%ymm4
5887	vpaddd	%ymm6,%ymm5,%ymm5
5888	vpaddd	%ymm10,%ymm8,%ymm8
5889	vpaddd	%ymm10,%ymm9,%ymm9
5890	vpaddd	%ymm11,%ymm12,%ymm12
5891	vpaddd	%ymm15,%ymm13,%ymm13
5892	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
5893
5894	vpand	L$clamp(%rip),%ymm3,%ymm3
5895	vmovdqa	%ymm3,0+0(%rbp)
5896
5897	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
5898	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
5899	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
5900	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
5901	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
5902	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
5903L$open_avx2_short:
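# Hash the additional data, then alternate per 32-byte chunk: two Poly1305
# steps on the ciphertext followed by a 32-byte XOR, rotating the queued
# keystream registers down after each chunk.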
5904	movq	%r8,%r8
5905	call	poly_hash_ad_internal
5906L$open_avx2_short_hash_and_xor_loop:
5907	cmpq	$32,%rbx
5908	jb	L$open_avx2_short_tail_32
5909	subq	$32,%rbx
5910	addq	0+0(%rsi),%r10
5911	adcq	8+0(%rsi),%r11
5912	adcq	$1,%r12
5913	movq	0+0+0(%rbp),%rax
5914	movq	%rax,%r15
5915	mulq	%r10
5916	movq	%rax,%r13
5917	movq	%rdx,%r14
5918	movq	0+0+0(%rbp),%rax
5919	mulq	%r11
5920	imulq	%r12,%r15
5921	addq	%rax,%r14
5922	adcq	%rdx,%r15
5923	movq	8+0+0(%rbp),%rax
5924	movq	%rax,%r9
5925	mulq	%r10
5926	addq	%rax,%r14
5927	adcq	$0,%rdx
5928	movq	%rdx,%r10
5929	movq	8+0+0(%rbp),%rax
5930	mulq	%r11
5931	addq	%rax,%r15
5932	adcq	$0,%rdx
5933	imulq	%r12,%r9
5934	addq	%r10,%r15
5935	adcq	%rdx,%r9
5936	movq	%r13,%r10
5937	movq	%r14,%r11
5938	movq	%r15,%r12
5939	andq	$3,%r12
5940	movq	%r15,%r13
5941	andq	$-4,%r13
5942	movq	%r9,%r14
5943	shrdq	$2,%r9,%r15
5944	shrq	$2,%r9
5945	addq	%r13,%r15
5946	adcq	%r14,%r9
5947	addq	%r15,%r10
5948	adcq	%r9,%r11
5949	adcq	$0,%r12
5950	addq	0+16(%rsi),%r10
5951	adcq	8+16(%rsi),%r11
5952	adcq	$1,%r12
5953	movq	0+0+0(%rbp),%rax
5954	movq	%rax,%r15
5955	mulq	%r10
5956	movq	%rax,%r13
5957	movq	%rdx,%r14
5958	movq	0+0+0(%rbp),%rax
5959	mulq	%r11
5960	imulq	%r12,%r15
5961	addq	%rax,%r14
5962	adcq	%rdx,%r15
5963	movq	8+0+0(%rbp),%rax
5964	movq	%rax,%r9
5965	mulq	%r10
5966	addq	%rax,%r14
5967	adcq	$0,%rdx
5968	movq	%rdx,%r10
5969	movq	8+0+0(%rbp),%rax
5970	mulq	%r11
5971	addq	%rax,%r15
5972	adcq	$0,%rdx
5973	imulq	%r12,%r9
5974	addq	%r10,%r15
5975	adcq	%rdx,%r9
5976	movq	%r13,%r10
5977	movq	%r14,%r11
5978	movq	%r15,%r12
5979	andq	$3,%r12
5980	movq	%r15,%r13
5981	andq	$-4,%r13
5982	movq	%r9,%r14
5983	shrdq	$2,%r9,%r15
5984	shrq	$2,%r9
5985	addq	%r13,%r15
5986	adcq	%r14,%r9
5987	addq	%r15,%r10
5988	adcq	%r9,%r11
5989	adcq	$0,%r12
5990
5991
5992	vpxor	(%rsi),%ymm0,%ymm0
5993	vmovdqu	%ymm0,(%rdi)
5994	leaq	32(%rsi),%rsi
5995	leaq	32(%rdi),%rdi
5996
5997	vmovdqa	%ymm4,%ymm0
5998	vmovdqa	%ymm8,%ymm4
5999	vmovdqa	%ymm12,%ymm8
6000	vmovdqa	%ymm1,%ymm12
6001	vmovdqa	%ymm5,%ymm1
6002	vmovdqa	%ymm9,%ymm5
6003	vmovdqa	%ymm13,%ymm9
6004	vmovdqa	%ymm2,%ymm13
6005	vmovdqa	%ymm6,%ymm2
6006	jmp	L$open_avx2_short_hash_and_xor_loop
6007L$open_avx2_short_tail_32:
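# At most 31 bytes left: if 16 or more remain, hash and XOR one 16-byte block,
# then fall through to the sub-16-byte SSE tail.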
6008	cmpq	$16,%rbx
6009	vmovdqa	%xmm0,%xmm1
6010	jb	L$open_avx2_short_tail_32_exit
6011	subq	$16,%rbx
6012	addq	0+0(%rsi),%r10
6013	adcq	8+0(%rsi),%r11
6014	adcq	$1,%r12
6015	movq	0+0+0(%rbp),%rax
6016	movq	%rax,%r15
6017	mulq	%r10
6018	movq	%rax,%r13
6019	movq	%rdx,%r14
6020	movq	0+0+0(%rbp),%rax
6021	mulq	%r11
6022	imulq	%r12,%r15
6023	addq	%rax,%r14
6024	adcq	%rdx,%r15
6025	movq	8+0+0(%rbp),%rax
6026	movq	%rax,%r9
6027	mulq	%r10
6028	addq	%rax,%r14
6029	adcq	$0,%rdx
6030	movq	%rdx,%r10
6031	movq	8+0+0(%rbp),%rax
6032	mulq	%r11
6033	addq	%rax,%r15
6034	adcq	$0,%rdx
6035	imulq	%r12,%r9
6036	addq	%r10,%r15
6037	adcq	%rdx,%r9
6038	movq	%r13,%r10
6039	movq	%r14,%r11
6040	movq	%r15,%r12
6041	andq	$3,%r12
6042	movq	%r15,%r13
6043	andq	$-4,%r13
6044	movq	%r9,%r14
6045	shrdq	$2,%r9,%r15
6046	shrq	$2,%r9
6047	addq	%r13,%r15
6048	adcq	%r14,%r9
6049	addq	%r15,%r10
6050	adcq	%r9,%r11
6051	adcq	$0,%r12
6052
6053	vpxor	(%rsi),%xmm0,%xmm3
6054	vmovdqu	%xmm3,(%rdi)
6055	leaq	16(%rsi),%rsi
6056	leaq	16(%rdi),%rdi
6057	vextracti128	$1,%ymm0,%xmm1
6058L$open_avx2_short_tail_32_exit:
6059	vzeroupper
6060	jmp	L$open_sse_tail_16
6061
6062L$open_avx2_320:
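# Short open path for inputs of at most 320 bytes: three ChaCha20 states; as in
# the 192-byte case, the first 32 bytes of keystream become the clamped
# Poly1305 key.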
6063	vmovdqa	%ymm0,%ymm1
6064	vmovdqa	%ymm0,%ymm2
6065	vmovdqa	%ymm4,%ymm5
6066	vmovdqa	%ymm4,%ymm6
6067	vmovdqa	%ymm8,%ymm9
6068	vmovdqa	%ymm8,%ymm10
6069	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
6070	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
6071	vmovdqa	%ymm4,%ymm7
6072	vmovdqa	%ymm8,%ymm11
6073	vmovdqa	%ymm12,0+160(%rbp)
6074	vmovdqa	%ymm13,0+192(%rbp)
6075	vmovdqa	%ymm14,0+224(%rbp)
6076	movq	$10,%r10
6077L$open_avx2_320_rounds:
6078	vpaddd	%ymm4,%ymm0,%ymm0
6079	vpxor	%ymm0,%ymm12,%ymm12
6080	vpshufb	L$rol16(%rip),%ymm12,%ymm12
6081	vpaddd	%ymm12,%ymm8,%ymm8
6082	vpxor	%ymm8,%ymm4,%ymm4
6083	vpsrld	$20,%ymm4,%ymm3
6084	vpslld	$12,%ymm4,%ymm4
6085	vpxor	%ymm3,%ymm4,%ymm4
6086	vpaddd	%ymm4,%ymm0,%ymm0
6087	vpxor	%ymm0,%ymm12,%ymm12
6088	vpshufb	L$rol8(%rip),%ymm12,%ymm12
6089	vpaddd	%ymm12,%ymm8,%ymm8
6090	vpxor	%ymm8,%ymm4,%ymm4
6091	vpslld	$7,%ymm4,%ymm3
6092	vpsrld	$25,%ymm4,%ymm4
6093	vpxor	%ymm3,%ymm4,%ymm4
6094	vpalignr	$12,%ymm12,%ymm12,%ymm12
6095	vpalignr	$8,%ymm8,%ymm8,%ymm8
6096	vpalignr	$4,%ymm4,%ymm4,%ymm4
6097	vpaddd	%ymm5,%ymm1,%ymm1
6098	vpxor	%ymm1,%ymm13,%ymm13
6099	vpshufb	L$rol16(%rip),%ymm13,%ymm13
6100	vpaddd	%ymm13,%ymm9,%ymm9
6101	vpxor	%ymm9,%ymm5,%ymm5
6102	vpsrld	$20,%ymm5,%ymm3
6103	vpslld	$12,%ymm5,%ymm5
6104	vpxor	%ymm3,%ymm5,%ymm5
6105	vpaddd	%ymm5,%ymm1,%ymm1
6106	vpxor	%ymm1,%ymm13,%ymm13
6107	vpshufb	L$rol8(%rip),%ymm13,%ymm13
6108	vpaddd	%ymm13,%ymm9,%ymm9
6109	vpxor	%ymm9,%ymm5,%ymm5
6110	vpslld	$7,%ymm5,%ymm3
6111	vpsrld	$25,%ymm5,%ymm5
6112	vpxor	%ymm3,%ymm5,%ymm5
6113	vpalignr	$12,%ymm13,%ymm13,%ymm13
6114	vpalignr	$8,%ymm9,%ymm9,%ymm9
6115	vpalignr	$4,%ymm5,%ymm5,%ymm5
6116	vpaddd	%ymm6,%ymm2,%ymm2
6117	vpxor	%ymm2,%ymm14,%ymm14
6118	vpshufb	L$rol16(%rip),%ymm14,%ymm14
6119	vpaddd	%ymm14,%ymm10,%ymm10
6120	vpxor	%ymm10,%ymm6,%ymm6
6121	vpsrld	$20,%ymm6,%ymm3
6122	vpslld	$12,%ymm6,%ymm6
6123	vpxor	%ymm3,%ymm6,%ymm6
6124	vpaddd	%ymm6,%ymm2,%ymm2
6125	vpxor	%ymm2,%ymm14,%ymm14
6126	vpshufb	L$rol8(%rip),%ymm14,%ymm14
6127	vpaddd	%ymm14,%ymm10,%ymm10
6128	vpxor	%ymm10,%ymm6,%ymm6
6129	vpslld	$7,%ymm6,%ymm3
6130	vpsrld	$25,%ymm6,%ymm6
6131	vpxor	%ymm3,%ymm6,%ymm6
6132	vpalignr	$12,%ymm14,%ymm14,%ymm14
6133	vpalignr	$8,%ymm10,%ymm10,%ymm10
6134	vpalignr	$4,%ymm6,%ymm6,%ymm6
6135	vpaddd	%ymm4,%ymm0,%ymm0
6136	vpxor	%ymm0,%ymm12,%ymm12
6137	vpshufb	L$rol16(%rip),%ymm12,%ymm12
6138	vpaddd	%ymm12,%ymm8,%ymm8
6139	vpxor	%ymm8,%ymm4,%ymm4
6140	vpsrld	$20,%ymm4,%ymm3
6141	vpslld	$12,%ymm4,%ymm4
6142	vpxor	%ymm3,%ymm4,%ymm4
6143	vpaddd	%ymm4,%ymm0,%ymm0
6144	vpxor	%ymm0,%ymm12,%ymm12
6145	vpshufb	L$rol8(%rip),%ymm12,%ymm12
6146	vpaddd	%ymm12,%ymm8,%ymm8
6147	vpxor	%ymm8,%ymm4,%ymm4
6148	vpslld	$7,%ymm4,%ymm3
6149	vpsrld	$25,%ymm4,%ymm4
6150	vpxor	%ymm3,%ymm4,%ymm4
6151	vpalignr	$4,%ymm12,%ymm12,%ymm12
6152	vpalignr	$8,%ymm8,%ymm8,%ymm8
6153	vpalignr	$12,%ymm4,%ymm4,%ymm4
6154	vpaddd	%ymm5,%ymm1,%ymm1
6155	vpxor	%ymm1,%ymm13,%ymm13
6156	vpshufb	L$rol16(%rip),%ymm13,%ymm13
6157	vpaddd	%ymm13,%ymm9,%ymm9
6158	vpxor	%ymm9,%ymm5,%ymm5
6159	vpsrld	$20,%ymm5,%ymm3
6160	vpslld	$12,%ymm5,%ymm5
6161	vpxor	%ymm3,%ymm5,%ymm5
6162	vpaddd	%ymm5,%ymm1,%ymm1
6163	vpxor	%ymm1,%ymm13,%ymm13
6164	vpshufb	L$rol8(%rip),%ymm13,%ymm13
6165	vpaddd	%ymm13,%ymm9,%ymm9
6166	vpxor	%ymm9,%ymm5,%ymm5
6167	vpslld	$7,%ymm5,%ymm3
6168	vpsrld	$25,%ymm5,%ymm5
6169	vpxor	%ymm3,%ymm5,%ymm5
6170	vpalignr	$4,%ymm13,%ymm13,%ymm13
6171	vpalignr	$8,%ymm9,%ymm9,%ymm9
6172	vpalignr	$12,%ymm5,%ymm5,%ymm5
6173	vpaddd	%ymm6,%ymm2,%ymm2
6174	vpxor	%ymm2,%ymm14,%ymm14
6175	vpshufb	L$rol16(%rip),%ymm14,%ymm14
6176	vpaddd	%ymm14,%ymm10,%ymm10
6177	vpxor	%ymm10,%ymm6,%ymm6
6178	vpsrld	$20,%ymm6,%ymm3
6179	vpslld	$12,%ymm6,%ymm6
6180	vpxor	%ymm3,%ymm6,%ymm6
6181	vpaddd	%ymm6,%ymm2,%ymm2
6182	vpxor	%ymm2,%ymm14,%ymm14
6183	vpshufb	L$rol8(%rip),%ymm14,%ymm14
6184	vpaddd	%ymm14,%ymm10,%ymm10
6185	vpxor	%ymm10,%ymm6,%ymm6
6186	vpslld	$7,%ymm6,%ymm3
6187	vpsrld	$25,%ymm6,%ymm6
6188	vpxor	%ymm3,%ymm6,%ymm6
6189	vpalignr	$4,%ymm14,%ymm14,%ymm14
6190	vpalignr	$8,%ymm10,%ymm10,%ymm10
6191	vpalignr	$12,%ymm6,%ymm6,%ymm6
6192
6193	decq	%r10
6194	jne	L$open_avx2_320_rounds
6195	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
6196	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
6197	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
6198	vpaddd	%ymm7,%ymm4,%ymm4
6199	vpaddd	%ymm7,%ymm5,%ymm5
6200	vpaddd	%ymm7,%ymm6,%ymm6
6201	vpaddd	%ymm11,%ymm8,%ymm8
6202	vpaddd	%ymm11,%ymm9,%ymm9
6203	vpaddd	%ymm11,%ymm10,%ymm10
6204	vpaddd	0+160(%rbp),%ymm12,%ymm12
6205	vpaddd	0+192(%rbp),%ymm13,%ymm13
6206	vpaddd	0+224(%rbp),%ymm14,%ymm14
6207	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
6208
6209	vpand	L$clamp(%rip),%ymm3,%ymm3
6210	vmovdqa	%ymm3,0+0(%rbp)
6211
6212	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
6213	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
6214	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
6215	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
6216	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
6217	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
6218	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
6219	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
6220	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
6221	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
6222	jmp	L$open_avx2_short
6223
6224
6225
6226
6227
6228.p2align	6
6229chacha20_poly1305_seal_avx2:
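# AVX2 seal (encrypt-and-authenticate) path. The ChaCha20 state is built by
# broadcasting the key and counter/nonce block at (%r9); plaintexts of at most
# 192 or 320 bytes take the short paths below, everything else runs the
# 512-byte main loop.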
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242	vzeroupper
6243	vmovdqa	L$chacha20_consts(%rip),%ymm0
6244	vbroadcasti128	0(%r9),%ymm4
6245	vbroadcasti128	16(%r9),%ymm8
6246	vbroadcasti128	32(%r9),%ymm12
6247	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
6248	cmpq	$192,%rbx
6249	jbe	L$seal_avx2_192
6250	cmpq	$320,%rbx
6251	jbe	L$seal_avx2_320
6252	vmovdqa	%ymm0,%ymm1
6253	vmovdqa	%ymm0,%ymm2
6254	vmovdqa	%ymm0,%ymm3
6255	vmovdqa	%ymm4,%ymm5
6256	vmovdqa	%ymm4,%ymm6
6257	vmovdqa	%ymm4,%ymm7
6258	vmovdqa	%ymm4,0+64(%rbp)
6259	vmovdqa	%ymm8,%ymm9
6260	vmovdqa	%ymm8,%ymm10
6261	vmovdqa	%ymm8,%ymm11
6262	vmovdqa	%ymm8,0+96(%rbp)
6263	vmovdqa	%ymm12,%ymm15
6264	vpaddd	L$avx2_inc(%rip),%ymm15,%ymm14
6265	vpaddd	L$avx2_inc(%rip),%ymm14,%ymm13
6266	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm12
6267	vmovdqa	%ymm12,0+160(%rbp)
6268	vmovdqa	%ymm13,0+192(%rbp)
6269	vmovdqa	%ymm14,0+224(%rbp)
6270	vmovdqa	%ymm15,0+256(%rbp)
6271	movq	$10,%r10
6272L$seal_avx2_init_rounds:
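# First pass: 10 double rounds over four parallel states. Afterwards the
# lowest-counter block supplies the clamped Poly1305 key, the additional data
# is hashed, and the remaining keystream encrypts the first 320 bytes.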
6273	vmovdqa	%ymm8,0+128(%rbp)
6274	vmovdqa	L$rol16(%rip),%ymm8
6275	vpaddd	%ymm7,%ymm3,%ymm3
6276	vpaddd	%ymm6,%ymm2,%ymm2
6277	vpaddd	%ymm5,%ymm1,%ymm1
6278	vpaddd	%ymm4,%ymm0,%ymm0
6279	vpxor	%ymm3,%ymm15,%ymm15
6280	vpxor	%ymm2,%ymm14,%ymm14
6281	vpxor	%ymm1,%ymm13,%ymm13
6282	vpxor	%ymm0,%ymm12,%ymm12
6283	vpshufb	%ymm8,%ymm15,%ymm15
6284	vpshufb	%ymm8,%ymm14,%ymm14
6285	vpshufb	%ymm8,%ymm13,%ymm13
6286	vpshufb	%ymm8,%ymm12,%ymm12
6287	vpaddd	%ymm15,%ymm11,%ymm11
6288	vpaddd	%ymm14,%ymm10,%ymm10
6289	vpaddd	%ymm13,%ymm9,%ymm9
6290	vpaddd	0+128(%rbp),%ymm12,%ymm8
6291	vpxor	%ymm11,%ymm7,%ymm7
6292	vpxor	%ymm10,%ymm6,%ymm6
6293	vpxor	%ymm9,%ymm5,%ymm5
6294	vpxor	%ymm8,%ymm4,%ymm4
6295	vmovdqa	%ymm8,0+128(%rbp)
6296	vpsrld	$20,%ymm7,%ymm8
6297	vpslld	$32-20,%ymm7,%ymm7
6298	vpxor	%ymm8,%ymm7,%ymm7
6299	vpsrld	$20,%ymm6,%ymm8
6300	vpslld	$32-20,%ymm6,%ymm6
6301	vpxor	%ymm8,%ymm6,%ymm6
6302	vpsrld	$20,%ymm5,%ymm8
6303	vpslld	$32-20,%ymm5,%ymm5
6304	vpxor	%ymm8,%ymm5,%ymm5
6305	vpsrld	$20,%ymm4,%ymm8
6306	vpslld	$32-20,%ymm4,%ymm4
6307	vpxor	%ymm8,%ymm4,%ymm4
6308	vmovdqa	L$rol8(%rip),%ymm8
6309	vpaddd	%ymm7,%ymm3,%ymm3
6310	vpaddd	%ymm6,%ymm2,%ymm2
6311	vpaddd	%ymm5,%ymm1,%ymm1
6312	vpaddd	%ymm4,%ymm0,%ymm0
6313	vpxor	%ymm3,%ymm15,%ymm15
6314	vpxor	%ymm2,%ymm14,%ymm14
6315	vpxor	%ymm1,%ymm13,%ymm13
6316	vpxor	%ymm0,%ymm12,%ymm12
6317	vpshufb	%ymm8,%ymm15,%ymm15
6318	vpshufb	%ymm8,%ymm14,%ymm14
6319	vpshufb	%ymm8,%ymm13,%ymm13
6320	vpshufb	%ymm8,%ymm12,%ymm12
6321	vpaddd	%ymm15,%ymm11,%ymm11
6322	vpaddd	%ymm14,%ymm10,%ymm10
6323	vpaddd	%ymm13,%ymm9,%ymm9
6324	vpaddd	0+128(%rbp),%ymm12,%ymm8
6325	vpxor	%ymm11,%ymm7,%ymm7
6326	vpxor	%ymm10,%ymm6,%ymm6
6327	vpxor	%ymm9,%ymm5,%ymm5
6328	vpxor	%ymm8,%ymm4,%ymm4
6329	vmovdqa	%ymm8,0+128(%rbp)
6330	vpsrld	$25,%ymm7,%ymm8
6331	vpslld	$32-25,%ymm7,%ymm7
6332	vpxor	%ymm8,%ymm7,%ymm7
6333	vpsrld	$25,%ymm6,%ymm8
6334	vpslld	$32-25,%ymm6,%ymm6
6335	vpxor	%ymm8,%ymm6,%ymm6
6336	vpsrld	$25,%ymm5,%ymm8
6337	vpslld	$32-25,%ymm5,%ymm5
6338	vpxor	%ymm8,%ymm5,%ymm5
6339	vpsrld	$25,%ymm4,%ymm8
6340	vpslld	$32-25,%ymm4,%ymm4
6341	vpxor	%ymm8,%ymm4,%ymm4
6342	vmovdqa	0+128(%rbp),%ymm8
6343	vpalignr	$4,%ymm7,%ymm7,%ymm7
6344	vpalignr	$8,%ymm11,%ymm11,%ymm11
6345	vpalignr	$12,%ymm15,%ymm15,%ymm15
6346	vpalignr	$4,%ymm6,%ymm6,%ymm6
6347	vpalignr	$8,%ymm10,%ymm10,%ymm10
6348	vpalignr	$12,%ymm14,%ymm14,%ymm14
6349	vpalignr	$4,%ymm5,%ymm5,%ymm5
6350	vpalignr	$8,%ymm9,%ymm9,%ymm9
6351	vpalignr	$12,%ymm13,%ymm13,%ymm13
6352	vpalignr	$4,%ymm4,%ymm4,%ymm4
6353	vpalignr	$8,%ymm8,%ymm8,%ymm8
6354	vpalignr	$12,%ymm12,%ymm12,%ymm12
6355	vmovdqa	%ymm8,0+128(%rbp)
6356	vmovdqa	L$rol16(%rip),%ymm8
6357	vpaddd	%ymm7,%ymm3,%ymm3
6358	vpaddd	%ymm6,%ymm2,%ymm2
6359	vpaddd	%ymm5,%ymm1,%ymm1
6360	vpaddd	%ymm4,%ymm0,%ymm0
6361	vpxor	%ymm3,%ymm15,%ymm15
6362	vpxor	%ymm2,%ymm14,%ymm14
6363	vpxor	%ymm1,%ymm13,%ymm13
6364	vpxor	%ymm0,%ymm12,%ymm12
6365	vpshufb	%ymm8,%ymm15,%ymm15
6366	vpshufb	%ymm8,%ymm14,%ymm14
6367	vpshufb	%ymm8,%ymm13,%ymm13
6368	vpshufb	%ymm8,%ymm12,%ymm12
6369	vpaddd	%ymm15,%ymm11,%ymm11
6370	vpaddd	%ymm14,%ymm10,%ymm10
6371	vpaddd	%ymm13,%ymm9,%ymm9
6372	vpaddd	0+128(%rbp),%ymm12,%ymm8
6373	vpxor	%ymm11,%ymm7,%ymm7
6374	vpxor	%ymm10,%ymm6,%ymm6
6375	vpxor	%ymm9,%ymm5,%ymm5
6376	vpxor	%ymm8,%ymm4,%ymm4
6377	vmovdqa	%ymm8,0+128(%rbp)
6378	vpsrld	$20,%ymm7,%ymm8
6379	vpslld	$32-20,%ymm7,%ymm7
6380	vpxor	%ymm8,%ymm7,%ymm7
6381	vpsrld	$20,%ymm6,%ymm8
6382	vpslld	$32-20,%ymm6,%ymm6
6383	vpxor	%ymm8,%ymm6,%ymm6
6384	vpsrld	$20,%ymm5,%ymm8
6385	vpslld	$32-20,%ymm5,%ymm5
6386	vpxor	%ymm8,%ymm5,%ymm5
6387	vpsrld	$20,%ymm4,%ymm8
6388	vpslld	$32-20,%ymm4,%ymm4
6389	vpxor	%ymm8,%ymm4,%ymm4
6390	vmovdqa	L$rol8(%rip),%ymm8
6391	vpaddd	%ymm7,%ymm3,%ymm3
6392	vpaddd	%ymm6,%ymm2,%ymm2
6393	vpaddd	%ymm5,%ymm1,%ymm1
6394	vpaddd	%ymm4,%ymm0,%ymm0
6395	vpxor	%ymm3,%ymm15,%ymm15
6396	vpxor	%ymm2,%ymm14,%ymm14
6397	vpxor	%ymm1,%ymm13,%ymm13
6398	vpxor	%ymm0,%ymm12,%ymm12
6399	vpshufb	%ymm8,%ymm15,%ymm15
6400	vpshufb	%ymm8,%ymm14,%ymm14
6401	vpshufb	%ymm8,%ymm13,%ymm13
6402	vpshufb	%ymm8,%ymm12,%ymm12
6403	vpaddd	%ymm15,%ymm11,%ymm11
6404	vpaddd	%ymm14,%ymm10,%ymm10
6405	vpaddd	%ymm13,%ymm9,%ymm9
6406	vpaddd	0+128(%rbp),%ymm12,%ymm8
6407	vpxor	%ymm11,%ymm7,%ymm7
6408	vpxor	%ymm10,%ymm6,%ymm6
6409	vpxor	%ymm9,%ymm5,%ymm5
6410	vpxor	%ymm8,%ymm4,%ymm4
6411	vmovdqa	%ymm8,0+128(%rbp)
6412	vpsrld	$25,%ymm7,%ymm8
6413	vpslld	$32-25,%ymm7,%ymm7
6414	vpxor	%ymm8,%ymm7,%ymm7
6415	vpsrld	$25,%ymm6,%ymm8
6416	vpslld	$32-25,%ymm6,%ymm6
6417	vpxor	%ymm8,%ymm6,%ymm6
6418	vpsrld	$25,%ymm5,%ymm8
6419	vpslld	$32-25,%ymm5,%ymm5
6420	vpxor	%ymm8,%ymm5,%ymm5
6421	vpsrld	$25,%ymm4,%ymm8
6422	vpslld	$32-25,%ymm4,%ymm4
6423	vpxor	%ymm8,%ymm4,%ymm4
6424	vmovdqa	0+128(%rbp),%ymm8
6425	vpalignr	$12,%ymm7,%ymm7,%ymm7
6426	vpalignr	$8,%ymm11,%ymm11,%ymm11
6427	vpalignr	$4,%ymm15,%ymm15,%ymm15
6428	vpalignr	$12,%ymm6,%ymm6,%ymm6
6429	vpalignr	$8,%ymm10,%ymm10,%ymm10
6430	vpalignr	$4,%ymm14,%ymm14,%ymm14
6431	vpalignr	$12,%ymm5,%ymm5,%ymm5
6432	vpalignr	$8,%ymm9,%ymm9,%ymm9
6433	vpalignr	$4,%ymm13,%ymm13,%ymm13
6434	vpalignr	$12,%ymm4,%ymm4,%ymm4
6435	vpalignr	$8,%ymm8,%ymm8,%ymm8
6436	vpalignr	$4,%ymm12,%ymm12,%ymm12
6437
6438	decq	%r10
6439	jnz	L$seal_avx2_init_rounds
6440	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
6441	vpaddd	0+64(%rbp),%ymm7,%ymm7
6442	vpaddd	0+96(%rbp),%ymm11,%ymm11
6443	vpaddd	0+256(%rbp),%ymm15,%ymm15
6444	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
6445	vpaddd	0+64(%rbp),%ymm6,%ymm6
6446	vpaddd	0+96(%rbp),%ymm10,%ymm10
6447	vpaddd	0+224(%rbp),%ymm14,%ymm14
6448	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
6449	vpaddd	0+64(%rbp),%ymm5,%ymm5
6450	vpaddd	0+96(%rbp),%ymm9,%ymm9
6451	vpaddd	0+192(%rbp),%ymm13,%ymm13
6452	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
6453	vpaddd	0+64(%rbp),%ymm4,%ymm4
6454	vpaddd	0+96(%rbp),%ymm8,%ymm8
6455	vpaddd	0+160(%rbp),%ymm12,%ymm12
6456
6457	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
6458	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
6459	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
6460	vpand	L$clamp(%rip),%ymm15,%ymm15
6461	vmovdqa	%ymm15,0+0(%rbp)
6462	movq	%r8,%r8
6463	call	poly_hash_ad_internal
6464
6465	vpxor	0(%rsi),%ymm3,%ymm3
6466	vpxor	32(%rsi),%ymm11,%ymm11
6467	vmovdqu	%ymm3,0(%rdi)
6468	vmovdqu	%ymm11,32(%rdi)
6469	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
6470	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
6471	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
6472	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
6473	vpxor	0+64(%rsi),%ymm15,%ymm15
6474	vpxor	32+64(%rsi),%ymm2,%ymm2
6475	vpxor	64+64(%rsi),%ymm6,%ymm6
6476	vpxor	96+64(%rsi),%ymm10,%ymm10
6477	vmovdqu	%ymm15,0+64(%rdi)
6478	vmovdqu	%ymm2,32+64(%rdi)
6479	vmovdqu	%ymm6,64+64(%rdi)
6480	vmovdqu	%ymm10,96+64(%rdi)
6481	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
6482	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
6483	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
6484	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
6485	vpxor	0+192(%rsi),%ymm15,%ymm15
6486	vpxor	32+192(%rsi),%ymm1,%ymm1
6487	vpxor	64+192(%rsi),%ymm5,%ymm5
6488	vpxor	96+192(%rsi),%ymm9,%ymm9
6489	vmovdqu	%ymm15,0+192(%rdi)
6490	vmovdqu	%ymm1,32+192(%rdi)
6491	vmovdqu	%ymm5,64+192(%rdi)
6492	vmovdqu	%ymm9,96+192(%rdi)
6493	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
6494	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
6495	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
6496	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
6497	vmovdqa	%ymm15,%ymm8
6498
6499	leaq	320(%rsi),%rsi
6500	subq	$320,%rbx
6501	movq	$320,%rcx
6502	cmpq	$128,%rbx
6503	jbe	L$seal_avx2_short_hash_remainder
6504	vpxor	0(%rsi),%ymm0,%ymm0
6505	vpxor	32(%rsi),%ymm4,%ymm4
6506	vpxor	64(%rsi),%ymm8,%ymm8
6507	vpxor	96(%rsi),%ymm12,%ymm12
6508	vmovdqu	%ymm0,320(%rdi)
6509	vmovdqu	%ymm4,352(%rdi)
6510	vmovdqu	%ymm8,384(%rdi)
6511	vmovdqu	%ymm12,416(%rdi)
6512	leaq	128(%rsi),%rsi
6513	subq	$128,%rbx
6514	movq	$8,%rcx
6515	movq	$2,%r8
6516	cmpq	$128,%rbx
6517	jbe	L$seal_avx2_tail_128
6518	cmpq	$256,%rbx
6519	jbe	L$seal_avx2_tail_256
6520	cmpq	$384,%rbx
6521	jbe	L$seal_avx2_tail_384
6522	cmpq	$512,%rbx
6523	jbe	L$seal_avx2_tail_512
6524	vmovdqa	L$chacha20_consts(%rip),%ymm0
6525	vmovdqa	0+64(%rbp),%ymm4
6526	vmovdqa	0+96(%rbp),%ymm8
6527	vmovdqa	%ymm0,%ymm1
6528	vmovdqa	%ymm4,%ymm5
6529	vmovdqa	%ymm8,%ymm9
6530	vmovdqa	%ymm0,%ymm2
6531	vmovdqa	%ymm4,%ymm6
6532	vmovdqa	%ymm8,%ymm10
6533	vmovdqa	%ymm0,%ymm3
6534	vmovdqa	%ymm4,%ymm7
6535	vmovdqa	%ymm8,%ymm11
6536	vmovdqa	L$avx2_inc(%rip),%ymm12
6537	vpaddd	0+160(%rbp),%ymm12,%ymm15
6538	vpaddd	%ymm15,%ymm12,%ymm14
6539	vpaddd	%ymm14,%ymm12,%ymm13
6540	vpaddd	%ymm13,%ymm12,%ymm12
6541	vmovdqa	%ymm15,0+256(%rbp)
6542	vmovdqa	%ymm14,0+224(%rbp)
6543	vmovdqa	%ymm13,0+192(%rbp)
6544	vmovdqa	%ymm12,0+160(%rbp)
6545	vmovdqa	%ymm8,0+128(%rbp)
6546	vmovdqa	L$rol16(%rip),%ymm8
6547	vpaddd	%ymm7,%ymm3,%ymm3
6548	vpaddd	%ymm6,%ymm2,%ymm2
6549	vpaddd	%ymm5,%ymm1,%ymm1
6550	vpaddd	%ymm4,%ymm0,%ymm0
6551	vpxor	%ymm3,%ymm15,%ymm15
6552	vpxor	%ymm2,%ymm14,%ymm14
6553	vpxor	%ymm1,%ymm13,%ymm13
6554	vpxor	%ymm0,%ymm12,%ymm12
6555	vpshufb	%ymm8,%ymm15,%ymm15
6556	vpshufb	%ymm8,%ymm14,%ymm14
6557	vpshufb	%ymm8,%ymm13,%ymm13
6558	vpshufb	%ymm8,%ymm12,%ymm12
6559	vpaddd	%ymm15,%ymm11,%ymm11
6560	vpaddd	%ymm14,%ymm10,%ymm10
6561	vpaddd	%ymm13,%ymm9,%ymm9
6562	vpaddd	0+128(%rbp),%ymm12,%ymm8
6563	vpxor	%ymm11,%ymm7,%ymm7
6564	vpxor	%ymm10,%ymm6,%ymm6
6565	vpxor	%ymm9,%ymm5,%ymm5
6566	vpxor	%ymm8,%ymm4,%ymm4
6567	vmovdqa	%ymm8,0+128(%rbp)
6568	vpsrld	$20,%ymm7,%ymm8
6569	vpslld	$32-20,%ymm7,%ymm7
6570	vpxor	%ymm8,%ymm7,%ymm7
6571	vpsrld	$20,%ymm6,%ymm8
6572	vpslld	$32-20,%ymm6,%ymm6
6573	vpxor	%ymm8,%ymm6,%ymm6
6574	vpsrld	$20,%ymm5,%ymm8
6575	vpslld	$32-20,%ymm5,%ymm5
6576	vpxor	%ymm8,%ymm5,%ymm5
6577	vpsrld	$20,%ymm4,%ymm8
6578	vpslld	$32-20,%ymm4,%ymm4
6579	vpxor	%ymm8,%ymm4,%ymm4
6580	vmovdqa	L$rol8(%rip),%ymm8
6581	vpaddd	%ymm7,%ymm3,%ymm3
6582	vpaddd	%ymm6,%ymm2,%ymm2
6583	vpaddd	%ymm5,%ymm1,%ymm1
6584	vpaddd	%ymm4,%ymm0,%ymm0
6585	vpxor	%ymm3,%ymm15,%ymm15
6586	vpxor	%ymm2,%ymm14,%ymm14
6587	vpxor	%ymm1,%ymm13,%ymm13
6588	vpxor	%ymm0,%ymm12,%ymm12
6589	vpshufb	%ymm8,%ymm15,%ymm15
6590	vpshufb	%ymm8,%ymm14,%ymm14
6591	vpshufb	%ymm8,%ymm13,%ymm13
6592	vpshufb	%ymm8,%ymm12,%ymm12
6593	vpaddd	%ymm15,%ymm11,%ymm11
6594	vpaddd	%ymm14,%ymm10,%ymm10
6595	vpaddd	%ymm13,%ymm9,%ymm9
6596	vpaddd	0+128(%rbp),%ymm12,%ymm8
6597	vpxor	%ymm11,%ymm7,%ymm7
6598	vpxor	%ymm10,%ymm6,%ymm6
6599	vpxor	%ymm9,%ymm5,%ymm5
6600	vpxor	%ymm8,%ymm4,%ymm4
6601	vmovdqa	%ymm8,0+128(%rbp)
6602	vpsrld	$25,%ymm7,%ymm8
6603	vpslld	$32-25,%ymm7,%ymm7
6604	vpxor	%ymm8,%ymm7,%ymm7
6605	vpsrld	$25,%ymm6,%ymm8
6606	vpslld	$32-25,%ymm6,%ymm6
6607	vpxor	%ymm8,%ymm6,%ymm6
6608	vpsrld	$25,%ymm5,%ymm8
6609	vpslld	$32-25,%ymm5,%ymm5
6610	vpxor	%ymm8,%ymm5,%ymm5
6611	vpsrld	$25,%ymm4,%ymm8
6612	vpslld	$32-25,%ymm4,%ymm4
6613	vpxor	%ymm8,%ymm4,%ymm4
6614	vmovdqa	0+128(%rbp),%ymm8
6615	vpalignr	$4,%ymm7,%ymm7,%ymm7
6616	vpalignr	$8,%ymm11,%ymm11,%ymm11
6617	vpalignr	$12,%ymm15,%ymm15,%ymm15
6618	vpalignr	$4,%ymm6,%ymm6,%ymm6
6619	vpalignr	$8,%ymm10,%ymm10,%ymm10
6620	vpalignr	$12,%ymm14,%ymm14,%ymm14
6621	vpalignr	$4,%ymm5,%ymm5,%ymm5
6622	vpalignr	$8,%ymm9,%ymm9,%ymm9
6623	vpalignr	$12,%ymm13,%ymm13,%ymm13
6624	vpalignr	$4,%ymm4,%ymm4,%ymm4
6625	vpalignr	$8,%ymm8,%ymm8,%ymm8
6626	vpalignr	$12,%ymm12,%ymm12,%ymm12
6627	vmovdqa	%ymm8,0+128(%rbp)
6628	vmovdqa	L$rol16(%rip),%ymm8
6629	vpaddd	%ymm7,%ymm3,%ymm3
6630	vpaddd	%ymm6,%ymm2,%ymm2
6631	vpaddd	%ymm5,%ymm1,%ymm1
6632	vpaddd	%ymm4,%ymm0,%ymm0
6633	vpxor	%ymm3,%ymm15,%ymm15
6634	vpxor	%ymm2,%ymm14,%ymm14
6635	vpxor	%ymm1,%ymm13,%ymm13
6636	vpxor	%ymm0,%ymm12,%ymm12
6637	vpshufb	%ymm8,%ymm15,%ymm15
6638	vpshufb	%ymm8,%ymm14,%ymm14
6639	vpshufb	%ymm8,%ymm13,%ymm13
6640	vpshufb	%ymm8,%ymm12,%ymm12
6641	vpaddd	%ymm15,%ymm11,%ymm11
6642	vpaddd	%ymm14,%ymm10,%ymm10
6643	vpaddd	%ymm13,%ymm9,%ymm9
6644	vpaddd	0+128(%rbp),%ymm12,%ymm8
6645	vpxor	%ymm11,%ymm7,%ymm7
6646	vpxor	%ymm10,%ymm6,%ymm6
6647	vpxor	%ymm9,%ymm5,%ymm5
6648	vpxor	%ymm8,%ymm4,%ymm4
6649	vmovdqa	%ymm8,0+128(%rbp)
6650	vpsrld	$20,%ymm7,%ymm8
6651	vpslld	$32-20,%ymm7,%ymm7
6652	vpxor	%ymm8,%ymm7,%ymm7
6653	vpsrld	$20,%ymm6,%ymm8
6654	vpslld	$32-20,%ymm6,%ymm6
6655	vpxor	%ymm8,%ymm6,%ymm6
6656	vpsrld	$20,%ymm5,%ymm8
6657	vpslld	$32-20,%ymm5,%ymm5
6658	vpxor	%ymm8,%ymm5,%ymm5
6659	vpsrld	$20,%ymm4,%ymm8
6660	vpslld	$32-20,%ymm4,%ymm4
6661	vpxor	%ymm8,%ymm4,%ymm4
6662	vmovdqa	L$rol8(%rip),%ymm8
6663	vpaddd	%ymm7,%ymm3,%ymm3
6664	vpaddd	%ymm6,%ymm2,%ymm2
6665	vpaddd	%ymm5,%ymm1,%ymm1
6666	vpaddd	%ymm4,%ymm0,%ymm0
6667	vpxor	%ymm3,%ymm15,%ymm15
6668	vpxor	%ymm2,%ymm14,%ymm14
6669	vpxor	%ymm1,%ymm13,%ymm13
6670	vpxor	%ymm0,%ymm12,%ymm12
6671	vpshufb	%ymm8,%ymm15,%ymm15
6672	vpshufb	%ymm8,%ymm14,%ymm14
6673	vpshufb	%ymm8,%ymm13,%ymm13
6674	vpshufb	%ymm8,%ymm12,%ymm12
6675	vpaddd	%ymm15,%ymm11,%ymm11
6676	vpaddd	%ymm14,%ymm10,%ymm10
6677	vpaddd	%ymm13,%ymm9,%ymm9
6678	vpaddd	0+128(%rbp),%ymm12,%ymm8
6679	vpxor	%ymm11,%ymm7,%ymm7
6680	vpxor	%ymm10,%ymm6,%ymm6
6681	vpxor	%ymm9,%ymm5,%ymm5
6682	vpxor	%ymm8,%ymm4,%ymm4
6683	vmovdqa	%ymm8,0+128(%rbp)
6684	vpsrld	$25,%ymm7,%ymm8
6685	vpslld	$32-25,%ymm7,%ymm7
6686	vpxor	%ymm8,%ymm7,%ymm7
6687	vpsrld	$25,%ymm6,%ymm8
6688	vpslld	$32-25,%ymm6,%ymm6
6689	vpxor	%ymm8,%ymm6,%ymm6
6690	vpsrld	$25,%ymm5,%ymm8
6691	vpslld	$32-25,%ymm5,%ymm5
6692	vpxor	%ymm8,%ymm5,%ymm5
6693	vpsrld	$25,%ymm4,%ymm8
6694	vpslld	$32-25,%ymm4,%ymm4
6695	vpxor	%ymm8,%ymm4,%ymm4
6696	vmovdqa	0+128(%rbp),%ymm8
6697	vpalignr	$12,%ymm7,%ymm7,%ymm7
6698	vpalignr	$8,%ymm11,%ymm11,%ymm11
6699	vpalignr	$4,%ymm15,%ymm15,%ymm15
6700	vpalignr	$12,%ymm6,%ymm6,%ymm6
6701	vpalignr	$8,%ymm10,%ymm10,%ymm10
6702	vpalignr	$4,%ymm14,%ymm14,%ymm14
6703	vpalignr	$12,%ymm5,%ymm5,%ymm5
6704	vpalignr	$8,%ymm9,%ymm9,%ymm9
6705	vpalignr	$4,%ymm13,%ymm13,%ymm13
6706	vpalignr	$12,%ymm4,%ymm4,%ymm4
6707	vpalignr	$8,%ymm8,%ymm8,%ymm8
6708	vpalignr	$4,%ymm12,%ymm12,%ymm12
6709	vmovdqa	%ymm8,0+128(%rbp)
6710	vmovdqa	L$rol16(%rip),%ymm8
6711	vpaddd	%ymm7,%ymm3,%ymm3
6712	vpaddd	%ymm6,%ymm2,%ymm2
6713	vpaddd	%ymm5,%ymm1,%ymm1
6714	vpaddd	%ymm4,%ymm0,%ymm0
6715	vpxor	%ymm3,%ymm15,%ymm15
6716	vpxor	%ymm2,%ymm14,%ymm14
6717	vpxor	%ymm1,%ymm13,%ymm13
6718	vpxor	%ymm0,%ymm12,%ymm12
6719	vpshufb	%ymm8,%ymm15,%ymm15
6720	vpshufb	%ymm8,%ymm14,%ymm14
6721	vpshufb	%ymm8,%ymm13,%ymm13
6722	vpshufb	%ymm8,%ymm12,%ymm12
6723	vpaddd	%ymm15,%ymm11,%ymm11
6724	vpaddd	%ymm14,%ymm10,%ymm10
6725	vpaddd	%ymm13,%ymm9,%ymm9
6726	vpaddd	0+128(%rbp),%ymm12,%ymm8
6727	vpxor	%ymm11,%ymm7,%ymm7
6728	vpxor	%ymm10,%ymm6,%ymm6
6729	vpxor	%ymm9,%ymm5,%ymm5
6730	vpxor	%ymm8,%ymm4,%ymm4
6731	vmovdqa	%ymm8,0+128(%rbp)
6732	vpsrld	$20,%ymm7,%ymm8
6733	vpslld	$32-20,%ymm7,%ymm7
6734	vpxor	%ymm8,%ymm7,%ymm7
6735	vpsrld	$20,%ymm6,%ymm8
6736	vpslld	$32-20,%ymm6,%ymm6
6737	vpxor	%ymm8,%ymm6,%ymm6
6738	vpsrld	$20,%ymm5,%ymm8
6739	vpslld	$32-20,%ymm5,%ymm5
6740	vpxor	%ymm8,%ymm5,%ymm5
6741	vpsrld	$20,%ymm4,%ymm8
6742	vpslld	$32-20,%ymm4,%ymm4
6743	vpxor	%ymm8,%ymm4,%ymm4
6744	vmovdqa	L$rol8(%rip),%ymm8
6745	vpaddd	%ymm7,%ymm3,%ymm3
6746	vpaddd	%ymm6,%ymm2,%ymm2
6747	vpaddd	%ymm5,%ymm1,%ymm1
6748	vpaddd	%ymm4,%ymm0,%ymm0
6749	vpxor	%ymm3,%ymm15,%ymm15
6750
6751	subq	$16,%rdi
6752	movq	$9,%rcx
6753	jmp	L$seal_avx2_main_loop_rounds_entry
6754.p2align	5
6755L$seal_avx2_main_loop:
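# Steady state: compute the next 512 bytes of keystream while Poly1305 catches
# up on ciphertext that was already written out (the hash trails the
# encryption by roughly one pass).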
6756	vmovdqa	L$chacha20_consts(%rip),%ymm0
6757	vmovdqa	0+64(%rbp),%ymm4
6758	vmovdqa	0+96(%rbp),%ymm8
6759	vmovdqa	%ymm0,%ymm1
6760	vmovdqa	%ymm4,%ymm5
6761	vmovdqa	%ymm8,%ymm9
6762	vmovdqa	%ymm0,%ymm2
6763	vmovdqa	%ymm4,%ymm6
6764	vmovdqa	%ymm8,%ymm10
6765	vmovdqa	%ymm0,%ymm3
6766	vmovdqa	%ymm4,%ymm7
6767	vmovdqa	%ymm8,%ymm11
6768	vmovdqa	L$avx2_inc(%rip),%ymm12
6769	vpaddd	0+160(%rbp),%ymm12,%ymm15
6770	vpaddd	%ymm15,%ymm12,%ymm14
6771	vpaddd	%ymm14,%ymm12,%ymm13
6772	vpaddd	%ymm13,%ymm12,%ymm12
6773	vmovdqa	%ymm15,0+256(%rbp)
6774	vmovdqa	%ymm14,0+224(%rbp)
6775	vmovdqa	%ymm13,0+192(%rbp)
6776	vmovdqa	%ymm12,0+160(%rbp)
6777
6778	movq	$10,%rcx
6779.p2align	5
6780L$seal_avx2_main_loop_rounds:
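# One ChaCha20 double round over all four states with three Poly1305 steps
# folded in; 48 bytes of previously written ciphertext are hashed per iteration.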
6781	addq	0+0(%rdi),%r10
6782	adcq	8+0(%rdi),%r11
6783	adcq	$1,%r12
6784	vmovdqa	%ymm8,0+128(%rbp)
6785	vmovdqa	L$rol16(%rip),%ymm8
6786	vpaddd	%ymm7,%ymm3,%ymm3
6787	vpaddd	%ymm6,%ymm2,%ymm2
6788	vpaddd	%ymm5,%ymm1,%ymm1
6789	vpaddd	%ymm4,%ymm0,%ymm0
6790	vpxor	%ymm3,%ymm15,%ymm15
6791	vpxor	%ymm2,%ymm14,%ymm14
6792	vpxor	%ymm1,%ymm13,%ymm13
6793	vpxor	%ymm0,%ymm12,%ymm12
6794	movq	0+0+0(%rbp),%rdx
6795	movq	%rdx,%r15
6796	mulxq	%r10,%r13,%r14
6797	mulxq	%r11,%rax,%rdx
6798	imulq	%r12,%r15
6799	addq	%rax,%r14
6800	adcq	%rdx,%r15
6801	vpshufb	%ymm8,%ymm15,%ymm15
6802	vpshufb	%ymm8,%ymm14,%ymm14
6803	vpshufb	%ymm8,%ymm13,%ymm13
6804	vpshufb	%ymm8,%ymm12,%ymm12
6805	vpaddd	%ymm15,%ymm11,%ymm11
6806	vpaddd	%ymm14,%ymm10,%ymm10
6807	vpaddd	%ymm13,%ymm9,%ymm9
6808	vpaddd	0+128(%rbp),%ymm12,%ymm8
6809	vpxor	%ymm11,%ymm7,%ymm7
6810	movq	8+0+0(%rbp),%rdx
6811	mulxq	%r10,%r10,%rax
6812	addq	%r10,%r14
6813	mulxq	%r11,%r11,%r9
6814	adcq	%r11,%r15
6815	adcq	$0,%r9
6816	imulq	%r12,%rdx
6817	vpxor	%ymm10,%ymm6,%ymm6
6818	vpxor	%ymm9,%ymm5,%ymm5
6819	vpxor	%ymm8,%ymm4,%ymm4
6820	vmovdqa	%ymm8,0+128(%rbp)
6821	vpsrld	$20,%ymm7,%ymm8
6822	vpslld	$32-20,%ymm7,%ymm7
6823	vpxor	%ymm8,%ymm7,%ymm7
6824	vpsrld	$20,%ymm6,%ymm8
6825	vpslld	$32-20,%ymm6,%ymm6
6826	vpxor	%ymm8,%ymm6,%ymm6
6827	vpsrld	$20,%ymm5,%ymm8
6828	vpslld	$32-20,%ymm5,%ymm5
6829	addq	%rax,%r15
6830	adcq	%rdx,%r9
6831	vpxor	%ymm8,%ymm5,%ymm5
6832	vpsrld	$20,%ymm4,%ymm8
6833	vpslld	$32-20,%ymm4,%ymm4
6834	vpxor	%ymm8,%ymm4,%ymm4
6835	vmovdqa	L$rol8(%rip),%ymm8
6836	vpaddd	%ymm7,%ymm3,%ymm3
6837	vpaddd	%ymm6,%ymm2,%ymm2
6838	vpaddd	%ymm5,%ymm1,%ymm1
6839	vpaddd	%ymm4,%ymm0,%ymm0
6840	vpxor	%ymm3,%ymm15,%ymm15
6841	movq	%r13,%r10
6842	movq	%r14,%r11
6843	movq	%r15,%r12
6844	andq	$3,%r12
6845	movq	%r15,%r13
6846	andq	$-4,%r13
6847	movq	%r9,%r14
6848	shrdq	$2,%r9,%r15
6849	shrq	$2,%r9
6850	addq	%r13,%r15
6851	adcq	%r14,%r9
6852	addq	%r15,%r10
6853	adcq	%r9,%r11
6854	adcq	$0,%r12
6855
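# Entry point for the first pass through the round loop; the vector work it
# skips was already done inline before the jump above.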
6856L$seal_avx2_main_loop_rounds_entry:
6857	vpxor	%ymm2,%ymm14,%ymm14
6858	vpxor	%ymm1,%ymm13,%ymm13
6859	vpxor	%ymm0,%ymm12,%ymm12
6860	vpshufb	%ymm8,%ymm15,%ymm15
6861	vpshufb	%ymm8,%ymm14,%ymm14
6862	vpshufb	%ymm8,%ymm13,%ymm13
6863	vpshufb	%ymm8,%ymm12,%ymm12
6864	vpaddd	%ymm15,%ymm11,%ymm11
6865	vpaddd	%ymm14,%ymm10,%ymm10
6866	addq	0+16(%rdi),%r10
6867	adcq	8+16(%rdi),%r11
6868	adcq	$1,%r12
6869	vpaddd	%ymm13,%ymm9,%ymm9
6870	vpaddd	0+128(%rbp),%ymm12,%ymm8
6871	vpxor	%ymm11,%ymm7,%ymm7
6872	vpxor	%ymm10,%ymm6,%ymm6
6873	vpxor	%ymm9,%ymm5,%ymm5
6874	vpxor	%ymm8,%ymm4,%ymm4
6875	vmovdqa	%ymm8,0+128(%rbp)
6876	vpsrld	$25,%ymm7,%ymm8
6877	movq	0+0+0(%rbp),%rdx
6878	movq	%rdx,%r15
6879	mulxq	%r10,%r13,%r14
6880	mulxq	%r11,%rax,%rdx
6881	imulq	%r12,%r15
6882	addq	%rax,%r14
6883	adcq	%rdx,%r15
6884	vpslld	$32-25,%ymm7,%ymm7
6885	vpxor	%ymm8,%ymm7,%ymm7
6886	vpsrld	$25,%ymm6,%ymm8
6887	vpslld	$32-25,%ymm6,%ymm6
6888	vpxor	%ymm8,%ymm6,%ymm6
6889	vpsrld	$25,%ymm5,%ymm8
6890	vpslld	$32-25,%ymm5,%ymm5
6891	vpxor	%ymm8,%ymm5,%ymm5
6892	vpsrld	$25,%ymm4,%ymm8
6893	vpslld	$32-25,%ymm4,%ymm4
6894	vpxor	%ymm8,%ymm4,%ymm4
6895	vmovdqa	0+128(%rbp),%ymm8
6896	vpalignr	$4,%ymm7,%ymm7,%ymm7
6897	vpalignr	$8,%ymm11,%ymm11,%ymm11
6898	vpalignr	$12,%ymm15,%ymm15,%ymm15
6899	vpalignr	$4,%ymm6,%ymm6,%ymm6
6900	vpalignr	$8,%ymm10,%ymm10,%ymm10
6901	vpalignr	$12,%ymm14,%ymm14,%ymm14
6902	movq	8+0+0(%rbp),%rdx
6903	mulxq	%r10,%r10,%rax
6904	addq	%r10,%r14
6905	mulxq	%r11,%r11,%r9
6906	adcq	%r11,%r15
6907	adcq	$0,%r9
6908	imulq	%r12,%rdx
6909	vpalignr	$4,%ymm5,%ymm5,%ymm5
6910	vpalignr	$8,%ymm9,%ymm9,%ymm9
6911	vpalignr	$12,%ymm13,%ymm13,%ymm13
6912	vpalignr	$4,%ymm4,%ymm4,%ymm4
6913	vpalignr	$8,%ymm8,%ymm8,%ymm8
6914	vpalignr	$12,%ymm12,%ymm12,%ymm12
6915	vmovdqa	%ymm8,0+128(%rbp)
6916	vmovdqa	L$rol16(%rip),%ymm8
6917	vpaddd	%ymm7,%ymm3,%ymm3
6918	vpaddd	%ymm6,%ymm2,%ymm2
6919	vpaddd	%ymm5,%ymm1,%ymm1
6920	vpaddd	%ymm4,%ymm0,%ymm0
6921	vpxor	%ymm3,%ymm15,%ymm15
6922	vpxor	%ymm2,%ymm14,%ymm14
6923	vpxor	%ymm1,%ymm13,%ymm13
6924	vpxor	%ymm0,%ymm12,%ymm12
6925	vpshufb	%ymm8,%ymm15,%ymm15
6926	vpshufb	%ymm8,%ymm14,%ymm14
6927	addq	%rax,%r15
6928	adcq	%rdx,%r9
6929	vpshufb	%ymm8,%ymm13,%ymm13
6930	vpshufb	%ymm8,%ymm12,%ymm12
6931	vpaddd	%ymm15,%ymm11,%ymm11
6932	vpaddd	%ymm14,%ymm10,%ymm10
6933	vpaddd	%ymm13,%ymm9,%ymm9
6934	vpaddd	0+128(%rbp),%ymm12,%ymm8
6935	vpxor	%ymm11,%ymm7,%ymm7
6936	vpxor	%ymm10,%ymm6,%ymm6
6937	vpxor	%ymm9,%ymm5,%ymm5
6938	movq	%r13,%r10
6939	movq	%r14,%r11
6940	movq	%r15,%r12
6941	andq	$3,%r12
6942	movq	%r15,%r13
6943	andq	$-4,%r13
6944	movq	%r9,%r14
6945	shrdq	$2,%r9,%r15
6946	shrq	$2,%r9
6947	addq	%r13,%r15
6948	adcq	%r14,%r9
6949	addq	%r15,%r10
6950	adcq	%r9,%r11
6951	adcq	$0,%r12
6952	vpxor	%ymm8,%ymm4,%ymm4
6953	vmovdqa	%ymm8,0+128(%rbp)
6954	vpsrld	$20,%ymm7,%ymm8
6955	vpslld	$32-20,%ymm7,%ymm7
6956	vpxor	%ymm8,%ymm7,%ymm7
6957	vpsrld	$20,%ymm6,%ymm8
6958	vpslld	$32-20,%ymm6,%ymm6
6959	vpxor	%ymm8,%ymm6,%ymm6
6960	addq	0+32(%rdi),%r10
6961	adcq	8+32(%rdi),%r11
6962	adcq	$1,%r12
6963
6964	leaq	48(%rdi),%rdi
6965	vpsrld	$20,%ymm5,%ymm8
6966	vpslld	$32-20,%ymm5,%ymm5
6967	vpxor	%ymm8,%ymm5,%ymm5
6968	vpsrld	$20,%ymm4,%ymm8
6969	vpslld	$32-20,%ymm4,%ymm4
6970	vpxor	%ymm8,%ymm4,%ymm4
6971	vmovdqa	L$rol8(%rip),%ymm8
6972	vpaddd	%ymm7,%ymm3,%ymm3
6973	vpaddd	%ymm6,%ymm2,%ymm2
6974	vpaddd	%ymm5,%ymm1,%ymm1
6975	vpaddd	%ymm4,%ymm0,%ymm0
6976	vpxor	%ymm3,%ymm15,%ymm15
6977	vpxor	%ymm2,%ymm14,%ymm14
6978	vpxor	%ymm1,%ymm13,%ymm13
6979	vpxor	%ymm0,%ymm12,%ymm12
6980	vpshufb	%ymm8,%ymm15,%ymm15
6981	vpshufb	%ymm8,%ymm14,%ymm14
6982	vpshufb	%ymm8,%ymm13,%ymm13
6983	movq	0+0+0(%rbp),%rdx
6984	movq	%rdx,%r15
6985	mulxq	%r10,%r13,%r14
6986	mulxq	%r11,%rax,%rdx
6987	imulq	%r12,%r15
6988	addq	%rax,%r14
6989	adcq	%rdx,%r15
6990	vpshufb	%ymm8,%ymm12,%ymm12
6991	vpaddd	%ymm15,%ymm11,%ymm11
6992	vpaddd	%ymm14,%ymm10,%ymm10
6993	vpaddd	%ymm13,%ymm9,%ymm9
6994	vpaddd	0+128(%rbp),%ymm12,%ymm8
6995	vpxor	%ymm11,%ymm7,%ymm7
6996	vpxor	%ymm10,%ymm6,%ymm6
6997	vpxor	%ymm9,%ymm5,%ymm5
6998	movq	8+0+0(%rbp),%rdx
6999	mulxq	%r10,%r10,%rax
7000	addq	%r10,%r14
7001	mulxq	%r11,%r11,%r9
7002	adcq	%r11,%r15
7003	adcq	$0,%r9
7004	imulq	%r12,%rdx
7005	vpxor	%ymm8,%ymm4,%ymm4
7006	vmovdqa	%ymm8,0+128(%rbp)
7007	vpsrld	$25,%ymm7,%ymm8
7008	vpslld	$32-25,%ymm7,%ymm7
7009	vpxor	%ymm8,%ymm7,%ymm7
7010	vpsrld	$25,%ymm6,%ymm8
7011	vpslld	$32-25,%ymm6,%ymm6
7012	vpxor	%ymm8,%ymm6,%ymm6
7013	addq	%rax,%r15
7014	adcq	%rdx,%r9
7015	vpsrld	$25,%ymm5,%ymm8
7016	vpslld	$32-25,%ymm5,%ymm5
7017	vpxor	%ymm8,%ymm5,%ymm5
7018	vpsrld	$25,%ymm4,%ymm8
7019	vpslld	$32-25,%ymm4,%ymm4
7020	vpxor	%ymm8,%ymm4,%ymm4
7021	vmovdqa	0+128(%rbp),%ymm8
7022	vpalignr	$12,%ymm7,%ymm7,%ymm7
7023	vpalignr	$8,%ymm11,%ymm11,%ymm11
7024	vpalignr	$4,%ymm15,%ymm15,%ymm15
7025	vpalignr	$12,%ymm6,%ymm6,%ymm6
7026	vpalignr	$8,%ymm10,%ymm10,%ymm10
7027	vpalignr	$4,%ymm14,%ymm14,%ymm14
7028	vpalignr	$12,%ymm5,%ymm5,%ymm5
7029	vpalignr	$8,%ymm9,%ymm9,%ymm9
7030	vpalignr	$4,%ymm13,%ymm13,%ymm13
7031	vpalignr	$12,%ymm4,%ymm4,%ymm4
7032	vpalignr	$8,%ymm8,%ymm8,%ymm8
7033	movq	%r13,%r10
7034	movq	%r14,%r11
7035	movq	%r15,%r12
7036	andq	$3,%r12
7037	movq	%r15,%r13
7038	andq	$-4,%r13
7039	movq	%r9,%r14
7040	shrdq	$2,%r9,%r15
7041	shrq	$2,%r9
7042	addq	%r13,%r15
7043	adcq	%r14,%r9
7044	addq	%r15,%r10
7045	adcq	%r9,%r11
7046	adcq	$0,%r12
7047	vpalignr	$4,%ymm12,%ymm12,%ymm12
7048
7049	decq	%rcx
7050	jne	L$seal_avx2_main_loop_rounds
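# All double-rounds done: add the saved initial state (constants, key rows at
# 64/96(%rbp), per-block counters) back into the four states.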
7051	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
7052	vpaddd	0+64(%rbp),%ymm7,%ymm7
7053	vpaddd	0+96(%rbp),%ymm11,%ymm11
7054	vpaddd	0+256(%rbp),%ymm15,%ymm15
7055	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
7056	vpaddd	0+64(%rbp),%ymm6,%ymm6
7057	vpaddd	0+96(%rbp),%ymm10,%ymm10
7058	vpaddd	0+224(%rbp),%ymm14,%ymm14
7059	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7060	vpaddd	0+64(%rbp),%ymm5,%ymm5
7061	vpaddd	0+96(%rbp),%ymm9,%ymm9
7062	vpaddd	0+192(%rbp),%ymm13,%ymm13
7063	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7064	vpaddd	0+64(%rbp),%ymm4,%ymm4
7065	vpaddd	0+96(%rbp),%ymm8,%ymm8
7066	vpaddd	0+160(%rbp),%ymm12,%ymm12
7067
7068	vmovdqa	%ymm0,0+128(%rbp)
7069	addq	0+0(%rdi),%r10
7070	adcq	8+0(%rdi),%r11
7071	adcq	$1,%r12
7072	movq	0+0+0(%rbp),%rdx
7073	movq	%rdx,%r15
7074	mulxq	%r10,%r13,%r14
7075	mulxq	%r11,%rax,%rdx
7076	imulq	%r12,%r15
7077	addq	%rax,%r14
7078	adcq	%rdx,%r15
7079	movq	8+0+0(%rbp),%rdx
7080	mulxq	%r10,%r10,%rax
7081	addq	%r10,%r14
7082	mulxq	%r11,%r11,%r9
7083	adcq	%r11,%r15
7084	adcq	$0,%r9
7085	imulq	%r12,%rdx
7086	addq	%rax,%r15
7087	adcq	%rdx,%r9
7088	movq	%r13,%r10
7089	movq	%r14,%r11
7090	movq	%r15,%r12
7091	andq	$3,%r12
7092	movq	%r15,%r13
7093	andq	$-4,%r13
7094	movq	%r9,%r14
7095	shrdq	$2,%r9,%r15
7096	shrq	$2,%r9
7097	addq	%r13,%r15
7098	adcq	%r14,%r9
7099	addq	%r15,%r10
7100	adcq	%r9,%r11
7101	adcq	$0,%r12
7102	addq	0+16(%rdi),%r10
7103	adcq	8+16(%rdi),%r11
7104	adcq	$1,%r12
7105	movq	0+0+0(%rbp),%rdx
7106	movq	%rdx,%r15
7107	mulxq	%r10,%r13,%r14
7108	mulxq	%r11,%rax,%rdx
7109	imulq	%r12,%r15
7110	addq	%rax,%r14
7111	adcq	%rdx,%r15
7112	movq	8+0+0(%rbp),%rdx
7113	mulxq	%r10,%r10,%rax
7114	addq	%r10,%r14
7115	mulxq	%r11,%r11,%r9
7116	adcq	%r11,%r15
7117	adcq	$0,%r9
7118	imulq	%r12,%rdx
7119	addq	%rax,%r15
7120	adcq	%rdx,%r9
7121	movq	%r13,%r10
7122	movq	%r14,%r11
7123	movq	%r15,%r12
7124	andq	$3,%r12
7125	movq	%r15,%r13
7126	andq	$-4,%r13
7127	movq	%r9,%r14
7128	shrdq	$2,%r9,%r15
7129	shrq	$2,%r9
7130	addq	%r13,%r15
7131	adcq	%r14,%r9
7132	addq	%r15,%r10
7133	adcq	%r9,%r11
7134	adcq	$0,%r12
7135
7136	leaq	32(%rdi),%rdi
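# Recombine the 128-bit lanes of each state pair into sequential 64-byte
# blocks, then XOR 512 bytes of plaintext from %rsi and store the ciphertext
# at %rdi.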
7137	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
7138	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
7139	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
7140	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
7141	vpxor	0+0(%rsi),%ymm0,%ymm0
7142	vpxor	32+0(%rsi),%ymm3,%ymm3
7143	vpxor	64+0(%rsi),%ymm7,%ymm7
7144	vpxor	96+0(%rsi),%ymm11,%ymm11
7145	vmovdqu	%ymm0,0+0(%rdi)
7146	vmovdqu	%ymm3,32+0(%rdi)
7147	vmovdqu	%ymm7,64+0(%rdi)
7148	vmovdqu	%ymm11,96+0(%rdi)
7149
7150	vmovdqa	0+128(%rbp),%ymm0
7151	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7152	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7153	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7154	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7155	vpxor	0+128(%rsi),%ymm3,%ymm3
7156	vpxor	32+128(%rsi),%ymm2,%ymm2
7157	vpxor	64+128(%rsi),%ymm6,%ymm6
7158	vpxor	96+128(%rsi),%ymm10,%ymm10
7159	vmovdqu	%ymm3,0+128(%rdi)
7160	vmovdqu	%ymm2,32+128(%rdi)
7161	vmovdqu	%ymm6,64+128(%rdi)
7162	vmovdqu	%ymm10,96+128(%rdi)
7163	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7164	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7165	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7166	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7167	vpxor	0+256(%rsi),%ymm3,%ymm3
7168	vpxor	32+256(%rsi),%ymm1,%ymm1
7169	vpxor	64+256(%rsi),%ymm5,%ymm5
7170	vpxor	96+256(%rsi),%ymm9,%ymm9
7171	vmovdqu	%ymm3,0+256(%rdi)
7172	vmovdqu	%ymm1,32+256(%rdi)
7173	vmovdqu	%ymm5,64+256(%rdi)
7174	vmovdqu	%ymm9,96+256(%rdi)
7175	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
7176	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
7177	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
7178	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
7179	vpxor	0+384(%rsi),%ymm3,%ymm3
7180	vpxor	32+384(%rsi),%ymm0,%ymm0
7181	vpxor	64+384(%rsi),%ymm4,%ymm4
7182	vpxor	96+384(%rsi),%ymm8,%ymm8
7183	vmovdqu	%ymm3,0+384(%rdi)
7184	vmovdqu	%ymm0,32+384(%rdi)
7185	vmovdqu	%ymm4,64+384(%rdi)
7186	vmovdqu	%ymm8,96+384(%rdi)
7187
7188	leaq	512(%rsi),%rsi
7189	subq	$512,%rbx
7190	cmpq	$512,%rbx
7191	jg	L$seal_avx2_main_loop
7192
7193	addq	0+0(%rdi),%r10
7194	adcq	8+0(%rdi),%r11
7195	adcq	$1,%r12
7196	movq	0+0+0(%rbp),%rdx
7197	movq	%rdx,%r15
7198	mulxq	%r10,%r13,%r14
7199	mulxq	%r11,%rax,%rdx
7200	imulq	%r12,%r15
7201	addq	%rax,%r14
7202	adcq	%rdx,%r15
7203	movq	8+0+0(%rbp),%rdx
7204	mulxq	%r10,%r10,%rax
7205	addq	%r10,%r14
7206	mulxq	%r11,%r11,%r9
7207	adcq	%r11,%r15
7208	adcq	$0,%r9
7209	imulq	%r12,%rdx
7210	addq	%rax,%r15
7211	adcq	%rdx,%r9
7212	movq	%r13,%r10
7213	movq	%r14,%r11
7214	movq	%r15,%r12
7215	andq	$3,%r12
7216	movq	%r15,%r13
7217	andq	$-4,%r13
7218	movq	%r9,%r14
7219	shrdq	$2,%r9,%r15
7220	shrq	$2,%r9
7221	addq	%r13,%r15
7222	adcq	%r14,%r9
7223	addq	%r15,%r10
7224	adcq	%r9,%r11
7225	adcq	$0,%r12
7226	addq	0+16(%rdi),%r10
7227	adcq	8+16(%rdi),%r11
7228	adcq	$1,%r12
7229	movq	0+0+0(%rbp),%rdx
7230	movq	%rdx,%r15
7231	mulxq	%r10,%r13,%r14
7232	mulxq	%r11,%rax,%rdx
7233	imulq	%r12,%r15
7234	addq	%rax,%r14
7235	adcq	%rdx,%r15
7236	movq	8+0+0(%rbp),%rdx
7237	mulxq	%r10,%r10,%rax
7238	addq	%r10,%r14
7239	mulxq	%r11,%r11,%r9
7240	adcq	%r11,%r15
7241	adcq	$0,%r9
7242	imulq	%r12,%rdx
7243	addq	%rax,%r15
7244	adcq	%rdx,%r9
7245	movq	%r13,%r10
7246	movq	%r14,%r11
7247	movq	%r15,%r12
7248	andq	$3,%r12
7249	movq	%r15,%r13
7250	andq	$-4,%r13
7251	movq	%r9,%r14
7252	shrdq	$2,%r9,%r15
7253	shrq	$2,%r9
7254	addq	%r13,%r15
7255	adcq	%r14,%r9
7256	addq	%r15,%r10
7257	adcq	%r9,%r11
7258	adcq	$0,%r12
7259
7260	leaq	32(%rdi),%rdi
7261	movq	$10,%rcx
7262	xorq	%r8,%r8
7263
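# At most 512 bytes remain: dispatch on %rbx to a tail that generates just
# enough keystream.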
7264	cmpq	$384,%rbx
7265	ja	L$seal_avx2_tail_512
7266	cmpq	$256,%rbx
7267	ja	L$seal_avx2_tail_384
7268	cmpq	$128,%rbx
7269	ja	L$seal_avx2_tail_256
7270
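# Tail, at most 128 bytes: one 2-block state; the outstanding ciphertext keeps
# being hashed while the rounds run, and the data itself is encrypted in the
# short loop.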
7271L$seal_avx2_tail_128:
7272	vmovdqa	L$chacha20_consts(%rip),%ymm0
7273	vmovdqa	0+64(%rbp),%ymm4
7274	vmovdqa	0+96(%rbp),%ymm8
7275	vmovdqa	L$avx2_inc(%rip),%ymm12
7276	vpaddd	0+160(%rbp),%ymm12,%ymm12
7277	vmovdqa	%ymm12,0+160(%rbp)
7278
7279L$seal_avx2_tail_128_rounds_and_3xhash:
7280	addq	0+0(%rdi),%r10
7281	adcq	8+0(%rdi),%r11
7282	adcq	$1,%r12
7283	movq	0+0+0(%rbp),%rdx
7284	movq	%rdx,%r15
7285	mulxq	%r10,%r13,%r14
7286	mulxq	%r11,%rax,%rdx
7287	imulq	%r12,%r15
7288	addq	%rax,%r14
7289	adcq	%rdx,%r15
7290	movq	8+0+0(%rbp),%rdx
7291	mulxq	%r10,%r10,%rax
7292	addq	%r10,%r14
7293	mulxq	%r11,%r11,%r9
7294	adcq	%r11,%r15
7295	adcq	$0,%r9
7296	imulq	%r12,%rdx
7297	addq	%rax,%r15
7298	adcq	%rdx,%r9
7299	movq	%r13,%r10
7300	movq	%r14,%r11
7301	movq	%r15,%r12
7302	andq	$3,%r12
7303	movq	%r15,%r13
7304	andq	$-4,%r13
7305	movq	%r9,%r14
7306	shrdq	$2,%r9,%r15
7307	shrq	$2,%r9
7308	addq	%r13,%r15
7309	adcq	%r14,%r9
7310	addq	%r15,%r10
7311	adcq	%r9,%r11
7312	adcq	$0,%r12
7313
7314	leaq	16(%rdi),%rdi
7315L$seal_avx2_tail_128_rounds_and_2xhash:
7316	vpaddd	%ymm4,%ymm0,%ymm0
7317	vpxor	%ymm0,%ymm12,%ymm12
7318	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7319	vpaddd	%ymm12,%ymm8,%ymm8
7320	vpxor	%ymm8,%ymm4,%ymm4
7321	vpsrld	$20,%ymm4,%ymm3
7322	vpslld	$12,%ymm4,%ymm4
7323	vpxor	%ymm3,%ymm4,%ymm4
7324	vpaddd	%ymm4,%ymm0,%ymm0
7325	vpxor	%ymm0,%ymm12,%ymm12
7326	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7327	vpaddd	%ymm12,%ymm8,%ymm8
7328	vpxor	%ymm8,%ymm4,%ymm4
7329	vpslld	$7,%ymm4,%ymm3
7330	vpsrld	$25,%ymm4,%ymm4
7331	vpxor	%ymm3,%ymm4,%ymm4
7332	vpalignr	$12,%ymm12,%ymm12,%ymm12
7333	vpalignr	$8,%ymm8,%ymm8,%ymm8
7334	vpalignr	$4,%ymm4,%ymm4,%ymm4
7335	addq	0+0(%rdi),%r10
7336	adcq	8+0(%rdi),%r11
7337	adcq	$1,%r12
7338	movq	0+0+0(%rbp),%rdx
7339	movq	%rdx,%r15
7340	mulxq	%r10,%r13,%r14
7341	mulxq	%r11,%rax,%rdx
7342	imulq	%r12,%r15
7343	addq	%rax,%r14
7344	adcq	%rdx,%r15
7345	movq	8+0+0(%rbp),%rdx
7346	mulxq	%r10,%r10,%rax
7347	addq	%r10,%r14
7348	mulxq	%r11,%r11,%r9
7349	adcq	%r11,%r15
7350	adcq	$0,%r9
7351	imulq	%r12,%rdx
7352	addq	%rax,%r15
7353	adcq	%rdx,%r9
7354	movq	%r13,%r10
7355	movq	%r14,%r11
7356	movq	%r15,%r12
7357	andq	$3,%r12
7358	movq	%r15,%r13
7359	andq	$-4,%r13
7360	movq	%r9,%r14
7361	shrdq	$2,%r9,%r15
7362	shrq	$2,%r9
7363	addq	%r13,%r15
7364	adcq	%r14,%r9
7365	addq	%r15,%r10
7366	adcq	%r9,%r11
7367	adcq	$0,%r12
7368	vpaddd	%ymm4,%ymm0,%ymm0
7369	vpxor	%ymm0,%ymm12,%ymm12
7370	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7371	vpaddd	%ymm12,%ymm8,%ymm8
7372	vpxor	%ymm8,%ymm4,%ymm4
7373	vpsrld	$20,%ymm4,%ymm3
7374	vpslld	$12,%ymm4,%ymm4
7375	vpxor	%ymm3,%ymm4,%ymm4
7376	vpaddd	%ymm4,%ymm0,%ymm0
7377	vpxor	%ymm0,%ymm12,%ymm12
7378	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7379	vpaddd	%ymm12,%ymm8,%ymm8
7380	vpxor	%ymm8,%ymm4,%ymm4
7381	vpslld	$7,%ymm4,%ymm3
7382	vpsrld	$25,%ymm4,%ymm4
7383	vpxor	%ymm3,%ymm4,%ymm4
7384	vpalignr	$4,%ymm12,%ymm12,%ymm12
7385	vpalignr	$8,%ymm8,%ymm8,%ymm8
7386	vpalignr	$12,%ymm4,%ymm4,%ymm4
7387	addq	0+16(%rdi),%r10
7388	adcq	8+16(%rdi),%r11
7389	adcq	$1,%r12
7390	movq	0+0+0(%rbp),%rdx
7391	movq	%rdx,%r15
7392	mulxq	%r10,%r13,%r14
7393	mulxq	%r11,%rax,%rdx
7394	imulq	%r12,%r15
7395	addq	%rax,%r14
7396	adcq	%rdx,%r15
7397	movq	8+0+0(%rbp),%rdx
7398	mulxq	%r10,%r10,%rax
7399	addq	%r10,%r14
7400	mulxq	%r11,%r11,%r9
7401	adcq	%r11,%r15
7402	adcq	$0,%r9
7403	imulq	%r12,%rdx
7404	addq	%rax,%r15
7405	adcq	%rdx,%r9
7406	movq	%r13,%r10
7407	movq	%r14,%r11
7408	movq	%r15,%r12
7409	andq	$3,%r12
7410	movq	%r15,%r13
7411	andq	$-4,%r13
7412	movq	%r9,%r14
7413	shrdq	$2,%r9,%r15
7414	shrq	$2,%r9
7415	addq	%r13,%r15
7416	adcq	%r14,%r9
7417	addq	%r15,%r10
7418	adcq	%r9,%r11
7419	adcq	$0,%r12
7420
7421	leaq	32(%rdi),%rdi
7422	decq	%rcx
7423	jg	L$seal_avx2_tail_128_rounds_and_3xhash
7424	decq	%r8
7425	jge	L$seal_avx2_tail_128_rounds_and_2xhash
7426	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7427	vpaddd	0+64(%rbp),%ymm4,%ymm4
7428	vpaddd	0+96(%rbp),%ymm8,%ymm8
7429	vpaddd	0+160(%rbp),%ymm12,%ymm12
7430	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7431	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7432	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7433	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7434	vmovdqa	%ymm3,%ymm8
7435
7436	jmp	L$seal_avx2_short_loop
7437
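# Tail, 129..256 bytes: two 2-block states; 128 bytes are encrypted here, the
# rest in the short loop.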
7438L$seal_avx2_tail_256:
7439	vmovdqa	L$chacha20_consts(%rip),%ymm0
7440	vmovdqa	0+64(%rbp),%ymm4
7441	vmovdqa	0+96(%rbp),%ymm8
7442	vmovdqa	%ymm0,%ymm1
7443	vmovdqa	%ymm4,%ymm5
7444	vmovdqa	%ymm8,%ymm9
7445	vmovdqa	L$avx2_inc(%rip),%ymm12
7446	vpaddd	0+160(%rbp),%ymm12,%ymm13
7447	vpaddd	%ymm13,%ymm12,%ymm12
7448	vmovdqa	%ymm12,0+160(%rbp)
7449	vmovdqa	%ymm13,0+192(%rbp)
7450
7451L$seal_avx2_tail_256_rounds_and_3xhash:
7452	addq	0+0(%rdi),%r10
7453	adcq	8+0(%rdi),%r11
7454	adcq	$1,%r12
7455	movq	0+0+0(%rbp),%rax
7456	movq	%rax,%r15
7457	mulq	%r10
7458	movq	%rax,%r13
7459	movq	%rdx,%r14
7460	movq	0+0+0(%rbp),%rax
7461	mulq	%r11
7462	imulq	%r12,%r15
7463	addq	%rax,%r14
7464	adcq	%rdx,%r15
7465	movq	8+0+0(%rbp),%rax
7466	movq	%rax,%r9
7467	mulq	%r10
7468	addq	%rax,%r14
7469	adcq	$0,%rdx
7470	movq	%rdx,%r10
7471	movq	8+0+0(%rbp),%rax
7472	mulq	%r11
7473	addq	%rax,%r15
7474	adcq	$0,%rdx
7475	imulq	%r12,%r9
7476	addq	%r10,%r15
7477	adcq	%rdx,%r9
7478	movq	%r13,%r10
7479	movq	%r14,%r11
7480	movq	%r15,%r12
7481	andq	$3,%r12
7482	movq	%r15,%r13
7483	andq	$-4,%r13
7484	movq	%r9,%r14
7485	shrdq	$2,%r9,%r15
7486	shrq	$2,%r9
7487	addq	%r13,%r15
7488	adcq	%r14,%r9
7489	addq	%r15,%r10
7490	adcq	%r9,%r11
7491	adcq	$0,%r12
7492
7493	leaq	16(%rdi),%rdi
7494L$seal_avx2_tail_256_rounds_and_2xhash:
7495	vpaddd	%ymm4,%ymm0,%ymm0
7496	vpxor	%ymm0,%ymm12,%ymm12
7497	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7498	vpaddd	%ymm12,%ymm8,%ymm8
7499	vpxor	%ymm8,%ymm4,%ymm4
7500	vpsrld	$20,%ymm4,%ymm3
7501	vpslld	$12,%ymm4,%ymm4
7502	vpxor	%ymm3,%ymm4,%ymm4
7503	vpaddd	%ymm4,%ymm0,%ymm0
7504	vpxor	%ymm0,%ymm12,%ymm12
7505	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7506	vpaddd	%ymm12,%ymm8,%ymm8
7507	vpxor	%ymm8,%ymm4,%ymm4
7508	vpslld	$7,%ymm4,%ymm3
7509	vpsrld	$25,%ymm4,%ymm4
7510	vpxor	%ymm3,%ymm4,%ymm4
7511	vpalignr	$12,%ymm12,%ymm12,%ymm12
7512	vpalignr	$8,%ymm8,%ymm8,%ymm8
7513	vpalignr	$4,%ymm4,%ymm4,%ymm4
7514	vpaddd	%ymm5,%ymm1,%ymm1
7515	vpxor	%ymm1,%ymm13,%ymm13
7516	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7517	vpaddd	%ymm13,%ymm9,%ymm9
7518	vpxor	%ymm9,%ymm5,%ymm5
7519	vpsrld	$20,%ymm5,%ymm3
7520	vpslld	$12,%ymm5,%ymm5
7521	vpxor	%ymm3,%ymm5,%ymm5
7522	vpaddd	%ymm5,%ymm1,%ymm1
7523	vpxor	%ymm1,%ymm13,%ymm13
7524	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7525	vpaddd	%ymm13,%ymm9,%ymm9
7526	vpxor	%ymm9,%ymm5,%ymm5
7527	vpslld	$7,%ymm5,%ymm3
7528	vpsrld	$25,%ymm5,%ymm5
7529	vpxor	%ymm3,%ymm5,%ymm5
7530	vpalignr	$12,%ymm13,%ymm13,%ymm13
7531	vpalignr	$8,%ymm9,%ymm9,%ymm9
7532	vpalignr	$4,%ymm5,%ymm5,%ymm5
7533	addq	0+0(%rdi),%r10
7534	adcq	8+0(%rdi),%r11
7535	adcq	$1,%r12
7536	movq	0+0+0(%rbp),%rax
7537	movq	%rax,%r15
7538	mulq	%r10
7539	movq	%rax,%r13
7540	movq	%rdx,%r14
7541	movq	0+0+0(%rbp),%rax
7542	mulq	%r11
7543	imulq	%r12,%r15
7544	addq	%rax,%r14
7545	adcq	%rdx,%r15
7546	movq	8+0+0(%rbp),%rax
7547	movq	%rax,%r9
7548	mulq	%r10
7549	addq	%rax,%r14
7550	adcq	$0,%rdx
7551	movq	%rdx,%r10
7552	movq	8+0+0(%rbp),%rax
7553	mulq	%r11
7554	addq	%rax,%r15
7555	adcq	$0,%rdx
7556	imulq	%r12,%r9
7557	addq	%r10,%r15
7558	adcq	%rdx,%r9
7559	movq	%r13,%r10
7560	movq	%r14,%r11
7561	movq	%r15,%r12
7562	andq	$3,%r12
7563	movq	%r15,%r13
7564	andq	$-4,%r13
7565	movq	%r9,%r14
7566	shrdq	$2,%r9,%r15
7567	shrq	$2,%r9
7568	addq	%r13,%r15
7569	adcq	%r14,%r9
7570	addq	%r15,%r10
7571	adcq	%r9,%r11
7572	adcq	$0,%r12
7573	vpaddd	%ymm4,%ymm0,%ymm0
7574	vpxor	%ymm0,%ymm12,%ymm12
7575	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7576	vpaddd	%ymm12,%ymm8,%ymm8
7577	vpxor	%ymm8,%ymm4,%ymm4
7578	vpsrld	$20,%ymm4,%ymm3
7579	vpslld	$12,%ymm4,%ymm4
7580	vpxor	%ymm3,%ymm4,%ymm4
7581	vpaddd	%ymm4,%ymm0,%ymm0
7582	vpxor	%ymm0,%ymm12,%ymm12
7583	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7584	vpaddd	%ymm12,%ymm8,%ymm8
7585	vpxor	%ymm8,%ymm4,%ymm4
7586	vpslld	$7,%ymm4,%ymm3
7587	vpsrld	$25,%ymm4,%ymm4
7588	vpxor	%ymm3,%ymm4,%ymm4
7589	vpalignr	$4,%ymm12,%ymm12,%ymm12
7590	vpalignr	$8,%ymm8,%ymm8,%ymm8
7591	vpalignr	$12,%ymm4,%ymm4,%ymm4
7592	vpaddd	%ymm5,%ymm1,%ymm1
7593	vpxor	%ymm1,%ymm13,%ymm13
7594	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7595	vpaddd	%ymm13,%ymm9,%ymm9
7596	vpxor	%ymm9,%ymm5,%ymm5
7597	vpsrld	$20,%ymm5,%ymm3
7598	vpslld	$12,%ymm5,%ymm5
7599	vpxor	%ymm3,%ymm5,%ymm5
7600	vpaddd	%ymm5,%ymm1,%ymm1
7601	vpxor	%ymm1,%ymm13,%ymm13
7602	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7603	vpaddd	%ymm13,%ymm9,%ymm9
7604	vpxor	%ymm9,%ymm5,%ymm5
7605	vpslld	$7,%ymm5,%ymm3
7606	vpsrld	$25,%ymm5,%ymm5
7607	vpxor	%ymm3,%ymm5,%ymm5
7608	vpalignr	$4,%ymm13,%ymm13,%ymm13
7609	vpalignr	$8,%ymm9,%ymm9,%ymm9
7610	vpalignr	$12,%ymm5,%ymm5,%ymm5
7611	addq	0+16(%rdi),%r10
7612	adcq	8+16(%rdi),%r11
7613	adcq	$1,%r12
7614	movq	0+0+0(%rbp),%rax
7615	movq	%rax,%r15
7616	mulq	%r10
7617	movq	%rax,%r13
7618	movq	%rdx,%r14
7619	movq	0+0+0(%rbp),%rax
7620	mulq	%r11
7621	imulq	%r12,%r15
7622	addq	%rax,%r14
7623	adcq	%rdx,%r15
7624	movq	8+0+0(%rbp),%rax
7625	movq	%rax,%r9
7626	mulq	%r10
7627	addq	%rax,%r14
7628	adcq	$0,%rdx
7629	movq	%rdx,%r10
7630	movq	8+0+0(%rbp),%rax
7631	mulq	%r11
7632	addq	%rax,%r15
7633	adcq	$0,%rdx
7634	imulq	%r12,%r9
7635	addq	%r10,%r15
7636	adcq	%rdx,%r9
7637	movq	%r13,%r10
7638	movq	%r14,%r11
7639	movq	%r15,%r12
7640	andq	$3,%r12
7641	movq	%r15,%r13
7642	andq	$-4,%r13
7643	movq	%r9,%r14
7644	shrdq	$2,%r9,%r15
7645	shrq	$2,%r9
7646	addq	%r13,%r15
7647	adcq	%r14,%r9
7648	addq	%r15,%r10
7649	adcq	%r9,%r11
7650	adcq	$0,%r12
7651
7652	leaq	32(%rdi),%rdi
7653	decq	%rcx
7654	jg	L$seal_avx2_tail_256_rounds_and_3xhash
7655	decq	%r8
7656	jge	L$seal_avx2_tail_256_rounds_and_2xhash
7657	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7658	vpaddd	0+64(%rbp),%ymm5,%ymm5
7659	vpaddd	0+96(%rbp),%ymm9,%ymm9
7660	vpaddd	0+192(%rbp),%ymm13,%ymm13
7661	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7662	vpaddd	0+64(%rbp),%ymm4,%ymm4
7663	vpaddd	0+96(%rbp),%ymm8,%ymm8
7664	vpaddd	0+160(%rbp),%ymm12,%ymm12
7665	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7666	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7667	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7668	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7669	vpxor	0+0(%rsi),%ymm3,%ymm3
7670	vpxor	32+0(%rsi),%ymm1,%ymm1
7671	vpxor	64+0(%rsi),%ymm5,%ymm5
7672	vpxor	96+0(%rsi),%ymm9,%ymm9
7673	vmovdqu	%ymm3,0+0(%rdi)
7674	vmovdqu	%ymm1,32+0(%rdi)
7675	vmovdqu	%ymm5,64+0(%rdi)
7676	vmovdqu	%ymm9,96+0(%rdi)
7677	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7678	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7679	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7680	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7681	vmovdqa	%ymm3,%ymm8
7682
7683	movq	$128,%rcx
7684	leaq	128(%rsi),%rsi
7685	subq	$128,%rbx
7686	jmp	L$seal_avx2_short_hash_remainder
7687
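# Tail, 257..384 bytes: three 2-block states; 256 bytes are encrypted here,
# the rest in the short loop.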
7688L$seal_avx2_tail_384:
7689	vmovdqa	L$chacha20_consts(%rip),%ymm0
7690	vmovdqa	0+64(%rbp),%ymm4
7691	vmovdqa	0+96(%rbp),%ymm8
7692	vmovdqa	%ymm0,%ymm1
7693	vmovdqa	%ymm4,%ymm5
7694	vmovdqa	%ymm8,%ymm9
7695	vmovdqa	%ymm0,%ymm2
7696	vmovdqa	%ymm4,%ymm6
7697	vmovdqa	%ymm8,%ymm10
7698	vmovdqa	L$avx2_inc(%rip),%ymm12
7699	vpaddd	0+160(%rbp),%ymm12,%ymm14
7700	vpaddd	%ymm14,%ymm12,%ymm13
7701	vpaddd	%ymm13,%ymm12,%ymm12
7702	vmovdqa	%ymm12,0+160(%rbp)
7703	vmovdqa	%ymm13,0+192(%rbp)
7704	vmovdqa	%ymm14,0+224(%rbp)
7705
7706L$seal_avx2_tail_384_rounds_and_3xhash:
7707	addq	0+0(%rdi),%r10
7708	adcq	8+0(%rdi),%r11
7709	adcq	$1,%r12
7710	movq	0+0+0(%rbp),%rax
7711	movq	%rax,%r15
7712	mulq	%r10
7713	movq	%rax,%r13
7714	movq	%rdx,%r14
7715	movq	0+0+0(%rbp),%rax
7716	mulq	%r11
7717	imulq	%r12,%r15
7718	addq	%rax,%r14
7719	adcq	%rdx,%r15
7720	movq	8+0+0(%rbp),%rax
7721	movq	%rax,%r9
7722	mulq	%r10
7723	addq	%rax,%r14
7724	adcq	$0,%rdx
7725	movq	%rdx,%r10
7726	movq	8+0+0(%rbp),%rax
7727	mulq	%r11
7728	addq	%rax,%r15
7729	adcq	$0,%rdx
7730	imulq	%r12,%r9
7731	addq	%r10,%r15
7732	adcq	%rdx,%r9
7733	movq	%r13,%r10
7734	movq	%r14,%r11
7735	movq	%r15,%r12
7736	andq	$3,%r12
7737	movq	%r15,%r13
7738	andq	$-4,%r13
7739	movq	%r9,%r14
7740	shrdq	$2,%r9,%r15
7741	shrq	$2,%r9
7742	addq	%r13,%r15
7743	adcq	%r14,%r9
7744	addq	%r15,%r10
7745	adcq	%r9,%r11
7746	adcq	$0,%r12
7747
7748	leaq	16(%rdi),%rdi
7749L$seal_avx2_tail_384_rounds_and_2xhash:
7750	vpaddd	%ymm4,%ymm0,%ymm0
7751	vpxor	%ymm0,%ymm12,%ymm12
7752	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7753	vpaddd	%ymm12,%ymm8,%ymm8
7754	vpxor	%ymm8,%ymm4,%ymm4
7755	vpsrld	$20,%ymm4,%ymm3
7756	vpslld	$12,%ymm4,%ymm4
7757	vpxor	%ymm3,%ymm4,%ymm4
7758	vpaddd	%ymm4,%ymm0,%ymm0
7759	vpxor	%ymm0,%ymm12,%ymm12
7760	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7761	vpaddd	%ymm12,%ymm8,%ymm8
7762	vpxor	%ymm8,%ymm4,%ymm4
7763	vpslld	$7,%ymm4,%ymm3
7764	vpsrld	$25,%ymm4,%ymm4
7765	vpxor	%ymm3,%ymm4,%ymm4
7766	vpalignr	$12,%ymm12,%ymm12,%ymm12
7767	vpalignr	$8,%ymm8,%ymm8,%ymm8
7768	vpalignr	$4,%ymm4,%ymm4,%ymm4
7769	vpaddd	%ymm5,%ymm1,%ymm1
7770	vpxor	%ymm1,%ymm13,%ymm13
7771	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7772	vpaddd	%ymm13,%ymm9,%ymm9
7773	vpxor	%ymm9,%ymm5,%ymm5
7774	vpsrld	$20,%ymm5,%ymm3
7775	vpslld	$12,%ymm5,%ymm5
7776	vpxor	%ymm3,%ymm5,%ymm5
7777	vpaddd	%ymm5,%ymm1,%ymm1
7778	vpxor	%ymm1,%ymm13,%ymm13
7779	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7780	vpaddd	%ymm13,%ymm9,%ymm9
7781	vpxor	%ymm9,%ymm5,%ymm5
7782	vpslld	$7,%ymm5,%ymm3
7783	vpsrld	$25,%ymm5,%ymm5
7784	vpxor	%ymm3,%ymm5,%ymm5
7785	vpalignr	$12,%ymm13,%ymm13,%ymm13
7786	vpalignr	$8,%ymm9,%ymm9,%ymm9
7787	vpalignr	$4,%ymm5,%ymm5,%ymm5
7788	addq	0+0(%rdi),%r10
7789	adcq	8+0(%rdi),%r11
7790	adcq	$1,%r12
7791	movq	0+0+0(%rbp),%rax
7792	movq	%rax,%r15
7793	mulq	%r10
7794	movq	%rax,%r13
7795	movq	%rdx,%r14
7796	movq	0+0+0(%rbp),%rax
7797	mulq	%r11
7798	imulq	%r12,%r15
7799	addq	%rax,%r14
7800	adcq	%rdx,%r15
7801	movq	8+0+0(%rbp),%rax
7802	movq	%rax,%r9
7803	mulq	%r10
7804	addq	%rax,%r14
7805	adcq	$0,%rdx
7806	movq	%rdx,%r10
7807	movq	8+0+0(%rbp),%rax
7808	mulq	%r11
7809	addq	%rax,%r15
7810	adcq	$0,%rdx
7811	imulq	%r12,%r9
7812	addq	%r10,%r15
7813	adcq	%rdx,%r9
7814	movq	%r13,%r10
7815	movq	%r14,%r11
7816	movq	%r15,%r12
7817	andq	$3,%r12
7818	movq	%r15,%r13
7819	andq	$-4,%r13
7820	movq	%r9,%r14
7821	shrdq	$2,%r9,%r15
7822	shrq	$2,%r9
7823	addq	%r13,%r15
7824	adcq	%r14,%r9
7825	addq	%r15,%r10
7826	adcq	%r9,%r11
7827	adcq	$0,%r12
7828	vpaddd	%ymm6,%ymm2,%ymm2
7829	vpxor	%ymm2,%ymm14,%ymm14
7830	vpshufb	L$rol16(%rip),%ymm14,%ymm14
7831	vpaddd	%ymm14,%ymm10,%ymm10
7832	vpxor	%ymm10,%ymm6,%ymm6
7833	vpsrld	$20,%ymm6,%ymm3
7834	vpslld	$12,%ymm6,%ymm6
7835	vpxor	%ymm3,%ymm6,%ymm6
7836	vpaddd	%ymm6,%ymm2,%ymm2
7837	vpxor	%ymm2,%ymm14,%ymm14
7838	vpshufb	L$rol8(%rip),%ymm14,%ymm14
7839	vpaddd	%ymm14,%ymm10,%ymm10
7840	vpxor	%ymm10,%ymm6,%ymm6
7841	vpslld	$7,%ymm6,%ymm3
7842	vpsrld	$25,%ymm6,%ymm6
7843	vpxor	%ymm3,%ymm6,%ymm6
7844	vpalignr	$12,%ymm14,%ymm14,%ymm14
7845	vpalignr	$8,%ymm10,%ymm10,%ymm10
7846	vpalignr	$4,%ymm6,%ymm6,%ymm6
7847	vpaddd	%ymm4,%ymm0,%ymm0
7848	vpxor	%ymm0,%ymm12,%ymm12
7849	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7850	vpaddd	%ymm12,%ymm8,%ymm8
7851	vpxor	%ymm8,%ymm4,%ymm4
7852	vpsrld	$20,%ymm4,%ymm3
7853	vpslld	$12,%ymm4,%ymm4
7854	vpxor	%ymm3,%ymm4,%ymm4
7855	vpaddd	%ymm4,%ymm0,%ymm0
7856	vpxor	%ymm0,%ymm12,%ymm12
7857	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7858	vpaddd	%ymm12,%ymm8,%ymm8
7859	vpxor	%ymm8,%ymm4,%ymm4
7860	vpslld	$7,%ymm4,%ymm3
7861	vpsrld	$25,%ymm4,%ymm4
7862	vpxor	%ymm3,%ymm4,%ymm4
7863	vpalignr	$4,%ymm12,%ymm12,%ymm12
7864	vpalignr	$8,%ymm8,%ymm8,%ymm8
7865	vpalignr	$12,%ymm4,%ymm4,%ymm4
7866	addq	0+16(%rdi),%r10
7867	adcq	8+16(%rdi),%r11
7868	adcq	$1,%r12
7869	movq	0+0+0(%rbp),%rax
7870	movq	%rax,%r15
7871	mulq	%r10
7872	movq	%rax,%r13
7873	movq	%rdx,%r14
7874	movq	0+0+0(%rbp),%rax
7875	mulq	%r11
7876	imulq	%r12,%r15
7877	addq	%rax,%r14
7878	adcq	%rdx,%r15
7879	movq	8+0+0(%rbp),%rax
7880	movq	%rax,%r9
7881	mulq	%r10
7882	addq	%rax,%r14
7883	adcq	$0,%rdx
7884	movq	%rdx,%r10
7885	movq	8+0+0(%rbp),%rax
7886	mulq	%r11
7887	addq	%rax,%r15
7888	adcq	$0,%rdx
7889	imulq	%r12,%r9
7890	addq	%r10,%r15
7891	adcq	%rdx,%r9
7892	movq	%r13,%r10
7893	movq	%r14,%r11
7894	movq	%r15,%r12
7895	andq	$3,%r12
7896	movq	%r15,%r13
7897	andq	$-4,%r13
7898	movq	%r9,%r14
7899	shrdq	$2,%r9,%r15
7900	shrq	$2,%r9
7901	addq	%r13,%r15
7902	adcq	%r14,%r9
7903	addq	%r15,%r10
7904	adcq	%r9,%r11
7905	adcq	$0,%r12
7906	vpaddd	%ymm5,%ymm1,%ymm1
7907	vpxor	%ymm1,%ymm13,%ymm13
7908	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7909	vpaddd	%ymm13,%ymm9,%ymm9
7910	vpxor	%ymm9,%ymm5,%ymm5
7911	vpsrld	$20,%ymm5,%ymm3
7912	vpslld	$12,%ymm5,%ymm5
7913	vpxor	%ymm3,%ymm5,%ymm5
7914	vpaddd	%ymm5,%ymm1,%ymm1
7915	vpxor	%ymm1,%ymm13,%ymm13
7916	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7917	vpaddd	%ymm13,%ymm9,%ymm9
7918	vpxor	%ymm9,%ymm5,%ymm5
7919	vpslld	$7,%ymm5,%ymm3
7920	vpsrld	$25,%ymm5,%ymm5
7921	vpxor	%ymm3,%ymm5,%ymm5
7922	vpalignr	$4,%ymm13,%ymm13,%ymm13
7923	vpalignr	$8,%ymm9,%ymm9,%ymm9
7924	vpalignr	$12,%ymm5,%ymm5,%ymm5
7925	vpaddd	%ymm6,%ymm2,%ymm2
7926	vpxor	%ymm2,%ymm14,%ymm14
7927	vpshufb	L$rol16(%rip),%ymm14,%ymm14
7928	vpaddd	%ymm14,%ymm10,%ymm10
7929	vpxor	%ymm10,%ymm6,%ymm6
7930	vpsrld	$20,%ymm6,%ymm3
7931	vpslld	$12,%ymm6,%ymm6
7932	vpxor	%ymm3,%ymm6,%ymm6
7933	vpaddd	%ymm6,%ymm2,%ymm2
7934	vpxor	%ymm2,%ymm14,%ymm14
7935	vpshufb	L$rol8(%rip),%ymm14,%ymm14
7936	vpaddd	%ymm14,%ymm10,%ymm10
7937	vpxor	%ymm10,%ymm6,%ymm6
7938	vpslld	$7,%ymm6,%ymm3
7939	vpsrld	$25,%ymm6,%ymm6
7940	vpxor	%ymm3,%ymm6,%ymm6
7941	vpalignr	$4,%ymm14,%ymm14,%ymm14
7942	vpalignr	$8,%ymm10,%ymm10,%ymm10
7943	vpalignr	$12,%ymm6,%ymm6,%ymm6
7944
7945	leaq	32(%rdi),%rdi
7946	decq	%rcx
7947	jg	L$seal_avx2_tail_384_rounds_and_3xhash
7948	decq	%r8
7949	jge	L$seal_avx2_tail_384_rounds_and_2xhash
7950	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
7951	vpaddd	0+64(%rbp),%ymm6,%ymm6
7952	vpaddd	0+96(%rbp),%ymm10,%ymm10
7953	vpaddd	0+224(%rbp),%ymm14,%ymm14
7954	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7955	vpaddd	0+64(%rbp),%ymm5,%ymm5
7956	vpaddd	0+96(%rbp),%ymm9,%ymm9
7957	vpaddd	0+192(%rbp),%ymm13,%ymm13
7958	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7959	vpaddd	0+64(%rbp),%ymm4,%ymm4
7960	vpaddd	0+96(%rbp),%ymm8,%ymm8
7961	vpaddd	0+160(%rbp),%ymm12,%ymm12
7962	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7963	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7964	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7965	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7966	vpxor	0+0(%rsi),%ymm3,%ymm3
7967	vpxor	32+0(%rsi),%ymm2,%ymm2
7968	vpxor	64+0(%rsi),%ymm6,%ymm6
7969	vpxor	96+0(%rsi),%ymm10,%ymm10
7970	vmovdqu	%ymm3,0+0(%rdi)
7971	vmovdqu	%ymm2,32+0(%rdi)
7972	vmovdqu	%ymm6,64+0(%rdi)
7973	vmovdqu	%ymm10,96+0(%rdi)
7974	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7975	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7976	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7977	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7978	vpxor	0+128(%rsi),%ymm3,%ymm3
7979	vpxor	32+128(%rsi),%ymm1,%ymm1
7980	vpxor	64+128(%rsi),%ymm5,%ymm5
7981	vpxor	96+128(%rsi),%ymm9,%ymm9
7982	vmovdqu	%ymm3,0+128(%rdi)
7983	vmovdqu	%ymm1,32+128(%rdi)
7984	vmovdqu	%ymm5,64+128(%rdi)
7985	vmovdqu	%ymm9,96+128(%rdi)
7986	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7987	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7988	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7989	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7990	vmovdqa	%ymm3,%ymm8
7991
7992	movq	$256,%rcx
7993	leaq	256(%rsi),%rsi
7994	subq	$256,%rbx
7995	jmp	L$seal_avx2_short_hash_remainder
7996
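# Tail, 385..512 bytes: four 2-block states; 384 bytes are encrypted here, the
# rest in the short loop.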
7997L$seal_avx2_tail_512:
7998	vmovdqa	L$chacha20_consts(%rip),%ymm0
7999	vmovdqa	0+64(%rbp),%ymm4
8000	vmovdqa	0+96(%rbp),%ymm8
8001	vmovdqa	%ymm0,%ymm1
8002	vmovdqa	%ymm4,%ymm5
8003	vmovdqa	%ymm8,%ymm9
8004	vmovdqa	%ymm0,%ymm2
8005	vmovdqa	%ymm4,%ymm6
8006	vmovdqa	%ymm8,%ymm10
8007	vmovdqa	%ymm0,%ymm3
8008	vmovdqa	%ymm4,%ymm7
8009	vmovdqa	%ymm8,%ymm11
8010	vmovdqa	L$avx2_inc(%rip),%ymm12
8011	vpaddd	0+160(%rbp),%ymm12,%ymm15
8012	vpaddd	%ymm15,%ymm12,%ymm14
8013	vpaddd	%ymm14,%ymm12,%ymm13
8014	vpaddd	%ymm13,%ymm12,%ymm12
8015	vmovdqa	%ymm15,0+256(%rbp)
8016	vmovdqa	%ymm14,0+224(%rbp)
8017	vmovdqa	%ymm13,0+192(%rbp)
8018	vmovdqa	%ymm12,0+160(%rbp)
8019
8020L$seal_avx2_tail_512_rounds_and_3xhash:
8021	addq	0+0(%rdi),%r10
8022	adcq	8+0(%rdi),%r11
8023	adcq	$1,%r12
8024	movq	0+0+0(%rbp),%rdx
8025	movq	%rdx,%r15
8026	mulxq	%r10,%r13,%r14
8027	mulxq	%r11,%rax,%rdx
8028	imulq	%r12,%r15
8029	addq	%rax,%r14
8030	adcq	%rdx,%r15
8031	movq	8+0+0(%rbp),%rdx
8032	mulxq	%r10,%r10,%rax
8033	addq	%r10,%r14
8034	mulxq	%r11,%r11,%r9
8035	adcq	%r11,%r15
8036	adcq	$0,%r9
8037	imulq	%r12,%rdx
8038	addq	%rax,%r15
8039	adcq	%rdx,%r9
8040	movq	%r13,%r10
8041	movq	%r14,%r11
8042	movq	%r15,%r12
8043	andq	$3,%r12
8044	movq	%r15,%r13
8045	andq	$-4,%r13
8046	movq	%r9,%r14
8047	shrdq	$2,%r9,%r15
8048	shrq	$2,%r9
8049	addq	%r13,%r15
8050	adcq	%r14,%r9
8051	addq	%r15,%r10
8052	adcq	%r9,%r11
8053	adcq	$0,%r12
8054
8055	leaq	16(%rdi),%rdi
8056L$seal_avx2_tail_512_rounds_and_2xhash:
8057	vmovdqa	%ymm8,0+128(%rbp)
8058	vmovdqa	L$rol16(%rip),%ymm8
8059	vpaddd	%ymm7,%ymm3,%ymm3
8060	vpaddd	%ymm6,%ymm2,%ymm2
8061	vpaddd	%ymm5,%ymm1,%ymm1
8062	vpaddd	%ymm4,%ymm0,%ymm0
8063	vpxor	%ymm3,%ymm15,%ymm15
8064	vpxor	%ymm2,%ymm14,%ymm14
8065	vpxor	%ymm1,%ymm13,%ymm13
8066	vpxor	%ymm0,%ymm12,%ymm12
8067	vpshufb	%ymm8,%ymm15,%ymm15
8068	vpshufb	%ymm8,%ymm14,%ymm14
8069	vpshufb	%ymm8,%ymm13,%ymm13
8070	vpshufb	%ymm8,%ymm12,%ymm12
8071	vpaddd	%ymm15,%ymm11,%ymm11
8072	vpaddd	%ymm14,%ymm10,%ymm10
8073	vpaddd	%ymm13,%ymm9,%ymm9
8074	vpaddd	0+128(%rbp),%ymm12,%ymm8
8075	vpxor	%ymm11,%ymm7,%ymm7
8076	vpxor	%ymm10,%ymm6,%ymm6
8077	addq	0+0(%rdi),%r10
8078	adcq	8+0(%rdi),%r11
8079	adcq	$1,%r12
8080	vpxor	%ymm9,%ymm5,%ymm5
8081	vpxor	%ymm8,%ymm4,%ymm4
8082	vmovdqa	%ymm8,0+128(%rbp)
8083	vpsrld	$20,%ymm7,%ymm8
8084	vpslld	$32-20,%ymm7,%ymm7
8085	vpxor	%ymm8,%ymm7,%ymm7
8086	vpsrld	$20,%ymm6,%ymm8
8087	vpslld	$32-20,%ymm6,%ymm6
8088	vpxor	%ymm8,%ymm6,%ymm6
8089	vpsrld	$20,%ymm5,%ymm8
8090	vpslld	$32-20,%ymm5,%ymm5
8091	vpxor	%ymm8,%ymm5,%ymm5
8092	vpsrld	$20,%ymm4,%ymm8
8093	vpslld	$32-20,%ymm4,%ymm4
8094	vpxor	%ymm8,%ymm4,%ymm4
8095	vmovdqa	L$rol8(%rip),%ymm8
8096	vpaddd	%ymm7,%ymm3,%ymm3
8097	vpaddd	%ymm6,%ymm2,%ymm2
8098	vpaddd	%ymm5,%ymm1,%ymm1
8099	vpaddd	%ymm4,%ymm0,%ymm0
8100	movq	0+0+0(%rbp),%rdx
8101	movq	%rdx,%r15
8102	mulxq	%r10,%r13,%r14
8103	mulxq	%r11,%rax,%rdx
8104	imulq	%r12,%r15
8105	addq	%rax,%r14
8106	adcq	%rdx,%r15
8107	vpxor	%ymm3,%ymm15,%ymm15
8108	vpxor	%ymm2,%ymm14,%ymm14
8109	vpxor	%ymm1,%ymm13,%ymm13
8110	vpxor	%ymm0,%ymm12,%ymm12
8111	vpshufb	%ymm8,%ymm15,%ymm15
8112	vpshufb	%ymm8,%ymm14,%ymm14
8113	vpshufb	%ymm8,%ymm13,%ymm13
8114	vpshufb	%ymm8,%ymm12,%ymm12
8115	vpaddd	%ymm15,%ymm11,%ymm11
8116	vpaddd	%ymm14,%ymm10,%ymm10
8117	vpaddd	%ymm13,%ymm9,%ymm9
8118	vpaddd	0+128(%rbp),%ymm12,%ymm8
8119	vpxor	%ymm11,%ymm7,%ymm7
8120	vpxor	%ymm10,%ymm6,%ymm6
8121	vpxor	%ymm9,%ymm5,%ymm5
8122	vpxor	%ymm8,%ymm4,%ymm4
8123	vmovdqa	%ymm8,0+128(%rbp)
8124	vpsrld	$25,%ymm7,%ymm8
8125	vpslld	$32-25,%ymm7,%ymm7
8126	vpxor	%ymm8,%ymm7,%ymm7
8127	movq	8+0+0(%rbp),%rdx
8128	mulxq	%r10,%r10,%rax
8129	addq	%r10,%r14
8130	mulxq	%r11,%r11,%r9
8131	adcq	%r11,%r15
8132	adcq	$0,%r9
8133	imulq	%r12,%rdx
8134	vpsrld	$25,%ymm6,%ymm8
8135	vpslld	$32-25,%ymm6,%ymm6
8136	vpxor	%ymm8,%ymm6,%ymm6
8137	vpsrld	$25,%ymm5,%ymm8
8138	vpslld	$32-25,%ymm5,%ymm5
8139	vpxor	%ymm8,%ymm5,%ymm5
8140	vpsrld	$25,%ymm4,%ymm8
8141	vpslld	$32-25,%ymm4,%ymm4
8142	vpxor	%ymm8,%ymm4,%ymm4
8143	vmovdqa	0+128(%rbp),%ymm8
8144	vpalignr	$4,%ymm7,%ymm7,%ymm7
8145	vpalignr	$8,%ymm11,%ymm11,%ymm11
8146	vpalignr	$12,%ymm15,%ymm15,%ymm15
8147	vpalignr	$4,%ymm6,%ymm6,%ymm6
8148	vpalignr	$8,%ymm10,%ymm10,%ymm10
8149	vpalignr	$12,%ymm14,%ymm14,%ymm14
8150	vpalignr	$4,%ymm5,%ymm5,%ymm5
8151	vpalignr	$8,%ymm9,%ymm9,%ymm9
8152	vpalignr	$12,%ymm13,%ymm13,%ymm13
8153	vpalignr	$4,%ymm4,%ymm4,%ymm4
8154	addq	%rax,%r15
8155	adcq	%rdx,%r9
8156	vpalignr	$8,%ymm8,%ymm8,%ymm8
8157	vpalignr	$12,%ymm12,%ymm12,%ymm12
8158	vmovdqa	%ymm8,0+128(%rbp)
8159	vmovdqa	L$rol16(%rip),%ymm8
8160	vpaddd	%ymm7,%ymm3,%ymm3
8161	vpaddd	%ymm6,%ymm2,%ymm2
8162	vpaddd	%ymm5,%ymm1,%ymm1
8163	vpaddd	%ymm4,%ymm0,%ymm0
8164	vpxor	%ymm3,%ymm15,%ymm15
8165	vpxor	%ymm2,%ymm14,%ymm14
8166	vpxor	%ymm1,%ymm13,%ymm13
8167	vpxor	%ymm0,%ymm12,%ymm12
8168	vpshufb	%ymm8,%ymm15,%ymm15
8169	vpshufb	%ymm8,%ymm14,%ymm14
8170	vpshufb	%ymm8,%ymm13,%ymm13
8171	vpshufb	%ymm8,%ymm12,%ymm12
8172	vpaddd	%ymm15,%ymm11,%ymm11
8173	vpaddd	%ymm14,%ymm10,%ymm10
8174	vpaddd	%ymm13,%ymm9,%ymm9
8175	vpaddd	0+128(%rbp),%ymm12,%ymm8
8176	movq	%r13,%r10
8177	movq	%r14,%r11
8178	movq	%r15,%r12
8179	andq	$3,%r12
8180	movq	%r15,%r13
8181	andq	$-4,%r13
8182	movq	%r9,%r14
8183	shrdq	$2,%r9,%r15
8184	shrq	$2,%r9
8185	addq	%r13,%r15
8186	adcq	%r14,%r9
8187	addq	%r15,%r10
8188	adcq	%r9,%r11
8189	adcq	$0,%r12
8190	vpxor	%ymm11,%ymm7,%ymm7
8191	vpxor	%ymm10,%ymm6,%ymm6
8192	vpxor	%ymm9,%ymm5,%ymm5
8193	vpxor	%ymm8,%ymm4,%ymm4
8194	vmovdqa	%ymm8,0+128(%rbp)
8195	vpsrld	$20,%ymm7,%ymm8
8196	vpslld	$32-20,%ymm7,%ymm7
8197	vpxor	%ymm8,%ymm7,%ymm7
8198	vpsrld	$20,%ymm6,%ymm8
8199	vpslld	$32-20,%ymm6,%ymm6
8200	vpxor	%ymm8,%ymm6,%ymm6
8201	vpsrld	$20,%ymm5,%ymm8
8202	vpslld	$32-20,%ymm5,%ymm5
8203	vpxor	%ymm8,%ymm5,%ymm5
8204	vpsrld	$20,%ymm4,%ymm8
8205	vpslld	$32-20,%ymm4,%ymm4
8206	vpxor	%ymm8,%ymm4,%ymm4
8207	vmovdqa	L$rol8(%rip),%ymm8
8208	vpaddd	%ymm7,%ymm3,%ymm3
8209	vpaddd	%ymm6,%ymm2,%ymm2
8210	addq	0+16(%rdi),%r10
8211	adcq	8+16(%rdi),%r11
8212	adcq	$1,%r12
8213	vpaddd	%ymm5,%ymm1,%ymm1
8214	vpaddd	%ymm4,%ymm0,%ymm0
8215	vpxor	%ymm3,%ymm15,%ymm15
8216	vpxor	%ymm2,%ymm14,%ymm14
8217	vpxor	%ymm1,%ymm13,%ymm13
8218	vpxor	%ymm0,%ymm12,%ymm12
8219	vpshufb	%ymm8,%ymm15,%ymm15
8220	vpshufb	%ymm8,%ymm14,%ymm14
8221	vpshufb	%ymm8,%ymm13,%ymm13
8222	vpshufb	%ymm8,%ymm12,%ymm12
8223	vpaddd	%ymm15,%ymm11,%ymm11
8224	vpaddd	%ymm14,%ymm10,%ymm10
8225	vpaddd	%ymm13,%ymm9,%ymm9
8226	vpaddd	0+128(%rbp),%ymm12,%ymm8
8227	vpxor	%ymm11,%ymm7,%ymm7
8228	vpxor	%ymm10,%ymm6,%ymm6
8229	vpxor	%ymm9,%ymm5,%ymm5
8230	vpxor	%ymm8,%ymm4,%ymm4
8231	vmovdqa	%ymm8,0+128(%rbp)
8232	vpsrld	$25,%ymm7,%ymm8
8233	movq	0+0+0(%rbp),%rdx
8234	movq	%rdx,%r15
8235	mulxq	%r10,%r13,%r14
8236	mulxq	%r11,%rax,%rdx
8237	imulq	%r12,%r15
8238	addq	%rax,%r14
8239	adcq	%rdx,%r15
8240	vpslld	$32-25,%ymm7,%ymm7
8241	vpxor	%ymm8,%ymm7,%ymm7
8242	vpsrld	$25,%ymm6,%ymm8
8243	vpslld	$32-25,%ymm6,%ymm6
8244	vpxor	%ymm8,%ymm6,%ymm6
8245	vpsrld	$25,%ymm5,%ymm8
8246	vpslld	$32-25,%ymm5,%ymm5
8247	vpxor	%ymm8,%ymm5,%ymm5
8248	vpsrld	$25,%ymm4,%ymm8
8249	vpslld	$32-25,%ymm4,%ymm4
8250	vpxor	%ymm8,%ymm4,%ymm4
8251	vmovdqa	0+128(%rbp),%ymm8
8252	vpalignr	$12,%ymm7,%ymm7,%ymm7
8253	vpalignr	$8,%ymm11,%ymm11,%ymm11
8254	vpalignr	$4,%ymm15,%ymm15,%ymm15
8255	vpalignr	$12,%ymm6,%ymm6,%ymm6
8256	vpalignr	$8,%ymm10,%ymm10,%ymm10
8257	vpalignr	$4,%ymm14,%ymm14,%ymm14
8258	vpalignr	$12,%ymm5,%ymm5,%ymm5
8259	vpalignr	$8,%ymm9,%ymm9,%ymm9
8260	movq	8+0+0(%rbp),%rdx
8261	mulxq	%r10,%r10,%rax
8262	addq	%r10,%r14
8263	mulxq	%r11,%r11,%r9
8264	adcq	%r11,%r15
8265	adcq	$0,%r9
8266	imulq	%r12,%rdx
8267	vpalignr	$4,%ymm13,%ymm13,%ymm13
8268	vpalignr	$12,%ymm4,%ymm4,%ymm4
8269	vpalignr	$8,%ymm8,%ymm8,%ymm8
8270	vpalignr	$4,%ymm12,%ymm12,%ymm12
8287	addq	%rax,%r15
8288	adcq	%rdx,%r9
8309	movq	%r13,%r10
8310	movq	%r14,%r11
8311	movq	%r15,%r12
8312	andq	$3,%r12
8313	movq	%r15,%r13
8314	andq	$-4,%r13
8315	movq	%r9,%r14
8316	shrdq	$2,%r9,%r15
8317	shrq	$2,%r9
8318	addq	%r13,%r15
8319	adcq	%r14,%r9
8320	addq	%r15,%r10
8321	adcq	%r9,%r11
8322	adcq	$0,%r12
8323
8324	leaq	32(%rdi),%rdi
8325	decq	%rcx
8326	jg	L$seal_avx2_tail_512_rounds_and_3xhash
8327	decq	%r8
8328	jge	L$seal_avx2_tail_512_rounds_and_2xhash
8329	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
8330	vpaddd	0+64(%rbp),%ymm7,%ymm7
8331	vpaddd	0+96(%rbp),%ymm11,%ymm11
8332	vpaddd	0+256(%rbp),%ymm15,%ymm15
8333	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
8334	vpaddd	0+64(%rbp),%ymm6,%ymm6
8335	vpaddd	0+96(%rbp),%ymm10,%ymm10
8336	vpaddd	0+224(%rbp),%ymm14,%ymm14
8337	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
8338	vpaddd	0+64(%rbp),%ymm5,%ymm5
8339	vpaddd	0+96(%rbp),%ymm9,%ymm9
8340	vpaddd	0+192(%rbp),%ymm13,%ymm13
8341	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
8342	vpaddd	0+64(%rbp),%ymm4,%ymm4
8343	vpaddd	0+96(%rbp),%ymm8,%ymm8
8344	vpaddd	0+160(%rbp),%ymm12,%ymm12
8345
8346	vmovdqa	%ymm0,0+128(%rbp)
8347	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
8348	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
8349	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
8350	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
8351	vpxor	0+0(%rsi),%ymm0,%ymm0
8352	vpxor	32+0(%rsi),%ymm3,%ymm3
8353	vpxor	64+0(%rsi),%ymm7,%ymm7
8354	vpxor	96+0(%rsi),%ymm11,%ymm11
8355	vmovdqu	%ymm0,0+0(%rdi)
8356	vmovdqu	%ymm3,32+0(%rdi)
8357	vmovdqu	%ymm7,64+0(%rdi)
8358	vmovdqu	%ymm11,96+0(%rdi)
8359
8360	vmovdqa	0+128(%rbp),%ymm0
8361	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8362	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8363	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8364	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8365	vpxor	0+128(%rsi),%ymm3,%ymm3
8366	vpxor	32+128(%rsi),%ymm2,%ymm2
8367	vpxor	64+128(%rsi),%ymm6,%ymm6
8368	vpxor	96+128(%rsi),%ymm10,%ymm10
8369	vmovdqu	%ymm3,0+128(%rdi)
8370	vmovdqu	%ymm2,32+128(%rdi)
8371	vmovdqu	%ymm6,64+128(%rdi)
8372	vmovdqu	%ymm10,96+128(%rdi)
8373	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8374	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8375	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8376	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8377	vpxor	0+256(%rsi),%ymm3,%ymm3
8378	vpxor	32+256(%rsi),%ymm1,%ymm1
8379	vpxor	64+256(%rsi),%ymm5,%ymm5
8380	vpxor	96+256(%rsi),%ymm9,%ymm9
8381	vmovdqu	%ymm3,0+256(%rdi)
8382	vmovdqu	%ymm1,32+256(%rdi)
8383	vmovdqu	%ymm5,64+256(%rdi)
8384	vmovdqu	%ymm9,96+256(%rdi)
8385	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8386	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8387	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8388	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8389	vmovdqa	%ymm3,%ymm8
8390
8391	movq	$384,%rcx
8392	leaq	384(%rsi),%rsi
8393	subq	$384,%rbx
8394	jmp	L$seal_avx2_short_hash_remainder
8395
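# Short-input path: three 2-block states give 384 bytes of keystream; the
# first 64-byte block is clamped below to form the Poly1305 key, leaving 320
# bytes for encryption.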
8396L$seal_avx2_320:
8397	vmovdqa	%ymm0,%ymm1
8398	vmovdqa	%ymm0,%ymm2
8399	vmovdqa	%ymm4,%ymm5
8400	vmovdqa	%ymm4,%ymm6
8401	vmovdqa	%ymm8,%ymm9
8402	vmovdqa	%ymm8,%ymm10
8403	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
8404	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
8405	vmovdqa	%ymm4,%ymm7
8406	vmovdqa	%ymm8,%ymm11
8407	vmovdqa	%ymm12,0+160(%rbp)
8408	vmovdqa	%ymm13,0+192(%rbp)
8409	vmovdqa	%ymm14,0+224(%rbp)
8410	movq	$10,%r10
8411L$seal_avx2_320_rounds:
8412	vpaddd	%ymm4,%ymm0,%ymm0
8413	vpxor	%ymm0,%ymm12,%ymm12
8414	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8415	vpaddd	%ymm12,%ymm8,%ymm8
8416	vpxor	%ymm8,%ymm4,%ymm4
8417	vpsrld	$20,%ymm4,%ymm3
8418	vpslld	$12,%ymm4,%ymm4
8419	vpxor	%ymm3,%ymm4,%ymm4
8420	vpaddd	%ymm4,%ymm0,%ymm0
8421	vpxor	%ymm0,%ymm12,%ymm12
8422	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8423	vpaddd	%ymm12,%ymm8,%ymm8
8424	vpxor	%ymm8,%ymm4,%ymm4
8425	vpslld	$7,%ymm4,%ymm3
8426	vpsrld	$25,%ymm4,%ymm4
8427	vpxor	%ymm3,%ymm4,%ymm4
8428	vpalignr	$12,%ymm12,%ymm12,%ymm12
8429	vpalignr	$8,%ymm8,%ymm8,%ymm8
8430	vpalignr	$4,%ymm4,%ymm4,%ymm4
8431	vpaddd	%ymm5,%ymm1,%ymm1
8432	vpxor	%ymm1,%ymm13,%ymm13
8433	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8434	vpaddd	%ymm13,%ymm9,%ymm9
8435	vpxor	%ymm9,%ymm5,%ymm5
8436	vpsrld	$20,%ymm5,%ymm3
8437	vpslld	$12,%ymm5,%ymm5
8438	vpxor	%ymm3,%ymm5,%ymm5
8439	vpaddd	%ymm5,%ymm1,%ymm1
8440	vpxor	%ymm1,%ymm13,%ymm13
8441	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8442	vpaddd	%ymm13,%ymm9,%ymm9
8443	vpxor	%ymm9,%ymm5,%ymm5
8444	vpslld	$7,%ymm5,%ymm3
8445	vpsrld	$25,%ymm5,%ymm5
8446	vpxor	%ymm3,%ymm5,%ymm5
8447	vpalignr	$12,%ymm13,%ymm13,%ymm13
8448	vpalignr	$8,%ymm9,%ymm9,%ymm9
8449	vpalignr	$4,%ymm5,%ymm5,%ymm5
8450	vpaddd	%ymm6,%ymm2,%ymm2
8451	vpxor	%ymm2,%ymm14,%ymm14
8452	vpshufb	L$rol16(%rip),%ymm14,%ymm14
8453	vpaddd	%ymm14,%ymm10,%ymm10
8454	vpxor	%ymm10,%ymm6,%ymm6
8455	vpsrld	$20,%ymm6,%ymm3
8456	vpslld	$12,%ymm6,%ymm6
8457	vpxor	%ymm3,%ymm6,%ymm6
8458	vpaddd	%ymm6,%ymm2,%ymm2
8459	vpxor	%ymm2,%ymm14,%ymm14
8460	vpshufb	L$rol8(%rip),%ymm14,%ymm14
8461	vpaddd	%ymm14,%ymm10,%ymm10
8462	vpxor	%ymm10,%ymm6,%ymm6
8463	vpslld	$7,%ymm6,%ymm3
8464	vpsrld	$25,%ymm6,%ymm6
8465	vpxor	%ymm3,%ymm6,%ymm6
8466	vpalignr	$12,%ymm14,%ymm14,%ymm14
8467	vpalignr	$8,%ymm10,%ymm10,%ymm10
8468	vpalignr	$4,%ymm6,%ymm6,%ymm6
8469	vpaddd	%ymm4,%ymm0,%ymm0
8470	vpxor	%ymm0,%ymm12,%ymm12
8471	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8472	vpaddd	%ymm12,%ymm8,%ymm8
8473	vpxor	%ymm8,%ymm4,%ymm4
8474	vpsrld	$20,%ymm4,%ymm3
8475	vpslld	$12,%ymm4,%ymm4
8476	vpxor	%ymm3,%ymm4,%ymm4
8477	vpaddd	%ymm4,%ymm0,%ymm0
8478	vpxor	%ymm0,%ymm12,%ymm12
8479	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8480	vpaddd	%ymm12,%ymm8,%ymm8
8481	vpxor	%ymm8,%ymm4,%ymm4
8482	vpslld	$7,%ymm4,%ymm3
8483	vpsrld	$25,%ymm4,%ymm4
8484	vpxor	%ymm3,%ymm4,%ymm4
8485	vpalignr	$4,%ymm12,%ymm12,%ymm12
8486	vpalignr	$8,%ymm8,%ymm8,%ymm8
8487	vpalignr	$12,%ymm4,%ymm4,%ymm4
8488	vpaddd	%ymm5,%ymm1,%ymm1
8489	vpxor	%ymm1,%ymm13,%ymm13
8490	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8491	vpaddd	%ymm13,%ymm9,%ymm9
8492	vpxor	%ymm9,%ymm5,%ymm5
8493	vpsrld	$20,%ymm5,%ymm3
8494	vpslld	$12,%ymm5,%ymm5
8495	vpxor	%ymm3,%ymm5,%ymm5
8496	vpaddd	%ymm5,%ymm1,%ymm1
8497	vpxor	%ymm1,%ymm13,%ymm13
8498	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8499	vpaddd	%ymm13,%ymm9,%ymm9
8500	vpxor	%ymm9,%ymm5,%ymm5
8501	vpslld	$7,%ymm5,%ymm3
8502	vpsrld	$25,%ymm5,%ymm5
8503	vpxor	%ymm3,%ymm5,%ymm5
8504	vpalignr	$4,%ymm13,%ymm13,%ymm13
8505	vpalignr	$8,%ymm9,%ymm9,%ymm9
8506	vpalignr	$12,%ymm5,%ymm5,%ymm5
8507	vpaddd	%ymm6,%ymm2,%ymm2
8508	vpxor	%ymm2,%ymm14,%ymm14
8509	vpshufb	L$rol16(%rip),%ymm14,%ymm14
8510	vpaddd	%ymm14,%ymm10,%ymm10
8511	vpxor	%ymm10,%ymm6,%ymm6
8512	vpsrld	$20,%ymm6,%ymm3
8513	vpslld	$12,%ymm6,%ymm6
8514	vpxor	%ymm3,%ymm6,%ymm6
8515	vpaddd	%ymm6,%ymm2,%ymm2
8516	vpxor	%ymm2,%ymm14,%ymm14
8517	vpshufb	L$rol8(%rip),%ymm14,%ymm14
8518	vpaddd	%ymm14,%ymm10,%ymm10
8519	vpxor	%ymm10,%ymm6,%ymm6
8520	vpslld	$7,%ymm6,%ymm3
8521	vpsrld	$25,%ymm6,%ymm6
8522	vpxor	%ymm3,%ymm6,%ymm6
8523	vpalignr	$4,%ymm14,%ymm14,%ymm14
8524	vpalignr	$8,%ymm10,%ymm10,%ymm10
8525	vpalignr	$12,%ymm6,%ymm6,%ymm6
8526
8527	decq	%r10
8528	jne	L$seal_avx2_320_rounds
8529	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
8530	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
8531	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
8532	vpaddd	%ymm7,%ymm4,%ymm4
8533	vpaddd	%ymm7,%ymm5,%ymm5
8534	vpaddd	%ymm7,%ymm6,%ymm6
8535	vpaddd	%ymm11,%ymm8,%ymm8
8536	vpaddd	%ymm11,%ymm9,%ymm9
8537	vpaddd	%ymm11,%ymm10,%ymm10
8538	vpaddd	0+160(%rbp),%ymm12,%ymm12
8539	vpaddd	0+192(%rbp),%ymm13,%ymm13
8540	vpaddd	0+224(%rbp),%ymm14,%ymm14
8541	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8542
8543	vpand	L$clamp(%rip),%ymm3,%ymm3
8544	vmovdqa	%ymm3,0+0(%rbp)
8545
8546	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8547	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8548	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8549	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8550	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8551	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
8552	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
8553	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
8554	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
8555	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
8556	jmp	L$seal_avx2_short
8557
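# Shortest-input path: two 2-block states give 256 bytes of keystream; the
# first 64-byte block becomes the Poly1305 key, leaving 192 bytes for
# encryption.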
8558L$seal_avx2_192:
8559	vmovdqa	%ymm0,%ymm1
8560	vmovdqa	%ymm0,%ymm2
8561	vmovdqa	%ymm4,%ymm5
8562	vmovdqa	%ymm4,%ymm6
8563	vmovdqa	%ymm8,%ymm9
8564	vmovdqa	%ymm8,%ymm10
8565	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
8566	vmovdqa	%ymm12,%ymm11
8567	vmovdqa	%ymm13,%ymm15
8568	movq	$10,%r10
8569L$seal_avx2_192_rounds:
8570	vpaddd	%ymm4,%ymm0,%ymm0
8571	vpxor	%ymm0,%ymm12,%ymm12
8572	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8573	vpaddd	%ymm12,%ymm8,%ymm8
8574	vpxor	%ymm8,%ymm4,%ymm4
8575	vpsrld	$20,%ymm4,%ymm3
8576	vpslld	$12,%ymm4,%ymm4
8577	vpxor	%ymm3,%ymm4,%ymm4
8578	vpaddd	%ymm4,%ymm0,%ymm0
8579	vpxor	%ymm0,%ymm12,%ymm12
8580	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8581	vpaddd	%ymm12,%ymm8,%ymm8
8582	vpxor	%ymm8,%ymm4,%ymm4
8583	vpslld	$7,%ymm4,%ymm3
8584	vpsrld	$25,%ymm4,%ymm4
8585	vpxor	%ymm3,%ymm4,%ymm4
8586	vpalignr	$12,%ymm12,%ymm12,%ymm12
8587	vpalignr	$8,%ymm8,%ymm8,%ymm8
8588	vpalignr	$4,%ymm4,%ymm4,%ymm4
8589	vpaddd	%ymm5,%ymm1,%ymm1
8590	vpxor	%ymm1,%ymm13,%ymm13
8591	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8592	vpaddd	%ymm13,%ymm9,%ymm9
8593	vpxor	%ymm9,%ymm5,%ymm5
8594	vpsrld	$20,%ymm5,%ymm3
8595	vpslld	$12,%ymm5,%ymm5
8596	vpxor	%ymm3,%ymm5,%ymm5
8597	vpaddd	%ymm5,%ymm1,%ymm1
8598	vpxor	%ymm1,%ymm13,%ymm13
8599	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8600	vpaddd	%ymm13,%ymm9,%ymm9
8601	vpxor	%ymm9,%ymm5,%ymm5
8602	vpslld	$7,%ymm5,%ymm3
8603	vpsrld	$25,%ymm5,%ymm5
8604	vpxor	%ymm3,%ymm5,%ymm5
8605	vpalignr	$12,%ymm13,%ymm13,%ymm13
8606	vpalignr	$8,%ymm9,%ymm9,%ymm9
8607	vpalignr	$4,%ymm5,%ymm5,%ymm5
8608	vpaddd	%ymm4,%ymm0,%ymm0
8609	vpxor	%ymm0,%ymm12,%ymm12
8610	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8611	vpaddd	%ymm12,%ymm8,%ymm8
8612	vpxor	%ymm8,%ymm4,%ymm4
8613	vpsrld	$20,%ymm4,%ymm3
8614	vpslld	$12,%ymm4,%ymm4
8615	vpxor	%ymm3,%ymm4,%ymm4
8616	vpaddd	%ymm4,%ymm0,%ymm0
8617	vpxor	%ymm0,%ymm12,%ymm12
8618	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8619	vpaddd	%ymm12,%ymm8,%ymm8
8620	vpxor	%ymm8,%ymm4,%ymm4
8621	vpslld	$7,%ymm4,%ymm3
8622	vpsrld	$25,%ymm4,%ymm4
8623	vpxor	%ymm3,%ymm4,%ymm4
8624	vpalignr	$4,%ymm12,%ymm12,%ymm12
8625	vpalignr	$8,%ymm8,%ymm8,%ymm8
8626	vpalignr	$12,%ymm4,%ymm4,%ymm4
8627	vpaddd	%ymm5,%ymm1,%ymm1
8628	vpxor	%ymm1,%ymm13,%ymm13
8629	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8630	vpaddd	%ymm13,%ymm9,%ymm9
8631	vpxor	%ymm9,%ymm5,%ymm5
8632	vpsrld	$20,%ymm5,%ymm3
8633	vpslld	$12,%ymm5,%ymm5
8634	vpxor	%ymm3,%ymm5,%ymm5
8635	vpaddd	%ymm5,%ymm1,%ymm1
8636	vpxor	%ymm1,%ymm13,%ymm13
8637	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8638	vpaddd	%ymm13,%ymm9,%ymm9
8639	vpxor	%ymm9,%ymm5,%ymm5
8640	vpslld	$7,%ymm5,%ymm3
8641	vpsrld	$25,%ymm5,%ymm5
8642	vpxor	%ymm3,%ymm5,%ymm5
8643	vpalignr	$4,%ymm13,%ymm13,%ymm13
8644	vpalignr	$8,%ymm9,%ymm9,%ymm9
8645	vpalignr	$12,%ymm5,%ymm5,%ymm5
8646
8647	decq	%r10
8648	jne	L$seal_avx2_192_rounds
8649	vpaddd	%ymm2,%ymm0,%ymm0
8650	vpaddd	%ymm2,%ymm1,%ymm1
8651	vpaddd	%ymm6,%ymm4,%ymm4
8652	vpaddd	%ymm6,%ymm5,%ymm5
8653	vpaddd	%ymm10,%ymm8,%ymm8
8654	vpaddd	%ymm10,%ymm9,%ymm9
8655	vpaddd	%ymm11,%ymm12,%ymm12
8656	vpaddd	%ymm15,%ymm13,%ymm13
8657	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8658
8659	vpand	L$clamp(%rip),%ymm3,%ymm3
8660	vmovdqa	%ymm3,0+0(%rbp)
8661
8662	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8663	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8664	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8665	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8666	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8667	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
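# Common short-path finish: absorb the additional data into Poly1305, then
# encrypt and hash the message using the keystream queued in the YMM
# registers.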
8668L$seal_avx2_short:
8669	movq	%r8,%r8
8670	call	poly_hash_ad_internal
8671	xorq	%rcx,%rcx
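# Hash %rcx bytes of ciphertext that were written but not yet absorbed,
# 16 bytes per pass (also the re-entry point for the larger tails above).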
8672L$seal_avx2_short_hash_remainder:
8673	cmpq	$16,%rcx
8674	jb	L$seal_avx2_short_loop
8675	addq	0+0(%rdi),%r10
8676	adcq	8+0(%rdi),%r11
8677	adcq	$1,%r12
8678	movq	0+0+0(%rbp),%rax
8679	movq	%rax,%r15
8680	mulq	%r10
8681	movq	%rax,%r13
8682	movq	%rdx,%r14
8683	movq	0+0+0(%rbp),%rax
8684	mulq	%r11
8685	imulq	%r12,%r15
8686	addq	%rax,%r14
8687	adcq	%rdx,%r15
8688	movq	8+0+0(%rbp),%rax
8689	movq	%rax,%r9
8690	mulq	%r10
8691	addq	%rax,%r14
8692	adcq	$0,%rdx
8693	movq	%rdx,%r10
8694	movq	8+0+0(%rbp),%rax
8695	mulq	%r11
8696	addq	%rax,%r15
8697	adcq	$0,%rdx
8698	imulq	%r12,%r9
8699	addq	%r10,%r15
8700	adcq	%rdx,%r9
8701	movq	%r13,%r10
8702	movq	%r14,%r11
8703	movq	%r15,%r12
8704	andq	$3,%r12
8705	movq	%r15,%r13
8706	andq	$-4,%r13
8707	movq	%r9,%r14
8708	shrdq	$2,%r9,%r15
8709	shrq	$2,%r9
8710	addq	%r13,%r15
8711	adcq	%r14,%r9
8712	addq	%r15,%r10
8713	adcq	%r9,%r11
8714	adcq	$0,%r12
8715
8716	subq	$16,%rcx
8717	addq	$16,%rdi
8718	jmp	L$seal_avx2_short_hash_remainder
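# While at least 32 bytes remain: XOR the plaintext with %ymm0, store the
# ciphertext, hash the two resulting 16-byte blocks, then rotate the next
# keystream register into place.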
8719L$seal_avx2_short_loop:
8720	cmpq	$32,%rbx
8721	jb	L$seal_avx2_short_tail
8722	subq	$32,%rbx
8723
8724	vpxor	(%rsi),%ymm0,%ymm0
8725	vmovdqu	%ymm0,(%rdi)
8726	leaq	32(%rsi),%rsi
8727
8728	addq	0+0(%rdi),%r10
8729	adcq	8+0(%rdi),%r11
8730	adcq	$1,%r12
8731	movq	0+0+0(%rbp),%rax
8732	movq	%rax,%r15
8733	mulq	%r10
8734	movq	%rax,%r13
8735	movq	%rdx,%r14
8736	movq	0+0+0(%rbp),%rax
8737	mulq	%r11
8738	imulq	%r12,%r15
8739	addq	%rax,%r14
8740	adcq	%rdx,%r15
8741	movq	8+0+0(%rbp),%rax
8742	movq	%rax,%r9
8743	mulq	%r10
8744	addq	%rax,%r14
8745	adcq	$0,%rdx
8746	movq	%rdx,%r10
8747	movq	8+0+0(%rbp),%rax
8748	mulq	%r11
8749	addq	%rax,%r15
8750	adcq	$0,%rdx
8751	imulq	%r12,%r9
8752	addq	%r10,%r15
8753	adcq	%rdx,%r9
8754	movq	%r13,%r10
8755	movq	%r14,%r11
8756	movq	%r15,%r12
8757	andq	$3,%r12
8758	movq	%r15,%r13
8759	andq	$-4,%r13
8760	movq	%r9,%r14
8761	shrdq	$2,%r9,%r15
8762	shrq	$2,%r9
8763	addq	%r13,%r15
8764	adcq	%r14,%r9
8765	addq	%r15,%r10
8766	adcq	%r9,%r11
8767	adcq	$0,%r12
8768	addq	0+16(%rdi),%r10
8769	adcq	8+16(%rdi),%r11
8770	adcq	$1,%r12
8771	movq	0+0+0(%rbp),%rax
8772	movq	%rax,%r15
8773	mulq	%r10
8774	movq	%rax,%r13
8775	movq	%rdx,%r14
8776	movq	0+0+0(%rbp),%rax
8777	mulq	%r11
8778	imulq	%r12,%r15
8779	addq	%rax,%r14
8780	adcq	%rdx,%r15
8781	movq	8+0+0(%rbp),%rax
8782	movq	%rax,%r9
8783	mulq	%r10
8784	addq	%rax,%r14
8785	adcq	$0,%rdx
8786	movq	%rdx,%r10
8787	movq	8+0+0(%rbp),%rax
8788	mulq	%r11
8789	addq	%rax,%r15
8790	adcq	$0,%rdx
8791	imulq	%r12,%r9
8792	addq	%r10,%r15
8793	adcq	%rdx,%r9
8794	movq	%r13,%r10
8795	movq	%r14,%r11
8796	movq	%r15,%r12
8797	andq	$3,%r12
8798	movq	%r15,%r13
8799	andq	$-4,%r13
8800	movq	%r9,%r14
8801	shrdq	$2,%r9,%r15
8802	shrq	$2,%r9
8803	addq	%r13,%r15
8804	adcq	%r14,%r9
8805	addq	%r15,%r10
8806	adcq	%r9,%r11
8807	adcq	$0,%r12
8808
8809	leaq	32(%rdi),%rdi
8810
8811	vmovdqa	%ymm4,%ymm0
8812	vmovdqa	%ymm8,%ymm4
8813	vmovdqa	%ymm12,%ymm8
8814	vmovdqa	%ymm1,%ymm12
8815	vmovdqa	%ymm5,%ymm1
8816	vmovdqa	%ymm9,%ymm5
8817	vmovdqa	%ymm13,%ymm9
8818	vmovdqa	%ymm2,%ymm13
8819	vmovdqa	%ymm6,%ymm2
8820	jmp	L$seal_avx2_short_loop
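# 16..31 bytes remain: encrypt and hash one final 16-byte block; anything
# shorter is left for the SSE tail.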
8821L$seal_avx2_short_tail:
8822	cmpq	$16,%rbx
8823	jb	L$seal_avx2_exit
8824	subq	$16,%rbx
8825	vpxor	(%rsi),%xmm0,%xmm3
8826	vmovdqu	%xmm3,(%rdi)
8827	leaq	16(%rsi),%rsi
8828	addq	0+0(%rdi),%r10
8829	adcq	8+0(%rdi),%r11
8830	adcq	$1,%r12
8831	movq	0+0+0(%rbp),%rax
8832	movq	%rax,%r15
8833	mulq	%r10
8834	movq	%rax,%r13
8835	movq	%rdx,%r14
8836	movq	0+0+0(%rbp),%rax
8837	mulq	%r11
8838	imulq	%r12,%r15
8839	addq	%rax,%r14
8840	adcq	%rdx,%r15
8841	movq	8+0+0(%rbp),%rax
8842	movq	%rax,%r9
8843	mulq	%r10
8844	addq	%rax,%r14
8845	adcq	$0,%rdx
8846	movq	%rdx,%r10
8847	movq	8+0+0(%rbp),%rax
8848	mulq	%r11
8849	addq	%rax,%r15
8850	adcq	$0,%rdx
8851	imulq	%r12,%r9
8852	addq	%r10,%r15
8853	adcq	%rdx,%r9
8854	movq	%r13,%r10
8855	movq	%r14,%r11
8856	movq	%r15,%r12
8857	andq	$3,%r12
8858	movq	%r15,%r13
8859	andq	$-4,%r13
8860	movq	%r9,%r14
8861	shrdq	$2,%r9,%r15
8862	shrq	$2,%r9
8863	addq	%r13,%r15
8864	adcq	%r14,%r9
8865	addq	%r15,%r10
8866	adcq	%r9,%r11
8867	adcq	$0,%r12
8868
8869	leaq	16(%rdi),%rdi
8870	vextracti128	$1,%ymm0,%xmm0
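# AVX2 work is done: clear the upper YMM state and finish the last partial
# block and the tag in the SSE tail code.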
8871L$seal_avx2_exit:
8872	vzeroupper
8873	jmp	L$seal_sse_tail_16
8874
8875
8876#endif
8877