1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11.text
12
13
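# Constant pool shared by the SSE and AVX2 paths: the ChaCha20
# "expand 32-byte k" words, pshufb masks for the 8- and 16-bit word
# rotates (L$rol8/L$rol16), block-counter increments (L$sse_inc,
# L$avx2_inc), the Poly1305 r-clamping mask (L$clamp), and byte masks
# for partial final blocks (L$and_masks).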
14chacha20_poly1305_constants:
15
16.p2align	6
17L$chacha20_consts:
18.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
19.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
20L$rol8:
21.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
22.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
23L$rol16:
24.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
25.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
26L$avx2_init:
27.long	0,0,0,0
28L$sse_inc:
29.long	1,0,0,0
30L$avx2_inc:
31.long	2,0,0,0,2,0,0,0
32L$clamp:
33.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
34.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
35.p2align	4
36L$and_masks:
37.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
38.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
39.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
45.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
46.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
47.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
48.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
49.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
50.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
51.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
52.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
53
54
55.p2align	6
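# Absorbs the additional data into the Poly1305 state: %rcx = AAD,
# %r8 = AAD length, key r at 0(%rbp), accumulator in %r10:%r11:%r12
# (zeroed on entry). A 13-byte AAD (the TLS record header case) is
# hashed on the fast path below without entering the 16-byte loop.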
56poly_hash_ad_internal:
57
58
59	xorq	%r10,%r10
60	xorq	%r11,%r11
61	xorq	%r12,%r12
62	cmpq	$13,%r8
63	jne	L$hash_ad_loop
64L$poly_fast_tls_ad:
65
66	movq	(%rcx),%r10
67	movq	5(%rcx),%r11
68	shrq	$24,%r11
69	movq	$1,%r12
70	movq	0+0+0(%rbp),%rax
71	movq	%rax,%r15
72	mulq	%r10
73	movq	%rax,%r13
74	movq	%rdx,%r14
75	movq	0+0+0(%rbp),%rax
76	mulq	%r11
77	imulq	%r12,%r15
78	addq	%rax,%r14
79	adcq	%rdx,%r15
80	movq	8+0+0(%rbp),%rax
81	movq	%rax,%r9
82	mulq	%r10
83	addq	%rax,%r14
84	adcq	$0,%rdx
85	movq	%rdx,%r10
86	movq	8+0+0(%rbp),%rax
87	mulq	%r11
88	addq	%rax,%r15
89	adcq	$0,%rdx
90	imulq	%r12,%r9
91	addq	%r10,%r15
92	adcq	%rdx,%r9
93	movq	%r13,%r10
94	movq	%r14,%r11
95	movq	%r15,%r12
96	andq	$3,%r12
97	movq	%r15,%r13
98	andq	$-4,%r13
99	movq	%r9,%r14
100	shrdq	$2,%r9,%r15
101	shrq	$2,%r9
102	addq	%r13,%r15
103	adcq	%r14,%r9
104	addq	%r15,%r10
105	adcq	%r9,%r11
106	adcq	$0,%r12
107
108	.byte	0xf3,0xc3
109L$hash_ad_loop:
110
111	cmpq	$16,%r8
112	jb	L$hash_ad_tail
113	addq	0+0(%rcx),%r10
114	adcq	8+0(%rcx),%r11
115	adcq	$1,%r12
116	movq	0+0+0(%rbp),%rax
117	movq	%rax,%r15
118	mulq	%r10
119	movq	%rax,%r13
120	movq	%rdx,%r14
121	movq	0+0+0(%rbp),%rax
122	mulq	%r11
123	imulq	%r12,%r15
124	addq	%rax,%r14
125	adcq	%rdx,%r15
126	movq	8+0+0(%rbp),%rax
127	movq	%rax,%r9
128	mulq	%r10
129	addq	%rax,%r14
130	adcq	$0,%rdx
131	movq	%rdx,%r10
132	movq	8+0+0(%rbp),%rax
133	mulq	%r11
134	addq	%rax,%r15
135	adcq	$0,%rdx
136	imulq	%r12,%r9
137	addq	%r10,%r15
138	adcq	%rdx,%r9
139	movq	%r13,%r10
140	movq	%r14,%r11
141	movq	%r15,%r12
142	andq	$3,%r12
143	movq	%r15,%r13
144	andq	$-4,%r13
145	movq	%r9,%r14
146	shrdq	$2,%r9,%r15
147	shrq	$2,%r9
148	addq	%r13,%r15
149	adcq	%r14,%r9
150	addq	%r15,%r10
151	adcq	%r9,%r11
152	adcq	$0,%r12
153
154	leaq	16(%rcx),%rcx
155	subq	$16,%r8
156	jmp	L$hash_ad_loop
157L$hash_ad_tail:
158	cmpq	$0,%r8
159	je	L$hash_ad_done
160
161	xorq	%r13,%r13
162	xorq	%r14,%r14
163	xorq	%r15,%r15
164	addq	%r8,%rcx
165L$hash_ad_tail_loop:
166	shldq	$8,%r13,%r14
167	shlq	$8,%r13
168	movzbq	-1(%rcx),%r15
169	xorq	%r15,%r13
170	decq	%rcx
171	decq	%r8
172	jne	L$hash_ad_tail_loop
173
174	addq	%r13,%r10
175	adcq	%r14,%r11
176	adcq	$1,%r12
177	movq	0+0+0(%rbp),%rax
178	movq	%rax,%r15
179	mulq	%r10
180	movq	%rax,%r13
181	movq	%rdx,%r14
182	movq	0+0+0(%rbp),%rax
183	mulq	%r11
184	imulq	%r12,%r15
185	addq	%rax,%r14
186	adcq	%rdx,%r15
187	movq	8+0+0(%rbp),%rax
188	movq	%rax,%r9
189	mulq	%r10
190	addq	%rax,%r14
191	adcq	$0,%rdx
192	movq	%rdx,%r10
193	movq	8+0+0(%rbp),%rax
194	mulq	%r11
195	addq	%rax,%r15
196	adcq	$0,%rdx
197	imulq	%r12,%r9
198	addq	%r10,%r15
199	adcq	%rdx,%r9
200	movq	%r13,%r10
201	movq	%r14,%r11
202	movq	%r15,%r12
203	andq	$3,%r12
204	movq	%r15,%r13
205	andq	$-4,%r13
206	movq	%r9,%r14
207	shrdq	$2,%r9,%r15
208	shrq	$2,%r9
209	addq	%r13,%r15
210	adcq	%r14,%r9
211	addq	%r15,%r10
212	adcq	%r9,%r11
213	adcq	$0,%r12
214
215
216L$hash_ad_done:
217	.byte	0xf3,0xc3
218
219
220
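# Open (decrypt-and-verify) entry point. As set up below: %rdi = output,
# %rsi = ciphertext, %rdx = ciphertext length, %rcx = AAD, %r8 = AAD
# length, %r9 = key/nonce state block; the computed Poly1305 tag is
# written back through the saved %r9 on return. Dispatches to the AVX2
# code when _GFp_ia32cap_P reports the required features, otherwise
# falls through to the SSE code below.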
221.globl	_GFp_chacha20_poly1305_open
222.private_extern _GFp_chacha20_poly1305_open
223
224.p2align	6
225_GFp_chacha20_poly1305_open:
226
227	pushq	%rbp
228
229	pushq	%rbx
230
231	pushq	%r12
232
233	pushq	%r13
234
235	pushq	%r14
236
237	pushq	%r15
238
239
240
241	pushq	%r9
242
243	subq	$288 + 0 + 32,%rsp
244
245
246	leaq	32(%rsp),%rbp
247	andq	$-32,%rbp
248
249	movq	%rdx,%rbx
250	movq	%r8,0+0+32(%rbp)
251	movq	%rbx,8+0+32(%rbp)
252
253	movl	_GFp_ia32cap_P+8(%rip),%eax
254	andl	$288,%eax
255	xorl	$288,%eax
256	jz	chacha20_poly1305_open_avx2
257
258	cmpq	$128,%rbx
259	jbe	L$open_sse_128
260
261	movdqa	L$chacha20_consts(%rip),%xmm0
262	movdqu	0(%r9),%xmm4
263	movdqu	16(%r9),%xmm8
264	movdqu	32(%r9),%xmm12
265
266	movdqa	%xmm12,%xmm7
267
268	movdqa	%xmm4,0+48(%rbp)
269	movdqa	%xmm8,0+64(%rbp)
270	movdqa	%xmm12,0+96(%rbp)
271	movq	$10,%r10
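# First block (counter 0): 10 ChaCha20 double-rounds; the first 32 bytes
# of key stream become the Poly1305 key (r, clamped, at 0(%rbp); s at 16(%rbp)).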
272L$open_sse_init_rounds:
273	paddd	%xmm4,%xmm0
274	pxor	%xmm0,%xmm12
275	pshufb	L$rol16(%rip),%xmm12
276	paddd	%xmm12,%xmm8
277	pxor	%xmm8,%xmm4
278	movdqa	%xmm4,%xmm3
279	pslld	$12,%xmm3
280	psrld	$20,%xmm4
281	pxor	%xmm3,%xmm4
282	paddd	%xmm4,%xmm0
283	pxor	%xmm0,%xmm12
284	pshufb	L$rol8(%rip),%xmm12
285	paddd	%xmm12,%xmm8
286	pxor	%xmm8,%xmm4
287	movdqa	%xmm4,%xmm3
288	pslld	$7,%xmm3
289	psrld	$25,%xmm4
290	pxor	%xmm3,%xmm4
291.byte	102,15,58,15,228,4
292.byte	102,69,15,58,15,192,8
293.byte	102,69,15,58,15,228,12
294	paddd	%xmm4,%xmm0
295	pxor	%xmm0,%xmm12
296	pshufb	L$rol16(%rip),%xmm12
297	paddd	%xmm12,%xmm8
298	pxor	%xmm8,%xmm4
299	movdqa	%xmm4,%xmm3
300	pslld	$12,%xmm3
301	psrld	$20,%xmm4
302	pxor	%xmm3,%xmm4
303	paddd	%xmm4,%xmm0
304	pxor	%xmm0,%xmm12
305	pshufb	L$rol8(%rip),%xmm12
306	paddd	%xmm12,%xmm8
307	pxor	%xmm8,%xmm4
308	movdqa	%xmm4,%xmm3
309	pslld	$7,%xmm3
310	psrld	$25,%xmm4
311	pxor	%xmm3,%xmm4
312.byte	102,15,58,15,228,12
313.byte	102,69,15,58,15,192,8
314.byte	102,69,15,58,15,228,4
315
316	decq	%r10
317	jne	L$open_sse_init_rounds
318
319	paddd	L$chacha20_consts(%rip),%xmm0
320	paddd	0+48(%rbp),%xmm4
321
322	pand	L$clamp(%rip),%xmm0
323	movdqa	%xmm0,0+0(%rbp)
324	movdqa	%xmm4,0+16(%rbp)
325
326	movq	%r8,%r8
327	call	poly_hash_ad_internal
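# Main open loop: 256 bytes per iteration, four interleaved ChaCha20
# blocks with Poly1305 absorption of the ciphertext folded into the rounds.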
328L$open_sse_main_loop:
329	cmpq	$256,%rbx
330	jb	L$open_sse_tail
331
332	movdqa	L$chacha20_consts(%rip),%xmm0
333	movdqa	0+48(%rbp),%xmm4
334	movdqa	0+64(%rbp),%xmm8
335	movdqa	%xmm0,%xmm1
336	movdqa	%xmm4,%xmm5
337	movdqa	%xmm8,%xmm9
338	movdqa	%xmm0,%xmm2
339	movdqa	%xmm4,%xmm6
340	movdqa	%xmm8,%xmm10
341	movdqa	%xmm0,%xmm3
342	movdqa	%xmm4,%xmm7
343	movdqa	%xmm8,%xmm11
344	movdqa	0+96(%rbp),%xmm15
345	paddd	L$sse_inc(%rip),%xmm15
346	movdqa	%xmm15,%xmm14
347	paddd	L$sse_inc(%rip),%xmm14
348	movdqa	%xmm14,%xmm13
349	paddd	L$sse_inc(%rip),%xmm13
350	movdqa	%xmm13,%xmm12
351	paddd	L$sse_inc(%rip),%xmm12
352	movdqa	%xmm12,0+96(%rbp)
353	movdqa	%xmm13,0+112(%rbp)
354	movdqa	%xmm14,0+128(%rbp)
355	movdqa	%xmm15,0+144(%rbp)
356
357
358
359	movq	$4,%rcx
360	movq	%rsi,%r8
361L$open_sse_main_loop_rounds:
362	movdqa	%xmm8,0+80(%rbp)
363	movdqa	L$rol16(%rip),%xmm8
364	paddd	%xmm7,%xmm3
365	paddd	%xmm6,%xmm2
366	paddd	%xmm5,%xmm1
367	paddd	%xmm4,%xmm0
368	pxor	%xmm3,%xmm15
369	pxor	%xmm2,%xmm14
370	pxor	%xmm1,%xmm13
371	pxor	%xmm0,%xmm12
372.byte	102,69,15,56,0,248
373.byte	102,69,15,56,0,240
374.byte	102,69,15,56,0,232
375.byte	102,69,15,56,0,224
376	movdqa	0+80(%rbp),%xmm8
377	paddd	%xmm15,%xmm11
378	paddd	%xmm14,%xmm10
379	paddd	%xmm13,%xmm9
380	paddd	%xmm12,%xmm8
381	pxor	%xmm11,%xmm7
382	addq	0+0(%r8),%r10
383	adcq	8+0(%r8),%r11
384	adcq	$1,%r12
385
386	leaq	16(%r8),%r8
387	pxor	%xmm10,%xmm6
388	pxor	%xmm9,%xmm5
389	pxor	%xmm8,%xmm4
390	movdqa	%xmm8,0+80(%rbp)
391	movdqa	%xmm7,%xmm8
392	psrld	$20,%xmm8
393	pslld	$32-20,%xmm7
394	pxor	%xmm8,%xmm7
395	movdqa	%xmm6,%xmm8
396	psrld	$20,%xmm8
397	pslld	$32-20,%xmm6
398	pxor	%xmm8,%xmm6
399	movdqa	%xmm5,%xmm8
400	psrld	$20,%xmm8
401	pslld	$32-20,%xmm5
402	pxor	%xmm8,%xmm5
403	movdqa	%xmm4,%xmm8
404	psrld	$20,%xmm8
405	pslld	$32-20,%xmm4
406	pxor	%xmm8,%xmm4
407	movq	0+0+0(%rbp),%rax
408	movq	%rax,%r15
409	mulq	%r10
410	movq	%rax,%r13
411	movq	%rdx,%r14
412	movq	0+0+0(%rbp),%rax
413	mulq	%r11
414	imulq	%r12,%r15
415	addq	%rax,%r14
416	adcq	%rdx,%r15
417	movdqa	L$rol8(%rip),%xmm8
418	paddd	%xmm7,%xmm3
419	paddd	%xmm6,%xmm2
420	paddd	%xmm5,%xmm1
421	paddd	%xmm4,%xmm0
422	pxor	%xmm3,%xmm15
423	pxor	%xmm2,%xmm14
424	pxor	%xmm1,%xmm13
425	pxor	%xmm0,%xmm12
426.byte	102,69,15,56,0,248
427.byte	102,69,15,56,0,240
428.byte	102,69,15,56,0,232
429.byte	102,69,15,56,0,224
430	movdqa	0+80(%rbp),%xmm8
431	paddd	%xmm15,%xmm11
432	paddd	%xmm14,%xmm10
433	paddd	%xmm13,%xmm9
434	paddd	%xmm12,%xmm8
435	pxor	%xmm11,%xmm7
436	pxor	%xmm10,%xmm6
437	movq	8+0+0(%rbp),%rax
438	movq	%rax,%r9
439	mulq	%r10
440	addq	%rax,%r14
441	adcq	$0,%rdx
442	movq	%rdx,%r10
443	movq	8+0+0(%rbp),%rax
444	mulq	%r11
445	addq	%rax,%r15
446	adcq	$0,%rdx
447	pxor	%xmm9,%xmm5
448	pxor	%xmm8,%xmm4
449	movdqa	%xmm8,0+80(%rbp)
450	movdqa	%xmm7,%xmm8
451	psrld	$25,%xmm8
452	pslld	$32-25,%xmm7
453	pxor	%xmm8,%xmm7
454	movdqa	%xmm6,%xmm8
455	psrld	$25,%xmm8
456	pslld	$32-25,%xmm6
457	pxor	%xmm8,%xmm6
458	movdqa	%xmm5,%xmm8
459	psrld	$25,%xmm8
460	pslld	$32-25,%xmm5
461	pxor	%xmm8,%xmm5
462	movdqa	%xmm4,%xmm8
463	psrld	$25,%xmm8
464	pslld	$32-25,%xmm4
465	pxor	%xmm8,%xmm4
466	movdqa	0+80(%rbp),%xmm8
467	imulq	%r12,%r9
468	addq	%r10,%r15
469	adcq	%rdx,%r9
470.byte	102,15,58,15,255,4
471.byte	102,69,15,58,15,219,8
472.byte	102,69,15,58,15,255,12
473.byte	102,15,58,15,246,4
474.byte	102,69,15,58,15,210,8
475.byte	102,69,15,58,15,246,12
476.byte	102,15,58,15,237,4
477.byte	102,69,15,58,15,201,8
478.byte	102,69,15,58,15,237,12
479.byte	102,15,58,15,228,4
480.byte	102,69,15,58,15,192,8
481.byte	102,69,15,58,15,228,12
482	movdqa	%xmm8,0+80(%rbp)
483	movdqa	L$rol16(%rip),%xmm8
484	paddd	%xmm7,%xmm3
485	paddd	%xmm6,%xmm2
486	paddd	%xmm5,%xmm1
487	paddd	%xmm4,%xmm0
488	pxor	%xmm3,%xmm15
489	pxor	%xmm2,%xmm14
490	movq	%r13,%r10
491	movq	%r14,%r11
492	movq	%r15,%r12
493	andq	$3,%r12
494	movq	%r15,%r13
495	andq	$-4,%r13
496	movq	%r9,%r14
497	shrdq	$2,%r9,%r15
498	shrq	$2,%r9
499	addq	%r13,%r15
500	adcq	%r14,%r9
501	addq	%r15,%r10
502	adcq	%r9,%r11
503	adcq	$0,%r12
504	pxor	%xmm1,%xmm13
505	pxor	%xmm0,%xmm12
506.byte	102,69,15,56,0,248
507.byte	102,69,15,56,0,240
508.byte	102,69,15,56,0,232
509.byte	102,69,15,56,0,224
510	movdqa	0+80(%rbp),%xmm8
511	paddd	%xmm15,%xmm11
512	paddd	%xmm14,%xmm10
513	paddd	%xmm13,%xmm9
514	paddd	%xmm12,%xmm8
515	pxor	%xmm11,%xmm7
516	pxor	%xmm10,%xmm6
517	pxor	%xmm9,%xmm5
518	pxor	%xmm8,%xmm4
519	movdqa	%xmm8,0+80(%rbp)
520	movdqa	%xmm7,%xmm8
521	psrld	$20,%xmm8
522	pslld	$32-20,%xmm7
523	pxor	%xmm8,%xmm7
524	movdqa	%xmm6,%xmm8
525	psrld	$20,%xmm8
526	pslld	$32-20,%xmm6
527	pxor	%xmm8,%xmm6
528	movdqa	%xmm5,%xmm8
529	psrld	$20,%xmm8
530	pslld	$32-20,%xmm5
531	pxor	%xmm8,%xmm5
532	movdqa	%xmm4,%xmm8
533	psrld	$20,%xmm8
534	pslld	$32-20,%xmm4
535	pxor	%xmm8,%xmm4
536	movdqa	L$rol8(%rip),%xmm8
537	paddd	%xmm7,%xmm3
538	paddd	%xmm6,%xmm2
539	paddd	%xmm5,%xmm1
540	paddd	%xmm4,%xmm0
541	pxor	%xmm3,%xmm15
542	pxor	%xmm2,%xmm14
543	pxor	%xmm1,%xmm13
544	pxor	%xmm0,%xmm12
545.byte	102,69,15,56,0,248
546.byte	102,69,15,56,0,240
547.byte	102,69,15,56,0,232
548.byte	102,69,15,56,0,224
549	movdqa	0+80(%rbp),%xmm8
550	paddd	%xmm15,%xmm11
551	paddd	%xmm14,%xmm10
552	paddd	%xmm13,%xmm9
553	paddd	%xmm12,%xmm8
554	pxor	%xmm11,%xmm7
555	pxor	%xmm10,%xmm6
556	pxor	%xmm9,%xmm5
557	pxor	%xmm8,%xmm4
558	movdqa	%xmm8,0+80(%rbp)
559	movdqa	%xmm7,%xmm8
560	psrld	$25,%xmm8
561	pslld	$32-25,%xmm7
562	pxor	%xmm8,%xmm7
563	movdqa	%xmm6,%xmm8
564	psrld	$25,%xmm8
565	pslld	$32-25,%xmm6
566	pxor	%xmm8,%xmm6
567	movdqa	%xmm5,%xmm8
568	psrld	$25,%xmm8
569	pslld	$32-25,%xmm5
570	pxor	%xmm8,%xmm5
571	movdqa	%xmm4,%xmm8
572	psrld	$25,%xmm8
573	pslld	$32-25,%xmm4
574	pxor	%xmm8,%xmm4
575	movdqa	0+80(%rbp),%xmm8
576.byte	102,15,58,15,255,12
577.byte	102,69,15,58,15,219,8
578.byte	102,69,15,58,15,255,4
579.byte	102,15,58,15,246,12
580.byte	102,69,15,58,15,210,8
581.byte	102,69,15,58,15,246,4
582.byte	102,15,58,15,237,12
583.byte	102,69,15,58,15,201,8
584.byte	102,69,15,58,15,237,4
585.byte	102,15,58,15,228,12
586.byte	102,69,15,58,15,192,8
587.byte	102,69,15,58,15,228,4
588
589	decq	%rcx
590	jge	L$open_sse_main_loop_rounds
591	addq	0+0(%r8),%r10
592	adcq	8+0(%r8),%r11
593	adcq	$1,%r12
594	movq	0+0+0(%rbp),%rax
595	movq	%rax,%r15
596	mulq	%r10
597	movq	%rax,%r13
598	movq	%rdx,%r14
599	movq	0+0+0(%rbp),%rax
600	mulq	%r11
601	imulq	%r12,%r15
602	addq	%rax,%r14
603	adcq	%rdx,%r15
604	movq	8+0+0(%rbp),%rax
605	movq	%rax,%r9
606	mulq	%r10
607	addq	%rax,%r14
608	adcq	$0,%rdx
609	movq	%rdx,%r10
610	movq	8+0+0(%rbp),%rax
611	mulq	%r11
612	addq	%rax,%r15
613	adcq	$0,%rdx
614	imulq	%r12,%r9
615	addq	%r10,%r15
616	adcq	%rdx,%r9
617	movq	%r13,%r10
618	movq	%r14,%r11
619	movq	%r15,%r12
620	andq	$3,%r12
621	movq	%r15,%r13
622	andq	$-4,%r13
623	movq	%r9,%r14
624	shrdq	$2,%r9,%r15
625	shrq	$2,%r9
626	addq	%r13,%r15
627	adcq	%r14,%r9
628	addq	%r15,%r10
629	adcq	%r9,%r11
630	adcq	$0,%r12
631
632	leaq	16(%r8),%r8
633	cmpq	$-6,%rcx
634	jg	L$open_sse_main_loop_rounds
635	paddd	L$chacha20_consts(%rip),%xmm3
636	paddd	0+48(%rbp),%xmm7
637	paddd	0+64(%rbp),%xmm11
638	paddd	0+144(%rbp),%xmm15
639	paddd	L$chacha20_consts(%rip),%xmm2
640	paddd	0+48(%rbp),%xmm6
641	paddd	0+64(%rbp),%xmm10
642	paddd	0+128(%rbp),%xmm14
643	paddd	L$chacha20_consts(%rip),%xmm1
644	paddd	0+48(%rbp),%xmm5
645	paddd	0+64(%rbp),%xmm9
646	paddd	0+112(%rbp),%xmm13
647	paddd	L$chacha20_consts(%rip),%xmm0
648	paddd	0+48(%rbp),%xmm4
649	paddd	0+64(%rbp),%xmm8
650	paddd	0+96(%rbp),%xmm12
651	movdqa	%xmm12,0+80(%rbp)
652	movdqu	0 + 0(%rsi),%xmm12
653	pxor	%xmm3,%xmm12
654	movdqu	%xmm12,0 + 0(%rdi)
655	movdqu	16 + 0(%rsi),%xmm12
656	pxor	%xmm7,%xmm12
657	movdqu	%xmm12,16 + 0(%rdi)
658	movdqu	32 + 0(%rsi),%xmm12
659	pxor	%xmm11,%xmm12
660	movdqu	%xmm12,32 + 0(%rdi)
661	movdqu	48 + 0(%rsi),%xmm12
662	pxor	%xmm15,%xmm12
663	movdqu	%xmm12,48 + 0(%rdi)
664	movdqu	0 + 64(%rsi),%xmm3
665	movdqu	16 + 64(%rsi),%xmm7
666	movdqu	32 + 64(%rsi),%xmm11
667	movdqu	48 + 64(%rsi),%xmm15
668	pxor	%xmm3,%xmm2
669	pxor	%xmm7,%xmm6
670	pxor	%xmm11,%xmm10
671	pxor	%xmm14,%xmm15
672	movdqu	%xmm2,0 + 64(%rdi)
673	movdqu	%xmm6,16 + 64(%rdi)
674	movdqu	%xmm10,32 + 64(%rdi)
675	movdqu	%xmm15,48 + 64(%rdi)
676	movdqu	0 + 128(%rsi),%xmm3
677	movdqu	16 + 128(%rsi),%xmm7
678	movdqu	32 + 128(%rsi),%xmm11
679	movdqu	48 + 128(%rsi),%xmm15
680	pxor	%xmm3,%xmm1
681	pxor	%xmm7,%xmm5
682	pxor	%xmm11,%xmm9
683	pxor	%xmm13,%xmm15
684	movdqu	%xmm1,0 + 128(%rdi)
685	movdqu	%xmm5,16 + 128(%rdi)
686	movdqu	%xmm9,32 + 128(%rdi)
687	movdqu	%xmm15,48 + 128(%rdi)
688	movdqu	0 + 192(%rsi),%xmm3
689	movdqu	16 + 192(%rsi),%xmm7
690	movdqu	32 + 192(%rsi),%xmm11
691	movdqu	48 + 192(%rsi),%xmm15
692	pxor	%xmm3,%xmm0
693	pxor	%xmm7,%xmm4
694	pxor	%xmm11,%xmm8
695	pxor	0+80(%rbp),%xmm15
696	movdqu	%xmm0,0 + 192(%rdi)
697	movdqu	%xmm4,16 + 192(%rdi)
698	movdqu	%xmm8,32 + 192(%rdi)
699	movdqu	%xmm15,48 + 192(%rdi)
700
701	leaq	256(%rsi),%rsi
702	leaq	256(%rdi),%rdi
703	subq	$256,%rbx
704	jmp	L$open_sse_main_loop
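# Fewer than 256 bytes remain: choose a 1-, 2-, 3- or 4-block tail path
# by the remaining length, still hashing the ciphertext as it goes.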
705L$open_sse_tail:
706
707	testq	%rbx,%rbx
708	jz	L$open_sse_finalize
709	cmpq	$192,%rbx
710	ja	L$open_sse_tail_256
711	cmpq	$128,%rbx
712	ja	L$open_sse_tail_192
713	cmpq	$64,%rbx
714	ja	L$open_sse_tail_128
715	movdqa	L$chacha20_consts(%rip),%xmm0
716	movdqa	0+48(%rbp),%xmm4
717	movdqa	0+64(%rbp),%xmm8
718	movdqa	0+96(%rbp),%xmm12
719	paddd	L$sse_inc(%rip),%xmm12
720	movdqa	%xmm12,0+96(%rbp)
721
722	xorq	%r8,%r8
723	movq	%rbx,%rcx
724	cmpq	$16,%rcx
725	jb	L$open_sse_tail_64_rounds
726L$open_sse_tail_64_rounds_and_x1hash:
727	addq	0+0(%rsi,%r8,1),%r10
728	adcq	8+0(%rsi,%r8,1),%r11
729	adcq	$1,%r12
730	movq	0+0+0(%rbp),%rax
731	movq	%rax,%r15
732	mulq	%r10
733	movq	%rax,%r13
734	movq	%rdx,%r14
735	movq	0+0+0(%rbp),%rax
736	mulq	%r11
737	imulq	%r12,%r15
738	addq	%rax,%r14
739	adcq	%rdx,%r15
740	movq	8+0+0(%rbp),%rax
741	movq	%rax,%r9
742	mulq	%r10
743	addq	%rax,%r14
744	adcq	$0,%rdx
745	movq	%rdx,%r10
746	movq	8+0+0(%rbp),%rax
747	mulq	%r11
748	addq	%rax,%r15
749	adcq	$0,%rdx
750	imulq	%r12,%r9
751	addq	%r10,%r15
752	adcq	%rdx,%r9
753	movq	%r13,%r10
754	movq	%r14,%r11
755	movq	%r15,%r12
756	andq	$3,%r12
757	movq	%r15,%r13
758	andq	$-4,%r13
759	movq	%r9,%r14
760	shrdq	$2,%r9,%r15
761	shrq	$2,%r9
762	addq	%r13,%r15
763	adcq	%r14,%r9
764	addq	%r15,%r10
765	adcq	%r9,%r11
766	adcq	$0,%r12
767
768	subq	$16,%rcx
769L$open_sse_tail_64_rounds:
770	addq	$16,%r8
771	paddd	%xmm4,%xmm0
772	pxor	%xmm0,%xmm12
773	pshufb	L$rol16(%rip),%xmm12
774	paddd	%xmm12,%xmm8
775	pxor	%xmm8,%xmm4
776	movdqa	%xmm4,%xmm3
777	pslld	$12,%xmm3
778	psrld	$20,%xmm4
779	pxor	%xmm3,%xmm4
780	paddd	%xmm4,%xmm0
781	pxor	%xmm0,%xmm12
782	pshufb	L$rol8(%rip),%xmm12
783	paddd	%xmm12,%xmm8
784	pxor	%xmm8,%xmm4
785	movdqa	%xmm4,%xmm3
786	pslld	$7,%xmm3
787	psrld	$25,%xmm4
788	pxor	%xmm3,%xmm4
789.byte	102,15,58,15,228,4
790.byte	102,69,15,58,15,192,8
791.byte	102,69,15,58,15,228,12
792	paddd	%xmm4,%xmm0
793	pxor	%xmm0,%xmm12
794	pshufb	L$rol16(%rip),%xmm12
795	paddd	%xmm12,%xmm8
796	pxor	%xmm8,%xmm4
797	movdqa	%xmm4,%xmm3
798	pslld	$12,%xmm3
799	psrld	$20,%xmm4
800	pxor	%xmm3,%xmm4
801	paddd	%xmm4,%xmm0
802	pxor	%xmm0,%xmm12
803	pshufb	L$rol8(%rip),%xmm12
804	paddd	%xmm12,%xmm8
805	pxor	%xmm8,%xmm4
806	movdqa	%xmm4,%xmm3
807	pslld	$7,%xmm3
808	psrld	$25,%xmm4
809	pxor	%xmm3,%xmm4
810.byte	102,15,58,15,228,12
811.byte	102,69,15,58,15,192,8
812.byte	102,69,15,58,15,228,4
813
814	cmpq	$16,%rcx
815	jae	L$open_sse_tail_64_rounds_and_x1hash
816	cmpq	$160,%r8
817	jne	L$open_sse_tail_64_rounds
818	paddd	L$chacha20_consts(%rip),%xmm0
819	paddd	0+48(%rbp),%xmm4
820	paddd	0+64(%rbp),%xmm8
821	paddd	0+96(%rbp),%xmm12
822
823	jmp	L$open_sse_tail_64_dec_loop
824
825L$open_sse_tail_128:
826	movdqa	L$chacha20_consts(%rip),%xmm0
827	movdqa	0+48(%rbp),%xmm4
828	movdqa	0+64(%rbp),%xmm8
829	movdqa	%xmm0,%xmm1
830	movdqa	%xmm4,%xmm5
831	movdqa	%xmm8,%xmm9
832	movdqa	0+96(%rbp),%xmm13
833	paddd	L$sse_inc(%rip),%xmm13
834	movdqa	%xmm13,%xmm12
835	paddd	L$sse_inc(%rip),%xmm12
836	movdqa	%xmm12,0+96(%rbp)
837	movdqa	%xmm13,0+112(%rbp)
838
839	movq	%rbx,%rcx
840	andq	$-16,%rcx
841	xorq	%r8,%r8
842L$open_sse_tail_128_rounds_and_x1hash:
843	addq	0+0(%rsi,%r8,1),%r10
844	adcq	8+0(%rsi,%r8,1),%r11
845	adcq	$1,%r12
846	movq	0+0+0(%rbp),%rax
847	movq	%rax,%r15
848	mulq	%r10
849	movq	%rax,%r13
850	movq	%rdx,%r14
851	movq	0+0+0(%rbp),%rax
852	mulq	%r11
853	imulq	%r12,%r15
854	addq	%rax,%r14
855	adcq	%rdx,%r15
856	movq	8+0+0(%rbp),%rax
857	movq	%rax,%r9
858	mulq	%r10
859	addq	%rax,%r14
860	adcq	$0,%rdx
861	movq	%rdx,%r10
862	movq	8+0+0(%rbp),%rax
863	mulq	%r11
864	addq	%rax,%r15
865	adcq	$0,%rdx
866	imulq	%r12,%r9
867	addq	%r10,%r15
868	adcq	%rdx,%r9
869	movq	%r13,%r10
870	movq	%r14,%r11
871	movq	%r15,%r12
872	andq	$3,%r12
873	movq	%r15,%r13
874	andq	$-4,%r13
875	movq	%r9,%r14
876	shrdq	$2,%r9,%r15
877	shrq	$2,%r9
878	addq	%r13,%r15
879	adcq	%r14,%r9
880	addq	%r15,%r10
881	adcq	%r9,%r11
882	adcq	$0,%r12
883
884L$open_sse_tail_128_rounds:
885	addq	$16,%r8
886	paddd	%xmm4,%xmm0
887	pxor	%xmm0,%xmm12
888	pshufb	L$rol16(%rip),%xmm12
889	paddd	%xmm12,%xmm8
890	pxor	%xmm8,%xmm4
891	movdqa	%xmm4,%xmm3
892	pslld	$12,%xmm3
893	psrld	$20,%xmm4
894	pxor	%xmm3,%xmm4
895	paddd	%xmm4,%xmm0
896	pxor	%xmm0,%xmm12
897	pshufb	L$rol8(%rip),%xmm12
898	paddd	%xmm12,%xmm8
899	pxor	%xmm8,%xmm4
900	movdqa	%xmm4,%xmm3
901	pslld	$7,%xmm3
902	psrld	$25,%xmm4
903	pxor	%xmm3,%xmm4
904.byte	102,15,58,15,228,4
905.byte	102,69,15,58,15,192,8
906.byte	102,69,15,58,15,228,12
907	paddd	%xmm5,%xmm1
908	pxor	%xmm1,%xmm13
909	pshufb	L$rol16(%rip),%xmm13
910	paddd	%xmm13,%xmm9
911	pxor	%xmm9,%xmm5
912	movdqa	%xmm5,%xmm3
913	pslld	$12,%xmm3
914	psrld	$20,%xmm5
915	pxor	%xmm3,%xmm5
916	paddd	%xmm5,%xmm1
917	pxor	%xmm1,%xmm13
918	pshufb	L$rol8(%rip),%xmm13
919	paddd	%xmm13,%xmm9
920	pxor	%xmm9,%xmm5
921	movdqa	%xmm5,%xmm3
922	pslld	$7,%xmm3
923	psrld	$25,%xmm5
924	pxor	%xmm3,%xmm5
925.byte	102,15,58,15,237,4
926.byte	102,69,15,58,15,201,8
927.byte	102,69,15,58,15,237,12
928	paddd	%xmm4,%xmm0
929	pxor	%xmm0,%xmm12
930	pshufb	L$rol16(%rip),%xmm12
931	paddd	%xmm12,%xmm8
932	pxor	%xmm8,%xmm4
933	movdqa	%xmm4,%xmm3
934	pslld	$12,%xmm3
935	psrld	$20,%xmm4
936	pxor	%xmm3,%xmm4
937	paddd	%xmm4,%xmm0
938	pxor	%xmm0,%xmm12
939	pshufb	L$rol8(%rip),%xmm12
940	paddd	%xmm12,%xmm8
941	pxor	%xmm8,%xmm4
942	movdqa	%xmm4,%xmm3
943	pslld	$7,%xmm3
944	psrld	$25,%xmm4
945	pxor	%xmm3,%xmm4
946.byte	102,15,58,15,228,12
947.byte	102,69,15,58,15,192,8
948.byte	102,69,15,58,15,228,4
949	paddd	%xmm5,%xmm1
950	pxor	%xmm1,%xmm13
951	pshufb	L$rol16(%rip),%xmm13
952	paddd	%xmm13,%xmm9
953	pxor	%xmm9,%xmm5
954	movdqa	%xmm5,%xmm3
955	pslld	$12,%xmm3
956	psrld	$20,%xmm5
957	pxor	%xmm3,%xmm5
958	paddd	%xmm5,%xmm1
959	pxor	%xmm1,%xmm13
960	pshufb	L$rol8(%rip),%xmm13
961	paddd	%xmm13,%xmm9
962	pxor	%xmm9,%xmm5
963	movdqa	%xmm5,%xmm3
964	pslld	$7,%xmm3
965	psrld	$25,%xmm5
966	pxor	%xmm3,%xmm5
967.byte	102,15,58,15,237,12
968.byte	102,69,15,58,15,201,8
969.byte	102,69,15,58,15,237,4
970
971	cmpq	%rcx,%r8
972	jb	L$open_sse_tail_128_rounds_and_x1hash
973	cmpq	$160,%r8
974	jne	L$open_sse_tail_128_rounds
975	paddd	L$chacha20_consts(%rip),%xmm1
976	paddd	0+48(%rbp),%xmm5
977	paddd	0+64(%rbp),%xmm9
978	paddd	0+112(%rbp),%xmm13
979	paddd	L$chacha20_consts(%rip),%xmm0
980	paddd	0+48(%rbp),%xmm4
981	paddd	0+64(%rbp),%xmm8
982	paddd	0+96(%rbp),%xmm12
983	movdqu	0 + 0(%rsi),%xmm3
984	movdqu	16 + 0(%rsi),%xmm7
985	movdqu	32 + 0(%rsi),%xmm11
986	movdqu	48 + 0(%rsi),%xmm15
987	pxor	%xmm3,%xmm1
988	pxor	%xmm7,%xmm5
989	pxor	%xmm11,%xmm9
990	pxor	%xmm13,%xmm15
991	movdqu	%xmm1,0 + 0(%rdi)
992	movdqu	%xmm5,16 + 0(%rdi)
993	movdqu	%xmm9,32 + 0(%rdi)
994	movdqu	%xmm15,48 + 0(%rdi)
995
996	subq	$64,%rbx
997	leaq	64(%rsi),%rsi
998	leaq	64(%rdi),%rdi
999	jmp	L$open_sse_tail_64_dec_loop
1000
1001L$open_sse_tail_192:
1002	movdqa	L$chacha20_consts(%rip),%xmm0
1003	movdqa	0+48(%rbp),%xmm4
1004	movdqa	0+64(%rbp),%xmm8
1005	movdqa	%xmm0,%xmm1
1006	movdqa	%xmm4,%xmm5
1007	movdqa	%xmm8,%xmm9
1008	movdqa	%xmm0,%xmm2
1009	movdqa	%xmm4,%xmm6
1010	movdqa	%xmm8,%xmm10
1011	movdqa	0+96(%rbp),%xmm14
1012	paddd	L$sse_inc(%rip),%xmm14
1013	movdqa	%xmm14,%xmm13
1014	paddd	L$sse_inc(%rip),%xmm13
1015	movdqa	%xmm13,%xmm12
1016	paddd	L$sse_inc(%rip),%xmm12
1017	movdqa	%xmm12,0+96(%rbp)
1018	movdqa	%xmm13,0+112(%rbp)
1019	movdqa	%xmm14,0+128(%rbp)
1020
1021	movq	%rbx,%rcx
1022	movq	$160,%r8
1023	cmpq	$160,%rcx
1024	cmovgq	%r8,%rcx
1025	andq	$-16,%rcx
1026	xorq	%r8,%r8
1027L$open_sse_tail_192_rounds_and_x1hash:
1028	addq	0+0(%rsi,%r8,1),%r10
1029	adcq	8+0(%rsi,%r8,1),%r11
1030	adcq	$1,%r12
1031	movq	0+0+0(%rbp),%rax
1032	movq	%rax,%r15
1033	mulq	%r10
1034	movq	%rax,%r13
1035	movq	%rdx,%r14
1036	movq	0+0+0(%rbp),%rax
1037	mulq	%r11
1038	imulq	%r12,%r15
1039	addq	%rax,%r14
1040	adcq	%rdx,%r15
1041	movq	8+0+0(%rbp),%rax
1042	movq	%rax,%r9
1043	mulq	%r10
1044	addq	%rax,%r14
1045	adcq	$0,%rdx
1046	movq	%rdx,%r10
1047	movq	8+0+0(%rbp),%rax
1048	mulq	%r11
1049	addq	%rax,%r15
1050	adcq	$0,%rdx
1051	imulq	%r12,%r9
1052	addq	%r10,%r15
1053	adcq	%rdx,%r9
1054	movq	%r13,%r10
1055	movq	%r14,%r11
1056	movq	%r15,%r12
1057	andq	$3,%r12
1058	movq	%r15,%r13
1059	andq	$-4,%r13
1060	movq	%r9,%r14
1061	shrdq	$2,%r9,%r15
1062	shrq	$2,%r9
1063	addq	%r13,%r15
1064	adcq	%r14,%r9
1065	addq	%r15,%r10
1066	adcq	%r9,%r11
1067	adcq	$0,%r12
1068
1069L$open_sse_tail_192_rounds:
1070	addq	$16,%r8
1071	paddd	%xmm4,%xmm0
1072	pxor	%xmm0,%xmm12
1073	pshufb	L$rol16(%rip),%xmm12
1074	paddd	%xmm12,%xmm8
1075	pxor	%xmm8,%xmm4
1076	movdqa	%xmm4,%xmm3
1077	pslld	$12,%xmm3
1078	psrld	$20,%xmm4
1079	pxor	%xmm3,%xmm4
1080	paddd	%xmm4,%xmm0
1081	pxor	%xmm0,%xmm12
1082	pshufb	L$rol8(%rip),%xmm12
1083	paddd	%xmm12,%xmm8
1084	pxor	%xmm8,%xmm4
1085	movdqa	%xmm4,%xmm3
1086	pslld	$7,%xmm3
1087	psrld	$25,%xmm4
1088	pxor	%xmm3,%xmm4
1089.byte	102,15,58,15,228,4
1090.byte	102,69,15,58,15,192,8
1091.byte	102,69,15,58,15,228,12
1092	paddd	%xmm5,%xmm1
1093	pxor	%xmm1,%xmm13
1094	pshufb	L$rol16(%rip),%xmm13
1095	paddd	%xmm13,%xmm9
1096	pxor	%xmm9,%xmm5
1097	movdqa	%xmm5,%xmm3
1098	pslld	$12,%xmm3
1099	psrld	$20,%xmm5
1100	pxor	%xmm3,%xmm5
1101	paddd	%xmm5,%xmm1
1102	pxor	%xmm1,%xmm13
1103	pshufb	L$rol8(%rip),%xmm13
1104	paddd	%xmm13,%xmm9
1105	pxor	%xmm9,%xmm5
1106	movdqa	%xmm5,%xmm3
1107	pslld	$7,%xmm3
1108	psrld	$25,%xmm5
1109	pxor	%xmm3,%xmm5
1110.byte	102,15,58,15,237,4
1111.byte	102,69,15,58,15,201,8
1112.byte	102,69,15,58,15,237,12
1113	paddd	%xmm6,%xmm2
1114	pxor	%xmm2,%xmm14
1115	pshufb	L$rol16(%rip),%xmm14
1116	paddd	%xmm14,%xmm10
1117	pxor	%xmm10,%xmm6
1118	movdqa	%xmm6,%xmm3
1119	pslld	$12,%xmm3
1120	psrld	$20,%xmm6
1121	pxor	%xmm3,%xmm6
1122	paddd	%xmm6,%xmm2
1123	pxor	%xmm2,%xmm14
1124	pshufb	L$rol8(%rip),%xmm14
1125	paddd	%xmm14,%xmm10
1126	pxor	%xmm10,%xmm6
1127	movdqa	%xmm6,%xmm3
1128	pslld	$7,%xmm3
1129	psrld	$25,%xmm6
1130	pxor	%xmm3,%xmm6
1131.byte	102,15,58,15,246,4
1132.byte	102,69,15,58,15,210,8
1133.byte	102,69,15,58,15,246,12
1134	paddd	%xmm4,%xmm0
1135	pxor	%xmm0,%xmm12
1136	pshufb	L$rol16(%rip),%xmm12
1137	paddd	%xmm12,%xmm8
1138	pxor	%xmm8,%xmm4
1139	movdqa	%xmm4,%xmm3
1140	pslld	$12,%xmm3
1141	psrld	$20,%xmm4
1142	pxor	%xmm3,%xmm4
1143	paddd	%xmm4,%xmm0
1144	pxor	%xmm0,%xmm12
1145	pshufb	L$rol8(%rip),%xmm12
1146	paddd	%xmm12,%xmm8
1147	pxor	%xmm8,%xmm4
1148	movdqa	%xmm4,%xmm3
1149	pslld	$7,%xmm3
1150	psrld	$25,%xmm4
1151	pxor	%xmm3,%xmm4
1152.byte	102,15,58,15,228,12
1153.byte	102,69,15,58,15,192,8
1154.byte	102,69,15,58,15,228,4
1155	paddd	%xmm5,%xmm1
1156	pxor	%xmm1,%xmm13
1157	pshufb	L$rol16(%rip),%xmm13
1158	paddd	%xmm13,%xmm9
1159	pxor	%xmm9,%xmm5
1160	movdqa	%xmm5,%xmm3
1161	pslld	$12,%xmm3
1162	psrld	$20,%xmm5
1163	pxor	%xmm3,%xmm5
1164	paddd	%xmm5,%xmm1
1165	pxor	%xmm1,%xmm13
1166	pshufb	L$rol8(%rip),%xmm13
1167	paddd	%xmm13,%xmm9
1168	pxor	%xmm9,%xmm5
1169	movdqa	%xmm5,%xmm3
1170	pslld	$7,%xmm3
1171	psrld	$25,%xmm5
1172	pxor	%xmm3,%xmm5
1173.byte	102,15,58,15,237,12
1174.byte	102,69,15,58,15,201,8
1175.byte	102,69,15,58,15,237,4
1176	paddd	%xmm6,%xmm2
1177	pxor	%xmm2,%xmm14
1178	pshufb	L$rol16(%rip),%xmm14
1179	paddd	%xmm14,%xmm10
1180	pxor	%xmm10,%xmm6
1181	movdqa	%xmm6,%xmm3
1182	pslld	$12,%xmm3
1183	psrld	$20,%xmm6
1184	pxor	%xmm3,%xmm6
1185	paddd	%xmm6,%xmm2
1186	pxor	%xmm2,%xmm14
1187	pshufb	L$rol8(%rip),%xmm14
1188	paddd	%xmm14,%xmm10
1189	pxor	%xmm10,%xmm6
1190	movdqa	%xmm6,%xmm3
1191	pslld	$7,%xmm3
1192	psrld	$25,%xmm6
1193	pxor	%xmm3,%xmm6
1194.byte	102,15,58,15,246,12
1195.byte	102,69,15,58,15,210,8
1196.byte	102,69,15,58,15,246,4
1197
1198	cmpq	%rcx,%r8
1199	jb	L$open_sse_tail_192_rounds_and_x1hash
1200	cmpq	$160,%r8
1201	jne	L$open_sse_tail_192_rounds
1202	cmpq	$176,%rbx
1203	jb	L$open_sse_tail_192_finish
1204	addq	0+160(%rsi),%r10
1205	adcq	8+160(%rsi),%r11
1206	adcq	$1,%r12
1207	movq	0+0+0(%rbp),%rax
1208	movq	%rax,%r15
1209	mulq	%r10
1210	movq	%rax,%r13
1211	movq	%rdx,%r14
1212	movq	0+0+0(%rbp),%rax
1213	mulq	%r11
1214	imulq	%r12,%r15
1215	addq	%rax,%r14
1216	adcq	%rdx,%r15
1217	movq	8+0+0(%rbp),%rax
1218	movq	%rax,%r9
1219	mulq	%r10
1220	addq	%rax,%r14
1221	adcq	$0,%rdx
1222	movq	%rdx,%r10
1223	movq	8+0+0(%rbp),%rax
1224	mulq	%r11
1225	addq	%rax,%r15
1226	adcq	$0,%rdx
1227	imulq	%r12,%r9
1228	addq	%r10,%r15
1229	adcq	%rdx,%r9
1230	movq	%r13,%r10
1231	movq	%r14,%r11
1232	movq	%r15,%r12
1233	andq	$3,%r12
1234	movq	%r15,%r13
1235	andq	$-4,%r13
1236	movq	%r9,%r14
1237	shrdq	$2,%r9,%r15
1238	shrq	$2,%r9
1239	addq	%r13,%r15
1240	adcq	%r14,%r9
1241	addq	%r15,%r10
1242	adcq	%r9,%r11
1243	adcq	$0,%r12
1244
1245	cmpq	$192,%rbx
1246	jb	L$open_sse_tail_192_finish
1247	addq	0+176(%rsi),%r10
1248	adcq	8+176(%rsi),%r11
1249	adcq	$1,%r12
1250	movq	0+0+0(%rbp),%rax
1251	movq	%rax,%r15
1252	mulq	%r10
1253	movq	%rax,%r13
1254	movq	%rdx,%r14
1255	movq	0+0+0(%rbp),%rax
1256	mulq	%r11
1257	imulq	%r12,%r15
1258	addq	%rax,%r14
1259	adcq	%rdx,%r15
1260	movq	8+0+0(%rbp),%rax
1261	movq	%rax,%r9
1262	mulq	%r10
1263	addq	%rax,%r14
1264	adcq	$0,%rdx
1265	movq	%rdx,%r10
1266	movq	8+0+0(%rbp),%rax
1267	mulq	%r11
1268	addq	%rax,%r15
1269	adcq	$0,%rdx
1270	imulq	%r12,%r9
1271	addq	%r10,%r15
1272	adcq	%rdx,%r9
1273	movq	%r13,%r10
1274	movq	%r14,%r11
1275	movq	%r15,%r12
1276	andq	$3,%r12
1277	movq	%r15,%r13
1278	andq	$-4,%r13
1279	movq	%r9,%r14
1280	shrdq	$2,%r9,%r15
1281	shrq	$2,%r9
1282	addq	%r13,%r15
1283	adcq	%r14,%r9
1284	addq	%r15,%r10
1285	adcq	%r9,%r11
1286	adcq	$0,%r12
1287
1288L$open_sse_tail_192_finish:
1289	paddd	L$chacha20_consts(%rip),%xmm2
1290	paddd	0+48(%rbp),%xmm6
1291	paddd	0+64(%rbp),%xmm10
1292	paddd	0+128(%rbp),%xmm14
1293	paddd	L$chacha20_consts(%rip),%xmm1
1294	paddd	0+48(%rbp),%xmm5
1295	paddd	0+64(%rbp),%xmm9
1296	paddd	0+112(%rbp),%xmm13
1297	paddd	L$chacha20_consts(%rip),%xmm0
1298	paddd	0+48(%rbp),%xmm4
1299	paddd	0+64(%rbp),%xmm8
1300	paddd	0+96(%rbp),%xmm12
1301	movdqu	0 + 0(%rsi),%xmm3
1302	movdqu	16 + 0(%rsi),%xmm7
1303	movdqu	32 + 0(%rsi),%xmm11
1304	movdqu	48 + 0(%rsi),%xmm15
1305	pxor	%xmm3,%xmm2
1306	pxor	%xmm7,%xmm6
1307	pxor	%xmm11,%xmm10
1308	pxor	%xmm14,%xmm15
1309	movdqu	%xmm2,0 + 0(%rdi)
1310	movdqu	%xmm6,16 + 0(%rdi)
1311	movdqu	%xmm10,32 + 0(%rdi)
1312	movdqu	%xmm15,48 + 0(%rdi)
1313	movdqu	0 + 64(%rsi),%xmm3
1314	movdqu	16 + 64(%rsi),%xmm7
1315	movdqu	32 + 64(%rsi),%xmm11
1316	movdqu	48 + 64(%rsi),%xmm15
1317	pxor	%xmm3,%xmm1
1318	pxor	%xmm7,%xmm5
1319	pxor	%xmm11,%xmm9
1320	pxor	%xmm13,%xmm15
1321	movdqu	%xmm1,0 + 64(%rdi)
1322	movdqu	%xmm5,16 + 64(%rdi)
1323	movdqu	%xmm9,32 + 64(%rdi)
1324	movdqu	%xmm15,48 + 64(%rdi)
1325
1326	subq	$128,%rbx
1327	leaq	128(%rsi),%rsi
1328	leaq	128(%rdi),%rdi
1329	jmp	L$open_sse_tail_64_dec_loop
1330
1331L$open_sse_tail_256:
1332	movdqa	L$chacha20_consts(%rip),%xmm0
1333	movdqa	0+48(%rbp),%xmm4
1334	movdqa	0+64(%rbp),%xmm8
1335	movdqa	%xmm0,%xmm1
1336	movdqa	%xmm4,%xmm5
1337	movdqa	%xmm8,%xmm9
1338	movdqa	%xmm0,%xmm2
1339	movdqa	%xmm4,%xmm6
1340	movdqa	%xmm8,%xmm10
1341	movdqa	%xmm0,%xmm3
1342	movdqa	%xmm4,%xmm7
1343	movdqa	%xmm8,%xmm11
1344	movdqa	0+96(%rbp),%xmm15
1345	paddd	L$sse_inc(%rip),%xmm15
1346	movdqa	%xmm15,%xmm14
1347	paddd	L$sse_inc(%rip),%xmm14
1348	movdqa	%xmm14,%xmm13
1349	paddd	L$sse_inc(%rip),%xmm13
1350	movdqa	%xmm13,%xmm12
1351	paddd	L$sse_inc(%rip),%xmm12
1352	movdqa	%xmm12,0+96(%rbp)
1353	movdqa	%xmm13,0+112(%rbp)
1354	movdqa	%xmm14,0+128(%rbp)
1355	movdqa	%xmm15,0+144(%rbp)
1356
1357	xorq	%r8,%r8
1358L$open_sse_tail_256_rounds_and_x1hash:
1359	addq	0+0(%rsi,%r8,1),%r10
1360	adcq	8+0(%rsi,%r8,1),%r11
1361	adcq	$1,%r12
1362	movdqa	%xmm11,0+80(%rbp)
1363	paddd	%xmm4,%xmm0
1364	pxor	%xmm0,%xmm12
1365	pshufb	L$rol16(%rip),%xmm12
1366	paddd	%xmm12,%xmm8
1367	pxor	%xmm8,%xmm4
1368	movdqa	%xmm4,%xmm11
1369	pslld	$12,%xmm11
1370	psrld	$20,%xmm4
1371	pxor	%xmm11,%xmm4
1372	paddd	%xmm4,%xmm0
1373	pxor	%xmm0,%xmm12
1374	pshufb	L$rol8(%rip),%xmm12
1375	paddd	%xmm12,%xmm8
1376	pxor	%xmm8,%xmm4
1377	movdqa	%xmm4,%xmm11
1378	pslld	$7,%xmm11
1379	psrld	$25,%xmm4
1380	pxor	%xmm11,%xmm4
1381.byte	102,15,58,15,228,4
1382.byte	102,69,15,58,15,192,8
1383.byte	102,69,15,58,15,228,12
1384	paddd	%xmm5,%xmm1
1385	pxor	%xmm1,%xmm13
1386	pshufb	L$rol16(%rip),%xmm13
1387	paddd	%xmm13,%xmm9
1388	pxor	%xmm9,%xmm5
1389	movdqa	%xmm5,%xmm11
1390	pslld	$12,%xmm11
1391	psrld	$20,%xmm5
1392	pxor	%xmm11,%xmm5
1393	paddd	%xmm5,%xmm1
1394	pxor	%xmm1,%xmm13
1395	pshufb	L$rol8(%rip),%xmm13
1396	paddd	%xmm13,%xmm9
1397	pxor	%xmm9,%xmm5
1398	movdqa	%xmm5,%xmm11
1399	pslld	$7,%xmm11
1400	psrld	$25,%xmm5
1401	pxor	%xmm11,%xmm5
1402.byte	102,15,58,15,237,4
1403.byte	102,69,15,58,15,201,8
1404.byte	102,69,15,58,15,237,12
1405	paddd	%xmm6,%xmm2
1406	pxor	%xmm2,%xmm14
1407	pshufb	L$rol16(%rip),%xmm14
1408	paddd	%xmm14,%xmm10
1409	pxor	%xmm10,%xmm6
1410	movdqa	%xmm6,%xmm11
1411	pslld	$12,%xmm11
1412	psrld	$20,%xmm6
1413	pxor	%xmm11,%xmm6
1414	paddd	%xmm6,%xmm2
1415	pxor	%xmm2,%xmm14
1416	pshufb	L$rol8(%rip),%xmm14
1417	paddd	%xmm14,%xmm10
1418	pxor	%xmm10,%xmm6
1419	movdqa	%xmm6,%xmm11
1420	pslld	$7,%xmm11
1421	psrld	$25,%xmm6
1422	pxor	%xmm11,%xmm6
1423.byte	102,15,58,15,246,4
1424.byte	102,69,15,58,15,210,8
1425.byte	102,69,15,58,15,246,12
1426	movdqa	0+80(%rbp),%xmm11
1427	movq	0+0+0(%rbp),%rax
1428	movq	%rax,%r15
1429	mulq	%r10
1430	movq	%rax,%r13
1431	movq	%rdx,%r14
1432	movq	0+0+0(%rbp),%rax
1433	mulq	%r11
1434	imulq	%r12,%r15
1435	addq	%rax,%r14
1436	adcq	%rdx,%r15
1437	movdqa	%xmm9,0+80(%rbp)
1438	paddd	%xmm7,%xmm3
1439	pxor	%xmm3,%xmm15
1440	pshufb	L$rol16(%rip),%xmm15
1441	paddd	%xmm15,%xmm11
1442	pxor	%xmm11,%xmm7
1443	movdqa	%xmm7,%xmm9
1444	pslld	$12,%xmm9
1445	psrld	$20,%xmm7
1446	pxor	%xmm9,%xmm7
1447	paddd	%xmm7,%xmm3
1448	pxor	%xmm3,%xmm15
1449	pshufb	L$rol8(%rip),%xmm15
1450	paddd	%xmm15,%xmm11
1451	pxor	%xmm11,%xmm7
1452	movdqa	%xmm7,%xmm9
1453	pslld	$7,%xmm9
1454	psrld	$25,%xmm7
1455	pxor	%xmm9,%xmm7
1456.byte	102,15,58,15,255,4
1457.byte	102,69,15,58,15,219,8
1458.byte	102,69,15,58,15,255,12
1459	movdqa	0+80(%rbp),%xmm9
1460	movq	8+0+0(%rbp),%rax
1461	movq	%rax,%r9
1462	mulq	%r10
1463	addq	%rax,%r14
1464	adcq	$0,%rdx
1465	movq	%rdx,%r10
1466	movq	8+0+0(%rbp),%rax
1467	mulq	%r11
1468	addq	%rax,%r15
1469	adcq	$0,%rdx
1470	movdqa	%xmm11,0+80(%rbp)
1471	paddd	%xmm4,%xmm0
1472	pxor	%xmm0,%xmm12
1473	pshufb	L$rol16(%rip),%xmm12
1474	paddd	%xmm12,%xmm8
1475	pxor	%xmm8,%xmm4
1476	movdqa	%xmm4,%xmm11
1477	pslld	$12,%xmm11
1478	psrld	$20,%xmm4
1479	pxor	%xmm11,%xmm4
1480	paddd	%xmm4,%xmm0
1481	pxor	%xmm0,%xmm12
1482	pshufb	L$rol8(%rip),%xmm12
1483	paddd	%xmm12,%xmm8
1484	pxor	%xmm8,%xmm4
1485	movdqa	%xmm4,%xmm11
1486	pslld	$7,%xmm11
1487	psrld	$25,%xmm4
1488	pxor	%xmm11,%xmm4
1489.byte	102,15,58,15,228,12
1490.byte	102,69,15,58,15,192,8
1491.byte	102,69,15,58,15,228,4
1492	paddd	%xmm5,%xmm1
1493	pxor	%xmm1,%xmm13
1494	pshufb	L$rol16(%rip),%xmm13
1495	paddd	%xmm13,%xmm9
1496	pxor	%xmm9,%xmm5
1497	movdqa	%xmm5,%xmm11
1498	pslld	$12,%xmm11
1499	psrld	$20,%xmm5
1500	pxor	%xmm11,%xmm5
1501	paddd	%xmm5,%xmm1
1502	pxor	%xmm1,%xmm13
1503	pshufb	L$rol8(%rip),%xmm13
1504	paddd	%xmm13,%xmm9
1505	pxor	%xmm9,%xmm5
1506	movdqa	%xmm5,%xmm11
1507	pslld	$7,%xmm11
1508	psrld	$25,%xmm5
1509	pxor	%xmm11,%xmm5
1510.byte	102,15,58,15,237,12
1511.byte	102,69,15,58,15,201,8
1512.byte	102,69,15,58,15,237,4
1513	imulq	%r12,%r9
1514	addq	%r10,%r15
1515	adcq	%rdx,%r9
1516	paddd	%xmm6,%xmm2
1517	pxor	%xmm2,%xmm14
1518	pshufb	L$rol16(%rip),%xmm14
1519	paddd	%xmm14,%xmm10
1520	pxor	%xmm10,%xmm6
1521	movdqa	%xmm6,%xmm11
1522	pslld	$12,%xmm11
1523	psrld	$20,%xmm6
1524	pxor	%xmm11,%xmm6
1525	paddd	%xmm6,%xmm2
1526	pxor	%xmm2,%xmm14
1527	pshufb	L$rol8(%rip),%xmm14
1528	paddd	%xmm14,%xmm10
1529	pxor	%xmm10,%xmm6
1530	movdqa	%xmm6,%xmm11
1531	pslld	$7,%xmm11
1532	psrld	$25,%xmm6
1533	pxor	%xmm11,%xmm6
1534.byte	102,15,58,15,246,12
1535.byte	102,69,15,58,15,210,8
1536.byte	102,69,15,58,15,246,4
1537	movdqa	0+80(%rbp),%xmm11
1538	movq	%r13,%r10
1539	movq	%r14,%r11
1540	movq	%r15,%r12
1541	andq	$3,%r12
1542	movq	%r15,%r13
1543	andq	$-4,%r13
1544	movq	%r9,%r14
1545	shrdq	$2,%r9,%r15
1546	shrq	$2,%r9
1547	addq	%r13,%r15
1548	adcq	%r14,%r9
1549	addq	%r15,%r10
1550	adcq	%r9,%r11
1551	adcq	$0,%r12
1552	movdqa	%xmm9,0+80(%rbp)
1553	paddd	%xmm7,%xmm3
1554	pxor	%xmm3,%xmm15
1555	pshufb	L$rol16(%rip),%xmm15
1556	paddd	%xmm15,%xmm11
1557	pxor	%xmm11,%xmm7
1558	movdqa	%xmm7,%xmm9
1559	pslld	$12,%xmm9
1560	psrld	$20,%xmm7
1561	pxor	%xmm9,%xmm7
1562	paddd	%xmm7,%xmm3
1563	pxor	%xmm3,%xmm15
1564	pshufb	L$rol8(%rip),%xmm15
1565	paddd	%xmm15,%xmm11
1566	pxor	%xmm11,%xmm7
1567	movdqa	%xmm7,%xmm9
1568	pslld	$7,%xmm9
1569	psrld	$25,%xmm7
1570	pxor	%xmm9,%xmm7
1571.byte	102,15,58,15,255,12
1572.byte	102,69,15,58,15,219,8
1573.byte	102,69,15,58,15,255,4
1574	movdqa	0+80(%rbp),%xmm9
1575
1576	addq	$16,%r8
1577	cmpq	$160,%r8
1578	jb	L$open_sse_tail_256_rounds_and_x1hash
1579
1580	movq	%rbx,%rcx
1581	andq	$-16,%rcx
1582L$open_sse_tail_256_hash:
1583	addq	0+0(%rsi,%r8,1),%r10
1584	adcq	8+0(%rsi,%r8,1),%r11
1585	adcq	$1,%r12
1586	movq	0+0+0(%rbp),%rax
1587	movq	%rax,%r15
1588	mulq	%r10
1589	movq	%rax,%r13
1590	movq	%rdx,%r14
1591	movq	0+0+0(%rbp),%rax
1592	mulq	%r11
1593	imulq	%r12,%r15
1594	addq	%rax,%r14
1595	adcq	%rdx,%r15
1596	movq	8+0+0(%rbp),%rax
1597	movq	%rax,%r9
1598	mulq	%r10
1599	addq	%rax,%r14
1600	adcq	$0,%rdx
1601	movq	%rdx,%r10
1602	movq	8+0+0(%rbp),%rax
1603	mulq	%r11
1604	addq	%rax,%r15
1605	adcq	$0,%rdx
1606	imulq	%r12,%r9
1607	addq	%r10,%r15
1608	adcq	%rdx,%r9
1609	movq	%r13,%r10
1610	movq	%r14,%r11
1611	movq	%r15,%r12
1612	andq	$3,%r12
1613	movq	%r15,%r13
1614	andq	$-4,%r13
1615	movq	%r9,%r14
1616	shrdq	$2,%r9,%r15
1617	shrq	$2,%r9
1618	addq	%r13,%r15
1619	adcq	%r14,%r9
1620	addq	%r15,%r10
1621	adcq	%r9,%r11
1622	adcq	$0,%r12
1623
1624	addq	$16,%r8
1625	cmpq	%rcx,%r8
1626	jb	L$open_sse_tail_256_hash
1627	paddd	L$chacha20_consts(%rip),%xmm3
1628	paddd	0+48(%rbp),%xmm7
1629	paddd	0+64(%rbp),%xmm11
1630	paddd	0+144(%rbp),%xmm15
1631	paddd	L$chacha20_consts(%rip),%xmm2
1632	paddd	0+48(%rbp),%xmm6
1633	paddd	0+64(%rbp),%xmm10
1634	paddd	0+128(%rbp),%xmm14
1635	paddd	L$chacha20_consts(%rip),%xmm1
1636	paddd	0+48(%rbp),%xmm5
1637	paddd	0+64(%rbp),%xmm9
1638	paddd	0+112(%rbp),%xmm13
1639	paddd	L$chacha20_consts(%rip),%xmm0
1640	paddd	0+48(%rbp),%xmm4
1641	paddd	0+64(%rbp),%xmm8
1642	paddd	0+96(%rbp),%xmm12
1643	movdqa	%xmm12,0+80(%rbp)
1644	movdqu	0 + 0(%rsi),%xmm12
1645	pxor	%xmm3,%xmm12
1646	movdqu	%xmm12,0 + 0(%rdi)
1647	movdqu	16 + 0(%rsi),%xmm12
1648	pxor	%xmm7,%xmm12
1649	movdqu	%xmm12,16 + 0(%rdi)
1650	movdqu	32 + 0(%rsi),%xmm12
1651	pxor	%xmm11,%xmm12
1652	movdqu	%xmm12,32 + 0(%rdi)
1653	movdqu	48 + 0(%rsi),%xmm12
1654	pxor	%xmm15,%xmm12
1655	movdqu	%xmm12,48 + 0(%rdi)
1656	movdqu	0 + 64(%rsi),%xmm3
1657	movdqu	16 + 64(%rsi),%xmm7
1658	movdqu	32 + 64(%rsi),%xmm11
1659	movdqu	48 + 64(%rsi),%xmm15
1660	pxor	%xmm3,%xmm2
1661	pxor	%xmm7,%xmm6
1662	pxor	%xmm11,%xmm10
1663	pxor	%xmm14,%xmm15
1664	movdqu	%xmm2,0 + 64(%rdi)
1665	movdqu	%xmm6,16 + 64(%rdi)
1666	movdqu	%xmm10,32 + 64(%rdi)
1667	movdqu	%xmm15,48 + 64(%rdi)
1668	movdqu	0 + 128(%rsi),%xmm3
1669	movdqu	16 + 128(%rsi),%xmm7
1670	movdqu	32 + 128(%rsi),%xmm11
1671	movdqu	48 + 128(%rsi),%xmm15
1672	pxor	%xmm3,%xmm1
1673	pxor	%xmm7,%xmm5
1674	pxor	%xmm11,%xmm9
1675	pxor	%xmm13,%xmm15
1676	movdqu	%xmm1,0 + 128(%rdi)
1677	movdqu	%xmm5,16 + 128(%rdi)
1678	movdqu	%xmm9,32 + 128(%rdi)
1679	movdqu	%xmm15,48 + 128(%rdi)
1680
1681	movdqa	0+80(%rbp),%xmm12
1682	subq	$192,%rbx
1683	leaq	192(%rsi),%rsi
1684	leaq	192(%rdi),%rdi
1685
1686
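# Decrypt the remaining full 16-byte chunks of the already-hashed tail
# using the key stream left in %xmm0/%xmm4/%xmm8/%xmm12.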
1687L$open_sse_tail_64_dec_loop:
1688	cmpq	$16,%rbx
1689	jb	L$open_sse_tail_16_init
1690	subq	$16,%rbx
1691	movdqu	(%rsi),%xmm3
1692	pxor	%xmm3,%xmm0
1693	movdqu	%xmm0,(%rdi)
1694	leaq	16(%rsi),%rsi
1695	leaq	16(%rdi),%rdi
1696	movdqa	%xmm4,%xmm0
1697	movdqa	%xmm8,%xmm4
1698	movdqa	%xmm12,%xmm8
1699	jmp	L$open_sse_tail_64_dec_loop
1700L$open_sse_tail_16_init:
1701	movdqa	%xmm0,%xmm1
1702
1703
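# Final 1-15 bytes: gather the leftover ciphertext into %xmm3, XOR it
# with the last key-stream block, write it out byte by byte, and absorb
# it into the Poly1305 state as a zero-padded block.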
1704L$open_sse_tail_16:
1705	testq	%rbx,%rbx
1706	jz	L$open_sse_finalize
1707
1708
1709
1710	pxor	%xmm3,%xmm3
1711	leaq	-1(%rsi,%rbx,1),%rsi
1712	movq	%rbx,%r8
1713L$open_sse_tail_16_compose:
1714	pslldq	$1,%xmm3
1715	pinsrb	$0,(%rsi),%xmm3
1716	subq	$1,%rsi
1717	subq	$1,%r8
1718	jnz	L$open_sse_tail_16_compose
1719
1720.byte	102,73,15,126,221
1721	pextrq	$1,%xmm3,%r14
1722
1723	pxor	%xmm1,%xmm3
1724
1725
1726L$open_sse_tail_16_extract:
1727	pextrb	$0,%xmm3,(%rdi)
1728	psrldq	$1,%xmm3
1729	addq	$1,%rdi
1730	subq	$1,%rbx
1731	jne	L$open_sse_tail_16_extract
1732
1733	addq	%r13,%r10
1734	adcq	%r14,%r11
1735	adcq	$1,%r12
1736	movq	0+0+0(%rbp),%rax
1737	movq	%rax,%r15
1738	mulq	%r10
1739	movq	%rax,%r13
1740	movq	%rdx,%r14
1741	movq	0+0+0(%rbp),%rax
1742	mulq	%r11
1743	imulq	%r12,%r15
1744	addq	%rax,%r14
1745	adcq	%rdx,%r15
1746	movq	8+0+0(%rbp),%rax
1747	movq	%rax,%r9
1748	mulq	%r10
1749	addq	%rax,%r14
1750	adcq	$0,%rdx
1751	movq	%rdx,%r10
1752	movq	8+0+0(%rbp),%rax
1753	mulq	%r11
1754	addq	%rax,%r15
1755	adcq	$0,%rdx
1756	imulq	%r12,%r9
1757	addq	%r10,%r15
1758	adcq	%rdx,%r9
1759	movq	%r13,%r10
1760	movq	%r14,%r11
1761	movq	%r15,%r12
1762	andq	$3,%r12
1763	movq	%r15,%r13
1764	andq	$-4,%r13
1765	movq	%r9,%r14
1766	shrdq	$2,%r9,%r15
1767	shrq	$2,%r9
1768	addq	%r13,%r15
1769	adcq	%r14,%r9
1770	addq	%r15,%r10
1771	adcq	%r9,%r11
1772	adcq	$0,%r12
1773
1774
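# Finalize: hash the encoded AAD/ciphertext lengths stored at 32(%rbp),
# fully reduce the accumulator mod 2^130-5, add s from 16(%rbp), and
# write the 16-byte tag through the saved %r9.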
1775L$open_sse_finalize:
1776	addq	0+0+32(%rbp),%r10
1777	adcq	8+0+32(%rbp),%r11
1778	adcq	$1,%r12
1779	movq	0+0+0(%rbp),%rax
1780	movq	%rax,%r15
1781	mulq	%r10
1782	movq	%rax,%r13
1783	movq	%rdx,%r14
1784	movq	0+0+0(%rbp),%rax
1785	mulq	%r11
1786	imulq	%r12,%r15
1787	addq	%rax,%r14
1788	adcq	%rdx,%r15
1789	movq	8+0+0(%rbp),%rax
1790	movq	%rax,%r9
1791	mulq	%r10
1792	addq	%rax,%r14
1793	adcq	$0,%rdx
1794	movq	%rdx,%r10
1795	movq	8+0+0(%rbp),%rax
1796	mulq	%r11
1797	addq	%rax,%r15
1798	adcq	$0,%rdx
1799	imulq	%r12,%r9
1800	addq	%r10,%r15
1801	adcq	%rdx,%r9
1802	movq	%r13,%r10
1803	movq	%r14,%r11
1804	movq	%r15,%r12
1805	andq	$3,%r12
1806	movq	%r15,%r13
1807	andq	$-4,%r13
1808	movq	%r9,%r14
1809	shrdq	$2,%r9,%r15
1810	shrq	$2,%r9
1811	addq	%r13,%r15
1812	adcq	%r14,%r9
1813	addq	%r15,%r10
1814	adcq	%r9,%r11
1815	adcq	$0,%r12
1816
1817
1818	movq	%r10,%r13
1819	movq	%r11,%r14
1820	movq	%r12,%r15
1821	subq	$-5,%r10
1822	sbbq	$-1,%r11
1823	sbbq	$3,%r12
1824	cmovcq	%r13,%r10
1825	cmovcq	%r14,%r11
1826	cmovcq	%r15,%r12
1827
1828	addq	0+0+16(%rbp),%r10
1829	adcq	8+0+16(%rbp),%r11
1830
1831
1832	addq	$288 + 0 + 32,%rsp
1833
1834
1835	popq	%r9
1836
1837	movq	%r10,(%r9)
1838	movq	%r11,8(%r9)
1839	popq	%r15
1840
1841	popq	%r14
1842
1843	popq	%r13
1844
1845	popq	%r12
1846
1847	popq	%rbx
1848
1849	popq	%rbp
1850
1851	.byte	0xf3,0xc3
1852
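# Short open path (total length <= 128 bytes): generate three ChaCha20
# blocks with plain SSE; block 0 supplies the Poly1305 key and the
# remaining key stream decrypts the input 16 bytes at a time.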
1853L$open_sse_128:
1854
1855	movdqu	L$chacha20_consts(%rip),%xmm0
1856	movdqa	%xmm0,%xmm1
1857	movdqa	%xmm0,%xmm2
1858	movdqu	0(%r9),%xmm4
1859	movdqa	%xmm4,%xmm5
1860	movdqa	%xmm4,%xmm6
1861	movdqu	16(%r9),%xmm8
1862	movdqa	%xmm8,%xmm9
1863	movdqa	%xmm8,%xmm10
1864	movdqu	32(%r9),%xmm12
1865	movdqa	%xmm12,%xmm13
1866	paddd	L$sse_inc(%rip),%xmm13
1867	movdqa	%xmm13,%xmm14
1868	paddd	L$sse_inc(%rip),%xmm14
1869	movdqa	%xmm4,%xmm7
1870	movdqa	%xmm8,%xmm11
1871	movdqa	%xmm13,%xmm15
1872	movq	$10,%r10
1873
1874L$open_sse_128_rounds:
1875	paddd	%xmm4,%xmm0
1876	pxor	%xmm0,%xmm12
1877	pshufb	L$rol16(%rip),%xmm12
1878	paddd	%xmm12,%xmm8
1879	pxor	%xmm8,%xmm4
1880	movdqa	%xmm4,%xmm3
1881	pslld	$12,%xmm3
1882	psrld	$20,%xmm4
1883	pxor	%xmm3,%xmm4
1884	paddd	%xmm4,%xmm0
1885	pxor	%xmm0,%xmm12
1886	pshufb	L$rol8(%rip),%xmm12
1887	paddd	%xmm12,%xmm8
1888	pxor	%xmm8,%xmm4
1889	movdqa	%xmm4,%xmm3
1890	pslld	$7,%xmm3
1891	psrld	$25,%xmm4
1892	pxor	%xmm3,%xmm4
1893.byte	102,15,58,15,228,4
1894.byte	102,69,15,58,15,192,8
1895.byte	102,69,15,58,15,228,12
1896	paddd	%xmm5,%xmm1
1897	pxor	%xmm1,%xmm13
1898	pshufb	L$rol16(%rip),%xmm13
1899	paddd	%xmm13,%xmm9
1900	pxor	%xmm9,%xmm5
1901	movdqa	%xmm5,%xmm3
1902	pslld	$12,%xmm3
1903	psrld	$20,%xmm5
1904	pxor	%xmm3,%xmm5
1905	paddd	%xmm5,%xmm1
1906	pxor	%xmm1,%xmm13
1907	pshufb	L$rol8(%rip),%xmm13
1908	paddd	%xmm13,%xmm9
1909	pxor	%xmm9,%xmm5
1910	movdqa	%xmm5,%xmm3
1911	pslld	$7,%xmm3
1912	psrld	$25,%xmm5
1913	pxor	%xmm3,%xmm5
1914.byte	102,15,58,15,237,4
1915.byte	102,69,15,58,15,201,8
1916.byte	102,69,15,58,15,237,12
1917	paddd	%xmm6,%xmm2
1918	pxor	%xmm2,%xmm14
1919	pshufb	L$rol16(%rip),%xmm14
1920	paddd	%xmm14,%xmm10
1921	pxor	%xmm10,%xmm6
1922	movdqa	%xmm6,%xmm3
1923	pslld	$12,%xmm3
1924	psrld	$20,%xmm6
1925	pxor	%xmm3,%xmm6
1926	paddd	%xmm6,%xmm2
1927	pxor	%xmm2,%xmm14
1928	pshufb	L$rol8(%rip),%xmm14
1929	paddd	%xmm14,%xmm10
1930	pxor	%xmm10,%xmm6
1931	movdqa	%xmm6,%xmm3
1932	pslld	$7,%xmm3
1933	psrld	$25,%xmm6
1934	pxor	%xmm3,%xmm6
1935.byte	102,15,58,15,246,4
1936.byte	102,69,15,58,15,210,8
1937.byte	102,69,15,58,15,246,12
1938	paddd	%xmm4,%xmm0
1939	pxor	%xmm0,%xmm12
1940	pshufb	L$rol16(%rip),%xmm12
1941	paddd	%xmm12,%xmm8
1942	pxor	%xmm8,%xmm4
1943	movdqa	%xmm4,%xmm3
1944	pslld	$12,%xmm3
1945	psrld	$20,%xmm4
1946	pxor	%xmm3,%xmm4
1947	paddd	%xmm4,%xmm0
1948	pxor	%xmm0,%xmm12
1949	pshufb	L$rol8(%rip),%xmm12
1950	paddd	%xmm12,%xmm8
1951	pxor	%xmm8,%xmm4
1952	movdqa	%xmm4,%xmm3
1953	pslld	$7,%xmm3
1954	psrld	$25,%xmm4
1955	pxor	%xmm3,%xmm4
1956.byte	102,15,58,15,228,12
1957.byte	102,69,15,58,15,192,8
1958.byte	102,69,15,58,15,228,4
1959	paddd	%xmm5,%xmm1
1960	pxor	%xmm1,%xmm13
1961	pshufb	L$rol16(%rip),%xmm13
1962	paddd	%xmm13,%xmm9
1963	pxor	%xmm9,%xmm5
1964	movdqa	%xmm5,%xmm3
1965	pslld	$12,%xmm3
1966	psrld	$20,%xmm5
1967	pxor	%xmm3,%xmm5
1968	paddd	%xmm5,%xmm1
1969	pxor	%xmm1,%xmm13
1970	pshufb	L$rol8(%rip),%xmm13
1971	paddd	%xmm13,%xmm9
1972	pxor	%xmm9,%xmm5
1973	movdqa	%xmm5,%xmm3
1974	pslld	$7,%xmm3
1975	psrld	$25,%xmm5
1976	pxor	%xmm3,%xmm5
1977.byte	102,15,58,15,237,12
1978.byte	102,69,15,58,15,201,8
1979.byte	102,69,15,58,15,237,4
1980	paddd	%xmm6,%xmm2
1981	pxor	%xmm2,%xmm14
1982	pshufb	L$rol16(%rip),%xmm14
1983	paddd	%xmm14,%xmm10
1984	pxor	%xmm10,%xmm6
1985	movdqa	%xmm6,%xmm3
1986	pslld	$12,%xmm3
1987	psrld	$20,%xmm6
1988	pxor	%xmm3,%xmm6
1989	paddd	%xmm6,%xmm2
1990	pxor	%xmm2,%xmm14
1991	pshufb	L$rol8(%rip),%xmm14
1992	paddd	%xmm14,%xmm10
1993	pxor	%xmm10,%xmm6
1994	movdqa	%xmm6,%xmm3
1995	pslld	$7,%xmm3
1996	psrld	$25,%xmm6
1997	pxor	%xmm3,%xmm6
1998.byte	102,15,58,15,246,12
1999.byte	102,69,15,58,15,210,8
2000.byte	102,69,15,58,15,246,4
2001
2002	decq	%r10
2003	jnz	L$open_sse_128_rounds
2004	paddd	L$chacha20_consts(%rip),%xmm0
2005	paddd	L$chacha20_consts(%rip),%xmm1
2006	paddd	L$chacha20_consts(%rip),%xmm2
2007	paddd	%xmm7,%xmm4
2008	paddd	%xmm7,%xmm5
2009	paddd	%xmm7,%xmm6
2010	paddd	%xmm11,%xmm9
2011	paddd	%xmm11,%xmm10
2012	paddd	%xmm15,%xmm13
2013	paddd	L$sse_inc(%rip),%xmm15
2014	paddd	%xmm15,%xmm14
2015
2016	pand	L$clamp(%rip),%xmm0
2017	movdqa	%xmm0,0+0(%rbp)
2018	movdqa	%xmm4,0+16(%rbp)
2019
2020	movq	%r8,%r8
2021	call	poly_hash_ad_internal
2022L$open_sse_128_xor_hash:
2023	cmpq	$16,%rbx
2024	jb	L$open_sse_tail_16
2025	subq	$16,%rbx
2026	addq	0+0(%rsi),%r10
2027	adcq	8+0(%rsi),%r11
2028	adcq	$1,%r12
2029
2030
2031	movdqu	0(%rsi),%xmm3
2032	pxor	%xmm3,%xmm1
2033	movdqu	%xmm1,0(%rdi)
2034	leaq	16(%rsi),%rsi
2035	leaq	16(%rdi),%rdi
2036	movq	0+0+0(%rbp),%rax
2037	movq	%rax,%r15
2038	mulq	%r10
2039	movq	%rax,%r13
2040	movq	%rdx,%r14
2041	movq	0+0+0(%rbp),%rax
2042	mulq	%r11
2043	imulq	%r12,%r15
2044	addq	%rax,%r14
2045	adcq	%rdx,%r15
2046	movq	8+0+0(%rbp),%rax
2047	movq	%rax,%r9
2048	mulq	%r10
2049	addq	%rax,%r14
2050	adcq	$0,%rdx
2051	movq	%rdx,%r10
2052	movq	8+0+0(%rbp),%rax
2053	mulq	%r11
2054	addq	%rax,%r15
2055	adcq	$0,%rdx
2056	imulq	%r12,%r9
2057	addq	%r10,%r15
2058	adcq	%rdx,%r9
2059	movq	%r13,%r10
2060	movq	%r14,%r11
2061	movq	%r15,%r12
2062	andq	$3,%r12
2063	movq	%r15,%r13
2064	andq	$-4,%r13
2065	movq	%r9,%r14
2066	shrdq	$2,%r9,%r15
2067	shrq	$2,%r9
2068	addq	%r13,%r15
2069	adcq	%r14,%r9
2070	addq	%r15,%r10
2071	adcq	%r9,%r11
2072	adcq	$0,%r12
2073
2074
2075	movdqa	%xmm5,%xmm1
2076	movdqa	%xmm9,%xmm5
2077	movdqa	%xmm13,%xmm9
2078	movdqa	%xmm2,%xmm13
2079	movdqa	%xmm6,%xmm2
2080	movdqa	%xmm10,%xmm6
2081	movdqa	%xmm14,%xmm10
2082	jmp	L$open_sse_128_xor_hash
2083
2084
2085
2086
2087
2088
2089
2090
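# Seal (encrypt-and-authenticate) entry point. Same register conventions
# as the open routine above, except that Poly1305 runs over the
# ciphertext this routine writes to %rdi.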
2091.globl	_GFp_chacha20_poly1305_seal
2092.private_extern _GFp_chacha20_poly1305_seal
2093
2094.p2align	6
2095_GFp_chacha20_poly1305_seal:
2096
2097	pushq	%rbp
2098
2099	pushq	%rbx
2100
2101	pushq	%r12
2102
2103	pushq	%r13
2104
2105	pushq	%r14
2106
2107	pushq	%r15
2108
2109
2110
2111	pushq	%r9
2112
2113	subq	$288 + 0 + 32,%rsp
2114
2115	leaq	32(%rsp),%rbp
2116	andq	$-32,%rbp
2117
2118	movq	56(%r9),%rbx
2119	addq	%rdx,%rbx
2120	movq	%r8,0+0+32(%rbp)
2121	movq	%rbx,8+0+32(%rbp)
2122	movq	%rdx,%rbx
2123
2124	movl	_GFp_ia32cap_P+8(%rip),%eax
2125	andl	$288,%eax
2126	xorl	$288,%eax
2127	jz	chacha20_poly1305_seal_avx2
2128
2129	cmpq	$128,%rbx
2130	jbe	L$seal_sse_128
2131
2132	movdqa	L$chacha20_consts(%rip),%xmm0
2133	movdqu	0(%r9),%xmm4
2134	movdqu	16(%r9),%xmm8
2135	movdqu	32(%r9),%xmm12
2136
2137	movdqa	%xmm0,%xmm1
2138	movdqa	%xmm0,%xmm2
2139	movdqa	%xmm0,%xmm3
2140	movdqa	%xmm4,%xmm5
2141	movdqa	%xmm4,%xmm6
2142	movdqa	%xmm4,%xmm7
2143	movdqa	%xmm8,%xmm9
2144	movdqa	%xmm8,%xmm10
2145	movdqa	%xmm8,%xmm11
2146	movdqa	%xmm12,%xmm15
2147	paddd	L$sse_inc(%rip),%xmm12
2148	movdqa	%xmm12,%xmm14
2149	paddd	L$sse_inc(%rip),%xmm12
2150	movdqa	%xmm12,%xmm13
2151	paddd	L$sse_inc(%rip),%xmm12
2152
2153	movdqa	%xmm4,0+48(%rbp)
2154	movdqa	%xmm8,0+64(%rbp)
2155	movdqa	%xmm12,0+96(%rbp)
2156	movdqa	%xmm13,0+112(%rbp)
2157	movdqa	%xmm14,0+128(%rbp)
2158	movdqa	%xmm15,0+144(%rbp)
2159	movq	$10,%r10
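# Seal setup: the first four ChaCha20 blocks are computed together;
# block 0 (the %xmm3 column, counter 0) is clamped into the Poly1305 key,
# and the other three blocks encrypt the first 192 bytes of plaintext.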
2160L$seal_sse_init_rounds:
2161	movdqa	%xmm8,0+80(%rbp)
2162	movdqa	L$rol16(%rip),%xmm8
2163	paddd	%xmm7,%xmm3
2164	paddd	%xmm6,%xmm2
2165	paddd	%xmm5,%xmm1
2166	paddd	%xmm4,%xmm0
2167	pxor	%xmm3,%xmm15
2168	pxor	%xmm2,%xmm14
2169	pxor	%xmm1,%xmm13
2170	pxor	%xmm0,%xmm12
2171.byte	102,69,15,56,0,248
2172.byte	102,69,15,56,0,240
2173.byte	102,69,15,56,0,232
2174.byte	102,69,15,56,0,224
2175	movdqa	0+80(%rbp),%xmm8
2176	paddd	%xmm15,%xmm11
2177	paddd	%xmm14,%xmm10
2178	paddd	%xmm13,%xmm9
2179	paddd	%xmm12,%xmm8
2180	pxor	%xmm11,%xmm7
2181	pxor	%xmm10,%xmm6
2182	pxor	%xmm9,%xmm5
2183	pxor	%xmm8,%xmm4
2184	movdqa	%xmm8,0+80(%rbp)
2185	movdqa	%xmm7,%xmm8
2186	psrld	$20,%xmm8
2187	pslld	$32-20,%xmm7
2188	pxor	%xmm8,%xmm7
2189	movdqa	%xmm6,%xmm8
2190	psrld	$20,%xmm8
2191	pslld	$32-20,%xmm6
2192	pxor	%xmm8,%xmm6
2193	movdqa	%xmm5,%xmm8
2194	psrld	$20,%xmm8
2195	pslld	$32-20,%xmm5
2196	pxor	%xmm8,%xmm5
2197	movdqa	%xmm4,%xmm8
2198	psrld	$20,%xmm8
2199	pslld	$32-20,%xmm4
2200	pxor	%xmm8,%xmm4
2201	movdqa	L$rol8(%rip),%xmm8
2202	paddd	%xmm7,%xmm3
2203	paddd	%xmm6,%xmm2
2204	paddd	%xmm5,%xmm1
2205	paddd	%xmm4,%xmm0
2206	pxor	%xmm3,%xmm15
2207	pxor	%xmm2,%xmm14
2208	pxor	%xmm1,%xmm13
2209	pxor	%xmm0,%xmm12
2210.byte	102,69,15,56,0,248
2211.byte	102,69,15,56,0,240
2212.byte	102,69,15,56,0,232
2213.byte	102,69,15,56,0,224
2214	movdqa	0+80(%rbp),%xmm8
2215	paddd	%xmm15,%xmm11
2216	paddd	%xmm14,%xmm10
2217	paddd	%xmm13,%xmm9
2218	paddd	%xmm12,%xmm8
2219	pxor	%xmm11,%xmm7
2220	pxor	%xmm10,%xmm6
2221	pxor	%xmm9,%xmm5
2222	pxor	%xmm8,%xmm4
2223	movdqa	%xmm8,0+80(%rbp)
2224	movdqa	%xmm7,%xmm8
2225	psrld	$25,%xmm8
2226	pslld	$32-25,%xmm7
2227	pxor	%xmm8,%xmm7
2228	movdqa	%xmm6,%xmm8
2229	psrld	$25,%xmm8
2230	pslld	$32-25,%xmm6
2231	pxor	%xmm8,%xmm6
2232	movdqa	%xmm5,%xmm8
2233	psrld	$25,%xmm8
2234	pslld	$32-25,%xmm5
2235	pxor	%xmm8,%xmm5
2236	movdqa	%xmm4,%xmm8
2237	psrld	$25,%xmm8
2238	pslld	$32-25,%xmm4
2239	pxor	%xmm8,%xmm4
2240	movdqa	0+80(%rbp),%xmm8
2241.byte	102,15,58,15,255,4
2242.byte	102,69,15,58,15,219,8
2243.byte	102,69,15,58,15,255,12
2244.byte	102,15,58,15,246,4
2245.byte	102,69,15,58,15,210,8
2246.byte	102,69,15,58,15,246,12
2247.byte	102,15,58,15,237,4
2248.byte	102,69,15,58,15,201,8
2249.byte	102,69,15,58,15,237,12
2250.byte	102,15,58,15,228,4
2251.byte	102,69,15,58,15,192,8
2252.byte	102,69,15,58,15,228,12
2253	movdqa	%xmm8,0+80(%rbp)
2254	movdqa	L$rol16(%rip),%xmm8
2255	paddd	%xmm7,%xmm3
2256	paddd	%xmm6,%xmm2
2257	paddd	%xmm5,%xmm1
2258	paddd	%xmm4,%xmm0
2259	pxor	%xmm3,%xmm15
2260	pxor	%xmm2,%xmm14
2261	pxor	%xmm1,%xmm13
2262	pxor	%xmm0,%xmm12
2263.byte	102,69,15,56,0,248
2264.byte	102,69,15,56,0,240
2265.byte	102,69,15,56,0,232
2266.byte	102,69,15,56,0,224
2267	movdqa	0+80(%rbp),%xmm8
2268	paddd	%xmm15,%xmm11
2269	paddd	%xmm14,%xmm10
2270	paddd	%xmm13,%xmm9
2271	paddd	%xmm12,%xmm8
2272	pxor	%xmm11,%xmm7
2273	pxor	%xmm10,%xmm6
2274	pxor	%xmm9,%xmm5
2275	pxor	%xmm8,%xmm4
2276	movdqa	%xmm8,0+80(%rbp)
2277	movdqa	%xmm7,%xmm8
2278	psrld	$20,%xmm8
2279	pslld	$32-20,%xmm7
2280	pxor	%xmm8,%xmm7
2281	movdqa	%xmm6,%xmm8
2282	psrld	$20,%xmm8
2283	pslld	$32-20,%xmm6
2284	pxor	%xmm8,%xmm6
2285	movdqa	%xmm5,%xmm8
2286	psrld	$20,%xmm8
2287	pslld	$32-20,%xmm5
2288	pxor	%xmm8,%xmm5
2289	movdqa	%xmm4,%xmm8
2290	psrld	$20,%xmm8
2291	pslld	$32-20,%xmm4
2292	pxor	%xmm8,%xmm4
2293	movdqa	L$rol8(%rip),%xmm8
2294	paddd	%xmm7,%xmm3
2295	paddd	%xmm6,%xmm2
2296	paddd	%xmm5,%xmm1
2297	paddd	%xmm4,%xmm0
2298	pxor	%xmm3,%xmm15
2299	pxor	%xmm2,%xmm14
2300	pxor	%xmm1,%xmm13
2301	pxor	%xmm0,%xmm12
2302.byte	102,69,15,56,0,248
2303.byte	102,69,15,56,0,240
2304.byte	102,69,15,56,0,232
2305.byte	102,69,15,56,0,224
2306	movdqa	0+80(%rbp),%xmm8
2307	paddd	%xmm15,%xmm11
2308	paddd	%xmm14,%xmm10
2309	paddd	%xmm13,%xmm9
2310	paddd	%xmm12,%xmm8
2311	pxor	%xmm11,%xmm7
2312	pxor	%xmm10,%xmm6
2313	pxor	%xmm9,%xmm5
2314	pxor	%xmm8,%xmm4
2315	movdqa	%xmm8,0+80(%rbp)
2316	movdqa	%xmm7,%xmm8
2317	psrld	$25,%xmm8
2318	pslld	$32-25,%xmm7
2319	pxor	%xmm8,%xmm7
2320	movdqa	%xmm6,%xmm8
2321	psrld	$25,%xmm8
2322	pslld	$32-25,%xmm6
2323	pxor	%xmm8,%xmm6
2324	movdqa	%xmm5,%xmm8
2325	psrld	$25,%xmm8
2326	pslld	$32-25,%xmm5
2327	pxor	%xmm8,%xmm5
2328	movdqa	%xmm4,%xmm8
2329	psrld	$25,%xmm8
2330	pslld	$32-25,%xmm4
2331	pxor	%xmm8,%xmm4
2332	movdqa	0+80(%rbp),%xmm8
2333.byte	102,15,58,15,255,12
2334.byte	102,69,15,58,15,219,8
2335.byte	102,69,15,58,15,255,4
2336.byte	102,15,58,15,246,12
2337.byte	102,69,15,58,15,210,8
2338.byte	102,69,15,58,15,246,4
2339.byte	102,15,58,15,237,12
2340.byte	102,69,15,58,15,201,8
2341.byte	102,69,15,58,15,237,4
2342.byte	102,15,58,15,228,12
2343.byte	102,69,15,58,15,192,8
2344.byte	102,69,15,58,15,228,4
2345
2346	decq	%r10
2347	jnz	L$seal_sse_init_rounds
2348	paddd	L$chacha20_consts(%rip),%xmm3
2349	paddd	0+48(%rbp),%xmm7
2350	paddd	0+64(%rbp),%xmm11
2351	paddd	0+144(%rbp),%xmm15
2352	paddd	L$chacha20_consts(%rip),%xmm2
2353	paddd	0+48(%rbp),%xmm6
2354	paddd	0+64(%rbp),%xmm10
2355	paddd	0+128(%rbp),%xmm14
2356	paddd	L$chacha20_consts(%rip),%xmm1
2357	paddd	0+48(%rbp),%xmm5
2358	paddd	0+64(%rbp),%xmm9
2359	paddd	0+112(%rbp),%xmm13
2360	paddd	L$chacha20_consts(%rip),%xmm0
2361	paddd	0+48(%rbp),%xmm4
2362	paddd	0+64(%rbp),%xmm8
2363	paddd	0+96(%rbp),%xmm12
2364
2365
2366	pand	L$clamp(%rip),%xmm3
2367	movdqa	%xmm3,0+0(%rbp)
2368	movdqa	%xmm7,0+16(%rbp)
2369
2370	movq	%r8,%r8
2371	call	poly_hash_ad_internal
2372	movdqu	0 + 0(%rsi),%xmm3
2373	movdqu	16 + 0(%rsi),%xmm7
2374	movdqu	32 + 0(%rsi),%xmm11
2375	movdqu	48 + 0(%rsi),%xmm15
2376	pxor	%xmm3,%xmm2
2377	pxor	%xmm7,%xmm6
2378	pxor	%xmm11,%xmm10
2379	pxor	%xmm14,%xmm15
2380	movdqu	%xmm2,0 + 0(%rdi)
2381	movdqu	%xmm6,16 + 0(%rdi)
2382	movdqu	%xmm10,32 + 0(%rdi)
2383	movdqu	%xmm15,48 + 0(%rdi)
2384	movdqu	0 + 64(%rsi),%xmm3
2385	movdqu	16 + 64(%rsi),%xmm7
2386	movdqu	32 + 64(%rsi),%xmm11
2387	movdqu	48 + 64(%rsi),%xmm15
2388	pxor	%xmm3,%xmm1
2389	pxor	%xmm7,%xmm5
2390	pxor	%xmm11,%xmm9
2391	pxor	%xmm13,%xmm15
2392	movdqu	%xmm1,0 + 64(%rdi)
2393	movdqu	%xmm5,16 + 64(%rdi)
2394	movdqu	%xmm9,32 + 64(%rdi)
2395	movdqu	%xmm15,48 + 64(%rdi)
2396
2397	cmpq	$192,%rbx
2398	ja	L$seal_sse_main_init
2399	movq	$128,%rcx
2400	subq	$128,%rbx
2401	leaq	128(%rsi),%rsi
2402	jmp	L$seal_sse_128_tail_hash
2403L$seal_sse_main_init:
2404	movdqu	0 + 128(%rsi),%xmm3
2405	movdqu	16 + 128(%rsi),%xmm7
2406	movdqu	32 + 128(%rsi),%xmm11
2407	movdqu	48 + 128(%rsi),%xmm15
2408	pxor	%xmm3,%xmm0
2409	pxor	%xmm7,%xmm4
2410	pxor	%xmm11,%xmm8
2411	pxor	%xmm12,%xmm15
2412	movdqu	%xmm0,0 + 128(%rdi)
2413	movdqu	%xmm4,16 + 128(%rdi)
2414	movdqu	%xmm8,32 + 128(%rdi)
2415	movdqu	%xmm15,48 + 128(%rdi)
2416
2417	movq	$192,%rcx
2418	subq	$192,%rbx
2419	leaq	192(%rsi),%rsi
2420	movq	$2,%rcx
2421	movq	$8,%r8
2422	cmpq	$64,%rbx
2423	jbe	L$seal_sse_tail_64
2424	cmpq	$128,%rbx
2425	jbe	L$seal_sse_tail_128
2426	cmpq	$192,%rbx
2427	jbe	L$seal_sse_tail_192
2428
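# Main seal loop: 256 bytes per iteration, mirroring the open loop but
# hashing the freshly written ciphertext at %rdi.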
2429L$seal_sse_main_loop:
2430	movdqa	L$chacha20_consts(%rip),%xmm0
2431	movdqa	0+48(%rbp),%xmm4
2432	movdqa	0+64(%rbp),%xmm8
2433	movdqa	%xmm0,%xmm1
2434	movdqa	%xmm4,%xmm5
2435	movdqa	%xmm8,%xmm9
2436	movdqa	%xmm0,%xmm2
2437	movdqa	%xmm4,%xmm6
2438	movdqa	%xmm8,%xmm10
2439	movdqa	%xmm0,%xmm3
2440	movdqa	%xmm4,%xmm7
2441	movdqa	%xmm8,%xmm11
2442	movdqa	0+96(%rbp),%xmm15
2443	paddd	L$sse_inc(%rip),%xmm15
2444	movdqa	%xmm15,%xmm14
2445	paddd	L$sse_inc(%rip),%xmm14
2446	movdqa	%xmm14,%xmm13
2447	paddd	L$sse_inc(%rip),%xmm13
2448	movdqa	%xmm13,%xmm12
2449	paddd	L$sse_inc(%rip),%xmm12
2450	movdqa	%xmm12,0+96(%rbp)
2451	movdqa	%xmm13,0+112(%rbp)
2452	movdqa	%xmm14,0+128(%rbp)
2453	movdqa	%xmm15,0+144(%rbp)
2454
2455.p2align	5
2456L$seal_sse_main_rounds:
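# One ChaCha20 double round over all four blocks, interleaved with Poly1305
# limb arithmetic.  The .byte runs are hand-encoded SSE instructions:
# 102,69,15,56,0,... is pshufb (the rol16/rol8 shuffles) and
# 102,...,15,58,15,...,imm is palignr $imm (rotating the second, third and
# fourth state rows between the column and diagonal rounds).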
2457	movdqa	%xmm8,0+80(%rbp)
2458	movdqa	L$rol16(%rip),%xmm8
2459	paddd	%xmm7,%xmm3
2460	paddd	%xmm6,%xmm2
2461	paddd	%xmm5,%xmm1
2462	paddd	%xmm4,%xmm0
2463	pxor	%xmm3,%xmm15
2464	pxor	%xmm2,%xmm14
2465	pxor	%xmm1,%xmm13
2466	pxor	%xmm0,%xmm12
2467.byte	102,69,15,56,0,248
2468.byte	102,69,15,56,0,240
2469.byte	102,69,15,56,0,232
2470.byte	102,69,15,56,0,224
2471	movdqa	0+80(%rbp),%xmm8
2472	paddd	%xmm15,%xmm11
2473	paddd	%xmm14,%xmm10
2474	paddd	%xmm13,%xmm9
2475	paddd	%xmm12,%xmm8
2476	pxor	%xmm11,%xmm7
2477	addq	0+0(%rdi),%r10
2478	adcq	8+0(%rdi),%r11
2479	adcq	$1,%r12
2480	pxor	%xmm10,%xmm6
2481	pxor	%xmm9,%xmm5
2482	pxor	%xmm8,%xmm4
2483	movdqa	%xmm8,0+80(%rbp)
2484	movdqa	%xmm7,%xmm8
2485	psrld	$20,%xmm8
2486	pslld	$32-20,%xmm7
2487	pxor	%xmm8,%xmm7
2488	movdqa	%xmm6,%xmm8
2489	psrld	$20,%xmm8
2490	pslld	$32-20,%xmm6
2491	pxor	%xmm8,%xmm6
2492	movdqa	%xmm5,%xmm8
2493	psrld	$20,%xmm8
2494	pslld	$32-20,%xmm5
2495	pxor	%xmm8,%xmm5
2496	movdqa	%xmm4,%xmm8
2497	psrld	$20,%xmm8
2498	pslld	$32-20,%xmm4
2499	pxor	%xmm8,%xmm4
2500	movq	0+0+0(%rbp),%rax
2501	movq	%rax,%r15
2502	mulq	%r10
2503	movq	%rax,%r13
2504	movq	%rdx,%r14
2505	movq	0+0+0(%rbp),%rax
2506	mulq	%r11
2507	imulq	%r12,%r15
2508	addq	%rax,%r14
2509	adcq	%rdx,%r15
2510	movdqa	L$rol8(%rip),%xmm8
2511	paddd	%xmm7,%xmm3
2512	paddd	%xmm6,%xmm2
2513	paddd	%xmm5,%xmm1
2514	paddd	%xmm4,%xmm0
2515	pxor	%xmm3,%xmm15
2516	pxor	%xmm2,%xmm14
2517	pxor	%xmm1,%xmm13
2518	pxor	%xmm0,%xmm12
2519.byte	102,69,15,56,0,248
2520.byte	102,69,15,56,0,240
2521.byte	102,69,15,56,0,232
2522.byte	102,69,15,56,0,224
2523	movdqa	0+80(%rbp),%xmm8
2524	paddd	%xmm15,%xmm11
2525	paddd	%xmm14,%xmm10
2526	paddd	%xmm13,%xmm9
2527	paddd	%xmm12,%xmm8
2528	pxor	%xmm11,%xmm7
2529	pxor	%xmm10,%xmm6
2530	movq	8+0+0(%rbp),%rax
2531	movq	%rax,%r9
2532	mulq	%r10
2533	addq	%rax,%r14
2534	adcq	$0,%rdx
2535	movq	%rdx,%r10
2536	movq	8+0+0(%rbp),%rax
2537	mulq	%r11
2538	addq	%rax,%r15
2539	adcq	$0,%rdx
2540	pxor	%xmm9,%xmm5
2541	pxor	%xmm8,%xmm4
2542	movdqa	%xmm8,0+80(%rbp)
2543	movdqa	%xmm7,%xmm8
2544	psrld	$25,%xmm8
2545	pslld	$32-25,%xmm7
2546	pxor	%xmm8,%xmm7
2547	movdqa	%xmm6,%xmm8
2548	psrld	$25,%xmm8
2549	pslld	$32-25,%xmm6
2550	pxor	%xmm8,%xmm6
2551	movdqa	%xmm5,%xmm8
2552	psrld	$25,%xmm8
2553	pslld	$32-25,%xmm5
2554	pxor	%xmm8,%xmm5
2555	movdqa	%xmm4,%xmm8
2556	psrld	$25,%xmm8
2557	pslld	$32-25,%xmm4
2558	pxor	%xmm8,%xmm4
2559	movdqa	0+80(%rbp),%xmm8
2560	imulq	%r12,%r9
2561	addq	%r10,%r15
2562	adcq	%rdx,%r9
2563.byte	102,15,58,15,255,4
2564.byte	102,69,15,58,15,219,8
2565.byte	102,69,15,58,15,255,12
2566.byte	102,15,58,15,246,4
2567.byte	102,69,15,58,15,210,8
2568.byte	102,69,15,58,15,246,12
2569.byte	102,15,58,15,237,4
2570.byte	102,69,15,58,15,201,8
2571.byte	102,69,15,58,15,237,12
2572.byte	102,15,58,15,228,4
2573.byte	102,69,15,58,15,192,8
2574.byte	102,69,15,58,15,228,12
2575	movdqa	%xmm8,0+80(%rbp)
2576	movdqa	L$rol16(%rip),%xmm8
2577	paddd	%xmm7,%xmm3
2578	paddd	%xmm6,%xmm2
2579	paddd	%xmm5,%xmm1
2580	paddd	%xmm4,%xmm0
2581	pxor	%xmm3,%xmm15
2582	pxor	%xmm2,%xmm14
2583	movq	%r13,%r10
2584	movq	%r14,%r11
2585	movq	%r15,%r12
2586	andq	$3,%r12
2587	movq	%r15,%r13
2588	andq	$-4,%r13
2589	movq	%r9,%r14
2590	shrdq	$2,%r9,%r15
2591	shrq	$2,%r9
2592	addq	%r13,%r15
2593	adcq	%r14,%r9
2594	addq	%r15,%r10
2595	adcq	%r9,%r11
2596	adcq	$0,%r12
2597	pxor	%xmm1,%xmm13
2598	pxor	%xmm0,%xmm12
2599.byte	102,69,15,56,0,248
2600.byte	102,69,15,56,0,240
2601.byte	102,69,15,56,0,232
2602.byte	102,69,15,56,0,224
2603	movdqa	0+80(%rbp),%xmm8
2604	paddd	%xmm15,%xmm11
2605	paddd	%xmm14,%xmm10
2606	paddd	%xmm13,%xmm9
2607	paddd	%xmm12,%xmm8
2608	pxor	%xmm11,%xmm7
2609	pxor	%xmm10,%xmm6
2610	pxor	%xmm9,%xmm5
2611	pxor	%xmm8,%xmm4
2612	movdqa	%xmm8,0+80(%rbp)
2613	movdqa	%xmm7,%xmm8
2614	psrld	$20,%xmm8
2615	pslld	$32-20,%xmm7
2616	pxor	%xmm8,%xmm7
2617	movdqa	%xmm6,%xmm8
2618	psrld	$20,%xmm8
2619	pslld	$32-20,%xmm6
2620	pxor	%xmm8,%xmm6
2621	movdqa	%xmm5,%xmm8
2622	psrld	$20,%xmm8
2623	pslld	$32-20,%xmm5
2624	pxor	%xmm8,%xmm5
2625	movdqa	%xmm4,%xmm8
2626	psrld	$20,%xmm8
2627	pslld	$32-20,%xmm4
2628	pxor	%xmm8,%xmm4
2629	movdqa	L$rol8(%rip),%xmm8
2630	paddd	%xmm7,%xmm3
2631	paddd	%xmm6,%xmm2
2632	paddd	%xmm5,%xmm1
2633	paddd	%xmm4,%xmm0
2634	pxor	%xmm3,%xmm15
2635	pxor	%xmm2,%xmm14
2636	pxor	%xmm1,%xmm13
2637	pxor	%xmm0,%xmm12
2638.byte	102,69,15,56,0,248
2639.byte	102,69,15,56,0,240
2640.byte	102,69,15,56,0,232
2641.byte	102,69,15,56,0,224
2642	movdqa	0+80(%rbp),%xmm8
2643	paddd	%xmm15,%xmm11
2644	paddd	%xmm14,%xmm10
2645	paddd	%xmm13,%xmm9
2646	paddd	%xmm12,%xmm8
2647	pxor	%xmm11,%xmm7
2648	pxor	%xmm10,%xmm6
2649	pxor	%xmm9,%xmm5
2650	pxor	%xmm8,%xmm4
2651	movdqa	%xmm8,0+80(%rbp)
2652	movdqa	%xmm7,%xmm8
2653	psrld	$25,%xmm8
2654	pslld	$32-25,%xmm7
2655	pxor	%xmm8,%xmm7
2656	movdqa	%xmm6,%xmm8
2657	psrld	$25,%xmm8
2658	pslld	$32-25,%xmm6
2659	pxor	%xmm8,%xmm6
2660	movdqa	%xmm5,%xmm8
2661	psrld	$25,%xmm8
2662	pslld	$32-25,%xmm5
2663	pxor	%xmm8,%xmm5
2664	movdqa	%xmm4,%xmm8
2665	psrld	$25,%xmm8
2666	pslld	$32-25,%xmm4
2667	pxor	%xmm8,%xmm4
2668	movdqa	0+80(%rbp),%xmm8
2669.byte	102,15,58,15,255,12
2670.byte	102,69,15,58,15,219,8
2671.byte	102,69,15,58,15,255,4
2672.byte	102,15,58,15,246,12
2673.byte	102,69,15,58,15,210,8
2674.byte	102,69,15,58,15,246,4
2675.byte	102,15,58,15,237,12
2676.byte	102,69,15,58,15,201,8
2677.byte	102,69,15,58,15,237,4
2678.byte	102,15,58,15,228,12
2679.byte	102,69,15,58,15,192,8
2680.byte	102,69,15,58,15,228,4
2681
2682	leaq	16(%rdi),%rdi
2683	decq	%r8
2684	jge	L$seal_sse_main_rounds
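# Leftover Poly1305 blocks for this pass.  Each block computes
# acc = (acc + 16 bytes of ciphertext + 2^128) * r, then reduces via
# acc = low130 + 5*(acc >> 130) in the shrdq/andq sequence, using
# 2^130 = 5 (mod 2^130 - 5).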
2685	addq	0+0(%rdi),%r10
2686	adcq	8+0(%rdi),%r11
2687	adcq	$1,%r12
2688	movq	0+0+0(%rbp),%rax
2689	movq	%rax,%r15
2690	mulq	%r10
2691	movq	%rax,%r13
2692	movq	%rdx,%r14
2693	movq	0+0+0(%rbp),%rax
2694	mulq	%r11
2695	imulq	%r12,%r15
2696	addq	%rax,%r14
2697	adcq	%rdx,%r15
2698	movq	8+0+0(%rbp),%rax
2699	movq	%rax,%r9
2700	mulq	%r10
2701	addq	%rax,%r14
2702	adcq	$0,%rdx
2703	movq	%rdx,%r10
2704	movq	8+0+0(%rbp),%rax
2705	mulq	%r11
2706	addq	%rax,%r15
2707	adcq	$0,%rdx
2708	imulq	%r12,%r9
2709	addq	%r10,%r15
2710	adcq	%rdx,%r9
2711	movq	%r13,%r10
2712	movq	%r14,%r11
2713	movq	%r15,%r12
2714	andq	$3,%r12
2715	movq	%r15,%r13
2716	andq	$-4,%r13
2717	movq	%r9,%r14
2718	shrdq	$2,%r9,%r15
2719	shrq	$2,%r9
2720	addq	%r13,%r15
2721	adcq	%r14,%r9
2722	addq	%r15,%r10
2723	adcq	%r9,%r11
2724	adcq	$0,%r12
2725
2726	leaq	16(%rdi),%rdi
2727	decq	%rcx
2728	jg	L$seal_sse_main_rounds
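# Rounds and hashing for this pass finished: add the saved initial state
# back in to finalize the four keystream blocks.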
2729	paddd	L$chacha20_consts(%rip),%xmm3
2730	paddd	0+48(%rbp),%xmm7
2731	paddd	0+64(%rbp),%xmm11
2732	paddd	0+144(%rbp),%xmm15
2733	paddd	L$chacha20_consts(%rip),%xmm2
2734	paddd	0+48(%rbp),%xmm6
2735	paddd	0+64(%rbp),%xmm10
2736	paddd	0+128(%rbp),%xmm14
2737	paddd	L$chacha20_consts(%rip),%xmm1
2738	paddd	0+48(%rbp),%xmm5
2739	paddd	0+64(%rbp),%xmm9
2740	paddd	0+112(%rbp),%xmm13
2741	paddd	L$chacha20_consts(%rip),%xmm0
2742	paddd	0+48(%rbp),%xmm4
2743	paddd	0+64(%rbp),%xmm8
2744	paddd	0+96(%rbp),%xmm12
2745
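# XOR keystream into the next 192 bytes (lowest counter first).  The fourth
# block is consumed below only if more than 256 bytes remain; otherwise the
# final partial data is encrypted from it in the 128-tail path.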
2746	movdqa	%xmm14,0+80(%rbp)
2747	movdqa	%xmm14,0+80(%rbp)
2748	movdqu	0 + 0(%rsi),%xmm14
2749	pxor	%xmm3,%xmm14
2750	movdqu	%xmm14,0 + 0(%rdi)
2751	movdqu	16 + 0(%rsi),%xmm14
2752	pxor	%xmm7,%xmm14
2753	movdqu	%xmm14,16 + 0(%rdi)
2754	movdqu	32 + 0(%rsi),%xmm14
2755	pxor	%xmm11,%xmm14
2756	movdqu	%xmm14,32 + 0(%rdi)
2757	movdqu	48 + 0(%rsi),%xmm14
2758	pxor	%xmm15,%xmm14
2759	movdqu	%xmm14,48 + 0(%rdi)
2760
2761	movdqa	0+80(%rbp),%xmm14
2762	movdqu	0 + 64(%rsi),%xmm3
2763	movdqu	16 + 64(%rsi),%xmm7
2764	movdqu	32 + 64(%rsi),%xmm11
2765	movdqu	48 + 64(%rsi),%xmm15
2766	pxor	%xmm3,%xmm2
2767	pxor	%xmm7,%xmm6
2768	pxor	%xmm11,%xmm10
2769	pxor	%xmm14,%xmm15
2770	movdqu	%xmm2,0 + 64(%rdi)
2771	movdqu	%xmm6,16 + 64(%rdi)
2772	movdqu	%xmm10,32 + 64(%rdi)
2773	movdqu	%xmm15,48 + 64(%rdi)
2774	movdqu	0 + 128(%rsi),%xmm3
2775	movdqu	16 + 128(%rsi),%xmm7
2776	movdqu	32 + 128(%rsi),%xmm11
2777	movdqu	48 + 128(%rsi),%xmm15
2778	pxor	%xmm3,%xmm1
2779	pxor	%xmm7,%xmm5
2780	pxor	%xmm11,%xmm9
2781	pxor	%xmm13,%xmm15
2782	movdqu	%xmm1,0 + 128(%rdi)
2783	movdqu	%xmm5,16 + 128(%rdi)
2784	movdqu	%xmm9,32 + 128(%rdi)
2785	movdqu	%xmm15,48 + 128(%rdi)
2786
2787	cmpq	$256,%rbx
2788	ja	L$seal_sse_main_loop_xor
2789
2790	movq	$192,%rcx
2791	subq	$192,%rbx
2792	leaq	192(%rsi),%rsi
2793	jmp	L$seal_sse_128_tail_hash
2794L$seal_sse_main_loop_xor:
2795	movdqu	0 + 192(%rsi),%xmm3
2796	movdqu	16 + 192(%rsi),%xmm7
2797	movdqu	32 + 192(%rsi),%xmm11
2798	movdqu	48 + 192(%rsi),%xmm15
2799	pxor	%xmm3,%xmm0
2800	pxor	%xmm7,%xmm4
2801	pxor	%xmm11,%xmm8
2802	pxor	%xmm12,%xmm15
2803	movdqu	%xmm0,0 + 192(%rdi)
2804	movdqu	%xmm4,16 + 192(%rdi)
2805	movdqu	%xmm8,32 + 192(%rdi)
2806	movdqu	%xmm15,48 + 192(%rdi)
2807
2808	leaq	256(%rsi),%rsi
2809	subq	$256,%rbx
2810	movq	$6,%rcx
2811	movq	$4,%r8
2812	cmpq	$192,%rbx
2813	jg	L$seal_sse_main_loop
2814	movq	%rbx,%rcx
2815	testq	%rbx,%rbx
2816	je	L$seal_sse_128_tail_hash
2817	movq	$6,%rcx
2818	cmpq	$128,%rbx
2819	ja	L$seal_sse_tail_192
2820	cmpq	$64,%rbx
2821	ja	L$seal_sse_tail_128
2822
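# Seal tails: rebuild one, two, or three fresh blocks for the last
# 1..192 bytes while continuing to absorb the pending ciphertext into
# Poly1305 (two hash blocks per %rcx iteration, one per %r8 iteration).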
2823L$seal_sse_tail_64:
2824	movdqa	L$chacha20_consts(%rip),%xmm0
2825	movdqa	0+48(%rbp),%xmm4
2826	movdqa	0+64(%rbp),%xmm8
2827	movdqa	0+96(%rbp),%xmm12
2828	paddd	L$sse_inc(%rip),%xmm12
2829	movdqa	%xmm12,0+96(%rbp)
2830
2831L$seal_sse_tail_64_rounds_and_x2hash:
2832	addq	0+0(%rdi),%r10
2833	adcq	8+0(%rdi),%r11
2834	adcq	$1,%r12
2835	movq	0+0+0(%rbp),%rax
2836	movq	%rax,%r15
2837	mulq	%r10
2838	movq	%rax,%r13
2839	movq	%rdx,%r14
2840	movq	0+0+0(%rbp),%rax
2841	mulq	%r11
2842	imulq	%r12,%r15
2843	addq	%rax,%r14
2844	adcq	%rdx,%r15
2845	movq	8+0+0(%rbp),%rax
2846	movq	%rax,%r9
2847	mulq	%r10
2848	addq	%rax,%r14
2849	adcq	$0,%rdx
2850	movq	%rdx,%r10
2851	movq	8+0+0(%rbp),%rax
2852	mulq	%r11
2853	addq	%rax,%r15
2854	adcq	$0,%rdx
2855	imulq	%r12,%r9
2856	addq	%r10,%r15
2857	adcq	%rdx,%r9
2858	movq	%r13,%r10
2859	movq	%r14,%r11
2860	movq	%r15,%r12
2861	andq	$3,%r12
2862	movq	%r15,%r13
2863	andq	$-4,%r13
2864	movq	%r9,%r14
2865	shrdq	$2,%r9,%r15
2866	shrq	$2,%r9
2867	addq	%r13,%r15
2868	adcq	%r14,%r9
2869	addq	%r15,%r10
2870	adcq	%r9,%r11
2871	adcq	$0,%r12
2872
2873	leaq	16(%rdi),%rdi
2874L$seal_sse_tail_64_rounds_and_x1hash:
2875	paddd	%xmm4,%xmm0
2876	pxor	%xmm0,%xmm12
2877	pshufb	L$rol16(%rip),%xmm12
2878	paddd	%xmm12,%xmm8
2879	pxor	%xmm8,%xmm4
2880	movdqa	%xmm4,%xmm3
2881	pslld	$12,%xmm3
2882	psrld	$20,%xmm4
2883	pxor	%xmm3,%xmm4
2884	paddd	%xmm4,%xmm0
2885	pxor	%xmm0,%xmm12
2886	pshufb	L$rol8(%rip),%xmm12
2887	paddd	%xmm12,%xmm8
2888	pxor	%xmm8,%xmm4
2889	movdqa	%xmm4,%xmm3
2890	pslld	$7,%xmm3
2891	psrld	$25,%xmm4
2892	pxor	%xmm3,%xmm4
2893.byte	102,15,58,15,228,4
2894.byte	102,69,15,58,15,192,8
2895.byte	102,69,15,58,15,228,12
2896	paddd	%xmm4,%xmm0
2897	pxor	%xmm0,%xmm12
2898	pshufb	L$rol16(%rip),%xmm12
2899	paddd	%xmm12,%xmm8
2900	pxor	%xmm8,%xmm4
2901	movdqa	%xmm4,%xmm3
2902	pslld	$12,%xmm3
2903	psrld	$20,%xmm4
2904	pxor	%xmm3,%xmm4
2905	paddd	%xmm4,%xmm0
2906	pxor	%xmm0,%xmm12
2907	pshufb	L$rol8(%rip),%xmm12
2908	paddd	%xmm12,%xmm8
2909	pxor	%xmm8,%xmm4
2910	movdqa	%xmm4,%xmm3
2911	pslld	$7,%xmm3
2912	psrld	$25,%xmm4
2913	pxor	%xmm3,%xmm4
2914.byte	102,15,58,15,228,12
2915.byte	102,69,15,58,15,192,8
2916.byte	102,69,15,58,15,228,4
2917	addq	0+0(%rdi),%r10
2918	adcq	8+0(%rdi),%r11
2919	adcq	$1,%r12
2920	movq	0+0+0(%rbp),%rax
2921	movq	%rax,%r15
2922	mulq	%r10
2923	movq	%rax,%r13
2924	movq	%rdx,%r14
2925	movq	0+0+0(%rbp),%rax
2926	mulq	%r11
2927	imulq	%r12,%r15
2928	addq	%rax,%r14
2929	adcq	%rdx,%r15
2930	movq	8+0+0(%rbp),%rax
2931	movq	%rax,%r9
2932	mulq	%r10
2933	addq	%rax,%r14
2934	adcq	$0,%rdx
2935	movq	%rdx,%r10
2936	movq	8+0+0(%rbp),%rax
2937	mulq	%r11
2938	addq	%rax,%r15
2939	adcq	$0,%rdx
2940	imulq	%r12,%r9
2941	addq	%r10,%r15
2942	adcq	%rdx,%r9
2943	movq	%r13,%r10
2944	movq	%r14,%r11
2945	movq	%r15,%r12
2946	andq	$3,%r12
2947	movq	%r15,%r13
2948	andq	$-4,%r13
2949	movq	%r9,%r14
2950	shrdq	$2,%r9,%r15
2951	shrq	$2,%r9
2952	addq	%r13,%r15
2953	adcq	%r14,%r9
2954	addq	%r15,%r10
2955	adcq	%r9,%r11
2956	adcq	$0,%r12
2957
2958	leaq	16(%rdi),%rdi
2959	decq	%rcx
2960	jg	L$seal_sse_tail_64_rounds_and_x2hash
2961	decq	%r8
2962	jge	L$seal_sse_tail_64_rounds_and_x1hash
2963	paddd	L$chacha20_consts(%rip),%xmm0
2964	paddd	0+48(%rbp),%xmm4
2965	paddd	0+64(%rbp),%xmm8
2966	paddd	0+96(%rbp),%xmm12
2967
2968	jmp	L$seal_sse_128_tail_xor
2969
2970L$seal_sse_tail_128:
2971	movdqa	L$chacha20_consts(%rip),%xmm0
2972	movdqa	0+48(%rbp),%xmm4
2973	movdqa	0+64(%rbp),%xmm8
2974	movdqa	%xmm0,%xmm1
2975	movdqa	%xmm4,%xmm5
2976	movdqa	%xmm8,%xmm9
2977	movdqa	0+96(%rbp),%xmm13
2978	paddd	L$sse_inc(%rip),%xmm13
2979	movdqa	%xmm13,%xmm12
2980	paddd	L$sse_inc(%rip),%xmm12
2981	movdqa	%xmm12,0+96(%rbp)
2982	movdqa	%xmm13,0+112(%rbp)
2983
2984L$seal_sse_tail_128_rounds_and_x2hash:
2985	addq	0+0(%rdi),%r10
2986	adcq	8+0(%rdi),%r11
2987	adcq	$1,%r12
2988	movq	0+0+0(%rbp),%rax
2989	movq	%rax,%r15
2990	mulq	%r10
2991	movq	%rax,%r13
2992	movq	%rdx,%r14
2993	movq	0+0+0(%rbp),%rax
2994	mulq	%r11
2995	imulq	%r12,%r15
2996	addq	%rax,%r14
2997	adcq	%rdx,%r15
2998	movq	8+0+0(%rbp),%rax
2999	movq	%rax,%r9
3000	mulq	%r10
3001	addq	%rax,%r14
3002	adcq	$0,%rdx
3003	movq	%rdx,%r10
3004	movq	8+0+0(%rbp),%rax
3005	mulq	%r11
3006	addq	%rax,%r15
3007	adcq	$0,%rdx
3008	imulq	%r12,%r9
3009	addq	%r10,%r15
3010	adcq	%rdx,%r9
3011	movq	%r13,%r10
3012	movq	%r14,%r11
3013	movq	%r15,%r12
3014	andq	$3,%r12
3015	movq	%r15,%r13
3016	andq	$-4,%r13
3017	movq	%r9,%r14
3018	shrdq	$2,%r9,%r15
3019	shrq	$2,%r9
3020	addq	%r13,%r15
3021	adcq	%r14,%r9
3022	addq	%r15,%r10
3023	adcq	%r9,%r11
3024	adcq	$0,%r12
3025
3026	leaq	16(%rdi),%rdi
3027L$seal_sse_tail_128_rounds_and_x1hash:
3028	paddd	%xmm4,%xmm0
3029	pxor	%xmm0,%xmm12
3030	pshufb	L$rol16(%rip),%xmm12
3031	paddd	%xmm12,%xmm8
3032	pxor	%xmm8,%xmm4
3033	movdqa	%xmm4,%xmm3
3034	pslld	$12,%xmm3
3035	psrld	$20,%xmm4
3036	pxor	%xmm3,%xmm4
3037	paddd	%xmm4,%xmm0
3038	pxor	%xmm0,%xmm12
3039	pshufb	L$rol8(%rip),%xmm12
3040	paddd	%xmm12,%xmm8
3041	pxor	%xmm8,%xmm4
3042	movdqa	%xmm4,%xmm3
3043	pslld	$7,%xmm3
3044	psrld	$25,%xmm4
3045	pxor	%xmm3,%xmm4
3046.byte	102,15,58,15,228,4
3047.byte	102,69,15,58,15,192,8
3048.byte	102,69,15,58,15,228,12
3049	paddd	%xmm5,%xmm1
3050	pxor	%xmm1,%xmm13
3051	pshufb	L$rol16(%rip),%xmm13
3052	paddd	%xmm13,%xmm9
3053	pxor	%xmm9,%xmm5
3054	movdqa	%xmm5,%xmm3
3055	pslld	$12,%xmm3
3056	psrld	$20,%xmm5
3057	pxor	%xmm3,%xmm5
3058	paddd	%xmm5,%xmm1
3059	pxor	%xmm1,%xmm13
3060	pshufb	L$rol8(%rip),%xmm13
3061	paddd	%xmm13,%xmm9
3062	pxor	%xmm9,%xmm5
3063	movdqa	%xmm5,%xmm3
3064	pslld	$7,%xmm3
3065	psrld	$25,%xmm5
3066	pxor	%xmm3,%xmm5
3067.byte	102,15,58,15,237,4
3068.byte	102,69,15,58,15,201,8
3069.byte	102,69,15,58,15,237,12
3070	addq	0+0(%rdi),%r10
3071	adcq	8+0(%rdi),%r11
3072	adcq	$1,%r12
3073	movq	0+0+0(%rbp),%rax
3074	movq	%rax,%r15
3075	mulq	%r10
3076	movq	%rax,%r13
3077	movq	%rdx,%r14
3078	movq	0+0+0(%rbp),%rax
3079	mulq	%r11
3080	imulq	%r12,%r15
3081	addq	%rax,%r14
3082	adcq	%rdx,%r15
3083	movq	8+0+0(%rbp),%rax
3084	movq	%rax,%r9
3085	mulq	%r10
3086	addq	%rax,%r14
3087	adcq	$0,%rdx
3088	movq	%rdx,%r10
3089	movq	8+0+0(%rbp),%rax
3090	mulq	%r11
3091	addq	%rax,%r15
3092	adcq	$0,%rdx
3093	imulq	%r12,%r9
3094	addq	%r10,%r15
3095	adcq	%rdx,%r9
3096	movq	%r13,%r10
3097	movq	%r14,%r11
3098	movq	%r15,%r12
3099	andq	$3,%r12
3100	movq	%r15,%r13
3101	andq	$-4,%r13
3102	movq	%r9,%r14
3103	shrdq	$2,%r9,%r15
3104	shrq	$2,%r9
3105	addq	%r13,%r15
3106	adcq	%r14,%r9
3107	addq	%r15,%r10
3108	adcq	%r9,%r11
3109	adcq	$0,%r12
3110	paddd	%xmm4,%xmm0
3111	pxor	%xmm0,%xmm12
3112	pshufb	L$rol16(%rip),%xmm12
3113	paddd	%xmm12,%xmm8
3114	pxor	%xmm8,%xmm4
3115	movdqa	%xmm4,%xmm3
3116	pslld	$12,%xmm3
3117	psrld	$20,%xmm4
3118	pxor	%xmm3,%xmm4
3119	paddd	%xmm4,%xmm0
3120	pxor	%xmm0,%xmm12
3121	pshufb	L$rol8(%rip),%xmm12
3122	paddd	%xmm12,%xmm8
3123	pxor	%xmm8,%xmm4
3124	movdqa	%xmm4,%xmm3
3125	pslld	$7,%xmm3
3126	psrld	$25,%xmm4
3127	pxor	%xmm3,%xmm4
3128.byte	102,15,58,15,228,12
3129.byte	102,69,15,58,15,192,8
3130.byte	102,69,15,58,15,228,4
3131	paddd	%xmm5,%xmm1
3132	pxor	%xmm1,%xmm13
3133	pshufb	L$rol16(%rip),%xmm13
3134	paddd	%xmm13,%xmm9
3135	pxor	%xmm9,%xmm5
3136	movdqa	%xmm5,%xmm3
3137	pslld	$12,%xmm3
3138	psrld	$20,%xmm5
3139	pxor	%xmm3,%xmm5
3140	paddd	%xmm5,%xmm1
3141	pxor	%xmm1,%xmm13
3142	pshufb	L$rol8(%rip),%xmm13
3143	paddd	%xmm13,%xmm9
3144	pxor	%xmm9,%xmm5
3145	movdqa	%xmm5,%xmm3
3146	pslld	$7,%xmm3
3147	psrld	$25,%xmm5
3148	pxor	%xmm3,%xmm5
3149.byte	102,15,58,15,237,12
3150.byte	102,69,15,58,15,201,8
3151.byte	102,69,15,58,15,237,4
3152
3153	leaq	16(%rdi),%rdi
3154	decq	%rcx
3155	jg	L$seal_sse_tail_128_rounds_and_x2hash
3156	decq	%r8
3157	jge	L$seal_sse_tail_128_rounds_and_x1hash
3158	paddd	L$chacha20_consts(%rip),%xmm1
3159	paddd	0+48(%rbp),%xmm5
3160	paddd	0+64(%rbp),%xmm9
3161	paddd	0+112(%rbp),%xmm13
3162	paddd	L$chacha20_consts(%rip),%xmm0
3163	paddd	0+48(%rbp),%xmm4
3164	paddd	0+64(%rbp),%xmm8
3165	paddd	0+96(%rbp),%xmm12
3166	movdqu	0 + 0(%rsi),%xmm3
3167	movdqu	16 + 0(%rsi),%xmm7
3168	movdqu	32 + 0(%rsi),%xmm11
3169	movdqu	48 + 0(%rsi),%xmm15
3170	pxor	%xmm3,%xmm1
3171	pxor	%xmm7,%xmm5
3172	pxor	%xmm11,%xmm9
3173	pxor	%xmm13,%xmm15
3174	movdqu	%xmm1,0 + 0(%rdi)
3175	movdqu	%xmm5,16 + 0(%rdi)
3176	movdqu	%xmm9,32 + 0(%rdi)
3177	movdqu	%xmm15,48 + 0(%rdi)
3178
3179	movq	$64,%rcx
3180	subq	$64,%rbx
3181	leaq	64(%rsi),%rsi
3182	jmp	L$seal_sse_128_tail_hash
3183
3184L$seal_sse_tail_192:
3185	movdqa	L$chacha20_consts(%rip),%xmm0
3186	movdqa	0+48(%rbp),%xmm4
3187	movdqa	0+64(%rbp),%xmm8
3188	movdqa	%xmm0,%xmm1
3189	movdqa	%xmm4,%xmm5
3190	movdqa	%xmm8,%xmm9
3191	movdqa	%xmm0,%xmm2
3192	movdqa	%xmm4,%xmm6
3193	movdqa	%xmm8,%xmm10
3194	movdqa	0+96(%rbp),%xmm14
3195	paddd	L$sse_inc(%rip),%xmm14
3196	movdqa	%xmm14,%xmm13
3197	paddd	L$sse_inc(%rip),%xmm13
3198	movdqa	%xmm13,%xmm12
3199	paddd	L$sse_inc(%rip),%xmm12
3200	movdqa	%xmm12,0+96(%rbp)
3201	movdqa	%xmm13,0+112(%rbp)
3202	movdqa	%xmm14,0+128(%rbp)
3203
3204L$seal_sse_tail_192_rounds_and_x2hash:
3205	addq	0+0(%rdi),%r10
3206	adcq	8+0(%rdi),%r11
3207	adcq	$1,%r12
3208	movq	0+0+0(%rbp),%rax
3209	movq	%rax,%r15
3210	mulq	%r10
3211	movq	%rax,%r13
3212	movq	%rdx,%r14
3213	movq	0+0+0(%rbp),%rax
3214	mulq	%r11
3215	imulq	%r12,%r15
3216	addq	%rax,%r14
3217	adcq	%rdx,%r15
3218	movq	8+0+0(%rbp),%rax
3219	movq	%rax,%r9
3220	mulq	%r10
3221	addq	%rax,%r14
3222	adcq	$0,%rdx
3223	movq	%rdx,%r10
3224	movq	8+0+0(%rbp),%rax
3225	mulq	%r11
3226	addq	%rax,%r15
3227	adcq	$0,%rdx
3228	imulq	%r12,%r9
3229	addq	%r10,%r15
3230	adcq	%rdx,%r9
3231	movq	%r13,%r10
3232	movq	%r14,%r11
3233	movq	%r15,%r12
3234	andq	$3,%r12
3235	movq	%r15,%r13
3236	andq	$-4,%r13
3237	movq	%r9,%r14
3238	shrdq	$2,%r9,%r15
3239	shrq	$2,%r9
3240	addq	%r13,%r15
3241	adcq	%r14,%r9
3242	addq	%r15,%r10
3243	adcq	%r9,%r11
3244	adcq	$0,%r12
3245
3246	leaq	16(%rdi),%rdi
3247L$seal_sse_tail_192_rounds_and_x1hash:
3248	paddd	%xmm4,%xmm0
3249	pxor	%xmm0,%xmm12
3250	pshufb	L$rol16(%rip),%xmm12
3251	paddd	%xmm12,%xmm8
3252	pxor	%xmm8,%xmm4
3253	movdqa	%xmm4,%xmm3
3254	pslld	$12,%xmm3
3255	psrld	$20,%xmm4
3256	pxor	%xmm3,%xmm4
3257	paddd	%xmm4,%xmm0
3258	pxor	%xmm0,%xmm12
3259	pshufb	L$rol8(%rip),%xmm12
3260	paddd	%xmm12,%xmm8
3261	pxor	%xmm8,%xmm4
3262	movdqa	%xmm4,%xmm3
3263	pslld	$7,%xmm3
3264	psrld	$25,%xmm4
3265	pxor	%xmm3,%xmm4
3266.byte	102,15,58,15,228,4
3267.byte	102,69,15,58,15,192,8
3268.byte	102,69,15,58,15,228,12
3269	paddd	%xmm5,%xmm1
3270	pxor	%xmm1,%xmm13
3271	pshufb	L$rol16(%rip),%xmm13
3272	paddd	%xmm13,%xmm9
3273	pxor	%xmm9,%xmm5
3274	movdqa	%xmm5,%xmm3
3275	pslld	$12,%xmm3
3276	psrld	$20,%xmm5
3277	pxor	%xmm3,%xmm5
3278	paddd	%xmm5,%xmm1
3279	pxor	%xmm1,%xmm13
3280	pshufb	L$rol8(%rip),%xmm13
3281	paddd	%xmm13,%xmm9
3282	pxor	%xmm9,%xmm5
3283	movdqa	%xmm5,%xmm3
3284	pslld	$7,%xmm3
3285	psrld	$25,%xmm5
3286	pxor	%xmm3,%xmm5
3287.byte	102,15,58,15,237,4
3288.byte	102,69,15,58,15,201,8
3289.byte	102,69,15,58,15,237,12
3290	paddd	%xmm6,%xmm2
3291	pxor	%xmm2,%xmm14
3292	pshufb	L$rol16(%rip),%xmm14
3293	paddd	%xmm14,%xmm10
3294	pxor	%xmm10,%xmm6
3295	movdqa	%xmm6,%xmm3
3296	pslld	$12,%xmm3
3297	psrld	$20,%xmm6
3298	pxor	%xmm3,%xmm6
3299	paddd	%xmm6,%xmm2
3300	pxor	%xmm2,%xmm14
3301	pshufb	L$rol8(%rip),%xmm14
3302	paddd	%xmm14,%xmm10
3303	pxor	%xmm10,%xmm6
3304	movdqa	%xmm6,%xmm3
3305	pslld	$7,%xmm3
3306	psrld	$25,%xmm6
3307	pxor	%xmm3,%xmm6
3308.byte	102,15,58,15,246,4
3309.byte	102,69,15,58,15,210,8
3310.byte	102,69,15,58,15,246,12
3311	addq	0+0(%rdi),%r10
3312	adcq	8+0(%rdi),%r11
3313	adcq	$1,%r12
3314	movq	0+0+0(%rbp),%rax
3315	movq	%rax,%r15
3316	mulq	%r10
3317	movq	%rax,%r13
3318	movq	%rdx,%r14
3319	movq	0+0+0(%rbp),%rax
3320	mulq	%r11
3321	imulq	%r12,%r15
3322	addq	%rax,%r14
3323	adcq	%rdx,%r15
3324	movq	8+0+0(%rbp),%rax
3325	movq	%rax,%r9
3326	mulq	%r10
3327	addq	%rax,%r14
3328	adcq	$0,%rdx
3329	movq	%rdx,%r10
3330	movq	8+0+0(%rbp),%rax
3331	mulq	%r11
3332	addq	%rax,%r15
3333	adcq	$0,%rdx
3334	imulq	%r12,%r9
3335	addq	%r10,%r15
3336	adcq	%rdx,%r9
3337	movq	%r13,%r10
3338	movq	%r14,%r11
3339	movq	%r15,%r12
3340	andq	$3,%r12
3341	movq	%r15,%r13
3342	andq	$-4,%r13
3343	movq	%r9,%r14
3344	shrdq	$2,%r9,%r15
3345	shrq	$2,%r9
3346	addq	%r13,%r15
3347	adcq	%r14,%r9
3348	addq	%r15,%r10
3349	adcq	%r9,%r11
3350	adcq	$0,%r12
3351	paddd	%xmm4,%xmm0
3352	pxor	%xmm0,%xmm12
3353	pshufb	L$rol16(%rip),%xmm12
3354	paddd	%xmm12,%xmm8
3355	pxor	%xmm8,%xmm4
3356	movdqa	%xmm4,%xmm3
3357	pslld	$12,%xmm3
3358	psrld	$20,%xmm4
3359	pxor	%xmm3,%xmm4
3360	paddd	%xmm4,%xmm0
3361	pxor	%xmm0,%xmm12
3362	pshufb	L$rol8(%rip),%xmm12
3363	paddd	%xmm12,%xmm8
3364	pxor	%xmm8,%xmm4
3365	movdqa	%xmm4,%xmm3
3366	pslld	$7,%xmm3
3367	psrld	$25,%xmm4
3368	pxor	%xmm3,%xmm4
3369.byte	102,15,58,15,228,12
3370.byte	102,69,15,58,15,192,8
3371.byte	102,69,15,58,15,228,4
3372	paddd	%xmm5,%xmm1
3373	pxor	%xmm1,%xmm13
3374	pshufb	L$rol16(%rip),%xmm13
3375	paddd	%xmm13,%xmm9
3376	pxor	%xmm9,%xmm5
3377	movdqa	%xmm5,%xmm3
3378	pslld	$12,%xmm3
3379	psrld	$20,%xmm5
3380	pxor	%xmm3,%xmm5
3381	paddd	%xmm5,%xmm1
3382	pxor	%xmm1,%xmm13
3383	pshufb	L$rol8(%rip),%xmm13
3384	paddd	%xmm13,%xmm9
3385	pxor	%xmm9,%xmm5
3386	movdqa	%xmm5,%xmm3
3387	pslld	$7,%xmm3
3388	psrld	$25,%xmm5
3389	pxor	%xmm3,%xmm5
3390.byte	102,15,58,15,237,12
3391.byte	102,69,15,58,15,201,8
3392.byte	102,69,15,58,15,237,4
3393	paddd	%xmm6,%xmm2
3394	pxor	%xmm2,%xmm14
3395	pshufb	L$rol16(%rip),%xmm14
3396	paddd	%xmm14,%xmm10
3397	pxor	%xmm10,%xmm6
3398	movdqa	%xmm6,%xmm3
3399	pslld	$12,%xmm3
3400	psrld	$20,%xmm6
3401	pxor	%xmm3,%xmm6
3402	paddd	%xmm6,%xmm2
3403	pxor	%xmm2,%xmm14
3404	pshufb	L$rol8(%rip),%xmm14
3405	paddd	%xmm14,%xmm10
3406	pxor	%xmm10,%xmm6
3407	movdqa	%xmm6,%xmm3
3408	pslld	$7,%xmm3
3409	psrld	$25,%xmm6
3410	pxor	%xmm3,%xmm6
3411.byte	102,15,58,15,246,12
3412.byte	102,69,15,58,15,210,8
3413.byte	102,69,15,58,15,246,4
3414
3415	leaq	16(%rdi),%rdi
3416	decq	%rcx
3417	jg	L$seal_sse_tail_192_rounds_and_x2hash
3418	decq	%r8
3419	jge	L$seal_sse_tail_192_rounds_and_x1hash
3420	paddd	L$chacha20_consts(%rip),%xmm2
3421	paddd	0+48(%rbp),%xmm6
3422	paddd	0+64(%rbp),%xmm10
3423	paddd	0+128(%rbp),%xmm14
3424	paddd	L$chacha20_consts(%rip),%xmm1
3425	paddd	0+48(%rbp),%xmm5
3426	paddd	0+64(%rbp),%xmm9
3427	paddd	0+112(%rbp),%xmm13
3428	paddd	L$chacha20_consts(%rip),%xmm0
3429	paddd	0+48(%rbp),%xmm4
3430	paddd	0+64(%rbp),%xmm8
3431	paddd	0+96(%rbp),%xmm12
3432	movdqu	0 + 0(%rsi),%xmm3
3433	movdqu	16 + 0(%rsi),%xmm7
3434	movdqu	32 + 0(%rsi),%xmm11
3435	movdqu	48 + 0(%rsi),%xmm15
3436	pxor	%xmm3,%xmm2
3437	pxor	%xmm7,%xmm6
3438	pxor	%xmm11,%xmm10
3439	pxor	%xmm14,%xmm15
3440	movdqu	%xmm2,0 + 0(%rdi)
3441	movdqu	%xmm6,16 + 0(%rdi)
3442	movdqu	%xmm10,32 + 0(%rdi)
3443	movdqu	%xmm15,48 + 0(%rdi)
3444	movdqu	0 + 64(%rsi),%xmm3
3445	movdqu	16 + 64(%rsi),%xmm7
3446	movdqu	32 + 64(%rsi),%xmm11
3447	movdqu	48 + 64(%rsi),%xmm15
3448	pxor	%xmm3,%xmm1
3449	pxor	%xmm7,%xmm5
3450	pxor	%xmm11,%xmm9
3451	pxor	%xmm13,%xmm15
3452	movdqu	%xmm1,0 + 64(%rdi)
3453	movdqu	%xmm5,16 + 64(%rdi)
3454	movdqu	%xmm9,32 + 64(%rdi)
3455	movdqu	%xmm15,48 + 64(%rdi)
3456
3457	movq	$128,%rcx
3458	subq	$128,%rbx
3459	leaq	128(%rsi),%rsi
3460
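# Hash whatever whole 16-byte blocks of already-written ciphertext are still
# pending (%rcx bytes), 16 bytes per iteration.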
3461L$seal_sse_128_tail_hash:
3462	cmpq	$16,%rcx
3463	jb	L$seal_sse_128_tail_xor
3464	addq	0+0(%rdi),%r10
3465	adcq	8+0(%rdi),%r11
3466	adcq	$1,%r12
3467	movq	0+0+0(%rbp),%rax
3468	movq	%rax,%r15
3469	mulq	%r10
3470	movq	%rax,%r13
3471	movq	%rdx,%r14
3472	movq	0+0+0(%rbp),%rax
3473	mulq	%r11
3474	imulq	%r12,%r15
3475	addq	%rax,%r14
3476	adcq	%rdx,%r15
3477	movq	8+0+0(%rbp),%rax
3478	movq	%rax,%r9
3479	mulq	%r10
3480	addq	%rax,%r14
3481	adcq	$0,%rdx
3482	movq	%rdx,%r10
3483	movq	8+0+0(%rbp),%rax
3484	mulq	%r11
3485	addq	%rax,%r15
3486	adcq	$0,%rdx
3487	imulq	%r12,%r9
3488	addq	%r10,%r15
3489	adcq	%rdx,%r9
3490	movq	%r13,%r10
3491	movq	%r14,%r11
3492	movq	%r15,%r12
3493	andq	$3,%r12
3494	movq	%r15,%r13
3495	andq	$-4,%r13
3496	movq	%r9,%r14
3497	shrdq	$2,%r9,%r15
3498	shrq	$2,%r9
3499	addq	%r13,%r15
3500	adcq	%r14,%r9
3501	addq	%r15,%r10
3502	adcq	%r9,%r11
3503	adcq	$0,%r12
3504
3505	subq	$16,%rcx
3506	leaq	16(%rdi),%rdi
3507	jmp	L$seal_sse_128_tail_hash
3508
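# Encrypt and hash the remaining data 16 bytes at a time, rotating the next
# keystream words into %xmm0/%xmm4/%xmm8/%xmm12 after each block.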
3509L$seal_sse_128_tail_xor:
3510	cmpq	$16,%rbx
3511	jb	L$seal_sse_tail_16
3512	subq	$16,%rbx
3513
3514	movdqu	0(%rsi),%xmm3
3515	pxor	%xmm3,%xmm0
3516	movdqu	%xmm0,0(%rdi)
3517
3518	addq	0(%rdi),%r10
3519	adcq	8(%rdi),%r11
3520	adcq	$1,%r12
3521	leaq	16(%rsi),%rsi
3522	leaq	16(%rdi),%rdi
3523	movq	0+0+0(%rbp),%rax
3524	movq	%rax,%r15
3525	mulq	%r10
3526	movq	%rax,%r13
3527	movq	%rdx,%r14
3528	movq	0+0+0(%rbp),%rax
3529	mulq	%r11
3530	imulq	%r12,%r15
3531	addq	%rax,%r14
3532	adcq	%rdx,%r15
3533	movq	8+0+0(%rbp),%rax
3534	movq	%rax,%r9
3535	mulq	%r10
3536	addq	%rax,%r14
3537	adcq	$0,%rdx
3538	movq	%rdx,%r10
3539	movq	8+0+0(%rbp),%rax
3540	mulq	%r11
3541	addq	%rax,%r15
3542	adcq	$0,%rdx
3543	imulq	%r12,%r9
3544	addq	%r10,%r15
3545	adcq	%rdx,%r9
3546	movq	%r13,%r10
3547	movq	%r14,%r11
3548	movq	%r15,%r12
3549	andq	$3,%r12
3550	movq	%r15,%r13
3551	andq	$-4,%r13
3552	movq	%r9,%r14
3553	shrdq	$2,%r9,%r15
3554	shrq	$2,%r9
3555	addq	%r13,%r15
3556	adcq	%r14,%r9
3557	addq	%r15,%r10
3558	adcq	%r9,%r11
3559	adcq	$0,%r12
3560
3561
3562	movdqa	%xmm4,%xmm0
3563	movdqa	%xmm8,%xmm4
3564	movdqa	%xmm12,%xmm8
3565	movdqa	%xmm1,%xmm12
3566	movdqa	%xmm5,%xmm1
3567	movdqa	%xmm9,%xmm5
3568	movdqa	%xmm13,%xmm9
3569	jmp	L$seal_sse_128_tail_xor
3570
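# Fewer than 16 bytes left: collect them into %xmm15 (last byte first),
# XOR with the low 16 bytes of the remaining keystream, and emit the
# ciphertext one byte at a time; the padded block is hashed further down.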
3571L$seal_sse_tail_16:
3572	testq	%rbx,%rbx
3573	jz	L$process_blocks_of_extra_in
3574
3575	movq	%rbx,%r8
3576	movq	%rbx,%rcx
3577	leaq	-1(%rsi,%rbx,1),%rsi
3578	pxor	%xmm15,%xmm15
3579L$seal_sse_tail_16_compose:
3580	pslldq	$1,%xmm15
3581	pinsrb	$0,(%rsi),%xmm15
3582	leaq	-1(%rsi),%rsi
3583	decq	%rcx
3584	jne	L$seal_sse_tail_16_compose
3585
3586
3587	pxor	%xmm0,%xmm15
3588
3589
3590	movq	%rbx,%rcx
3591	movdqu	%xmm15,%xmm0
3592L$seal_sse_tail_16_extract:
3593	pextrb	$0,%xmm0,(%rdi)
3594	psrldq	$1,%xmm0
3595	addq	$1,%rdi
3596	subq	$1,%rcx
3597	jnz	L$seal_sse_tail_16_extract
3598
3599
3600
3601
3602
3603
3604
3605
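# Sealing may also be given extra ciphertext bytes by the caller through the
# data block whose address sits at 288+32(%rsp) (pointer at offset 48,
# length at offset 56).  They are folded into the Poly1305 tag here but never
# written to the output; first, top up the final partial block with them.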
3606	movq	288 + 0 + 32(%rsp),%r9
3607	movq	56(%r9),%r14
3608	movq	48(%r9),%r13
3609	testq	%r14,%r14
3610	jz	L$process_partial_block
3611
3612	movq	$16,%r15
3613	subq	%rbx,%r15
3614	cmpq	%r15,%r14
3615
3616	jge	L$load_extra_in
3617	movq	%r14,%r15
3618
3619L$load_extra_in:
3620
3621
3622	leaq	-1(%r13,%r15,1),%rsi
3623
3624
3625	addq	%r15,%r13
3626	subq	%r15,%r14
3627	movq	%r13,48(%r9)
3628	movq	%r14,56(%r9)
3629
3630
3631
3632	addq	%r15,%r8
3633
3634
3635	pxor	%xmm11,%xmm11
3636L$load_extra_load_loop:
3637	pslldq	$1,%xmm11
3638	pinsrb	$0,(%rsi),%xmm11
3639	leaq	-1(%rsi),%rsi
3640	subq	$1,%r15
3641	jnz	L$load_extra_load_loop
3642
3643
3644
3645
3646	movq	%rbx,%r15
3647
3648L$load_extra_shift_loop:
3649	pslldq	$1,%xmm11
3650	subq	$1,%r15
3651	jnz	L$load_extra_shift_loop
3652
3653
3654
3655
3656	leaq	L$and_masks(%rip),%r15
3657	shlq	$4,%rbx
3658	pand	-16(%r15,%rbx,1),%xmm15
3659
3660
3661	por	%xmm11,%xmm15
3662
3663
3664
3665.byte	102,77,15,126,253
3666	pextrq	$1,%xmm15,%r14
3667	addq	%r13,%r10
3668	adcq	%r14,%r11
3669	adcq	$1,%r12
3670	movq	0+0+0(%rbp),%rax
3671	movq	%rax,%r15
3672	mulq	%r10
3673	movq	%rax,%r13
3674	movq	%rdx,%r14
3675	movq	0+0+0(%rbp),%rax
3676	mulq	%r11
3677	imulq	%r12,%r15
3678	addq	%rax,%r14
3679	adcq	%rdx,%r15
3680	movq	8+0+0(%rbp),%rax
3681	movq	%rax,%r9
3682	mulq	%r10
3683	addq	%rax,%r14
3684	adcq	$0,%rdx
3685	movq	%rdx,%r10
3686	movq	8+0+0(%rbp),%rax
3687	mulq	%r11
3688	addq	%rax,%r15
3689	adcq	$0,%rdx
3690	imulq	%r12,%r9
3691	addq	%r10,%r15
3692	adcq	%rdx,%r9
3693	movq	%r13,%r10
3694	movq	%r14,%r11
3695	movq	%r15,%r12
3696	andq	$3,%r12
3697	movq	%r15,%r13
3698	andq	$-4,%r13
3699	movq	%r9,%r14
3700	shrdq	$2,%r9,%r15
3701	shrq	$2,%r9
3702	addq	%r13,%r15
3703	adcq	%r14,%r9
3704	addq	%r15,%r10
3705	adcq	%r9,%r11
3706	adcq	$0,%r12
3707
3708
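# Hash any whole 16-byte blocks of that extra ciphertext, then gather its
# trailing bytes into %xmm15 for one more partial block.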
3709L$process_blocks_of_extra_in:
3710
3711	movq	288+32+0(%rsp),%r9
3712	movq	48(%r9),%rsi
3713	movq	56(%r9),%r8
3714	movq	%r8,%rcx
3715	shrq	$4,%r8
3716
3717L$process_extra_hash_loop:
3718	jz	process_extra_in_trailer
3719	addq	0+0(%rsi),%r10
3720	adcq	8+0(%rsi),%r11
3721	adcq	$1,%r12
3722	movq	0+0+0(%rbp),%rax
3723	movq	%rax,%r15
3724	mulq	%r10
3725	movq	%rax,%r13
3726	movq	%rdx,%r14
3727	movq	0+0+0(%rbp),%rax
3728	mulq	%r11
3729	imulq	%r12,%r15
3730	addq	%rax,%r14
3731	adcq	%rdx,%r15
3732	movq	8+0+0(%rbp),%rax
3733	movq	%rax,%r9
3734	mulq	%r10
3735	addq	%rax,%r14
3736	adcq	$0,%rdx
3737	movq	%rdx,%r10
3738	movq	8+0+0(%rbp),%rax
3739	mulq	%r11
3740	addq	%rax,%r15
3741	adcq	$0,%rdx
3742	imulq	%r12,%r9
3743	addq	%r10,%r15
3744	adcq	%rdx,%r9
3745	movq	%r13,%r10
3746	movq	%r14,%r11
3747	movq	%r15,%r12
3748	andq	$3,%r12
3749	movq	%r15,%r13
3750	andq	$-4,%r13
3751	movq	%r9,%r14
3752	shrdq	$2,%r9,%r15
3753	shrq	$2,%r9
3754	addq	%r13,%r15
3755	adcq	%r14,%r9
3756	addq	%r15,%r10
3757	adcq	%r9,%r11
3758	adcq	$0,%r12
3759
3760	leaq	16(%rsi),%rsi
3761	subq	$1,%r8
3762	jmp	L$process_extra_hash_loop
3763process_extra_in_trailer:
3764	andq	$15,%rcx
3765	movq	%rcx,%rbx
3766	jz	L$do_length_block
3767	leaq	-1(%rsi,%rcx,1),%rsi
3768
3769L$process_extra_in_trailer_load:
3770	pslldq	$1,%xmm15
3771	pinsrb	$0,(%rsi),%xmm15
3772	leaq	-1(%rsi),%rsi
3773	subq	$1,%rcx
3774	jnz	L$process_extra_in_trailer_load
3775
3776L$process_partial_block:
3777
3778	leaq	L$and_masks(%rip),%r15
3779	shlq	$4,%rbx
3780	pand	-16(%r15,%rbx,1),%xmm15
3781.byte	102,77,15,126,253
3782	pextrq	$1,%xmm15,%r14
3783	addq	%r13,%r10
3784	adcq	%r14,%r11
3785	adcq	$1,%r12
3786	movq	0+0+0(%rbp),%rax
3787	movq	%rax,%r15
3788	mulq	%r10
3789	movq	%rax,%r13
3790	movq	%rdx,%r14
3791	movq	0+0+0(%rbp),%rax
3792	mulq	%r11
3793	imulq	%r12,%r15
3794	addq	%rax,%r14
3795	adcq	%rdx,%r15
3796	movq	8+0+0(%rbp),%rax
3797	movq	%rax,%r9
3798	mulq	%r10
3799	addq	%rax,%r14
3800	adcq	$0,%rdx
3801	movq	%rdx,%r10
3802	movq	8+0+0(%rbp),%rax
3803	mulq	%r11
3804	addq	%rax,%r15
3805	adcq	$0,%rdx
3806	imulq	%r12,%r9
3807	addq	%r10,%r15
3808	adcq	%rdx,%r9
3809	movq	%r13,%r10
3810	movq	%r14,%r11
3811	movq	%r15,%r12
3812	andq	$3,%r12
3813	movq	%r15,%r13
3814	andq	$-4,%r13
3815	movq	%r9,%r14
3816	shrdq	$2,%r9,%r15
3817	shrq	$2,%r9
3818	addq	%r13,%r15
3819	adcq	%r14,%r9
3820	addq	%r15,%r10
3821	adcq	%r9,%r11
3822	adcq	$0,%r12
3823
3824
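# Final Poly1305 block: the AAD and ciphertext lengths saved at 32(%rbp).
# Afterwards reduce the accumulator mod 2^130 - 5, add the s half of the
# key from 16(%rbp), pop the pointer saved on entry, and write the
# 16-byte tag through it.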
3825L$do_length_block:
3826	addq	0+0+32(%rbp),%r10
3827	adcq	8+0+32(%rbp),%r11
3828	adcq	$1,%r12
3829	movq	0+0+0(%rbp),%rax
3830	movq	%rax,%r15
3831	mulq	%r10
3832	movq	%rax,%r13
3833	movq	%rdx,%r14
3834	movq	0+0+0(%rbp),%rax
3835	mulq	%r11
3836	imulq	%r12,%r15
3837	addq	%rax,%r14
3838	adcq	%rdx,%r15
3839	movq	8+0+0(%rbp),%rax
3840	movq	%rax,%r9
3841	mulq	%r10
3842	addq	%rax,%r14
3843	adcq	$0,%rdx
3844	movq	%rdx,%r10
3845	movq	8+0+0(%rbp),%rax
3846	mulq	%r11
3847	addq	%rax,%r15
3848	adcq	$0,%rdx
3849	imulq	%r12,%r9
3850	addq	%r10,%r15
3851	adcq	%rdx,%r9
3852	movq	%r13,%r10
3853	movq	%r14,%r11
3854	movq	%r15,%r12
3855	andq	$3,%r12
3856	movq	%r15,%r13
3857	andq	$-4,%r13
3858	movq	%r9,%r14
3859	shrdq	$2,%r9,%r15
3860	shrq	$2,%r9
3861	addq	%r13,%r15
3862	adcq	%r14,%r9
3863	addq	%r15,%r10
3864	adcq	%r9,%r11
3865	adcq	$0,%r12
3866
3867
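# Conditional subtraction of 2^130 - 5, expressed as subtracting the three
# limbs (-5, -1, 3); the cmovc's keep the unreduced value when the
# subtraction borrows.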
3868	movq	%r10,%r13
3869	movq	%r11,%r14
3870	movq	%r12,%r15
3871	subq	$-5,%r10
3872	sbbq	$-1,%r11
3873	sbbq	$3,%r12
3874	cmovcq	%r13,%r10
3875	cmovcq	%r14,%r11
3876	cmovcq	%r15,%r12
3877
3878	addq	0+0+16(%rbp),%r10
3879	adcq	8+0+16(%rbp),%r11
3880
3881
3882	addq	$288 + 0 + 32,%rsp
3883
3884
3885	popq	%r9
3886
3887	movq	%r10,(%r9)
3888	movq	%r11,8(%r9)
3889	popq	%r15
3890
3891	popq	%r14
3892
3893	popq	%r13
3894
3895	popq	%r12
3896
3897	popq	%rbx
3898
3899	popq	%rbp
3900
3901	.byte	0xf3,0xc3
3902
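# Seal path for inputs of at most 128 bytes: three blocks computed entirely
# in registers; the counter-0 block is clamped for the Poly1305 key and the
# data is encrypted by the shared 128-tail code.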
3903L$seal_sse_128:
3904
3905	movdqu	L$chacha20_consts(%rip),%xmm0
3906	movdqa	%xmm0,%xmm1
3907	movdqa	%xmm0,%xmm2
3908	movdqu	0(%r9),%xmm4
3909	movdqa	%xmm4,%xmm5
3910	movdqa	%xmm4,%xmm6
3911	movdqu	16(%r9),%xmm8
3912	movdqa	%xmm8,%xmm9
3913	movdqa	%xmm8,%xmm10
3914	movdqu	32(%r9),%xmm14
3915	movdqa	%xmm14,%xmm12
3916	paddd	L$sse_inc(%rip),%xmm12
3917	movdqa	%xmm12,%xmm13
3918	paddd	L$sse_inc(%rip),%xmm13
3919	movdqa	%xmm4,%xmm7
3920	movdqa	%xmm8,%xmm11
3921	movdqa	%xmm12,%xmm15
3922	movq	$10,%r10
3923
3924L$seal_sse_128_rounds:
3925	paddd	%xmm4,%xmm0
3926	pxor	%xmm0,%xmm12
3927	pshufb	L$rol16(%rip),%xmm12
3928	paddd	%xmm12,%xmm8
3929	pxor	%xmm8,%xmm4
3930	movdqa	%xmm4,%xmm3
3931	pslld	$12,%xmm3
3932	psrld	$20,%xmm4
3933	pxor	%xmm3,%xmm4
3934	paddd	%xmm4,%xmm0
3935	pxor	%xmm0,%xmm12
3936	pshufb	L$rol8(%rip),%xmm12
3937	paddd	%xmm12,%xmm8
3938	pxor	%xmm8,%xmm4
3939	movdqa	%xmm4,%xmm3
3940	pslld	$7,%xmm3
3941	psrld	$25,%xmm4
3942	pxor	%xmm3,%xmm4
3943.byte	102,15,58,15,228,4
3944.byte	102,69,15,58,15,192,8
3945.byte	102,69,15,58,15,228,12
3946	paddd	%xmm5,%xmm1
3947	pxor	%xmm1,%xmm13
3948	pshufb	L$rol16(%rip),%xmm13
3949	paddd	%xmm13,%xmm9
3950	pxor	%xmm9,%xmm5
3951	movdqa	%xmm5,%xmm3
3952	pslld	$12,%xmm3
3953	psrld	$20,%xmm5
3954	pxor	%xmm3,%xmm5
3955	paddd	%xmm5,%xmm1
3956	pxor	%xmm1,%xmm13
3957	pshufb	L$rol8(%rip),%xmm13
3958	paddd	%xmm13,%xmm9
3959	pxor	%xmm9,%xmm5
3960	movdqa	%xmm5,%xmm3
3961	pslld	$7,%xmm3
3962	psrld	$25,%xmm5
3963	pxor	%xmm3,%xmm5
3964.byte	102,15,58,15,237,4
3965.byte	102,69,15,58,15,201,8
3966.byte	102,69,15,58,15,237,12
3967	paddd	%xmm6,%xmm2
3968	pxor	%xmm2,%xmm14
3969	pshufb	L$rol16(%rip),%xmm14
3970	paddd	%xmm14,%xmm10
3971	pxor	%xmm10,%xmm6
3972	movdqa	%xmm6,%xmm3
3973	pslld	$12,%xmm3
3974	psrld	$20,%xmm6
3975	pxor	%xmm3,%xmm6
3976	paddd	%xmm6,%xmm2
3977	pxor	%xmm2,%xmm14
3978	pshufb	L$rol8(%rip),%xmm14
3979	paddd	%xmm14,%xmm10
3980	pxor	%xmm10,%xmm6
3981	movdqa	%xmm6,%xmm3
3982	pslld	$7,%xmm3
3983	psrld	$25,%xmm6
3984	pxor	%xmm3,%xmm6
3985.byte	102,15,58,15,246,4
3986.byte	102,69,15,58,15,210,8
3987.byte	102,69,15,58,15,246,12
3988	paddd	%xmm4,%xmm0
3989	pxor	%xmm0,%xmm12
3990	pshufb	L$rol16(%rip),%xmm12
3991	paddd	%xmm12,%xmm8
3992	pxor	%xmm8,%xmm4
3993	movdqa	%xmm4,%xmm3
3994	pslld	$12,%xmm3
3995	psrld	$20,%xmm4
3996	pxor	%xmm3,%xmm4
3997	paddd	%xmm4,%xmm0
3998	pxor	%xmm0,%xmm12
3999	pshufb	L$rol8(%rip),%xmm12
4000	paddd	%xmm12,%xmm8
4001	pxor	%xmm8,%xmm4
4002	movdqa	%xmm4,%xmm3
4003	pslld	$7,%xmm3
4004	psrld	$25,%xmm4
4005	pxor	%xmm3,%xmm4
4006.byte	102,15,58,15,228,12
4007.byte	102,69,15,58,15,192,8
4008.byte	102,69,15,58,15,228,4
4009	paddd	%xmm5,%xmm1
4010	pxor	%xmm1,%xmm13
4011	pshufb	L$rol16(%rip),%xmm13
4012	paddd	%xmm13,%xmm9
4013	pxor	%xmm9,%xmm5
4014	movdqa	%xmm5,%xmm3
4015	pslld	$12,%xmm3
4016	psrld	$20,%xmm5
4017	pxor	%xmm3,%xmm5
4018	paddd	%xmm5,%xmm1
4019	pxor	%xmm1,%xmm13
4020	pshufb	L$rol8(%rip),%xmm13
4021	paddd	%xmm13,%xmm9
4022	pxor	%xmm9,%xmm5
4023	movdqa	%xmm5,%xmm3
4024	pslld	$7,%xmm3
4025	psrld	$25,%xmm5
4026	pxor	%xmm3,%xmm5
4027.byte	102,15,58,15,237,12
4028.byte	102,69,15,58,15,201,8
4029.byte	102,69,15,58,15,237,4
4030	paddd	%xmm6,%xmm2
4031	pxor	%xmm2,%xmm14
4032	pshufb	L$rol16(%rip),%xmm14
4033	paddd	%xmm14,%xmm10
4034	pxor	%xmm10,%xmm6
4035	movdqa	%xmm6,%xmm3
4036	pslld	$12,%xmm3
4037	psrld	$20,%xmm6
4038	pxor	%xmm3,%xmm6
4039	paddd	%xmm6,%xmm2
4040	pxor	%xmm2,%xmm14
4041	pshufb	L$rol8(%rip),%xmm14
4042	paddd	%xmm14,%xmm10
4043	pxor	%xmm10,%xmm6
4044	movdqa	%xmm6,%xmm3
4045	pslld	$7,%xmm3
4046	psrld	$25,%xmm6
4047	pxor	%xmm3,%xmm6
4048.byte	102,15,58,15,246,12
4049.byte	102,69,15,58,15,210,8
4050.byte	102,69,15,58,15,246,4
4051
4052	decq	%r10
4053	jnz	L$seal_sse_128_rounds
4054	paddd	L$chacha20_consts(%rip),%xmm0
4055	paddd	L$chacha20_consts(%rip),%xmm1
4056	paddd	L$chacha20_consts(%rip),%xmm2
4057	paddd	%xmm7,%xmm4
4058	paddd	%xmm7,%xmm5
4059	paddd	%xmm7,%xmm6
4060	paddd	%xmm11,%xmm8
4061	paddd	%xmm11,%xmm9
4062	paddd	%xmm15,%xmm12
4063	paddd	L$sse_inc(%rip),%xmm15
4064	paddd	%xmm15,%xmm13
4065
4066	pand	L$clamp(%rip),%xmm2
4067	movdqa	%xmm2,0+0(%rbp)
4068	movdqa	%xmm6,0+16(%rbp)
4069
4070	movq	%r8,%r8
4071	call	poly_hash_ad_internal
4072	jmp	L$seal_sse_128_tail_xor
4073
4074
4075
4076
4077
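# AVX2 implementation of the open (decrypt) path.  The layout mirrors the
# SSE code, but each ymm register carries two ChaCha20 blocks.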
4078.p2align	6
4079chacha20_poly1305_open_avx2:
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092	vzeroupper
4093	vmovdqa	L$chacha20_consts(%rip),%ymm0
4094	vbroadcasti128	0(%r9),%ymm4
4095	vbroadcasti128	16(%r9),%ymm8
4096	vbroadcasti128	32(%r9),%ymm12
4097	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
4098	cmpq	$192,%rbx
4099	jbe	L$open_avx2_192
4100	cmpq	$320,%rbx
4101	jbe	L$open_avx2_320
4102
4103	vmovdqa	%ymm4,0+64(%rbp)
4104	vmovdqa	%ymm8,0+96(%rbp)
4105	vmovdqa	%ymm12,0+160(%rbp)
4106	movq	$10,%r10
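# Ten double rounds on the two-lane state (counters 0 and 1); lane 0 of the
# result is clamped below to become the Poly1305 key.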
4107L$open_avx2_init_rounds:
4108	vpaddd	%ymm4,%ymm0,%ymm0
4109	vpxor	%ymm0,%ymm12,%ymm12
4110	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4111	vpaddd	%ymm12,%ymm8,%ymm8
4112	vpxor	%ymm8,%ymm4,%ymm4
4113	vpsrld	$20,%ymm4,%ymm3
4114	vpslld	$12,%ymm4,%ymm4
4115	vpxor	%ymm3,%ymm4,%ymm4
4116	vpaddd	%ymm4,%ymm0,%ymm0
4117	vpxor	%ymm0,%ymm12,%ymm12
4118	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4119	vpaddd	%ymm12,%ymm8,%ymm8
4120	vpxor	%ymm8,%ymm4,%ymm4
4121	vpslld	$7,%ymm4,%ymm3
4122	vpsrld	$25,%ymm4,%ymm4
4123	vpxor	%ymm3,%ymm4,%ymm4
4124	vpalignr	$12,%ymm12,%ymm12,%ymm12
4125	vpalignr	$8,%ymm8,%ymm8,%ymm8
4126	vpalignr	$4,%ymm4,%ymm4,%ymm4
4127	vpaddd	%ymm4,%ymm0,%ymm0
4128	vpxor	%ymm0,%ymm12,%ymm12
4129	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4130	vpaddd	%ymm12,%ymm8,%ymm8
4131	vpxor	%ymm8,%ymm4,%ymm4
4132	vpsrld	$20,%ymm4,%ymm3
4133	vpslld	$12,%ymm4,%ymm4
4134	vpxor	%ymm3,%ymm4,%ymm4
4135	vpaddd	%ymm4,%ymm0,%ymm0
4136	vpxor	%ymm0,%ymm12,%ymm12
4137	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4138	vpaddd	%ymm12,%ymm8,%ymm8
4139	vpxor	%ymm8,%ymm4,%ymm4
4140	vpslld	$7,%ymm4,%ymm3
4141	vpsrld	$25,%ymm4,%ymm4
4142	vpxor	%ymm3,%ymm4,%ymm4
4143	vpalignr	$4,%ymm12,%ymm12,%ymm12
4144	vpalignr	$8,%ymm8,%ymm8,%ymm8
4145	vpalignr	$12,%ymm4,%ymm4,%ymm4
4146
4147	decq	%r10
4148	jne	L$open_avx2_init_rounds
4149	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4150	vpaddd	0+64(%rbp),%ymm4,%ymm4
4151	vpaddd	0+96(%rbp),%ymm8,%ymm8
4152	vpaddd	0+160(%rbp),%ymm12,%ymm12
4153
4154	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4155
4156	vpand	L$clamp(%rip),%ymm3,%ymm3
4157	vmovdqa	%ymm3,0+0(%rbp)
4158
4159	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
4160	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
4161
4162	movq	%r8,%r8
4163	call	poly_hash_ad_internal
4164
4165	xorq	%rcx,%rcx
4166L$open_avx2_init_hash:
4167	addq	0+0(%rsi,%rcx,1),%r10
4168	adcq	8+0(%rsi,%rcx,1),%r11
4169	adcq	$1,%r12
4170	movq	0+0+0(%rbp),%rax
4171	movq	%rax,%r15
4172	mulq	%r10
4173	movq	%rax,%r13
4174	movq	%rdx,%r14
4175	movq	0+0+0(%rbp),%rax
4176	mulq	%r11
4177	imulq	%r12,%r15
4178	addq	%rax,%r14
4179	adcq	%rdx,%r15
4180	movq	8+0+0(%rbp),%rax
4181	movq	%rax,%r9
4182	mulq	%r10
4183	addq	%rax,%r14
4184	adcq	$0,%rdx
4185	movq	%rdx,%r10
4186	movq	8+0+0(%rbp),%rax
4187	mulq	%r11
4188	addq	%rax,%r15
4189	adcq	$0,%rdx
4190	imulq	%r12,%r9
4191	addq	%r10,%r15
4192	adcq	%rdx,%r9
4193	movq	%r13,%r10
4194	movq	%r14,%r11
4195	movq	%r15,%r12
4196	andq	$3,%r12
4197	movq	%r15,%r13
4198	andq	$-4,%r13
4199	movq	%r9,%r14
4200	shrdq	$2,%r9,%r15
4201	shrq	$2,%r9
4202	addq	%r13,%r15
4203	adcq	%r14,%r9
4204	addq	%r15,%r10
4205	adcq	%r9,%r11
4206	adcq	$0,%r12
4207
4208	addq	$16,%rcx
4209	cmpq	$64,%rcx
4210	jne	L$open_avx2_init_hash
4211
4212	vpxor	0(%rsi),%ymm0,%ymm0
4213	vpxor	32(%rsi),%ymm4,%ymm4
4214
4215	vmovdqu	%ymm0,0(%rdi)
4216	vmovdqu	%ymm4,32(%rdi)
4217	leaq	64(%rsi),%rsi
4218	leaq	64(%rdi),%rdi
4219	subq	$64,%rbx
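# Main AVX2 open loop: 512 bytes per iteration (eight blocks held as four
# ymm quartets).  The ciphertext is absorbed into Poly1305 (mulx-based limb
# multiplies) while the keystream is computed, then XORed out; %rcx walks the
# first 60*8 = 480 bytes inside the round loop and the last 32 bytes are
# hashed during the final state additions.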
4220L$open_avx2_main_loop:
4221
4222	cmpq	$512,%rbx
4223	jb	L$open_avx2_main_loop_done
4224	vmovdqa	L$chacha20_consts(%rip),%ymm0
4225	vmovdqa	0+64(%rbp),%ymm4
4226	vmovdqa	0+96(%rbp),%ymm8
4227	vmovdqa	%ymm0,%ymm1
4228	vmovdqa	%ymm4,%ymm5
4229	vmovdqa	%ymm8,%ymm9
4230	vmovdqa	%ymm0,%ymm2
4231	vmovdqa	%ymm4,%ymm6
4232	vmovdqa	%ymm8,%ymm10
4233	vmovdqa	%ymm0,%ymm3
4234	vmovdqa	%ymm4,%ymm7
4235	vmovdqa	%ymm8,%ymm11
4236	vmovdqa	L$avx2_inc(%rip),%ymm12
4237	vpaddd	0+160(%rbp),%ymm12,%ymm15
4238	vpaddd	%ymm15,%ymm12,%ymm14
4239	vpaddd	%ymm14,%ymm12,%ymm13
4240	vpaddd	%ymm13,%ymm12,%ymm12
4241	vmovdqa	%ymm15,0+256(%rbp)
4242	vmovdqa	%ymm14,0+224(%rbp)
4243	vmovdqa	%ymm13,0+192(%rbp)
4244	vmovdqa	%ymm12,0+160(%rbp)
4245
4246	xorq	%rcx,%rcx
4247L$open_avx2_main_loop_rounds:
4248	addq	0+0(%rsi,%rcx,1),%r10
4249	adcq	8+0(%rsi,%rcx,1),%r11
4250	adcq	$1,%r12
4251	vmovdqa	%ymm8,0+128(%rbp)
4252	vmovdqa	L$rol16(%rip),%ymm8
4253	vpaddd	%ymm7,%ymm3,%ymm3
4254	vpaddd	%ymm6,%ymm2,%ymm2
4255	vpaddd	%ymm5,%ymm1,%ymm1
4256	vpaddd	%ymm4,%ymm0,%ymm0
4257	vpxor	%ymm3,%ymm15,%ymm15
4258	vpxor	%ymm2,%ymm14,%ymm14
4259	vpxor	%ymm1,%ymm13,%ymm13
4260	vpxor	%ymm0,%ymm12,%ymm12
4261	movq	0+0+0(%rbp),%rdx
4262	movq	%rdx,%r15
4263	mulxq	%r10,%r13,%r14
4264	mulxq	%r11,%rax,%rdx
4265	imulq	%r12,%r15
4266	addq	%rax,%r14
4267	adcq	%rdx,%r15
4268	vpshufb	%ymm8,%ymm15,%ymm15
4269	vpshufb	%ymm8,%ymm14,%ymm14
4270	vpshufb	%ymm8,%ymm13,%ymm13
4271	vpshufb	%ymm8,%ymm12,%ymm12
4272	vpaddd	%ymm15,%ymm11,%ymm11
4273	vpaddd	%ymm14,%ymm10,%ymm10
4274	vpaddd	%ymm13,%ymm9,%ymm9
4275	vpaddd	0+128(%rbp),%ymm12,%ymm8
4276	vpxor	%ymm11,%ymm7,%ymm7
4277	movq	8+0+0(%rbp),%rdx
4278	mulxq	%r10,%r10,%rax
4279	addq	%r10,%r14
4280	mulxq	%r11,%r11,%r9
4281	adcq	%r11,%r15
4282	adcq	$0,%r9
4283	imulq	%r12,%rdx
4284	vpxor	%ymm10,%ymm6,%ymm6
4285	vpxor	%ymm9,%ymm5,%ymm5
4286	vpxor	%ymm8,%ymm4,%ymm4
4287	vmovdqa	%ymm8,0+128(%rbp)
4288	vpsrld	$20,%ymm7,%ymm8
4289	vpslld	$32-20,%ymm7,%ymm7
4290	vpxor	%ymm8,%ymm7,%ymm7
4291	vpsrld	$20,%ymm6,%ymm8
4292	vpslld	$32-20,%ymm6,%ymm6
4293	vpxor	%ymm8,%ymm6,%ymm6
4294	vpsrld	$20,%ymm5,%ymm8
4295	vpslld	$32-20,%ymm5,%ymm5
4296	addq	%rax,%r15
4297	adcq	%rdx,%r9
4298	vpxor	%ymm8,%ymm5,%ymm5
4299	vpsrld	$20,%ymm4,%ymm8
4300	vpslld	$32-20,%ymm4,%ymm4
4301	vpxor	%ymm8,%ymm4,%ymm4
4302	vmovdqa	L$rol8(%rip),%ymm8
4303	vpaddd	%ymm7,%ymm3,%ymm3
4304	vpaddd	%ymm6,%ymm2,%ymm2
4305	vpaddd	%ymm5,%ymm1,%ymm1
4306	vpaddd	%ymm4,%ymm0,%ymm0
4307	vpxor	%ymm3,%ymm15,%ymm15
4308	movq	%r13,%r10
4309	movq	%r14,%r11
4310	movq	%r15,%r12
4311	andq	$3,%r12
4312	movq	%r15,%r13
4313	andq	$-4,%r13
4314	movq	%r9,%r14
4315	shrdq	$2,%r9,%r15
4316	shrq	$2,%r9
4317	addq	%r13,%r15
4318	adcq	%r14,%r9
4319	addq	%r15,%r10
4320	adcq	%r9,%r11
4321	adcq	$0,%r12
4322	vpxor	%ymm2,%ymm14,%ymm14
4323	vpxor	%ymm1,%ymm13,%ymm13
4324	vpxor	%ymm0,%ymm12,%ymm12
4325	vpshufb	%ymm8,%ymm15,%ymm15
4326	vpshufb	%ymm8,%ymm14,%ymm14
4327	vpshufb	%ymm8,%ymm13,%ymm13
4328	vpshufb	%ymm8,%ymm12,%ymm12
4329	vpaddd	%ymm15,%ymm11,%ymm11
4330	vpaddd	%ymm14,%ymm10,%ymm10
4331	addq	0+16(%rsi,%rcx,1),%r10
4332	adcq	8+16(%rsi,%rcx,1),%r11
4333	adcq	$1,%r12
4334	vpaddd	%ymm13,%ymm9,%ymm9
4335	vpaddd	0+128(%rbp),%ymm12,%ymm8
4336	vpxor	%ymm11,%ymm7,%ymm7
4337	vpxor	%ymm10,%ymm6,%ymm6
4338	vpxor	%ymm9,%ymm5,%ymm5
4339	vpxor	%ymm8,%ymm4,%ymm4
4340	vmovdqa	%ymm8,0+128(%rbp)
4341	vpsrld	$25,%ymm7,%ymm8
4342	movq	0+0+0(%rbp),%rdx
4343	movq	%rdx,%r15
4344	mulxq	%r10,%r13,%r14
4345	mulxq	%r11,%rax,%rdx
4346	imulq	%r12,%r15
4347	addq	%rax,%r14
4348	adcq	%rdx,%r15
4349	vpslld	$32-25,%ymm7,%ymm7
4350	vpxor	%ymm8,%ymm7,%ymm7
4351	vpsrld	$25,%ymm6,%ymm8
4352	vpslld	$32-25,%ymm6,%ymm6
4353	vpxor	%ymm8,%ymm6,%ymm6
4354	vpsrld	$25,%ymm5,%ymm8
4355	vpslld	$32-25,%ymm5,%ymm5
4356	vpxor	%ymm8,%ymm5,%ymm5
4357	vpsrld	$25,%ymm4,%ymm8
4358	vpslld	$32-25,%ymm4,%ymm4
4359	vpxor	%ymm8,%ymm4,%ymm4
4360	vmovdqa	0+128(%rbp),%ymm8
4361	vpalignr	$4,%ymm7,%ymm7,%ymm7
4362	vpalignr	$8,%ymm11,%ymm11,%ymm11
4363	vpalignr	$12,%ymm15,%ymm15,%ymm15
4364	vpalignr	$4,%ymm6,%ymm6,%ymm6
4365	vpalignr	$8,%ymm10,%ymm10,%ymm10
4366	vpalignr	$12,%ymm14,%ymm14,%ymm14
4367	movq	8+0+0(%rbp),%rdx
4368	mulxq	%r10,%r10,%rax
4369	addq	%r10,%r14
4370	mulxq	%r11,%r11,%r9
4371	adcq	%r11,%r15
4372	adcq	$0,%r9
4373	imulq	%r12,%rdx
4374	vpalignr	$4,%ymm5,%ymm5,%ymm5
4375	vpalignr	$8,%ymm9,%ymm9,%ymm9
4376	vpalignr	$12,%ymm13,%ymm13,%ymm13
4377	vpalignr	$4,%ymm4,%ymm4,%ymm4
4378	vpalignr	$8,%ymm8,%ymm8,%ymm8
4379	vpalignr	$12,%ymm12,%ymm12,%ymm12
4380	vmovdqa	%ymm8,0+128(%rbp)
4381	vmovdqa	L$rol16(%rip),%ymm8
4382	vpaddd	%ymm7,%ymm3,%ymm3
4383	vpaddd	%ymm6,%ymm2,%ymm2
4384	vpaddd	%ymm5,%ymm1,%ymm1
4385	vpaddd	%ymm4,%ymm0,%ymm0
4386	vpxor	%ymm3,%ymm15,%ymm15
4387	vpxor	%ymm2,%ymm14,%ymm14
4388	vpxor	%ymm1,%ymm13,%ymm13
4389	vpxor	%ymm0,%ymm12,%ymm12
4390	vpshufb	%ymm8,%ymm15,%ymm15
4391	vpshufb	%ymm8,%ymm14,%ymm14
4392	addq	%rax,%r15
4393	adcq	%rdx,%r9
4394	vpshufb	%ymm8,%ymm13,%ymm13
4395	vpshufb	%ymm8,%ymm12,%ymm12
4396	vpaddd	%ymm15,%ymm11,%ymm11
4397	vpaddd	%ymm14,%ymm10,%ymm10
4398	vpaddd	%ymm13,%ymm9,%ymm9
4399	vpaddd	0+128(%rbp),%ymm12,%ymm8
4400	vpxor	%ymm11,%ymm7,%ymm7
4401	vpxor	%ymm10,%ymm6,%ymm6
4402	vpxor	%ymm9,%ymm5,%ymm5
4403	movq	%r13,%r10
4404	movq	%r14,%r11
4405	movq	%r15,%r12
4406	andq	$3,%r12
4407	movq	%r15,%r13
4408	andq	$-4,%r13
4409	movq	%r9,%r14
4410	shrdq	$2,%r9,%r15
4411	shrq	$2,%r9
4412	addq	%r13,%r15
4413	adcq	%r14,%r9
4414	addq	%r15,%r10
4415	adcq	%r9,%r11
4416	adcq	$0,%r12
4417	vpxor	%ymm8,%ymm4,%ymm4
4418	vmovdqa	%ymm8,0+128(%rbp)
4419	vpsrld	$20,%ymm7,%ymm8
4420	vpslld	$32-20,%ymm7,%ymm7
4421	vpxor	%ymm8,%ymm7,%ymm7
4422	vpsrld	$20,%ymm6,%ymm8
4423	vpslld	$32-20,%ymm6,%ymm6
4424	vpxor	%ymm8,%ymm6,%ymm6
4425	addq	0+32(%rsi,%rcx,1),%r10
4426	adcq	8+32(%rsi,%rcx,1),%r11
4427	adcq	$1,%r12
4428
4429	leaq	48(%rcx),%rcx
4430	vpsrld	$20,%ymm5,%ymm8
4431	vpslld	$32-20,%ymm5,%ymm5
4432	vpxor	%ymm8,%ymm5,%ymm5
4433	vpsrld	$20,%ymm4,%ymm8
4434	vpslld	$32-20,%ymm4,%ymm4
4435	vpxor	%ymm8,%ymm4,%ymm4
4436	vmovdqa	L$rol8(%rip),%ymm8
4437	vpaddd	%ymm7,%ymm3,%ymm3
4438	vpaddd	%ymm6,%ymm2,%ymm2
4439	vpaddd	%ymm5,%ymm1,%ymm1
4440	vpaddd	%ymm4,%ymm0,%ymm0
4441	vpxor	%ymm3,%ymm15,%ymm15
4442	vpxor	%ymm2,%ymm14,%ymm14
4443	vpxor	%ymm1,%ymm13,%ymm13
4444	vpxor	%ymm0,%ymm12,%ymm12
4445	vpshufb	%ymm8,%ymm15,%ymm15
4446	vpshufb	%ymm8,%ymm14,%ymm14
4447	vpshufb	%ymm8,%ymm13,%ymm13
4448	movq	0+0+0(%rbp),%rdx
4449	movq	%rdx,%r15
4450	mulxq	%r10,%r13,%r14
4451	mulxq	%r11,%rax,%rdx
4452	imulq	%r12,%r15
4453	addq	%rax,%r14
4454	adcq	%rdx,%r15
4455	vpshufb	%ymm8,%ymm12,%ymm12
4456	vpaddd	%ymm15,%ymm11,%ymm11
4457	vpaddd	%ymm14,%ymm10,%ymm10
4458	vpaddd	%ymm13,%ymm9,%ymm9
4459	vpaddd	0+128(%rbp),%ymm12,%ymm8
4460	vpxor	%ymm11,%ymm7,%ymm7
4461	vpxor	%ymm10,%ymm6,%ymm6
4462	vpxor	%ymm9,%ymm5,%ymm5
4463	movq	8+0+0(%rbp),%rdx
4464	mulxq	%r10,%r10,%rax
4465	addq	%r10,%r14
4466	mulxq	%r11,%r11,%r9
4467	adcq	%r11,%r15
4468	adcq	$0,%r9
4469	imulq	%r12,%rdx
4470	vpxor	%ymm8,%ymm4,%ymm4
4471	vmovdqa	%ymm8,0+128(%rbp)
4472	vpsrld	$25,%ymm7,%ymm8
4473	vpslld	$32-25,%ymm7,%ymm7
4474	vpxor	%ymm8,%ymm7,%ymm7
4475	vpsrld	$25,%ymm6,%ymm8
4476	vpslld	$32-25,%ymm6,%ymm6
4477	vpxor	%ymm8,%ymm6,%ymm6
4478	addq	%rax,%r15
4479	adcq	%rdx,%r9
4480	vpsrld	$25,%ymm5,%ymm8
4481	vpslld	$32-25,%ymm5,%ymm5
4482	vpxor	%ymm8,%ymm5,%ymm5
4483	vpsrld	$25,%ymm4,%ymm8
4484	vpslld	$32-25,%ymm4,%ymm4
4485	vpxor	%ymm8,%ymm4,%ymm4
4486	vmovdqa	0+128(%rbp),%ymm8
4487	vpalignr	$12,%ymm7,%ymm7,%ymm7
4488	vpalignr	$8,%ymm11,%ymm11,%ymm11
4489	vpalignr	$4,%ymm15,%ymm15,%ymm15
4490	vpalignr	$12,%ymm6,%ymm6,%ymm6
4491	vpalignr	$8,%ymm10,%ymm10,%ymm10
4492	vpalignr	$4,%ymm14,%ymm14,%ymm14
4493	vpalignr	$12,%ymm5,%ymm5,%ymm5
4494	vpalignr	$8,%ymm9,%ymm9,%ymm9
4495	vpalignr	$4,%ymm13,%ymm13,%ymm13
4496	vpalignr	$12,%ymm4,%ymm4,%ymm4
4497	vpalignr	$8,%ymm8,%ymm8,%ymm8
4498	movq	%r13,%r10
4499	movq	%r14,%r11
4500	movq	%r15,%r12
4501	andq	$3,%r12
4502	movq	%r15,%r13
4503	andq	$-4,%r13
4504	movq	%r9,%r14
4505	shrdq	$2,%r9,%r15
4506	shrq	$2,%r9
4507	addq	%r13,%r15
4508	adcq	%r14,%r9
4509	addq	%r15,%r10
4510	adcq	%r9,%r11
4511	adcq	$0,%r12
4512	vpalignr	$4,%ymm12,%ymm12,%ymm12
4513
4514	cmpq	$60*8,%rcx
4515	jne	L$open_avx2_main_loop_rounds
4516	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
4517	vpaddd	0+64(%rbp),%ymm7,%ymm7
4518	vpaddd	0+96(%rbp),%ymm11,%ymm11
4519	vpaddd	0+256(%rbp),%ymm15,%ymm15
4520	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
4521	vpaddd	0+64(%rbp),%ymm6,%ymm6
4522	vpaddd	0+96(%rbp),%ymm10,%ymm10
4523	vpaddd	0+224(%rbp),%ymm14,%ymm14
4524	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
4525	vpaddd	0+64(%rbp),%ymm5,%ymm5
4526	vpaddd	0+96(%rbp),%ymm9,%ymm9
4527	vpaddd	0+192(%rbp),%ymm13,%ymm13
4528	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4529	vpaddd	0+64(%rbp),%ymm4,%ymm4
4530	vpaddd	0+96(%rbp),%ymm8,%ymm8
4531	vpaddd	0+160(%rbp),%ymm12,%ymm12
4532
4533	vmovdqa	%ymm0,0+128(%rbp)
4534	addq	0+60*8(%rsi),%r10
4535	adcq	8+60*8(%rsi),%r11
4536	adcq	$1,%r12
4537	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
4538	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
4539	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
4540	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
4541	vpxor	0+0(%rsi),%ymm0,%ymm0
4542	vpxor	32+0(%rsi),%ymm3,%ymm3
4543	vpxor	64+0(%rsi),%ymm7,%ymm7
4544	vpxor	96+0(%rsi),%ymm11,%ymm11
4545	vmovdqu	%ymm0,0+0(%rdi)
4546	vmovdqu	%ymm3,32+0(%rdi)
4547	vmovdqu	%ymm7,64+0(%rdi)
4548	vmovdqu	%ymm11,96+0(%rdi)
4549
4550	vmovdqa	0+128(%rbp),%ymm0
4551	movq	0+0+0(%rbp),%rax
4552	movq	%rax,%r15
4553	mulq	%r10
4554	movq	%rax,%r13
4555	movq	%rdx,%r14
4556	movq	0+0+0(%rbp),%rax
4557	mulq	%r11
4558	imulq	%r12,%r15
4559	addq	%rax,%r14
4560	adcq	%rdx,%r15
4561	movq	8+0+0(%rbp),%rax
4562	movq	%rax,%r9
4563	mulq	%r10
4564	addq	%rax,%r14
4565	adcq	$0,%rdx
4566	movq	%rdx,%r10
4567	movq	8+0+0(%rbp),%rax
4568	mulq	%r11
4569	addq	%rax,%r15
4570	adcq	$0,%rdx
4571	imulq	%r12,%r9
4572	addq	%r10,%r15
4573	adcq	%rdx,%r9
4574	movq	%r13,%r10
4575	movq	%r14,%r11
4576	movq	%r15,%r12
4577	andq	$3,%r12
4578	movq	%r15,%r13
4579	andq	$-4,%r13
4580	movq	%r9,%r14
4581	shrdq	$2,%r9,%r15
4582	shrq	$2,%r9
4583	addq	%r13,%r15
4584	adcq	%r14,%r9
4585	addq	%r15,%r10
4586	adcq	%r9,%r11
4587	adcq	$0,%r12
4588	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
4589	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
4590	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
4591	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
4592	vpxor	0+128(%rsi),%ymm3,%ymm3
4593	vpxor	32+128(%rsi),%ymm2,%ymm2
4594	vpxor	64+128(%rsi),%ymm6,%ymm6
4595	vpxor	96+128(%rsi),%ymm10,%ymm10
4596	vmovdqu	%ymm3,0+128(%rdi)
4597	vmovdqu	%ymm2,32+128(%rdi)
4598	vmovdqu	%ymm6,64+128(%rdi)
4599	vmovdqu	%ymm10,96+128(%rdi)
4600	addq	0+60*8+16(%rsi),%r10
4601	adcq	8+60*8+16(%rsi),%r11
4602	adcq	$1,%r12
4603	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
4604	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
4605	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
4606	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
4607	vpxor	0+256(%rsi),%ymm3,%ymm3
4608	vpxor	32+256(%rsi),%ymm1,%ymm1
4609	vpxor	64+256(%rsi),%ymm5,%ymm5
4610	vpxor	96+256(%rsi),%ymm9,%ymm9
4611	vmovdqu	%ymm3,0+256(%rdi)
4612	vmovdqu	%ymm1,32+256(%rdi)
4613	vmovdqu	%ymm5,64+256(%rdi)
4614	vmovdqu	%ymm9,96+256(%rdi)
4615	movq	0+0+0(%rbp),%rax
4616	movq	%rax,%r15
4617	mulq	%r10
4618	movq	%rax,%r13
4619	movq	%rdx,%r14
4620	movq	0+0+0(%rbp),%rax
4621	mulq	%r11
4622	imulq	%r12,%r15
4623	addq	%rax,%r14
4624	adcq	%rdx,%r15
4625	movq	8+0+0(%rbp),%rax
4626	movq	%rax,%r9
4627	mulq	%r10
4628	addq	%rax,%r14
4629	adcq	$0,%rdx
4630	movq	%rdx,%r10
4631	movq	8+0+0(%rbp),%rax
4632	mulq	%r11
4633	addq	%rax,%r15
4634	adcq	$0,%rdx
4635	imulq	%r12,%r9
4636	addq	%r10,%r15
4637	adcq	%rdx,%r9
4638	movq	%r13,%r10
4639	movq	%r14,%r11
4640	movq	%r15,%r12
4641	andq	$3,%r12
4642	movq	%r15,%r13
4643	andq	$-4,%r13
4644	movq	%r9,%r14
4645	shrdq	$2,%r9,%r15
4646	shrq	$2,%r9
4647	addq	%r13,%r15
4648	adcq	%r14,%r9
4649	addq	%r15,%r10
4650	adcq	%r9,%r11
4651	adcq	$0,%r12
4652	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4653	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
4654	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
4655	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
4656	vpxor	0+384(%rsi),%ymm3,%ymm3
4657	vpxor	32+384(%rsi),%ymm0,%ymm0
4658	vpxor	64+384(%rsi),%ymm4,%ymm4
4659	vpxor	96+384(%rsi),%ymm8,%ymm8
4660	vmovdqu	%ymm3,0+384(%rdi)
4661	vmovdqu	%ymm0,32+384(%rdi)
4662	vmovdqu	%ymm4,64+384(%rdi)
4663	vmovdqu	%ymm8,96+384(%rdi)
4664
4665	leaq	512(%rsi),%rsi
4666	leaq	512(%rdi),%rdi
4667	subq	$512,%rbx
4668	jmp	L$open_avx2_main_loop
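# Fewer than 512 bytes remain: dispatch to a tail path sized for the
# remainder, or to the shared finalize code if nothing is left.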
4669L$open_avx2_main_loop_done:
4670	testq	%rbx,%rbx
4671	vzeroupper
4672	je	L$open_sse_finalize
4673
4674	cmpq	$384,%rbx
4675	ja	L$open_avx2_tail_512
4676	cmpq	$256,%rbx
4677	ja	L$open_avx2_tail_384
4678	cmpq	$128,%rbx
4679	ja	L$open_avx2_tail_256
4680	vmovdqa	L$chacha20_consts(%rip),%ymm0
4681	vmovdqa	0+64(%rbp),%ymm4
4682	vmovdqa	0+96(%rbp),%ymm8
4683	vmovdqa	L$avx2_inc(%rip),%ymm12
4684	vpaddd	0+160(%rbp),%ymm12,%ymm12
4685	vmovdqa	%ymm12,0+160(%rbp)
4686
4687	xorq	%r8,%r8
4688	movq	%rbx,%rcx
4689	andq	$-16,%rcx
4690	testq	%rcx,%rcx
4691	je	L$open_avx2_tail_128_rounds
4692L$open_avx2_tail_128_rounds_and_x1hash:
4693	addq	0+0(%rsi,%r8,1),%r10
4694	adcq	8+0(%rsi,%r8,1),%r11
4695	adcq	$1,%r12
4696	movq	0+0+0(%rbp),%rax
4697	movq	%rax,%r15
4698	mulq	%r10
4699	movq	%rax,%r13
4700	movq	%rdx,%r14
4701	movq	0+0+0(%rbp),%rax
4702	mulq	%r11
4703	imulq	%r12,%r15
4704	addq	%rax,%r14
4705	adcq	%rdx,%r15
4706	movq	8+0+0(%rbp),%rax
4707	movq	%rax,%r9
4708	mulq	%r10
4709	addq	%rax,%r14
4710	adcq	$0,%rdx
4711	movq	%rdx,%r10
4712	movq	8+0+0(%rbp),%rax
4713	mulq	%r11
4714	addq	%rax,%r15
4715	adcq	$0,%rdx
4716	imulq	%r12,%r9
4717	addq	%r10,%r15
4718	adcq	%rdx,%r9
4719	movq	%r13,%r10
4720	movq	%r14,%r11
4721	movq	%r15,%r12
4722	andq	$3,%r12
4723	movq	%r15,%r13
4724	andq	$-4,%r13
4725	movq	%r9,%r14
4726	shrdq	$2,%r9,%r15
4727	shrq	$2,%r9
4728	addq	%r13,%r15
4729	adcq	%r14,%r9
4730	addq	%r15,%r10
4731	adcq	%r9,%r11
4732	adcq	$0,%r12
4733
4734L$open_avx2_tail_128_rounds:
4735	addq	$16,%r8
4736	vpaddd	%ymm4,%ymm0,%ymm0
4737	vpxor	%ymm0,%ymm12,%ymm12
4738	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4739	vpaddd	%ymm12,%ymm8,%ymm8
4740	vpxor	%ymm8,%ymm4,%ymm4
4741	vpsrld	$20,%ymm4,%ymm3
4742	vpslld	$12,%ymm4,%ymm4
4743	vpxor	%ymm3,%ymm4,%ymm4
4744	vpaddd	%ymm4,%ymm0,%ymm0
4745	vpxor	%ymm0,%ymm12,%ymm12
4746	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4747	vpaddd	%ymm12,%ymm8,%ymm8
4748	vpxor	%ymm8,%ymm4,%ymm4
4749	vpslld	$7,%ymm4,%ymm3
4750	vpsrld	$25,%ymm4,%ymm4
4751	vpxor	%ymm3,%ymm4,%ymm4
4752	vpalignr	$12,%ymm12,%ymm12,%ymm12
4753	vpalignr	$8,%ymm8,%ymm8,%ymm8
4754	vpalignr	$4,%ymm4,%ymm4,%ymm4
4755	vpaddd	%ymm4,%ymm0,%ymm0
4756	vpxor	%ymm0,%ymm12,%ymm12
4757	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4758	vpaddd	%ymm12,%ymm8,%ymm8
4759	vpxor	%ymm8,%ymm4,%ymm4
4760	vpsrld	$20,%ymm4,%ymm3
4761	vpslld	$12,%ymm4,%ymm4
4762	vpxor	%ymm3,%ymm4,%ymm4
4763	vpaddd	%ymm4,%ymm0,%ymm0
4764	vpxor	%ymm0,%ymm12,%ymm12
4765	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4766	vpaddd	%ymm12,%ymm8,%ymm8
4767	vpxor	%ymm8,%ymm4,%ymm4
4768	vpslld	$7,%ymm4,%ymm3
4769	vpsrld	$25,%ymm4,%ymm4
4770	vpxor	%ymm3,%ymm4,%ymm4
4771	vpalignr	$4,%ymm12,%ymm12,%ymm12
4772	vpalignr	$8,%ymm8,%ymm8,%ymm8
4773	vpalignr	$12,%ymm4,%ymm4,%ymm4
4774
4775	cmpq	%rcx,%r8
4776	jb	L$open_avx2_tail_128_rounds_and_x1hash
4777	cmpq	$160,%r8
4778	jne	L$open_avx2_tail_128_rounds
4779	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
4780	vpaddd	0+64(%rbp),%ymm4,%ymm4
4781	vpaddd	0+96(%rbp),%ymm8,%ymm8
4782	vpaddd	0+160(%rbp),%ymm12,%ymm12
4783	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
4784	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
4785	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
4786	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
4787	vmovdqa	%ymm3,%ymm8
4788
4789	jmp	L$open_avx2_tail_128_xor
4790
4791L$open_avx2_tail_256:
4792	vmovdqa	L$chacha20_consts(%rip),%ymm0
4793	vmovdqa	0+64(%rbp),%ymm4
4794	vmovdqa	0+96(%rbp),%ymm8
4795	vmovdqa	%ymm0,%ymm1
4796	vmovdqa	%ymm4,%ymm5
4797	vmovdqa	%ymm8,%ymm9
4798	vmovdqa	L$avx2_inc(%rip),%ymm12
4799	vpaddd	0+160(%rbp),%ymm12,%ymm13
4800	vpaddd	%ymm13,%ymm12,%ymm12
4801	vmovdqa	%ymm12,0+160(%rbp)
4802	vmovdqa	%ymm13,0+192(%rbp)
4803
4804	movq	%rbx,0+128(%rbp)
4805	movq	%rbx,%rcx
4806	subq	$128,%rcx
4807	shrq	$4,%rcx
4808	movq	$10,%r8
4809	cmpq	$10,%rcx
4810	cmovgq	%r8,%rcx
4811	movq	%rsi,%rbx
4812	xorq	%r8,%r8
4813L$open_avx2_tail_256_rounds_and_x1hash:
4814	addq	0+0(%rbx),%r10
4815	adcq	8+0(%rbx),%r11
4816	adcq	$1,%r12
4817	movq	0+0+0(%rbp),%rdx
4818	movq	%rdx,%r15
4819	mulxq	%r10,%r13,%r14
4820	mulxq	%r11,%rax,%rdx
4821	imulq	%r12,%r15
4822	addq	%rax,%r14
4823	adcq	%rdx,%r15
4824	movq	8+0+0(%rbp),%rdx
4825	mulxq	%r10,%r10,%rax
4826	addq	%r10,%r14
4827	mulxq	%r11,%r11,%r9
4828	adcq	%r11,%r15
4829	adcq	$0,%r9
4830	imulq	%r12,%rdx
4831	addq	%rax,%r15
4832	adcq	%rdx,%r9
4833	movq	%r13,%r10
4834	movq	%r14,%r11
4835	movq	%r15,%r12
4836	andq	$3,%r12
4837	movq	%r15,%r13
4838	andq	$-4,%r13
4839	movq	%r9,%r14
4840	shrdq	$2,%r9,%r15
4841	shrq	$2,%r9
4842	addq	%r13,%r15
4843	adcq	%r14,%r9
4844	addq	%r15,%r10
4845	adcq	%r9,%r11
4846	adcq	$0,%r12
4847
4848	leaq	16(%rbx),%rbx
4849L$open_avx2_tail_256_rounds:
4850	vpaddd	%ymm4,%ymm0,%ymm0
4851	vpxor	%ymm0,%ymm12,%ymm12
4852	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4853	vpaddd	%ymm12,%ymm8,%ymm8
4854	vpxor	%ymm8,%ymm4,%ymm4
4855	vpsrld	$20,%ymm4,%ymm3
4856	vpslld	$12,%ymm4,%ymm4
4857	vpxor	%ymm3,%ymm4,%ymm4
4858	vpaddd	%ymm4,%ymm0,%ymm0
4859	vpxor	%ymm0,%ymm12,%ymm12
4860	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4861	vpaddd	%ymm12,%ymm8,%ymm8
4862	vpxor	%ymm8,%ymm4,%ymm4
4863	vpslld	$7,%ymm4,%ymm3
4864	vpsrld	$25,%ymm4,%ymm4
4865	vpxor	%ymm3,%ymm4,%ymm4
4866	vpalignr	$12,%ymm12,%ymm12,%ymm12
4867	vpalignr	$8,%ymm8,%ymm8,%ymm8
4868	vpalignr	$4,%ymm4,%ymm4,%ymm4
4869	vpaddd	%ymm5,%ymm1,%ymm1
4870	vpxor	%ymm1,%ymm13,%ymm13
4871	vpshufb	L$rol16(%rip),%ymm13,%ymm13
4872	vpaddd	%ymm13,%ymm9,%ymm9
4873	vpxor	%ymm9,%ymm5,%ymm5
4874	vpsrld	$20,%ymm5,%ymm3
4875	vpslld	$12,%ymm5,%ymm5
4876	vpxor	%ymm3,%ymm5,%ymm5
4877	vpaddd	%ymm5,%ymm1,%ymm1
4878	vpxor	%ymm1,%ymm13,%ymm13
4879	vpshufb	L$rol8(%rip),%ymm13,%ymm13
4880	vpaddd	%ymm13,%ymm9,%ymm9
4881	vpxor	%ymm9,%ymm5,%ymm5
4882	vpslld	$7,%ymm5,%ymm3
4883	vpsrld	$25,%ymm5,%ymm5
4884	vpxor	%ymm3,%ymm5,%ymm5
4885	vpalignr	$12,%ymm13,%ymm13,%ymm13
4886	vpalignr	$8,%ymm9,%ymm9,%ymm9
4887	vpalignr	$4,%ymm5,%ymm5,%ymm5
4888
4889	incq	%r8
4890	vpaddd	%ymm4,%ymm0,%ymm0
4891	vpxor	%ymm0,%ymm12,%ymm12
4892	vpshufb	L$rol16(%rip),%ymm12,%ymm12
4893	vpaddd	%ymm12,%ymm8,%ymm8
4894	vpxor	%ymm8,%ymm4,%ymm4
4895	vpsrld	$20,%ymm4,%ymm3
4896	vpslld	$12,%ymm4,%ymm4
4897	vpxor	%ymm3,%ymm4,%ymm4
4898	vpaddd	%ymm4,%ymm0,%ymm0
4899	vpxor	%ymm0,%ymm12,%ymm12
4900	vpshufb	L$rol8(%rip),%ymm12,%ymm12
4901	vpaddd	%ymm12,%ymm8,%ymm8
4902	vpxor	%ymm8,%ymm4,%ymm4
4903	vpslld	$7,%ymm4,%ymm3
4904	vpsrld	$25,%ymm4,%ymm4
4905	vpxor	%ymm3,%ymm4,%ymm4
4906	vpalignr	$4,%ymm12,%ymm12,%ymm12
4907	vpalignr	$8,%ymm8,%ymm8,%ymm8
4908	vpalignr	$12,%ymm4,%ymm4,%ymm4
4909	vpaddd	%ymm5,%ymm1,%ymm1
4910	vpxor	%ymm1,%ymm13,%ymm13
4911	vpshufb	L$rol16(%rip),%ymm13,%ymm13
4912	vpaddd	%ymm13,%ymm9,%ymm9
4913	vpxor	%ymm9,%ymm5,%ymm5
4914	vpsrld	$20,%ymm5,%ymm3
4915	vpslld	$12,%ymm5,%ymm5
4916	vpxor	%ymm3,%ymm5,%ymm5
4917	vpaddd	%ymm5,%ymm1,%ymm1
4918	vpxor	%ymm1,%ymm13,%ymm13
4919	vpshufb	L$rol8(%rip),%ymm13,%ymm13
4920	vpaddd	%ymm13,%ymm9,%ymm9
4921	vpxor	%ymm9,%ymm5,%ymm5
4922	vpslld	$7,%ymm5,%ymm3
4923	vpsrld	$25,%ymm5,%ymm5
4924	vpxor	%ymm3,%ymm5,%ymm5
4925	vpalignr	$4,%ymm13,%ymm13,%ymm13
4926	vpalignr	$8,%ymm9,%ymm9,%ymm9
4927	vpalignr	$12,%ymm5,%ymm5,%ymm5
4928	vpaddd	%ymm6,%ymm2,%ymm2
4929	vpxor	%ymm2,%ymm14,%ymm14
4930	vpshufb	L$rol16(%rip),%ymm14,%ymm14
4931	vpaddd	%ymm14,%ymm10,%ymm10
4932	vpxor	%ymm10,%ymm6,%ymm6
4933	vpsrld	$20,%ymm6,%ymm3
4934	vpslld	$12,%ymm6,%ymm6
4935	vpxor	%ymm3,%ymm6,%ymm6
4936	vpaddd	%ymm6,%ymm2,%ymm2
4937	vpxor	%ymm2,%ymm14,%ymm14
4938	vpshufb	L$rol8(%rip),%ymm14,%ymm14
4939	vpaddd	%ymm14,%ymm10,%ymm10
4940	vpxor	%ymm10,%ymm6,%ymm6
4941	vpslld	$7,%ymm6,%ymm3
4942	vpsrld	$25,%ymm6,%ymm6
4943	vpxor	%ymm3,%ymm6,%ymm6
4944	vpalignr	$4,%ymm14,%ymm14,%ymm14
4945	vpalignr	$8,%ymm10,%ymm10,%ymm10
4946	vpalignr	$12,%ymm6,%ymm6,%ymm6
4947
4948	cmpq	%rcx,%r8
4949	jb	L$open_avx2_tail_256_rounds_and_x1hash
4950	cmpq	$10,%r8
4951	jne	L$open_avx2_tail_256_rounds
4952	movq	%rbx,%r8
4953	subq	%rsi,%rbx
4954	movq	%rbx,%rcx
4955	movq	0+128(%rbp),%rbx
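/* Catch up: hash any full 16-byte ciphertext blocks not covered during the
   rounds before decrypting. */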
4956L$open_avx2_tail_256_hash:
4957	addq	$16,%rcx
4958	cmpq	%rbx,%rcx
4959	jg	L$open_avx2_tail_256_done
4960	addq	0+0(%r8),%r10
4961	adcq	8+0(%r8),%r11
4962	adcq	$1,%r12
4963	movq	0+0+0(%rbp),%rdx
4964	movq	%rdx,%r15
4965	mulxq	%r10,%r13,%r14
4966	mulxq	%r11,%rax,%rdx
4967	imulq	%r12,%r15
4968	addq	%rax,%r14
4969	adcq	%rdx,%r15
4970	movq	8+0+0(%rbp),%rdx
4971	mulxq	%r10,%r10,%rax
4972	addq	%r10,%r14
4973	mulxq	%r11,%r11,%r9
4974	adcq	%r11,%r15
4975	adcq	$0,%r9
4976	imulq	%r12,%rdx
4977	addq	%rax,%r15
4978	adcq	%rdx,%r9
4979	movq	%r13,%r10
4980	movq	%r14,%r11
4981	movq	%r15,%r12
4982	andq	$3,%r12
4983	movq	%r15,%r13
4984	andq	$-4,%r13
4985	movq	%r9,%r14
4986	shrdq	$2,%r9,%r15
4987	shrq	$2,%r9
4988	addq	%r13,%r15
4989	adcq	%r14,%r9
4990	addq	%r15,%r10
4991	adcq	%r9,%r11
4992	adcq	$0,%r12
4993
4994	leaq	16(%r8),%r8
4995	jmp	L$open_avx2_tail_256_hash
4996L$open_avx2_tail_256_done:
4997	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
4998	vpaddd	0+64(%rbp),%ymm5,%ymm5
4999	vpaddd	0+96(%rbp),%ymm9,%ymm9
5000	vpaddd	0+192(%rbp),%ymm13,%ymm13
5001	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5002	vpaddd	0+64(%rbp),%ymm4,%ymm4
5003	vpaddd	0+96(%rbp),%ymm8,%ymm8
5004	vpaddd	0+160(%rbp),%ymm12,%ymm12
5005	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5006	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5007	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5008	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5009	vpxor	0+0(%rsi),%ymm3,%ymm3
5010	vpxor	32+0(%rsi),%ymm1,%ymm1
5011	vpxor	64+0(%rsi),%ymm5,%ymm5
5012	vpxor	96+0(%rsi),%ymm9,%ymm9
5013	vmovdqu	%ymm3,0+0(%rdi)
5014	vmovdqu	%ymm1,32+0(%rdi)
5015	vmovdqu	%ymm5,64+0(%rdi)
5016	vmovdqu	%ymm9,96+0(%rdi)
5017	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5018	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5019	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5020	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5021	vmovdqa	%ymm3,%ymm8
5022
5023	leaq	128(%rsi),%rsi
5024	leaq	128(%rdi),%rdi
5025	subq	$128,%rbx
5026	jmp	L$open_avx2_tail_128_xor
5027
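/* Open tail, three interleaved AVX2 states (at most 384 bytes left); the
   per-state counters are kept at 160/192/224(%rbp). */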
5028L$open_avx2_tail_384:
5029	vmovdqa	L$chacha20_consts(%rip),%ymm0
5030	vmovdqa	0+64(%rbp),%ymm4
5031	vmovdqa	0+96(%rbp),%ymm8
5032	vmovdqa	%ymm0,%ymm1
5033	vmovdqa	%ymm4,%ymm5
5034	vmovdqa	%ymm8,%ymm9
5035	vmovdqa	%ymm0,%ymm2
5036	vmovdqa	%ymm4,%ymm6
5037	vmovdqa	%ymm8,%ymm10
5038	vmovdqa	L$avx2_inc(%rip),%ymm12
5039	vpaddd	0+160(%rbp),%ymm12,%ymm14
5040	vpaddd	%ymm14,%ymm12,%ymm13
5041	vpaddd	%ymm13,%ymm12,%ymm12
5042	vmovdqa	%ymm12,0+160(%rbp)
5043	vmovdqa	%ymm13,0+192(%rbp)
5044	vmovdqa	%ymm14,0+224(%rbp)
5045
5046	movq	%rbx,0+128(%rbp)
5047	movq	%rbx,%rcx
5048	subq	$256,%rcx
5049	shrq	$4,%rcx
5050	addq	$6,%rcx
5051	movq	$10,%r8
5052	cmpq	$10,%rcx
5053	cmovgq	%r8,%rcx
5054	movq	%rsi,%rbx
5055	xorq	%r8,%r8
5056L$open_avx2_tail_384_rounds_and_x2hash:
5057	addq	0+0(%rbx),%r10
5058	adcq	8+0(%rbx),%r11
5059	adcq	$1,%r12
5060	movq	0+0+0(%rbp),%rdx
5061	movq	%rdx,%r15
5062	mulxq	%r10,%r13,%r14
5063	mulxq	%r11,%rax,%rdx
5064	imulq	%r12,%r15
5065	addq	%rax,%r14
5066	adcq	%rdx,%r15
5067	movq	8+0+0(%rbp),%rdx
5068	mulxq	%r10,%r10,%rax
5069	addq	%r10,%r14
5070	mulxq	%r11,%r11,%r9
5071	adcq	%r11,%r15
5072	adcq	$0,%r9
5073	imulq	%r12,%rdx
5074	addq	%rax,%r15
5075	adcq	%rdx,%r9
5076	movq	%r13,%r10
5077	movq	%r14,%r11
5078	movq	%r15,%r12
5079	andq	$3,%r12
5080	movq	%r15,%r13
5081	andq	$-4,%r13
5082	movq	%r9,%r14
5083	shrdq	$2,%r9,%r15
5084	shrq	$2,%r9
5085	addq	%r13,%r15
5086	adcq	%r14,%r9
5087	addq	%r15,%r10
5088	adcq	%r9,%r11
5089	adcq	$0,%r12
5090
5091	leaq	16(%rbx),%rbx
5092L$open_avx2_tail_384_rounds_and_x1hash:
5093	vpaddd	%ymm6,%ymm2,%ymm2
5094	vpxor	%ymm2,%ymm14,%ymm14
5095	vpshufb	L$rol16(%rip),%ymm14,%ymm14
5096	vpaddd	%ymm14,%ymm10,%ymm10
5097	vpxor	%ymm10,%ymm6,%ymm6
5098	vpsrld	$20,%ymm6,%ymm3
5099	vpslld	$12,%ymm6,%ymm6
5100	vpxor	%ymm3,%ymm6,%ymm6
5101	vpaddd	%ymm6,%ymm2,%ymm2
5102	vpxor	%ymm2,%ymm14,%ymm14
5103	vpshufb	L$rol8(%rip),%ymm14,%ymm14
5104	vpaddd	%ymm14,%ymm10,%ymm10
5105	vpxor	%ymm10,%ymm6,%ymm6
5106	vpslld	$7,%ymm6,%ymm3
5107	vpsrld	$25,%ymm6,%ymm6
5108	vpxor	%ymm3,%ymm6,%ymm6
5109	vpalignr	$12,%ymm14,%ymm14,%ymm14
5110	vpalignr	$8,%ymm10,%ymm10,%ymm10
5111	vpalignr	$4,%ymm6,%ymm6,%ymm6
5112	vpaddd	%ymm5,%ymm1,%ymm1
5113	vpxor	%ymm1,%ymm13,%ymm13
5114	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5115	vpaddd	%ymm13,%ymm9,%ymm9
5116	vpxor	%ymm9,%ymm5,%ymm5
5117	vpsrld	$20,%ymm5,%ymm3
5118	vpslld	$12,%ymm5,%ymm5
5119	vpxor	%ymm3,%ymm5,%ymm5
5120	vpaddd	%ymm5,%ymm1,%ymm1
5121	vpxor	%ymm1,%ymm13,%ymm13
5122	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5123	vpaddd	%ymm13,%ymm9,%ymm9
5124	vpxor	%ymm9,%ymm5,%ymm5
5125	vpslld	$7,%ymm5,%ymm3
5126	vpsrld	$25,%ymm5,%ymm5
5127	vpxor	%ymm3,%ymm5,%ymm5
5128	vpalignr	$12,%ymm13,%ymm13,%ymm13
5129	vpalignr	$8,%ymm9,%ymm9,%ymm9
5130	vpalignr	$4,%ymm5,%ymm5,%ymm5
5131	vpaddd	%ymm4,%ymm0,%ymm0
5132	vpxor	%ymm0,%ymm12,%ymm12
5133	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5134	vpaddd	%ymm12,%ymm8,%ymm8
5135	vpxor	%ymm8,%ymm4,%ymm4
5136	vpsrld	$20,%ymm4,%ymm3
5137	vpslld	$12,%ymm4,%ymm4
5138	vpxor	%ymm3,%ymm4,%ymm4
5139	vpaddd	%ymm4,%ymm0,%ymm0
5140	vpxor	%ymm0,%ymm12,%ymm12
5141	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5142	vpaddd	%ymm12,%ymm8,%ymm8
5143	vpxor	%ymm8,%ymm4,%ymm4
5144	vpslld	$7,%ymm4,%ymm3
5145	vpsrld	$25,%ymm4,%ymm4
5146	vpxor	%ymm3,%ymm4,%ymm4
5147	vpalignr	$12,%ymm12,%ymm12,%ymm12
5148	vpalignr	$8,%ymm8,%ymm8,%ymm8
5149	vpalignr	$4,%ymm4,%ymm4,%ymm4
5150	addq	0+0(%rbx),%r10
5151	adcq	8+0(%rbx),%r11
5152	adcq	$1,%r12
5153	movq	0+0+0(%rbp),%rax
5154	movq	%rax,%r15
5155	mulq	%r10
5156	movq	%rax,%r13
5157	movq	%rdx,%r14
5158	movq	0+0+0(%rbp),%rax
5159	mulq	%r11
5160	imulq	%r12,%r15
5161	addq	%rax,%r14
5162	adcq	%rdx,%r15
5163	movq	8+0+0(%rbp),%rax
5164	movq	%rax,%r9
5165	mulq	%r10
5166	addq	%rax,%r14
5167	adcq	$0,%rdx
5168	movq	%rdx,%r10
5169	movq	8+0+0(%rbp),%rax
5170	mulq	%r11
5171	addq	%rax,%r15
5172	adcq	$0,%rdx
5173	imulq	%r12,%r9
5174	addq	%r10,%r15
5175	adcq	%rdx,%r9
5176	movq	%r13,%r10
5177	movq	%r14,%r11
5178	movq	%r15,%r12
5179	andq	$3,%r12
5180	movq	%r15,%r13
5181	andq	$-4,%r13
5182	movq	%r9,%r14
5183	shrdq	$2,%r9,%r15
5184	shrq	$2,%r9
5185	addq	%r13,%r15
5186	adcq	%r14,%r9
5187	addq	%r15,%r10
5188	adcq	%r9,%r11
5189	adcq	$0,%r12
5190
5191	leaq	16(%rbx),%rbx
5192	incq	%r8
5193	vpaddd	%ymm6,%ymm2,%ymm2
5194	vpxor	%ymm2,%ymm14,%ymm14
5195	vpshufb	L$rol16(%rip),%ymm14,%ymm14
5196	vpaddd	%ymm14,%ymm10,%ymm10
5197	vpxor	%ymm10,%ymm6,%ymm6
5198	vpsrld	$20,%ymm6,%ymm3
5199	vpslld	$12,%ymm6,%ymm6
5200	vpxor	%ymm3,%ymm6,%ymm6
5201	vpaddd	%ymm6,%ymm2,%ymm2
5202	vpxor	%ymm2,%ymm14,%ymm14
5203	vpshufb	L$rol8(%rip),%ymm14,%ymm14
5204	vpaddd	%ymm14,%ymm10,%ymm10
5205	vpxor	%ymm10,%ymm6,%ymm6
5206	vpslld	$7,%ymm6,%ymm3
5207	vpsrld	$25,%ymm6,%ymm6
5208	vpxor	%ymm3,%ymm6,%ymm6
5209	vpalignr	$4,%ymm14,%ymm14,%ymm14
5210	vpalignr	$8,%ymm10,%ymm10,%ymm10
5211	vpalignr	$12,%ymm6,%ymm6,%ymm6
5212	vpaddd	%ymm5,%ymm1,%ymm1
5213	vpxor	%ymm1,%ymm13,%ymm13
5214	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5215	vpaddd	%ymm13,%ymm9,%ymm9
5216	vpxor	%ymm9,%ymm5,%ymm5
5217	vpsrld	$20,%ymm5,%ymm3
5218	vpslld	$12,%ymm5,%ymm5
5219	vpxor	%ymm3,%ymm5,%ymm5
5220	vpaddd	%ymm5,%ymm1,%ymm1
5221	vpxor	%ymm1,%ymm13,%ymm13
5222	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5223	vpaddd	%ymm13,%ymm9,%ymm9
5224	vpxor	%ymm9,%ymm5,%ymm5
5225	vpslld	$7,%ymm5,%ymm3
5226	vpsrld	$25,%ymm5,%ymm5
5227	vpxor	%ymm3,%ymm5,%ymm5
5228	vpalignr	$4,%ymm13,%ymm13,%ymm13
5229	vpalignr	$8,%ymm9,%ymm9,%ymm9
5230	vpalignr	$12,%ymm5,%ymm5,%ymm5
5231	vpaddd	%ymm4,%ymm0,%ymm0
5232	vpxor	%ymm0,%ymm12,%ymm12
5233	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5234	vpaddd	%ymm12,%ymm8,%ymm8
5235	vpxor	%ymm8,%ymm4,%ymm4
5236	vpsrld	$20,%ymm4,%ymm3
5237	vpslld	$12,%ymm4,%ymm4
5238	vpxor	%ymm3,%ymm4,%ymm4
5239	vpaddd	%ymm4,%ymm0,%ymm0
5240	vpxor	%ymm0,%ymm12,%ymm12
5241	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5242	vpaddd	%ymm12,%ymm8,%ymm8
5243	vpxor	%ymm8,%ymm4,%ymm4
5244	vpslld	$7,%ymm4,%ymm3
5245	vpsrld	$25,%ymm4,%ymm4
5246	vpxor	%ymm3,%ymm4,%ymm4
5247	vpalignr	$4,%ymm12,%ymm12,%ymm12
5248	vpalignr	$8,%ymm8,%ymm8,%ymm8
5249	vpalignr	$12,%ymm4,%ymm4,%ymm4
5250
5251	cmpq	%rcx,%r8
5252	jb	L$open_avx2_tail_384_rounds_and_x2hash
5253	cmpq	$10,%r8
5254	jne	L$open_avx2_tail_384_rounds_and_x1hash
5255	movq	%rbx,%r8
5256	subq	%rsi,%rbx
5257	movq	%rbx,%rcx
5258	movq	0+128(%rbp),%rbx
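/* Catch up the Poly1305 hash on the remaining full 16-byte ciphertext blocks
   before the final state additions and XOR. */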
5259L$open_avx2_384_tail_hash:
5260	addq	$16,%rcx
5261	cmpq	%rbx,%rcx
5262	jg	L$open_avx2_384_tail_done
5263	addq	0+0(%r8),%r10
5264	adcq	8+0(%r8),%r11
5265	adcq	$1,%r12
5266	movq	0+0+0(%rbp),%rdx
5267	movq	%rdx,%r15
5268	mulxq	%r10,%r13,%r14
5269	mulxq	%r11,%rax,%rdx
5270	imulq	%r12,%r15
5271	addq	%rax,%r14
5272	adcq	%rdx,%r15
5273	movq	8+0+0(%rbp),%rdx
5274	mulxq	%r10,%r10,%rax
5275	addq	%r10,%r14
5276	mulxq	%r11,%r11,%r9
5277	adcq	%r11,%r15
5278	adcq	$0,%r9
5279	imulq	%r12,%rdx
5280	addq	%rax,%r15
5281	adcq	%rdx,%r9
5282	movq	%r13,%r10
5283	movq	%r14,%r11
5284	movq	%r15,%r12
5285	andq	$3,%r12
5286	movq	%r15,%r13
5287	andq	$-4,%r13
5288	movq	%r9,%r14
5289	shrdq	$2,%r9,%r15
5290	shrq	$2,%r9
5291	addq	%r13,%r15
5292	adcq	%r14,%r9
5293	addq	%r15,%r10
5294	adcq	%r9,%r11
5295	adcq	$0,%r12
5296
5297	leaq	16(%r8),%r8
5298	jmp	L$open_avx2_384_tail_hash
5299L$open_avx2_384_tail_done:
5300	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
5301	vpaddd	0+64(%rbp),%ymm6,%ymm6
5302	vpaddd	0+96(%rbp),%ymm10,%ymm10
5303	vpaddd	0+224(%rbp),%ymm14,%ymm14
5304	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
5305	vpaddd	0+64(%rbp),%ymm5,%ymm5
5306	vpaddd	0+96(%rbp),%ymm9,%ymm9
5307	vpaddd	0+192(%rbp),%ymm13,%ymm13
5308	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5309	vpaddd	0+64(%rbp),%ymm4,%ymm4
5310	vpaddd	0+96(%rbp),%ymm8,%ymm8
5311	vpaddd	0+160(%rbp),%ymm12,%ymm12
5312	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5313	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5314	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5315	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5316	vpxor	0+0(%rsi),%ymm3,%ymm3
5317	vpxor	32+0(%rsi),%ymm2,%ymm2
5318	vpxor	64+0(%rsi),%ymm6,%ymm6
5319	vpxor	96+0(%rsi),%ymm10,%ymm10
5320	vmovdqu	%ymm3,0+0(%rdi)
5321	vmovdqu	%ymm2,32+0(%rdi)
5322	vmovdqu	%ymm6,64+0(%rdi)
5323	vmovdqu	%ymm10,96+0(%rdi)
5324	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5325	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5326	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5327	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5328	vpxor	0+128(%rsi),%ymm3,%ymm3
5329	vpxor	32+128(%rsi),%ymm1,%ymm1
5330	vpxor	64+128(%rsi),%ymm5,%ymm5
5331	vpxor	96+128(%rsi),%ymm9,%ymm9
5332	vmovdqu	%ymm3,0+128(%rdi)
5333	vmovdqu	%ymm1,32+128(%rdi)
5334	vmovdqu	%ymm5,64+128(%rdi)
5335	vmovdqu	%ymm9,96+128(%rdi)
5336	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5337	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5338	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5339	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5340	vmovdqa	%ymm3,%ymm8
5341
5342	leaq	256(%rsi),%rsi
5343	leaq	256(%rdi),%rdi
5344	subq	$256,%rbx
5345	jmp	L$open_avx2_tail_128_xor
5346
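/* Open tail, four interleaved AVX2 states (at most 512 bytes left); the
   counters for the four states are spilled to 160/192/224/256(%rbp). */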
5347L$open_avx2_tail_512:
5348	vmovdqa	L$chacha20_consts(%rip),%ymm0
5349	vmovdqa	0+64(%rbp),%ymm4
5350	vmovdqa	0+96(%rbp),%ymm8
5351	vmovdqa	%ymm0,%ymm1
5352	vmovdqa	%ymm4,%ymm5
5353	vmovdqa	%ymm8,%ymm9
5354	vmovdqa	%ymm0,%ymm2
5355	vmovdqa	%ymm4,%ymm6
5356	vmovdqa	%ymm8,%ymm10
5357	vmovdqa	%ymm0,%ymm3
5358	vmovdqa	%ymm4,%ymm7
5359	vmovdqa	%ymm8,%ymm11
5360	vmovdqa	L$avx2_inc(%rip),%ymm12
5361	vpaddd	0+160(%rbp),%ymm12,%ymm15
5362	vpaddd	%ymm15,%ymm12,%ymm14
5363	vpaddd	%ymm14,%ymm12,%ymm13
5364	vpaddd	%ymm13,%ymm12,%ymm12
5365	vmovdqa	%ymm15,0+256(%rbp)
5366	vmovdqa	%ymm14,0+224(%rbp)
5367	vmovdqa	%ymm13,0+192(%rbp)
5368	vmovdqa	%ymm12,0+160(%rbp)
5369
5370	xorq	%rcx,%rcx
5371	movq	%rsi,%r8
5372L$open_avx2_tail_512_rounds_and_x2hash:
5373	addq	0+0(%r8),%r10
5374	adcq	8+0(%r8),%r11
5375	adcq	$1,%r12
5376	movq	0+0+0(%rbp),%rax
5377	movq	%rax,%r15
5378	mulq	%r10
5379	movq	%rax,%r13
5380	movq	%rdx,%r14
5381	movq	0+0+0(%rbp),%rax
5382	mulq	%r11
5383	imulq	%r12,%r15
5384	addq	%rax,%r14
5385	adcq	%rdx,%r15
5386	movq	8+0+0(%rbp),%rax
5387	movq	%rax,%r9
5388	mulq	%r10
5389	addq	%rax,%r14
5390	adcq	$0,%rdx
5391	movq	%rdx,%r10
5392	movq	8+0+0(%rbp),%rax
5393	mulq	%r11
5394	addq	%rax,%r15
5395	adcq	$0,%rdx
5396	imulq	%r12,%r9
5397	addq	%r10,%r15
5398	adcq	%rdx,%r9
5399	movq	%r13,%r10
5400	movq	%r14,%r11
5401	movq	%r15,%r12
5402	andq	$3,%r12
5403	movq	%r15,%r13
5404	andq	$-4,%r13
5405	movq	%r9,%r14
5406	shrdq	$2,%r9,%r15
5407	shrq	$2,%r9
5408	addq	%r13,%r15
5409	adcq	%r14,%r9
5410	addq	%r15,%r10
5411	adcq	%r9,%r11
5412	adcq	$0,%r12
5413
5414	leaq	16(%r8),%r8
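/* Four-state double round: ymm8 is spilled to 128(%rbp) so the register can
   hold the rol16/rol8 masks and shift scratch; two 16-byte ciphertext blocks
   (at 0(%r8) and 16(%r8)) are absorbed into Poly1305 per iteration. */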
5415L$open_avx2_tail_512_rounds_and_x1hash:
5416	vmovdqa	%ymm8,0+128(%rbp)
5417	vmovdqa	L$rol16(%rip),%ymm8
5418	vpaddd	%ymm7,%ymm3,%ymm3
5419	vpaddd	%ymm6,%ymm2,%ymm2
5420	vpaddd	%ymm5,%ymm1,%ymm1
5421	vpaddd	%ymm4,%ymm0,%ymm0
5422	vpxor	%ymm3,%ymm15,%ymm15
5423	vpxor	%ymm2,%ymm14,%ymm14
5424	vpxor	%ymm1,%ymm13,%ymm13
5425	vpxor	%ymm0,%ymm12,%ymm12
5426	vpshufb	%ymm8,%ymm15,%ymm15
5427	vpshufb	%ymm8,%ymm14,%ymm14
5428	vpshufb	%ymm8,%ymm13,%ymm13
5429	vpshufb	%ymm8,%ymm12,%ymm12
5430	vpaddd	%ymm15,%ymm11,%ymm11
5431	vpaddd	%ymm14,%ymm10,%ymm10
5432	vpaddd	%ymm13,%ymm9,%ymm9
5433	vpaddd	0+128(%rbp),%ymm12,%ymm8
5434	vpxor	%ymm11,%ymm7,%ymm7
5435	vpxor	%ymm10,%ymm6,%ymm6
5436	vpxor	%ymm9,%ymm5,%ymm5
5437	vpxor	%ymm8,%ymm4,%ymm4
5438	vmovdqa	%ymm8,0+128(%rbp)
5439	vpsrld	$20,%ymm7,%ymm8
5440	vpslld	$32-20,%ymm7,%ymm7
5441	vpxor	%ymm8,%ymm7,%ymm7
5442	vpsrld	$20,%ymm6,%ymm8
5443	vpslld	$32-20,%ymm6,%ymm6
5444	vpxor	%ymm8,%ymm6,%ymm6
5445	vpsrld	$20,%ymm5,%ymm8
5446	vpslld	$32-20,%ymm5,%ymm5
5447	vpxor	%ymm8,%ymm5,%ymm5
5448	vpsrld	$20,%ymm4,%ymm8
5449	vpslld	$32-20,%ymm4,%ymm4
5450	vpxor	%ymm8,%ymm4,%ymm4
5451	vmovdqa	L$rol8(%rip),%ymm8
5452	vpaddd	%ymm7,%ymm3,%ymm3
5453	addq	0+0(%r8),%r10
5454	adcq	8+0(%r8),%r11
5455	adcq	$1,%r12
5456	movq	0+0+0(%rbp),%rdx
5457	movq	%rdx,%r15
5458	mulxq	%r10,%r13,%r14
5459	mulxq	%r11,%rax,%rdx
5460	imulq	%r12,%r15
5461	addq	%rax,%r14
5462	adcq	%rdx,%r15
5463	movq	8+0+0(%rbp),%rdx
5464	mulxq	%r10,%r10,%rax
5465	addq	%r10,%r14
5466	mulxq	%r11,%r11,%r9
5467	adcq	%r11,%r15
5468	adcq	$0,%r9
5469	imulq	%r12,%rdx
5470	addq	%rax,%r15
5471	adcq	%rdx,%r9
5472	movq	%r13,%r10
5473	movq	%r14,%r11
5474	movq	%r15,%r12
5475	andq	$3,%r12
5476	movq	%r15,%r13
5477	andq	$-4,%r13
5478	movq	%r9,%r14
5479	shrdq	$2,%r9,%r15
5480	shrq	$2,%r9
5481	addq	%r13,%r15
5482	adcq	%r14,%r9
5483	addq	%r15,%r10
5484	adcq	%r9,%r11
5485	adcq	$0,%r12
5486	vpaddd	%ymm6,%ymm2,%ymm2
5487	vpaddd	%ymm5,%ymm1,%ymm1
5488	vpaddd	%ymm4,%ymm0,%ymm0
5489	vpxor	%ymm3,%ymm15,%ymm15
5490	vpxor	%ymm2,%ymm14,%ymm14
5491	vpxor	%ymm1,%ymm13,%ymm13
5492	vpxor	%ymm0,%ymm12,%ymm12
5493	vpshufb	%ymm8,%ymm15,%ymm15
5494	vpshufb	%ymm8,%ymm14,%ymm14
5495	vpshufb	%ymm8,%ymm13,%ymm13
5496	vpshufb	%ymm8,%ymm12,%ymm12
5497	vpaddd	%ymm15,%ymm11,%ymm11
5498	vpaddd	%ymm14,%ymm10,%ymm10
5499	vpaddd	%ymm13,%ymm9,%ymm9
5500	vpaddd	0+128(%rbp),%ymm12,%ymm8
5501	vpxor	%ymm11,%ymm7,%ymm7
5502	vpxor	%ymm10,%ymm6,%ymm6
5503	vpxor	%ymm9,%ymm5,%ymm5
5504	vpxor	%ymm8,%ymm4,%ymm4
5505	vmovdqa	%ymm8,0+128(%rbp)
5506	vpsrld	$25,%ymm7,%ymm8
5507	vpslld	$32-25,%ymm7,%ymm7
5508	vpxor	%ymm8,%ymm7,%ymm7
5509	vpsrld	$25,%ymm6,%ymm8
5510	vpslld	$32-25,%ymm6,%ymm6
5511	vpxor	%ymm8,%ymm6,%ymm6
5512	vpsrld	$25,%ymm5,%ymm8
5513	vpslld	$32-25,%ymm5,%ymm5
5514	vpxor	%ymm8,%ymm5,%ymm5
5515	vpsrld	$25,%ymm4,%ymm8
5516	vpslld	$32-25,%ymm4,%ymm4
5517	vpxor	%ymm8,%ymm4,%ymm4
5518	vmovdqa	0+128(%rbp),%ymm8
5519	vpalignr	$4,%ymm7,%ymm7,%ymm7
5520	vpalignr	$8,%ymm11,%ymm11,%ymm11
5521	vpalignr	$12,%ymm15,%ymm15,%ymm15
5522	vpalignr	$4,%ymm6,%ymm6,%ymm6
5523	vpalignr	$8,%ymm10,%ymm10,%ymm10
5524	vpalignr	$12,%ymm14,%ymm14,%ymm14
5525	vpalignr	$4,%ymm5,%ymm5,%ymm5
5526	vpalignr	$8,%ymm9,%ymm9,%ymm9
5527	vpalignr	$12,%ymm13,%ymm13,%ymm13
5528	vpalignr	$4,%ymm4,%ymm4,%ymm4
5529	vpalignr	$8,%ymm8,%ymm8,%ymm8
5530	vpalignr	$12,%ymm12,%ymm12,%ymm12
5531	vmovdqa	%ymm8,0+128(%rbp)
5532	vmovdqa	L$rol16(%rip),%ymm8
5533	vpaddd	%ymm7,%ymm3,%ymm3
5534	addq	0+16(%r8),%r10
5535	adcq	8+16(%r8),%r11
5536	adcq	$1,%r12
5537	movq	0+0+0(%rbp),%rdx
5538	movq	%rdx,%r15
5539	mulxq	%r10,%r13,%r14
5540	mulxq	%r11,%rax,%rdx
5541	imulq	%r12,%r15
5542	addq	%rax,%r14
5543	adcq	%rdx,%r15
5544	movq	8+0+0(%rbp),%rdx
5545	mulxq	%r10,%r10,%rax
5546	addq	%r10,%r14
5547	mulxq	%r11,%r11,%r9
5548	adcq	%r11,%r15
5549	adcq	$0,%r9
5550	imulq	%r12,%rdx
5551	addq	%rax,%r15
5552	adcq	%rdx,%r9
5553	movq	%r13,%r10
5554	movq	%r14,%r11
5555	movq	%r15,%r12
5556	andq	$3,%r12
5557	movq	%r15,%r13
5558	andq	$-4,%r13
5559	movq	%r9,%r14
5560	shrdq	$2,%r9,%r15
5561	shrq	$2,%r9
5562	addq	%r13,%r15
5563	adcq	%r14,%r9
5564	addq	%r15,%r10
5565	adcq	%r9,%r11
5566	adcq	$0,%r12
5567
5568	leaq	32(%r8),%r8
5569	vpaddd	%ymm6,%ymm2,%ymm2
5570	vpaddd	%ymm5,%ymm1,%ymm1
5571	vpaddd	%ymm4,%ymm0,%ymm0
5572	vpxor	%ymm3,%ymm15,%ymm15
5573	vpxor	%ymm2,%ymm14,%ymm14
5574	vpxor	%ymm1,%ymm13,%ymm13
5575	vpxor	%ymm0,%ymm12,%ymm12
5576	vpshufb	%ymm8,%ymm15,%ymm15
5577	vpshufb	%ymm8,%ymm14,%ymm14
5578	vpshufb	%ymm8,%ymm13,%ymm13
5579	vpshufb	%ymm8,%ymm12,%ymm12
5580	vpaddd	%ymm15,%ymm11,%ymm11
5581	vpaddd	%ymm14,%ymm10,%ymm10
5582	vpaddd	%ymm13,%ymm9,%ymm9
5583	vpaddd	0+128(%rbp),%ymm12,%ymm8
5584	vpxor	%ymm11,%ymm7,%ymm7
5585	vpxor	%ymm10,%ymm6,%ymm6
5586	vpxor	%ymm9,%ymm5,%ymm5
5587	vpxor	%ymm8,%ymm4,%ymm4
5588	vmovdqa	%ymm8,0+128(%rbp)
5589	vpsrld	$20,%ymm7,%ymm8
5590	vpslld	$32-20,%ymm7,%ymm7
5591	vpxor	%ymm8,%ymm7,%ymm7
5592	vpsrld	$20,%ymm6,%ymm8
5593	vpslld	$32-20,%ymm6,%ymm6
5594	vpxor	%ymm8,%ymm6,%ymm6
5595	vpsrld	$20,%ymm5,%ymm8
5596	vpslld	$32-20,%ymm5,%ymm5
5597	vpxor	%ymm8,%ymm5,%ymm5
5598	vpsrld	$20,%ymm4,%ymm8
5599	vpslld	$32-20,%ymm4,%ymm4
5600	vpxor	%ymm8,%ymm4,%ymm4
5601	vmovdqa	L$rol8(%rip),%ymm8
5602	vpaddd	%ymm7,%ymm3,%ymm3
5603	vpaddd	%ymm6,%ymm2,%ymm2
5604	vpaddd	%ymm5,%ymm1,%ymm1
5605	vpaddd	%ymm4,%ymm0,%ymm0
5606	vpxor	%ymm3,%ymm15,%ymm15
5607	vpxor	%ymm2,%ymm14,%ymm14
5608	vpxor	%ymm1,%ymm13,%ymm13
5609	vpxor	%ymm0,%ymm12,%ymm12
5610	vpshufb	%ymm8,%ymm15,%ymm15
5611	vpshufb	%ymm8,%ymm14,%ymm14
5612	vpshufb	%ymm8,%ymm13,%ymm13
5613	vpshufb	%ymm8,%ymm12,%ymm12
5614	vpaddd	%ymm15,%ymm11,%ymm11
5615	vpaddd	%ymm14,%ymm10,%ymm10
5616	vpaddd	%ymm13,%ymm9,%ymm9
5617	vpaddd	0+128(%rbp),%ymm12,%ymm8
5618	vpxor	%ymm11,%ymm7,%ymm7
5619	vpxor	%ymm10,%ymm6,%ymm6
5620	vpxor	%ymm9,%ymm5,%ymm5
5621	vpxor	%ymm8,%ymm4,%ymm4
5622	vmovdqa	%ymm8,0+128(%rbp)
5623	vpsrld	$25,%ymm7,%ymm8
5624	vpslld	$32-25,%ymm7,%ymm7
5625	vpxor	%ymm8,%ymm7,%ymm7
5626	vpsrld	$25,%ymm6,%ymm8
5627	vpslld	$32-25,%ymm6,%ymm6
5628	vpxor	%ymm8,%ymm6,%ymm6
5629	vpsrld	$25,%ymm5,%ymm8
5630	vpslld	$32-25,%ymm5,%ymm5
5631	vpxor	%ymm8,%ymm5,%ymm5
5632	vpsrld	$25,%ymm4,%ymm8
5633	vpslld	$32-25,%ymm4,%ymm4
5634	vpxor	%ymm8,%ymm4,%ymm4
5635	vmovdqa	0+128(%rbp),%ymm8
5636	vpalignr	$12,%ymm7,%ymm7,%ymm7
5637	vpalignr	$8,%ymm11,%ymm11,%ymm11
5638	vpalignr	$4,%ymm15,%ymm15,%ymm15
5639	vpalignr	$12,%ymm6,%ymm6,%ymm6
5640	vpalignr	$8,%ymm10,%ymm10,%ymm10
5641	vpalignr	$4,%ymm14,%ymm14,%ymm14
5642	vpalignr	$12,%ymm5,%ymm5,%ymm5
5643	vpalignr	$8,%ymm9,%ymm9,%ymm9
5644	vpalignr	$4,%ymm13,%ymm13,%ymm13
5645	vpalignr	$12,%ymm4,%ymm4,%ymm4
5646	vpalignr	$8,%ymm8,%ymm8,%ymm8
5647	vpalignr	$4,%ymm12,%ymm12,%ymm12
5648
5649	incq	%rcx
5650	cmpq	$4,%rcx
5651	jl	L$open_avx2_tail_512_rounds_and_x2hash
5652	cmpq	$10,%rcx
5653	jne	L$open_avx2_tail_512_rounds_and_x1hash
5654	movq	%rbx,%rcx
5655	subq	$384,%rcx
5656	andq	$-16,%rcx
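/* %rcx = ciphertext bytes beyond the first 384, rounded down to a multiple of
   16: hash those blocks before producing the output. */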
5657L$open_avx2_tail_512_hash:
5658	testq	%rcx,%rcx
5659	je	L$open_avx2_tail_512_done
5660	addq	0+0(%r8),%r10
5661	adcq	8+0(%r8),%r11
5662	adcq	$1,%r12
5663	movq	0+0+0(%rbp),%rdx
5664	movq	%rdx,%r15
5665	mulxq	%r10,%r13,%r14
5666	mulxq	%r11,%rax,%rdx
5667	imulq	%r12,%r15
5668	addq	%rax,%r14
5669	adcq	%rdx,%r15
5670	movq	8+0+0(%rbp),%rdx
5671	mulxq	%r10,%r10,%rax
5672	addq	%r10,%r14
5673	mulxq	%r11,%r11,%r9
5674	adcq	%r11,%r15
5675	adcq	$0,%r9
5676	imulq	%r12,%rdx
5677	addq	%rax,%r15
5678	adcq	%rdx,%r9
5679	movq	%r13,%r10
5680	movq	%r14,%r11
5681	movq	%r15,%r12
5682	andq	$3,%r12
5683	movq	%r15,%r13
5684	andq	$-4,%r13
5685	movq	%r9,%r14
5686	shrdq	$2,%r9,%r15
5687	shrq	$2,%r9
5688	addq	%r13,%r15
5689	adcq	%r14,%r9
5690	addq	%r15,%r10
5691	adcq	%r9,%r11
5692	adcq	$0,%r12
5693
5694	leaq	16(%r8),%r8
5695	subq	$16,%rcx
5696	jmp	L$open_avx2_tail_512_hash
5697L$open_avx2_tail_512_done:
5698	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
5699	vpaddd	0+64(%rbp),%ymm7,%ymm7
5700	vpaddd	0+96(%rbp),%ymm11,%ymm11
5701	vpaddd	0+256(%rbp),%ymm15,%ymm15
5702	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
5703	vpaddd	0+64(%rbp),%ymm6,%ymm6
5704	vpaddd	0+96(%rbp),%ymm10,%ymm10
5705	vpaddd	0+224(%rbp),%ymm14,%ymm14
5706	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
5707	vpaddd	0+64(%rbp),%ymm5,%ymm5
5708	vpaddd	0+96(%rbp),%ymm9,%ymm9
5709	vpaddd	0+192(%rbp),%ymm13,%ymm13
5710	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
5711	vpaddd	0+64(%rbp),%ymm4,%ymm4
5712	vpaddd	0+96(%rbp),%ymm8,%ymm8
5713	vpaddd	0+160(%rbp),%ymm12,%ymm12
5714
5715	vmovdqa	%ymm0,0+128(%rbp)
5716	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
5717	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
5718	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
5719	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
5720	vpxor	0+0(%rsi),%ymm0,%ymm0
5721	vpxor	32+0(%rsi),%ymm3,%ymm3
5722	vpxor	64+0(%rsi),%ymm7,%ymm7
5723	vpxor	96+0(%rsi),%ymm11,%ymm11
5724	vmovdqu	%ymm0,0+0(%rdi)
5725	vmovdqu	%ymm3,32+0(%rdi)
5726	vmovdqu	%ymm7,64+0(%rdi)
5727	vmovdqu	%ymm11,96+0(%rdi)
5728
5729	vmovdqa	0+128(%rbp),%ymm0
5730	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5731	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5732	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5733	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5734	vpxor	0+128(%rsi),%ymm3,%ymm3
5735	vpxor	32+128(%rsi),%ymm2,%ymm2
5736	vpxor	64+128(%rsi),%ymm6,%ymm6
5737	vpxor	96+128(%rsi),%ymm10,%ymm10
5738	vmovdqu	%ymm3,0+128(%rdi)
5739	vmovdqu	%ymm2,32+128(%rdi)
5740	vmovdqu	%ymm6,64+128(%rdi)
5741	vmovdqu	%ymm10,96+128(%rdi)
5742	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5743	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5744	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5745	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5746	vpxor	0+256(%rsi),%ymm3,%ymm3
5747	vpxor	32+256(%rsi),%ymm1,%ymm1
5748	vpxor	64+256(%rsi),%ymm5,%ymm5
5749	vpxor	96+256(%rsi),%ymm9,%ymm9
5750	vmovdqu	%ymm3,0+256(%rdi)
5751	vmovdqu	%ymm1,32+256(%rdi)
5752	vmovdqu	%ymm5,64+256(%rdi)
5753	vmovdqu	%ymm9,96+256(%rdi)
5754	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5755	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5756	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5757	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5758	vmovdqa	%ymm3,%ymm8
5759
5760	leaq	384(%rsi),%rsi
5761	leaq	384(%rdi),%rdi
5762	subq	$384,%rbx
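/* Common exit for the open tail paths: XOR 32 bytes of keystream at a time,
   shifting ymm4 -> ymm0, ymm8 -> ymm4, ymm12 -> ymm8 as each register is
   consumed. */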
5763L$open_avx2_tail_128_xor:
5764	cmpq	$32,%rbx
5765	jb	L$open_avx2_tail_32_xor
5766	subq	$32,%rbx
5767	vpxor	(%rsi),%ymm0,%ymm0
5768	vmovdqu	%ymm0,(%rdi)
5769	leaq	32(%rsi),%rsi
5770	leaq	32(%rdi),%rdi
5771	vmovdqa	%ymm4,%ymm0
5772	vmovdqa	%ymm8,%ymm4
5773	vmovdqa	%ymm12,%ymm8
5774	jmp	L$open_avx2_tail_128_xor
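/* Fewer than 32 bytes left: if at least 16 remain, XOR one 16-byte lane and
   move the high lane of ymm0 down; anything shorter is left to the SSE
   16-byte tail. */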
5775L$open_avx2_tail_32_xor:
5776	cmpq	$16,%rbx
5777	vmovdqa	%xmm0,%xmm1
5778	jb	L$open_avx2_exit
5779	subq	$16,%rbx
5780
5781	vpxor	(%rsi),%xmm0,%xmm1
5782	vmovdqu	%xmm1,(%rdi)
5783	leaq	16(%rsi),%rsi
5784	leaq	16(%rdi),%rdi
5785	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
5786	vmovdqa	%xmm0,%xmm1
5787L$open_avx2_exit:
5788	vzeroupper
5789	jmp	L$open_sse_tail_16
5790
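/* Short open, at most 192 bytes of data: run two interleaved states, clamp the
   first 32 bytes of keystream into the Poly1305 key at 0(%rbp), and leave the
   next 192 bytes in ymm0/ymm4/ymm8/ymm12/ymm1/ymm5 for the short loop below. */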
5791L$open_avx2_192:
5792	vmovdqa	%ymm0,%ymm1
5793	vmovdqa	%ymm0,%ymm2
5794	vmovdqa	%ymm4,%ymm5
5795	vmovdqa	%ymm4,%ymm6
5796	vmovdqa	%ymm8,%ymm9
5797	vmovdqa	%ymm8,%ymm10
5798	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
5799	vmovdqa	%ymm12,%ymm11
5800	vmovdqa	%ymm13,%ymm15
5801	movq	$10,%r10
5802L$open_avx2_192_rounds:
5803	vpaddd	%ymm4,%ymm0,%ymm0
5804	vpxor	%ymm0,%ymm12,%ymm12
5805	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5806	vpaddd	%ymm12,%ymm8,%ymm8
5807	vpxor	%ymm8,%ymm4,%ymm4
5808	vpsrld	$20,%ymm4,%ymm3
5809	vpslld	$12,%ymm4,%ymm4
5810	vpxor	%ymm3,%ymm4,%ymm4
5811	vpaddd	%ymm4,%ymm0,%ymm0
5812	vpxor	%ymm0,%ymm12,%ymm12
5813	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5814	vpaddd	%ymm12,%ymm8,%ymm8
5815	vpxor	%ymm8,%ymm4,%ymm4
5816	vpslld	$7,%ymm4,%ymm3
5817	vpsrld	$25,%ymm4,%ymm4
5818	vpxor	%ymm3,%ymm4,%ymm4
5819	vpalignr	$12,%ymm12,%ymm12,%ymm12
5820	vpalignr	$8,%ymm8,%ymm8,%ymm8
5821	vpalignr	$4,%ymm4,%ymm4,%ymm4
5822	vpaddd	%ymm5,%ymm1,%ymm1
5823	vpxor	%ymm1,%ymm13,%ymm13
5824	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5825	vpaddd	%ymm13,%ymm9,%ymm9
5826	vpxor	%ymm9,%ymm5,%ymm5
5827	vpsrld	$20,%ymm5,%ymm3
5828	vpslld	$12,%ymm5,%ymm5
5829	vpxor	%ymm3,%ymm5,%ymm5
5830	vpaddd	%ymm5,%ymm1,%ymm1
5831	vpxor	%ymm1,%ymm13,%ymm13
5832	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5833	vpaddd	%ymm13,%ymm9,%ymm9
5834	vpxor	%ymm9,%ymm5,%ymm5
5835	vpslld	$7,%ymm5,%ymm3
5836	vpsrld	$25,%ymm5,%ymm5
5837	vpxor	%ymm3,%ymm5,%ymm5
5838	vpalignr	$12,%ymm13,%ymm13,%ymm13
5839	vpalignr	$8,%ymm9,%ymm9,%ymm9
5840	vpalignr	$4,%ymm5,%ymm5,%ymm5
5841	vpaddd	%ymm4,%ymm0,%ymm0
5842	vpxor	%ymm0,%ymm12,%ymm12
5843	vpshufb	L$rol16(%rip),%ymm12,%ymm12
5844	vpaddd	%ymm12,%ymm8,%ymm8
5845	vpxor	%ymm8,%ymm4,%ymm4
5846	vpsrld	$20,%ymm4,%ymm3
5847	vpslld	$12,%ymm4,%ymm4
5848	vpxor	%ymm3,%ymm4,%ymm4
5849	vpaddd	%ymm4,%ymm0,%ymm0
5850	vpxor	%ymm0,%ymm12,%ymm12
5851	vpshufb	L$rol8(%rip),%ymm12,%ymm12
5852	vpaddd	%ymm12,%ymm8,%ymm8
5853	vpxor	%ymm8,%ymm4,%ymm4
5854	vpslld	$7,%ymm4,%ymm3
5855	vpsrld	$25,%ymm4,%ymm4
5856	vpxor	%ymm3,%ymm4,%ymm4
5857	vpalignr	$4,%ymm12,%ymm12,%ymm12
5858	vpalignr	$8,%ymm8,%ymm8,%ymm8
5859	vpalignr	$12,%ymm4,%ymm4,%ymm4
5860	vpaddd	%ymm5,%ymm1,%ymm1
5861	vpxor	%ymm1,%ymm13,%ymm13
5862	vpshufb	L$rol16(%rip),%ymm13,%ymm13
5863	vpaddd	%ymm13,%ymm9,%ymm9
5864	vpxor	%ymm9,%ymm5,%ymm5
5865	vpsrld	$20,%ymm5,%ymm3
5866	vpslld	$12,%ymm5,%ymm5
5867	vpxor	%ymm3,%ymm5,%ymm5
5868	vpaddd	%ymm5,%ymm1,%ymm1
5869	vpxor	%ymm1,%ymm13,%ymm13
5870	vpshufb	L$rol8(%rip),%ymm13,%ymm13
5871	vpaddd	%ymm13,%ymm9,%ymm9
5872	vpxor	%ymm9,%ymm5,%ymm5
5873	vpslld	$7,%ymm5,%ymm3
5874	vpsrld	$25,%ymm5,%ymm5
5875	vpxor	%ymm3,%ymm5,%ymm5
5876	vpalignr	$4,%ymm13,%ymm13,%ymm13
5877	vpalignr	$8,%ymm9,%ymm9,%ymm9
5878	vpalignr	$12,%ymm5,%ymm5,%ymm5
5879
5880	decq	%r10
5881	jne	L$open_avx2_192_rounds
5882	vpaddd	%ymm2,%ymm0,%ymm0
5883	vpaddd	%ymm2,%ymm1,%ymm1
5884	vpaddd	%ymm6,%ymm4,%ymm4
5885	vpaddd	%ymm6,%ymm5,%ymm5
5886	vpaddd	%ymm10,%ymm8,%ymm8
5887	vpaddd	%ymm10,%ymm9,%ymm9
5888	vpaddd	%ymm11,%ymm12,%ymm12
5889	vpaddd	%ymm15,%ymm13,%ymm13
5890	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
5891
5892	vpand	L$clamp(%rip),%ymm3,%ymm3
5893	vmovdqa	%ymm3,0+0(%rbp)
5894
5895	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
5896	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
5897	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
5898	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
5899	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
5900	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
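/* Shared short path (192- and 320-byte cases): hash the additional data, then
   alternate absorbing 32 bytes of ciphertext into Poly1305 with XORing 32
   bytes of keystream, rotating the queued keystream registers after each
   block. */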
5901L$open_avx2_short:
5902	movq	%r8,%r8
5903	call	poly_hash_ad_internal
5904L$open_avx2_short_hash_and_xor_loop:
5905	cmpq	$32,%rbx
5906	jb	L$open_avx2_short_tail_32
5907	subq	$32,%rbx
5908	addq	0+0(%rsi),%r10
5909	adcq	8+0(%rsi),%r11
5910	adcq	$1,%r12
5911	movq	0+0+0(%rbp),%rax
5912	movq	%rax,%r15
5913	mulq	%r10
5914	movq	%rax,%r13
5915	movq	%rdx,%r14
5916	movq	0+0+0(%rbp),%rax
5917	mulq	%r11
5918	imulq	%r12,%r15
5919	addq	%rax,%r14
5920	adcq	%rdx,%r15
5921	movq	8+0+0(%rbp),%rax
5922	movq	%rax,%r9
5923	mulq	%r10
5924	addq	%rax,%r14
5925	adcq	$0,%rdx
5926	movq	%rdx,%r10
5927	movq	8+0+0(%rbp),%rax
5928	mulq	%r11
5929	addq	%rax,%r15
5930	adcq	$0,%rdx
5931	imulq	%r12,%r9
5932	addq	%r10,%r15
5933	adcq	%rdx,%r9
5934	movq	%r13,%r10
5935	movq	%r14,%r11
5936	movq	%r15,%r12
5937	andq	$3,%r12
5938	movq	%r15,%r13
5939	andq	$-4,%r13
5940	movq	%r9,%r14
5941	shrdq	$2,%r9,%r15
5942	shrq	$2,%r9
5943	addq	%r13,%r15
5944	adcq	%r14,%r9
5945	addq	%r15,%r10
5946	adcq	%r9,%r11
5947	adcq	$0,%r12
5948	addq	0+16(%rsi),%r10
5949	adcq	8+16(%rsi),%r11
5950	adcq	$1,%r12
5951	movq	0+0+0(%rbp),%rax
5952	movq	%rax,%r15
5953	mulq	%r10
5954	movq	%rax,%r13
5955	movq	%rdx,%r14
5956	movq	0+0+0(%rbp),%rax
5957	mulq	%r11
5958	imulq	%r12,%r15
5959	addq	%rax,%r14
5960	adcq	%rdx,%r15
5961	movq	8+0+0(%rbp),%rax
5962	movq	%rax,%r9
5963	mulq	%r10
5964	addq	%rax,%r14
5965	adcq	$0,%rdx
5966	movq	%rdx,%r10
5967	movq	8+0+0(%rbp),%rax
5968	mulq	%r11
5969	addq	%rax,%r15
5970	adcq	$0,%rdx
5971	imulq	%r12,%r9
5972	addq	%r10,%r15
5973	adcq	%rdx,%r9
5974	movq	%r13,%r10
5975	movq	%r14,%r11
5976	movq	%r15,%r12
5977	andq	$3,%r12
5978	movq	%r15,%r13
5979	andq	$-4,%r13
5980	movq	%r9,%r14
5981	shrdq	$2,%r9,%r15
5982	shrq	$2,%r9
5983	addq	%r13,%r15
5984	adcq	%r14,%r9
5985	addq	%r15,%r10
5986	adcq	%r9,%r11
5987	adcq	$0,%r12
5988
5989
5990	vpxor	(%rsi),%ymm0,%ymm0
5991	vmovdqu	%ymm0,(%rdi)
5992	leaq	32(%rsi),%rsi
5993	leaq	32(%rdi),%rdi
5994
5995	vmovdqa	%ymm4,%ymm0
5996	vmovdqa	%ymm8,%ymm4
5997	vmovdqa	%ymm12,%ymm8
5998	vmovdqa	%ymm1,%ymm12
5999	vmovdqa	%ymm5,%ymm1
6000	vmovdqa	%ymm9,%ymm5
6001	vmovdqa	%ymm13,%ymm9
6002	vmovdqa	%ymm2,%ymm13
6003	vmovdqa	%ymm6,%ymm2
6004	jmp	L$open_avx2_short_hash_and_xor_loop
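/* 16 to 31 bytes left in the short path: hash and XOR one more 16-byte lane,
   then hand any remainder to the SSE 16-byte tail. */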
6005L$open_avx2_short_tail_32:
6006	cmpq	$16,%rbx
6007	vmovdqa	%xmm0,%xmm1
6008	jb	L$open_avx2_short_tail_32_exit
6009	subq	$16,%rbx
6010	addq	0+0(%rsi),%r10
6011	adcq	8+0(%rsi),%r11
6012	adcq	$1,%r12
6013	movq	0+0+0(%rbp),%rax
6014	movq	%rax,%r15
6015	mulq	%r10
6016	movq	%rax,%r13
6017	movq	%rdx,%r14
6018	movq	0+0+0(%rbp),%rax
6019	mulq	%r11
6020	imulq	%r12,%r15
6021	addq	%rax,%r14
6022	adcq	%rdx,%r15
6023	movq	8+0+0(%rbp),%rax
6024	movq	%rax,%r9
6025	mulq	%r10
6026	addq	%rax,%r14
6027	adcq	$0,%rdx
6028	movq	%rdx,%r10
6029	movq	8+0+0(%rbp),%rax
6030	mulq	%r11
6031	addq	%rax,%r15
6032	adcq	$0,%rdx
6033	imulq	%r12,%r9
6034	addq	%r10,%r15
6035	adcq	%rdx,%r9
6036	movq	%r13,%r10
6037	movq	%r14,%r11
6038	movq	%r15,%r12
6039	andq	$3,%r12
6040	movq	%r15,%r13
6041	andq	$-4,%r13
6042	movq	%r9,%r14
6043	shrdq	$2,%r9,%r15
6044	shrq	$2,%r9
6045	addq	%r13,%r15
6046	adcq	%r14,%r9
6047	addq	%r15,%r10
6048	adcq	%r9,%r11
6049	adcq	$0,%r12
6050
6051	vpxor	(%rsi),%xmm0,%xmm3
6052	vmovdqu	%xmm3,(%rdi)
6053	leaq	16(%rsi),%rsi
6054	leaq	16(%rdi),%rdi
6055	vextracti128	$1,%ymm0,%xmm1
6056L$open_avx2_short_tail_32_exit:
6057	vzeroupper
6058	jmp	L$open_sse_tail_16
6059
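/* Short open, at most 320 bytes of data: three interleaved states with
   counters at 160/192/224(%rbp); the first 32 bytes of keystream again become
   the Poly1305 key before joining L$open_avx2_short. */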
6060L$open_avx2_320:
6061	vmovdqa	%ymm0,%ymm1
6062	vmovdqa	%ymm0,%ymm2
6063	vmovdqa	%ymm4,%ymm5
6064	vmovdqa	%ymm4,%ymm6
6065	vmovdqa	%ymm8,%ymm9
6066	vmovdqa	%ymm8,%ymm10
6067	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
6068	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
6069	vmovdqa	%ymm4,%ymm7
6070	vmovdqa	%ymm8,%ymm11
6071	vmovdqa	%ymm12,0+160(%rbp)
6072	vmovdqa	%ymm13,0+192(%rbp)
6073	vmovdqa	%ymm14,0+224(%rbp)
6074	movq	$10,%r10
6075L$open_avx2_320_rounds:
6076	vpaddd	%ymm4,%ymm0,%ymm0
6077	vpxor	%ymm0,%ymm12,%ymm12
6078	vpshufb	L$rol16(%rip),%ymm12,%ymm12
6079	vpaddd	%ymm12,%ymm8,%ymm8
6080	vpxor	%ymm8,%ymm4,%ymm4
6081	vpsrld	$20,%ymm4,%ymm3
6082	vpslld	$12,%ymm4,%ymm4
6083	vpxor	%ymm3,%ymm4,%ymm4
6084	vpaddd	%ymm4,%ymm0,%ymm0
6085	vpxor	%ymm0,%ymm12,%ymm12
6086	vpshufb	L$rol8(%rip),%ymm12,%ymm12
6087	vpaddd	%ymm12,%ymm8,%ymm8
6088	vpxor	%ymm8,%ymm4,%ymm4
6089	vpslld	$7,%ymm4,%ymm3
6090	vpsrld	$25,%ymm4,%ymm4
6091	vpxor	%ymm3,%ymm4,%ymm4
6092	vpalignr	$12,%ymm12,%ymm12,%ymm12
6093	vpalignr	$8,%ymm8,%ymm8,%ymm8
6094	vpalignr	$4,%ymm4,%ymm4,%ymm4
6095	vpaddd	%ymm5,%ymm1,%ymm1
6096	vpxor	%ymm1,%ymm13,%ymm13
6097	vpshufb	L$rol16(%rip),%ymm13,%ymm13
6098	vpaddd	%ymm13,%ymm9,%ymm9
6099	vpxor	%ymm9,%ymm5,%ymm5
6100	vpsrld	$20,%ymm5,%ymm3
6101	vpslld	$12,%ymm5,%ymm5
6102	vpxor	%ymm3,%ymm5,%ymm5
6103	vpaddd	%ymm5,%ymm1,%ymm1
6104	vpxor	%ymm1,%ymm13,%ymm13
6105	vpshufb	L$rol8(%rip),%ymm13,%ymm13
6106	vpaddd	%ymm13,%ymm9,%ymm9
6107	vpxor	%ymm9,%ymm5,%ymm5
6108	vpslld	$7,%ymm5,%ymm3
6109	vpsrld	$25,%ymm5,%ymm5
6110	vpxor	%ymm3,%ymm5,%ymm5
6111	vpalignr	$12,%ymm13,%ymm13,%ymm13
6112	vpalignr	$8,%ymm9,%ymm9,%ymm9
6113	vpalignr	$4,%ymm5,%ymm5,%ymm5
6114	vpaddd	%ymm6,%ymm2,%ymm2
6115	vpxor	%ymm2,%ymm14,%ymm14
6116	vpshufb	L$rol16(%rip),%ymm14,%ymm14
6117	vpaddd	%ymm14,%ymm10,%ymm10
6118	vpxor	%ymm10,%ymm6,%ymm6
6119	vpsrld	$20,%ymm6,%ymm3
6120	vpslld	$12,%ymm6,%ymm6
6121	vpxor	%ymm3,%ymm6,%ymm6
6122	vpaddd	%ymm6,%ymm2,%ymm2
6123	vpxor	%ymm2,%ymm14,%ymm14
6124	vpshufb	L$rol8(%rip),%ymm14,%ymm14
6125	vpaddd	%ymm14,%ymm10,%ymm10
6126	vpxor	%ymm10,%ymm6,%ymm6
6127	vpslld	$7,%ymm6,%ymm3
6128	vpsrld	$25,%ymm6,%ymm6
6129	vpxor	%ymm3,%ymm6,%ymm6
6130	vpalignr	$12,%ymm14,%ymm14,%ymm14
6131	vpalignr	$8,%ymm10,%ymm10,%ymm10
6132	vpalignr	$4,%ymm6,%ymm6,%ymm6
6133	vpaddd	%ymm4,%ymm0,%ymm0
6134	vpxor	%ymm0,%ymm12,%ymm12
6135	vpshufb	L$rol16(%rip),%ymm12,%ymm12
6136	vpaddd	%ymm12,%ymm8,%ymm8
6137	vpxor	%ymm8,%ymm4,%ymm4
6138	vpsrld	$20,%ymm4,%ymm3
6139	vpslld	$12,%ymm4,%ymm4
6140	vpxor	%ymm3,%ymm4,%ymm4
6141	vpaddd	%ymm4,%ymm0,%ymm0
6142	vpxor	%ymm0,%ymm12,%ymm12
6143	vpshufb	L$rol8(%rip),%ymm12,%ymm12
6144	vpaddd	%ymm12,%ymm8,%ymm8
6145	vpxor	%ymm8,%ymm4,%ymm4
6146	vpslld	$7,%ymm4,%ymm3
6147	vpsrld	$25,%ymm4,%ymm4
6148	vpxor	%ymm3,%ymm4,%ymm4
6149	vpalignr	$4,%ymm12,%ymm12,%ymm12
6150	vpalignr	$8,%ymm8,%ymm8,%ymm8
6151	vpalignr	$12,%ymm4,%ymm4,%ymm4
6152	vpaddd	%ymm5,%ymm1,%ymm1
6153	vpxor	%ymm1,%ymm13,%ymm13
6154	vpshufb	L$rol16(%rip),%ymm13,%ymm13
6155	vpaddd	%ymm13,%ymm9,%ymm9
6156	vpxor	%ymm9,%ymm5,%ymm5
6157	vpsrld	$20,%ymm5,%ymm3
6158	vpslld	$12,%ymm5,%ymm5
6159	vpxor	%ymm3,%ymm5,%ymm5
6160	vpaddd	%ymm5,%ymm1,%ymm1
6161	vpxor	%ymm1,%ymm13,%ymm13
6162	vpshufb	L$rol8(%rip),%ymm13,%ymm13
6163	vpaddd	%ymm13,%ymm9,%ymm9
6164	vpxor	%ymm9,%ymm5,%ymm5
6165	vpslld	$7,%ymm5,%ymm3
6166	vpsrld	$25,%ymm5,%ymm5
6167	vpxor	%ymm3,%ymm5,%ymm5
6168	vpalignr	$4,%ymm13,%ymm13,%ymm13
6169	vpalignr	$8,%ymm9,%ymm9,%ymm9
6170	vpalignr	$12,%ymm5,%ymm5,%ymm5
6171	vpaddd	%ymm6,%ymm2,%ymm2
6172	vpxor	%ymm2,%ymm14,%ymm14
6173	vpshufb	L$rol16(%rip),%ymm14,%ymm14
6174	vpaddd	%ymm14,%ymm10,%ymm10
6175	vpxor	%ymm10,%ymm6,%ymm6
6176	vpsrld	$20,%ymm6,%ymm3
6177	vpslld	$12,%ymm6,%ymm6
6178	vpxor	%ymm3,%ymm6,%ymm6
6179	vpaddd	%ymm6,%ymm2,%ymm2
6180	vpxor	%ymm2,%ymm14,%ymm14
6181	vpshufb	L$rol8(%rip),%ymm14,%ymm14
6182	vpaddd	%ymm14,%ymm10,%ymm10
6183	vpxor	%ymm10,%ymm6,%ymm6
6184	vpslld	$7,%ymm6,%ymm3
6185	vpsrld	$25,%ymm6,%ymm6
6186	vpxor	%ymm3,%ymm6,%ymm6
6187	vpalignr	$4,%ymm14,%ymm14,%ymm14
6188	vpalignr	$8,%ymm10,%ymm10,%ymm10
6189	vpalignr	$12,%ymm6,%ymm6,%ymm6
6190
6191	decq	%r10
6192	jne	L$open_avx2_320_rounds
6193	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
6194	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
6195	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
6196	vpaddd	%ymm7,%ymm4,%ymm4
6197	vpaddd	%ymm7,%ymm5,%ymm5
6198	vpaddd	%ymm7,%ymm6,%ymm6
6199	vpaddd	%ymm11,%ymm8,%ymm8
6200	vpaddd	%ymm11,%ymm9,%ymm9
6201	vpaddd	%ymm11,%ymm10,%ymm10
6202	vpaddd	0+160(%rbp),%ymm12,%ymm12
6203	vpaddd	0+192(%rbp),%ymm13,%ymm13
6204	vpaddd	0+224(%rbp),%ymm14,%ymm14
6205	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
6206
6207	vpand	L$clamp(%rip),%ymm3,%ymm3
6208	vmovdqa	%ymm3,0+0(%rbp)
6209
6210	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
6211	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
6212	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
6213	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
6214	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
6215	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
6216	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
6217	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
6218	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
6219	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
6220	jmp	L$open_avx2_short
6221
6222
6223
6224
6225
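/* AVX2 seal (encrypt and authenticate): broadcast the 32-byte key and the
   counter/nonce block at 0/16/32(%r9) into ymm4/ymm8/ymm12 and dispatch on the
   plaintext length in %rbx (<=192, <=320, or the general path). */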
6226.p2align	6
6227chacha20_poly1305_seal_avx2:
6228
6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240	vzeroupper
6241	vmovdqa	L$chacha20_consts(%rip),%ymm0
6242	vbroadcasti128	0(%r9),%ymm4
6243	vbroadcasti128	16(%r9),%ymm8
6244	vbroadcasti128	32(%r9),%ymm12
6245	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
6246	cmpq	$192,%rbx
6247	jbe	L$seal_avx2_192
6248	cmpq	$320,%rbx
6249	jbe	L$seal_avx2_320
6250	vmovdqa	%ymm0,%ymm1
6251	vmovdqa	%ymm0,%ymm2
6252	vmovdqa	%ymm0,%ymm3
6253	vmovdqa	%ymm4,%ymm5
6254	vmovdqa	%ymm4,%ymm6
6255	vmovdqa	%ymm4,%ymm7
6256	vmovdqa	%ymm4,0+64(%rbp)
6257	vmovdqa	%ymm8,%ymm9
6258	vmovdqa	%ymm8,%ymm10
6259	vmovdqa	%ymm8,%ymm11
6260	vmovdqa	%ymm8,0+96(%rbp)
6261	vmovdqa	%ymm12,%ymm15
6262	vpaddd	L$avx2_inc(%rip),%ymm15,%ymm14
6263	vpaddd	L$avx2_inc(%rip),%ymm14,%ymm13
6264	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm12
6265	vmovdqa	%ymm12,0+160(%rbp)
6266	vmovdqa	%ymm13,0+192(%rbp)
6267	vmovdqa	%ymm14,0+224(%rbp)
6268	vmovdqa	%ymm15,0+256(%rbp)
6269	movq	$10,%r10
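/* First keystream batch for the general seal path: ten double rounds over four
   interleaved states, with ymm8 spilled to 128(%rbp) so it can double as the
   rotation-mask and shift scratch register. */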
6270L$seal_avx2_init_rounds:
6271	vmovdqa	%ymm8,0+128(%rbp)
6272	vmovdqa	L$rol16(%rip),%ymm8
6273	vpaddd	%ymm7,%ymm3,%ymm3
6274	vpaddd	%ymm6,%ymm2,%ymm2
6275	vpaddd	%ymm5,%ymm1,%ymm1
6276	vpaddd	%ymm4,%ymm0,%ymm0
6277	vpxor	%ymm3,%ymm15,%ymm15
6278	vpxor	%ymm2,%ymm14,%ymm14
6279	vpxor	%ymm1,%ymm13,%ymm13
6280	vpxor	%ymm0,%ymm12,%ymm12
6281	vpshufb	%ymm8,%ymm15,%ymm15
6282	vpshufb	%ymm8,%ymm14,%ymm14
6283	vpshufb	%ymm8,%ymm13,%ymm13
6284	vpshufb	%ymm8,%ymm12,%ymm12
6285	vpaddd	%ymm15,%ymm11,%ymm11
6286	vpaddd	%ymm14,%ymm10,%ymm10
6287	vpaddd	%ymm13,%ymm9,%ymm9
6288	vpaddd	0+128(%rbp),%ymm12,%ymm8
6289	vpxor	%ymm11,%ymm7,%ymm7
6290	vpxor	%ymm10,%ymm6,%ymm6
6291	vpxor	%ymm9,%ymm5,%ymm5
6292	vpxor	%ymm8,%ymm4,%ymm4
6293	vmovdqa	%ymm8,0+128(%rbp)
6294	vpsrld	$20,%ymm7,%ymm8
6295	vpslld	$32-20,%ymm7,%ymm7
6296	vpxor	%ymm8,%ymm7,%ymm7
6297	vpsrld	$20,%ymm6,%ymm8
6298	vpslld	$32-20,%ymm6,%ymm6
6299	vpxor	%ymm8,%ymm6,%ymm6
6300	vpsrld	$20,%ymm5,%ymm8
6301	vpslld	$32-20,%ymm5,%ymm5
6302	vpxor	%ymm8,%ymm5,%ymm5
6303	vpsrld	$20,%ymm4,%ymm8
6304	vpslld	$32-20,%ymm4,%ymm4
6305	vpxor	%ymm8,%ymm4,%ymm4
6306	vmovdqa	L$rol8(%rip),%ymm8
6307	vpaddd	%ymm7,%ymm3,%ymm3
6308	vpaddd	%ymm6,%ymm2,%ymm2
6309	vpaddd	%ymm5,%ymm1,%ymm1
6310	vpaddd	%ymm4,%ymm0,%ymm0
6311	vpxor	%ymm3,%ymm15,%ymm15
6312	vpxor	%ymm2,%ymm14,%ymm14
6313	vpxor	%ymm1,%ymm13,%ymm13
6314	vpxor	%ymm0,%ymm12,%ymm12
6315	vpshufb	%ymm8,%ymm15,%ymm15
6316	vpshufb	%ymm8,%ymm14,%ymm14
6317	vpshufb	%ymm8,%ymm13,%ymm13
6318	vpshufb	%ymm8,%ymm12,%ymm12
6319	vpaddd	%ymm15,%ymm11,%ymm11
6320	vpaddd	%ymm14,%ymm10,%ymm10
6321	vpaddd	%ymm13,%ymm9,%ymm9
6322	vpaddd	0+128(%rbp),%ymm12,%ymm8
6323	vpxor	%ymm11,%ymm7,%ymm7
6324	vpxor	%ymm10,%ymm6,%ymm6
6325	vpxor	%ymm9,%ymm5,%ymm5
6326	vpxor	%ymm8,%ymm4,%ymm4
6327	vmovdqa	%ymm8,0+128(%rbp)
6328	vpsrld	$25,%ymm7,%ymm8
6329	vpslld	$32-25,%ymm7,%ymm7
6330	vpxor	%ymm8,%ymm7,%ymm7
6331	vpsrld	$25,%ymm6,%ymm8
6332	vpslld	$32-25,%ymm6,%ymm6
6333	vpxor	%ymm8,%ymm6,%ymm6
6334	vpsrld	$25,%ymm5,%ymm8
6335	vpslld	$32-25,%ymm5,%ymm5
6336	vpxor	%ymm8,%ymm5,%ymm5
6337	vpsrld	$25,%ymm4,%ymm8
6338	vpslld	$32-25,%ymm4,%ymm4
6339	vpxor	%ymm8,%ymm4,%ymm4
6340	vmovdqa	0+128(%rbp),%ymm8
6341	vpalignr	$4,%ymm7,%ymm7,%ymm7
6342	vpalignr	$8,%ymm11,%ymm11,%ymm11
6343	vpalignr	$12,%ymm15,%ymm15,%ymm15
6344	vpalignr	$4,%ymm6,%ymm6,%ymm6
6345	vpalignr	$8,%ymm10,%ymm10,%ymm10
6346	vpalignr	$12,%ymm14,%ymm14,%ymm14
6347	vpalignr	$4,%ymm5,%ymm5,%ymm5
6348	vpalignr	$8,%ymm9,%ymm9,%ymm9
6349	vpalignr	$12,%ymm13,%ymm13,%ymm13
6350	vpalignr	$4,%ymm4,%ymm4,%ymm4
6351	vpalignr	$8,%ymm8,%ymm8,%ymm8
6352	vpalignr	$12,%ymm12,%ymm12,%ymm12
6353	vmovdqa	%ymm8,0+128(%rbp)
6354	vmovdqa	L$rol16(%rip),%ymm8
6355	vpaddd	%ymm7,%ymm3,%ymm3
6356	vpaddd	%ymm6,%ymm2,%ymm2
6357	vpaddd	%ymm5,%ymm1,%ymm1
6358	vpaddd	%ymm4,%ymm0,%ymm0
6359	vpxor	%ymm3,%ymm15,%ymm15
6360	vpxor	%ymm2,%ymm14,%ymm14
6361	vpxor	%ymm1,%ymm13,%ymm13
6362	vpxor	%ymm0,%ymm12,%ymm12
6363	vpshufb	%ymm8,%ymm15,%ymm15
6364	vpshufb	%ymm8,%ymm14,%ymm14
6365	vpshufb	%ymm8,%ymm13,%ymm13
6366	vpshufb	%ymm8,%ymm12,%ymm12
6367	vpaddd	%ymm15,%ymm11,%ymm11
6368	vpaddd	%ymm14,%ymm10,%ymm10
6369	vpaddd	%ymm13,%ymm9,%ymm9
6370	vpaddd	0+128(%rbp),%ymm12,%ymm8
6371	vpxor	%ymm11,%ymm7,%ymm7
6372	vpxor	%ymm10,%ymm6,%ymm6
6373	vpxor	%ymm9,%ymm5,%ymm5
6374	vpxor	%ymm8,%ymm4,%ymm4
6375	vmovdqa	%ymm8,0+128(%rbp)
6376	vpsrld	$20,%ymm7,%ymm8
6377	vpslld	$32-20,%ymm7,%ymm7
6378	vpxor	%ymm8,%ymm7,%ymm7
6379	vpsrld	$20,%ymm6,%ymm8
6380	vpslld	$32-20,%ymm6,%ymm6
6381	vpxor	%ymm8,%ymm6,%ymm6
6382	vpsrld	$20,%ymm5,%ymm8
6383	vpslld	$32-20,%ymm5,%ymm5
6384	vpxor	%ymm8,%ymm5,%ymm5
6385	vpsrld	$20,%ymm4,%ymm8
6386	vpslld	$32-20,%ymm4,%ymm4
6387	vpxor	%ymm8,%ymm4,%ymm4
6388	vmovdqa	L$rol8(%rip),%ymm8
6389	vpaddd	%ymm7,%ymm3,%ymm3
6390	vpaddd	%ymm6,%ymm2,%ymm2
6391	vpaddd	%ymm5,%ymm1,%ymm1
6392	vpaddd	%ymm4,%ymm0,%ymm0
6393	vpxor	%ymm3,%ymm15,%ymm15
6394	vpxor	%ymm2,%ymm14,%ymm14
6395	vpxor	%ymm1,%ymm13,%ymm13
6396	vpxor	%ymm0,%ymm12,%ymm12
6397	vpshufb	%ymm8,%ymm15,%ymm15
6398	vpshufb	%ymm8,%ymm14,%ymm14
6399	vpshufb	%ymm8,%ymm13,%ymm13
6400	vpshufb	%ymm8,%ymm12,%ymm12
6401	vpaddd	%ymm15,%ymm11,%ymm11
6402	vpaddd	%ymm14,%ymm10,%ymm10
6403	vpaddd	%ymm13,%ymm9,%ymm9
6404	vpaddd	0+128(%rbp),%ymm12,%ymm8
6405	vpxor	%ymm11,%ymm7,%ymm7
6406	vpxor	%ymm10,%ymm6,%ymm6
6407	vpxor	%ymm9,%ymm5,%ymm5
6408	vpxor	%ymm8,%ymm4,%ymm4
6409	vmovdqa	%ymm8,0+128(%rbp)
6410	vpsrld	$25,%ymm7,%ymm8
6411	vpslld	$32-25,%ymm7,%ymm7
6412	vpxor	%ymm8,%ymm7,%ymm7
6413	vpsrld	$25,%ymm6,%ymm8
6414	vpslld	$32-25,%ymm6,%ymm6
6415	vpxor	%ymm8,%ymm6,%ymm6
6416	vpsrld	$25,%ymm5,%ymm8
6417	vpslld	$32-25,%ymm5,%ymm5
6418	vpxor	%ymm8,%ymm5,%ymm5
6419	vpsrld	$25,%ymm4,%ymm8
6420	vpslld	$32-25,%ymm4,%ymm4
6421	vpxor	%ymm8,%ymm4,%ymm4
6422	vmovdqa	0+128(%rbp),%ymm8
6423	vpalignr	$12,%ymm7,%ymm7,%ymm7
6424	vpalignr	$8,%ymm11,%ymm11,%ymm11
6425	vpalignr	$4,%ymm15,%ymm15,%ymm15
6426	vpalignr	$12,%ymm6,%ymm6,%ymm6
6427	vpalignr	$8,%ymm10,%ymm10,%ymm10
6428	vpalignr	$4,%ymm14,%ymm14,%ymm14
6429	vpalignr	$12,%ymm5,%ymm5,%ymm5
6430	vpalignr	$8,%ymm9,%ymm9,%ymm9
6431	vpalignr	$4,%ymm13,%ymm13,%ymm13
6432	vpalignr	$12,%ymm4,%ymm4,%ymm4
6433	vpalignr	$8,%ymm8,%ymm8,%ymm8
6434	vpalignr	$4,%ymm12,%ymm12,%ymm12
6435
6436	decq	%r10
6437	jnz	L$seal_avx2_init_rounds
6438	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
6439	vpaddd	0+64(%rbp),%ymm7,%ymm7
6440	vpaddd	0+96(%rbp),%ymm11,%ymm11
6441	vpaddd	0+256(%rbp),%ymm15,%ymm15
6442	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
6443	vpaddd	0+64(%rbp),%ymm6,%ymm6
6444	vpaddd	0+96(%rbp),%ymm10,%ymm10
6445	vpaddd	0+224(%rbp),%ymm14,%ymm14
6446	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
6447	vpaddd	0+64(%rbp),%ymm5,%ymm5
6448	vpaddd	0+96(%rbp),%ymm9,%ymm9
6449	vpaddd	0+192(%rbp),%ymm13,%ymm13
6450	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
6451	vpaddd	0+64(%rbp),%ymm4,%ymm4
6452	vpaddd	0+96(%rbp),%ymm8,%ymm8
6453	vpaddd	0+160(%rbp),%ymm12,%ymm12
6454
6455	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
6456	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
6457	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
6458	vpand	L$clamp(%rip),%ymm15,%ymm15
6459	vmovdqa	%ymm15,0+0(%rbp)
6460	movq	%r8,%r8
6461	call	poly_hash_ad_internal
6462
6463	vpxor	0(%rsi),%ymm3,%ymm3
6464	vpxor	32(%rsi),%ymm11,%ymm11
6465	vmovdqu	%ymm3,0(%rdi)
6466	vmovdqu	%ymm11,32(%rdi)
6467	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
6468	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
6469	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
6470	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
6471	vpxor	0+64(%rsi),%ymm15,%ymm15
6472	vpxor	32+64(%rsi),%ymm2,%ymm2
6473	vpxor	64+64(%rsi),%ymm6,%ymm6
6474	vpxor	96+64(%rsi),%ymm10,%ymm10
6475	vmovdqu	%ymm15,0+64(%rdi)
6476	vmovdqu	%ymm2,32+64(%rdi)
6477	vmovdqu	%ymm6,64+64(%rdi)
6478	vmovdqu	%ymm10,96+64(%rdi)
6479	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
6480	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
6481	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
6482	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
6483	vpxor	0+192(%rsi),%ymm15,%ymm15
6484	vpxor	32+192(%rsi),%ymm1,%ymm1
6485	vpxor	64+192(%rsi),%ymm5,%ymm5
6486	vpxor	96+192(%rsi),%ymm9,%ymm9
6487	vmovdqu	%ymm15,0+192(%rdi)
6488	vmovdqu	%ymm1,32+192(%rdi)
6489	vmovdqu	%ymm5,64+192(%rdi)
6490	vmovdqu	%ymm9,96+192(%rdi)
6491	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
6492	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
6493	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
6494	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
6495	vmovdqa	%ymm15,%ymm8
6496
6497	leaq	320(%rsi),%rsi
6498	subq	$320,%rbx
6499	movq	$320,%rcx
6500	cmpq	$128,%rbx
6501	jbe	L$seal_avx2_short_hash_remainder
6502	vpxor	0(%rsi),%ymm0,%ymm0
6503	vpxor	32(%rsi),%ymm4,%ymm4
6504	vpxor	64(%rsi),%ymm8,%ymm8
6505	vpxor	96(%rsi),%ymm12,%ymm12
6506	vmovdqu	%ymm0,320(%rdi)
6507	vmovdqu	%ymm4,352(%rdi)
6508	vmovdqu	%ymm8,384(%rdi)
6509	vmovdqu	%ymm12,416(%rdi)
6510	leaq	128(%rsi),%rsi
6511	subq	$128,%rbx
6512	movq	$8,%rcx
6513	movq	$2,%r8
6514	cmpq	$128,%rbx
6515	jbe	L$seal_avx2_tail_128
6516	cmpq	$256,%rbx
6517	jbe	L$seal_avx2_tail_256
6518	cmpq	$384,%rbx
6519	jbe	L$seal_avx2_tail_384
6520	cmpq	$512,%rbx
6521	jbe	L$seal_avx2_tail_512
6522	vmovdqa	L$chacha20_consts(%rip),%ymm0
6523	vmovdqa	0+64(%rbp),%ymm4
6524	vmovdqa	0+96(%rbp),%ymm8
6525	vmovdqa	%ymm0,%ymm1
6526	vmovdqa	%ymm4,%ymm5
6527	vmovdqa	%ymm8,%ymm9
6528	vmovdqa	%ymm0,%ymm2
6529	vmovdqa	%ymm4,%ymm6
6530	vmovdqa	%ymm8,%ymm10
6531	vmovdqa	%ymm0,%ymm3
6532	vmovdqa	%ymm4,%ymm7
6533	vmovdqa	%ymm8,%ymm11
6534	vmovdqa	L$avx2_inc(%rip),%ymm12
6535	vpaddd	0+160(%rbp),%ymm12,%ymm15
6536	vpaddd	%ymm15,%ymm12,%ymm14
6537	vpaddd	%ymm14,%ymm12,%ymm13
6538	vpaddd	%ymm13,%ymm12,%ymm12
6539	vmovdqa	%ymm15,0+256(%rbp)
6540	vmovdqa	%ymm14,0+224(%rbp)
6541	vmovdqa	%ymm13,0+192(%rbp)
6542	vmovdqa	%ymm12,0+160(%rbp)
6543	vmovdqa	%ymm8,0+128(%rbp)
6544	vmovdqa	L$rol16(%rip),%ymm8
6545	vpaddd	%ymm7,%ymm3,%ymm3
6546	vpaddd	%ymm6,%ymm2,%ymm2
6547	vpaddd	%ymm5,%ymm1,%ymm1
6548	vpaddd	%ymm4,%ymm0,%ymm0
6549	vpxor	%ymm3,%ymm15,%ymm15
6550	vpxor	%ymm2,%ymm14,%ymm14
6551	vpxor	%ymm1,%ymm13,%ymm13
6552	vpxor	%ymm0,%ymm12,%ymm12
6553	vpshufb	%ymm8,%ymm15,%ymm15
6554	vpshufb	%ymm8,%ymm14,%ymm14
6555	vpshufb	%ymm8,%ymm13,%ymm13
6556	vpshufb	%ymm8,%ymm12,%ymm12
6557	vpaddd	%ymm15,%ymm11,%ymm11
6558	vpaddd	%ymm14,%ymm10,%ymm10
6559	vpaddd	%ymm13,%ymm9,%ymm9
6560	vpaddd	0+128(%rbp),%ymm12,%ymm8
6561	vpxor	%ymm11,%ymm7,%ymm7
6562	vpxor	%ymm10,%ymm6,%ymm6
6563	vpxor	%ymm9,%ymm5,%ymm5
6564	vpxor	%ymm8,%ymm4,%ymm4
6565	vmovdqa	%ymm8,0+128(%rbp)
6566	vpsrld	$20,%ymm7,%ymm8
6567	vpslld	$32-20,%ymm7,%ymm7
6568	vpxor	%ymm8,%ymm7,%ymm7
6569	vpsrld	$20,%ymm6,%ymm8
6570	vpslld	$32-20,%ymm6,%ymm6
6571	vpxor	%ymm8,%ymm6,%ymm6
6572	vpsrld	$20,%ymm5,%ymm8
6573	vpslld	$32-20,%ymm5,%ymm5
6574	vpxor	%ymm8,%ymm5,%ymm5
6575	vpsrld	$20,%ymm4,%ymm8
6576	vpslld	$32-20,%ymm4,%ymm4
6577	vpxor	%ymm8,%ymm4,%ymm4
6578	vmovdqa	L$rol8(%rip),%ymm8
6579	vpaddd	%ymm7,%ymm3,%ymm3
6580	vpaddd	%ymm6,%ymm2,%ymm2
6581	vpaddd	%ymm5,%ymm1,%ymm1
6582	vpaddd	%ymm4,%ymm0,%ymm0
6583	vpxor	%ymm3,%ymm15,%ymm15
6584	vpxor	%ymm2,%ymm14,%ymm14
6585	vpxor	%ymm1,%ymm13,%ymm13
6586	vpxor	%ymm0,%ymm12,%ymm12
6587	vpshufb	%ymm8,%ymm15,%ymm15
6588	vpshufb	%ymm8,%ymm14,%ymm14
6589	vpshufb	%ymm8,%ymm13,%ymm13
6590	vpshufb	%ymm8,%ymm12,%ymm12
6591	vpaddd	%ymm15,%ymm11,%ymm11
6592	vpaddd	%ymm14,%ymm10,%ymm10
6593	vpaddd	%ymm13,%ymm9,%ymm9
6594	vpaddd	0+128(%rbp),%ymm12,%ymm8
6595	vpxor	%ymm11,%ymm7,%ymm7
6596	vpxor	%ymm10,%ymm6,%ymm6
6597	vpxor	%ymm9,%ymm5,%ymm5
6598	vpxor	%ymm8,%ymm4,%ymm4
6599	vmovdqa	%ymm8,0+128(%rbp)
6600	vpsrld	$25,%ymm7,%ymm8
6601	vpslld	$32-25,%ymm7,%ymm7
6602	vpxor	%ymm8,%ymm7,%ymm7
6603	vpsrld	$25,%ymm6,%ymm8
6604	vpslld	$32-25,%ymm6,%ymm6
6605	vpxor	%ymm8,%ymm6,%ymm6
6606	vpsrld	$25,%ymm5,%ymm8
6607	vpslld	$32-25,%ymm5,%ymm5
6608	vpxor	%ymm8,%ymm5,%ymm5
6609	vpsrld	$25,%ymm4,%ymm8
6610	vpslld	$32-25,%ymm4,%ymm4
6611	vpxor	%ymm8,%ymm4,%ymm4
6612	vmovdqa	0+128(%rbp),%ymm8
6613	vpalignr	$4,%ymm7,%ymm7,%ymm7
6614	vpalignr	$8,%ymm11,%ymm11,%ymm11
6615	vpalignr	$12,%ymm15,%ymm15,%ymm15
6616	vpalignr	$4,%ymm6,%ymm6,%ymm6
6617	vpalignr	$8,%ymm10,%ymm10,%ymm10
6618	vpalignr	$12,%ymm14,%ymm14,%ymm14
6619	vpalignr	$4,%ymm5,%ymm5,%ymm5
6620	vpalignr	$8,%ymm9,%ymm9,%ymm9
6621	vpalignr	$12,%ymm13,%ymm13,%ymm13
6622	vpalignr	$4,%ymm4,%ymm4,%ymm4
6623	vpalignr	$8,%ymm8,%ymm8,%ymm8
6624	vpalignr	$12,%ymm12,%ymm12,%ymm12
6625	vmovdqa	%ymm8,0+128(%rbp)
6626	vmovdqa	L$rol16(%rip),%ymm8
6627	vpaddd	%ymm7,%ymm3,%ymm3
6628	vpaddd	%ymm6,%ymm2,%ymm2
6629	vpaddd	%ymm5,%ymm1,%ymm1
6630	vpaddd	%ymm4,%ymm0,%ymm0
6631	vpxor	%ymm3,%ymm15,%ymm15
6632	vpxor	%ymm2,%ymm14,%ymm14
6633	vpxor	%ymm1,%ymm13,%ymm13
6634	vpxor	%ymm0,%ymm12,%ymm12
6635	vpshufb	%ymm8,%ymm15,%ymm15
6636	vpshufb	%ymm8,%ymm14,%ymm14
6637	vpshufb	%ymm8,%ymm13,%ymm13
6638	vpshufb	%ymm8,%ymm12,%ymm12
6639	vpaddd	%ymm15,%ymm11,%ymm11
6640	vpaddd	%ymm14,%ymm10,%ymm10
6641	vpaddd	%ymm13,%ymm9,%ymm9
6642	vpaddd	0+128(%rbp),%ymm12,%ymm8
6643	vpxor	%ymm11,%ymm7,%ymm7
6644	vpxor	%ymm10,%ymm6,%ymm6
6645	vpxor	%ymm9,%ymm5,%ymm5
6646	vpxor	%ymm8,%ymm4,%ymm4
6647	vmovdqa	%ymm8,0+128(%rbp)
6648	vpsrld	$20,%ymm7,%ymm8
6649	vpslld	$32-20,%ymm7,%ymm7
6650	vpxor	%ymm8,%ymm7,%ymm7
6651	vpsrld	$20,%ymm6,%ymm8
6652	vpslld	$32-20,%ymm6,%ymm6
6653	vpxor	%ymm8,%ymm6,%ymm6
6654	vpsrld	$20,%ymm5,%ymm8
6655	vpslld	$32-20,%ymm5,%ymm5
6656	vpxor	%ymm8,%ymm5,%ymm5
6657	vpsrld	$20,%ymm4,%ymm8
6658	vpslld	$32-20,%ymm4,%ymm4
6659	vpxor	%ymm8,%ymm4,%ymm4
6660	vmovdqa	L$rol8(%rip),%ymm8
6661	vpaddd	%ymm7,%ymm3,%ymm3
6662	vpaddd	%ymm6,%ymm2,%ymm2
6663	vpaddd	%ymm5,%ymm1,%ymm1
6664	vpaddd	%ymm4,%ymm0,%ymm0
6665	vpxor	%ymm3,%ymm15,%ymm15
6666	vpxor	%ymm2,%ymm14,%ymm14
6667	vpxor	%ymm1,%ymm13,%ymm13
6668	vpxor	%ymm0,%ymm12,%ymm12
6669	vpshufb	%ymm8,%ymm15,%ymm15
6670	vpshufb	%ymm8,%ymm14,%ymm14
6671	vpshufb	%ymm8,%ymm13,%ymm13
6672	vpshufb	%ymm8,%ymm12,%ymm12
6673	vpaddd	%ymm15,%ymm11,%ymm11
6674	vpaddd	%ymm14,%ymm10,%ymm10
6675	vpaddd	%ymm13,%ymm9,%ymm9
6676	vpaddd	0+128(%rbp),%ymm12,%ymm8
6677	vpxor	%ymm11,%ymm7,%ymm7
6678	vpxor	%ymm10,%ymm6,%ymm6
6679	vpxor	%ymm9,%ymm5,%ymm5
6680	vpxor	%ymm8,%ymm4,%ymm4
6681	vmovdqa	%ymm8,0+128(%rbp)
6682	vpsrld	$25,%ymm7,%ymm8
6683	vpslld	$32-25,%ymm7,%ymm7
6684	vpxor	%ymm8,%ymm7,%ymm7
6685	vpsrld	$25,%ymm6,%ymm8
6686	vpslld	$32-25,%ymm6,%ymm6
6687	vpxor	%ymm8,%ymm6,%ymm6
6688	vpsrld	$25,%ymm5,%ymm8
6689	vpslld	$32-25,%ymm5,%ymm5
6690	vpxor	%ymm8,%ymm5,%ymm5
6691	vpsrld	$25,%ymm4,%ymm8
6692	vpslld	$32-25,%ymm4,%ymm4
6693	vpxor	%ymm8,%ymm4,%ymm4
6694	vmovdqa	0+128(%rbp),%ymm8
6695	vpalignr	$12,%ymm7,%ymm7,%ymm7
6696	vpalignr	$8,%ymm11,%ymm11,%ymm11
6697	vpalignr	$4,%ymm15,%ymm15,%ymm15
6698	vpalignr	$12,%ymm6,%ymm6,%ymm6
6699	vpalignr	$8,%ymm10,%ymm10,%ymm10
6700	vpalignr	$4,%ymm14,%ymm14,%ymm14
6701	vpalignr	$12,%ymm5,%ymm5,%ymm5
6702	vpalignr	$8,%ymm9,%ymm9,%ymm9
6703	vpalignr	$4,%ymm13,%ymm13,%ymm13
6704	vpalignr	$12,%ymm4,%ymm4,%ymm4
6705	vpalignr	$8,%ymm8,%ymm8,%ymm8
6706	vpalignr	$4,%ymm12,%ymm12,%ymm12
6707	vmovdqa	%ymm8,0+128(%rbp)
6708	vmovdqa	L$rol16(%rip),%ymm8
6709	vpaddd	%ymm7,%ymm3,%ymm3
6710	vpaddd	%ymm6,%ymm2,%ymm2
6711	vpaddd	%ymm5,%ymm1,%ymm1
6712	vpaddd	%ymm4,%ymm0,%ymm0
6713	vpxor	%ymm3,%ymm15,%ymm15
6714	vpxor	%ymm2,%ymm14,%ymm14
6715	vpxor	%ymm1,%ymm13,%ymm13
6716	vpxor	%ymm0,%ymm12,%ymm12
6717	vpshufb	%ymm8,%ymm15,%ymm15
6718	vpshufb	%ymm8,%ymm14,%ymm14
6719	vpshufb	%ymm8,%ymm13,%ymm13
6720	vpshufb	%ymm8,%ymm12,%ymm12
6721	vpaddd	%ymm15,%ymm11,%ymm11
6722	vpaddd	%ymm14,%ymm10,%ymm10
6723	vpaddd	%ymm13,%ymm9,%ymm9
6724	vpaddd	0+128(%rbp),%ymm12,%ymm8
6725	vpxor	%ymm11,%ymm7,%ymm7
6726	vpxor	%ymm10,%ymm6,%ymm6
6727	vpxor	%ymm9,%ymm5,%ymm5
6728	vpxor	%ymm8,%ymm4,%ymm4
6729	vmovdqa	%ymm8,0+128(%rbp)
6730	vpsrld	$20,%ymm7,%ymm8
6731	vpslld	$32-20,%ymm7,%ymm7
6732	vpxor	%ymm8,%ymm7,%ymm7
6733	vpsrld	$20,%ymm6,%ymm8
6734	vpslld	$32-20,%ymm6,%ymm6
6735	vpxor	%ymm8,%ymm6,%ymm6
6736	vpsrld	$20,%ymm5,%ymm8
6737	vpslld	$32-20,%ymm5,%ymm5
6738	vpxor	%ymm8,%ymm5,%ymm5
6739	vpsrld	$20,%ymm4,%ymm8
6740	vpslld	$32-20,%ymm4,%ymm4
6741	vpxor	%ymm8,%ymm4,%ymm4
6742	vmovdqa	L$rol8(%rip),%ymm8
6743	vpaddd	%ymm7,%ymm3,%ymm3
6744	vpaddd	%ymm6,%ymm2,%ymm2
6745	vpaddd	%ymm5,%ymm1,%ymm1
6746	vpaddd	%ymm4,%ymm0,%ymm0
6747	vpxor	%ymm3,%ymm15,%ymm15
6748
6749	subq	$16,%rdi
6750	movq	$9,%rcx
6751	jmp	L$seal_avx2_main_loop_rounds_entry
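/* Seal main loop: each pass generates another 512 bytes of keystream (four
   interleaved states) while Poly1305 consumes ciphertext already written at
   (%rdi). */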
6752.p2align	5
6753L$seal_avx2_main_loop:
6754	vmovdqa	L$chacha20_consts(%rip),%ymm0
6755	vmovdqa	0+64(%rbp),%ymm4
6756	vmovdqa	0+96(%rbp),%ymm8
6757	vmovdqa	%ymm0,%ymm1
6758	vmovdqa	%ymm4,%ymm5
6759	vmovdqa	%ymm8,%ymm9
6760	vmovdqa	%ymm0,%ymm2
6761	vmovdqa	%ymm4,%ymm6
6762	vmovdqa	%ymm8,%ymm10
6763	vmovdqa	%ymm0,%ymm3
6764	vmovdqa	%ymm4,%ymm7
6765	vmovdqa	%ymm8,%ymm11
6766	vmovdqa	L$avx2_inc(%rip),%ymm12
6767	vpaddd	0+160(%rbp),%ymm12,%ymm15
6768	vpaddd	%ymm15,%ymm12,%ymm14
6769	vpaddd	%ymm14,%ymm12,%ymm13
6770	vpaddd	%ymm13,%ymm12,%ymm12
6771	vmovdqa	%ymm15,0+256(%rbp)
6772	vmovdqa	%ymm14,0+224(%rbp)
6773	vmovdqa	%ymm13,0+192(%rbp)
6774	vmovdqa	%ymm12,0+160(%rbp)
6775
6776	movq	$10,%rcx
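/* One round iteration interleaves the vector quarter-round sequence with three
   mulx-based Poly1305 block updates, reading 48 bytes of ciphertext from
   (%rdi) per iteration. */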
6777.p2align	5
6778L$seal_avx2_main_loop_rounds:
6779	addq	0+0(%rdi),%r10
6780	adcq	8+0(%rdi),%r11
6781	adcq	$1,%r12
6782	vmovdqa	%ymm8,0+128(%rbp)
6783	vmovdqa	L$rol16(%rip),%ymm8
6784	vpaddd	%ymm7,%ymm3,%ymm3
6785	vpaddd	%ymm6,%ymm2,%ymm2
6786	vpaddd	%ymm5,%ymm1,%ymm1
6787	vpaddd	%ymm4,%ymm0,%ymm0
6788	vpxor	%ymm3,%ymm15,%ymm15
6789	vpxor	%ymm2,%ymm14,%ymm14
6790	vpxor	%ymm1,%ymm13,%ymm13
6791	vpxor	%ymm0,%ymm12,%ymm12
6792	movq	0+0+0(%rbp),%rdx
6793	movq	%rdx,%r15
6794	mulxq	%r10,%r13,%r14
6795	mulxq	%r11,%rax,%rdx
6796	imulq	%r12,%r15
6797	addq	%rax,%r14
6798	adcq	%rdx,%r15
6799	vpshufb	%ymm8,%ymm15,%ymm15
6800	vpshufb	%ymm8,%ymm14,%ymm14
6801	vpshufb	%ymm8,%ymm13,%ymm13
6802	vpshufb	%ymm8,%ymm12,%ymm12
6803	vpaddd	%ymm15,%ymm11,%ymm11
6804	vpaddd	%ymm14,%ymm10,%ymm10
6805	vpaddd	%ymm13,%ymm9,%ymm9
6806	vpaddd	0+128(%rbp),%ymm12,%ymm8
6807	vpxor	%ymm11,%ymm7,%ymm7
6808	movq	8+0+0(%rbp),%rdx
6809	mulxq	%r10,%r10,%rax
6810	addq	%r10,%r14
6811	mulxq	%r11,%r11,%r9
6812	adcq	%r11,%r15
6813	adcq	$0,%r9
6814	imulq	%r12,%rdx
6815	vpxor	%ymm10,%ymm6,%ymm6
6816	vpxor	%ymm9,%ymm5,%ymm5
6817	vpxor	%ymm8,%ymm4,%ymm4
6818	vmovdqa	%ymm8,0+128(%rbp)
6819	vpsrld	$20,%ymm7,%ymm8
6820	vpslld	$32-20,%ymm7,%ymm7
6821	vpxor	%ymm8,%ymm7,%ymm7
6822	vpsrld	$20,%ymm6,%ymm8
6823	vpslld	$32-20,%ymm6,%ymm6
6824	vpxor	%ymm8,%ymm6,%ymm6
6825	vpsrld	$20,%ymm5,%ymm8
6826	vpslld	$32-20,%ymm5,%ymm5
6827	addq	%rax,%r15
6828	adcq	%rdx,%r9
6829	vpxor	%ymm8,%ymm5,%ymm5
6830	vpsrld	$20,%ymm4,%ymm8
6831	vpslld	$32-20,%ymm4,%ymm4
6832	vpxor	%ymm8,%ymm4,%ymm4
6833	vmovdqa	L$rol8(%rip),%ymm8
6834	vpaddd	%ymm7,%ymm3,%ymm3
6835	vpaddd	%ymm6,%ymm2,%ymm2
6836	vpaddd	%ymm5,%ymm1,%ymm1
6837	vpaddd	%ymm4,%ymm0,%ymm0
6838	vpxor	%ymm3,%ymm15,%ymm15
6839	movq	%r13,%r10
6840	movq	%r14,%r11
6841	movq	%r15,%r12
6842	andq	$3,%r12
6843	movq	%r15,%r13
6844	andq	$-4,%r13
6845	movq	%r9,%r14
6846	shrdq	$2,%r9,%r15
6847	shrq	$2,%r9
6848	addq	%r13,%r15
6849	adcq	%r14,%r9
6850	addq	%r15,%r10
6851	adcq	%r9,%r11
6852	adcq	$0,%r12
6853
6854L$seal_avx2_main_loop_rounds_entry:
6855	vpxor	%ymm2,%ymm14,%ymm14
6856	vpxor	%ymm1,%ymm13,%ymm13
6857	vpxor	%ymm0,%ymm12,%ymm12
6858	vpshufb	%ymm8,%ymm15,%ymm15
6859	vpshufb	%ymm8,%ymm14,%ymm14
6860	vpshufb	%ymm8,%ymm13,%ymm13
6861	vpshufb	%ymm8,%ymm12,%ymm12
6862	vpaddd	%ymm15,%ymm11,%ymm11
6863	vpaddd	%ymm14,%ymm10,%ymm10
6864	addq	0+16(%rdi),%r10
6865	adcq	8+16(%rdi),%r11
6866	adcq	$1,%r12
6867	vpaddd	%ymm13,%ymm9,%ymm9
6868	vpaddd	0+128(%rbp),%ymm12,%ymm8
6869	vpxor	%ymm11,%ymm7,%ymm7
6870	vpxor	%ymm10,%ymm6,%ymm6
6871	vpxor	%ymm9,%ymm5,%ymm5
6872	vpxor	%ymm8,%ymm4,%ymm4
6873	vmovdqa	%ymm8,0+128(%rbp)
6874	vpsrld	$25,%ymm7,%ymm8
6875	movq	0+0+0(%rbp),%rdx
6876	movq	%rdx,%r15
6877	mulxq	%r10,%r13,%r14
6878	mulxq	%r11,%rax,%rdx
6879	imulq	%r12,%r15
6880	addq	%rax,%r14
6881	adcq	%rdx,%r15
6882	vpslld	$32-25,%ymm7,%ymm7
6883	vpxor	%ymm8,%ymm7,%ymm7
6884	vpsrld	$25,%ymm6,%ymm8
6885	vpslld	$32-25,%ymm6,%ymm6
6886	vpxor	%ymm8,%ymm6,%ymm6
6887	vpsrld	$25,%ymm5,%ymm8
6888	vpslld	$32-25,%ymm5,%ymm5
6889	vpxor	%ymm8,%ymm5,%ymm5
6890	vpsrld	$25,%ymm4,%ymm8
6891	vpslld	$32-25,%ymm4,%ymm4
6892	vpxor	%ymm8,%ymm4,%ymm4
6893	vmovdqa	0+128(%rbp),%ymm8
6894	vpalignr	$4,%ymm7,%ymm7,%ymm7
6895	vpalignr	$8,%ymm11,%ymm11,%ymm11
6896	vpalignr	$12,%ymm15,%ymm15,%ymm15
6897	vpalignr	$4,%ymm6,%ymm6,%ymm6
6898	vpalignr	$8,%ymm10,%ymm10,%ymm10
6899	vpalignr	$12,%ymm14,%ymm14,%ymm14
6900	movq	8+0+0(%rbp),%rdx
6901	mulxq	%r10,%r10,%rax
6902	addq	%r10,%r14
6903	mulxq	%r11,%r11,%r9
6904	adcq	%r11,%r15
6905	adcq	$0,%r9
6906	imulq	%r12,%rdx
6907	vpalignr	$4,%ymm5,%ymm5,%ymm5
6908	vpalignr	$8,%ymm9,%ymm9,%ymm9
6909	vpalignr	$12,%ymm13,%ymm13,%ymm13
6910	vpalignr	$4,%ymm4,%ymm4,%ymm4
6911	vpalignr	$8,%ymm8,%ymm8,%ymm8
6912	vpalignr	$12,%ymm12,%ymm12,%ymm12
6913	vmovdqa	%ymm8,0+128(%rbp)
6914	vmovdqa	L$rol16(%rip),%ymm8
6915	vpaddd	%ymm7,%ymm3,%ymm3
6916	vpaddd	%ymm6,%ymm2,%ymm2
6917	vpaddd	%ymm5,%ymm1,%ymm1
6918	vpaddd	%ymm4,%ymm0,%ymm0
6919	vpxor	%ymm3,%ymm15,%ymm15
6920	vpxor	%ymm2,%ymm14,%ymm14
6921	vpxor	%ymm1,%ymm13,%ymm13
6922	vpxor	%ymm0,%ymm12,%ymm12
6923	vpshufb	%ymm8,%ymm15,%ymm15
6924	vpshufb	%ymm8,%ymm14,%ymm14
6925	addq	%rax,%r15
6926	adcq	%rdx,%r9
6927	vpshufb	%ymm8,%ymm13,%ymm13
6928	vpshufb	%ymm8,%ymm12,%ymm12
6929	vpaddd	%ymm15,%ymm11,%ymm11
6930	vpaddd	%ymm14,%ymm10,%ymm10
6931	vpaddd	%ymm13,%ymm9,%ymm9
6932	vpaddd	0+128(%rbp),%ymm12,%ymm8
6933	vpxor	%ymm11,%ymm7,%ymm7
6934	vpxor	%ymm10,%ymm6,%ymm6
6935	vpxor	%ymm9,%ymm5,%ymm5
6936	movq	%r13,%r10
6937	movq	%r14,%r11
6938	movq	%r15,%r12
6939	andq	$3,%r12
6940	movq	%r15,%r13
6941	andq	$-4,%r13
6942	movq	%r9,%r14
6943	shrdq	$2,%r9,%r15
6944	shrq	$2,%r9
6945	addq	%r13,%r15
6946	adcq	%r14,%r9
6947	addq	%r15,%r10
6948	adcq	%r9,%r11
6949	adcq	$0,%r12
6950	vpxor	%ymm8,%ymm4,%ymm4
6951	vmovdqa	%ymm8,0+128(%rbp)
6952	vpsrld	$20,%ymm7,%ymm8
6953	vpslld	$32-20,%ymm7,%ymm7
6954	vpxor	%ymm8,%ymm7,%ymm7
6955	vpsrld	$20,%ymm6,%ymm8
6956	vpslld	$32-20,%ymm6,%ymm6
6957	vpxor	%ymm8,%ymm6,%ymm6
6958	addq	0+32(%rdi),%r10
6959	adcq	8+32(%rdi),%r11
6960	adcq	$1,%r12
6961
6962	leaq	48(%rdi),%rdi
6963	vpsrld	$20,%ymm5,%ymm8
6964	vpslld	$32-20,%ymm5,%ymm5
6965	vpxor	%ymm8,%ymm5,%ymm5
6966	vpsrld	$20,%ymm4,%ymm8
6967	vpslld	$32-20,%ymm4,%ymm4
6968	vpxor	%ymm8,%ymm4,%ymm4
6969	vmovdqa	L$rol8(%rip),%ymm8
6970	vpaddd	%ymm7,%ymm3,%ymm3
6971	vpaddd	%ymm6,%ymm2,%ymm2
6972	vpaddd	%ymm5,%ymm1,%ymm1
6973	vpaddd	%ymm4,%ymm0,%ymm0
6974	vpxor	%ymm3,%ymm15,%ymm15
6975	vpxor	%ymm2,%ymm14,%ymm14
6976	vpxor	%ymm1,%ymm13,%ymm13
6977	vpxor	%ymm0,%ymm12,%ymm12
6978	vpshufb	%ymm8,%ymm15,%ymm15
6979	vpshufb	%ymm8,%ymm14,%ymm14
6980	vpshufb	%ymm8,%ymm13,%ymm13
6981	movq	0+0+0(%rbp),%rdx
6982	movq	%rdx,%r15
6983	mulxq	%r10,%r13,%r14
6984	mulxq	%r11,%rax,%rdx
6985	imulq	%r12,%r15
6986	addq	%rax,%r14
6987	adcq	%rdx,%r15
6988	vpshufb	%ymm8,%ymm12,%ymm12
6989	vpaddd	%ymm15,%ymm11,%ymm11
6990	vpaddd	%ymm14,%ymm10,%ymm10
6991	vpaddd	%ymm13,%ymm9,%ymm9
6992	vpaddd	0+128(%rbp),%ymm12,%ymm8
6993	vpxor	%ymm11,%ymm7,%ymm7
6994	vpxor	%ymm10,%ymm6,%ymm6
6995	vpxor	%ymm9,%ymm5,%ymm5
6996	movq	8+0+0(%rbp),%rdx
6997	mulxq	%r10,%r10,%rax
6998	addq	%r10,%r14
6999	mulxq	%r11,%r11,%r9
7000	adcq	%r11,%r15
7001	adcq	$0,%r9
7002	imulq	%r12,%rdx
7003	vpxor	%ymm8,%ymm4,%ymm4
7004	vmovdqa	%ymm8,0+128(%rbp)
7005	vpsrld	$25,%ymm7,%ymm8
7006	vpslld	$32-25,%ymm7,%ymm7
7007	vpxor	%ymm8,%ymm7,%ymm7
7008	vpsrld	$25,%ymm6,%ymm8
7009	vpslld	$32-25,%ymm6,%ymm6
7010	vpxor	%ymm8,%ymm6,%ymm6
7011	addq	%rax,%r15
7012	adcq	%rdx,%r9
7013	vpsrld	$25,%ymm5,%ymm8
7014	vpslld	$32-25,%ymm5,%ymm5
7015	vpxor	%ymm8,%ymm5,%ymm5
7016	vpsrld	$25,%ymm4,%ymm8
7017	vpslld	$32-25,%ymm4,%ymm4
7018	vpxor	%ymm8,%ymm4,%ymm4
7019	vmovdqa	0+128(%rbp),%ymm8
7020	vpalignr	$12,%ymm7,%ymm7,%ymm7
7021	vpalignr	$8,%ymm11,%ymm11,%ymm11
7022	vpalignr	$4,%ymm15,%ymm15,%ymm15
7023	vpalignr	$12,%ymm6,%ymm6,%ymm6
7024	vpalignr	$8,%ymm10,%ymm10,%ymm10
7025	vpalignr	$4,%ymm14,%ymm14,%ymm14
7026	vpalignr	$12,%ymm5,%ymm5,%ymm5
7027	vpalignr	$8,%ymm9,%ymm9,%ymm9
7028	vpalignr	$4,%ymm13,%ymm13,%ymm13
7029	vpalignr	$12,%ymm4,%ymm4,%ymm4
7030	vpalignr	$8,%ymm8,%ymm8,%ymm8
7031	movq	%r13,%r10
7032	movq	%r14,%r11
7033	movq	%r15,%r12
7034	andq	$3,%r12
7035	movq	%r15,%r13
7036	andq	$-4,%r13
7037	movq	%r9,%r14
7038	shrdq	$2,%r9,%r15
7039	shrq	$2,%r9
7040	addq	%r13,%r15
7041	adcq	%r14,%r9
7042	addq	%r15,%r10
7043	adcq	%r9,%r11
7044	adcq	$0,%r12
7045	vpalignr	$4,%ymm12,%ymm12,%ymm12
7046
7047	decq	%rcx
7048	jne	L$seal_avx2_main_loop_rounds
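# Rounds done: add the saved initial state (constants, key rows, per-block
# counters) back into the working registers to form the keystream, absorbing
# two more 16-byte ciphertext blocks along the way.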
7049	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
7050	vpaddd	0+64(%rbp),%ymm7,%ymm7
7051	vpaddd	0+96(%rbp),%ymm11,%ymm11
7052	vpaddd	0+256(%rbp),%ymm15,%ymm15
7053	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
7054	vpaddd	0+64(%rbp),%ymm6,%ymm6
7055	vpaddd	0+96(%rbp),%ymm10,%ymm10
7056	vpaddd	0+224(%rbp),%ymm14,%ymm14
7057	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7058	vpaddd	0+64(%rbp),%ymm5,%ymm5
7059	vpaddd	0+96(%rbp),%ymm9,%ymm9
7060	vpaddd	0+192(%rbp),%ymm13,%ymm13
7061	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7062	vpaddd	0+64(%rbp),%ymm4,%ymm4
7063	vpaddd	0+96(%rbp),%ymm8,%ymm8
7064	vpaddd	0+160(%rbp),%ymm12,%ymm12
7065
7066	vmovdqa	%ymm0,0+128(%rbp)
7067	addq	0+0(%rdi),%r10
7068	adcq	8+0(%rdi),%r11
7069	adcq	$1,%r12
7070	movq	0+0+0(%rbp),%rdx
7071	movq	%rdx,%r15
7072	mulxq	%r10,%r13,%r14
7073	mulxq	%r11,%rax,%rdx
7074	imulq	%r12,%r15
7075	addq	%rax,%r14
7076	adcq	%rdx,%r15
7077	movq	8+0+0(%rbp),%rdx
7078	mulxq	%r10,%r10,%rax
7079	addq	%r10,%r14
7080	mulxq	%r11,%r11,%r9
7081	adcq	%r11,%r15
7082	adcq	$0,%r9
7083	imulq	%r12,%rdx
7084	addq	%rax,%r15
7085	adcq	%rdx,%r9
7086	movq	%r13,%r10
7087	movq	%r14,%r11
7088	movq	%r15,%r12
7089	andq	$3,%r12
7090	movq	%r15,%r13
7091	andq	$-4,%r13
7092	movq	%r9,%r14
7093	shrdq	$2,%r9,%r15
7094	shrq	$2,%r9
7095	addq	%r13,%r15
7096	adcq	%r14,%r9
7097	addq	%r15,%r10
7098	adcq	%r9,%r11
7099	adcq	$0,%r12
7100	addq	0+16(%rdi),%r10
7101	adcq	8+16(%rdi),%r11
7102	adcq	$1,%r12
7103	movq	0+0+0(%rbp),%rdx
7104	movq	%rdx,%r15
7105	mulxq	%r10,%r13,%r14
7106	mulxq	%r11,%rax,%rdx
7107	imulq	%r12,%r15
7108	addq	%rax,%r14
7109	adcq	%rdx,%r15
7110	movq	8+0+0(%rbp),%rdx
7111	mulxq	%r10,%r10,%rax
7112	addq	%r10,%r14
7113	mulxq	%r11,%r11,%r9
7114	adcq	%r11,%r15
7115	adcq	$0,%r9
7116	imulq	%r12,%rdx
7117	addq	%rax,%r15
7118	adcq	%rdx,%r9
7119	movq	%r13,%r10
7120	movq	%r14,%r11
7121	movq	%r15,%r12
7122	andq	$3,%r12
7123	movq	%r15,%r13
7124	andq	$-4,%r13
7125	movq	%r9,%r14
7126	shrdq	$2,%r9,%r15
7127	shrq	$2,%r9
7128	addq	%r13,%r15
7129	adcq	%r14,%r9
7130	addq	%r15,%r10
7131	adcq	%r9,%r11
7132	adcq	$0,%r12
7133
7134	leaq	32(%rdi),%rdi
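# Reassemble contiguous 64-byte blocks from the two-block register layout
# (vperm2i128 selects matching 128-bit lanes), then XOR with the plaintext and
# store 512 bytes of ciphertext.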
7135	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
7136	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
7137	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
7138	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
7139	vpxor	0+0(%rsi),%ymm0,%ymm0
7140	vpxor	32+0(%rsi),%ymm3,%ymm3
7141	vpxor	64+0(%rsi),%ymm7,%ymm7
7142	vpxor	96+0(%rsi),%ymm11,%ymm11
7143	vmovdqu	%ymm0,0+0(%rdi)
7144	vmovdqu	%ymm3,32+0(%rdi)
7145	vmovdqu	%ymm7,64+0(%rdi)
7146	vmovdqu	%ymm11,96+0(%rdi)
7147
7148	vmovdqa	0+128(%rbp),%ymm0
7149	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7150	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7151	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7152	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7153	vpxor	0+128(%rsi),%ymm3,%ymm3
7154	vpxor	32+128(%rsi),%ymm2,%ymm2
7155	vpxor	64+128(%rsi),%ymm6,%ymm6
7156	vpxor	96+128(%rsi),%ymm10,%ymm10
7157	vmovdqu	%ymm3,0+128(%rdi)
7158	vmovdqu	%ymm2,32+128(%rdi)
7159	vmovdqu	%ymm6,64+128(%rdi)
7160	vmovdqu	%ymm10,96+128(%rdi)
7161	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7162	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7163	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7164	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7165	vpxor	0+256(%rsi),%ymm3,%ymm3
7166	vpxor	32+256(%rsi),%ymm1,%ymm1
7167	vpxor	64+256(%rsi),%ymm5,%ymm5
7168	vpxor	96+256(%rsi),%ymm9,%ymm9
7169	vmovdqu	%ymm3,0+256(%rdi)
7170	vmovdqu	%ymm1,32+256(%rdi)
7171	vmovdqu	%ymm5,64+256(%rdi)
7172	vmovdqu	%ymm9,96+256(%rdi)
7173	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
7174	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
7175	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
7176	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
7177	vpxor	0+384(%rsi),%ymm3,%ymm3
7178	vpxor	32+384(%rsi),%ymm0,%ymm0
7179	vpxor	64+384(%rsi),%ymm4,%ymm4
7180	vpxor	96+384(%rsi),%ymm8,%ymm8
7181	vmovdqu	%ymm3,0+384(%rdi)
7182	vmovdqu	%ymm0,32+384(%rdi)
7183	vmovdqu	%ymm4,64+384(%rdi)
7184	vmovdqu	%ymm8,96+384(%rdi)
7185
7186	leaq	512(%rsi),%rsi
7187	subq	$512,%rbx
7188	cmpq	$512,%rbx
7189	jg	L$seal_avx2_main_loop
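# At most 512 bytes remain: absorb the last 32 bytes of ciphertext from the
# final bulk iteration, then branch to a tail routine sized for the remaining
# length.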
7190
7191	addq	0+0(%rdi),%r10
7192	adcq	8+0(%rdi),%r11
7193	adcq	$1,%r12
7194	movq	0+0+0(%rbp),%rdx
7195	movq	%rdx,%r15
7196	mulxq	%r10,%r13,%r14
7197	mulxq	%r11,%rax,%rdx
7198	imulq	%r12,%r15
7199	addq	%rax,%r14
7200	adcq	%rdx,%r15
7201	movq	8+0+0(%rbp),%rdx
7202	mulxq	%r10,%r10,%rax
7203	addq	%r10,%r14
7204	mulxq	%r11,%r11,%r9
7205	adcq	%r11,%r15
7206	adcq	$0,%r9
7207	imulq	%r12,%rdx
7208	addq	%rax,%r15
7209	adcq	%rdx,%r9
7210	movq	%r13,%r10
7211	movq	%r14,%r11
7212	movq	%r15,%r12
7213	andq	$3,%r12
7214	movq	%r15,%r13
7215	andq	$-4,%r13
7216	movq	%r9,%r14
7217	shrdq	$2,%r9,%r15
7218	shrq	$2,%r9
7219	addq	%r13,%r15
7220	adcq	%r14,%r9
7221	addq	%r15,%r10
7222	adcq	%r9,%r11
7223	adcq	$0,%r12
7224	addq	0+16(%rdi),%r10
7225	adcq	8+16(%rdi),%r11
7226	adcq	$1,%r12
7227	movq	0+0+0(%rbp),%rdx
7228	movq	%rdx,%r15
7229	mulxq	%r10,%r13,%r14
7230	mulxq	%r11,%rax,%rdx
7231	imulq	%r12,%r15
7232	addq	%rax,%r14
7233	adcq	%rdx,%r15
7234	movq	8+0+0(%rbp),%rdx
7235	mulxq	%r10,%r10,%rax
7236	addq	%r10,%r14
7237	mulxq	%r11,%r11,%r9
7238	adcq	%r11,%r15
7239	adcq	$0,%r9
7240	imulq	%r12,%rdx
7241	addq	%rax,%r15
7242	adcq	%rdx,%r9
7243	movq	%r13,%r10
7244	movq	%r14,%r11
7245	movq	%r15,%r12
7246	andq	$3,%r12
7247	movq	%r15,%r13
7248	andq	$-4,%r13
7249	movq	%r9,%r14
7250	shrdq	$2,%r9,%r15
7251	shrq	$2,%r9
7252	addq	%r13,%r15
7253	adcq	%r14,%r9
7254	addq	%r15,%r10
7255	adcq	%r9,%r11
7256	adcq	$0,%r12
7257
7258	leaq	32(%rdi),%rdi
7259	movq	$10,%rcx
7260	xorq	%r8,%r8
7261
7262	cmpq	$384,%rbx
7263	ja	L$seal_avx2_tail_512
7264	cmpq	$256,%rbx
7265	ja	L$seal_avx2_tail_384
7266	cmpq	$128,%rbx
7267	ja	L$seal_avx2_tail_256
7268
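# Tail for at most 128 remaining bytes: a single two-block state, with Poly1305
# still interleaved over ciphertext written earlier.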
7269L$seal_avx2_tail_128:
7270	vmovdqa	L$chacha20_consts(%rip),%ymm0
7271	vmovdqa	0+64(%rbp),%ymm4
7272	vmovdqa	0+96(%rbp),%ymm8
7273	vmovdqa	L$avx2_inc(%rip),%ymm12
7274	vpaddd	0+160(%rbp),%ymm12,%ymm12
7275	vmovdqa	%ymm12,0+160(%rbp)
7276
7277L$seal_avx2_tail_128_rounds_and_3xhash:
7278	addq	0+0(%rdi),%r10
7279	adcq	8+0(%rdi),%r11
7280	adcq	$1,%r12
7281	movq	0+0+0(%rbp),%rdx
7282	movq	%rdx,%r15
7283	mulxq	%r10,%r13,%r14
7284	mulxq	%r11,%rax,%rdx
7285	imulq	%r12,%r15
7286	addq	%rax,%r14
7287	adcq	%rdx,%r15
7288	movq	8+0+0(%rbp),%rdx
7289	mulxq	%r10,%r10,%rax
7290	addq	%r10,%r14
7291	mulxq	%r11,%r11,%r9
7292	adcq	%r11,%r15
7293	adcq	$0,%r9
7294	imulq	%r12,%rdx
7295	addq	%rax,%r15
7296	adcq	%rdx,%r9
7297	movq	%r13,%r10
7298	movq	%r14,%r11
7299	movq	%r15,%r12
7300	andq	$3,%r12
7301	movq	%r15,%r13
7302	andq	$-4,%r13
7303	movq	%r9,%r14
7304	shrdq	$2,%r9,%r15
7305	shrq	$2,%r9
7306	addq	%r13,%r15
7307	adcq	%r14,%r9
7308	addq	%r15,%r10
7309	adcq	%r9,%r11
7310	adcq	$0,%r12
7311
7312	leaq	16(%rdi),%rdi
7313L$seal_avx2_tail_128_rounds_and_2xhash:
7314	vpaddd	%ymm4,%ymm0,%ymm0
7315	vpxor	%ymm0,%ymm12,%ymm12
7316	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7317	vpaddd	%ymm12,%ymm8,%ymm8
7318	vpxor	%ymm8,%ymm4,%ymm4
7319	vpsrld	$20,%ymm4,%ymm3
7320	vpslld	$12,%ymm4,%ymm4
7321	vpxor	%ymm3,%ymm4,%ymm4
7322	vpaddd	%ymm4,%ymm0,%ymm0
7323	vpxor	%ymm0,%ymm12,%ymm12
7324	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7325	vpaddd	%ymm12,%ymm8,%ymm8
7326	vpxor	%ymm8,%ymm4,%ymm4
7327	vpslld	$7,%ymm4,%ymm3
7328	vpsrld	$25,%ymm4,%ymm4
7329	vpxor	%ymm3,%ymm4,%ymm4
7330	vpalignr	$12,%ymm12,%ymm12,%ymm12
7331	vpalignr	$8,%ymm8,%ymm8,%ymm8
7332	vpalignr	$4,%ymm4,%ymm4,%ymm4
7333	addq	0+0(%rdi),%r10
7334	adcq	8+0(%rdi),%r11
7335	adcq	$1,%r12
7336	movq	0+0+0(%rbp),%rdx
7337	movq	%rdx,%r15
7338	mulxq	%r10,%r13,%r14
7339	mulxq	%r11,%rax,%rdx
7340	imulq	%r12,%r15
7341	addq	%rax,%r14
7342	adcq	%rdx,%r15
7343	movq	8+0+0(%rbp),%rdx
7344	mulxq	%r10,%r10,%rax
7345	addq	%r10,%r14
7346	mulxq	%r11,%r11,%r9
7347	adcq	%r11,%r15
7348	adcq	$0,%r9
7349	imulq	%r12,%rdx
7350	addq	%rax,%r15
7351	adcq	%rdx,%r9
7352	movq	%r13,%r10
7353	movq	%r14,%r11
7354	movq	%r15,%r12
7355	andq	$3,%r12
7356	movq	%r15,%r13
7357	andq	$-4,%r13
7358	movq	%r9,%r14
7359	shrdq	$2,%r9,%r15
7360	shrq	$2,%r9
7361	addq	%r13,%r15
7362	adcq	%r14,%r9
7363	addq	%r15,%r10
7364	adcq	%r9,%r11
7365	adcq	$0,%r12
7366	vpaddd	%ymm4,%ymm0,%ymm0
7367	vpxor	%ymm0,%ymm12,%ymm12
7368	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7369	vpaddd	%ymm12,%ymm8,%ymm8
7370	vpxor	%ymm8,%ymm4,%ymm4
7371	vpsrld	$20,%ymm4,%ymm3
7372	vpslld	$12,%ymm4,%ymm4
7373	vpxor	%ymm3,%ymm4,%ymm4
7374	vpaddd	%ymm4,%ymm0,%ymm0
7375	vpxor	%ymm0,%ymm12,%ymm12
7376	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7377	vpaddd	%ymm12,%ymm8,%ymm8
7378	vpxor	%ymm8,%ymm4,%ymm4
7379	vpslld	$7,%ymm4,%ymm3
7380	vpsrld	$25,%ymm4,%ymm4
7381	vpxor	%ymm3,%ymm4,%ymm4
7382	vpalignr	$4,%ymm12,%ymm12,%ymm12
7383	vpalignr	$8,%ymm8,%ymm8,%ymm8
7384	vpalignr	$12,%ymm4,%ymm4,%ymm4
7385	addq	0+16(%rdi),%r10
7386	adcq	8+16(%rdi),%r11
7387	adcq	$1,%r12
7388	movq	0+0+0(%rbp),%rdx
7389	movq	%rdx,%r15
7390	mulxq	%r10,%r13,%r14
7391	mulxq	%r11,%rax,%rdx
7392	imulq	%r12,%r15
7393	addq	%rax,%r14
7394	adcq	%rdx,%r15
7395	movq	8+0+0(%rbp),%rdx
7396	mulxq	%r10,%r10,%rax
7397	addq	%r10,%r14
7398	mulxq	%r11,%r11,%r9
7399	adcq	%r11,%r15
7400	adcq	$0,%r9
7401	imulq	%r12,%rdx
7402	addq	%rax,%r15
7403	adcq	%rdx,%r9
7404	movq	%r13,%r10
7405	movq	%r14,%r11
7406	movq	%r15,%r12
7407	andq	$3,%r12
7408	movq	%r15,%r13
7409	andq	$-4,%r13
7410	movq	%r9,%r14
7411	shrdq	$2,%r9,%r15
7412	shrq	$2,%r9
7413	addq	%r13,%r15
7414	adcq	%r14,%r9
7415	addq	%r15,%r10
7416	adcq	%r9,%r11
7417	adcq	$0,%r12
7418
7419	leaq	32(%rdi),%rdi
7420	decq	%rcx
7421	jg	L$seal_avx2_tail_128_rounds_and_3xhash
7422	decq	%r8
7423	jge	L$seal_avx2_tail_128_rounds_and_2xhash
7424	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7425	vpaddd	0+64(%rbp),%ymm4,%ymm4
7426	vpaddd	0+96(%rbp),%ymm8,%ymm8
7427	vpaddd	0+160(%rbp),%ymm12,%ymm12
7428	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7429	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7430	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7431	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7432	vmovdqa	%ymm3,%ymm8
7433
7434	jmp	L$seal_avx2_short_loop
7435
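# Tail for 129-256 remaining bytes: two states with scalar (mulq) Poly1305
# interleaving; 128 bytes are encrypted and stored here, and the short path
# below finishes the rest with 128 bytes of ciphertext still to hash.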
7436L$seal_avx2_tail_256:
7437	vmovdqa	L$chacha20_consts(%rip),%ymm0
7438	vmovdqa	0+64(%rbp),%ymm4
7439	vmovdqa	0+96(%rbp),%ymm8
7440	vmovdqa	%ymm0,%ymm1
7441	vmovdqa	%ymm4,%ymm5
7442	vmovdqa	%ymm8,%ymm9
7443	vmovdqa	L$avx2_inc(%rip),%ymm12
7444	vpaddd	0+160(%rbp),%ymm12,%ymm13
7445	vpaddd	%ymm13,%ymm12,%ymm12
7446	vmovdqa	%ymm12,0+160(%rbp)
7447	vmovdqa	%ymm13,0+192(%rbp)
7448
7449L$seal_avx2_tail_256_rounds_and_3xhash:
7450	addq	0+0(%rdi),%r10
7451	adcq	8+0(%rdi),%r11
7452	adcq	$1,%r12
7453	movq	0+0+0(%rbp),%rax
7454	movq	%rax,%r15
7455	mulq	%r10
7456	movq	%rax,%r13
7457	movq	%rdx,%r14
7458	movq	0+0+0(%rbp),%rax
7459	mulq	%r11
7460	imulq	%r12,%r15
7461	addq	%rax,%r14
7462	adcq	%rdx,%r15
7463	movq	8+0+0(%rbp),%rax
7464	movq	%rax,%r9
7465	mulq	%r10
7466	addq	%rax,%r14
7467	adcq	$0,%rdx
7468	movq	%rdx,%r10
7469	movq	8+0+0(%rbp),%rax
7470	mulq	%r11
7471	addq	%rax,%r15
7472	adcq	$0,%rdx
7473	imulq	%r12,%r9
7474	addq	%r10,%r15
7475	adcq	%rdx,%r9
7476	movq	%r13,%r10
7477	movq	%r14,%r11
7478	movq	%r15,%r12
7479	andq	$3,%r12
7480	movq	%r15,%r13
7481	andq	$-4,%r13
7482	movq	%r9,%r14
7483	shrdq	$2,%r9,%r15
7484	shrq	$2,%r9
7485	addq	%r13,%r15
7486	adcq	%r14,%r9
7487	addq	%r15,%r10
7488	adcq	%r9,%r11
7489	adcq	$0,%r12
7490
7491	leaq	16(%rdi),%rdi
7492L$seal_avx2_tail_256_rounds_and_2xhash:
7493	vpaddd	%ymm4,%ymm0,%ymm0
7494	vpxor	%ymm0,%ymm12,%ymm12
7495	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7496	vpaddd	%ymm12,%ymm8,%ymm8
7497	vpxor	%ymm8,%ymm4,%ymm4
7498	vpsrld	$20,%ymm4,%ymm3
7499	vpslld	$12,%ymm4,%ymm4
7500	vpxor	%ymm3,%ymm4,%ymm4
7501	vpaddd	%ymm4,%ymm0,%ymm0
7502	vpxor	%ymm0,%ymm12,%ymm12
7503	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7504	vpaddd	%ymm12,%ymm8,%ymm8
7505	vpxor	%ymm8,%ymm4,%ymm4
7506	vpslld	$7,%ymm4,%ymm3
7507	vpsrld	$25,%ymm4,%ymm4
7508	vpxor	%ymm3,%ymm4,%ymm4
7509	vpalignr	$12,%ymm12,%ymm12,%ymm12
7510	vpalignr	$8,%ymm8,%ymm8,%ymm8
7511	vpalignr	$4,%ymm4,%ymm4,%ymm4
7512	vpaddd	%ymm5,%ymm1,%ymm1
7513	vpxor	%ymm1,%ymm13,%ymm13
7514	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7515	vpaddd	%ymm13,%ymm9,%ymm9
7516	vpxor	%ymm9,%ymm5,%ymm5
7517	vpsrld	$20,%ymm5,%ymm3
7518	vpslld	$12,%ymm5,%ymm5
7519	vpxor	%ymm3,%ymm5,%ymm5
7520	vpaddd	%ymm5,%ymm1,%ymm1
7521	vpxor	%ymm1,%ymm13,%ymm13
7522	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7523	vpaddd	%ymm13,%ymm9,%ymm9
7524	vpxor	%ymm9,%ymm5,%ymm5
7525	vpslld	$7,%ymm5,%ymm3
7526	vpsrld	$25,%ymm5,%ymm5
7527	vpxor	%ymm3,%ymm5,%ymm5
7528	vpalignr	$12,%ymm13,%ymm13,%ymm13
7529	vpalignr	$8,%ymm9,%ymm9,%ymm9
7530	vpalignr	$4,%ymm5,%ymm5,%ymm5
7531	addq	0+0(%rdi),%r10
7532	adcq	8+0(%rdi),%r11
7533	adcq	$1,%r12
7534	movq	0+0+0(%rbp),%rax
7535	movq	%rax,%r15
7536	mulq	%r10
7537	movq	%rax,%r13
7538	movq	%rdx,%r14
7539	movq	0+0+0(%rbp),%rax
7540	mulq	%r11
7541	imulq	%r12,%r15
7542	addq	%rax,%r14
7543	adcq	%rdx,%r15
7544	movq	8+0+0(%rbp),%rax
7545	movq	%rax,%r9
7546	mulq	%r10
7547	addq	%rax,%r14
7548	adcq	$0,%rdx
7549	movq	%rdx,%r10
7550	movq	8+0+0(%rbp),%rax
7551	mulq	%r11
7552	addq	%rax,%r15
7553	adcq	$0,%rdx
7554	imulq	%r12,%r9
7555	addq	%r10,%r15
7556	adcq	%rdx,%r9
7557	movq	%r13,%r10
7558	movq	%r14,%r11
7559	movq	%r15,%r12
7560	andq	$3,%r12
7561	movq	%r15,%r13
7562	andq	$-4,%r13
7563	movq	%r9,%r14
7564	shrdq	$2,%r9,%r15
7565	shrq	$2,%r9
7566	addq	%r13,%r15
7567	adcq	%r14,%r9
7568	addq	%r15,%r10
7569	adcq	%r9,%r11
7570	adcq	$0,%r12
7571	vpaddd	%ymm4,%ymm0,%ymm0
7572	vpxor	%ymm0,%ymm12,%ymm12
7573	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7574	vpaddd	%ymm12,%ymm8,%ymm8
7575	vpxor	%ymm8,%ymm4,%ymm4
7576	vpsrld	$20,%ymm4,%ymm3
7577	vpslld	$12,%ymm4,%ymm4
7578	vpxor	%ymm3,%ymm4,%ymm4
7579	vpaddd	%ymm4,%ymm0,%ymm0
7580	vpxor	%ymm0,%ymm12,%ymm12
7581	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7582	vpaddd	%ymm12,%ymm8,%ymm8
7583	vpxor	%ymm8,%ymm4,%ymm4
7584	vpslld	$7,%ymm4,%ymm3
7585	vpsrld	$25,%ymm4,%ymm4
7586	vpxor	%ymm3,%ymm4,%ymm4
7587	vpalignr	$4,%ymm12,%ymm12,%ymm12
7588	vpalignr	$8,%ymm8,%ymm8,%ymm8
7589	vpalignr	$12,%ymm4,%ymm4,%ymm4
7590	vpaddd	%ymm5,%ymm1,%ymm1
7591	vpxor	%ymm1,%ymm13,%ymm13
7592	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7593	vpaddd	%ymm13,%ymm9,%ymm9
7594	vpxor	%ymm9,%ymm5,%ymm5
7595	vpsrld	$20,%ymm5,%ymm3
7596	vpslld	$12,%ymm5,%ymm5
7597	vpxor	%ymm3,%ymm5,%ymm5
7598	vpaddd	%ymm5,%ymm1,%ymm1
7599	vpxor	%ymm1,%ymm13,%ymm13
7600	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7601	vpaddd	%ymm13,%ymm9,%ymm9
7602	vpxor	%ymm9,%ymm5,%ymm5
7603	vpslld	$7,%ymm5,%ymm3
7604	vpsrld	$25,%ymm5,%ymm5
7605	vpxor	%ymm3,%ymm5,%ymm5
7606	vpalignr	$4,%ymm13,%ymm13,%ymm13
7607	vpalignr	$8,%ymm9,%ymm9,%ymm9
7608	vpalignr	$12,%ymm5,%ymm5,%ymm5
7609	addq	0+16(%rdi),%r10
7610	adcq	8+16(%rdi),%r11
7611	adcq	$1,%r12
7612	movq	0+0+0(%rbp),%rax
7613	movq	%rax,%r15
7614	mulq	%r10
7615	movq	%rax,%r13
7616	movq	%rdx,%r14
7617	movq	0+0+0(%rbp),%rax
7618	mulq	%r11
7619	imulq	%r12,%r15
7620	addq	%rax,%r14
7621	adcq	%rdx,%r15
7622	movq	8+0+0(%rbp),%rax
7623	movq	%rax,%r9
7624	mulq	%r10
7625	addq	%rax,%r14
7626	adcq	$0,%rdx
7627	movq	%rdx,%r10
7628	movq	8+0+0(%rbp),%rax
7629	mulq	%r11
7630	addq	%rax,%r15
7631	adcq	$0,%rdx
7632	imulq	%r12,%r9
7633	addq	%r10,%r15
7634	adcq	%rdx,%r9
7635	movq	%r13,%r10
7636	movq	%r14,%r11
7637	movq	%r15,%r12
7638	andq	$3,%r12
7639	movq	%r15,%r13
7640	andq	$-4,%r13
7641	movq	%r9,%r14
7642	shrdq	$2,%r9,%r15
7643	shrq	$2,%r9
7644	addq	%r13,%r15
7645	adcq	%r14,%r9
7646	addq	%r15,%r10
7647	adcq	%r9,%r11
7648	adcq	$0,%r12
7649
7650	leaq	32(%rdi),%rdi
7651	decq	%rcx
7652	jg	L$seal_avx2_tail_256_rounds_and_3xhash
7653	decq	%r8
7654	jge	L$seal_avx2_tail_256_rounds_and_2xhash
7655	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7656	vpaddd	0+64(%rbp),%ymm5,%ymm5
7657	vpaddd	0+96(%rbp),%ymm9,%ymm9
7658	vpaddd	0+192(%rbp),%ymm13,%ymm13
7659	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7660	vpaddd	0+64(%rbp),%ymm4,%ymm4
7661	vpaddd	0+96(%rbp),%ymm8,%ymm8
7662	vpaddd	0+160(%rbp),%ymm12,%ymm12
7663	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7664	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7665	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7666	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7667	vpxor	0+0(%rsi),%ymm3,%ymm3
7668	vpxor	32+0(%rsi),%ymm1,%ymm1
7669	vpxor	64+0(%rsi),%ymm5,%ymm5
7670	vpxor	96+0(%rsi),%ymm9,%ymm9
7671	vmovdqu	%ymm3,0+0(%rdi)
7672	vmovdqu	%ymm1,32+0(%rdi)
7673	vmovdqu	%ymm5,64+0(%rdi)
7674	vmovdqu	%ymm9,96+0(%rdi)
7675	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7676	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7677	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7678	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7679	vmovdqa	%ymm3,%ymm8
7680
7681	movq	$128,%rcx
7682	leaq	128(%rsi),%rsi
7683	subq	$128,%rbx
7684	jmp	L$seal_avx2_short_hash_remainder
7685
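# Tail for 257-384 remaining bytes: three states; 256 bytes are encrypted and
# stored here, the short path finishes the remainder.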
7686L$seal_avx2_tail_384:
7687	vmovdqa	L$chacha20_consts(%rip),%ymm0
7688	vmovdqa	0+64(%rbp),%ymm4
7689	vmovdqa	0+96(%rbp),%ymm8
7690	vmovdqa	%ymm0,%ymm1
7691	vmovdqa	%ymm4,%ymm5
7692	vmovdqa	%ymm8,%ymm9
7693	vmovdqa	%ymm0,%ymm2
7694	vmovdqa	%ymm4,%ymm6
7695	vmovdqa	%ymm8,%ymm10
7696	vmovdqa	L$avx2_inc(%rip),%ymm12
7697	vpaddd	0+160(%rbp),%ymm12,%ymm14
7698	vpaddd	%ymm14,%ymm12,%ymm13
7699	vpaddd	%ymm13,%ymm12,%ymm12
7700	vmovdqa	%ymm12,0+160(%rbp)
7701	vmovdqa	%ymm13,0+192(%rbp)
7702	vmovdqa	%ymm14,0+224(%rbp)
7703
7704L$seal_avx2_tail_384_rounds_and_3xhash:
7705	addq	0+0(%rdi),%r10
7706	adcq	8+0(%rdi),%r11
7707	adcq	$1,%r12
7708	movq	0+0+0(%rbp),%rax
7709	movq	%rax,%r15
7710	mulq	%r10
7711	movq	%rax,%r13
7712	movq	%rdx,%r14
7713	movq	0+0+0(%rbp),%rax
7714	mulq	%r11
7715	imulq	%r12,%r15
7716	addq	%rax,%r14
7717	adcq	%rdx,%r15
7718	movq	8+0+0(%rbp),%rax
7719	movq	%rax,%r9
7720	mulq	%r10
7721	addq	%rax,%r14
7722	adcq	$0,%rdx
7723	movq	%rdx,%r10
7724	movq	8+0+0(%rbp),%rax
7725	mulq	%r11
7726	addq	%rax,%r15
7727	adcq	$0,%rdx
7728	imulq	%r12,%r9
7729	addq	%r10,%r15
7730	adcq	%rdx,%r9
7731	movq	%r13,%r10
7732	movq	%r14,%r11
7733	movq	%r15,%r12
7734	andq	$3,%r12
7735	movq	%r15,%r13
7736	andq	$-4,%r13
7737	movq	%r9,%r14
7738	shrdq	$2,%r9,%r15
7739	shrq	$2,%r9
7740	addq	%r13,%r15
7741	adcq	%r14,%r9
7742	addq	%r15,%r10
7743	adcq	%r9,%r11
7744	adcq	$0,%r12
7745
7746	leaq	16(%rdi),%rdi
7747L$seal_avx2_tail_384_rounds_and_2xhash:
7748	vpaddd	%ymm4,%ymm0,%ymm0
7749	vpxor	%ymm0,%ymm12,%ymm12
7750	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7751	vpaddd	%ymm12,%ymm8,%ymm8
7752	vpxor	%ymm8,%ymm4,%ymm4
7753	vpsrld	$20,%ymm4,%ymm3
7754	vpslld	$12,%ymm4,%ymm4
7755	vpxor	%ymm3,%ymm4,%ymm4
7756	vpaddd	%ymm4,%ymm0,%ymm0
7757	vpxor	%ymm0,%ymm12,%ymm12
7758	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7759	vpaddd	%ymm12,%ymm8,%ymm8
7760	vpxor	%ymm8,%ymm4,%ymm4
7761	vpslld	$7,%ymm4,%ymm3
7762	vpsrld	$25,%ymm4,%ymm4
7763	vpxor	%ymm3,%ymm4,%ymm4
7764	vpalignr	$12,%ymm12,%ymm12,%ymm12
7765	vpalignr	$8,%ymm8,%ymm8,%ymm8
7766	vpalignr	$4,%ymm4,%ymm4,%ymm4
7767	vpaddd	%ymm5,%ymm1,%ymm1
7768	vpxor	%ymm1,%ymm13,%ymm13
7769	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7770	vpaddd	%ymm13,%ymm9,%ymm9
7771	vpxor	%ymm9,%ymm5,%ymm5
7772	vpsrld	$20,%ymm5,%ymm3
7773	vpslld	$12,%ymm5,%ymm5
7774	vpxor	%ymm3,%ymm5,%ymm5
7775	vpaddd	%ymm5,%ymm1,%ymm1
7776	vpxor	%ymm1,%ymm13,%ymm13
7777	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7778	vpaddd	%ymm13,%ymm9,%ymm9
7779	vpxor	%ymm9,%ymm5,%ymm5
7780	vpslld	$7,%ymm5,%ymm3
7781	vpsrld	$25,%ymm5,%ymm5
7782	vpxor	%ymm3,%ymm5,%ymm5
7783	vpalignr	$12,%ymm13,%ymm13,%ymm13
7784	vpalignr	$8,%ymm9,%ymm9,%ymm9
7785	vpalignr	$4,%ymm5,%ymm5,%ymm5
7786	addq	0+0(%rdi),%r10
7787	adcq	8+0(%rdi),%r11
7788	adcq	$1,%r12
7789	movq	0+0+0(%rbp),%rax
7790	movq	%rax,%r15
7791	mulq	%r10
7792	movq	%rax,%r13
7793	movq	%rdx,%r14
7794	movq	0+0+0(%rbp),%rax
7795	mulq	%r11
7796	imulq	%r12,%r15
7797	addq	%rax,%r14
7798	adcq	%rdx,%r15
7799	movq	8+0+0(%rbp),%rax
7800	movq	%rax,%r9
7801	mulq	%r10
7802	addq	%rax,%r14
7803	adcq	$0,%rdx
7804	movq	%rdx,%r10
7805	movq	8+0+0(%rbp),%rax
7806	mulq	%r11
7807	addq	%rax,%r15
7808	adcq	$0,%rdx
7809	imulq	%r12,%r9
7810	addq	%r10,%r15
7811	adcq	%rdx,%r9
7812	movq	%r13,%r10
7813	movq	%r14,%r11
7814	movq	%r15,%r12
7815	andq	$3,%r12
7816	movq	%r15,%r13
7817	andq	$-4,%r13
7818	movq	%r9,%r14
7819	shrdq	$2,%r9,%r15
7820	shrq	$2,%r9
7821	addq	%r13,%r15
7822	adcq	%r14,%r9
7823	addq	%r15,%r10
7824	adcq	%r9,%r11
7825	adcq	$0,%r12
7826	vpaddd	%ymm6,%ymm2,%ymm2
7827	vpxor	%ymm2,%ymm14,%ymm14
7828	vpshufb	L$rol16(%rip),%ymm14,%ymm14
7829	vpaddd	%ymm14,%ymm10,%ymm10
7830	vpxor	%ymm10,%ymm6,%ymm6
7831	vpsrld	$20,%ymm6,%ymm3
7832	vpslld	$12,%ymm6,%ymm6
7833	vpxor	%ymm3,%ymm6,%ymm6
7834	vpaddd	%ymm6,%ymm2,%ymm2
7835	vpxor	%ymm2,%ymm14,%ymm14
7836	vpshufb	L$rol8(%rip),%ymm14,%ymm14
7837	vpaddd	%ymm14,%ymm10,%ymm10
7838	vpxor	%ymm10,%ymm6,%ymm6
7839	vpslld	$7,%ymm6,%ymm3
7840	vpsrld	$25,%ymm6,%ymm6
7841	vpxor	%ymm3,%ymm6,%ymm6
7842	vpalignr	$12,%ymm14,%ymm14,%ymm14
7843	vpalignr	$8,%ymm10,%ymm10,%ymm10
7844	vpalignr	$4,%ymm6,%ymm6,%ymm6
7845	vpaddd	%ymm4,%ymm0,%ymm0
7846	vpxor	%ymm0,%ymm12,%ymm12
7847	vpshufb	L$rol16(%rip),%ymm12,%ymm12
7848	vpaddd	%ymm12,%ymm8,%ymm8
7849	vpxor	%ymm8,%ymm4,%ymm4
7850	vpsrld	$20,%ymm4,%ymm3
7851	vpslld	$12,%ymm4,%ymm4
7852	vpxor	%ymm3,%ymm4,%ymm4
7853	vpaddd	%ymm4,%ymm0,%ymm0
7854	vpxor	%ymm0,%ymm12,%ymm12
7855	vpshufb	L$rol8(%rip),%ymm12,%ymm12
7856	vpaddd	%ymm12,%ymm8,%ymm8
7857	vpxor	%ymm8,%ymm4,%ymm4
7858	vpslld	$7,%ymm4,%ymm3
7859	vpsrld	$25,%ymm4,%ymm4
7860	vpxor	%ymm3,%ymm4,%ymm4
7861	vpalignr	$4,%ymm12,%ymm12,%ymm12
7862	vpalignr	$8,%ymm8,%ymm8,%ymm8
7863	vpalignr	$12,%ymm4,%ymm4,%ymm4
7864	addq	0+16(%rdi),%r10
7865	adcq	8+16(%rdi),%r11
7866	adcq	$1,%r12
7867	movq	0+0+0(%rbp),%rax
7868	movq	%rax,%r15
7869	mulq	%r10
7870	movq	%rax,%r13
7871	movq	%rdx,%r14
7872	movq	0+0+0(%rbp),%rax
7873	mulq	%r11
7874	imulq	%r12,%r15
7875	addq	%rax,%r14
7876	adcq	%rdx,%r15
7877	movq	8+0+0(%rbp),%rax
7878	movq	%rax,%r9
7879	mulq	%r10
7880	addq	%rax,%r14
7881	adcq	$0,%rdx
7882	movq	%rdx,%r10
7883	movq	8+0+0(%rbp),%rax
7884	mulq	%r11
7885	addq	%rax,%r15
7886	adcq	$0,%rdx
7887	imulq	%r12,%r9
7888	addq	%r10,%r15
7889	adcq	%rdx,%r9
7890	movq	%r13,%r10
7891	movq	%r14,%r11
7892	movq	%r15,%r12
7893	andq	$3,%r12
7894	movq	%r15,%r13
7895	andq	$-4,%r13
7896	movq	%r9,%r14
7897	shrdq	$2,%r9,%r15
7898	shrq	$2,%r9
7899	addq	%r13,%r15
7900	adcq	%r14,%r9
7901	addq	%r15,%r10
7902	adcq	%r9,%r11
7903	adcq	$0,%r12
7904	vpaddd	%ymm5,%ymm1,%ymm1
7905	vpxor	%ymm1,%ymm13,%ymm13
7906	vpshufb	L$rol16(%rip),%ymm13,%ymm13
7907	vpaddd	%ymm13,%ymm9,%ymm9
7908	vpxor	%ymm9,%ymm5,%ymm5
7909	vpsrld	$20,%ymm5,%ymm3
7910	vpslld	$12,%ymm5,%ymm5
7911	vpxor	%ymm3,%ymm5,%ymm5
7912	vpaddd	%ymm5,%ymm1,%ymm1
7913	vpxor	%ymm1,%ymm13,%ymm13
7914	vpshufb	L$rol8(%rip),%ymm13,%ymm13
7915	vpaddd	%ymm13,%ymm9,%ymm9
7916	vpxor	%ymm9,%ymm5,%ymm5
7917	vpslld	$7,%ymm5,%ymm3
7918	vpsrld	$25,%ymm5,%ymm5
7919	vpxor	%ymm3,%ymm5,%ymm5
7920	vpalignr	$4,%ymm13,%ymm13,%ymm13
7921	vpalignr	$8,%ymm9,%ymm9,%ymm9
7922	vpalignr	$12,%ymm5,%ymm5,%ymm5
7923	vpaddd	%ymm6,%ymm2,%ymm2
7924	vpxor	%ymm2,%ymm14,%ymm14
7925	vpshufb	L$rol16(%rip),%ymm14,%ymm14
7926	vpaddd	%ymm14,%ymm10,%ymm10
7927	vpxor	%ymm10,%ymm6,%ymm6
7928	vpsrld	$20,%ymm6,%ymm3
7929	vpslld	$12,%ymm6,%ymm6
7930	vpxor	%ymm3,%ymm6,%ymm6
7931	vpaddd	%ymm6,%ymm2,%ymm2
7932	vpxor	%ymm2,%ymm14,%ymm14
7933	vpshufb	L$rol8(%rip),%ymm14,%ymm14
7934	vpaddd	%ymm14,%ymm10,%ymm10
7935	vpxor	%ymm10,%ymm6,%ymm6
7936	vpslld	$7,%ymm6,%ymm3
7937	vpsrld	$25,%ymm6,%ymm6
7938	vpxor	%ymm3,%ymm6,%ymm6
7939	vpalignr	$4,%ymm14,%ymm14,%ymm14
7940	vpalignr	$8,%ymm10,%ymm10,%ymm10
7941	vpalignr	$12,%ymm6,%ymm6,%ymm6
7942
7943	leaq	32(%rdi),%rdi
7944	decq	%rcx
7945	jg	L$seal_avx2_tail_384_rounds_and_3xhash
7946	decq	%r8
7947	jge	L$seal_avx2_tail_384_rounds_and_2xhash
7948	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
7949	vpaddd	0+64(%rbp),%ymm6,%ymm6
7950	vpaddd	0+96(%rbp),%ymm10,%ymm10
7951	vpaddd	0+224(%rbp),%ymm14,%ymm14
7952	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
7953	vpaddd	0+64(%rbp),%ymm5,%ymm5
7954	vpaddd	0+96(%rbp),%ymm9,%ymm9
7955	vpaddd	0+192(%rbp),%ymm13,%ymm13
7956	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
7957	vpaddd	0+64(%rbp),%ymm4,%ymm4
7958	vpaddd	0+96(%rbp),%ymm8,%ymm8
7959	vpaddd	0+160(%rbp),%ymm12,%ymm12
7960	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7961	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7962	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7963	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7964	vpxor	0+0(%rsi),%ymm3,%ymm3
7965	vpxor	32+0(%rsi),%ymm2,%ymm2
7966	vpxor	64+0(%rsi),%ymm6,%ymm6
7967	vpxor	96+0(%rsi),%ymm10,%ymm10
7968	vmovdqu	%ymm3,0+0(%rdi)
7969	vmovdqu	%ymm2,32+0(%rdi)
7970	vmovdqu	%ymm6,64+0(%rdi)
7971	vmovdqu	%ymm10,96+0(%rdi)
7972	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7973	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7974	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7975	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7976	vpxor	0+128(%rsi),%ymm3,%ymm3
7977	vpxor	32+128(%rsi),%ymm1,%ymm1
7978	vpxor	64+128(%rsi),%ymm5,%ymm5
7979	vpxor	96+128(%rsi),%ymm9,%ymm9
7980	vmovdqu	%ymm3,0+128(%rdi)
7981	vmovdqu	%ymm1,32+128(%rdi)
7982	vmovdqu	%ymm5,64+128(%rdi)
7983	vmovdqu	%ymm9,96+128(%rdi)
7984	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7985	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7986	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7987	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7988	vmovdqa	%ymm3,%ymm8
7989
7990	movq	$256,%rcx
7991	leaq	256(%rsi),%rsi
7992	subq	$256,%rbx
7993	jmp	L$seal_avx2_short_hash_remainder
7994
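# Tail for 385-512 remaining bytes: four states with mulx-based Poly1305
# interleaving; 384 bytes are encrypted and stored here, the short path
# finishes the remainder.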
7995L$seal_avx2_tail_512:
7996	vmovdqa	L$chacha20_consts(%rip),%ymm0
7997	vmovdqa	0+64(%rbp),%ymm4
7998	vmovdqa	0+96(%rbp),%ymm8
7999	vmovdqa	%ymm0,%ymm1
8000	vmovdqa	%ymm4,%ymm5
8001	vmovdqa	%ymm8,%ymm9
8002	vmovdqa	%ymm0,%ymm2
8003	vmovdqa	%ymm4,%ymm6
8004	vmovdqa	%ymm8,%ymm10
8005	vmovdqa	%ymm0,%ymm3
8006	vmovdqa	%ymm4,%ymm7
8007	vmovdqa	%ymm8,%ymm11
8008	vmovdqa	L$avx2_inc(%rip),%ymm12
8009	vpaddd	0+160(%rbp),%ymm12,%ymm15
8010	vpaddd	%ymm15,%ymm12,%ymm14
8011	vpaddd	%ymm14,%ymm12,%ymm13
8012	vpaddd	%ymm13,%ymm12,%ymm12
8013	vmovdqa	%ymm15,0+256(%rbp)
8014	vmovdqa	%ymm14,0+224(%rbp)
8015	vmovdqa	%ymm13,0+192(%rbp)
8016	vmovdqa	%ymm12,0+160(%rbp)
8017
8018L$seal_avx2_tail_512_rounds_and_3xhash:
8019	addq	0+0(%rdi),%r10
8020	adcq	8+0(%rdi),%r11
8021	adcq	$1,%r12
8022	movq	0+0+0(%rbp),%rdx
8023	movq	%rdx,%r15
8024	mulxq	%r10,%r13,%r14
8025	mulxq	%r11,%rax,%rdx
8026	imulq	%r12,%r15
8027	addq	%rax,%r14
8028	adcq	%rdx,%r15
8029	movq	8+0+0(%rbp),%rdx
8030	mulxq	%r10,%r10,%rax
8031	addq	%r10,%r14
8032	mulxq	%r11,%r11,%r9
8033	adcq	%r11,%r15
8034	adcq	$0,%r9
8035	imulq	%r12,%rdx
8036	addq	%rax,%r15
8037	adcq	%rdx,%r9
8038	movq	%r13,%r10
8039	movq	%r14,%r11
8040	movq	%r15,%r12
8041	andq	$3,%r12
8042	movq	%r15,%r13
8043	andq	$-4,%r13
8044	movq	%r9,%r14
8045	shrdq	$2,%r9,%r15
8046	shrq	$2,%r9
8047	addq	%r13,%r15
8048	adcq	%r14,%r9
8049	addq	%r15,%r10
8050	adcq	%r9,%r11
8051	adcq	$0,%r12
8052
8053	leaq	16(%rdi),%rdi
8054L$seal_avx2_tail_512_rounds_and_2xhash:
8055	vmovdqa	%ymm8,0+128(%rbp)
8056	vmovdqa	L$rol16(%rip),%ymm8
8057	vpaddd	%ymm7,%ymm3,%ymm3
8058	vpaddd	%ymm6,%ymm2,%ymm2
8059	vpaddd	%ymm5,%ymm1,%ymm1
8060	vpaddd	%ymm4,%ymm0,%ymm0
8061	vpxor	%ymm3,%ymm15,%ymm15
8062	vpxor	%ymm2,%ymm14,%ymm14
8063	vpxor	%ymm1,%ymm13,%ymm13
8064	vpxor	%ymm0,%ymm12,%ymm12
8065	vpshufb	%ymm8,%ymm15,%ymm15
8066	vpshufb	%ymm8,%ymm14,%ymm14
8067	vpshufb	%ymm8,%ymm13,%ymm13
8068	vpshufb	%ymm8,%ymm12,%ymm12
8069	vpaddd	%ymm15,%ymm11,%ymm11
8070	vpaddd	%ymm14,%ymm10,%ymm10
8071	vpaddd	%ymm13,%ymm9,%ymm9
8072	vpaddd	0+128(%rbp),%ymm12,%ymm8
8073	vpxor	%ymm11,%ymm7,%ymm7
8074	vpxor	%ymm10,%ymm6,%ymm6
8075	addq	0+0(%rdi),%r10
8076	adcq	8+0(%rdi),%r11
8077	adcq	$1,%r12
8078	vpxor	%ymm9,%ymm5,%ymm5
8079	vpxor	%ymm8,%ymm4,%ymm4
8080	vmovdqa	%ymm8,0+128(%rbp)
8081	vpsrld	$20,%ymm7,%ymm8
8082	vpslld	$32-20,%ymm7,%ymm7
8083	vpxor	%ymm8,%ymm7,%ymm7
8084	vpsrld	$20,%ymm6,%ymm8
8085	vpslld	$32-20,%ymm6,%ymm6
8086	vpxor	%ymm8,%ymm6,%ymm6
8087	vpsrld	$20,%ymm5,%ymm8
8088	vpslld	$32-20,%ymm5,%ymm5
8089	vpxor	%ymm8,%ymm5,%ymm5
8090	vpsrld	$20,%ymm4,%ymm8
8091	vpslld	$32-20,%ymm4,%ymm4
8092	vpxor	%ymm8,%ymm4,%ymm4
8093	vmovdqa	L$rol8(%rip),%ymm8
8094	vpaddd	%ymm7,%ymm3,%ymm3
8095	vpaddd	%ymm6,%ymm2,%ymm2
8096	vpaddd	%ymm5,%ymm1,%ymm1
8097	vpaddd	%ymm4,%ymm0,%ymm0
8098	movq	0+0+0(%rbp),%rdx
8099	movq	%rdx,%r15
8100	mulxq	%r10,%r13,%r14
8101	mulxq	%r11,%rax,%rdx
8102	imulq	%r12,%r15
8103	addq	%rax,%r14
8104	adcq	%rdx,%r15
8105	vpxor	%ymm3,%ymm15,%ymm15
8106	vpxor	%ymm2,%ymm14,%ymm14
8107	vpxor	%ymm1,%ymm13,%ymm13
8108	vpxor	%ymm0,%ymm12,%ymm12
8109	vpshufb	%ymm8,%ymm15,%ymm15
8110	vpshufb	%ymm8,%ymm14,%ymm14
8111	vpshufb	%ymm8,%ymm13,%ymm13
8112	vpshufb	%ymm8,%ymm12,%ymm12
8113	vpaddd	%ymm15,%ymm11,%ymm11
8114	vpaddd	%ymm14,%ymm10,%ymm10
8115	vpaddd	%ymm13,%ymm9,%ymm9
8116	vpaddd	0+128(%rbp),%ymm12,%ymm8
8117	vpxor	%ymm11,%ymm7,%ymm7
8118	vpxor	%ymm10,%ymm6,%ymm6
8119	vpxor	%ymm9,%ymm5,%ymm5
8120	vpxor	%ymm8,%ymm4,%ymm4
8121	vmovdqa	%ymm8,0+128(%rbp)
8122	vpsrld	$25,%ymm7,%ymm8
8123	vpslld	$32-25,%ymm7,%ymm7
8124	vpxor	%ymm8,%ymm7,%ymm7
8125	movq	8+0+0(%rbp),%rdx
8126	mulxq	%r10,%r10,%rax
8127	addq	%r10,%r14
8128	mulxq	%r11,%r11,%r9
8129	adcq	%r11,%r15
8130	adcq	$0,%r9
8131	imulq	%r12,%rdx
8132	vpsrld	$25,%ymm6,%ymm8
8133	vpslld	$32-25,%ymm6,%ymm6
8134	vpxor	%ymm8,%ymm6,%ymm6
8135	vpsrld	$25,%ymm5,%ymm8
8136	vpslld	$32-25,%ymm5,%ymm5
8137	vpxor	%ymm8,%ymm5,%ymm5
8138	vpsrld	$25,%ymm4,%ymm8
8139	vpslld	$32-25,%ymm4,%ymm4
8140	vpxor	%ymm8,%ymm4,%ymm4
8141	vmovdqa	0+128(%rbp),%ymm8
8142	vpalignr	$4,%ymm7,%ymm7,%ymm7
8143	vpalignr	$8,%ymm11,%ymm11,%ymm11
8144	vpalignr	$12,%ymm15,%ymm15,%ymm15
8145	vpalignr	$4,%ymm6,%ymm6,%ymm6
8146	vpalignr	$8,%ymm10,%ymm10,%ymm10
8147	vpalignr	$12,%ymm14,%ymm14,%ymm14
8148	vpalignr	$4,%ymm5,%ymm5,%ymm5
8149	vpalignr	$8,%ymm9,%ymm9,%ymm9
8150	vpalignr	$12,%ymm13,%ymm13,%ymm13
8151	vpalignr	$4,%ymm4,%ymm4,%ymm4
8152	addq	%rax,%r15
8153	adcq	%rdx,%r9
8154	vpalignr	$8,%ymm8,%ymm8,%ymm8
8155	vpalignr	$12,%ymm12,%ymm12,%ymm12
8156	vmovdqa	%ymm8,0+128(%rbp)
8157	vmovdqa	L$rol16(%rip),%ymm8
8158	vpaddd	%ymm7,%ymm3,%ymm3
8159	vpaddd	%ymm6,%ymm2,%ymm2
8160	vpaddd	%ymm5,%ymm1,%ymm1
8161	vpaddd	%ymm4,%ymm0,%ymm0
8162	vpxor	%ymm3,%ymm15,%ymm15
8163	vpxor	%ymm2,%ymm14,%ymm14
8164	vpxor	%ymm1,%ymm13,%ymm13
8165	vpxor	%ymm0,%ymm12,%ymm12
8166	vpshufb	%ymm8,%ymm15,%ymm15
8167	vpshufb	%ymm8,%ymm14,%ymm14
8168	vpshufb	%ymm8,%ymm13,%ymm13
8169	vpshufb	%ymm8,%ymm12,%ymm12
8170	vpaddd	%ymm15,%ymm11,%ymm11
8171	vpaddd	%ymm14,%ymm10,%ymm10
8172	vpaddd	%ymm13,%ymm9,%ymm9
8173	vpaddd	0+128(%rbp),%ymm12,%ymm8
8174	movq	%r13,%r10
8175	movq	%r14,%r11
8176	movq	%r15,%r12
8177	andq	$3,%r12
8178	movq	%r15,%r13
8179	andq	$-4,%r13
8180	movq	%r9,%r14
8181	shrdq	$2,%r9,%r15
8182	shrq	$2,%r9
8183	addq	%r13,%r15
8184	adcq	%r14,%r9
8185	addq	%r15,%r10
8186	adcq	%r9,%r11
8187	adcq	$0,%r12
8188	vpxor	%ymm11,%ymm7,%ymm7
8189	vpxor	%ymm10,%ymm6,%ymm6
8190	vpxor	%ymm9,%ymm5,%ymm5
8191	vpxor	%ymm8,%ymm4,%ymm4
8192	vmovdqa	%ymm8,0+128(%rbp)
8193	vpsrld	$20,%ymm7,%ymm8
8194	vpslld	$32-20,%ymm7,%ymm7
8195	vpxor	%ymm8,%ymm7,%ymm7
8196	vpsrld	$20,%ymm6,%ymm8
8197	vpslld	$32-20,%ymm6,%ymm6
8198	vpxor	%ymm8,%ymm6,%ymm6
8199	vpsrld	$20,%ymm5,%ymm8
8200	vpslld	$32-20,%ymm5,%ymm5
8201	vpxor	%ymm8,%ymm5,%ymm5
8202	vpsrld	$20,%ymm4,%ymm8
8203	vpslld	$32-20,%ymm4,%ymm4
8204	vpxor	%ymm8,%ymm4,%ymm4
8205	vmovdqa	L$rol8(%rip),%ymm8
8206	vpaddd	%ymm7,%ymm3,%ymm3
8207	vpaddd	%ymm6,%ymm2,%ymm2
8208	addq	0+16(%rdi),%r10
8209	adcq	8+16(%rdi),%r11
8210	adcq	$1,%r12
8211	vpaddd	%ymm5,%ymm1,%ymm1
8212	vpaddd	%ymm4,%ymm0,%ymm0
8213	vpxor	%ymm3,%ymm15,%ymm15
8214	vpxor	%ymm2,%ymm14,%ymm14
8215	vpxor	%ymm1,%ymm13,%ymm13
8216	vpxor	%ymm0,%ymm12,%ymm12
8217	vpshufb	%ymm8,%ymm15,%ymm15
8218	vpshufb	%ymm8,%ymm14,%ymm14
8219	vpshufb	%ymm8,%ymm13,%ymm13
8220	vpshufb	%ymm8,%ymm12,%ymm12
8221	vpaddd	%ymm15,%ymm11,%ymm11
8222	vpaddd	%ymm14,%ymm10,%ymm10
8223	vpaddd	%ymm13,%ymm9,%ymm9
8224	vpaddd	0+128(%rbp),%ymm12,%ymm8
8225	vpxor	%ymm11,%ymm7,%ymm7
8226	vpxor	%ymm10,%ymm6,%ymm6
8227	vpxor	%ymm9,%ymm5,%ymm5
8228	vpxor	%ymm8,%ymm4,%ymm4
8229	vmovdqa	%ymm8,0+128(%rbp)
8230	vpsrld	$25,%ymm7,%ymm8
8231	movq	0+0+0(%rbp),%rdx
8232	movq	%rdx,%r15
8233	mulxq	%r10,%r13,%r14
8234	mulxq	%r11,%rax,%rdx
8235	imulq	%r12,%r15
8236	addq	%rax,%r14
8237	adcq	%rdx,%r15
8238	vpslld	$32-25,%ymm7,%ymm7
8239	vpxor	%ymm8,%ymm7,%ymm7
8240	vpsrld	$25,%ymm6,%ymm8
8241	vpslld	$32-25,%ymm6,%ymm6
8242	vpxor	%ymm8,%ymm6,%ymm6
8243	vpsrld	$25,%ymm5,%ymm8
8244	vpslld	$32-25,%ymm5,%ymm5
8245	vpxor	%ymm8,%ymm5,%ymm5
8246	vpsrld	$25,%ymm4,%ymm8
8247	vpslld	$32-25,%ymm4,%ymm4
8248	vpxor	%ymm8,%ymm4,%ymm4
8249	vmovdqa	0+128(%rbp),%ymm8
8250	vpalignr	$12,%ymm7,%ymm7,%ymm7
8251	vpalignr	$8,%ymm11,%ymm11,%ymm11
8252	vpalignr	$4,%ymm15,%ymm15,%ymm15
8253	vpalignr	$12,%ymm6,%ymm6,%ymm6
8254	vpalignr	$8,%ymm10,%ymm10,%ymm10
8255	vpalignr	$4,%ymm14,%ymm14,%ymm14
8256	vpalignr	$12,%ymm5,%ymm5,%ymm5
8257	vpalignr	$8,%ymm9,%ymm9,%ymm9
8258	movq	8+0+0(%rbp),%rdx
8259	mulxq	%r10,%r10,%rax
8260	addq	%r10,%r14
8261	mulxq	%r11,%r11,%r9
8262	adcq	%r11,%r15
8263	adcq	$0,%r9
8264	imulq	%r12,%rdx
8265	vpalignr	$4,%ymm13,%ymm13,%ymm13
8266	vpalignr	$12,%ymm4,%ymm4,%ymm4
8267	vpalignr	$8,%ymm8,%ymm8,%ymm8
8268	vpalignr	$4,%ymm12,%ymm12,%ymm12
8285	addq	%rax,%r15
8286	adcq	%rdx,%r9
8307	movq	%r13,%r10
8308	movq	%r14,%r11
8309	movq	%r15,%r12
8310	andq	$3,%r12
8311	movq	%r15,%r13
8312	andq	$-4,%r13
8313	movq	%r9,%r14
8314	shrdq	$2,%r9,%r15
8315	shrq	$2,%r9
8316	addq	%r13,%r15
8317	adcq	%r14,%r9
8318	addq	%r15,%r10
8319	adcq	%r9,%r11
8320	adcq	$0,%r12
8321
8322	leaq	32(%rdi),%rdi
8323	decq	%rcx
8324	jg	L$seal_avx2_tail_512_rounds_and_3xhash
8325	decq	%r8
8326	jge	L$seal_avx2_tail_512_rounds_and_2xhash
8327	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
8328	vpaddd	0+64(%rbp),%ymm7,%ymm7
8329	vpaddd	0+96(%rbp),%ymm11,%ymm11
8330	vpaddd	0+256(%rbp),%ymm15,%ymm15
8331	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
8332	vpaddd	0+64(%rbp),%ymm6,%ymm6
8333	vpaddd	0+96(%rbp),%ymm10,%ymm10
8334	vpaddd	0+224(%rbp),%ymm14,%ymm14
8335	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
8336	vpaddd	0+64(%rbp),%ymm5,%ymm5
8337	vpaddd	0+96(%rbp),%ymm9,%ymm9
8338	vpaddd	0+192(%rbp),%ymm13,%ymm13
8339	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
8340	vpaddd	0+64(%rbp),%ymm4,%ymm4
8341	vpaddd	0+96(%rbp),%ymm8,%ymm8
8342	vpaddd	0+160(%rbp),%ymm12,%ymm12
8343
8344	vmovdqa	%ymm0,0+128(%rbp)
8345	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
8346	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
8347	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
8348	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
8349	vpxor	0+0(%rsi),%ymm0,%ymm0
8350	vpxor	32+0(%rsi),%ymm3,%ymm3
8351	vpxor	64+0(%rsi),%ymm7,%ymm7
8352	vpxor	96+0(%rsi),%ymm11,%ymm11
8353	vmovdqu	%ymm0,0+0(%rdi)
8354	vmovdqu	%ymm3,32+0(%rdi)
8355	vmovdqu	%ymm7,64+0(%rdi)
8356	vmovdqu	%ymm11,96+0(%rdi)
8357
8358	vmovdqa	0+128(%rbp),%ymm0
8359	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8360	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8361	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8362	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8363	vpxor	0+128(%rsi),%ymm3,%ymm3
8364	vpxor	32+128(%rsi),%ymm2,%ymm2
8365	vpxor	64+128(%rsi),%ymm6,%ymm6
8366	vpxor	96+128(%rsi),%ymm10,%ymm10
8367	vmovdqu	%ymm3,0+128(%rdi)
8368	vmovdqu	%ymm2,32+128(%rdi)
8369	vmovdqu	%ymm6,64+128(%rdi)
8370	vmovdqu	%ymm10,96+128(%rdi)
8371	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8372	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8373	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8374	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8375	vpxor	0+256(%rsi),%ymm3,%ymm3
8376	vpxor	32+256(%rsi),%ymm1,%ymm1
8377	vpxor	64+256(%rsi),%ymm5,%ymm5
8378	vpxor	96+256(%rsi),%ymm9,%ymm9
8379	vmovdqu	%ymm3,0+256(%rdi)
8380	vmovdqu	%ymm1,32+256(%rdi)
8381	vmovdqu	%ymm5,64+256(%rdi)
8382	vmovdqu	%ymm9,96+256(%rdi)
8383	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8384	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8385	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8386	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8387	vmovdqa	%ymm3,%ymm8
8388
8389	movq	$384,%rcx
8390	leaq	384(%rsi),%rsi
8391	subq	$384,%rbx
8392	jmp	L$seal_avx2_short_hash_remainder
8393
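# Entry for short inputs needing roughly 320 bytes of keystream: three
# two-block states are run; block 0 is clamped to become the Poly1305 key and
# the remaining keystream feeds the short loop below.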
8394L$seal_avx2_320:
8395	vmovdqa	%ymm0,%ymm1
8396	vmovdqa	%ymm0,%ymm2
8397	vmovdqa	%ymm4,%ymm5
8398	vmovdqa	%ymm4,%ymm6
8399	vmovdqa	%ymm8,%ymm9
8400	vmovdqa	%ymm8,%ymm10
8401	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
8402	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
8403	vmovdqa	%ymm4,%ymm7
8404	vmovdqa	%ymm8,%ymm11
8405	vmovdqa	%ymm12,0+160(%rbp)
8406	vmovdqa	%ymm13,0+192(%rbp)
8407	vmovdqa	%ymm14,0+224(%rbp)
8408	movq	$10,%r10
8409L$seal_avx2_320_rounds:
8410	vpaddd	%ymm4,%ymm0,%ymm0
8411	vpxor	%ymm0,%ymm12,%ymm12
8412	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8413	vpaddd	%ymm12,%ymm8,%ymm8
8414	vpxor	%ymm8,%ymm4,%ymm4
8415	vpsrld	$20,%ymm4,%ymm3
8416	vpslld	$12,%ymm4,%ymm4
8417	vpxor	%ymm3,%ymm4,%ymm4
8418	vpaddd	%ymm4,%ymm0,%ymm0
8419	vpxor	%ymm0,%ymm12,%ymm12
8420	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8421	vpaddd	%ymm12,%ymm8,%ymm8
8422	vpxor	%ymm8,%ymm4,%ymm4
8423	vpslld	$7,%ymm4,%ymm3
8424	vpsrld	$25,%ymm4,%ymm4
8425	vpxor	%ymm3,%ymm4,%ymm4
8426	vpalignr	$12,%ymm12,%ymm12,%ymm12
8427	vpalignr	$8,%ymm8,%ymm8,%ymm8
8428	vpalignr	$4,%ymm4,%ymm4,%ymm4
8429	vpaddd	%ymm5,%ymm1,%ymm1
8430	vpxor	%ymm1,%ymm13,%ymm13
8431	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8432	vpaddd	%ymm13,%ymm9,%ymm9
8433	vpxor	%ymm9,%ymm5,%ymm5
8434	vpsrld	$20,%ymm5,%ymm3
8435	vpslld	$12,%ymm5,%ymm5
8436	vpxor	%ymm3,%ymm5,%ymm5
8437	vpaddd	%ymm5,%ymm1,%ymm1
8438	vpxor	%ymm1,%ymm13,%ymm13
8439	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8440	vpaddd	%ymm13,%ymm9,%ymm9
8441	vpxor	%ymm9,%ymm5,%ymm5
8442	vpslld	$7,%ymm5,%ymm3
8443	vpsrld	$25,%ymm5,%ymm5
8444	vpxor	%ymm3,%ymm5,%ymm5
8445	vpalignr	$12,%ymm13,%ymm13,%ymm13
8446	vpalignr	$8,%ymm9,%ymm9,%ymm9
8447	vpalignr	$4,%ymm5,%ymm5,%ymm5
8448	vpaddd	%ymm6,%ymm2,%ymm2
8449	vpxor	%ymm2,%ymm14,%ymm14
8450	vpshufb	L$rol16(%rip),%ymm14,%ymm14
8451	vpaddd	%ymm14,%ymm10,%ymm10
8452	vpxor	%ymm10,%ymm6,%ymm6
8453	vpsrld	$20,%ymm6,%ymm3
8454	vpslld	$12,%ymm6,%ymm6
8455	vpxor	%ymm3,%ymm6,%ymm6
8456	vpaddd	%ymm6,%ymm2,%ymm2
8457	vpxor	%ymm2,%ymm14,%ymm14
8458	vpshufb	L$rol8(%rip),%ymm14,%ymm14
8459	vpaddd	%ymm14,%ymm10,%ymm10
8460	vpxor	%ymm10,%ymm6,%ymm6
8461	vpslld	$7,%ymm6,%ymm3
8462	vpsrld	$25,%ymm6,%ymm6
8463	vpxor	%ymm3,%ymm6,%ymm6
8464	vpalignr	$12,%ymm14,%ymm14,%ymm14
8465	vpalignr	$8,%ymm10,%ymm10,%ymm10
8466	vpalignr	$4,%ymm6,%ymm6,%ymm6
8467	vpaddd	%ymm4,%ymm0,%ymm0
8468	vpxor	%ymm0,%ymm12,%ymm12
8469	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8470	vpaddd	%ymm12,%ymm8,%ymm8
8471	vpxor	%ymm8,%ymm4,%ymm4
8472	vpsrld	$20,%ymm4,%ymm3
8473	vpslld	$12,%ymm4,%ymm4
8474	vpxor	%ymm3,%ymm4,%ymm4
8475	vpaddd	%ymm4,%ymm0,%ymm0
8476	vpxor	%ymm0,%ymm12,%ymm12
8477	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8478	vpaddd	%ymm12,%ymm8,%ymm8
8479	vpxor	%ymm8,%ymm4,%ymm4
8480	vpslld	$7,%ymm4,%ymm3
8481	vpsrld	$25,%ymm4,%ymm4
8482	vpxor	%ymm3,%ymm4,%ymm4
8483	vpalignr	$4,%ymm12,%ymm12,%ymm12
8484	vpalignr	$8,%ymm8,%ymm8,%ymm8
8485	vpalignr	$12,%ymm4,%ymm4,%ymm4
8486	vpaddd	%ymm5,%ymm1,%ymm1
8487	vpxor	%ymm1,%ymm13,%ymm13
8488	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8489	vpaddd	%ymm13,%ymm9,%ymm9
8490	vpxor	%ymm9,%ymm5,%ymm5
8491	vpsrld	$20,%ymm5,%ymm3
8492	vpslld	$12,%ymm5,%ymm5
8493	vpxor	%ymm3,%ymm5,%ymm5
8494	vpaddd	%ymm5,%ymm1,%ymm1
8495	vpxor	%ymm1,%ymm13,%ymm13
8496	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8497	vpaddd	%ymm13,%ymm9,%ymm9
8498	vpxor	%ymm9,%ymm5,%ymm5
8499	vpslld	$7,%ymm5,%ymm3
8500	vpsrld	$25,%ymm5,%ymm5
8501	vpxor	%ymm3,%ymm5,%ymm5
8502	vpalignr	$4,%ymm13,%ymm13,%ymm13
8503	vpalignr	$8,%ymm9,%ymm9,%ymm9
8504	vpalignr	$12,%ymm5,%ymm5,%ymm5
8505	vpaddd	%ymm6,%ymm2,%ymm2
8506	vpxor	%ymm2,%ymm14,%ymm14
8507	vpshufb	L$rol16(%rip),%ymm14,%ymm14
8508	vpaddd	%ymm14,%ymm10,%ymm10
8509	vpxor	%ymm10,%ymm6,%ymm6
8510	vpsrld	$20,%ymm6,%ymm3
8511	vpslld	$12,%ymm6,%ymm6
8512	vpxor	%ymm3,%ymm6,%ymm6
8513	vpaddd	%ymm6,%ymm2,%ymm2
8514	vpxor	%ymm2,%ymm14,%ymm14
8515	vpshufb	L$rol8(%rip),%ymm14,%ymm14
8516	vpaddd	%ymm14,%ymm10,%ymm10
8517	vpxor	%ymm10,%ymm6,%ymm6
8518	vpslld	$7,%ymm6,%ymm3
8519	vpsrld	$25,%ymm6,%ymm6
8520	vpxor	%ymm3,%ymm6,%ymm6
8521	vpalignr	$4,%ymm14,%ymm14,%ymm14
8522	vpalignr	$8,%ymm10,%ymm10,%ymm10
8523	vpalignr	$12,%ymm6,%ymm6,%ymm6
8524
8525	decq	%r10
8526	jne	L$seal_avx2_320_rounds
8527	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
8528	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
8529	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
8530	vpaddd	%ymm7,%ymm4,%ymm4
8531	vpaddd	%ymm7,%ymm5,%ymm5
8532	vpaddd	%ymm7,%ymm6,%ymm6
8533	vpaddd	%ymm11,%ymm8,%ymm8
8534	vpaddd	%ymm11,%ymm9,%ymm9
8535	vpaddd	%ymm11,%ymm10,%ymm10
8536	vpaddd	0+160(%rbp),%ymm12,%ymm12
8537	vpaddd	0+192(%rbp),%ymm13,%ymm13
8538	vpaddd	0+224(%rbp),%ymm14,%ymm14
8539	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8540
8541	vpand	L$clamp(%rip),%ymm3,%ymm3
8542	vmovdqa	%ymm3,0+0(%rbp)
8543
8544	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8545	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8546	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8547	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8548	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8549	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
8550	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
8551	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
8552	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
8553	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
8554	jmp	L$seal_avx2_short
8555
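# Entry for short inputs needing roughly 192 bytes of keystream: two two-block
# states; as above, block 0 is clamped to become the Poly1305 key.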
8556L$seal_avx2_192:
8557	vmovdqa	%ymm0,%ymm1
8558	vmovdqa	%ymm0,%ymm2
8559	vmovdqa	%ymm4,%ymm5
8560	vmovdqa	%ymm4,%ymm6
8561	vmovdqa	%ymm8,%ymm9
8562	vmovdqa	%ymm8,%ymm10
8563	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
8564	vmovdqa	%ymm12,%ymm11
8565	vmovdqa	%ymm13,%ymm15
8566	movq	$10,%r10
8567L$seal_avx2_192_rounds:
8568	vpaddd	%ymm4,%ymm0,%ymm0
8569	vpxor	%ymm0,%ymm12,%ymm12
8570	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8571	vpaddd	%ymm12,%ymm8,%ymm8
8572	vpxor	%ymm8,%ymm4,%ymm4
8573	vpsrld	$20,%ymm4,%ymm3
8574	vpslld	$12,%ymm4,%ymm4
8575	vpxor	%ymm3,%ymm4,%ymm4
8576	vpaddd	%ymm4,%ymm0,%ymm0
8577	vpxor	%ymm0,%ymm12,%ymm12
8578	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8579	vpaddd	%ymm12,%ymm8,%ymm8
8580	vpxor	%ymm8,%ymm4,%ymm4
8581	vpslld	$7,%ymm4,%ymm3
8582	vpsrld	$25,%ymm4,%ymm4
8583	vpxor	%ymm3,%ymm4,%ymm4
8584	vpalignr	$12,%ymm12,%ymm12,%ymm12
8585	vpalignr	$8,%ymm8,%ymm8,%ymm8
8586	vpalignr	$4,%ymm4,%ymm4,%ymm4
8587	vpaddd	%ymm5,%ymm1,%ymm1
8588	vpxor	%ymm1,%ymm13,%ymm13
8589	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8590	vpaddd	%ymm13,%ymm9,%ymm9
8591	vpxor	%ymm9,%ymm5,%ymm5
8592	vpsrld	$20,%ymm5,%ymm3
8593	vpslld	$12,%ymm5,%ymm5
8594	vpxor	%ymm3,%ymm5,%ymm5
8595	vpaddd	%ymm5,%ymm1,%ymm1
8596	vpxor	%ymm1,%ymm13,%ymm13
8597	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8598	vpaddd	%ymm13,%ymm9,%ymm9
8599	vpxor	%ymm9,%ymm5,%ymm5
8600	vpslld	$7,%ymm5,%ymm3
8601	vpsrld	$25,%ymm5,%ymm5
8602	vpxor	%ymm3,%ymm5,%ymm5
8603	vpalignr	$12,%ymm13,%ymm13,%ymm13
8604	vpalignr	$8,%ymm9,%ymm9,%ymm9
8605	vpalignr	$4,%ymm5,%ymm5,%ymm5
8606	vpaddd	%ymm4,%ymm0,%ymm0
8607	vpxor	%ymm0,%ymm12,%ymm12
8608	vpshufb	L$rol16(%rip),%ymm12,%ymm12
8609	vpaddd	%ymm12,%ymm8,%ymm8
8610	vpxor	%ymm8,%ymm4,%ymm4
8611	vpsrld	$20,%ymm4,%ymm3
8612	vpslld	$12,%ymm4,%ymm4
8613	vpxor	%ymm3,%ymm4,%ymm4
8614	vpaddd	%ymm4,%ymm0,%ymm0
8615	vpxor	%ymm0,%ymm12,%ymm12
8616	vpshufb	L$rol8(%rip),%ymm12,%ymm12
8617	vpaddd	%ymm12,%ymm8,%ymm8
8618	vpxor	%ymm8,%ymm4,%ymm4
8619	vpslld	$7,%ymm4,%ymm3
8620	vpsrld	$25,%ymm4,%ymm4
8621	vpxor	%ymm3,%ymm4,%ymm4
8622	vpalignr	$4,%ymm12,%ymm12,%ymm12
8623	vpalignr	$8,%ymm8,%ymm8,%ymm8
8624	vpalignr	$12,%ymm4,%ymm4,%ymm4
8625	vpaddd	%ymm5,%ymm1,%ymm1
8626	vpxor	%ymm1,%ymm13,%ymm13
8627	vpshufb	L$rol16(%rip),%ymm13,%ymm13
8628	vpaddd	%ymm13,%ymm9,%ymm9
8629	vpxor	%ymm9,%ymm5,%ymm5
8630	vpsrld	$20,%ymm5,%ymm3
8631	vpslld	$12,%ymm5,%ymm5
8632	vpxor	%ymm3,%ymm5,%ymm5
8633	vpaddd	%ymm5,%ymm1,%ymm1
8634	vpxor	%ymm1,%ymm13,%ymm13
8635	vpshufb	L$rol8(%rip),%ymm13,%ymm13
8636	vpaddd	%ymm13,%ymm9,%ymm9
8637	vpxor	%ymm9,%ymm5,%ymm5
8638	vpslld	$7,%ymm5,%ymm3
8639	vpsrld	$25,%ymm5,%ymm5
8640	vpxor	%ymm3,%ymm5,%ymm5
8641	vpalignr	$4,%ymm13,%ymm13,%ymm13
8642	vpalignr	$8,%ymm9,%ymm9,%ymm9
8643	vpalignr	$12,%ymm5,%ymm5,%ymm5
8644
8645	decq	%r10
8646	jne	L$seal_avx2_192_rounds
8647	vpaddd	%ymm2,%ymm0,%ymm0
8648	vpaddd	%ymm2,%ymm1,%ymm1
8649	vpaddd	%ymm6,%ymm4,%ymm4
8650	vpaddd	%ymm6,%ymm5,%ymm5
8651	vpaddd	%ymm10,%ymm8,%ymm8
8652	vpaddd	%ymm10,%ymm9,%ymm9
8653	vpaddd	%ymm11,%ymm12,%ymm12
8654	vpaddd	%ymm15,%ymm13,%ymm13
8655	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8656
8657	vpand	L$clamp(%rip),%ymm3,%ymm3
8658	vmovdqa	%ymm3,0+0(%rbp)
8659
8660	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8661	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8662	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8663	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8664	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8665	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
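# Common short path: hash the AAD (its length is apparently already in %r8,
# hence the no-op move below), then encrypt and hash 32 bytes at a time from
# the prepared keystream registers.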
8666L$seal_avx2_short:
8667	movq	%r8,%r8
8668	call	poly_hash_ad_internal
8669	xorq	%rcx,%rcx
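# Absorb any ciphertext a tail routine has already written (%rcx bytes, 16 at
# a time) into Poly1305 before producing more output.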
8670L$seal_avx2_short_hash_remainder:
8671	cmpq	$16,%rcx
8672	jb	L$seal_avx2_short_loop
8673	addq	0+0(%rdi),%r10
8674	adcq	8+0(%rdi),%r11
8675	adcq	$1,%r12
8676	movq	0+0+0(%rbp),%rax
8677	movq	%rax,%r15
8678	mulq	%r10
8679	movq	%rax,%r13
8680	movq	%rdx,%r14
8681	movq	0+0+0(%rbp),%rax
8682	mulq	%r11
8683	imulq	%r12,%r15
8684	addq	%rax,%r14
8685	adcq	%rdx,%r15
8686	movq	8+0+0(%rbp),%rax
8687	movq	%rax,%r9
8688	mulq	%r10
8689	addq	%rax,%r14
8690	adcq	$0,%rdx
8691	movq	%rdx,%r10
8692	movq	8+0+0(%rbp),%rax
8693	mulq	%r11
8694	addq	%rax,%r15
8695	adcq	$0,%rdx
8696	imulq	%r12,%r9
8697	addq	%r10,%r15
8698	adcq	%rdx,%r9
8699	movq	%r13,%r10
8700	movq	%r14,%r11
8701	movq	%r15,%r12
8702	andq	$3,%r12
8703	movq	%r15,%r13
8704	andq	$-4,%r13
8705	movq	%r9,%r14
8706	shrdq	$2,%r9,%r15
8707	shrq	$2,%r9
8708	addq	%r13,%r15
8709	adcq	%r14,%r9
8710	addq	%r15,%r10
8711	adcq	%r9,%r11
8712	adcq	$0,%r12
8713
8714	subq	$16,%rcx
8715	addq	$16,%rdi
8716	jmp	L$seal_avx2_short_hash_remainder
8717L$seal_avx2_short_loop:
8718	cmpq	$32,%rbx
8719	jb	L$seal_avx2_short_tail
8720	subq	$32,%rbx
8721
8722	vpxor	(%rsi),%ymm0,%ymm0
8723	vmovdqu	%ymm0,(%rdi)
8724	leaq	32(%rsi),%rsi
8725
8726	addq	0+0(%rdi),%r10
8727	adcq	8+0(%rdi),%r11
8728	adcq	$1,%r12
8729	movq	0+0+0(%rbp),%rax
8730	movq	%rax,%r15
8731	mulq	%r10
8732	movq	%rax,%r13
8733	movq	%rdx,%r14
8734	movq	0+0+0(%rbp),%rax
8735	mulq	%r11
8736	imulq	%r12,%r15
8737	addq	%rax,%r14
8738	adcq	%rdx,%r15
8739	movq	8+0+0(%rbp),%rax
8740	movq	%rax,%r9
8741	mulq	%r10
8742	addq	%rax,%r14
8743	adcq	$0,%rdx
8744	movq	%rdx,%r10
8745	movq	8+0+0(%rbp),%rax
8746	mulq	%r11
8747	addq	%rax,%r15
8748	adcq	$0,%rdx
8749	imulq	%r12,%r9
8750	addq	%r10,%r15
8751	adcq	%rdx,%r9
8752	movq	%r13,%r10
8753	movq	%r14,%r11
8754	movq	%r15,%r12
8755	andq	$3,%r12
8756	movq	%r15,%r13
8757	andq	$-4,%r13
8758	movq	%r9,%r14
8759	shrdq	$2,%r9,%r15
8760	shrq	$2,%r9
8761	addq	%r13,%r15
8762	adcq	%r14,%r9
8763	addq	%r15,%r10
8764	adcq	%r9,%r11
8765	adcq	$0,%r12
8766	addq	0+16(%rdi),%r10
8767	adcq	8+16(%rdi),%r11
8768	adcq	$1,%r12
8769	movq	0+0+0(%rbp),%rax
8770	movq	%rax,%r15
8771	mulq	%r10
8772	movq	%rax,%r13
8773	movq	%rdx,%r14
8774	movq	0+0+0(%rbp),%rax
8775	mulq	%r11
8776	imulq	%r12,%r15
8777	addq	%rax,%r14
8778	adcq	%rdx,%r15
8779	movq	8+0+0(%rbp),%rax
8780	movq	%rax,%r9
8781	mulq	%r10
8782	addq	%rax,%r14
8783	adcq	$0,%rdx
8784	movq	%rdx,%r10
8785	movq	8+0+0(%rbp),%rax
8786	mulq	%r11
8787	addq	%rax,%r15
8788	adcq	$0,%rdx
8789	imulq	%r12,%r9
8790	addq	%r10,%r15
8791	adcq	%rdx,%r9
8792	movq	%r13,%r10
8793	movq	%r14,%r11
8794	movq	%r15,%r12
8795	andq	$3,%r12
8796	movq	%r15,%r13
8797	andq	$-4,%r13
8798	movq	%r9,%r14
8799	shrdq	$2,%r9,%r15
8800	shrq	$2,%r9
8801	addq	%r13,%r15
8802	adcq	%r14,%r9
8803	addq	%r15,%r10
8804	adcq	%r9,%r11
8805	adcq	$0,%r12
8806
8807	leaq	32(%rdi),%rdi
8808
8809	vmovdqa	%ymm4,%ymm0
8810	vmovdqa	%ymm8,%ymm4
8811	vmovdqa	%ymm12,%ymm8
8812	vmovdqa	%ymm1,%ymm12
8813	vmovdqa	%ymm5,%ymm1
8814	vmovdqa	%ymm9,%ymm5
8815	vmovdqa	%ymm13,%ymm9
8816	vmovdqa	%ymm2,%ymm13
8817	vmovdqa	%ymm6,%ymm2
8818	jmp	L$seal_avx2_short_loop
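# Fewer than 32 bytes remain: encrypt one last 16-byte chunk with the low half
# of the keystream register, hash it, then move the high half down for the SSE
# byte-level tail.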
8819L$seal_avx2_short_tail:
8820	cmpq	$16,%rbx
8821	jb	L$seal_avx2_exit
8822	subq	$16,%rbx
8823	vpxor	(%rsi),%xmm0,%xmm3
8824	vmovdqu	%xmm3,(%rdi)
8825	leaq	16(%rsi),%rsi
8826	addq	0+0(%rdi),%r10
8827	adcq	8+0(%rdi),%r11
8828	adcq	$1,%r12
8829	movq	0+0+0(%rbp),%rax
8830	movq	%rax,%r15
8831	mulq	%r10
8832	movq	%rax,%r13
8833	movq	%rdx,%r14
8834	movq	0+0+0(%rbp),%rax
8835	mulq	%r11
8836	imulq	%r12,%r15
8837	addq	%rax,%r14
8838	adcq	%rdx,%r15
8839	movq	8+0+0(%rbp),%rax
8840	movq	%rax,%r9
8841	mulq	%r10
8842	addq	%rax,%r14
8843	adcq	$0,%rdx
8844	movq	%rdx,%r10
8845	movq	8+0+0(%rbp),%rax
8846	mulq	%r11
8847	addq	%rax,%r15
8848	adcq	$0,%rdx
8849	imulq	%r12,%r9
8850	addq	%r10,%r15
8851	adcq	%rdx,%r9
8852	movq	%r13,%r10
8853	movq	%r14,%r11
8854	movq	%r15,%r12
8855	andq	$3,%r12
8856	movq	%r15,%r13
8857	andq	$-4,%r13
8858	movq	%r9,%r14
8859	shrdq	$2,%r9,%r15
8860	shrq	$2,%r9
8861	addq	%r13,%r15
8862	adcq	%r14,%r9
8863	addq	%r15,%r10
8864	adcq	%r9,%r11
8865	adcq	$0,%r12
8866
8867	leaq	16(%rdi),%rdi
8868	vextracti128	$1,%ymm0,%xmm0
8869L$seal_avx2_exit:
8870	vzeroupper
8871	jmp	L$seal_sse_tail_16
8872
8873
8874#endif
8875