• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Assembler preamble.
; Everything that follows is assembled only when NASM's output format is
; win64; the %endif that closes this %ifidn is not in the part of the file
; reviewed here.
4%ifidn __OUTPUT_FORMAT__, win64
; Make memory operands that name a symbol RIP-relative by default.
5default	rel
; The generated code writes operand sizes as XMMWORD[...] (see the
; movaps/movdqa/movdqu lines further down).  Defining these names with an
; empty body makes them disappear at preprocessing time, leaving a bare [...].
6%define XMMWORD
7%define YMMWORD
8%define ZMMWORD
; _CET_ENDBR appears on a line of its own at the entry of
; chacha20_poly1305_open; with this empty definition it expands to nothing.
9%define _CET_ENDBR
10
; Optional symbol-prefixing support: the include is pulled in only when
; BORINGSSL_PREFIX is defined at assembly time.
11%ifdef BORINGSSL_PREFIX
12%include "boringssl_prefix_symbols_nasm.inc"
13%endif
; Code section, 64-byte aligned.
14section	.text code align=64
15
; Defined outside this file; chacha20_poly1305_open loads the dword at
; OPENSSL_ia32cap_P+8 and tests bits in it to decide whether to jump to
; chacha20_poly1305_open_avx2.
16EXTERN	OPENSSL_ia32cap_P
17
; Constant tables shared by the ChaCha20-Poly1305 routines in this file.
; The label below is emitted while still in .text; the tables themselves are
; placed in the read-only .rdata section that is opened right after it.
18chacha20_poly1305_constants:
19
20section	.rdata rdata align=8
21ALIGN	64
; The 16 ASCII bytes "expand 32-byte k", stored twice (32 bytes in total).
; The SSE code in chacha20_poly1305_open loads the first 16 bytes as the
; first row of each ChaCha20 state and adds them back after the rounds.
; NOTE(review): the second copy is presumably there for 32-byte (YMM) loads
; by AVX2 code that is not part of the lines reviewed here -- confirm.
22$L$chacha20_consts:
23	DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
24	DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
; pshufb control mask: within every 4-byte group, destination bytes 0..3 take
; source bytes 3,0,1,2, i.e. each little-endian dword is rotated left by
; 8 bits.  Stored twice (32 bytes).
25$L$rol8:
26	DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
27	DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; pshufb control mask: within every 4-byte group, destination bytes 0..3 take
; source bytes 2,3,0,1, i.e. each dword is rotated left by 16 bits.
; Stored twice (32 bytes).
28$L$rol16:
29	DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
30	DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; Four zero dwords, immediately followed in memory by $L$sse_inc.
; NOTE(review): not referenced in the lines reviewed here; presumably read
; together with $L$sse_inc as one 32-byte vector by the AVX2 code -- confirm.
31$L$avx2_init:
32	DD	0,0,0,0
; 128-bit vector whose lowest dword is 1.  The SSE code adds it with paddd to
; the fourth state row to step the dword in lane 0 by one per 64-byte block.
33$L$sse_inc:
34	DD	1,0,0,0
; Two 128-bit halves, each with 2 in its lowest dword.
; NOTE(review): not referenced in the lines reviewed here; presumably the
; AVX2 counterpart of $L$sse_inc -- confirm.
35$L$avx2_inc:
36	DD	2,0,0,0,2,0,0,0
; Bit masks.  chacha20_poly1305_open ANDs the first 16 bytes (the first two
; qwords) into the freshly generated key block before storing it at
; [rbp+160], which is where the Poly1305 multiply code reads its multiplier.
; The following 16 bytes are all ones (an AND with them changes nothing).
; NOTE(review): the all-ones half is presumably for a 32-byte AND in the
; AVX2 path -- confirm.
37$L$clamp:
38	DQ	0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
39	DQ	0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
40ALIGN	16
; Sixteen 16-byte rows; row k (counting from 1) has its first k bytes set to
; 0xff and the remaining 16-k bytes set to 0x00.
; NOTE(review): not referenced in the lines reviewed here; presumably used
; to keep only the first k bytes of a partial 16-byte block -- confirm.
41$L$and_masks:
42	DB	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43	DB	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44	DB	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
45	DB	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
46	DB	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
47	DB	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
48	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
49	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
50	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
51	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
52	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
53	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
54	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
55	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
56	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
57	DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
58section	.text
59
60
61
62ALIGN	64
;-----------------------------------------------------------------------
; poly_hash_ad_internal
;
; Starts a fresh Poly1305-style accumulator and absorbs a byte string into
; it, 16 bytes at a time.  Internal helper: it is reached with a plain
; `call` from chacha20_poly1305_open and uses that function's register
; assignments rather than a standard calling convention.
;
; In:    rcx = pointer to the bytes to absorb
;        r8  = number of bytes
;        rbp = frame base of the caller; the 128-bit multiplier r is read
;              from [rbp+160] (low qword, "r0") and [rbp+168] (high
;              qword, "r1")
; Out:   r10 = accumulator bits 0..63    ("h0")
;        r11 = accumulator bits 64..127  ("h1")
;        r12 = accumulator bits 128 and up ("h2", a small value)
; Clobb: rax, rdx, r9, r13, r14, r15, flags.  The general path also
;        changes rcx and r8; the 13-byte fast path leaves them untouched.
;
; Every block is processed as
;        h = ((h + block + 2^128) * r) folded modulo 2^130 - 5
; The fold is partial: no final conditional subtraction is done here.
;-----------------------------------------------------------------------
63poly_hash_ad_internal:
64
65
; h = 0
66	xor	r10,r10
67	xor	r11,r11
68	xor	r12,r12
; Exactly 13 bytes of input take the straight-line path below; any other
; length goes to the generic loop.
; NOTE(review): going by the label name, 13 is presumably the length of the
; TLS record additional data -- confirm.
69	cmp	r8,13
70	jne	NEAR $L$hash_ad_loop
71$L$poly_fast_tls_ad:
72
; Because h is still zero, the block can be loaded straight into h.
; h0 = bytes 0..7.  The second load reads bytes 5..12 and the shift drops
; bytes 5..7, leaving bytes 8..12 zero-extended in h1, so nothing beyond the
; 13th byte is read.  h2 = 1 is the 2^128 bit that the other paths add with
; `adc r12,1`.
73	mov	r10,QWORD[rcx]
74	mov	r11,QWORD[5+rcx]
75	shr	r11,24
76	mov	r12,1
; --- t = h * r, accumulated as four 64-bit limbs r13:r14:r15:r9 ---
; r0*h0 -> r14:r13; a copy of r0 is kept in r15 for the r0*h2 term.
77	mov	rax,QWORD[((0+160+0))+rbp]
78	mov	r15,rax
79	mul	r10
80	mov	r13,rax
81	mov	r14,rdx
; r0*h1 is added at limb 1; r15 = r0*h2 (imul, low 64 bits only) picks up
; the high half plus carry.
82	mov	rax,QWORD[((0+160+0))+rbp]
83	mul	r11
84	imul	r15,r12
85	add	r14,rax
86	adc	r15,rdx
; r1*h0: low half into limb 1; high half plus carry is parked in r10
; (h0 is not needed any more).  A copy of r1 is kept in r9 for r1*h2.
87	mov	rax,QWORD[((8+160+0))+rbp]
88	mov	r9,rax
89	mul	r10
90	add	r14,rax
91	adc	rdx,0
92	mov	r10,rdx
; r1*h1 is added at limb 2, then the parked carry word; r9 = r1*h2 plus the
; high half and carry becomes limb 3.
93	mov	rax,QWORD[((8+160+0))+rbp]
94	mul	r11
95	add	r15,rax
96	adc	rdx,0
97	imul	r9,r12
98	add	r15,r10
99	adc	r9,rdx
; --- fold t back to about 130 bits ---
; Keep the low 130 bits of t as the new h (h2 = limb 2 & 3).  With
; c = t >> 130:  r14:r13 = (limb 3 : limb 2 & ~3) = 4*c, and the
; shrd/shr pair leaves r9:r15 = c.  Their sum 5*c is added to h, using
; 2^130 = 5 (mod 2^130 - 5).
100	mov	r10,r13
101	mov	r11,r14
102	mov	r12,r15
103	and	r12,3
104	mov	r13,r15
105	and	r13,-4
106	mov	r14,r9
107	shrd	r15,r9,2
108	shr	r9,2
109	add	r15,r13
110	adc	r9,r14
111	add	r10,r15
112	adc	r11,r9
113	adc	r12,0
114
115	ret
; Generic path: absorb whole 16-byte blocks while at least 16 bytes remain.
116$L$hash_ad_loop:
117
118	cmp	r8,16
119	jb	NEAR $L$hash_ad_tail
; h += block + 2^128
120	add	r10,QWORD[((0+0))+rcx]
121	adc	r11,QWORD[((8+0))+rcx]
122	adc	r12,1
; h = h * r, then fold -- the same instruction sequence as in the fast path.
123	mov	rax,QWORD[((0+160+0))+rbp]
124	mov	r15,rax
125	mul	r10
126	mov	r13,rax
127	mov	r14,rdx
128	mov	rax,QWORD[((0+160+0))+rbp]
129	mul	r11
130	imul	r15,r12
131	add	r14,rax
132	adc	r15,rdx
133	mov	rax,QWORD[((8+160+0))+rbp]
134	mov	r9,rax
135	mul	r10
136	add	r14,rax
137	adc	rdx,0
138	mov	r10,rdx
139	mov	rax,QWORD[((8+160+0))+rbp]
140	mul	r11
141	add	r15,rax
142	adc	rdx,0
143	imul	r9,r12
144	add	r15,r10
145	adc	r9,rdx
146	mov	r10,r13
147	mov	r11,r14
148	mov	r12,r15
149	and	r12,3
150	mov	r13,r15
151	and	r13,-4
152	mov	r14,r9
153	shrd	r15,r9,2
154	shr	r9,2
155	add	r15,r13
156	adc	r9,r14
157	add	r10,r15
158	adc	r11,r9
159	adc	r12,0
160
; Advance to the next block.
161	lea	rcx,[16+rcx]
162	sub	r8,16
163	jmp	NEAR $L$hash_ad_loop
; Fewer than 16 bytes remain (r8 = 0..15).
164$L$hash_ad_tail:
165	cmp	r8,0
166	je	NEAR $L$hash_ad_done
167
; Assemble the remaining r8 bytes into r14:r13, zero-padded to 16 bytes.
; rcx is moved one past the last byte and the bytes are read back to front:
; each step shifts r14:r13 left by one byte and inserts the next byte at the
; bottom, so the first byte of the tail ends up in the lowest byte
; (little-endian order).  Only bytes inside the buffer are read.
168	xor	r13,r13
169	xor	r14,r14
170	xor	r15,r15
171	add	rcx,r8
172$L$hash_ad_tail_loop:
173	shld	r14,r13,8
174	shl	r13,8
175	movzx	r15,BYTE[((-1))+rcx]
176	xor	r13,r15
177	dec	rcx
178	dec	r8
179	jne	NEAR $L$hash_ad_tail_loop
180
; h += padded tail + 2^128 (the padded tail is treated like a full block).
181	add	r10,r13
182	adc	r11,r14
183	adc	r12,1
; h = h * r, then fold -- the same instruction sequence as in the fast path.
184	mov	rax,QWORD[((0+160+0))+rbp]
185	mov	r15,rax
186	mul	r10
187	mov	r13,rax
188	mov	r14,rdx
189	mov	rax,QWORD[((0+160+0))+rbp]
190	mul	r11
191	imul	r15,r12
192	add	r14,rax
193	adc	r15,rdx
194	mov	rax,QWORD[((8+160+0))+rbp]
195	mov	r9,rax
196	mul	r10
197	add	r14,rax
198	adc	rdx,0
199	mov	r10,rdx
200	mov	rax,QWORD[((8+160+0))+rbp]
201	mul	r11
202	add	r15,rax
203	adc	rdx,0
204	imul	r9,r12
205	add	r15,r10
206	adc	r9,rdx
207	mov	r10,r13
208	mov	r11,r14
209	mov	r12,r15
210	and	r12,3
211	mov	r13,r15
212	and	r13,-4
213	mov	r14,r9
214	shrd	r15,r9,2
215	shr	r9,2
216	add	r15,r13
217	adc	r9,r14
218	add	r10,r15
219	adc	r11,r9
220	adc	r12,0
221
222
; Result is in r10:r11:r12.
223$L$hash_ad_done:
224	ret
225
226
227
228global	chacha20_poly1305_open
229
230ALIGN	64
231chacha20_poly1305_open:
232	mov	QWORD[8+rsp],rdi	;WIN64 prologue
233	mov	QWORD[16+rsp],rsi
234	mov	rax,rsp
235$L$SEH_begin_chacha20_poly1305_open:
236	mov	rdi,rcx
237	mov	rsi,rdx
238	mov	rdx,r8
239	mov	rcx,r9
240	mov	r8,QWORD[40+rsp]
241	mov	r9,QWORD[48+rsp]
242
243
244
245_CET_ENDBR
246	push	rbp
247
248	push	rbx
249
250	push	r12
251
252	push	r13
253
254	push	r14
255
256	push	r15
257
258
259
260	push	r9
261
262	sub	rsp,288 + 160 + 32
263
264
265	lea	rbp,[32+rsp]
266	and	rbp,-32
267
268	movaps	XMMWORD[(0+0)+rbp],xmm6
269	movaps	XMMWORD[(16+0)+rbp],xmm7
270	movaps	XMMWORD[(32+0)+rbp],xmm8
271	movaps	XMMWORD[(48+0)+rbp],xmm9
272	movaps	XMMWORD[(64+0)+rbp],xmm10
273	movaps	XMMWORD[(80+0)+rbp],xmm11
274	movaps	XMMWORD[(96+0)+rbp],xmm12
275	movaps	XMMWORD[(112+0)+rbp],xmm13
276	movaps	XMMWORD[(128+0)+rbp],xmm14
277	movaps	XMMWORD[(144+0)+rbp],xmm15
278
279	mov	rbx,rdx
280	mov	QWORD[((0+160+32))+rbp],r8
281	mov	QWORD[((8+160+32))+rbp],rbx
282
283	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
284	and	eax,288
285	xor	eax,288
286	jz	NEAR chacha20_poly1305_open_avx2
287
288	cmp	rbx,128
289	jbe	NEAR $L$open_sse_128
290
291	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
292	movdqu	xmm4,XMMWORD[r9]
293	movdqu	xmm8,XMMWORD[16+r9]
294	movdqu	xmm12,XMMWORD[32+r9]
295
296	movdqa	xmm7,xmm12
297
298	movdqa	XMMWORD[(160+48)+rbp],xmm4
299	movdqa	XMMWORD[(160+64)+rbp],xmm8
300	movdqa	XMMWORD[(160+96)+rbp],xmm12
301	mov	r10,10
302$L$open_sse_init_rounds:
303	paddd	xmm0,xmm4
304	pxor	xmm12,xmm0
305	pshufb	xmm12,XMMWORD[$L$rol16]
306	paddd	xmm8,xmm12
307	pxor	xmm4,xmm8
308	movdqa	xmm3,xmm4
309	pslld	xmm3,12
310	psrld	xmm4,20
311	pxor	xmm4,xmm3
312	paddd	xmm0,xmm4
313	pxor	xmm12,xmm0
314	pshufb	xmm12,XMMWORD[$L$rol8]
315	paddd	xmm8,xmm12
316	pxor	xmm4,xmm8
317	movdqa	xmm3,xmm4
318	pslld	xmm3,7
319	psrld	xmm4,25
320	pxor	xmm4,xmm3
321DB	102,15,58,15,228,4
322DB	102,69,15,58,15,192,8
323DB	102,69,15,58,15,228,12
324	paddd	xmm0,xmm4
325	pxor	xmm12,xmm0
326	pshufb	xmm12,XMMWORD[$L$rol16]
327	paddd	xmm8,xmm12
328	pxor	xmm4,xmm8
329	movdqa	xmm3,xmm4
330	pslld	xmm3,12
331	psrld	xmm4,20
332	pxor	xmm4,xmm3
333	paddd	xmm0,xmm4
334	pxor	xmm12,xmm0
335	pshufb	xmm12,XMMWORD[$L$rol8]
336	paddd	xmm8,xmm12
337	pxor	xmm4,xmm8
338	movdqa	xmm3,xmm4
339	pslld	xmm3,7
340	psrld	xmm4,25
341	pxor	xmm4,xmm3
342DB	102,15,58,15,228,12
343DB	102,69,15,58,15,192,8
344DB	102,69,15,58,15,228,4
345
346	dec	r10
347	jne	NEAR $L$open_sse_init_rounds
348
349	paddd	xmm0,XMMWORD[$L$chacha20_consts]
350	paddd	xmm4,XMMWORD[((160+48))+rbp]
351
352	pand	xmm0,XMMWORD[$L$clamp]
353	movdqa	XMMWORD[(160+0)+rbp],xmm0
354	movdqa	XMMWORD[(160+16)+rbp],xmm4
355
356	mov	r8,r8
357	call	poly_hash_ad_internal
358$L$open_sse_main_loop:
359	cmp	rbx,16*16
360	jb	NEAR $L$open_sse_tail
361
362	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
363	movdqa	xmm4,XMMWORD[((160+48))+rbp]
364	movdqa	xmm8,XMMWORD[((160+64))+rbp]
365	movdqa	xmm1,xmm0
366	movdqa	xmm5,xmm4
367	movdqa	xmm9,xmm8
368	movdqa	xmm2,xmm0
369	movdqa	xmm6,xmm4
370	movdqa	xmm10,xmm8
371	movdqa	xmm3,xmm0
372	movdqa	xmm7,xmm4
373	movdqa	xmm11,xmm8
374	movdqa	xmm15,XMMWORD[((160+96))+rbp]
375	paddd	xmm15,XMMWORD[$L$sse_inc]
376	movdqa	xmm14,xmm15
377	paddd	xmm14,XMMWORD[$L$sse_inc]
378	movdqa	xmm13,xmm14
379	paddd	xmm13,XMMWORD[$L$sse_inc]
380	movdqa	xmm12,xmm13
381	paddd	xmm12,XMMWORD[$L$sse_inc]
382	movdqa	XMMWORD[(160+96)+rbp],xmm12
383	movdqa	XMMWORD[(160+112)+rbp],xmm13
384	movdqa	XMMWORD[(160+128)+rbp],xmm14
385	movdqa	XMMWORD[(160+144)+rbp],xmm15
386
387
388
389	mov	rcx,4
390	mov	r8,rsi
391$L$open_sse_main_loop_rounds:
392	movdqa	XMMWORD[(160+80)+rbp],xmm8
393	movdqa	xmm8,XMMWORD[$L$rol16]
394	paddd	xmm3,xmm7
395	paddd	xmm2,xmm6
396	paddd	xmm1,xmm5
397	paddd	xmm0,xmm4
398	pxor	xmm15,xmm3
399	pxor	xmm14,xmm2
400	pxor	xmm13,xmm1
401	pxor	xmm12,xmm0
402DB	102,69,15,56,0,248
403DB	102,69,15,56,0,240
404DB	102,69,15,56,0,232
405DB	102,69,15,56,0,224
406	movdqa	xmm8,XMMWORD[((160+80))+rbp]
407	paddd	xmm11,xmm15
408	paddd	xmm10,xmm14
409	paddd	xmm9,xmm13
410	paddd	xmm8,xmm12
411	pxor	xmm7,xmm11
412	add	r10,QWORD[((0+0))+r8]
413	adc	r11,QWORD[((8+0))+r8]
414	adc	r12,1
415
416	lea	r8,[16+r8]
417	pxor	xmm6,xmm10
418	pxor	xmm5,xmm9
419	pxor	xmm4,xmm8
420	movdqa	XMMWORD[(160+80)+rbp],xmm8
421	movdqa	xmm8,xmm7
422	psrld	xmm8,20
423	pslld	xmm7,32-20
424	pxor	xmm7,xmm8
425	movdqa	xmm8,xmm6
426	psrld	xmm8,20
427	pslld	xmm6,32-20
428	pxor	xmm6,xmm8
429	movdqa	xmm8,xmm5
430	psrld	xmm8,20
431	pslld	xmm5,32-20
432	pxor	xmm5,xmm8
433	movdqa	xmm8,xmm4
434	psrld	xmm8,20
435	pslld	xmm4,32-20
436	pxor	xmm4,xmm8
437	mov	rax,QWORD[((0+160+0))+rbp]
438	mov	r15,rax
439	mul	r10
440	mov	r13,rax
441	mov	r14,rdx
442	mov	rax,QWORD[((0+160+0))+rbp]
443	mul	r11
444	imul	r15,r12
445	add	r14,rax
446	adc	r15,rdx
447	movdqa	xmm8,XMMWORD[$L$rol8]
448	paddd	xmm3,xmm7
449	paddd	xmm2,xmm6
450	paddd	xmm1,xmm5
451	paddd	xmm0,xmm4
452	pxor	xmm15,xmm3
453	pxor	xmm14,xmm2
454	pxor	xmm13,xmm1
455	pxor	xmm12,xmm0
456DB	102,69,15,56,0,248
457DB	102,69,15,56,0,240
458DB	102,69,15,56,0,232
459DB	102,69,15,56,0,224
460	movdqa	xmm8,XMMWORD[((160+80))+rbp]
461	paddd	xmm11,xmm15
462	paddd	xmm10,xmm14
463	paddd	xmm9,xmm13
464	paddd	xmm8,xmm12
465	pxor	xmm7,xmm11
466	pxor	xmm6,xmm10
467	mov	rax,QWORD[((8+160+0))+rbp]
468	mov	r9,rax
469	mul	r10
470	add	r14,rax
471	adc	rdx,0
472	mov	r10,rdx
473	mov	rax,QWORD[((8+160+0))+rbp]
474	mul	r11
475	add	r15,rax
476	adc	rdx,0
477	pxor	xmm5,xmm9
478	pxor	xmm4,xmm8
479	movdqa	XMMWORD[(160+80)+rbp],xmm8
480	movdqa	xmm8,xmm7
481	psrld	xmm8,25
482	pslld	xmm7,32-25
483	pxor	xmm7,xmm8
484	movdqa	xmm8,xmm6
485	psrld	xmm8,25
486	pslld	xmm6,32-25
487	pxor	xmm6,xmm8
488	movdqa	xmm8,xmm5
489	psrld	xmm8,25
490	pslld	xmm5,32-25
491	pxor	xmm5,xmm8
492	movdqa	xmm8,xmm4
493	psrld	xmm8,25
494	pslld	xmm4,32-25
495	pxor	xmm4,xmm8
496	movdqa	xmm8,XMMWORD[((160+80))+rbp]
497	imul	r9,r12
498	add	r15,r10
499	adc	r9,rdx
500DB	102,15,58,15,255,4
501DB	102,69,15,58,15,219,8
502DB	102,69,15,58,15,255,12
503DB	102,15,58,15,246,4
504DB	102,69,15,58,15,210,8
505DB	102,69,15,58,15,246,12
506DB	102,15,58,15,237,4
507DB	102,69,15,58,15,201,8
508DB	102,69,15,58,15,237,12
509DB	102,15,58,15,228,4
510DB	102,69,15,58,15,192,8
511DB	102,69,15,58,15,228,12
512	movdqa	XMMWORD[(160+80)+rbp],xmm8
513	movdqa	xmm8,XMMWORD[$L$rol16]
514	paddd	xmm3,xmm7
515	paddd	xmm2,xmm6
516	paddd	xmm1,xmm5
517	paddd	xmm0,xmm4
518	pxor	xmm15,xmm3
519	pxor	xmm14,xmm2
520	mov	r10,r13
521	mov	r11,r14
522	mov	r12,r15
523	and	r12,3
524	mov	r13,r15
525	and	r13,-4
526	mov	r14,r9
527	shrd	r15,r9,2
528	shr	r9,2
529	add	r15,r13
530	adc	r9,r14
531	add	r10,r15
532	adc	r11,r9
533	adc	r12,0
534	pxor	xmm13,xmm1
535	pxor	xmm12,xmm0
536DB	102,69,15,56,0,248
537DB	102,69,15,56,0,240
538DB	102,69,15,56,0,232
539DB	102,69,15,56,0,224
540	movdqa	xmm8,XMMWORD[((160+80))+rbp]
541	paddd	xmm11,xmm15
542	paddd	xmm10,xmm14
543	paddd	xmm9,xmm13
544	paddd	xmm8,xmm12
545	pxor	xmm7,xmm11
546	pxor	xmm6,xmm10
547	pxor	xmm5,xmm9
548	pxor	xmm4,xmm8
549	movdqa	XMMWORD[(160+80)+rbp],xmm8
550	movdqa	xmm8,xmm7
551	psrld	xmm8,20
552	pslld	xmm7,32-20
553	pxor	xmm7,xmm8
554	movdqa	xmm8,xmm6
555	psrld	xmm8,20
556	pslld	xmm6,32-20
557	pxor	xmm6,xmm8
558	movdqa	xmm8,xmm5
559	psrld	xmm8,20
560	pslld	xmm5,32-20
561	pxor	xmm5,xmm8
562	movdqa	xmm8,xmm4
563	psrld	xmm8,20
564	pslld	xmm4,32-20
565	pxor	xmm4,xmm8
566	movdqa	xmm8,XMMWORD[$L$rol8]
567	paddd	xmm3,xmm7
568	paddd	xmm2,xmm6
569	paddd	xmm1,xmm5
570	paddd	xmm0,xmm4
571	pxor	xmm15,xmm3
572	pxor	xmm14,xmm2
573	pxor	xmm13,xmm1
574	pxor	xmm12,xmm0
575DB	102,69,15,56,0,248
576DB	102,69,15,56,0,240
577DB	102,69,15,56,0,232
578DB	102,69,15,56,0,224
579	movdqa	xmm8,XMMWORD[((160+80))+rbp]
580	paddd	xmm11,xmm15
581	paddd	xmm10,xmm14
582	paddd	xmm9,xmm13
583	paddd	xmm8,xmm12
584	pxor	xmm7,xmm11
585	pxor	xmm6,xmm10
586	pxor	xmm5,xmm9
587	pxor	xmm4,xmm8
588	movdqa	XMMWORD[(160+80)+rbp],xmm8
589	movdqa	xmm8,xmm7
590	psrld	xmm8,25
591	pslld	xmm7,32-25
592	pxor	xmm7,xmm8
593	movdqa	xmm8,xmm6
594	psrld	xmm8,25
595	pslld	xmm6,32-25
596	pxor	xmm6,xmm8
597	movdqa	xmm8,xmm5
598	psrld	xmm8,25
599	pslld	xmm5,32-25
600	pxor	xmm5,xmm8
601	movdqa	xmm8,xmm4
602	psrld	xmm8,25
603	pslld	xmm4,32-25
604	pxor	xmm4,xmm8
605	movdqa	xmm8,XMMWORD[((160+80))+rbp]
606DB	102,15,58,15,255,12
607DB	102,69,15,58,15,219,8
608DB	102,69,15,58,15,255,4
609DB	102,15,58,15,246,12
610DB	102,69,15,58,15,210,8
611DB	102,69,15,58,15,246,4
612DB	102,15,58,15,237,12
613DB	102,69,15,58,15,201,8
614DB	102,69,15,58,15,237,4
615DB	102,15,58,15,228,12
616DB	102,69,15,58,15,192,8
617DB	102,69,15,58,15,228,4
618
619	dec	rcx
620	jge	NEAR $L$open_sse_main_loop_rounds
621	add	r10,QWORD[((0+0))+r8]
622	adc	r11,QWORD[((8+0))+r8]
623	adc	r12,1
624	mov	rax,QWORD[((0+160+0))+rbp]
625	mov	r15,rax
626	mul	r10
627	mov	r13,rax
628	mov	r14,rdx
629	mov	rax,QWORD[((0+160+0))+rbp]
630	mul	r11
631	imul	r15,r12
632	add	r14,rax
633	adc	r15,rdx
634	mov	rax,QWORD[((8+160+0))+rbp]
635	mov	r9,rax
636	mul	r10
637	add	r14,rax
638	adc	rdx,0
639	mov	r10,rdx
640	mov	rax,QWORD[((8+160+0))+rbp]
641	mul	r11
642	add	r15,rax
643	adc	rdx,0
644	imul	r9,r12
645	add	r15,r10
646	adc	r9,rdx
647	mov	r10,r13
648	mov	r11,r14
649	mov	r12,r15
650	and	r12,3
651	mov	r13,r15
652	and	r13,-4
653	mov	r14,r9
654	shrd	r15,r9,2
655	shr	r9,2
656	add	r15,r13
657	adc	r9,r14
658	add	r10,r15
659	adc	r11,r9
660	adc	r12,0
661
662	lea	r8,[16+r8]
663	cmp	rcx,-6
664	jg	NEAR $L$open_sse_main_loop_rounds
665	paddd	xmm3,XMMWORD[$L$chacha20_consts]
666	paddd	xmm7,XMMWORD[((160+48))+rbp]
667	paddd	xmm11,XMMWORD[((160+64))+rbp]
668	paddd	xmm15,XMMWORD[((160+144))+rbp]
669	paddd	xmm2,XMMWORD[$L$chacha20_consts]
670	paddd	xmm6,XMMWORD[((160+48))+rbp]
671	paddd	xmm10,XMMWORD[((160+64))+rbp]
672	paddd	xmm14,XMMWORD[((160+128))+rbp]
673	paddd	xmm1,XMMWORD[$L$chacha20_consts]
674	paddd	xmm5,XMMWORD[((160+48))+rbp]
675	paddd	xmm9,XMMWORD[((160+64))+rbp]
676	paddd	xmm13,XMMWORD[((160+112))+rbp]
677	paddd	xmm0,XMMWORD[$L$chacha20_consts]
678	paddd	xmm4,XMMWORD[((160+48))+rbp]
679	paddd	xmm8,XMMWORD[((160+64))+rbp]
680	paddd	xmm12,XMMWORD[((160+96))+rbp]
681	movdqa	XMMWORD[(160+80)+rbp],xmm12
682	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
683	pxor	xmm12,xmm3
684	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
685	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
686	pxor	xmm12,xmm7
687	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
688	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
689	pxor	xmm12,xmm11
690	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
691	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
692	pxor	xmm12,xmm15
693	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
694	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
695	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
696	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
697	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
698	pxor	xmm2,xmm3
699	pxor	xmm6,xmm7
700	pxor	xmm10,xmm11
701	pxor	xmm15,xmm14
702	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
703	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
704	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
705	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
706	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
707	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
708	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
709	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
710	pxor	xmm1,xmm3
711	pxor	xmm5,xmm7
712	pxor	xmm9,xmm11
713	pxor	xmm15,xmm13
714	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
715	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
716	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
717	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
718	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
719	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
720	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
721	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
722	pxor	xmm0,xmm3
723	pxor	xmm4,xmm7
724	pxor	xmm8,xmm11
725	pxor	xmm15,XMMWORD[((160+80))+rbp]
726	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
727	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
728	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
729	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
730
731	lea	rsi,[256+rsi]
732	lea	rdi,[256+rdi]
733	sub	rbx,16*16
734	jmp	NEAR $L$open_sse_main_loop
735$L$open_sse_tail:
736
737	test	rbx,rbx
738	jz	NEAR $L$open_sse_finalize
739	cmp	rbx,12*16
740	ja	NEAR $L$open_sse_tail_256
741	cmp	rbx,8*16
742	ja	NEAR $L$open_sse_tail_192
743	cmp	rbx,4*16
744	ja	NEAR $L$open_sse_tail_128
745	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
746	movdqa	xmm4,XMMWORD[((160+48))+rbp]
747	movdqa	xmm8,XMMWORD[((160+64))+rbp]
748	movdqa	xmm12,XMMWORD[((160+96))+rbp]
749	paddd	xmm12,XMMWORD[$L$sse_inc]
750	movdqa	XMMWORD[(160+96)+rbp],xmm12
751
752	xor	r8,r8
753	mov	rcx,rbx
754	cmp	rcx,16
755	jb	NEAR $L$open_sse_tail_64_rounds
756$L$open_sse_tail_64_rounds_and_x1hash:
757	add	r10,QWORD[((0+0))+r8*1+rsi]
758	adc	r11,QWORD[((8+0))+r8*1+rsi]
759	adc	r12,1
760	mov	rax,QWORD[((0+160+0))+rbp]
761	mov	r15,rax
762	mul	r10
763	mov	r13,rax
764	mov	r14,rdx
765	mov	rax,QWORD[((0+160+0))+rbp]
766	mul	r11
767	imul	r15,r12
768	add	r14,rax
769	adc	r15,rdx
770	mov	rax,QWORD[((8+160+0))+rbp]
771	mov	r9,rax
772	mul	r10
773	add	r14,rax
774	adc	rdx,0
775	mov	r10,rdx
776	mov	rax,QWORD[((8+160+0))+rbp]
777	mul	r11
778	add	r15,rax
779	adc	rdx,0
780	imul	r9,r12
781	add	r15,r10
782	adc	r9,rdx
783	mov	r10,r13
784	mov	r11,r14
785	mov	r12,r15
786	and	r12,3
787	mov	r13,r15
788	and	r13,-4
789	mov	r14,r9
790	shrd	r15,r9,2
791	shr	r9,2
792	add	r15,r13
793	adc	r9,r14
794	add	r10,r15
795	adc	r11,r9
796	adc	r12,0
797
798	sub	rcx,16
799$L$open_sse_tail_64_rounds:
800	add	r8,16
801	paddd	xmm0,xmm4
802	pxor	xmm12,xmm0
803	pshufb	xmm12,XMMWORD[$L$rol16]
804	paddd	xmm8,xmm12
805	pxor	xmm4,xmm8
806	movdqa	xmm3,xmm4
807	pslld	xmm3,12
808	psrld	xmm4,20
809	pxor	xmm4,xmm3
810	paddd	xmm0,xmm4
811	pxor	xmm12,xmm0
812	pshufb	xmm12,XMMWORD[$L$rol8]
813	paddd	xmm8,xmm12
814	pxor	xmm4,xmm8
815	movdqa	xmm3,xmm4
816	pslld	xmm3,7
817	psrld	xmm4,25
818	pxor	xmm4,xmm3
819DB	102,15,58,15,228,4
820DB	102,69,15,58,15,192,8
821DB	102,69,15,58,15,228,12
822	paddd	xmm0,xmm4
823	pxor	xmm12,xmm0
824	pshufb	xmm12,XMMWORD[$L$rol16]
825	paddd	xmm8,xmm12
826	pxor	xmm4,xmm8
827	movdqa	xmm3,xmm4
828	pslld	xmm3,12
829	psrld	xmm4,20
830	pxor	xmm4,xmm3
831	paddd	xmm0,xmm4
832	pxor	xmm12,xmm0
833	pshufb	xmm12,XMMWORD[$L$rol8]
834	paddd	xmm8,xmm12
835	pxor	xmm4,xmm8
836	movdqa	xmm3,xmm4
837	pslld	xmm3,7
838	psrld	xmm4,25
839	pxor	xmm4,xmm3
840DB	102,15,58,15,228,12
841DB	102,69,15,58,15,192,8
842DB	102,69,15,58,15,228,4
843
844	cmp	rcx,16
845	jae	NEAR $L$open_sse_tail_64_rounds_and_x1hash
846	cmp	r8,10*16
847	jne	NEAR $L$open_sse_tail_64_rounds
848	paddd	xmm0,XMMWORD[$L$chacha20_consts]
849	paddd	xmm4,XMMWORD[((160+48))+rbp]
850	paddd	xmm8,XMMWORD[((160+64))+rbp]
851	paddd	xmm12,XMMWORD[((160+96))+rbp]
852
853	jmp	NEAR $L$open_sse_tail_64_dec_loop
854
855$L$open_sse_tail_128:
856	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
857	movdqa	xmm4,XMMWORD[((160+48))+rbp]
858	movdqa	xmm8,XMMWORD[((160+64))+rbp]
859	movdqa	xmm1,xmm0
860	movdqa	xmm5,xmm4
861	movdqa	xmm9,xmm8
862	movdqa	xmm13,XMMWORD[((160+96))+rbp]
863	paddd	xmm13,XMMWORD[$L$sse_inc]
864	movdqa	xmm12,xmm13
865	paddd	xmm12,XMMWORD[$L$sse_inc]
866	movdqa	XMMWORD[(160+96)+rbp],xmm12
867	movdqa	XMMWORD[(160+112)+rbp],xmm13
868
869	mov	rcx,rbx
870	and	rcx,-16
871	xor	r8,r8
872$L$open_sse_tail_128_rounds_and_x1hash:
873	add	r10,QWORD[((0+0))+r8*1+rsi]
874	adc	r11,QWORD[((8+0))+r8*1+rsi]
875	adc	r12,1
876	mov	rax,QWORD[((0+160+0))+rbp]
877	mov	r15,rax
878	mul	r10
879	mov	r13,rax
880	mov	r14,rdx
881	mov	rax,QWORD[((0+160+0))+rbp]
882	mul	r11
883	imul	r15,r12
884	add	r14,rax
885	adc	r15,rdx
886	mov	rax,QWORD[((8+160+0))+rbp]
887	mov	r9,rax
888	mul	r10
889	add	r14,rax
890	adc	rdx,0
891	mov	r10,rdx
892	mov	rax,QWORD[((8+160+0))+rbp]
893	mul	r11
894	add	r15,rax
895	adc	rdx,0
896	imul	r9,r12
897	add	r15,r10
898	adc	r9,rdx
899	mov	r10,r13
900	mov	r11,r14
901	mov	r12,r15
902	and	r12,3
903	mov	r13,r15
904	and	r13,-4
905	mov	r14,r9
906	shrd	r15,r9,2
907	shr	r9,2
908	add	r15,r13
909	adc	r9,r14
910	add	r10,r15
911	adc	r11,r9
912	adc	r12,0
913
914$L$open_sse_tail_128_rounds:
915	add	r8,16
916	paddd	xmm0,xmm4
917	pxor	xmm12,xmm0
918	pshufb	xmm12,XMMWORD[$L$rol16]
919	paddd	xmm8,xmm12
920	pxor	xmm4,xmm8
921	movdqa	xmm3,xmm4
922	pslld	xmm3,12
923	psrld	xmm4,20
924	pxor	xmm4,xmm3
925	paddd	xmm0,xmm4
926	pxor	xmm12,xmm0
927	pshufb	xmm12,XMMWORD[$L$rol8]
928	paddd	xmm8,xmm12
929	pxor	xmm4,xmm8
930	movdqa	xmm3,xmm4
931	pslld	xmm3,7
932	psrld	xmm4,25
933	pxor	xmm4,xmm3
934DB	102,15,58,15,228,4
935DB	102,69,15,58,15,192,8
936DB	102,69,15,58,15,228,12
937	paddd	xmm1,xmm5
938	pxor	xmm13,xmm1
939	pshufb	xmm13,XMMWORD[$L$rol16]
940	paddd	xmm9,xmm13
941	pxor	xmm5,xmm9
942	movdqa	xmm3,xmm5
943	pslld	xmm3,12
944	psrld	xmm5,20
945	pxor	xmm5,xmm3
946	paddd	xmm1,xmm5
947	pxor	xmm13,xmm1
948	pshufb	xmm13,XMMWORD[$L$rol8]
949	paddd	xmm9,xmm13
950	pxor	xmm5,xmm9
951	movdqa	xmm3,xmm5
952	pslld	xmm3,7
953	psrld	xmm5,25
954	pxor	xmm5,xmm3
955DB	102,15,58,15,237,4
956DB	102,69,15,58,15,201,8
957DB	102,69,15,58,15,237,12
958	paddd	xmm0,xmm4
959	pxor	xmm12,xmm0
960	pshufb	xmm12,XMMWORD[$L$rol16]
961	paddd	xmm8,xmm12
962	pxor	xmm4,xmm8
963	movdqa	xmm3,xmm4
964	pslld	xmm3,12
965	psrld	xmm4,20
966	pxor	xmm4,xmm3
967	paddd	xmm0,xmm4
968	pxor	xmm12,xmm0
969	pshufb	xmm12,XMMWORD[$L$rol8]
970	paddd	xmm8,xmm12
971	pxor	xmm4,xmm8
972	movdqa	xmm3,xmm4
973	pslld	xmm3,7
974	psrld	xmm4,25
975	pxor	xmm4,xmm3
976DB	102,15,58,15,228,12
977DB	102,69,15,58,15,192,8
978DB	102,69,15,58,15,228,4
979	paddd	xmm1,xmm5
980	pxor	xmm13,xmm1
981	pshufb	xmm13,XMMWORD[$L$rol16]
982	paddd	xmm9,xmm13
983	pxor	xmm5,xmm9
984	movdqa	xmm3,xmm5
985	pslld	xmm3,12
986	psrld	xmm5,20
987	pxor	xmm5,xmm3
988	paddd	xmm1,xmm5
989	pxor	xmm13,xmm1
990	pshufb	xmm13,XMMWORD[$L$rol8]
991	paddd	xmm9,xmm13
992	pxor	xmm5,xmm9
993	movdqa	xmm3,xmm5
994	pslld	xmm3,7
995	psrld	xmm5,25
996	pxor	xmm5,xmm3
997DB	102,15,58,15,237,12
998DB	102,69,15,58,15,201,8
999DB	102,69,15,58,15,237,4
1000
1001	cmp	r8,rcx
1002	jb	NEAR $L$open_sse_tail_128_rounds_and_x1hash
1003	cmp	r8,10*16
1004	jne	NEAR $L$open_sse_tail_128_rounds
1005	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1006	paddd	xmm5,XMMWORD[((160+48))+rbp]
1007	paddd	xmm9,XMMWORD[((160+64))+rbp]
1008	paddd	xmm13,XMMWORD[((160+112))+rbp]
1009	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1010	paddd	xmm4,XMMWORD[((160+48))+rbp]
1011	paddd	xmm8,XMMWORD[((160+64))+rbp]
1012	paddd	xmm12,XMMWORD[((160+96))+rbp]
1013	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1014	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1015	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1016	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1017	pxor	xmm1,xmm3
1018	pxor	xmm5,xmm7
1019	pxor	xmm9,xmm11
1020	pxor	xmm15,xmm13
1021	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
1022	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
1023	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
1024	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1025
1026	sub	rbx,4*16
1027	lea	rsi,[64+rsi]
1028	lea	rdi,[64+rdi]
1029	jmp	NEAR $L$open_sse_tail_64_dec_loop
1030
1031$L$open_sse_tail_192:
1032	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
1033	movdqa	xmm4,XMMWORD[((160+48))+rbp]
1034	movdqa	xmm8,XMMWORD[((160+64))+rbp]
1035	movdqa	xmm1,xmm0
1036	movdqa	xmm5,xmm4
1037	movdqa	xmm9,xmm8
1038	movdqa	xmm2,xmm0
1039	movdqa	xmm6,xmm4
1040	movdqa	xmm10,xmm8
1041	movdqa	xmm14,XMMWORD[((160+96))+rbp]
1042	paddd	xmm14,XMMWORD[$L$sse_inc]
1043	movdqa	xmm13,xmm14
1044	paddd	xmm13,XMMWORD[$L$sse_inc]
1045	movdqa	xmm12,xmm13
1046	paddd	xmm12,XMMWORD[$L$sse_inc]
1047	movdqa	XMMWORD[(160+96)+rbp],xmm12
1048	movdqa	XMMWORD[(160+112)+rbp],xmm13
1049	movdqa	XMMWORD[(160+128)+rbp],xmm14
1050
1051	mov	rcx,rbx
1052	mov	r8,10*16
1053	cmp	rcx,10*16
1054	cmovg	rcx,r8
1055	and	rcx,-16
1056	xor	r8,r8
1057$L$open_sse_tail_192_rounds_and_x1hash:
1058	add	r10,QWORD[((0+0))+r8*1+rsi]
1059	adc	r11,QWORD[((8+0))+r8*1+rsi]
1060	adc	r12,1
1061	mov	rax,QWORD[((0+160+0))+rbp]
1062	mov	r15,rax
1063	mul	r10
1064	mov	r13,rax
1065	mov	r14,rdx
1066	mov	rax,QWORD[((0+160+0))+rbp]
1067	mul	r11
1068	imul	r15,r12
1069	add	r14,rax
1070	adc	r15,rdx
1071	mov	rax,QWORD[((8+160+0))+rbp]
1072	mov	r9,rax
1073	mul	r10
1074	add	r14,rax
1075	adc	rdx,0
1076	mov	r10,rdx
1077	mov	rax,QWORD[((8+160+0))+rbp]
1078	mul	r11
1079	add	r15,rax
1080	adc	rdx,0
1081	imul	r9,r12
1082	add	r15,r10
1083	adc	r9,rdx
1084	mov	r10,r13
1085	mov	r11,r14
1086	mov	r12,r15
1087	and	r12,3
1088	mov	r13,r15
1089	and	r13,-4
1090	mov	r14,r9
1091	shrd	r15,r9,2
1092	shr	r9,2
1093	add	r15,r13
1094	adc	r9,r14
1095	add	r10,r15
1096	adc	r11,r9
1097	adc	r12,0
1098
1099$L$open_sse_tail_192_rounds:
1100	add	r8,16
1101	paddd	xmm0,xmm4
1102	pxor	xmm12,xmm0
1103	pshufb	xmm12,XMMWORD[$L$rol16]
1104	paddd	xmm8,xmm12
1105	pxor	xmm4,xmm8
1106	movdqa	xmm3,xmm4
1107	pslld	xmm3,12
1108	psrld	xmm4,20
1109	pxor	xmm4,xmm3
1110	paddd	xmm0,xmm4
1111	pxor	xmm12,xmm0
1112	pshufb	xmm12,XMMWORD[$L$rol8]
1113	paddd	xmm8,xmm12
1114	pxor	xmm4,xmm8
1115	movdqa	xmm3,xmm4
1116	pslld	xmm3,7
1117	psrld	xmm4,25
1118	pxor	xmm4,xmm3
1119DB	102,15,58,15,228,4
1120DB	102,69,15,58,15,192,8
1121DB	102,69,15,58,15,228,12
1122	paddd	xmm1,xmm5
1123	pxor	xmm13,xmm1
1124	pshufb	xmm13,XMMWORD[$L$rol16]
1125	paddd	xmm9,xmm13
1126	pxor	xmm5,xmm9
1127	movdqa	xmm3,xmm5
1128	pslld	xmm3,12
1129	psrld	xmm5,20
1130	pxor	xmm5,xmm3
1131	paddd	xmm1,xmm5
1132	pxor	xmm13,xmm1
1133	pshufb	xmm13,XMMWORD[$L$rol8]
1134	paddd	xmm9,xmm13
1135	pxor	xmm5,xmm9
1136	movdqa	xmm3,xmm5
1137	pslld	xmm3,7
1138	psrld	xmm5,25
1139	pxor	xmm5,xmm3
1140DB	102,15,58,15,237,4
1141DB	102,69,15,58,15,201,8
1142DB	102,69,15,58,15,237,12
1143	paddd	xmm2,xmm6
1144	pxor	xmm14,xmm2
1145	pshufb	xmm14,XMMWORD[$L$rol16]
1146	paddd	xmm10,xmm14
1147	pxor	xmm6,xmm10
1148	movdqa	xmm3,xmm6
1149	pslld	xmm3,12
1150	psrld	xmm6,20
1151	pxor	xmm6,xmm3
1152	paddd	xmm2,xmm6
1153	pxor	xmm14,xmm2
1154	pshufb	xmm14,XMMWORD[$L$rol8]
1155	paddd	xmm10,xmm14
1156	pxor	xmm6,xmm10
1157	movdqa	xmm3,xmm6
1158	pslld	xmm3,7
1159	psrld	xmm6,25
1160	pxor	xmm6,xmm3
1161DB	102,15,58,15,246,4
1162DB	102,69,15,58,15,210,8
1163DB	102,69,15,58,15,246,12
1164	paddd	xmm0,xmm4
1165	pxor	xmm12,xmm0
1166	pshufb	xmm12,XMMWORD[$L$rol16]
1167	paddd	xmm8,xmm12
1168	pxor	xmm4,xmm8
1169	movdqa	xmm3,xmm4
1170	pslld	xmm3,12
1171	psrld	xmm4,20
1172	pxor	xmm4,xmm3
1173	paddd	xmm0,xmm4
1174	pxor	xmm12,xmm0
1175	pshufb	xmm12,XMMWORD[$L$rol8]
1176	paddd	xmm8,xmm12
1177	pxor	xmm4,xmm8
1178	movdqa	xmm3,xmm4
1179	pslld	xmm3,7
1180	psrld	xmm4,25
1181	pxor	xmm4,xmm3
1182DB	102,15,58,15,228,12
1183DB	102,69,15,58,15,192,8
1184DB	102,69,15,58,15,228,4
1185	paddd	xmm1,xmm5
1186	pxor	xmm13,xmm1
1187	pshufb	xmm13,XMMWORD[$L$rol16]
1188	paddd	xmm9,xmm13
1189	pxor	xmm5,xmm9
1190	movdqa	xmm3,xmm5
1191	pslld	xmm3,12
1192	psrld	xmm5,20
1193	pxor	xmm5,xmm3
1194	paddd	xmm1,xmm5
1195	pxor	xmm13,xmm1
1196	pshufb	xmm13,XMMWORD[$L$rol8]
1197	paddd	xmm9,xmm13
1198	pxor	xmm5,xmm9
1199	movdqa	xmm3,xmm5
1200	pslld	xmm3,7
1201	psrld	xmm5,25
1202	pxor	xmm5,xmm3
1203DB	102,15,58,15,237,12
1204DB	102,69,15,58,15,201,8
1205DB	102,69,15,58,15,237,4
1206	paddd	xmm2,xmm6
1207	pxor	xmm14,xmm2
1208	pshufb	xmm14,XMMWORD[$L$rol16]
1209	paddd	xmm10,xmm14
1210	pxor	xmm6,xmm10
1211	movdqa	xmm3,xmm6
1212	pslld	xmm3,12
1213	psrld	xmm6,20
1214	pxor	xmm6,xmm3
1215	paddd	xmm2,xmm6
1216	pxor	xmm14,xmm2
1217	pshufb	xmm14,XMMWORD[$L$rol8]
1218	paddd	xmm10,xmm14
1219	pxor	xmm6,xmm10
1220	movdqa	xmm3,xmm6
1221	pslld	xmm3,7
1222	psrld	xmm6,25
1223	pxor	xmm6,xmm3
1224DB	102,15,58,15,246,12
1225DB	102,69,15,58,15,210,8
1226DB	102,69,15,58,15,246,4
1227
1228	cmp	r8,rcx
1229	jb	NEAR $L$open_sse_tail_192_rounds_and_x1hash
1230	cmp	r8,10*16
1231	jne	NEAR $L$open_sse_tail_192_rounds
1232	cmp	rbx,11*16
1233	jb	NEAR $L$open_sse_tail_192_finish
1234	add	r10,QWORD[((0+160))+rsi]
1235	adc	r11,QWORD[((8+160))+rsi]
1236	adc	r12,1
1237	mov	rax,QWORD[((0+160+0))+rbp]
1238	mov	r15,rax
1239	mul	r10
1240	mov	r13,rax
1241	mov	r14,rdx
1242	mov	rax,QWORD[((0+160+0))+rbp]
1243	mul	r11
1244	imul	r15,r12
1245	add	r14,rax
1246	adc	r15,rdx
1247	mov	rax,QWORD[((8+160+0))+rbp]
1248	mov	r9,rax
1249	mul	r10
1250	add	r14,rax
1251	adc	rdx,0
1252	mov	r10,rdx
1253	mov	rax,QWORD[((8+160+0))+rbp]
1254	mul	r11
1255	add	r15,rax
1256	adc	rdx,0
1257	imul	r9,r12
1258	add	r15,r10
1259	adc	r9,rdx
1260	mov	r10,r13
1261	mov	r11,r14
1262	mov	r12,r15
1263	and	r12,3
1264	mov	r13,r15
1265	and	r13,-4
1266	mov	r14,r9
1267	shrd	r15,r9,2
1268	shr	r9,2
1269	add	r15,r13
1270	adc	r9,r14
1271	add	r10,r15
1272	adc	r11,r9
1273	adc	r12,0
1274
1275	cmp	rbx,12*16
1276	jb	NEAR $L$open_sse_tail_192_finish
1277	add	r10,QWORD[((0+176))+rsi]
1278	adc	r11,QWORD[((8+176))+rsi]
1279	adc	r12,1
1280	mov	rax,QWORD[((0+160+0))+rbp]
1281	mov	r15,rax
1282	mul	r10
1283	mov	r13,rax
1284	mov	r14,rdx
1285	mov	rax,QWORD[((0+160+0))+rbp]
1286	mul	r11
1287	imul	r15,r12
1288	add	r14,rax
1289	adc	r15,rdx
1290	mov	rax,QWORD[((8+160+0))+rbp]
1291	mov	r9,rax
1292	mul	r10
1293	add	r14,rax
1294	adc	rdx,0
1295	mov	r10,rdx
1296	mov	rax,QWORD[((8+160+0))+rbp]
1297	mul	r11
1298	add	r15,rax
1299	adc	rdx,0
1300	imul	r9,r12
1301	add	r15,r10
1302	adc	r9,rdx
1303	mov	r10,r13
1304	mov	r11,r14
1305	mov	r12,r15
1306	and	r12,3
1307	mov	r13,r15
1308	and	r13,-4
1309	mov	r14,r9
1310	shrd	r15,r9,2
1311	shr	r9,2
1312	add	r15,r13
1313	adc	r9,r14
1314	add	r10,r15
1315	adc	r11,r9
1316	adc	r12,0
1317
1318$L$open_sse_tail_192_finish:
1319	paddd	xmm2,XMMWORD[$L$chacha20_consts]
1320	paddd	xmm6,XMMWORD[((160+48))+rbp]
1321	paddd	xmm10,XMMWORD[((160+64))+rbp]
1322	paddd	xmm14,XMMWORD[((160+128))+rbp]
1323	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1324	paddd	xmm5,XMMWORD[((160+48))+rbp]
1325	paddd	xmm9,XMMWORD[((160+64))+rbp]
1326	paddd	xmm13,XMMWORD[((160+112))+rbp]
1327	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1328	paddd	xmm4,XMMWORD[((160+48))+rbp]
1329	paddd	xmm8,XMMWORD[((160+64))+rbp]
1330	paddd	xmm12,XMMWORD[((160+96))+rbp]
1331	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1332	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1333	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1334	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1335	pxor	xmm2,xmm3
1336	pxor	xmm6,xmm7
1337	pxor	xmm10,xmm11
1338	pxor	xmm15,xmm14
1339	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
1340	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
1341	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
1342	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1343	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
1344	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
1345	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
1346	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
1347	pxor	xmm1,xmm3
1348	pxor	xmm5,xmm7
1349	pxor	xmm9,xmm11
1350	pxor	xmm15,xmm13
1351	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
1352	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
1353	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
1354	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
1355
1356	sub	rbx,8*16
1357	lea	rsi,[128+rsi]
1358	lea	rdi,[128+rdi]
1359	jmp	NEAR $L$open_sse_tail_64_dec_loop
1360
; Set up four parallel ChaCha20 working states for the final stretch of input.
; State k lives in (xmm{k}, xmm{4+k}, xmm{8+k}, xmm{12+k}) for k = 0..3.
1361$L$open_sse_tail_256:
; Rows 0..2 are identical for all four states: the constant row plus the two
; rows kept at [160+48+rbp] and [160+64+rbp].
1362	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
1363	movdqa	xmm4,XMMWORD[((160+48))+rbp]
1364	movdqa	xmm8,XMMWORD[((160+64))+rbp]
1365	movdqa	xmm1,xmm0
1366	movdqa	xmm5,xmm4
1367	movdqa	xmm9,xmm8
1368	movdqa	xmm2,xmm0
1369	movdqa	xmm6,xmm4
1370	movdqa	xmm10,xmm8
1371	movdqa	xmm3,xmm0
1372	movdqa	xmm7,xmm4
1373	movdqa	xmm11,xmm8
; Row 3 differs per state: starting from the value at [160+96+rbp], add
; $L$sse_inc (dword 1 in lane 0) once per state, so
; xmm15 = base+1, xmm14 = base+2, xmm13 = base+3, xmm12 = base+4.
1374	movdqa	xmm15,XMMWORD[((160+96))+rbp]
1375	paddd	xmm15,XMMWORD[$L$sse_inc]
1376	movdqa	xmm14,xmm15
1377	paddd	xmm14,XMMWORD[$L$sse_inc]
1378	movdqa	xmm13,xmm14
1379	paddd	xmm13,XMMWORD[$L$sse_inc]
1380	movdqa	xmm12,xmm13
1381	paddd	xmm12,XMMWORD[$L$sse_inc]
; Remember each state's initial row 3 for the feed-forward after the rounds.
; The slot at 160+96 also becomes the new base for any later block.
1382	movdqa	XMMWORD[(160+96)+rbp],xmm12
1383	movdqa	XMMWORD[(160+112)+rbp],xmm13
1384	movdqa	XMMWORD[(160+128)+rbp],xmm14
1385	movdqa	XMMWORD[(160+144)+rbp],xmm15
1386
; r8 = byte offset into the input for the interleaved hashing that follows.
1387	xor	r8,r8
; One loop iteration = one ChaCha20 double round on all four states,
; interleaved with absorbing one 16-byte input block at [rsi+r8] into the
; three-limb accumulator r10:r11:r12.
; The loop runs 10 times (r8 = 0,16,..,144), so it hashes 160 input bytes
; while performing 20 rounds.
;
; Register pressure: all 16 xmm registers hold state, so one row-2 register
; is spilled to [160+80+rbp] to free a scratch register for the rotates
; (xmm11 while working on states 0..2, xmm9 while working on state 3).
;
; The DB sequences are hand-encoded SSSE3 instructions:
;   102,15,58,15,<modrm>,imm / 102,69,15,58,15,<modrm>,imm = palignr x,x,imm
; which rotate rows 1/2/3 of a state by 4/8/12 bytes. The first half uses
; 4,8,12, the second half 12,8,4, which undoes the rotation.
1388$L$open_sse_tail_256_rounds_and_x1hash:
; acc += 16 input bytes; the "adc r12,1" also adds 1 into the third limb
; (presumably the Poly1305 2^128 padding bit for a full block).
1389	add	r10,QWORD[((0+0))+r8*1+rsi]
1390	adc	r11,QWORD[((8+0))+r8*1+rsi]
1391	adc	r12,1
; --- first half of the double round: states 0, 1, 2 (scratch = xmm11) ---
1392	movdqa	XMMWORD[(160+80)+rbp],xmm11
1393	paddd	xmm0,xmm4
1394	pxor	xmm12,xmm0
1395	pshufb	xmm12,XMMWORD[$L$rol16]
1396	paddd	xmm8,xmm12
1397	pxor	xmm4,xmm8
1398	movdqa	xmm11,xmm4
1399	pslld	xmm11,12
1400	psrld	xmm4,20
1401	pxor	xmm4,xmm11
1402	paddd	xmm0,xmm4
1403	pxor	xmm12,xmm0
1404	pshufb	xmm12,XMMWORD[$L$rol8]
1405	paddd	xmm8,xmm12
1406	pxor	xmm4,xmm8
1407	movdqa	xmm11,xmm4
1408	pslld	xmm11,7
1409	psrld	xmm4,25
1410	pxor	xmm4,xmm11
; palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
1411DB	102,15,58,15,228,4
1412DB	102,69,15,58,15,192,8
1413DB	102,69,15,58,15,228,12
1414	paddd	xmm1,xmm5
1415	pxor	xmm13,xmm1
1416	pshufb	xmm13,XMMWORD[$L$rol16]
1417	paddd	xmm9,xmm13
1418	pxor	xmm5,xmm9
1419	movdqa	xmm11,xmm5
1420	pslld	xmm11,12
1421	psrld	xmm5,20
1422	pxor	xmm5,xmm11
1423	paddd	xmm1,xmm5
1424	pxor	xmm13,xmm1
1425	pshufb	xmm13,XMMWORD[$L$rol8]
1426	paddd	xmm9,xmm13
1427	pxor	xmm5,xmm9
1428	movdqa	xmm11,xmm5
1429	pslld	xmm11,7
1430	psrld	xmm5,25
1431	pxor	xmm5,xmm11
; palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
1432DB	102,15,58,15,237,4
1433DB	102,69,15,58,15,201,8
1434DB	102,69,15,58,15,237,12
1435	paddd	xmm2,xmm6
1436	pxor	xmm14,xmm2
1437	pshufb	xmm14,XMMWORD[$L$rol16]
1438	paddd	xmm10,xmm14
1439	pxor	xmm6,xmm10
1440	movdqa	xmm11,xmm6
1441	pslld	xmm11,12
1442	psrld	xmm6,20
1443	pxor	xmm6,xmm11
1444	paddd	xmm2,xmm6
1445	pxor	xmm14,xmm2
1446	pshufb	xmm14,XMMWORD[$L$rol8]
1447	paddd	xmm10,xmm14
1448	pxor	xmm6,xmm10
1449	movdqa	xmm11,xmm6
1450	pslld	xmm11,7
1451	psrld	xmm6,25
1452	pxor	xmm6,xmm11
; palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
1453DB	102,15,58,15,246,4
1454DB	102,69,15,58,15,210,8
1455DB	102,69,15,58,15,246,12
; Restore xmm11 (row 2 of state 3).
1456	movdqa	xmm11,XMMWORD[((160+80))+rbp]
; Multiply step 1: (r10,r11,r12) * qword at [160+0+rbp]; partial products
; are gathered in r13 (low), r14, r15.
1457	mov	rax,QWORD[((0+160+0))+rbp]
1458	mov	r15,rax
1459	mul	r10
1460	mov	r13,rax
1461	mov	r14,rdx
1462	mov	rax,QWORD[((0+160+0))+rbp]
1463	mul	r11
1464	imul	r15,r12
1465	add	r14,rax
1466	adc	r15,rdx
; --- first half, state 3 (scratch = xmm9, spilled to the same slot) ---
1467	movdqa	XMMWORD[(160+80)+rbp],xmm9
1468	paddd	xmm3,xmm7
1469	pxor	xmm15,xmm3
1470	pshufb	xmm15,XMMWORD[$L$rol16]
1471	paddd	xmm11,xmm15
1472	pxor	xmm7,xmm11
1473	movdqa	xmm9,xmm7
1474	pslld	xmm9,12
1475	psrld	xmm7,20
1476	pxor	xmm7,xmm9
1477	paddd	xmm3,xmm7
1478	pxor	xmm15,xmm3
1479	pshufb	xmm15,XMMWORD[$L$rol8]
1480	paddd	xmm11,xmm15
1481	pxor	xmm7,xmm11
1482	movdqa	xmm9,xmm7
1483	pslld	xmm9,7
1484	psrld	xmm7,25
1485	pxor	xmm7,xmm9
; palignr xmm7,xmm7,4 / palignr xmm11,xmm11,8 / palignr xmm15,xmm15,12
1486DB	102,15,58,15,255,4
1487DB	102,69,15,58,15,219,8
1488DB	102,69,15,58,15,255,12
1489	movdqa	xmm9,XMMWORD[((160+80))+rbp]
; Multiply step 2: (r10,r11) * qword at [160+8+rbp], accumulated into
; r14/r15; r10 is reused to carry the high half of the first product.
1490	mov	rax,QWORD[((8+160+0))+rbp]
1491	mov	r9,rax
1492	mul	r10
1493	add	r14,rax
1494	adc	rdx,0
1495	mov	r10,rdx
1496	mov	rax,QWORD[((8+160+0))+rbp]
1497	mul	r11
1498	add	r15,rax
1499	adc	rdx,0
; --- second half of the double round: states 0, 1, 2 (scratch = xmm11) ---
1500	movdqa	XMMWORD[(160+80)+rbp],xmm11
1501	paddd	xmm0,xmm4
1502	pxor	xmm12,xmm0
1503	pshufb	xmm12,XMMWORD[$L$rol16]
1504	paddd	xmm8,xmm12
1505	pxor	xmm4,xmm8
1506	movdqa	xmm11,xmm4
1507	pslld	xmm11,12
1508	psrld	xmm4,20
1509	pxor	xmm4,xmm11
1510	paddd	xmm0,xmm4
1511	pxor	xmm12,xmm0
1512	pshufb	xmm12,XMMWORD[$L$rol8]
1513	paddd	xmm8,xmm12
1514	pxor	xmm4,xmm8
1515	movdqa	xmm11,xmm4
1516	pslld	xmm11,7
1517	psrld	xmm4,25
1518	pxor	xmm4,xmm11
; palignr xmm4,xmm4,12 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,4
1519DB	102,15,58,15,228,12
1520DB	102,69,15,58,15,192,8
1521DB	102,69,15,58,15,228,4
1522	paddd	xmm1,xmm5
1523	pxor	xmm13,xmm1
1524	pshufb	xmm13,XMMWORD[$L$rol16]
1525	paddd	xmm9,xmm13
1526	pxor	xmm5,xmm9
1527	movdqa	xmm11,xmm5
1528	pslld	xmm11,12
1529	psrld	xmm5,20
1530	pxor	xmm5,xmm11
1531	paddd	xmm1,xmm5
1532	pxor	xmm13,xmm1
1533	pshufb	xmm13,XMMWORD[$L$rol8]
1534	paddd	xmm9,xmm13
1535	pxor	xmm5,xmm9
1536	movdqa	xmm11,xmm5
1537	pslld	xmm11,7
1538	psrld	xmm5,25
1539	pxor	xmm5,xmm11
; palignr xmm5,xmm5,12 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,4
1540DB	102,15,58,15,237,12
1541DB	102,69,15,58,15,201,8
1542DB	102,69,15,58,15,237,4
; Multiply step 3: fold in r12 * high key qword and the pending carries;
; the full product is now r13 (low), r14, r15, r9 (high).
1543	imul	r9,r12
1544	add	r15,r10
1545	adc	r9,rdx
1546	paddd	xmm2,xmm6
1547	pxor	xmm14,xmm2
1548	pshufb	xmm14,XMMWORD[$L$rol16]
1549	paddd	xmm10,xmm14
1550	pxor	xmm6,xmm10
1551	movdqa	xmm11,xmm6
1552	pslld	xmm11,12
1553	psrld	xmm6,20
1554	pxor	xmm6,xmm11
1555	paddd	xmm2,xmm6
1556	pxor	xmm14,xmm2
1557	pshufb	xmm14,XMMWORD[$L$rol8]
1558	paddd	xmm10,xmm14
1559	pxor	xmm6,xmm10
1560	movdqa	xmm11,xmm6
1561	pslld	xmm11,7
1562	psrld	xmm6,25
1563	pxor	xmm6,xmm11
; palignr xmm6,xmm6,12 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,4
1564DB	102,15,58,15,246,12
1565DB	102,69,15,58,15,210,8
1566DB	102,69,15,58,15,246,4
1567	movdqa	xmm11,XMMWORD[((160+80))+rbp]
; Reduction: keep the low 130 bits in r10:r11:(r12 = r15 & 3), then add the
; bits above 2^130 back in twice, once as (r15 & -4):r9 (i.e. 4*c) and once
; shifted right by 2 (i.e. c), which adds 5*c in total.
1568	mov	r10,r13
1569	mov	r11,r14
1570	mov	r12,r15
1571	and	r12,3
1572	mov	r13,r15
1573	and	r13,-4
1574	mov	r14,r9
1575	shrd	r15,r9,2
1576	shr	r9,2
1577	add	r15,r13
1578	adc	r9,r14
1579	add	r10,r15
1580	adc	r11,r9
1581	adc	r12,0
; --- second half, state 3 (scratch = xmm9) ---
1582	movdqa	XMMWORD[(160+80)+rbp],xmm9
1583	paddd	xmm3,xmm7
1584	pxor	xmm15,xmm3
1585	pshufb	xmm15,XMMWORD[$L$rol16]
1586	paddd	xmm11,xmm15
1587	pxor	xmm7,xmm11
1588	movdqa	xmm9,xmm7
1589	pslld	xmm9,12
1590	psrld	xmm7,20
1591	pxor	xmm7,xmm9
1592	paddd	xmm3,xmm7
1593	pxor	xmm15,xmm3
1594	pshufb	xmm15,XMMWORD[$L$rol8]
1595	paddd	xmm11,xmm15
1596	pxor	xmm7,xmm11
1597	movdqa	xmm9,xmm7
1598	pslld	xmm9,7
1599	psrld	xmm7,25
1600	pxor	xmm7,xmm9
; palignr xmm7,xmm7,12 / palignr xmm11,xmm11,8 / palignr xmm15,xmm15,4
1601DB	102,15,58,15,255,12
1602DB	102,69,15,58,15,219,8
1603DB	102,69,15,58,15,255,4
1604	movdqa	xmm9,XMMWORD[((160+80))+rbp]
1605
; Next 16-byte block; stop after ten double rounds.
1606	add	r8,16
1607	cmp	r8,10*16
1608	jb	NEAR $L$open_sse_tail_256_rounds_and_x1hash
1609
; rcx = remaining length rounded down to a multiple of 16: the hash-only
; loop that follows runs until r8 reaches this bound.
1610	mov	rcx,rbx
1611	and	rcx,-16
; Hash-only loop followed by the feed-forward and XOR for three of the four
; blocks. On entry r8 is the offset of the next unhashed 16-byte block and
; rcx is the bound (rbx & -16). The loop test is at the bottom, so the body
; always runs at least once.
1612$L$open_sse_tail_256_hash:
; acc += block at [rsi+r8], with 1 added into the third limb.
1613	add	r10,QWORD[((0+0))+r8*1+rsi]
1614	adc	r11,QWORD[((8+0))+r8*1+rsi]
1615	adc	r12,1
; acc *= the 128-bit value at [160+0+rbp]; product in r13,r14,r15,r9.
1616	mov	rax,QWORD[((0+160+0))+rbp]
1617	mov	r15,rax
1618	mul	r10
1619	mov	r13,rax
1620	mov	r14,rdx
1621	mov	rax,QWORD[((0+160+0))+rbp]
1622	mul	r11
1623	imul	r15,r12
1624	add	r14,rax
1625	adc	r15,rdx
1626	mov	rax,QWORD[((8+160+0))+rbp]
1627	mov	r9,rax
1628	mul	r10
1629	add	r14,rax
1630	adc	rdx,0
1631	mov	r10,rdx
1632	mov	rax,QWORD[((8+160+0))+rbp]
1633	mul	r11
1634	add	r15,rax
1635	adc	rdx,0
1636	imul	r9,r12
1637	add	r15,r10
1638	adc	r9,rdx
; Reduce: the low 130 bits stay in r10:r11:r12, everything above is folded
; back in multiplied by 5 (as 4*c + c).
1639	mov	r10,r13
1640	mov	r11,r14
1641	mov	r12,r15
1642	and	r12,3
1643	mov	r13,r15
1644	and	r13,-4
1645	mov	r14,r9
1646	shrd	r15,r9,2
1647	shr	r9,2
1648	add	r15,r13
1649	adc	r9,r14
1650	add	r10,r15
1651	adc	r11,r9
1652	adc	r12,0
1653
1654	add	r8,16
1655	cmp	r8,rcx
1656	jb	NEAR $L$open_sse_tail_256_hash
; Feed-forward: add each state's initial rows back (row 3 from the per-state
; slots at 160+144/128/112/96).
1657	paddd	xmm3,XMMWORD[$L$chacha20_consts]
1658	paddd	xmm7,XMMWORD[((160+48))+rbp]
1659	paddd	xmm11,XMMWORD[((160+64))+rbp]
1660	paddd	xmm15,XMMWORD[((160+144))+rbp]
1661	paddd	xmm2,XMMWORD[$L$chacha20_consts]
1662	paddd	xmm6,XMMWORD[((160+48))+rbp]
1663	paddd	xmm10,XMMWORD[((160+64))+rbp]
1664	paddd	xmm14,XMMWORD[((160+128))+rbp]
1665	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1666	paddd	xmm5,XMMWORD[((160+48))+rbp]
1667	paddd	xmm9,XMMWORD[((160+64))+rbp]
1668	paddd	xmm13,XMMWORD[((160+112))+rbp]
1669	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1670	paddd	xmm4,XMMWORD[((160+48))+rbp]
1671	paddd	xmm8,XMMWORD[((160+64))+rbp]
1672	paddd	xmm12,XMMWORD[((160+96))+rbp]
; No free register is left for the first 64 bytes, so park xmm12 in the
; scratch slot and use it as the load/XOR temporary.
1673	movdqa	XMMWORD[(160+80)+rbp],xmm12
; Bytes 0..63: input XOR state 3 (xmm3,xmm7,xmm11,xmm15).
1674	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
1675	pxor	xmm12,xmm3
1676	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
1677	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
1678	pxor	xmm12,xmm7
1679	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
1680	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
1681	pxor	xmm12,xmm11
1682	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
1683	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
1684	pxor	xmm12,xmm15
1685	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
; Bytes 64..127: input XOR state 2. xmm3/7/11/15 are now free temporaries.
1686	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
1687	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
1688	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
1689	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
1690	pxor	xmm2,xmm3
1691	pxor	xmm6,xmm7
1692	pxor	xmm10,xmm11
1693	pxor	xmm15,xmm14
1694	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
1695	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
1696	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
1697	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
; Bytes 128..191: input XOR state 1.
1698	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
1699	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
1700	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
1701	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
1702	pxor	xmm1,xmm3
1703	pxor	xmm5,xmm7
1704	pxor	xmm9,xmm11
1705	pxor	xmm15,xmm13
1706	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
1707	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
1708	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
1709	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
1710
; Bring xmm12 back so state 0 (xmm0/4/8/12) is complete for the tail loop,
; account for the 192 bytes just written, and fall through.
1711	movdqa	xmm12,XMMWORD[((160+80))+rbp]
1712	sub	rbx,12*16
1713	lea	rsi,[192+rsi]
1714	lea	rdi,[192+rdi]
1715
1716
; Decrypt the remaining input 16 bytes at a time from one keystream block
; held in xmm0, xmm4, xmm8, xmm12 (in that order). After each chunk the
; registers are shifted down so xmm0 always holds the next 16 keystream bytes.
; This loop only XORs; it does not touch the r10:r11:r12 accumulator.
1717$L$open_sse_tail_64_dec_loop:
; Fewer than 16 bytes left: hand over to the partial-block path.
1718	cmp	rbx,16
1719	jb	NEAR $L$open_sse_tail_16_init
1720	sub	rbx,16
1721	movdqu	xmm3,XMMWORD[rsi]
1722	pxor	xmm0,xmm3
1723	movdqu	XMMWORD[rdi],xmm0
1724	lea	rsi,[16+rsi]
1725	lea	rdi,[16+rdi]
; Shift the keystream queue: xmm0 <- xmm4 <- xmm8 <- xmm12.
1726	movdqa	xmm0,xmm4
1727	movdqa	xmm4,xmm8
1728	movdqa	xmm8,xmm12
1729	jmp	NEAR $L$open_sse_tail_64_dec_loop
; $L$open_sse_tail_16 expects its keystream in xmm1 (it is also entered from
; $L$open_sse_128_xor_hash, which keeps the current keystream there).
1730$L$open_sse_tail_16_init:
1731	movdqa	xmm1,xmm0
1732
1733
; Handle a final partial block of rbx (< 16) bytes.
; In:  rsi = input, rdi = output, rbx = byte count, xmm1 = keystream,
;      r10:r11:r12 = accumulator.
; The input bytes are gathered into a zero-padded 16-byte value, which is
; both decrypted (XOR with xmm1) and absorbed into the accumulator.
1734$L$open_sse_tail_16:
; Nothing left: go straight to the final block and tag computation.
1735	test	rbx,rbx
1736	jz	NEAR $L$open_sse_finalize
1737
1738
1739
; Build the padded block in xmm3 by reading the input backwards: start at the
; last byte (rsi + rbx - 1), and for each byte shift xmm3 left by one byte and
; insert the new byte at position 0. After rbx steps, byte i of xmm3 is input
; byte i and the upper bytes are zero.
1740	pxor	xmm3,xmm3
1741	lea	rsi,[((-1))+rbx*1+rsi]
1742	mov	r8,rbx
1743$L$open_sse_tail_16_compose:
1744	pslldq	xmm3,1
1745	pinsrb	xmm3,BYTE[rsi],0
1746	sub	rsi,1
1747	sub	r8,1
1748	jnz	NEAR $L$open_sse_tail_16_compose
1749
; Copy the padded input into r13 (low qword) and r14 (high qword) before it is
; overwritten by the XOR. The DB line is a hand-encoded "movq r13,xmm3".
1750DB	102,73,15,126,221
1751	pextrq	r14,xmm3,1
1752
1753	pxor	xmm3,xmm1
1754
1755
; Write exactly rbx decrypted bytes, lowest byte first.
1756$L$open_sse_tail_16_extract:
1757	pextrb	XMMWORD[rdi],xmm3,0
1758	psrldq	xmm3,1
1759	add	rdi,1
1760	sub	rbx,1
1761	jne	NEAR $L$open_sse_tail_16_extract
1762
; Absorb the zero-padded input block: acc += r13:r14, with 1 added into the
; third limb, then multiply by the value at [160+0+rbp] and reduce.
1763	add	r10,r13
1764	adc	r11,r14
1765	adc	r12,1
1766	mov	rax,QWORD[((0+160+0))+rbp]
1767	mov	r15,rax
1768	mul	r10
1769	mov	r13,rax
1770	mov	r14,rdx
1771	mov	rax,QWORD[((0+160+0))+rbp]
1772	mul	r11
1773	imul	r15,r12
1774	add	r14,rax
1775	adc	r15,rdx
1776	mov	rax,QWORD[((8+160+0))+rbp]
1777	mov	r9,rax
1778	mul	r10
1779	add	r14,rax
1780	adc	rdx,0
1781	mov	r10,rdx
1782	mov	rax,QWORD[((8+160+0))+rbp]
1783	mul	r11
1784	add	r15,rax
1785	adc	rdx,0
1786	imul	r9,r12
1787	add	r15,r10
1788	adc	r9,rdx
; Reduce: low 130 bits stay, the bits above are added back multiplied by 5.
1789	mov	r10,r13
1790	mov	r11,r14
1791	mov	r12,r15
1792	and	r12,3
1793	mov	r13,r15
1794	and	r13,-4
1795	mov	r14,r9
1796	shrd	r15,r9,2
1797	shr	r9,2
1798	add	r15,r13
1799	adc	r9,r14
1800	add	r10,r15
1801	adc	r11,r9
1802	adc	r12,0
1803
1804
; Common exit path: absorb one last 16-byte block from the frame, finish the
; modular reduction, add the value at [160+16+rbp], store the 16-byte result
; through the pointer saved on the stack, restore registers and return.
1805$L$open_sse_finalize:
; acc += the 16 bytes at [160+32+rbp] (presumably the two 64-bit length
; fields written by the prologue; confirm against the open prologue, which
; is outside this view), with 1 added into the third limb.
1806	add	r10,QWORD[((0+160+32))+rbp]
1807	adc	r11,QWORD[((8+160+32))+rbp]
1808	adc	r12,1
; acc *= the 128-bit value at [160+0+rbp]; product in r13,r14,r15,r9.
1809	mov	rax,QWORD[((0+160+0))+rbp]
1810	mov	r15,rax
1811	mul	r10
1812	mov	r13,rax
1813	mov	r14,rdx
1814	mov	rax,QWORD[((0+160+0))+rbp]
1815	mul	r11
1816	imul	r15,r12
1817	add	r14,rax
1818	adc	r15,rdx
1819	mov	rax,QWORD[((8+160+0))+rbp]
1820	mov	r9,rax
1821	mul	r10
1822	add	r14,rax
1823	adc	rdx,0
1824	mov	r10,rdx
1825	mov	rax,QWORD[((8+160+0))+rbp]
1826	mul	r11
1827	add	r15,rax
1828	adc	rdx,0
1829	imul	r9,r12
1830	add	r15,r10
1831	adc	r9,rdx
; Partial reduction, as after every block.
1832	mov	r10,r13
1833	mov	r11,r14
1834	mov	r12,r15
1835	and	r12,3
1836	mov	r13,r15
1837	and	r13,-4
1838	mov	r14,r9
1839	shrd	r15,r9,2
1840	shr	r9,2
1841	add	r15,r13
1842	adc	r9,r14
1843	add	r10,r15
1844	adc	r11,r9
1845	adc	r12,0
1846
1847
; Final conditional subtraction without a branch: compute acc - (2^130 - 5)
; (limbs -5, -1, 3). If that borrows (CF set), acc was already below the
; modulus, so the saved copy in r13:r14:r15 is restored by the cmovc's.
1848	mov	r13,r10
1849	mov	r14,r11
1850	mov	r15,r12
1851	sub	r10,-5
1852	sbb	r11,-1
1853	sbb	r12,3
1854	cmovc	r10,r13
1855	cmovc	r11,r14
1856	cmovc	r12,r15
1857
; Add the 128-bit value at [160+16+rbp]; only the low 128 bits (r10:r11)
; are kept, the carry out of r11 is discarded.
1858	add	r10,QWORD[((0+160+16))+rbp]
1859	adc	r11,QWORD[((8+160+16))+rbp]
1860
; Restore xmm6..xmm15, which are callee-saved under the Windows x64 ABI,
; from the save area at [rbp+0 .. rbp+159].
1861	movaps	xmm6,XMMWORD[((0+0))+rbp]
1862	movaps	xmm7,XMMWORD[((16+0))+rbp]
1863	movaps	xmm8,XMMWORD[((32+0))+rbp]
1864	movaps	xmm9,XMMWORD[((48+0))+rbp]
1865	movaps	xmm10,XMMWORD[((64+0))+rbp]
1866	movaps	xmm11,XMMWORD[((80+0))+rbp]
1867	movaps	xmm12,XMMWORD[((96+0))+rbp]
1868	movaps	xmm13,XMMWORD[((112+0))+rbp]
1869	movaps	xmm14,XMMWORD[((128+0))+rbp]
1870	movaps	xmm15,XMMWORD[((144+0))+rbp]
1871
1872
; Release the local frame (same size expression as the matching "sub rsp").
1873	add	rsp,288 + 160 + 32
1874
1875
; The last value pushed by the prologue is a pointer; the 16-byte result in
; r10:r11 is written through it.
1876	pop	r9
1877
1878	mov	QWORD[r9],r10
1879	mov	QWORD[8+r9],r11
; Restore the callee-saved general-purpose registers in reverse push order.
1880	pop	r15
1881
1882	pop	r14
1883
1884	pop	r13
1885
1886	pop	r12
1887
1888	pop	rbx
1889
1890	pop	rbp
1891
; rdi/rsi are callee-saved on Win64; reload them from the caller's home
; space, where the prologue stored them.
1892	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1893	mov	rsi,QWORD[16+rsp]
1894	ret
1895
; Short-input path for open: set up three ChaCha20 states in registers only.
; State k is (xmm{k}, xmm{4+k}, xmm{8+k}, xmm{12+k}) for k = 0..2.
; r9 points at 48 bytes of input state (three 16-byte rows).
1896$L$open_sse_128:
1897
; Row 0: constants, replicated into all three states.
1898	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
1899	movdqa	xmm1,xmm0
1900	movdqa	xmm2,xmm0
; Row 1 from [r9].
1901	movdqu	xmm4,XMMWORD[r9]
1902	movdqa	xmm5,xmm4
1903	movdqa	xmm6,xmm4
; Row 2 from [16+r9].
1904	movdqu	xmm8,XMMWORD[16+r9]
1905	movdqa	xmm9,xmm8
1906	movdqa	xmm10,xmm8
; Row 3 from [32+r9]: xmm12 = base, xmm13 = base+1, xmm14 = base+2
; ($L$sse_inc adds 1 to dword lane 0).
1907	movdqu	xmm12,XMMWORD[32+r9]
1908	movdqa	xmm13,xmm12
1909	paddd	xmm13,XMMWORD[$L$sse_inc]
1910	movdqa	xmm14,xmm13
1911	paddd	xmm14,XMMWORD[$L$sse_inc]
; Keep copies of the initial rows for the feed-forward after the rounds:
; xmm7 = row 1, xmm11 = row 2, xmm15 = row 3 of state 1 (base+1).
1912	movdqa	xmm7,xmm4
1913	movdqa	xmm11,xmm8
1914	movdqa	xmm15,xmm13
; r10 = number of double rounds to run.
1915	mov	r10,10
1916
; Ten ChaCha20 double rounds over the three register-resident states, with
; xmm3 as the scratch register for the 12- and 7-bit rotates (the 16- and
; 8-bit rotates use pshufb with the $L$rol16 / $L$rol8 masks).
; The DB sequences are hand-encoded "palignr x,x,imm": rows 1/2/3 are rotated
; by 4/8/12 bytes after the first half and by 12/8/4 bytes after the second
; half, which undoes the rotation.
1917$L$open_sse_128_rounds:
; --- first half: state 0 ---
1918	paddd	xmm0,xmm4
1919	pxor	xmm12,xmm0
1920	pshufb	xmm12,XMMWORD[$L$rol16]
1921	paddd	xmm8,xmm12
1922	pxor	xmm4,xmm8
1923	movdqa	xmm3,xmm4
1924	pslld	xmm3,12
1925	psrld	xmm4,20
1926	pxor	xmm4,xmm3
1927	paddd	xmm0,xmm4
1928	pxor	xmm12,xmm0
1929	pshufb	xmm12,XMMWORD[$L$rol8]
1930	paddd	xmm8,xmm12
1931	pxor	xmm4,xmm8
1932	movdqa	xmm3,xmm4
1933	pslld	xmm3,7
1934	psrld	xmm4,25
1935	pxor	xmm4,xmm3
; palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
1936DB	102,15,58,15,228,4
1937DB	102,69,15,58,15,192,8
1938DB	102,69,15,58,15,228,12
; --- first half: state 1 ---
1939	paddd	xmm1,xmm5
1940	pxor	xmm13,xmm1
1941	pshufb	xmm13,XMMWORD[$L$rol16]
1942	paddd	xmm9,xmm13
1943	pxor	xmm5,xmm9
1944	movdqa	xmm3,xmm5
1945	pslld	xmm3,12
1946	psrld	xmm5,20
1947	pxor	xmm5,xmm3
1948	paddd	xmm1,xmm5
1949	pxor	xmm13,xmm1
1950	pshufb	xmm13,XMMWORD[$L$rol8]
1951	paddd	xmm9,xmm13
1952	pxor	xmm5,xmm9
1953	movdqa	xmm3,xmm5
1954	pslld	xmm3,7
1955	psrld	xmm5,25
1956	pxor	xmm5,xmm3
; palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
1957DB	102,15,58,15,237,4
1958DB	102,69,15,58,15,201,8
1959DB	102,69,15,58,15,237,12
; --- first half: state 2 ---
1960	paddd	xmm2,xmm6
1961	pxor	xmm14,xmm2
1962	pshufb	xmm14,XMMWORD[$L$rol16]
1963	paddd	xmm10,xmm14
1964	pxor	xmm6,xmm10
1965	movdqa	xmm3,xmm6
1966	pslld	xmm3,12
1967	psrld	xmm6,20
1968	pxor	xmm6,xmm3
1969	paddd	xmm2,xmm6
1970	pxor	xmm14,xmm2
1971	pshufb	xmm14,XMMWORD[$L$rol8]
1972	paddd	xmm10,xmm14
1973	pxor	xmm6,xmm10
1974	movdqa	xmm3,xmm6
1975	pslld	xmm3,7
1976	psrld	xmm6,25
1977	pxor	xmm6,xmm3
; palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
1978DB	102,15,58,15,246,4
1979DB	102,69,15,58,15,210,8
1980DB	102,69,15,58,15,246,12
; --- second half: state 0 ---
1981	paddd	xmm0,xmm4
1982	pxor	xmm12,xmm0
1983	pshufb	xmm12,XMMWORD[$L$rol16]
1984	paddd	xmm8,xmm12
1985	pxor	xmm4,xmm8
1986	movdqa	xmm3,xmm4
1987	pslld	xmm3,12
1988	psrld	xmm4,20
1989	pxor	xmm4,xmm3
1990	paddd	xmm0,xmm4
1991	pxor	xmm12,xmm0
1992	pshufb	xmm12,XMMWORD[$L$rol8]
1993	paddd	xmm8,xmm12
1994	pxor	xmm4,xmm8
1995	movdqa	xmm3,xmm4
1996	pslld	xmm3,7
1997	psrld	xmm4,25
1998	pxor	xmm4,xmm3
; palignr xmm4,xmm4,12 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,4
1999DB	102,15,58,15,228,12
2000DB	102,69,15,58,15,192,8
2001DB	102,69,15,58,15,228,4
; --- second half: state 1 ---
2002	paddd	xmm1,xmm5
2003	pxor	xmm13,xmm1
2004	pshufb	xmm13,XMMWORD[$L$rol16]
2005	paddd	xmm9,xmm13
2006	pxor	xmm5,xmm9
2007	movdqa	xmm3,xmm5
2008	pslld	xmm3,12
2009	psrld	xmm5,20
2010	pxor	xmm5,xmm3
2011	paddd	xmm1,xmm5
2012	pxor	xmm13,xmm1
2013	pshufb	xmm13,XMMWORD[$L$rol8]
2014	paddd	xmm9,xmm13
2015	pxor	xmm5,xmm9
2016	movdqa	xmm3,xmm5
2017	pslld	xmm3,7
2018	psrld	xmm5,25
2019	pxor	xmm5,xmm3
; palignr xmm5,xmm5,12 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,4
2020DB	102,15,58,15,237,12
2021DB	102,69,15,58,15,201,8
2022DB	102,69,15,58,15,237,4
; --- second half: state 2 ---
2023	paddd	xmm2,xmm6
2024	pxor	xmm14,xmm2
2025	pshufb	xmm14,XMMWORD[$L$rol16]
2026	paddd	xmm10,xmm14
2027	pxor	xmm6,xmm10
2028	movdqa	xmm3,xmm6
2029	pslld	xmm3,12
2030	psrld	xmm6,20
2031	pxor	xmm6,xmm3
2032	paddd	xmm2,xmm6
2033	pxor	xmm14,xmm2
2034	pshufb	xmm14,XMMWORD[$L$rol8]
2035	paddd	xmm10,xmm14
2036	pxor	xmm6,xmm10
2037	movdqa	xmm3,xmm6
2038	pslld	xmm3,7
2039	psrld	xmm6,25
2040	pxor	xmm6,xmm3
; palignr xmm6,xmm6,12 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,4
2041DB	102,15,58,15,246,12
2042DB	102,69,15,58,15,210,8
2043DB	102,69,15,58,15,246,4
2044
2045	dec	r10
2046	jnz	NEAR $L$open_sse_128_rounds
; Feed-forward using the copies saved before the loop (xmm7 = row 1,
; xmm11 = row 2, xmm15 = row 3 of state 1, then +1 for state 2).
; Rows 2 and 3 of state 0 (xmm8, xmm12) are not fed forward: only xmm0 and
; xmm4 of state 0 are used below.
2047	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2048	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2049	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2050	paddd	xmm4,xmm7
2051	paddd	xmm5,xmm7
2052	paddd	xmm6,xmm7
2053	paddd	xmm9,xmm11
2054	paddd	xmm10,xmm11
2055	paddd	xmm13,xmm15
2056	paddd	xmm15,XMMWORD[$L$sse_inc]
2057	paddd	xmm14,xmm15
2058
; State 0 supplies the authenticator key material: row 0 masked with
; $L$clamp goes to [160+0+rbp] (the multiplier used by every hash step), and
; row 1 goes to [160+16+rbp] (added once in $L$open_sse_finalize).
2059	pand	xmm0,XMMWORD[$L$clamp]
2060	movdqa	XMMWORD[(160+0)+rbp],xmm0
2061	movdqa	XMMWORD[(160+16)+rbp],xmm4
2062
; "mov r8,r8" has no effect; it is an artefact of the generator.
; poly_hash_ad_internal is defined outside this view; judging by its name it
; hashes the additional data (NOTE(review): confirm its inputs and clobbers).
2063	mov	r8,r8
2064	call	poly_hash_ad_internal
; Short-input main loop: for each full 16-byte chunk, absorb the input bytes
; into the accumulator and decrypt them with the keystream in xmm1.
; The remaining keystream is queued in
; xmm5, xmm9, xmm13, xmm2, xmm6, xmm10, xmm14 and shifted down each pass.
2065$L$open_sse_128_xor_hash:
; Fewer than 16 bytes left (possibly zero): partial-block path.
2066	cmp	rbx,16
2067	jb	NEAR $L$open_sse_tail_16
2068	sub	rbx,16
; acc += input block, with 1 added into the third limb. The input is hashed
; as read from rsi, i.e. before the XOR with the keystream.
2069	add	r10,QWORD[((0+0))+rsi]
2070	adc	r11,QWORD[((8+0))+rsi]
2071	adc	r12,1
2072
2073
; Decrypt this chunk and advance both pointers.
2074	movdqu	xmm3,XMMWORD[rsi]
2075	pxor	xmm1,xmm3
2076	movdqu	XMMWORD[rdi],xmm1
2077	lea	rsi,[16+rsi]
2078	lea	rdi,[16+rdi]
; acc *= the 128-bit value at [160+0+rbp]; product in r13,r14,r15,r9.
2079	mov	rax,QWORD[((0+160+0))+rbp]
2080	mov	r15,rax
2081	mul	r10
2082	mov	r13,rax
2083	mov	r14,rdx
2084	mov	rax,QWORD[((0+160+0))+rbp]
2085	mul	r11
2086	imul	r15,r12
2087	add	r14,rax
2088	adc	r15,rdx
2089	mov	rax,QWORD[((8+160+0))+rbp]
2090	mov	r9,rax
2091	mul	r10
2092	add	r14,rax
2093	adc	rdx,0
2094	mov	r10,rdx
2095	mov	rax,QWORD[((8+160+0))+rbp]
2096	mul	r11
2097	add	r15,rax
2098	adc	rdx,0
2099	imul	r9,r12
2100	add	r15,r10
2101	adc	r9,rdx
; Reduce: low 130 bits stay, the bits above are added back multiplied by 5.
2102	mov	r10,r13
2103	mov	r11,r14
2104	mov	r12,r15
2105	and	r12,3
2106	mov	r13,r15
2107	and	r13,-4
2108	mov	r14,r9
2109	shrd	r15,r9,2
2110	shr	r9,2
2111	add	r15,r13
2112	adc	r9,r14
2113	add	r10,r15
2114	adc	r11,r9
2115	adc	r12,0
2116
2117
; Shift the keystream queue by one 16-byte slot:
; xmm1 <- xmm5 <- xmm9 <- xmm13 <- xmm2 <- xmm6 <- xmm10 <- xmm14.
2118	movdqa	xmm1,xmm5
2119	movdqa	xmm5,xmm9
2120	movdqa	xmm9,xmm13
2121	movdqa	xmm13,xmm2
2122	movdqa	xmm2,xmm6
2123	movdqa	xmm6,xmm10
2124	movdqa	xmm10,xmm14
2125	jmp	NEAR $L$open_sse_128_xor_hash
2126$L$SEH_end_chacha20_poly1305_open:
2127
2128
2129
2130
2131
2132
2133
2134
2135global	chacha20_poly1305_seal
2136
2137ALIGN	64
; chacha20_poly1305_seal: Win64 entry point, frame setup, dispatch and SSE
; state setup. (The rest of the function continues past this region.)
;
; Win64 arguments arrive in rcx, rdx, r8, r9 plus two stack slots. They are
; moved into rdi, rsi, rdx, rcx, r8, r9 so the body can use the same
; register assignment as the non-Windows build.
; After the shuffle: rdi = output, rsi = input, rdx = input length (copied
; to rbx), r9 = pointer to a data block read at offsets 0, 16, 32 and 56.
; NOTE(review): rcx and r8 are only passed along or stored in this region
; (r8 goes into the frame and is later handed to poly_hash_ad_internal);
; confirm their meaning against the C prototype.
2138chacha20_poly1305_seal:
; rdi/rsi are callee-saved on Win64: stash them in the caller-provided home
; space so the epilogue can reload them.
2139	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2140	mov	QWORD[16+rsp],rsi
2141	mov	rax,rsp
2142$L$SEH_begin_chacha20_poly1305_seal:
2143	mov	rdi,rcx
2144	mov	rsi,rdx
2145	mov	rdx,r8
2146	mov	rcx,r9
; 5th and 6th arguments live above the return address and 32-byte home space.
2147	mov	r8,QWORD[40+rsp]
2148	mov	r9,QWORD[48+rsp]
2149
2150
2151
; _CET_ENDBR is %define'd to nothing at the top of this file.
2152_CET_ENDBR
; Save the callee-saved general-purpose registers.
2153	push	rbp
2154
2155	push	rbx
2156
2157	push	r12
2158
2159	push	r13
2160
2161	push	r14
2162
2163	push	r15
2164
2165
2166
; Keep r9 on the stack: the exit path pops it and writes the 16-byte result
; through it.
2167	push	r9
2168
; Local frame. rbp = (rsp + 32) rounded down to a multiple of 32, so every
; [rbp+..] slot used with movdqa/movaps is suitably aligned.
; Layout relative to rbp as used in this file:
;   0..159   saved xmm6..xmm15
;   160+0    clamped multiplier     160+16   value added at the very end
;   160+32   two qwords hashed last 160+48/64 state rows 1 and 2
;   160+80   scratch spill slot     160+96..144 per-block row 3 values
2169	sub	rsp,288 + 160 + 32
2170
2171	lea	rbp,[32+rsp]
2172	and	rbp,-32
2173
; xmm6..xmm15 are callee-saved under the Windows x64 ABI.
2174	movaps	XMMWORD[(0+0)+rbp],xmm6
2175	movaps	XMMWORD[(16+0)+rbp],xmm7
2176	movaps	XMMWORD[(32+0)+rbp],xmm8
2177	movaps	XMMWORD[(48+0)+rbp],xmm9
2178	movaps	XMMWORD[(64+0)+rbp],xmm10
2179	movaps	XMMWORD[(80+0)+rbp],xmm11
2180	movaps	XMMWORD[(96+0)+rbp],xmm12
2181	movaps	XMMWORD[(112+0)+rbp],xmm13
2182	movaps	XMMWORD[(128+0)+rbp],xmm14
2183	movaps	XMMWORD[(144+0)+rbp],xmm15
2184
; Final hashed block: first qword = r8, second qword = rdx + the qword at
; [56+r9] (presumably an extra-input length stored in the data block;
; confirm against the caller). rbx then becomes the working length counter.
2185	mov	rbx,QWORD[56+r9]
2186	add	rbx,rdx
2187	mov	QWORD[((0+160+32))+rbp],r8
2188	mov	QWORD[((8+160+32))+rbp],rbx
2189	mov	rbx,rdx
2190
; Take the AVX2 implementation only if bits 5 and 8 (mask 288 = 0x120) of the
; dword at OPENSSL_ia32cap_P+8 are both set: the and/xor pair yields zero
; exactly in that case. (Presumably the AVX2 and BMI2 feature bits; confirm
; against the capability-vector layout.)
2191	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
2192	and	eax,288
2193	xor	eax,288
2194	jz	NEAR chacha20_poly1305_seal_avx2
2195
; Inputs of at most 128 bytes use the three-state register-only path.
2196	cmp	rbx,128
2197	jbe	NEAR $L$seal_sse_128
2198
; Four-state setup. Rows 1..3 come from [r9], [16+r9], [32+r9].
2199	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2200	movdqu	xmm4,XMMWORD[r9]
2201	movdqu	xmm8,XMMWORD[16+r9]
2202	movdqu	xmm12,XMMWORD[32+r9]
2203
2204	movdqa	xmm1,xmm0
2205	movdqa	xmm2,xmm0
2206	movdqa	xmm3,xmm0
2207	movdqa	xmm5,xmm4
2208	movdqa	xmm6,xmm4
2209	movdqa	xmm7,xmm4
2210	movdqa	xmm9,xmm8
2211	movdqa	xmm10,xmm8
2212	movdqa	xmm11,xmm8
; Row 3 per state: xmm15 = base, xmm14 = base+1, xmm13 = base+2,
; xmm12 = base+3 ($L$sse_inc adds 1 to dword lane 0).
2213	movdqa	xmm15,xmm12
2214	paddd	xmm12,XMMWORD[$L$sse_inc]
2215	movdqa	xmm14,xmm12
2216	paddd	xmm12,XMMWORD[$L$sse_inc]
2217	movdqa	xmm13,xmm12
2218	paddd	xmm12,XMMWORD[$L$sse_inc]
2219
; Keep the initial rows in the frame for the feed-forward and later blocks.
2220	movdqa	XMMWORD[(160+48)+rbp],xmm4
2221	movdqa	XMMWORD[(160+64)+rbp],xmm8
2222	movdqa	XMMWORD[(160+96)+rbp],xmm12
2223	movdqa	XMMWORD[(160+112)+rbp],xmm13
2224	movdqa	XMMWORD[(160+128)+rbp],xmm14
2225	movdqa	XMMWORD[(160+144)+rbp],xmm15
; r10 = number of double rounds for the loop that follows.
2226	mov	r10,10
; Initial keystream generation for seal: ten double rounds over four states,
; processed "vertically" (the same step is applied to all four states before
; moving to the next step). No hashing is interleaved here.
;
; xmm8 is the only scratch register: its real content (row 2 of state 0) is
; parked at [160+80+rbp] whenever xmm8 is needed for a shuffle mask or a
; rotate temporary, and reloaded when row 2 of state 0 is updated.
;
; Hand-encoded instructions in the DB lines:
;   102,69,15,56,0,<modrm>        = pshufb xmm{15,14,13,12},xmm8
;   102,[69,]15,58,15,<modrm>,imm = palignr x,x,imm (row rotation by 4/8/12
;                                   bytes, reversed to 12/8/4 in the second
;                                   half)
2227$L$seal_sse_init_rounds:
; --- first half: a += b; d ^= a; d = rotl(d,16) via pshufb with $L$rol16 ---
2228	movdqa	XMMWORD[(160+80)+rbp],xmm8
2229	movdqa	xmm8,XMMWORD[$L$rol16]
2230	paddd	xmm3,xmm7
2231	paddd	xmm2,xmm6
2232	paddd	xmm1,xmm5
2233	paddd	xmm0,xmm4
2234	pxor	xmm15,xmm3
2235	pxor	xmm14,xmm2
2236	pxor	xmm13,xmm1
2237	pxor	xmm12,xmm0
2238DB	102,69,15,56,0,248
2239DB	102,69,15,56,0,240
2240DB	102,69,15,56,0,232
2241DB	102,69,15,56,0,224
; c += d; b ^= c; b = rotl(b,12) using shift pairs with xmm8 as temporary.
2242	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2243	paddd	xmm11,xmm15
2244	paddd	xmm10,xmm14
2245	paddd	xmm9,xmm13
2246	paddd	xmm8,xmm12
2247	pxor	xmm7,xmm11
2248	pxor	xmm6,xmm10
2249	pxor	xmm5,xmm9
2250	pxor	xmm4,xmm8
2251	movdqa	XMMWORD[(160+80)+rbp],xmm8
2252	movdqa	xmm8,xmm7
2253	psrld	xmm8,20
2254	pslld	xmm7,32-20
2255	pxor	xmm7,xmm8
2256	movdqa	xmm8,xmm6
2257	psrld	xmm8,20
2258	pslld	xmm6,32-20
2259	pxor	xmm6,xmm8
2260	movdqa	xmm8,xmm5
2261	psrld	xmm8,20
2262	pslld	xmm5,32-20
2263	pxor	xmm5,xmm8
2264	movdqa	xmm8,xmm4
2265	psrld	xmm8,20
2266	pslld	xmm4,32-20
2267	pxor	xmm4,xmm8
; a += b; d ^= a; d = rotl(d,8) via pshufb with $L$rol8.
2268	movdqa	xmm8,XMMWORD[$L$rol8]
2269	paddd	xmm3,xmm7
2270	paddd	xmm2,xmm6
2271	paddd	xmm1,xmm5
2272	paddd	xmm0,xmm4
2273	pxor	xmm15,xmm3
2274	pxor	xmm14,xmm2
2275	pxor	xmm13,xmm1
2276	pxor	xmm12,xmm0
2277DB	102,69,15,56,0,248
2278DB	102,69,15,56,0,240
2279DB	102,69,15,56,0,232
2280DB	102,69,15,56,0,224
; c += d; b ^= c; b = rotl(b,7).
2281	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2282	paddd	xmm11,xmm15
2283	paddd	xmm10,xmm14
2284	paddd	xmm9,xmm13
2285	paddd	xmm8,xmm12
2286	pxor	xmm7,xmm11
2287	pxor	xmm6,xmm10
2288	pxor	xmm5,xmm9
2289	pxor	xmm4,xmm8
2290	movdqa	XMMWORD[(160+80)+rbp],xmm8
2291	movdqa	xmm8,xmm7
2292	psrld	xmm8,25
2293	pslld	xmm7,32-25
2294	pxor	xmm7,xmm8
2295	movdqa	xmm8,xmm6
2296	psrld	xmm8,25
2297	pslld	xmm6,32-25
2298	pxor	xmm6,xmm8
2299	movdqa	xmm8,xmm5
2300	psrld	xmm8,25
2301	pslld	xmm5,32-25
2302	pxor	xmm5,xmm8
2303	movdqa	xmm8,xmm4
2304	psrld	xmm8,25
2305	pslld	xmm4,32-25
2306	pxor	xmm4,xmm8
2307	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Rotate rows 1/2/3 of states 3,2,1,0 by 4/8/12 bytes (palignr).
2308DB	102,15,58,15,255,4
2309DB	102,69,15,58,15,219,8
2310DB	102,69,15,58,15,255,12
2311DB	102,15,58,15,246,4
2312DB	102,69,15,58,15,210,8
2313DB	102,69,15,58,15,246,12
2314DB	102,15,58,15,237,4
2315DB	102,69,15,58,15,201,8
2316DB	102,69,15,58,15,237,12
2317DB	102,15,58,15,228,4
2318DB	102,69,15,58,15,192,8
2319DB	102,69,15,58,15,228,12
; --- second half: same four steps on the rotated rows ---
2320	movdqa	XMMWORD[(160+80)+rbp],xmm8
2321	movdqa	xmm8,XMMWORD[$L$rol16]
2322	paddd	xmm3,xmm7
2323	paddd	xmm2,xmm6
2324	paddd	xmm1,xmm5
2325	paddd	xmm0,xmm4
2326	pxor	xmm15,xmm3
2327	pxor	xmm14,xmm2
2328	pxor	xmm13,xmm1
2329	pxor	xmm12,xmm0
2330DB	102,69,15,56,0,248
2331DB	102,69,15,56,0,240
2332DB	102,69,15,56,0,232
2333DB	102,69,15,56,0,224
2334	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2335	paddd	xmm11,xmm15
2336	paddd	xmm10,xmm14
2337	paddd	xmm9,xmm13
2338	paddd	xmm8,xmm12
2339	pxor	xmm7,xmm11
2340	pxor	xmm6,xmm10
2341	pxor	xmm5,xmm9
2342	pxor	xmm4,xmm8
2343	movdqa	XMMWORD[(160+80)+rbp],xmm8
2344	movdqa	xmm8,xmm7
2345	psrld	xmm8,20
2346	pslld	xmm7,32-20
2347	pxor	xmm7,xmm8
2348	movdqa	xmm8,xmm6
2349	psrld	xmm8,20
2350	pslld	xmm6,32-20
2351	pxor	xmm6,xmm8
2352	movdqa	xmm8,xmm5
2353	psrld	xmm8,20
2354	pslld	xmm5,32-20
2355	pxor	xmm5,xmm8
2356	movdqa	xmm8,xmm4
2357	psrld	xmm8,20
2358	pslld	xmm4,32-20
2359	pxor	xmm4,xmm8
2360	movdqa	xmm8,XMMWORD[$L$rol8]
2361	paddd	xmm3,xmm7
2362	paddd	xmm2,xmm6
2363	paddd	xmm1,xmm5
2364	paddd	xmm0,xmm4
2365	pxor	xmm15,xmm3
2366	pxor	xmm14,xmm2
2367	pxor	xmm13,xmm1
2368	pxor	xmm12,xmm0
2369DB	102,69,15,56,0,248
2370DB	102,69,15,56,0,240
2371DB	102,69,15,56,0,232
2372DB	102,69,15,56,0,224
2373	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2374	paddd	xmm11,xmm15
2375	paddd	xmm10,xmm14
2376	paddd	xmm9,xmm13
2377	paddd	xmm8,xmm12
2378	pxor	xmm7,xmm11
2379	pxor	xmm6,xmm10
2380	pxor	xmm5,xmm9
2381	pxor	xmm4,xmm8
2382	movdqa	XMMWORD[(160+80)+rbp],xmm8
2383	movdqa	xmm8,xmm7
2384	psrld	xmm8,25
2385	pslld	xmm7,32-25
2386	pxor	xmm7,xmm8
2387	movdqa	xmm8,xmm6
2388	psrld	xmm8,25
2389	pslld	xmm6,32-25
2390	pxor	xmm6,xmm8
2391	movdqa	xmm8,xmm5
2392	psrld	xmm8,25
2393	pslld	xmm5,32-25
2394	pxor	xmm5,xmm8
2395	movdqa	xmm8,xmm4
2396	psrld	xmm8,25
2397	pslld	xmm4,32-25
2398	pxor	xmm4,xmm8
2399	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Undo the row rotation: rows 1/2/3 rotated by 12/8/4 bytes.
2400DB	102,15,58,15,255,12
2401DB	102,69,15,58,15,219,8
2402DB	102,69,15,58,15,255,4
2403DB	102,15,58,15,246,12
2404DB	102,69,15,58,15,210,8
2405DB	102,69,15,58,15,246,4
2406DB	102,15,58,15,237,12
2407DB	102,69,15,58,15,201,8
2408DB	102,69,15,58,15,237,4
2409DB	102,15,58,15,228,12
2410DB	102,69,15,58,15,192,8
2411DB	102,69,15,58,15,228,4
2412
2413	dec	r10
2414	jnz	NEAR $L$seal_sse_init_rounds
; Feed-forward: add the initial rows back into all four states.
2415	paddd	xmm3,XMMWORD[$L$chacha20_consts]
2416	paddd	xmm7,XMMWORD[((160+48))+rbp]
2417	paddd	xmm11,XMMWORD[((160+64))+rbp]
2418	paddd	xmm15,XMMWORD[((160+144))+rbp]
2419	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2420	paddd	xmm6,XMMWORD[((160+48))+rbp]
2421	paddd	xmm10,XMMWORD[((160+64))+rbp]
2422	paddd	xmm14,XMMWORD[((160+128))+rbp]
2423	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2424	paddd	xmm5,XMMWORD[((160+48))+rbp]
2425	paddd	xmm9,XMMWORD[((160+64))+rbp]
2426	paddd	xmm13,XMMWORD[((160+112))+rbp]
2427	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2428	paddd	xmm4,XMMWORD[((160+48))+rbp]
2429	paddd	xmm8,XMMWORD[((160+64))+rbp]
2430	paddd	xmm12,XMMWORD[((160+96))+rbp]
2431
2432
; State 3 (the one whose row 3 was the unincremented base) supplies the
; authenticator key material: row 0 masked with $L$clamp -> [160+0+rbp],
; row 1 -> [160+16+rbp]. Its rows 2 and 3 are not used.
2433	pand	xmm3,XMMWORD[$L$clamp]
2434	movdqa	XMMWORD[(160+0)+rbp],xmm3
2435	movdqa	XMMWORD[(160+16)+rbp],xmm7
2436
; "mov r8,r8" has no effect; it is an artefact of the generator.
; poly_hash_ad_internal is defined outside this view (NOTE(review): confirm
; its inputs and clobbers).
2437	mov	r8,r8
2438	call	poly_hash_ad_internal
; Encrypt input bytes 0..63 with state 2 and bytes 64..127 with state 1.
; xmm3/7/11/15 are free to use as load temporaries now.
2439	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
2440	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
2441	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
2442	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
2443	pxor	xmm2,xmm3
2444	pxor	xmm6,xmm7
2445	pxor	xmm10,xmm11
2446	pxor	xmm15,xmm14
2447	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
2448	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
2449	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
2450	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
2451	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
2452	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
2453	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
2454	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
2455	pxor	xmm1,xmm3
2456	pxor	xmm5,xmm7
2457	pxor	xmm9,xmm11
2458	pxor	xmm15,xmm13
2459	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
2460	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
2461	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
2462	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
2463
; More than 192 bytes in total: also use state 0 and enter the main loop.
; Otherwise record that 128 output bytes are pending (rcx), consume 128 bytes
; of input and go to the tail hashing code. rdi is deliberately not advanced
; here; it still points at the start of the bytes just written.
2464	cmp	rbx,12*16
2465	ja	NEAR $L$seal_sse_main_init
2466	mov	rcx,8*16
2467	sub	rbx,8*16
2468	lea	rsi,[128+rsi]
2469	jmp	NEAR $L$seal_sse_128_tail_hash
; Reached when more than 192 input bytes remain after the initial rounds.
; Encrypts bytes 128..191 with state 0, then chooses the main loop or one of
; the tail paths depending on how much input is left.
2470$L$seal_sse_main_init:
2471	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
2472	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
2473	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
2474	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
2475	pxor	xmm0,xmm3
2476	pxor	xmm4,xmm7
2477	pxor	xmm8,xmm11
2478	pxor	xmm15,xmm12
2479	movdqu	XMMWORD[(0 + 128)+rdi],xmm0
2480	movdqu	XMMWORD[(16 + 128)+rdi],xmm4
2481	movdqu	XMMWORD[(32 + 128)+rdi],xmm8
2482	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
2483
; NOTE: this first write to rcx is dead; rcx is overwritten with 2 three
; instructions later, before any read. It is kept as emitted by the generator.
2484	mov	rcx,12*16
; 192 input bytes consumed. rdi is not advanced: it trails rsi by 192 bytes.
2485	sub	rbx,12*16
2486	lea	rsi,[192+rsi]
; Loop counters for the round/hash loop: rcx is the counter decremented by
; "dec rcx / jg", r8 the one decremented by "dec r8 / jge".
2487	mov	rcx,2
2488	mov	r8,8
; Dispatch on the remaining length: <=64, <=128, <=192 go to dedicated tail
; paths; anything larger falls through into the main loop.
2489	cmp	rbx,4*16
2490	jbe	NEAR $L$seal_sse_tail_64
2491	cmp	rbx,8*16
2492	jbe	NEAR $L$seal_sse_tail_128
2493	cmp	rbx,12*16
2494	jbe	NEAR $L$seal_sse_tail_192
2495
; Top of the seal main loop: rebuild four fresh ChaCha20 working states.
; State k is (xmm{k}, xmm{4+k}, xmm{8+k}, xmm{12+k}) for k = 0..3.
2496$L$seal_sse_main_loop:
; Rows 0..2 are shared: constants plus the rows kept at 160+48 and 160+64.
2497	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2498	movdqa	xmm4,XMMWORD[((160+48))+rbp]
2499	movdqa	xmm8,XMMWORD[((160+64))+rbp]
2500	movdqa	xmm1,xmm0
2501	movdqa	xmm5,xmm4
2502	movdqa	xmm9,xmm8
2503	movdqa	xmm2,xmm0
2504	movdqa	xmm6,xmm4
2505	movdqa	xmm10,xmm8
2506	movdqa	xmm3,xmm0
2507	movdqa	xmm7,xmm4
2508	movdqa	xmm11,xmm8
; Row 3 continues from the value left at [160+96+rbp] by the previous batch:
; xmm15 = base+1, xmm14 = base+2, xmm13 = base+3, xmm12 = base+4.
2509	movdqa	xmm15,XMMWORD[((160+96))+rbp]
2510	paddd	xmm15,XMMWORD[$L$sse_inc]
2511	movdqa	xmm14,xmm15
2512	paddd	xmm14,XMMWORD[$L$sse_inc]
2513	movdqa	xmm13,xmm14
2514	paddd	xmm13,XMMWORD[$L$sse_inc]
2515	movdqa	xmm12,xmm13
2516	paddd	xmm12,XMMWORD[$L$sse_inc]
; Save each state's initial row 3 for the feed-forward; slot 160+96 doubles
; as the base for the next batch.
2517	movdqa	XMMWORD[(160+96)+rbp],xmm12
2518	movdqa	XMMWORD[(160+112)+rbp],xmm13
2519	movdqa	XMMWORD[(160+128)+rbp],xmm14
2520	movdqa	XMMWORD[(160+144)+rbp],xmm15
2521
2522ALIGN	32
2523$L$seal_sse_main_rounds:
2524	movdqa	XMMWORD[(160+80)+rbp],xmm8
2525	movdqa	xmm8,XMMWORD[$L$rol16]
2526	paddd	xmm3,xmm7
2527	paddd	xmm2,xmm6
2528	paddd	xmm1,xmm5
2529	paddd	xmm0,xmm4
2530	pxor	xmm15,xmm3
2531	pxor	xmm14,xmm2
2532	pxor	xmm13,xmm1
2533	pxor	xmm12,xmm0
2534DB	102,69,15,56,0,248
2535DB	102,69,15,56,0,240
2536DB	102,69,15,56,0,232
2537DB	102,69,15,56,0,224
2538	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2539	paddd	xmm11,xmm15
2540	paddd	xmm10,xmm14
2541	paddd	xmm9,xmm13
2542	paddd	xmm8,xmm12
2543	pxor	xmm7,xmm11
2544	add	r10,QWORD[((0+0))+rdi]
2545	adc	r11,QWORD[((8+0))+rdi]
2546	adc	r12,1
2547	pxor	xmm6,xmm10
2548	pxor	xmm5,xmm9
2549	pxor	xmm4,xmm8
2550	movdqa	XMMWORD[(160+80)+rbp],xmm8
2551	movdqa	xmm8,xmm7
2552	psrld	xmm8,20
2553	pslld	xmm7,32-20
2554	pxor	xmm7,xmm8
2555	movdqa	xmm8,xmm6
2556	psrld	xmm8,20
2557	pslld	xmm6,32-20
2558	pxor	xmm6,xmm8
2559	movdqa	xmm8,xmm5
2560	psrld	xmm8,20
2561	pslld	xmm5,32-20
2562	pxor	xmm5,xmm8
2563	movdqa	xmm8,xmm4
2564	psrld	xmm8,20
2565	pslld	xmm4,32-20
2566	pxor	xmm4,xmm8
2567	mov	rax,QWORD[((0+160+0))+rbp]
2568	mov	r15,rax
2569	mul	r10
2570	mov	r13,rax
2571	mov	r14,rdx
2572	mov	rax,QWORD[((0+160+0))+rbp]
2573	mul	r11
2574	imul	r15,r12
2575	add	r14,rax
2576	adc	r15,rdx
2577	movdqa	xmm8,XMMWORD[$L$rol8]
2578	paddd	xmm3,xmm7
2579	paddd	xmm2,xmm6
2580	paddd	xmm1,xmm5
2581	paddd	xmm0,xmm4
2582	pxor	xmm15,xmm3
2583	pxor	xmm14,xmm2
2584	pxor	xmm13,xmm1
2585	pxor	xmm12,xmm0
2586DB	102,69,15,56,0,248
2587DB	102,69,15,56,0,240
2588DB	102,69,15,56,0,232
2589DB	102,69,15,56,0,224
2590	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2591	paddd	xmm11,xmm15
2592	paddd	xmm10,xmm14
2593	paddd	xmm9,xmm13
2594	paddd	xmm8,xmm12
2595	pxor	xmm7,xmm11
2596	pxor	xmm6,xmm10
2597	mov	rax,QWORD[((8+160+0))+rbp]
2598	mov	r9,rax
2599	mul	r10
2600	add	r14,rax
2601	adc	rdx,0
2602	mov	r10,rdx
2603	mov	rax,QWORD[((8+160+0))+rbp]
2604	mul	r11
2605	add	r15,rax
2606	adc	rdx,0
2607	pxor	xmm5,xmm9
2608	pxor	xmm4,xmm8
2609	movdqa	XMMWORD[(160+80)+rbp],xmm8
2610	movdqa	xmm8,xmm7
2611	psrld	xmm8,25
2612	pslld	xmm7,32-25
2613	pxor	xmm7,xmm8
2614	movdqa	xmm8,xmm6
2615	psrld	xmm8,25
2616	pslld	xmm6,32-25
2617	pxor	xmm6,xmm8
2618	movdqa	xmm8,xmm5
2619	psrld	xmm8,25
2620	pslld	xmm5,32-25
2621	pxor	xmm5,xmm8
2622	movdqa	xmm8,xmm4
2623	psrld	xmm8,25
2624	pslld	xmm4,32-25
2625	pxor	xmm4,xmm8
2626	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2627	imul	r9,r12
2628	add	r15,r10
2629	adc	r9,rdx
2630DB	102,15,58,15,255,4
2631DB	102,69,15,58,15,219,8
2632DB	102,69,15,58,15,255,12
2633DB	102,15,58,15,246,4
2634DB	102,69,15,58,15,210,8
2635DB	102,69,15,58,15,246,12
2636DB	102,15,58,15,237,4
2637DB	102,69,15,58,15,201,8
2638DB	102,69,15,58,15,237,12
2639DB	102,15,58,15,228,4
2640DB	102,69,15,58,15,192,8
2641DB	102,69,15,58,15,228,12
2642	movdqa	XMMWORD[(160+80)+rbp],xmm8
2643	movdqa	xmm8,XMMWORD[$L$rol16]
2644	paddd	xmm3,xmm7
2645	paddd	xmm2,xmm6
2646	paddd	xmm1,xmm5
2647	paddd	xmm0,xmm4
2648	pxor	xmm15,xmm3
2649	pxor	xmm14,xmm2
2650	mov	r10,r13
2651	mov	r11,r14
2652	mov	r12,r15
2653	and	r12,3
2654	mov	r13,r15
2655	and	r13,-4
2656	mov	r14,r9
2657	shrd	r15,r9,2
2658	shr	r9,2
2659	add	r15,r13
2660	adc	r9,r14
2661	add	r10,r15
2662	adc	r11,r9
2663	adc	r12,0
2664	pxor	xmm13,xmm1
2665	pxor	xmm12,xmm0
2666DB	102,69,15,56,0,248
2667DB	102,69,15,56,0,240
2668DB	102,69,15,56,0,232
2669DB	102,69,15,56,0,224
2670	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2671	paddd	xmm11,xmm15
2672	paddd	xmm10,xmm14
2673	paddd	xmm9,xmm13
2674	paddd	xmm8,xmm12
2675	pxor	xmm7,xmm11
2676	pxor	xmm6,xmm10
2677	pxor	xmm5,xmm9
2678	pxor	xmm4,xmm8
2679	movdqa	XMMWORD[(160+80)+rbp],xmm8
2680	movdqa	xmm8,xmm7
2681	psrld	xmm8,20
2682	pslld	xmm7,32-20
2683	pxor	xmm7,xmm8
2684	movdqa	xmm8,xmm6
2685	psrld	xmm8,20
2686	pslld	xmm6,32-20
2687	pxor	xmm6,xmm8
2688	movdqa	xmm8,xmm5
2689	psrld	xmm8,20
2690	pslld	xmm5,32-20
2691	pxor	xmm5,xmm8
2692	movdqa	xmm8,xmm4
2693	psrld	xmm8,20
2694	pslld	xmm4,32-20
2695	pxor	xmm4,xmm8
2696	movdqa	xmm8,XMMWORD[$L$rol8]
2697	paddd	xmm3,xmm7
2698	paddd	xmm2,xmm6
2699	paddd	xmm1,xmm5
2700	paddd	xmm0,xmm4
2701	pxor	xmm15,xmm3
2702	pxor	xmm14,xmm2
2703	pxor	xmm13,xmm1
2704	pxor	xmm12,xmm0
2705DB	102,69,15,56,0,248
2706DB	102,69,15,56,0,240
2707DB	102,69,15,56,0,232
2708DB	102,69,15,56,0,224
2709	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2710	paddd	xmm11,xmm15
2711	paddd	xmm10,xmm14
2712	paddd	xmm9,xmm13
2713	paddd	xmm8,xmm12
2714	pxor	xmm7,xmm11
2715	pxor	xmm6,xmm10
2716	pxor	xmm5,xmm9
2717	pxor	xmm4,xmm8
2718	movdqa	XMMWORD[(160+80)+rbp],xmm8
2719	movdqa	xmm8,xmm7
2720	psrld	xmm8,25
2721	pslld	xmm7,32-25
2722	pxor	xmm7,xmm8
2723	movdqa	xmm8,xmm6
2724	psrld	xmm8,25
2725	pslld	xmm6,32-25
2726	pxor	xmm6,xmm8
2727	movdqa	xmm8,xmm5
2728	psrld	xmm8,25
2729	pslld	xmm5,32-25
2730	pxor	xmm5,xmm8
2731	movdqa	xmm8,xmm4
2732	psrld	xmm8,25
2733	pslld	xmm4,32-25
2734	pxor	xmm4,xmm8
2735	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2736DB	102,15,58,15,255,12
2737DB	102,69,15,58,15,219,8
2738DB	102,69,15,58,15,255,4
2739DB	102,15,58,15,246,12
2740DB	102,69,15,58,15,210,8
2741DB	102,69,15,58,15,246,4
2742DB	102,15,58,15,237,12
2743DB	102,69,15,58,15,201,8
2744DB	102,69,15,58,15,237,4
2745DB	102,15,58,15,228,12
2746DB	102,69,15,58,15,192,8
2747DB	102,69,15,58,15,228,4
2748
2749	lea	rdi,[16+rdi]
2750	dec	r8
2751	jge	NEAR $L$seal_sse_main_rounds
2752	add	r10,QWORD[((0+0))+rdi]
2753	adc	r11,QWORD[((8+0))+rdi]
2754	adc	r12,1
2755	mov	rax,QWORD[((0+160+0))+rbp]
2756	mov	r15,rax
2757	mul	r10
2758	mov	r13,rax
2759	mov	r14,rdx
2760	mov	rax,QWORD[((0+160+0))+rbp]
2761	mul	r11
2762	imul	r15,r12
2763	add	r14,rax
2764	adc	r15,rdx
2765	mov	rax,QWORD[((8+160+0))+rbp]
2766	mov	r9,rax
2767	mul	r10
2768	add	r14,rax
2769	adc	rdx,0
2770	mov	r10,rdx
2771	mov	rax,QWORD[((8+160+0))+rbp]
2772	mul	r11
2773	add	r15,rax
2774	adc	rdx,0
2775	imul	r9,r12
2776	add	r15,r10
2777	adc	r9,rdx
2778	mov	r10,r13
2779	mov	r11,r14
2780	mov	r12,r15
2781	and	r12,3
2782	mov	r13,r15
2783	and	r13,-4
2784	mov	r14,r9
2785	shrd	r15,r9,2
2786	shr	r9,2
2787	add	r15,r13
2788	adc	r9,r14
2789	add	r10,r15
2790	adc	r11,r9
2791	adc	r12,0
2792
2793	lea	rdi,[16+rdi]
2794	dec	rcx
2795	jg	NEAR $L$seal_sse_main_rounds
2796	paddd	xmm3,XMMWORD[$L$chacha20_consts]
2797	paddd	xmm7,XMMWORD[((160+48))+rbp]
2798	paddd	xmm11,XMMWORD[((160+64))+rbp]
2799	paddd	xmm15,XMMWORD[((160+144))+rbp]
2800	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2801	paddd	xmm6,XMMWORD[((160+48))+rbp]
2802	paddd	xmm10,XMMWORD[((160+64))+rbp]
2803	paddd	xmm14,XMMWORD[((160+128))+rbp]
2804	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2805	paddd	xmm5,XMMWORD[((160+48))+rbp]
2806	paddd	xmm9,XMMWORD[((160+64))+rbp]
2807	paddd	xmm13,XMMWORD[((160+112))+rbp]
2808	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2809	paddd	xmm4,XMMWORD[((160+48))+rbp]
2810	paddd	xmm8,XMMWORD[((160+64))+rbp]
2811	paddd	xmm12,XMMWORD[((160+96))+rbp]
2812
2813	movdqa	XMMWORD[(160+80)+rbp],xmm14
2814	movdqa	XMMWORD[(160+80)+rbp],xmm14
2815	movdqu	xmm14,XMMWORD[((0 + 0))+rsi]
2816	pxor	xmm14,xmm3
2817	movdqu	XMMWORD[(0 + 0)+rdi],xmm14
2818	movdqu	xmm14,XMMWORD[((16 + 0))+rsi]
2819	pxor	xmm14,xmm7
2820	movdqu	XMMWORD[(16 + 0)+rdi],xmm14
2821	movdqu	xmm14,XMMWORD[((32 + 0))+rsi]
2822	pxor	xmm14,xmm11
2823	movdqu	XMMWORD[(32 + 0)+rdi],xmm14
2824	movdqu	xmm14,XMMWORD[((48 + 0))+rsi]
2825	pxor	xmm14,xmm15
2826	movdqu	XMMWORD[(48 + 0)+rdi],xmm14
2827
2828	movdqa	xmm14,XMMWORD[((160+80))+rbp]
2829	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
2830	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
2831	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
2832	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
2833	pxor	xmm2,xmm3
2834	pxor	xmm6,xmm7
2835	pxor	xmm10,xmm11
2836	pxor	xmm15,xmm14
2837	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
2838	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
2839	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
2840	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
2841	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
2842	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
2843	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
2844	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
2845	pxor	xmm1,xmm3
2846	pxor	xmm5,xmm7
2847	pxor	xmm9,xmm11
2848	pxor	xmm15,xmm13
2849	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
2850	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
2851	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
2852	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
2853
2854	cmp	rbx,16*16
2855	ja	NEAR $L$seal_sse_main_loop_xor
2856
2857	mov	rcx,12*16
2858	sub	rbx,12*16
2859	lea	rsi,[192+rsi]
2860	jmp	NEAR $L$seal_sse_128_tail_hash
; $L$seal_sse_main_loop_xor: XOR the 64 bytes at [rsi+192] with
; xmm0/xmm4/xmm8/xmm12, store the result at [rdi+192], then advance rsi by 256,
; subtract 256 from rbx and dispatch on the value left in rbx.
2861$L$seal_sse_main_loop_xor:
2862	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
2863	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
2864	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
2865	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
2866	pxor	xmm0,xmm3
2867	pxor	xmm4,xmm7
2868	pxor	xmm8,xmm11
2869	pxor	xmm15,xmm12
2870	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
2871	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
2872	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
2873	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
2874
2875	lea	rsi,[256+rsi]
2876	sub	rbx,16*16
; Reload the two loop counters (rcx = 6, r8 = 4) before choosing a path.
2877	mov	rcx,6
2878	mov	r8,4
; rbx > 192 (signed compare): run the main loop again.
2879	cmp	rbx,12*16
2880	jg	NEAR $L$seal_sse_main_loop
; rbx == 0: jump to the tail hash loop with rcx = rbx = 0.
2881	mov	rcx,rbx
2882	test	rbx,rbx
2883	je	NEAR $L$seal_sse_128_tail_hash
; 128 < rbx <= 192 -> tail_192; 64 < rbx <= 128 -> tail_128; otherwise fall
; through to $L$seal_sse_tail_64. All three are entered with rcx = 6, r8 = 4.
2884	mov	rcx,6
2885	cmp	rbx,8*16
2886	ja	NEAR $L$seal_sse_tail_192
2887	cmp	rbx,4*16
2888	ja	NEAR $L$seal_sse_tail_128
2889
; $L$seal_sse_tail_64: build a single 4-row state in xmm0/xmm4/xmm8/xmm12 and
; run the round loop while absorbing 16-byte blocks at [rdi] into the
; r12:r11:r10 accumulator.
; Rows come from $L$chacha20_consts, [rbp+208], [rbp+224] and [rbp+256]; the
; last row is incremented by $L$sse_inc and written back to [rbp+256] first.
; Loop shape: the x2hash entry absorbs one block and falls into x1hash, which
; performs a double round and absorbs one more block. The loop re-enters at
; x2hash while --rcx > 0, then at x1hash while --r8 >= 0. When entered with
; rcx = 6 and r8 = 4 (the values set just before this label on the visible
; dispatch path) that is 10 double rounds.
2890$L$seal_sse_tail_64:
2891	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2892	movdqa	xmm4,XMMWORD[((160+48))+rbp]
2893	movdqa	xmm8,XMMWORD[((160+64))+rbp]
2894	movdqa	xmm12,XMMWORD[((160+96))+rbp]
2895	paddd	xmm12,XMMWORD[$L$sse_inc]
2896	movdqa	XMMWORD[(160+96)+rbp],xmm12
2897
2898$L$seal_sse_tail_64_rounds_and_x2hash:
; Absorb 16 bytes at [rdi]: add them to r11:r10 and add 1 (plus carry) to r12.
2899	add	r10,QWORD[((0+0))+rdi]
2900	adc	r11,QWORD[((8+0))+rdi]
2901	adc	r12,1
; Multiply r12:r11:r10 by the two qwords at [rbp+160] / [rbp+168]; the product
; limbs end up in r13, r14, r15, r9.
2902	mov	rax,QWORD[((0+160+0))+rbp]
2903	mov	r15,rax
2904	mul	r10
2905	mov	r13,rax
2906	mov	r14,rdx
2907	mov	rax,QWORD[((0+160+0))+rbp]
2908	mul	r11
2909	imul	r15,r12
2910	add	r14,rax
2911	adc	r15,rdx
2912	mov	rax,QWORD[((8+160+0))+rbp]
2913	mov	r9,rax
2914	mul	r10
2915	add	r14,rax
2916	adc	rdx,0
2917	mov	r10,rdx
2918	mov	rax,QWORD[((8+160+0))+rbp]
2919	mul	r11
2920	add	r15,rax
2921	adc	rdx,0
2922	imul	r9,r12
2923	add	r15,r10
2924	adc	r9,rdx
; Fold: keep the low 130 bits in r12:r11:r10 and add back the bits above them
; both with their low two bits cleared (r9:(r15 & -4)) and shifted right by two.
2925	mov	r10,r13
2926	mov	r11,r14
2927	mov	r12,r15
2928	and	r12,3
2929	mov	r13,r15
2930	and	r13,-4
2931	mov	r14,r9
2932	shrd	r15,r9,2
2933	shr	r9,2
2934	add	r15,r13
2935	adc	r9,r14
2936	add	r10,r15
2937	adc	r11,r9
2938	adc	r12,0
2939
2940	lea	rdi,[16+rdi]
2941$L$seal_sse_tail_64_rounds_and_x1hash:
; First half of the double round on xmm0/xmm4/xmm8/xmm12 (xmm3 is scratch).
2942	paddd	xmm0,xmm4
2943	pxor	xmm12,xmm0
2944	pshufb	xmm12,XMMWORD[$L$rol16]
2945	paddd	xmm8,xmm12
2946	pxor	xmm4,xmm8
2947	movdqa	xmm3,xmm4
2948	pslld	xmm3,12
2949	psrld	xmm4,20
2950	pxor	xmm4,xmm3
2951	paddd	xmm0,xmm4
2952	pxor	xmm12,xmm0
2953	pshufb	xmm12,XMMWORD[$L$rol8]
2954	paddd	xmm8,xmm12
2955	pxor	xmm4,xmm8
2956	movdqa	xmm3,xmm4
2957	pslld	xmm3,7
2958	psrld	xmm4,25
2959	pxor	xmm4,xmm3
; Hand-encoded: palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
2960DB	102,15,58,15,228,4
2961DB	102,69,15,58,15,192,8
2962DB	102,69,15,58,15,228,12
; Second half of the double round; the palignr amounts are mirrored (12/8/4).
2963	paddd	xmm0,xmm4
2964	pxor	xmm12,xmm0
2965	pshufb	xmm12,XMMWORD[$L$rol16]
2966	paddd	xmm8,xmm12
2967	pxor	xmm4,xmm8
2968	movdqa	xmm3,xmm4
2969	pslld	xmm3,12
2970	psrld	xmm4,20
2971	pxor	xmm4,xmm3
2972	paddd	xmm0,xmm4
2973	pxor	xmm12,xmm0
2974	pshufb	xmm12,XMMWORD[$L$rol8]
2975	paddd	xmm8,xmm12
2976	pxor	xmm4,xmm8
2977	movdqa	xmm3,xmm4
2978	pslld	xmm3,7
2979	psrld	xmm4,25
2980	pxor	xmm4,xmm3
; Hand-encoded: palignr xmm4,xmm4,12 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,4
2981DB	102,15,58,15,228,12
2982DB	102,69,15,58,15,192,8
2983DB	102,69,15,58,15,228,4
; Absorb the next 16 bytes at [rdi] and multiply/fold as above.
2984	add	r10,QWORD[((0+0))+rdi]
2985	adc	r11,QWORD[((8+0))+rdi]
2986	adc	r12,1
2987	mov	rax,QWORD[((0+160+0))+rbp]
2988	mov	r15,rax
2989	mul	r10
2990	mov	r13,rax
2991	mov	r14,rdx
2992	mov	rax,QWORD[((0+160+0))+rbp]
2993	mul	r11
2994	imul	r15,r12
2995	add	r14,rax
2996	adc	r15,rdx
2997	mov	rax,QWORD[((8+160+0))+rbp]
2998	mov	r9,rax
2999	mul	r10
3000	add	r14,rax
3001	adc	rdx,0
3002	mov	r10,rdx
3003	mov	rax,QWORD[((8+160+0))+rbp]
3004	mul	r11
3005	add	r15,rax
3006	adc	rdx,0
3007	imul	r9,r12
3008	add	r15,r10
3009	adc	r9,rdx
3010	mov	r10,r13
3011	mov	r11,r14
3012	mov	r12,r15
3013	and	r12,3
3014	mov	r13,r15
3015	and	r13,-4
3016	mov	r14,r9
3017	shrd	r15,r9,2
3018	shr	r9,2
3019	add	r15,r13
3020	adc	r9,r14
3021	add	r10,r15
3022	adc	r11,r9
3023	adc	r12,0
3024
3025	lea	rdi,[16+rdi]
3026	dec	rcx
3027	jg	NEAR $L$seal_sse_tail_64_rounds_and_x2hash
3028	dec	r8
3029	jge	NEAR $L$seal_sse_tail_64_rounds_and_x1hash
; Add the initial rows back in; the resulting 64 bytes stay in
; xmm0/xmm4/xmm8/xmm12 for $L$seal_sse_128_tail_xor.
3030	paddd	xmm0,XMMWORD[$L$chacha20_consts]
3031	paddd	xmm4,XMMWORD[((160+48))+rbp]
3032	paddd	xmm8,XMMWORD[((160+64))+rbp]
3033	paddd	xmm12,XMMWORD[((160+96))+rbp]
3034
3035	jmp	NEAR $L$seal_sse_128_tail_xor
3036
; $L$seal_sse_tail_128: build two 4-row states, state 0 in xmm0/xmm4/xmm8/xmm12
; and state 1 in xmm1/xmm5/xmm9/xmm13, and run the round loop on both while
; absorbing 16-byte blocks at [rdi] into r12:r11:r10.
; The first three rows of both states come from $L$chacha20_consts, [rbp+208]
; and [rbp+224]. The last rows are xmm13 = [rbp+256] + $L$sse_inc and
; xmm12 = xmm13 + $L$sse_inc, saved at [rbp+272] and [rbp+256].
; After the loop, state 1 is XORed with 64 bytes at [rsi] and stored at [rdi];
; state 0 is left in registers for $L$seal_sse_128_tail_xor.
3037$L$seal_sse_tail_128:
3038	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
3039	movdqa	xmm4,XMMWORD[((160+48))+rbp]
3040	movdqa	xmm8,XMMWORD[((160+64))+rbp]
3041	movdqa	xmm1,xmm0
3042	movdqa	xmm5,xmm4
3043	movdqa	xmm9,xmm8
3044	movdqa	xmm13,XMMWORD[((160+96))+rbp]
3045	paddd	xmm13,XMMWORD[$L$sse_inc]
3046	movdqa	xmm12,xmm13
3047	paddd	xmm12,XMMWORD[$L$sse_inc]
3048	movdqa	XMMWORD[(160+96)+rbp],xmm12
3049	movdqa	XMMWORD[(160+112)+rbp],xmm13
3050
3051$L$seal_sse_tail_128_rounds_and_x2hash:
; Absorb 16 bytes at [rdi] into r12:r11:r10, multiply by the qwords at
; [rbp+160] / [rbp+168], then fold the product back into three limbs.
3052	add	r10,QWORD[((0+0))+rdi]
3053	adc	r11,QWORD[((8+0))+rdi]
3054	adc	r12,1
3055	mov	rax,QWORD[((0+160+0))+rbp]
3056	mov	r15,rax
3057	mul	r10
3058	mov	r13,rax
3059	mov	r14,rdx
3060	mov	rax,QWORD[((0+160+0))+rbp]
3061	mul	r11
3062	imul	r15,r12
3063	add	r14,rax
3064	adc	r15,rdx
3065	mov	rax,QWORD[((8+160+0))+rbp]
3066	mov	r9,rax
3067	mul	r10
3068	add	r14,rax
3069	adc	rdx,0
3070	mov	r10,rdx
3071	mov	rax,QWORD[((8+160+0))+rbp]
3072	mul	r11
3073	add	r15,rax
3074	adc	rdx,0
3075	imul	r9,r12
3076	add	r15,r10
3077	adc	r9,rdx
3078	mov	r10,r13
3079	mov	r11,r14
3080	mov	r12,r15
3081	and	r12,3
3082	mov	r13,r15
3083	and	r13,-4
3084	mov	r14,r9
3085	shrd	r15,r9,2
3086	shr	r9,2
3087	add	r15,r13
3088	adc	r9,r14
3089	add	r10,r15
3090	adc	r11,r9
3091	adc	r12,0
3092
3093	lea	rdi,[16+rdi]
3094$L$seal_sse_tail_128_rounds_and_x1hash:
; First half of the double round, state 0 (xmm3 is scratch).
3095	paddd	xmm0,xmm4
3096	pxor	xmm12,xmm0
3097	pshufb	xmm12,XMMWORD[$L$rol16]
3098	paddd	xmm8,xmm12
3099	pxor	xmm4,xmm8
3100	movdqa	xmm3,xmm4
3101	pslld	xmm3,12
3102	psrld	xmm4,20
3103	pxor	xmm4,xmm3
3104	paddd	xmm0,xmm4
3105	pxor	xmm12,xmm0
3106	pshufb	xmm12,XMMWORD[$L$rol8]
3107	paddd	xmm8,xmm12
3108	pxor	xmm4,xmm8
3109	movdqa	xmm3,xmm4
3110	pslld	xmm3,7
3111	psrld	xmm4,25
3112	pxor	xmm4,xmm3
; Hand-encoded: palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
3113DB	102,15,58,15,228,4
3114DB	102,69,15,58,15,192,8
3115DB	102,69,15,58,15,228,12
; First half of the double round, state 1.
3116	paddd	xmm1,xmm5
3117	pxor	xmm13,xmm1
3118	pshufb	xmm13,XMMWORD[$L$rol16]
3119	paddd	xmm9,xmm13
3120	pxor	xmm5,xmm9
3121	movdqa	xmm3,xmm5
3122	pslld	xmm3,12
3123	psrld	xmm5,20
3124	pxor	xmm5,xmm3
3125	paddd	xmm1,xmm5
3126	pxor	xmm13,xmm1
3127	pshufb	xmm13,XMMWORD[$L$rol8]
3128	paddd	xmm9,xmm13
3129	pxor	xmm5,xmm9
3130	movdqa	xmm3,xmm5
3131	pslld	xmm3,7
3132	psrld	xmm5,25
3133	pxor	xmm5,xmm3
; Hand-encoded: palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
3134DB	102,15,58,15,237,4
3135DB	102,69,15,58,15,201,8
3136DB	102,69,15,58,15,237,12
; Absorb the next 16 bytes at [rdi] and multiply/fold, between the two halves.
3137	add	r10,QWORD[((0+0))+rdi]
3138	adc	r11,QWORD[((8+0))+rdi]
3139	adc	r12,1
3140	mov	rax,QWORD[((0+160+0))+rbp]
3141	mov	r15,rax
3142	mul	r10
3143	mov	r13,rax
3144	mov	r14,rdx
3145	mov	rax,QWORD[((0+160+0))+rbp]
3146	mul	r11
3147	imul	r15,r12
3148	add	r14,rax
3149	adc	r15,rdx
3150	mov	rax,QWORD[((8+160+0))+rbp]
3151	mov	r9,rax
3152	mul	r10
3153	add	r14,rax
3154	adc	rdx,0
3155	mov	r10,rdx
3156	mov	rax,QWORD[((8+160+0))+rbp]
3157	mul	r11
3158	add	r15,rax
3159	adc	rdx,0
3160	imul	r9,r12
3161	add	r15,r10
3162	adc	r9,rdx
3163	mov	r10,r13
3164	mov	r11,r14
3165	mov	r12,r15
3166	and	r12,3
3167	mov	r13,r15
3168	and	r13,-4
3169	mov	r14,r9
3170	shrd	r15,r9,2
3171	shr	r9,2
3172	add	r15,r13
3173	adc	r9,r14
3174	add	r10,r15
3175	adc	r11,r9
3176	adc	r12,0
; Second half of the double round, state 0 (palignr amounts mirrored: 12/8/4).
3177	paddd	xmm0,xmm4
3178	pxor	xmm12,xmm0
3179	pshufb	xmm12,XMMWORD[$L$rol16]
3180	paddd	xmm8,xmm12
3181	pxor	xmm4,xmm8
3182	movdqa	xmm3,xmm4
3183	pslld	xmm3,12
3184	psrld	xmm4,20
3185	pxor	xmm4,xmm3
3186	paddd	xmm0,xmm4
3187	pxor	xmm12,xmm0
3188	pshufb	xmm12,XMMWORD[$L$rol8]
3189	paddd	xmm8,xmm12
3190	pxor	xmm4,xmm8
3191	movdqa	xmm3,xmm4
3192	pslld	xmm3,7
3193	psrld	xmm4,25
3194	pxor	xmm4,xmm3
3195DB	102,15,58,15,228,12
3196DB	102,69,15,58,15,192,8
3197DB	102,69,15,58,15,228,4
; Second half of the double round, state 1.
3198	paddd	xmm1,xmm5
3199	pxor	xmm13,xmm1
3200	pshufb	xmm13,XMMWORD[$L$rol16]
3201	paddd	xmm9,xmm13
3202	pxor	xmm5,xmm9
3203	movdqa	xmm3,xmm5
3204	pslld	xmm3,12
3205	psrld	xmm5,20
3206	pxor	xmm5,xmm3
3207	paddd	xmm1,xmm5
3208	pxor	xmm13,xmm1
3209	pshufb	xmm13,XMMWORD[$L$rol8]
3210	paddd	xmm9,xmm13
3211	pxor	xmm5,xmm9
3212	movdqa	xmm3,xmm5
3213	pslld	xmm3,7
3214	psrld	xmm5,25
3215	pxor	xmm5,xmm3
3216DB	102,15,58,15,237,12
3217DB	102,69,15,58,15,201,8
3218DB	102,69,15,58,15,237,4
3219
3220	lea	rdi,[16+rdi]
; Re-enter at x2hash while --rcx > 0, then at x1hash while --r8 >= 0.
3221	dec	rcx
3222	jg	NEAR $L$seal_sse_tail_128_rounds_and_x2hash
3223	dec	r8
3224	jge	NEAR $L$seal_sse_tail_128_rounds_and_x1hash
; Add the initial rows back into both states.
3225	paddd	xmm1,XMMWORD[$L$chacha20_consts]
3226	paddd	xmm5,XMMWORD[((160+48))+rbp]
3227	paddd	xmm9,XMMWORD[((160+64))+rbp]
3228	paddd	xmm13,XMMWORD[((160+112))+rbp]
3229	paddd	xmm0,XMMWORD[$L$chacha20_consts]
3230	paddd	xmm4,XMMWORD[((160+48))+rbp]
3231	paddd	xmm8,XMMWORD[((160+64))+rbp]
3232	paddd	xmm12,XMMWORD[((160+96))+rbp]
; XOR 64 bytes at [rsi] with state 1 and store them at [rdi].
3233	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
3234	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
3235	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
3236	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
3237	pxor	xmm1,xmm3
3238	pxor	xmm5,xmm7
3239	pxor	xmm9,xmm11
3240	pxor	xmm15,xmm13
3241	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
3242	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
3243	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
3244	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
3245
; rdi is not advanced here: rcx = 64 makes $L$seal_sse_128_tail_hash absorb
; the 64 bytes just stored, stepping rdi itself.
3246	mov	rcx,4*16
3247	sub	rbx,4*16
3248	lea	rsi,[64+rsi]
3249	jmp	NEAR $L$seal_sse_128_tail_hash
3250
; $L$seal_sse_tail_192: build three 4-row states, state 0 in
; xmm0/xmm4/xmm8/xmm12, state 1 in xmm1/xmm5/xmm9/xmm13 and state 2 in
; xmm2/xmm6/xmm10/xmm14, and run the round loop on all three while absorbing
; 16-byte blocks at [rdi] into r12:r11:r10.
; The last rows are successive $L$sse_inc increments of [rbp+256]:
; xmm14 (+1) saved at [rbp+288], xmm13 (+2) at [rbp+272], xmm12 (+3) at [rbp+256].
; After the loop, states 2 and 1 are XORed with 128 bytes at [rsi] and stored
; at [rdi]; state 0 stays in registers and control falls through to
; $L$seal_sse_128_tail_hash.
3251$L$seal_sse_tail_192:
3252	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
3253	movdqa	xmm4,XMMWORD[((160+48))+rbp]
3254	movdqa	xmm8,XMMWORD[((160+64))+rbp]
3255	movdqa	xmm1,xmm0
3256	movdqa	xmm5,xmm4
3257	movdqa	xmm9,xmm8
3258	movdqa	xmm2,xmm0
3259	movdqa	xmm6,xmm4
3260	movdqa	xmm10,xmm8
3261	movdqa	xmm14,XMMWORD[((160+96))+rbp]
3262	paddd	xmm14,XMMWORD[$L$sse_inc]
3263	movdqa	xmm13,xmm14
3264	paddd	xmm13,XMMWORD[$L$sse_inc]
3265	movdqa	xmm12,xmm13
3266	paddd	xmm12,XMMWORD[$L$sse_inc]
3267	movdqa	XMMWORD[(160+96)+rbp],xmm12
3268	movdqa	XMMWORD[(160+112)+rbp],xmm13
3269	movdqa	XMMWORD[(160+128)+rbp],xmm14
3270
3271$L$seal_sse_tail_192_rounds_and_x2hash:
; Absorb 16 bytes at [rdi] into r12:r11:r10, multiply by the qwords at
; [rbp+160] / [rbp+168], then fold the product back into three limbs.
3272	add	r10,QWORD[((0+0))+rdi]
3273	adc	r11,QWORD[((8+0))+rdi]
3274	adc	r12,1
3275	mov	rax,QWORD[((0+160+0))+rbp]
3276	mov	r15,rax
3277	mul	r10
3278	mov	r13,rax
3279	mov	r14,rdx
3280	mov	rax,QWORD[((0+160+0))+rbp]
3281	mul	r11
3282	imul	r15,r12
3283	add	r14,rax
3284	adc	r15,rdx
3285	mov	rax,QWORD[((8+160+0))+rbp]
3286	mov	r9,rax
3287	mul	r10
3288	add	r14,rax
3289	adc	rdx,0
3290	mov	r10,rdx
3291	mov	rax,QWORD[((8+160+0))+rbp]
3292	mul	r11
3293	add	r15,rax
3294	adc	rdx,0
3295	imul	r9,r12
3296	add	r15,r10
3297	adc	r9,rdx
3298	mov	r10,r13
3299	mov	r11,r14
3300	mov	r12,r15
3301	and	r12,3
3302	mov	r13,r15
3303	and	r13,-4
3304	mov	r14,r9
3305	shrd	r15,r9,2
3306	shr	r9,2
3307	add	r15,r13
3308	adc	r9,r14
3309	add	r10,r15
3310	adc	r11,r9
3311	adc	r12,0
3312
3313	lea	rdi,[16+rdi]
3314$L$seal_sse_tail_192_rounds_and_x1hash:
; First half of the double round, state 0 (xmm3 is scratch).
3315	paddd	xmm0,xmm4
3316	pxor	xmm12,xmm0
3317	pshufb	xmm12,XMMWORD[$L$rol16]
3318	paddd	xmm8,xmm12
3319	pxor	xmm4,xmm8
3320	movdqa	xmm3,xmm4
3321	pslld	xmm3,12
3322	psrld	xmm4,20
3323	pxor	xmm4,xmm3
3324	paddd	xmm0,xmm4
3325	pxor	xmm12,xmm0
3326	pshufb	xmm12,XMMWORD[$L$rol8]
3327	paddd	xmm8,xmm12
3328	pxor	xmm4,xmm8
3329	movdqa	xmm3,xmm4
3330	pslld	xmm3,7
3331	psrld	xmm4,25
3332	pxor	xmm4,xmm3
; Hand-encoded: palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
3333DB	102,15,58,15,228,4
3334DB	102,69,15,58,15,192,8
3335DB	102,69,15,58,15,228,12
; First half of the double round, state 1.
3336	paddd	xmm1,xmm5
3337	pxor	xmm13,xmm1
3338	pshufb	xmm13,XMMWORD[$L$rol16]
3339	paddd	xmm9,xmm13
3340	pxor	xmm5,xmm9
3341	movdqa	xmm3,xmm5
3342	pslld	xmm3,12
3343	psrld	xmm5,20
3344	pxor	xmm5,xmm3
3345	paddd	xmm1,xmm5
3346	pxor	xmm13,xmm1
3347	pshufb	xmm13,XMMWORD[$L$rol8]
3348	paddd	xmm9,xmm13
3349	pxor	xmm5,xmm9
3350	movdqa	xmm3,xmm5
3351	pslld	xmm3,7
3352	psrld	xmm5,25
3353	pxor	xmm5,xmm3
; Hand-encoded: palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
3354DB	102,15,58,15,237,4
3355DB	102,69,15,58,15,201,8
3356DB	102,69,15,58,15,237,12
; First half of the double round, state 2.
3357	paddd	xmm2,xmm6
3358	pxor	xmm14,xmm2
3359	pshufb	xmm14,XMMWORD[$L$rol16]
3360	paddd	xmm10,xmm14
3361	pxor	xmm6,xmm10
3362	movdqa	xmm3,xmm6
3363	pslld	xmm3,12
3364	psrld	xmm6,20
3365	pxor	xmm6,xmm3
3366	paddd	xmm2,xmm6
3367	pxor	xmm14,xmm2
3368	pshufb	xmm14,XMMWORD[$L$rol8]
3369	paddd	xmm10,xmm14
3370	pxor	xmm6,xmm10
3371	movdqa	xmm3,xmm6
3372	pslld	xmm3,7
3373	psrld	xmm6,25
3374	pxor	xmm6,xmm3
; Hand-encoded: palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
3375DB	102,15,58,15,246,4
3376DB	102,69,15,58,15,210,8
3377DB	102,69,15,58,15,246,12
; Absorb the next 16 bytes at [rdi] and multiply/fold, between the two halves.
3378	add	r10,QWORD[((0+0))+rdi]
3379	adc	r11,QWORD[((8+0))+rdi]
3380	adc	r12,1
3381	mov	rax,QWORD[((0+160+0))+rbp]
3382	mov	r15,rax
3383	mul	r10
3384	mov	r13,rax
3385	mov	r14,rdx
3386	mov	rax,QWORD[((0+160+0))+rbp]
3387	mul	r11
3388	imul	r15,r12
3389	add	r14,rax
3390	adc	r15,rdx
3391	mov	rax,QWORD[((8+160+0))+rbp]
3392	mov	r9,rax
3393	mul	r10
3394	add	r14,rax
3395	adc	rdx,0
3396	mov	r10,rdx
3397	mov	rax,QWORD[((8+160+0))+rbp]
3398	mul	r11
3399	add	r15,rax
3400	adc	rdx,0
3401	imul	r9,r12
3402	add	r15,r10
3403	adc	r9,rdx
3404	mov	r10,r13
3405	mov	r11,r14
3406	mov	r12,r15
3407	and	r12,3
3408	mov	r13,r15
3409	and	r13,-4
3410	mov	r14,r9
3411	shrd	r15,r9,2
3412	shr	r9,2
3413	add	r15,r13
3414	adc	r9,r14
3415	add	r10,r15
3416	adc	r11,r9
3417	adc	r12,0
; Second half of the double round, state 0 (palignr amounts mirrored: 12/8/4).
3418	paddd	xmm0,xmm4
3419	pxor	xmm12,xmm0
3420	pshufb	xmm12,XMMWORD[$L$rol16]
3421	paddd	xmm8,xmm12
3422	pxor	xmm4,xmm8
3423	movdqa	xmm3,xmm4
3424	pslld	xmm3,12
3425	psrld	xmm4,20
3426	pxor	xmm4,xmm3
3427	paddd	xmm0,xmm4
3428	pxor	xmm12,xmm0
3429	pshufb	xmm12,XMMWORD[$L$rol8]
3430	paddd	xmm8,xmm12
3431	pxor	xmm4,xmm8
3432	movdqa	xmm3,xmm4
3433	pslld	xmm3,7
3434	psrld	xmm4,25
3435	pxor	xmm4,xmm3
3436DB	102,15,58,15,228,12
3437DB	102,69,15,58,15,192,8
3438DB	102,69,15,58,15,228,4
; Second half of the double round, state 1.
3439	paddd	xmm1,xmm5
3440	pxor	xmm13,xmm1
3441	pshufb	xmm13,XMMWORD[$L$rol16]
3442	paddd	xmm9,xmm13
3443	pxor	xmm5,xmm9
3444	movdqa	xmm3,xmm5
3445	pslld	xmm3,12
3446	psrld	xmm5,20
3447	pxor	xmm5,xmm3
3448	paddd	xmm1,xmm5
3449	pxor	xmm13,xmm1
3450	pshufb	xmm13,XMMWORD[$L$rol8]
3451	paddd	xmm9,xmm13
3452	pxor	xmm5,xmm9
3453	movdqa	xmm3,xmm5
3454	pslld	xmm3,7
3455	psrld	xmm5,25
3456	pxor	xmm5,xmm3
3457DB	102,15,58,15,237,12
3458DB	102,69,15,58,15,201,8
3459DB	102,69,15,58,15,237,4
; Second half of the double round, state 2.
3460	paddd	xmm2,xmm6
3461	pxor	xmm14,xmm2
3462	pshufb	xmm14,XMMWORD[$L$rol16]
3463	paddd	xmm10,xmm14
3464	pxor	xmm6,xmm10
3465	movdqa	xmm3,xmm6
3466	pslld	xmm3,12
3467	psrld	xmm6,20
3468	pxor	xmm6,xmm3
3469	paddd	xmm2,xmm6
3470	pxor	xmm14,xmm2
3471	pshufb	xmm14,XMMWORD[$L$rol8]
3472	paddd	xmm10,xmm14
3473	pxor	xmm6,xmm10
3474	movdqa	xmm3,xmm6
3475	pslld	xmm3,7
3476	psrld	xmm6,25
3477	pxor	xmm6,xmm3
3478DB	102,15,58,15,246,12
3479DB	102,69,15,58,15,210,8
3480DB	102,69,15,58,15,246,4
3481
3482	lea	rdi,[16+rdi]
; Re-enter at x2hash while --rcx > 0, then at x1hash while --r8 >= 0.
3483	dec	rcx
3484	jg	NEAR $L$seal_sse_tail_192_rounds_and_x2hash
3485	dec	r8
3486	jge	NEAR $L$seal_sse_tail_192_rounds_and_x1hash
; Add the initial rows back into all three states.
3487	paddd	xmm2,XMMWORD[$L$chacha20_consts]
3488	paddd	xmm6,XMMWORD[((160+48))+rbp]
3489	paddd	xmm10,XMMWORD[((160+64))+rbp]
3490	paddd	xmm14,XMMWORD[((160+128))+rbp]
3491	paddd	xmm1,XMMWORD[$L$chacha20_consts]
3492	paddd	xmm5,XMMWORD[((160+48))+rbp]
3493	paddd	xmm9,XMMWORD[((160+64))+rbp]
3494	paddd	xmm13,XMMWORD[((160+112))+rbp]
3495	paddd	xmm0,XMMWORD[$L$chacha20_consts]
3496	paddd	xmm4,XMMWORD[((160+48))+rbp]
3497	paddd	xmm8,XMMWORD[((160+64))+rbp]
3498	paddd	xmm12,XMMWORD[((160+96))+rbp]
; XOR bytes 0..63 at [rsi] with state 2 and store them at [rdi].
3499	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
3500	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
3501	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
3502	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
3503	pxor	xmm2,xmm3
3504	pxor	xmm6,xmm7
3505	pxor	xmm10,xmm11
3506	pxor	xmm15,xmm14
3507	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
3508	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
3509	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
3510	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
; XOR bytes 64..127 at [rsi] with state 1 and store them at [rdi+64].
3511	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
3512	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
3513	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
3514	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
3515	pxor	xmm1,xmm3
3516	pxor	xmm5,xmm7
3517	pxor	xmm9,xmm11
3518	pxor	xmm15,xmm13
3519	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
3520	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
3521	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
3522	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
3523
; rdi is not advanced here: rcx = 128 makes the hash loop that follows absorb
; the 128 bytes just stored, stepping rdi itself.
3524	mov	rcx,8*16
3525	sub	rbx,8*16
3526	lea	rsi,[128+rsi]
3527
; $L$seal_sse_128_tail_hash: while rcx >= 16, absorb the 16 bytes at [rdi]
; into the r12:r11:r10 accumulator, then rcx -= 16 and rdi += 16.
; Leaves for $L$seal_sse_128_tail_xor once rcx < 16 (unsigned compare).
3528$L$seal_sse_128_tail_hash:
3529	cmp	rcx,16
3530	jb	NEAR $L$seal_sse_128_tail_xor
; Add the block to r11:r10 and add 1 (plus carry) to r12.
3531	add	r10,QWORD[((0+0))+rdi]
3532	adc	r11,QWORD[((8+0))+rdi]
3533	adc	r12,1
; Multiply r12:r11:r10 by the qwords at [rbp+160] / [rbp+168]; the product
; limbs end up in r13, r14, r15, r9.
3534	mov	rax,QWORD[((0+160+0))+rbp]
3535	mov	r15,rax
3536	mul	r10
3537	mov	r13,rax
3538	mov	r14,rdx
3539	mov	rax,QWORD[((0+160+0))+rbp]
3540	mul	r11
3541	imul	r15,r12
3542	add	r14,rax
3543	adc	r15,rdx
3544	mov	rax,QWORD[((8+160+0))+rbp]
3545	mov	r9,rax
3546	mul	r10
3547	add	r14,rax
3548	adc	rdx,0
3549	mov	r10,rdx
3550	mov	rax,QWORD[((8+160+0))+rbp]
3551	mul	r11
3552	add	r15,rax
3553	adc	rdx,0
3554	imul	r9,r12
3555	add	r15,r10
3556	adc	r9,rdx
; Fold: keep the low 130 bits in r12:r11:r10 and add back the bits above them
; both with their low two bits cleared (r9:(r15 & -4)) and shifted right by two.
3557	mov	r10,r13
3558	mov	r11,r14
3559	mov	r12,r15
3560	and	r12,3
3561	mov	r13,r15
3562	and	r13,-4
3563	mov	r14,r9
3564	shrd	r15,r9,2
3565	shr	r9,2
3566	add	r15,r13
3567	adc	r9,r14
3568	add	r10,r15
3569	adc	r11,r9
3570	adc	r12,0
3571
3572	sub	rcx,16
3573	lea	rdi,[16+rdi]
3574	jmp	NEAR $L$seal_sse_128_tail_hash
3575
; $L$seal_sse_128_tail_xor: while rbx >= 16, XOR 16 bytes at [rsi] with xmm0,
; store them at [rdi], absorb the stored bytes into r12:r11:r10, and rotate
; the register queue so the next 16 bytes to XOR with are in xmm0.
; Leaves for $L$seal_sse_tail_16 once rbx < 16 (unsigned compare).
3576$L$seal_sse_128_tail_xor:
3577	cmp	rbx,16
3578	jb	NEAR $L$seal_sse_tail_16
3579	sub	rbx,16
3580
3581	movdqu	xmm3,XMMWORD[rsi]
3582	pxor	xmm0,xmm3
3583	movdqu	XMMWORD[rdi],xmm0
3584
; Absorb the 16 bytes just written; rsi and rdi are advanced with lea, which
; leaves the carry chain of the add/adc sequence untouched.
3585	add	r10,QWORD[rdi]
3586	adc	r11,QWORD[8+rdi]
3587	adc	r12,1
3588	lea	rsi,[16+rsi]
3589	lea	rdi,[16+rdi]
; Multiply r12:r11:r10 by the qwords at [rbp+160] / [rbp+168], then fold the
; product back into three limbs.
3590	mov	rax,QWORD[((0+160+0))+rbp]
3591	mov	r15,rax
3592	mul	r10
3593	mov	r13,rax
3594	mov	r14,rdx
3595	mov	rax,QWORD[((0+160+0))+rbp]
3596	mul	r11
3597	imul	r15,r12
3598	add	r14,rax
3599	adc	r15,rdx
3600	mov	rax,QWORD[((8+160+0))+rbp]
3601	mov	r9,rax
3602	mul	r10
3603	add	r14,rax
3604	adc	rdx,0
3605	mov	r10,rdx
3606	mov	rax,QWORD[((8+160+0))+rbp]
3607	mul	r11
3608	add	r15,rax
3609	adc	rdx,0
3610	imul	r9,r12
3611	add	r15,r10
3612	adc	r9,rdx
3613	mov	r10,r13
3614	mov	r11,r14
3615	mov	r12,r15
3616	and	r12,3
3617	mov	r13,r15
3618	and	r13,-4
3619	mov	r14,r9
3620	shrd	r15,r9,2
3621	shr	r9,2
3622	add	r15,r13
3623	adc	r9,r14
3624	add	r10,r15
3625	adc	r11,r9
3626	adc	r12,0
3627
3628
; Shift the queue down one slot:
; xmm0 <- xmm4 <- xmm8 <- xmm12 <- xmm1 <- xmm5 <- xmm9 <- xmm13.
3629	movdqa	xmm0,xmm4
3630	movdqa	xmm4,xmm8
3631	movdqa	xmm8,xmm12
3632	movdqa	xmm12,xmm1
3633	movdqa	xmm1,xmm5
3634	movdqa	xmm5,xmm9
3635	movdqa	xmm9,xmm13
3636	jmp	NEAR $L$seal_sse_128_tail_xor
3637
; $L$seal_sse_tail_16: handle a final chunk of rbx bytes. The visible entry is
; from $L$seal_sse_128_tail_xor, which branches here when rbx < 16.
; If rbx == 0, go straight to $L$process_blocks_of_extra_in. Otherwise:
;   1. gather rbx bytes from [rsi] one at a time into xmm15,
;   2. XOR them with xmm0 and write rbx bytes to [rdi] one at a time,
;   3. top the block up with bytes read through the pointer at [r9+48], where
;      r9 = [rsp+480] and [r9+56] is the matching byte count (presumably the
;      "extra_in" buffer and its length, going by the label names),
;   4. absorb the combined block into the r12:r11:r10 accumulator.
3638$L$seal_sse_tail_16:
3639	test	rbx,rbx
3640	jz	NEAR $L$process_blocks_of_extra_in
3641
3642	mov	r8,rbx
3643	mov	rcx,rbx
; Point rsi at the last input byte and walk backwards: each pass shifts xmm15
; up one byte and inserts the byte at [rsi] into lane 0, so the input ends up
; in memory order in the low rbx bytes.
3644	lea	rsi,[((-1))+rbx*1+rsi]
3645	pxor	xmm15,xmm15
3646$L$seal_sse_tail_16_compose:
3647	pslldq	xmm15,1
3648	pinsrb	xmm15,BYTE[rsi],0
3649	lea	rsi,[((-1))+rsi]
3650	dec	rcx
3651	jne	NEAR $L$seal_sse_tail_16_compose
3652
3653
3654	pxor	xmm15,xmm0
3655
3656
; Write rbx bytes to [rdi], lowest byte first, using xmm0 as a shifting copy.
3657	mov	rcx,rbx
3658	movdqu	xmm0,xmm15
3659$L$seal_sse_tail_16_extract:
3660	pextrb	XMMWORD[rdi],xmm0,0
3661	psrldq	xmm0,1
3662	add	rdi,1
3663	sub	rcx,1
3664	jnz	NEAR $L$seal_sse_tail_16_extract
3665
3666
3667
3668
3669
3670
3671
3672
; r9 = [rsp+480]; r14 = count at [r9+56], r13 = pointer at [r9+48].
; With no such bytes (r14 == 0) the partial block is absorbed on its own.
3673	mov	r9,QWORD[((288 + 160 + 32))+rsp]
3674	mov	r14,QWORD[56+r9]
3675	mov	r13,QWORD[48+r9]
3676	test	r14,r14
3677	jz	NEAR $L$process_partial_block
3678
; r15 = number of bytes to take: 16 - rbx if r14 >= 16 - rbx (signed compare),
; otherwise r14.
3679	mov	r15,16
3680	sub	r15,rbx
3681	cmp	r14,r15
3682
3683	jge	NEAR $L$load_extra_in
3684	mov	r15,r14
3685
3686$L$load_extra_in:
3687
3688
; rsi = address of the last of the r15 bytes to read.
3689	lea	rsi,[((-1))+r15*1+r13]
3690
3691
; Advance the stored pointer and shrink the stored count by r15.
3692	add	r13,r15
3693	sub	r14,r15
3694	mov	QWORD[48+r9],r13
3695	mov	QWORD[56+r9],r14
3696
3697
3698
; NOTE(review): on the fall-through path r8 is reloaded at
; $L$process_blocks_of_extra_in before it is read again.
3699	add	r8,r15
3700
3701
; Gather the r15 bytes into xmm11, last byte first, as in the compose loop.
3702	pxor	xmm11,xmm11
3703$L$load_extra_load_loop:
3704	pslldq	xmm11,1
3705	pinsrb	xmm11,BYTE[rsi],0
3706	lea	rsi,[((-1))+rsi]
3707	sub	r15,1
3708	jnz	NEAR $L$load_extra_load_loop
3709
3710
3711
3712
; Shift xmm11 up by rbx bytes, one byte per iteration (pslldq takes an
; immediate count), so it sits just above the rbx bytes held in xmm15.
3713	mov	r15,rbx
3714
3715$L$load_extra_shift_loop:
3716	pslldq	xmm11,1
3717	sub	r15,1
3718	jnz	NEAR $L$load_extra_shift_loop
3719
3720
3721
3722
; Keep only the low rbx bytes of xmm15: the $L$and_masks entry at offset
; rbx*16 - 16 has its low rbx bytes set to 0xff.
3723	lea	r15,[$L$and_masks]
3724	shl	rbx,4
3725	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
3726
3727
3728	por	xmm15,xmm11
3729
3730
3731
; Hand-encoded: movq r13,xmm15 (low qword); pextrq fetches the high qword.
3732DB	102,77,15,126,253
3733	pextrq	r14,xmm15,1
; Absorb the combined block, multiply by the qwords at [rbp+160] / [rbp+168],
; then fold the product back into three limbs.
3734	add	r10,r13
3735	adc	r11,r14
3736	adc	r12,1
3737	mov	rax,QWORD[((0+160+0))+rbp]
3738	mov	r15,rax
3739	mul	r10
3740	mov	r13,rax
3741	mov	r14,rdx
3742	mov	rax,QWORD[((0+160+0))+rbp]
3743	mul	r11
3744	imul	r15,r12
3745	add	r14,rax
3746	adc	r15,rdx
3747	mov	rax,QWORD[((8+160+0))+rbp]
3748	mov	r9,rax
3749	mul	r10
3750	add	r14,rax
3751	adc	rdx,0
3752	mov	r10,rdx
3753	mov	rax,QWORD[((8+160+0))+rbp]
3754	mul	r11
3755	add	r15,rax
3756	adc	rdx,0
3757	imul	r9,r12
3758	add	r15,r10
3759	adc	r9,rdx
3760	mov	r10,r13
3761	mov	r11,r14
3762	mov	r12,r15
3763	and	r12,3
3764	mov	r13,r15
3765	and	r13,-4
3766	mov	r14,r9
3767	shrd	r15,r9,2
3768	shr	r9,2
3769	add	r15,r13
3770	adc	r9,r14
3771	add	r10,r15
3772	adc	r11,r9
3773	adc	r12,0
3774
3775
; $L$process_blocks_of_extra_in: absorb the bytes described by the pointer at
; [r9+48] and the count at [r9+56], where r9 = [rsp+480].
; Whole 16-byte blocks are absorbed straight from memory; any 1..15 leftover
; bytes are gathered into xmm15 and control falls into $L$process_partial_block.
; With no leftover bytes it jumps to $L$do_length_block.
3776$L$process_blocks_of_extra_in:
3777
3778	mov	r9,QWORD[((288+32+160 ))+rsp]
3779	mov	rsi,QWORD[48+r9]
3780	mov	r8,QWORD[56+r9]
3781	mov	rcx,r8
; r8 = number of whole 16-byte blocks; shr sets ZF for the jz below.
3782	shr	r8,4
3783
3784$L$process_extra_hash_loop:
; ZF here comes from the shr above on entry, and from "sub r8,1" on later
; passes (the lea between them does not modify flags).
3785	jz	NEAR process_extra_in_trailer
; Absorb 16 bytes at [rsi], multiply by the qwords at [rbp+160] / [rbp+168],
; then fold the product back into three limbs.
3786	add	r10,QWORD[((0+0))+rsi]
3787	adc	r11,QWORD[((8+0))+rsi]
3788	adc	r12,1
3789	mov	rax,QWORD[((0+160+0))+rbp]
3790	mov	r15,rax
3791	mul	r10
3792	mov	r13,rax
3793	mov	r14,rdx
3794	mov	rax,QWORD[((0+160+0))+rbp]
3795	mul	r11
3796	imul	r15,r12
3797	add	r14,rax
3798	adc	r15,rdx
3799	mov	rax,QWORD[((8+160+0))+rbp]
3800	mov	r9,rax
3801	mul	r10
3802	add	r14,rax
3803	adc	rdx,0
3804	mov	r10,rdx
3805	mov	rax,QWORD[((8+160+0))+rbp]
3806	mul	r11
3807	add	r15,rax
3808	adc	rdx,0
3809	imul	r9,r12
3810	add	r15,r10
3811	adc	r9,rdx
3812	mov	r10,r13
3813	mov	r11,r14
3814	mov	r12,r15
3815	and	r12,3
3816	mov	r13,r15
3817	and	r13,-4
3818	mov	r14,r9
3819	shrd	r15,r9,2
3820	shr	r9,2
3821	add	r15,r13
3822	adc	r9,r14
3823	add	r10,r15
3824	adc	r11,r9
3825	adc	r12,0
3826
3827	lea	rsi,[16+rsi]
3828	sub	r8,1
3829	jmp	NEAR $L$process_extra_hash_loop
3830process_extra_in_trailer:
; rcx = rbx = leftover byte count (0..15). The jz tests ZF from the "and";
; the mov in between leaves flags alone.
3831	and	rcx,15
3832	mov	rbx,rcx
3833	jz	NEAR $L$do_length_block
3834	lea	rsi,[((-1))+rcx*1+rsi]
3835
; Gather the leftover bytes into the low end of xmm15, last byte first.
; xmm15 is not cleared first; whatever remains above the low rbx bytes is
; masked off in the code this falls into.
3836$L$process_extra_in_trailer_load:
3837	pslldq	xmm15,1
3838	pinsrb	xmm15,BYTE[rsi],0
3839	lea	rsi,[((-1))+rsi]
3840	sub	rcx,1
3841	jnz	NEAR $L$process_extra_in_trailer_load
3842
; $L$process_partial_block: absorb a partial block held in the low rbx bytes
; of xmm15 into the r12:r11:r10 accumulator.
; The mask load reads $L$and_masks at offset rbx*16 - 16; both visible paths
; into this label arrive with rbx non-zero.
3843$L$process_partial_block:
3844
; Zero every byte of xmm15 above the low rbx bytes.
3845	lea	r15,[$L$and_masks]
3846	shl	rbx,4
3847	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
; Hand-encoded: movq r13,xmm15 (low qword); pextrq fetches the high qword.
3848DB	102,77,15,126,253
3849	pextrq	r14,xmm15,1
; Add the block to r11:r10 and add 1 (plus carry) to r12.
3850	add	r10,r13
3851	adc	r11,r14
3852	adc	r12,1
; Multiply r12:r11:r10 by the qwords at [rbp+160] / [rbp+168], then fold the
; product back into three limbs.
3853	mov	rax,QWORD[((0+160+0))+rbp]
3854	mov	r15,rax
3855	mul	r10
3856	mov	r13,rax
3857	mov	r14,rdx
3858	mov	rax,QWORD[((0+160+0))+rbp]
3859	mul	r11
3860	imul	r15,r12
3861	add	r14,rax
3862	adc	r15,rdx
3863	mov	rax,QWORD[((8+160+0))+rbp]
3864	mov	r9,rax
3865	mul	r10
3866	add	r14,rax
3867	adc	rdx,0
3868	mov	r10,rdx
3869	mov	rax,QWORD[((8+160+0))+rbp]
3870	mul	r11
3871	add	r15,rax
3872	adc	rdx,0
3873	imul	r9,r12
3874	add	r15,r10
3875	adc	r9,rdx
3876	mov	r10,r13
3877	mov	r11,r14
3878	mov	r12,r15
3879	and	r12,3
3880	mov	r13,r15
3881	and	r13,-4
3882	mov	r14,r9
3883	shrd	r15,r9,2
3884	shr	r9,2
3885	add	r15,r13
3886	adc	r9,r14
3887	add	r10,r15
3888	adc	r11,r9
3889	adc	r12,0
3890
3891
; $L$do_length_block: absorb the final 16-byte block stored at [rbp+192],
; reduce the accumulator, add the 16 bytes at [rbp+176], write the 16-byte
; result through the pointer popped from the stack, and return.
3892$L$do_length_block:
; Absorb the 16 bytes at [rbp+192] (the "length block", going by the label),
; multiply by the qwords at [rbp+160] / [rbp+168], then fold.
3893	add	r10,QWORD[((0+160+32))+rbp]
3894	adc	r11,QWORD[((8+160+32))+rbp]
3895	adc	r12,1
3896	mov	rax,QWORD[((0+160+0))+rbp]
3897	mov	r15,rax
3898	mul	r10
3899	mov	r13,rax
3900	mov	r14,rdx
3901	mov	rax,QWORD[((0+160+0))+rbp]
3902	mul	r11
3903	imul	r15,r12
3904	add	r14,rax
3905	adc	r15,rdx
3906	mov	rax,QWORD[((8+160+0))+rbp]
3907	mov	r9,rax
3908	mul	r10
3909	add	r14,rax
3910	adc	rdx,0
3911	mov	r10,rdx
3912	mov	rax,QWORD[((8+160+0))+rbp]
3913	mul	r11
3914	add	r15,rax
3915	adc	rdx,0
3916	imul	r9,r12
3917	add	r15,r10
3918	adc	r9,rdx
3919	mov	r10,r13
3920	mov	r11,r14
3921	mov	r12,r15
3922	and	r12,3
3923	mov	r13,r15
3924	and	r13,-4
3925	mov	r14,r9
3926	shrd	r15,r9,2
3927	shr	r9,2
3928	add	r15,r13
3929	adc	r9,r14
3930	add	r10,r15
3931	adc	r11,r9
3932	adc	r12,0
3933
3934
; Final reduction: subtract 2^130 - 5 (limbs -5, -1, 3) from r12:r11:r10 and
; keep the original value if the subtraction borrowed.
3935	mov	r13,r10
3936	mov	r14,r11
3937	mov	r15,r12
3938	sub	r10,-5
3939	sbb	r11,-1
3940	sbb	r12,3
3941	cmovc	r10,r13
3942	cmovc	r11,r14
3943	cmovc	r12,r15
3944
; Add the 128-bit value stored at [rbp+176]; any carry out of r11 is discarded.
3945	add	r10,QWORD[((0+160+16))+rbp]
3946	adc	r11,QWORD[((8+160+16))+rbp]
3947
; Reload xmm6-xmm15 from the save area at [rbp+0..159]; these registers are
; callee-saved in the Microsoft x64 calling convention.
3948	movaps	xmm6,XMMWORD[((0+0))+rbp]
3949	movaps	xmm7,XMMWORD[((16+0))+rbp]
3950	movaps	xmm8,XMMWORD[((32+0))+rbp]
3951	movaps	xmm9,XMMWORD[((48+0))+rbp]
3952	movaps	xmm10,XMMWORD[((64+0))+rbp]
3953	movaps	xmm11,XMMWORD[((80+0))+rbp]
3954	movaps	xmm12,XMMWORD[((96+0))+rbp]
3955	movaps	xmm13,XMMWORD[((112+0))+rbp]
3956	movaps	xmm14,XMMWORD[((128+0))+rbp]
3957	movaps	xmm15,XMMWORD[((144+0))+rbp]
3958
3959
; Release the 480-byte local frame.
3960	add	rsp,288 + 160 + 32
3961
3962
; The first value popped is used as the destination pointer for the result.
3963	pop	r9
3964
3965	mov	QWORD[r9],r10
3966	mov	QWORD[8+r9],r11
; Pop the remaining saved general-purpose registers.
3967	pop	r15
3968
3969	pop	r14
3970
3971	pop	r13
3972
3973	pop	r12
3974
3975	pop	rbx
3976
3977	pop	rbp
3978
; Reload rdi and rsi from [rsp+8] / [rsp+16] before returning.
3979	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3980	mov	rsi,QWORD[16+rsp]
3981	ret
3982
; $L$seal_sse_128: run three 4-row states through 10 double rounds, store the
; clamped key material, hash via poly_hash_ad_internal, then jump to
; $L$seal_sse_128_tail_xor.
; State 0 is xmm0/xmm4/xmm8/xmm12, state 1 is xmm1/xmm5/xmm9/xmm13 and state 2
; is xmm2/xmm6/xmm10/xmm14. Rows 1..3 are loaded from [r9], [r9+16], [r9+32].
; The last row of state 2 is [r9+32] as loaded; state 0 uses it plus
; $L$sse_inc and state 1 uses it plus 2*$L$sse_inc.
; After the rounds, xmm2 & $L$clamp goes to [rbp+160] and xmm6 to [rbp+176];
; states 0 and 1 stay in registers for $L$seal_sse_128_tail_xor.
3983$L$seal_sse_128:
3984
3985	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
3986	movdqa	xmm1,xmm0
3987	movdqa	xmm2,xmm0
3988	movdqu	xmm4,XMMWORD[r9]
3989	movdqa	xmm5,xmm4
3990	movdqa	xmm6,xmm4
3991	movdqu	xmm8,XMMWORD[16+r9]
3992	movdqa	xmm9,xmm8
3993	movdqa	xmm10,xmm8
3994	movdqu	xmm14,XMMWORD[32+r9]
3995	movdqa	xmm12,xmm14
3996	paddd	xmm12,XMMWORD[$L$sse_inc]
3997	movdqa	xmm13,xmm12
3998	paddd	xmm13,XMMWORD[$L$sse_inc]
; Keep copies of the initial rows (xmm7, xmm11, xmm15) for the add-back below.
3999	movdqa	xmm7,xmm4
4000	movdqa	xmm11,xmm8
4001	movdqa	xmm15,xmm12
; r10 is the double-round counter here.
4002	mov	r10,10
4003
4004$L$seal_sse_128_rounds:
; First half of the double round, state 0 (xmm3 is scratch).
4005	paddd	xmm0,xmm4
4006	pxor	xmm12,xmm0
4007	pshufb	xmm12,XMMWORD[$L$rol16]
4008	paddd	xmm8,xmm12
4009	pxor	xmm4,xmm8
4010	movdqa	xmm3,xmm4
4011	pslld	xmm3,12
4012	psrld	xmm4,20
4013	pxor	xmm4,xmm3
4014	paddd	xmm0,xmm4
4015	pxor	xmm12,xmm0
4016	pshufb	xmm12,XMMWORD[$L$rol8]
4017	paddd	xmm8,xmm12
4018	pxor	xmm4,xmm8
4019	movdqa	xmm3,xmm4
4020	pslld	xmm3,7
4021	psrld	xmm4,25
4022	pxor	xmm4,xmm3
; Hand-encoded: palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
4023DB	102,15,58,15,228,4
4024DB	102,69,15,58,15,192,8
4025DB	102,69,15,58,15,228,12
; First half of the double round, state 1.
4026	paddd	xmm1,xmm5
4027	pxor	xmm13,xmm1
4028	pshufb	xmm13,XMMWORD[$L$rol16]
4029	paddd	xmm9,xmm13
4030	pxor	xmm5,xmm9
4031	movdqa	xmm3,xmm5
4032	pslld	xmm3,12
4033	psrld	xmm5,20
4034	pxor	xmm5,xmm3
4035	paddd	xmm1,xmm5
4036	pxor	xmm13,xmm1
4037	pshufb	xmm13,XMMWORD[$L$rol8]
4038	paddd	xmm9,xmm13
4039	pxor	xmm5,xmm9
4040	movdqa	xmm3,xmm5
4041	pslld	xmm3,7
4042	psrld	xmm5,25
4043	pxor	xmm5,xmm3
; Hand-encoded: palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
4044DB	102,15,58,15,237,4
4045DB	102,69,15,58,15,201,8
4046DB	102,69,15,58,15,237,12
; First half of the double round, state 2.
4047	paddd	xmm2,xmm6
4048	pxor	xmm14,xmm2
4049	pshufb	xmm14,XMMWORD[$L$rol16]
4050	paddd	xmm10,xmm14
4051	pxor	xmm6,xmm10
4052	movdqa	xmm3,xmm6
4053	pslld	xmm3,12
4054	psrld	xmm6,20
4055	pxor	xmm6,xmm3
4056	paddd	xmm2,xmm6
4057	pxor	xmm14,xmm2
4058	pshufb	xmm14,XMMWORD[$L$rol8]
4059	paddd	xmm10,xmm14
4060	pxor	xmm6,xmm10
4061	movdqa	xmm3,xmm6
4062	pslld	xmm3,7
4063	psrld	xmm6,25
4064	pxor	xmm6,xmm3
; Hand-encoded: palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
4065DB	102,15,58,15,246,4
4066DB	102,69,15,58,15,210,8
4067DB	102,69,15,58,15,246,12
; Second half of the double round, state 0 (palignr amounts mirrored: 12/8/4).
4068	paddd	xmm0,xmm4
4069	pxor	xmm12,xmm0
4070	pshufb	xmm12,XMMWORD[$L$rol16]
4071	paddd	xmm8,xmm12
4072	pxor	xmm4,xmm8
4073	movdqa	xmm3,xmm4
4074	pslld	xmm3,12
4075	psrld	xmm4,20
4076	pxor	xmm4,xmm3
4077	paddd	xmm0,xmm4
4078	pxor	xmm12,xmm0
4079	pshufb	xmm12,XMMWORD[$L$rol8]
4080	paddd	xmm8,xmm12
4081	pxor	xmm4,xmm8
4082	movdqa	xmm3,xmm4
4083	pslld	xmm3,7
4084	psrld	xmm4,25
4085	pxor	xmm4,xmm3
4086DB	102,15,58,15,228,12
4087DB	102,69,15,58,15,192,8
4088DB	102,69,15,58,15,228,4
; Second half of the double round, state 1.
4089	paddd	xmm1,xmm5
4090	pxor	xmm13,xmm1
4091	pshufb	xmm13,XMMWORD[$L$rol16]
4092	paddd	xmm9,xmm13
4093	pxor	xmm5,xmm9
4094	movdqa	xmm3,xmm5
4095	pslld	xmm3,12
4096	psrld	xmm5,20
4097	pxor	xmm5,xmm3
4098	paddd	xmm1,xmm5
4099	pxor	xmm13,xmm1
4100	pshufb	xmm13,XMMWORD[$L$rol8]
4101	paddd	xmm9,xmm13
4102	pxor	xmm5,xmm9
4103	movdqa	xmm3,xmm5
4104	pslld	xmm3,7
4105	psrld	xmm5,25
4106	pxor	xmm5,xmm3
4107DB	102,15,58,15,237,12
4108DB	102,69,15,58,15,201,8
4109DB	102,69,15,58,15,237,4
; Second half of the double round, state 2.
4110	paddd	xmm2,xmm6
4111	pxor	xmm14,xmm2
4112	pshufb	xmm14,XMMWORD[$L$rol16]
4113	paddd	xmm10,xmm14
4114	pxor	xmm6,xmm10
4115	movdqa	xmm3,xmm6
4116	pslld	xmm3,12
4117	psrld	xmm6,20
4118	pxor	xmm6,xmm3
4119	paddd	xmm2,xmm6
4120	pxor	xmm14,xmm2
4121	pshufb	xmm14,XMMWORD[$L$rol8]
4122	paddd	xmm10,xmm14
4123	pxor	xmm6,xmm10
4124	movdqa	xmm3,xmm6
4125	pslld	xmm3,7
4126	psrld	xmm6,25
4127	pxor	xmm6,xmm3
4128DB	102,15,58,15,246,12
4129DB	102,69,15,58,15,210,8
4130DB	102,69,15,58,15,246,4
4131
4132	dec	r10
4133	jnz	NEAR $L$seal_sse_128_rounds
; Add the initial rows back. Only rows 0 and 1 of state 2 (xmm2, xmm6) are
; used afterwards, so xmm10 and xmm14 are left as they are.
4134	paddd	xmm0,XMMWORD[$L$chacha20_consts]
4135	paddd	xmm1,XMMWORD[$L$chacha20_consts]
4136	paddd	xmm2,XMMWORD[$L$chacha20_consts]
4137	paddd	xmm4,xmm7
4138	paddd	xmm5,xmm7
4139	paddd	xmm6,xmm7
4140	paddd	xmm8,xmm11
4141	paddd	xmm9,xmm11
4142	paddd	xmm12,xmm15
; xmm15 held state 0's initial last row; one more $L$sse_inc gives state 1's.
4143	paddd	xmm15,XMMWORD[$L$sse_inc]
4144	paddd	xmm13,xmm15
4145
; Store xmm2 & $L$clamp at [rbp+160] and xmm6 at [rbp+176].
4146	pand	xmm2,XMMWORD[$L$clamp]
4147	movdqa	XMMWORD[(160+0)+rbp],xmm2
4148	movdqa	XMMWORD[(160+16)+rbp],xmm6
4149
; "mov r8,r8" copies a register onto itself and changes nothing.
4150	mov	r8,r8
4151	call	poly_hash_ad_internal
4152	jmp	NEAR $L$seal_sse_128_tail_xor
4153$L$SEH_end_chacha20_poly1305_seal:
4154
4155
4156
4157
4158ALIGN	64
; AVX2 entry of the "open" path: builds the initial two-block ChaCha20 state
; and dispatches on the byte count in rbx.
; No prologue is visible at this label, so rbp, rbx, rsi, rdi, r8 and r9 are
; presumably prepared by the code that jumps here -- TODO confirm.
; Register use established below:
;   r9  -> three consecutive 16-byte state rows (loaded at +0, +16, +32)
;   rbx  = byte count (compared against 192 and 320 here)
;   rbp  = base of an aligned scratch frame (vmovdqa stores at 160+N)
4159chacha20_poly1305_open_avx2:
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172	vzeroupper
; ymm0 = constants row, ymm4/ymm8/ymm12 = rows from r9 duplicated in both lanes.
4173	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4174	vbroadcasti128	ymm4,XMMWORD[r9]
4175	vbroadcasti128	ymm8,XMMWORD[16+r9]
4176	vbroadcasti128	ymm12,XMMWORD[32+r9]
; The 32-byte read at $L$avx2_init spans $L$sse_inc too (dwords 0,0,0,0,1,0,0,0),
; so the high lane's first dword becomes one greater than the low lane's.
4177	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
; Inputs of at most 192 / 320 bytes are handled by dedicated paths.
4178	cmp	rbx,6*32
4179	jbe	NEAR $L$open_avx2_192
4180	cmp	rbx,10*32
4181	jbe	NEAR $L$open_avx2_320
4182
; Keep the initial rows 1..3 in the frame; they are added back after the rounds
; and reloaded for every later batch of blocks.
4183	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
4184	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
4185	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
; r10 = number of double rounds (10 x 2 = 20 rounds).
4186	mov	r10,10
; One ChaCha20 double round per iteration on a single two-block state
; (ymm0/ymm4/ymm8/ymm12 = rows 0..3, one block per 128-bit lane); ymm3 is scratch.
; Rotations: 16 and 8 via byte shuffles ($L$rol16/$L$rol8), 12 and 7 via shift pairs.
4187$L$open_avx2_init_rounds:
; Column round.
4188	vpaddd	ymm0,ymm0,ymm4
4189	vpxor	ymm12,ymm12,ymm0
4190	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4191	vpaddd	ymm8,ymm8,ymm12
4192	vpxor	ymm4,ymm4,ymm8
4193	vpsrld	ymm3,ymm4,20
4194	vpslld	ymm4,ymm4,12
4195	vpxor	ymm4,ymm4,ymm3
4196	vpaddd	ymm0,ymm0,ymm4
4197	vpxor	ymm12,ymm12,ymm0
4198	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4199	vpaddd	ymm8,ymm8,ymm12
4200	vpxor	ymm4,ymm4,ymm8
4201	vpslld	ymm3,ymm4,7
4202	vpsrld	ymm4,ymm4,25
4203	vpxor	ymm4,ymm4,ymm3
; Rotate rows 1..3 within each lane so the next quarter rounds act on diagonals.
4204	vpalignr	ymm12,ymm12,ymm12,12
4205	vpalignr	ymm8,ymm8,ymm8,8
4206	vpalignr	ymm4,ymm4,ymm4,4
; Diagonal round.
4207	vpaddd	ymm0,ymm0,ymm4
4208	vpxor	ymm12,ymm12,ymm0
4209	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4210	vpaddd	ymm8,ymm8,ymm12
4211	vpxor	ymm4,ymm4,ymm8
4212	vpsrld	ymm3,ymm4,20
4213	vpslld	ymm4,ymm4,12
4214	vpxor	ymm4,ymm4,ymm3
4215	vpaddd	ymm0,ymm0,ymm4
4216	vpxor	ymm12,ymm12,ymm0
4217	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4218	vpaddd	ymm8,ymm8,ymm12
4219	vpxor	ymm4,ymm4,ymm8
4220	vpslld	ymm3,ymm4,7
4221	vpsrld	ymm4,ymm4,25
4222	vpxor	ymm4,ymm4,ymm3
; Undo the row rotation (back to column layout).
4223	vpalignr	ymm12,ymm12,ymm12,4
4224	vpalignr	ymm8,ymm8,ymm8,8
4225	vpalignr	ymm4,ymm4,ymm4,12
4226
4227	dec	r10
4228	jne	NEAR $L$open_avx2_init_rounds
; Feed-forward: add the initial state back to obtain keystream.
4229	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4230	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4231	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4232	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4233
; ymm3 = low lanes of rows 0 and 1 = first 32 keystream bytes of the low-lane block.
4234	vperm2i128	ymm3,ymm4,ymm0,0x02
4235
; $L$clamp masks the first 16 bytes and leaves the second 16 untouched; the result
; at [rbp+160] is the pair of values used by the Poly1305 multiply/finish code.
4236	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
4237	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
4238
; ymm0:ymm4 = all 64 keystream bytes of the high-lane block (rows 0,1 then rows 2,3).
4239	vperm2i128	ymm0,ymm4,ymm0,0x13
4240	vperm2i128	ymm4,ymm12,ymm8,0x13
4241
; Self-move: no architectural effect. Presumably residue of the generator placing
; an argument in r8 for the call below -- TODO confirm.
4242	mov	r8,r8
; NOTE(review): r10/r11/r12 are used as the Poly1305 accumulator right after this
; call, so the helper presumably initialises them -- verify in poly_hash_ad_internal.
4243	call	poly_hash_ad_internal
4244
; rcx = byte offset into the input for the hashing loop that follows.
4245	xor	rcx,rcx
; Absorb the first 64 input bytes into the Poly1305 accumulator (r12:r11:r10),
; 16 bytes per iteration, then XOR those 64 bytes with the keystream in
; ymm0/ymm4 and store them to the output.
4246$L$open_avx2_init_hash:
; acc += 16 input bytes, plus 1 in the third limb (the 2^128 pad bit).
4247	add	r10,QWORD[((0+0))+rcx*1+rsi]
4248	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4249	adc	r12,1
; acc *= the 128-bit value at [rbp+160]; product is built in r9:r15:r14:r13.
4250	mov	rax,QWORD[((0+160+0))+rbp]
4251	mov	r15,rax
4252	mul	r10
4253	mov	r13,rax
4254	mov	r14,rdx
4255	mov	rax,QWORD[((0+160+0))+rbp]
4256	mul	r11
4257	imul	r15,r12
4258	add	r14,rax
4259	adc	r15,rdx
4260	mov	rax,QWORD[((8+160+0))+rbp]
4261	mov	r9,rax
4262	mul	r10
4263	add	r14,rax
4264	adc	rdx,0
4265	mov	r10,rdx
4266	mov	rax,QWORD[((8+160+0))+rbp]
4267	mul	r11
4268	add	r15,rax
4269	adc	rdx,0
4270	imul	r9,r12
4271	add	r15,r10
4272	adc	r9,rdx
; Partial reduction: keep the low 130 bits and add 5 * (bits from 130 upward),
; formed as (high & ~3) + (high >> 2).
4273	mov	r10,r13
4274	mov	r11,r14
4275	mov	r12,r15
4276	and	r12,3
4277	mov	r13,r15
4278	and	r13,-4
4279	mov	r14,r9
4280	shrd	r15,r9,2
4281	shr	r9,2
4282	add	r15,r13
4283	adc	r9,r14
4284	add	r10,r15
4285	adc	r11,r9
4286	adc	r12,0
4287
; Next 16-byte block; stop after 64 bytes.
4288	add	rcx,16
4289	cmp	rcx,2*32
4290	jne	NEAR $L$open_avx2_init_hash
4291
; output[0..63] = input[0..63] XOR keystream.
4292	vpxor	ymm0,ymm0,YMMWORD[rsi]
4293	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
4294
4295	vmovdqu	YMMWORD[rdi],ymm0
4296	vmovdqu	YMMWORD[32+rdi],ymm4
; Advance input/output pointers and reduce the remaining byte count.
4297	lea	rsi,[64+rsi]
4298	lea	rdi,[64+rdi]
4299	sub	rbx,2*32
; Head of the bulk loop: runs while at least 512 bytes remain in rbx.
; Sets up four two-block states (8 blocks = 512 bytes of keystream):
;   state 0: ymm0/ymm4/ymm8 /ymm12     state 1: ymm1/ymm5/ymm9 /ymm13
;   state 2: ymm2/ymm6/ymm10/ymm14     state 3: ymm3/ymm7/ymm11/ymm15
4300$L$open_avx2_main_loop:
4301
4302	cmp	rbx,16*32
4303	jb	NEAR $L$open_avx2_main_loop_done
; Rows 0..2 are identical for every state.
4304	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4305	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4306	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4307	vmovdqa	ymm1,ymm0
4308	vmovdqa	ymm5,ymm4
4309	vmovdqa	ymm9,ymm8
4310	vmovdqa	ymm2,ymm0
4311	vmovdqa	ymm6,ymm4
4312	vmovdqa	ymm10,ymm8
4313	vmovdqa	ymm3,ymm0
4314	vmovdqa	ymm7,ymm4
4315	vmovdqa	ymm11,ymm8
; Row 3: each state is $L$avx2_inc (2 per lane) past the previous one, starting
; from the saved row at [rbp+160+160]; state 3 gets the lowest value, state 0 the highest.
4316	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4317	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
4318	vpaddd	ymm14,ymm12,ymm15
4319	vpaddd	ymm13,ymm12,ymm14
4320	vpaddd	ymm12,ymm12,ymm13
; Save each state's initial row 3 for the feed-forward; [rbp+160+160] now holds
; the highest value and so seeds the next batch.
4321	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
4322	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
4323	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4324	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4325
; rcx = byte offset of the next input block to hash inside the rounds loop.
4326	xor	rcx,rcx
; One ChaCha20 double round over all four states per iteration, interleaved with
; three Poly1305 block updates on the input at [rsi+rcx], [rsi+rcx+16], [rsi+rcx+32].
; rcx advances by 48 per iteration and the loop ends at 480, i.e. 10 iterations.
; All 16 ymm registers hold state, so ymm8 is repeatedly spilled to
; [rbp+160+128] and reused for the rotate masks and shift temporaries.
; The scalar multiply uses MULX with rdx as the implicit operand; the
; instruction order is a deliberate interleave and must not be rearranged.
4327$L$open_avx2_main_loop_rounds:
; Poly1305 block 1 of 3: acc (r12:r11:r10) += input[rcx..rcx+15] + 2^128.
4328	add	r10,QWORD[((0+0))+rcx*1+rsi]
4329	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4330	adc	r12,1
; Column round begins; ymm8 spilled, then loaded with the rotate-by-16 mask.
4331	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4332	vmovdqa	ymm8,YMMWORD[$L$rol16]
4333	vpaddd	ymm3,ymm3,ymm7
4334	vpaddd	ymm2,ymm2,ymm6
4335	vpaddd	ymm1,ymm1,ymm5
4336	vpaddd	ymm0,ymm0,ymm4
4337	vpxor	ymm15,ymm15,ymm3
4338	vpxor	ymm14,ymm14,ymm2
4339	vpxor	ymm13,ymm13,ymm1
4340	vpxor	ymm12,ymm12,ymm0
; acc * low multiplier word ([rbp+160]).
4341	mov	rdx,QWORD[((0+160+0))+rbp]
4342	mov	r15,rdx
4343	mulx	r14,r13,r10
4344	mulx	rdx,rax,r11
4345	imul	r15,r12
4346	add	r14,rax
4347	adc	r15,rdx
4348	vpshufb	ymm15,ymm15,ymm8
4349	vpshufb	ymm14,ymm14,ymm8
4350	vpshufb	ymm13,ymm13,ymm8
4351	vpshufb	ymm12,ymm12,ymm8
4352	vpaddd	ymm11,ymm11,ymm15
4353	vpaddd	ymm10,ymm10,ymm14
4354	vpaddd	ymm9,ymm9,ymm13
; State 0's row 2 is the spilled copy; the sum goes back into ymm8.
4355	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4356	vpxor	ymm7,ymm7,ymm11
; acc * high multiplier word ([rbp+168]).
4357	mov	rdx,QWORD[((8+160+0))+rbp]
4358	mulx	rax,r10,r10
4359	add	r14,r10
4360	mulx	r9,r11,r11
4361	adc	r15,r11
4362	adc	r9,0
4363	imul	rdx,r12
4364	vpxor	ymm6,ymm6,ymm10
4365	vpxor	ymm5,ymm5,ymm9
4366	vpxor	ymm4,ymm4,ymm8
4367	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate row 1 left by 12 (shift pair, ymm8 as temporary).
4368	vpsrld	ymm8,ymm7,20
4369	vpslld	ymm7,ymm7,32-20
4370	vpxor	ymm7,ymm7,ymm8
4371	vpsrld	ymm8,ymm6,20
4372	vpslld	ymm6,ymm6,32-20
4373	vpxor	ymm6,ymm6,ymm8
4374	vpsrld	ymm8,ymm5,20
4375	vpslld	ymm5,ymm5,32-20
4376	add	r15,rax
4377	adc	r9,rdx
4378	vpxor	ymm5,ymm5,ymm8
4379	vpsrld	ymm8,ymm4,20
4380	vpslld	ymm4,ymm4,32-20
4381	vpxor	ymm4,ymm4,ymm8
4382	vmovdqa	ymm8,YMMWORD[$L$rol8]
4383	vpaddd	ymm3,ymm3,ymm7
4384	vpaddd	ymm2,ymm2,ymm6
4385	vpaddd	ymm1,ymm1,ymm5
4386	vpaddd	ymm0,ymm0,ymm4
4387	vpxor	ymm15,ymm15,ymm3
; Partial reduction of the product r9:r15:r14:r13: low 130 bits + 5 * (bits >= 130).
4388	mov	r10,r13
4389	mov	r11,r14
4390	mov	r12,r15
4391	and	r12,3
4392	mov	r13,r15
4393	and	r13,-4
4394	mov	r14,r9
4395	shrd	r15,r9,2
4396	shr	r9,2
4397	add	r15,r13
4398	adc	r9,r14
4399	add	r10,r15
4400	adc	r11,r9
4401	adc	r12,0
4402	vpxor	ymm14,ymm14,ymm2
4403	vpxor	ymm13,ymm13,ymm1
4404	vpxor	ymm12,ymm12,ymm0
4405	vpshufb	ymm15,ymm15,ymm8
4406	vpshufb	ymm14,ymm14,ymm8
4407	vpshufb	ymm13,ymm13,ymm8
4408	vpshufb	ymm12,ymm12,ymm8
4409	vpaddd	ymm11,ymm11,ymm15
4410	vpaddd	ymm10,ymm10,ymm14
; Poly1305 block 2 of 3: input[rcx+16..rcx+31].
4411	add	r10,QWORD[((0+16))+rcx*1+rsi]
4412	adc	r11,QWORD[((8+16))+rcx*1+rsi]
4413	adc	r12,1
4414	vpaddd	ymm9,ymm9,ymm13
4415	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4416	vpxor	ymm7,ymm7,ymm11
4417	vpxor	ymm6,ymm6,ymm10
4418	vpxor	ymm5,ymm5,ymm9
4419	vpxor	ymm4,ymm4,ymm8
4420	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate row 1 left by 7.
4421	vpsrld	ymm8,ymm7,25
4422	mov	rdx,QWORD[((0+160+0))+rbp]
4423	mov	r15,rdx
4424	mulx	r14,r13,r10
4425	mulx	rdx,rax,r11
4426	imul	r15,r12
4427	add	r14,rax
4428	adc	r15,rdx
4429	vpslld	ymm7,ymm7,32-25
4430	vpxor	ymm7,ymm7,ymm8
4431	vpsrld	ymm8,ymm6,25
4432	vpslld	ymm6,ymm6,32-25
4433	vpxor	ymm6,ymm6,ymm8
4434	vpsrld	ymm8,ymm5,25
4435	vpslld	ymm5,ymm5,32-25
4436	vpxor	ymm5,ymm5,ymm8
4437	vpsrld	ymm8,ymm4,25
4438	vpslld	ymm4,ymm4,32-25
4439	vpxor	ymm4,ymm4,ymm8
; Restore state 0's row 2, then rotate rows 1..3 of every state into diagonal layout.
4440	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
4441	vpalignr	ymm7,ymm7,ymm7,4
4442	vpalignr	ymm11,ymm11,ymm11,8
4443	vpalignr	ymm15,ymm15,ymm15,12
4444	vpalignr	ymm6,ymm6,ymm6,4
4445	vpalignr	ymm10,ymm10,ymm10,8
4446	vpalignr	ymm14,ymm14,ymm14,12
4447	mov	rdx,QWORD[((8+160+0))+rbp]
4448	mulx	rax,r10,r10
4449	add	r14,r10
4450	mulx	r9,r11,r11
4451	adc	r15,r11
4452	adc	r9,0
4453	imul	rdx,r12
4454	vpalignr	ymm5,ymm5,ymm5,4
4455	vpalignr	ymm9,ymm9,ymm9,8
4456	vpalignr	ymm13,ymm13,ymm13,12
4457	vpalignr	ymm4,ymm4,ymm4,4
4458	vpalignr	ymm8,ymm8,ymm8,8
4459	vpalignr	ymm12,ymm12,ymm12,12
; Diagonal round begins.
4460	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4461	vmovdqa	ymm8,YMMWORD[$L$rol16]
4462	vpaddd	ymm3,ymm3,ymm7
4463	vpaddd	ymm2,ymm2,ymm6
4464	vpaddd	ymm1,ymm1,ymm5
4465	vpaddd	ymm0,ymm0,ymm4
4466	vpxor	ymm15,ymm15,ymm3
4467	vpxor	ymm14,ymm14,ymm2
4468	vpxor	ymm13,ymm13,ymm1
4469	vpxor	ymm12,ymm12,ymm0
4470	vpshufb	ymm15,ymm15,ymm8
4471	vpshufb	ymm14,ymm14,ymm8
4472	add	r15,rax
4473	adc	r9,rdx
4474	vpshufb	ymm13,ymm13,ymm8
4475	vpshufb	ymm12,ymm12,ymm8
4476	vpaddd	ymm11,ymm11,ymm15
4477	vpaddd	ymm10,ymm10,ymm14
4478	vpaddd	ymm9,ymm9,ymm13
4479	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4480	vpxor	ymm7,ymm7,ymm11
4481	vpxor	ymm6,ymm6,ymm10
4482	vpxor	ymm5,ymm5,ymm9
; Partial reduction for block 2.
4483	mov	r10,r13
4484	mov	r11,r14
4485	mov	r12,r15
4486	and	r12,3
4487	mov	r13,r15
4488	and	r13,-4
4489	mov	r14,r9
4490	shrd	r15,r9,2
4491	shr	r9,2
4492	add	r15,r13
4493	adc	r9,r14
4494	add	r10,r15
4495	adc	r11,r9
4496	adc	r12,0
4497	vpxor	ymm4,ymm4,ymm8
4498	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4499	vpsrld	ymm8,ymm7,20
4500	vpslld	ymm7,ymm7,32-20
4501	vpxor	ymm7,ymm7,ymm8
4502	vpsrld	ymm8,ymm6,20
4503	vpslld	ymm6,ymm6,32-20
4504	vpxor	ymm6,ymm6,ymm8
; Poly1305 block 3 of 3: input[rcx+32..rcx+47].
4505	add	r10,QWORD[((0+32))+rcx*1+rsi]
4506	adc	r11,QWORD[((8+32))+rcx*1+rsi]
4507	adc	r12,1
4508
; Advance the hash offset by the 48 bytes consumed this iteration (lea keeps flags).
4509	lea	rcx,[48+rcx]
4510	vpsrld	ymm8,ymm5,20
4511	vpslld	ymm5,ymm5,32-20
4512	vpxor	ymm5,ymm5,ymm8
4513	vpsrld	ymm8,ymm4,20
4514	vpslld	ymm4,ymm4,32-20
4515	vpxor	ymm4,ymm4,ymm8
4516	vmovdqa	ymm8,YMMWORD[$L$rol8]
4517	vpaddd	ymm3,ymm3,ymm7
4518	vpaddd	ymm2,ymm2,ymm6
4519	vpaddd	ymm1,ymm1,ymm5
4520	vpaddd	ymm0,ymm0,ymm4
4521	vpxor	ymm15,ymm15,ymm3
4522	vpxor	ymm14,ymm14,ymm2
4523	vpxor	ymm13,ymm13,ymm1
4524	vpxor	ymm12,ymm12,ymm0
4525	vpshufb	ymm15,ymm15,ymm8
4526	vpshufb	ymm14,ymm14,ymm8
4527	vpshufb	ymm13,ymm13,ymm8
4528	mov	rdx,QWORD[((0+160+0))+rbp]
4529	mov	r15,rdx
4530	mulx	r14,r13,r10
4531	mulx	rdx,rax,r11
4532	imul	r15,r12
4533	add	r14,rax
4534	adc	r15,rdx
4535	vpshufb	ymm12,ymm12,ymm8
4536	vpaddd	ymm11,ymm11,ymm15
4537	vpaddd	ymm10,ymm10,ymm14
4538	vpaddd	ymm9,ymm9,ymm13
4539	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4540	vpxor	ymm7,ymm7,ymm11
4541	vpxor	ymm6,ymm6,ymm10
4542	vpxor	ymm5,ymm5,ymm9
4543	mov	rdx,QWORD[((8+160+0))+rbp]
4544	mulx	rax,r10,r10
4545	add	r14,r10
4546	mulx	r9,r11,r11
4547	adc	r15,r11
4548	adc	r9,0
4549	imul	rdx,r12
4550	vpxor	ymm4,ymm4,ymm8
4551	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4552	vpsrld	ymm8,ymm7,25
4553	vpslld	ymm7,ymm7,32-25
4554	vpxor	ymm7,ymm7,ymm8
4555	vpsrld	ymm8,ymm6,25
4556	vpslld	ymm6,ymm6,32-25
4557	vpxor	ymm6,ymm6,ymm8
4558	add	r15,rax
4559	adc	r9,rdx
4560	vpsrld	ymm8,ymm5,25
4561	vpslld	ymm5,ymm5,32-25
4562	vpxor	ymm5,ymm5,ymm8
4563	vpsrld	ymm8,ymm4,25
4564	vpslld	ymm4,ymm4,32-25
4565	vpxor	ymm4,ymm4,ymm8
; Restore ymm8 and rotate rows back to column layout.
4566	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
4567	vpalignr	ymm7,ymm7,ymm7,12
4568	vpalignr	ymm11,ymm11,ymm11,8
4569	vpalignr	ymm15,ymm15,ymm15,4
4570	vpalignr	ymm6,ymm6,ymm6,12
4571	vpalignr	ymm10,ymm10,ymm10,8
4572	vpalignr	ymm14,ymm14,ymm14,4
4573	vpalignr	ymm5,ymm5,ymm5,12
4574	vpalignr	ymm9,ymm9,ymm9,8
4575	vpalignr	ymm13,ymm13,ymm13,4
4576	vpalignr	ymm4,ymm4,ymm4,12
4577	vpalignr	ymm8,ymm8,ymm8,8
; Partial reduction for block 3.
4578	mov	r10,r13
4579	mov	r11,r14
4580	mov	r12,r15
4581	and	r12,3
4582	mov	r13,r15
4583	and	r13,-4
4584	mov	r14,r9
4585	shrd	r15,r9,2
4586	shr	r9,2
4587	add	r15,r13
4588	adc	r9,r14
4589	add	r10,r15
4590	adc	r11,r9
4591	adc	r12,0
4592	vpalignr	ymm12,ymm12,ymm12,4
4593
; 10 iterations x 48 bytes = 480 bytes hashed when the loop exits.
4594	cmp	rcx,10*6*8
4595	jne	NEAR $L$open_avx2_main_loop_rounds
; Fall-through after the main-loop rounds: add the initial state back to all
; four states, hash the last two 16-byte blocks of this 512-byte chunk
; (offsets 480 and 496), XOR 512 input bytes with the keystream, store them,
; and loop. State 3 (lowest row-3 value) produces bytes 0..127, state 0 the last 128.
4596	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
4597	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
4598	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
4599	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
4600	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
4601	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
4602	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
4603	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
4604	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
4605	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
4606	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
4607	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
4608	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4609	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4610	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4611	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4612
; Spill ymm0 so it can serve as a temporary while state 3 is written out.
4613	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
; Poly1305: absorb input[480..495].
4614	add	r10,QWORD[((0+480))+rsi]
4615	adc	r11,QWORD[((8+480))+rsi]
4616	adc	r12,1
; State 3: regroup lanes into two contiguous 64-byte blocks, XOR, store bytes 0..127.
4617	vperm2i128	ymm0,ymm7,ymm3,0x02
4618	vperm2i128	ymm7,ymm7,ymm3,0x13
4619	vperm2i128	ymm3,ymm15,ymm11,0x02
4620	vperm2i128	ymm11,ymm15,ymm11,0x13
4621	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
4622	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
4623	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
4624	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
4625	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
4626	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
4627	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
4628	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
4629
; Restore state 0's row 0.
4630	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
; Multiply the accumulator by the value at [rbp+160] and partially reduce.
4631	mov	rax,QWORD[((0+160+0))+rbp]
4632	mov	r15,rax
4633	mul	r10
4634	mov	r13,rax
4635	mov	r14,rdx
4636	mov	rax,QWORD[((0+160+0))+rbp]
4637	mul	r11
4638	imul	r15,r12
4639	add	r14,rax
4640	adc	r15,rdx
4641	mov	rax,QWORD[((8+160+0))+rbp]
4642	mov	r9,rax
4643	mul	r10
4644	add	r14,rax
4645	adc	rdx,0
4646	mov	r10,rdx
4647	mov	rax,QWORD[((8+160+0))+rbp]
4648	mul	r11
4649	add	r15,rax
4650	adc	rdx,0
4651	imul	r9,r12
4652	add	r15,r10
4653	adc	r9,rdx
4654	mov	r10,r13
4655	mov	r11,r14
4656	mov	r12,r15
4657	and	r12,3
4658	mov	r13,r15
4659	and	r13,-4
4660	mov	r14,r9
4661	shrd	r15,r9,2
4662	shr	r9,2
4663	add	r15,r13
4664	adc	r9,r14
4665	add	r10,r15
4666	adc	r11,r9
4667	adc	r12,0
; State 2: bytes 128..255 (ymm3 is free to use as a temporary now).
4668	vperm2i128	ymm3,ymm6,ymm2,0x02
4669	vperm2i128	ymm6,ymm6,ymm2,0x13
4670	vperm2i128	ymm2,ymm14,ymm10,0x02
4671	vperm2i128	ymm10,ymm14,ymm10,0x13
4672	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
4673	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
4674	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
4675	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
4676	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
4677	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
4678	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
4679	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; Poly1305: absorb input[496..511], the last block of this chunk.
4680	add	r10,QWORD[((0+480+16))+rsi]
4681	adc	r11,QWORD[((8+480+16))+rsi]
4682	adc	r12,1
; State 1: bytes 256..383.
4683	vperm2i128	ymm3,ymm5,ymm1,0x02
4684	vperm2i128	ymm5,ymm5,ymm1,0x13
4685	vperm2i128	ymm1,ymm13,ymm9,0x02
4686	vperm2i128	ymm9,ymm13,ymm9,0x13
4687	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
4688	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
4689	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
4690	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
4691	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
4692	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
4693	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
4694	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; Multiply and partially reduce for the block at offset 496.
4695	mov	rax,QWORD[((0+160+0))+rbp]
4696	mov	r15,rax
4697	mul	r10
4698	mov	r13,rax
4699	mov	r14,rdx
4700	mov	rax,QWORD[((0+160+0))+rbp]
4701	mul	r11
4702	imul	r15,r12
4703	add	r14,rax
4704	adc	r15,rdx
4705	mov	rax,QWORD[((8+160+0))+rbp]
4706	mov	r9,rax
4707	mul	r10
4708	add	r14,rax
4709	adc	rdx,0
4710	mov	r10,rdx
4711	mov	rax,QWORD[((8+160+0))+rbp]
4712	mul	r11
4713	add	r15,rax
4714	adc	rdx,0
4715	imul	r9,r12
4716	add	r15,r10
4717	adc	r9,rdx
4718	mov	r10,r13
4719	mov	r11,r14
4720	mov	r12,r15
4721	and	r12,3
4722	mov	r13,r15
4723	and	r13,-4
4724	mov	r14,r9
4725	shrd	r15,r9,2
4726	shr	r9,2
4727	add	r15,r13
4728	adc	r9,r14
4729	add	r10,r15
4730	adc	r11,r9
4731	adc	r12,0
; State 0: bytes 384..511.
4732	vperm2i128	ymm3,ymm4,ymm0,0x02
4733	vperm2i128	ymm4,ymm4,ymm0,0x13
4734	vperm2i128	ymm0,ymm12,ymm8,0x02
4735	vperm2i128	ymm8,ymm12,ymm8,0x13
4736	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
4737	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
4738	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
4739	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
4740	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
4741	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
4742	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
4743	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
4744
; Consume 512 bytes and go back to the size check at the loop head.
4745	lea	rsi,[512+rsi]
4746	lea	rdi,[512+rdi]
4747	sub	rbx,16*32
4748	jmp	NEAR $L$open_avx2_main_loop
; Fewer than 512 bytes remain. Finish if nothing is left, otherwise pick the
; tail routine by remaining size: >384 -> tail_512, >256 -> tail_384,
; >128 -> tail_256, else fall through to the 128-byte tail set-up below.
4749$L$open_avx2_main_loop_done:
4750	test	rbx,rbx
; vzeroupper does not modify flags, so the je below still tests rbx.
4751	vzeroupper
4752	je	NEAR $L$open_sse_finalize
4753
4754	cmp	rbx,12*32
4755	ja	NEAR $L$open_avx2_tail_512
4756	cmp	rbx,8*32
4757	ja	NEAR $L$open_avx2_tail_384
4758	cmp	rbx,4*32
4759	ja	NEAR $L$open_avx2_tail_256
; 1..128 bytes left: a single two-block state with the next row-3 value.
4760	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4761	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4762	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4763	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4764	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4765	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4766
; r8  = loop position, stepped by 16 (hash offset and round counter in one);
; rcx = remaining length rounded down to a multiple of 16 (bytes to hash here).
4767	xor	r8,r8
4768	mov	rcx,rbx
4769	and	rcx,-16
; With no whole 16-byte block available, skip straight to the rounds.
4770	test	rcx,rcx
4771	je	NEAR $L$open_avx2_tail_128_rounds
; Absorb one 16-byte input block at [rsi+r8] into the Poly1305 accumulator
; (r12:r11:r10), then fall through into one double round.
4772$L$open_avx2_tail_128_rounds_and_x1hash:
; acc += block + 2^128.
4773	add	r10,QWORD[((0+0))+r8*1+rsi]
4774	adc	r11,QWORD[((8+0))+r8*1+rsi]
4775	adc	r12,1
; acc *= the 128-bit value at [rbp+160]; product in r9:r15:r14:r13.
4776	mov	rax,QWORD[((0+160+0))+rbp]
4777	mov	r15,rax
4778	mul	r10
4779	mov	r13,rax
4780	mov	r14,rdx
4781	mov	rax,QWORD[((0+160+0))+rbp]
4782	mul	r11
4783	imul	r15,r12
4784	add	r14,rax
4785	adc	r15,rdx
4786	mov	rax,QWORD[((8+160+0))+rbp]
4787	mov	r9,rax
4788	mul	r10
4789	add	r14,rax
4790	adc	rdx,0
4791	mov	r10,rdx
4792	mov	rax,QWORD[((8+160+0))+rbp]
4793	mul	r11
4794	add	r15,rax
4795	adc	rdx,0
4796	imul	r9,r12
4797	add	r15,r10
4798	adc	r9,rdx
; Partial reduction: low 130 bits + 5 * (bits from 130 upward).
4799	mov	r10,r13
4800	mov	r11,r14
4801	mov	r12,r15
4802	and	r12,3
4803	mov	r13,r15
4804	and	r13,-4
4805	mov	r14,r9
4806	shrd	r15,r9,2
4807	shr	r9,2
4808	add	r15,r13
4809	adc	r9,r14
4810	add	r10,r15
4811	adc	r11,r9
4812	adc	r12,0
4813
; One ChaCha20 double round on the single tail state (ymm0/ymm4/ymm8/ymm12,
; ymm3 scratch). r8 advances by 16 per pass; while r8 < rcx another input block
; is hashed first, and the loop ends when r8 reaches 160 (10 double rounds).
4814$L$open_avx2_tail_128_rounds:
4815	add	r8,16
; Column round.
4816	vpaddd	ymm0,ymm0,ymm4
4817	vpxor	ymm12,ymm12,ymm0
4818	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4819	vpaddd	ymm8,ymm8,ymm12
4820	vpxor	ymm4,ymm4,ymm8
4821	vpsrld	ymm3,ymm4,20
4822	vpslld	ymm4,ymm4,12
4823	vpxor	ymm4,ymm4,ymm3
4824	vpaddd	ymm0,ymm0,ymm4
4825	vpxor	ymm12,ymm12,ymm0
4826	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4827	vpaddd	ymm8,ymm8,ymm12
4828	vpxor	ymm4,ymm4,ymm8
4829	vpslld	ymm3,ymm4,7
4830	vpsrld	ymm4,ymm4,25
4831	vpxor	ymm4,ymm4,ymm3
4832	vpalignr	ymm12,ymm12,ymm12,12
4833	vpalignr	ymm8,ymm8,ymm8,8
4834	vpalignr	ymm4,ymm4,ymm4,4
; Diagonal round.
4835	vpaddd	ymm0,ymm0,ymm4
4836	vpxor	ymm12,ymm12,ymm0
4837	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4838	vpaddd	ymm8,ymm8,ymm12
4839	vpxor	ymm4,ymm4,ymm8
4840	vpsrld	ymm3,ymm4,20
4841	vpslld	ymm4,ymm4,12
4842	vpxor	ymm4,ymm4,ymm3
4843	vpaddd	ymm0,ymm0,ymm4
4844	vpxor	ymm12,ymm12,ymm0
4845	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4846	vpaddd	ymm8,ymm8,ymm12
4847	vpxor	ymm4,ymm4,ymm8
4848	vpslld	ymm3,ymm4,7
4849	vpsrld	ymm4,ymm4,25
4850	vpxor	ymm4,ymm4,ymm3
4851	vpalignr	ymm12,ymm12,ymm12,4
4852	vpalignr	ymm8,ymm8,ymm8,8
4853	vpalignr	ymm4,ymm4,ymm4,12
4854
; More whole input blocks to hash? Then hash one before the next double round.
4855	cmp	r8,rcx
4856	jb	NEAR $L$open_avx2_tail_128_rounds_and_x1hash
; Otherwise keep iterating rounds only until 10 double rounds are done.
4857	cmp	r8,160
4858	jne	NEAR $L$open_avx2_tail_128_rounds
; Feed-forward with the initial state.
4859	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4860	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4861	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4862	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Regroup lanes so ymm0, ymm4, ymm8, ymm12 hold 128 consecutive keystream bytes
; (low-lane block in ymm0:ymm4, high-lane block in ymm8:ymm12).
4863	vperm2i128	ymm3,ymm4,ymm0,0x13
4864	vperm2i128	ymm0,ymm4,ymm0,0x02
4865	vperm2i128	ymm4,ymm12,ymm8,0x02
4866	vperm2i128	ymm12,ymm12,ymm8,0x13
4867	vmovdqa	ymm8,ymm3
4868
4869	jmp	NEAR $L$open_avx2_tail_128_xor
4870
; Tail for 129..256 remaining bytes: two two-block states.
;   state 0: ymm0/ymm4/ymm8/ymm12    state 1: ymm1/ymm5/ymm9/ymm13
; State 1 takes the lower row-3 value and is written out first.
4871$L$open_avx2_tail_256:
4872	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4873	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4874	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4875	vmovdqa	ymm1,ymm0
4876	vmovdqa	ymm5,ymm4
4877	vmovdqa	ymm9,ymm8
4878	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4879	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
4880	vpaddd	ymm12,ymm12,ymm13
4881	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4882	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4883
; Park the remaining length in the frame; rbx is reused as the hash read pointer.
4884	mov	QWORD[((160+128))+rbp],rbx
; rcx = min((remaining - 128) / 16, 10): how many of the 10 double rounds are
; preceded by a hashed block. cmovg is a signed compare; both operands are
; small non-negative values on this path.
4885	mov	rcx,rbx
4886	sub	rcx,4*32
4887	shr	rcx,4
4888	mov	r8,10
4889	cmp	rcx,10
4890	cmovg	rcx,r8
4891	mov	rbx,rsi
; r8 = double-round counter.
4892	xor	r8,r8
; Absorb the 16-byte input block at [rbx] into the Poly1305 accumulator
; (r12:r11:r10) using MULX, advance rbx, then fall through into a double round.
4893$L$open_avx2_tail_256_rounds_and_x1hash:
; acc += block + 2^128.
4894	add	r10,QWORD[((0+0))+rbx]
4895	adc	r11,QWORD[((8+0))+rbx]
4896	adc	r12,1
; acc * low multiplier word ([rbp+160]); rdx is MULX's implicit operand.
4897	mov	rdx,QWORD[((0+160+0))+rbp]
4898	mov	r15,rdx
4899	mulx	r14,r13,r10
4900	mulx	rdx,rax,r11
4901	imul	r15,r12
4902	add	r14,rax
4903	adc	r15,rdx
; acc * high multiplier word ([rbp+168]).
4904	mov	rdx,QWORD[((8+160+0))+rbp]
4905	mulx	rax,r10,r10
4906	add	r14,r10
4907	mulx	r9,r11,r11
4908	adc	r15,r11
4909	adc	r9,0
4910	imul	rdx,r12
4911	add	r15,rax
4912	adc	r9,rdx
; Partial reduction: low 130 bits + 5 * (bits from 130 upward).
4913	mov	r10,r13
4914	mov	r11,r14
4915	mov	r12,r15
4916	and	r12,3
4917	mov	r13,r15
4918	and	r13,-4
4919	mov	r14,r9
4920	shrd	r15,r9,2
4921	shr	r9,2
4922	add	r15,r13
4923	adc	r9,r14
4924	add	r10,r15
4925	adc	r11,r9
4926	adc	r12,0
4927
; Move the hash pointer to the next block.
4928	lea	rbx,[16+rbx]
; One ChaCha20 double round on states 0 and 1 (ymm3 scratch); r8 counts double
; rounds. While r8 < rcx each pass is preceded by one hashed block; the loop ends
; after 10 double rounds. Afterwards rbx/rcx/r8 are rearranged for the
; remaining-hash loop that follows.
4929$L$open_avx2_tail_256_rounds:
; Column round, state 0.
4930	vpaddd	ymm0,ymm0,ymm4
4931	vpxor	ymm12,ymm12,ymm0
4932	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4933	vpaddd	ymm8,ymm8,ymm12
4934	vpxor	ymm4,ymm4,ymm8
4935	vpsrld	ymm3,ymm4,20
4936	vpslld	ymm4,ymm4,12
4937	vpxor	ymm4,ymm4,ymm3
4938	vpaddd	ymm0,ymm0,ymm4
4939	vpxor	ymm12,ymm12,ymm0
4940	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4941	vpaddd	ymm8,ymm8,ymm12
4942	vpxor	ymm4,ymm4,ymm8
4943	vpslld	ymm3,ymm4,7
4944	vpsrld	ymm4,ymm4,25
4945	vpxor	ymm4,ymm4,ymm3
4946	vpalignr	ymm12,ymm12,ymm12,12
4947	vpalignr	ymm8,ymm8,ymm8,8
4948	vpalignr	ymm4,ymm4,ymm4,4
; Column round, state 1.
4949	vpaddd	ymm1,ymm1,ymm5
4950	vpxor	ymm13,ymm13,ymm1
4951	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4952	vpaddd	ymm9,ymm9,ymm13
4953	vpxor	ymm5,ymm5,ymm9
4954	vpsrld	ymm3,ymm5,20
4955	vpslld	ymm5,ymm5,12
4956	vpxor	ymm5,ymm5,ymm3
4957	vpaddd	ymm1,ymm1,ymm5
4958	vpxor	ymm13,ymm13,ymm1
4959	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
4960	vpaddd	ymm9,ymm9,ymm13
4961	vpxor	ymm5,ymm5,ymm9
4962	vpslld	ymm3,ymm5,7
4963	vpsrld	ymm5,ymm5,25
4964	vpxor	ymm5,ymm5,ymm3
4965	vpalignr	ymm13,ymm13,ymm13,12
4966	vpalignr	ymm9,ymm9,ymm9,8
4967	vpalignr	ymm5,ymm5,ymm5,4
4968
4969	inc	r8
; Diagonal round, state 0.
4970	vpaddd	ymm0,ymm0,ymm4
4971	vpxor	ymm12,ymm12,ymm0
4972	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4973	vpaddd	ymm8,ymm8,ymm12
4974	vpxor	ymm4,ymm4,ymm8
4975	vpsrld	ymm3,ymm4,20
4976	vpslld	ymm4,ymm4,12
4977	vpxor	ymm4,ymm4,ymm3
4978	vpaddd	ymm0,ymm0,ymm4
4979	vpxor	ymm12,ymm12,ymm0
4980	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4981	vpaddd	ymm8,ymm8,ymm12
4982	vpxor	ymm4,ymm4,ymm8
4983	vpslld	ymm3,ymm4,7
4984	vpsrld	ymm4,ymm4,25
4985	vpxor	ymm4,ymm4,ymm3
4986	vpalignr	ymm12,ymm12,ymm12,4
4987	vpalignr	ymm8,ymm8,ymm8,8
4988	vpalignr	ymm4,ymm4,ymm4,12
; Diagonal round, state 1.
4989	vpaddd	ymm1,ymm1,ymm5
4990	vpxor	ymm13,ymm13,ymm1
4991	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4992	vpaddd	ymm9,ymm9,ymm13
4993	vpxor	ymm5,ymm5,ymm9
4994	vpsrld	ymm3,ymm5,20
4995	vpslld	ymm5,ymm5,12
4996	vpxor	ymm5,ymm5,ymm3
4997	vpaddd	ymm1,ymm1,ymm5
4998	vpxor	ymm13,ymm13,ymm1
4999	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5000	vpaddd	ymm9,ymm9,ymm13
5001	vpxor	ymm5,ymm5,ymm9
5002	vpslld	ymm3,ymm5,7
5003	vpsrld	ymm5,ymm5,25
5004	vpxor	ymm5,ymm5,ymm3
5005	vpalignr	ymm13,ymm13,ymm13,4
5006	vpalignr	ymm9,ymm9,ymm9,8
5007	vpalignr	ymm5,ymm5,ymm5,12
; NOTE(review): the quarter-round below works on ymm2/ymm6/ymm10/ymm14. The
; 256-byte tail set-up does not initialise those registers, and neither this
; loop's exit code nor $L$open_avx2_tail_256_done reads them, so this looks like
; redundant work inherited from the generator. Confirm that
; $L$open_avx2_tail_128_xor does not depend on them before removing it upstream.
5008	vpaddd	ymm2,ymm2,ymm6
5009	vpxor	ymm14,ymm14,ymm2
5010	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5011	vpaddd	ymm10,ymm10,ymm14
5012	vpxor	ymm6,ymm6,ymm10
5013	vpsrld	ymm3,ymm6,20
5014	vpslld	ymm6,ymm6,12
5015	vpxor	ymm6,ymm6,ymm3
5016	vpaddd	ymm2,ymm2,ymm6
5017	vpxor	ymm14,ymm14,ymm2
5018	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5019	vpaddd	ymm10,ymm10,ymm14
5020	vpxor	ymm6,ymm6,ymm10
5021	vpslld	ymm3,ymm6,7
5022	vpsrld	ymm6,ymm6,25
5023	vpxor	ymm6,ymm6,ymm3
5024	vpalignr	ymm14,ymm14,ymm14,4
5025	vpalignr	ymm10,ymm10,ymm10,8
5026	vpalignr	ymm6,ymm6,ymm6,12
5027
; Hash another block before the next double round while r8 < rcx ...
5028	cmp	r8,rcx
5029	jb	NEAR $L$open_avx2_tail_256_rounds_and_x1hash
; ... otherwise run rounds only until 10 double rounds are complete.
5030	cmp	r8,10
5031	jne	NEAR $L$open_avx2_tail_256_rounds
; r8 = next unhashed input address, rcx = bytes hashed so far,
; rbx = remaining length restored from the frame.
5032	mov	r8,rbx
5033	sub	rbx,rsi
5034	mov	rcx,rbx
5035	mov	rbx,QWORD[((160+128))+rbp]
; Hash the whole 16-byte input blocks not yet absorbed during the rounds.
; rcx = bytes hashed so far, rbx = remaining length, r8 = read pointer.
; The loop leaves once another full block would exceed rbx.
5036$L$open_avx2_tail_256_hash:
5037	add	rcx,16
5038	cmp	rcx,rbx
; Signed compare; both values are small byte counts on this path.
5039	jg	NEAR $L$open_avx2_tail_256_done
; acc (r12:r11:r10) += block + 2^128.
5040	add	r10,QWORD[((0+0))+r8]
5041	adc	r11,QWORD[((8+0))+r8]
5042	adc	r12,1
; Multiply by the 128-bit value at [rbp+160] using MULX.
5043	mov	rdx,QWORD[((0+160+0))+rbp]
5044	mov	r15,rdx
5045	mulx	r14,r13,r10
5046	mulx	rdx,rax,r11
5047	imul	r15,r12
5048	add	r14,rax
5049	adc	r15,rdx
5050	mov	rdx,QWORD[((8+160+0))+rbp]
5051	mulx	rax,r10,r10
5052	add	r14,r10
5053	mulx	r9,r11,r11
5054	adc	r15,r11
5055	adc	r9,0
5056	imul	rdx,r12
5057	add	r15,rax
5058	adc	r9,rdx
; Partial reduction: low 130 bits + 5 * (bits from 130 upward).
5059	mov	r10,r13
5060	mov	r11,r14
5061	mov	r12,r15
5062	and	r12,3
5063	mov	r13,r15
5064	and	r13,-4
5065	mov	r14,r9
5066	shrd	r15,r9,2
5067	shr	r9,2
5068	add	r15,r13
5069	adc	r9,r14
5070	add	r10,r15
5071	adc	r11,r9
5072	adc	r12,0
5073
5074	lea	r8,[16+r8]
5075	jmp	NEAR $L$open_avx2_tail_256_hash
; Feed-forward both states, write 128 bytes from state 1, and leave state 0's
; 128 keystream bytes in ymm0, ymm4, ymm8, ymm12 for $L$open_avx2_tail_128_xor.
5076$L$open_avx2_tail_256_done:
5077	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5078	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5079	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5080	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5081	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5082	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5083	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5084	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 1: regroup lanes into two contiguous blocks, XOR with input, store.
5085	vperm2i128	ymm3,ymm5,ymm1,0x02
5086	vperm2i128	ymm5,ymm5,ymm1,0x13
5087	vperm2i128	ymm1,ymm13,ymm9,0x02
5088	vperm2i128	ymm9,ymm13,ymm9,0x13
5089	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5090	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
5091	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
5092	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
5093	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5094	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
5095	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
5096	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
; State 0: regroup only; keystream order is ymm0, ymm4, ymm8, ymm12.
5097	vperm2i128	ymm3,ymm4,ymm0,0x13
5098	vperm2i128	ymm0,ymm4,ymm0,0x02
5099	vperm2i128	ymm4,ymm12,ymm8,0x02
5100	vperm2i128	ymm12,ymm12,ymm8,0x13
5101	vmovdqa	ymm8,ymm3
5102
; 128 bytes consumed.
5103	lea	rsi,[128+rsi]
5104	lea	rdi,[128+rdi]
5105	sub	rbx,4*32
5106	jmp	NEAR $L$open_avx2_tail_128_xor
5107
; Tail for 257..384 remaining bytes: three two-block states.
;   state 0: ymm0/ymm4/ymm8 /ymm12   state 1: ymm1/ymm5/ymm9/ymm13
;   state 2: ymm2/ymm6/ymm10/ymm14
; State 2 takes the lowest row-3 value and is written out first.
5108$L$open_avx2_tail_384:
5109	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
5110	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
5111	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
5112	vmovdqa	ymm1,ymm0
5113	vmovdqa	ymm5,ymm4
5114	vmovdqa	ymm9,ymm8
5115	vmovdqa	ymm2,ymm0
5116	vmovdqa	ymm6,ymm4
5117	vmovdqa	ymm10,ymm8
5118	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
5119	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
5120	vpaddd	ymm13,ymm12,ymm14
5121	vpaddd	ymm12,ymm12,ymm13
5122	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
5123	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
5124	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
5125
; Park the remaining length in the frame; rbx is reused as the hash read pointer.
5126	mov	QWORD[((160+128))+rbp],rbx
; rcx = min((remaining - 256) / 16 + 6, 10): number of double rounds that are
; preceded by an extra hashed block (two blocks hashed on those passes).
5127	mov	rcx,rbx
5128	sub	rcx,8*32
5129	shr	rcx,4
5130	add	rcx,6
5131	mov	r8,10
5132	cmp	rcx,10
5133	cmovg	rcx,r8
5134	mov	rbx,rsi
; r8 = double-round counter.
5135	xor	r8,r8
; Extra Poly1305 block for this pass: absorb 16 bytes at [rbx] (MULX variant),
; advance rbx, then fall through into the round code, which hashes a second block.
5136$L$open_avx2_tail_384_rounds_and_x2hash:
; acc (r12:r11:r10) += block + 2^128.
5137	add	r10,QWORD[((0+0))+rbx]
5138	adc	r11,QWORD[((8+0))+rbx]
5139	adc	r12,1
; Multiply by the 128-bit value at [rbp+160].
5140	mov	rdx,QWORD[((0+160+0))+rbp]
5141	mov	r15,rdx
5142	mulx	r14,r13,r10
5143	mulx	rdx,rax,r11
5144	imul	r15,r12
5145	add	r14,rax
5146	adc	r15,rdx
5147	mov	rdx,QWORD[((8+160+0))+rbp]
5148	mulx	rax,r10,r10
5149	add	r14,r10
5150	mulx	r9,r11,r11
5151	adc	r15,r11
5152	adc	r9,0
5153	imul	rdx,r12
5154	add	r15,rax
5155	adc	r9,rdx
; Partial reduction: low 130 bits + 5 * (bits from 130 upward).
5156	mov	r10,r13
5157	mov	r11,r14
5158	mov	r12,r15
5159	and	r12,3
5160	mov	r13,r15
5161	and	r13,-4
5162	mov	r14,r9
5163	shrd	r15,r9,2
5164	shr	r9,2
5165	add	r15,r13
5166	adc	r9,r14
5167	add	r10,r15
5168	adc	r11,r9
5169	adc	r12,0
5170
5171	lea	rbx,[16+rbx]
; One ChaCha20 double round on states 2, 1 and 0 (ymm3 scratch) with one
; Poly1305 block absorbed between the column and diagonal halves.
; r8 counts double rounds; while r8 < rcx the next pass starts at the x2hash
; label (one extra block), and the loop ends after 10 double rounds.
5172$L$open_avx2_tail_384_rounds_and_x1hash:
; Column round, state 2.
5173	vpaddd	ymm2,ymm2,ymm6
5174	vpxor	ymm14,ymm14,ymm2
5175	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5176	vpaddd	ymm10,ymm10,ymm14
5177	vpxor	ymm6,ymm6,ymm10
5178	vpsrld	ymm3,ymm6,20
5179	vpslld	ymm6,ymm6,12
5180	vpxor	ymm6,ymm6,ymm3
5181	vpaddd	ymm2,ymm2,ymm6
5182	vpxor	ymm14,ymm14,ymm2
5183	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5184	vpaddd	ymm10,ymm10,ymm14
5185	vpxor	ymm6,ymm6,ymm10
5186	vpslld	ymm3,ymm6,7
5187	vpsrld	ymm6,ymm6,25
5188	vpxor	ymm6,ymm6,ymm3
5189	vpalignr	ymm14,ymm14,ymm14,12
5190	vpalignr	ymm10,ymm10,ymm10,8
5191	vpalignr	ymm6,ymm6,ymm6,4
; Column round, state 1.
5192	vpaddd	ymm1,ymm1,ymm5
5193	vpxor	ymm13,ymm13,ymm1
5194	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5195	vpaddd	ymm9,ymm9,ymm13
5196	vpxor	ymm5,ymm5,ymm9
5197	vpsrld	ymm3,ymm5,20
5198	vpslld	ymm5,ymm5,12
5199	vpxor	ymm5,ymm5,ymm3
5200	vpaddd	ymm1,ymm1,ymm5
5201	vpxor	ymm13,ymm13,ymm1
5202	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5203	vpaddd	ymm9,ymm9,ymm13
5204	vpxor	ymm5,ymm5,ymm9
5205	vpslld	ymm3,ymm5,7
5206	vpsrld	ymm5,ymm5,25
5207	vpxor	ymm5,ymm5,ymm3
5208	vpalignr	ymm13,ymm13,ymm13,12
5209	vpalignr	ymm9,ymm9,ymm9,8
5210	vpalignr	ymm5,ymm5,ymm5,4
; Column round, state 0.
5211	vpaddd	ymm0,ymm0,ymm4
5212	vpxor	ymm12,ymm12,ymm0
5213	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5214	vpaddd	ymm8,ymm8,ymm12
5215	vpxor	ymm4,ymm4,ymm8
5216	vpsrld	ymm3,ymm4,20
5217	vpslld	ymm4,ymm4,12
5218	vpxor	ymm4,ymm4,ymm3
5219	vpaddd	ymm0,ymm0,ymm4
5220	vpxor	ymm12,ymm12,ymm0
5221	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5222	vpaddd	ymm8,ymm8,ymm12
5223	vpxor	ymm4,ymm4,ymm8
5224	vpslld	ymm3,ymm4,7
5225	vpsrld	ymm4,ymm4,25
5226	vpxor	ymm4,ymm4,ymm3
5227	vpalignr	ymm12,ymm12,ymm12,12
5228	vpalignr	ymm8,ymm8,ymm8,8
5229	vpalignr	ymm4,ymm4,ymm4,4
; Poly1305: absorb the 16-byte block at [rbx] (MUL variant), then partially reduce.
5230	add	r10,QWORD[((0+0))+rbx]
5231	adc	r11,QWORD[((8+0))+rbx]
5232	adc	r12,1
5233	mov	rax,QWORD[((0+160+0))+rbp]
5234	mov	r15,rax
5235	mul	r10
5236	mov	r13,rax
5237	mov	r14,rdx
5238	mov	rax,QWORD[((0+160+0))+rbp]
5239	mul	r11
5240	imul	r15,r12
5241	add	r14,rax
5242	adc	r15,rdx
5243	mov	rax,QWORD[((8+160+0))+rbp]
5244	mov	r9,rax
5245	mul	r10
5246	add	r14,rax
5247	adc	rdx,0
5248	mov	r10,rdx
5249	mov	rax,QWORD[((8+160+0))+rbp]
5250	mul	r11
5251	add	r15,rax
5252	adc	rdx,0
5253	imul	r9,r12
5254	add	r15,r10
5255	adc	r9,rdx
5256	mov	r10,r13
5257	mov	r11,r14
5258	mov	r12,r15
5259	and	r12,3
5260	mov	r13,r15
5261	and	r13,-4
5262	mov	r14,r9
5263	shrd	r15,r9,2
5264	shr	r9,2
5265	add	r15,r13
5266	adc	r9,r14
5267	add	r10,r15
5268	adc	r11,r9
5269	adc	r12,0
5270
; Advance the hash pointer and count this double round.
5271	lea	rbx,[16+rbx]
5272	inc	r8
; Diagonal round, state 2.
5273	vpaddd	ymm2,ymm2,ymm6
5274	vpxor	ymm14,ymm14,ymm2
5275	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5276	vpaddd	ymm10,ymm10,ymm14
5277	vpxor	ymm6,ymm6,ymm10
5278	vpsrld	ymm3,ymm6,20
5279	vpslld	ymm6,ymm6,12
5280	vpxor	ymm6,ymm6,ymm3
5281	vpaddd	ymm2,ymm2,ymm6
5282	vpxor	ymm14,ymm14,ymm2
5283	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5284	vpaddd	ymm10,ymm10,ymm14
5285	vpxor	ymm6,ymm6,ymm10
5286	vpslld	ymm3,ymm6,7
5287	vpsrld	ymm6,ymm6,25
5288	vpxor	ymm6,ymm6,ymm3
5289	vpalignr	ymm14,ymm14,ymm14,4
5290	vpalignr	ymm10,ymm10,ymm10,8
5291	vpalignr	ymm6,ymm6,ymm6,12
; Diagonal round, state 1.
5292	vpaddd	ymm1,ymm1,ymm5
5293	vpxor	ymm13,ymm13,ymm1
5294	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5295	vpaddd	ymm9,ymm9,ymm13
5296	vpxor	ymm5,ymm5,ymm9
5297	vpsrld	ymm3,ymm5,20
5298	vpslld	ymm5,ymm5,12
5299	vpxor	ymm5,ymm5,ymm3
5300	vpaddd	ymm1,ymm1,ymm5
5301	vpxor	ymm13,ymm13,ymm1
5302	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5303	vpaddd	ymm9,ymm9,ymm13
5304	vpxor	ymm5,ymm5,ymm9
5305	vpslld	ymm3,ymm5,7
5306	vpsrld	ymm5,ymm5,25
5307	vpxor	ymm5,ymm5,ymm3
5308	vpalignr	ymm13,ymm13,ymm13,4
5309	vpalignr	ymm9,ymm9,ymm9,8
5310	vpalignr	ymm5,ymm5,ymm5,12
; Diagonal round, state 0.
5311	vpaddd	ymm0,ymm0,ymm4
5312	vpxor	ymm12,ymm12,ymm0
5313	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5314	vpaddd	ymm8,ymm8,ymm12
5315	vpxor	ymm4,ymm4,ymm8
5316	vpsrld	ymm3,ymm4,20
5317	vpslld	ymm4,ymm4,12
5318	vpxor	ymm4,ymm4,ymm3
5319	vpaddd	ymm0,ymm0,ymm4
5320	vpxor	ymm12,ymm12,ymm0
5321	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5322	vpaddd	ymm8,ymm8,ymm12
5323	vpxor	ymm4,ymm4,ymm8
5324	vpslld	ymm3,ymm4,7
5325	vpsrld	ymm4,ymm4,25
5326	vpxor	ymm4,ymm4,ymm3
5327	vpalignr	ymm12,ymm12,ymm12,4
5328	vpalignr	ymm8,ymm8,ymm8,8
5329	vpalignr	ymm4,ymm4,ymm4,12
5330
; While r8 < rcx the next pass hashes two blocks; after that, one per pass.
5331	cmp	r8,rcx
5332	jb	NEAR $L$open_avx2_tail_384_rounds_and_x2hash
5333	cmp	r8,10
5334	jne	NEAR $L$open_avx2_tail_384_rounds_and_x1hash
; r8 = next unhashed input address, rcx = bytes hashed so far,
; rbx = remaining length restored from the frame.
5335	mov	r8,rbx
5336	sub	rbx,rsi
5337	mov	rcx,rbx
5338	mov	rbx,QWORD[((160+128))+rbp]
; Hash the whole 16-byte input blocks not yet absorbed during the rounds.
; rcx = bytes hashed so far, rbx = remaining length, r8 = read pointer.
; The loop leaves once another full block would exceed rbx.
5339$L$open_avx2_384_tail_hash:
5340	add	rcx,16
5341	cmp	rcx,rbx
; Signed compare; both values are small byte counts on this path.
5342	jg	NEAR $L$open_avx2_384_tail_done
; acc (r12:r11:r10) += block + 2^128.
5343	add	r10,QWORD[((0+0))+r8]
5344	adc	r11,QWORD[((8+0))+r8]
5345	adc	r12,1
; Multiply by the 128-bit value at [rbp+160] using MULX.
5346	mov	rdx,QWORD[((0+160+0))+rbp]
5347	mov	r15,rdx
5348	mulx	r14,r13,r10
5349	mulx	rdx,rax,r11
5350	imul	r15,r12
5351	add	r14,rax
5352	adc	r15,rdx
5353	mov	rdx,QWORD[((8+160+0))+rbp]
5354	mulx	rax,r10,r10
5355	add	r14,r10
5356	mulx	r9,r11,r11
5357	adc	r15,r11
5358	adc	r9,0
5359	imul	rdx,r12
5360	add	r15,rax
5361	adc	r9,rdx
; Partial reduction: low 130 bits + 5 * (bits from 130 upward).
5362	mov	r10,r13
5363	mov	r11,r14
5364	mov	r12,r15
5365	and	r12,3
5366	mov	r13,r15
5367	and	r13,-4
5368	mov	r14,r9
5369	shrd	r15,r9,2
5370	shr	r9,2
5371	add	r15,r13
5372	adc	r9,r14
5373	add	r10,r15
5374	adc	r11,r9
5375	adc	r12,0
5376
5377	lea	r8,[16+r8]
5378	jmp	NEAR $L$open_avx2_384_tail_hash
; Feed-forward all three states, write 256 bytes from states 2 and 1, and leave
; state 0's 128 keystream bytes in ymm0, ymm4, ymm8, ymm12 for
; $L$open_avx2_tail_128_xor.
5379$L$open_avx2_384_tail_done:
5380	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5381	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
5382	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
5383	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
5384	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5385	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5386	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5387	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5388	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5389	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5390	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5391	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 2 -> output bytes 0..127.
5392	vperm2i128	ymm3,ymm6,ymm2,0x02
5393	vperm2i128	ymm6,ymm6,ymm2,0x13
5394	vperm2i128	ymm2,ymm14,ymm10,0x02
5395	vperm2i128	ymm10,ymm14,ymm10,0x13
5396	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5397	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
5398	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
5399	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
5400	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5401	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
5402	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
5403	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
; State 1 -> output bytes 128..255.
5404	vperm2i128	ymm3,ymm5,ymm1,0x02
5405	vperm2i128	ymm5,ymm5,ymm1,0x13
5406	vperm2i128	ymm1,ymm13,ymm9,0x02
5407	vperm2i128	ymm9,ymm13,ymm9,0x13
5408	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
5409	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
5410	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
5411	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
5412	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
5413	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
5414	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
5415	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
; State 0: regroup only; keystream order is ymm0, ymm4, ymm8, ymm12.
5416	vperm2i128	ymm3,ymm4,ymm0,0x13
5417	vperm2i128	ymm0,ymm4,ymm0,0x02
5418	vperm2i128	ymm4,ymm12,ymm8,0x02
5419	vperm2i128	ymm12,ymm12,ymm8,0x13
5420	vmovdqa	ymm8,ymm3
5421
; 256 bytes consumed.
5422	lea	rsi,[256+rsi]
5423	lea	rdi,[256+rdi]
5424	sub	rbx,8*32
5425	jmp	NEAR $L$open_avx2_tail_128_xor
5426
; Set up four parallel ChaCha states for the 512-byte tail path.
; Rows 0..2 are identical across states (constants plus the two rows saved at
; [160+64+rbp] and [160+96+rbp]); row 3 differs only by its counter.
5427$L$open_avx2_tail_512:
5428	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
5429	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
5430	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
5431	vmovdqa	ymm1,ymm0
5432	vmovdqa	ymm5,ymm4
5433	vmovdqa	ymm9,ymm8
5434	vmovdqa	ymm2,ymm0
5435	vmovdqa	ymm6,ymm4
5436	vmovdqa	ymm10,ymm8
5437	vmovdqa	ymm3,ymm0
5438	vmovdqa	ymm7,ymm4
5439	vmovdqa	ymm11,ymm8
	; counter rows: start from the row saved at [160+160+rbp] and step each
	; state by $L$avx2_inc (+2 in dword 0 of each 128-bit lane)
5440	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
5441	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
5442	vpaddd	ymm14,ymm12,ymm15
5443	vpaddd	ymm13,ymm12,ymm14
5444	vpaddd	ymm12,ymm12,ymm13
	; remember every initial counter row for the feed-forward add after the rounds
5445	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
5446	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
5447	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
5448	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
5449
	; rcx = double-round counter, r8 = hash read pointer (starts at the input)
5450	xor	rcx,rcx
5451	mov	r8,rsi
; Extra Poly1305 block for the early iterations of the 512-byte tail loop:
; absorbs 16 bytes at [r8] (plain mul form), advances r8 by 16 and falls
; through into $L$open_avx2_tail_512_rounds_and_x1hash.
5452$L$open_avx2_tail_512_rounds_and_x2hash:
	; acc (r12:r11:r10) += block; the last adc also adds 1 at bit 128
5453	add	r10,QWORD[((0+0))+r8]
5454	adc	r11,QWORD[((8+0))+r8]
5455	adc	r12,1
	; t = acc * qword[160+rbp] -> r15:r14:r13
5456	mov	rax,QWORD[((0+160+0))+rbp]
5457	mov	r15,rax
5458	mul	r10
5459	mov	r13,rax
5460	mov	r14,rdx
5461	mov	rax,QWORD[((0+160+0))+rbp]
5462	mul	r11
5463	imul	r15,r12
5464	add	r14,rax
5465	adc	r15,rdx
	; t += (acc * qword[168+rbp]) << 64 -> r9:r15:r14:r13
5466	mov	rax,QWORD[((8+160+0))+rbp]
5467	mov	r9,rax
5468	mul	r10
5469	add	r14,rax
5470	adc	rdx,0
5471	mov	r10,rdx
5472	mov	rax,QWORD[((8+160+0))+rbp]
5473	mul	r11
5474	add	r15,rax
5475	adc	rdx,0
5476	imul	r9,r12
5477	add	r15,r10
5478	adc	r9,rdx
	; reduce: acc = low 130 bits of t + 5*(t >> 130)
5479	mov	r10,r13
5480	mov	r11,r14
5481	mov	r12,r15
5482	and	r12,3
5483	mov	r13,r15
5484	and	r13,-4
5485	mov	r14,r9
5486	shrd	r15,r9,2
5487	shr	r9,2
5488	add	r15,r13
5489	adc	r9,r14
5490	add	r10,r15
5491	adc	r11,r9
5492	adc	r12,0
5493
5494	lea	r8,[16+r8]
; One ChaCha double round over four states (rows in ymm0-3, ymm4-7, ymm8-11,
; ymm12-15), interleaved with two Poly1305 blocks read from [r8] and [r8+16].
; All 16 ymm registers hold state, so ymm8 doubles as the temporary and its
; row is parked in the scratch slot [160+128+rbp] whenever ymm8 is borrowed.
; Loop control at the bottom: rcx counts double rounds; while rcx < 4 the loop
; re-enters through the x2hash label (one extra block), then through this
; label until rcx == 10. That is 4*3 + 6*2 = 24 blocks = 384 bytes hashed.
5495$L$open_avx2_tail_512_rounds_and_x1hash:
	; ---- column round: a += b; d ^= a; d <<<= 16 (byte shuffle $L$rol16)
5496	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5497	vmovdqa	ymm8,YMMWORD[$L$rol16]
5498	vpaddd	ymm3,ymm3,ymm7
5499	vpaddd	ymm2,ymm2,ymm6
5500	vpaddd	ymm1,ymm1,ymm5
5501	vpaddd	ymm0,ymm0,ymm4
5502	vpxor	ymm15,ymm15,ymm3
5503	vpxor	ymm14,ymm14,ymm2
5504	vpxor	ymm13,ymm13,ymm1
5505	vpxor	ymm12,ymm12,ymm0
5506	vpshufb	ymm15,ymm15,ymm8
5507	vpshufb	ymm14,ymm14,ymm8
5508	vpshufb	ymm13,ymm13,ymm8
5509	vpshufb	ymm12,ymm12,ymm8
	; c += d (state 0's c row comes back from the scratch slot); b ^= c
5510	vpaddd	ymm11,ymm11,ymm15
5511	vpaddd	ymm10,ymm10,ymm14
5512	vpaddd	ymm9,ymm9,ymm13
5513	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5514	vpxor	ymm7,ymm7,ymm11
5515	vpxor	ymm6,ymm6,ymm10
5516	vpxor	ymm5,ymm5,ymm9
5517	vpxor	ymm4,ymm4,ymm8
	; b <<<= 12, built from a right shift by 20 and a left shift by 12
5518	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5519	vpsrld	ymm8,ymm7,20
5520	vpslld	ymm7,ymm7,32-20
5521	vpxor	ymm7,ymm7,ymm8
5522	vpsrld	ymm8,ymm6,20
5523	vpslld	ymm6,ymm6,32-20
5524	vpxor	ymm6,ymm6,ymm8
5525	vpsrld	ymm8,ymm5,20
5526	vpslld	ymm5,ymm5,32-20
5527	vpxor	ymm5,ymm5,ymm8
5528	vpsrld	ymm8,ymm4,20
5529	vpslld	ymm4,ymm4,32-20
5530	vpxor	ymm4,ymm4,ymm8
5531	vmovdqa	ymm8,YMMWORD[$L$rol8]
5532	vpaddd	ymm3,ymm3,ymm7
	; ---- Poly1305: absorb the 16-byte block at [r8] (mulx form)
5533	add	r10,QWORD[((0+0))+r8]
5534	adc	r11,QWORD[((8+0))+r8]
5535	adc	r12,1
5536	mov	rdx,QWORD[((0+160+0))+rbp]
5537	mov	r15,rdx
5538	mulx	r14,r13,r10
5539	mulx	rdx,rax,r11
5540	imul	r15,r12
5541	add	r14,rax
5542	adc	r15,rdx
5543	mov	rdx,QWORD[((8+160+0))+rbp]
5544	mulx	rax,r10,r10
5545	add	r14,r10
5546	mulx	r9,r11,r11
5547	adc	r15,r11
5548	adc	r9,0
5549	imul	rdx,r12
5550	add	r15,rax
5551	adc	r9,rdx
	; reduce: acc = low 130 bits + 5*(t >> 130)
5552	mov	r10,r13
5553	mov	r11,r14
5554	mov	r12,r15
5555	and	r12,3
5556	mov	r13,r15
5557	and	r13,-4
5558	mov	r14,r9
5559	shrd	r15,r9,2
5560	shr	r9,2
5561	add	r15,r13
5562	adc	r9,r14
5563	add	r10,r15
5564	adc	r11,r9
5565	adc	r12,0
	; ---- column round, second half: a += b; d ^= a; d <<<= 8 ($L$rol8)
5566	vpaddd	ymm2,ymm2,ymm6
5567	vpaddd	ymm1,ymm1,ymm5
5568	vpaddd	ymm0,ymm0,ymm4
5569	vpxor	ymm15,ymm15,ymm3
5570	vpxor	ymm14,ymm14,ymm2
5571	vpxor	ymm13,ymm13,ymm1
5572	vpxor	ymm12,ymm12,ymm0
5573	vpshufb	ymm15,ymm15,ymm8
5574	vpshufb	ymm14,ymm14,ymm8
5575	vpshufb	ymm13,ymm13,ymm8
5576	vpshufb	ymm12,ymm12,ymm8
5577	vpaddd	ymm11,ymm11,ymm15
5578	vpaddd	ymm10,ymm10,ymm14
5579	vpaddd	ymm9,ymm9,ymm13
5580	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5581	vpxor	ymm7,ymm7,ymm11
5582	vpxor	ymm6,ymm6,ymm10
5583	vpxor	ymm5,ymm5,ymm9
5584	vpxor	ymm4,ymm4,ymm8
	; b <<<= 7, built from a right shift by 25 and a left shift by 7
5585	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5586	vpsrld	ymm8,ymm7,25
5587	vpslld	ymm7,ymm7,32-25
5588	vpxor	ymm7,ymm7,ymm8
5589	vpsrld	ymm8,ymm6,25
5590	vpslld	ymm6,ymm6,32-25
5591	vpxor	ymm6,ymm6,ymm8
5592	vpsrld	ymm8,ymm5,25
5593	vpslld	ymm5,ymm5,32-25
5594	vpxor	ymm5,ymm5,ymm8
5595	vpsrld	ymm8,ymm4,25
5596	vpslld	ymm4,ymm4,32-25
5597	vpxor	ymm4,ymm4,ymm8
	; rotate rows b/c/d by 1/2/3 dwords so the next round works on diagonals
5598	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
5599	vpalignr	ymm7,ymm7,ymm7,4
5600	vpalignr	ymm11,ymm11,ymm11,8
5601	vpalignr	ymm15,ymm15,ymm15,12
5602	vpalignr	ymm6,ymm6,ymm6,4
5603	vpalignr	ymm10,ymm10,ymm10,8
5604	vpalignr	ymm14,ymm14,ymm14,12
5605	vpalignr	ymm5,ymm5,ymm5,4
5606	vpalignr	ymm9,ymm9,ymm9,8
5607	vpalignr	ymm13,ymm13,ymm13,12
5608	vpalignr	ymm4,ymm4,ymm4,4
5609	vpalignr	ymm8,ymm8,ymm8,8
5610	vpalignr	ymm12,ymm12,ymm12,12
	; ---- diagonal round, first half (rotations 16 and 12)
5611	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5612	vmovdqa	ymm8,YMMWORD[$L$rol16]
5613	vpaddd	ymm3,ymm3,ymm7
	; ---- Poly1305: absorb the 16-byte block at [r8+16] (mulx form)
5614	add	r10,QWORD[((0+16))+r8]
5615	adc	r11,QWORD[((8+16))+r8]
5616	adc	r12,1
5617	mov	rdx,QWORD[((0+160+0))+rbp]
5618	mov	r15,rdx
5619	mulx	r14,r13,r10
5620	mulx	rdx,rax,r11
5621	imul	r15,r12
5622	add	r14,rax
5623	adc	r15,rdx
5624	mov	rdx,QWORD[((8+160+0))+rbp]
5625	mulx	rax,r10,r10
5626	add	r14,r10
5627	mulx	r9,r11,r11
5628	adc	r15,r11
5629	adc	r9,0
5630	imul	rdx,r12
5631	add	r15,rax
5632	adc	r9,rdx
5633	mov	r10,r13
5634	mov	r11,r14
5635	mov	r12,r15
5636	and	r12,3
5637	mov	r13,r15
5638	and	r13,-4
5639	mov	r14,r9
5640	shrd	r15,r9,2
5641	shr	r9,2
5642	add	r15,r13
5643	adc	r9,r14
5644	add	r10,r15
5645	adc	r11,r9
5646	adc	r12,0
5647
	; both blocks of this iteration are hashed: step the read pointer by 32
5648	lea	r8,[32+r8]
5649	vpaddd	ymm2,ymm2,ymm6
5650	vpaddd	ymm1,ymm1,ymm5
5651	vpaddd	ymm0,ymm0,ymm4
5652	vpxor	ymm15,ymm15,ymm3
5653	vpxor	ymm14,ymm14,ymm2
5654	vpxor	ymm13,ymm13,ymm1
5655	vpxor	ymm12,ymm12,ymm0
5656	vpshufb	ymm15,ymm15,ymm8
5657	vpshufb	ymm14,ymm14,ymm8
5658	vpshufb	ymm13,ymm13,ymm8
5659	vpshufb	ymm12,ymm12,ymm8
5660	vpaddd	ymm11,ymm11,ymm15
5661	vpaddd	ymm10,ymm10,ymm14
5662	vpaddd	ymm9,ymm9,ymm13
5663	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5664	vpxor	ymm7,ymm7,ymm11
5665	vpxor	ymm6,ymm6,ymm10
5666	vpxor	ymm5,ymm5,ymm9
5667	vpxor	ymm4,ymm4,ymm8
5668	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5669	vpsrld	ymm8,ymm7,20
5670	vpslld	ymm7,ymm7,32-20
5671	vpxor	ymm7,ymm7,ymm8
5672	vpsrld	ymm8,ymm6,20
5673	vpslld	ymm6,ymm6,32-20
5674	vpxor	ymm6,ymm6,ymm8
5675	vpsrld	ymm8,ymm5,20
5676	vpslld	ymm5,ymm5,32-20
5677	vpxor	ymm5,ymm5,ymm8
5678	vpsrld	ymm8,ymm4,20
5679	vpslld	ymm4,ymm4,32-20
5680	vpxor	ymm4,ymm4,ymm8
	; ---- diagonal round, second half (rotations 8 and 7)
5681	vmovdqa	ymm8,YMMWORD[$L$rol8]
5682	vpaddd	ymm3,ymm3,ymm7
5683	vpaddd	ymm2,ymm2,ymm6
5684	vpaddd	ymm1,ymm1,ymm5
5685	vpaddd	ymm0,ymm0,ymm4
5686	vpxor	ymm15,ymm15,ymm3
5687	vpxor	ymm14,ymm14,ymm2
5688	vpxor	ymm13,ymm13,ymm1
5689	vpxor	ymm12,ymm12,ymm0
5690	vpshufb	ymm15,ymm15,ymm8
5691	vpshufb	ymm14,ymm14,ymm8
5692	vpshufb	ymm13,ymm13,ymm8
5693	vpshufb	ymm12,ymm12,ymm8
5694	vpaddd	ymm11,ymm11,ymm15
5695	vpaddd	ymm10,ymm10,ymm14
5696	vpaddd	ymm9,ymm9,ymm13
5697	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5698	vpxor	ymm7,ymm7,ymm11
5699	vpxor	ymm6,ymm6,ymm10
5700	vpxor	ymm5,ymm5,ymm9
5701	vpxor	ymm4,ymm4,ymm8
5702	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5703	vpsrld	ymm8,ymm7,25
5704	vpslld	ymm7,ymm7,32-25
5705	vpxor	ymm7,ymm7,ymm8
5706	vpsrld	ymm8,ymm6,25
5707	vpslld	ymm6,ymm6,32-25
5708	vpxor	ymm6,ymm6,ymm8
5709	vpsrld	ymm8,ymm5,25
5710	vpslld	ymm5,ymm5,32-25
5711	vpxor	ymm5,ymm5,ymm8
5712	vpsrld	ymm8,ymm4,25
5713	vpslld	ymm4,ymm4,32-25
5714	vpxor	ymm4,ymm4,ymm8
	; undo the diagonalisation: rotate rows b/c/d back by 3/2/1 dwords
5715	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
5716	vpalignr	ymm7,ymm7,ymm7,12
5717	vpalignr	ymm11,ymm11,ymm11,8
5718	vpalignr	ymm15,ymm15,ymm15,4
5719	vpalignr	ymm6,ymm6,ymm6,12
5720	vpalignr	ymm10,ymm10,ymm10,8
5721	vpalignr	ymm14,ymm14,ymm14,4
5722	vpalignr	ymm5,ymm5,ymm5,12
5723	vpalignr	ymm9,ymm9,ymm9,8
5724	vpalignr	ymm13,ymm13,ymm13,4
5725	vpalignr	ymm4,ymm4,ymm4,12
5726	vpalignr	ymm8,ymm8,ymm8,8
5727	vpalignr	ymm12,ymm12,ymm12,4
5728
	; loop control: first 4 iterations take the 3-block path, 10 iterations total
5729	inc	rcx
5730	cmp	rcx,4
5731	jl	NEAR $L$open_avx2_tail_512_rounds_and_x2hash
5732	cmp	rcx,10
5733	jne	NEAR $L$open_avx2_tail_512_rounds_and_x1hash
	; rcx = bytes still to hash: (rbx - 384) rounded down to a multiple of 16
5734	mov	rcx,rbx
5735	sub	rcx,12*32
5736	and	rcx,-16
; Hash whatever the round loop did not cover: absorbs one 16-byte block at
; [r8] per iteration until the byte counter rcx (a multiple of 16) reaches 0.
5737$L$open_avx2_tail_512_hash:
5738	test	rcx,rcx
5739	je	NEAR $L$open_avx2_tail_512_done
	; acc (r12:r11:r10) += block; the last adc also adds 1 at bit 128
5740	add	r10,QWORD[((0+0))+r8]
5741	adc	r11,QWORD[((8+0))+r8]
5742	adc	r12,1
	; t = acc * qword[160+rbp] -> r15:r14:r13
5743	mov	rdx,QWORD[((0+160+0))+rbp]
5744	mov	r15,rdx
5745	mulx	r14,r13,r10
5746	mulx	rdx,rax,r11
5747	imul	r15,r12
5748	add	r14,rax
5749	adc	r15,rdx
	; t += (acc * qword[168+rbp]) << 64 -> r9:r15:r14:r13
5750	mov	rdx,QWORD[((8+160+0))+rbp]
5751	mulx	rax,r10,r10
5752	add	r14,r10
5753	mulx	r9,r11,r11
5754	adc	r15,r11
5755	adc	r9,0
5756	imul	rdx,r12
5757	add	r15,rax
5758	adc	r9,rdx
	; reduce: acc = low 130 bits of t + 5*(t >> 130)
5759	mov	r10,r13
5760	mov	r11,r14
5761	mov	r12,r15
5762	and	r12,3
5763	mov	r13,r15
5764	and	r13,-4
5765	mov	r14,r9
5766	shrd	r15,r9,2
5767	shr	r9,2
5768	add	r15,r13
5769	adc	r9,r14
5770	add	r10,r15
5771	adc	r11,r9
5772	adc	r12,0
5773
5774	lea	r8,[16+r8]
5775	sub	rcx,2*8
5776	jmp	NEAR $L$open_avx2_tail_512_hash
; Finish the four ChaCha states of the 512-byte tail: add the initial rows
; back in, regroup lanes, XOR 384 bytes from rsi into rdi, and leave the
; fourth state's 128 keystream bytes in ymm0, ymm4, ymm8, ymm12 for the
; $L$open_avx2_tail_128_xor code that this block falls through into.
5777$L$open_avx2_tail_512_done:
5778	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
5779	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
5780	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
5781	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
5782	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5783	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
5784	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
5785	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
5786	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5787	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5788	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5789	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5790	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5791	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5792	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5793	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
5794
	; no free register: park ymm0 in the scratch slot and use it as the temporary
5795	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
	; state 3 -> bytes 0..127 (imm 0x02 pairs the low lanes, 0x13 the high lanes)
5796	vperm2i128	ymm0,ymm7,ymm3,0x02
5797	vperm2i128	ymm7,ymm7,ymm3,0x13
5798	vperm2i128	ymm3,ymm15,ymm11,0x02
5799	vperm2i128	ymm11,ymm15,ymm11,0x13
5800	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
5801	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
5802	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
5803	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
5804	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
5805	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
5806	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
5807	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
5808
	; restore ymm0; ymm3 is free now and becomes the temporary
5809	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
	; state 2 -> bytes 128..255
5810	vperm2i128	ymm3,ymm6,ymm2,0x02
5811	vperm2i128	ymm6,ymm6,ymm2,0x13
5812	vperm2i128	ymm2,ymm14,ymm10,0x02
5813	vperm2i128	ymm10,ymm14,ymm10,0x13
5814	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
5815	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
5816	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
5817	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
5818	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
5819	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
5820	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
5821	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
	; state 1 -> bytes 256..383
5822	vperm2i128	ymm3,ymm5,ymm1,0x02
5823	vperm2i128	ymm5,ymm5,ymm1,0x13
5824	vperm2i128	ymm1,ymm13,ymm9,0x02
5825	vperm2i128	ymm9,ymm13,ymm9,0x13
5826	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
5827	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
5828	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
5829	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
5830	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
5831	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
5832	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
5833	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
	; state 0 is only regrouped: ymm0, ymm4, ymm8, ymm12 = keystream in byte order
5834	vperm2i128	ymm3,ymm4,ymm0,0x13
5835	vperm2i128	ymm0,ymm4,ymm0,0x02
5836	vperm2i128	ymm4,ymm12,ymm8,0x02
5837	vperm2i128	ymm12,ymm12,ymm8,0x13
5838	vmovdqa	ymm8,ymm3
5839
	; 384 bytes consumed
5840	lea	rsi,[384+rsi]
5841	lea	rdi,[384+rdi]
5842	sub	rbx,12*32
; XOR loop over prepared keystream: while at least 32 bytes remain (rbx),
; XOR 32 bytes from rsi with ymm0 into rdi, then shift the keystream queue
; ymm0 <- ymm4 <- ymm8 <- ymm12.
5843$L$open_avx2_tail_128_xor:
5844	cmp	rbx,32
5845	jb	NEAR $L$open_avx2_tail_32_xor
5846	sub	rbx,32
5847	vpxor	ymm0,ymm0,YMMWORD[rsi]
5848	vmovdqu	YMMWORD[rdi],ymm0
5849	lea	rsi,[32+rsi]
5850	lea	rdi,[32+rdi]
	; next 32 keystream bytes move to the front
5851	vmovdqa	ymm0,ymm4
5852	vmovdqa	ymm4,ymm8
5853	vmovdqa	ymm8,ymm12
5854	jmp	NEAR $L$open_avx2_tail_128_xor
; Fewer than 32 bytes remain. If at least 16 remain, XOR one 16-byte chunk
; with the low lane of ymm0. Either way xmm1 ends up holding the next unused
; 16 keystream bytes before control passes to $L$open_sse_tail_16 (defined
; outside this section).
5855$L$open_avx2_tail_32_xor:
5856	cmp	rbx,16
	; vmovdqa leaves the flags alone, so the jb below still tests the cmp
5857	vmovdqa	xmm1,xmm0
5858	jb	NEAR $L$open_avx2_exit
5859	sub	rbx,16
5860
5861	vpxor	xmm1,xmm0,XMMWORD[rsi]
5862	vmovdqu	XMMWORD[rdi],xmm1
5863	lea	rsi,[16+rsi]
5864	lea	rdi,[16+rdi]
	; imm 0x11 copies the high lane of ymm0 into both lanes
5865	vperm2i128	ymm0,ymm0,ymm0,0x11
5866	vmovdqa	xmm1,xmm0
5867$L$open_avx2_exit:
	; clear the upper ymm halves before jumping to the $L$open_sse_* code
5868	vzeroupper
5869	jmp	NEAR $L$open_sse_tail_16
5870
; Two-state path. On entry ymm0/ymm4/ymm8/ymm12 hold the initial rows.
; ymm1/ymm5/ymm9/ymm13 become the second state (counter row stepped by
; $L$avx2_inc), and ymm2/ymm6/ymm10/ymm11/ymm15 keep copies of the initial
; rows for the feed-forward add after the rounds.
5871$L$open_avx2_192:
5872	vmovdqa	ymm1,ymm0
5873	vmovdqa	ymm2,ymm0
5874	vmovdqa	ymm5,ymm4
5875	vmovdqa	ymm6,ymm4
5876	vmovdqa	ymm9,ymm8
5877	vmovdqa	ymm10,ymm8
5878	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
	; ymm11 / ymm15 = initial counter rows of state 0 / state 1
5879	vmovdqa	ymm11,ymm12
5880	vmovdqa	ymm15,ymm13
	; r10 = number of double rounds to run
5881	mov	r10,10
; Ten ChaCha double rounds over two states (state 0 in ymm0/4/8/12, state 1
; in ymm1/5/9/13; ymm3 is the temporary), followed by the feed-forward add,
; derivation of the Poly1305 key, and regrouping of the remaining keystream.
; Falls through into $L$open_avx2_short.
5882$L$open_avx2_192_rounds:
	; column round, state 0: rotations by 16, 12, 8, 7, then diagonalise
5883	vpaddd	ymm0,ymm0,ymm4
5884	vpxor	ymm12,ymm12,ymm0
5885	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5886	vpaddd	ymm8,ymm8,ymm12
5887	vpxor	ymm4,ymm4,ymm8
5888	vpsrld	ymm3,ymm4,20
5889	vpslld	ymm4,ymm4,12
5890	vpxor	ymm4,ymm4,ymm3
5891	vpaddd	ymm0,ymm0,ymm4
5892	vpxor	ymm12,ymm12,ymm0
5893	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5894	vpaddd	ymm8,ymm8,ymm12
5895	vpxor	ymm4,ymm4,ymm8
5896	vpslld	ymm3,ymm4,7
5897	vpsrld	ymm4,ymm4,25
5898	vpxor	ymm4,ymm4,ymm3
5899	vpalignr	ymm12,ymm12,ymm12,12
5900	vpalignr	ymm8,ymm8,ymm8,8
5901	vpalignr	ymm4,ymm4,ymm4,4
	; column round, state 1
5902	vpaddd	ymm1,ymm1,ymm5
5903	vpxor	ymm13,ymm13,ymm1
5904	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5905	vpaddd	ymm9,ymm9,ymm13
5906	vpxor	ymm5,ymm5,ymm9
5907	vpsrld	ymm3,ymm5,20
5908	vpslld	ymm5,ymm5,12
5909	vpxor	ymm5,ymm5,ymm3
5910	vpaddd	ymm1,ymm1,ymm5
5911	vpxor	ymm13,ymm13,ymm1
5912	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5913	vpaddd	ymm9,ymm9,ymm13
5914	vpxor	ymm5,ymm5,ymm9
5915	vpslld	ymm3,ymm5,7
5916	vpsrld	ymm5,ymm5,25
5917	vpxor	ymm5,ymm5,ymm3
5918	vpalignr	ymm13,ymm13,ymm13,12
5919	vpalignr	ymm9,ymm9,ymm9,8
5920	vpalignr	ymm5,ymm5,ymm5,4
	; diagonal round, state 0 (the closing vpalignr trio restores row order)
5921	vpaddd	ymm0,ymm0,ymm4
5922	vpxor	ymm12,ymm12,ymm0
5923	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5924	vpaddd	ymm8,ymm8,ymm12
5925	vpxor	ymm4,ymm4,ymm8
5926	vpsrld	ymm3,ymm4,20
5927	vpslld	ymm4,ymm4,12
5928	vpxor	ymm4,ymm4,ymm3
5929	vpaddd	ymm0,ymm0,ymm4
5930	vpxor	ymm12,ymm12,ymm0
5931	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5932	vpaddd	ymm8,ymm8,ymm12
5933	vpxor	ymm4,ymm4,ymm8
5934	vpslld	ymm3,ymm4,7
5935	vpsrld	ymm4,ymm4,25
5936	vpxor	ymm4,ymm4,ymm3
5937	vpalignr	ymm12,ymm12,ymm12,4
5938	vpalignr	ymm8,ymm8,ymm8,8
5939	vpalignr	ymm4,ymm4,ymm4,12
	; diagonal round, state 1
5940	vpaddd	ymm1,ymm1,ymm5
5941	vpxor	ymm13,ymm13,ymm1
5942	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5943	vpaddd	ymm9,ymm9,ymm13
5944	vpxor	ymm5,ymm5,ymm9
5945	vpsrld	ymm3,ymm5,20
5946	vpslld	ymm5,ymm5,12
5947	vpxor	ymm5,ymm5,ymm3
5948	vpaddd	ymm1,ymm1,ymm5
5949	vpxor	ymm13,ymm13,ymm1
5950	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5951	vpaddd	ymm9,ymm9,ymm13
5952	vpxor	ymm5,ymm5,ymm9
5953	vpslld	ymm3,ymm5,7
5954	vpsrld	ymm5,ymm5,25
5955	vpxor	ymm5,ymm5,ymm3
5956	vpalignr	ymm13,ymm13,ymm13,4
5957	vpalignr	ymm9,ymm9,ymm9,8
5958	vpalignr	ymm5,ymm5,ymm5,12
5959
5960	dec	r10
5961	jne	NEAR $L$open_avx2_192_rounds
	; feed-forward: add the initial rows kept in ymm2, ymm6, ymm10, ymm11, ymm15
5962	vpaddd	ymm0,ymm0,ymm2
5963	vpaddd	ymm1,ymm1,ymm2
5964	vpaddd	ymm4,ymm4,ymm6
5965	vpaddd	ymm5,ymm5,ymm6
5966	vpaddd	ymm8,ymm8,ymm10
5967	vpaddd	ymm9,ymm9,ymm10
5968	vpaddd	ymm12,ymm12,ymm11
5969	vpaddd	ymm13,ymm13,ymm15
	; ymm3 = low lanes of state 0 rows 0 and 1 = first 32 keystream bytes
5970	vperm2i128	ymm3,ymm4,ymm0,0x02
5971
	; mask with $L$clamp (upper 16 bytes of the mask are all-ones) and store
	; the result at [160+rbp], where the Poly1305 multiply code reads it
5972	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
5973	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
5974
	; remaining keystream in byte order: ymm0, ymm4, ymm8, ymm12, ymm1, ymm5
	; (the low lanes of state 0 rows 2 and 3 are not used)
5975	vperm2i128	ymm0,ymm4,ymm0,0x13
5976	vperm2i128	ymm4,ymm12,ymm8,0x13
5977	vperm2i128	ymm8,ymm5,ymm1,0x02
5978	vperm2i128	ymm12,ymm13,ymm9,0x02
5979	vperm2i128	ymm1,ymm5,ymm1,0x13
5980	vperm2i128	ymm5,ymm13,ymm9,0x13
; Shared entry for the short paths (reached by fall-through from the 192 path
; and by jmp from the 320 path) once the key is stored at [160+rbp].
5981$L$open_avx2_short:
	; register-to-itself move: leaves r8 unchanged
5982	mov	r8,r8
	; poly_hash_ad_internal is defined outside this section
	; NOTE(review): presumably hashes the additional data described by r8 - confirm
5983	call	poly_hash_ad_internal
; Short-path main loop: while at least 32 bytes remain (rbx), absorb the two
; 16-byte input blocks at [rsi] and [rsi+16] into the Poly1305 accumulator,
; then XOR those 32 bytes with ymm0 into rdi and rotate the keystream queue.
5984$L$open_avx2_short_hash_and_xor_loop:
5985	cmp	rbx,32
5986	jb	NEAR $L$open_avx2_short_tail_32
5987	sub	rbx,32
	; ---- Poly1305 block 1: acc (r12:r11:r10) += [rsi], plus 1 at bit 128
5988	add	r10,QWORD[((0+0))+rsi]
5989	adc	r11,QWORD[((8+0))+rsi]
5990	adc	r12,1
	; t = acc * qword[160+rbp] -> r15:r14:r13
5991	mov	rax,QWORD[((0+160+0))+rbp]
5992	mov	r15,rax
5993	mul	r10
5994	mov	r13,rax
5995	mov	r14,rdx
5996	mov	rax,QWORD[((0+160+0))+rbp]
5997	mul	r11
5998	imul	r15,r12
5999	add	r14,rax
6000	adc	r15,rdx
	; t += (acc * qword[168+rbp]) << 64 -> r9:r15:r14:r13
6001	mov	rax,QWORD[((8+160+0))+rbp]
6002	mov	r9,rax
6003	mul	r10
6004	add	r14,rax
6005	adc	rdx,0
6006	mov	r10,rdx
6007	mov	rax,QWORD[((8+160+0))+rbp]
6008	mul	r11
6009	add	r15,rax
6010	adc	rdx,0
6011	imul	r9,r12
6012	add	r15,r10
6013	adc	r9,rdx
	; reduce: acc = low 130 bits of t + 5*(t >> 130)
6014	mov	r10,r13
6015	mov	r11,r14
6016	mov	r12,r15
6017	and	r12,3
6018	mov	r13,r15
6019	and	r13,-4
6020	mov	r14,r9
6021	shrd	r15,r9,2
6022	shr	r9,2
6023	add	r15,r13
6024	adc	r9,r14
6025	add	r10,r15
6026	adc	r11,r9
6027	adc	r12,0
	; ---- Poly1305 block 2: same sequence for the block at [rsi+16]
6028	add	r10,QWORD[((0+16))+rsi]
6029	adc	r11,QWORD[((8+16))+rsi]
6030	adc	r12,1
6031	mov	rax,QWORD[((0+160+0))+rbp]
6032	mov	r15,rax
6033	mul	r10
6034	mov	r13,rax
6035	mov	r14,rdx
6036	mov	rax,QWORD[((0+160+0))+rbp]
6037	mul	r11
6038	imul	r15,r12
6039	add	r14,rax
6040	adc	r15,rdx
6041	mov	rax,QWORD[((8+160+0))+rbp]
6042	mov	r9,rax
6043	mul	r10
6044	add	r14,rax
6045	adc	rdx,0
6046	mov	r10,rdx
6047	mov	rax,QWORD[((8+160+0))+rbp]
6048	mul	r11
6049	add	r15,rax
6050	adc	rdx,0
6051	imul	r9,r12
6052	add	r15,r10
6053	adc	r9,rdx
6054	mov	r10,r13
6055	mov	r11,r14
6056	mov	r12,r15
6057	and	r12,3
6058	mov	r13,r15
6059	and	r13,-4
6060	mov	r14,r9
6061	shrd	r15,r9,2
6062	shr	r9,2
6063	add	r15,r13
6064	adc	r9,r14
6065	add	r10,r15
6066	adc	r11,r9
6067	adc	r12,0
6068
6069
	; ---- XOR the 32 bytes just hashed with the front of the keystream queue
6070	vpxor	ymm0,ymm0,YMMWORD[rsi]
6071	vmovdqu	YMMWORD[rdi],ymm0
6072	lea	rsi,[32+rsi]
6073	lea	rdi,[32+rdi]
6074
	; shift the queue: ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9
	; <- ymm13 <- ymm2 <- ymm6
6075	vmovdqa	ymm0,ymm4
6076	vmovdqa	ymm4,ymm8
6077	vmovdqa	ymm8,ymm12
6078	vmovdqa	ymm12,ymm1
6079	vmovdqa	ymm1,ymm5
6080	vmovdqa	ymm5,ymm9
6081	vmovdqa	ymm9,ymm13
6082	vmovdqa	ymm13,ymm2
6083	vmovdqa	ymm2,ymm6
6084	jmp	NEAR $L$open_avx2_short_hash_and_xor_loop
; Short-path tail with fewer than 32 bytes left. If at least 16 remain, hash
; the 16-byte block at [rsi] and XOR it with the low lane of ymm0. Either way
; xmm1 ends up holding the next unused 16 keystream bytes before control
; passes to $L$open_sse_tail_16 (defined outside this section).
6085$L$open_avx2_short_tail_32:
6086	cmp	rbx,16
	; vmovdqa leaves the flags alone, so the jb below still tests the cmp
6087	vmovdqa	xmm1,xmm0
6088	jb	NEAR $L$open_avx2_short_tail_32_exit
6089	sub	rbx,16
	; Poly1305: acc (r12:r11:r10) += [rsi], plus 1 at bit 128
6090	add	r10,QWORD[((0+0))+rsi]
6091	adc	r11,QWORD[((8+0))+rsi]
6092	adc	r12,1
	; t = acc * qword[160+rbp] -> r15:r14:r13
6093	mov	rax,QWORD[((0+160+0))+rbp]
6094	mov	r15,rax
6095	mul	r10
6096	mov	r13,rax
6097	mov	r14,rdx
6098	mov	rax,QWORD[((0+160+0))+rbp]
6099	mul	r11
6100	imul	r15,r12
6101	add	r14,rax
6102	adc	r15,rdx
	; t += (acc * qword[168+rbp]) << 64 -> r9:r15:r14:r13
6103	mov	rax,QWORD[((8+160+0))+rbp]
6104	mov	r9,rax
6105	mul	r10
6106	add	r14,rax
6107	adc	rdx,0
6108	mov	r10,rdx
6109	mov	rax,QWORD[((8+160+0))+rbp]
6110	mul	r11
6111	add	r15,rax
6112	adc	rdx,0
6113	imul	r9,r12
6114	add	r15,r10
6115	adc	r9,rdx
	; reduce: acc = low 130 bits of t + 5*(t >> 130)
6116	mov	r10,r13
6117	mov	r11,r14
6118	mov	r12,r15
6119	and	r12,3
6120	mov	r13,r15
6121	and	r13,-4
6122	mov	r14,r9
6123	shrd	r15,r9,2
6124	shr	r9,2
6125	add	r15,r13
6126	adc	r9,r14
6127	add	r10,r15
6128	adc	r11,r9
6129	adc	r12,0
6130
	; XOR 16 bytes with the low keystream lane; the high lane becomes xmm1
6131	vpxor	xmm3,xmm0,XMMWORD[rsi]
6132	vmovdqu	XMMWORD[rdi],xmm3
6133	lea	rsi,[16+rsi]
6134	lea	rdi,[16+rdi]
6135	vextracti128	xmm1,ymm0,1
6136$L$open_avx2_short_tail_32_exit:
	; clear the upper ymm halves before jumping to the $L$open_sse_* code
6137	vzeroupper
6138	jmp	NEAR $L$open_sse_tail_16
6139
; Three-state path. On entry ymm0/ymm4/ymm8/ymm12 hold the initial rows.
; States 1 and 2 get counter rows stepped by $L$avx2_inc. Rows 1 and 2 are
; kept in ymm7 / ymm11 and the three counter rows on the stack for the
; feed-forward add after the rounds.
6140$L$open_avx2_320:
6141	vmovdqa	ymm1,ymm0
6142	vmovdqa	ymm2,ymm0
6143	vmovdqa	ymm5,ymm4
6144	vmovdqa	ymm6,ymm4
6145	vmovdqa	ymm9,ymm8
6146	vmovdqa	ymm10,ymm8
6147	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
6148	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
	; saved copies of initial rows 1 and 2
6149	vmovdqa	ymm7,ymm4
6150	vmovdqa	ymm11,ymm8
	; saved initial counter rows of states 0, 1, 2
6151	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6152	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6153	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
	; r10 = number of double rounds to run
6154	mov	r10,10
; Ten ChaCha double rounds over three states (state 0 in ymm0/4/8/12, state 1
; in ymm1/5/9/13, state 2 in ymm2/6/10/14; ymm3 is the temporary), followed
; by the feed-forward add, derivation of the Poly1305 key, and regrouping of
; the remaining keystream for $L$open_avx2_short.
6155$L$open_avx2_320_rounds:
	; column round, state 0: rotations by 16, 12, 8, 7, then diagonalise
6156	vpaddd	ymm0,ymm0,ymm4
6157	vpxor	ymm12,ymm12,ymm0
6158	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
6159	vpaddd	ymm8,ymm8,ymm12
6160	vpxor	ymm4,ymm4,ymm8
6161	vpsrld	ymm3,ymm4,20
6162	vpslld	ymm4,ymm4,12
6163	vpxor	ymm4,ymm4,ymm3
6164	vpaddd	ymm0,ymm0,ymm4
6165	vpxor	ymm12,ymm12,ymm0
6166	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
6167	vpaddd	ymm8,ymm8,ymm12
6168	vpxor	ymm4,ymm4,ymm8
6169	vpslld	ymm3,ymm4,7
6170	vpsrld	ymm4,ymm4,25
6171	vpxor	ymm4,ymm4,ymm3
6172	vpalignr	ymm12,ymm12,ymm12,12
6173	vpalignr	ymm8,ymm8,ymm8,8
6174	vpalignr	ymm4,ymm4,ymm4,4
	; column round, state 1
6175	vpaddd	ymm1,ymm1,ymm5
6176	vpxor	ymm13,ymm13,ymm1
6177	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
6178	vpaddd	ymm9,ymm9,ymm13
6179	vpxor	ymm5,ymm5,ymm9
6180	vpsrld	ymm3,ymm5,20
6181	vpslld	ymm5,ymm5,12
6182	vpxor	ymm5,ymm5,ymm3
6183	vpaddd	ymm1,ymm1,ymm5
6184	vpxor	ymm13,ymm13,ymm1
6185	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
6186	vpaddd	ymm9,ymm9,ymm13
6187	vpxor	ymm5,ymm5,ymm9
6188	vpslld	ymm3,ymm5,7
6189	vpsrld	ymm5,ymm5,25
6190	vpxor	ymm5,ymm5,ymm3
6191	vpalignr	ymm13,ymm13,ymm13,12
6192	vpalignr	ymm9,ymm9,ymm9,8
6193	vpalignr	ymm5,ymm5,ymm5,4
	; column round, state 2
6194	vpaddd	ymm2,ymm2,ymm6
6195	vpxor	ymm14,ymm14,ymm2
6196	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
6197	vpaddd	ymm10,ymm10,ymm14
6198	vpxor	ymm6,ymm6,ymm10
6199	vpsrld	ymm3,ymm6,20
6200	vpslld	ymm6,ymm6,12
6201	vpxor	ymm6,ymm6,ymm3
6202	vpaddd	ymm2,ymm2,ymm6
6203	vpxor	ymm14,ymm14,ymm2
6204	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
6205	vpaddd	ymm10,ymm10,ymm14
6206	vpxor	ymm6,ymm6,ymm10
6207	vpslld	ymm3,ymm6,7
6208	vpsrld	ymm6,ymm6,25
6209	vpxor	ymm6,ymm6,ymm3
6210	vpalignr	ymm14,ymm14,ymm14,12
6211	vpalignr	ymm10,ymm10,ymm10,8
6212	vpalignr	ymm6,ymm6,ymm6,4
	; diagonal round, state 0 (the closing vpalignr trio restores row order)
6213	vpaddd	ymm0,ymm0,ymm4
6214	vpxor	ymm12,ymm12,ymm0
6215	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
6216	vpaddd	ymm8,ymm8,ymm12
6217	vpxor	ymm4,ymm4,ymm8
6218	vpsrld	ymm3,ymm4,20
6219	vpslld	ymm4,ymm4,12
6220	vpxor	ymm4,ymm4,ymm3
6221	vpaddd	ymm0,ymm0,ymm4
6222	vpxor	ymm12,ymm12,ymm0
6223	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
6224	vpaddd	ymm8,ymm8,ymm12
6225	vpxor	ymm4,ymm4,ymm8
6226	vpslld	ymm3,ymm4,7
6227	vpsrld	ymm4,ymm4,25
6228	vpxor	ymm4,ymm4,ymm3
6229	vpalignr	ymm12,ymm12,ymm12,4
6230	vpalignr	ymm8,ymm8,ymm8,8
6231	vpalignr	ymm4,ymm4,ymm4,12
	; diagonal round, state 1
6232	vpaddd	ymm1,ymm1,ymm5
6233	vpxor	ymm13,ymm13,ymm1
6234	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
6235	vpaddd	ymm9,ymm9,ymm13
6236	vpxor	ymm5,ymm5,ymm9
6237	vpsrld	ymm3,ymm5,20
6238	vpslld	ymm5,ymm5,12
6239	vpxor	ymm5,ymm5,ymm3
6240	vpaddd	ymm1,ymm1,ymm5
6241	vpxor	ymm13,ymm13,ymm1
6242	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
6243	vpaddd	ymm9,ymm9,ymm13
6244	vpxor	ymm5,ymm5,ymm9
6245	vpslld	ymm3,ymm5,7
6246	vpsrld	ymm5,ymm5,25
6247	vpxor	ymm5,ymm5,ymm3
6248	vpalignr	ymm13,ymm13,ymm13,4
6249	vpalignr	ymm9,ymm9,ymm9,8
6250	vpalignr	ymm5,ymm5,ymm5,12
	; diagonal round, state 2
6251	vpaddd	ymm2,ymm2,ymm6
6252	vpxor	ymm14,ymm14,ymm2
6253	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
6254	vpaddd	ymm10,ymm10,ymm14
6255	vpxor	ymm6,ymm6,ymm10
6256	vpsrld	ymm3,ymm6,20
6257	vpslld	ymm6,ymm6,12
6258	vpxor	ymm6,ymm6,ymm3
6259	vpaddd	ymm2,ymm2,ymm6
6260	vpxor	ymm14,ymm14,ymm2
6261	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
6262	vpaddd	ymm10,ymm10,ymm14
6263	vpxor	ymm6,ymm6,ymm10
6264	vpslld	ymm3,ymm6,7
6265	vpsrld	ymm6,ymm6,25
6266	vpxor	ymm6,ymm6,ymm3
6267	vpalignr	ymm14,ymm14,ymm14,4
6268	vpalignr	ymm10,ymm10,ymm10,8
6269	vpalignr	ymm6,ymm6,ymm6,12
6270
6271	dec	r10
6272	jne	NEAR $L$open_avx2_320_rounds
	; feed-forward: constants, saved rows 1/2 (ymm7, ymm11), saved counter rows
6273	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
6274	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
6275	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
6276	vpaddd	ymm4,ymm4,ymm7
6277	vpaddd	ymm5,ymm5,ymm7
6278	vpaddd	ymm6,ymm6,ymm7
6279	vpaddd	ymm8,ymm8,ymm11
6280	vpaddd	ymm9,ymm9,ymm11
6281	vpaddd	ymm10,ymm10,ymm11
6282	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
6283	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
6284	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
	; ymm3 = low lanes of state 0 rows 0 and 1 = first 32 keystream bytes
6285	vperm2i128	ymm3,ymm4,ymm0,0x02
6286
	; mask with $L$clamp and store at [160+rbp], where the Poly1305 multiply
	; code reads it
6287	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
6288	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
6289
	; remaining keystream in byte order: ymm0, ymm4, ymm8, ymm12, ymm1, ymm5,
	; ymm9, ymm13, ymm2, ymm6 (the order the short loop consumes them in)
6290	vperm2i128	ymm0,ymm4,ymm0,0x13
6291	vperm2i128	ymm4,ymm12,ymm8,0x13
6292	vperm2i128	ymm8,ymm5,ymm1,0x02
6293	vperm2i128	ymm12,ymm13,ymm9,0x02
6294	vperm2i128	ymm1,ymm5,ymm1,0x13
6295	vperm2i128	ymm5,ymm13,ymm9,0x13
6296	vperm2i128	ymm9,ymm6,ymm2,0x02
6297	vperm2i128	ymm13,ymm14,ymm10,0x02
6298	vperm2i128	ymm2,ymm6,ymm2,0x13
6299	vperm2i128	ymm6,ymm14,ymm10,0x13
6300	jmp	NEAR $L$open_avx2_short
6301
6302
6303
6304
6305
6306ALIGN	64
; AVX2 seal path, entry and setup. Reads three 16-byte rows from [r9],
; [r9+16], [r9+32] and broadcasts each to both lanes; rbx is compared against
; 192 and 320 to select a short path. Otherwise four ChaCha states are
; prepared for the ten initial double rounds.
; NOTE(review): the code that sets up rbp, rbx, rsi, rdi, r8 and r9 is outside
; this section - confirm the calling context there.
6307chacha20_poly1305_seal_avx2:
6308
6309
6310
6311
6312
6313
6314
6315
6316
6317
6318
6319
6320	vzeroupper
6321	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6322	vbroadcasti128	ymm4,XMMWORD[r9]
6323	vbroadcasti128	ymm8,XMMWORD[16+r9]
6324	vbroadcasti128	ymm12,XMMWORD[32+r9]
	; the 32-byte load at $L$avx2_init also covers $L$sse_inc, which follows
	; it directly: +0 in the low lane, +1 in dword 0 of the high lane
6325	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
	; short inputs: <= 192 bytes and <= 320 bytes have dedicated paths
6326	cmp	rbx,6*32
6327	jbe	NEAR $L$seal_avx2_192
6328	cmp	rbx,10*32
6329	jbe	NEAR $L$seal_avx2_320
	; replicate rows 0..2 into four states; keep rows 1 and 2 on the stack
6330	vmovdqa	ymm1,ymm0
6331	vmovdqa	ymm2,ymm0
6332	vmovdqa	ymm3,ymm0
6333	vmovdqa	ymm5,ymm4
6334	vmovdqa	ymm6,ymm4
6335	vmovdqa	ymm7,ymm4
6336	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
6337	vmovdqa	ymm9,ymm8
6338	vmovdqa	ymm10,ymm8
6339	vmovdqa	ymm11,ymm8
6340	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
	; counter rows: ymm15 gets the initial row; ymm14, ymm13, ymm12 are each
	; $L$avx2_inc further on. All four are saved for the feed-forward add
6341	vmovdqa	ymm15,ymm12
6342	vpaddd	ymm14,ymm15,YMMWORD[$L$avx2_inc]
6343	vpaddd	ymm13,ymm14,YMMWORD[$L$avx2_inc]
6344	vpaddd	ymm12,ymm13,YMMWORD[$L$avx2_inc]
6345	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6346	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6347	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6348	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
	; r10 = number of double rounds to run
6349	mov	r10,10
; Initial keystream generation for the seal path: ten ChaCha double rounds
; over four states with no hashing interleaved (rows in ymm0-3, ymm4-7,
; ymm8-11, ymm12-15; ymm8 doubles as the temporary and its row is parked in
; [160+128+rbp]). After the loop: feed-forward add, Poly1305 key derivation,
; a call to poly_hash_ad_internal, XOR of the first 320 (and, if enough input
; remains, 448) bytes, and dispatch on the remaining length in rbx.
6350$L$seal_avx2_init_rounds:
	; ---- column round: rotations by 16 and 12
6351	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6352	vmovdqa	ymm8,YMMWORD[$L$rol16]
6353	vpaddd	ymm3,ymm3,ymm7
6354	vpaddd	ymm2,ymm2,ymm6
6355	vpaddd	ymm1,ymm1,ymm5
6356	vpaddd	ymm0,ymm0,ymm4
6357	vpxor	ymm15,ymm15,ymm3
6358	vpxor	ymm14,ymm14,ymm2
6359	vpxor	ymm13,ymm13,ymm1
6360	vpxor	ymm12,ymm12,ymm0
6361	vpshufb	ymm15,ymm15,ymm8
6362	vpshufb	ymm14,ymm14,ymm8
6363	vpshufb	ymm13,ymm13,ymm8
6364	vpshufb	ymm12,ymm12,ymm8
6365	vpaddd	ymm11,ymm11,ymm15
6366	vpaddd	ymm10,ymm10,ymm14
6367	vpaddd	ymm9,ymm9,ymm13
6368	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6369	vpxor	ymm7,ymm7,ymm11
6370	vpxor	ymm6,ymm6,ymm10
6371	vpxor	ymm5,ymm5,ymm9
6372	vpxor	ymm4,ymm4,ymm8
6373	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6374	vpsrld	ymm8,ymm7,20
6375	vpslld	ymm7,ymm7,32-20
6376	vpxor	ymm7,ymm7,ymm8
6377	vpsrld	ymm8,ymm6,20
6378	vpslld	ymm6,ymm6,32-20
6379	vpxor	ymm6,ymm6,ymm8
6380	vpsrld	ymm8,ymm5,20
6381	vpslld	ymm5,ymm5,32-20
6382	vpxor	ymm5,ymm5,ymm8
6383	vpsrld	ymm8,ymm4,20
6384	vpslld	ymm4,ymm4,32-20
6385	vpxor	ymm4,ymm4,ymm8
	; ---- column round: rotations by 8 and 7
6386	vmovdqa	ymm8,YMMWORD[$L$rol8]
6387	vpaddd	ymm3,ymm3,ymm7
6388	vpaddd	ymm2,ymm2,ymm6
6389	vpaddd	ymm1,ymm1,ymm5
6390	vpaddd	ymm0,ymm0,ymm4
6391	vpxor	ymm15,ymm15,ymm3
6392	vpxor	ymm14,ymm14,ymm2
6393	vpxor	ymm13,ymm13,ymm1
6394	vpxor	ymm12,ymm12,ymm0
6395	vpshufb	ymm15,ymm15,ymm8
6396	vpshufb	ymm14,ymm14,ymm8
6397	vpshufb	ymm13,ymm13,ymm8
6398	vpshufb	ymm12,ymm12,ymm8
6399	vpaddd	ymm11,ymm11,ymm15
6400	vpaddd	ymm10,ymm10,ymm14
6401	vpaddd	ymm9,ymm9,ymm13
6402	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6403	vpxor	ymm7,ymm7,ymm11
6404	vpxor	ymm6,ymm6,ymm10
6405	vpxor	ymm5,ymm5,ymm9
6406	vpxor	ymm4,ymm4,ymm8
6407	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6408	vpsrld	ymm8,ymm7,25
6409	vpslld	ymm7,ymm7,32-25
6410	vpxor	ymm7,ymm7,ymm8
6411	vpsrld	ymm8,ymm6,25
6412	vpslld	ymm6,ymm6,32-25
6413	vpxor	ymm6,ymm6,ymm8
6414	vpsrld	ymm8,ymm5,25
6415	vpslld	ymm5,ymm5,32-25
6416	vpxor	ymm5,ymm5,ymm8
6417	vpsrld	ymm8,ymm4,25
6418	vpslld	ymm4,ymm4,32-25
6419	vpxor	ymm4,ymm4,ymm8
	; rotate rows b/c/d by 1/2/3 dwords so the next round works on diagonals
6420	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6421	vpalignr	ymm7,ymm7,ymm7,4
6422	vpalignr	ymm11,ymm11,ymm11,8
6423	vpalignr	ymm15,ymm15,ymm15,12
6424	vpalignr	ymm6,ymm6,ymm6,4
6425	vpalignr	ymm10,ymm10,ymm10,8
6426	vpalignr	ymm14,ymm14,ymm14,12
6427	vpalignr	ymm5,ymm5,ymm5,4
6428	vpalignr	ymm9,ymm9,ymm9,8
6429	vpalignr	ymm13,ymm13,ymm13,12
6430	vpalignr	ymm4,ymm4,ymm4,4
6431	vpalignr	ymm8,ymm8,ymm8,8
6432	vpalignr	ymm12,ymm12,ymm12,12
	; ---- diagonal round: rotations by 16 and 12
6433	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6434	vmovdqa	ymm8,YMMWORD[$L$rol16]
6435	vpaddd	ymm3,ymm3,ymm7
6436	vpaddd	ymm2,ymm2,ymm6
6437	vpaddd	ymm1,ymm1,ymm5
6438	vpaddd	ymm0,ymm0,ymm4
6439	vpxor	ymm15,ymm15,ymm3
6440	vpxor	ymm14,ymm14,ymm2
6441	vpxor	ymm13,ymm13,ymm1
6442	vpxor	ymm12,ymm12,ymm0
6443	vpshufb	ymm15,ymm15,ymm8
6444	vpshufb	ymm14,ymm14,ymm8
6445	vpshufb	ymm13,ymm13,ymm8
6446	vpshufb	ymm12,ymm12,ymm8
6447	vpaddd	ymm11,ymm11,ymm15
6448	vpaddd	ymm10,ymm10,ymm14
6449	vpaddd	ymm9,ymm9,ymm13
6450	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6451	vpxor	ymm7,ymm7,ymm11
6452	vpxor	ymm6,ymm6,ymm10
6453	vpxor	ymm5,ymm5,ymm9
6454	vpxor	ymm4,ymm4,ymm8
6455	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6456	vpsrld	ymm8,ymm7,20
6457	vpslld	ymm7,ymm7,32-20
6458	vpxor	ymm7,ymm7,ymm8
6459	vpsrld	ymm8,ymm6,20
6460	vpslld	ymm6,ymm6,32-20
6461	vpxor	ymm6,ymm6,ymm8
6462	vpsrld	ymm8,ymm5,20
6463	vpslld	ymm5,ymm5,32-20
6464	vpxor	ymm5,ymm5,ymm8
6465	vpsrld	ymm8,ymm4,20
6466	vpslld	ymm4,ymm4,32-20
6467	vpxor	ymm4,ymm4,ymm8
	; ---- diagonal round: rotations by 8 and 7
6468	vmovdqa	ymm8,YMMWORD[$L$rol8]
6469	vpaddd	ymm3,ymm3,ymm7
6470	vpaddd	ymm2,ymm2,ymm6
6471	vpaddd	ymm1,ymm1,ymm5
6472	vpaddd	ymm0,ymm0,ymm4
6473	vpxor	ymm15,ymm15,ymm3
6474	vpxor	ymm14,ymm14,ymm2
6475	vpxor	ymm13,ymm13,ymm1
6476	vpxor	ymm12,ymm12,ymm0
6477	vpshufb	ymm15,ymm15,ymm8
6478	vpshufb	ymm14,ymm14,ymm8
6479	vpshufb	ymm13,ymm13,ymm8
6480	vpshufb	ymm12,ymm12,ymm8
6481	vpaddd	ymm11,ymm11,ymm15
6482	vpaddd	ymm10,ymm10,ymm14
6483	vpaddd	ymm9,ymm9,ymm13
6484	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6485	vpxor	ymm7,ymm7,ymm11
6486	vpxor	ymm6,ymm6,ymm10
6487	vpxor	ymm5,ymm5,ymm9
6488	vpxor	ymm4,ymm4,ymm8
6489	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6490	vpsrld	ymm8,ymm7,25
6491	vpslld	ymm7,ymm7,32-25
6492	vpxor	ymm7,ymm7,ymm8
6493	vpsrld	ymm8,ymm6,25
6494	vpslld	ymm6,ymm6,32-25
6495	vpxor	ymm6,ymm6,ymm8
6496	vpsrld	ymm8,ymm5,25
6497	vpslld	ymm5,ymm5,32-25
6498	vpxor	ymm5,ymm5,ymm8
6499	vpsrld	ymm8,ymm4,25
6500	vpslld	ymm4,ymm4,32-25
6501	vpxor	ymm4,ymm4,ymm8
	; undo the diagonalisation: rotate rows b/c/d back by 3/2/1 dwords
6502	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6503	vpalignr	ymm7,ymm7,ymm7,12
6504	vpalignr	ymm11,ymm11,ymm11,8
6505	vpalignr	ymm15,ymm15,ymm15,4
6506	vpalignr	ymm6,ymm6,ymm6,12
6507	vpalignr	ymm10,ymm10,ymm10,8
6508	vpalignr	ymm14,ymm14,ymm14,4
6509	vpalignr	ymm5,ymm5,ymm5,12
6510	vpalignr	ymm9,ymm9,ymm9,8
6511	vpalignr	ymm13,ymm13,ymm13,4
6512	vpalignr	ymm4,ymm4,ymm4,12
6513	vpalignr	ymm8,ymm8,ymm8,8
6514	vpalignr	ymm12,ymm12,ymm12,4
6515
6516	dec	r10
6517	jnz	NEAR $L$seal_avx2_init_rounds
	; ---- feed-forward: constants, saved rows 1/2, each state's counter row
6518	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
6519	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
6520	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
6521	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
6522	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
6523	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
6524	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
6525	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
6526	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
6527	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
6528	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
6529	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
6530	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
6531	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
6532	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
6533	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
6534
	; state 3: high lanes -> ymm3, ymm11 (64 keystream bytes); low lanes of
	; rows 0 and 1 -> ymm15, masked with $L$clamp and stored at [160+rbp],
	; where the Poly1305 multiply code reads it
6535	vperm2i128	ymm11,ymm15,ymm11,0x13
6536	vperm2i128	ymm15,ymm7,ymm3,0x02
6537	vperm2i128	ymm3,ymm7,ymm3,0x13
6538	vpand	ymm15,ymm15,YMMWORD[$L$clamp]
6539	vmovdqa	YMMWORD[(160+0)+rbp],ymm15
	; register-to-itself move: leaves r8 unchanged
6540	mov	r8,r8
	; poly_hash_ad_internal is defined outside this section
6541	call	poly_hash_ad_internal
6542
	; bytes 0..63
6543	vpxor	ymm3,ymm3,YMMWORD[rsi]
6544	vpxor	ymm11,ymm11,YMMWORD[32+rsi]
6545	vmovdqu	YMMWORD[rdi],ymm3
6546	vmovdqu	YMMWORD[32+rdi],ymm11
	; state 2 -> bytes 64..191 (ymm15 is free now and serves as the temporary)
6547	vperm2i128	ymm15,ymm6,ymm2,0x02
6548	vperm2i128	ymm6,ymm6,ymm2,0x13
6549	vperm2i128	ymm2,ymm14,ymm10,0x02
6550	vperm2i128	ymm10,ymm14,ymm10,0x13
6551	vpxor	ymm15,ymm15,YMMWORD[((0+64))+rsi]
6552	vpxor	ymm2,ymm2,YMMWORD[((32+64))+rsi]
6553	vpxor	ymm6,ymm6,YMMWORD[((64+64))+rsi]
6554	vpxor	ymm10,ymm10,YMMWORD[((96+64))+rsi]
6555	vmovdqu	YMMWORD[(0+64)+rdi],ymm15
6556	vmovdqu	YMMWORD[(32+64)+rdi],ymm2
6557	vmovdqu	YMMWORD[(64+64)+rdi],ymm6
6558	vmovdqu	YMMWORD[(96+64)+rdi],ymm10
	; state 1 -> bytes 192..319
6559	vperm2i128	ymm15,ymm5,ymm1,0x02
6560	vperm2i128	ymm5,ymm5,ymm1,0x13
6561	vperm2i128	ymm1,ymm13,ymm9,0x02
6562	vperm2i128	ymm9,ymm13,ymm9,0x13
6563	vpxor	ymm15,ymm15,YMMWORD[((0+192))+rsi]
6564	vpxor	ymm1,ymm1,YMMWORD[((32+192))+rsi]
6565	vpxor	ymm5,ymm5,YMMWORD[((64+192))+rsi]
6566	vpxor	ymm9,ymm9,YMMWORD[((96+192))+rsi]
6567	vmovdqu	YMMWORD[(0+192)+rdi],ymm15
6568	vmovdqu	YMMWORD[(32+192)+rdi],ymm1
6569	vmovdqu	YMMWORD[(64+192)+rdi],ymm5
6570	vmovdqu	YMMWORD[(96+192)+rdi],ymm9
	; state 0 is only regrouped: ymm0, ymm4, ymm8, ymm12 = keystream in byte order
6571	vperm2i128	ymm15,ymm4,ymm0,0x13
6572	vperm2i128	ymm0,ymm4,ymm0,0x02
6573	vperm2i128	ymm4,ymm12,ymm8,0x02
6574	vperm2i128	ymm12,ymm12,ymm8,0x13
6575	vmovdqa	ymm8,ymm15
6576
	; 320 bytes produced: advance the input pointer only (rdi stays put and
	; later stores use explicit offsets); rcx = 320
6577	lea	rsi,[320+rsi]
6578	sub	rbx,10*32
6579	mov	rcx,10*32
	; at most 128 bytes left: the pending keystream in ymm0/4/8/12 is handled
	; at $L$seal_avx2_short_hash_remainder (outside this section)
6580	cmp	rbx,4*32
6581	jbe	NEAR $L$seal_avx2_short_hash_remainder
	; otherwise use the pending keystream now for bytes 320..447
6582	vpxor	ymm0,ymm0,YMMWORD[rsi]
6583	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
6584	vpxor	ymm8,ymm8,YMMWORD[64+rsi]
6585	vpxor	ymm12,ymm12,YMMWORD[96+rsi]
6586	vmovdqu	YMMWORD[320+rdi],ymm0
6587	vmovdqu	YMMWORD[352+rdi],ymm4
6588	vmovdqu	YMMWORD[384+rdi],ymm8
6589	vmovdqu	YMMWORD[416+rdi],ymm12
6590	lea	rsi,[128+rsi]
6591	sub	rbx,4*32
	; NOTE(review): rcx and r8 are presumably loop counts consumed by the
	; tail code - confirm at the jump targets, which are outside this section
6592	mov	rcx,8
6593	mov	r8,2
	; dispatch on the remaining length: <=128, <=256, <=384, <=512 bytes.
	; Anything larger falls through to the code that follows
6594	cmp	rbx,4*32
6595	jbe	NEAR $L$seal_avx2_tail_128
6596	cmp	rbx,8*32
6597	jbe	NEAR $L$seal_avx2_tail_256
6598	cmp	rbx,12*32
6599	jbe	NEAR $L$seal_avx2_tail_384
6600	cmp	rbx,16*32
6601	jbe	NEAR $L$seal_avx2_tail_512
6602	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6603	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
6604	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
6605	vmovdqa	ymm1,ymm0
6606	vmovdqa	ymm5,ymm4
6607	vmovdqa	ymm9,ymm8
6608	vmovdqa	ymm2,ymm0
6609	vmovdqa	ymm6,ymm4
6610	vmovdqa	ymm10,ymm8
6611	vmovdqa	ymm3,ymm0
6612	vmovdqa	ymm7,ymm4
6613	vmovdqa	ymm11,ymm8
6614	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
6615	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
6616	vpaddd	ymm14,ymm12,ymm15
6617	vpaddd	ymm13,ymm12,ymm14
6618	vpaddd	ymm12,ymm12,ymm13
6619	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6620	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6621	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6622	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6623	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6624	vmovdqa	ymm8,YMMWORD[$L$rol16]
6625	vpaddd	ymm3,ymm3,ymm7
6626	vpaddd	ymm2,ymm2,ymm6
6627	vpaddd	ymm1,ymm1,ymm5
6628	vpaddd	ymm0,ymm0,ymm4
6629	vpxor	ymm15,ymm15,ymm3
6630	vpxor	ymm14,ymm14,ymm2
6631	vpxor	ymm13,ymm13,ymm1
6632	vpxor	ymm12,ymm12,ymm0
6633	vpshufb	ymm15,ymm15,ymm8
6634	vpshufb	ymm14,ymm14,ymm8
6635	vpshufb	ymm13,ymm13,ymm8
6636	vpshufb	ymm12,ymm12,ymm8
6637	vpaddd	ymm11,ymm11,ymm15
6638	vpaddd	ymm10,ymm10,ymm14
6639	vpaddd	ymm9,ymm9,ymm13
6640	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6641	vpxor	ymm7,ymm7,ymm11
6642	vpxor	ymm6,ymm6,ymm10
6643	vpxor	ymm5,ymm5,ymm9
6644	vpxor	ymm4,ymm4,ymm8
6645	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6646	vpsrld	ymm8,ymm7,20
6647	vpslld	ymm7,ymm7,32-20
6648	vpxor	ymm7,ymm7,ymm8
6649	vpsrld	ymm8,ymm6,20
6650	vpslld	ymm6,ymm6,32-20
6651	vpxor	ymm6,ymm6,ymm8
6652	vpsrld	ymm8,ymm5,20
6653	vpslld	ymm5,ymm5,32-20
6654	vpxor	ymm5,ymm5,ymm8
6655	vpsrld	ymm8,ymm4,20
6656	vpslld	ymm4,ymm4,32-20
6657	vpxor	ymm4,ymm4,ymm8
6658	vmovdqa	ymm8,YMMWORD[$L$rol8]
6659	vpaddd	ymm3,ymm3,ymm7
6660	vpaddd	ymm2,ymm2,ymm6
6661	vpaddd	ymm1,ymm1,ymm5
6662	vpaddd	ymm0,ymm0,ymm4
6663	vpxor	ymm15,ymm15,ymm3
6664	vpxor	ymm14,ymm14,ymm2
6665	vpxor	ymm13,ymm13,ymm1
6666	vpxor	ymm12,ymm12,ymm0
6667	vpshufb	ymm15,ymm15,ymm8
6668	vpshufb	ymm14,ymm14,ymm8
6669	vpshufb	ymm13,ymm13,ymm8
6670	vpshufb	ymm12,ymm12,ymm8
6671	vpaddd	ymm11,ymm11,ymm15
6672	vpaddd	ymm10,ymm10,ymm14
6673	vpaddd	ymm9,ymm9,ymm13
6674	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6675	vpxor	ymm7,ymm7,ymm11
6676	vpxor	ymm6,ymm6,ymm10
6677	vpxor	ymm5,ymm5,ymm9
6678	vpxor	ymm4,ymm4,ymm8
6679	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6680	vpsrld	ymm8,ymm7,25
6681	vpslld	ymm7,ymm7,32-25
6682	vpxor	ymm7,ymm7,ymm8
6683	vpsrld	ymm8,ymm6,25
6684	vpslld	ymm6,ymm6,32-25
6685	vpxor	ymm6,ymm6,ymm8
6686	vpsrld	ymm8,ymm5,25
6687	vpslld	ymm5,ymm5,32-25
6688	vpxor	ymm5,ymm5,ymm8
6689	vpsrld	ymm8,ymm4,25
6690	vpslld	ymm4,ymm4,32-25
6691	vpxor	ymm4,ymm4,ymm8
6692	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6693	vpalignr	ymm7,ymm7,ymm7,4
6694	vpalignr	ymm11,ymm11,ymm11,8
6695	vpalignr	ymm15,ymm15,ymm15,12
6696	vpalignr	ymm6,ymm6,ymm6,4
6697	vpalignr	ymm10,ymm10,ymm10,8
6698	vpalignr	ymm14,ymm14,ymm14,12
6699	vpalignr	ymm5,ymm5,ymm5,4
6700	vpalignr	ymm9,ymm9,ymm9,8
6701	vpalignr	ymm13,ymm13,ymm13,12
6702	vpalignr	ymm4,ymm4,ymm4,4
6703	vpalignr	ymm8,ymm8,ymm8,8
6704	vpalignr	ymm12,ymm12,ymm12,12
6705	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6706	vmovdqa	ymm8,YMMWORD[$L$rol16]
6707	vpaddd	ymm3,ymm3,ymm7
6708	vpaddd	ymm2,ymm2,ymm6
6709	vpaddd	ymm1,ymm1,ymm5
6710	vpaddd	ymm0,ymm0,ymm4
6711	vpxor	ymm15,ymm15,ymm3
6712	vpxor	ymm14,ymm14,ymm2
6713	vpxor	ymm13,ymm13,ymm1
6714	vpxor	ymm12,ymm12,ymm0
6715	vpshufb	ymm15,ymm15,ymm8
6716	vpshufb	ymm14,ymm14,ymm8
6717	vpshufb	ymm13,ymm13,ymm8
6718	vpshufb	ymm12,ymm12,ymm8
6719	vpaddd	ymm11,ymm11,ymm15
6720	vpaddd	ymm10,ymm10,ymm14
6721	vpaddd	ymm9,ymm9,ymm13
6722	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6723	vpxor	ymm7,ymm7,ymm11
6724	vpxor	ymm6,ymm6,ymm10
6725	vpxor	ymm5,ymm5,ymm9
6726	vpxor	ymm4,ymm4,ymm8
6727	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6728	vpsrld	ymm8,ymm7,20
6729	vpslld	ymm7,ymm7,32-20
6730	vpxor	ymm7,ymm7,ymm8
6731	vpsrld	ymm8,ymm6,20
6732	vpslld	ymm6,ymm6,32-20
6733	vpxor	ymm6,ymm6,ymm8
6734	vpsrld	ymm8,ymm5,20
6735	vpslld	ymm5,ymm5,32-20
6736	vpxor	ymm5,ymm5,ymm8
6737	vpsrld	ymm8,ymm4,20
6738	vpslld	ymm4,ymm4,32-20
6739	vpxor	ymm4,ymm4,ymm8
6740	vmovdqa	ymm8,YMMWORD[$L$rol8]
6741	vpaddd	ymm3,ymm3,ymm7
6742	vpaddd	ymm2,ymm2,ymm6
6743	vpaddd	ymm1,ymm1,ymm5
6744	vpaddd	ymm0,ymm0,ymm4
6745	vpxor	ymm15,ymm15,ymm3
6746	vpxor	ymm14,ymm14,ymm2
6747	vpxor	ymm13,ymm13,ymm1
6748	vpxor	ymm12,ymm12,ymm0
6749	vpshufb	ymm15,ymm15,ymm8
6750	vpshufb	ymm14,ymm14,ymm8
6751	vpshufb	ymm13,ymm13,ymm8
6752	vpshufb	ymm12,ymm12,ymm8
6753	vpaddd	ymm11,ymm11,ymm15
6754	vpaddd	ymm10,ymm10,ymm14
6755	vpaddd	ymm9,ymm9,ymm13
6756	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6757	vpxor	ymm7,ymm7,ymm11
6758	vpxor	ymm6,ymm6,ymm10
6759	vpxor	ymm5,ymm5,ymm9
6760	vpxor	ymm4,ymm4,ymm8
6761	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6762	vpsrld	ymm8,ymm7,25
6763	vpslld	ymm7,ymm7,32-25
6764	vpxor	ymm7,ymm7,ymm8
6765	vpsrld	ymm8,ymm6,25
6766	vpslld	ymm6,ymm6,32-25
6767	vpxor	ymm6,ymm6,ymm8
6768	vpsrld	ymm8,ymm5,25
6769	vpslld	ymm5,ymm5,32-25
6770	vpxor	ymm5,ymm5,ymm8
6771	vpsrld	ymm8,ymm4,25
6772	vpslld	ymm4,ymm4,32-25
6773	vpxor	ymm4,ymm4,ymm8
6774	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6775	vpalignr	ymm7,ymm7,ymm7,12
6776	vpalignr	ymm11,ymm11,ymm11,8
6777	vpalignr	ymm15,ymm15,ymm15,4
6778	vpalignr	ymm6,ymm6,ymm6,12
6779	vpalignr	ymm10,ymm10,ymm10,8
6780	vpalignr	ymm14,ymm14,ymm14,4
6781	vpalignr	ymm5,ymm5,ymm5,12
6782	vpalignr	ymm9,ymm9,ymm9,8
6783	vpalignr	ymm13,ymm13,ymm13,4
6784	vpalignr	ymm4,ymm4,ymm4,12
6785	vpalignr	ymm8,ymm8,ymm8,8
6786	vpalignr	ymm12,ymm12,ymm12,4
6787	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6788	vmovdqa	ymm8,YMMWORD[$L$rol16]
6789	vpaddd	ymm3,ymm3,ymm7
6790	vpaddd	ymm2,ymm2,ymm6
6791	vpaddd	ymm1,ymm1,ymm5
6792	vpaddd	ymm0,ymm0,ymm4
6793	vpxor	ymm15,ymm15,ymm3
6794	vpxor	ymm14,ymm14,ymm2
6795	vpxor	ymm13,ymm13,ymm1
6796	vpxor	ymm12,ymm12,ymm0
6797	vpshufb	ymm15,ymm15,ymm8
6798	vpshufb	ymm14,ymm14,ymm8
6799	vpshufb	ymm13,ymm13,ymm8
6800	vpshufb	ymm12,ymm12,ymm8
6801	vpaddd	ymm11,ymm11,ymm15
6802	vpaddd	ymm10,ymm10,ymm14
6803	vpaddd	ymm9,ymm9,ymm13
6804	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6805	vpxor	ymm7,ymm7,ymm11
6806	vpxor	ymm6,ymm6,ymm10
6807	vpxor	ymm5,ymm5,ymm9
6808	vpxor	ymm4,ymm4,ymm8
6809	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6810	vpsrld	ymm8,ymm7,20
6811	vpslld	ymm7,ymm7,32-20
6812	vpxor	ymm7,ymm7,ymm8
6813	vpsrld	ymm8,ymm6,20
6814	vpslld	ymm6,ymm6,32-20
6815	vpxor	ymm6,ymm6,ymm8
6816	vpsrld	ymm8,ymm5,20
6817	vpslld	ymm5,ymm5,32-20
6818	vpxor	ymm5,ymm5,ymm8
6819	vpsrld	ymm8,ymm4,20
6820	vpslld	ymm4,ymm4,32-20
6821	vpxor	ymm4,ymm4,ymm8
6822	vmovdqa	ymm8,YMMWORD[$L$rol8]
6823	vpaddd	ymm3,ymm3,ymm7
6824	vpaddd	ymm2,ymm2,ymm6
6825	vpaddd	ymm1,ymm1,ymm5
6826	vpaddd	ymm0,ymm0,ymm4
6827	vpxor	ymm15,ymm15,ymm3
6828
6829	sub	rdi,16
6830	mov	rcx,9
6831	jmp	NEAR $L$seal_avx2_main_loop_rounds_entry
6832ALIGN	32
; ---------------------------------------------------------------------------
; Main loop of the AVX2 seal path.
;
; Each pass builds four ymm-wide ChaCha20 states (rows in ymm0-3 / ymm4-7 /
; ymm8-11 / ymm12-15), runs ten double rounds on them, XORs the resulting
; 512 bytes of keystream with the input at [rsi] and stores to [rdi].  The
; scalar instructions woven between the vector ones absorb 16-byte blocks
; read from [rdi] into the hash accumulator, 512 bytes per pass in total
; (10 * 48 inside the round loop + 32 after it).
;
; Register / stack roles as used in this block:
;   rsi              source pointer, advanced by 512 per pass
;   rdi              destination pointer; also the address hash input is
;                    read from
;   rbx              bytes remaining
;   rcx              round-loop counter
;   r10:r11:r12      hash accumulator (low, middle, top limb)
;   r13,r14,r15,r9   product limbs; rax, rdx scratch (rdx is mulx's
;                    implicit source)
;   [rbp+160+0/+8]   the two 64-bit multiplier words of the hash
;   [rbp+160+64/96]  state rows reloaded into ymm4 / ymm8 for every pass
;                    (presumably the key rows -- set up outside this view)
;   [rbp+160+128]    spill slot for ymm8 (ymm8 doubles as shuffle mask and
;                    shift temporary) and, after the rounds, for ymm0
;   [rbp+160+160..256] counter rows of the four states, kept for the final
;                    feed-forward add
; ---------------------------------------------------------------------------
6833$L$seal_avx2_main_loop:
6834	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6835	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
6836	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
6837	vmovdqa	ymm1,ymm0
6838	vmovdqa	ymm5,ymm4
6839	vmovdqa	ymm9,ymm8
6840	vmovdqa	ymm2,ymm0
6841	vmovdqa	ymm6,ymm4
6842	vmovdqa	ymm10,ymm8
6843	vmovdqa	ymm3,ymm0
6844	vmovdqa	ymm7,ymm4
6845	vmovdqa	ymm11,ymm8
; Counter rows: ymm15 = saved counter + inc, then ymm14, ymm13, ymm12 each
; one further increment on.  All four are written back so the feed-forward
; add can reuse them; slot 160+160 ends up holding the newest counter.
6846	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
6847	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
6848	vpaddd	ymm14,ymm12,ymm15
6849	vpaddd	ymm13,ymm12,ymm14
6850	vpaddd	ymm12,ymm12,ymm13
6851	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6852	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6853	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6854	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6855
; Ten passes of the round loop; each pass is one double round on all four
; states plus three absorbed 16-byte hash blocks.
6856	mov	rcx,10
6857ALIGN	32
6858$L$seal_avx2_main_loop_rounds:
; Absorb block 1 of 3: acc += 16 bytes at [rdi], plus 1 in the top limb
; (i.e. + 2^128).
6859	add	r10,QWORD[((0+0))+rdi]
6860	adc	r11,QWORD[((8+0))+rdi]
6861	adc	r12,1
; First quarter-round step on all four states: a += b; d ^= a; d <<<= 16
; (byte shuffle with the rol16 mask).  ymm8 is spilled first because it
; carries the mask.
6862	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6863	vmovdqa	ymm8,YMMWORD[$L$rol16]
6864	vpaddd	ymm3,ymm3,ymm7
6865	vpaddd	ymm2,ymm2,ymm6
6866	vpaddd	ymm1,ymm1,ymm5
6867	vpaddd	ymm0,ymm0,ymm4
6868	vpxor	ymm15,ymm15,ymm3
6869	vpxor	ymm14,ymm14,ymm2
6870	vpxor	ymm13,ymm13,ymm1
6871	vpxor	ymm12,ymm12,ymm0
; acc * low multiplier word: r14:r13 = m0*r10, rdx:rax = m0*r11,
; r15 = m0*r12 (low 64 bits), then column sums.
6872	mov	rdx,QWORD[((0+160+0))+rbp]
6873	mov	r15,rdx
6874	mulx	r14,r13,r10
6875	mulx	rdx,rax,r11
6876	imul	r15,r12
6877	add	r14,rax
6878	adc	r15,rdx
6879	vpshufb	ymm15,ymm15,ymm8
6880	vpshufb	ymm14,ymm14,ymm8
6881	vpshufb	ymm13,ymm13,ymm8
6882	vpshufb	ymm12,ymm12,ymm8
6883	vpaddd	ymm11,ymm11,ymm15
6884	vpaddd	ymm10,ymm10,ymm14
6885	vpaddd	ymm9,ymm9,ymm13
6886	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6887	vpxor	ymm7,ymm7,ymm11
; acc * high multiplier word, accumulated into the same product limbs
; (r13, r14, r15, r9).
6888	mov	rdx,QWORD[((8+160+0))+rbp]
6889	mulx	rax,r10,r10
6890	add	r14,r10
6891	mulx	r9,r11,r11
6892	adc	r15,r11
6893	adc	r9,0
6894	imul	rdx,r12
6895	vpxor	ymm6,ymm6,ymm10
6896	vpxor	ymm5,ymm5,ymm9
6897	vpxor	ymm4,ymm4,ymm8
; b <<<= 12, built from shift-right 20 / shift-left 12 / xor with ymm8 as
; the temporary (its real value is parked in the spill slot).
6898	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6899	vpsrld	ymm8,ymm7,20
6900	vpslld	ymm7,ymm7,32-20
6901	vpxor	ymm7,ymm7,ymm8
6902	vpsrld	ymm8,ymm6,20
6903	vpslld	ymm6,ymm6,32-20
6904	vpxor	ymm6,ymm6,ymm8
6905	vpsrld	ymm8,ymm5,20
6906	vpslld	ymm5,ymm5,32-20
6907	add	r15,rax
6908	adc	r9,rdx
6909	vpxor	ymm5,ymm5,ymm8
6910	vpsrld	ymm8,ymm4,20
6911	vpslld	ymm4,ymm4,32-20
6912	vpxor	ymm4,ymm4,ymm8
6913	vmovdqa	ymm8,YMMWORD[$L$rol8]
6914	vpaddd	ymm3,ymm3,ymm7
6915	vpaddd	ymm2,ymm2,ymm6
6916	vpaddd	ymm1,ymm1,ymm5
6917	vpaddd	ymm0,ymm0,ymm4
6918	vpxor	ymm15,ymm15,ymm3
; Reduce the product: the low 130 bits stay in r10:r11:(r15 & 3); the part
; above bit 130, q, is folded back in as 5*q, computed as
; (hi & ~3) + (hi >> 2) = 4*q + q.
6919	mov	r10,r13
6920	mov	r11,r14
6921	mov	r12,r15
6922	and	r12,3
6923	mov	r13,r15
6924	and	r13,-4
6925	mov	r14,r9
6926	shrd	r15,r9,2
6927	shr	r9,2
6928	add	r15,r13
6929	adc	r9,r14
6930	add	r10,r15
6931	adc	r11,r9
6932	adc	r12,0
6933
; Secondary entry point: the setup code ahead of this loop jumps here with
; rcx = 9 and rdi lowered by 16 after running the earlier rounds without
; hashing.  The vector state expected here is "d ^= a done for ymm15 only,
; ymm8 = rol8 mask".
6934$L$seal_avx2_main_loop_rounds_entry:
6935	vpxor	ymm14,ymm14,ymm2
6936	vpxor	ymm13,ymm13,ymm1
6937	vpxor	ymm12,ymm12,ymm0
6938	vpshufb	ymm15,ymm15,ymm8
6939	vpshufb	ymm14,ymm14,ymm8
6940	vpshufb	ymm13,ymm13,ymm8
6941	vpshufb	ymm12,ymm12,ymm8
6942	vpaddd	ymm11,ymm11,ymm15
6943	vpaddd	ymm10,ymm10,ymm14
; Absorb block 2 of 3 (bytes 16..31 at rdi).
6944	add	r10,QWORD[((0+16))+rdi]
6945	adc	r11,QWORD[((8+16))+rdi]
6946	adc	r12,1
6947	vpaddd	ymm9,ymm9,ymm13
6948	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6949	vpxor	ymm7,ymm7,ymm11
6950	vpxor	ymm6,ymm6,ymm10
6951	vpxor	ymm5,ymm5,ymm9
6952	vpxor	ymm4,ymm4,ymm8
; b <<<= 7 (shift-right 25 / shift-left 7 / xor).
6953	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6954	vpsrld	ymm8,ymm7,25
6955	mov	rdx,QWORD[((0+160+0))+rbp]
6956	mov	r15,rdx
6957	mulx	r14,r13,r10
6958	mulx	rdx,rax,r11
6959	imul	r15,r12
6960	add	r14,rax
6961	adc	r15,rdx
6962	vpslld	ymm7,ymm7,32-25
6963	vpxor	ymm7,ymm7,ymm8
6964	vpsrld	ymm8,ymm6,25
6965	vpslld	ymm6,ymm6,32-25
6966	vpxor	ymm6,ymm6,ymm8
6967	vpsrld	ymm8,ymm5,25
6968	vpslld	ymm5,ymm5,32-25
6969	vpxor	ymm5,ymm5,ymm8
6970	vpsrld	ymm8,ymm4,25
6971	vpslld	ymm4,ymm4,32-25
6972	vpxor	ymm4,ymm4,ymm8
; Rotate rows b/c/d by 4/8/12 bytes within each 128-bit lane so the second
; half of the double round works on the rotated alignment.
6973	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6974	vpalignr	ymm7,ymm7,ymm7,4
6975	vpalignr	ymm11,ymm11,ymm11,8
6976	vpalignr	ymm15,ymm15,ymm15,12
6977	vpalignr	ymm6,ymm6,ymm6,4
6978	vpalignr	ymm10,ymm10,ymm10,8
6979	vpalignr	ymm14,ymm14,ymm14,12
6980	mov	rdx,QWORD[((8+160+0))+rbp]
6981	mulx	rax,r10,r10
6982	add	r14,r10
6983	mulx	r9,r11,r11
6984	adc	r15,r11
6985	adc	r9,0
6986	imul	rdx,r12
6987	vpalignr	ymm5,ymm5,ymm5,4
6988	vpalignr	ymm9,ymm9,ymm9,8
6989	vpalignr	ymm13,ymm13,ymm13,12
6990	vpalignr	ymm4,ymm4,ymm4,4
6991	vpalignr	ymm8,ymm8,ymm8,8
6992	vpalignr	ymm12,ymm12,ymm12,12
; Second half of the double round: same add / xor / rotate sequence.
6993	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6994	vmovdqa	ymm8,YMMWORD[$L$rol16]
6995	vpaddd	ymm3,ymm3,ymm7
6996	vpaddd	ymm2,ymm2,ymm6
6997	vpaddd	ymm1,ymm1,ymm5
6998	vpaddd	ymm0,ymm0,ymm4
6999	vpxor	ymm15,ymm15,ymm3
7000	vpxor	ymm14,ymm14,ymm2
7001	vpxor	ymm13,ymm13,ymm1
7002	vpxor	ymm12,ymm12,ymm0
7003	vpshufb	ymm15,ymm15,ymm8
7004	vpshufb	ymm14,ymm14,ymm8
7005	add	r15,rax
7006	adc	r9,rdx
7007	vpshufb	ymm13,ymm13,ymm8
7008	vpshufb	ymm12,ymm12,ymm8
7009	vpaddd	ymm11,ymm11,ymm15
7010	vpaddd	ymm10,ymm10,ymm14
7011	vpaddd	ymm9,ymm9,ymm13
7012	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
7013	vpxor	ymm7,ymm7,ymm11
7014	vpxor	ymm6,ymm6,ymm10
7015	vpxor	ymm5,ymm5,ymm9
; Reduction for hash block 2 (same 5*q fold as above).
7016	mov	r10,r13
7017	mov	r11,r14
7018	mov	r12,r15
7019	and	r12,3
7020	mov	r13,r15
7021	and	r13,-4
7022	mov	r14,r9
7023	shrd	r15,r9,2
7024	shr	r9,2
7025	add	r15,r13
7026	adc	r9,r14
7027	add	r10,r15
7028	adc	r11,r9
7029	adc	r12,0
7030	vpxor	ymm4,ymm4,ymm8
7031	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
7032	vpsrld	ymm8,ymm7,20
7033	vpslld	ymm7,ymm7,32-20
7034	vpxor	ymm7,ymm7,ymm8
7035	vpsrld	ymm8,ymm6,20
7036	vpslld	ymm6,ymm6,32-20
7037	vpxor	ymm6,ymm6,ymm8
; Absorb block 3 of 3 (bytes 32..47 at rdi), then step rdi past the 48
; bytes consumed by this pass.  lea is used so the flags are untouched.
7038	add	r10,QWORD[((0+32))+rdi]
7039	adc	r11,QWORD[((8+32))+rdi]
7040	adc	r12,1
7041
7042	lea	rdi,[48+rdi]
7043	vpsrld	ymm8,ymm5,20
7044	vpslld	ymm5,ymm5,32-20
7045	vpxor	ymm5,ymm5,ymm8
7046	vpsrld	ymm8,ymm4,20
7047	vpslld	ymm4,ymm4,32-20
7048	vpxor	ymm4,ymm4,ymm8
7049	vmovdqa	ymm8,YMMWORD[$L$rol8]
7050	vpaddd	ymm3,ymm3,ymm7
7051	vpaddd	ymm2,ymm2,ymm6
7052	vpaddd	ymm1,ymm1,ymm5
7053	vpaddd	ymm0,ymm0,ymm4
7054	vpxor	ymm15,ymm15,ymm3
7055	vpxor	ymm14,ymm14,ymm2
7056	vpxor	ymm13,ymm13,ymm1
7057	vpxor	ymm12,ymm12,ymm0
7058	vpshufb	ymm15,ymm15,ymm8
7059	vpshufb	ymm14,ymm14,ymm8
7060	vpshufb	ymm13,ymm13,ymm8
7061	mov	rdx,QWORD[((0+160+0))+rbp]
7062	mov	r15,rdx
7063	mulx	r14,r13,r10
7064	mulx	rdx,rax,r11
7065	imul	r15,r12
7066	add	r14,rax
7067	adc	r15,rdx
7068	vpshufb	ymm12,ymm12,ymm8
7069	vpaddd	ymm11,ymm11,ymm15
7070	vpaddd	ymm10,ymm10,ymm14
7071	vpaddd	ymm9,ymm9,ymm13
7072	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
7073	vpxor	ymm7,ymm7,ymm11
7074	vpxor	ymm6,ymm6,ymm10
7075	vpxor	ymm5,ymm5,ymm9
7076	mov	rdx,QWORD[((8+160+0))+rbp]
7077	mulx	rax,r10,r10
7078	add	r14,r10
7079	mulx	r9,r11,r11
7080	adc	r15,r11
7081	adc	r9,0
7082	imul	rdx,r12
7083	vpxor	ymm4,ymm4,ymm8
7084	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
7085	vpsrld	ymm8,ymm7,25
7086	vpslld	ymm7,ymm7,32-25
7087	vpxor	ymm7,ymm7,ymm8
7088	vpsrld	ymm8,ymm6,25
7089	vpslld	ymm6,ymm6,32-25
7090	vpxor	ymm6,ymm6,ymm8
7091	add	r15,rax
7092	adc	r9,rdx
7093	vpsrld	ymm8,ymm5,25
7094	vpslld	ymm5,ymm5,32-25
7095	vpxor	ymm5,ymm5,ymm8
7096	vpsrld	ymm8,ymm4,25
7097	vpslld	ymm4,ymm4,32-25
7098	vpxor	ymm4,ymm4,ymm8
; Undo the lane rotation (12/8/4 is the inverse of 4/8/12).
7099	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
7100	vpalignr	ymm7,ymm7,ymm7,12
7101	vpalignr	ymm11,ymm11,ymm11,8
7102	vpalignr	ymm15,ymm15,ymm15,4
7103	vpalignr	ymm6,ymm6,ymm6,12
7104	vpalignr	ymm10,ymm10,ymm10,8
7105	vpalignr	ymm14,ymm14,ymm14,4
7106	vpalignr	ymm5,ymm5,ymm5,12
7107	vpalignr	ymm9,ymm9,ymm9,8
7108	vpalignr	ymm13,ymm13,ymm13,4
7109	vpalignr	ymm4,ymm4,ymm4,12
7110	vpalignr	ymm8,ymm8,ymm8,8
7111	mov	r10,r13
7112	mov	r11,r14
7113	mov	r12,r15
7114	and	r12,3
7115	mov	r13,r15
7116	and	r13,-4
7117	mov	r14,r9
7118	shrd	r15,r9,2
7119	shr	r9,2
7120	add	r15,r13
7121	adc	r9,r14
7122	add	r10,r15
7123	adc	r11,r9
7124	adc	r12,0
7125	vpalignr	ymm12,ymm12,ymm12,4
7126
7127	dec	rcx
7128	jne	NEAR $L$seal_avx2_main_loop_rounds
; Feed-forward: add the initial rows (constants, the two rows kept at
; 160+64 / 160+96, and each state's saved counter row) back into the
; permuted states.
7129	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
7130	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
7131	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
7132	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
7133	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
7134	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
7135	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
7136	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
7137	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
7138	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
7139	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
7140	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
7141	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7142	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7143	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7144	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7145
; Park ymm0 so it can serve as the temporary for the lane regrouping below;
; it is reloaded before the last state is written out.  Meanwhile two more
; 16-byte blocks at [rdi] / [rdi+16] are absorbed with no vector work
; interleaved.
7146	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
7147	add	r10,QWORD[((0+0))+rdi]
7148	adc	r11,QWORD[((8+0))+rdi]
7149	adc	r12,1
7150	mov	rdx,QWORD[((0+160+0))+rbp]
7151	mov	r15,rdx
7152	mulx	r14,r13,r10
7153	mulx	rdx,rax,r11
7154	imul	r15,r12
7155	add	r14,rax
7156	adc	r15,rdx
7157	mov	rdx,QWORD[((8+160+0))+rbp]
7158	mulx	rax,r10,r10
7159	add	r14,r10
7160	mulx	r9,r11,r11
7161	adc	r15,r11
7162	adc	r9,0
7163	imul	rdx,r12
7164	add	r15,rax
7165	adc	r9,rdx
7166	mov	r10,r13
7167	mov	r11,r14
7168	mov	r12,r15
7169	and	r12,3
7170	mov	r13,r15
7171	and	r13,-4
7172	mov	r14,r9
7173	shrd	r15,r9,2
7174	shr	r9,2
7175	add	r15,r13
7176	adc	r9,r14
7177	add	r10,r15
7178	adc	r11,r9
7179	adc	r12,0
7180	add	r10,QWORD[((0+16))+rdi]
7181	adc	r11,QWORD[((8+16))+rdi]
7182	adc	r12,1
7183	mov	rdx,QWORD[((0+160+0))+rbp]
7184	mov	r15,rdx
7185	mulx	r14,r13,r10
7186	mulx	rdx,rax,r11
7187	imul	r15,r12
7188	add	r14,rax
7189	adc	r15,rdx
7190	mov	rdx,QWORD[((8+160+0))+rbp]
7191	mulx	rax,r10,r10
7192	add	r14,r10
7193	mulx	r9,r11,r11
7194	adc	r15,r11
7195	adc	r9,0
7196	imul	rdx,r12
7197	add	r15,rax
7198	adc	r9,rdx
7199	mov	r10,r13
7200	mov	r11,r14
7201	mov	r12,r15
7202	and	r12,3
7203	mov	r13,r15
7204	and	r13,-4
7205	mov	r14,r9
7206	shrd	r15,r9,2
7207	shr	r9,2
7208	add	r15,r13
7209	adc	r9,r14
7210	add	r10,r15
7211	adc	r11,r9
7212	adc	r12,0
7213
; rdi now points at the start of this pass's 512-byte output window.
; vperm2i128 with 0x02 gathers the low 128-bit lanes of its two sources and
; 0x13 the high lanes, regrouping the rows lane by lane before each 128-byte
; group is XORed with the input and stored.
7214	lea	rdi,[32+rdi]
7215	vperm2i128	ymm0,ymm7,ymm3,0x02
7216	vperm2i128	ymm7,ymm7,ymm3,0x13
7217	vperm2i128	ymm3,ymm15,ymm11,0x02
7218	vperm2i128	ymm11,ymm15,ymm11,0x13
7219	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
7220	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
7221	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
7222	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
7223	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
7224	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
7225	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
7226	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
7227
; Restore the parked ymm0; ymm3 takes over as the temporary.
7228	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
7229	vperm2i128	ymm3,ymm6,ymm2,0x02
7230	vperm2i128	ymm6,ymm6,ymm2,0x13
7231	vperm2i128	ymm2,ymm14,ymm10,0x02
7232	vperm2i128	ymm10,ymm14,ymm10,0x13
7233	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
7234	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
7235	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
7236	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
7237	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
7238	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
7239	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
7240	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
7241	vperm2i128	ymm3,ymm5,ymm1,0x02
7242	vperm2i128	ymm5,ymm5,ymm1,0x13
7243	vperm2i128	ymm1,ymm13,ymm9,0x02
7244	vperm2i128	ymm9,ymm13,ymm9,0x13
7245	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
7246	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
7247	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
7248	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
7249	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
7250	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
7251	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
7252	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
7253	vperm2i128	ymm3,ymm4,ymm0,0x02
7254	vperm2i128	ymm4,ymm4,ymm0,0x13
7255	vperm2i128	ymm0,ymm12,ymm8,0x02
7256	vperm2i128	ymm8,ymm12,ymm8,0x13
7257	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
7258	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
7259	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
7260	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
7261	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
7262	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
7263	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
7264	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
7265
; Consume 512 input bytes and go round again while more than 512 remain.
; rdi is NOT advanced here: the next pass's hashing walks it over the bytes
; just written.
; NOTE(review): jg is a signed comparison on the remaining length, whereas
; the other length checks in this region use ja/jbe.  The two agree for any
; length below 2^63, so this is an observation rather than a fault.
7266	lea	rsi,[512+rsi]
7267	sub	rbx,16*32
7268	cmp	rbx,16*32
7269	jg	NEAR $L$seal_avx2_main_loop
7270
; ---------------------------------------------------------------------------
; Main-loop exit: at most 512 bytes remain in rbx.
;
; Absorbs two more 16-byte blocks at [rdi] and [rdi+16] into the hash
; accumulator r10:r11:r12 (acc = (acc + block + 2^128) * multiplier, then
; folded at bit 130), advances rdi by 32, and picks a tail routine by the
; remaining length.  Every tail is entered with rcx = 10 and r8 = 0.
; ---------------------------------------------------------------------------
7271	add	r10,QWORD[((0+0))+rdi]
7272	adc	r11,QWORD[((8+0))+rdi]
7273	adc	r12,1
; Product limbs go to r13, r14, r15, r9; rdx carries the multiplier word
; that mulx uses implicitly.
7274	mov	rdx,QWORD[((0+160+0))+rbp]
7275	mov	r15,rdx
7276	mulx	r14,r13,r10
7277	mulx	rdx,rax,r11
7278	imul	r15,r12
7279	add	r14,rax
7280	adc	r15,rdx
7281	mov	rdx,QWORD[((8+160+0))+rbp]
7282	mulx	rax,r10,r10
7283	add	r14,r10
7284	mulx	r9,r11,r11
7285	adc	r15,r11
7286	adc	r9,0
7287	imul	rdx,r12
7288	add	r15,rax
7289	adc	r9,rdx
; Fold: keep the low 130 bits, add 5*q for the part q above bit 130
; ((hi & ~3) + (hi >> 2) = 4*q + q).
7290	mov	r10,r13
7291	mov	r11,r14
7292	mov	r12,r15
7293	and	r12,3
7294	mov	r13,r15
7295	and	r13,-4
7296	mov	r14,r9
7297	shrd	r15,r9,2
7298	shr	r9,2
7299	add	r15,r13
7300	adc	r9,r14
7301	add	r10,r15
7302	adc	r11,r9
7303	adc	r12,0
; Second block, same sequence on bytes 16..31.
7304	add	r10,QWORD[((0+16))+rdi]
7305	adc	r11,QWORD[((8+16))+rdi]
7306	adc	r12,1
7307	mov	rdx,QWORD[((0+160+0))+rbp]
7308	mov	r15,rdx
7309	mulx	r14,r13,r10
7310	mulx	rdx,rax,r11
7311	imul	r15,r12
7312	add	r14,rax
7313	adc	r15,rdx
7314	mov	rdx,QWORD[((8+160+0))+rbp]
7315	mulx	rax,r10,r10
7316	add	r14,r10
7317	mulx	r9,r11,r11
7318	adc	r15,r11
7319	adc	r9,0
7320	imul	rdx,r12
7321	add	r15,rax
7322	adc	r9,rdx
7323	mov	r10,r13
7324	mov	r11,r14
7325	mov	r12,r15
7326	and	r12,3
7327	mov	r13,r15
7328	and	r13,-4
7329	mov	r14,r9
7330	shrd	r15,r9,2
7331	shr	r9,2
7332	add	r15,r13
7333	adc	r9,r14
7334	add	r10,r15
7335	adc	r11,r9
7336	adc	r12,0
7337
; rcx = 10, r8 = 0: in the tail loops this means ten passes that each
; absorb three hash blocks and no two-block passes.
7338	lea	rdi,[32+rdi]
7339	mov	rcx,10
7340	xor	r8,r8
7341
; Unsigned dispatch on bytes remaining:
;   > 384 -> tail_512,  > 256 -> tail_384,  > 128 -> tail_256,
;   otherwise fall through into tail_128.
7342	cmp	rbx,12*32
7343	ja	NEAR $L$seal_avx2_tail_512
7344	cmp	rbx,8*32
7345	ja	NEAR $L$seal_avx2_tail_384
7346	cmp	rbx,4*32
7347	ja	NEAR $L$seal_avx2_tail_256
7348
; ---------------------------------------------------------------------------
; Tail for at most 128 remaining bytes: one ymm-wide state.
;
; Rows: ymm0 = constants, ymm4 / ymm8 = the rows kept at [rbp+160+64] and
; [rbp+160+96], ymm12 = saved counter + avx2_inc (written back to
; [rbp+160+160]).  ymm3 is the shift temporary.
;
; Loop control (rcx, r8 are set by whoever jumps here):
;   - while the decremented rcx stays positive, a pass starts at
;     ..._rounds_and_3xhash and absorbs three 16-byte hash blocks;
;   - afterwards r8 is counted down and each remaining pass starts at
;     ..._rounds_and_2xhash, absorbing two blocks.
;   The two entry paths visible in this file use (rcx,r8) = (8,2) and
;   (10,0); both give ten double rounds in total.
;
; No data is XORed here: the keystream is left in ymm0, ymm4, ymm8, ymm12
; for $L$seal_avx2_short_loop.
; ---------------------------------------------------------------------------
7349$L$seal_avx2_tail_128:
7350	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7351	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7352	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
7353	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7354	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7355	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7356
7357$L$seal_avx2_tail_128_rounds_and_3xhash:
; Extra (third) hash block for this pass: acc += 16 bytes at [rdi] + 2^128,
; acc *= multiplier at [rbp+160], then fold at bit 130.
7358	add	r10,QWORD[((0+0))+rdi]
7359	adc	r11,QWORD[((8+0))+rdi]
7360	adc	r12,1
7361	mov	rdx,QWORD[((0+160+0))+rbp]
7362	mov	r15,rdx
7363	mulx	r14,r13,r10
7364	mulx	rdx,rax,r11
7365	imul	r15,r12
7366	add	r14,rax
7367	adc	r15,rdx
7368	mov	rdx,QWORD[((8+160+0))+rbp]
7369	mulx	rax,r10,r10
7370	add	r14,r10
7371	mulx	r9,r11,r11
7372	adc	r15,r11
7373	adc	r9,0
7374	imul	rdx,r12
7375	add	r15,rax
7376	adc	r9,rdx
; Fold: low 130 bits + 5*q, with 5*q formed as (hi & ~3) + (hi >> 2).
7377	mov	r10,r13
7378	mov	r11,r14
7379	mov	r12,r15
7380	and	r12,3
7381	mov	r13,r15
7382	and	r13,-4
7383	mov	r14,r9
7384	shrd	r15,r9,2
7385	shr	r9,2
7386	add	r15,r13
7387	adc	r9,r14
7388	add	r10,r15
7389	adc	r11,r9
7390	adc	r12,0
7391
7392	lea	rdi,[16+rdi]
7393$L$seal_avx2_tail_128_rounds_and_2xhash:
; First half of the double round: a+=b, d^=a, d<<<16, c+=d, b^=c, b<<<12,
; a+=b, d^=a, d<<<8, c+=d, b^=c, b<<<7; then rotate rows d/c/b by 12/8/4
; bytes within each 128-bit lane.
7394	vpaddd	ymm0,ymm0,ymm4
7395	vpxor	ymm12,ymm12,ymm0
7396	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7397	vpaddd	ymm8,ymm8,ymm12
7398	vpxor	ymm4,ymm4,ymm8
7399	vpsrld	ymm3,ymm4,20
7400	vpslld	ymm4,ymm4,12
7401	vpxor	ymm4,ymm4,ymm3
7402	vpaddd	ymm0,ymm0,ymm4
7403	vpxor	ymm12,ymm12,ymm0
7404	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7405	vpaddd	ymm8,ymm8,ymm12
7406	vpxor	ymm4,ymm4,ymm8
7407	vpslld	ymm3,ymm4,7
7408	vpsrld	ymm4,ymm4,25
7409	vpxor	ymm4,ymm4,ymm3
7410	vpalignr	ymm12,ymm12,ymm12,12
7411	vpalignr	ymm8,ymm8,ymm8,8
7412	vpalignr	ymm4,ymm4,ymm4,4
; Hash block at [rdi].
7413	add	r10,QWORD[((0+0))+rdi]
7414	adc	r11,QWORD[((8+0))+rdi]
7415	adc	r12,1
7416	mov	rdx,QWORD[((0+160+0))+rbp]
7417	mov	r15,rdx
7418	mulx	r14,r13,r10
7419	mulx	rdx,rax,r11
7420	imul	r15,r12
7421	add	r14,rax
7422	adc	r15,rdx
7423	mov	rdx,QWORD[((8+160+0))+rbp]
7424	mulx	rax,r10,r10
7425	add	r14,r10
7426	mulx	r9,r11,r11
7427	adc	r15,r11
7428	adc	r9,0
7429	imul	rdx,r12
7430	add	r15,rax
7431	adc	r9,rdx
7432	mov	r10,r13
7433	mov	r11,r14
7434	mov	r12,r15
7435	and	r12,3
7436	mov	r13,r15
7437	and	r13,-4
7438	mov	r14,r9
7439	shrd	r15,r9,2
7440	shr	r9,2
7441	add	r15,r13
7442	adc	r9,r14
7443	add	r10,r15
7444	adc	r11,r9
7445	adc	r12,0
; Second half of the double round; the closing vpalignr amounts (4/8/12)
; undo the earlier lane rotation.
7446	vpaddd	ymm0,ymm0,ymm4
7447	vpxor	ymm12,ymm12,ymm0
7448	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7449	vpaddd	ymm8,ymm8,ymm12
7450	vpxor	ymm4,ymm4,ymm8
7451	vpsrld	ymm3,ymm4,20
7452	vpslld	ymm4,ymm4,12
7453	vpxor	ymm4,ymm4,ymm3
7454	vpaddd	ymm0,ymm0,ymm4
7455	vpxor	ymm12,ymm12,ymm0
7456	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7457	vpaddd	ymm8,ymm8,ymm12
7458	vpxor	ymm4,ymm4,ymm8
7459	vpslld	ymm3,ymm4,7
7460	vpsrld	ymm4,ymm4,25
7461	vpxor	ymm4,ymm4,ymm3
7462	vpalignr	ymm12,ymm12,ymm12,4
7463	vpalignr	ymm8,ymm8,ymm8,8
7464	vpalignr	ymm4,ymm4,ymm4,12
; Hash block at [rdi+16].
7465	add	r10,QWORD[((0+16))+rdi]
7466	adc	r11,QWORD[((8+16))+rdi]
7467	adc	r12,1
7468	mov	rdx,QWORD[((0+160+0))+rbp]
7469	mov	r15,rdx
7470	mulx	r14,r13,r10
7471	mulx	rdx,rax,r11
7472	imul	r15,r12
7473	add	r14,rax
7474	adc	r15,rdx
7475	mov	rdx,QWORD[((8+160+0))+rbp]
7476	mulx	rax,r10,r10
7477	add	r14,r10
7478	mulx	r9,r11,r11
7479	adc	r15,r11
7480	adc	r9,0
7481	imul	rdx,r12
7482	add	r15,rax
7483	adc	r9,rdx
7484	mov	r10,r13
7485	mov	r11,r14
7486	mov	r12,r15
7487	and	r12,3
7488	mov	r13,r15
7489	and	r13,-4
7490	mov	r14,r9
7491	shrd	r15,r9,2
7492	shr	r9,2
7493	add	r15,r13
7494	adc	r9,r14
7495	add	r10,r15
7496	adc	r11,r9
7497	adc	r12,0
7498
; Step past the two blocks hashed in the shared part of the pass, then run
; the two-stage countdown.  rcx keeps being decremented (and goes negative)
; during the r8 stage; only its sign is tested.
7499	lea	rdi,[32+rdi]
7500	dec	rcx
7501	jg	NEAR $L$seal_avx2_tail_128_rounds_and_3xhash
7502	dec	r8
7503	jge	NEAR $L$seal_avx2_tail_128_rounds_and_2xhash
; Feed-forward add of the initial rows, then regroup per 128-bit lane:
; ymm0 / ymm4 receive the low lanes (imm 0x02), ymm8 / ymm12 the high lanes
; (imm 0x13), with ymm3 as the temporary for what ends up in ymm8.
7504	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7505	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7506	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7507	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7508	vperm2i128	ymm3,ymm4,ymm0,0x13
7509	vperm2i128	ymm0,ymm4,ymm0,0x02
7510	vperm2i128	ymm4,ymm12,ymm8,0x02
7511	vperm2i128	ymm12,ymm12,ymm8,0x13
7512	vmovdqa	ymm8,ymm3
7513
7514	jmp	NEAR $L$seal_avx2_short_loop
7515
; ---------------------------------------------------------------------------
; Tail for 129..256 remaining bytes: two ymm-wide states.
;
; State A rows: ymm0, ymm4, ymm8, ymm12.  State B rows: ymm1, ymm5, ymm9,
; ymm13.  ymm13 = saved counter + avx2_inc, ymm12 = ymm13 + avx2_inc; both
; are written back ([rbp+160+192] and [rbp+160+160]) for the feed-forward
; add.  ymm3 is the shift temporary.
;
; Loop control matches the 128-byte tail: passes enter at
; ..._rounds_and_3xhash while the decremented rcx stays positive, then at
; ..._rounds_and_2xhash while the decremented r8 stays non-negative.
;
; Unlike the mulx-based hashing elsewhere in this region, the multiply here
; uses one-operand mul, so rax and rdx are implicit operands.
;
; On exit state B has been XORed with 128 input bytes and stored at [rdi];
; state A's keystream is left in ymm0, ymm4, ymm8, ymm12, and control goes
; to $L$seal_avx2_short_hash_remainder with rcx = 128.
; ---------------------------------------------------------------------------
7516$L$seal_avx2_tail_256:
7517	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7518	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7519	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
7520	vmovdqa	ymm1,ymm0
7521	vmovdqa	ymm5,ymm4
7522	vmovdqa	ymm9,ymm8
7523	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7524	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
7525	vpaddd	ymm12,ymm12,ymm13
7526	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7527	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
7528
7529$L$seal_avx2_tail_256_rounds_and_3xhash:
; Extra (third) hash block for this pass: acc += 16 bytes at [rdi] + 2^128.
7530	add	r10,QWORD[((0+0))+rdi]
7531	adc	r11,QWORD[((8+0))+rdi]
7532	adc	r12,1
; acc * multiplier with mul: r14:r13 = m0*r10, then m0*r11 and m0*r12 are
; added in; r9 keeps a copy of m1 for the m1*r12 term while r10 is reused
; to carry the high half of m1*r10.
7533	mov	rax,QWORD[((0+160+0))+rbp]
7534	mov	r15,rax
7535	mul	r10
7536	mov	r13,rax
7537	mov	r14,rdx
7538	mov	rax,QWORD[((0+160+0))+rbp]
7539	mul	r11
7540	imul	r15,r12
7541	add	r14,rax
7542	adc	r15,rdx
7543	mov	rax,QWORD[((8+160+0))+rbp]
7544	mov	r9,rax
7545	mul	r10
7546	add	r14,rax
7547	adc	rdx,0
7548	mov	r10,rdx
7549	mov	rax,QWORD[((8+160+0))+rbp]
7550	mul	r11
7551	add	r15,rax
7552	adc	rdx,0
7553	imul	r9,r12
7554	add	r15,r10
7555	adc	r9,rdx
; Fold: low 130 bits + 5*q, with 5*q formed as (hi & ~3) + (hi >> 2).
7556	mov	r10,r13
7557	mov	r11,r14
7558	mov	r12,r15
7559	and	r12,3
7560	mov	r13,r15
7561	and	r13,-4
7562	mov	r14,r9
7563	shrd	r15,r9,2
7564	shr	r9,2
7565	add	r15,r13
7566	adc	r9,r14
7567	add	r10,r15
7568	adc	r11,r9
7569	adc	r12,0
7570
7571	lea	rdi,[16+rdi]
7572$L$seal_avx2_tail_256_rounds_and_2xhash:
; First half of the double round on state A (rotates 16, 12, 8, 7, then
; lane rotation 12/8/4 on rows d/c/b) ...
7573	vpaddd	ymm0,ymm0,ymm4
7574	vpxor	ymm12,ymm12,ymm0
7575	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7576	vpaddd	ymm8,ymm8,ymm12
7577	vpxor	ymm4,ymm4,ymm8
7578	vpsrld	ymm3,ymm4,20
7579	vpslld	ymm4,ymm4,12
7580	vpxor	ymm4,ymm4,ymm3
7581	vpaddd	ymm0,ymm0,ymm4
7582	vpxor	ymm12,ymm12,ymm0
7583	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7584	vpaddd	ymm8,ymm8,ymm12
7585	vpxor	ymm4,ymm4,ymm8
7586	vpslld	ymm3,ymm4,7
7587	vpsrld	ymm4,ymm4,25
7588	vpxor	ymm4,ymm4,ymm3
7589	vpalignr	ymm12,ymm12,ymm12,12
7590	vpalignr	ymm8,ymm8,ymm8,8
7591	vpalignr	ymm4,ymm4,ymm4,4
; ... and the same on state B.
7592	vpaddd	ymm1,ymm1,ymm5
7593	vpxor	ymm13,ymm13,ymm1
7594	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7595	vpaddd	ymm9,ymm9,ymm13
7596	vpxor	ymm5,ymm5,ymm9
7597	vpsrld	ymm3,ymm5,20
7598	vpslld	ymm5,ymm5,12
7599	vpxor	ymm5,ymm5,ymm3
7600	vpaddd	ymm1,ymm1,ymm5
7601	vpxor	ymm13,ymm13,ymm1
7602	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7603	vpaddd	ymm9,ymm9,ymm13
7604	vpxor	ymm5,ymm5,ymm9
7605	vpslld	ymm3,ymm5,7
7606	vpsrld	ymm5,ymm5,25
7607	vpxor	ymm5,ymm5,ymm3
7608	vpalignr	ymm13,ymm13,ymm13,12
7609	vpalignr	ymm9,ymm9,ymm9,8
7610	vpalignr	ymm5,ymm5,ymm5,4
; Hash block at [rdi].
7611	add	r10,QWORD[((0+0))+rdi]
7612	adc	r11,QWORD[((8+0))+rdi]
7613	adc	r12,1
7614	mov	rax,QWORD[((0+160+0))+rbp]
7615	mov	r15,rax
7616	mul	r10
7617	mov	r13,rax
7618	mov	r14,rdx
7619	mov	rax,QWORD[((0+160+0))+rbp]
7620	mul	r11
7621	imul	r15,r12
7622	add	r14,rax
7623	adc	r15,rdx
7624	mov	rax,QWORD[((8+160+0))+rbp]
7625	mov	r9,rax
7626	mul	r10
7627	add	r14,rax
7628	adc	rdx,0
7629	mov	r10,rdx
7630	mov	rax,QWORD[((8+160+0))+rbp]
7631	mul	r11
7632	add	r15,rax
7633	adc	rdx,0
7634	imul	r9,r12
7635	add	r15,r10
7636	adc	r9,rdx
7637	mov	r10,r13
7638	mov	r11,r14
7639	mov	r12,r15
7640	and	r12,3
7641	mov	r13,r15
7642	and	r13,-4
7643	mov	r14,r9
7644	shrd	r15,r9,2
7645	shr	r9,2
7646	add	r15,r13
7647	adc	r9,r14
7648	add	r10,r15
7649	adc	r11,r9
7650	adc	r12,0
; Second half of the double round on state A; the closing lane rotation
; (4/8/12) undoes the earlier one ...
7651	vpaddd	ymm0,ymm0,ymm4
7652	vpxor	ymm12,ymm12,ymm0
7653	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7654	vpaddd	ymm8,ymm8,ymm12
7655	vpxor	ymm4,ymm4,ymm8
7656	vpsrld	ymm3,ymm4,20
7657	vpslld	ymm4,ymm4,12
7658	vpxor	ymm4,ymm4,ymm3
7659	vpaddd	ymm0,ymm0,ymm4
7660	vpxor	ymm12,ymm12,ymm0
7661	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7662	vpaddd	ymm8,ymm8,ymm12
7663	vpxor	ymm4,ymm4,ymm8
7664	vpslld	ymm3,ymm4,7
7665	vpsrld	ymm4,ymm4,25
7666	vpxor	ymm4,ymm4,ymm3
7667	vpalignr	ymm12,ymm12,ymm12,4
7668	vpalignr	ymm8,ymm8,ymm8,8
7669	vpalignr	ymm4,ymm4,ymm4,12
; ... and on state B.
7670	vpaddd	ymm1,ymm1,ymm5
7671	vpxor	ymm13,ymm13,ymm1
7672	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7673	vpaddd	ymm9,ymm9,ymm13
7674	vpxor	ymm5,ymm5,ymm9
7675	vpsrld	ymm3,ymm5,20
7676	vpslld	ymm5,ymm5,12
7677	vpxor	ymm5,ymm5,ymm3
7678	vpaddd	ymm1,ymm1,ymm5
7679	vpxor	ymm13,ymm13,ymm1
7680	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7681	vpaddd	ymm9,ymm9,ymm13
7682	vpxor	ymm5,ymm5,ymm9
7683	vpslld	ymm3,ymm5,7
7684	vpsrld	ymm5,ymm5,25
7685	vpxor	ymm5,ymm5,ymm3
7686	vpalignr	ymm13,ymm13,ymm13,4
7687	vpalignr	ymm9,ymm9,ymm9,8
7688	vpalignr	ymm5,ymm5,ymm5,12
; Hash block at [rdi+16].
7689	add	r10,QWORD[((0+16))+rdi]
7690	adc	r11,QWORD[((8+16))+rdi]
7691	adc	r12,1
7692	mov	rax,QWORD[((0+160+0))+rbp]
7693	mov	r15,rax
7694	mul	r10
7695	mov	r13,rax
7696	mov	r14,rdx
7697	mov	rax,QWORD[((0+160+0))+rbp]
7698	mul	r11
7699	imul	r15,r12
7700	add	r14,rax
7701	adc	r15,rdx
7702	mov	rax,QWORD[((8+160+0))+rbp]
7703	mov	r9,rax
7704	mul	r10
7705	add	r14,rax
7706	adc	rdx,0
7707	mov	r10,rdx
7708	mov	rax,QWORD[((8+160+0))+rbp]
7709	mul	r11
7710	add	r15,rax
7711	adc	rdx,0
7712	imul	r9,r12
7713	add	r15,r10
7714	adc	r9,rdx
7715	mov	r10,r13
7716	mov	r11,r14
7717	mov	r12,r15
7718	and	r12,3
7719	mov	r13,r15
7720	and	r13,-4
7721	mov	r14,r9
7722	shrd	r15,r9,2
7723	shr	r9,2
7724	add	r15,r13
7725	adc	r9,r14
7726	add	r10,r15
7727	adc	r11,r9
7728	adc	r12,0
7729
; Step past the two blocks hashed in the shared part of the pass, then run
; the two-stage countdown (rcx first, then r8).
7730	lea	rdi,[32+rdi]
7731	dec	rcx
7732	jg	NEAR $L$seal_avx2_tail_256_rounds_and_3xhash
7733	dec	r8
7734	jge	NEAR $L$seal_avx2_tail_256_rounds_and_2xhash
; Feed-forward add of the initial rows for both states.
7735	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
7736	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
7737	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
7738	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
7739	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7740	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7741	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7742	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State B: regroup per 128-bit lane (0x02 = low lanes, 0x13 = high lanes),
; XOR with 128 input bytes at [rsi] and store at [rdi].
7743	vperm2i128	ymm3,ymm5,ymm1,0x02
7744	vperm2i128	ymm5,ymm5,ymm1,0x13
7745	vperm2i128	ymm1,ymm13,ymm9,0x02
7746	vperm2i128	ymm9,ymm13,ymm9,0x13
7747	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
7748	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
7749	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
7750	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
7751	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
7752	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
7753	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
7754	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
; State A: regroup only; the keystream stays in ymm0, ymm4, ymm8, ymm12.
7755	vperm2i128	ymm3,ymm4,ymm0,0x13
7756	vperm2i128	ymm0,ymm4,ymm0,0x02
7757	vperm2i128	ymm4,ymm12,ymm8,0x02
7758	vperm2i128	ymm12,ymm12,ymm8,0x13
7759	vmovdqa	ymm8,ymm3
7760
; 128 input bytes consumed.  rdi is left pointing at the bytes just stored;
; rcx = 128 is presumably the count of stored-but-unhashed bytes expected
; by $L$seal_avx2_short_hash_remainder -- confirm against that label.
7761	mov	rcx,4*32
7762	lea	rsi,[128+rsi]
7763	sub	rbx,4*32
7764	jmp	NEAR $L$seal_avx2_short_hash_remainder
7765
; Tail setup for three ChaCha20 states. Each ymm register carries one state
; row for two blocks (one per 128-bit lane), so the three states produce
; 6 x 64 = 384 bytes of keystream.
7766$L$seal_avx2_tail_384:
; Row 0 is the "expand 32-byte k" constant; rows 1 and 2 are reloaded from the
; frame slots at rbp+160+64 and rbp+160+96 (presumably the key rows - they are
; written outside this section, confirm there).
7767	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7768	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7769	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
; Replicate rows 0-2 for the second and third state.
7770	vmovdqa	ymm1,ymm0
7771	vmovdqa	ymm5,ymm4
7772	vmovdqa	ymm9,ymm8
7773	vmovdqa	ymm2,ymm0
7774	vmovdqa	ymm6,ymm4
7775	vmovdqa	ymm10,ymm8
; Row 3 (counter row): $L$avx2_inc adds 2 to the first dword of each lane.
; ymm14 = saved counter + inc, ymm13 = ymm14 + inc, ymm12 = ymm13 + inc.
7776	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7777	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
7778	vpaddd	ymm13,ymm12,ymm14
7779	vpaddd	ymm12,ymm12,ymm13
; Save the three starting counter rows; they are added back to the state
; after the rounds.
7780	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7781	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
7782	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
7783
; Extra Poly1305 block for loop iterations that hash three 16-byte blocks:
; absorb the 16 bytes at rdi, advance rdi by 16, then fall through into the
; two-block round body below.
; Accumulator is r10 (low), r11, r12 (high); the multiplier is the two qwords
; at rbp+160 and rbp+168.
7784$L$seal_avx2_tail_384_rounds_and_3xhash:
; acc += 16 bytes at rdi, plus 1 in the third limb (the 2^128 bit).
7785	add	r10,QWORD[((0+0))+rdi]
7786	adc	r11,QWORD[((8+0))+rdi]
7787	adc	r12,1
; Schoolbook multiply by the low multiplier qword: r13 = low limb of the
; product, r14/r15 collect the next limbs.
7788	mov	rax,QWORD[((0+160+0))+rbp]
7789	mov	r15,rax
7790	mul	r10
7791	mov	r13,rax
7792	mov	r14,rdx
7793	mov	rax,QWORD[((0+160+0))+rbp]
7794	mul	r11
7795	imul	r15,r12
7796	add	r14,rax
7797	adc	r15,rdx
; Multiply by the high multiplier qword and accumulate; product ends up in
; r13, r14, r15, r9 (low to high).
7798	mov	rax,QWORD[((8+160+0))+rbp]
7799	mov	r9,rax
7800	mul	r10
7801	add	r14,rax
7802	adc	rdx,0
7803	mov	r10,rdx
7804	mov	rax,QWORD[((8+160+0))+rbp]
7805	mul	r11
7806	add	r15,rax
7807	adc	rdx,0
7808	imul	r9,r12
7809	add	r15,r10
7810	adc	r9,rdx
; Reduce: keep the low 130 bits (r13, r14, r15&3) and add back the bits above
; 2^130 as (t & ~3) + (t >> 2), i.e. 5 * (t >> 2), where t = r9:r15.
7811	mov	r10,r13
7812	mov	r11,r14
7813	mov	r12,r15
7814	and	r12,3
7815	mov	r13,r15
7816	and	r13,-4
7817	mov	r14,r9
7818	shrd	r15,r9,2
7819	shr	r9,2
7820	add	r15,r13
7821	adc	r9,r14
7822	add	r10,r15
7823	adc	r11,r9
7824	adc	r12,0
7825
; Step past the block just hashed.
7826	lea	rdi,[16+rdi]
; One ChaCha20 double round over the three states (ymm0/4/8/12, ymm1/5/9/13,
; ymm2/6/10/14), with two Poly1305 blocks (rdi+0 and rdi+16) hashed in between.
; ymm3 is the scratch register for the rotations. rdi advances by 32 at the end.
7827$L$seal_avx2_tail_384_rounds_and_2xhash:
; State 0, first quarter-round: rotations by 16 and 8 use the byte-shuffle
; masks, rotations by 12 and 7 use shift pairs.
7828	vpaddd	ymm0,ymm0,ymm4
7829	vpxor	ymm12,ymm12,ymm0
7830	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7831	vpaddd	ymm8,ymm8,ymm12
7832	vpxor	ymm4,ymm4,ymm8
7833	vpsrld	ymm3,ymm4,20
7834	vpslld	ymm4,ymm4,12
7835	vpxor	ymm4,ymm4,ymm3
7836	vpaddd	ymm0,ymm0,ymm4
7837	vpxor	ymm12,ymm12,ymm0
7838	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7839	vpaddd	ymm8,ymm8,ymm12
7840	vpxor	ymm4,ymm4,ymm8
7841	vpslld	ymm3,ymm4,7
7842	vpsrld	ymm4,ymm4,25
7843	vpxor	ymm4,ymm4,ymm3
; Rotate rows 1-3 within each lane so the next quarter-round works on the
; diagonals.
7844	vpalignr	ymm12,ymm12,ymm12,12
7845	vpalignr	ymm8,ymm8,ymm8,8
7846	vpalignr	ymm4,ymm4,ymm4,4
; State 1, first quarter-round and row rotation.
7847	vpaddd	ymm1,ymm1,ymm5
7848	vpxor	ymm13,ymm13,ymm1
7849	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7850	vpaddd	ymm9,ymm9,ymm13
7851	vpxor	ymm5,ymm5,ymm9
7852	vpsrld	ymm3,ymm5,20
7853	vpslld	ymm5,ymm5,12
7854	vpxor	ymm5,ymm5,ymm3
7855	vpaddd	ymm1,ymm1,ymm5
7856	vpxor	ymm13,ymm13,ymm1
7857	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7858	vpaddd	ymm9,ymm9,ymm13
7859	vpxor	ymm5,ymm5,ymm9
7860	vpslld	ymm3,ymm5,7
7861	vpsrld	ymm5,ymm5,25
7862	vpxor	ymm5,ymm5,ymm3
7863	vpalignr	ymm13,ymm13,ymm13,12
7864	vpalignr	ymm9,ymm9,ymm9,8
7865	vpalignr	ymm5,ymm5,ymm5,4
; Poly1305: absorb the 16 bytes at rdi+0 (add block plus 2^128 bit, multiply
; by the qwords at rbp+160/rbp+168, reduce the bits above 2^130).
7866	add	r10,QWORD[((0+0))+rdi]
7867	adc	r11,QWORD[((8+0))+rdi]
7868	adc	r12,1
7869	mov	rax,QWORD[((0+160+0))+rbp]
7870	mov	r15,rax
7871	mul	r10
7872	mov	r13,rax
7873	mov	r14,rdx
7874	mov	rax,QWORD[((0+160+0))+rbp]
7875	mul	r11
7876	imul	r15,r12
7877	add	r14,rax
7878	adc	r15,rdx
7879	mov	rax,QWORD[((8+160+0))+rbp]
7880	mov	r9,rax
7881	mul	r10
7882	add	r14,rax
7883	adc	rdx,0
7884	mov	r10,rdx
7885	mov	rax,QWORD[((8+160+0))+rbp]
7886	mul	r11
7887	add	r15,rax
7888	adc	rdx,0
7889	imul	r9,r12
7890	add	r15,r10
7891	adc	r9,rdx
7892	mov	r10,r13
7893	mov	r11,r14
7894	mov	r12,r15
7895	and	r12,3
7896	mov	r13,r15
7897	and	r13,-4
7898	mov	r14,r9
7899	shrd	r15,r9,2
7900	shr	r9,2
7901	add	r15,r13
7902	adc	r9,r14
7903	add	r10,r15
7904	adc	r11,r9
7905	adc	r12,0
; State 2, first quarter-round and row rotation.
7906	vpaddd	ymm2,ymm2,ymm6
7907	vpxor	ymm14,ymm14,ymm2
7908	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
7909	vpaddd	ymm10,ymm10,ymm14
7910	vpxor	ymm6,ymm6,ymm10
7911	vpsrld	ymm3,ymm6,20
7912	vpslld	ymm6,ymm6,12
7913	vpxor	ymm6,ymm6,ymm3
7914	vpaddd	ymm2,ymm2,ymm6
7915	vpxor	ymm14,ymm14,ymm2
7916	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
7917	vpaddd	ymm10,ymm10,ymm14
7918	vpxor	ymm6,ymm6,ymm10
7919	vpslld	ymm3,ymm6,7
7920	vpsrld	ymm6,ymm6,25
7921	vpxor	ymm6,ymm6,ymm3
7922	vpalignr	ymm14,ymm14,ymm14,12
7923	vpalignr	ymm10,ymm10,ymm10,8
7924	vpalignr	ymm6,ymm6,ymm6,4
; State 0, second quarter-round; the trailing vpalignr (4/8/12) undoes the
; earlier row rotation.
7925	vpaddd	ymm0,ymm0,ymm4
7926	vpxor	ymm12,ymm12,ymm0
7927	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7928	vpaddd	ymm8,ymm8,ymm12
7929	vpxor	ymm4,ymm4,ymm8
7930	vpsrld	ymm3,ymm4,20
7931	vpslld	ymm4,ymm4,12
7932	vpxor	ymm4,ymm4,ymm3
7933	vpaddd	ymm0,ymm0,ymm4
7934	vpxor	ymm12,ymm12,ymm0
7935	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7936	vpaddd	ymm8,ymm8,ymm12
7937	vpxor	ymm4,ymm4,ymm8
7938	vpslld	ymm3,ymm4,7
7939	vpsrld	ymm4,ymm4,25
7940	vpxor	ymm4,ymm4,ymm3
7941	vpalignr	ymm12,ymm12,ymm12,4
7942	vpalignr	ymm8,ymm8,ymm8,8
7943	vpalignr	ymm4,ymm4,ymm4,12
; Poly1305: absorb the 16 bytes at rdi+16 (same steps as the block above).
7944	add	r10,QWORD[((0+16))+rdi]
7945	adc	r11,QWORD[((8+16))+rdi]
7946	adc	r12,1
7947	mov	rax,QWORD[((0+160+0))+rbp]
7948	mov	r15,rax
7949	mul	r10
7950	mov	r13,rax
7951	mov	r14,rdx
7952	mov	rax,QWORD[((0+160+0))+rbp]
7953	mul	r11
7954	imul	r15,r12
7955	add	r14,rax
7956	adc	r15,rdx
7957	mov	rax,QWORD[((8+160+0))+rbp]
7958	mov	r9,rax
7959	mul	r10
7960	add	r14,rax
7961	adc	rdx,0
7962	mov	r10,rdx
7963	mov	rax,QWORD[((8+160+0))+rbp]
7964	mul	r11
7965	add	r15,rax
7966	adc	rdx,0
7967	imul	r9,r12
7968	add	r15,r10
7969	adc	r9,rdx
7970	mov	r10,r13
7971	mov	r11,r14
7972	mov	r12,r15
7973	and	r12,3
7974	mov	r13,r15
7975	and	r13,-4
7976	mov	r14,r9
7977	shrd	r15,r9,2
7978	shr	r9,2
7979	add	r15,r13
7980	adc	r9,r14
7981	add	r10,r15
7982	adc	r11,r9
7983	adc	r12,0
; State 1, second quarter-round and inverse row rotation.
7984	vpaddd	ymm1,ymm1,ymm5
7985	vpxor	ymm13,ymm13,ymm1
7986	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7987	vpaddd	ymm9,ymm9,ymm13
7988	vpxor	ymm5,ymm5,ymm9
7989	vpsrld	ymm3,ymm5,20
7990	vpslld	ymm5,ymm5,12
7991	vpxor	ymm5,ymm5,ymm3
7992	vpaddd	ymm1,ymm1,ymm5
7993	vpxor	ymm13,ymm13,ymm1
7994	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7995	vpaddd	ymm9,ymm9,ymm13
7996	vpxor	ymm5,ymm5,ymm9
7997	vpslld	ymm3,ymm5,7
7998	vpsrld	ymm5,ymm5,25
7999	vpxor	ymm5,ymm5,ymm3
8000	vpalignr	ymm13,ymm13,ymm13,4
8001	vpalignr	ymm9,ymm9,ymm9,8
8002	vpalignr	ymm5,ymm5,ymm5,12
; State 2, second quarter-round and inverse row rotation.
8003	vpaddd	ymm2,ymm2,ymm6
8004	vpxor	ymm14,ymm14,ymm2
8005	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
8006	vpaddd	ymm10,ymm10,ymm14
8007	vpxor	ymm6,ymm6,ymm10
8008	vpsrld	ymm3,ymm6,20
8009	vpslld	ymm6,ymm6,12
8010	vpxor	ymm6,ymm6,ymm3
8011	vpaddd	ymm2,ymm2,ymm6
8012	vpxor	ymm14,ymm14,ymm2
8013	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8014	vpaddd	ymm10,ymm10,ymm14
8015	vpxor	ymm6,ymm6,ymm10
8016	vpslld	ymm3,ymm6,7
8017	vpsrld	ymm6,ymm6,25
8018	vpxor	ymm6,ymm6,ymm3
8019	vpalignr	ymm14,ymm14,ymm14,4
8020	vpalignr	ymm10,ymm10,ymm10,8
8021	vpalignr	ymm6,ymm6,ymm6,12
8022
; Step past the two blocks hashed in this iteration.
8023	lea	rdi,[32+rdi]
; Loop control: while the decremented rcx is still > 0, repeat via the
; three-hash entry; afterwards keep repeating this two-hash body while the
; decremented r8 is >= 0. Both counters are initialised outside this section.
8024	dec	rcx
8025	jg	NEAR $L$seal_avx2_tail_384_rounds_and_3xhash
8026	dec	r8
8027	jge	NEAR $L$seal_avx2_tail_384_rounds_and_2xhash
; Finish of the 384-byte tail: add the starting state back into each of the
; three working states (constants, frame rows at rbp+160+64 / rbp+160+96, and
; the counter rows saved at rbp+160+224 / +192 / +160).
8028	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8029	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
8030	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
8031	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
8032	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8033	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
8034	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
8035	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8036	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8037	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
8038	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
8039	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 2 (ymm2/6/10/14): vperm2i128 0x02 gathers the low lanes of two rows and
; 0x13 the high lanes, turning row-per-register layout into 32-byte chunks of
; contiguous keystream. XOR with 128 input bytes at rsi and store at rdi.
8040	vperm2i128	ymm3,ymm6,ymm2,0x02
8041	vperm2i128	ymm6,ymm6,ymm2,0x13
8042	vperm2i128	ymm2,ymm14,ymm10,0x02
8043	vperm2i128	ymm10,ymm14,ymm10,0x13
8044	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
8045	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
8046	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
8047	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
8048	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
8049	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
8050	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
8051	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
; State 1 (ymm1/5/9/13): same treatment for input/output bytes 128..255.
8052	vperm2i128	ymm3,ymm5,ymm1,0x02
8053	vperm2i128	ymm5,ymm5,ymm1,0x13
8054	vperm2i128	ymm1,ymm13,ymm9,0x02
8055	vperm2i128	ymm9,ymm13,ymm9,0x13
8056	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
8057	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
8058	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
8059	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
8060	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
8061	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
8062	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
8063	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
; State 0 is only rearranged, not consumed: its 128 keystream bytes are left
; in ymm0, ymm4, ymm8, ymm12 (in that order) for $L$seal_avx2_short_loop.
8064	vperm2i128	ymm3,ymm4,ymm0,0x13
8065	vperm2i128	ymm0,ymm4,ymm0,0x02
8066	vperm2i128	ymm4,ymm12,ymm8,0x02
8067	vperm2i128	ymm12,ymm12,ymm8,0x13
8068	vmovdqa	ymm8,ymm3
8069
; rcx = the 256 bytes just stored at rdi, which $L$seal_avx2_short_hash_remainder
; will hash; advance the input pointer and reduce the remaining length in rbx.
8070	mov	rcx,8*32
8071	lea	rsi,[256+rsi]
8072	sub	rbx,8*32
8073	jmp	NEAR $L$seal_avx2_short_hash_remainder
8074
; Tail setup for four ChaCha20 states. Each ymm register carries one state row
; for two blocks (one per 128-bit lane), so the four states produce
; 8 x 64 = 512 bytes of keystream.
8075$L$seal_avx2_tail_512:
; Row 0 is the "expand 32-byte k" constant; rows 1 and 2 are reloaded from the
; frame slots at rbp+160+64 and rbp+160+96 (presumably the key rows - they are
; written outside this section, confirm there).
8076	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
8077	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
8078	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
; Replicate rows 0-2 for states 1, 2 and 3.
8079	vmovdqa	ymm1,ymm0
8080	vmovdqa	ymm5,ymm4
8081	vmovdqa	ymm9,ymm8
8082	vmovdqa	ymm2,ymm0
8083	vmovdqa	ymm6,ymm4
8084	vmovdqa	ymm10,ymm8
8085	vmovdqa	ymm3,ymm0
8086	vmovdqa	ymm7,ymm4
8087	vmovdqa	ymm11,ymm8
; Counter rows: $L$avx2_inc adds 2 to the first dword of each lane.
; ymm15 = saved counter + inc, then ymm14, ymm13, ymm12 each add inc again.
8088	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
8089	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
8090	vpaddd	ymm14,ymm12,ymm15
8091	vpaddd	ymm13,ymm12,ymm14
8092	vpaddd	ymm12,ymm12,ymm13
; Save the four starting counter rows; they are added back to the state after
; the rounds.
8093	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
8094	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
8095	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
8096	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
8097
; Extra Poly1305 block for loop iterations that hash three 16-byte blocks:
; absorb the 16 bytes at rdi, advance rdi by 16, then fall through into the
; two-block round body below. This variant multiplies with mulx.
; Accumulator is r10 (low), r11, r12 (high); the multiplier is the two qwords
; at rbp+160 and rbp+168.
8098$L$seal_avx2_tail_512_rounds_and_3xhash:
; acc += 16 bytes at rdi, plus 1 in the third limb (the 2^128 bit).
8099	add	r10,QWORD[((0+0))+rdi]
8100	adc	r11,QWORD[((8+0))+rdi]
8101	adc	r12,1
; Multiply by the low multiplier qword (mulx takes its implicit operand in
; rdx): r14:r13 = m0*r10, rdx:rax = m0*r11, r15 = m0*r12.
8102	mov	rdx,QWORD[((0+160+0))+rbp]
8103	mov	r15,rdx
8104	mulx	r14,r13,r10
8105	mulx	rdx,rax,r11
8106	imul	r15,r12
8107	add	r14,rax
8108	adc	r15,rdx
; Multiply by the high multiplier qword and accumulate; product ends up in
; r13, r14, r15, r9 (low to high).
8109	mov	rdx,QWORD[((8+160+0))+rbp]
8110	mulx	rax,r10,r10
8111	add	r14,r10
8112	mulx	r9,r11,r11
8113	adc	r15,r11
8114	adc	r9,0
8115	imul	rdx,r12
8116	add	r15,rax
8117	adc	r9,rdx
; Reduce: keep the low 130 bits (r13, r14, r15&3) and add back the bits above
; 2^130 as (t & ~3) + (t >> 2), i.e. 5 * (t >> 2), where t = r9:r15.
8118	mov	r10,r13
8119	mov	r11,r14
8120	mov	r12,r15
8121	and	r12,3
8122	mov	r13,r15
8123	and	r13,-4
8124	mov	r14,r9
8125	shrd	r15,r9,2
8126	shr	r9,2
8127	add	r15,r13
8128	adc	r9,r14
8129	add	r10,r15
8130	adc	r11,r9
8131	adc	r12,0
8132
; Step past the block just hashed.
8133	lea	rdi,[16+rdi]
; One ChaCha20 double round over four states (rows in ymm0-3, ymm4-7, ymm8-11,
; ymm12-15), interleaved with two mulx-based Poly1305 blocks (rdi+0, rdi+16).
; All sixteen ymm registers hold state, so ymm8 is repeatedly spilled to the
; frame slot at rbp+160+128 and reused as scratch / shuffle-mask register.
8134$L$seal_avx2_tail_512_rounds_and_2xhash:
; Spill ymm8 and load the rotate-by-16 shuffle mask in its place.
8135	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8136	vmovdqa	ymm8,YMMWORD[$L$rol16]
; First quarter-round, all four states in parallel: a += b; d ^= a; d <<<= 16.
8137	vpaddd	ymm3,ymm3,ymm7
8138	vpaddd	ymm2,ymm2,ymm6
8139	vpaddd	ymm1,ymm1,ymm5
8140	vpaddd	ymm0,ymm0,ymm4
8141	vpxor	ymm15,ymm15,ymm3
8142	vpxor	ymm14,ymm14,ymm2
8143	vpxor	ymm13,ymm13,ymm1
8144	vpxor	ymm12,ymm12,ymm0
8145	vpshufb	ymm15,ymm15,ymm8
8146	vpshufb	ymm14,ymm14,ymm8
8147	vpshufb	ymm13,ymm13,ymm8
8148	vpshufb	ymm12,ymm12,ymm8
; c += d; the spilled ymm8 row is folded in straight from memory.
8149	vpaddd	ymm11,ymm11,ymm15
8150	vpaddd	ymm10,ymm10,ymm14
8151	vpaddd	ymm9,ymm9,ymm13
8152	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8153	vpxor	ymm7,ymm7,ymm11
8154	vpxor	ymm6,ymm6,ymm10
; Poly1305 block 1: acc += 16 bytes at rdi+0, plus the 2^128 bit.
8155	add	r10,QWORD[((0+0))+rdi]
8156	adc	r11,QWORD[((8+0))+rdi]
8157	adc	r12,1
8158	vpxor	ymm5,ymm5,ymm9
8159	vpxor	ymm4,ymm4,ymm8
; Spill the updated row again; ymm8 becomes scratch for b <<<= 12.
8160	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8161	vpsrld	ymm8,ymm7,20
8162	vpslld	ymm7,ymm7,32-20
8163	vpxor	ymm7,ymm7,ymm8
8164	vpsrld	ymm8,ymm6,20
8165	vpslld	ymm6,ymm6,32-20
8166	vpxor	ymm6,ymm6,ymm8
8167	vpsrld	ymm8,ymm5,20
8168	vpslld	ymm5,ymm5,32-20
8169	vpxor	ymm5,ymm5,ymm8
8170	vpsrld	ymm8,ymm4,20
8171	vpslld	ymm4,ymm4,32-20
8172	vpxor	ymm4,ymm4,ymm8
; Rotate-by-8 shuffle mask for the second half of the quarter-round.
8173	vmovdqa	ymm8,YMMWORD[$L$rol8]
8174	vpaddd	ymm3,ymm3,ymm7
8175	vpaddd	ymm2,ymm2,ymm6
8176	vpaddd	ymm1,ymm1,ymm5
8177	vpaddd	ymm0,ymm0,ymm4
; Poly1305 block 1: multiply by the low multiplier qword at rbp+160.
8178	mov	rdx,QWORD[((0+160+0))+rbp]
8179	mov	r15,rdx
8180	mulx	r14,r13,r10
8181	mulx	rdx,rax,r11
8182	imul	r15,r12
8183	add	r14,rax
8184	adc	r15,rdx
8185	vpxor	ymm15,ymm15,ymm3
8186	vpxor	ymm14,ymm14,ymm2
8187	vpxor	ymm13,ymm13,ymm1
8188	vpxor	ymm12,ymm12,ymm0
8189	vpshufb	ymm15,ymm15,ymm8
8190	vpshufb	ymm14,ymm14,ymm8
8191	vpshufb	ymm13,ymm13,ymm8
8192	vpshufb	ymm12,ymm12,ymm8
8193	vpaddd	ymm11,ymm11,ymm15
8194	vpaddd	ymm10,ymm10,ymm14
8195	vpaddd	ymm9,ymm9,ymm13
8196	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8197	vpxor	ymm7,ymm7,ymm11
8198	vpxor	ymm6,ymm6,ymm10
8199	vpxor	ymm5,ymm5,ymm9
8200	vpxor	ymm4,ymm4,ymm8
; Spill again; ymm8 is scratch for b <<<= 7.
8201	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8202	vpsrld	ymm8,ymm7,25
8203	vpslld	ymm7,ymm7,32-25
8204	vpxor	ymm7,ymm7,ymm8
; Poly1305 block 1: multiply by the high multiplier qword at rbp+168.
8205	mov	rdx,QWORD[((8+160+0))+rbp]
8206	mulx	rax,r10,r10
8207	add	r14,r10
8208	mulx	r9,r11,r11
8209	adc	r15,r11
8210	adc	r9,0
8211	imul	rdx,r12
8212	vpsrld	ymm8,ymm6,25
8213	vpslld	ymm6,ymm6,32-25
8214	vpxor	ymm6,ymm6,ymm8
8215	vpsrld	ymm8,ymm5,25
8216	vpslld	ymm5,ymm5,32-25
8217	vpxor	ymm5,ymm5,ymm8
8218	vpsrld	ymm8,ymm4,25
8219	vpslld	ymm4,ymm4,32-25
8220	vpxor	ymm4,ymm4,ymm8
; Reload the spilled row, then rotate rows 1-3 within each lane (4/8/12 bytes)
; so the next quarter-round works on the diagonals.
8221	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
8222	vpalignr	ymm7,ymm7,ymm7,4
8223	vpalignr	ymm11,ymm11,ymm11,8
8224	vpalignr	ymm15,ymm15,ymm15,12
8225	vpalignr	ymm6,ymm6,ymm6,4
8226	vpalignr	ymm10,ymm10,ymm10,8
8227	vpalignr	ymm14,ymm14,ymm14,12
8228	vpalignr	ymm5,ymm5,ymm5,4
8229	vpalignr	ymm9,ymm9,ymm9,8
8230	vpalignr	ymm13,ymm13,ymm13,12
8231	vpalignr	ymm4,ymm4,ymm4,4
; Poly1305 block 1: last two additions of the product (r13, r14, r15, r9).
8232	add	r15,rax
8233	adc	r9,rdx
8234	vpalignr	ymm8,ymm8,ymm8,8
8235	vpalignr	ymm12,ymm12,ymm12,12
; Second quarter-round: same sequence as the first.
8236	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8237	vmovdqa	ymm8,YMMWORD[$L$rol16]
8238	vpaddd	ymm3,ymm3,ymm7
8239	vpaddd	ymm2,ymm2,ymm6
8240	vpaddd	ymm1,ymm1,ymm5
8241	vpaddd	ymm0,ymm0,ymm4
8242	vpxor	ymm15,ymm15,ymm3
8243	vpxor	ymm14,ymm14,ymm2
8244	vpxor	ymm13,ymm13,ymm1
8245	vpxor	ymm12,ymm12,ymm0
8246	vpshufb	ymm15,ymm15,ymm8
8247	vpshufb	ymm14,ymm14,ymm8
8248	vpshufb	ymm13,ymm13,ymm8
8249	vpshufb	ymm12,ymm12,ymm8
8250	vpaddd	ymm11,ymm11,ymm15
8251	vpaddd	ymm10,ymm10,ymm14
8252	vpaddd	ymm9,ymm9,ymm13
8253	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
; Poly1305 block 1: reduce - keep the low 130 bits and add back 5 * (t >> 2),
; where t = r9:r15.
8254	mov	r10,r13
8255	mov	r11,r14
8256	mov	r12,r15
8257	and	r12,3
8258	mov	r13,r15
8259	and	r13,-4
8260	mov	r14,r9
8261	shrd	r15,r9,2
8262	shr	r9,2
8263	add	r15,r13
8264	adc	r9,r14
8265	add	r10,r15
8266	adc	r11,r9
8267	adc	r12,0
8268	vpxor	ymm7,ymm7,ymm11
8269	vpxor	ymm6,ymm6,ymm10
8270	vpxor	ymm5,ymm5,ymm9
8271	vpxor	ymm4,ymm4,ymm8
8272	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8273	vpsrld	ymm8,ymm7,20
8274	vpslld	ymm7,ymm7,32-20
8275	vpxor	ymm7,ymm7,ymm8
8276	vpsrld	ymm8,ymm6,20
8277	vpslld	ymm6,ymm6,32-20
8278	vpxor	ymm6,ymm6,ymm8
8279	vpsrld	ymm8,ymm5,20
8280	vpslld	ymm5,ymm5,32-20
8281	vpxor	ymm5,ymm5,ymm8
8282	vpsrld	ymm8,ymm4,20
8283	vpslld	ymm4,ymm4,32-20
8284	vpxor	ymm4,ymm4,ymm8
8285	vmovdqa	ymm8,YMMWORD[$L$rol8]
8286	vpaddd	ymm3,ymm3,ymm7
8287	vpaddd	ymm2,ymm2,ymm6
; Poly1305 block 2: acc += 16 bytes at rdi+16, plus the 2^128 bit.
8288	add	r10,QWORD[((0+16))+rdi]
8289	adc	r11,QWORD[((8+16))+rdi]
8290	adc	r12,1
8291	vpaddd	ymm1,ymm1,ymm5
8292	vpaddd	ymm0,ymm0,ymm4
8293	vpxor	ymm15,ymm15,ymm3
8294	vpxor	ymm14,ymm14,ymm2
8295	vpxor	ymm13,ymm13,ymm1
8296	vpxor	ymm12,ymm12,ymm0
8297	vpshufb	ymm15,ymm15,ymm8
8298	vpshufb	ymm14,ymm14,ymm8
8299	vpshufb	ymm13,ymm13,ymm8
8300	vpshufb	ymm12,ymm12,ymm8
8301	vpaddd	ymm11,ymm11,ymm15
8302	vpaddd	ymm10,ymm10,ymm14
8303	vpaddd	ymm9,ymm9,ymm13
8304	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8305	vpxor	ymm7,ymm7,ymm11
8306	vpxor	ymm6,ymm6,ymm10
8307	vpxor	ymm5,ymm5,ymm9
8308	vpxor	ymm4,ymm4,ymm8
8309	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8310	vpsrld	ymm8,ymm7,25
; Poly1305 block 2: multiply by the low multiplier qword at rbp+160.
8311	mov	rdx,QWORD[((0+160+0))+rbp]
8312	mov	r15,rdx
8313	mulx	r14,r13,r10
8314	mulx	rdx,rax,r11
8315	imul	r15,r12
8316	add	r14,rax
8317	adc	r15,rdx
8318	vpslld	ymm7,ymm7,32-25
8319	vpxor	ymm7,ymm7,ymm8
8320	vpsrld	ymm8,ymm6,25
8321	vpslld	ymm6,ymm6,32-25
8322	vpxor	ymm6,ymm6,ymm8
8323	vpsrld	ymm8,ymm5,25
8324	vpslld	ymm5,ymm5,32-25
8325	vpxor	ymm5,ymm5,ymm8
8326	vpsrld	ymm8,ymm4,25
8327	vpslld	ymm4,ymm4,32-25
8328	vpxor	ymm4,ymm4,ymm8
; Reload the spilled row and undo the row rotation (12/8/4 bytes).
8329	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
8330	vpalignr	ymm7,ymm7,ymm7,12
8331	vpalignr	ymm11,ymm11,ymm11,8
8332	vpalignr	ymm15,ymm15,ymm15,4
8333	vpalignr	ymm6,ymm6,ymm6,12
8334	vpalignr	ymm10,ymm10,ymm10,8
8335	vpalignr	ymm14,ymm14,ymm14,4
8336	vpalignr	ymm5,ymm5,ymm5,12
8337	vpalignr	ymm9,ymm9,ymm9,8
; Poly1305 block 2: multiply by the high multiplier qword at rbp+168.
8338	mov	rdx,QWORD[((8+160+0))+rbp]
8339	mulx	rax,r10,r10
8340	add	r14,r10
8341	mulx	r9,r11,r11
8342	adc	r15,r11
8343	adc	r9,0
8344	imul	rdx,r12
8345	vpalignr	ymm13,ymm13,ymm13,4
8346	vpalignr	ymm4,ymm4,ymm4,12
8347	vpalignr	ymm8,ymm8,ymm8,8
8348	vpalignr	ymm12,ymm12,ymm12,4
8349
8350
8351
8352
8353
8354
8355
8356
8357
8358
8359
8360
8361
8362
8363
8364
; Poly1305 block 2: last two additions of the product.
8365	add	r15,rax
8366	adc	r9,rdx
8367
8368
8369
8370
8371
8372
8373
8374
8375
8376
8377
8378
8379
8380
8381
8382
8383
8384
8385
8386
; Poly1305 block 2: reduce - keep the low 130 bits and add back 5 * (t >> 2).
8387	mov	r10,r13
8388	mov	r11,r14
8389	mov	r12,r15
8390	and	r12,3
8391	mov	r13,r15
8392	and	r13,-4
8393	mov	r14,r9
8394	shrd	r15,r9,2
8395	shr	r9,2
8396	add	r15,r13
8397	adc	r9,r14
8398	add	r10,r15
8399	adc	r11,r9
8400	adc	r12,0
8401
; Step past the two blocks hashed in this iteration.
8402	lea	rdi,[32+rdi]
; Loop control: while the decremented rcx is still > 0, repeat via the
; three-hash entry; afterwards keep repeating this two-hash body while the
; decremented r8 is >= 0. Both counters are initialised outside this section.
8403	dec	rcx
8404	jg	NEAR $L$seal_avx2_tail_512_rounds_and_3xhash
8405	dec	r8
8406	jge	NEAR $L$seal_avx2_tail_512_rounds_and_2xhash
; Finish of the 512-byte tail: add the starting state back into each of the
; four working states (constants, frame rows at rbp+160+64 / rbp+160+96, and
; the counter rows saved at rbp+160+256 / +224 / +192 / +160).
8407	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
8408	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
8409	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
8410	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
8411	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8412	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
8413	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
8414	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
8415	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8416	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
8417	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
8418	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8419	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8420	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
8421	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
8422	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
8423
; No ymm register is free, so park ymm0 in the frame slot at rbp+160+128 and
; use it as the temporary for the lane permutation of state 3.
8424	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
; State 3 (ymm3/7/11/15): vperm2i128 0x02 gathers low lanes, 0x13 high lanes,
; giving contiguous keystream; XOR with input bytes 0..127 and store.
8425	vperm2i128	ymm0,ymm7,ymm3,0x02
8426	vperm2i128	ymm7,ymm7,ymm3,0x13
8427	vperm2i128	ymm3,ymm15,ymm11,0x02
8428	vperm2i128	ymm11,ymm15,ymm11,0x13
8429	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
8430	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
8431	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
8432	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
8433	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
8434	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
8435	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
8436	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
8437
; Restore ymm0; ymm3 is free now and serves as the temporary from here on.
8438	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
; State 2 (ymm2/6/10/14): input/output bytes 128..255.
8439	vperm2i128	ymm3,ymm6,ymm2,0x02
8440	vperm2i128	ymm6,ymm6,ymm2,0x13
8441	vperm2i128	ymm2,ymm14,ymm10,0x02
8442	vperm2i128	ymm10,ymm14,ymm10,0x13
8443	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
8444	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
8445	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
8446	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
8447	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
8448	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
8449	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
8450	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; State 1 (ymm1/5/9/13): input/output bytes 256..383.
8451	vperm2i128	ymm3,ymm5,ymm1,0x02
8452	vperm2i128	ymm5,ymm5,ymm1,0x13
8453	vperm2i128	ymm1,ymm13,ymm9,0x02
8454	vperm2i128	ymm9,ymm13,ymm9,0x13
8455	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
8456	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
8457	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
8458	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
8459	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
8460	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
8461	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
8462	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; State 0 is only rearranged, not consumed: its 128 keystream bytes are left
; in ymm0, ymm4, ymm8, ymm12 (in that order) for $L$seal_avx2_short_loop.
8463	vperm2i128	ymm3,ymm4,ymm0,0x13
8464	vperm2i128	ymm0,ymm4,ymm0,0x02
8465	vperm2i128	ymm4,ymm12,ymm8,0x02
8466	vperm2i128	ymm12,ymm12,ymm8,0x13
8467	vmovdqa	ymm8,ymm3
8468
; rcx = the 384 bytes just stored at rdi, which $L$seal_avx2_short_hash_remainder
; will hash; advance the input pointer and reduce the remaining length in rbx.
8469	mov	rcx,12*32
8470	lea	rsi,[384+rsi]
8471	sub	rbx,12*32
8472	jmp	NEAR $L$seal_avx2_short_hash_remainder
8473
; Short-input path using three two-block ChaCha20 states (384 bytes of
; keystream, of which the first 32 become the Poly1305 key, leaving 320 for
; data after the remaining 32 bytes of that block are skipped).
; On entry ymm0, ymm4, ymm8, ymm12 hold state rows 0-3; they are set up before
; this label, outside this section.
8474$L$seal_avx2_320:
; Replicate rows 0-2 into states 1 (ymm1/5/9) and 2 (ymm2/6/10).
8475	vmovdqa	ymm1,ymm0
8476	vmovdqa	ymm2,ymm0
8477	vmovdqa	ymm5,ymm4
8478	vmovdqa	ymm6,ymm4
8479	vmovdqa	ymm9,ymm8
8480	vmovdqa	ymm10,ymm8
; Counter rows for states 1 and 2: each is the previous one plus $L$avx2_inc
; (adds 2 to the first dword of each lane).
8481	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
8482	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
; Keep pristine copies of rows 1 and 2 in ymm7/ymm11 (not touched by the round
; loop) and of the three counter rows in the frame, for the final add-back.
8483	vmovdqa	ymm7,ymm4
8484	vmovdqa	ymm11,ymm8
8485	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
8486	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
8487	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
; r10 = 10 loop iterations, each one double round (20 rounds in total).
8488	mov	r10,10
; One ChaCha20 double round over three states (ymm0/4/8/12, ymm1/5/9/13,
; ymm2/6/10/14); r10 counts the remaining iterations. ymm3 is scratch for the
; shift-based rotations; no hashing is interleaved here.
8489$L$seal_avx2_320_rounds:
; State 0, first quarter-round (rotations 16, 12, 8, 7), then rotate rows 1-3
; within each lane so the next quarter-round works on the diagonals.
8490	vpaddd	ymm0,ymm0,ymm4
8491	vpxor	ymm12,ymm12,ymm0
8492	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8493	vpaddd	ymm8,ymm8,ymm12
8494	vpxor	ymm4,ymm4,ymm8
8495	vpsrld	ymm3,ymm4,20
8496	vpslld	ymm4,ymm4,12
8497	vpxor	ymm4,ymm4,ymm3
8498	vpaddd	ymm0,ymm0,ymm4
8499	vpxor	ymm12,ymm12,ymm0
8500	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8501	vpaddd	ymm8,ymm8,ymm12
8502	vpxor	ymm4,ymm4,ymm8
8503	vpslld	ymm3,ymm4,7
8504	vpsrld	ymm4,ymm4,25
8505	vpxor	ymm4,ymm4,ymm3
8506	vpalignr	ymm12,ymm12,ymm12,12
8507	vpalignr	ymm8,ymm8,ymm8,8
8508	vpalignr	ymm4,ymm4,ymm4,4
; State 1, first quarter-round and row rotation.
8509	vpaddd	ymm1,ymm1,ymm5
8510	vpxor	ymm13,ymm13,ymm1
8511	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8512	vpaddd	ymm9,ymm9,ymm13
8513	vpxor	ymm5,ymm5,ymm9
8514	vpsrld	ymm3,ymm5,20
8515	vpslld	ymm5,ymm5,12
8516	vpxor	ymm5,ymm5,ymm3
8517	vpaddd	ymm1,ymm1,ymm5
8518	vpxor	ymm13,ymm13,ymm1
8519	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8520	vpaddd	ymm9,ymm9,ymm13
8521	vpxor	ymm5,ymm5,ymm9
8522	vpslld	ymm3,ymm5,7
8523	vpsrld	ymm5,ymm5,25
8524	vpxor	ymm5,ymm5,ymm3
8525	vpalignr	ymm13,ymm13,ymm13,12
8526	vpalignr	ymm9,ymm9,ymm9,8
8527	vpalignr	ymm5,ymm5,ymm5,4
; State 2, first quarter-round and row rotation.
8528	vpaddd	ymm2,ymm2,ymm6
8529	vpxor	ymm14,ymm14,ymm2
8530	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
8531	vpaddd	ymm10,ymm10,ymm14
8532	vpxor	ymm6,ymm6,ymm10
8533	vpsrld	ymm3,ymm6,20
8534	vpslld	ymm6,ymm6,12
8535	vpxor	ymm6,ymm6,ymm3
8536	vpaddd	ymm2,ymm2,ymm6
8537	vpxor	ymm14,ymm14,ymm2
8538	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8539	vpaddd	ymm10,ymm10,ymm14
8540	vpxor	ymm6,ymm6,ymm10
8541	vpslld	ymm3,ymm6,7
8542	vpsrld	ymm6,ymm6,25
8543	vpxor	ymm6,ymm6,ymm3
8544	vpalignr	ymm14,ymm14,ymm14,12
8545	vpalignr	ymm10,ymm10,ymm10,8
8546	vpalignr	ymm6,ymm6,ymm6,4
; State 0, second quarter-round; the trailing vpalignr (4/8/12) undoes the
; earlier row rotation.
8547	vpaddd	ymm0,ymm0,ymm4
8548	vpxor	ymm12,ymm12,ymm0
8549	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8550	vpaddd	ymm8,ymm8,ymm12
8551	vpxor	ymm4,ymm4,ymm8
8552	vpsrld	ymm3,ymm4,20
8553	vpslld	ymm4,ymm4,12
8554	vpxor	ymm4,ymm4,ymm3
8555	vpaddd	ymm0,ymm0,ymm4
8556	vpxor	ymm12,ymm12,ymm0
8557	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8558	vpaddd	ymm8,ymm8,ymm12
8559	vpxor	ymm4,ymm4,ymm8
8560	vpslld	ymm3,ymm4,7
8561	vpsrld	ymm4,ymm4,25
8562	vpxor	ymm4,ymm4,ymm3
8563	vpalignr	ymm12,ymm12,ymm12,4
8564	vpalignr	ymm8,ymm8,ymm8,8
8565	vpalignr	ymm4,ymm4,ymm4,12
; State 1, second quarter-round and inverse row rotation.
8566	vpaddd	ymm1,ymm1,ymm5
8567	vpxor	ymm13,ymm13,ymm1
8568	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8569	vpaddd	ymm9,ymm9,ymm13
8570	vpxor	ymm5,ymm5,ymm9
8571	vpsrld	ymm3,ymm5,20
8572	vpslld	ymm5,ymm5,12
8573	vpxor	ymm5,ymm5,ymm3
8574	vpaddd	ymm1,ymm1,ymm5
8575	vpxor	ymm13,ymm13,ymm1
8576	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8577	vpaddd	ymm9,ymm9,ymm13
8578	vpxor	ymm5,ymm5,ymm9
8579	vpslld	ymm3,ymm5,7
8580	vpsrld	ymm5,ymm5,25
8581	vpxor	ymm5,ymm5,ymm3
8582	vpalignr	ymm13,ymm13,ymm13,4
8583	vpalignr	ymm9,ymm9,ymm9,8
8584	vpalignr	ymm5,ymm5,ymm5,12
; State 2, second quarter-round and inverse row rotation.
8585	vpaddd	ymm2,ymm2,ymm6
8586	vpxor	ymm14,ymm14,ymm2
8587	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
8588	vpaddd	ymm10,ymm10,ymm14
8589	vpxor	ymm6,ymm6,ymm10
8590	vpsrld	ymm3,ymm6,20
8591	vpslld	ymm6,ymm6,12
8592	vpxor	ymm6,ymm6,ymm3
8593	vpaddd	ymm2,ymm2,ymm6
8594	vpxor	ymm14,ymm14,ymm2
8595	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8596	vpaddd	ymm10,ymm10,ymm14
8597	vpxor	ymm6,ymm6,ymm10
8598	vpslld	ymm3,ymm6,7
8599	vpsrld	ymm6,ymm6,25
8600	vpxor	ymm6,ymm6,ymm3
8601	vpalignr	ymm14,ymm14,ymm14,4
8602	vpalignr	ymm10,ymm10,ymm10,8
8603	vpalignr	ymm6,ymm6,ymm6,12
8604
; Repeat until the iteration counter reaches zero.
8605	dec	r10
8606	jne	NEAR $L$seal_avx2_320_rounds
; Finish of the 320 path: add the starting state back. Row 0 is the constant,
; rows 1/2 come from ymm7/ymm11 (copies of ymm4/ymm8 made at $L$seal_avx2_320),
; and the counter rows come from the frame slots written there.
8607	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8608	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8609	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8610	vpaddd	ymm4,ymm4,ymm7
8611	vpaddd	ymm5,ymm5,ymm7
8612	vpaddd	ymm6,ymm6,ymm7
8613	vpaddd	ymm8,ymm8,ymm11
8614	vpaddd	ymm9,ymm9,ymm11
8615	vpaddd	ymm10,ymm10,ymm11
8616	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
8617	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8618	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
; ymm3 = low lanes of rows 0 and 1 of state 0, i.e. the first 32 keystream
; bytes.
8619	vperm2i128	ymm3,ymm4,ymm0,0x02
8620
; Mask them with $L$clamp (clamps the low 16 bytes, passes the high 16 through)
; and store at rbp+160, the slot the Poly1305 multiply code reads.
8621	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
8622	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
8623
; Rearrange the rest into 32-byte keystream chunks in the order consumed by
; $L$seal_avx2_short_loop: ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13,
; ymm2, ymm6. The low-lane halves of state 0's rows 2 and 3 are not kept.
8624	vperm2i128	ymm0,ymm4,ymm0,0x13
8625	vperm2i128	ymm4,ymm12,ymm8,0x13
8626	vperm2i128	ymm8,ymm5,ymm1,0x02
8627	vperm2i128	ymm12,ymm13,ymm9,0x02
8628	vperm2i128	ymm1,ymm5,ymm1,0x13
8629	vperm2i128	ymm5,ymm13,ymm9,0x13
8630	vperm2i128	ymm9,ymm6,ymm2,0x02
8631	vperm2i128	ymm13,ymm14,ymm10,0x02
8632	vperm2i128	ymm2,ymm6,ymm2,0x13
8633	vperm2i128	ymm6,ymm14,ymm10,0x13
8634	jmp	NEAR $L$seal_avx2_short
8635
; Short-input path using two two-block ChaCha20 states (256 bytes of
; keystream, of which the first 32 become the Poly1305 key, leaving 192 for
; data after the remaining 32 bytes of that block are skipped).
; On entry ymm0, ymm4, ymm8, ymm12 hold state rows 0-3; they are set up before
; this label, outside this section.
8636$L$seal_avx2_192:
; State 1 rows 0-2 go to ymm1/5/9. ymm2/6/10 receive the same values but are
; not touched by the round loop, so they preserve the starting rows for the
; final add-back.
8637	vmovdqa	ymm1,ymm0
8638	vmovdqa	ymm2,ymm0
8639	vmovdqa	ymm5,ymm4
8640	vmovdqa	ymm6,ymm4
8641	vmovdqa	ymm9,ymm8
8642	vmovdqa	ymm10,ymm8
; State 1 counter row = state 0 counter row + $L$avx2_inc; ymm11/ymm15 keep the
; starting counter rows of states 0 and 1.
8643	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
8644	vmovdqa	ymm11,ymm12
8645	vmovdqa	ymm15,ymm13
; r10 = 10 loop iterations, each one double round (20 rounds in total).
8646	mov	r10,10
; One ChaCha20 double round over two states (ymm0/4/8/12 and ymm1/5/9/13);
; r10 counts the remaining iterations. ymm3 is scratch for the shift-based
; rotations; no hashing is interleaved here.
8647$L$seal_avx2_192_rounds:
; State 0, first quarter-round (rotations 16, 12, 8, 7), then rotate rows 1-3
; within each lane so the next quarter-round works on the diagonals.
8648	vpaddd	ymm0,ymm0,ymm4
8649	vpxor	ymm12,ymm12,ymm0
8650	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8651	vpaddd	ymm8,ymm8,ymm12
8652	vpxor	ymm4,ymm4,ymm8
8653	vpsrld	ymm3,ymm4,20
8654	vpslld	ymm4,ymm4,12
8655	vpxor	ymm4,ymm4,ymm3
8656	vpaddd	ymm0,ymm0,ymm4
8657	vpxor	ymm12,ymm12,ymm0
8658	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8659	vpaddd	ymm8,ymm8,ymm12
8660	vpxor	ymm4,ymm4,ymm8
8661	vpslld	ymm3,ymm4,7
8662	vpsrld	ymm4,ymm4,25
8663	vpxor	ymm4,ymm4,ymm3
8664	vpalignr	ymm12,ymm12,ymm12,12
8665	vpalignr	ymm8,ymm8,ymm8,8
8666	vpalignr	ymm4,ymm4,ymm4,4
; State 1, first quarter-round and row rotation.
8667	vpaddd	ymm1,ymm1,ymm5
8668	vpxor	ymm13,ymm13,ymm1
8669	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8670	vpaddd	ymm9,ymm9,ymm13
8671	vpxor	ymm5,ymm5,ymm9
8672	vpsrld	ymm3,ymm5,20
8673	vpslld	ymm5,ymm5,12
8674	vpxor	ymm5,ymm5,ymm3
8675	vpaddd	ymm1,ymm1,ymm5
8676	vpxor	ymm13,ymm13,ymm1
8677	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8678	vpaddd	ymm9,ymm9,ymm13
8679	vpxor	ymm5,ymm5,ymm9
8680	vpslld	ymm3,ymm5,7
8681	vpsrld	ymm5,ymm5,25
8682	vpxor	ymm5,ymm5,ymm3
8683	vpalignr	ymm13,ymm13,ymm13,12
8684	vpalignr	ymm9,ymm9,ymm9,8
8685	vpalignr	ymm5,ymm5,ymm5,4
; State 0, second quarter-round; the trailing vpalignr (4/8/12) undoes the
; earlier row rotation.
8686	vpaddd	ymm0,ymm0,ymm4
8687	vpxor	ymm12,ymm12,ymm0
8688	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8689	vpaddd	ymm8,ymm8,ymm12
8690	vpxor	ymm4,ymm4,ymm8
8691	vpsrld	ymm3,ymm4,20
8692	vpslld	ymm4,ymm4,12
8693	vpxor	ymm4,ymm4,ymm3
8694	vpaddd	ymm0,ymm0,ymm4
8695	vpxor	ymm12,ymm12,ymm0
8696	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8697	vpaddd	ymm8,ymm8,ymm12
8698	vpxor	ymm4,ymm4,ymm8
8699	vpslld	ymm3,ymm4,7
8700	vpsrld	ymm4,ymm4,25
8701	vpxor	ymm4,ymm4,ymm3
8702	vpalignr	ymm12,ymm12,ymm12,4
8703	vpalignr	ymm8,ymm8,ymm8,8
8704	vpalignr	ymm4,ymm4,ymm4,12
; State 1, second quarter-round and inverse row rotation.
8705	vpaddd	ymm1,ymm1,ymm5
8706	vpxor	ymm13,ymm13,ymm1
8707	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8708	vpaddd	ymm9,ymm9,ymm13
8709	vpxor	ymm5,ymm5,ymm9
8710	vpsrld	ymm3,ymm5,20
8711	vpslld	ymm5,ymm5,12
8712	vpxor	ymm5,ymm5,ymm3
8713	vpaddd	ymm1,ymm1,ymm5
8714	vpxor	ymm13,ymm13,ymm1
8715	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8716	vpaddd	ymm9,ymm9,ymm13
8717	vpxor	ymm5,ymm5,ymm9
8718	vpslld	ymm3,ymm5,7
8719	vpsrld	ymm5,ymm5,25
8720	vpxor	ymm5,ymm5,ymm3
8721	vpalignr	ymm13,ymm13,ymm13,4
8722	vpalignr	ymm9,ymm9,ymm9,8
8723	vpalignr	ymm5,ymm5,ymm5,12
8724
; Repeat until the iteration counter reaches zero.
8725	dec	r10
8726	jne	NEAR $L$seal_avx2_192_rounds
; Finish of the 192 path: add the starting state back. ymm2/6/10 hold the
; starting rows 0-2 and ymm11/ymm15 the starting counter rows (all copied at
; $L$seal_avx2_192 and left untouched by the round loop).
8727	vpaddd	ymm0,ymm0,ymm2
8728	vpaddd	ymm1,ymm1,ymm2
8729	vpaddd	ymm4,ymm4,ymm6
8730	vpaddd	ymm5,ymm5,ymm6
8731	vpaddd	ymm8,ymm8,ymm10
8732	vpaddd	ymm9,ymm9,ymm10
8733	vpaddd	ymm12,ymm12,ymm11
8734	vpaddd	ymm13,ymm13,ymm15
; ymm3 = low lanes of rows 0 and 1 of state 0, i.e. the first 32 keystream
; bytes.
8735	vperm2i128	ymm3,ymm4,ymm0,0x02
8736
; Mask them with $L$clamp (clamps the low 16 bytes, passes the high 16 through)
; and store at rbp+160, the slot the Poly1305 multiply code reads.
8737	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
8738	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
8739
; Rearrange the rest into 32-byte keystream chunks in the order consumed by
; $L$seal_avx2_short_loop: ymm0, ymm4, ymm8, ymm12, ymm1, ymm5. The low-lane
; halves of state 0's rows 2 and 3 are not kept. Execution then falls through
; into $L$seal_avx2_short.
8740	vperm2i128	ymm0,ymm4,ymm0,0x13
8741	vperm2i128	ymm4,ymm12,ymm8,0x13
8742	vperm2i128	ymm8,ymm5,ymm1,0x02
8743	vperm2i128	ymm12,ymm13,ymm9,0x02
8744	vperm2i128	ymm1,ymm5,ymm1,0x13
8745	vperm2i128	ymm5,ymm13,ymm9,0x13
; Common entry for the short paths once the keystream registers and the
; Poly1305 key slot at rbp+160 are ready.
8746$L$seal_avx2_short:
; Register-to-itself move: has no effect on r8 (emitted as-is by the
; generator).
8747	mov	r8,r8
; poly_hash_ad_internal is defined elsewhere in the file; presumably it
; absorbs the additional data into the accumulator, with r8 as its length -
; confirm against the helper.
8748	call	poly_hash_ad_internal
; rcx = 0 pending bytes to hash, so the remainder loop below is skipped and
; control reaches $L$seal_avx2_short_loop directly.
8749	xor	rcx,rcx
; Hash the rcx bytes that start at rdi, 16 at a time, advancing rdi as it goes.
; Once fewer than 16 pending bytes remain, continue at $L$seal_avx2_short_loop.
; Accumulator is r10 (low), r11, r12 (high); the multiplier is the two qwords
; at rbp+160 and rbp+168.
8750$L$seal_avx2_short_hash_remainder:
8751	cmp	rcx,16
8752	jb	NEAR $L$seal_avx2_short_loop
; acc += 16 bytes at rdi, plus 1 in the third limb (the 2^128 bit).
8753	add	r10,QWORD[((0+0))+rdi]
8754	adc	r11,QWORD[((8+0))+rdi]
8755	adc	r12,1
; Schoolbook multiply by the low multiplier qword.
8756	mov	rax,QWORD[((0+160+0))+rbp]
8757	mov	r15,rax
8758	mul	r10
8759	mov	r13,rax
8760	mov	r14,rdx
8761	mov	rax,QWORD[((0+160+0))+rbp]
8762	mul	r11
8763	imul	r15,r12
8764	add	r14,rax
8765	adc	r15,rdx
; Multiply by the high multiplier qword and accumulate; product ends up in
; r13, r14, r15, r9 (low to high).
8766	mov	rax,QWORD[((8+160+0))+rbp]
8767	mov	r9,rax
8768	mul	r10
8769	add	r14,rax
8770	adc	rdx,0
8771	mov	r10,rdx
8772	mov	rax,QWORD[((8+160+0))+rbp]
8773	mul	r11
8774	add	r15,rax
8775	adc	rdx,0
8776	imul	r9,r12
8777	add	r15,r10
8778	adc	r9,rdx
; Reduce: keep the low 130 bits (r13, r14, r15&3) and add back the bits above
; 2^130 as (t & ~3) + (t >> 2), i.e. 5 * (t >> 2), where t = r9:r15.
8779	mov	r10,r13
8780	mov	r11,r14
8781	mov	r12,r15
8782	and	r12,3
8783	mov	r13,r15
8784	and	r13,-4
8785	mov	r14,r9
8786	shrd	r15,r9,2
8787	shr	r9,2
8788	add	r15,r13
8789	adc	r9,r14
8790	add	r10,r15
8791	adc	r11,r9
8792	adc	r12,0
8793
; One block consumed: 16 fewer pending bytes, pointer moves on.
8794	sub	rcx,16
8795	add	rdi,16
8796	jmp	NEAR $L$seal_avx2_short_hash_remainder
; Process the remaining input 32 bytes at a time while rbx (bytes left) >= 32:
; XOR input at rsi with the keystream in ymm0, store at rdi, hash the 32 bytes
; just stored, then shift the next keystream register into ymm0.
8797$L$seal_avx2_short_loop:
8798	cmp	rbx,32
8799	jb	NEAR $L$seal_avx2_short_tail
8800	sub	rbx,32
8801
; Encrypt one 32-byte chunk.
8802	vpxor	ymm0,ymm0,YMMWORD[rsi]
8803	vmovdqu	YMMWORD[rdi],ymm0
8804	lea	rsi,[32+rsi]
8805
; Poly1305 over the first 16 output bytes (rdi+0): add block plus 2^128 bit,
; multiply by the qwords at rbp+160/rbp+168, reduce the bits above 2^130.
8806	add	r10,QWORD[((0+0))+rdi]
8807	adc	r11,QWORD[((8+0))+rdi]
8808	adc	r12,1
8809	mov	rax,QWORD[((0+160+0))+rbp]
8810	mov	r15,rax
8811	mul	r10
8812	mov	r13,rax
8813	mov	r14,rdx
8814	mov	rax,QWORD[((0+160+0))+rbp]
8815	mul	r11
8816	imul	r15,r12
8817	add	r14,rax
8818	adc	r15,rdx
8819	mov	rax,QWORD[((8+160+0))+rbp]
8820	mov	r9,rax
8821	mul	r10
8822	add	r14,rax
8823	adc	rdx,0
8824	mov	r10,rdx
8825	mov	rax,QWORD[((8+160+0))+rbp]
8826	mul	r11
8827	add	r15,rax
8828	adc	rdx,0
8829	imul	r9,r12
8830	add	r15,r10
8831	adc	r9,rdx
8832	mov	r10,r13
8833	mov	r11,r14
8834	mov	r12,r15
8835	and	r12,3
8836	mov	r13,r15
8837	and	r13,-4
8838	mov	r14,r9
8839	shrd	r15,r9,2
8840	shr	r9,2
8841	add	r15,r13
8842	adc	r9,r14
8843	add	r10,r15
8844	adc	r11,r9
8845	adc	r12,0
; Poly1305 over the second 16 output bytes (rdi+16), same steps.
8846	add	r10,QWORD[((0+16))+rdi]
8847	adc	r11,QWORD[((8+16))+rdi]
8848	adc	r12,1
8849	mov	rax,QWORD[((0+160+0))+rbp]
8850	mov	r15,rax
8851	mul	r10
8852	mov	r13,rax
8853	mov	r14,rdx
8854	mov	rax,QWORD[((0+160+0))+rbp]
8855	mul	r11
8856	imul	r15,r12
8857	add	r14,rax
8858	adc	r15,rdx
8859	mov	rax,QWORD[((8+160+0))+rbp]
8860	mov	r9,rax
8861	mul	r10
8862	add	r14,rax
8863	adc	rdx,0
8864	mov	r10,rdx
8865	mov	rax,QWORD[((8+160+0))+rbp]
8866	mul	r11
8867	add	r15,rax
8868	adc	rdx,0
8869	imul	r9,r12
8870	add	r15,r10
8871	adc	r9,rdx
8872	mov	r10,r13
8873	mov	r11,r14
8874	mov	r12,r15
8875	and	r12,3
8876	mov	r13,r15
8877	and	r13,-4
8878	mov	r14,r9
8879	shrd	r15,r9,2
8880	shr	r9,2
8881	add	r15,r13
8882	adc	r9,r14
8883	add	r10,r15
8884	adc	r11,r9
8885	adc	r12,0
8886
8887	lea	rdi,[32+rdi]
8888
; Shift the keystream queue down by one register:
; ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9 <- ymm13 <- ymm2 <- ymm6.
8889	vmovdqa	ymm0,ymm4
8890	vmovdqa	ymm4,ymm8
8891	vmovdqa	ymm8,ymm12
8892	vmovdqa	ymm12,ymm1
8893	vmovdqa	ymm1,ymm5
8894	vmovdqa	ymm5,ymm9
8895	vmovdqa	ymm9,ymm13
8896	vmovdqa	ymm13,ymm2
8897	vmovdqa	ymm2,ymm6
8898	jmp	NEAR $L$seal_avx2_short_loop
; Fewer than 32 bytes remain. If at least 16 are left, encrypt and hash one
; 16-byte block with the low half of ymm0, then move the high half of ymm0
; into xmm0 for whatever follows. Otherwise go straight to the exit.
8899$L$seal_avx2_short_tail:
8900	cmp	rbx,16
8901	jb	NEAR $L$seal_avx2_exit
8902	sub	rbx,16
; xmm3 receives the ciphertext so the keystream in ymm0 stays intact.
8903	vpxor	xmm3,xmm0,XMMWORD[rsi]
8904	vmovdqu	XMMWORD[rdi],xmm3
8905	lea	rsi,[16+rsi]
; Poly1305 over the 16 bytes just stored: add block plus 2^128 bit, multiply
; by the qwords at rbp+160/rbp+168, reduce the bits above 2^130.
8906	add	r10,QWORD[((0+0))+rdi]
8907	adc	r11,QWORD[((8+0))+rdi]
8908	adc	r12,1
8909	mov	rax,QWORD[((0+160+0))+rbp]
8910	mov	r15,rax
8911	mul	r10
8912	mov	r13,rax
8913	mov	r14,rdx
8914	mov	rax,QWORD[((0+160+0))+rbp]
8915	mul	r11
8916	imul	r15,r12
8917	add	r14,rax
8918	adc	r15,rdx
8919	mov	rax,QWORD[((8+160+0))+rbp]
8920	mov	r9,rax
8921	mul	r10
8922	add	r14,rax
8923	adc	rdx,0
8924	mov	r10,rdx
8925	mov	rax,QWORD[((8+160+0))+rbp]
8926	mul	r11
8927	add	r15,rax
8928	adc	rdx,0
8929	imul	r9,r12
8930	add	r15,r10
8931	adc	r9,rdx
8932	mov	r10,r13
8933	mov	r11,r14
8934	mov	r12,r15
8935	and	r12,3
8936	mov	r13,r15
8937	and	r13,-4
8938	mov	r14,r9
8939	shrd	r15,r9,2
8940	shr	r9,2
8941	add	r15,r13
8942	adc	r9,r14
8943	add	r10,r15
8944	adc	r11,r9
8945	adc	r12,0
8946
8947	lea	rdi,[16+rdi]
; xmm0 = upper 16 bytes of ymm0, the next unused keystream.
8948	vextracti128	xmm0,ymm0,1
; Leave the AVX2 code: zero the upper halves of all ymm registers, then hand
; over to $L$seal_sse_tail_16 (defined elsewhere in the file; presumably it
; handles the final sub-16-byte remainder with xmm0 as keystream - confirm
; there).
8949$L$seal_avx2_exit:
8950	vzeroupper
8951	jmp	NEAR $L$seal_sse_tail_16
8952
8953
8954%else
8955; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
8956ret
8957%endif
8958