• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
9%include "ring_core_generated/prefix_symbols_nasm.inc"
10section	.text code align=64
11
12EXTERN	OPENSSL_ia32cap_P
13
14chacha20_poly1305_constants:
; Read-only tables shared by the ChaCha20-Poly1305 routines in this file.
; The table starts on a 64-byte boundary and every entry below is a multiple
; of 16 bytes long, so each label is 16-byte aligned (the code addresses them
; with movdqa / paddd / pand / pshufb memory operands).
15
16ALIGN	64
; First ChaCha20 state row: the ASCII bytes "expand 32-byte k", stored twice
; (2 x 16 bytes). The SSE code in this file loads the first 16 bytes.
17$L$chacha20_consts:
18DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
19DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
; pshufb control bytes: output byte i is taken from input byte table[i].
; The per-dword order 3,0,1,2 rotates each 32-bit lane left by 8 bits.
; Stored twice (2 x 16 bytes).
20$L$rol8:
21DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
22DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; pshufb control bytes: per-dword order 2,3,0,1 rotates each 32-bit lane
; left by 16 bits. Stored twice (2 x 16 bytes).
23$L$rol16:
24DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
25DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; Four zero dwords. Because $L$sse_inc follows immediately, a 32-byte read
; from this label yields the dwords {0,0,0,0, 1,0,0,0}.
; NOTE(review): presumably consumed that way by the AVX2 path, which is not
; visible in this part of the file -- confirm.
26$L$avx2_init:
27	DD	0,0,0,0
; Added with paddd to the fourth state row: increments its lowest dword
; (the block counter position) by 1 and leaves the other three dwords alone.
28$L$sse_inc:
29	DD	1,0,0,0
; Same shape as $L$sse_inc but with 2 in the lowest dword of each 16-byte
; half. NOTE(review): users are outside this view; presumably the AVX2
; counter step -- confirm.
30$L$avx2_inc:
31	DD	2,0,0,0,2,0,0,0
; Mask for the Poly1305 multiplier r. chacha20_poly1305_open ANDs (pand) the
; first 16 bytes into the first row of its initial ChaCha20 block before
; storing that row at [160+rbp]. The second 16 bytes are all ones, i.e. they
; leave data unchanged if the constant is applied as one 32-byte mask.
; NOTE(review): the 32-byte use is not visible here -- confirm.
32$L$clamp:
33	DQ	0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
34	DQ	0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
35ALIGN	16
; Sixteen 16-byte masks. Row k (k = 0..15) has its first k+1 bytes set to
; 0xff and the remaining bytes zero.
; NOTE(review): users are outside this view; presumably selects the leading
; bytes of a partial 16-byte block -- confirm.
36$L$and_masks:
37DB	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
38DB	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
39DB	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40DB	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41DB	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42DB	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
45DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
46DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
47DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
48DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
49DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
50DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
51DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
52DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
53
54
55ALIGN	64
56poly_hash_ad_internal:
;-----------------------------------------------------------------------
; poly_hash_ad_internal
; Starts a fresh Poly1305 accumulator h and absorbs the additional data
; (AD) into it, 16 bytes at a time; a final partial block is zero-padded
; to 16 bytes. For every block:  h = (h + block + 2^128) * r, followed by
; a partial reduction modulo 2^130 - 5.
;
; Internal helper with a private register contract (not a standard-ABI
; function): it takes rbp as an implicit input and overwrites r12-r15
; without saving them, so the caller must have preserved those itself.
;
; In:    rcx = pointer to the AD bytes
;        r8  = AD length in bytes
;        rbp = frame base; [160+rbp] = r0 (low qword of r),
;                          [168+rbp] = r1 (high qword of r)
; Out:   r12:r11:r10 = h (r10 = bits 0..63, r11 = bits 64..127,
;                         r12 = bits 128 and up; only partially reduced)
; Clobb: rax, rdx, r9, r13, r14, r15, flags; rcx and r8 are also consumed
;        on every path except the 13-byte fast path.
;
; The multiply/reduce sequence is inlined three times below; it is
; commented in detail the first time and summarised afterwards.
;-----------------------------------------------------------------------
57
58
59	xor	r10,r10	; h0 = 0
60	xor	r11,r11	; h1 = 0
61	xor	r12,r12	; h2 = 0
62	cmp	r8,13
63	jne	NEAR $L$hash_ad_loop	; any length other than 13 takes the generic path
64$L$poly_fast_tls_ad:
; Fast path for exactly 13 bytes of AD (the label name suggests the TLS
; case). Two overlapping qword loads read bytes 0..7 and 5..12, so nothing
; past the 13th byte is touched.
65
66	mov	r10,QWORD[rcx]	; h0 = AD bytes 0..7
67	mov	r11,QWORD[5+rcx]	; AD bytes 5..12
68	shr	r11,24	; drop bytes 5..7: h1 = AD bytes 8..12, zero-extended
69	mov	r12,1	; h2 = 1, the 2^128 bit of the zero-padded block
; --- h = h * r, then partial reduction mod 2^130 - 5 ---
; t3:t2:t1:t0 = r9:r15:r14:r13 receives the product limbs.
70	mov	rax,QWORD[((0+160+0))+rbp]	; rax = r0
71	mov	r15,rax	; keep a copy of r0 for the r0*h2 term
72	mul	r10	; rdx:rax = r0*h0
73	mov	r13,rax	; t0 = lo(r0*h0)
74	mov	r14,rdx	; t1 = hi(r0*h0)
75	mov	rax,QWORD[((0+160+0))+rbp]	; reload r0 (mul overwrote rax)
76	mul	r11	; rdx:rax = r0*h1
77	imul	r15,r12	; t2 = r0*h2 (low 64 bits)
78	add	r14,rax	; t1 += lo(r0*h1)
79	adc	r15,rdx	; t2 += hi(r0*h1) + carry
80	mov	rax,QWORD[((8+160+0))+rbp]	; rax = r1
81	mov	r9,rax	; keep a copy of r1 for the r1*h2 term
82	mul	r10	; rdx:rax = r1*h0
83	add	r14,rax	; t1 += lo(r1*h0)
84	adc	rdx,0	; fold the carry into hi(r1*h0)
85	mov	r10,rdx	; h0 is no longer needed: r10 now holds that carry word
86	mov	rax,QWORD[((8+160+0))+rbp]	; reload r1
87	mul	r11	; rdx:rax = r1*h1
88	add	r15,rax	; t2 += lo(r1*h1)
89	adc	rdx,0	; fold the carry into hi(r1*h1)
90	imul	r9,r12	; t3 = r1*h2 (low 64 bits)
91	add	r15,r10	; t2 += carry word saved from r1*h0
92	adc	r9,rdx	; t3 += hi(r1*h1) + carry
93	mov	r10,r13	; h0 = t0
94	mov	r11,r14	; h1 = t1
95	mov	r12,r15
96	and	r12,3	; h2 = t2 & 3 (bits 128..129 of the product)
97	mov	r13,r15
98	and	r13,-4	; with r14 below: r14:r13 = 4*c, c = (t3:t2) >> 2
99	mov	r14,r9
100	shrd	r15,r9,2
101	shr	r9,2	; r9:r15 = c, the product bits from 2^130 upward
102	add	r15,r13
103	adc	r9,r14	; r9:r15 = c + 4*c = 5*c
104	add	r10,r15
105	adc	r11,r9
106	adc	r12,0	; h += 5*c, since 2^130 == 5 (mod 2^130 - 5)
107
108	DB	0F3h,0C3h		;repret
109$L$hash_ad_loop:
; Generic path: absorb full 16-byte blocks while at least 16 bytes remain.
110
111	cmp	r8,16
112	jb	NEAR $L$hash_ad_tail	; fewer than 16 bytes left (unsigned compare)
113	add	r10,QWORD[((0+0))+rcx]	; h += block, low qword
114	adc	r11,QWORD[((8+0))+rcx]	; high qword plus carry
115	adc	r12,1	; plus the 2^128 bit and carry
; --- h = h * r, then partial reduction (same sequence as above) ---
116	mov	rax,QWORD[((0+160+0))+rbp]
117	mov	r15,rax
118	mul	r10
119	mov	r13,rax
120	mov	r14,rdx
121	mov	rax,QWORD[((0+160+0))+rbp]
122	mul	r11
123	imul	r15,r12
124	add	r14,rax
125	adc	r15,rdx	; r0 terms accumulated into t2:t1:t0
126	mov	rax,QWORD[((8+160+0))+rbp]
127	mov	r9,rax
128	mul	r10
129	add	r14,rax
130	adc	rdx,0
131	mov	r10,rdx
132	mov	rax,QWORD[((8+160+0))+rbp]
133	mul	r11
134	add	r15,rax
135	adc	rdx,0
136	imul	r9,r12
137	add	r15,r10
138	adc	r9,rdx	; r1 terms added: product is r9:r15:r14:r13
139	mov	r10,r13
140	mov	r11,r14
141	mov	r12,r15
142	and	r12,3
143	mov	r13,r15
144	and	r13,-4
145	mov	r14,r9
146	shrd	r15,r9,2
147	shr	r9,2
148	add	r15,r13
149	adc	r9,r14
150	add	r10,r15
151	adc	r11,r9
152	adc	r12,0	; h = low 130 bits + 5*(bits above 2^130)
153
154	lea	rcx,[16+rcx]	; advance to the next block
155	sub	r8,16	; 16 fewer bytes remaining
156	jmp	NEAR $L$hash_ad_loop
157$L$hash_ad_tail:
158	cmp	r8,0
159	je	NEAR $L$hash_ad_done	; nothing left: h is final
160
; 1..15 bytes remain. Assemble them into r14:r13 as a little-endian value,
; walking backwards from the last byte so that the first remaining byte
; ends up least significant; the unused upper bytes stay zero.
161	xor	r13,r13	; low qword of the partial block
162	xor	r14,r14	; high qword of the partial block
163	xor	r15,r15
164	add	rcx,r8	; rcx = one past the last AD byte
165$L$hash_ad_tail_loop:
166	shld	r14,r13,8
167	shl	r13,8	; r14:r13 <<= 8
168	movzx	r15,BYTE[((-1))+rcx]	; next byte, moving towards the front
169	xor	r13,r15	; insert it as the new lowest byte
170	dec	rcx
171	dec	r8	; sets ZF for the jne below once all bytes are consumed
172	jne	NEAR $L$hash_ad_tail_loop
173
174	add	r10,r13	; h += zero-padded partial block, low qword
175	adc	r11,r14	; high qword plus carry
176	adc	r12,1	; plus the 2^128 bit and carry
; --- h = h * r, then partial reduction (same sequence as above) ---
177	mov	rax,QWORD[((0+160+0))+rbp]
178	mov	r15,rax
179	mul	r10
180	mov	r13,rax
181	mov	r14,rdx
182	mov	rax,QWORD[((0+160+0))+rbp]
183	mul	r11
184	imul	r15,r12
185	add	r14,rax
186	adc	r15,rdx	; r0 terms accumulated into t2:t1:t0
187	mov	rax,QWORD[((8+160+0))+rbp]
188	mov	r9,rax
189	mul	r10
190	add	r14,rax
191	adc	rdx,0
192	mov	r10,rdx
193	mov	rax,QWORD[((8+160+0))+rbp]
194	mul	r11
195	add	r15,rax
196	adc	rdx,0
197	imul	r9,r12
198	add	r15,r10
199	adc	r9,rdx	; r1 terms added: product is r9:r15:r14:r13
200	mov	r10,r13
201	mov	r11,r14
202	mov	r12,r15
203	and	r12,3
204	mov	r13,r15
205	and	r13,-4
206	mov	r14,r9
207	shrd	r15,r9,2
208	shr	r9,2
209	add	r15,r13
210	adc	r9,r14
211	add	r10,r15
212	adc	r11,r9
213	adc	r12,0	; h = low 130 bits + 5*(bits above 2^130)
214
215
216$L$hash_ad_done:
217	DB	0F3h,0C3h		;repret
218
219
220
221global	chacha20_poly1305_open
222
223ALIGN	64
224chacha20_poly1305_open:
225	mov	QWORD[8+rsp],rdi	;WIN64 prologue
226	mov	QWORD[16+rsp],rsi
227	mov	rax,rsp
228$L$SEH_begin_chacha20_poly1305_open:
229	mov	rdi,rcx
230	mov	rsi,rdx
231	mov	rdx,r8
232	mov	rcx,r9
233	mov	r8,QWORD[40+rsp]
234	mov	r9,QWORD[48+rsp]
235
236
237
238	push	rbp
239
240	push	rbx
241
242	push	r12
243
244	push	r13
245
246	push	r14
247
248	push	r15
249
250
251
252	push	r9
253
254	sub	rsp,288 + 160 + 32
255
256
257	lea	rbp,[32+rsp]
258	and	rbp,-32
259
260	movaps	XMMWORD[(0+0)+rbp],xmm6
261	movaps	XMMWORD[(16+0)+rbp],xmm7
262	movaps	XMMWORD[(32+0)+rbp],xmm8
263	movaps	XMMWORD[(48+0)+rbp],xmm9
264	movaps	XMMWORD[(64+0)+rbp],xmm10
265	movaps	XMMWORD[(80+0)+rbp],xmm11
266	movaps	XMMWORD[(96+0)+rbp],xmm12
267	movaps	XMMWORD[(112+0)+rbp],xmm13
268	movaps	XMMWORD[(128+0)+rbp],xmm14
269	movaps	XMMWORD[(144+0)+rbp],xmm15
270
271	mov	rbx,rdx
272	mov	QWORD[((0+160+32))+rbp],r8
273	mov	QWORD[((8+160+32))+rbp],rbx
274
275	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
276	and	eax,288
277	xor	eax,288
278	jz	NEAR chacha20_poly1305_open_avx2
279
280	cmp	rbx,128
281	jbe	NEAR $L$open_sse_128
282
283	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
284	movdqu	xmm4,XMMWORD[r9]
285	movdqu	xmm8,XMMWORD[16+r9]
286	movdqu	xmm12,XMMWORD[32+r9]
287
288	movdqa	xmm7,xmm12
289
290	movdqa	XMMWORD[(160+48)+rbp],xmm4
291	movdqa	XMMWORD[(160+64)+rbp],xmm8
292	movdqa	XMMWORD[(160+96)+rbp],xmm12
293	mov	r10,10
294$L$open_sse_init_rounds:
295	paddd	xmm0,xmm4
296	pxor	xmm12,xmm0
297	pshufb	xmm12,XMMWORD[$L$rol16]
298	paddd	xmm8,xmm12
299	pxor	xmm4,xmm8
300	movdqa	xmm3,xmm4
301	pslld	xmm3,12
302	psrld	xmm4,20
303	pxor	xmm4,xmm3
304	paddd	xmm0,xmm4
305	pxor	xmm12,xmm0
306	pshufb	xmm12,XMMWORD[$L$rol8]
307	paddd	xmm8,xmm12
308	pxor	xmm4,xmm8
309	movdqa	xmm3,xmm4
310	pslld	xmm3,7
311	psrld	xmm4,25
312	pxor	xmm4,xmm3
313DB	102,15,58,15,228,4
314DB	102,69,15,58,15,192,8
315DB	102,69,15,58,15,228,12
316	paddd	xmm0,xmm4
317	pxor	xmm12,xmm0
318	pshufb	xmm12,XMMWORD[$L$rol16]
319	paddd	xmm8,xmm12
320	pxor	xmm4,xmm8
321	movdqa	xmm3,xmm4
322	pslld	xmm3,12
323	psrld	xmm4,20
324	pxor	xmm4,xmm3
325	paddd	xmm0,xmm4
326	pxor	xmm12,xmm0
327	pshufb	xmm12,XMMWORD[$L$rol8]
328	paddd	xmm8,xmm12
329	pxor	xmm4,xmm8
330	movdqa	xmm3,xmm4
331	pslld	xmm3,7
332	psrld	xmm4,25
333	pxor	xmm4,xmm3
334DB	102,15,58,15,228,12
335DB	102,69,15,58,15,192,8
336DB	102,69,15,58,15,228,4
337
338	dec	r10
339	jne	NEAR $L$open_sse_init_rounds
340
341	paddd	xmm0,XMMWORD[$L$chacha20_consts]
342	paddd	xmm4,XMMWORD[((160+48))+rbp]
343
344	pand	xmm0,XMMWORD[$L$clamp]
345	movdqa	XMMWORD[(160+0)+rbp],xmm0
346	movdqa	XMMWORD[(160+16)+rbp],xmm4
347
348	mov	r8,r8
349	call	poly_hash_ad_internal
350$L$open_sse_main_loop:
351	cmp	rbx,16*16
352	jb	NEAR $L$open_sse_tail
353
354	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
355	movdqa	xmm4,XMMWORD[((160+48))+rbp]
356	movdqa	xmm8,XMMWORD[((160+64))+rbp]
357	movdqa	xmm1,xmm0
358	movdqa	xmm5,xmm4
359	movdqa	xmm9,xmm8
360	movdqa	xmm2,xmm0
361	movdqa	xmm6,xmm4
362	movdqa	xmm10,xmm8
363	movdqa	xmm3,xmm0
364	movdqa	xmm7,xmm4
365	movdqa	xmm11,xmm8
366	movdqa	xmm15,XMMWORD[((160+96))+rbp]
367	paddd	xmm15,XMMWORD[$L$sse_inc]
368	movdqa	xmm14,xmm15
369	paddd	xmm14,XMMWORD[$L$sse_inc]
370	movdqa	xmm13,xmm14
371	paddd	xmm13,XMMWORD[$L$sse_inc]
372	movdqa	xmm12,xmm13
373	paddd	xmm12,XMMWORD[$L$sse_inc]
374	movdqa	XMMWORD[(160+96)+rbp],xmm12
375	movdqa	XMMWORD[(160+112)+rbp],xmm13
376	movdqa	XMMWORD[(160+128)+rbp],xmm14
377	movdqa	XMMWORD[(160+144)+rbp],xmm15
378
379
380
381	mov	rcx,4
382	mov	r8,rsi
383$L$open_sse_main_loop_rounds:
384	movdqa	XMMWORD[(160+80)+rbp],xmm8
385	movdqa	xmm8,XMMWORD[$L$rol16]
386	paddd	xmm3,xmm7
387	paddd	xmm2,xmm6
388	paddd	xmm1,xmm5
389	paddd	xmm0,xmm4
390	pxor	xmm15,xmm3
391	pxor	xmm14,xmm2
392	pxor	xmm13,xmm1
393	pxor	xmm12,xmm0
394DB	102,69,15,56,0,248
395DB	102,69,15,56,0,240
396DB	102,69,15,56,0,232
397DB	102,69,15,56,0,224
398	movdqa	xmm8,XMMWORD[((160+80))+rbp]
399	paddd	xmm11,xmm15
400	paddd	xmm10,xmm14
401	paddd	xmm9,xmm13
402	paddd	xmm8,xmm12
403	pxor	xmm7,xmm11
404	add	r10,QWORD[((0+0))+r8]
405	adc	r11,QWORD[((8+0))+r8]
406	adc	r12,1
407
408	lea	r8,[16+r8]
409	pxor	xmm6,xmm10
410	pxor	xmm5,xmm9
411	pxor	xmm4,xmm8
412	movdqa	XMMWORD[(160+80)+rbp],xmm8
413	movdqa	xmm8,xmm7
414	psrld	xmm8,20
415	pslld	xmm7,32-20
416	pxor	xmm7,xmm8
417	movdqa	xmm8,xmm6
418	psrld	xmm8,20
419	pslld	xmm6,32-20
420	pxor	xmm6,xmm8
421	movdqa	xmm8,xmm5
422	psrld	xmm8,20
423	pslld	xmm5,32-20
424	pxor	xmm5,xmm8
425	movdqa	xmm8,xmm4
426	psrld	xmm8,20
427	pslld	xmm4,32-20
428	pxor	xmm4,xmm8
429	mov	rax,QWORD[((0+160+0))+rbp]
430	mov	r15,rax
431	mul	r10
432	mov	r13,rax
433	mov	r14,rdx
434	mov	rax,QWORD[((0+160+0))+rbp]
435	mul	r11
436	imul	r15,r12
437	add	r14,rax
438	adc	r15,rdx
439	movdqa	xmm8,XMMWORD[$L$rol8]
440	paddd	xmm3,xmm7
441	paddd	xmm2,xmm6
442	paddd	xmm1,xmm5
443	paddd	xmm0,xmm4
444	pxor	xmm15,xmm3
445	pxor	xmm14,xmm2
446	pxor	xmm13,xmm1
447	pxor	xmm12,xmm0
448DB	102,69,15,56,0,248
449DB	102,69,15,56,0,240
450DB	102,69,15,56,0,232
451DB	102,69,15,56,0,224
452	movdqa	xmm8,XMMWORD[((160+80))+rbp]
453	paddd	xmm11,xmm15
454	paddd	xmm10,xmm14
455	paddd	xmm9,xmm13
456	paddd	xmm8,xmm12
457	pxor	xmm7,xmm11
458	pxor	xmm6,xmm10
459	mov	rax,QWORD[((8+160+0))+rbp]
460	mov	r9,rax
461	mul	r10
462	add	r14,rax
463	adc	rdx,0
464	mov	r10,rdx
465	mov	rax,QWORD[((8+160+0))+rbp]
466	mul	r11
467	add	r15,rax
468	adc	rdx,0
469	pxor	xmm5,xmm9
470	pxor	xmm4,xmm8
471	movdqa	XMMWORD[(160+80)+rbp],xmm8
472	movdqa	xmm8,xmm7
473	psrld	xmm8,25
474	pslld	xmm7,32-25
475	pxor	xmm7,xmm8
476	movdqa	xmm8,xmm6
477	psrld	xmm8,25
478	pslld	xmm6,32-25
479	pxor	xmm6,xmm8
480	movdqa	xmm8,xmm5
481	psrld	xmm8,25
482	pslld	xmm5,32-25
483	pxor	xmm5,xmm8
484	movdqa	xmm8,xmm4
485	psrld	xmm8,25
486	pslld	xmm4,32-25
487	pxor	xmm4,xmm8
488	movdqa	xmm8,XMMWORD[((160+80))+rbp]
489	imul	r9,r12
490	add	r15,r10
491	adc	r9,rdx
492DB	102,15,58,15,255,4
493DB	102,69,15,58,15,219,8
494DB	102,69,15,58,15,255,12
495DB	102,15,58,15,246,4
496DB	102,69,15,58,15,210,8
497DB	102,69,15,58,15,246,12
498DB	102,15,58,15,237,4
499DB	102,69,15,58,15,201,8
500DB	102,69,15,58,15,237,12
501DB	102,15,58,15,228,4
502DB	102,69,15,58,15,192,8
503DB	102,69,15,58,15,228,12
504	movdqa	XMMWORD[(160+80)+rbp],xmm8
505	movdqa	xmm8,XMMWORD[$L$rol16]
506	paddd	xmm3,xmm7
507	paddd	xmm2,xmm6
508	paddd	xmm1,xmm5
509	paddd	xmm0,xmm4
510	pxor	xmm15,xmm3
511	pxor	xmm14,xmm2
512	mov	r10,r13
513	mov	r11,r14
514	mov	r12,r15
515	and	r12,3
516	mov	r13,r15
517	and	r13,-4
518	mov	r14,r9
519	shrd	r15,r9,2
520	shr	r9,2
521	add	r15,r13
522	adc	r9,r14
523	add	r10,r15
524	adc	r11,r9
525	adc	r12,0
526	pxor	xmm13,xmm1
527	pxor	xmm12,xmm0
528DB	102,69,15,56,0,248
529DB	102,69,15,56,0,240
530DB	102,69,15,56,0,232
531DB	102,69,15,56,0,224
532	movdqa	xmm8,XMMWORD[((160+80))+rbp]
533	paddd	xmm11,xmm15
534	paddd	xmm10,xmm14
535	paddd	xmm9,xmm13
536	paddd	xmm8,xmm12
537	pxor	xmm7,xmm11
538	pxor	xmm6,xmm10
539	pxor	xmm5,xmm9
540	pxor	xmm4,xmm8
541	movdqa	XMMWORD[(160+80)+rbp],xmm8
542	movdqa	xmm8,xmm7
543	psrld	xmm8,20
544	pslld	xmm7,32-20
545	pxor	xmm7,xmm8
546	movdqa	xmm8,xmm6
547	psrld	xmm8,20
548	pslld	xmm6,32-20
549	pxor	xmm6,xmm8
550	movdqa	xmm8,xmm5
551	psrld	xmm8,20
552	pslld	xmm5,32-20
553	pxor	xmm5,xmm8
554	movdqa	xmm8,xmm4
555	psrld	xmm8,20
556	pslld	xmm4,32-20
557	pxor	xmm4,xmm8
558	movdqa	xmm8,XMMWORD[$L$rol8]
559	paddd	xmm3,xmm7
560	paddd	xmm2,xmm6
561	paddd	xmm1,xmm5
562	paddd	xmm0,xmm4
563	pxor	xmm15,xmm3
564	pxor	xmm14,xmm2
565	pxor	xmm13,xmm1
566	pxor	xmm12,xmm0
567DB	102,69,15,56,0,248
568DB	102,69,15,56,0,240
569DB	102,69,15,56,0,232
570DB	102,69,15,56,0,224
571	movdqa	xmm8,XMMWORD[((160+80))+rbp]
572	paddd	xmm11,xmm15
573	paddd	xmm10,xmm14
574	paddd	xmm9,xmm13
575	paddd	xmm8,xmm12
576	pxor	xmm7,xmm11
577	pxor	xmm6,xmm10
578	pxor	xmm5,xmm9
579	pxor	xmm4,xmm8
580	movdqa	XMMWORD[(160+80)+rbp],xmm8
581	movdqa	xmm8,xmm7
582	psrld	xmm8,25
583	pslld	xmm7,32-25
584	pxor	xmm7,xmm8
585	movdqa	xmm8,xmm6
586	psrld	xmm8,25
587	pslld	xmm6,32-25
588	pxor	xmm6,xmm8
589	movdqa	xmm8,xmm5
590	psrld	xmm8,25
591	pslld	xmm5,32-25
592	pxor	xmm5,xmm8
593	movdqa	xmm8,xmm4
594	psrld	xmm8,25
595	pslld	xmm4,32-25
596	pxor	xmm4,xmm8
597	movdqa	xmm8,XMMWORD[((160+80))+rbp]
598DB	102,15,58,15,255,12
599DB	102,69,15,58,15,219,8
600DB	102,69,15,58,15,255,4
601DB	102,15,58,15,246,12
602DB	102,69,15,58,15,210,8
603DB	102,69,15,58,15,246,4
604DB	102,15,58,15,237,12
605DB	102,69,15,58,15,201,8
606DB	102,69,15,58,15,237,4
607DB	102,15,58,15,228,12
608DB	102,69,15,58,15,192,8
609DB	102,69,15,58,15,228,4
610
611	dec	rcx
612	jge	NEAR $L$open_sse_main_loop_rounds
613	add	r10,QWORD[((0+0))+r8]
614	adc	r11,QWORD[((8+0))+r8]
615	adc	r12,1
616	mov	rax,QWORD[((0+160+0))+rbp]
617	mov	r15,rax
618	mul	r10
619	mov	r13,rax
620	mov	r14,rdx
621	mov	rax,QWORD[((0+160+0))+rbp]
622	mul	r11
623	imul	r15,r12
624	add	r14,rax
625	adc	r15,rdx
626	mov	rax,QWORD[((8+160+0))+rbp]
627	mov	r9,rax
628	mul	r10
629	add	r14,rax
630	adc	rdx,0
631	mov	r10,rdx
632	mov	rax,QWORD[((8+160+0))+rbp]
633	mul	r11
634	add	r15,rax
635	adc	rdx,0
636	imul	r9,r12
637	add	r15,r10
638	adc	r9,rdx
639	mov	r10,r13
640	mov	r11,r14
641	mov	r12,r15
642	and	r12,3
643	mov	r13,r15
644	and	r13,-4
645	mov	r14,r9
646	shrd	r15,r9,2
647	shr	r9,2
648	add	r15,r13
649	adc	r9,r14
650	add	r10,r15
651	adc	r11,r9
652	adc	r12,0
653
654	lea	r8,[16+r8]
655	cmp	rcx,-6
656	jg	NEAR $L$open_sse_main_loop_rounds
657	paddd	xmm3,XMMWORD[$L$chacha20_consts]
658	paddd	xmm7,XMMWORD[((160+48))+rbp]
659	paddd	xmm11,XMMWORD[((160+64))+rbp]
660	paddd	xmm15,XMMWORD[((160+144))+rbp]
661	paddd	xmm2,XMMWORD[$L$chacha20_consts]
662	paddd	xmm6,XMMWORD[((160+48))+rbp]
663	paddd	xmm10,XMMWORD[((160+64))+rbp]
664	paddd	xmm14,XMMWORD[((160+128))+rbp]
665	paddd	xmm1,XMMWORD[$L$chacha20_consts]
666	paddd	xmm5,XMMWORD[((160+48))+rbp]
667	paddd	xmm9,XMMWORD[((160+64))+rbp]
668	paddd	xmm13,XMMWORD[((160+112))+rbp]
669	paddd	xmm0,XMMWORD[$L$chacha20_consts]
670	paddd	xmm4,XMMWORD[((160+48))+rbp]
671	paddd	xmm8,XMMWORD[((160+64))+rbp]
672	paddd	xmm12,XMMWORD[((160+96))+rbp]
673	movdqa	XMMWORD[(160+80)+rbp],xmm12
674	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
675	pxor	xmm12,xmm3
676	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
677	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
678	pxor	xmm12,xmm7
679	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
680	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
681	pxor	xmm12,xmm11
682	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
683	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
684	pxor	xmm12,xmm15
685	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
686	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
687	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
688	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
689	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
690	pxor	xmm2,xmm3
691	pxor	xmm6,xmm7
692	pxor	xmm10,xmm11
693	pxor	xmm15,xmm14
694	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
695	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
696	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
697	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
698	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
699	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
700	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
701	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
702	pxor	xmm1,xmm3
703	pxor	xmm5,xmm7
704	pxor	xmm9,xmm11
705	pxor	xmm15,xmm13
706	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
707	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
708	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
709	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
710	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
711	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
712	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
713	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
714	pxor	xmm0,xmm3
715	pxor	xmm4,xmm7
716	pxor	xmm8,xmm11
717	pxor	xmm15,XMMWORD[((160+80))+rbp]
718	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
719	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
720	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
721	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
722
723	lea	rsi,[256+rsi]
724	lea	rdi,[256+rdi]
725	sub	rbx,16*16
726	jmp	NEAR $L$open_sse_main_loop
727$L$open_sse_tail:
728
729	test	rbx,rbx
730	jz	NEAR $L$open_sse_finalize
731	cmp	rbx,12*16
732	ja	NEAR $L$open_sse_tail_256
733	cmp	rbx,8*16
734	ja	NEAR $L$open_sse_tail_192
735	cmp	rbx,4*16
736	ja	NEAR $L$open_sse_tail_128
737	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
738	movdqa	xmm4,XMMWORD[((160+48))+rbp]
739	movdqa	xmm8,XMMWORD[((160+64))+rbp]
740	movdqa	xmm12,XMMWORD[((160+96))+rbp]
741	paddd	xmm12,XMMWORD[$L$sse_inc]
742	movdqa	XMMWORD[(160+96)+rbp],xmm12
743
744	xor	r8,r8
745	mov	rcx,rbx
746	cmp	rcx,16
747	jb	NEAR $L$open_sse_tail_64_rounds
748$L$open_sse_tail_64_rounds_and_x1hash:
749	add	r10,QWORD[((0+0))+r8*1+rsi]
750	adc	r11,QWORD[((8+0))+r8*1+rsi]
751	adc	r12,1
752	mov	rax,QWORD[((0+160+0))+rbp]
753	mov	r15,rax
754	mul	r10
755	mov	r13,rax
756	mov	r14,rdx
757	mov	rax,QWORD[((0+160+0))+rbp]
758	mul	r11
759	imul	r15,r12
760	add	r14,rax
761	adc	r15,rdx
762	mov	rax,QWORD[((8+160+0))+rbp]
763	mov	r9,rax
764	mul	r10
765	add	r14,rax
766	adc	rdx,0
767	mov	r10,rdx
768	mov	rax,QWORD[((8+160+0))+rbp]
769	mul	r11
770	add	r15,rax
771	adc	rdx,0
772	imul	r9,r12
773	add	r15,r10
774	adc	r9,rdx
775	mov	r10,r13
776	mov	r11,r14
777	mov	r12,r15
778	and	r12,3
779	mov	r13,r15
780	and	r13,-4
781	mov	r14,r9
782	shrd	r15,r9,2
783	shr	r9,2
784	add	r15,r13
785	adc	r9,r14
786	add	r10,r15
787	adc	r11,r9
788	adc	r12,0
789
790	sub	rcx,16
791$L$open_sse_tail_64_rounds:
792	add	r8,16
793	paddd	xmm0,xmm4
794	pxor	xmm12,xmm0
795	pshufb	xmm12,XMMWORD[$L$rol16]
796	paddd	xmm8,xmm12
797	pxor	xmm4,xmm8
798	movdqa	xmm3,xmm4
799	pslld	xmm3,12
800	psrld	xmm4,20
801	pxor	xmm4,xmm3
802	paddd	xmm0,xmm4
803	pxor	xmm12,xmm0
804	pshufb	xmm12,XMMWORD[$L$rol8]
805	paddd	xmm8,xmm12
806	pxor	xmm4,xmm8
807	movdqa	xmm3,xmm4
808	pslld	xmm3,7
809	psrld	xmm4,25
810	pxor	xmm4,xmm3
811DB	102,15,58,15,228,4
812DB	102,69,15,58,15,192,8
813DB	102,69,15,58,15,228,12
814	paddd	xmm0,xmm4
815	pxor	xmm12,xmm0
816	pshufb	xmm12,XMMWORD[$L$rol16]
817	paddd	xmm8,xmm12
818	pxor	xmm4,xmm8
819	movdqa	xmm3,xmm4
820	pslld	xmm3,12
821	psrld	xmm4,20
822	pxor	xmm4,xmm3
823	paddd	xmm0,xmm4
824	pxor	xmm12,xmm0
825	pshufb	xmm12,XMMWORD[$L$rol8]
826	paddd	xmm8,xmm12
827	pxor	xmm4,xmm8
828	movdqa	xmm3,xmm4
829	pslld	xmm3,7
830	psrld	xmm4,25
831	pxor	xmm4,xmm3
832DB	102,15,58,15,228,12
833DB	102,69,15,58,15,192,8
834DB	102,69,15,58,15,228,4
835
836	cmp	rcx,16
837	jae	NEAR $L$open_sse_tail_64_rounds_and_x1hash
838	cmp	r8,10*16
839	jne	NEAR $L$open_sse_tail_64_rounds
840	paddd	xmm0,XMMWORD[$L$chacha20_consts]
841	paddd	xmm4,XMMWORD[((160+48))+rbp]
842	paddd	xmm8,XMMWORD[((160+64))+rbp]
843	paddd	xmm12,XMMWORD[((160+96))+rbp]
844
845	jmp	NEAR $L$open_sse_tail_64_dec_loop
846
847$L$open_sse_tail_128:
848	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
849	movdqa	xmm4,XMMWORD[((160+48))+rbp]
850	movdqa	xmm8,XMMWORD[((160+64))+rbp]
851	movdqa	xmm1,xmm0
852	movdqa	xmm5,xmm4
853	movdqa	xmm9,xmm8
854	movdqa	xmm13,XMMWORD[((160+96))+rbp]
855	paddd	xmm13,XMMWORD[$L$sse_inc]
856	movdqa	xmm12,xmm13
857	paddd	xmm12,XMMWORD[$L$sse_inc]
858	movdqa	XMMWORD[(160+96)+rbp],xmm12
859	movdqa	XMMWORD[(160+112)+rbp],xmm13
860
861	mov	rcx,rbx
862	and	rcx,-16
863	xor	r8,r8
864$L$open_sse_tail_128_rounds_and_x1hash:
865	add	r10,QWORD[((0+0))+r8*1+rsi]
866	adc	r11,QWORD[((8+0))+r8*1+rsi]
867	adc	r12,1
868	mov	rax,QWORD[((0+160+0))+rbp]
869	mov	r15,rax
870	mul	r10
871	mov	r13,rax
872	mov	r14,rdx
873	mov	rax,QWORD[((0+160+0))+rbp]
874	mul	r11
875	imul	r15,r12
876	add	r14,rax
877	adc	r15,rdx
878	mov	rax,QWORD[((8+160+0))+rbp]
879	mov	r9,rax
880	mul	r10
881	add	r14,rax
882	adc	rdx,0
883	mov	r10,rdx
884	mov	rax,QWORD[((8+160+0))+rbp]
885	mul	r11
886	add	r15,rax
887	adc	rdx,0
888	imul	r9,r12
889	add	r15,r10
890	adc	r9,rdx
891	mov	r10,r13
892	mov	r11,r14
893	mov	r12,r15
894	and	r12,3
895	mov	r13,r15
896	and	r13,-4
897	mov	r14,r9
898	shrd	r15,r9,2
899	shr	r9,2
900	add	r15,r13
901	adc	r9,r14
902	add	r10,r15
903	adc	r11,r9
904	adc	r12,0
905
906$L$open_sse_tail_128_rounds:
907	add	r8,16
908	paddd	xmm0,xmm4
909	pxor	xmm12,xmm0
910	pshufb	xmm12,XMMWORD[$L$rol16]
911	paddd	xmm8,xmm12
912	pxor	xmm4,xmm8
913	movdqa	xmm3,xmm4
914	pslld	xmm3,12
915	psrld	xmm4,20
916	pxor	xmm4,xmm3
917	paddd	xmm0,xmm4
918	pxor	xmm12,xmm0
919	pshufb	xmm12,XMMWORD[$L$rol8]
920	paddd	xmm8,xmm12
921	pxor	xmm4,xmm8
922	movdqa	xmm3,xmm4
923	pslld	xmm3,7
924	psrld	xmm4,25
925	pxor	xmm4,xmm3
926DB	102,15,58,15,228,4
927DB	102,69,15,58,15,192,8
928DB	102,69,15,58,15,228,12
929	paddd	xmm1,xmm5
930	pxor	xmm13,xmm1
931	pshufb	xmm13,XMMWORD[$L$rol16]
932	paddd	xmm9,xmm13
933	pxor	xmm5,xmm9
934	movdqa	xmm3,xmm5
935	pslld	xmm3,12
936	psrld	xmm5,20
937	pxor	xmm5,xmm3
938	paddd	xmm1,xmm5
939	pxor	xmm13,xmm1
940	pshufb	xmm13,XMMWORD[$L$rol8]
941	paddd	xmm9,xmm13
942	pxor	xmm5,xmm9
943	movdqa	xmm3,xmm5
944	pslld	xmm3,7
945	psrld	xmm5,25
946	pxor	xmm5,xmm3
947DB	102,15,58,15,237,4
948DB	102,69,15,58,15,201,8
949DB	102,69,15,58,15,237,12
950	paddd	xmm0,xmm4
951	pxor	xmm12,xmm0
952	pshufb	xmm12,XMMWORD[$L$rol16]
953	paddd	xmm8,xmm12
954	pxor	xmm4,xmm8
955	movdqa	xmm3,xmm4
956	pslld	xmm3,12
957	psrld	xmm4,20
958	pxor	xmm4,xmm3
959	paddd	xmm0,xmm4
960	pxor	xmm12,xmm0
961	pshufb	xmm12,XMMWORD[$L$rol8]
962	paddd	xmm8,xmm12
963	pxor	xmm4,xmm8
964	movdqa	xmm3,xmm4
965	pslld	xmm3,7
966	psrld	xmm4,25
967	pxor	xmm4,xmm3
968DB	102,15,58,15,228,12
969DB	102,69,15,58,15,192,8
970DB	102,69,15,58,15,228,4
971	paddd	xmm1,xmm5
972	pxor	xmm13,xmm1
973	pshufb	xmm13,XMMWORD[$L$rol16]
974	paddd	xmm9,xmm13
975	pxor	xmm5,xmm9
976	movdqa	xmm3,xmm5
977	pslld	xmm3,12
978	psrld	xmm5,20
979	pxor	xmm5,xmm3
980	paddd	xmm1,xmm5
981	pxor	xmm13,xmm1
982	pshufb	xmm13,XMMWORD[$L$rol8]
983	paddd	xmm9,xmm13
984	pxor	xmm5,xmm9
985	movdqa	xmm3,xmm5
986	pslld	xmm3,7
987	psrld	xmm5,25
988	pxor	xmm5,xmm3
989DB	102,15,58,15,237,12
990DB	102,69,15,58,15,201,8
991DB	102,69,15,58,15,237,4
992
993	cmp	r8,rcx
994	jb	NEAR $L$open_sse_tail_128_rounds_and_x1hash
995	cmp	r8,10*16
996	jne	NEAR $L$open_sse_tail_128_rounds
997	paddd	xmm1,XMMWORD[$L$chacha20_consts]
998	paddd	xmm5,XMMWORD[((160+48))+rbp]
999	paddd	xmm9,XMMWORD[((160+64))+rbp]
1000	paddd	xmm13,XMMWORD[((160+112))+rbp]
1001	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1002	paddd	xmm4,XMMWORD[((160+48))+rbp]
1003	paddd	xmm8,XMMWORD[((160+64))+rbp]
1004	paddd	xmm12,XMMWORD[((160+96))+rbp]
1005	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1006	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1007	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1008	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1009	pxor	xmm1,xmm3
1010	pxor	xmm5,xmm7
1011	pxor	xmm9,xmm11
1012	pxor	xmm15,xmm13
1013	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
1014	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
1015	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
1016	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1017
1018	sub	rbx,4*16
1019	lea	rsi,[64+rsi]
1020	lea	rdi,[64+rdi]
1021	jmp	NEAR $L$open_sse_tail_64_dec_loop
1022
1023$L$open_sse_tail_192:
1024	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
1025	movdqa	xmm4,XMMWORD[((160+48))+rbp]
1026	movdqa	xmm8,XMMWORD[((160+64))+rbp]
1027	movdqa	xmm1,xmm0
1028	movdqa	xmm5,xmm4
1029	movdqa	xmm9,xmm8
1030	movdqa	xmm2,xmm0
1031	movdqa	xmm6,xmm4
1032	movdqa	xmm10,xmm8
1033	movdqa	xmm14,XMMWORD[((160+96))+rbp]
1034	paddd	xmm14,XMMWORD[$L$sse_inc]
1035	movdqa	xmm13,xmm14
1036	paddd	xmm13,XMMWORD[$L$sse_inc]
1037	movdqa	xmm12,xmm13
1038	paddd	xmm12,XMMWORD[$L$sse_inc]
1039	movdqa	XMMWORD[(160+96)+rbp],xmm12
1040	movdqa	XMMWORD[(160+112)+rbp],xmm13
1041	movdqa	XMMWORD[(160+128)+rbp],xmm14
1042
1043	mov	rcx,rbx
1044	mov	r8,10*16
1045	cmp	rcx,10*16
1046	cmovg	rcx,r8
1047	and	rcx,-16
1048	xor	r8,r8
1049$L$open_sse_tail_192_rounds_and_x1hash:
1050	add	r10,QWORD[((0+0))+r8*1+rsi]
1051	adc	r11,QWORD[((8+0))+r8*1+rsi]
1052	adc	r12,1
1053	mov	rax,QWORD[((0+160+0))+rbp]
1054	mov	r15,rax
1055	mul	r10
1056	mov	r13,rax
1057	mov	r14,rdx
1058	mov	rax,QWORD[((0+160+0))+rbp]
1059	mul	r11
1060	imul	r15,r12
1061	add	r14,rax
1062	adc	r15,rdx
1063	mov	rax,QWORD[((8+160+0))+rbp]
1064	mov	r9,rax
1065	mul	r10
1066	add	r14,rax
1067	adc	rdx,0
1068	mov	r10,rdx
1069	mov	rax,QWORD[((8+160+0))+rbp]
1070	mul	r11
1071	add	r15,rax
1072	adc	rdx,0
1073	imul	r9,r12
1074	add	r15,r10
1075	adc	r9,rdx
1076	mov	r10,r13
1077	mov	r11,r14
1078	mov	r12,r15
1079	and	r12,3
1080	mov	r13,r15
1081	and	r13,-4
1082	mov	r14,r9
1083	shrd	r15,r9,2
1084	shr	r9,2
1085	add	r15,r13
1086	adc	r9,r14
1087	add	r10,r15
1088	adc	r11,r9
1089	adc	r12,0
1090
1091$L$open_sse_tail_192_rounds:
1092	add	r8,16
1093	paddd	xmm0,xmm4
1094	pxor	xmm12,xmm0
1095	pshufb	xmm12,XMMWORD[$L$rol16]
1096	paddd	xmm8,xmm12
1097	pxor	xmm4,xmm8
1098	movdqa	xmm3,xmm4
1099	pslld	xmm3,12
1100	psrld	xmm4,20
1101	pxor	xmm4,xmm3
1102	paddd	xmm0,xmm4
1103	pxor	xmm12,xmm0
1104	pshufb	xmm12,XMMWORD[$L$rol8]
1105	paddd	xmm8,xmm12
1106	pxor	xmm4,xmm8
1107	movdqa	xmm3,xmm4
1108	pslld	xmm3,7
1109	psrld	xmm4,25
1110	pxor	xmm4,xmm3
1111DB	102,15,58,15,228,4
1112DB	102,69,15,58,15,192,8
1113DB	102,69,15,58,15,228,12
1114	paddd	xmm1,xmm5
1115	pxor	xmm13,xmm1
1116	pshufb	xmm13,XMMWORD[$L$rol16]
1117	paddd	xmm9,xmm13
1118	pxor	xmm5,xmm9
1119	movdqa	xmm3,xmm5
1120	pslld	xmm3,12
1121	psrld	xmm5,20
1122	pxor	xmm5,xmm3
1123	paddd	xmm1,xmm5
1124	pxor	xmm13,xmm1
1125	pshufb	xmm13,XMMWORD[$L$rol8]
1126	paddd	xmm9,xmm13
1127	pxor	xmm5,xmm9
1128	movdqa	xmm3,xmm5
1129	pslld	xmm3,7
1130	psrld	xmm5,25
1131	pxor	xmm5,xmm3
1132DB	102,15,58,15,237,4
1133DB	102,69,15,58,15,201,8
1134DB	102,69,15,58,15,237,12
1135	paddd	xmm2,xmm6
1136	pxor	xmm14,xmm2
1137	pshufb	xmm14,XMMWORD[$L$rol16]
1138	paddd	xmm10,xmm14
1139	pxor	xmm6,xmm10
1140	movdqa	xmm3,xmm6
1141	pslld	xmm3,12
1142	psrld	xmm6,20
1143	pxor	xmm6,xmm3
1144	paddd	xmm2,xmm6
1145	pxor	xmm14,xmm2
1146	pshufb	xmm14,XMMWORD[$L$rol8]
1147	paddd	xmm10,xmm14
1148	pxor	xmm6,xmm10
1149	movdqa	xmm3,xmm6
1150	pslld	xmm3,7
1151	psrld	xmm6,25
1152	pxor	xmm6,xmm3
1153DB	102,15,58,15,246,4
1154DB	102,69,15,58,15,210,8
1155DB	102,69,15,58,15,246,12
1156	paddd	xmm0,xmm4
1157	pxor	xmm12,xmm0
1158	pshufb	xmm12,XMMWORD[$L$rol16]
1159	paddd	xmm8,xmm12
1160	pxor	xmm4,xmm8
1161	movdqa	xmm3,xmm4
1162	pslld	xmm3,12
1163	psrld	xmm4,20
1164	pxor	xmm4,xmm3
1165	paddd	xmm0,xmm4
1166	pxor	xmm12,xmm0
1167	pshufb	xmm12,XMMWORD[$L$rol8]
1168	paddd	xmm8,xmm12
1169	pxor	xmm4,xmm8
1170	movdqa	xmm3,xmm4
1171	pslld	xmm3,7
1172	psrld	xmm4,25
1173	pxor	xmm4,xmm3
1174DB	102,15,58,15,228,12
1175DB	102,69,15,58,15,192,8
1176DB	102,69,15,58,15,228,4
1177	paddd	xmm1,xmm5
1178	pxor	xmm13,xmm1
1179	pshufb	xmm13,XMMWORD[$L$rol16]
1180	paddd	xmm9,xmm13
1181	pxor	xmm5,xmm9
1182	movdqa	xmm3,xmm5
1183	pslld	xmm3,12
1184	psrld	xmm5,20
1185	pxor	xmm5,xmm3
1186	paddd	xmm1,xmm5
1187	pxor	xmm13,xmm1
1188	pshufb	xmm13,XMMWORD[$L$rol8]
1189	paddd	xmm9,xmm13
1190	pxor	xmm5,xmm9
1191	movdqa	xmm3,xmm5
1192	pslld	xmm3,7
1193	psrld	xmm5,25
1194	pxor	xmm5,xmm3
1195DB	102,15,58,15,237,12
1196DB	102,69,15,58,15,201,8
1197DB	102,69,15,58,15,237,4
1198	paddd	xmm2,xmm6
1199	pxor	xmm14,xmm2
1200	pshufb	xmm14,XMMWORD[$L$rol16]
1201	paddd	xmm10,xmm14
1202	pxor	xmm6,xmm10
1203	movdqa	xmm3,xmm6
1204	pslld	xmm3,12
1205	psrld	xmm6,20
1206	pxor	xmm6,xmm3
1207	paddd	xmm2,xmm6
1208	pxor	xmm14,xmm2
1209	pshufb	xmm14,XMMWORD[$L$rol8]
1210	paddd	xmm10,xmm14
1211	pxor	xmm6,xmm10
1212	movdqa	xmm3,xmm6
1213	pslld	xmm3,7
1214	psrld	xmm6,25
1215	pxor	xmm6,xmm3
1216DB	102,15,58,15,246,12
1217DB	102,69,15,58,15,210,8
1218DB	102,69,15,58,15,246,4
1219
1220	cmp	r8,rcx
1221	jb	NEAR $L$open_sse_tail_192_rounds_and_x1hash
1222	cmp	r8,10*16
1223	jne	NEAR $L$open_sse_tail_192_rounds
1224	cmp	rbx,11*16
1225	jb	NEAR $L$open_sse_tail_192_finish
1226	add	r10,QWORD[((0+160))+rsi]
1227	adc	r11,QWORD[((8+160))+rsi]
1228	adc	r12,1
1229	mov	rax,QWORD[((0+160+0))+rbp]
1230	mov	r15,rax
1231	mul	r10
1232	mov	r13,rax
1233	mov	r14,rdx
1234	mov	rax,QWORD[((0+160+0))+rbp]
1235	mul	r11
1236	imul	r15,r12
1237	add	r14,rax
1238	adc	r15,rdx
1239	mov	rax,QWORD[((8+160+0))+rbp]
1240	mov	r9,rax
1241	mul	r10
1242	add	r14,rax
1243	adc	rdx,0
1244	mov	r10,rdx
1245	mov	rax,QWORD[((8+160+0))+rbp]
1246	mul	r11
1247	add	r15,rax
1248	adc	rdx,0
1249	imul	r9,r12
1250	add	r15,r10
1251	adc	r9,rdx
1252	mov	r10,r13
1253	mov	r11,r14
1254	mov	r12,r15
1255	and	r12,3
1256	mov	r13,r15
1257	and	r13,-4
1258	mov	r14,r9
1259	shrd	r15,r9,2
1260	shr	r9,2
1261	add	r15,r13
1262	adc	r9,r14
1263	add	r10,r15
1264	adc	r11,r9
1265	adc	r12,0
1266
1267	cmp	rbx,12*16
1268	jb	NEAR $L$open_sse_tail_192_finish
1269	add	r10,QWORD[((0+176))+rsi]
1270	adc	r11,QWORD[((8+176))+rsi]
1271	adc	r12,1
1272	mov	rax,QWORD[((0+160+0))+rbp]
1273	mov	r15,rax
1274	mul	r10
1275	mov	r13,rax
1276	mov	r14,rdx
1277	mov	rax,QWORD[((0+160+0))+rbp]
1278	mul	r11
1279	imul	r15,r12
1280	add	r14,rax
1281	adc	r15,rdx
1282	mov	rax,QWORD[((8+160+0))+rbp]
1283	mov	r9,rax
1284	mul	r10
1285	add	r14,rax
1286	adc	rdx,0
1287	mov	r10,rdx
1288	mov	rax,QWORD[((8+160+0))+rbp]
1289	mul	r11
1290	add	r15,rax
1291	adc	rdx,0
1292	imul	r9,r12
1293	add	r15,r10
1294	adc	r9,rdx
1295	mov	r10,r13
1296	mov	r11,r14
1297	mov	r12,r15
1298	and	r12,3
1299	mov	r13,r15
1300	and	r13,-4
1301	mov	r14,r9
1302	shrd	r15,r9,2
1303	shr	r9,2
1304	add	r15,r13
1305	adc	r9,r14
1306	add	r10,r15
1307	adc	r11,r9
1308	adc	r12,0
1309
1310$L$open_sse_tail_192_finish:
1311	paddd	xmm2,XMMWORD[$L$chacha20_consts]
1312	paddd	xmm6,XMMWORD[((160+48))+rbp]
1313	paddd	xmm10,XMMWORD[((160+64))+rbp]
1314	paddd	xmm14,XMMWORD[((160+128))+rbp]
1315	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1316	paddd	xmm5,XMMWORD[((160+48))+rbp]
1317	paddd	xmm9,XMMWORD[((160+64))+rbp]
1318	paddd	xmm13,XMMWORD[((160+112))+rbp]
1319	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1320	paddd	xmm4,XMMWORD[((160+48))+rbp]
1321	paddd	xmm8,XMMWORD[((160+64))+rbp]
1322	paddd	xmm12,XMMWORD[((160+96))+rbp]
1323	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1324	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1325	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1326	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1327	pxor	xmm2,xmm3
1328	pxor	xmm6,xmm7
1329	pxor	xmm10,xmm11
1330	pxor	xmm15,xmm14
1331	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
1332	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
1333	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
1334	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1335	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
1336	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
1337	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
1338	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
1339	pxor	xmm1,xmm3
1340	pxor	xmm5,xmm7
1341	pxor	xmm9,xmm11
1342	pxor	xmm15,xmm13
1343	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
1344	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
1345	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
1346	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
1347
1348	sub	rbx,8*16
1349	lea	rsi,[128+rsi]
1350	lea	rdi,[128+rdi]
1351	jmp	NEAR $L$open_sse_tail_64_dec_loop
1352
; Sets up four state copies for the round loop that follows.
;   xmm0-xmm3      = $L$chacha20_consts
;   xmm4-xmm7      = row kept at [160+48+rbp]
;   xmm8-xmm11     = row kept at [160+64+rbp]
;   xmm15,14,13,12 = row at [160+96+rbp] advanced by 1,2,3,4 times $L$sse_inc
; The four advanced rows are written to [160+96 .. 160+144+rbp]; the code after
; the rounds adds them back from there. r8 is cleared and is used by the
; following loop as a byte offset from rsi.
1353$L$open_sse_tail_256:
1354	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
1355	movdqa	xmm4,XMMWORD[((160+48))+rbp]
1356	movdqa	xmm8,XMMWORD[((160+64))+rbp]
1357	movdqa	xmm1,xmm0
1358	movdqa	xmm5,xmm4
1359	movdqa	xmm9,xmm8
1360	movdqa	xmm2,xmm0
1361	movdqa	xmm6,xmm4
1362	movdqa	xmm10,xmm8
1363	movdqa	xmm3,xmm0
1364	movdqa	xmm7,xmm4
1365	movdqa	xmm11,xmm8
; Counter rows: each one is the previous plus $L$sse_inc (1 in the low dword).
1366	movdqa	xmm15,XMMWORD[((160+96))+rbp]
1367	paddd	xmm15,XMMWORD[$L$sse_inc]
1368	movdqa	xmm14,xmm15
1369	paddd	xmm14,XMMWORD[$L$sse_inc]
1370	movdqa	xmm13,xmm14
1371	paddd	xmm13,XMMWORD[$L$sse_inc]
1372	movdqa	xmm12,xmm13
1373	paddd	xmm12,XMMWORD[$L$sse_inc]
1374	movdqa	XMMWORD[(160+96)+rbp],xmm12
1375	movdqa	XMMWORD[(160+112)+rbp],xmm13
1376	movdqa	XMMWORD[(160+128)+rbp],xmm14
1377	movdqa	XMMWORD[(160+144)+rbp],xmm15
1378
1379	xor	r8,r8
; Loop body runs while r8 is below 10*16, stepping r8 by 16 per pass.
; Each pass does two things, interleaved instruction by instruction:
;   1. Adds the 16 bytes at [rsi+r8] into r10:r11 and 1 into r12 (carry
;      chained), multiplies r10:r11:r12 by the two qwords at [160+rbp] and
;      [168+rbp], and folds the product back into r10:r11:r12.
;   2. Runs an add / xor / rotate sequence (rotations of 16, 12, 8 and 7 bits)
;      on each of the four states, rotates rows with palignr, runs the
;      sequence again, and rotates the rows back.
; xmm11 (and later xmm9) are used as scratch for the 12 and 7 bit rotations,
; so their live values are parked at [160+80+rbp] around each use.
1380$L$open_sse_tail_256_rounds_and_x1hash:
1381	add	r10,QWORD[((0+0))+r8*1+rsi]
1382	adc	r11,QWORD[((8+0))+r8*1+rsi]
1383	adc	r12,1
1384	movdqa	XMMWORD[(160+80)+rbp],xmm11
; State xmm0/xmm4/xmm8/xmm12, first sequence.
1385	paddd	xmm0,xmm4
1386	pxor	xmm12,xmm0
1387	pshufb	xmm12,XMMWORD[$L$rol16]
1388	paddd	xmm8,xmm12
1389	pxor	xmm4,xmm8
1390	movdqa	xmm11,xmm4
1391	pslld	xmm11,12
1392	psrld	xmm4,20
1393	pxor	xmm4,xmm11
1394	paddd	xmm0,xmm4
1395	pxor	xmm12,xmm0
1396	pshufb	xmm12,XMMWORD[$L$rol8]
1397	paddd	xmm8,xmm12
1398	pxor	xmm4,xmm8
1399	movdqa	xmm11,xmm4
1400	pslld	xmm11,7
1401	psrld	xmm4,25
1402	pxor	xmm4,xmm11
; Hand-encoded: palignr xmm4,xmm4,4 ; palignr xmm8,xmm8,8 ; palignr xmm12,xmm12,12
1403DB	102,15,58,15,228,4
1404DB	102,69,15,58,15,192,8
1405DB	102,69,15,58,15,228,12
; State xmm1/xmm5/xmm9/xmm13, first sequence.
1406	paddd	xmm1,xmm5
1407	pxor	xmm13,xmm1
1408	pshufb	xmm13,XMMWORD[$L$rol16]
1409	paddd	xmm9,xmm13
1410	pxor	xmm5,xmm9
1411	movdqa	xmm11,xmm5
1412	pslld	xmm11,12
1413	psrld	xmm5,20
1414	pxor	xmm5,xmm11
1415	paddd	xmm1,xmm5
1416	pxor	xmm13,xmm1
1417	pshufb	xmm13,XMMWORD[$L$rol8]
1418	paddd	xmm9,xmm13
1419	pxor	xmm5,xmm9
1420	movdqa	xmm11,xmm5
1421	pslld	xmm11,7
1422	psrld	xmm5,25
1423	pxor	xmm5,xmm11
; Hand-encoded: palignr xmm5,xmm5,4 ; palignr xmm9,xmm9,8 ; palignr xmm13,xmm13,12
1424DB	102,15,58,15,237,4
1425DB	102,69,15,58,15,201,8
1426DB	102,69,15,58,15,237,12
; State xmm2/xmm6/xmm10/xmm14, first sequence.
1427	paddd	xmm2,xmm6
1428	pxor	xmm14,xmm2
1429	pshufb	xmm14,XMMWORD[$L$rol16]
1430	paddd	xmm10,xmm14
1431	pxor	xmm6,xmm10
1432	movdqa	xmm11,xmm6
1433	pslld	xmm11,12
1434	psrld	xmm6,20
1435	pxor	xmm6,xmm11
1436	paddd	xmm2,xmm6
1437	pxor	xmm14,xmm2
1438	pshufb	xmm14,XMMWORD[$L$rol8]
1439	paddd	xmm10,xmm14
1440	pxor	xmm6,xmm10
1441	movdqa	xmm11,xmm6
1442	pslld	xmm11,7
1443	psrld	xmm6,25
1444	pxor	xmm6,xmm11
; Hand-encoded: palignr xmm6,xmm6,4 ; palignr xmm10,xmm10,8 ; palignr xmm14,xmm14,12
1445DB	102,15,58,15,246,4
1446DB	102,69,15,58,15,210,8
1447DB	102,69,15,58,15,246,12
1448	movdqa	xmm11,XMMWORD[((160+80))+rbp]
; Multiply step 1: r13:r14:r15 = [160+rbp] * (r10, r11, r12).
1449	mov	rax,QWORD[((0+160+0))+rbp]
1450	mov	r15,rax
1451	mul	r10
1452	mov	r13,rax
1453	mov	r14,rdx
1454	mov	rax,QWORD[((0+160+0))+rbp]
1455	mul	r11
1456	imul	r15,r12
1457	add	r14,rax
1458	adc	r15,rdx
; State xmm3/xmm7/xmm11/xmm15, first sequence; xmm9 is the scratch here.
1459	movdqa	XMMWORD[(160+80)+rbp],xmm9
1460	paddd	xmm3,xmm7
1461	pxor	xmm15,xmm3
1462	pshufb	xmm15,XMMWORD[$L$rol16]
1463	paddd	xmm11,xmm15
1464	pxor	xmm7,xmm11
1465	movdqa	xmm9,xmm7
1466	pslld	xmm9,12
1467	psrld	xmm7,20
1468	pxor	xmm7,xmm9
1469	paddd	xmm3,xmm7
1470	pxor	xmm15,xmm3
1471	pshufb	xmm15,XMMWORD[$L$rol8]
1472	paddd	xmm11,xmm15
1473	pxor	xmm7,xmm11
1474	movdqa	xmm9,xmm7
1475	pslld	xmm9,7
1476	psrld	xmm7,25
1477	pxor	xmm7,xmm9
; Hand-encoded: palignr xmm7,xmm7,4 ; palignr xmm11,xmm11,8 ; palignr xmm15,xmm15,12
1478DB	102,15,58,15,255,4
1479DB	102,69,15,58,15,219,8
1480DB	102,69,15,58,15,255,12
1481	movdqa	xmm9,XMMWORD[((160+80))+rbp]
; Multiply step 2: add [168+rbp] * (r10, r11); r10 is reused for a partial high word.
1482	mov	rax,QWORD[((8+160+0))+rbp]
1483	mov	r9,rax
1484	mul	r10
1485	add	r14,rax
1486	adc	rdx,0
1487	mov	r10,rdx
1488	mov	rax,QWORD[((8+160+0))+rbp]
1489	mul	r11
1490	add	r15,rax
1491	adc	rdx,0
; Second sequence on each state; the palignr immediates are now 12, 8, 4,
; which undoes the row rotation applied above.
1492	movdqa	XMMWORD[(160+80)+rbp],xmm11
1493	paddd	xmm0,xmm4
1494	pxor	xmm12,xmm0
1495	pshufb	xmm12,XMMWORD[$L$rol16]
1496	paddd	xmm8,xmm12
1497	pxor	xmm4,xmm8
1498	movdqa	xmm11,xmm4
1499	pslld	xmm11,12
1500	psrld	xmm4,20
1501	pxor	xmm4,xmm11
1502	paddd	xmm0,xmm4
1503	pxor	xmm12,xmm0
1504	pshufb	xmm12,XMMWORD[$L$rol8]
1505	paddd	xmm8,xmm12
1506	pxor	xmm4,xmm8
1507	movdqa	xmm11,xmm4
1508	pslld	xmm11,7
1509	psrld	xmm4,25
1510	pxor	xmm4,xmm11
1511DB	102,15,58,15,228,12
1512DB	102,69,15,58,15,192,8
1513DB	102,69,15,58,15,228,4
1514	paddd	xmm1,xmm5
1515	pxor	xmm13,xmm1
1516	pshufb	xmm13,XMMWORD[$L$rol16]
1517	paddd	xmm9,xmm13
1518	pxor	xmm5,xmm9
1519	movdqa	xmm11,xmm5
1520	pslld	xmm11,12
1521	psrld	xmm5,20
1522	pxor	xmm5,xmm11
1523	paddd	xmm1,xmm5
1524	pxor	xmm13,xmm1
1525	pshufb	xmm13,XMMWORD[$L$rol8]
1526	paddd	xmm9,xmm13
1527	pxor	xmm5,xmm9
1528	movdqa	xmm11,xmm5
1529	pslld	xmm11,7
1530	psrld	xmm5,25
1531	pxor	xmm5,xmm11
1532DB	102,15,58,15,237,12
1533DB	102,69,15,58,15,201,8
1534DB	102,69,15,58,15,237,4
; Multiply step 3: finish the top words of the product in r15 and r9.
1535	imul	r9,r12
1536	add	r15,r10
1537	adc	r9,rdx
1538	paddd	xmm2,xmm6
1539	pxor	xmm14,xmm2
1540	pshufb	xmm14,XMMWORD[$L$rol16]
1541	paddd	xmm10,xmm14
1542	pxor	xmm6,xmm10
1543	movdqa	xmm11,xmm6
1544	pslld	xmm11,12
1545	psrld	xmm6,20
1546	pxor	xmm6,xmm11
1547	paddd	xmm2,xmm6
1548	pxor	xmm14,xmm2
1549	pshufb	xmm14,XMMWORD[$L$rol8]
1550	paddd	xmm10,xmm14
1551	pxor	xmm6,xmm10
1552	movdqa	xmm11,xmm6
1553	pslld	xmm11,7
1554	psrld	xmm6,25
1555	pxor	xmm6,xmm11
1556DB	102,15,58,15,246,12
1557DB	102,69,15,58,15,210,8
1558DB	102,69,15,58,15,246,4
1559	movdqa	xmm11,XMMWORD[((160+80))+rbp]
; Fold: r10:r11 = low 128 bits, r12 = bits 0-1 of r15; then add both
; (r15:r9 with its low two bits cleared) and (r15:r9 shifted right by 2),
; with the final carry going into r12.
1560	mov	r10,r13
1561	mov	r11,r14
1562	mov	r12,r15
1563	and	r12,3
1564	mov	r13,r15
1565	and	r13,-4
1566	mov	r14,r9
1567	shrd	r15,r9,2
1568	shr	r9,2
1569	add	r15,r13
1570	adc	r9,r14
1571	add	r10,r15
1572	adc	r11,r9
1573	adc	r12,0
1574	movdqa	XMMWORD[(160+80)+rbp],xmm9
1575	paddd	xmm3,xmm7
1576	pxor	xmm15,xmm3
1577	pshufb	xmm15,XMMWORD[$L$rol16]
1578	paddd	xmm11,xmm15
1579	pxor	xmm7,xmm11
1580	movdqa	xmm9,xmm7
1581	pslld	xmm9,12
1582	psrld	xmm7,20
1583	pxor	xmm7,xmm9
1584	paddd	xmm3,xmm7
1585	pxor	xmm15,xmm3
1586	pshufb	xmm15,XMMWORD[$L$rol8]
1587	paddd	xmm11,xmm15
1588	pxor	xmm7,xmm11
1589	movdqa	xmm9,xmm7
1590	pslld	xmm9,7
1591	psrld	xmm7,25
1592	pxor	xmm7,xmm9
1593DB	102,15,58,15,255,12
1594DB	102,69,15,58,15,219,8
1595DB	102,69,15,58,15,255,4
1596	movdqa	xmm9,XMMWORD[((160+80))+rbp]
1597
; Advance the hash offset; unsigned compare against 10*16.
1598	add	r8,16
1599	cmp	r8,10*16
1600	jb	NEAR $L$open_sse_tail_256_rounds_and_x1hash
1601
; Hash-only loop. rcx = rbx rounded down to a multiple of 16.
; Each pass adds the 16 bytes at [rsi+r8] into r10:r11 (plus 1 into r12),
; multiplies r10:r11:r12 by the qwords at [160+rbp] and [168+rbp], and folds
; the result back into r10:r11:r12. r8 continues from the value left by the
; preceding loop. The test is at the bottom, so the body always runs once
; before r8 is compared (unsigned) with rcx.
1602	mov	rcx,rbx
1603	and	rcx,-16
1604$L$open_sse_tail_256_hash:
1605	add	r10,QWORD[((0+0))+r8*1+rsi]
1606	adc	r11,QWORD[((8+0))+r8*1+rsi]
1607	adc	r12,1
; r13:r14:r15 = [160+rbp] * (r10, r11, r12)
1608	mov	rax,QWORD[((0+160+0))+rbp]
1609	mov	r15,rax
1610	mul	r10
1611	mov	r13,rax
1612	mov	r14,rdx
1613	mov	rax,QWORD[((0+160+0))+rbp]
1614	mul	r11
1615	imul	r15,r12
1616	add	r14,rax
1617	adc	r15,rdx
; add [168+rbp] * (r10, r11, r12), one word higher; r9 receives the top word
1618	mov	rax,QWORD[((8+160+0))+rbp]
1619	mov	r9,rax
1620	mul	r10
1621	add	r14,rax
1622	adc	rdx,0
1623	mov	r10,rdx
1624	mov	rax,QWORD[((8+160+0))+rbp]
1625	mul	r11
1626	add	r15,rax
1627	adc	rdx,0
1628	imul	r9,r12
1629	add	r15,r10
1630	adc	r9,rdx
; Fold r13:r14:r15:r9 into r10:r11:r12: keep the low 130 bits, then add the
; upper part both with its low two bits cleared and shifted right by 2.
1631	mov	r10,r13
1632	mov	r11,r14
1633	mov	r12,r15
1634	and	r12,3
1635	mov	r13,r15
1636	and	r13,-4
1637	mov	r14,r9
1638	shrd	r15,r9,2
1639	shr	r9,2
1640	add	r15,r13
1641	adc	r9,r14
1642	add	r10,r15
1643	adc	r11,r9
1644	adc	r12,0
1645
1646	add	r8,16
1647	cmp	r8,rcx
1648	jb	NEAR $L$open_sse_tail_256_hash
; Post-round step for the four states built at $L$open_sse_tail_256.
; Adds the starting rows back into every state: the constants, the rows at
; [160+48+rbp] and [160+64+rbp], and the per-state counter rows at
; [160+144], [160+128], [160+112] and [160+96+rbp].
1649	paddd	xmm3,XMMWORD[$L$chacha20_consts]
1650	paddd	xmm7,XMMWORD[((160+48))+rbp]
1651	paddd	xmm11,XMMWORD[((160+64))+rbp]
1652	paddd	xmm15,XMMWORD[((160+144))+rbp]
1653	paddd	xmm2,XMMWORD[$L$chacha20_consts]
1654	paddd	xmm6,XMMWORD[((160+48))+rbp]
1655	paddd	xmm10,XMMWORD[((160+64))+rbp]
1656	paddd	xmm14,XMMWORD[((160+128))+rbp]
1657	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1658	paddd	xmm5,XMMWORD[((160+48))+rbp]
1659	paddd	xmm9,XMMWORD[((160+64))+rbp]
1660	paddd	xmm13,XMMWORD[((160+112))+rbp]
1661	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1662	paddd	xmm4,XMMWORD[((160+48))+rbp]
1663	paddd	xmm8,XMMWORD[((160+64))+rbp]
1664	paddd	xmm12,XMMWORD[((160+96))+rbp]
; xmm12 is needed as a load register, so its value is parked at [160+80+rbp].
1665	movdqa	XMMWORD[(160+80)+rbp],xmm12
; Bytes 0-63: input at rsi xor state xmm3/xmm7/xmm11/xmm15, written to rdi.
1666	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
1667	pxor	xmm12,xmm3
1668	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
1669	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
1670	pxor	xmm12,xmm7
1671	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
1672	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
1673	pxor	xmm12,xmm11
1674	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
1675	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
1676	pxor	xmm12,xmm15
1677	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
; Bytes 64-127: xor with state xmm2/xmm6/xmm10/xmm14.
1678	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
1679	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
1680	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
1681	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
1682	pxor	xmm2,xmm3
1683	pxor	xmm6,xmm7
1684	pxor	xmm10,xmm11
1685	pxor	xmm15,xmm14
1686	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
1687	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
1688	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
1689	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
; Bytes 128-191: xor with state xmm1/xmm5/xmm9/xmm13.
1690	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
1691	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
1692	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
1693	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
1694	pxor	xmm1,xmm3
1695	pxor	xmm5,xmm7
1696	pxor	xmm9,xmm11
1697	pxor	xmm15,xmm13
1698	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
1699	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
1700	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
1701	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
1702
; Restore xmm12 so xmm0/xmm4/xmm8/xmm12 hold the last 64 bytes of key stream,
; then account for the 192 bytes consumed and fall through to the label below.
1703	movdqa	xmm12,XMMWORD[((160+80))+rbp]
1704	sub	rbx,12*16
1705	lea	rsi,[192+rsi]
1706	lea	rdi,[192+rdi]
1707
1708
; Consumes the key stream held in xmm0, xmm4, xmm8, xmm12 one 16-byte block
; at a time. While rbx is at least 16: xor 16 bytes at rsi with xmm0, store
; them at rdi, advance both pointers, and shift the queue down
; (xmm0 = xmm4, xmm4 = xmm8, xmm8 = xmm12).
; Exits to $L$open_sse_tail_16_init once fewer than 16 bytes remain.
1709$L$open_sse_tail_64_dec_loop:
1710	cmp	rbx,16
1711	jb	NEAR $L$open_sse_tail_16_init
1712	sub	rbx,16
1713	movdqu	xmm3,XMMWORD[rsi]
1714	pxor	xmm0,xmm3
1715	movdqu	XMMWORD[rdi],xmm0
1716	lea	rsi,[16+rsi]
1717	lea	rdi,[16+rdi]
1718	movdqa	xmm0,xmm4
1719	movdqa	xmm4,xmm8
1720	movdqa	xmm8,xmm12
1721	jmp	NEAR $L$open_sse_tail_64_dec_loop
; Entry from the 64-byte tail loop: the next key-stream block is in xmm0,
; while the code below expects it in xmm1.
1722$L$open_sse_tail_16_init:
1723	movdqa	xmm1,xmm0
1724
1725
; Handles a final partial block of rbx bytes (callers visible in this file
; branch here only when rbx is below 16). With rbx == 0 there is nothing to
; do and control goes straight to $L$open_sse_finalize.
1726$L$open_sse_tail_16:
1727	test	rbx,rbx
1728	jz	NEAR $L$open_sse_finalize
1729
1730
1731
; Gather the rbx input bytes into xmm3, reading backwards from the last byte:
; each step shifts xmm3 left one byte and inserts the new byte at position 0,
; so input byte i lands in byte i and the unused upper bytes stay zero.
1732	pxor	xmm3,xmm3
1733	lea	rsi,[((-1))+rbx*1+rsi]
1734	mov	r8,rbx
1735$L$open_sse_tail_16_compose:
1736	pslldq	xmm3,1
1737	pinsrb	xmm3,BYTE[rsi],0
1738	sub	rsi,1
1739	sub	r8,1
1740	jnz	NEAR $L$open_sse_tail_16_compose
1741
; Hand-encoded: movq r13,xmm3 (low qword); pextrq below fetches the high qword.
1742DB	102,73,15,126,221
1743	pextrq	r14,xmm3,1
1744
; xor the gathered input with the key stream in xmm1.
1745	pxor	xmm3,xmm1
1746
1747
; Write the rbx result bytes to rdi one at a time, lowest byte first.
1748$L$open_sse_tail_16_extract:
1749	pextrb	XMMWORD[rdi],xmm3,0
1750	psrldq	xmm3,1
1751	add	rdi,1
1752	sub	rbx,1
1753	jne	NEAR $L$open_sse_tail_16_extract
1754
; Hash the zero-padded input block saved in r13:r14 (taken before the xor):
; add it into r10:r11 with 1 into r12, multiply by the qwords at [160+rbp]
; and [168+rbp], then fold back into r10:r11:r12.
1755	add	r10,r13
1756	adc	r11,r14
1757	adc	r12,1
1758	mov	rax,QWORD[((0+160+0))+rbp]
1759	mov	r15,rax
1760	mul	r10
1761	mov	r13,rax
1762	mov	r14,rdx
1763	mov	rax,QWORD[((0+160+0))+rbp]
1764	mul	r11
1765	imul	r15,r12
1766	add	r14,rax
1767	adc	r15,rdx
1768	mov	rax,QWORD[((8+160+0))+rbp]
1769	mov	r9,rax
1770	mul	r10
1771	add	r14,rax
1772	adc	rdx,0
1773	mov	r10,rdx
1774	mov	rax,QWORD[((8+160+0))+rbp]
1775	mul	r11
1776	add	r15,rax
1777	adc	rdx,0
1778	imul	r9,r12
1779	add	r15,r10
1780	adc	r9,rdx
1781	mov	r10,r13
1782	mov	r11,r14
1783	mov	r12,r15
1784	and	r12,3
1785	mov	r13,r15
1786	and	r13,-4
1787	mov	r14,r9
1788	shrd	r15,r9,2
1789	shr	r9,2
1790	add	r15,r13
1791	adc	r9,r14
1792	add	r10,r15
1793	adc	r11,r9
1794	adc	r12,0
1795
1796
; Final hash step and function exit.
;   1. Absorb the 16 bytes stored at [160+32+rbp] (NOTE(review): presumably the
;      two length fields; the seal entry in this file fills the same slot with
;      r8 and a length sum - confirm for the open entry) and multiply/fold.
;   2. Conditionally subtract 2^130 - 5 from r10:r11:r12.
;   3. Add the 128-bit value at [160+16+rbp] and write the low 128 bits through
;      the pointer popped into r9.
;   4. Restore xmm6-xmm15, the stack, the pushed registers and rdi/rsi.
1797$L$open_sse_finalize:
1798	add	r10,QWORD[((0+160+32))+rbp]
1799	adc	r11,QWORD[((8+160+32))+rbp]
1800	adc	r12,1
1801	mov	rax,QWORD[((0+160+0))+rbp]
1802	mov	r15,rax
1803	mul	r10
1804	mov	r13,rax
1805	mov	r14,rdx
1806	mov	rax,QWORD[((0+160+0))+rbp]
1807	mul	r11
1808	imul	r15,r12
1809	add	r14,rax
1810	adc	r15,rdx
1811	mov	rax,QWORD[((8+160+0))+rbp]
1812	mov	r9,rax
1813	mul	r10
1814	add	r14,rax
1815	adc	rdx,0
1816	mov	r10,rdx
1817	mov	rax,QWORD[((8+160+0))+rbp]
1818	mul	r11
1819	add	r15,rax
1820	adc	rdx,0
1821	imul	r9,r12
1822	add	r15,r10
1823	adc	r9,rdx
1824	mov	r10,r13
1825	mov	r11,r14
1826	mov	r12,r15
1827	and	r12,3
1828	mov	r13,r15
1829	and	r13,-4
1830	mov	r14,r9
1831	shrd	r15,r9,2
1832	shr	r9,2
1833	add	r15,r13
1834	adc	r9,r14
1835	add	r10,r15
1836	adc	r11,r9
1837	adc	r12,0
1838
1839
; Subtract the three-word constant (-5, -1, 3), i.e. 2^130 - 5. If that
; borrows, the accumulator was already smaller, so cmovc restores the saved
; copy. Using cmov keeps this step free of branches.
1840	mov	r13,r10
1841	mov	r14,r11
1842	mov	r15,r12
1843	sub	r10,-5
1844	sbb	r11,-1
1845	sbb	r12,3
1846	cmovc	r10,r13
1847	cmovc	r11,r14
1848	cmovc	r12,r15
1849
; Add the 16 bytes kept at [160+16+rbp]; the carry out of r11 is dropped.
1850	add	r10,QWORD[((0+160+16))+rbp]
1851	adc	r11,QWORD[((8+160+16))+rbp]
1852
; Reload xmm6-xmm15 from the save area at [rbp .. rbp+159].
1853	movaps	xmm6,XMMWORD[((0+0))+rbp]
1854	movaps	xmm7,XMMWORD[((16+0))+rbp]
1855	movaps	xmm8,XMMWORD[((32+0))+rbp]
1856	movaps	xmm9,XMMWORD[((48+0))+rbp]
1857	movaps	xmm10,XMMWORD[((64+0))+rbp]
1858	movaps	xmm11,XMMWORD[((80+0))+rbp]
1859	movaps	xmm12,XMMWORD[((96+0))+rbp]
1860	movaps	xmm13,XMMWORD[((112+0))+rbp]
1861	movaps	xmm14,XMMWORD[((128+0))+rbp]
1862	movaps	xmm15,XMMWORD[((144+0))+rbp]
1863
1864
; Release the frame; the size matches the sub rsp used by the seal entry in
; this file (the open entry is outside this section).
1865	add	rsp,288 + 160 + 32
1866
1867
; The top stack slot is popped into r9 and used as the destination pointer
; for the 16 result bytes.
1868	pop	r9
1869
1870	mov	QWORD[r9],r10
1871	mov	QWORD[8+r9],r11
1872	pop	r15
1873
1874	pop	r14
1875
1876	pop	r13
1877
1878	pop	r12
1879
1880	pop	rbx
1881
1882	pop	rbp
1883
1884	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1885	mov	rsi,QWORD[16+rsp]
1886	DB	0F3h,0C3h		;repret
1887
; Short-input path: builds three states from the constants and the 48 bytes
; at r9.
;   state 0: xmm0 / xmm4 / xmm8 / xmm12   (row at [32+r9] as loaded)
;   state 1: xmm1 / xmm5 / xmm9 / xmm13   (that row plus $L$sse_inc)
;   state 2: xmm2 / xmm6 / xmm10 / xmm14  (that row plus twice $L$sse_inc)
; xmm7, xmm11 and xmm15 keep copies of [r9], [16+r9] and the state 1 counter
; row for the add-back after the rounds. r10 is the round-loop counter (10).
1888$L$open_sse_128:
1889
1890	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
1891	movdqa	xmm1,xmm0
1892	movdqa	xmm2,xmm0
1893	movdqu	xmm4,XMMWORD[r9]
1894	movdqa	xmm5,xmm4
1895	movdqa	xmm6,xmm4
1896	movdqu	xmm8,XMMWORD[16+r9]
1897	movdqa	xmm9,xmm8
1898	movdqa	xmm10,xmm8
1899	movdqu	xmm12,XMMWORD[32+r9]
1900	movdqa	xmm13,xmm12
1901	paddd	xmm13,XMMWORD[$L$sse_inc]
1902	movdqa	xmm14,xmm13
1903	paddd	xmm14,XMMWORD[$L$sse_inc]
1904	movdqa	xmm7,xmm4
1905	movdqa	xmm11,xmm8
1906	movdqa	xmm15,xmm13
1907	mov	r10,10
1908
; Round loop for the three states set up at $L$open_sse_128; runs until r10
; counts down to zero. Each pass applies the add / xor / rotate sequence
; (rotations of 16, 12, 8 and 7 bits) to every state, rotates rows with
; palignr (immediates 4, 8, 12), applies the sequence again, and rotates the
; rows back (immediates 12, 8, 4). xmm3 is the scratch register for the 12
; and 7 bit rotations.
1909$L$open_sse_128_rounds:
; State 0, first sequence.
1910	paddd	xmm0,xmm4
1911	pxor	xmm12,xmm0
1912	pshufb	xmm12,XMMWORD[$L$rol16]
1913	paddd	xmm8,xmm12
1914	pxor	xmm4,xmm8
1915	movdqa	xmm3,xmm4
1916	pslld	xmm3,12
1917	psrld	xmm4,20
1918	pxor	xmm4,xmm3
1919	paddd	xmm0,xmm4
1920	pxor	xmm12,xmm0
1921	pshufb	xmm12,XMMWORD[$L$rol8]
1922	paddd	xmm8,xmm12
1923	pxor	xmm4,xmm8
1924	movdqa	xmm3,xmm4
1925	pslld	xmm3,7
1926	psrld	xmm4,25
1927	pxor	xmm4,xmm3
; Hand-encoded: palignr xmm4,xmm4,4 ; palignr xmm8,xmm8,8 ; palignr xmm12,xmm12,12
1928DB	102,15,58,15,228,4
1929DB	102,69,15,58,15,192,8
1930DB	102,69,15,58,15,228,12
; State 1, first sequence.
1931	paddd	xmm1,xmm5
1932	pxor	xmm13,xmm1
1933	pshufb	xmm13,XMMWORD[$L$rol16]
1934	paddd	xmm9,xmm13
1935	pxor	xmm5,xmm9
1936	movdqa	xmm3,xmm5
1937	pslld	xmm3,12
1938	psrld	xmm5,20
1939	pxor	xmm5,xmm3
1940	paddd	xmm1,xmm5
1941	pxor	xmm13,xmm1
1942	pshufb	xmm13,XMMWORD[$L$rol8]
1943	paddd	xmm9,xmm13
1944	pxor	xmm5,xmm9
1945	movdqa	xmm3,xmm5
1946	pslld	xmm3,7
1947	psrld	xmm5,25
1948	pxor	xmm5,xmm3
; Hand-encoded: palignr xmm5,xmm5,4 ; palignr xmm9,xmm9,8 ; palignr xmm13,xmm13,12
1949DB	102,15,58,15,237,4
1950DB	102,69,15,58,15,201,8
1951DB	102,69,15,58,15,237,12
; State 2, first sequence.
1952	paddd	xmm2,xmm6
1953	pxor	xmm14,xmm2
1954	pshufb	xmm14,XMMWORD[$L$rol16]
1955	paddd	xmm10,xmm14
1956	pxor	xmm6,xmm10
1957	movdqa	xmm3,xmm6
1958	pslld	xmm3,12
1959	psrld	xmm6,20
1960	pxor	xmm6,xmm3
1961	paddd	xmm2,xmm6
1962	pxor	xmm14,xmm2
1963	pshufb	xmm14,XMMWORD[$L$rol8]
1964	paddd	xmm10,xmm14
1965	pxor	xmm6,xmm10
1966	movdqa	xmm3,xmm6
1967	pslld	xmm3,7
1968	psrld	xmm6,25
1969	pxor	xmm6,xmm3
; Hand-encoded: palignr xmm6,xmm6,4 ; palignr xmm10,xmm10,8 ; palignr xmm14,xmm14,12
1970DB	102,15,58,15,246,4
1971DB	102,69,15,58,15,210,8
1972DB	102,69,15,58,15,246,12
; State 0, second sequence.
1973	paddd	xmm0,xmm4
1974	pxor	xmm12,xmm0
1975	pshufb	xmm12,XMMWORD[$L$rol16]
1976	paddd	xmm8,xmm12
1977	pxor	xmm4,xmm8
1978	movdqa	xmm3,xmm4
1979	pslld	xmm3,12
1980	psrld	xmm4,20
1981	pxor	xmm4,xmm3
1982	paddd	xmm0,xmm4
1983	pxor	xmm12,xmm0
1984	pshufb	xmm12,XMMWORD[$L$rol8]
1985	paddd	xmm8,xmm12
1986	pxor	xmm4,xmm8
1987	movdqa	xmm3,xmm4
1988	pslld	xmm3,7
1989	psrld	xmm4,25
1990	pxor	xmm4,xmm3
; Same palignr encodings as above with immediates 12, 8, 4.
1991DB	102,15,58,15,228,12
1992DB	102,69,15,58,15,192,8
1993DB	102,69,15,58,15,228,4
; State 1, second sequence.
1994	paddd	xmm1,xmm5
1995	pxor	xmm13,xmm1
1996	pshufb	xmm13,XMMWORD[$L$rol16]
1997	paddd	xmm9,xmm13
1998	pxor	xmm5,xmm9
1999	movdqa	xmm3,xmm5
2000	pslld	xmm3,12
2001	psrld	xmm5,20
2002	pxor	xmm5,xmm3
2003	paddd	xmm1,xmm5
2004	pxor	xmm13,xmm1
2005	pshufb	xmm13,XMMWORD[$L$rol8]
2006	paddd	xmm9,xmm13
2007	pxor	xmm5,xmm9
2008	movdqa	xmm3,xmm5
2009	pslld	xmm3,7
2010	psrld	xmm5,25
2011	pxor	xmm5,xmm3
2012DB	102,15,58,15,237,12
2013DB	102,69,15,58,15,201,8
2014DB	102,69,15,58,15,237,4
; State 2, second sequence.
2015	paddd	xmm2,xmm6
2016	pxor	xmm14,xmm2
2017	pshufb	xmm14,XMMWORD[$L$rol16]
2018	paddd	xmm10,xmm14
2019	pxor	xmm6,xmm10
2020	movdqa	xmm3,xmm6
2021	pslld	xmm3,12
2022	psrld	xmm6,20
2023	pxor	xmm6,xmm3
2024	paddd	xmm2,xmm6
2025	pxor	xmm14,xmm2
2026	pshufb	xmm14,XMMWORD[$L$rol8]
2027	paddd	xmm10,xmm14
2028	pxor	xmm6,xmm10
2029	movdqa	xmm3,xmm6
2030	pslld	xmm3,7
2031	psrld	xmm6,25
2032	pxor	xmm6,xmm3
2033DB	102,15,58,15,246,12
2034DB	102,69,15,58,15,210,8
2035DB	102,69,15,58,15,246,4
2036
2037	dec	r10
2038	jnz	NEAR $L$open_sse_128_rounds
; Post-round add-back for the three states of the short-input path.
; xmm7 / xmm11 / xmm15 hold the saved input rows. Rows xmm8 and xmm12 of
; state 0 are not added back here. xmm15 is bumped by $L$sse_inc before being
; added into xmm14, so state 2 gets the next counter value.
2039	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2040	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2041	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2042	paddd	xmm4,xmm7
2043	paddd	xmm5,xmm7
2044	paddd	xmm6,xmm7
2045	paddd	xmm9,xmm11
2046	paddd	xmm10,xmm11
2047	paddd	xmm13,xmm15
2048	paddd	xmm15,XMMWORD[$L$sse_inc]
2049	paddd	xmm14,xmm15
2050
; First 32 bytes of state 0 become the hash key material: xmm0 masked with
; $L$clamp goes to [160+rbp] (the multiplier used by the hash steps) and xmm4
; goes to [160+16+rbp] (added at the very end in $L$open_sse_finalize).
2051	pand	xmm0,XMMWORD[$L$clamp]
2052	movdqa	XMMWORD[(160+0)+rbp],xmm0
2053	movdqa	XMMWORD[(160+16)+rbp],xmm4
2054
; mov r8,r8 has no effect; the generator emits it because both of its
; operands map to r8. poly_hash_ad_internal is defined outside this section.
2055	mov	r8,r8
2056	call	poly_hash_ad_internal
; Processes the input 16 bytes at a time on the short-input path.
; While rbx is at least 16:
;   - add the 16 input bytes at rsi into r10:r11 (plus 1 into r12),
;   - xor the same bytes with the key-stream block in xmm1 and store at rdi,
;   - multiply r10:r11:r12 by the qwords at [160+rbp] and [168+rbp] and fold,
;   - shift the queue of remaining key-stream blocks down by one register.
; With fewer than 16 bytes left, control moves to $L$open_sse_tail_16, which
; expects the next key-stream block in xmm1.
2057$L$open_sse_128_xor_hash:
2058	cmp	rbx,16
2059	jb	NEAR $L$open_sse_tail_16
2060	sub	rbx,16
2061	add	r10,QWORD[((0+0))+rsi]
2062	adc	r11,QWORD[((8+0))+rsi]
2063	adc	r12,1
2064
2065
2066	movdqu	xmm3,XMMWORD[rsi]
2067	pxor	xmm1,xmm3
2068	movdqu	XMMWORD[rdi],xmm1
2069	lea	rsi,[16+rsi]
2070	lea	rdi,[16+rdi]
2071	mov	rax,QWORD[((0+160+0))+rbp]
2072	mov	r15,rax
2073	mul	r10
2074	mov	r13,rax
2075	mov	r14,rdx
2076	mov	rax,QWORD[((0+160+0))+rbp]
2077	mul	r11
2078	imul	r15,r12
2079	add	r14,rax
2080	adc	r15,rdx
2081	mov	rax,QWORD[((8+160+0))+rbp]
2082	mov	r9,rax
2083	mul	r10
2084	add	r14,rax
2085	adc	rdx,0
2086	mov	r10,rdx
2087	mov	rax,QWORD[((8+160+0))+rbp]
2088	mul	r11
2089	add	r15,rax
2090	adc	rdx,0
2091	imul	r9,r12
2092	add	r15,r10
2093	adc	r9,rdx
2094	mov	r10,r13
2095	mov	r11,r14
2096	mov	r12,r15
2097	and	r12,3
2098	mov	r13,r15
2099	and	r13,-4
2100	mov	r14,r9
2101	shrd	r15,r9,2
2102	shr	r9,2
2103	add	r15,r13
2104	adc	r9,r14
2105	add	r10,r15
2106	adc	r11,r9
2107	adc	r12,0
2108
2109
; Key-stream queue order: xmm1, xmm5, xmm9, xmm13, xmm2, xmm6, xmm10, xmm14.
2110	movdqa	xmm1,xmm5
2111	movdqa	xmm5,xmm9
2112	movdqa	xmm9,xmm13
2113	movdqa	xmm13,xmm2
2114	movdqa	xmm2,xmm6
2115	movdqa	xmm6,xmm10
2116	movdqa	xmm10,xmm14
2117	jmp	NEAR $L$open_sse_128_xor_hash
2118$L$SEH_end_chacha20_poly1305_open:
2119
2120
2121
2122
2123
2124
2125
2126
; chacha20_poly1305_seal: entry point, prologue and first state setup.
; (The rest of the routine continues beyond this section.)
;
; Win64 entry: rdi and rsi are saved into the caller-provided slots at
; [8+rsp] and [16+rsp], then the incoming arguments are moved into the
; registers the body uses:
;   rcx to rdi, rdx to rsi, r8 to rdx, r9 to rcx, [40+rsp] to r8, [48+rsp] to r9.
; As used by the body visible here: rdi = output pointer, rsi = input pointer,
; rdx = input length (copied to rbx), r9 = pointer to a block whose first 48
; bytes seed the state and whose qword at offset 56 is added to the length.
2127global	chacha20_poly1305_seal
2128
2129ALIGN	64
2130chacha20_poly1305_seal:
2131	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2132	mov	QWORD[16+rsp],rsi
2133	mov	rax,rsp
2134$L$SEH_begin_chacha20_poly1305_seal:
2135	mov	rdi,rcx
2136	mov	rsi,rdx
2137	mov	rdx,r8
2138	mov	rcx,r9
2139	mov	r8,QWORD[40+rsp]
2140	mov	r9,QWORD[48+rsp]
2141
2142
2143
2144	push	rbp
2145
2146	push	rbx
2147
2148	push	r12
2149
2150	push	r13
2151
2152	push	r14
2153
2154	push	r15
2155
2156
2157
; r9 is pushed last so the exit code can pop it first and use it as the
; address for the 16-byte result.
2158	push	r9
2159
2160	sub	rsp,288 + 160 + 32
2161
; rbp = 32-byte aligned base inside the new frame; all frame slots below are
; addressed relative to it.
2162	lea	rbp,[32+rsp]
2163	and	rbp,-32
2164
; Save xmm6-xmm15 at [rbp .. rbp+159]; movaps is valid because rbp is aligned.
2165	movaps	XMMWORD[(0+0)+rbp],xmm6
2166	movaps	XMMWORD[(16+0)+rbp],xmm7
2167	movaps	XMMWORD[(32+0)+rbp],xmm8
2168	movaps	XMMWORD[(48+0)+rbp],xmm9
2169	movaps	XMMWORD[(64+0)+rbp],xmm10
2170	movaps	XMMWORD[(80+0)+rbp],xmm11
2171	movaps	XMMWORD[(96+0)+rbp],xmm12
2172	movaps	XMMWORD[(112+0)+rbp],xmm13
2173	movaps	XMMWORD[(128+0)+rbp],xmm14
2174	movaps	XMMWORD[(144+0)+rbp],xmm15
2175
; [160+32+rbp] = r8, [160+40+rbp] = rdx + qword at [56+r9]; rbx = rdx.
2176	mov	rbx,QWORD[56+r9]
2177	add	rbx,rdx
2178	mov	QWORD[((0+160+32))+rbp],r8
2179	mov	QWORD[((8+160+32))+rbp],rbx
2180	mov	rbx,rdx
2181
; Take the AVX2 routine only when both bits of mask 288 (bits 5 and 8) are set
; in the dword at OPENSSL_ia32cap_P+8.
; NOTE(review): presumably the AVX2 and BMI2 feature bits - confirm.
2182	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
2183	and	eax,288
2184	xor	eax,288
2185	jz	NEAR chacha20_poly1305_seal_avx2
2186
; Inputs of at most 128 bytes use a separate path.
2187	cmp	rbx,128
2188	jbe	NEAR $L$seal_sse_128
2189
; Four states: constants in xmm0-xmm3, [r9] in xmm4-xmm7, [16+r9] in
; xmm8-xmm11, and the row at [32+r9] advanced by 0, 1, 2, 3 times $L$sse_inc
; in xmm15, xmm14, xmm13, xmm12.
2190	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2191	movdqu	xmm4,XMMWORD[r9]
2192	movdqu	xmm8,XMMWORD[16+r9]
2193	movdqu	xmm12,XMMWORD[32+r9]
2194
2195	movdqa	xmm1,xmm0
2196	movdqa	xmm2,xmm0
2197	movdqa	xmm3,xmm0
2198	movdqa	xmm5,xmm4
2199	movdqa	xmm6,xmm4
2200	movdqa	xmm7,xmm4
2201	movdqa	xmm9,xmm8
2202	movdqa	xmm10,xmm8
2203	movdqa	xmm11,xmm8
2204	movdqa	xmm15,xmm12
2205	paddd	xmm12,XMMWORD[$L$sse_inc]
2206	movdqa	xmm14,xmm12
2207	paddd	xmm12,XMMWORD[$L$sse_inc]
2208	movdqa	xmm13,xmm12
2209	paddd	xmm12,XMMWORD[$L$sse_inc]
2210
; Keep the starting rows in the frame for the add-back after the rounds.
2211	movdqa	XMMWORD[(160+48)+rbp],xmm4
2212	movdqa	XMMWORD[(160+64)+rbp],xmm8
2213	movdqa	XMMWORD[(160+96)+rbp],xmm12
2214	movdqa	XMMWORD[(160+112)+rbp],xmm13
2215	movdqa	XMMWORD[(160+128)+rbp],xmm14
2216	movdqa	XMMWORD[(160+144)+rbp],xmm15
; r10 = pass count for $L$seal_sse_init_rounds.
2217	mov	r10,10
; Round loop over four states at once; runs until r10 counts down to zero.
; The four states are processed in lock step: each add / xor / rotate step is
; issued for all four before moving on. xmm8 serves three purposes in turn -
; its own state row, the pshufb mask ($L$rol16 or $L$rol8), and the scratch
; for the shift-based rotations - so its row value is kept at [160+80+rbp]
; whenever xmm8 is busy with something else.
2218$L$seal_sse_init_rounds:
2219	movdqa	XMMWORD[(160+80)+rbp],xmm8
2220	movdqa	xmm8,XMMWORD[$L$rol16]
2221	paddd	xmm3,xmm7
2222	paddd	xmm2,xmm6
2223	paddd	xmm1,xmm5
2224	paddd	xmm0,xmm4
2225	pxor	xmm15,xmm3
2226	pxor	xmm14,xmm2
2227	pxor	xmm13,xmm1
2228	pxor	xmm12,xmm0
; Hand-encoded: pshufb xmm15,xmm8 ; pshufb xmm14,xmm8 ; pshufb xmm13,xmm8 ; pshufb xmm12,xmm8
2229DB	102,69,15,56,0,248
2230DB	102,69,15,56,0,240
2231DB	102,69,15,56,0,232
2232DB	102,69,15,56,0,224
2233	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2234	paddd	xmm11,xmm15
2235	paddd	xmm10,xmm14
2236	paddd	xmm9,xmm13
2237	paddd	xmm8,xmm12
2238	pxor	xmm7,xmm11
2239	pxor	xmm6,xmm10
2240	pxor	xmm5,xmm9
2241	pxor	xmm4,xmm8
2242	movdqa	XMMWORD[(160+80)+rbp],xmm8
; Rotate xmm7, xmm6, xmm5, xmm4 left by 12 bits (shift right 20, left 12, xor).
2243	movdqa	xmm8,xmm7
2244	psrld	xmm8,20
2245	pslld	xmm7,32-20
2246	pxor	xmm7,xmm8
2247	movdqa	xmm8,xmm6
2248	psrld	xmm8,20
2249	pslld	xmm6,32-20
2250	pxor	xmm6,xmm8
2251	movdqa	xmm8,xmm5
2252	psrld	xmm8,20
2253	pslld	xmm5,32-20
2254	pxor	xmm5,xmm8
2255	movdqa	xmm8,xmm4
2256	psrld	xmm8,20
2257	pslld	xmm4,32-20
2258	pxor	xmm4,xmm8
2259	movdqa	xmm8,XMMWORD[$L$rol8]
2260	paddd	xmm3,xmm7
2261	paddd	xmm2,xmm6
2262	paddd	xmm1,xmm5
2263	paddd	xmm0,xmm4
2264	pxor	xmm15,xmm3
2265	pxor	xmm14,xmm2
2266	pxor	xmm13,xmm1
2267	pxor	xmm12,xmm0
2268DB	102,69,15,56,0,248
2269DB	102,69,15,56,0,240
2270DB	102,69,15,56,0,232
2271DB	102,69,15,56,0,224
2272	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2273	paddd	xmm11,xmm15
2274	paddd	xmm10,xmm14
2275	paddd	xmm9,xmm13
2276	paddd	xmm8,xmm12
2277	pxor	xmm7,xmm11
2278	pxor	xmm6,xmm10
2279	pxor	xmm5,xmm9
2280	pxor	xmm4,xmm8
2281	movdqa	XMMWORD[(160+80)+rbp],xmm8
; Rotate xmm7, xmm6, xmm5, xmm4 left by 7 bits.
2282	movdqa	xmm8,xmm7
2283	psrld	xmm8,25
2284	pslld	xmm7,32-25
2285	pxor	xmm7,xmm8
2286	movdqa	xmm8,xmm6
2287	psrld	xmm8,25
2288	pslld	xmm6,32-25
2289	pxor	xmm6,xmm8
2290	movdqa	xmm8,xmm5
2291	psrld	xmm8,25
2292	pslld	xmm5,32-25
2293	pxor	xmm5,xmm8
2294	movdqa	xmm8,xmm4
2295	psrld	xmm8,25
2296	pslld	xmm4,32-25
2297	pxor	xmm4,xmm8
2298	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Hand-encoded palignr on each state: second row by 4, third by 8, fourth by
; 12 bytes. Order: xmm7/xmm11/xmm15, xmm6/xmm10/xmm14, xmm5/xmm9/xmm13,
; xmm4/xmm8/xmm12.
2299DB	102,15,58,15,255,4
2300DB	102,69,15,58,15,219,8
2301DB	102,69,15,58,15,255,12
2302DB	102,15,58,15,246,4
2303DB	102,69,15,58,15,210,8
2304DB	102,69,15,58,15,246,12
2305DB	102,15,58,15,237,4
2306DB	102,69,15,58,15,201,8
2307DB	102,69,15,58,15,237,12
2308DB	102,15,58,15,228,4
2309DB	102,69,15,58,15,192,8
2310DB	102,69,15,58,15,228,12
; Second half of the pass: same sequence on the rotated rows.
2311	movdqa	XMMWORD[(160+80)+rbp],xmm8
2312	movdqa	xmm8,XMMWORD[$L$rol16]
2313	paddd	xmm3,xmm7
2314	paddd	xmm2,xmm6
2315	paddd	xmm1,xmm5
2316	paddd	xmm0,xmm4
2317	pxor	xmm15,xmm3
2318	pxor	xmm14,xmm2
2319	pxor	xmm13,xmm1
2320	pxor	xmm12,xmm0
2321DB	102,69,15,56,0,248
2322DB	102,69,15,56,0,240
2323DB	102,69,15,56,0,232
2324DB	102,69,15,56,0,224
2325	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2326	paddd	xmm11,xmm15
2327	paddd	xmm10,xmm14
2328	paddd	xmm9,xmm13
2329	paddd	xmm8,xmm12
2330	pxor	xmm7,xmm11
2331	pxor	xmm6,xmm10
2332	pxor	xmm5,xmm9
2333	pxor	xmm4,xmm8
2334	movdqa	XMMWORD[(160+80)+rbp],xmm8
2335	movdqa	xmm8,xmm7
2336	psrld	xmm8,20
2337	pslld	xmm7,32-20
2338	pxor	xmm7,xmm8
2339	movdqa	xmm8,xmm6
2340	psrld	xmm8,20
2341	pslld	xmm6,32-20
2342	pxor	xmm6,xmm8
2343	movdqa	xmm8,xmm5
2344	psrld	xmm8,20
2345	pslld	xmm5,32-20
2346	pxor	xmm5,xmm8
2347	movdqa	xmm8,xmm4
2348	psrld	xmm8,20
2349	pslld	xmm4,32-20
2350	pxor	xmm4,xmm8
2351	movdqa	xmm8,XMMWORD[$L$rol8]
2352	paddd	xmm3,xmm7
2353	paddd	xmm2,xmm6
2354	paddd	xmm1,xmm5
2355	paddd	xmm0,xmm4
2356	pxor	xmm15,xmm3
2357	pxor	xmm14,xmm2
2358	pxor	xmm13,xmm1
2359	pxor	xmm12,xmm0
2360DB	102,69,15,56,0,248
2361DB	102,69,15,56,0,240
2362DB	102,69,15,56,0,232
2363DB	102,69,15,56,0,224
2364	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2365	paddd	xmm11,xmm15
2366	paddd	xmm10,xmm14
2367	paddd	xmm9,xmm13
2368	paddd	xmm8,xmm12
2369	pxor	xmm7,xmm11
2370	pxor	xmm6,xmm10
2371	pxor	xmm5,xmm9
2372	pxor	xmm4,xmm8
2373	movdqa	XMMWORD[(160+80)+rbp],xmm8
2374	movdqa	xmm8,xmm7
2375	psrld	xmm8,25
2376	pslld	xmm7,32-25
2377	pxor	xmm7,xmm8
2378	movdqa	xmm8,xmm6
2379	psrld	xmm8,25
2380	pslld	xmm6,32-25
2381	pxor	xmm6,xmm8
2382	movdqa	xmm8,xmm5
2383	psrld	xmm8,25
2384	pslld	xmm5,32-25
2385	pxor	xmm5,xmm8
2386	movdqa	xmm8,xmm4
2387	psrld	xmm8,25
2388	pslld	xmm4,32-25
2389	pxor	xmm4,xmm8
2390	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Rotate the rows back: same palignr encodings with immediates 12, 8, 4.
2391DB	102,15,58,15,255,12
2392DB	102,69,15,58,15,219,8
2393DB	102,69,15,58,15,255,4
2394DB	102,15,58,15,246,12
2395DB	102,69,15,58,15,210,8
2396DB	102,69,15,58,15,246,4
2397DB	102,15,58,15,237,12
2398DB	102,69,15,58,15,201,8
2399DB	102,69,15,58,15,237,4
2400DB	102,15,58,15,228,12
2401DB	102,69,15,58,15,192,8
2402DB	102,69,15,58,15,228,4
2403
2404	dec	r10
2405	jnz	NEAR $L$seal_sse_init_rounds
; After the initial rounds: add the starting rows back into all four states
; (constants, the rows at [160+48+rbp] and [160+64+rbp], and the per-state
; counter rows at [160+144], [160+128], [160+112], [160+96+rbp]).
2406	paddd	xmm3,XMMWORD[$L$chacha20_consts]
2407	paddd	xmm7,XMMWORD[((160+48))+rbp]
2408	paddd	xmm11,XMMWORD[((160+64))+rbp]
2409	paddd	xmm15,XMMWORD[((160+144))+rbp]
2410	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2411	paddd	xmm6,XMMWORD[((160+48))+rbp]
2412	paddd	xmm10,XMMWORD[((160+64))+rbp]
2413	paddd	xmm14,XMMWORD[((160+128))+rbp]
2414	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2415	paddd	xmm5,XMMWORD[((160+48))+rbp]
2416	paddd	xmm9,XMMWORD[((160+64))+rbp]
2417	paddd	xmm13,XMMWORD[((160+112))+rbp]
2418	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2419	paddd	xmm4,XMMWORD[((160+48))+rbp]
2420	paddd	xmm8,XMMWORD[((160+64))+rbp]
2421	paddd	xmm12,XMMWORD[((160+96))+rbp]
2422
2423
; The state with the un-incremented counter supplies the hash key material:
; xmm3 masked with $L$clamp goes to [160+rbp], xmm7 to [160+16+rbp].
2424	pand	xmm3,XMMWORD[$L$clamp]
2425	movdqa	XMMWORD[(160+0)+rbp],xmm3
2426	movdqa	XMMWORD[(160+16)+rbp],xmm7
2427
; mov r8,r8 has no effect (generator artifact). poly_hash_ad_internal is
; defined outside this section.
2428	mov	r8,r8
2429	call	poly_hash_ad_internal
; Bytes 0-63 of the input xor state xmm2/xmm6/xmm10/xmm14, written to rdi.
2430	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
2431	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
2432	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
2433	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
2434	pxor	xmm2,xmm3
2435	pxor	xmm6,xmm7
2436	pxor	xmm10,xmm11
2437	pxor	xmm15,xmm14
2438	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
2439	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
2440	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
2441	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
; Bytes 64-127 xor state xmm1/xmm5/xmm9/xmm13.
2442	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
2443	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
2444	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
2445	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
2446	pxor	xmm1,xmm3
2447	pxor	xmm5,xmm7
2448	pxor	xmm9,xmm11
2449	pxor	xmm15,xmm13
2450	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
2451	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
2452	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
2453	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
2454
; More than 192 bytes: continue at $L$seal_sse_main_init. Otherwise account
; for the 128 bytes just written (rcx = 128, rbx reduced, rsi advanced) and go
; to $L$seal_sse_128_tail_hash. rdi is not advanced on this path.
2455	cmp	rbx,12*16
2456	ja	NEAR $L$seal_sse_main_init
2457	mov	rcx,8*16
2458	sub	rbx,8*16
2459	lea	rsi,[128+rsi]
2460	jmp	NEAR $L$seal_sse_128_tail_hash
; Reached when more than 192 bytes remain. Writes bytes 128-191 using state
; xmm0/xmm4/xmm8/xmm12, accounts for 192 consumed input bytes (rbx, rsi), and
; sets the loop counters rcx = 2 and r8 = 8 for $L$seal_sse_main_rounds.
; rdi is left unchanged here; the main rounds loop reads hash input at rdi
; and advances it.
2461$L$seal_sse_main_init:
2462	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
2463	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
2464	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
2465	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
2466	pxor	xmm0,xmm3
2467	pxor	xmm4,xmm7
2468	pxor	xmm8,xmm11
2469	pxor	xmm15,xmm12
2470	movdqu	XMMWORD[(0 + 128)+rdi],xmm0
2471	movdqu	XMMWORD[(16 + 128)+rdi],xmm4
2472	movdqu	XMMWORD[(32 + 128)+rdi],xmm8
2473	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
2474
; The first write to rcx is overwritten by the mov rcx,2 two lines later
; before rcx is read.
2475	mov	rcx,12*16
2476	sub	rbx,12*16
2477	lea	rsi,[192+rsi]
2478	mov	rcx,2
2479	mov	r8,8
; Dispatch on the remaining length: up to 64, 128 or 192 bytes go to the
; matching tail routine; anything larger falls through to the main loop.
2480	cmp	rbx,4*16
2481	jbe	NEAR $L$seal_sse_tail_64
2482	cmp	rbx,8*16
2483	jbe	NEAR $L$seal_sse_tail_128
2484	cmp	rbx,12*16
2485	jbe	NEAR $L$seal_sse_tail_192
2486
; Start of a main-loop pass: rebuilds four states for the next 256 bytes of
; key stream.
;   xmm0-xmm3      = $L$chacha20_consts
;   xmm4-xmm7      = row kept at [160+48+rbp]
;   xmm8-xmm11     = row kept at [160+64+rbp]
;   xmm15,14,13,12 = row at [160+96+rbp] advanced by 1,2,3,4 times $L$sse_inc
; The advanced counter rows are written back to [160+96 .. 160+144+rbp] for
; the add-back after the rounds.
2487$L$seal_sse_main_loop:
2488	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2489	movdqa	xmm4,XMMWORD[((160+48))+rbp]
2490	movdqa	xmm8,XMMWORD[((160+64))+rbp]
2491	movdqa	xmm1,xmm0
2492	movdqa	xmm5,xmm4
2493	movdqa	xmm9,xmm8
2494	movdqa	xmm2,xmm0
2495	movdqa	xmm6,xmm4
2496	movdqa	xmm10,xmm8
2497	movdqa	xmm3,xmm0
2498	movdqa	xmm7,xmm4
2499	movdqa	xmm11,xmm8
2500	movdqa	xmm15,XMMWORD[((160+96))+rbp]
2501	paddd	xmm15,XMMWORD[$L$sse_inc]
2502	movdqa	xmm14,xmm15
2503	paddd	xmm14,XMMWORD[$L$sse_inc]
2504	movdqa	xmm13,xmm14
2505	paddd	xmm13,XMMWORD[$L$sse_inc]
2506	movdqa	xmm12,xmm13
2507	paddd	xmm12,XMMWORD[$L$sse_inc]
2508	movdqa	XMMWORD[(160+96)+rbp],xmm12
2509	movdqa	XMMWORD[(160+112)+rbp],xmm13
2510	movdqa	XMMWORD[(160+128)+rbp],xmm14
2511	movdqa	XMMWORD[(160+144)+rbp],xmm15
2512
; Main round loop: the four-state round pass with one hash step interleaved.
; Each pass through the label:
;   - runs two add / xor / rotate sequences over the four states (row
;     rotations 4, 8, 12 between them and 12, 8, 4 after),
;   - adds the 16 bytes at [rdi] into r10:r11 (plus 1 into r12), multiplies
;     r10:r11:r12 by the qwords at [160+rbp] and [168+rbp], and folds back,
;   - advances rdi by 16.
; Loop control: r8 is decremented and the pass repeats while the result is
; not negative (jge). When that falls through, one extra hash-only step runs,
; rdi advances again, rcx is decremented and the loop repeats while rcx stays
; positive (jg). xmm8 doubles as pshufb mask and scratch, so its row value is
; parked at [160+80+rbp] in between.
2513ALIGN	32
2514$L$seal_sse_main_rounds:
2515	movdqa	XMMWORD[(160+80)+rbp],xmm8
2516	movdqa	xmm8,XMMWORD[$L$rol16]
2517	paddd	xmm3,xmm7
2518	paddd	xmm2,xmm6
2519	paddd	xmm1,xmm5
2520	paddd	xmm0,xmm4
2521	pxor	xmm15,xmm3
2522	pxor	xmm14,xmm2
2523	pxor	xmm13,xmm1
2524	pxor	xmm12,xmm0
; Hand-encoded: pshufb xmm15,xmm8 ; pshufb xmm14,xmm8 ; pshufb xmm13,xmm8 ; pshufb xmm12,xmm8
2525DB	102,69,15,56,0,248
2526DB	102,69,15,56,0,240
2527DB	102,69,15,56,0,232
2528DB	102,69,15,56,0,224
2529	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2530	paddd	xmm11,xmm15
2531	paddd	xmm10,xmm14
2532	paddd	xmm9,xmm13
2533	paddd	xmm8,xmm12
2534	pxor	xmm7,xmm11
; Hash input: 16 bytes at rdi.
2535	add	r10,QWORD[((0+0))+rdi]
2536	adc	r11,QWORD[((8+0))+rdi]
2537	adc	r12,1
2538	pxor	xmm6,xmm10
2539	pxor	xmm5,xmm9
2540	pxor	xmm4,xmm8
2541	movdqa	XMMWORD[(160+80)+rbp],xmm8
2542	movdqa	xmm8,xmm7
2543	psrld	xmm8,20
2544	pslld	xmm7,32-20
2545	pxor	xmm7,xmm8
2546	movdqa	xmm8,xmm6
2547	psrld	xmm8,20
2548	pslld	xmm6,32-20
2549	pxor	xmm6,xmm8
2550	movdqa	xmm8,xmm5
2551	psrld	xmm8,20
2552	pslld	xmm5,32-20
2553	pxor	xmm5,xmm8
2554	movdqa	xmm8,xmm4
2555	psrld	xmm8,20
2556	pslld	xmm4,32-20
2557	pxor	xmm4,xmm8
; Multiply step 1: r13:r14:r15 = [160+rbp] * (r10, r11, r12).
2558	mov	rax,QWORD[((0+160+0))+rbp]
2559	mov	r15,rax
2560	mul	r10
2561	mov	r13,rax
2562	mov	r14,rdx
2563	mov	rax,QWORD[((0+160+0))+rbp]
2564	mul	r11
2565	imul	r15,r12
2566	add	r14,rax
2567	adc	r15,rdx
2568	movdqa	xmm8,XMMWORD[$L$rol8]
2569	paddd	xmm3,xmm7
2570	paddd	xmm2,xmm6
2571	paddd	xmm1,xmm5
2572	paddd	xmm0,xmm4
2573	pxor	xmm15,xmm3
2574	pxor	xmm14,xmm2
2575	pxor	xmm13,xmm1
2576	pxor	xmm12,xmm0
2577DB	102,69,15,56,0,248
2578DB	102,69,15,56,0,240
2579DB	102,69,15,56,0,232
2580DB	102,69,15,56,0,224
2581	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2582	paddd	xmm11,xmm15
2583	paddd	xmm10,xmm14
2584	paddd	xmm9,xmm13
2585	paddd	xmm8,xmm12
2586	pxor	xmm7,xmm11
2587	pxor	xmm6,xmm10
; Multiply step 2: add [168+rbp] * (r10, r11), one word higher.
2588	mov	rax,QWORD[((8+160+0))+rbp]
2589	mov	r9,rax
2590	mul	r10
2591	add	r14,rax
2592	adc	rdx,0
2593	mov	r10,rdx
2594	mov	rax,QWORD[((8+160+0))+rbp]
2595	mul	r11
2596	add	r15,rax
2597	adc	rdx,0
2598	pxor	xmm5,xmm9
2599	pxor	xmm4,xmm8
2600	movdqa	XMMWORD[(160+80)+rbp],xmm8
2601	movdqa	xmm8,xmm7
2602	psrld	xmm8,25
2603	pslld	xmm7,32-25
2604	pxor	xmm7,xmm8
2605	movdqa	xmm8,xmm6
2606	psrld	xmm8,25
2607	pslld	xmm6,32-25
2608	pxor	xmm6,xmm8
2609	movdqa	xmm8,xmm5
2610	psrld	xmm8,25
2611	pslld	xmm5,32-25
2612	pxor	xmm5,xmm8
2613	movdqa	xmm8,xmm4
2614	psrld	xmm8,25
2615	pslld	xmm4,32-25
2616	pxor	xmm4,xmm8
2617	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Multiply step 3: finish the top words of the product in r15 and r9.
2618	imul	r9,r12
2619	add	r15,r10
2620	adc	r9,rdx
; Hand-encoded palignr on each state: second row by 4, third by 8, fourth by 12.
2621DB	102,15,58,15,255,4
2622DB	102,69,15,58,15,219,8
2623DB	102,69,15,58,15,255,12
2624DB	102,15,58,15,246,4
2625DB	102,69,15,58,15,210,8
2626DB	102,69,15,58,15,246,12
2627DB	102,15,58,15,237,4
2628DB	102,69,15,58,15,201,8
2629DB	102,69,15,58,15,237,12
2630DB	102,15,58,15,228,4
2631DB	102,69,15,58,15,192,8
2632DB	102,69,15,58,15,228,12
2633	movdqa	XMMWORD[(160+80)+rbp],xmm8
2634	movdqa	xmm8,XMMWORD[$L$rol16]
2635	paddd	xmm3,xmm7
2636	paddd	xmm2,xmm6
2637	paddd	xmm1,xmm5
2638	paddd	xmm0,xmm4
2639	pxor	xmm15,xmm3
2640	pxor	xmm14,xmm2
; Fold the product back into r10:r11:r12.
2641	mov	r10,r13
2642	mov	r11,r14
2643	mov	r12,r15
2644	and	r12,3
2645	mov	r13,r15
2646	and	r13,-4
2647	mov	r14,r9
2648	shrd	r15,r9,2
2649	shr	r9,2
2650	add	r15,r13
2651	adc	r9,r14
2652	add	r10,r15
2653	adc	r11,r9
2654	adc	r12,0
2655	pxor	xmm13,xmm1
2656	pxor	xmm12,xmm0
2657DB	102,69,15,56,0,248
2658DB	102,69,15,56,0,240
2659DB	102,69,15,56,0,232
2660DB	102,69,15,56,0,224
2661	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2662	paddd	xmm11,xmm15
2663	paddd	xmm10,xmm14
2664	paddd	xmm9,xmm13
2665	paddd	xmm8,xmm12
2666	pxor	xmm7,xmm11
2667	pxor	xmm6,xmm10
2668	pxor	xmm5,xmm9
2669	pxor	xmm4,xmm8
2670	movdqa	XMMWORD[(160+80)+rbp],xmm8
2671	movdqa	xmm8,xmm7
2672	psrld	xmm8,20
2673	pslld	xmm7,32-20
2674	pxor	xmm7,xmm8
2675	movdqa	xmm8,xmm6
2676	psrld	xmm8,20
2677	pslld	xmm6,32-20
2678	pxor	xmm6,xmm8
2679	movdqa	xmm8,xmm5
2680	psrld	xmm8,20
2681	pslld	xmm5,32-20
2682	pxor	xmm5,xmm8
2683	movdqa	xmm8,xmm4
2684	psrld	xmm8,20
2685	pslld	xmm4,32-20
2686	pxor	xmm4,xmm8
2687	movdqa	xmm8,XMMWORD[$L$rol8]
2688	paddd	xmm3,xmm7
2689	paddd	xmm2,xmm6
2690	paddd	xmm1,xmm5
2691	paddd	xmm0,xmm4
2692	pxor	xmm15,xmm3
2693	pxor	xmm14,xmm2
2694	pxor	xmm13,xmm1
2695	pxor	xmm12,xmm0
2696DB	102,69,15,56,0,248
2697DB	102,69,15,56,0,240
2698DB	102,69,15,56,0,232
2699DB	102,69,15,56,0,224
2700	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2701	paddd	xmm11,xmm15
2702	paddd	xmm10,xmm14
2703	paddd	xmm9,xmm13
2704	paddd	xmm8,xmm12
2705	pxor	xmm7,xmm11
2706	pxor	xmm6,xmm10
2707	pxor	xmm5,xmm9
2708	pxor	xmm4,xmm8
2709	movdqa	XMMWORD[(160+80)+rbp],xmm8
2710	movdqa	xmm8,xmm7
2711	psrld	xmm8,25
2712	pslld	xmm7,32-25
2713	pxor	xmm7,xmm8
2714	movdqa	xmm8,xmm6
2715	psrld	xmm8,25
2716	pslld	xmm6,32-25
2717	pxor	xmm6,xmm8
2718	movdqa	xmm8,xmm5
2719	psrld	xmm8,25
2720	pslld	xmm5,32-25
2721	pxor	xmm5,xmm8
2722	movdqa	xmm8,xmm4
2723	psrld	xmm8,25
2724	pslld	xmm4,32-25
2725	pxor	xmm4,xmm8
2726	movdqa	xmm8,XMMWORD[((160+80))+rbp]
; Rotate the rows back: same palignr encodings with immediates 12, 8, 4.
2727DB	102,15,58,15,255,12
2728DB	102,69,15,58,15,219,8
2729DB	102,69,15,58,15,255,4
2730DB	102,15,58,15,246,12
2731DB	102,69,15,58,15,210,8
2732DB	102,69,15,58,15,246,4
2733DB	102,15,58,15,237,12
2734DB	102,69,15,58,15,201,8
2735DB	102,69,15,58,15,237,4
2736DB	102,15,58,15,228,12
2737DB	102,69,15,58,15,192,8
2738DB	102,69,15,58,15,228,4
2739
; Inner loop control: signed test on r8 after the decrement.
2740	lea	rdi,[16+rdi]
2741	dec	r8
2742	jge	NEAR $L$seal_sse_main_rounds
; Extra hash-only step on the next 16 bytes at rdi.
2743	add	r10,QWORD[((0+0))+rdi]
2744	adc	r11,QWORD[((8+0))+rdi]
2745	adc	r12,1
2746	mov	rax,QWORD[((0+160+0))+rbp]
2747	mov	r15,rax
2748	mul	r10
2749	mov	r13,rax
2750	mov	r14,rdx
2751	mov	rax,QWORD[((0+160+0))+rbp]
2752	mul	r11
2753	imul	r15,r12
2754	add	r14,rax
2755	adc	r15,rdx
2756	mov	rax,QWORD[((8+160+0))+rbp]
2757	mov	r9,rax
2758	mul	r10
2759	add	r14,rax
2760	adc	rdx,0
2761	mov	r10,rdx
2762	mov	rax,QWORD[((8+160+0))+rbp]
2763	mul	r11
2764	add	r15,rax
2765	adc	rdx,0
2766	imul	r9,r12
2767	add	r15,r10
2768	adc	r9,rdx
2769	mov	r10,r13
2770	mov	r11,r14
2771	mov	r12,r15
2772	and	r12,3
2773	mov	r13,r15
2774	and	r13,-4
2775	mov	r14,r9
2776	shrd	r15,r9,2
2777	shr	r9,2
2778	add	r15,r13
2779	adc	r9,r14
2780	add	r10,r15
2781	adc	r11,r9
2782	adc	r12,0
2783
; Outer loop control: repeat while rcx is still positive after the decrement.
2784	lea	rdi,[16+rdi]
2785	dec	rcx
2786	jg	NEAR $L$seal_sse_main_rounds
2787	paddd	xmm3,XMMWORD[$L$chacha20_consts]
2788	paddd	xmm7,XMMWORD[((160+48))+rbp]
2789	paddd	xmm11,XMMWORD[((160+64))+rbp]
2790	paddd	xmm15,XMMWORD[((160+144))+rbp]
2791	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2792	paddd	xmm6,XMMWORD[((160+48))+rbp]
2793	paddd	xmm10,XMMWORD[((160+64))+rbp]
2794	paddd	xmm14,XMMWORD[((160+128))+rbp]
2795	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2796	paddd	xmm5,XMMWORD[((160+48))+rbp]
2797	paddd	xmm9,XMMWORD[((160+64))+rbp]
2798	paddd	xmm13,XMMWORD[((160+112))+rbp]
2799	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2800	paddd	xmm4,XMMWORD[((160+48))+rbp]
2801	paddd	xmm8,XMMWORD[((160+64))+rbp]
2802	paddd	xmm12,XMMWORD[((160+96))+rbp]
2803
2804	movdqa	XMMWORD[(160+80)+rbp],xmm14
2805	movdqa	XMMWORD[(160+80)+rbp],xmm14
2806	movdqu	xmm14,XMMWORD[((0 + 0))+rsi]
2807	pxor	xmm14,xmm3
2808	movdqu	XMMWORD[(0 + 0)+rdi],xmm14
2809	movdqu	xmm14,XMMWORD[((16 + 0))+rsi]
2810	pxor	xmm14,xmm7
2811	movdqu	XMMWORD[(16 + 0)+rdi],xmm14
2812	movdqu	xmm14,XMMWORD[((32 + 0))+rsi]
2813	pxor	xmm14,xmm11
2814	movdqu	XMMWORD[(32 + 0)+rdi],xmm14
2815	movdqu	xmm14,XMMWORD[((48 + 0))+rsi]
2816	pxor	xmm14,xmm15
2817	movdqu	XMMWORD[(48 + 0)+rdi],xmm14
2818
2819	movdqa	xmm14,XMMWORD[((160+80))+rbp]
2820	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
2821	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
2822	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
2823	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
2824	pxor	xmm2,xmm3
2825	pxor	xmm6,xmm7
2826	pxor	xmm10,xmm11
2827	pxor	xmm15,xmm14
2828	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
2829	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
2830	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
2831	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
2832	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
2833	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
2834	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
2835	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
2836	pxor	xmm1,xmm3
2837	pxor	xmm5,xmm7
2838	pxor	xmm9,xmm11
2839	pxor	xmm15,xmm13
2840	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
2841	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
2842	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
2843	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
2844
2845	cmp	rbx,16*16
2846	ja	NEAR $L$seal_sse_main_loop_xor
2847
2848	mov	rcx,12*16
2849	sub	rbx,12*16
2850	lea	rsi,[192+rsi]
2851	jmp	NEAR $L$seal_sse_128_tail_hash
; Last quarter of a 256-byte pass: XOR the keystream block held in
; xmm0/xmm4/xmm8/xmm12 with input bytes 192..255 (rsi) and store the result at
; the same offsets from rdi, then pick the next stage from the byte count left
; in rbx.  xmm3/xmm7/xmm11/xmm15 are used as scratch for the input.
2852$L$seal_sse_main_loop_xor:
2853	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
2854	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
2855	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
2856	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
2857	pxor	xmm0,xmm3
2858	pxor	xmm4,xmm7
2859	pxor	xmm8,xmm11
2860	pxor	xmm15,xmm12
2861	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
2862	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
2863	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
2864	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
2865
; 256 input bytes consumed.  rdi is not advanced here; the hashing code
; advances it 16 bytes at a time as it reads the output back.
; rcx=6 / r8=4 are the loop counts used by the round loops that follow.
2866	lea	rsi,[256+rsi]
2867	sub	rbx,16*16
2868	mov	rcx,6
2869	mov	r8,4
2870	cmp	rbx,12*16
; More than 192 bytes left: run another full pass.
; NOTE(review): jg is a signed compare on a byte count - confirm rbx can never
; exceed 2^63-1 here.
2871	jg	NEAR $L$seal_sse_main_loop
; Nothing left: go straight to the tail with rcx = 0 bytes to hash.
; NOTE(review): the jump site visible just above this label only comes here
; when rbx > 16*16, so rbx == 0 looks unreachable on that path - confirm there
; are no other entries.
2872	mov	rcx,rbx
2873	test	rbx,rbx
2874	je	NEAR $L$seal_sse_128_tail_hash
; 1..192 bytes left: restore rcx=6 (r8 is still 4) and dispatch on size.
; Falls through into $L$seal_sse_tail_64 when rbx <= 64.
2875	mov	rcx,6
2876	cmp	rbx,8*16
2877	ja	NEAR $L$seal_sse_tail_192
2878	cmp	rbx,4*16
2879	ja	NEAR $L$seal_sse_tail_128
2880
; Tail stage that generates one more 64-byte keystream block while hashing
; output that has already been written at rdi.
; State rows: xmm0 = constants, xmm4 / xmm8 = rows saved at [rbp+160+48] and
; [rbp+160+64], xmm12 = counter row from [rbp+160+96] advanced by $L$sse_inc
; (the advanced value is written back to the same slot).
; On entry rcx = number of passes that enter at ..._x2hash and r8 = number of
; further passes that enter at ..._x1hash; both are set by the code that
; jumps or falls through to this label.
2881$L$seal_sse_tail_64:
2882	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
2883	movdqa	xmm4,XMMWORD[((160+48))+rbp]
2884	movdqa	xmm8,XMMWORD[((160+64))+rbp]
2885	movdqa	xmm12,XMMWORD[((160+96))+rbp]
2886	paddd	xmm12,XMMWORD[$L$sse_inc]
2887	movdqa	XMMWORD[(160+96)+rbp],xmm12
2888
; Extra hash step taken only on passes that enter here.
; Accumulator is r12:r11:r10.  Add the 16 bytes at rdi plus the 2^128 bit
; (adc r12,1), multiply by the 128-bit value at [rbp+160], then fold the bits
; at and above 2^130 back in multiplied by 5 (partial reduction mod 2^130-5).
; Scratch: rax, rdx, r9, r13, r14, r15.
2889$L$seal_sse_tail_64_rounds_and_x2hash:
2890	add	r10,QWORD[((0+0))+rdi]
2891	adc	r11,QWORD[((8+0))+rdi]
2892	adc	r12,1
2893	mov	rax,QWORD[((0+160+0))+rbp]
2894	mov	r15,rax
2895	mul	r10
2896	mov	r13,rax
2897	mov	r14,rdx
2898	mov	rax,QWORD[((0+160+0))+rbp]
2899	mul	r11
2900	imul	r15,r12
2901	add	r14,rax
2902	adc	r15,rdx
2903	mov	rax,QWORD[((8+160+0))+rbp]
2904	mov	r9,rax
2905	mul	r10
2906	add	r14,rax
2907	adc	rdx,0
2908	mov	r10,rdx
2909	mov	rax,QWORD[((8+160+0))+rbp]
2910	mul	r11
2911	add	r15,rax
2912	adc	rdx,0
2913	imul	r9,r12
2914	add	r15,r10
2915	adc	r9,rdx
2916	mov	r10,r13
2917	mov	r11,r14
2918	mov	r12,r15
2919	and	r12,3
2920	mov	r13,r15
2921	and	r13,-4
2922	mov	r14,r9
2923	shrd	r15,r9,2
2924	shr	r9,2
2925	add	r15,r13
2926	adc	r9,r14
2927	add	r10,r15
2928	adc	r11,r9
2929	adc	r12,0
2930
2931	lea	rdi,[16+rdi]
; One double round on the block, with xmm3 as scratch: add/xor/rotate steps
; using rotations 16 ($L$rol16 shuffle), 12, 8 ($L$rol8 shuffle) and 7.
2932$L$seal_sse_tail_64_rounds_and_x1hash:
2933	paddd	xmm0,xmm4
2934	pxor	xmm12,xmm0
2935	pshufb	xmm12,XMMWORD[$L$rol16]
2936	paddd	xmm8,xmm12
2937	pxor	xmm4,xmm8
2938	movdqa	xmm3,xmm4
2939	pslld	xmm3,12
2940	psrld	xmm4,20
2941	pxor	xmm4,xmm3
2942	paddd	xmm0,xmm4
2943	pxor	xmm12,xmm0
2944	pshufb	xmm12,XMMWORD[$L$rol8]
2945	paddd	xmm8,xmm12
2946	pxor	xmm4,xmm8
2947	movdqa	xmm3,xmm4
2948	pslld	xmm3,7
2949	psrld	xmm4,25
2950	pxor	xmm4,xmm3
; The DB bytes encode palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 /
; palignr xmm12,xmm12,12 (rotate the rows for the second half-round).
2951DB	102,15,58,15,228,4
2952DB	102,69,15,58,15,192,8
2953DB	102,69,15,58,15,228,12
2954	paddd	xmm0,xmm4
2955	pxor	xmm12,xmm0
2956	pshufb	xmm12,XMMWORD[$L$rol16]
2957	paddd	xmm8,xmm12
2958	pxor	xmm4,xmm8
2959	movdqa	xmm3,xmm4
2960	pslld	xmm3,12
2961	psrld	xmm4,20
2962	pxor	xmm4,xmm3
2963	paddd	xmm0,xmm4
2964	pxor	xmm12,xmm0
2965	pshufb	xmm12,XMMWORD[$L$rol8]
2966	paddd	xmm8,xmm12
2967	pxor	xmm4,xmm8
2968	movdqa	xmm3,xmm4
2969	pslld	xmm3,7
2970	psrld	xmm4,25
2971	pxor	xmm4,xmm3
; Same palignr encodings with immediates 12 / 8 / 4: rotate the rows back.
2972DB	102,15,58,15,228,12
2973DB	102,69,15,58,15,192,8
2974DB	102,69,15,58,15,228,4
; Hash step taken on every pass (same add / multiply / reduce as above).
2975	add	r10,QWORD[((0+0))+rdi]
2976	adc	r11,QWORD[((8+0))+rdi]
2977	adc	r12,1
2978	mov	rax,QWORD[((0+160+0))+rbp]
2979	mov	r15,rax
2980	mul	r10
2981	mov	r13,rax
2982	mov	r14,rdx
2983	mov	rax,QWORD[((0+160+0))+rbp]
2984	mul	r11
2985	imul	r15,r12
2986	add	r14,rax
2987	adc	r15,rdx
2988	mov	rax,QWORD[((8+160+0))+rbp]
2989	mov	r9,rax
2990	mul	r10
2991	add	r14,rax
2992	adc	rdx,0
2993	mov	r10,rdx
2994	mov	rax,QWORD[((8+160+0))+rbp]
2995	mul	r11
2996	add	r15,rax
2997	adc	rdx,0
2998	imul	r9,r12
2999	add	r15,r10
3000	adc	r9,rdx
3001	mov	r10,r13
3002	mov	r11,r14
3003	mov	r12,r15
3004	and	r12,3
3005	mov	r13,r15
3006	and	r13,-4
3007	mov	r14,r9
3008	shrd	r15,r9,2
3009	shr	r9,2
3010	add	r15,r13
3011	adc	r9,r14
3012	add	r10,r15
3013	adc	r11,r9
3014	adc	r12,0
3015
; Loop control: while rcx stays > 0 repeat from the two-hash entry; after that
; repeat from the one-hash entry until r8 goes negative.
3016	lea	rdi,[16+rdi]
3017	dec	rcx
3018	jg	NEAR $L$seal_sse_tail_64_rounds_and_x2hash
3019	dec	r8
3020	jge	NEAR $L$seal_sse_tail_64_rounds_and_x1hash
; Add the initial state back in to obtain the keystream block; the XOR with
; the input is done 16 bytes at a time in $L$seal_sse_128_tail_xor.
3021	paddd	xmm0,XMMWORD[$L$chacha20_consts]
3022	paddd	xmm4,XMMWORD[((160+48))+rbp]
3023	paddd	xmm8,XMMWORD[((160+64))+rbp]
3024	paddd	xmm12,XMMWORD[((160+96))+rbp]
3025
3026	jmp	NEAR $L$seal_sse_128_tail_xor
3027
; Tail stage that generates two more 64-byte keystream blocks while hashing
; output that has already been written at rdi.
; Block 0 lives in xmm0/xmm4/xmm8/xmm12, block 1 in xmm1/xmm5/xmm9/xmm13.
; The counter row from [rbp+160+96] is advanced once for block 1 (saved at
; [rbp+160+112]) and once more for block 0 (saved back at [rbp+160+96]).
; rcx / r8 are the pass counts for the two loop entries, set by the code that
; jumps here.
3028$L$seal_sse_tail_128:
3029	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
3030	movdqa	xmm4,XMMWORD[((160+48))+rbp]
3031	movdqa	xmm8,XMMWORD[((160+64))+rbp]
3032	movdqa	xmm1,xmm0
3033	movdqa	xmm5,xmm4
3034	movdqa	xmm9,xmm8
3035	movdqa	xmm13,XMMWORD[((160+96))+rbp]
3036	paddd	xmm13,XMMWORD[$L$sse_inc]
3037	movdqa	xmm12,xmm13
3038	paddd	xmm12,XMMWORD[$L$sse_inc]
3039	movdqa	XMMWORD[(160+96)+rbp],xmm12
3040	movdqa	XMMWORD[(160+112)+rbp],xmm13
3041
; Extra hash step for passes entering here: accumulator r12:r11:r10 += the 16
; bytes at rdi plus 2^128, times the 128-bit value at [rbp+160], then bits at
; and above 2^130 are folded back in multiplied by 5.
3042$L$seal_sse_tail_128_rounds_and_x2hash:
3043	add	r10,QWORD[((0+0))+rdi]
3044	adc	r11,QWORD[((8+0))+rdi]
3045	adc	r12,1
3046	mov	rax,QWORD[((0+160+0))+rbp]
3047	mov	r15,rax
3048	mul	r10
3049	mov	r13,rax
3050	mov	r14,rdx
3051	mov	rax,QWORD[((0+160+0))+rbp]
3052	mul	r11
3053	imul	r15,r12
3054	add	r14,rax
3055	adc	r15,rdx
3056	mov	rax,QWORD[((8+160+0))+rbp]
3057	mov	r9,rax
3058	mul	r10
3059	add	r14,rax
3060	adc	rdx,0
3061	mov	r10,rdx
3062	mov	rax,QWORD[((8+160+0))+rbp]
3063	mul	r11
3064	add	r15,rax
3065	adc	rdx,0
3066	imul	r9,r12
3067	add	r15,r10
3068	adc	r9,rdx
3069	mov	r10,r13
3070	mov	r11,r14
3071	mov	r12,r15
3072	and	r12,3
3073	mov	r13,r15
3074	and	r13,-4
3075	mov	r14,r9
3076	shrd	r15,r9,2
3077	shr	r9,2
3078	add	r15,r13
3079	adc	r9,r14
3080	add	r10,r15
3081	adc	r11,r9
3082	adc	r12,0
3083
3084	lea	rdi,[16+rdi]
; First half of the double round, block 0 then block 1 (xmm3 is scratch).
3085$L$seal_sse_tail_128_rounds_and_x1hash:
3086	paddd	xmm0,xmm4
3087	pxor	xmm12,xmm0
3088	pshufb	xmm12,XMMWORD[$L$rol16]
3089	paddd	xmm8,xmm12
3090	pxor	xmm4,xmm8
3091	movdqa	xmm3,xmm4
3092	pslld	xmm3,12
3093	psrld	xmm4,20
3094	pxor	xmm4,xmm3
3095	paddd	xmm0,xmm4
3096	pxor	xmm12,xmm0
3097	pshufb	xmm12,XMMWORD[$L$rol8]
3098	paddd	xmm8,xmm12
3099	pxor	xmm4,xmm8
3100	movdqa	xmm3,xmm4
3101	pslld	xmm3,7
3102	psrld	xmm4,25
3103	pxor	xmm4,xmm3
; DB bytes: palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
3104DB	102,15,58,15,228,4
3105DB	102,69,15,58,15,192,8
3106DB	102,69,15,58,15,228,12
3107	paddd	xmm1,xmm5
3108	pxor	xmm13,xmm1
3109	pshufb	xmm13,XMMWORD[$L$rol16]
3110	paddd	xmm9,xmm13
3111	pxor	xmm5,xmm9
3112	movdqa	xmm3,xmm5
3113	pslld	xmm3,12
3114	psrld	xmm5,20
3115	pxor	xmm5,xmm3
3116	paddd	xmm1,xmm5
3117	pxor	xmm13,xmm1
3118	pshufb	xmm13,XMMWORD[$L$rol8]
3119	paddd	xmm9,xmm13
3120	pxor	xmm5,xmm9
3121	movdqa	xmm3,xmm5
3122	pslld	xmm3,7
3123	psrld	xmm5,25
3124	pxor	xmm5,xmm3
; DB bytes: palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
3125DB	102,15,58,15,237,4
3126DB	102,69,15,58,15,201,8
3127DB	102,69,15,58,15,237,12
; Hash step taken on every pass, placed between the two half-rounds.
3128	add	r10,QWORD[((0+0))+rdi]
3129	adc	r11,QWORD[((8+0))+rdi]
3130	adc	r12,1
3131	mov	rax,QWORD[((0+160+0))+rbp]
3132	mov	r15,rax
3133	mul	r10
3134	mov	r13,rax
3135	mov	r14,rdx
3136	mov	rax,QWORD[((0+160+0))+rbp]
3137	mul	r11
3138	imul	r15,r12
3139	add	r14,rax
3140	adc	r15,rdx
3141	mov	rax,QWORD[((8+160+0))+rbp]
3142	mov	r9,rax
3143	mul	r10
3144	add	r14,rax
3145	adc	rdx,0
3146	mov	r10,rdx
3147	mov	rax,QWORD[((8+160+0))+rbp]
3148	mul	r11
3149	add	r15,rax
3150	adc	rdx,0
3151	imul	r9,r12
3152	add	r15,r10
3153	adc	r9,rdx
3154	mov	r10,r13
3155	mov	r11,r14
3156	mov	r12,r15
3157	and	r12,3
3158	mov	r13,r15
3159	and	r13,-4
3160	mov	r14,r9
3161	shrd	r15,r9,2
3162	shr	r9,2
3163	add	r15,r13
3164	adc	r9,r14
3165	add	r10,r15
3166	adc	r11,r9
3167	adc	r12,0
; Second half of the double round; the palignr immediates are 12 / 8 / 4 so
; the rows end up back in their original lanes.
3168	paddd	xmm0,xmm4
3169	pxor	xmm12,xmm0
3170	pshufb	xmm12,XMMWORD[$L$rol16]
3171	paddd	xmm8,xmm12
3172	pxor	xmm4,xmm8
3173	movdqa	xmm3,xmm4
3174	pslld	xmm3,12
3175	psrld	xmm4,20
3176	pxor	xmm4,xmm3
3177	paddd	xmm0,xmm4
3178	pxor	xmm12,xmm0
3179	pshufb	xmm12,XMMWORD[$L$rol8]
3180	paddd	xmm8,xmm12
3181	pxor	xmm4,xmm8
3182	movdqa	xmm3,xmm4
3183	pslld	xmm3,7
3184	psrld	xmm4,25
3185	pxor	xmm4,xmm3
3186DB	102,15,58,15,228,12
3187DB	102,69,15,58,15,192,8
3188DB	102,69,15,58,15,228,4
3189	paddd	xmm1,xmm5
3190	pxor	xmm13,xmm1
3191	pshufb	xmm13,XMMWORD[$L$rol16]
3192	paddd	xmm9,xmm13
3193	pxor	xmm5,xmm9
3194	movdqa	xmm3,xmm5
3195	pslld	xmm3,12
3196	psrld	xmm5,20
3197	pxor	xmm5,xmm3
3198	paddd	xmm1,xmm5
3199	pxor	xmm13,xmm1
3200	pshufb	xmm13,XMMWORD[$L$rol8]
3201	paddd	xmm9,xmm13
3202	pxor	xmm5,xmm9
3203	movdqa	xmm3,xmm5
3204	pslld	xmm3,7
3205	psrld	xmm5,25
3206	pxor	xmm5,xmm3
3207DB	102,15,58,15,237,12
3208DB	102,69,15,58,15,201,8
3209DB	102,69,15,58,15,237,4
3210
; Loop control: rcx passes through the two-hash entry, then r8+1 passes
; through the one-hash entry.
3211	lea	rdi,[16+rdi]
3212	dec	rcx
3213	jg	NEAR $L$seal_sse_tail_128_rounds_and_x2hash
3214	dec	r8
3215	jge	NEAR $L$seal_sse_tail_128_rounds_and_x1hash
; Add the initial state back into both blocks (each with its own counter row).
3216	paddd	xmm1,XMMWORD[$L$chacha20_consts]
3217	paddd	xmm5,XMMWORD[((160+48))+rbp]
3218	paddd	xmm9,XMMWORD[((160+64))+rbp]
3219	paddd	xmm13,XMMWORD[((160+112))+rbp]
3220	paddd	xmm0,XMMWORD[$L$chacha20_consts]
3221	paddd	xmm4,XMMWORD[((160+48))+rbp]
3222	paddd	xmm8,XMMWORD[((160+64))+rbp]
3223	paddd	xmm12,XMMWORD[((160+96))+rbp]
; XOR block 1 with the next 64 input bytes and store them at rdi.
3224	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
3225	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
3226	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
3227	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
3228	pxor	xmm1,xmm3
3229	pxor	xmm5,xmm7
3230	pxor	xmm9,xmm11
3231	pxor	xmm15,xmm13
3232	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
3233	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
3234	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
3235	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
3236
; 64 bytes were just written and still need hashing (rcx); block 0 remains in
; xmm0/xmm4/xmm8/xmm12 for the 16-byte tail loop.
3237	mov	rcx,4*16
3238	sub	rbx,4*16
3239	lea	rsi,[64+rsi]
3240	jmp	NEAR $L$seal_sse_128_tail_hash
3241
; Tail stage that generates three more 64-byte keystream blocks while hashing
; output that has already been written at rdi.
; Block 0: xmm0/xmm4/xmm8/xmm12, block 1: xmm1/xmm5/xmm9/xmm13,
; block 2: xmm2/xmm6/xmm10/xmm14.  The counter row from [rbp+160+96] is
; advanced by $L$sse_inc once per block; the three values are saved at
; [rbp+160+128] (block 2), [rbp+160+112] (block 1) and [rbp+160+96] (block 0).
; rcx / r8 are the pass counts for the two loop entries, set by the code that
; jumps here.
3242$L$seal_sse_tail_192:
3243	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
3244	movdqa	xmm4,XMMWORD[((160+48))+rbp]
3245	movdqa	xmm8,XMMWORD[((160+64))+rbp]
3246	movdqa	xmm1,xmm0
3247	movdqa	xmm5,xmm4
3248	movdqa	xmm9,xmm8
3249	movdqa	xmm2,xmm0
3250	movdqa	xmm6,xmm4
3251	movdqa	xmm10,xmm8
3252	movdqa	xmm14,XMMWORD[((160+96))+rbp]
3253	paddd	xmm14,XMMWORD[$L$sse_inc]
3254	movdqa	xmm13,xmm14
3255	paddd	xmm13,XMMWORD[$L$sse_inc]
3256	movdqa	xmm12,xmm13
3257	paddd	xmm12,XMMWORD[$L$sse_inc]
3258	movdqa	XMMWORD[(160+96)+rbp],xmm12
3259	movdqa	XMMWORD[(160+112)+rbp],xmm13
3260	movdqa	XMMWORD[(160+128)+rbp],xmm14
3261
; Extra hash step for passes entering here: accumulator r12:r11:r10 += the 16
; bytes at rdi plus 2^128, times the 128-bit value at [rbp+160], then bits at
; and above 2^130 are folded back in multiplied by 5.
3262$L$seal_sse_tail_192_rounds_and_x2hash:
3263	add	r10,QWORD[((0+0))+rdi]
3264	adc	r11,QWORD[((8+0))+rdi]
3265	adc	r12,1
3266	mov	rax,QWORD[((0+160+0))+rbp]
3267	mov	r15,rax
3268	mul	r10
3269	mov	r13,rax
3270	mov	r14,rdx
3271	mov	rax,QWORD[((0+160+0))+rbp]
3272	mul	r11
3273	imul	r15,r12
3274	add	r14,rax
3275	adc	r15,rdx
3276	mov	rax,QWORD[((8+160+0))+rbp]
3277	mov	r9,rax
3278	mul	r10
3279	add	r14,rax
3280	adc	rdx,0
3281	mov	r10,rdx
3282	mov	rax,QWORD[((8+160+0))+rbp]
3283	mul	r11
3284	add	r15,rax
3285	adc	rdx,0
3286	imul	r9,r12
3287	add	r15,r10
3288	adc	r9,rdx
3289	mov	r10,r13
3290	mov	r11,r14
3291	mov	r12,r15
3292	and	r12,3
3293	mov	r13,r15
3294	and	r13,-4
3295	mov	r14,r9
3296	shrd	r15,r9,2
3297	shr	r9,2
3298	add	r15,r13
3299	adc	r9,r14
3300	add	r10,r15
3301	adc	r11,r9
3302	adc	r12,0
3303
3304	lea	rdi,[16+rdi]
; First half of the double round for blocks 0, 1 and 2 (xmm3 is scratch).
3305$L$seal_sse_tail_192_rounds_and_x1hash:
3306	paddd	xmm0,xmm4
3307	pxor	xmm12,xmm0
3308	pshufb	xmm12,XMMWORD[$L$rol16]
3309	paddd	xmm8,xmm12
3310	pxor	xmm4,xmm8
3311	movdqa	xmm3,xmm4
3312	pslld	xmm3,12
3313	psrld	xmm4,20
3314	pxor	xmm4,xmm3
3315	paddd	xmm0,xmm4
3316	pxor	xmm12,xmm0
3317	pshufb	xmm12,XMMWORD[$L$rol8]
3318	paddd	xmm8,xmm12
3319	pxor	xmm4,xmm8
3320	movdqa	xmm3,xmm4
3321	pslld	xmm3,7
3322	psrld	xmm4,25
3323	pxor	xmm4,xmm3
; DB bytes: palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
3324DB	102,15,58,15,228,4
3325DB	102,69,15,58,15,192,8
3326DB	102,69,15,58,15,228,12
3327	paddd	xmm1,xmm5
3328	pxor	xmm13,xmm1
3329	pshufb	xmm13,XMMWORD[$L$rol16]
3330	paddd	xmm9,xmm13
3331	pxor	xmm5,xmm9
3332	movdqa	xmm3,xmm5
3333	pslld	xmm3,12
3334	psrld	xmm5,20
3335	pxor	xmm5,xmm3
3336	paddd	xmm1,xmm5
3337	pxor	xmm13,xmm1
3338	pshufb	xmm13,XMMWORD[$L$rol8]
3339	paddd	xmm9,xmm13
3340	pxor	xmm5,xmm9
3341	movdqa	xmm3,xmm5
3342	pslld	xmm3,7
3343	psrld	xmm5,25
3344	pxor	xmm5,xmm3
; DB bytes: palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
3345DB	102,15,58,15,237,4
3346DB	102,69,15,58,15,201,8
3347DB	102,69,15,58,15,237,12
3348	paddd	xmm2,xmm6
3349	pxor	xmm14,xmm2
3350	pshufb	xmm14,XMMWORD[$L$rol16]
3351	paddd	xmm10,xmm14
3352	pxor	xmm6,xmm10
3353	movdqa	xmm3,xmm6
3354	pslld	xmm3,12
3355	psrld	xmm6,20
3356	pxor	xmm6,xmm3
3357	paddd	xmm2,xmm6
3358	pxor	xmm14,xmm2
3359	pshufb	xmm14,XMMWORD[$L$rol8]
3360	paddd	xmm10,xmm14
3361	pxor	xmm6,xmm10
3362	movdqa	xmm3,xmm6
3363	pslld	xmm3,7
3364	psrld	xmm6,25
3365	pxor	xmm6,xmm3
; DB bytes: palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
3366DB	102,15,58,15,246,4
3367DB	102,69,15,58,15,210,8
3368DB	102,69,15,58,15,246,12
; Hash step taken on every pass, placed between the two half-rounds.
3369	add	r10,QWORD[((0+0))+rdi]
3370	adc	r11,QWORD[((8+0))+rdi]
3371	adc	r12,1
3372	mov	rax,QWORD[((0+160+0))+rbp]
3373	mov	r15,rax
3374	mul	r10
3375	mov	r13,rax
3376	mov	r14,rdx
3377	mov	rax,QWORD[((0+160+0))+rbp]
3378	mul	r11
3379	imul	r15,r12
3380	add	r14,rax
3381	adc	r15,rdx
3382	mov	rax,QWORD[((8+160+0))+rbp]
3383	mov	r9,rax
3384	mul	r10
3385	add	r14,rax
3386	adc	rdx,0
3387	mov	r10,rdx
3388	mov	rax,QWORD[((8+160+0))+rbp]
3389	mul	r11
3390	add	r15,rax
3391	adc	rdx,0
3392	imul	r9,r12
3393	add	r15,r10
3394	adc	r9,rdx
3395	mov	r10,r13
3396	mov	r11,r14
3397	mov	r12,r15
3398	and	r12,3
3399	mov	r13,r15
3400	and	r13,-4
3401	mov	r14,r9
3402	shrd	r15,r9,2
3403	shr	r9,2
3404	add	r15,r13
3405	adc	r9,r14
3406	add	r10,r15
3407	adc	r11,r9
3408	adc	r12,0
; Second half of the double round; the palignr immediates are 12 / 8 / 4 so
; the rows end up back in their original lanes.
3409	paddd	xmm0,xmm4
3410	pxor	xmm12,xmm0
3411	pshufb	xmm12,XMMWORD[$L$rol16]
3412	paddd	xmm8,xmm12
3413	pxor	xmm4,xmm8
3414	movdqa	xmm3,xmm4
3415	pslld	xmm3,12
3416	psrld	xmm4,20
3417	pxor	xmm4,xmm3
3418	paddd	xmm0,xmm4
3419	pxor	xmm12,xmm0
3420	pshufb	xmm12,XMMWORD[$L$rol8]
3421	paddd	xmm8,xmm12
3422	pxor	xmm4,xmm8
3423	movdqa	xmm3,xmm4
3424	pslld	xmm3,7
3425	psrld	xmm4,25
3426	pxor	xmm4,xmm3
3427DB	102,15,58,15,228,12
3428DB	102,69,15,58,15,192,8
3429DB	102,69,15,58,15,228,4
3430	paddd	xmm1,xmm5
3431	pxor	xmm13,xmm1
3432	pshufb	xmm13,XMMWORD[$L$rol16]
3433	paddd	xmm9,xmm13
3434	pxor	xmm5,xmm9
3435	movdqa	xmm3,xmm5
3436	pslld	xmm3,12
3437	psrld	xmm5,20
3438	pxor	xmm5,xmm3
3439	paddd	xmm1,xmm5
3440	pxor	xmm13,xmm1
3441	pshufb	xmm13,XMMWORD[$L$rol8]
3442	paddd	xmm9,xmm13
3443	pxor	xmm5,xmm9
3444	movdqa	xmm3,xmm5
3445	pslld	xmm3,7
3446	psrld	xmm5,25
3447	pxor	xmm5,xmm3
3448DB	102,15,58,15,237,12
3449DB	102,69,15,58,15,201,8
3450DB	102,69,15,58,15,237,4
3451	paddd	xmm2,xmm6
3452	pxor	xmm14,xmm2
3453	pshufb	xmm14,XMMWORD[$L$rol16]
3454	paddd	xmm10,xmm14
3455	pxor	xmm6,xmm10
3456	movdqa	xmm3,xmm6
3457	pslld	xmm3,12
3458	psrld	xmm6,20
3459	pxor	xmm6,xmm3
3460	paddd	xmm2,xmm6
3461	pxor	xmm14,xmm2
3462	pshufb	xmm14,XMMWORD[$L$rol8]
3463	paddd	xmm10,xmm14
3464	pxor	xmm6,xmm10
3465	movdqa	xmm3,xmm6
3466	pslld	xmm3,7
3467	psrld	xmm6,25
3468	pxor	xmm6,xmm3
3469DB	102,15,58,15,246,12
3470DB	102,69,15,58,15,210,8
3471DB	102,69,15,58,15,246,4
3472
; Loop control: rcx passes through the two-hash entry, then r8+1 passes
; through the one-hash entry.
3473	lea	rdi,[16+rdi]
3474	dec	rcx
3475	jg	NEAR $L$seal_sse_tail_192_rounds_and_x2hash
3476	dec	r8
3477	jge	NEAR $L$seal_sse_tail_192_rounds_and_x1hash
; Add the initial state back into all three blocks (each with its own counter
; row).
3478	paddd	xmm2,XMMWORD[$L$chacha20_consts]
3479	paddd	xmm6,XMMWORD[((160+48))+rbp]
3480	paddd	xmm10,XMMWORD[((160+64))+rbp]
3481	paddd	xmm14,XMMWORD[((160+128))+rbp]
3482	paddd	xmm1,XMMWORD[$L$chacha20_consts]
3483	paddd	xmm5,XMMWORD[((160+48))+rbp]
3484	paddd	xmm9,XMMWORD[((160+64))+rbp]
3485	paddd	xmm13,XMMWORD[((160+112))+rbp]
3486	paddd	xmm0,XMMWORD[$L$chacha20_consts]
3487	paddd	xmm4,XMMWORD[((160+48))+rbp]
3488	paddd	xmm8,XMMWORD[((160+64))+rbp]
3489	paddd	xmm12,XMMWORD[((160+96))+rbp]
; XOR block 2 with input bytes 0..63 and block 1 with input bytes 64..127,
; storing at the same offsets from rdi.
3490	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
3491	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
3492	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
3493	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
3494	pxor	xmm2,xmm3
3495	pxor	xmm6,xmm7
3496	pxor	xmm10,xmm11
3497	pxor	xmm15,xmm14
3498	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
3499	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
3500	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
3501	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
3502	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
3503	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
3504	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
3505	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
3506	pxor	xmm1,xmm3
3507	pxor	xmm5,xmm7
3508	pxor	xmm9,xmm11
3509	pxor	xmm15,xmm13
3510	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
3511	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
3512	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
3513	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
3514
; 128 bytes were just written and still need hashing (rcx); block 0 remains in
; xmm0/xmm4/xmm8/xmm12.  Falls through into $L$seal_sse_128_tail_hash.
3515	mov	rcx,8*16
3516	sub	rbx,8*16
3517	lea	rsi,[128+rsi]
3518
; Hash loop over output that has already been written: while rcx >= 16, absorb
; the 16 bytes at rdi into the accumulator r12:r11:r10, then advance rdi by 16
; and drop rcx by 16.  Leaves for $L$seal_sse_128_tail_xor once rcx < 16.
; Scratch: rax, rdx, r9, r13, r14, r15.
3519$L$seal_sse_128_tail_hash:
3520	cmp	rcx,16
3521	jb	NEAR $L$seal_sse_128_tail_xor
; acc += block, plus the 2^128 bit via adc r12,1.
3522	add	r10,QWORD[((0+0))+rdi]
3523	adc	r11,QWORD[((8+0))+rdi]
3524	adc	r12,1
; acc *= the 128-bit value at [rbp+160]; product lands in r9:r15:r14:r13.
3525	mov	rax,QWORD[((0+160+0))+rbp]
3526	mov	r15,rax
3527	mul	r10
3528	mov	r13,rax
3529	mov	r14,rdx
3530	mov	rax,QWORD[((0+160+0))+rbp]
3531	mul	r11
3532	imul	r15,r12
3533	add	r14,rax
3534	adc	r15,rdx
3535	mov	rax,QWORD[((8+160+0))+rbp]
3536	mov	r9,rax
3537	mul	r10
3538	add	r14,rax
3539	adc	rdx,0
3540	mov	r10,rdx
3541	mov	rax,QWORD[((8+160+0))+rbp]
3542	mul	r11
3543	add	r15,rax
3544	adc	rdx,0
3545	imul	r9,r12
3546	add	r15,r10
3547	adc	r9,rdx
; Partial reduction mod 2^130-5: keep the low 130 bits and add back the bits
; above them times 5, formed as (high & -4) + (high >> 2).
3548	mov	r10,r13
3549	mov	r11,r14
3550	mov	r12,r15
3551	and	r12,3
3552	mov	r13,r15
3553	and	r13,-4
3554	mov	r14,r9
3555	shrd	r15,r9,2
3556	shr	r9,2
3557	add	r15,r13
3558	adc	r9,r14
3559	add	r10,r15
3560	adc	r11,r9
3561	adc	r12,0
3562
3563	sub	rcx,16
3564	lea	rdi,[16+rdi]
3565	jmp	NEAR $L$seal_sse_128_tail_hash
3566
; Encrypt-and-hash loop, 16 bytes per pass, for as long as rbx >= 16.
; xmm0 holds the next 16 keystream bytes; the registers xmm4, xmm8, xmm12,
; xmm1, xmm5, xmm9, xmm13 form a queue of the following keystream rows and are
; shifted down by one after every pass.  Fewer than 16 bytes left goes to
; $L$seal_sse_tail_16.
3567$L$seal_sse_128_tail_xor:
3568	cmp	rbx,16
3569	jb	NEAR $L$seal_sse_tail_16
3570	sub	rbx,16
3571
; output[0..15] = input[0..15] XOR keystream
3572	movdqu	xmm3,XMMWORD[rsi]
3573	pxor	xmm0,xmm3
3574	movdqu	XMMWORD[rdi],xmm0
3575
; Absorb the 16 bytes just written (read back from rdi) into the accumulator
; r12:r11:r10, with the 2^128 bit added by adc r12,1.
3576	add	r10,QWORD[rdi]
3577	adc	r11,QWORD[8+rdi]
3578	adc	r12,1
3579	lea	rsi,[16+rsi]
3580	lea	rdi,[16+rdi]
; acc *= the 128-bit value at [rbp+160], followed by the partial reduction
; mod 2^130-5 (bits above 2^130 are added back times 5).
3581	mov	rax,QWORD[((0+160+0))+rbp]
3582	mov	r15,rax
3583	mul	r10
3584	mov	r13,rax
3585	mov	r14,rdx
3586	mov	rax,QWORD[((0+160+0))+rbp]
3587	mul	r11
3588	imul	r15,r12
3589	add	r14,rax
3590	adc	r15,rdx
3591	mov	rax,QWORD[((8+160+0))+rbp]
3592	mov	r9,rax
3593	mul	r10
3594	add	r14,rax
3595	adc	rdx,0
3596	mov	r10,rdx
3597	mov	rax,QWORD[((8+160+0))+rbp]
3598	mul	r11
3599	add	r15,rax
3600	adc	rdx,0
3601	imul	r9,r12
3602	add	r15,r10
3603	adc	r9,rdx
3604	mov	r10,r13
3605	mov	r11,r14
3606	mov	r12,r15
3607	and	r12,3
3608	mov	r13,r15
3609	and	r13,-4
3610	mov	r14,r9
3611	shrd	r15,r9,2
3612	shr	r9,2
3613	add	r15,r13
3614	adc	r9,r14
3615	add	r10,r15
3616	adc	r11,r9
3617	adc	r12,0
3618
3619
; Shift the keystream queue: xmm4->xmm0, xmm8->xmm4, xmm12->xmm8, xmm1->xmm12,
; xmm5->xmm1, xmm9->xmm5, xmm13->xmm9.
3620	movdqa	xmm0,xmm4
3621	movdqa	xmm4,xmm8
3622	movdqa	xmm8,xmm12
3623	movdqa	xmm12,xmm1
3624	movdqa	xmm1,xmm5
3625	movdqa	xmm5,xmm9
3626	movdqa	xmm9,xmm13
3627	jmp	NEAR $L$seal_sse_128_tail_xor
3628
; Final partial block: rbx (< 16) input bytes remain and xmm0 holds the
; keystream for them.  With rbx == 0 control goes straight to
; $L$process_blocks_of_extra_in.
3629$L$seal_sse_tail_16:
3630	test	rbx,rbx
3631	jz	NEAR $L$process_blocks_of_extra_in
3632
; Build the input bytes in xmm15 one byte at a time, last byte first, so that
; no memory beyond the rbx valid bytes is read.
3633	mov	r8,rbx
3634	mov	rcx,rbx
3635	lea	rsi,[((-1))+rbx*1+rsi]
3636	pxor	xmm15,xmm15
3637$L$seal_sse_tail_16_compose:
3638	pslldq	xmm15,1
3639	pinsrb	xmm15,BYTE[rsi],0
3640	lea	rsi,[((-1))+rsi]
3641	dec	rcx
3642	jne	NEAR $L$seal_sse_tail_16_compose
3643
3644
; XOR with the keystream.  Bytes of xmm15 above the low rbx bytes now hold
; leftover keystream; they are masked off before hashing.
3645	pxor	xmm15,xmm0
3646
3647
; Write the low rbx bytes to rdi one byte at a time (xmm0 is a shifting copy).
3648	mov	rcx,rbx
3649	movdqu	xmm0,xmm15
3650$L$seal_sse_tail_16_extract:
3651	pextrb	XMMWORD[rdi],xmm0,0
3652	psrldq	xmm0,1
3653	add	rdi,1
3654	sub	rcx,1
3655	jnz	NEAR $L$seal_sse_tail_16_extract
3656
3657
3658
3659
3660
3661
3662
3663
; Load a pointer from the stack slot at [rsp+288+160+32] and read two fields
; through it: r13 = [r9+48] (used below as an address) and r14 = [r9+56] (used
; below as a byte count).  The labels call this data "extra_in".
; NOTE(review): the layout of the structure r9 points to is defined outside
; this excerpt - confirm the 48 / 56 offsets against its definition.
3664	mov	r9,QWORD[((288 + 160 + 32))+rsp]
3665	mov	r14,QWORD[56+r9]
3666	mov	r13,QWORD[48+r9]
3667	test	r14,r14
3668	jz	NEAR $L$process_partial_block
3669
; r15 = min(16 - rbx, r14): how many extra_in bytes fit into this block.
; NOTE(review): jge is a signed compare on a length.
3670	mov	r15,16
3671	sub	r15,rbx
3672	cmp	r14,r15
3673
3674	jge	NEAR $L$load_extra_in
3675	mov	r15,r14
3676
3677$L$load_extra_in:
3678
3679
; rsi = address of the last of the r15 bytes (they are loaded back to front).
3680	lea	rsi,[((-1))+r15*1+r13]
3681
3682
; Advance the stored pointer and shrink the stored length by r15.
3683	add	r13,r15
3684	sub	r14,r15
3685	mov	QWORD[48+r9],r13
3686	mov	QWORD[56+r9],r14
3687
3688
3689
; r8 = rbx + r15.  r8 is not read again before it is reloaded in
; $L$process_blocks_of_extra_in.
3690	add	r8,r15
3691
3692
; Gather the r15 extra_in bytes into the low bytes of xmm11.
3693	pxor	xmm11,xmm11
3694$L$load_extra_load_loop:
3695	pslldq	xmm11,1
3696	pinsrb	xmm11,BYTE[rsi],0
3697	lea	rsi,[((-1))+rsi]
3698	sub	r15,1
3699	jnz	NEAR $L$load_extra_load_loop
3700
3701
3702
3703
; Shift xmm11 up by rbx bytes (pslldq needs an immediate count, hence a loop)
; so the extra_in bytes sit directly above the rbx output bytes.
3704	mov	r15,rbx
3705
3706$L$load_extra_shift_loop:
3707	pslldq	xmm11,1
3708	sub	r15,1
3709	jnz	NEAR $L$load_extra_shift_loop
3710
3711
3712
3713
; Keep only the low rbx bytes of xmm15: entry (rbx-1) of $L$and_masks has its
; first rbx bytes set to 0xff.  rbx is scaled by 16 here and not restored.
3714	lea	r15,[$L$and_masks]
3715	shl	rbx,4
3716	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
3717
3718
; Merge the two disjoint byte ranges into one block.
3719	por	xmm15,xmm11
3720
3721
3722
; Absorb the merged block: the DB bytes encode movq r13,xmm15 (low half),
; pextrq fetches the high half; adc r12,1 adds the 2^128 bit.  Then multiply
; by the 128-bit value at [rbp+160] and partially reduce mod 2^130-5.
3723DB	102,77,15,126,253
3724	pextrq	r14,xmm15,1
3725	add	r10,r13
3726	adc	r11,r14
3727	adc	r12,1
3728	mov	rax,QWORD[((0+160+0))+rbp]
3729	mov	r15,rax
3730	mul	r10
3731	mov	r13,rax
3732	mov	r14,rdx
3733	mov	rax,QWORD[((0+160+0))+rbp]
3734	mul	r11
3735	imul	r15,r12
3736	add	r14,rax
3737	adc	r15,rdx
3738	mov	rax,QWORD[((8+160+0))+rbp]
3739	mov	r9,rax
3740	mul	r10
3741	add	r14,rax
3742	adc	rdx,0
3743	mov	r10,rdx
3744	mov	rax,QWORD[((8+160+0))+rbp]
3745	mul	r11
3746	add	r15,rax
3747	adc	rdx,0
3748	imul	r9,r12
3749	add	r15,r10
3750	adc	r9,rdx
3751	mov	r10,r13
3752	mov	r11,r14
3753	mov	r12,r15
3754	and	r12,3
3755	mov	r13,r15
3756	and	r13,-4
3757	mov	r14,r9
3758	shrd	r15,r9,2
3759	shr	r9,2
3760	add	r15,r13
3761	adc	r9,r14
3762	add	r10,r15
3763	adc	r11,r9
3764	adc	r12,0
3765
3766
; Hash whatever extra_in data is still pending.  Reloads the pointer from
; [rsp+288+32+160], then rsi = [r9+48] (data address) and r8 = rcx = [r9+56]
; (byte count); r8 becomes the number of whole 16-byte blocks.
3767$L$process_blocks_of_extra_in:
3768
3769	mov	r9,QWORD[((288+32+160 ))+rsp]
3770	mov	rsi,QWORD[48+r9]
3771	mov	r8,QWORD[56+r9]
3772	mov	rcx,r8
3773	shr	r8,4
3774
; The jz tests ZF from "shr r8,4" on the first pass and from "sub r8,1" on
; later passes; nothing in between on either path writes the flags.
3775$L$process_extra_hash_loop:
3776	jz	NEAR process_extra_in_trailer
; Absorb 16 bytes at rsi into the accumulator r12:r11:r10 (plus the 2^128
; bit), multiply by the 128-bit value at [rbp+160], partially reduce mod
; 2^130-5.  r9 is reused as scratch from here on.
3777	add	r10,QWORD[((0+0))+rsi]
3778	adc	r11,QWORD[((8+0))+rsi]
3779	adc	r12,1
3780	mov	rax,QWORD[((0+160+0))+rbp]
3781	mov	r15,rax
3782	mul	r10
3783	mov	r13,rax
3784	mov	r14,rdx
3785	mov	rax,QWORD[((0+160+0))+rbp]
3786	mul	r11
3787	imul	r15,r12
3788	add	r14,rax
3789	adc	r15,rdx
3790	mov	rax,QWORD[((8+160+0))+rbp]
3791	mov	r9,rax
3792	mul	r10
3793	add	r14,rax
3794	adc	rdx,0
3795	mov	r10,rdx
3796	mov	rax,QWORD[((8+160+0))+rbp]
3797	mul	r11
3798	add	r15,rax
3799	adc	rdx,0
3800	imul	r9,r12
3801	add	r15,r10
3802	adc	r9,rdx
3803	mov	r10,r13
3804	mov	r11,r14
3805	mov	r12,r15
3806	and	r12,3
3807	mov	r13,r15
3808	and	r13,-4
3809	mov	r14,r9
3810	shrd	r15,r9,2
3811	shr	r9,2
3812	add	r15,r13
3813	adc	r9,r14
3814	add	r10,r15
3815	adc	r11,r9
3816	adc	r12,0
3817
3818	lea	rsi,[16+rsi]
3819	sub	r8,1
3820	jmp	NEAR $L$process_extra_hash_loop
; Leftover bytes: rcx = rbx = count mod 16.  The jz uses ZF from the "and"
; (mov does not change flags).  With nothing left, go to the length block.
3821process_extra_in_trailer:
3822	and	rcx,15
3823	mov	rbx,rcx
3824	jz	NEAR $L$do_length_block
3825	lea	rsi,[((-1))+rcx*1+rsi]
3826
; Shift the rcx leftover bytes into the low end of xmm15, last byte first.
; xmm15 is not cleared beforehand; $L$process_partial_block (fall-through)
; masks away everything above the low rbx bytes.
3827$L$process_extra_in_trailer_load:
3828	pslldq	xmm15,1
3829	pinsrb	xmm15,BYTE[rsi],0
3830	lea	rsi,[((-1))+rsi]
3831	sub	rcx,1
3832	jnz	NEAR $L$process_extra_in_trailer_load
3833
; Absorb a final partial block: xmm15 holds rbx valid bytes in its low end.
; rbx is indexed as (rbx-1) into $L$and_masks, so it has to be in 1..16 -
; both visible paths into this label arrive with rbx != 0.
3834$L$process_partial_block:
3835
; Zero every byte of xmm15 above the low rbx bytes.
3836	lea	r15,[$L$and_masks]
3837	shl	rbx,4
3838	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
; The DB bytes encode movq r13,xmm15 (low 64 bits); pextrq takes the high 64.
3839DB	102,77,15,126,253
3840	pextrq	r14,xmm15,1
; acc(r12:r11:r10) += block, plus the 2^128 bit via adc r12,1.
3841	add	r10,r13
3842	adc	r11,r14
3843	adc	r12,1
; acc *= the 128-bit value at [rbp+160], then partial reduction mod 2^130-5.
3844	mov	rax,QWORD[((0+160+0))+rbp]
3845	mov	r15,rax
3846	mul	r10
3847	mov	r13,rax
3848	mov	r14,rdx
3849	mov	rax,QWORD[((0+160+0))+rbp]
3850	mul	r11
3851	imul	r15,r12
3852	add	r14,rax
3853	adc	r15,rdx
3854	mov	rax,QWORD[((8+160+0))+rbp]
3855	mov	r9,rax
3856	mul	r10
3857	add	r14,rax
3858	adc	rdx,0
3859	mov	r10,rdx
3860	mov	rax,QWORD[((8+160+0))+rbp]
3861	mul	r11
3862	add	r15,rax
3863	adc	rdx,0
3864	imul	r9,r12
3865	add	r15,r10
3866	adc	r9,rdx
3867	mov	r10,r13
3868	mov	r11,r14
3869	mov	r12,r15
3870	and	r12,3
3871	mov	r13,r15
3872	and	r13,-4
3873	mov	r14,r9
3874	shrd	r15,r9,2
3875	shr	r9,2
3876	add	r15,r13
3877	adc	r9,r14
3878	add	r10,r15
3879	adc	r11,r9
3880	adc	r12,0
3881
3882
; Finish the authenticator and return.
; Absorbs the 16 bytes stored at [rbp+160+32] as one more block (presumably
; the encoded lengths, going by the label name - confirm where that slot is
; written), fully reduces the accumulator, adds the 128-bit value at
; [rbp+160+16], writes the 16-byte result through a popped pointer, and runs
; the Win64 epilogue.
3883$L$do_length_block:
3884	add	r10,QWORD[((0+160+32))+rbp]
3885	adc	r11,QWORD[((8+160+32))+rbp]
3886	adc	r12,1
; acc *= the 128-bit value at [rbp+160], then partial reduction mod 2^130-5.
3887	mov	rax,QWORD[((0+160+0))+rbp]
3888	mov	r15,rax
3889	mul	r10
3890	mov	r13,rax
3891	mov	r14,rdx
3892	mov	rax,QWORD[((0+160+0))+rbp]
3893	mul	r11
3894	imul	r15,r12
3895	add	r14,rax
3896	adc	r15,rdx
3897	mov	rax,QWORD[((8+160+0))+rbp]
3898	mov	r9,rax
3899	mul	r10
3900	add	r14,rax
3901	adc	rdx,0
3902	mov	r10,rdx
3903	mov	rax,QWORD[((8+160+0))+rbp]
3904	mul	r11
3905	add	r15,rax
3906	adc	rdx,0
3907	imul	r9,r12
3908	add	r15,r10
3909	adc	r9,rdx
3910	mov	r10,r13
3911	mov	r11,r14
3912	mov	r12,r15
3913	and	r12,3
3914	mov	r13,r15
3915	and	r13,-4
3916	mov	r14,r9
3917	shrd	r15,r9,2
3918	shr	r9,2
3919	add	r15,r13
3920	adc	r9,r14
3921	add	r10,r15
3922	adc	r11,r9
3923	adc	r12,0
3924
3925
; Final reduction: subtract 2^130-5 (limbs -5, -1, 3) from a copy; if that
; borrows, the accumulator was already below the modulus and cmovc restores it.
3926	mov	r13,r10
3927	mov	r14,r11
3928	mov	r15,r12
3929	sub	r10,-5
3930	sbb	r11,-1
3931	sbb	r12,3
3932	cmovc	r10,r13
3933	cmovc	r11,r14
3934	cmovc	r12,r15
3935
; Add the 128-bit value at [rbp+160+16]; the carry out of bit 127 is dropped.
3936	add	r10,QWORD[((0+160+16))+rbp]
3937	adc	r11,QWORD[((8+160+16))+rbp]
3938
; Restore xmm6-xmm15 (callee-saved under the Microsoft x64 convention) from
; the save area at [rbp+0 .. rbp+159].
3939	movaps	xmm6,XMMWORD[((0+0))+rbp]
3940	movaps	xmm7,XMMWORD[((16+0))+rbp]
3941	movaps	xmm8,XMMWORD[((32+0))+rbp]
3942	movaps	xmm9,XMMWORD[((48+0))+rbp]
3943	movaps	xmm10,XMMWORD[((64+0))+rbp]
3944	movaps	xmm11,XMMWORD[((80+0))+rbp]
3945	movaps	xmm12,XMMWORD[((96+0))+rbp]
3946	movaps	xmm13,XMMWORD[((112+0))+rbp]
3947	movaps	xmm14,XMMWORD[((128+0))+rbp]
3948	movaps	xmm15,XMMWORD[((144+0))+rbp]
3949
3950
; Release the local frame.
3951	add	rsp,288 + 160 + 32
3952
3953
; Pop the pointer saved on top of the callee-saved registers and store the
; 16-byte result through it.
; NOTE(review): the matching push is in the prologue, outside this excerpt.
3954	pop	r9
3955
3956	mov	QWORD[r9],r10
3957	mov	QWORD[8+r9],r11
3958	pop	r15
3959
3960	pop	r14
3961
3962	pop	r13
3963
3964	pop	r12
3965
3966	pop	rbx
3967
3968	pop	rbp
3969
3970	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3971	mov	rsi,QWORD[16+rsp]
3972	DB	0F3h,0C3h		;repret
3973
; Path that computes three ChaCha20 blocks side by side from the 48 bytes at
; r9 and then hands over to the 16-byte tail loop.  (Presumably selected for
; short inputs - the dispatch is outside this excerpt.)
;   block 0: xmm0/xmm4/xmm8/xmm12   counter row = [r9+32] + 1
;   block 1: xmm1/xmm5/xmm9/xmm13   counter row = [r9+32] + 2
;   block 2: xmm2/xmm6/xmm10/xmm14  counter row = [r9+32] as loaded
; xmm7 / xmm11 / xmm15 keep copies of the initial rows for the final add-back.
; r10 is the round counter (10 double rounds); xmm3 is scratch.
3974$L$seal_sse_128:
3975
3976	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
3977	movdqa	xmm1,xmm0
3978	movdqa	xmm2,xmm0
3979	movdqu	xmm4,XMMWORD[r9]
3980	movdqa	xmm5,xmm4
3981	movdqa	xmm6,xmm4
3982	movdqu	xmm8,XMMWORD[16+r9]
3983	movdqa	xmm9,xmm8
3984	movdqa	xmm10,xmm8
3985	movdqu	xmm14,XMMWORD[32+r9]
3986	movdqa	xmm12,xmm14
3987	paddd	xmm12,XMMWORD[$L$sse_inc]
3988	movdqa	xmm13,xmm12
3989	paddd	xmm13,XMMWORD[$L$sse_inc]
3990	movdqa	xmm7,xmm4
3991	movdqa	xmm11,xmm8
3992	movdqa	xmm15,xmm12
3993	mov	r10,10
3994
; One double round per pass.  First half for blocks 0, 1, 2; each ends with
; DB-encoded palignr instructions rotating the rows by 4 / 8 / 12 bytes.
3995$L$seal_sse_128_rounds:
3996	paddd	xmm0,xmm4
3997	pxor	xmm12,xmm0
3998	pshufb	xmm12,XMMWORD[$L$rol16]
3999	paddd	xmm8,xmm12
4000	pxor	xmm4,xmm8
4001	movdqa	xmm3,xmm4
4002	pslld	xmm3,12
4003	psrld	xmm4,20
4004	pxor	xmm4,xmm3
4005	paddd	xmm0,xmm4
4006	pxor	xmm12,xmm0
4007	pshufb	xmm12,XMMWORD[$L$rol8]
4008	paddd	xmm8,xmm12
4009	pxor	xmm4,xmm8
4010	movdqa	xmm3,xmm4
4011	pslld	xmm3,7
4012	psrld	xmm4,25
4013	pxor	xmm4,xmm3
; DB bytes: palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
4014DB	102,15,58,15,228,4
4015DB	102,69,15,58,15,192,8
4016DB	102,69,15,58,15,228,12
4017	paddd	xmm1,xmm5
4018	pxor	xmm13,xmm1
4019	pshufb	xmm13,XMMWORD[$L$rol16]
4020	paddd	xmm9,xmm13
4021	pxor	xmm5,xmm9
4022	movdqa	xmm3,xmm5
4023	pslld	xmm3,12
4024	psrld	xmm5,20
4025	pxor	xmm5,xmm3
4026	paddd	xmm1,xmm5
4027	pxor	xmm13,xmm1
4028	pshufb	xmm13,XMMWORD[$L$rol8]
4029	paddd	xmm9,xmm13
4030	pxor	xmm5,xmm9
4031	movdqa	xmm3,xmm5
4032	pslld	xmm3,7
4033	psrld	xmm5,25
4034	pxor	xmm5,xmm3
; DB bytes: palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
4035DB	102,15,58,15,237,4
4036DB	102,69,15,58,15,201,8
4037DB	102,69,15,58,15,237,12
4038	paddd	xmm2,xmm6
4039	pxor	xmm14,xmm2
4040	pshufb	xmm14,XMMWORD[$L$rol16]
4041	paddd	xmm10,xmm14
4042	pxor	xmm6,xmm10
4043	movdqa	xmm3,xmm6
4044	pslld	xmm3,12
4045	psrld	xmm6,20
4046	pxor	xmm6,xmm3
4047	paddd	xmm2,xmm6
4048	pxor	xmm14,xmm2
4049	pshufb	xmm14,XMMWORD[$L$rol8]
4050	paddd	xmm10,xmm14
4051	pxor	xmm6,xmm10
4052	movdqa	xmm3,xmm6
4053	pslld	xmm3,7
4054	psrld	xmm6,25
4055	pxor	xmm6,xmm3
; DB bytes: palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
4056DB	102,15,58,15,246,4
4057DB	102,69,15,58,15,210,8
4058DB	102,69,15,58,15,246,12
; Second half for blocks 0, 1, 2; the palignr immediates are 12 / 8 / 4, which
; puts the rows back in their original lanes.
4059	paddd	xmm0,xmm4
4060	pxor	xmm12,xmm0
4061	pshufb	xmm12,XMMWORD[$L$rol16]
4062	paddd	xmm8,xmm12
4063	pxor	xmm4,xmm8
4064	movdqa	xmm3,xmm4
4065	pslld	xmm3,12
4066	psrld	xmm4,20
4067	pxor	xmm4,xmm3
4068	paddd	xmm0,xmm4
4069	pxor	xmm12,xmm0
4070	pshufb	xmm12,XMMWORD[$L$rol8]
4071	paddd	xmm8,xmm12
4072	pxor	xmm4,xmm8
4073	movdqa	xmm3,xmm4
4074	pslld	xmm3,7
4075	psrld	xmm4,25
4076	pxor	xmm4,xmm3
4077DB	102,15,58,15,228,12
4078DB	102,69,15,58,15,192,8
4079DB	102,69,15,58,15,228,4
4080	paddd	xmm1,xmm5
4081	pxor	xmm13,xmm1
4082	pshufb	xmm13,XMMWORD[$L$rol16]
4083	paddd	xmm9,xmm13
4084	pxor	xmm5,xmm9
4085	movdqa	xmm3,xmm5
4086	pslld	xmm3,12
4087	psrld	xmm5,20
4088	pxor	xmm5,xmm3
4089	paddd	xmm1,xmm5
4090	pxor	xmm13,xmm1
4091	pshufb	xmm13,XMMWORD[$L$rol8]
4092	paddd	xmm9,xmm13
4093	pxor	xmm5,xmm9
4094	movdqa	xmm3,xmm5
4095	pslld	xmm3,7
4096	psrld	xmm5,25
4097	pxor	xmm5,xmm3
4098DB	102,15,58,15,237,12
4099DB	102,69,15,58,15,201,8
4100DB	102,69,15,58,15,237,4
4101	paddd	xmm2,xmm6
4102	pxor	xmm14,xmm2
4103	pshufb	xmm14,XMMWORD[$L$rol16]
4104	paddd	xmm10,xmm14
4105	pxor	xmm6,xmm10
4106	movdqa	xmm3,xmm6
4107	pslld	xmm3,12
4108	psrld	xmm6,20
4109	pxor	xmm6,xmm3
4110	paddd	xmm2,xmm6
4111	pxor	xmm14,xmm2
4112	pshufb	xmm14,XMMWORD[$L$rol8]
4113	paddd	xmm10,xmm14
4114	pxor	xmm6,xmm10
4115	movdqa	xmm3,xmm6
4116	pslld	xmm3,7
4117	psrld	xmm6,25
4118	pxor	xmm6,xmm3
4119DB	102,15,58,15,246,12
4120DB	102,69,15,58,15,210,8
4121DB	102,69,15,58,15,246,4
4122
4123	dec	r10
4124	jnz	NEAR $L$seal_sse_128_rounds
; Add the initial state back in.  Blocks 0 and 1 get all four rows; xmm15 is
; bumped by $L$sse_inc between them so each uses its own counter row.  Block 2
; only gets its first two rows (xmm2, xmm6) finalised - the only rows of it
; that are used below.
4125	paddd	xmm0,XMMWORD[$L$chacha20_consts]
4126	paddd	xmm1,XMMWORD[$L$chacha20_consts]
4127	paddd	xmm2,XMMWORD[$L$chacha20_consts]
4128	paddd	xmm4,xmm7
4129	paddd	xmm5,xmm7
4130	paddd	xmm6,xmm7
4131	paddd	xmm8,xmm11
4132	paddd	xmm9,xmm11
4133	paddd	xmm12,xmm15
4134	paddd	xmm15,XMMWORD[$L$sse_inc]
4135	paddd	xmm13,xmm15
4136
; Authenticator key from block 2: row 0 masked with $L$clamp becomes the
; multiplier at [rbp+160]; row 1 is stored at [rbp+160+16] and added to the
; accumulator at the very end.
4137	pand	xmm2,XMMWORD[$L$clamp]
4138	movdqa	XMMWORD[(160+0)+rbp],xmm2
4139	movdqa	XMMWORD[(160+16)+rbp],xmm6
4140
; "mov r8,r8" leaves r8 unchanged.  r8 is presumably the argument consumed by
; poly_hash_ad_internal, which is defined outside this excerpt.
4141	mov	r8,r8
4142	call	poly_hash_ad_internal
; Blocks 0 and 1 are consumed 16 bytes at a time by the tail loop.
4143	jmp	NEAR $L$seal_sse_128_tail_xor
4144$L$SEH_end_chacha20_poly1305_seal:
4145
4146
4147
4148
4149ALIGN	64
4150chacha20_poly1305_open_avx2:
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163	vzeroupper
4164	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4165	vbroadcasti128	ymm4,XMMWORD[r9]
4166	vbroadcasti128	ymm8,XMMWORD[16+r9]
4167	vbroadcasti128	ymm12,XMMWORD[32+r9]
4168	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
4169	cmp	rbx,6*32
4170	jbe	NEAR $L$open_avx2_192
4171	cmp	rbx,10*32
4172	jbe	NEAR $L$open_avx2_320
4173
4174	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
4175	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
4176	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4177	mov	r10,10
; Runs r10 iterations (r10 is loaded with 10 just before this label) of a
; double round on the single two-lane state ymm0/ymm4/ymm8/ymm12, using ymm3
; as scratch. Each iteration is one quarter-round pass, a row rotation
; (vpalignr 12/8/4), a second pass and the inverse rotation (4/8/12).
4178$L$open_avx2_init_rounds:
4179	vpaddd	ymm0,ymm0,ymm4
4180	vpxor	ymm12,ymm12,ymm0
4181	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4182	vpaddd	ymm8,ymm8,ymm12
4183	vpxor	ymm4,ymm4,ymm8
4184	vpsrld	ymm3,ymm4,20
4185	vpslld	ymm4,ymm4,12
4186	vpxor	ymm4,ymm4,ymm3
4187	vpaddd	ymm0,ymm0,ymm4
4188	vpxor	ymm12,ymm12,ymm0
4189	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4190	vpaddd	ymm8,ymm8,ymm12
4191	vpxor	ymm4,ymm4,ymm8
4192	vpslld	ymm3,ymm4,7
4193	vpsrld	ymm4,ymm4,25
4194	vpxor	ymm4,ymm4,ymm3
4195	vpalignr	ymm12,ymm12,ymm12,12
4196	vpalignr	ymm8,ymm8,ymm8,8
4197	vpalignr	ymm4,ymm4,ymm4,4
4198	vpaddd	ymm0,ymm0,ymm4
4199	vpxor	ymm12,ymm12,ymm0
4200	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4201	vpaddd	ymm8,ymm8,ymm12
4202	vpxor	ymm4,ymm4,ymm8
4203	vpsrld	ymm3,ymm4,20
4204	vpslld	ymm4,ymm4,12
4205	vpxor	ymm4,ymm4,ymm3
4206	vpaddd	ymm0,ymm0,ymm4
4207	vpxor	ymm12,ymm12,ymm0
4208	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4209	vpaddd	ymm8,ymm8,ymm12
4210	vpxor	ymm4,ymm4,ymm8
4211	vpslld	ymm3,ymm4,7
4212	vpsrld	ymm4,ymm4,25
4213	vpxor	ymm4,ymm4,ymm3
4214	vpalignr	ymm12,ymm12,ymm12,4
4215	vpalignr	ymm8,ymm8,ymm8,8
4216	vpalignr	ymm4,ymm4,ymm4,12
4217
4218	dec	r10
4219	jne	NEAR $L$open_avx2_init_rounds
; Add the initial state back onto the permuted state.
4220	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4221	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4222	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4223	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4224
; ymm3 = { low lane of ymm0, low lane of ymm4 }: the first 32 output bytes of
; the low-lane block.
4225	vperm2i128	ymm3,ymm4,ymm0,0x02
4226
; Mask with $L$clamp (the upper 16 mask bytes are all ones, so only the first
; 16 bytes are altered) and keep the result at [160+rbp]; the hashing code
; below reads its multiplier qwords from [160+rbp] and [168+rbp].
4227	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
4228	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
4229
; ymm0:ymm4 = the 64 output bytes of the high-lane block (rows 0,1 then 2,3).
4230	vperm2i128	ymm0,ymm4,ymm0,0x13
4231	vperm2i128	ymm4,ymm12,ymm8,0x13
4232
; `mov r8,r8` is a no-op as written.
; NOTE(review): poly_hash_ad_internal is defined outside this view; presumably
; it absorbs the additional data and leaves the accumulator in r10:r11:r12,
; which the code after the call adds into -- confirm.
4233	mov	r8,r8
4234	call	poly_hash_ad_internal
4235
; Absorb the first 64 input bytes (four 16-byte blocks at rsi+rcx, rcx =
; 0,16,32,48) into the accumulator r10:r11:r12, then XOR those 64 bytes with
; the keystream in ymm0:ymm4 and write them to rdi.
4236	xor	rcx,rcx
4237$L$open_avx2_init_hash:
; acc += block, with 1 added into the third limb.
4238	add	r10,QWORD[((0+0))+rcx*1+rsi]
4239	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4240	adc	r12,1
; acc *= (qword at [160+rbp], qword at [168+rbp]); partial products are
; collected in r13 (low), r14, r15, r9 (high).
4241	mov	rax,QWORD[((0+160+0))+rbp]
4242	mov	r15,rax
4243	mul	r10
4244	mov	r13,rax
4245	mov	r14,rdx
4246	mov	rax,QWORD[((0+160+0))+rbp]
4247	mul	r11
4248	imul	r15,r12
4249	add	r14,rax
4250	adc	r15,rdx
4251	mov	rax,QWORD[((8+160+0))+rbp]
4252	mov	r9,rax
4253	mul	r10
4254	add	r14,rax
4255	adc	rdx,0
4256	mov	r10,rdx
4257	mov	rax,QWORD[((8+160+0))+rbp]
4258	mul	r11
4259	add	r15,rax
4260	adc	rdx,0
4261	imul	r9,r12
4262	add	r15,r10
4263	adc	r9,rdx
; Reduction: keep the low two bits of r15 in r12; with c = (r9:r15) >> 2, add
; 4*c (r14:r13, r15 with its low two bits cleared) and c (shrd/shr result)
; into the low limbs, i.e. fold the high part back in as 5*c.
4264	mov	r10,r13
4265	mov	r11,r14
4266	mov	r12,r15
4267	and	r12,3
4268	mov	r13,r15
4269	and	r13,-4
4270	mov	r14,r9
4271	shrd	r15,r9,2
4272	shr	r9,2
4273	add	r15,r13
4274	adc	r9,r14
4275	add	r10,r15
4276	adc	r11,r9
4277	adc	r12,0
4278
4279	add	rcx,16
4280	cmp	rcx,2*32
4281	jne	NEAR $L$open_avx2_init_hash
4282
; XOR the 64 hashed input bytes with the keystream and store the result.
4283	vpxor	ymm0,ymm0,YMMWORD[rsi]
4284	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
4285
4286	vmovdqu	YMMWORD[rdi],ymm0
4287	vmovdqu	YMMWORD[32+rdi],ymm4
; Advance both pointers and shrink the remaining length by 64.
4288	lea	rsi,[64+rsi]
4289	lea	rdi,[64+rdi]
4290	sub	rbx,2*32
; Main loop: while at least 512 bytes remain (rbx), produce 512 keystream
; bytes from four two-lane states and absorb the same 512 input bytes into the
; accumulator r10:r11:r12, with the scalar hashing interleaved between the
; vector instructions.
; State layout: state k uses ymm(k), ymm(4+k), ymm(8+k), ymm(12+k), k = 0..3.
4291$L$open_avx2_main_loop:
4292
4293	cmp	rbx,16*32
4294	jb	NEAR $L$open_avx2_main_loop_done
4295	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4296	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4297	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4298	vmovdqa	ymm1,ymm0
4299	vmovdqa	ymm5,ymm4
4300	vmovdqa	ymm9,ymm8
4301	vmovdqa	ymm2,ymm0
4302	vmovdqa	ymm6,ymm4
4303	vmovdqa	ymm10,ymm8
4304	vmovdqa	ymm3,ymm0
4305	vmovdqa	ymm7,ymm4
4306	vmovdqa	ymm11,ymm8
; Last rows: ymm15 = saved row + inc, ymm14 = ymm15 + inc, ymm13, ymm12
; likewise ($L$avx2_inc adds 2 to the first dword of each lane). Each is kept
; for the final add; the [160+160] slot is overwritten with the newest value,
; so the next batch continues from it.
4307	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4308	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
4309	vpaddd	ymm14,ymm12,ymm15
4310	vpaddd	ymm13,ymm12,ymm14
4311	vpaddd	ymm12,ymm12,ymm13
4312	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
4313	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
4314	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4315	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4316
4317	xor	rcx,rcx
; One iteration = one double round on all four states plus three hashed
; 16-byte blocks (at rsi+rcx+0, +16, +32). rcx advances by 48 and the loop
; ends at 480, i.e. after 10 iterations. ymm8 doubles as scratch (rotation
; tables, shift temporaries), so its row is spilled to [160+128+rbp].
4318$L$open_avx2_main_loop_rounds:
4319	add	r10,QWORD[((0+0))+rcx*1+rsi]
4320	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4321	adc	r12,1
4322	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4323	vmovdqa	ymm8,YMMWORD[$L$rol16]
4324	vpaddd	ymm3,ymm3,ymm7
4325	vpaddd	ymm2,ymm2,ymm6
4326	vpaddd	ymm1,ymm1,ymm5
4327	vpaddd	ymm0,ymm0,ymm4
4328	vpxor	ymm15,ymm15,ymm3
4329	vpxor	ymm14,ymm14,ymm2
4330	vpxor	ymm13,ymm13,ymm1
4331	vpxor	ymm12,ymm12,ymm0
4332	mov	rdx,QWORD[((0+160+0))+rbp]
4333	mov	r15,rdx
4334	mulx	r14,r13,r10
4335	mulx	rdx,rax,r11
4336	imul	r15,r12
4337	add	r14,rax
4338	adc	r15,rdx
4339	vpshufb	ymm15,ymm15,ymm8
4340	vpshufb	ymm14,ymm14,ymm8
4341	vpshufb	ymm13,ymm13,ymm8
4342	vpshufb	ymm12,ymm12,ymm8
4343	vpaddd	ymm11,ymm11,ymm15
4344	vpaddd	ymm10,ymm10,ymm14
4345	vpaddd	ymm9,ymm9,ymm13
4346	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4347	vpxor	ymm7,ymm7,ymm11
4348	mov	rdx,QWORD[((8+160+0))+rbp]
4349	mulx	rax,r10,r10
4350	add	r14,r10
4351	mulx	r9,r11,r11
4352	adc	r15,r11
4353	adc	r9,0
4354	imul	rdx,r12
4355	vpxor	ymm6,ymm6,ymm10
4356	vpxor	ymm5,ymm5,ymm9
4357	vpxor	ymm4,ymm4,ymm8
4358	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4359	vpsrld	ymm8,ymm7,20
4360	vpslld	ymm7,ymm7,32-20
4361	vpxor	ymm7,ymm7,ymm8
4362	vpsrld	ymm8,ymm6,20
4363	vpslld	ymm6,ymm6,32-20
4364	vpxor	ymm6,ymm6,ymm8
4365	vpsrld	ymm8,ymm5,20
4366	vpslld	ymm5,ymm5,32-20
4367	add	r15,rax
4368	adc	r9,rdx
4369	vpxor	ymm5,ymm5,ymm8
4370	vpsrld	ymm8,ymm4,20
4371	vpslld	ymm4,ymm4,32-20
4372	vpxor	ymm4,ymm4,ymm8
4373	vmovdqa	ymm8,YMMWORD[$L$rol8]
4374	vpaddd	ymm3,ymm3,ymm7
4375	vpaddd	ymm2,ymm2,ymm6
4376	vpaddd	ymm1,ymm1,ymm5
4377	vpaddd	ymm0,ymm0,ymm4
4378	vpxor	ymm15,ymm15,ymm3
4379	mov	r10,r13
4380	mov	r11,r14
4381	mov	r12,r15
4382	and	r12,3
4383	mov	r13,r15
4384	and	r13,-4
4385	mov	r14,r9
4386	shrd	r15,r9,2
4387	shr	r9,2
4388	add	r15,r13
4389	adc	r9,r14
4390	add	r10,r15
4391	adc	r11,r9
4392	adc	r12,0
4393	vpxor	ymm14,ymm14,ymm2
4394	vpxor	ymm13,ymm13,ymm1
4395	vpxor	ymm12,ymm12,ymm0
4396	vpshufb	ymm15,ymm15,ymm8
4397	vpshufb	ymm14,ymm14,ymm8
4398	vpshufb	ymm13,ymm13,ymm8
4399	vpshufb	ymm12,ymm12,ymm8
4400	vpaddd	ymm11,ymm11,ymm15
4401	vpaddd	ymm10,ymm10,ymm14
4402	add	r10,QWORD[((0+16))+rcx*1+rsi]
4403	adc	r11,QWORD[((8+16))+rcx*1+rsi]
4404	adc	r12,1
4405	vpaddd	ymm9,ymm9,ymm13
4406	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4407	vpxor	ymm7,ymm7,ymm11
4408	vpxor	ymm6,ymm6,ymm10
4409	vpxor	ymm5,ymm5,ymm9
4410	vpxor	ymm4,ymm4,ymm8
4411	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4412	vpsrld	ymm8,ymm7,25
4413	mov	rdx,QWORD[((0+160+0))+rbp]
4414	mov	r15,rdx
4415	mulx	r14,r13,r10
4416	mulx	rdx,rax,r11
4417	imul	r15,r12
4418	add	r14,rax
4419	adc	r15,rdx
4420	vpslld	ymm7,ymm7,32-25
4421	vpxor	ymm7,ymm7,ymm8
4422	vpsrld	ymm8,ymm6,25
4423	vpslld	ymm6,ymm6,32-25
4424	vpxor	ymm6,ymm6,ymm8
4425	vpsrld	ymm8,ymm5,25
4426	vpslld	ymm5,ymm5,32-25
4427	vpxor	ymm5,ymm5,ymm8
4428	vpsrld	ymm8,ymm4,25
4429	vpslld	ymm4,ymm4,32-25
4430	vpxor	ymm4,ymm4,ymm8
4431	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
4432	vpalignr	ymm7,ymm7,ymm7,4
4433	vpalignr	ymm11,ymm11,ymm11,8
4434	vpalignr	ymm15,ymm15,ymm15,12
4435	vpalignr	ymm6,ymm6,ymm6,4
4436	vpalignr	ymm10,ymm10,ymm10,8
4437	vpalignr	ymm14,ymm14,ymm14,12
4438	mov	rdx,QWORD[((8+160+0))+rbp]
4439	mulx	rax,r10,r10
4440	add	r14,r10
4441	mulx	r9,r11,r11
4442	adc	r15,r11
4443	adc	r9,0
4444	imul	rdx,r12
4445	vpalignr	ymm5,ymm5,ymm5,4
4446	vpalignr	ymm9,ymm9,ymm9,8
4447	vpalignr	ymm13,ymm13,ymm13,12
4448	vpalignr	ymm4,ymm4,ymm4,4
4449	vpalignr	ymm8,ymm8,ymm8,8
4450	vpalignr	ymm12,ymm12,ymm12,12
4451	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4452	vmovdqa	ymm8,YMMWORD[$L$rol16]
4453	vpaddd	ymm3,ymm3,ymm7
4454	vpaddd	ymm2,ymm2,ymm6
4455	vpaddd	ymm1,ymm1,ymm5
4456	vpaddd	ymm0,ymm0,ymm4
4457	vpxor	ymm15,ymm15,ymm3
4458	vpxor	ymm14,ymm14,ymm2
4459	vpxor	ymm13,ymm13,ymm1
4460	vpxor	ymm12,ymm12,ymm0
4461	vpshufb	ymm15,ymm15,ymm8
4462	vpshufb	ymm14,ymm14,ymm8
4463	add	r15,rax
4464	adc	r9,rdx
4465	vpshufb	ymm13,ymm13,ymm8
4466	vpshufb	ymm12,ymm12,ymm8
4467	vpaddd	ymm11,ymm11,ymm15
4468	vpaddd	ymm10,ymm10,ymm14
4469	vpaddd	ymm9,ymm9,ymm13
4470	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4471	vpxor	ymm7,ymm7,ymm11
4472	vpxor	ymm6,ymm6,ymm10
4473	vpxor	ymm5,ymm5,ymm9
4474	mov	r10,r13
4475	mov	r11,r14
4476	mov	r12,r15
4477	and	r12,3
4478	mov	r13,r15
4479	and	r13,-4
4480	mov	r14,r9
4481	shrd	r15,r9,2
4482	shr	r9,2
4483	add	r15,r13
4484	adc	r9,r14
4485	add	r10,r15
4486	adc	r11,r9
4487	adc	r12,0
4488	vpxor	ymm4,ymm4,ymm8
4489	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4490	vpsrld	ymm8,ymm7,20
4491	vpslld	ymm7,ymm7,32-20
4492	vpxor	ymm7,ymm7,ymm8
4493	vpsrld	ymm8,ymm6,20
4494	vpslld	ymm6,ymm6,32-20
4495	vpxor	ymm6,ymm6,ymm8
4496	add	r10,QWORD[((0+32))+rcx*1+rsi]
4497	adc	r11,QWORD[((8+32))+rcx*1+rsi]
4498	adc	r12,1
4499
; lea leaves the flags alone while stepping the hash offset by three blocks.
4500	lea	rcx,[48+rcx]
4501	vpsrld	ymm8,ymm5,20
4502	vpslld	ymm5,ymm5,32-20
4503	vpxor	ymm5,ymm5,ymm8
4504	vpsrld	ymm8,ymm4,20
4505	vpslld	ymm4,ymm4,32-20
4506	vpxor	ymm4,ymm4,ymm8
4507	vmovdqa	ymm8,YMMWORD[$L$rol8]
4508	vpaddd	ymm3,ymm3,ymm7
4509	vpaddd	ymm2,ymm2,ymm6
4510	vpaddd	ymm1,ymm1,ymm5
4511	vpaddd	ymm0,ymm0,ymm4
4512	vpxor	ymm15,ymm15,ymm3
4513	vpxor	ymm14,ymm14,ymm2
4514	vpxor	ymm13,ymm13,ymm1
4515	vpxor	ymm12,ymm12,ymm0
4516	vpshufb	ymm15,ymm15,ymm8
4517	vpshufb	ymm14,ymm14,ymm8
4518	vpshufb	ymm13,ymm13,ymm8
4519	mov	rdx,QWORD[((0+160+0))+rbp]
4520	mov	r15,rdx
4521	mulx	r14,r13,r10
4522	mulx	rdx,rax,r11
4523	imul	r15,r12
4524	add	r14,rax
4525	adc	r15,rdx
4526	vpshufb	ymm12,ymm12,ymm8
4527	vpaddd	ymm11,ymm11,ymm15
4528	vpaddd	ymm10,ymm10,ymm14
4529	vpaddd	ymm9,ymm9,ymm13
4530	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4531	vpxor	ymm7,ymm7,ymm11
4532	vpxor	ymm6,ymm6,ymm10
4533	vpxor	ymm5,ymm5,ymm9
4534	mov	rdx,QWORD[((8+160+0))+rbp]
4535	mulx	rax,r10,r10
4536	add	r14,r10
4537	mulx	r9,r11,r11
4538	adc	r15,r11
4539	adc	r9,0
4540	imul	rdx,r12
4541	vpxor	ymm4,ymm4,ymm8
4542	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4543	vpsrld	ymm8,ymm7,25
4544	vpslld	ymm7,ymm7,32-25
4545	vpxor	ymm7,ymm7,ymm8
4546	vpsrld	ymm8,ymm6,25
4547	vpslld	ymm6,ymm6,32-25
4548	vpxor	ymm6,ymm6,ymm8
4549	add	r15,rax
4550	adc	r9,rdx
4551	vpsrld	ymm8,ymm5,25
4552	vpslld	ymm5,ymm5,32-25
4553	vpxor	ymm5,ymm5,ymm8
4554	vpsrld	ymm8,ymm4,25
4555	vpslld	ymm4,ymm4,32-25
4556	vpxor	ymm4,ymm4,ymm8
4557	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
4558	vpalignr	ymm7,ymm7,ymm7,12
4559	vpalignr	ymm11,ymm11,ymm11,8
4560	vpalignr	ymm15,ymm15,ymm15,4
4561	vpalignr	ymm6,ymm6,ymm6,12
4562	vpalignr	ymm10,ymm10,ymm10,8
4563	vpalignr	ymm14,ymm14,ymm14,4
4564	vpalignr	ymm5,ymm5,ymm5,12
4565	vpalignr	ymm9,ymm9,ymm9,8
4566	vpalignr	ymm13,ymm13,ymm13,4
4567	vpalignr	ymm4,ymm4,ymm4,12
4568	vpalignr	ymm8,ymm8,ymm8,8
4569	mov	r10,r13
4570	mov	r11,r14
4571	mov	r12,r15
4572	and	r12,3
4573	mov	r13,r15
4574	and	r13,-4
4575	mov	r14,r9
4576	shrd	r15,r9,2
4577	shr	r9,2
4578	add	r15,r13
4579	adc	r9,r14
4580	add	r10,r15
4581	adc	r11,r9
4582	adc	r12,0
4583	vpalignr	ymm12,ymm12,ymm12,4
4584
4585	cmp	rcx,10*6*8
4586	jne	NEAR $L$open_avx2_main_loop_rounds
; Rounds done: add each state's initial rows back (the last rows come from the
; per-state slots saved before the loop).
4587	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
4588	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
4589	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
4590	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
4591	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
4592	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
4593	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
4594	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
4595	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
4596	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
4597	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
4598	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
4599	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4600	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4601	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4602	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4603
; ymm0 is parked in the scratch slot so it can be used as a temporary for the
; first 128-byte XOR group; it is reloaded right after that group.
; The rounds loop absorbed 480 bytes; the blocks at +480 and +496 are absorbed
; here, interleaved with the XOR/store of the 512 output bytes.
4604	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
4605	add	r10,QWORD[((0+480))+rsi]
4606	adc	r11,QWORD[((8+480))+rsi]
4607	adc	r12,1
4608	vperm2i128	ymm0,ymm7,ymm3,0x02
4609	vperm2i128	ymm7,ymm7,ymm3,0x13
4610	vperm2i128	ymm3,ymm15,ymm11,0x02
4611	vperm2i128	ymm11,ymm15,ymm11,0x13
4612	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
4613	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
4614	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
4615	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
4616	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
4617	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
4618	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
4619	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
4620
4621	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
4622	mov	rax,QWORD[((0+160+0))+rbp]
4623	mov	r15,rax
4624	mul	r10
4625	mov	r13,rax
4626	mov	r14,rdx
4627	mov	rax,QWORD[((0+160+0))+rbp]
4628	mul	r11
4629	imul	r15,r12
4630	add	r14,rax
4631	adc	r15,rdx
4632	mov	rax,QWORD[((8+160+0))+rbp]
4633	mov	r9,rax
4634	mul	r10
4635	add	r14,rax
4636	adc	rdx,0
4637	mov	r10,rdx
4638	mov	rax,QWORD[((8+160+0))+rbp]
4639	mul	r11
4640	add	r15,rax
4641	adc	rdx,0
4642	imul	r9,r12
4643	add	r15,r10
4644	adc	r9,rdx
4645	mov	r10,r13
4646	mov	r11,r14
4647	mov	r12,r15
4648	and	r12,3
4649	mov	r13,r15
4650	and	r13,-4
4651	mov	r14,r9
4652	shrd	r15,r9,2
4653	shr	r9,2
4654	add	r15,r13
4655	adc	r9,r14
4656	add	r10,r15
4657	adc	r11,r9
4658	adc	r12,0
4659	vperm2i128	ymm3,ymm6,ymm2,0x02
4660	vperm2i128	ymm6,ymm6,ymm2,0x13
4661	vperm2i128	ymm2,ymm14,ymm10,0x02
4662	vperm2i128	ymm10,ymm14,ymm10,0x13
4663	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
4664	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
4665	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
4666	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
4667	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
4668	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
4669	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
4670	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
4671	add	r10,QWORD[((0+480+16))+rsi]
4672	adc	r11,QWORD[((8+480+16))+rsi]
4673	adc	r12,1
4674	vperm2i128	ymm3,ymm5,ymm1,0x02
4675	vperm2i128	ymm5,ymm5,ymm1,0x13
4676	vperm2i128	ymm1,ymm13,ymm9,0x02
4677	vperm2i128	ymm9,ymm13,ymm9,0x13
4678	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
4679	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
4680	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
4681	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
4682	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
4683	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
4684	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
4685	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
4686	mov	rax,QWORD[((0+160+0))+rbp]
4687	mov	r15,rax
4688	mul	r10
4689	mov	r13,rax
4690	mov	r14,rdx
4691	mov	rax,QWORD[((0+160+0))+rbp]
4692	mul	r11
4693	imul	r15,r12
4694	add	r14,rax
4695	adc	r15,rdx
4696	mov	rax,QWORD[((8+160+0))+rbp]
4697	mov	r9,rax
4698	mul	r10
4699	add	r14,rax
4700	adc	rdx,0
4701	mov	r10,rdx
4702	mov	rax,QWORD[((8+160+0))+rbp]
4703	mul	r11
4704	add	r15,rax
4705	adc	rdx,0
4706	imul	r9,r12
4707	add	r15,r10
4708	adc	r9,rdx
4709	mov	r10,r13
4710	mov	r11,r14
4711	mov	r12,r15
4712	and	r12,3
4713	mov	r13,r15
4714	and	r13,-4
4715	mov	r14,r9
4716	shrd	r15,r9,2
4717	shr	r9,2
4718	add	r15,r13
4719	adc	r9,r14
4720	add	r10,r15
4721	adc	r11,r9
4722	adc	r12,0
4723	vperm2i128	ymm3,ymm4,ymm0,0x02
4724	vperm2i128	ymm4,ymm4,ymm0,0x13
4725	vperm2i128	ymm0,ymm12,ymm8,0x02
4726	vperm2i128	ymm8,ymm12,ymm8,0x13
4727	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
4728	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
4729	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
4730	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
4731	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
4732	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
4733	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
4734	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
4735
; Consume the 512 bytes and go back to the length check.
4736	lea	rsi,[512+rsi]
4737	lea	rdi,[512+rdi]
4738	sub	rbx,16*32
4739	jmp	NEAR $L$open_avx2_main_loop
; Fewer than 512 bytes remain. Nothing left -> $L$open_sse_finalize; otherwise
; pick a tail routine by remaining length: >384 -> tail_512, >256 -> tail_384,
; >128 -> tail_256, else the single-state (128-byte) tail that follows.
4740$L$open_avx2_main_loop_done:
; vzeroupper sits between test and je; it does not modify the flags, so je
; still acts on the result of the test.
4741	test	rbx,rbx
4742	vzeroupper
4743	je	NEAR $L$open_sse_finalize
4744
4745	cmp	rbx,12*32
4746	ja	NEAR $L$open_avx2_tail_512
4747	cmp	rbx,8*32
4748	ja	NEAR $L$open_avx2_tail_384
4749	cmp	rbx,4*32
4750	ja	NEAR $L$open_avx2_tail_256
; 128-byte tail: one two-lane state; last row = saved row + $L$avx2_inc, and
; the slot is updated with the new value.
4751	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4752	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4753	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4754	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4755	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4756	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4757
; r8  = hash offset, also the round counter (advances 16 per double round)
; rcx = remaining length rounded down to a multiple of 16
; With no whole 16-byte block available, skip straight to the rounds.
4758	xor	r8,r8
4759	mov	rcx,rbx
4760	and	rcx,-16
4761	test	rcx,rcx
4762	je	NEAR $L$open_avx2_tail_128_rounds
; Absorb the 16-byte block at rsi+r8 into r10:r11:r12 (add, multiply by the
; qwords at [160+rbp]/[168+rbp], reduce), then fall into one double round.
4763$L$open_avx2_tail_128_rounds_and_x1hash:
4764	add	r10,QWORD[((0+0))+r8*1+rsi]
4765	adc	r11,QWORD[((8+0))+r8*1+rsi]
4766	adc	r12,1
4767	mov	rax,QWORD[((0+160+0))+rbp]
4768	mov	r15,rax
4769	mul	r10
4770	mov	r13,rax
4771	mov	r14,rdx
4772	mov	rax,QWORD[((0+160+0))+rbp]
4773	mul	r11
4774	imul	r15,r12
4775	add	r14,rax
4776	adc	r15,rdx
4777	mov	rax,QWORD[((8+160+0))+rbp]
4778	mov	r9,rax
4779	mul	r10
4780	add	r14,rax
4781	adc	rdx,0
4782	mov	r10,rdx
4783	mov	rax,QWORD[((8+160+0))+rbp]
4784	mul	r11
4785	add	r15,rax
4786	adc	rdx,0
4787	imul	r9,r12
4788	add	r15,r10
4789	adc	r9,rdx
4790	mov	r10,r13
4791	mov	r11,r14
4792	mov	r12,r15
4793	and	r12,3
4794	mov	r13,r15
4795	and	r13,-4
4796	mov	r14,r9
4797	shrd	r15,r9,2
4798	shr	r9,2
4799	add	r15,r13
4800	adc	r9,r14
4801	add	r10,r15
4802	adc	r11,r9
4803	adc	r12,0
4804
; One double round on ymm0/ymm4/ymm8/ymm12 (ymm3 is scratch).
4805$L$open_avx2_tail_128_rounds:
4806	add	r8,16
4807	vpaddd	ymm0,ymm0,ymm4
4808	vpxor	ymm12,ymm12,ymm0
4809	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4810	vpaddd	ymm8,ymm8,ymm12
4811	vpxor	ymm4,ymm4,ymm8
4812	vpsrld	ymm3,ymm4,20
4813	vpslld	ymm4,ymm4,12
4814	vpxor	ymm4,ymm4,ymm3
4815	vpaddd	ymm0,ymm0,ymm4
4816	vpxor	ymm12,ymm12,ymm0
4817	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4818	vpaddd	ymm8,ymm8,ymm12
4819	vpxor	ymm4,ymm4,ymm8
4820	vpslld	ymm3,ymm4,7
4821	vpsrld	ymm4,ymm4,25
4822	vpxor	ymm4,ymm4,ymm3
4823	vpalignr	ymm12,ymm12,ymm12,12
4824	vpalignr	ymm8,ymm8,ymm8,8
4825	vpalignr	ymm4,ymm4,ymm4,4
4826	vpaddd	ymm0,ymm0,ymm4
4827	vpxor	ymm12,ymm12,ymm0
4828	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4829	vpaddd	ymm8,ymm8,ymm12
4830	vpxor	ymm4,ymm4,ymm8
4831	vpsrld	ymm3,ymm4,20
4832	vpslld	ymm4,ymm4,12
4833	vpxor	ymm4,ymm4,ymm3
4834	vpaddd	ymm0,ymm0,ymm4
4835	vpxor	ymm12,ymm12,ymm0
4836	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4837	vpaddd	ymm8,ymm8,ymm12
4838	vpxor	ymm4,ymm4,ymm8
4839	vpslld	ymm3,ymm4,7
4840	vpsrld	ymm4,ymm4,25
4841	vpxor	ymm4,ymm4,ymm3
4842	vpalignr	ymm12,ymm12,ymm12,4
4843	vpalignr	ymm8,ymm8,ymm8,8
4844	vpalignr	ymm4,ymm4,ymm4,12
4845
; While whole blocks remain (r8 < rcx) hash another one before the next
; round; afterwards keep doing rounds only until r8 reaches 160, i.e. ten
; double rounds in total.
4846	cmp	r8,rcx
4847	jb	NEAR $L$open_avx2_tail_128_rounds_and_x1hash
4848	cmp	r8,160
4849	jne	NEAR $L$open_avx2_tail_128_rounds
; Add the initial rows back, then regroup the lanes so the keystream sits in
; output order: ymm0 = low-lane rows 0,1; ymm4 = low-lane rows 2,3;
; ymm8 = high-lane rows 0,1; ymm12 = high-lane rows 2,3.
4850	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4851	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4852	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4853	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4854	vperm2i128	ymm3,ymm4,ymm0,0x13
4855	vperm2i128	ymm0,ymm4,ymm0,0x02
4856	vperm2i128	ymm4,ymm12,ymm8,0x02
4857	vperm2i128	ymm12,ymm12,ymm8,0x13
4858	vmovdqa	ymm8,ymm3
4859
4860	jmp	NEAR $L$open_avx2_tail_128_xor
4861
; Tail for more than 128 remaining bytes (when reached from the dispatch after
; the main loop, rbx is at most 256 here). Two two-lane states are used:
;   state 1: ymm1/ymm5/ymm9/ymm13  (last row = saved row + inc)
;   state 0: ymm0/ymm4/ymm8/ymm12  (last row = state 1's + inc)
; State 1 decrypts the first 128 bytes here; state 0 is regrouped and handed
; to $L$open_avx2_tail_128_xor for the rest.
4862$L$open_avx2_tail_256:
4863	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4864	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4865	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4866	vmovdqa	ymm1,ymm0
4867	vmovdqa	ymm5,ymm4
4868	vmovdqa	ymm9,ymm8
4869	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4870	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
4871	vpaddd	ymm12,ymm12,ymm13
4872	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4873	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4874
; The remaining length is parked in the scratch slot so rbx can serve as the
; hash read pointer.
;   rcx = min((len - 128) >> 4, 10): iterations that hash a block before the
;         round (cmovg is a signed compare)
;   r8  = double-round counter
4875	mov	QWORD[((160+128))+rbp],rbx
4876	mov	rcx,rbx
4877	sub	rcx,4*32
4878	shr	rcx,4
4879	mov	r8,10
4880	cmp	rcx,10
4881	cmovg	rcx,r8
4882	mov	rbx,rsi
4883	xor	r8,r8
; Absorb one 16-byte block at [rbx] (mulx form of the multiply), advance rbx,
; then fall into a double round. The first pass always runs, so at least one
; block is hashed.
4884$L$open_avx2_tail_256_rounds_and_x1hash:
4885	add	r10,QWORD[((0+0))+rbx]
4886	adc	r11,QWORD[((8+0))+rbx]
4887	adc	r12,1
4888	mov	rdx,QWORD[((0+160+0))+rbp]
4889	mov	r15,rdx
4890	mulx	r14,r13,r10
4891	mulx	rdx,rax,r11
4892	imul	r15,r12
4893	add	r14,rax
4894	adc	r15,rdx
4895	mov	rdx,QWORD[((8+160+0))+rbp]
4896	mulx	rax,r10,r10
4897	add	r14,r10
4898	mulx	r9,r11,r11
4899	adc	r15,r11
4900	adc	r9,0
4901	imul	rdx,r12
4902	add	r15,rax
4903	adc	r9,rdx
4904	mov	r10,r13
4905	mov	r11,r14
4906	mov	r12,r15
4907	and	r12,3
4908	mov	r13,r15
4909	and	r13,-4
4910	mov	r14,r9
4911	shrd	r15,r9,2
4912	shr	r9,2
4913	add	r15,r13
4914	adc	r9,r14
4915	add	r10,r15
4916	adc	r11,r9
4917	adc	r12,0
4918
4919	lea	rbx,[16+rbx]
; One double round on state 0 and state 1 (ymm3 is scratch).
4920$L$open_avx2_tail_256_rounds:
4921	vpaddd	ymm0,ymm0,ymm4
4922	vpxor	ymm12,ymm12,ymm0
4923	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4924	vpaddd	ymm8,ymm8,ymm12
4925	vpxor	ymm4,ymm4,ymm8
4926	vpsrld	ymm3,ymm4,20
4927	vpslld	ymm4,ymm4,12
4928	vpxor	ymm4,ymm4,ymm3
4929	vpaddd	ymm0,ymm0,ymm4
4930	vpxor	ymm12,ymm12,ymm0
4931	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4932	vpaddd	ymm8,ymm8,ymm12
4933	vpxor	ymm4,ymm4,ymm8
4934	vpslld	ymm3,ymm4,7
4935	vpsrld	ymm4,ymm4,25
4936	vpxor	ymm4,ymm4,ymm3
4937	vpalignr	ymm12,ymm12,ymm12,12
4938	vpalignr	ymm8,ymm8,ymm8,8
4939	vpalignr	ymm4,ymm4,ymm4,4
4940	vpaddd	ymm1,ymm1,ymm5
4941	vpxor	ymm13,ymm13,ymm1
4942	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4943	vpaddd	ymm9,ymm9,ymm13
4944	vpxor	ymm5,ymm5,ymm9
4945	vpsrld	ymm3,ymm5,20
4946	vpslld	ymm5,ymm5,12
4947	vpxor	ymm5,ymm5,ymm3
4948	vpaddd	ymm1,ymm1,ymm5
4949	vpxor	ymm13,ymm13,ymm1
4950	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
4951	vpaddd	ymm9,ymm9,ymm13
4952	vpxor	ymm5,ymm5,ymm9
4953	vpslld	ymm3,ymm5,7
4954	vpsrld	ymm5,ymm5,25
4955	vpxor	ymm5,ymm5,ymm3
4956	vpalignr	ymm13,ymm13,ymm13,12
4957	vpalignr	ymm9,ymm9,ymm9,8
4958	vpalignr	ymm5,ymm5,ymm5,4
4959
4960	inc	r8
4961	vpaddd	ymm0,ymm0,ymm4
4962	vpxor	ymm12,ymm12,ymm0
4963	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4964	vpaddd	ymm8,ymm8,ymm12
4965	vpxor	ymm4,ymm4,ymm8
4966	vpsrld	ymm3,ymm4,20
4967	vpslld	ymm4,ymm4,12
4968	vpxor	ymm4,ymm4,ymm3
4969	vpaddd	ymm0,ymm0,ymm4
4970	vpxor	ymm12,ymm12,ymm0
4971	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4972	vpaddd	ymm8,ymm8,ymm12
4973	vpxor	ymm4,ymm4,ymm8
4974	vpslld	ymm3,ymm4,7
4975	vpsrld	ymm4,ymm4,25
4976	vpxor	ymm4,ymm4,ymm3
4977	vpalignr	ymm12,ymm12,ymm12,4
4978	vpalignr	ymm8,ymm8,ymm8,8
4979	vpalignr	ymm4,ymm4,ymm4,12
4980	vpaddd	ymm1,ymm1,ymm5
4981	vpxor	ymm13,ymm13,ymm1
4982	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4983	vpaddd	ymm9,ymm9,ymm13
4984	vpxor	ymm5,ymm5,ymm9
4985	vpsrld	ymm3,ymm5,20
4986	vpslld	ymm5,ymm5,12
4987	vpxor	ymm5,ymm5,ymm3
4988	vpaddd	ymm1,ymm1,ymm5
4989	vpxor	ymm13,ymm13,ymm1
4990	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
4991	vpaddd	ymm9,ymm9,ymm13
4992	vpxor	ymm5,ymm5,ymm9
4993	vpslld	ymm3,ymm5,7
4994	vpsrld	ymm5,ymm5,25
4995	vpxor	ymm5,ymm5,ymm3
4996	vpalignr	ymm13,ymm13,ymm13,4
4997	vpalignr	ymm9,ymm9,ymm9,8
4998	vpalignr	ymm5,ymm5,ymm5,12
; NOTE(review): the quarter-round below works on ymm2/ymm6/ymm10/ymm14. This
; tail never initialises those registers, has no matching first-half pass for
; them, and does not read them again before the jump at the end of this
; section. It looks like dead work inherited from the generator -- confirm
; against the Perl source (and the code at $L$open_avx2_tail_128_xor) before
; removing it.
4999	vpaddd	ymm2,ymm2,ymm6
5000	vpxor	ymm14,ymm14,ymm2
5001	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5002	vpaddd	ymm10,ymm10,ymm14
5003	vpxor	ymm6,ymm6,ymm10
5004	vpsrld	ymm3,ymm6,20
5005	vpslld	ymm6,ymm6,12
5006	vpxor	ymm6,ymm6,ymm3
5007	vpaddd	ymm2,ymm2,ymm6
5008	vpxor	ymm14,ymm14,ymm2
5009	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5010	vpaddd	ymm10,ymm10,ymm14
5011	vpxor	ymm6,ymm6,ymm10
5012	vpslld	ymm3,ymm6,7
5013	vpsrld	ymm6,ymm6,25
5014	vpxor	ymm6,ymm6,ymm3
5015	vpalignr	ymm14,ymm14,ymm14,4
5016	vpalignr	ymm10,ymm10,ymm10,8
5017	vpalignr	ymm6,ymm6,ymm6,12
5018
; Hash-and-round while r8 < rcx, then rounds only until ten are done.
5019	cmp	r8,rcx
5020	jb	NEAR $L$open_avx2_tail_256_rounds_and_x1hash
5021	cmp	r8,10
5022	jne	NEAR $L$open_avx2_tail_256_rounds
; r8 = next unhashed address, rcx = bytes hashed so far, rbx = remaining
; length restored from the scratch slot.
5023	mov	r8,rbx
5024	sub	rbx,rsi
5025	mov	rcx,rbx
5026	mov	rbx,QWORD[((160+128))+rbp]
; Absorb the remaining whole 16-byte blocks: stop as soon as another block
; would run past the remaining length (signed compare).
5027$L$open_avx2_tail_256_hash:
5028	add	rcx,16
5029	cmp	rcx,rbx
5030	jg	NEAR $L$open_avx2_tail_256_done
5031	add	r10,QWORD[((0+0))+r8]
5032	adc	r11,QWORD[((8+0))+r8]
5033	adc	r12,1
5034	mov	rdx,QWORD[((0+160+0))+rbp]
5035	mov	r15,rdx
5036	mulx	r14,r13,r10
5037	mulx	rdx,rax,r11
5038	imul	r15,r12
5039	add	r14,rax
5040	adc	r15,rdx
5041	mov	rdx,QWORD[((8+160+0))+rbp]
5042	mulx	rax,r10,r10
5043	add	r14,r10
5044	mulx	r9,r11,r11
5045	adc	r15,r11
5046	adc	r9,0
5047	imul	rdx,r12
5048	add	r15,rax
5049	adc	r9,rdx
5050	mov	r10,r13
5051	mov	r11,r14
5052	mov	r12,r15
5053	and	r12,3
5054	mov	r13,r15
5055	and	r13,-4
5056	mov	r14,r9
5057	shrd	r15,r9,2
5058	shr	r9,2
5059	add	r15,r13
5060	adc	r9,r14
5061	add	r10,r15
5062	adc	r11,r9
5063	adc	r12,0
5064
5065	lea	r8,[16+r8]
5066	jmp	NEAR $L$open_avx2_tail_256_hash
; Add the initial rows back onto both states, XOR/store the first 128 bytes
; with state 1, regroup state 0 into output order in ymm0/ymm4/ymm8/ymm12,
; consume 128 bytes and continue at $L$open_avx2_tail_128_xor.
5067$L$open_avx2_tail_256_done:
5068	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5069	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5070	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5071	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5072	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5073	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5074	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5075	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
5076	vperm2i128	ymm3,ymm5,ymm1,0x02
5077	vperm2i128	ymm5,ymm5,ymm1,0x13
5078	vperm2i128	ymm1,ymm13,ymm9,0x02
5079	vperm2i128	ymm9,ymm13,ymm9,0x13
5080	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5081	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
5082	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
5083	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
5084	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5085	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
5086	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
5087	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
5088	vperm2i128	ymm3,ymm4,ymm0,0x13
5089	vperm2i128	ymm0,ymm4,ymm0,0x02
5090	vperm2i128	ymm4,ymm12,ymm8,0x02
5091	vperm2i128	ymm12,ymm12,ymm8,0x13
5092	vmovdqa	ymm8,ymm3
5093
5094	lea	rsi,[128+rsi]
5095	lea	rdi,[128+rdi]
5096	sub	rbx,4*32
5097	jmp	NEAR $L$open_avx2_tail_128_xor
5098
; Tail for more than 256 remaining bytes (when reached from the dispatch after
; the main loop, rbx is at most 384 here). Three two-lane states are used:
;   state 2: ymm2/ymm6/ymm10/ymm14 (last row = saved row + inc)
;   state 1: ymm1/ymm5/ymm9/ymm13  (state 2's + inc)
;   state 0: ymm0/ymm4/ymm8/ymm12  (state 1's + inc)
; States 2 and 1 decrypt the first 256 bytes here; state 0 is regrouped and
; handed to $L$open_avx2_tail_128_xor for the rest.
5099$L$open_avx2_tail_384:
5100	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
5101	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
5102	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
5103	vmovdqa	ymm1,ymm0
5104	vmovdqa	ymm5,ymm4
5105	vmovdqa	ymm9,ymm8
5106	vmovdqa	ymm2,ymm0
5107	vmovdqa	ymm6,ymm4
5108	vmovdqa	ymm10,ymm8
5109	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
5110	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
5111	vpaddd	ymm13,ymm12,ymm14
5112	vpaddd	ymm12,ymm12,ymm13
5113	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
5114	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
5115	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
5116
; The remaining length is parked in the scratch slot so rbx can serve as the
; hash read pointer.
;   rcx = min(((len - 256) >> 4) + 6, 10): iterations that hash two blocks
;         (cmovg is a signed compare); the other iterations hash one
;   r8  = double-round counter
5117	mov	QWORD[((160+128))+rbp],rbx
5118	mov	rcx,rbx
5119	sub	rcx,8*32
5120	shr	rcx,4
5121	add	rcx,6
5122	mov	r8,10
5123	cmp	rcx,10
5124	cmovg	rcx,r8
5125	mov	rbx,rsi
5126	xor	r8,r8
; Extra block for the "two blocks per iteration" case (mulx form of the
; multiply), then fall into the one-block iteration below.
5127$L$open_avx2_tail_384_rounds_and_x2hash:
5128	add	r10,QWORD[((0+0))+rbx]
5129	adc	r11,QWORD[((8+0))+rbx]
5130	adc	r12,1
5131	mov	rdx,QWORD[((0+160+0))+rbp]
5132	mov	r15,rdx
5133	mulx	r14,r13,r10
5134	mulx	rdx,rax,r11
5135	imul	r15,r12
5136	add	r14,rax
5137	adc	r15,rdx
5138	mov	rdx,QWORD[((8+160+0))+rbp]
5139	mulx	rax,r10,r10
5140	add	r14,r10
5141	mulx	r9,r11,r11
5142	adc	r15,r11
5143	adc	r9,0
5144	imul	rdx,r12
5145	add	r15,rax
5146	adc	r9,rdx
5147	mov	r10,r13
5148	mov	r11,r14
5149	mov	r12,r15
5150	and	r12,3
5151	mov	r13,r15
5152	and	r13,-4
5153	mov	r14,r9
5154	shrd	r15,r9,2
5155	shr	r9,2
5156	add	r15,r13
5157	adc	r9,r14
5158	add	r10,r15
5159	adc	r11,r9
5160	adc	r12,0
5161
5162	lea	rbx,[16+rbx]
; One double round on states 2, 1 and 0 with one hashed block (mul form) in
; between the two halves.
5163$L$open_avx2_tail_384_rounds_and_x1hash:
5164	vpaddd	ymm2,ymm2,ymm6
5165	vpxor	ymm14,ymm14,ymm2
5166	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5167	vpaddd	ymm10,ymm10,ymm14
5168	vpxor	ymm6,ymm6,ymm10
5169	vpsrld	ymm3,ymm6,20
5170	vpslld	ymm6,ymm6,12
5171	vpxor	ymm6,ymm6,ymm3
5172	vpaddd	ymm2,ymm2,ymm6
5173	vpxor	ymm14,ymm14,ymm2
5174	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5175	vpaddd	ymm10,ymm10,ymm14
5176	vpxor	ymm6,ymm6,ymm10
5177	vpslld	ymm3,ymm6,7
5178	vpsrld	ymm6,ymm6,25
5179	vpxor	ymm6,ymm6,ymm3
5180	vpalignr	ymm14,ymm14,ymm14,12
5181	vpalignr	ymm10,ymm10,ymm10,8
5182	vpalignr	ymm6,ymm6,ymm6,4
5183	vpaddd	ymm1,ymm1,ymm5
5184	vpxor	ymm13,ymm13,ymm1
5185	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5186	vpaddd	ymm9,ymm9,ymm13
5187	vpxor	ymm5,ymm5,ymm9
5188	vpsrld	ymm3,ymm5,20
5189	vpslld	ymm5,ymm5,12
5190	vpxor	ymm5,ymm5,ymm3
5191	vpaddd	ymm1,ymm1,ymm5
5192	vpxor	ymm13,ymm13,ymm1
5193	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5194	vpaddd	ymm9,ymm9,ymm13
5195	vpxor	ymm5,ymm5,ymm9
5196	vpslld	ymm3,ymm5,7
5197	vpsrld	ymm5,ymm5,25
5198	vpxor	ymm5,ymm5,ymm3
5199	vpalignr	ymm13,ymm13,ymm13,12
5200	vpalignr	ymm9,ymm9,ymm9,8
5201	vpalignr	ymm5,ymm5,ymm5,4
5202	vpaddd	ymm0,ymm0,ymm4
5203	vpxor	ymm12,ymm12,ymm0
5204	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5205	vpaddd	ymm8,ymm8,ymm12
5206	vpxor	ymm4,ymm4,ymm8
5207	vpsrld	ymm3,ymm4,20
5208	vpslld	ymm4,ymm4,12
5209	vpxor	ymm4,ymm4,ymm3
5210	vpaddd	ymm0,ymm0,ymm4
5211	vpxor	ymm12,ymm12,ymm0
5212	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5213	vpaddd	ymm8,ymm8,ymm12
5214	vpxor	ymm4,ymm4,ymm8
5215	vpslld	ymm3,ymm4,7
5216	vpsrld	ymm4,ymm4,25
5217	vpxor	ymm4,ymm4,ymm3
5218	vpalignr	ymm12,ymm12,ymm12,12
5219	vpalignr	ymm8,ymm8,ymm8,8
5220	vpalignr	ymm4,ymm4,ymm4,4
5221	add	r10,QWORD[((0+0))+rbx]
5222	adc	r11,QWORD[((8+0))+rbx]
5223	adc	r12,1
5224	mov	rax,QWORD[((0+160+0))+rbp]
5225	mov	r15,rax
5226	mul	r10
5227	mov	r13,rax
5228	mov	r14,rdx
5229	mov	rax,QWORD[((0+160+0))+rbp]
5230	mul	r11
5231	imul	r15,r12
5232	add	r14,rax
5233	adc	r15,rdx
5234	mov	rax,QWORD[((8+160+0))+rbp]
5235	mov	r9,rax
5236	mul	r10
5237	add	r14,rax
5238	adc	rdx,0
5239	mov	r10,rdx
5240	mov	rax,QWORD[((8+160+0))+rbp]
5241	mul	r11
5242	add	r15,rax
5243	adc	rdx,0
5244	imul	r9,r12
5245	add	r15,r10
5246	adc	r9,rdx
5247	mov	r10,r13
5248	mov	r11,r14
5249	mov	r12,r15
5250	and	r12,3
5251	mov	r13,r15
5252	and	r13,-4
5253	mov	r14,r9
5254	shrd	r15,r9,2
5255	shr	r9,2
5256	add	r15,r13
5257	adc	r9,r14
5258	add	r10,r15
5259	adc	r11,r9
5260	adc	r12,0
5261
5262	lea	rbx,[16+rbx]
5263	inc	r8
5264	vpaddd	ymm2,ymm2,ymm6
5265	vpxor	ymm14,ymm14,ymm2
5266	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5267	vpaddd	ymm10,ymm10,ymm14
5268	vpxor	ymm6,ymm6,ymm10
5269	vpsrld	ymm3,ymm6,20
5270	vpslld	ymm6,ymm6,12
5271	vpxor	ymm6,ymm6,ymm3
5272	vpaddd	ymm2,ymm2,ymm6
5273	vpxor	ymm14,ymm14,ymm2
5274	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5275	vpaddd	ymm10,ymm10,ymm14
5276	vpxor	ymm6,ymm6,ymm10
5277	vpslld	ymm3,ymm6,7
5278	vpsrld	ymm6,ymm6,25
5279	vpxor	ymm6,ymm6,ymm3
5280	vpalignr	ymm14,ymm14,ymm14,4
5281	vpalignr	ymm10,ymm10,ymm10,8
5282	vpalignr	ymm6,ymm6,ymm6,12
5283	vpaddd	ymm1,ymm1,ymm5
5284	vpxor	ymm13,ymm13,ymm1
5285	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5286	vpaddd	ymm9,ymm9,ymm13
5287	vpxor	ymm5,ymm5,ymm9
5288	vpsrld	ymm3,ymm5,20
5289	vpslld	ymm5,ymm5,12
5290	vpxor	ymm5,ymm5,ymm3
5291	vpaddd	ymm1,ymm1,ymm5
5292	vpxor	ymm13,ymm13,ymm1
5293	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5294	vpaddd	ymm9,ymm9,ymm13
5295	vpxor	ymm5,ymm5,ymm9
5296	vpslld	ymm3,ymm5,7
5297	vpsrld	ymm5,ymm5,25
5298	vpxor	ymm5,ymm5,ymm3
5299	vpalignr	ymm13,ymm13,ymm13,4
5300	vpalignr	ymm9,ymm9,ymm9,8
5301	vpalignr	ymm5,ymm5,ymm5,12
5302	vpaddd	ymm0,ymm0,ymm4
5303	vpxor	ymm12,ymm12,ymm0
5304	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5305	vpaddd	ymm8,ymm8,ymm12
5306	vpxor	ymm4,ymm4,ymm8
5307	vpsrld	ymm3,ymm4,20
5308	vpslld	ymm4,ymm4,12
5309	vpxor	ymm4,ymm4,ymm3
5310	vpaddd	ymm0,ymm0,ymm4
5311	vpxor	ymm12,ymm12,ymm0
5312	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5313	vpaddd	ymm8,ymm8,ymm12
5314	vpxor	ymm4,ymm4,ymm8
5315	vpslld	ymm3,ymm4,7
5316	vpsrld	ymm4,ymm4,25
5317	vpxor	ymm4,ymm4,ymm3
5318	vpalignr	ymm12,ymm12,ymm12,4
5319	vpalignr	ymm8,ymm8,ymm8,8
5320	vpalignr	ymm4,ymm4,ymm4,12
5321
; Two-block iterations while r8 < rcx, then one-block iterations until ten
; double rounds are done.
5322	cmp	r8,rcx
5323	jb	NEAR $L$open_avx2_tail_384_rounds_and_x2hash
5324	cmp	r8,10
5325	jne	NEAR $L$open_avx2_tail_384_rounds_and_x1hash
; r8 = next unhashed address, rcx = bytes hashed so far, rbx = remaining
; length restored from the scratch slot.
5326	mov	r8,rbx
5327	sub	rbx,rsi
5328	mov	rcx,rbx
5329	mov	rbx,QWORD[((160+128))+rbp]
; Absorb the remaining whole 16-byte blocks: stop as soon as another block
; would run past the remaining length (signed compare).
5330$L$open_avx2_384_tail_hash:
5331	add	rcx,16
5332	cmp	rcx,rbx
5333	jg	NEAR $L$open_avx2_384_tail_done
5334	add	r10,QWORD[((0+0))+r8]
5335	adc	r11,QWORD[((8+0))+r8]
5336	adc	r12,1
5337	mov	rdx,QWORD[((0+160+0))+rbp]
5338	mov	r15,rdx
5339	mulx	r14,r13,r10
5340	mulx	rdx,rax,r11
5341	imul	r15,r12
5342	add	r14,rax
5343	adc	r15,rdx
5344	mov	rdx,QWORD[((8+160+0))+rbp]
5345	mulx	rax,r10,r10
5346	add	r14,r10
5347	mulx	r9,r11,r11
5348	adc	r15,r11
5349	adc	r9,0
5350	imul	rdx,r12
5351	add	r15,rax
5352	adc	r9,rdx
5353	mov	r10,r13
5354	mov	r11,r14
5355	mov	r12,r15
5356	and	r12,3
5357	mov	r13,r15
5358	and	r13,-4
5359	mov	r14,r9
5360	shrd	r15,r9,2
5361	shr	r9,2
5362	add	r15,r13
5363	adc	r9,r14
5364	add	r10,r15
5365	adc	r11,r9
5366	adc	r12,0
5367
5368	lea	r8,[16+r8]
5369	jmp	NEAR $L$open_avx2_384_tail_hash
; Add the initial rows back onto all three states, XOR/store 128 bytes with
; state 2 and 128 bytes with state 1, regroup state 0 into output order in
; ymm0/ymm4/ymm8/ymm12, consume 256 bytes and continue at
; $L$open_avx2_tail_128_xor.
5370$L$open_avx2_384_tail_done:
5371	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5372	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
5373	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
5374	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
5375	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5376	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5377	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5378	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5379	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5380	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5381	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5382	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
5383	vperm2i128	ymm3,ymm6,ymm2,0x02
5384	vperm2i128	ymm6,ymm6,ymm2,0x13
5385	vperm2i128	ymm2,ymm14,ymm10,0x02
5386	vperm2i128	ymm10,ymm14,ymm10,0x13
5387	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5388	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
5389	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
5390	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
5391	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5392	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
5393	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
5394	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
5395	vperm2i128	ymm3,ymm5,ymm1,0x02
5396	vperm2i128	ymm5,ymm5,ymm1,0x13
5397	vperm2i128	ymm1,ymm13,ymm9,0x02
5398	vperm2i128	ymm9,ymm13,ymm9,0x13
5399	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
5400	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
5401	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
5402	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
5403	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
5404	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
5405	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
5406	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
5407	vperm2i128	ymm3,ymm4,ymm0,0x13
5408	vperm2i128	ymm0,ymm4,ymm0,0x02
5409	vperm2i128	ymm4,ymm12,ymm8,0x02
5410	vperm2i128	ymm12,ymm12,ymm8,0x13
5411	vmovdqa	ymm8,ymm3
5412
5413	lea	rsi,[256+rsi]
5414	lea	rdi,[256+rdi]
5415	sub	rbx,8*32
5416	jmp	NEAR $L$open_avx2_tail_128_xor
5417
; ---------------------------------------------------------------------------
; $L$open_avx2_tail_512
; Tail path that builds four two-block ChaCha20 states, runs ten double
; rounds over them while hashing input, then decrypts 384 bytes and leaves
; the final 128 bytes of keystream in ymm0/ymm4/ymm8/ymm12 for
; $L$open_avx2_tail_128_xor (reached by fall-through).
;
; Registers as used by the visible code:
;   rsi = input pointer, rdi = output pointer, rbx = bytes remaining
;   r8  = hash read pointer (starts at rsi), rcx = double-round counter
;   r10:r11:r12 = hash accumulator; multiplier words at [rbp+160], [rbp+168]
;   ymm0-3 / ymm4-7 / ymm8-11 / ymm12-15 = state rows a / b / c / d
;   [rbp+160+64], [rbp+160+96]  = saved rows b and c of the initial state
;   [rbp+160+160..256]          = saved counter rows for the four states
;   [rbp+160+128]               = scratch slot (spill for ymm8 / ymm0)
; NOTE(review): presumably only entered with 384 < rbx <= 512 - confirm at
; the dispatch site, which is outside this view.
; ---------------------------------------------------------------------------
5418$L$open_avx2_tail_512:
5419	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
5420	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
5421	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
5422	vmovdqa	ymm1,ymm0
5423	vmovdqa	ymm5,ymm4
5424	vmovdqa	ymm9,ymm8
5425	vmovdqa	ymm2,ymm0
5426	vmovdqa	ymm6,ymm4
5427	vmovdqa	ymm10,ymm8
5428	vmovdqa	ymm3,ymm0
5429	vmovdqa	ymm7,ymm4
5430	vmovdqa	ymm11,ymm8
; Derive four counter rows by repeatedly adding $L$avx2_inc to the saved
; counter row, and store each so it can be added back after the rounds.
5431	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
5432	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
5433	vpaddd	ymm14,ymm12,ymm15
5434	vpaddd	ymm13,ymm12,ymm14
5435	vpaddd	ymm12,ymm12,ymm13
5436	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
5437	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
5438	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
5439	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
5440
; rcx = 0 double rounds done so far; r8 walks the input for hashing.
5441	xor	rcx,rcx
5442	mov	r8,rsi
; Extra hash step: absorb one 16-byte block at [r8] (mul-based multiply).
; Entered by fall-through on the first pass and re-entered while rcx < 4,
; so four passes hash 48 bytes each; the remaining six hash 32 bytes each
; (4*48 + 6*32 = 384 bytes hashed by the time the rounds finish).
5443$L$open_avx2_tail_512_rounds_and_x2hash:
; accumulator += 16 input bytes, plus the 2^128 bit (adc r12,1)
5444	add	r10,QWORD[((0+0))+r8]
5445	adc	r11,QWORD[((8+0))+r8]
5446	adc	r12,1
; accumulator *= multiplier; partial products gathered in r13,r14,r15,r9
5447	mov	rax,QWORD[((0+160+0))+rbp]
5448	mov	r15,rax
5449	mul	r10
5450	mov	r13,rax
5451	mov	r14,rdx
5452	mov	rax,QWORD[((0+160+0))+rbp]
5453	mul	r11
5454	imul	r15,r12
5455	add	r14,rax
5456	adc	r15,rdx
5457	mov	rax,QWORD[((8+160+0))+rbp]
5458	mov	r9,rax
5459	mul	r10
5460	add	r14,rax
5461	adc	rdx,0
5462	mov	r10,rdx
5463	mov	rax,QWORD[((8+160+0))+rbp]
5464	mul	r11
5465	add	r15,rax
5466	adc	rdx,0
5467	imul	r9,r12
5468	add	r15,r10
5469	adc	r9,rdx
; Reduction: keep the low 130 bits (r13, r14, r15&3) and add back the part
; above bit 130 both shifted right by 2 and masked with -4, i.e. 5x that
; high part.
5470	mov	r10,r13
5471	mov	r11,r14
5472	mov	r12,r15
5473	and	r12,3
5474	mov	r13,r15
5475	and	r13,-4
5476	mov	r14,r9
5477	shrd	r15,r9,2
5478	shr	r9,2
5479	add	r15,r13
5480	adc	r9,r14
5481	add	r10,r15
5482	adc	r11,r9
5483	adc	r12,0
5484
5485	lea	r8,[16+r8]
; One ChaCha20 double round over all four states, interleaved with two
; mulx-based hash blocks ([r8] and [r8+16]).  ymm8 is spilled to the
; scratch slot so it can hold the rol16/rol8 shuffle tables and the shift
; temporaries.
5486$L$open_avx2_tail_512_rounds_and_x1hash:
5487	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5488	vmovdqa	ymm8,YMMWORD[$L$rol16]
5489	vpaddd	ymm3,ymm3,ymm7
5490	vpaddd	ymm2,ymm2,ymm6
5491	vpaddd	ymm1,ymm1,ymm5
5492	vpaddd	ymm0,ymm0,ymm4
5493	vpxor	ymm15,ymm15,ymm3
5494	vpxor	ymm14,ymm14,ymm2
5495	vpxor	ymm13,ymm13,ymm1
5496	vpxor	ymm12,ymm12,ymm0
5497	vpshufb	ymm15,ymm15,ymm8
5498	vpshufb	ymm14,ymm14,ymm8
5499	vpshufb	ymm13,ymm13,ymm8
5500	vpshufb	ymm12,ymm12,ymm8
5501	vpaddd	ymm11,ymm11,ymm15
5502	vpaddd	ymm10,ymm10,ymm14
5503	vpaddd	ymm9,ymm9,ymm13
5504	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5505	vpxor	ymm7,ymm7,ymm11
5506	vpxor	ymm6,ymm6,ymm10
5507	vpxor	ymm5,ymm5,ymm9
5508	vpxor	ymm4,ymm4,ymm8
5509	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; rotate rows b left by 12 (shift right 20, shift left 12, combine)
5510	vpsrld	ymm8,ymm7,20
5511	vpslld	ymm7,ymm7,32-20
5512	vpxor	ymm7,ymm7,ymm8
5513	vpsrld	ymm8,ymm6,20
5514	vpslld	ymm6,ymm6,32-20
5515	vpxor	ymm6,ymm6,ymm8
5516	vpsrld	ymm8,ymm5,20
5517	vpslld	ymm5,ymm5,32-20
5518	vpxor	ymm5,ymm5,ymm8
5519	vpsrld	ymm8,ymm4,20
5520	vpslld	ymm4,ymm4,32-20
5521	vpxor	ymm4,ymm4,ymm8
5522	vmovdqa	ymm8,YMMWORD[$L$rol8]
5523	vpaddd	ymm3,ymm3,ymm7
; hash block at [r8] (mulx-based multiply; mulx leaves the flags alone)
5524	add	r10,QWORD[((0+0))+r8]
5525	adc	r11,QWORD[((8+0))+r8]
5526	adc	r12,1
5527	mov	rdx,QWORD[((0+160+0))+rbp]
5528	mov	r15,rdx
5529	mulx	r14,r13,r10
5530	mulx	rdx,rax,r11
5531	imul	r15,r12
5532	add	r14,rax
5533	adc	r15,rdx
5534	mov	rdx,QWORD[((8+160+0))+rbp]
5535	mulx	rax,r10,r10
5536	add	r14,r10
5537	mulx	r9,r11,r11
5538	adc	r15,r11
5539	adc	r9,0
5540	imul	rdx,r12
5541	add	r15,rax
5542	adc	r9,rdx
5543	mov	r10,r13
5544	mov	r11,r14
5545	mov	r12,r15
5546	and	r12,3
5547	mov	r13,r15
5548	and	r13,-4
5549	mov	r14,r9
5550	shrd	r15,r9,2
5551	shr	r9,2
5552	add	r15,r13
5553	adc	r9,r14
5554	add	r10,r15
5555	adc	r11,r9
5556	adc	r12,0
5557	vpaddd	ymm2,ymm2,ymm6
5558	vpaddd	ymm1,ymm1,ymm5
5559	vpaddd	ymm0,ymm0,ymm4
5560	vpxor	ymm15,ymm15,ymm3
5561	vpxor	ymm14,ymm14,ymm2
5562	vpxor	ymm13,ymm13,ymm1
5563	vpxor	ymm12,ymm12,ymm0
5564	vpshufb	ymm15,ymm15,ymm8
5565	vpshufb	ymm14,ymm14,ymm8
5566	vpshufb	ymm13,ymm13,ymm8
5567	vpshufb	ymm12,ymm12,ymm8
5568	vpaddd	ymm11,ymm11,ymm15
5569	vpaddd	ymm10,ymm10,ymm14
5570	vpaddd	ymm9,ymm9,ymm13
5571	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5572	vpxor	ymm7,ymm7,ymm11
5573	vpxor	ymm6,ymm6,ymm10
5574	vpxor	ymm5,ymm5,ymm9
5575	vpxor	ymm4,ymm4,ymm8
5576	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; rotate rows b left by 7
5577	vpsrld	ymm8,ymm7,25
5578	vpslld	ymm7,ymm7,32-25
5579	vpxor	ymm7,ymm7,ymm8
5580	vpsrld	ymm8,ymm6,25
5581	vpslld	ymm6,ymm6,32-25
5582	vpxor	ymm6,ymm6,ymm8
5583	vpsrld	ymm8,ymm5,25
5584	vpslld	ymm5,ymm5,32-25
5585	vpxor	ymm5,ymm5,ymm8
5586	vpsrld	ymm8,ymm4,25
5587	vpslld	ymm4,ymm4,32-25
5588	vpxor	ymm4,ymm4,ymm8
5589	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
; rotate rows b/c/d by 4/8/12 bytes within each lane (column -> diagonal)
5590	vpalignr	ymm7,ymm7,ymm7,4
5591	vpalignr	ymm11,ymm11,ymm11,8
5592	vpalignr	ymm15,ymm15,ymm15,12
5593	vpalignr	ymm6,ymm6,ymm6,4
5594	vpalignr	ymm10,ymm10,ymm10,8
5595	vpalignr	ymm14,ymm14,ymm14,12
5596	vpalignr	ymm5,ymm5,ymm5,4
5597	vpalignr	ymm9,ymm9,ymm9,8
5598	vpalignr	ymm13,ymm13,ymm13,12
5599	vpalignr	ymm4,ymm4,ymm4,4
5600	vpalignr	ymm8,ymm8,ymm8,8
5601	vpalignr	ymm12,ymm12,ymm12,12
5602	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5603	vmovdqa	ymm8,YMMWORD[$L$rol16]
5604	vpaddd	ymm3,ymm3,ymm7
; hash block at [r8+16]
5605	add	r10,QWORD[((0+16))+r8]
5606	adc	r11,QWORD[((8+16))+r8]
5607	adc	r12,1
5608	mov	rdx,QWORD[((0+160+0))+rbp]
5609	mov	r15,rdx
5610	mulx	r14,r13,r10
5611	mulx	rdx,rax,r11
5612	imul	r15,r12
5613	add	r14,rax
5614	adc	r15,rdx
5615	mov	rdx,QWORD[((8+160+0))+rbp]
5616	mulx	rax,r10,r10
5617	add	r14,r10
5618	mulx	r9,r11,r11
5619	adc	r15,r11
5620	adc	r9,0
5621	imul	rdx,r12
5622	add	r15,rax
5623	adc	r9,rdx
5624	mov	r10,r13
5625	mov	r11,r14
5626	mov	r12,r15
5627	and	r12,3
5628	mov	r13,r15
5629	and	r13,-4
5630	mov	r14,r9
5631	shrd	r15,r9,2
5632	shr	r9,2
5633	add	r15,r13
5634	adc	r9,r14
5635	add	r10,r15
5636	adc	r11,r9
5637	adc	r12,0
5638
; both blocks of this pass consumed: advance the hash pointer by 32
5639	lea	r8,[32+r8]
5640	vpaddd	ymm2,ymm2,ymm6
5641	vpaddd	ymm1,ymm1,ymm5
5642	vpaddd	ymm0,ymm0,ymm4
5643	vpxor	ymm15,ymm15,ymm3
5644	vpxor	ymm14,ymm14,ymm2
5645	vpxor	ymm13,ymm13,ymm1
5646	vpxor	ymm12,ymm12,ymm0
5647	vpshufb	ymm15,ymm15,ymm8
5648	vpshufb	ymm14,ymm14,ymm8
5649	vpshufb	ymm13,ymm13,ymm8
5650	vpshufb	ymm12,ymm12,ymm8
5651	vpaddd	ymm11,ymm11,ymm15
5652	vpaddd	ymm10,ymm10,ymm14
5653	vpaddd	ymm9,ymm9,ymm13
5654	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5655	vpxor	ymm7,ymm7,ymm11
5656	vpxor	ymm6,ymm6,ymm10
5657	vpxor	ymm5,ymm5,ymm9
5658	vpxor	ymm4,ymm4,ymm8
5659	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5660	vpsrld	ymm8,ymm7,20
5661	vpslld	ymm7,ymm7,32-20
5662	vpxor	ymm7,ymm7,ymm8
5663	vpsrld	ymm8,ymm6,20
5664	vpslld	ymm6,ymm6,32-20
5665	vpxor	ymm6,ymm6,ymm8
5666	vpsrld	ymm8,ymm5,20
5667	vpslld	ymm5,ymm5,32-20
5668	vpxor	ymm5,ymm5,ymm8
5669	vpsrld	ymm8,ymm4,20
5670	vpslld	ymm4,ymm4,32-20
5671	vpxor	ymm4,ymm4,ymm8
5672	vmovdqa	ymm8,YMMWORD[$L$rol8]
5673	vpaddd	ymm3,ymm3,ymm7
5674	vpaddd	ymm2,ymm2,ymm6
5675	vpaddd	ymm1,ymm1,ymm5
5676	vpaddd	ymm0,ymm0,ymm4
5677	vpxor	ymm15,ymm15,ymm3
5678	vpxor	ymm14,ymm14,ymm2
5679	vpxor	ymm13,ymm13,ymm1
5680	vpxor	ymm12,ymm12,ymm0
5681	vpshufb	ymm15,ymm15,ymm8
5682	vpshufb	ymm14,ymm14,ymm8
5683	vpshufb	ymm13,ymm13,ymm8
5684	vpshufb	ymm12,ymm12,ymm8
5685	vpaddd	ymm11,ymm11,ymm15
5686	vpaddd	ymm10,ymm10,ymm14
5687	vpaddd	ymm9,ymm9,ymm13
5688	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5689	vpxor	ymm7,ymm7,ymm11
5690	vpxor	ymm6,ymm6,ymm10
5691	vpxor	ymm5,ymm5,ymm9
5692	vpxor	ymm4,ymm4,ymm8
5693	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5694	vpsrld	ymm8,ymm7,25
5695	vpslld	ymm7,ymm7,32-25
5696	vpxor	ymm7,ymm7,ymm8
5697	vpsrld	ymm8,ymm6,25
5698	vpslld	ymm6,ymm6,32-25
5699	vpxor	ymm6,ymm6,ymm8
5700	vpsrld	ymm8,ymm5,25
5701	vpslld	ymm5,ymm5,32-25
5702	vpxor	ymm5,ymm5,ymm8
5703	vpsrld	ymm8,ymm4,25
5704	vpslld	ymm4,ymm4,32-25
5705	vpxor	ymm4,ymm4,ymm8
5706	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
; undo the lane rotation (diagonal -> column): 12/8/4 bytes
5707	vpalignr	ymm7,ymm7,ymm7,12
5708	vpalignr	ymm11,ymm11,ymm11,8
5709	vpalignr	ymm15,ymm15,ymm15,4
5710	vpalignr	ymm6,ymm6,ymm6,12
5711	vpalignr	ymm10,ymm10,ymm10,8
5712	vpalignr	ymm14,ymm14,ymm14,4
5713	vpalignr	ymm5,ymm5,ymm5,12
5714	vpalignr	ymm9,ymm9,ymm9,8
5715	vpalignr	ymm13,ymm13,ymm13,4
5716	vpalignr	ymm4,ymm4,ymm4,12
5717	vpalignr	ymm8,ymm8,ymm8,8
5718	vpalignr	ymm12,ymm12,ymm12,4
5719
; Loop control: passes 1-4 re-enter via the extra-hash label, passes 5-10
; via the plain label; fall out once rcx reaches 10.
; NOTE(review): jl is a signed compare; rcx only ranges over 1..10 here, so
; it behaves the same as an unsigned jb.
5720	inc	rcx
5721	cmp	rcx,4
5722	jl	NEAR $L$open_avx2_tail_512_rounds_and_x2hash
5723	cmp	rcx,10
5724	jne	NEAR $L$open_avx2_tail_512_rounds_and_x1hash
; rcx = bytes beyond the first 384 still to hash, rounded down to a
; multiple of 16.
5725	mov	rcx,rbx
5726	sub	rcx,12*32
5727	and	rcx,-16
; Hash the remaining whole 16-byte blocks, one per iteration.
5728$L$open_avx2_tail_512_hash:
5729	test	rcx,rcx
5730	je	NEAR $L$open_avx2_tail_512_done
5731	add	r10,QWORD[((0+0))+r8]
5732	adc	r11,QWORD[((8+0))+r8]
5733	adc	r12,1
5734	mov	rdx,QWORD[((0+160+0))+rbp]
5735	mov	r15,rdx
5736	mulx	r14,r13,r10
5737	mulx	rdx,rax,r11
5738	imul	r15,r12
5739	add	r14,rax
5740	adc	r15,rdx
5741	mov	rdx,QWORD[((8+160+0))+rbp]
5742	mulx	rax,r10,r10
5743	add	r14,r10
5744	mulx	r9,r11,r11
5745	adc	r15,r11
5746	adc	r9,0
5747	imul	rdx,r12
5748	add	r15,rax
5749	adc	r9,rdx
5750	mov	r10,r13
5751	mov	r11,r14
5752	mov	r12,r15
5753	and	r12,3
5754	mov	r13,r15
5755	and	r13,-4
5756	mov	r14,r9
5757	shrd	r15,r9,2
5758	shr	r9,2
5759	add	r15,r13
5760	adc	r9,r14
5761	add	r10,r15
5762	adc	r11,r9
5763	adc	r12,0
5764
5765	lea	r8,[16+r8]
5766	sub	rcx,2*8
5767	jmp	NEAR $L$open_avx2_tail_512_hash
; Rounds and hashing finished: add the saved initial state back into every
; row of all four states.
5768$L$open_avx2_tail_512_done:
5769	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
5770	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
5771	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
5772	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
5773	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5774	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
5775	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
5776	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
5777	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5778	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5779	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5780	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5781	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5782	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5783	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5784	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
5785
; ymm0 is parked in the scratch slot so it can serve as the temporary for
; the first lane shuffle.  vperm2i128 with 0x02 pairs the low 128-bit lanes
; of two rows and 0x13 pairs the high lanes, turning row-wise state into
; contiguous keystream.  Bytes 0..127 of input are XORed and stored here.
5786	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
5787	vperm2i128	ymm0,ymm7,ymm3,0x02
5788	vperm2i128	ymm7,ymm7,ymm3,0x13
5789	vperm2i128	ymm3,ymm15,ymm11,0x02
5790	vperm2i128	ymm11,ymm15,ymm11,0x13
5791	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
5792	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
5793	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
5794	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
5795	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
5796	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
5797	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
5798	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
5799
; restore ymm0, then bytes 128..255 (ymm3 is free to act as the temporary)
5800	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
5801	vperm2i128	ymm3,ymm6,ymm2,0x02
5802	vperm2i128	ymm6,ymm6,ymm2,0x13
5803	vperm2i128	ymm2,ymm14,ymm10,0x02
5804	vperm2i128	ymm10,ymm14,ymm10,0x13
5805	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
5806	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
5807	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
5808	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
5809	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
5810	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
5811	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
5812	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; bytes 256..383
5813	vperm2i128	ymm3,ymm5,ymm1,0x02
5814	vperm2i128	ymm5,ymm5,ymm1,0x13
5815	vperm2i128	ymm1,ymm13,ymm9,0x02
5816	vperm2i128	ymm9,ymm13,ymm9,0x13
5817	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
5818	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
5819	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
5820	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
5821	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
5822	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
5823	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
5824	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; Last state is only rearranged, not XORed: its 128 keystream bytes end up
; in ymm0, ymm4, ymm8, ymm12 (in that order) for the code that follows.
5825	vperm2i128	ymm3,ymm4,ymm0,0x13
5826	vperm2i128	ymm0,ymm4,ymm0,0x02
5827	vperm2i128	ymm4,ymm12,ymm8,0x02
5828	vperm2i128	ymm12,ymm12,ymm8,0x13
5829	vmovdqa	ymm8,ymm3
5830
; account for the 384 bytes just processed; falls through to the 128-byte
; tail XOR loop
5831	lea	rsi,[384+rsi]
5832	lea	rdi,[384+rdi]
5833	sub	rbx,12*32
; ---------------------------------------------------------------------------
; $L$open_avx2_tail_128_xor
; Consumes up to 128 bytes of ready keystream held in ymm0, ymm4, ymm8,
; ymm12 (in that order).  While at least 32 bytes remain (rbx), XOR 32
; input bytes at rsi with ymm0, store to rdi, and shift the keystream
; queue down by one register.  No hashing is done here.
; ---------------------------------------------------------------------------
5834$L$open_avx2_tail_128_xor:
5835	cmp	rbx,32
5836	jb	NEAR $L$open_avx2_tail_32_xor
5837	sub	rbx,32
5838	vpxor	ymm0,ymm0,YMMWORD[rsi]
5839	vmovdqu	YMMWORD[rdi],ymm0
5840	lea	rsi,[32+rsi]
5841	lea	rdi,[32+rdi]
; next keystream chunk becomes ymm0
5842	vmovdqa	ymm0,ymm4
5843	vmovdqa	ymm4,ymm8
5844	vmovdqa	ymm8,ymm12
5845	jmp	NEAR $L$open_avx2_tail_128_xor
; Fewer than 32 bytes left.  xmm1 is loaded with the low 16 keystream bytes
; before the branch (the vector move does not alter the flags set by cmp).
5846$L$open_avx2_tail_32_xor:
5847	cmp	rbx,16
5848	vmovdqa	xmm1,xmm0
5849	jb	NEAR $L$open_avx2_exit
5850	sub	rbx,16
5851
; XOR one more 16-byte block, then move the high lane of ymm0 into the low
; lane (imm 0x11) so xmm1 holds the next 16 keystream bytes.
5852	vpxor	xmm1,xmm0,XMMWORD[rsi]
5853	vmovdqu	XMMWORD[rdi],xmm1
5854	lea	rsi,[16+rsi]
5855	lea	rdi,[16+rdi]
5856	vperm2i128	ymm0,ymm0,ymm0,0x11
5857	vmovdqa	xmm1,xmm0
; Clear the upper ymm halves before continuing at $L$open_sse_tail_16
; (defined outside this view; presumably handles the final <16 bytes using
; the keystream left in xmm1 - confirm there).
5858$L$open_avx2_exit:
5859	vzeroupper
5860	jmp	NEAR $L$open_sse_tail_16
5861
; ---------------------------------------------------------------------------
; $L$open_avx2_192
; Short-input path using two two-block ChaCha20 states.
;   state 0 rows a/b/c/d = ymm0/ymm4/ymm8/ymm12  (as set up by the caller)
;   state 1 rows a/b/c/d = ymm1/ymm5/ymm9/ymm13  (counter row + $L$avx2_inc)
; Copies of the initial rows are kept in ymm2 (a), ymm6 (b), ymm10 (c),
; ymm11 (d of state 0) and ymm15 (d of state 1) to add back after the
; rounds.  ymm3 is the shift temporary.  r10 counts the ten double rounds.
; On exit the clamped hash key is stored at [rbp+160] and the remaining
; keystream is queued in ymm0, ymm4, ymm8, ymm12, ymm1, ymm5; control falls
; through to $L$open_avx2_short.
; NOTE(review): presumably selected for rbx <= 192 - confirm at the dispatch
; site, which is outside this view.
; ---------------------------------------------------------------------------
5862$L$open_avx2_192:
5863	vmovdqa	ymm1,ymm0
5864	vmovdqa	ymm2,ymm0
5865	vmovdqa	ymm5,ymm4
5866	vmovdqa	ymm6,ymm4
5867	vmovdqa	ymm9,ymm8
5868	vmovdqa	ymm10,ymm8
5869	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
5870	vmovdqa	ymm11,ymm12
5871	vmovdqa	ymm15,ymm13
5872	mov	r10,10
5873$L$open_avx2_192_rounds:
; state 0: quarter rounds on columns, then rotate rows to diagonals
5874	vpaddd	ymm0,ymm0,ymm4
5875	vpxor	ymm12,ymm12,ymm0
5876	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5877	vpaddd	ymm8,ymm8,ymm12
5878	vpxor	ymm4,ymm4,ymm8
5879	vpsrld	ymm3,ymm4,20
5880	vpslld	ymm4,ymm4,12
5881	vpxor	ymm4,ymm4,ymm3
5882	vpaddd	ymm0,ymm0,ymm4
5883	vpxor	ymm12,ymm12,ymm0
5884	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5885	vpaddd	ymm8,ymm8,ymm12
5886	vpxor	ymm4,ymm4,ymm8
5887	vpslld	ymm3,ymm4,7
5888	vpsrld	ymm4,ymm4,25
5889	vpxor	ymm4,ymm4,ymm3
5890	vpalignr	ymm12,ymm12,ymm12,12
5891	vpalignr	ymm8,ymm8,ymm8,8
5892	vpalignr	ymm4,ymm4,ymm4,4
; state 1: same column step
5893	vpaddd	ymm1,ymm1,ymm5
5894	vpxor	ymm13,ymm13,ymm1
5895	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5896	vpaddd	ymm9,ymm9,ymm13
5897	vpxor	ymm5,ymm5,ymm9
5898	vpsrld	ymm3,ymm5,20
5899	vpslld	ymm5,ymm5,12
5900	vpxor	ymm5,ymm5,ymm3
5901	vpaddd	ymm1,ymm1,ymm5
5902	vpxor	ymm13,ymm13,ymm1
5903	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5904	vpaddd	ymm9,ymm9,ymm13
5905	vpxor	ymm5,ymm5,ymm9
5906	vpslld	ymm3,ymm5,7
5907	vpsrld	ymm5,ymm5,25
5908	vpxor	ymm5,ymm5,ymm3
5909	vpalignr	ymm13,ymm13,ymm13,12
5910	vpalignr	ymm9,ymm9,ymm9,8
5911	vpalignr	ymm5,ymm5,ymm5,4
; state 0: quarter rounds on diagonals, then rotate rows back
5912	vpaddd	ymm0,ymm0,ymm4
5913	vpxor	ymm12,ymm12,ymm0
5914	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5915	vpaddd	ymm8,ymm8,ymm12
5916	vpxor	ymm4,ymm4,ymm8
5917	vpsrld	ymm3,ymm4,20
5918	vpslld	ymm4,ymm4,12
5919	vpxor	ymm4,ymm4,ymm3
5920	vpaddd	ymm0,ymm0,ymm4
5921	vpxor	ymm12,ymm12,ymm0
5922	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5923	vpaddd	ymm8,ymm8,ymm12
5924	vpxor	ymm4,ymm4,ymm8
5925	vpslld	ymm3,ymm4,7
5926	vpsrld	ymm4,ymm4,25
5927	vpxor	ymm4,ymm4,ymm3
5928	vpalignr	ymm12,ymm12,ymm12,4
5929	vpalignr	ymm8,ymm8,ymm8,8
5930	vpalignr	ymm4,ymm4,ymm4,12
; state 1: same diagonal step
5931	vpaddd	ymm1,ymm1,ymm5
5932	vpxor	ymm13,ymm13,ymm1
5933	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5934	vpaddd	ymm9,ymm9,ymm13
5935	vpxor	ymm5,ymm5,ymm9
5936	vpsrld	ymm3,ymm5,20
5937	vpslld	ymm5,ymm5,12
5938	vpxor	ymm5,ymm5,ymm3
5939	vpaddd	ymm1,ymm1,ymm5
5940	vpxor	ymm13,ymm13,ymm1
5941	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5942	vpaddd	ymm9,ymm9,ymm13
5943	vpxor	ymm5,ymm5,ymm9
5944	vpslld	ymm3,ymm5,7
5945	vpsrld	ymm5,ymm5,25
5946	vpxor	ymm5,ymm5,ymm3
5947	vpalignr	ymm13,ymm13,ymm13,4
5948	vpalignr	ymm9,ymm9,ymm9,8
5949	vpalignr	ymm5,ymm5,ymm5,12
5950
5951	dec	r10
5952	jne	NEAR $L$open_avx2_192_rounds
; add the saved initial rows back into both states
5953	vpaddd	ymm0,ymm0,ymm2
5954	vpaddd	ymm1,ymm1,ymm2
5955	vpaddd	ymm4,ymm4,ymm6
5956	vpaddd	ymm5,ymm5,ymm6
5957	vpaddd	ymm8,ymm8,ymm10
5958	vpaddd	ymm9,ymm9,ymm10
5959	vpaddd	ymm12,ymm12,ymm11
5960	vpaddd	ymm13,ymm13,ymm15
; ymm3 = low lanes of rows a and b of state 0 (first 32 keystream bytes)
5961	vperm2i128	ymm3,ymm4,ymm0,0x02
5962
; mask with $L$clamp and keep the result at [rbp+160] as the hash key
5963	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
5964	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
5965
; Re-pack the rest of the keystream into consumption order:
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5.
5966	vperm2i128	ymm0,ymm4,ymm0,0x13
5967	vperm2i128	ymm4,ymm12,ymm8,0x13
5968	vperm2i128	ymm8,ymm5,ymm1,0x02
5969	vperm2i128	ymm12,ymm13,ymm9,0x02
5970	vperm2i128	ymm1,ymm5,ymm1,0x13
5971	vperm2i128	ymm5,ymm13,ymm9,0x13
; ---------------------------------------------------------------------------
; $L$open_avx2_short
; Common finish for the short "open" paths.  Expects the hash key already
; stored at [rbp+160] and keystream queued in
;   ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6
; (consumed in that order).  rsi = input, rdi = output, rbx = bytes left.
; Each loop pass hashes 32 input bytes (two 16-byte blocks, read before
; they are decrypted), XORs them with ymm0, and shifts the queue.
; ---------------------------------------------------------------------------
5972$L$open_avx2_short:
; mov r8,r8 leaves r8 unchanged; poly_hash_ad_internal is defined outside
; this view (presumably hashes the additional data, length in r8 - confirm).
5973	mov	r8,r8
5974	call	poly_hash_ad_internal
5975$L$open_avx2_short_hash_and_xor_loop:
5976	cmp	rbx,32
5977	jb	NEAR $L$open_avx2_short_tail_32
5978	sub	rbx,32
; hash block at [rsi]: accumulator (r10:r11:r12) += block + 2^128 ...
5979	add	r10,QWORD[((0+0))+rsi]
5980	adc	r11,QWORD[((8+0))+rsi]
5981	adc	r12,1
; ... then multiply by the key words at [rbp+160], [rbp+168]
5982	mov	rax,QWORD[((0+160+0))+rbp]
5983	mov	r15,rax
5984	mul	r10
5985	mov	r13,rax
5986	mov	r14,rdx
5987	mov	rax,QWORD[((0+160+0))+rbp]
5988	mul	r11
5989	imul	r15,r12
5990	add	r14,rax
5991	adc	r15,rdx
5992	mov	rax,QWORD[((8+160+0))+rbp]
5993	mov	r9,rax
5994	mul	r10
5995	add	r14,rax
5996	adc	rdx,0
5997	mov	r10,rdx
5998	mov	rax,QWORD[((8+160+0))+rbp]
5999	mul	r11
6000	add	r15,rax
6001	adc	rdx,0
6002	imul	r9,r12
6003	add	r15,r10
6004	adc	r9,rdx
; reduce: keep the low 130 bits, add back 5x the part above bit 130
6005	mov	r10,r13
6006	mov	r11,r14
6007	mov	r12,r15
6008	and	r12,3
6009	mov	r13,r15
6010	and	r13,-4
6011	mov	r14,r9
6012	shrd	r15,r9,2
6013	shr	r9,2
6014	add	r15,r13
6015	adc	r9,r14
6016	add	r10,r15
6017	adc	r11,r9
6018	adc	r12,0
; hash block at [rsi+16], same sequence
6019	add	r10,QWORD[((0+16))+rsi]
6020	adc	r11,QWORD[((8+16))+rsi]
6021	adc	r12,1
6022	mov	rax,QWORD[((0+160+0))+rbp]
6023	mov	r15,rax
6024	mul	r10
6025	mov	r13,rax
6026	mov	r14,rdx
6027	mov	rax,QWORD[((0+160+0))+rbp]
6028	mul	r11
6029	imul	r15,r12
6030	add	r14,rax
6031	adc	r15,rdx
6032	mov	rax,QWORD[((8+160+0))+rbp]
6033	mov	r9,rax
6034	mul	r10
6035	add	r14,rax
6036	adc	rdx,0
6037	mov	r10,rdx
6038	mov	rax,QWORD[((8+160+0))+rbp]
6039	mul	r11
6040	add	r15,rax
6041	adc	rdx,0
6042	imul	r9,r12
6043	add	r15,r10
6044	adc	r9,rdx
6045	mov	r10,r13
6046	mov	r11,r14
6047	mov	r12,r15
6048	and	r12,3
6049	mov	r13,r15
6050	and	r13,-4
6051	mov	r14,r9
6052	shrd	r15,r9,2
6053	shr	r9,2
6054	add	r15,r13
6055	adc	r9,r14
6056	add	r10,r15
6057	adc	r11,r9
6058	adc	r12,0
6059
6060
; decrypt the 32 bytes just hashed
6061	vpxor	ymm0,ymm0,YMMWORD[rsi]
6062	vmovdqu	YMMWORD[rdi],ymm0
6063	lea	rsi,[32+rsi]
6064	lea	rdi,[32+rdi]
6065
; shift the keystream queue down by one register
6066	vmovdqa	ymm0,ymm4
6067	vmovdqa	ymm4,ymm8
6068	vmovdqa	ymm8,ymm12
6069	vmovdqa	ymm12,ymm1
6070	vmovdqa	ymm1,ymm5
6071	vmovdqa	ymm5,ymm9
6072	vmovdqa	ymm9,ymm13
6073	vmovdqa	ymm13,ymm2
6074	vmovdqa	ymm2,ymm6
6075	jmp	NEAR $L$open_avx2_short_hash_and_xor_loop
; Fewer than 32 bytes left.  xmm1 gets the low 16 keystream bytes before
; the branch (the vector move does not alter the flags set by cmp).
6076$L$open_avx2_short_tail_32:
6077	cmp	rbx,16
6078	vmovdqa	xmm1,xmm0
6079	jb	NEAR $L$open_avx2_short_tail_32_exit
6080	sub	rbx,16
; one more full 16-byte block: hash it ...
6081	add	r10,QWORD[((0+0))+rsi]
6082	adc	r11,QWORD[((8+0))+rsi]
6083	adc	r12,1
6084	mov	rax,QWORD[((0+160+0))+rbp]
6085	mov	r15,rax
6086	mul	r10
6087	mov	r13,rax
6088	mov	r14,rdx
6089	mov	rax,QWORD[((0+160+0))+rbp]
6090	mul	r11
6091	imul	r15,r12
6092	add	r14,rax
6093	adc	r15,rdx
6094	mov	rax,QWORD[((8+160+0))+rbp]
6095	mov	r9,rax
6096	mul	r10
6097	add	r14,rax
6098	adc	rdx,0
6099	mov	r10,rdx
6100	mov	rax,QWORD[((8+160+0))+rbp]
6101	mul	r11
6102	add	r15,rax
6103	adc	rdx,0
6104	imul	r9,r12
6105	add	r15,r10
6106	adc	r9,rdx
6107	mov	r10,r13
6108	mov	r11,r14
6109	mov	r12,r15
6110	and	r12,3
6111	mov	r13,r15
6112	and	r13,-4
6113	mov	r14,r9
6114	shrd	r15,r9,2
6115	shr	r9,2
6116	add	r15,r13
6117	adc	r9,r14
6118	add	r10,r15
6119	adc	r11,r9
6120	adc	r12,0
6121
; ... then decrypt it; xmm1 = high lane of ymm0 = next 16 keystream bytes
6122	vpxor	xmm3,xmm0,XMMWORD[rsi]
6123	vmovdqu	XMMWORD[rdi],xmm3
6124	lea	rsi,[16+rsi]
6125	lea	rdi,[16+rdi]
6126	vextracti128	xmm1,ymm0,1
; Clear the upper ymm halves and continue at $L$open_sse_tail_16 (defined
; outside this view; presumably finishes the final <16 bytes using xmm1).
6127$L$open_avx2_short_tail_32_exit:
6128	vzeroupper
6129	jmp	NEAR $L$open_sse_tail_16
6130
; ---------------------------------------------------------------------------
; $L$open_avx2_320
; Short-input path using three two-block ChaCha20 states.
;   state 0 rows a/b/c/d = ymm0/ymm4/ymm8/ymm12   (as set up by the caller)
;   state 1 rows a/b/c/d = ymm1/ymm5/ymm9/ymm13   (counter + 1x $L$avx2_inc)
;   state 2 rows a/b/c/d = ymm2/ymm6/ymm10/ymm14  (counter + 2x $L$avx2_inc)
; Initial rows b and c are kept in ymm7 and ymm11; the three counter rows
; are saved at [rbp+160+160], [rbp+160+192], [rbp+160+224]; row a is
; re-added from $L$chacha20_consts.  ymm3 is the shift temporary and r10
; counts the ten double rounds.  Ends by storing the clamped hash key at
; [rbp+160], queuing the keystream, and jumping to $L$open_avx2_short.
; NOTE(review): presumably selected for rbx <= 320 - confirm at the dispatch
; site, which is outside this view.
; ---------------------------------------------------------------------------
6131$L$open_avx2_320:
6132	vmovdqa	ymm1,ymm0
6133	vmovdqa	ymm2,ymm0
6134	vmovdqa	ymm5,ymm4
6135	vmovdqa	ymm6,ymm4
6136	vmovdqa	ymm9,ymm8
6137	vmovdqa	ymm10,ymm8
6138	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
6139	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
6140	vmovdqa	ymm7,ymm4
6141	vmovdqa	ymm11,ymm8
6142	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6143	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6144	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6145	mov	r10,10
6146$L$open_avx2_320_rounds:
; column step, state 0 (rows then rotated to diagonal position)
6147	vpaddd	ymm0,ymm0,ymm4
6148	vpxor	ymm12,ymm12,ymm0
6149	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
6150	vpaddd	ymm8,ymm8,ymm12
6151	vpxor	ymm4,ymm4,ymm8
6152	vpsrld	ymm3,ymm4,20
6153	vpslld	ymm4,ymm4,12
6154	vpxor	ymm4,ymm4,ymm3
6155	vpaddd	ymm0,ymm0,ymm4
6156	vpxor	ymm12,ymm12,ymm0
6157	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
6158	vpaddd	ymm8,ymm8,ymm12
6159	vpxor	ymm4,ymm4,ymm8
6160	vpslld	ymm3,ymm4,7
6161	vpsrld	ymm4,ymm4,25
6162	vpxor	ymm4,ymm4,ymm3
6163	vpalignr	ymm12,ymm12,ymm12,12
6164	vpalignr	ymm8,ymm8,ymm8,8
6165	vpalignr	ymm4,ymm4,ymm4,4
; column step, state 1
6166	vpaddd	ymm1,ymm1,ymm5
6167	vpxor	ymm13,ymm13,ymm1
6168	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
6169	vpaddd	ymm9,ymm9,ymm13
6170	vpxor	ymm5,ymm5,ymm9
6171	vpsrld	ymm3,ymm5,20
6172	vpslld	ymm5,ymm5,12
6173	vpxor	ymm5,ymm5,ymm3
6174	vpaddd	ymm1,ymm1,ymm5
6175	vpxor	ymm13,ymm13,ymm1
6176	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
6177	vpaddd	ymm9,ymm9,ymm13
6178	vpxor	ymm5,ymm5,ymm9
6179	vpslld	ymm3,ymm5,7
6180	vpsrld	ymm5,ymm5,25
6181	vpxor	ymm5,ymm5,ymm3
6182	vpalignr	ymm13,ymm13,ymm13,12
6183	vpalignr	ymm9,ymm9,ymm9,8
6184	vpalignr	ymm5,ymm5,ymm5,4
; column step, state 2
6185	vpaddd	ymm2,ymm2,ymm6
6186	vpxor	ymm14,ymm14,ymm2
6187	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
6188	vpaddd	ymm10,ymm10,ymm14
6189	vpxor	ymm6,ymm6,ymm10
6190	vpsrld	ymm3,ymm6,20
6191	vpslld	ymm6,ymm6,12
6192	vpxor	ymm6,ymm6,ymm3
6193	vpaddd	ymm2,ymm2,ymm6
6194	vpxor	ymm14,ymm14,ymm2
6195	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
6196	vpaddd	ymm10,ymm10,ymm14
6197	vpxor	ymm6,ymm6,ymm10
6198	vpslld	ymm3,ymm6,7
6199	vpsrld	ymm6,ymm6,25
6200	vpxor	ymm6,ymm6,ymm3
6201	vpalignr	ymm14,ymm14,ymm14,12
6202	vpalignr	ymm10,ymm10,ymm10,8
6203	vpalignr	ymm6,ymm6,ymm6,4
; diagonal step, state 0 (rows then rotated back)
6204	vpaddd	ymm0,ymm0,ymm4
6205	vpxor	ymm12,ymm12,ymm0
6206	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
6207	vpaddd	ymm8,ymm8,ymm12
6208	vpxor	ymm4,ymm4,ymm8
6209	vpsrld	ymm3,ymm4,20
6210	vpslld	ymm4,ymm4,12
6211	vpxor	ymm4,ymm4,ymm3
6212	vpaddd	ymm0,ymm0,ymm4
6213	vpxor	ymm12,ymm12,ymm0
6214	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
6215	vpaddd	ymm8,ymm8,ymm12
6216	vpxor	ymm4,ymm4,ymm8
6217	vpslld	ymm3,ymm4,7
6218	vpsrld	ymm4,ymm4,25
6219	vpxor	ymm4,ymm4,ymm3
6220	vpalignr	ymm12,ymm12,ymm12,4
6221	vpalignr	ymm8,ymm8,ymm8,8
6222	vpalignr	ymm4,ymm4,ymm4,12
; diagonal step, state 1
6223	vpaddd	ymm1,ymm1,ymm5
6224	vpxor	ymm13,ymm13,ymm1
6225	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
6226	vpaddd	ymm9,ymm9,ymm13
6227	vpxor	ymm5,ymm5,ymm9
6228	vpsrld	ymm3,ymm5,20
6229	vpslld	ymm5,ymm5,12
6230	vpxor	ymm5,ymm5,ymm3
6231	vpaddd	ymm1,ymm1,ymm5
6232	vpxor	ymm13,ymm13,ymm1
6233	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
6234	vpaddd	ymm9,ymm9,ymm13
6235	vpxor	ymm5,ymm5,ymm9
6236	vpslld	ymm3,ymm5,7
6237	vpsrld	ymm5,ymm5,25
6238	vpxor	ymm5,ymm5,ymm3
6239	vpalignr	ymm13,ymm13,ymm13,4
6240	vpalignr	ymm9,ymm9,ymm9,8
6241	vpalignr	ymm5,ymm5,ymm5,12
; diagonal step, state 2
6242	vpaddd	ymm2,ymm2,ymm6
6243	vpxor	ymm14,ymm14,ymm2
6244	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
6245	vpaddd	ymm10,ymm10,ymm14
6246	vpxor	ymm6,ymm6,ymm10
6247	vpsrld	ymm3,ymm6,20
6248	vpslld	ymm6,ymm6,12
6249	vpxor	ymm6,ymm6,ymm3
6250	vpaddd	ymm2,ymm2,ymm6
6251	vpxor	ymm14,ymm14,ymm2
6252	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
6253	vpaddd	ymm10,ymm10,ymm14
6254	vpxor	ymm6,ymm6,ymm10
6255	vpslld	ymm3,ymm6,7
6256	vpsrld	ymm6,ymm6,25
6257	vpxor	ymm6,ymm6,ymm3
6258	vpalignr	ymm14,ymm14,ymm14,4
6259	vpalignr	ymm10,ymm10,ymm10,8
6260	vpalignr	ymm6,ymm6,ymm6,12
6261
6262	dec	r10
6263	jne	NEAR $L$open_avx2_320_rounds
; add the initial state back into all three states
6264	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
6265	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
6266	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
6267	vpaddd	ymm4,ymm4,ymm7
6268	vpaddd	ymm5,ymm5,ymm7
6269	vpaddd	ymm6,ymm6,ymm7
6270	vpaddd	ymm8,ymm8,ymm11
6271	vpaddd	ymm9,ymm9,ymm11
6272	vpaddd	ymm10,ymm10,ymm11
6273	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
6274	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
6275	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
; ymm3 = low lanes of rows a and b of state 0 (first 32 keystream bytes)
6276	vperm2i128	ymm3,ymm4,ymm0,0x02
6277
; mask with $L$clamp and keep the result at [rbp+160] as the hash key
6278	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
6279	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
6280
; Re-pack the rest of the keystream into consumption order:
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6.
6281	vperm2i128	ymm0,ymm4,ymm0,0x13
6282	vperm2i128	ymm4,ymm12,ymm8,0x13
6283	vperm2i128	ymm8,ymm5,ymm1,0x02
6284	vperm2i128	ymm12,ymm13,ymm9,0x02
6285	vperm2i128	ymm1,ymm5,ymm1,0x13
6286	vperm2i128	ymm5,ymm13,ymm9,0x13
6287	vperm2i128	ymm9,ymm6,ymm2,0x02
6288	vperm2i128	ymm13,ymm14,ymm10,0x02
6289	vperm2i128	ymm2,ymm6,ymm2,0x13
6290	vperm2i128	ymm6,ymm14,ymm10,0x13
6291	jmp	NEAR $L$open_avx2_short
6292
6293
6294
6295
6296
6297ALIGN	64
6298chacha20_poly1305_seal_avx2:
6299
6300
6301
6302
6303
6304
6305
6306
6307
6308
6309
6310
6311	vzeroupper
6312	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6313	vbroadcasti128	ymm4,XMMWORD[r9]
6314	vbroadcasti128	ymm8,XMMWORD[16+r9]
6315	vbroadcasti128	ymm12,XMMWORD[32+r9]
6316	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
6317	cmp	rbx,6*32
6318	jbe	NEAR $L$seal_avx2_192
6319	cmp	rbx,10*32
6320	jbe	NEAR $L$seal_avx2_320
6321	vmovdqa	ymm1,ymm0
6322	vmovdqa	ymm2,ymm0
6323	vmovdqa	ymm3,ymm0
6324	vmovdqa	ymm5,ymm4
6325	vmovdqa	ymm6,ymm4
6326	vmovdqa	ymm7,ymm4
6327	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
6328	vmovdqa	ymm9,ymm8
6329	vmovdqa	ymm10,ymm8
6330	vmovdqa	ymm11,ymm8
6331	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
6332	vmovdqa	ymm15,ymm12
6333	vpaddd	ymm14,ymm15,YMMWORD[$L$avx2_inc]
6334	vpaddd	ymm13,ymm14,YMMWORD[$L$avx2_inc]
6335	vpaddd	ymm12,ymm13,YMMWORD[$L$avx2_inc]
6336	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6337	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6338	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6339	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6340	mov	r10,10
6341$L$seal_avx2_init_rounds:
6342	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6343	vmovdqa	ymm8,YMMWORD[$L$rol16]
6344	vpaddd	ymm3,ymm3,ymm7
6345	vpaddd	ymm2,ymm2,ymm6
6346	vpaddd	ymm1,ymm1,ymm5
6347	vpaddd	ymm0,ymm0,ymm4
6348	vpxor	ymm15,ymm15,ymm3
6349	vpxor	ymm14,ymm14,ymm2
6350	vpxor	ymm13,ymm13,ymm1
6351	vpxor	ymm12,ymm12,ymm0
6352	vpshufb	ymm15,ymm15,ymm8
6353	vpshufb	ymm14,ymm14,ymm8
6354	vpshufb	ymm13,ymm13,ymm8
6355	vpshufb	ymm12,ymm12,ymm8
6356	vpaddd	ymm11,ymm11,ymm15
6357	vpaddd	ymm10,ymm10,ymm14
6358	vpaddd	ymm9,ymm9,ymm13
6359	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6360	vpxor	ymm7,ymm7,ymm11
6361	vpxor	ymm6,ymm6,ymm10
6362	vpxor	ymm5,ymm5,ymm9
6363	vpxor	ymm4,ymm4,ymm8
6364	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6365	vpsrld	ymm8,ymm7,20
6366	vpslld	ymm7,ymm7,32-20
6367	vpxor	ymm7,ymm7,ymm8
6368	vpsrld	ymm8,ymm6,20
6369	vpslld	ymm6,ymm6,32-20
6370	vpxor	ymm6,ymm6,ymm8
6371	vpsrld	ymm8,ymm5,20
6372	vpslld	ymm5,ymm5,32-20
6373	vpxor	ymm5,ymm5,ymm8
6374	vpsrld	ymm8,ymm4,20
6375	vpslld	ymm4,ymm4,32-20
6376	vpxor	ymm4,ymm4,ymm8
6377	vmovdqa	ymm8,YMMWORD[$L$rol8]
6378	vpaddd	ymm3,ymm3,ymm7
6379	vpaddd	ymm2,ymm2,ymm6
6380	vpaddd	ymm1,ymm1,ymm5
6381	vpaddd	ymm0,ymm0,ymm4
6382	vpxor	ymm15,ymm15,ymm3
6383	vpxor	ymm14,ymm14,ymm2
6384	vpxor	ymm13,ymm13,ymm1
6385	vpxor	ymm12,ymm12,ymm0
6386	vpshufb	ymm15,ymm15,ymm8
6387	vpshufb	ymm14,ymm14,ymm8
6388	vpshufb	ymm13,ymm13,ymm8
6389	vpshufb	ymm12,ymm12,ymm8
6390	vpaddd	ymm11,ymm11,ymm15
6391	vpaddd	ymm10,ymm10,ymm14
6392	vpaddd	ymm9,ymm9,ymm13
6393	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6394	vpxor	ymm7,ymm7,ymm11
6395	vpxor	ymm6,ymm6,ymm10
6396	vpxor	ymm5,ymm5,ymm9
6397	vpxor	ymm4,ymm4,ymm8
6398	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6399	vpsrld	ymm8,ymm7,25
6400	vpslld	ymm7,ymm7,32-25
6401	vpxor	ymm7,ymm7,ymm8
6402	vpsrld	ymm8,ymm6,25
6403	vpslld	ymm6,ymm6,32-25
6404	vpxor	ymm6,ymm6,ymm8
6405	vpsrld	ymm8,ymm5,25
6406	vpslld	ymm5,ymm5,32-25
6407	vpxor	ymm5,ymm5,ymm8
6408	vpsrld	ymm8,ymm4,25
6409	vpslld	ymm4,ymm4,32-25
6410	vpxor	ymm4,ymm4,ymm8
6411	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6412	vpalignr	ymm7,ymm7,ymm7,4
6413	vpalignr	ymm11,ymm11,ymm11,8
6414	vpalignr	ymm15,ymm15,ymm15,12
6415	vpalignr	ymm6,ymm6,ymm6,4
6416	vpalignr	ymm10,ymm10,ymm10,8
6417	vpalignr	ymm14,ymm14,ymm14,12
6418	vpalignr	ymm5,ymm5,ymm5,4
6419	vpalignr	ymm9,ymm9,ymm9,8
6420	vpalignr	ymm13,ymm13,ymm13,12
6421	vpalignr	ymm4,ymm4,ymm4,4
6422	vpalignr	ymm8,ymm8,ymm8,8
6423	vpalignr	ymm12,ymm12,ymm12,12
6424	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6425	vmovdqa	ymm8,YMMWORD[$L$rol16]
6426	vpaddd	ymm3,ymm3,ymm7
6427	vpaddd	ymm2,ymm2,ymm6
6428	vpaddd	ymm1,ymm1,ymm5
6429	vpaddd	ymm0,ymm0,ymm4
6430	vpxor	ymm15,ymm15,ymm3
6431	vpxor	ymm14,ymm14,ymm2
6432	vpxor	ymm13,ymm13,ymm1
6433	vpxor	ymm12,ymm12,ymm0
6434	vpshufb	ymm15,ymm15,ymm8
6435	vpshufb	ymm14,ymm14,ymm8
6436	vpshufb	ymm13,ymm13,ymm8
6437	vpshufb	ymm12,ymm12,ymm8
6438	vpaddd	ymm11,ymm11,ymm15
6439	vpaddd	ymm10,ymm10,ymm14
6440	vpaddd	ymm9,ymm9,ymm13
6441	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6442	vpxor	ymm7,ymm7,ymm11
6443	vpxor	ymm6,ymm6,ymm10
6444	vpxor	ymm5,ymm5,ymm9
6445	vpxor	ymm4,ymm4,ymm8
6446	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6447	vpsrld	ymm8,ymm7,20
6448	vpslld	ymm7,ymm7,32-20
6449	vpxor	ymm7,ymm7,ymm8
6450	vpsrld	ymm8,ymm6,20
6451	vpslld	ymm6,ymm6,32-20
6452	vpxor	ymm6,ymm6,ymm8
6453	vpsrld	ymm8,ymm5,20
6454	vpslld	ymm5,ymm5,32-20
6455	vpxor	ymm5,ymm5,ymm8
6456	vpsrld	ymm8,ymm4,20
6457	vpslld	ymm4,ymm4,32-20
6458	vpxor	ymm4,ymm4,ymm8
6459	vmovdqa	ymm8,YMMWORD[$L$rol8]
6460	vpaddd	ymm3,ymm3,ymm7
6461	vpaddd	ymm2,ymm2,ymm6
6462	vpaddd	ymm1,ymm1,ymm5
6463	vpaddd	ymm0,ymm0,ymm4
6464	vpxor	ymm15,ymm15,ymm3
6465	vpxor	ymm14,ymm14,ymm2
6466	vpxor	ymm13,ymm13,ymm1
6467	vpxor	ymm12,ymm12,ymm0
6468	vpshufb	ymm15,ymm15,ymm8
6469	vpshufb	ymm14,ymm14,ymm8
6470	vpshufb	ymm13,ymm13,ymm8
6471	vpshufb	ymm12,ymm12,ymm8
6472	vpaddd	ymm11,ymm11,ymm15
6473	vpaddd	ymm10,ymm10,ymm14
6474	vpaddd	ymm9,ymm9,ymm13
6475	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6476	vpxor	ymm7,ymm7,ymm11
6477	vpxor	ymm6,ymm6,ymm10
6478	vpxor	ymm5,ymm5,ymm9
6479	vpxor	ymm4,ymm4,ymm8
6480	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6481	vpsrld	ymm8,ymm7,25
6482	vpslld	ymm7,ymm7,32-25
6483	vpxor	ymm7,ymm7,ymm8
6484	vpsrld	ymm8,ymm6,25
6485	vpslld	ymm6,ymm6,32-25
6486	vpxor	ymm6,ymm6,ymm8
6487	vpsrld	ymm8,ymm5,25
6488	vpslld	ymm5,ymm5,32-25
6489	vpxor	ymm5,ymm5,ymm8
6490	vpsrld	ymm8,ymm4,25
6491	vpslld	ymm4,ymm4,32-25
6492	vpxor	ymm4,ymm4,ymm8
6493	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6494	vpalignr	ymm7,ymm7,ymm7,12
6495	vpalignr	ymm11,ymm11,ymm11,8
6496	vpalignr	ymm15,ymm15,ymm15,4
6497	vpalignr	ymm6,ymm6,ymm6,12
6498	vpalignr	ymm10,ymm10,ymm10,8
6499	vpalignr	ymm14,ymm14,ymm14,4
6500	vpalignr	ymm5,ymm5,ymm5,12
6501	vpalignr	ymm9,ymm9,ymm9,8
6502	vpalignr	ymm13,ymm13,ymm13,4
6503	vpalignr	ymm4,ymm4,ymm4,12
6504	vpalignr	ymm8,ymm8,ymm8,8
6505	vpalignr	ymm12,ymm12,ymm12,4
6506
6507	dec	r10
6508	jnz	NEAR $L$seal_avx2_init_rounds
6509	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
6510	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
6511	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
6512	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
6513	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
6514	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
6515	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
6516	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
6517	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
6518	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
6519	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
6520	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
6521	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
6522	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
6523	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
6524	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
6525
6526	vperm2i128	ymm11,ymm15,ymm11,0x13
6527	vperm2i128	ymm15,ymm7,ymm3,0x02
6528	vperm2i128	ymm3,ymm7,ymm3,0x13
6529	vpand	ymm15,ymm15,YMMWORD[$L$clamp]
6530	vmovdqa	YMMWORD[(160+0)+rbp],ymm15
6531	mov	r8,r8
6532	call	poly_hash_ad_internal
6533
6534	vpxor	ymm3,ymm3,YMMWORD[rsi]
6535	vpxor	ymm11,ymm11,YMMWORD[32+rsi]
6536	vmovdqu	YMMWORD[rdi],ymm3
6537	vmovdqu	YMMWORD[32+rdi],ymm11
6538	vperm2i128	ymm15,ymm6,ymm2,0x02
6539	vperm2i128	ymm6,ymm6,ymm2,0x13
6540	vperm2i128	ymm2,ymm14,ymm10,0x02
6541	vperm2i128	ymm10,ymm14,ymm10,0x13
6542	vpxor	ymm15,ymm15,YMMWORD[((0+64))+rsi]
6543	vpxor	ymm2,ymm2,YMMWORD[((32+64))+rsi]
6544	vpxor	ymm6,ymm6,YMMWORD[((64+64))+rsi]
6545	vpxor	ymm10,ymm10,YMMWORD[((96+64))+rsi]
6546	vmovdqu	YMMWORD[(0+64)+rdi],ymm15
6547	vmovdqu	YMMWORD[(32+64)+rdi],ymm2
6548	vmovdqu	YMMWORD[(64+64)+rdi],ymm6
6549	vmovdqu	YMMWORD[(96+64)+rdi],ymm10
6550	vperm2i128	ymm15,ymm5,ymm1,0x02
6551	vperm2i128	ymm5,ymm5,ymm1,0x13
6552	vperm2i128	ymm1,ymm13,ymm9,0x02
6553	vperm2i128	ymm9,ymm13,ymm9,0x13
6554	vpxor	ymm15,ymm15,YMMWORD[((0+192))+rsi]
6555	vpxor	ymm1,ymm1,YMMWORD[((32+192))+rsi]
6556	vpxor	ymm5,ymm5,YMMWORD[((64+192))+rsi]
6557	vpxor	ymm9,ymm9,YMMWORD[((96+192))+rsi]
6558	vmovdqu	YMMWORD[(0+192)+rdi],ymm15
6559	vmovdqu	YMMWORD[(32+192)+rdi],ymm1
6560	vmovdqu	YMMWORD[(64+192)+rdi],ymm5
6561	vmovdqu	YMMWORD[(96+192)+rdi],ymm9
6562	vperm2i128	ymm15,ymm4,ymm0,0x13
6563	vperm2i128	ymm0,ymm4,ymm0,0x02
6564	vperm2i128	ymm4,ymm12,ymm8,0x02
6565	vperm2i128	ymm12,ymm12,ymm8,0x13
6566	vmovdqa	ymm8,ymm15
6567
6568	lea	rsi,[320+rsi]
6569	sub	rbx,10*32
6570	mov	rcx,10*32
6571	cmp	rbx,4*32
6572	jbe	NEAR $L$seal_avx2_short_hash_remainder
6573	vpxor	ymm0,ymm0,YMMWORD[rsi]
6574	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
6575	vpxor	ymm8,ymm8,YMMWORD[64+rsi]
6576	vpxor	ymm12,ymm12,YMMWORD[96+rsi]
6577	vmovdqu	YMMWORD[320+rdi],ymm0
6578	vmovdqu	YMMWORD[352+rdi],ymm4
6579	vmovdqu	YMMWORD[384+rdi],ymm8
6580	vmovdqu	YMMWORD[416+rdi],ymm12
6581	lea	rsi,[128+rsi]
6582	sub	rbx,4*32
6583	mov	rcx,8
6584	mov	r8,2
6585	cmp	rbx,4*32
6586	jbe	NEAR $L$seal_avx2_tail_128
6587	cmp	rbx,8*32
6588	jbe	NEAR $L$seal_avx2_tail_256
6589	cmp	rbx,12*32
6590	jbe	NEAR $L$seal_avx2_tail_384
6591	cmp	rbx,16*32
6592	jbe	NEAR $L$seal_avx2_tail_512
6593	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6594	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
6595	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
6596	vmovdqa	ymm1,ymm0
6597	vmovdqa	ymm5,ymm4
6598	vmovdqa	ymm9,ymm8
6599	vmovdqa	ymm2,ymm0
6600	vmovdqa	ymm6,ymm4
6601	vmovdqa	ymm10,ymm8
6602	vmovdqa	ymm3,ymm0
6603	vmovdqa	ymm7,ymm4
6604	vmovdqa	ymm11,ymm8
6605	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
6606	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
6607	vpaddd	ymm14,ymm12,ymm15
6608	vpaddd	ymm13,ymm12,ymm14
6609	vpaddd	ymm12,ymm12,ymm13
6610	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6611	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6612	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6613	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6614	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6615	vmovdqa	ymm8,YMMWORD[$L$rol16]
6616	vpaddd	ymm3,ymm3,ymm7
6617	vpaddd	ymm2,ymm2,ymm6
6618	vpaddd	ymm1,ymm1,ymm5
6619	vpaddd	ymm0,ymm0,ymm4
6620	vpxor	ymm15,ymm15,ymm3
6621	vpxor	ymm14,ymm14,ymm2
6622	vpxor	ymm13,ymm13,ymm1
6623	vpxor	ymm12,ymm12,ymm0
6624	vpshufb	ymm15,ymm15,ymm8
6625	vpshufb	ymm14,ymm14,ymm8
6626	vpshufb	ymm13,ymm13,ymm8
6627	vpshufb	ymm12,ymm12,ymm8
6628	vpaddd	ymm11,ymm11,ymm15
6629	vpaddd	ymm10,ymm10,ymm14
6630	vpaddd	ymm9,ymm9,ymm13
6631	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6632	vpxor	ymm7,ymm7,ymm11
6633	vpxor	ymm6,ymm6,ymm10
6634	vpxor	ymm5,ymm5,ymm9
6635	vpxor	ymm4,ymm4,ymm8
6636	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6637	vpsrld	ymm8,ymm7,20
6638	vpslld	ymm7,ymm7,32-20
6639	vpxor	ymm7,ymm7,ymm8
6640	vpsrld	ymm8,ymm6,20
6641	vpslld	ymm6,ymm6,32-20
6642	vpxor	ymm6,ymm6,ymm8
6643	vpsrld	ymm8,ymm5,20
6644	vpslld	ymm5,ymm5,32-20
6645	vpxor	ymm5,ymm5,ymm8
6646	vpsrld	ymm8,ymm4,20
6647	vpslld	ymm4,ymm4,32-20
6648	vpxor	ymm4,ymm4,ymm8
6649	vmovdqa	ymm8,YMMWORD[$L$rol8]
6650	vpaddd	ymm3,ymm3,ymm7
6651	vpaddd	ymm2,ymm2,ymm6
6652	vpaddd	ymm1,ymm1,ymm5
6653	vpaddd	ymm0,ymm0,ymm4
6654	vpxor	ymm15,ymm15,ymm3
6655	vpxor	ymm14,ymm14,ymm2
6656	vpxor	ymm13,ymm13,ymm1
6657	vpxor	ymm12,ymm12,ymm0
6658	vpshufb	ymm15,ymm15,ymm8
6659	vpshufb	ymm14,ymm14,ymm8
6660	vpshufb	ymm13,ymm13,ymm8
6661	vpshufb	ymm12,ymm12,ymm8
6662	vpaddd	ymm11,ymm11,ymm15
6663	vpaddd	ymm10,ymm10,ymm14
6664	vpaddd	ymm9,ymm9,ymm13
6665	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6666	vpxor	ymm7,ymm7,ymm11
6667	vpxor	ymm6,ymm6,ymm10
6668	vpxor	ymm5,ymm5,ymm9
6669	vpxor	ymm4,ymm4,ymm8
6670	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6671	vpsrld	ymm8,ymm7,25
6672	vpslld	ymm7,ymm7,32-25
6673	vpxor	ymm7,ymm7,ymm8
6674	vpsrld	ymm8,ymm6,25
6675	vpslld	ymm6,ymm6,32-25
6676	vpxor	ymm6,ymm6,ymm8
6677	vpsrld	ymm8,ymm5,25
6678	vpslld	ymm5,ymm5,32-25
6679	vpxor	ymm5,ymm5,ymm8
6680	vpsrld	ymm8,ymm4,25
6681	vpslld	ymm4,ymm4,32-25
6682	vpxor	ymm4,ymm4,ymm8
6683	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6684	vpalignr	ymm7,ymm7,ymm7,4
6685	vpalignr	ymm11,ymm11,ymm11,8
6686	vpalignr	ymm15,ymm15,ymm15,12
6687	vpalignr	ymm6,ymm6,ymm6,4
6688	vpalignr	ymm10,ymm10,ymm10,8
6689	vpalignr	ymm14,ymm14,ymm14,12
6690	vpalignr	ymm5,ymm5,ymm5,4
6691	vpalignr	ymm9,ymm9,ymm9,8
6692	vpalignr	ymm13,ymm13,ymm13,12
6693	vpalignr	ymm4,ymm4,ymm4,4
6694	vpalignr	ymm8,ymm8,ymm8,8
6695	vpalignr	ymm12,ymm12,ymm12,12
6696	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6697	vmovdqa	ymm8,YMMWORD[$L$rol16]
6698	vpaddd	ymm3,ymm3,ymm7
6699	vpaddd	ymm2,ymm2,ymm6
6700	vpaddd	ymm1,ymm1,ymm5
6701	vpaddd	ymm0,ymm0,ymm4
6702	vpxor	ymm15,ymm15,ymm3
6703	vpxor	ymm14,ymm14,ymm2
6704	vpxor	ymm13,ymm13,ymm1
6705	vpxor	ymm12,ymm12,ymm0
6706	vpshufb	ymm15,ymm15,ymm8
6707	vpshufb	ymm14,ymm14,ymm8
6708	vpshufb	ymm13,ymm13,ymm8
6709	vpshufb	ymm12,ymm12,ymm8
6710	vpaddd	ymm11,ymm11,ymm15
6711	vpaddd	ymm10,ymm10,ymm14
6712	vpaddd	ymm9,ymm9,ymm13
6713	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6714	vpxor	ymm7,ymm7,ymm11
6715	vpxor	ymm6,ymm6,ymm10
6716	vpxor	ymm5,ymm5,ymm9
6717	vpxor	ymm4,ymm4,ymm8
6718	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6719	vpsrld	ymm8,ymm7,20
6720	vpslld	ymm7,ymm7,32-20
6721	vpxor	ymm7,ymm7,ymm8
6722	vpsrld	ymm8,ymm6,20
6723	vpslld	ymm6,ymm6,32-20
6724	vpxor	ymm6,ymm6,ymm8
6725	vpsrld	ymm8,ymm5,20
6726	vpslld	ymm5,ymm5,32-20
6727	vpxor	ymm5,ymm5,ymm8
6728	vpsrld	ymm8,ymm4,20
6729	vpslld	ymm4,ymm4,32-20
6730	vpxor	ymm4,ymm4,ymm8
6731	vmovdqa	ymm8,YMMWORD[$L$rol8]
6732	vpaddd	ymm3,ymm3,ymm7
6733	vpaddd	ymm2,ymm2,ymm6
6734	vpaddd	ymm1,ymm1,ymm5
6735	vpaddd	ymm0,ymm0,ymm4
6736	vpxor	ymm15,ymm15,ymm3
6737	vpxor	ymm14,ymm14,ymm2
6738	vpxor	ymm13,ymm13,ymm1
6739	vpxor	ymm12,ymm12,ymm0
6740	vpshufb	ymm15,ymm15,ymm8
6741	vpshufb	ymm14,ymm14,ymm8
6742	vpshufb	ymm13,ymm13,ymm8
6743	vpshufb	ymm12,ymm12,ymm8
6744	vpaddd	ymm11,ymm11,ymm15
6745	vpaddd	ymm10,ymm10,ymm14
6746	vpaddd	ymm9,ymm9,ymm13
6747	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6748	vpxor	ymm7,ymm7,ymm11
6749	vpxor	ymm6,ymm6,ymm10
6750	vpxor	ymm5,ymm5,ymm9
6751	vpxor	ymm4,ymm4,ymm8
6752	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6753	vpsrld	ymm8,ymm7,25
6754	vpslld	ymm7,ymm7,32-25
6755	vpxor	ymm7,ymm7,ymm8
6756	vpsrld	ymm8,ymm6,25
6757	vpslld	ymm6,ymm6,32-25
6758	vpxor	ymm6,ymm6,ymm8
6759	vpsrld	ymm8,ymm5,25
6760	vpslld	ymm5,ymm5,32-25
6761	vpxor	ymm5,ymm5,ymm8
6762	vpsrld	ymm8,ymm4,25
6763	vpslld	ymm4,ymm4,32-25
6764	vpxor	ymm4,ymm4,ymm8
6765	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6766	vpalignr	ymm7,ymm7,ymm7,12
6767	vpalignr	ymm11,ymm11,ymm11,8
6768	vpalignr	ymm15,ymm15,ymm15,4
6769	vpalignr	ymm6,ymm6,ymm6,12
6770	vpalignr	ymm10,ymm10,ymm10,8
6771	vpalignr	ymm14,ymm14,ymm14,4
6772	vpalignr	ymm5,ymm5,ymm5,12
6773	vpalignr	ymm9,ymm9,ymm9,8
6774	vpalignr	ymm13,ymm13,ymm13,4
6775	vpalignr	ymm4,ymm4,ymm4,12
6776	vpalignr	ymm8,ymm8,ymm8,8
6777	vpalignr	ymm12,ymm12,ymm12,4
6778	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6779	vmovdqa	ymm8,YMMWORD[$L$rol16]
6780	vpaddd	ymm3,ymm3,ymm7
6781	vpaddd	ymm2,ymm2,ymm6
6782	vpaddd	ymm1,ymm1,ymm5
6783	vpaddd	ymm0,ymm0,ymm4
6784	vpxor	ymm15,ymm15,ymm3
6785	vpxor	ymm14,ymm14,ymm2
6786	vpxor	ymm13,ymm13,ymm1
6787	vpxor	ymm12,ymm12,ymm0
6788	vpshufb	ymm15,ymm15,ymm8
6789	vpshufb	ymm14,ymm14,ymm8
6790	vpshufb	ymm13,ymm13,ymm8
6791	vpshufb	ymm12,ymm12,ymm8
6792	vpaddd	ymm11,ymm11,ymm15
6793	vpaddd	ymm10,ymm10,ymm14
6794	vpaddd	ymm9,ymm9,ymm13
6795	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6796	vpxor	ymm7,ymm7,ymm11
6797	vpxor	ymm6,ymm6,ymm10
6798	vpxor	ymm5,ymm5,ymm9
6799	vpxor	ymm4,ymm4,ymm8
6800	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6801	vpsrld	ymm8,ymm7,20
6802	vpslld	ymm7,ymm7,32-20
6803	vpxor	ymm7,ymm7,ymm8
6804	vpsrld	ymm8,ymm6,20
6805	vpslld	ymm6,ymm6,32-20
6806	vpxor	ymm6,ymm6,ymm8
6807	vpsrld	ymm8,ymm5,20
6808	vpslld	ymm5,ymm5,32-20
6809	vpxor	ymm5,ymm5,ymm8
6810	vpsrld	ymm8,ymm4,20
6811	vpslld	ymm4,ymm4,32-20
6812	vpxor	ymm4,ymm4,ymm8
6813	vmovdqa	ymm8,YMMWORD[$L$rol8]
6814	vpaddd	ymm3,ymm3,ymm7
6815	vpaddd	ymm2,ymm2,ymm6
6816	vpaddd	ymm1,ymm1,ymm5
6817	vpaddd	ymm0,ymm0,ymm4
6818	vpxor	ymm15,ymm15,ymm3
6819
6820	sub	rdi,16
6821	mov	rcx,9
6822	jmp	NEAR $L$seal_avx2_main_loop_rounds_entry
6823ALIGN	32
; Top of the main seal loop: build four fresh state groups in ymm0-ymm15.
;   group 0: ymm0/ymm4/ymm8/ymm12    group 1: ymm1/ymm5/ymm9/ymm13
;   group 2: ymm2/ymm6/ymm10/ymm14   group 3: ymm3/ymm7/ymm11/ymm15
; The first three rows of every group are identical copies; only the last row differs.
6824$L$seal_avx2_main_loop:
; Row A = $L$chacha20_consts, rows B and C = the values kept at rbp+160+64 and rbp+160+96.
6825	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6826	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
6827	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
6828	vmovdqa	ymm1,ymm0
6829	vmovdqa	ymm5,ymm4
6830	vmovdqa	ymm9,ymm8
6831	vmovdqa	ymm2,ymm0
6832	vmovdqa	ymm6,ymm4
6833	vmovdqa	ymm10,ymm8
6834	vmovdqa	ymm3,ymm0
6835	vmovdqa	ymm7,ymm4
6836	vmovdqa	ymm11,ymm8
; Row D: start from the counter row saved at rbp+160+160 and step it by $L$avx2_inc
; (+2 in the low dword of each 128-bit lane) once per group: ymm15, then ymm14, ymm13, ymm12.
6837	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
6838	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
6839	vpaddd	ymm14,ymm12,ymm15
6840	vpaddd	ymm13,ymm12,ymm14
6841	vpaddd	ymm12,ymm12,ymm13
; Keep the initial counter rows; they are added back after the rounds. The slot at
; rbp+160+160 now holds the most advanced counter, which the next setup starts from.
6842	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6843	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6844	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6845	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6846
; rcx = number of passes through the rounds loop below (dec rcx / jne at its end).
6847	mov	rcx,10
6848ALIGN	32
; First part of one rounds-loop pass. Two independent instruction streams are interleaved:
;   * vector: the first half of a quarter-round on all four state groups
;     (add, xor, rotate-by-16 via $L$rol16, add, xor, rotate-left-by-12, add, xor);
;   * scalar: one 16-byte hash step on the bytes at [rdi+0], using the accumulator
;     r10:r11:r12 and the two 64-bit multipliers stored at rbp+160 and rbp+168.
; ymm8 doubles as the scratch/mask register, so row C of group 0 lives in the stack slot
; at rbp+160+128 whenever ymm8 is busy.
6849$L$seal_avx2_main_loop_rounds:
; acc += 16 bytes at [rdi]; the trailing "adc r12,1" also adds 1 to the top limb (+2^128).
6850	add	r10,QWORD[((0+0))+rdi]
6851	adc	r11,QWORD[((8+0))+rdi]
6852	adc	r12,1
; Spill group 0's row C and load the rotate-by-16 byte-shuffle mask in its place.
6853	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6854	vmovdqa	ymm8,YMMWORD[$L$rol16]
6855	vpaddd	ymm3,ymm3,ymm7
6856	vpaddd	ymm2,ymm2,ymm6
6857	vpaddd	ymm1,ymm1,ymm5
6858	vpaddd	ymm0,ymm0,ymm4
6859	vpxor	ymm15,ymm15,ymm3
6860	vpxor	ymm14,ymm14,ymm2
6861	vpxor	ymm13,ymm13,ymm1
6862	vpxor	ymm12,ymm12,ymm0
; Multiply acc by the low multiplier qword: r14:r13 = m0*r10, rdx:rax = m0*r11, r15 = m0*r12.
6863	mov	rdx,QWORD[((0+160+0))+rbp]
6864	mov	r15,rdx
6865	mulx	r14,r13,r10
6866	mulx	rdx,rax,r11
6867	imul	r15,r12
6868	add	r14,rax
6869	adc	r15,rdx
6870	vpshufb	ymm15,ymm15,ymm8
6871	vpshufb	ymm14,ymm14,ymm8
6872	vpshufb	ymm13,ymm13,ymm8
6873	vpshufb	ymm12,ymm12,ymm8
6874	vpaddd	ymm11,ymm11,ymm15
6875	vpaddd	ymm10,ymm10,ymm14
6876	vpaddd	ymm9,ymm9,ymm13
; Group 0's row C is updated straight from its spill slot into ymm8.
6877	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6878	vpxor	ymm7,ymm7,ymm11
; Multiply acc by the high multiplier qword and accumulate into r14, r15, r9.
6879	mov	rdx,QWORD[((8+160+0))+rbp]
6880	mulx	rax,r10,r10
6881	add	r14,r10
6882	mulx	r9,r11,r11
6883	adc	r15,r11
6884	adc	r9,0
6885	imul	rdx,r12
6886	vpxor	ymm6,ymm6,ymm10
6887	vpxor	ymm5,ymm5,ymm9
6888	vpxor	ymm4,ymm4,ymm8
6889	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate each row B left by 12 (shift right 20, shift left 12, xor), using ymm8 as scratch.
6890	vpsrld	ymm8,ymm7,20
6891	vpslld	ymm7,ymm7,32-20
6892	vpxor	ymm7,ymm7,ymm8
6893	vpsrld	ymm8,ymm6,20
6894	vpslld	ymm6,ymm6,32-20
6895	vpxor	ymm6,ymm6,ymm8
6896	vpsrld	ymm8,ymm5,20
6897	vpslld	ymm5,ymm5,32-20
6898	add	r15,rax
6899	adc	r9,rdx
6900	vpxor	ymm5,ymm5,ymm8
6901	vpsrld	ymm8,ymm4,20
6902	vpslld	ymm4,ymm4,32-20
6903	vpxor	ymm4,ymm4,ymm8
; ymm8 now holds the rotate-by-8 shuffle mask for the second half of the quarter-round.
6904	vmovdqa	ymm8,YMMWORD[$L$rol8]
6905	vpaddd	ymm3,ymm3,ymm7
6906	vpaddd	ymm2,ymm2,ymm6
6907	vpaddd	ymm1,ymm1,ymm5
6908	vpaddd	ymm0,ymm0,ymm4
6909	vpxor	ymm15,ymm15,ymm3
; Reduce the product r13:r14:r15:r9. Bits 0..129 stay in r10:r11:r12 (r12 = r15 & 3);
; the part above bit 129 ("hi") is added back as 4*hi (r14:r13 with r13 = r15 & -4)
; plus hi (r9:r15 shifted right by 2), i.e. 5*hi -- folding modulo 2^130 - 5.
6910	mov	r10,r13
6911	mov	r11,r14
6912	mov	r12,r15
6913	and	r12,3
6914	mov	r13,r15
6915	and	r13,-4
6916	mov	r14,r9
6917	shrd	r15,r9,2
6918	shr	r9,2
6919	add	r15,r13
6920	adc	r9,r14
6921	add	r10,r15
6922	adc	r11,r9
6923	adc	r12,0
6924
; Remainder of one rounds-loop pass. Reached by fall-through from
; $L$seal_avx2_main_loop_rounds, and also by the jump that follows the "sub rdi,16 /
; mov rcx,9" sequence earlier in this routine, which arrives with the first half of a
; quarter-round already applied and ymm8 = $L$rol8.
; Work done from here: finish the first quarter-round, rotate rows B/C/D by 4/8/12 bytes,
; run a second quarter-round, rotate the rows back, and interleave two more 16-byte hash
; steps (bytes at [rdi+16] and [rdi+32]) before advancing rdi by 48.
6925$L$seal_avx2_main_loop_rounds_entry:
6926	vpxor	ymm14,ymm14,ymm2
6927	vpxor	ymm13,ymm13,ymm1
6928	vpxor	ymm12,ymm12,ymm0
6929	vpshufb	ymm15,ymm15,ymm8
6930	vpshufb	ymm14,ymm14,ymm8
6931	vpshufb	ymm13,ymm13,ymm8
6932	vpshufb	ymm12,ymm12,ymm8
6933	vpaddd	ymm11,ymm11,ymm15
6934	vpaddd	ymm10,ymm10,ymm14
; Hash step 2 of this pass: acc += 16 bytes at [rdi+16], plus 1 in the top limb.
6935	add	r10,QWORD[((0+16))+rdi]
6936	adc	r11,QWORD[((8+16))+rdi]
6937	adc	r12,1
6938	vpaddd	ymm9,ymm9,ymm13
6939	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6940	vpxor	ymm7,ymm7,ymm11
6941	vpxor	ymm6,ymm6,ymm10
6942	vpxor	ymm5,ymm5,ymm9
6943	vpxor	ymm4,ymm4,ymm8
6944	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate each row B left by 7 (shift right 25, shift left 7, xor).
6945	vpsrld	ymm8,ymm7,25
6946	mov	rdx,QWORD[((0+160+0))+rbp]
6947	mov	r15,rdx
6948	mulx	r14,r13,r10
6949	mulx	rdx,rax,r11
6950	imul	r15,r12
6951	add	r14,rax
6952	adc	r15,rdx
6953	vpslld	ymm7,ymm7,32-25
6954	vpxor	ymm7,ymm7,ymm8
6955	vpsrld	ymm8,ymm6,25
6956	vpslld	ymm6,ymm6,32-25
6957	vpxor	ymm6,ymm6,ymm8
6958	vpsrld	ymm8,ymm5,25
6959	vpslld	ymm5,ymm5,32-25
6960	vpxor	ymm5,ymm5,ymm8
6961	vpsrld	ymm8,ymm4,25
6962	vpslld	ymm4,ymm4,32-25
6963	vpxor	ymm4,ymm4,ymm8
; Reload group 0's row C, then rotate rows B, C, D by 4, 8 and 12 bytes within each
; 128-bit lane so the next quarter-round operates on the diagonals.
6964	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6965	vpalignr	ymm7,ymm7,ymm7,4
6966	vpalignr	ymm11,ymm11,ymm11,8
6967	vpalignr	ymm15,ymm15,ymm15,12
6968	vpalignr	ymm6,ymm6,ymm6,4
6969	vpalignr	ymm10,ymm10,ymm10,8
6970	vpalignr	ymm14,ymm14,ymm14,12
6971	mov	rdx,QWORD[((8+160+0))+rbp]
6972	mulx	rax,r10,r10
6973	add	r14,r10
6974	mulx	r9,r11,r11
6975	adc	r15,r11
6976	adc	r9,0
6977	imul	rdx,r12
6978	vpalignr	ymm5,ymm5,ymm5,4
6979	vpalignr	ymm9,ymm9,ymm9,8
6980	vpalignr	ymm13,ymm13,ymm13,12
6981	vpalignr	ymm4,ymm4,ymm4,4
6982	vpalignr	ymm8,ymm8,ymm8,8
6983	vpalignr	ymm12,ymm12,ymm12,12
; Second quarter-round of the pass; same spill/mask dance with ymm8 as before.
6984	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6985	vmovdqa	ymm8,YMMWORD[$L$rol16]
6986	vpaddd	ymm3,ymm3,ymm7
6987	vpaddd	ymm2,ymm2,ymm6
6988	vpaddd	ymm1,ymm1,ymm5
6989	vpaddd	ymm0,ymm0,ymm4
6990	vpxor	ymm15,ymm15,ymm3
6991	vpxor	ymm14,ymm14,ymm2
6992	vpxor	ymm13,ymm13,ymm1
6993	vpxor	ymm12,ymm12,ymm0
6994	vpshufb	ymm15,ymm15,ymm8
6995	vpshufb	ymm14,ymm14,ymm8
6996	add	r15,rax
6997	adc	r9,rdx
6998	vpshufb	ymm13,ymm13,ymm8
6999	vpshufb	ymm12,ymm12,ymm8
7000	vpaddd	ymm11,ymm11,ymm15
7001	vpaddd	ymm10,ymm10,ymm14
7002	vpaddd	ymm9,ymm9,ymm13
7003	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
7004	vpxor	ymm7,ymm7,ymm11
7005	vpxor	ymm6,ymm6,ymm10
7006	vpxor	ymm5,ymm5,ymm9
; Fold the product back into r10:r11:r12 (low 130 bits + 5 * the bits above them).
7007	mov	r10,r13
7008	mov	r11,r14
7009	mov	r12,r15
7010	and	r12,3
7011	mov	r13,r15
7012	and	r13,-4
7013	mov	r14,r9
7014	shrd	r15,r9,2
7015	shr	r9,2
7016	add	r15,r13
7017	adc	r9,r14
7018	add	r10,r15
7019	adc	r11,r9
7020	adc	r12,0
7021	vpxor	ymm4,ymm4,ymm8
7022	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
7023	vpsrld	ymm8,ymm7,20
7024	vpslld	ymm7,ymm7,32-20
7025	vpxor	ymm7,ymm7,ymm8
7026	vpsrld	ymm8,ymm6,20
7027	vpslld	ymm6,ymm6,32-20
7028	vpxor	ymm6,ymm6,ymm8
; Hash step 3 of this pass: acc += 16 bytes at [rdi+32], plus 1 in the top limb.
7029	add	r10,QWORD[((0+32))+rdi]
7030	adc	r11,QWORD[((8+32))+rdi]
7031	adc	r12,1
7032
; Three 16-byte hash inputs are consumed per pass, so step rdi by 48 (lea keeps flags intact).
7033	lea	rdi,[48+rdi]
7034	vpsrld	ymm8,ymm5,20
7035	vpslld	ymm5,ymm5,32-20
7036	vpxor	ymm5,ymm5,ymm8
7037	vpsrld	ymm8,ymm4,20
7038	vpslld	ymm4,ymm4,32-20
7039	vpxor	ymm4,ymm4,ymm8
7040	vmovdqa	ymm8,YMMWORD[$L$rol8]
7041	vpaddd	ymm3,ymm3,ymm7
7042	vpaddd	ymm2,ymm2,ymm6
7043	vpaddd	ymm1,ymm1,ymm5
7044	vpaddd	ymm0,ymm0,ymm4
7045	vpxor	ymm15,ymm15,ymm3
7046	vpxor	ymm14,ymm14,ymm2
7047	vpxor	ymm13,ymm13,ymm1
7048	vpxor	ymm12,ymm12,ymm0
7049	vpshufb	ymm15,ymm15,ymm8
7050	vpshufb	ymm14,ymm14,ymm8
7051	vpshufb	ymm13,ymm13,ymm8
7052	mov	rdx,QWORD[((0+160+0))+rbp]
7053	mov	r15,rdx
7054	mulx	r14,r13,r10
7055	mulx	rdx,rax,r11
7056	imul	r15,r12
7057	add	r14,rax
7058	adc	r15,rdx
7059	vpshufb	ymm12,ymm12,ymm8
7060	vpaddd	ymm11,ymm11,ymm15
7061	vpaddd	ymm10,ymm10,ymm14
7062	vpaddd	ymm9,ymm9,ymm13
7063	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
7064	vpxor	ymm7,ymm7,ymm11
7065	vpxor	ymm6,ymm6,ymm10
7066	vpxor	ymm5,ymm5,ymm9
7067	mov	rdx,QWORD[((8+160+0))+rbp]
7068	mulx	rax,r10,r10
7069	add	r14,r10
7070	mulx	r9,r11,r11
7071	adc	r15,r11
7072	adc	r9,0
7073	imul	rdx,r12
7074	vpxor	ymm4,ymm4,ymm8
7075	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
7076	vpsrld	ymm8,ymm7,25
7077	vpslld	ymm7,ymm7,32-25
7078	vpxor	ymm7,ymm7,ymm8
7079	vpsrld	ymm8,ymm6,25
7080	vpslld	ymm6,ymm6,32-25
7081	vpxor	ymm6,ymm6,ymm8
7082	add	r15,rax
7083	adc	r9,rdx
7084	vpsrld	ymm8,ymm5,25
7085	vpslld	ymm5,ymm5,32-25
7086	vpxor	ymm5,ymm5,ymm8
7087	vpsrld	ymm8,ymm4,25
7088	vpslld	ymm4,ymm4,32-25
7089	vpxor	ymm4,ymm4,ymm8
; Undo the diagonal arrangement: rows B, C, D rotate by 12, 8 and 4 bytes this time.
7090	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
7091	vpalignr	ymm7,ymm7,ymm7,12
7092	vpalignr	ymm11,ymm11,ymm11,8
7093	vpalignr	ymm15,ymm15,ymm15,4
7094	vpalignr	ymm6,ymm6,ymm6,12
7095	vpalignr	ymm10,ymm10,ymm10,8
7096	vpalignr	ymm14,ymm14,ymm14,4
7097	vpalignr	ymm5,ymm5,ymm5,12
7098	vpalignr	ymm9,ymm9,ymm9,8
7099	vpalignr	ymm13,ymm13,ymm13,4
7100	vpalignr	ymm4,ymm4,ymm4,12
7101	vpalignr	ymm8,ymm8,ymm8,8
7102	mov	r10,r13
7103	mov	r11,r14
7104	mov	r12,r15
7105	and	r12,3
7106	mov	r13,r15
7107	and	r13,-4
7108	mov	r14,r9
7109	shrd	r15,r9,2
7110	shr	r9,2
7111	add	r15,r13
7112	adc	r9,r14
7113	add	r10,r15
7114	adc	r11,r9
7115	adc	r12,0
7116	vpalignr	ymm12,ymm12,ymm12,4
7117
; Next pass while rcx != 0; ymm8 holds group 0's row C again at this point.
7118	dec	rcx
7119	jne	NEAR $L$seal_avx2_main_loop_rounds
; After the rounds loop: add the starting state back into all four groups, hash two more
; 16-byte inputs, then XOR 512 bytes read from rsi with the result and store them at rdi.
; Rows A/B/C get the shared starting values; each row D gets its own saved counter row.
7120	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
7121	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
7122	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
7123	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
7124	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
7125	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
7126	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
7127	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
7128	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
7129	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
7130	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
7131	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
7132	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7133	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7134	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7135	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7136
; Park ymm0 in the spill slot: ymm0 is reused as a temporary while group 3 is written out.
7137	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
; Two more hash steps (bytes at [rdi+0] and [rdi+16]). With the 10 loop passes of 48 bytes
; each, rdi has advanced 10*48 + 32 = 512 bytes by the "lea" below.
7138	add	r10,QWORD[((0+0))+rdi]
7139	adc	r11,QWORD[((8+0))+rdi]
7140	adc	r12,1
7141	mov	rdx,QWORD[((0+160+0))+rbp]
7142	mov	r15,rdx
7143	mulx	r14,r13,r10
7144	mulx	rdx,rax,r11
7145	imul	r15,r12
7146	add	r14,rax
7147	adc	r15,rdx
7148	mov	rdx,QWORD[((8+160+0))+rbp]
7149	mulx	rax,r10,r10
7150	add	r14,r10
7151	mulx	r9,r11,r11
7152	adc	r15,r11
7153	adc	r9,0
7154	imul	rdx,r12
7155	add	r15,rax
7156	adc	r9,rdx
7157	mov	r10,r13
7158	mov	r11,r14
7159	mov	r12,r15
7160	and	r12,3
7161	mov	r13,r15
7162	and	r13,-4
7163	mov	r14,r9
7164	shrd	r15,r9,2
7165	shr	r9,2
7166	add	r15,r13
7167	adc	r9,r14
7168	add	r10,r15
7169	adc	r11,r9
7170	adc	r12,0
7171	add	r10,QWORD[((0+16))+rdi]
7172	adc	r11,QWORD[((8+16))+rdi]
7173	adc	r12,1
7174	mov	rdx,QWORD[((0+160+0))+rbp]
7175	mov	r15,rdx
7176	mulx	r14,r13,r10
7177	mulx	rdx,rax,r11
7178	imul	r15,r12
7179	add	r14,rax
7180	adc	r15,rdx
7181	mov	rdx,QWORD[((8+160+0))+rbp]
7182	mulx	rax,r10,r10
7183	add	r14,r10
7184	mulx	r9,r11,r11
7185	adc	r15,r11
7186	adc	r9,0
7187	imul	rdx,r12
7188	add	r15,rax
7189	adc	r9,rdx
7190	mov	r10,r13
7191	mov	r11,r14
7192	mov	r12,r15
7193	and	r12,3
7194	mov	r13,r15
7195	and	r13,-4
7196	mov	r14,r9
7197	shrd	r15,r9,2
7198	shr	r9,2
7199	add	r15,r13
7200	adc	r9,r14
7201	add	r10,r15
7202	adc	r11,r9
7203	adc	r12,0
7204
7205	lea	rdi,[32+rdi]
; Regroup lanes for output: imm 0x02 gathers the low 128-bit lanes of the two sources,
; imm 0x13 gathers the high lanes, so each 64-byte run comes from one lane position.
; Group 3 (ymm3/7/11/15) covers bytes 0..127.
7206	vperm2i128	ymm0,ymm7,ymm3,0x02
7207	vperm2i128	ymm7,ymm7,ymm3,0x13
7208	vperm2i128	ymm3,ymm15,ymm11,0x02
7209	vperm2i128	ymm11,ymm15,ymm11,0x13
7210	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
7211	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
7212	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
7213	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
7214	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
7215	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
7216	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
7217	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
7218
; Restore the parked ymm0; ymm3 is the temporary from here on. Group 2 covers bytes 128..255.
7219	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
7220	vperm2i128	ymm3,ymm6,ymm2,0x02
7221	vperm2i128	ymm6,ymm6,ymm2,0x13
7222	vperm2i128	ymm2,ymm14,ymm10,0x02
7223	vperm2i128	ymm10,ymm14,ymm10,0x13
7224	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
7225	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
7226	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
7227	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
7228	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
7229	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
7230	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
7231	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; Group 1 covers bytes 256..383.
7232	vperm2i128	ymm3,ymm5,ymm1,0x02
7233	vperm2i128	ymm5,ymm5,ymm1,0x13
7234	vperm2i128	ymm1,ymm13,ymm9,0x02
7235	vperm2i128	ymm9,ymm13,ymm9,0x13
7236	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
7237	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
7238	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
7239	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
7240	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
7241	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
7242	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
7243	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; Group 0 covers bytes 384..511.
7244	vperm2i128	ymm3,ymm4,ymm0,0x02
7245	vperm2i128	ymm4,ymm4,ymm0,0x13
7246	vperm2i128	ymm0,ymm12,ymm8,0x02
7247	vperm2i128	ymm8,ymm12,ymm8,0x13
7248	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
7249	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
7250	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
7251	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
7252	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
7253	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
7254	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
7255	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
7256
; 512 input bytes consumed: advance rsi, shrink the remaining count in rbx, and go round
; again while more than 512 bytes remain. rdi is not advanced here; the hash steps do that.
; NOTE(review): "jg" is a signed test on a byte count; it matches an unsigned test only
; while rbx < 2^63 -- confirm callers bound the length.
7257	lea	rsi,[512+rsi]
7258	sub	rbx,16*32
7259	cmp	rbx,16*32
7260	jg	NEAR $L$seal_avx2_main_loop
7261
; Main loop finished (at most 512 bytes remain in rbx). Run two hash steps on the bytes
; at [rdi+0] and [rdi+16], advance rdi by 32, then pick a tail routine by remaining size.
7262	add	r10,QWORD[((0+0))+rdi]
7263	adc	r11,QWORD[((8+0))+rdi]
7264	adc	r12,1
7265	mov	rdx,QWORD[((0+160+0))+rbp]
7266	mov	r15,rdx
7267	mulx	r14,r13,r10
7268	mulx	rdx,rax,r11
7269	imul	r15,r12
7270	add	r14,rax
7271	adc	r15,rdx
7272	mov	rdx,QWORD[((8+160+0))+rbp]
7273	mulx	rax,r10,r10
7274	add	r14,r10
7275	mulx	r9,r11,r11
7276	adc	r15,r11
7277	adc	r9,0
7278	imul	rdx,r12
7279	add	r15,rax
7280	adc	r9,rdx
7281	mov	r10,r13
7282	mov	r11,r14
7283	mov	r12,r15
7284	and	r12,3
7285	mov	r13,r15
7286	and	r13,-4
7287	mov	r14,r9
7288	shrd	r15,r9,2
7289	shr	r9,2
7290	add	r15,r13
7291	adc	r9,r14
7292	add	r10,r15
7293	adc	r11,r9
7294	adc	r12,0
7295	add	r10,QWORD[((0+16))+rdi]
7296	adc	r11,QWORD[((8+16))+rdi]
7297	adc	r12,1
7298	mov	rdx,QWORD[((0+160+0))+rbp]
7299	mov	r15,rdx
7300	mulx	r14,r13,r10
7301	mulx	rdx,rax,r11
7302	imul	r15,r12
7303	add	r14,rax
7304	adc	r15,rdx
7305	mov	rdx,QWORD[((8+160+0))+rbp]
7306	mulx	rax,r10,r10
7307	add	r14,r10
7308	mulx	r9,r11,r11
7309	adc	r15,r11
7310	adc	r9,0
7311	imul	rdx,r12
7312	add	r15,rax
7313	adc	r9,rdx
7314	mov	r10,r13
7315	mov	r11,r14
7316	mov	r12,r15
7317	and	r12,3
7318	mov	r13,r15
7319	and	r13,-4
7320	mov	r14,r9
7321	shrd	r15,r9,2
7322	shr	r9,2
7323	add	r15,r13
7324	adc	r9,r14
7325	add	r10,r15
7326	adc	r11,r9
7327	adc	r12,0
7328
7329	lea	rdi,[32+rdi]
; Loop parameters for the tail routines: rcx = 10 passes that hash three inputs each,
; r8 = 0 extra passes that hash only two.
7330	mov	rcx,10
7331	xor	r8,r8
7332
; Unsigned dispatch on the remaining byte count: >384 -> tail_512, >256 -> tail_384,
; >128 -> tail_256, otherwise fall through into tail_128.
7333	cmp	rbx,12*32
7334	ja	NEAR $L$seal_avx2_tail_512
7335	cmp	rbx,8*32
7336	ja	NEAR $L$seal_avx2_tail_384
7337	cmp	rbx,4*32
7338	ja	NEAR $L$seal_avx2_tail_256
7339
; Tail for at most 128 remaining bytes: prepare a single state group in ymm0/4/8/12.
; Entered with rcx/r8 set by the code that jumps or falls through to this label.
7340$L$seal_avx2_tail_128:
7341	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7342	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7343	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
; Counter row = saved counter + $L$avx2_inc; written back so it can be re-added after the rounds.
7344	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7345	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7346	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7347
; Loop entry used while rcx > 0: performs one extra 16-byte hash step on [rdi], advances
; rdi by 16, then falls into the two-hash loop body that follows.
7348$L$seal_avx2_tail_128_rounds_and_3xhash:
; acc (r10:r11:r12) += 16 bytes at [rdi], plus 1 in the top limb.
7349	add	r10,QWORD[((0+0))+rdi]
7350	adc	r11,QWORD[((8+0))+rdi]
7351	adc	r12,1
; acc *= the 128-bit multiplier at rbp+160 (low qword first, then high qword).
7352	mov	rdx,QWORD[((0+160+0))+rbp]
7353	mov	r15,rdx
7354	mulx	r14,r13,r10
7355	mulx	rdx,rax,r11
7356	imul	r15,r12
7357	add	r14,rax
7358	adc	r15,rdx
7359	mov	rdx,QWORD[((8+160+0))+rbp]
7360	mulx	rax,r10,r10
7361	add	r14,r10
7362	mulx	r9,r11,r11
7363	adc	r15,r11
7364	adc	r9,0
7365	imul	rdx,r12
7366	add	r15,rax
7367	adc	r9,rdx
; Fold: keep the low 130 bits and add 5 * (bits above them), as 4*hi + hi.
7368	mov	r10,r13
7369	mov	r11,r14
7370	mov	r12,r15
7371	and	r12,3
7372	mov	r13,r15
7373	and	r13,-4
7374	mov	r14,r9
7375	shrd	r15,r9,2
7376	shr	r9,2
7377	add	r15,r13
7378	adc	r9,r14
7379	add	r10,r15
7380	adc	r11,r9
7381	adc	r12,0
7382
7383	lea	rdi,[16+rdi]
; Loop body for the single-group tail: two quarter-rounds on ymm0/4/8/12 (with the row
; rotations between and after them), each followed by one 16-byte hash step; rdi advances
; by 32 per pass. ymm3 is scratch for the shift-based rotates; the shuffle masks are used
; directly as memory operands.
; Loop control: "dec rcx / jg" repeats via the 3xhash entry while rcx stays positive, then
; "dec r8 / jge" repeats via this label, so the body runs rcx + r8 times in total. The two
; setups visible in this routine use (rcx,r8) = (8,2) and (10,0), i.e. 10 passes either way.
7384$L$seal_avx2_tail_128_rounds_and_2xhash:
7385	vpaddd	ymm0,ymm0,ymm4
7386	vpxor	ymm12,ymm12,ymm0
7387	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7388	vpaddd	ymm8,ymm8,ymm12
7389	vpxor	ymm4,ymm4,ymm8
7390	vpsrld	ymm3,ymm4,20
7391	vpslld	ymm4,ymm4,12
7392	vpxor	ymm4,ymm4,ymm3
7393	vpaddd	ymm0,ymm0,ymm4
7394	vpxor	ymm12,ymm12,ymm0
7395	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7396	vpaddd	ymm8,ymm8,ymm12
7397	vpxor	ymm4,ymm4,ymm8
7398	vpslld	ymm3,ymm4,7
7399	vpsrld	ymm4,ymm4,25
7400	vpxor	ymm4,ymm4,ymm3
; Rotate rows D/C/B by 12/8/4 bytes per lane so the next quarter-round works on diagonals.
7401	vpalignr	ymm12,ymm12,ymm12,12
7402	vpalignr	ymm8,ymm8,ymm8,8
7403	vpalignr	ymm4,ymm4,ymm4,4
; Hash step on the 16 bytes at [rdi+0].
7404	add	r10,QWORD[((0+0))+rdi]
7405	adc	r11,QWORD[((8+0))+rdi]
7406	adc	r12,1
7407	mov	rdx,QWORD[((0+160+0))+rbp]
7408	mov	r15,rdx
7409	mulx	r14,r13,r10
7410	mulx	rdx,rax,r11
7411	imul	r15,r12
7412	add	r14,rax
7413	adc	r15,rdx
7414	mov	rdx,QWORD[((8+160+0))+rbp]
7415	mulx	rax,r10,r10
7416	add	r14,r10
7417	mulx	r9,r11,r11
7418	adc	r15,r11
7419	adc	r9,0
7420	imul	rdx,r12
7421	add	r15,rax
7422	adc	r9,rdx
7423	mov	r10,r13
7424	mov	r11,r14
7425	mov	r12,r15
7426	and	r12,3
7427	mov	r13,r15
7428	and	r13,-4
7429	mov	r14,r9
7430	shrd	r15,r9,2
7431	shr	r9,2
7432	add	r15,r13
7433	adc	r9,r14
7434	add	r10,r15
7435	adc	r11,r9
7436	adc	r12,0
; Second quarter-round, on the rotated rows.
7437	vpaddd	ymm0,ymm0,ymm4
7438	vpxor	ymm12,ymm12,ymm0
7439	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7440	vpaddd	ymm8,ymm8,ymm12
7441	vpxor	ymm4,ymm4,ymm8
7442	vpsrld	ymm3,ymm4,20
7443	vpslld	ymm4,ymm4,12
7444	vpxor	ymm4,ymm4,ymm3
7445	vpaddd	ymm0,ymm0,ymm4
7446	vpxor	ymm12,ymm12,ymm0
7447	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7448	vpaddd	ymm8,ymm8,ymm12
7449	vpxor	ymm4,ymm4,ymm8
7450	vpslld	ymm3,ymm4,7
7451	vpsrld	ymm4,ymm4,25
7452	vpxor	ymm4,ymm4,ymm3
; Rotate the rows back (4/8/12 for D/C/B).
7453	vpalignr	ymm12,ymm12,ymm12,4
7454	vpalignr	ymm8,ymm8,ymm8,8
7455	vpalignr	ymm4,ymm4,ymm4,12
; Hash step on the 16 bytes at [rdi+16].
7456	add	r10,QWORD[((0+16))+rdi]
7457	adc	r11,QWORD[((8+16))+rdi]
7458	adc	r12,1
7459	mov	rdx,QWORD[((0+160+0))+rbp]
7460	mov	r15,rdx
7461	mulx	r14,r13,r10
7462	mulx	rdx,rax,r11
7463	imul	r15,r12
7464	add	r14,rax
7465	adc	r15,rdx
7466	mov	rdx,QWORD[((8+160+0))+rbp]
7467	mulx	rax,r10,r10
7468	add	r14,r10
7469	mulx	r9,r11,r11
7470	adc	r15,r11
7471	adc	r9,0
7472	imul	rdx,r12
7473	add	r15,rax
7474	adc	r9,rdx
7475	mov	r10,r13
7476	mov	r11,r14
7477	mov	r12,r15
7478	and	r12,3
7479	mov	r13,r15
7480	and	r13,-4
7481	mov	r14,r9
7482	shrd	r15,r9,2
7483	shr	r9,2
7484	add	r15,r13
7485	adc	r9,r14
7486	add	r10,r15
7487	adc	r11,r9
7488	adc	r12,0
7489
7490	lea	rdi,[32+rdi]
7491	dec	rcx
7492	jg	NEAR $L$seal_avx2_tail_128_rounds_and_3xhash
7493	dec	r8
7494	jge	NEAR $L$seal_avx2_tail_128_rounds_and_2xhash
; Rounds done: add the starting state back (counter row from rbp+160+160).
7495	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7496	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7497	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7498	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Regroup lanes so ymm0, ymm4 hold the low-lane rows (A|B, C|D) and ymm8, ymm12 the
; high-lane rows. Nothing is XORed or stored here; that is left to the code at
; $L$seal_avx2_short_loop, which is outside this excerpt.
7499	vperm2i128	ymm3,ymm4,ymm0,0x13
7500	vperm2i128	ymm0,ymm4,ymm0,0x02
7501	vperm2i128	ymm4,ymm12,ymm8,0x02
7502	vperm2i128	ymm12,ymm12,ymm8,0x13
7503	vmovdqa	ymm8,ymm3
7504
7505	jmp	NEAR $L$seal_avx2_short_loop
7506
; Tail for 129..256 remaining bytes: prepare two state groups,
; ymm0/4/8/12 and ymm1/5/9/13, sharing rows A/B/C.
7507$L$seal_avx2_tail_256:
7508	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7509	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7510	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
7511	vmovdqa	ymm1,ymm0
7512	vmovdqa	ymm5,ymm4
7513	vmovdqa	ymm9,ymm8
; Counter rows: ymm13 = saved counter + inc, ymm12 = ymm13 + inc. Both are saved for the
; add-back after the rounds (ymm12 at rbp+160+160, ymm13 at rbp+160+192).
7514	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7515	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
7516	vpaddd	ymm12,ymm12,ymm13
7517	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7518	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
7519
; Loop entry used while rcx > 0: one extra 16-byte hash step on [rdi], then rdi += 16 and
; fall into the two-hash loop body. This variant multiplies with the classic "mul"
; (rdx:rax = rax * operand) rather than mulx.
7520$L$seal_avx2_tail_256_rounds_and_3xhash:
; acc (r10:r11:r12) += 16 bytes at [rdi], plus 1 in the top limb.
7521	add	r10,QWORD[((0+0))+rdi]
7522	adc	r11,QWORD[((8+0))+rdi]
7523	adc	r12,1
; Low multiplier qword m0: r14:r13 = m0*r10, then += m0*r11 one limb up, r15 = m0*r12.
7524	mov	rax,QWORD[((0+160+0))+rbp]
7525	mov	r15,rax
7526	mul	r10
7527	mov	r13,rax
7528	mov	r14,rdx
7529	mov	rax,QWORD[((0+160+0))+rbp]
7530	mul	r11
7531	imul	r15,r12
7532	add	r14,rax
7533	adc	r15,rdx
; High multiplier qword m1: m1*r10 lands in r14 (carry-out parked in r10), m1*r11 in r15,
; and r9 = m1*r12 plus the carries.
7534	mov	rax,QWORD[((8+160+0))+rbp]
7535	mov	r9,rax
7536	mul	r10
7537	add	r14,rax
7538	adc	rdx,0
7539	mov	r10,rdx
7540	mov	rax,QWORD[((8+160+0))+rbp]
7541	mul	r11
7542	add	r15,rax
7543	adc	rdx,0
7544	imul	r9,r12
7545	add	r15,r10
7546	adc	r9,rdx
; Fold: keep the low 130 bits and add 5 * (bits above them), as 4*hi + hi.
7547	mov	r10,r13
7548	mov	r11,r14
7549	mov	r12,r15
7550	and	r12,3
7551	mov	r13,r15
7552	and	r13,-4
7553	mov	r14,r9
7554	shrd	r15,r9,2
7555	shr	r9,2
7556	add	r15,r13
7557	adc	r9,r14
7558	add	r10,r15
7559	adc	r11,r9
7560	adc	r12,0
7561
7562	lea	rdi,[16+rdi]
; Loop body for the two-group tail: a quarter-round plus row rotation on group 0
; (ymm0/4/8/12) and group 1 (ymm1/5/9/13), a hash step on [rdi+0], the second quarter-round
; with the rotation undone on both groups, and a hash step on [rdi+16]; rdi += 32 per pass.
; ymm3 is scratch. Loop control is "dec rcx / jg" back to the 3xhash entry, then
; "dec r8 / jge" back to this label, so the body runs rcx + r8 times.
7563$L$seal_avx2_tail_256_rounds_and_2xhash:
7564	vpaddd	ymm0,ymm0,ymm4
7565	vpxor	ymm12,ymm12,ymm0
7566	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7567	vpaddd	ymm8,ymm8,ymm12
7568	vpxor	ymm4,ymm4,ymm8
7569	vpsrld	ymm3,ymm4,20
7570	vpslld	ymm4,ymm4,12
7571	vpxor	ymm4,ymm4,ymm3
7572	vpaddd	ymm0,ymm0,ymm4
7573	vpxor	ymm12,ymm12,ymm0
7574	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7575	vpaddd	ymm8,ymm8,ymm12
7576	vpxor	ymm4,ymm4,ymm8
7577	vpslld	ymm3,ymm4,7
7578	vpsrld	ymm4,ymm4,25
7579	vpxor	ymm4,ymm4,ymm3
7580	vpalignr	ymm12,ymm12,ymm12,12
7581	vpalignr	ymm8,ymm8,ymm8,8
7582	vpalignr	ymm4,ymm4,ymm4,4
; Same quarter-round and rotation for group 1.
7583	vpaddd	ymm1,ymm1,ymm5
7584	vpxor	ymm13,ymm13,ymm1
7585	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7586	vpaddd	ymm9,ymm9,ymm13
7587	vpxor	ymm5,ymm5,ymm9
7588	vpsrld	ymm3,ymm5,20
7589	vpslld	ymm5,ymm5,12
7590	vpxor	ymm5,ymm5,ymm3
7591	vpaddd	ymm1,ymm1,ymm5
7592	vpxor	ymm13,ymm13,ymm1
7593	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7594	vpaddd	ymm9,ymm9,ymm13
7595	vpxor	ymm5,ymm5,ymm9
7596	vpslld	ymm3,ymm5,7
7597	vpsrld	ymm5,ymm5,25
7598	vpxor	ymm5,ymm5,ymm3
7599	vpalignr	ymm13,ymm13,ymm13,12
7600	vpalignr	ymm9,ymm9,ymm9,8
7601	vpalignr	ymm5,ymm5,ymm5,4
; Hash step on the 16 bytes at [rdi+0] (mul-based multiply, then fold).
7602	add	r10,QWORD[((0+0))+rdi]
7603	adc	r11,QWORD[((8+0))+rdi]
7604	adc	r12,1
7605	mov	rax,QWORD[((0+160+0))+rbp]
7606	mov	r15,rax
7607	mul	r10
7608	mov	r13,rax
7609	mov	r14,rdx
7610	mov	rax,QWORD[((0+160+0))+rbp]
7611	mul	r11
7612	imul	r15,r12
7613	add	r14,rax
7614	adc	r15,rdx
7615	mov	rax,QWORD[((8+160+0))+rbp]
7616	mov	r9,rax
7617	mul	r10
7618	add	r14,rax
7619	adc	rdx,0
7620	mov	r10,rdx
7621	mov	rax,QWORD[((8+160+0))+rbp]
7622	mul	r11
7623	add	r15,rax
7624	adc	rdx,0
7625	imul	r9,r12
7626	add	r15,r10
7627	adc	r9,rdx
7628	mov	r10,r13
7629	mov	r11,r14
7630	mov	r12,r15
7631	and	r12,3
7632	mov	r13,r15
7633	and	r13,-4
7634	mov	r14,r9
7635	shrd	r15,r9,2
7636	shr	r9,2
7637	add	r15,r13
7638	adc	r9,r14
7639	add	r10,r15
7640	adc	r11,r9
7641	adc	r12,0
; Second quarter-round on group 0, rotating the rows back afterwards.
7642	vpaddd	ymm0,ymm0,ymm4
7643	vpxor	ymm12,ymm12,ymm0
7644	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7645	vpaddd	ymm8,ymm8,ymm12
7646	vpxor	ymm4,ymm4,ymm8
7647	vpsrld	ymm3,ymm4,20
7648	vpslld	ymm4,ymm4,12
7649	vpxor	ymm4,ymm4,ymm3
7650	vpaddd	ymm0,ymm0,ymm4
7651	vpxor	ymm12,ymm12,ymm0
7652	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7653	vpaddd	ymm8,ymm8,ymm12
7654	vpxor	ymm4,ymm4,ymm8
7655	vpslld	ymm3,ymm4,7
7656	vpsrld	ymm4,ymm4,25
7657	vpxor	ymm4,ymm4,ymm3
7658	vpalignr	ymm12,ymm12,ymm12,4
7659	vpalignr	ymm8,ymm8,ymm8,8
7660	vpalignr	ymm4,ymm4,ymm4,12
; Second quarter-round on group 1, rotating the rows back afterwards.
7661	vpaddd	ymm1,ymm1,ymm5
7662	vpxor	ymm13,ymm13,ymm1
7663	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7664	vpaddd	ymm9,ymm9,ymm13
7665	vpxor	ymm5,ymm5,ymm9
7666	vpsrld	ymm3,ymm5,20
7667	vpslld	ymm5,ymm5,12
7668	vpxor	ymm5,ymm5,ymm3
7669	vpaddd	ymm1,ymm1,ymm5
7670	vpxor	ymm13,ymm13,ymm1
7671	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7672	vpaddd	ymm9,ymm9,ymm13
7673	vpxor	ymm5,ymm5,ymm9
7674	vpslld	ymm3,ymm5,7
7675	vpsrld	ymm5,ymm5,25
7676	vpxor	ymm5,ymm5,ymm3
7677	vpalignr	ymm13,ymm13,ymm13,4
7678	vpalignr	ymm9,ymm9,ymm9,8
7679	vpalignr	ymm5,ymm5,ymm5,12
; Hash step on the 16 bytes at [rdi+16].
7680	add	r10,QWORD[((0+16))+rdi]
7681	adc	r11,QWORD[((8+16))+rdi]
7682	adc	r12,1
7683	mov	rax,QWORD[((0+160+0))+rbp]
7684	mov	r15,rax
7685	mul	r10
7686	mov	r13,rax
7687	mov	r14,rdx
7688	mov	rax,QWORD[((0+160+0))+rbp]
7689	mul	r11
7690	imul	r15,r12
7691	add	r14,rax
7692	adc	r15,rdx
7693	mov	rax,QWORD[((8+160+0))+rbp]
7694	mov	r9,rax
7695	mul	r10
7696	add	r14,rax
7697	adc	rdx,0
7698	mov	r10,rdx
7699	mov	rax,QWORD[((8+160+0))+rbp]
7700	mul	r11
7701	add	r15,rax
7702	adc	rdx,0
7703	imul	r9,r12
7704	add	r15,r10
7705	adc	r9,rdx
7706	mov	r10,r13
7707	mov	r11,r14
7708	mov	r12,r15
7709	and	r12,3
7710	mov	r13,r15
7711	and	r13,-4
7712	mov	r14,r9
7713	shrd	r15,r9,2
7714	shr	r9,2
7715	add	r15,r13
7716	adc	r9,r14
7717	add	r10,r15
7718	adc	r11,r9
7719	adc	r12,0
7720
7721	lea	rdi,[32+rdi]
7722	dec	rcx
7723	jg	NEAR $L$seal_avx2_tail_256_rounds_and_3xhash
7724	dec	r8
7725	jge	NEAR $L$seal_avx2_tail_256_rounds_and_2xhash
; Rounds done: add the starting state back into both groups (each with its own counter row).
7726	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
7727	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
7728	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
7729	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
7730	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7731	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7732	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7733	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Group 1: regroup lanes, XOR with 128 bytes read from rsi and store them at rdi.
7734	vperm2i128	ymm3,ymm5,ymm1,0x02
7735	vperm2i128	ymm5,ymm5,ymm1,0x13
7736	vperm2i128	ymm1,ymm13,ymm9,0x02
7737	vperm2i128	ymm9,ymm13,ymm9,0x13
7738	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
7739	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
7740	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
7741	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
7742	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
7743	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
7744	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
7745	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
; Group 0: only regrouped into ymm0, ymm4, ymm8, ymm12 (low-lane rows first); it is handed
; on un-XORed to the code at $L$seal_avx2_short_hash_remainder, outside this excerpt.
7746	vperm2i128	ymm3,ymm4,ymm0,0x13
7747	vperm2i128	ymm0,ymm4,ymm0,0x02
7748	vperm2i128	ymm4,ymm12,ymm8,0x02
7749	vperm2i128	ymm12,ymm12,ymm8,0x13
7750	vmovdqa	ymm8,ymm3
7751
; rcx = 128 (the number of bytes just stored at rdi); rsi and rbx step past those 128 bytes.
7752	mov	rcx,4*32
7753	lea	rsi,[128+rsi]
7754	sub	rbx,4*32
7755	jmp	NEAR $L$seal_avx2_short_hash_remainder
7756
; Tail for 257..384 remaining bytes: prepare three state groups,
; ymm0/4/8/12, ymm1/5/9/13 and ymm2/6/10/14, sharing rows A/B/C.
7757$L$seal_avx2_tail_384:
7758	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7759	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7760	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
7761	vmovdqa	ymm1,ymm0
7762	vmovdqa	ymm5,ymm4
7763	vmovdqa	ymm9,ymm8
7764	vmovdqa	ymm2,ymm0
7765	vmovdqa	ymm6,ymm4
7766	vmovdqa	ymm10,ymm8
; Counter rows step by $L$avx2_inc from the saved counter: ymm14 first, then ymm13, ymm12.
; Each is saved for the add-back after the rounds (slots rbp+160+224, +192, +160).
7767	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7768	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
7769	vpaddd	ymm13,ymm12,ymm14
7770	vpaddd	ymm12,ymm12,ymm13
7771	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7772	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
7773	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
7774
; $L$seal_avx2_tail_384_rounds_and_3xhash: extra 16-byte hash step executed
; before falling through into the double-round body, so that a pass entered
; here hashes three 16-byte blocks in total.
; Accumulator is r12:r11:r10; the multiplier qwords are read from
; [rbp+160] and [rbp+168].
7775$L$seal_avx2_tail_384_rounds_and_3xhash:
; acc += 16 bytes at [rdi], plus 1 in the third limb (the 2^128 bit)
7776	add	r10,QWORD[((0+0))+rdi]
7777	adc	r11,QWORD[((8+0))+rdi]
7778	adc	r12,1
; acc * [rbp+160] -> partial product in r15:r14:r13
7779	mov	rax,QWORD[((0+160+0))+rbp]
7780	mov	r15,rax
7781	mul	r10
7782	mov	r13,rax
7783	mov	r14,rdx
7784	mov	rax,QWORD[((0+160+0))+rbp]
7785	mul	r11
7786	imul	r15,r12
7787	add	r14,rax
7788	adc	r15,rdx
; acc * [rbp+168], added one limb higher -> full product in r9:r15:r14:r13
7789	mov	rax,QWORD[((8+160+0))+rbp]
7790	mov	r9,rax
7791	mul	r10
7792	add	r14,rax
7793	adc	rdx,0
7794	mov	r10,rdx
7795	mov	rax,QWORD[((8+160+0))+rbp]
7796	mul	r11
7797	add	r15,rax
7798	adc	rdx,0
7799	imul	r9,r12
7800	add	r15,r10
7801	adc	r9,rdx
; Reduce: keep the low 130 bits in r12:r11:r10 and add the bits above them
; back in twice, as (hi & ~3) and as (hi >> 2), i.e. 5 * (hi >> 2).
7802	mov	r10,r13
7803	mov	r11,r14
7804	mov	r12,r15
7805	and	r12,3
7806	mov	r13,r15
7807	and	r13,-4
7808	mov	r14,r9
7809	shrd	r15,r9,2
7810	shr	r9,2
7811	add	r15,r13
7812	adc	r9,r14
7813	add	r10,r15
7814	adc	r11,r9
7815	adc	r12,0
7816
; Step past the 16 bytes just hashed.
7817	lea	rdi,[16+rdi]
; $L$seal_avx2_tail_384_rounds_and_2xhash: one ChaCha20 double round over the
; three states (ymm0/4/8/12, ymm1/5/9/13, ymm2/6/10/14; ymm3 is scratch),
; interleaved with two 16-byte hash steps on the bytes at [rdi] and [rdi+16].
; The loop is driven by rcx and r8, which are set by code outside this view.
; After the loop the saved input rows are added back, 256 bytes from [rsi]
; are XORed with keystream and stored at [rdi], and the last 128 keystream
; bytes are left in ymm0, ymm4, ymm8, ymm12 for the short path.
7818$L$seal_avx2_tail_384_rounds_and_2xhash:
; State 0, first half of the double round: dword rotations by 16, 12, 8, 7.
7819	vpaddd	ymm0,ymm0,ymm4
7820	vpxor	ymm12,ymm12,ymm0
7821	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7822	vpaddd	ymm8,ymm8,ymm12
7823	vpxor	ymm4,ymm4,ymm8
7824	vpsrld	ymm3,ymm4,20
7825	vpslld	ymm4,ymm4,12
7826	vpxor	ymm4,ymm4,ymm3
7827	vpaddd	ymm0,ymm0,ymm4
7828	vpxor	ymm12,ymm12,ymm0
7829	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7830	vpaddd	ymm8,ymm8,ymm12
7831	vpxor	ymm4,ymm4,ymm8
7832	vpslld	ymm3,ymm4,7
7833	vpsrld	ymm4,ymm4,25
7834	vpxor	ymm4,ymm4,ymm3
; Rotate rows within each 128-bit lane by 12/8/4 bytes.
7835	vpalignr	ymm12,ymm12,ymm12,12
7836	vpalignr	ymm8,ymm8,ymm8,8
7837	vpalignr	ymm4,ymm4,ymm4,4
; State 1, first half of the double round.
7838	vpaddd	ymm1,ymm1,ymm5
7839	vpxor	ymm13,ymm13,ymm1
7840	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7841	vpaddd	ymm9,ymm9,ymm13
7842	vpxor	ymm5,ymm5,ymm9
7843	vpsrld	ymm3,ymm5,20
7844	vpslld	ymm5,ymm5,12
7845	vpxor	ymm5,ymm5,ymm3
7846	vpaddd	ymm1,ymm1,ymm5
7847	vpxor	ymm13,ymm13,ymm1
7848	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7849	vpaddd	ymm9,ymm9,ymm13
7850	vpxor	ymm5,ymm5,ymm9
7851	vpslld	ymm3,ymm5,7
7852	vpsrld	ymm5,ymm5,25
7853	vpxor	ymm5,ymm5,ymm3
7854	vpalignr	ymm13,ymm13,ymm13,12
7855	vpalignr	ymm9,ymm9,ymm9,8
7856	vpalignr	ymm5,ymm5,ymm5,4
; Hash step on the 16 bytes at [rdi]: accumulate (with the 2^128 bit),
; multiply by the qwords at [rbp+160] / [rbp+168], then reduce.
7857	add	r10,QWORD[((0+0))+rdi]
7858	adc	r11,QWORD[((8+0))+rdi]
7859	adc	r12,1
7860	mov	rax,QWORD[((0+160+0))+rbp]
7861	mov	r15,rax
7862	mul	r10
7863	mov	r13,rax
7864	mov	r14,rdx
7865	mov	rax,QWORD[((0+160+0))+rbp]
7866	mul	r11
7867	imul	r15,r12
7868	add	r14,rax
7869	adc	r15,rdx
7870	mov	rax,QWORD[((8+160+0))+rbp]
7871	mov	r9,rax
7872	mul	r10
7873	add	r14,rax
7874	adc	rdx,0
7875	mov	r10,rdx
7876	mov	rax,QWORD[((8+160+0))+rbp]
7877	mul	r11
7878	add	r15,rax
7879	adc	rdx,0
7880	imul	r9,r12
7881	add	r15,r10
7882	adc	r9,rdx
; Reduce: low 130 bits stay in r12:r11:r10, higher bits are added back as
; (hi & ~3) + (hi >> 2).
7883	mov	r10,r13
7884	mov	r11,r14
7885	mov	r12,r15
7886	and	r12,3
7887	mov	r13,r15
7888	and	r13,-4
7889	mov	r14,r9
7890	shrd	r15,r9,2
7891	shr	r9,2
7892	add	r15,r13
7893	adc	r9,r14
7894	add	r10,r15
7895	adc	r11,r9
7896	adc	r12,0
; State 2, first half of the double round.
7897	vpaddd	ymm2,ymm2,ymm6
7898	vpxor	ymm14,ymm14,ymm2
7899	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
7900	vpaddd	ymm10,ymm10,ymm14
7901	vpxor	ymm6,ymm6,ymm10
7902	vpsrld	ymm3,ymm6,20
7903	vpslld	ymm6,ymm6,12
7904	vpxor	ymm6,ymm6,ymm3
7905	vpaddd	ymm2,ymm2,ymm6
7906	vpxor	ymm14,ymm14,ymm2
7907	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
7908	vpaddd	ymm10,ymm10,ymm14
7909	vpxor	ymm6,ymm6,ymm10
7910	vpslld	ymm3,ymm6,7
7911	vpsrld	ymm6,ymm6,25
7912	vpxor	ymm6,ymm6,ymm3
7913	vpalignr	ymm14,ymm14,ymm14,12
7914	vpalignr	ymm10,ymm10,ymm10,8
7915	vpalignr	ymm6,ymm6,ymm6,4
; State 0, second half of the double round; the lane rotations afterwards
; (4/8/12) undo the 12/8/4 rotations applied in the first half.
7916	vpaddd	ymm0,ymm0,ymm4
7917	vpxor	ymm12,ymm12,ymm0
7918	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7919	vpaddd	ymm8,ymm8,ymm12
7920	vpxor	ymm4,ymm4,ymm8
7921	vpsrld	ymm3,ymm4,20
7922	vpslld	ymm4,ymm4,12
7923	vpxor	ymm4,ymm4,ymm3
7924	vpaddd	ymm0,ymm0,ymm4
7925	vpxor	ymm12,ymm12,ymm0
7926	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7927	vpaddd	ymm8,ymm8,ymm12
7928	vpxor	ymm4,ymm4,ymm8
7929	vpslld	ymm3,ymm4,7
7930	vpsrld	ymm4,ymm4,25
7931	vpxor	ymm4,ymm4,ymm3
7932	vpalignr	ymm12,ymm12,ymm12,4
7933	vpalignr	ymm8,ymm8,ymm8,8
7934	vpalignr	ymm4,ymm4,ymm4,12
; Hash step on the 16 bytes at [rdi+16].
7935	add	r10,QWORD[((0+16))+rdi]
7936	adc	r11,QWORD[((8+16))+rdi]
7937	adc	r12,1
7938	mov	rax,QWORD[((0+160+0))+rbp]
7939	mov	r15,rax
7940	mul	r10
7941	mov	r13,rax
7942	mov	r14,rdx
7943	mov	rax,QWORD[((0+160+0))+rbp]
7944	mul	r11
7945	imul	r15,r12
7946	add	r14,rax
7947	adc	r15,rdx
7948	mov	rax,QWORD[((8+160+0))+rbp]
7949	mov	r9,rax
7950	mul	r10
7951	add	r14,rax
7952	adc	rdx,0
7953	mov	r10,rdx
7954	mov	rax,QWORD[((8+160+0))+rbp]
7955	mul	r11
7956	add	r15,rax
7957	adc	rdx,0
7958	imul	r9,r12
7959	add	r15,r10
7960	adc	r9,rdx
7961	mov	r10,r13
7962	mov	r11,r14
7963	mov	r12,r15
7964	and	r12,3
7965	mov	r13,r15
7966	and	r13,-4
7967	mov	r14,r9
7968	shrd	r15,r9,2
7969	shr	r9,2
7970	add	r15,r13
7971	adc	r9,r14
7972	add	r10,r15
7973	adc	r11,r9
7974	adc	r12,0
; State 1, second half of the double round.
7975	vpaddd	ymm1,ymm1,ymm5
7976	vpxor	ymm13,ymm13,ymm1
7977	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7978	vpaddd	ymm9,ymm9,ymm13
7979	vpxor	ymm5,ymm5,ymm9
7980	vpsrld	ymm3,ymm5,20
7981	vpslld	ymm5,ymm5,12
7982	vpxor	ymm5,ymm5,ymm3
7983	vpaddd	ymm1,ymm1,ymm5
7984	vpxor	ymm13,ymm13,ymm1
7985	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7986	vpaddd	ymm9,ymm9,ymm13
7987	vpxor	ymm5,ymm5,ymm9
7988	vpslld	ymm3,ymm5,7
7989	vpsrld	ymm5,ymm5,25
7990	vpxor	ymm5,ymm5,ymm3
7991	vpalignr	ymm13,ymm13,ymm13,4
7992	vpalignr	ymm9,ymm9,ymm9,8
7993	vpalignr	ymm5,ymm5,ymm5,12
; State 2, second half of the double round.
7994	vpaddd	ymm2,ymm2,ymm6
7995	vpxor	ymm14,ymm14,ymm2
7996	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
7997	vpaddd	ymm10,ymm10,ymm14
7998	vpxor	ymm6,ymm6,ymm10
7999	vpsrld	ymm3,ymm6,20
8000	vpslld	ymm6,ymm6,12
8001	vpxor	ymm6,ymm6,ymm3
8002	vpaddd	ymm2,ymm2,ymm6
8003	vpxor	ymm14,ymm14,ymm2
8004	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8005	vpaddd	ymm10,ymm10,ymm14
8006	vpxor	ymm6,ymm6,ymm10
8007	vpslld	ymm3,ymm6,7
8008	vpsrld	ymm6,ymm6,25
8009	vpxor	ymm6,ymm6,ymm3
8010	vpalignr	ymm14,ymm14,ymm14,4
8011	vpalignr	ymm10,ymm10,ymm10,8
8012	vpalignr	ymm6,ymm6,ymm6,12
8013
; Step past the 32 bytes hashed in this pass.  Loop through the 3-hash entry
; while rcx is still positive after the decrement, then through the 2-hash
; entry while r8 is still non-negative after its decrement.
8014	lea	rdi,[32+rdi]
8015	dec	rcx
8016	jg	NEAR $L$seal_avx2_tail_384_rounds_and_3xhash
8017	dec	r8
8018	jge	NEAR $L$seal_avx2_tail_384_rounds_and_2xhash
; Rounds done: add the starting rows back into each state.
8019	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8020	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
8021	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
8022	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
8023	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8024	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
8025	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
8026	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8027	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8028	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
8029	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
8030	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 2: regroup 128-bit lanes into byte order, XOR with [rsi+0..127],
; store to [rdi+0..127].
8031	vperm2i128	ymm3,ymm6,ymm2,0x02
8032	vperm2i128	ymm6,ymm6,ymm2,0x13
8033	vperm2i128	ymm2,ymm14,ymm10,0x02
8034	vperm2i128	ymm10,ymm14,ymm10,0x13
8035	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
8036	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
8037	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
8038	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
8039	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
8040	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
8041	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
8042	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
; State 1: same for bytes 128..255.
8043	vperm2i128	ymm3,ymm5,ymm1,0x02
8044	vperm2i128	ymm5,ymm5,ymm1,0x13
8045	vperm2i128	ymm1,ymm13,ymm9,0x02
8046	vperm2i128	ymm9,ymm13,ymm9,0x13
8047	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
8048	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
8049	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
8050	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
8051	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
8052	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
8053	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
8054	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
; State 0: regroup only; its 128 keystream bytes stay in ymm0, ymm4, ymm8,
; ymm12 (in that order) for $L$seal_avx2_short_loop.
8055	vperm2i128	ymm3,ymm4,ymm0,0x13
8056	vperm2i128	ymm0,ymm4,ymm0,0x02
8057	vperm2i128	ymm4,ymm12,ymm8,0x02
8058	vperm2i128	ymm12,ymm12,ymm8,0x13
8059	vmovdqa	ymm8,ymm3
8060
; rcx = 256 bytes just stored at [rdi] that the target label will hash;
; rsi advances past the consumed input and rbx drops by the same amount.
8061	mov	rcx,8*32
8062	lea	rsi,[256+rsi]
8063	sub	rbx,8*32
8064	jmp	NEAR $L$seal_avx2_short_hash_remainder
8065
; $L$seal_avx2_tail_512: build four ChaCha20 working states ahead of the
; round loop that follows.  Rows 0-2 of every state come from
; $L$chacha20_consts and the frame slots [rbp+160+64] / [rbp+160+96]; the
; fourth rows are the value saved at [rbp+160+160] advanced by one to four
; multiples of $L$avx2_inc.
8066$L$seal_avx2_tail_512:
8067	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
8068	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
8069	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
8070	vmovdqa	ymm1,ymm0
8071	vmovdqa	ymm5,ymm4
8072	vmovdqa	ymm9,ymm8
8073	vmovdqa	ymm2,ymm0
8074	vmovdqa	ymm6,ymm4
8075	vmovdqa	ymm10,ymm8
8076	vmovdqa	ymm3,ymm0
8077	vmovdqa	ymm7,ymm4
8078	vmovdqa	ymm11,ymm8
; ymm15 = saved + inc, ymm14 = saved + 2*inc, ymm13 = saved + 3*inc,
; ymm12 = saved + 4*inc
8079	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
8080	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
8081	vpaddd	ymm14,ymm12,ymm15
8082	vpaddd	ymm13,ymm12,ymm14
8083	vpaddd	ymm12,ymm12,ymm13
; Keep the four starting fourth rows in the frame; they are added back to
; the state once the rounds are finished.
8084	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
8085	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
8086	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
8087	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
8088
; $L$seal_avx2_tail_512_rounds_and_3xhash: extra 16-byte hash step executed
; before falling through into the double-round body.  This variant uses mulx,
; so rdx holds the multiplier.  Accumulator is r12:r11:r10; the multiplier
; qwords are read from [rbp+160] and [rbp+168].
8089$L$seal_avx2_tail_512_rounds_and_3xhash:
; acc += 16 bytes at [rdi], plus 1 in the third limb (the 2^128 bit)
8090	add	r10,QWORD[((0+0))+rdi]
8091	adc	r11,QWORD[((8+0))+rdi]
8092	adc	r12,1
; acc * [rbp+160] -> partial product in r15:r14:r13
8093	mov	rdx,QWORD[((0+160+0))+rbp]
8094	mov	r15,rdx
8095	mulx	r14,r13,r10
8096	mulx	rdx,rax,r11
8097	imul	r15,r12
8098	add	r14,rax
8099	adc	r15,rdx
; acc * [rbp+168], added one limb higher -> full product in r9:r15:r14:r13
8100	mov	rdx,QWORD[((8+160+0))+rbp]
8101	mulx	rax,r10,r10
8102	add	r14,r10
8103	mulx	r9,r11,r11
8104	adc	r15,r11
8105	adc	r9,0
8106	imul	rdx,r12
8107	add	r15,rax
8108	adc	r9,rdx
; Reduce: keep the low 130 bits in r12:r11:r10 and add the bits above them
; back in twice, as (hi & ~3) and as (hi >> 2), i.e. 5 * (hi >> 2).
8109	mov	r10,r13
8110	mov	r11,r14
8111	mov	r12,r15
8112	and	r12,3
8113	mov	r13,r15
8114	and	r13,-4
8115	mov	r14,r9
8116	shrd	r15,r9,2
8117	shr	r9,2
8118	add	r15,r13
8119	adc	r9,r14
8120	add	r10,r15
8121	adc	r11,r9
8122	adc	r12,0
8123
; Step past the 16 bytes just hashed.
8124	lea	rdi,[16+rdi]
; $L$seal_avx2_tail_512_rounds_and_2xhash: one ChaCha20 double round over four
; states (rows in ymm0-3, ymm4-7, ymm8-11, ymm12-15), interleaved with two
; mulx-based 16-byte hash steps on the bytes at [rdi] and [rdi+16].
; All sixteen ymm registers hold state, so row ymm8 is repeatedly spilled to
; [rbp+160+128] and ymm8 is reused as shuffle mask / rotate temporary.
; The loop is driven by rcx and r8, which are set by code outside this view.
; After the loop the saved input rows are added back, 384 bytes from [rsi]
; are XORed with keystream and stored at [rdi], and the last 128 keystream
; bytes are left in ymm0, ymm4, ymm8, ymm12 for the short path.
8125$L$seal_avx2_tail_512_rounds_and_2xhash:
; First half of the double round, all four states in lock-step.
; Spill row ymm8, then use ymm8 as the rol16 shuffle mask.
8126	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8127	vmovdqa	ymm8,YMMWORD[$L$rol16]
8128	vpaddd	ymm3,ymm3,ymm7
8129	vpaddd	ymm2,ymm2,ymm6
8130	vpaddd	ymm1,ymm1,ymm5
8131	vpaddd	ymm0,ymm0,ymm4
8132	vpxor	ymm15,ymm15,ymm3
8133	vpxor	ymm14,ymm14,ymm2
8134	vpxor	ymm13,ymm13,ymm1
8135	vpxor	ymm12,ymm12,ymm0
8136	vpshufb	ymm15,ymm15,ymm8
8137	vpshufb	ymm14,ymm14,ymm8
8138	vpshufb	ymm13,ymm13,ymm8
8139	vpshufb	ymm12,ymm12,ymm8
8140	vpaddd	ymm11,ymm11,ymm15
8141	vpaddd	ymm10,ymm10,ymm14
8142	vpaddd	ymm9,ymm9,ymm13
; ymm8 = spilled row + ymm12 (the row is live in ymm8 again)
8143	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8144	vpxor	ymm7,ymm7,ymm11
8145	vpxor	ymm6,ymm6,ymm10
; Hash step 1: acc += 16 bytes at [rdi], plus the 2^128 bit.
8146	add	r10,QWORD[((0+0))+rdi]
8147	adc	r11,QWORD[((8+0))+rdi]
8148	adc	r12,1
8149	vpxor	ymm5,ymm5,ymm9
8150	vpxor	ymm4,ymm4,ymm8
; Spill row ymm8 again; ymm8 becomes the temporary for the rotate-by-12.
8151	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8152	vpsrld	ymm8,ymm7,20
8153	vpslld	ymm7,ymm7,32-20
8154	vpxor	ymm7,ymm7,ymm8
8155	vpsrld	ymm8,ymm6,20
8156	vpslld	ymm6,ymm6,32-20
8157	vpxor	ymm6,ymm6,ymm8
8158	vpsrld	ymm8,ymm5,20
8159	vpslld	ymm5,ymm5,32-20
8160	vpxor	ymm5,ymm5,ymm8
8161	vpsrld	ymm8,ymm4,20
8162	vpslld	ymm4,ymm4,32-20
8163	vpxor	ymm4,ymm4,ymm8
8164	vmovdqa	ymm8,YMMWORD[$L$rol8]
8165	vpaddd	ymm3,ymm3,ymm7
8166	vpaddd	ymm2,ymm2,ymm6
8167	vpaddd	ymm1,ymm1,ymm5
8168	vpaddd	ymm0,ymm0,ymm4
; Hash step 1: acc * [rbp+160] -> r15:r14:r13
8169	mov	rdx,QWORD[((0+160+0))+rbp]
8170	mov	r15,rdx
8171	mulx	r14,r13,r10
8172	mulx	rdx,rax,r11
8173	imul	r15,r12
8174	add	r14,rax
8175	adc	r15,rdx
8176	vpxor	ymm15,ymm15,ymm3
8177	vpxor	ymm14,ymm14,ymm2
8178	vpxor	ymm13,ymm13,ymm1
8179	vpxor	ymm12,ymm12,ymm0
8180	vpshufb	ymm15,ymm15,ymm8
8181	vpshufb	ymm14,ymm14,ymm8
8182	vpshufb	ymm13,ymm13,ymm8
8183	vpshufb	ymm12,ymm12,ymm8
8184	vpaddd	ymm11,ymm11,ymm15
8185	vpaddd	ymm10,ymm10,ymm14
8186	vpaddd	ymm9,ymm9,ymm13
8187	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8188	vpxor	ymm7,ymm7,ymm11
8189	vpxor	ymm6,ymm6,ymm10
8190	vpxor	ymm5,ymm5,ymm9
8191	vpxor	ymm4,ymm4,ymm8
8192	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8193	vpsrld	ymm8,ymm7,25
8194	vpslld	ymm7,ymm7,32-25
8195	vpxor	ymm7,ymm7,ymm8
; Hash step 1: acc * [rbp+168], added one limb higher.
8196	mov	rdx,QWORD[((8+160+0))+rbp]
8197	mulx	rax,r10,r10
8198	add	r14,r10
8199	mulx	r9,r11,r11
8200	adc	r15,r11
8201	adc	r9,0
8202	imul	rdx,r12
8203	vpsrld	ymm8,ymm6,25
8204	vpslld	ymm6,ymm6,32-25
8205	vpxor	ymm6,ymm6,ymm8
8206	vpsrld	ymm8,ymm5,25
8207	vpslld	ymm5,ymm5,32-25
8208	vpxor	ymm5,ymm5,ymm8
8209	vpsrld	ymm8,ymm4,25
8210	vpslld	ymm4,ymm4,32-25
8211	vpxor	ymm4,ymm4,ymm8
; Reload row ymm8, then rotate rows within each 128-bit lane by 4/8/12 bytes.
8212	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
8213	vpalignr	ymm7,ymm7,ymm7,4
8214	vpalignr	ymm11,ymm11,ymm11,8
8215	vpalignr	ymm15,ymm15,ymm15,12
8216	vpalignr	ymm6,ymm6,ymm6,4
8217	vpalignr	ymm10,ymm10,ymm10,8
8218	vpalignr	ymm14,ymm14,ymm14,12
8219	vpalignr	ymm5,ymm5,ymm5,4
8220	vpalignr	ymm9,ymm9,ymm9,8
8221	vpalignr	ymm13,ymm13,ymm13,12
8222	vpalignr	ymm4,ymm4,ymm4,4
; Hash step 1: finish the product in r9:r15:r14:r13.
8223	add	r15,rax
8224	adc	r9,rdx
8225	vpalignr	ymm8,ymm8,ymm8,8
8226	vpalignr	ymm12,ymm12,ymm12,12
; Second half of the double round, same spill pattern for ymm8.
8227	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8228	vmovdqa	ymm8,YMMWORD[$L$rol16]
8229	vpaddd	ymm3,ymm3,ymm7
8230	vpaddd	ymm2,ymm2,ymm6
8231	vpaddd	ymm1,ymm1,ymm5
8232	vpaddd	ymm0,ymm0,ymm4
8233	vpxor	ymm15,ymm15,ymm3
8234	vpxor	ymm14,ymm14,ymm2
8235	vpxor	ymm13,ymm13,ymm1
8236	vpxor	ymm12,ymm12,ymm0
8237	vpshufb	ymm15,ymm15,ymm8
8238	vpshufb	ymm14,ymm14,ymm8
8239	vpshufb	ymm13,ymm13,ymm8
8240	vpshufb	ymm12,ymm12,ymm8
8241	vpaddd	ymm11,ymm11,ymm15
8242	vpaddd	ymm10,ymm10,ymm14
8243	vpaddd	ymm9,ymm9,ymm13
8244	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
; Hash step 1: reduce. Low 130 bits stay in r12:r11:r10, higher bits are
; added back as (hi & ~3) + (hi >> 2).
8245	mov	r10,r13
8246	mov	r11,r14
8247	mov	r12,r15
8248	and	r12,3
8249	mov	r13,r15
8250	and	r13,-4
8251	mov	r14,r9
8252	shrd	r15,r9,2
8253	shr	r9,2
8254	add	r15,r13
8255	adc	r9,r14
8256	add	r10,r15
8257	adc	r11,r9
8258	adc	r12,0
8259	vpxor	ymm7,ymm7,ymm11
8260	vpxor	ymm6,ymm6,ymm10
8261	vpxor	ymm5,ymm5,ymm9
8262	vpxor	ymm4,ymm4,ymm8
8263	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8264	vpsrld	ymm8,ymm7,20
8265	vpslld	ymm7,ymm7,32-20
8266	vpxor	ymm7,ymm7,ymm8
8267	vpsrld	ymm8,ymm6,20
8268	vpslld	ymm6,ymm6,32-20
8269	vpxor	ymm6,ymm6,ymm8
8270	vpsrld	ymm8,ymm5,20
8271	vpslld	ymm5,ymm5,32-20
8272	vpxor	ymm5,ymm5,ymm8
8273	vpsrld	ymm8,ymm4,20
8274	vpslld	ymm4,ymm4,32-20
8275	vpxor	ymm4,ymm4,ymm8
8276	vmovdqa	ymm8,YMMWORD[$L$rol8]
8277	vpaddd	ymm3,ymm3,ymm7
8278	vpaddd	ymm2,ymm2,ymm6
; Hash step 2: acc += 16 bytes at [rdi+16], plus the 2^128 bit.
8279	add	r10,QWORD[((0+16))+rdi]
8280	adc	r11,QWORD[((8+16))+rdi]
8281	adc	r12,1
8282	vpaddd	ymm1,ymm1,ymm5
8283	vpaddd	ymm0,ymm0,ymm4
8284	vpxor	ymm15,ymm15,ymm3
8285	vpxor	ymm14,ymm14,ymm2
8286	vpxor	ymm13,ymm13,ymm1
8287	vpxor	ymm12,ymm12,ymm0
8288	vpshufb	ymm15,ymm15,ymm8
8289	vpshufb	ymm14,ymm14,ymm8
8290	vpshufb	ymm13,ymm13,ymm8
8291	vpshufb	ymm12,ymm12,ymm8
8292	vpaddd	ymm11,ymm11,ymm15
8293	vpaddd	ymm10,ymm10,ymm14
8294	vpaddd	ymm9,ymm9,ymm13
8295	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8296	vpxor	ymm7,ymm7,ymm11
8297	vpxor	ymm6,ymm6,ymm10
8298	vpxor	ymm5,ymm5,ymm9
8299	vpxor	ymm4,ymm4,ymm8
8300	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8301	vpsrld	ymm8,ymm7,25
; Hash step 2: acc * [rbp+160] -> r15:r14:r13
8302	mov	rdx,QWORD[((0+160+0))+rbp]
8303	mov	r15,rdx
8304	mulx	r14,r13,r10
8305	mulx	rdx,rax,r11
8306	imul	r15,r12
8307	add	r14,rax
8308	adc	r15,rdx
8309	vpslld	ymm7,ymm7,32-25
8310	vpxor	ymm7,ymm7,ymm8
8311	vpsrld	ymm8,ymm6,25
8312	vpslld	ymm6,ymm6,32-25
8313	vpxor	ymm6,ymm6,ymm8
8314	vpsrld	ymm8,ymm5,25
8315	vpslld	ymm5,ymm5,32-25
8316	vpxor	ymm5,ymm5,ymm8
8317	vpsrld	ymm8,ymm4,25
8318	vpslld	ymm4,ymm4,32-25
8319	vpxor	ymm4,ymm4,ymm8
; Reload row ymm8; lane rotations by 12/8/4 undo the 4/8/12 applied above.
8320	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
8321	vpalignr	ymm7,ymm7,ymm7,12
8322	vpalignr	ymm11,ymm11,ymm11,8
8323	vpalignr	ymm15,ymm15,ymm15,4
8324	vpalignr	ymm6,ymm6,ymm6,12
8325	vpalignr	ymm10,ymm10,ymm10,8
8326	vpalignr	ymm14,ymm14,ymm14,4
8327	vpalignr	ymm5,ymm5,ymm5,12
8328	vpalignr	ymm9,ymm9,ymm9,8
; Hash step 2: acc * [rbp+168], added one limb higher.
8329	mov	rdx,QWORD[((8+160+0))+rbp]
8330	mulx	rax,r10,r10
8331	add	r14,r10
8332	mulx	r9,r11,r11
8333	adc	r15,r11
8334	adc	r9,0
8335	imul	rdx,r12
8336	vpalignr	ymm13,ymm13,ymm13,4
8337	vpalignr	ymm4,ymm4,ymm4,12
8338	vpalignr	ymm8,ymm8,ymm8,8
8339	vpalignr	ymm12,ymm12,ymm12,4
8340
8341
8342
8343
8344
8345
8346
8347
8348
8349
8350
8351
8352
8353
8354
8355
; Hash step 2: finish the product in r9:r15:r14:r13.
8356	add	r15,rax
8357	adc	r9,rdx
8358
8359
8360
8361
8362
8363
8364
8365
8366
8367
8368
8369
8370
8371
8372
8373
8374
8375
8376
8377
; Hash step 2: reduce, as for step 1.
8378	mov	r10,r13
8379	mov	r11,r14
8380	mov	r12,r15
8381	and	r12,3
8382	mov	r13,r15
8383	and	r13,-4
8384	mov	r14,r9
8385	shrd	r15,r9,2
8386	shr	r9,2
8387	add	r15,r13
8388	adc	r9,r14
8389	add	r10,r15
8390	adc	r11,r9
8391	adc	r12,0
8392
; Step past the 32 bytes hashed in this pass.  Loop through the 3-hash entry
; while rcx is still positive after the decrement, then through the 2-hash
; entry while r8 is still non-negative after its decrement.
8393	lea	rdi,[32+rdi]
8394	dec	rcx
8395	jg	NEAR $L$seal_avx2_tail_512_rounds_and_3xhash
8396	dec	r8
8397	jge	NEAR $L$seal_avx2_tail_512_rounds_and_2xhash
; Rounds done: add the starting rows back into each state.
8398	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
8399	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
8400	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
8401	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
8402	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8403	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
8404	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
8405	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
8406	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8407	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
8408	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
8409	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8410	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8411	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
8412	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
8413	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
8414
; No free register is left, so park ymm0 in the scratch slot and use it as
; the temporary while state 3 is regrouped, XORed with [rsi+0..127] and
; stored to [rdi+0..127].
8415	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
8416	vperm2i128	ymm0,ymm7,ymm3,0x02
8417	vperm2i128	ymm7,ymm7,ymm3,0x13
8418	vperm2i128	ymm3,ymm15,ymm11,0x02
8419	vperm2i128	ymm11,ymm15,ymm11,0x13
8420	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
8421	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
8422	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
8423	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
8424	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
8425	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
8426	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
8427	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
8428
; Restore ymm0; state 2 covers bytes 128..255 (ymm3 is now the temporary).
8429	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
8430	vperm2i128	ymm3,ymm6,ymm2,0x02
8431	vperm2i128	ymm6,ymm6,ymm2,0x13
8432	vperm2i128	ymm2,ymm14,ymm10,0x02
8433	vperm2i128	ymm10,ymm14,ymm10,0x13
8434	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
8435	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
8436	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
8437	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
8438	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
8439	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
8440	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
8441	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; State 1 covers bytes 256..383.
8442	vperm2i128	ymm3,ymm5,ymm1,0x02
8443	vperm2i128	ymm5,ymm5,ymm1,0x13
8444	vperm2i128	ymm1,ymm13,ymm9,0x02
8445	vperm2i128	ymm9,ymm13,ymm9,0x13
8446	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
8447	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
8448	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
8449	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
8450	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
8451	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
8452	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
8453	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; State 0: regroup only; its 128 keystream bytes stay in ymm0, ymm4, ymm8,
; ymm12 (in that order) for $L$seal_avx2_short_loop.
8454	vperm2i128	ymm3,ymm4,ymm0,0x13
8455	vperm2i128	ymm0,ymm4,ymm0,0x02
8456	vperm2i128	ymm4,ymm12,ymm8,0x02
8457	vperm2i128	ymm12,ymm12,ymm8,0x13
8458	vmovdqa	ymm8,ymm3
8459
; rcx = 384 bytes just stored at [rdi] that the target label will hash;
; rsi advances past the consumed input and rbx drops by the same amount.
8460	mov	rcx,12*32
8461	lea	rsi,[384+rsi]
8462	sub	rbx,12*32
8463	jmp	NEAR $L$seal_avx2_short_hash_remainder
8464
; $L$seal_avx2_320: replicate the state found in ymm0/ymm4/ymm8/ymm12 on
; entry (loaded by code outside this view) into three working states whose
; fourth rows differ by successive $L$avx2_inc steps.  Pristine copies of
; rows 1 and 2 are kept in ymm7 / ymm11 and the three fourth rows are saved
; in the frame, so they can be added back after the rounds.
8465$L$seal_avx2_320:
8466	vmovdqa	ymm1,ymm0
8467	vmovdqa	ymm2,ymm0
8468	vmovdqa	ymm5,ymm4
8469	vmovdqa	ymm6,ymm4
8470	vmovdqa	ymm9,ymm8
8471	vmovdqa	ymm10,ymm8
; ymm13 = ymm12 + inc, ymm14 = ymm12 + 2*inc
8472	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
8473	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
8474	vmovdqa	ymm7,ymm4
8475	vmovdqa	ymm11,ymm8
8476	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
8477	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
8478	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
; r10 counts the 10 passes of the double-round loop that follows.
8479	mov	r10,10
; $L$seal_avx2_320_rounds: r10 passes of a ChaCha20 double round over three
; states (ymm0/4/8/12, ymm1/5/9/13, ymm2/6/10/14; ymm3 is scratch), with no
; hashing interleaved.  Afterwards the starting rows are added back, the
; first 32 keystream bytes are masked with $L$clamp and stored at [rbp+160]
; (the slot later read as the hash multiplier), and the remaining keystream
; is regrouped into byte order before jumping to $L$seal_avx2_short.
8480$L$seal_avx2_320_rounds:
; State 0, first half of the double round: dword rotations by 16, 12, 8, 7,
; then rows rotated within each 128-bit lane by 12/8/4 bytes.
8481	vpaddd	ymm0,ymm0,ymm4
8482	vpxor	ymm12,ymm12,ymm0
8483	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8484	vpaddd	ymm8,ymm8,ymm12
8485	vpxor	ymm4,ymm4,ymm8
8486	vpsrld	ymm3,ymm4,20
8487	vpslld	ymm4,ymm4,12
8488	vpxor	ymm4,ymm4,ymm3
8489	vpaddd	ymm0,ymm0,ymm4
8490	vpxor	ymm12,ymm12,ymm0
8491	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8492	vpaddd	ymm8,ymm8,ymm12
8493	vpxor	ymm4,ymm4,ymm8
8494	vpslld	ymm3,ymm4,7
8495	vpsrld	ymm4,ymm4,25
8496	vpxor	ymm4,ymm4,ymm3
8497	vpalignr	ymm12,ymm12,ymm12,12
8498	vpalignr	ymm8,ymm8,ymm8,8
8499	vpalignr	ymm4,ymm4,ymm4,4
; State 1, first half.
8500	vpaddd	ymm1,ymm1,ymm5
8501	vpxor	ymm13,ymm13,ymm1
8502	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8503	vpaddd	ymm9,ymm9,ymm13
8504	vpxor	ymm5,ymm5,ymm9
8505	vpsrld	ymm3,ymm5,20
8506	vpslld	ymm5,ymm5,12
8507	vpxor	ymm5,ymm5,ymm3
8508	vpaddd	ymm1,ymm1,ymm5
8509	vpxor	ymm13,ymm13,ymm1
8510	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8511	vpaddd	ymm9,ymm9,ymm13
8512	vpxor	ymm5,ymm5,ymm9
8513	vpslld	ymm3,ymm5,7
8514	vpsrld	ymm5,ymm5,25
8515	vpxor	ymm5,ymm5,ymm3
8516	vpalignr	ymm13,ymm13,ymm13,12
8517	vpalignr	ymm9,ymm9,ymm9,8
8518	vpalignr	ymm5,ymm5,ymm5,4
; State 2, first half.
8519	vpaddd	ymm2,ymm2,ymm6
8520	vpxor	ymm14,ymm14,ymm2
8521	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
8522	vpaddd	ymm10,ymm10,ymm14
8523	vpxor	ymm6,ymm6,ymm10
8524	vpsrld	ymm3,ymm6,20
8525	vpslld	ymm6,ymm6,12
8526	vpxor	ymm6,ymm6,ymm3
8527	vpaddd	ymm2,ymm2,ymm6
8528	vpxor	ymm14,ymm14,ymm2
8529	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8530	vpaddd	ymm10,ymm10,ymm14
8531	vpxor	ymm6,ymm6,ymm10
8532	vpslld	ymm3,ymm6,7
8533	vpsrld	ymm6,ymm6,25
8534	vpxor	ymm6,ymm6,ymm3
8535	vpalignr	ymm14,ymm14,ymm14,12
8536	vpalignr	ymm10,ymm10,ymm10,8
8537	vpalignr	ymm6,ymm6,ymm6,4
; State 0, second half; the 4/8/12 lane rotations undo the earlier 12/8/4.
8538	vpaddd	ymm0,ymm0,ymm4
8539	vpxor	ymm12,ymm12,ymm0
8540	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8541	vpaddd	ymm8,ymm8,ymm12
8542	vpxor	ymm4,ymm4,ymm8
8543	vpsrld	ymm3,ymm4,20
8544	vpslld	ymm4,ymm4,12
8545	vpxor	ymm4,ymm4,ymm3
8546	vpaddd	ymm0,ymm0,ymm4
8547	vpxor	ymm12,ymm12,ymm0
8548	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8549	vpaddd	ymm8,ymm8,ymm12
8550	vpxor	ymm4,ymm4,ymm8
8551	vpslld	ymm3,ymm4,7
8552	vpsrld	ymm4,ymm4,25
8553	vpxor	ymm4,ymm4,ymm3
8554	vpalignr	ymm12,ymm12,ymm12,4
8555	vpalignr	ymm8,ymm8,ymm8,8
8556	vpalignr	ymm4,ymm4,ymm4,12
; State 1, second half.
8557	vpaddd	ymm1,ymm1,ymm5
8558	vpxor	ymm13,ymm13,ymm1
8559	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8560	vpaddd	ymm9,ymm9,ymm13
8561	vpxor	ymm5,ymm5,ymm9
8562	vpsrld	ymm3,ymm5,20
8563	vpslld	ymm5,ymm5,12
8564	vpxor	ymm5,ymm5,ymm3
8565	vpaddd	ymm1,ymm1,ymm5
8566	vpxor	ymm13,ymm13,ymm1
8567	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8568	vpaddd	ymm9,ymm9,ymm13
8569	vpxor	ymm5,ymm5,ymm9
8570	vpslld	ymm3,ymm5,7
8571	vpsrld	ymm5,ymm5,25
8572	vpxor	ymm5,ymm5,ymm3
8573	vpalignr	ymm13,ymm13,ymm13,4
8574	vpalignr	ymm9,ymm9,ymm9,8
8575	vpalignr	ymm5,ymm5,ymm5,12
; State 2, second half.
8576	vpaddd	ymm2,ymm2,ymm6
8577	vpxor	ymm14,ymm14,ymm2
8578	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
8579	vpaddd	ymm10,ymm10,ymm14
8580	vpxor	ymm6,ymm6,ymm10
8581	vpsrld	ymm3,ymm6,20
8582	vpslld	ymm6,ymm6,12
8583	vpxor	ymm6,ymm6,ymm3
8584	vpaddd	ymm2,ymm2,ymm6
8585	vpxor	ymm14,ymm14,ymm2
8586	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8587	vpaddd	ymm10,ymm10,ymm14
8588	vpxor	ymm6,ymm6,ymm10
8589	vpslld	ymm3,ymm6,7
8590	vpsrld	ymm6,ymm6,25
8591	vpxor	ymm6,ymm6,ymm3
8592	vpalignr	ymm14,ymm14,ymm14,4
8593	vpalignr	ymm10,ymm10,ymm10,8
8594	vpalignr	ymm6,ymm6,ymm6,12
8595
8596	dec	r10
8597	jne	NEAR $L$seal_avx2_320_rounds
; Rounds done: add the starting rows back (constants, the copies kept in
; ymm7 / ymm11, and the fourth rows saved in the frame).
8598	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8599	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8600	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8601	vpaddd	ymm4,ymm4,ymm7
8602	vpaddd	ymm5,ymm5,ymm7
8603	vpaddd	ymm6,ymm6,ymm7
8604	vpaddd	ymm8,ymm8,ymm11
8605	vpaddd	ymm9,ymm9,ymm11
8606	vpaddd	ymm10,ymm10,ymm11
8607	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
8608	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8609	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
; ymm3 = low lanes of ymm0 and ymm4, i.e. the first 32 keystream bytes.
8610	vperm2i128	ymm3,ymm4,ymm0,0x02
8611
; $L$clamp masks the low 16 bytes and passes the high 16 bytes unchanged;
; the result is stored at [rbp+160].
8612	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
8613	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
8614
; Regroup the remaining lanes so the keystream is consumed in the order
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6.
8615	vperm2i128	ymm0,ymm4,ymm0,0x13
8616	vperm2i128	ymm4,ymm12,ymm8,0x13
8617	vperm2i128	ymm8,ymm5,ymm1,0x02
8618	vperm2i128	ymm12,ymm13,ymm9,0x02
8619	vperm2i128	ymm1,ymm5,ymm1,0x13
8620	vperm2i128	ymm5,ymm13,ymm9,0x13
8621	vperm2i128	ymm9,ymm6,ymm2,0x02
8622	vperm2i128	ymm13,ymm14,ymm10,0x02
8623	vperm2i128	ymm2,ymm6,ymm2,0x13
8624	vperm2i128	ymm6,ymm14,ymm10,0x13
8625	jmp	NEAR $L$seal_avx2_short
8626
; $L$seal_avx2_192: replicate the state found in ymm0/ymm4/ymm8/ymm12 on
; entry (loaded by code outside this view) into two working states
; (ymm0/4/8/12 and ymm1/5/9/13).  Pristine copies of the starting rows are
; kept in ymm2, ymm6, ymm10, ymm11 and ymm15 for the add-back after the rounds.
8627$L$seal_avx2_192:
8628	vmovdqa	ymm1,ymm0
8629	vmovdqa	ymm2,ymm0
8630	vmovdqa	ymm5,ymm4
8631	vmovdqa	ymm6,ymm4
8632	vmovdqa	ymm9,ymm8
8633	vmovdqa	ymm10,ymm8
; Second state's fourth row = first state's fourth row + $L$avx2_inc
8634	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
8635	vmovdqa	ymm11,ymm12
8636	vmovdqa	ymm15,ymm13
; r10 counts the 10 passes of the double-round loop that follows.
8637	mov	r10,10
; $L$seal_avx2_192_rounds: r10 passes of a ChaCha20 double round over two
; states (ymm0/4/8/12 and ymm1/5/9/13; ymm3 is scratch), with no hashing
; interleaved.  Afterwards the starting rows kept in ymm2, ymm6, ymm10, ymm11
; and ymm15 are added back, the first 32 keystream bytes are masked with
; $L$clamp and stored at [rbp+160], and the remaining keystream is regrouped
; into byte order.  Execution then falls through to the label that follows.
8638$L$seal_avx2_192_rounds:
; State 0, first half of the double round: dword rotations by 16, 12, 8, 7,
; then rows rotated within each 128-bit lane by 12/8/4 bytes.
8639	vpaddd	ymm0,ymm0,ymm4
8640	vpxor	ymm12,ymm12,ymm0
8641	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8642	vpaddd	ymm8,ymm8,ymm12
8643	vpxor	ymm4,ymm4,ymm8
8644	vpsrld	ymm3,ymm4,20
8645	vpslld	ymm4,ymm4,12
8646	vpxor	ymm4,ymm4,ymm3
8647	vpaddd	ymm0,ymm0,ymm4
8648	vpxor	ymm12,ymm12,ymm0
8649	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8650	vpaddd	ymm8,ymm8,ymm12
8651	vpxor	ymm4,ymm4,ymm8
8652	vpslld	ymm3,ymm4,7
8653	vpsrld	ymm4,ymm4,25
8654	vpxor	ymm4,ymm4,ymm3
8655	vpalignr	ymm12,ymm12,ymm12,12
8656	vpalignr	ymm8,ymm8,ymm8,8
8657	vpalignr	ymm4,ymm4,ymm4,4
; State 1, first half.
8658	vpaddd	ymm1,ymm1,ymm5
8659	vpxor	ymm13,ymm13,ymm1
8660	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8661	vpaddd	ymm9,ymm9,ymm13
8662	vpxor	ymm5,ymm5,ymm9
8663	vpsrld	ymm3,ymm5,20
8664	vpslld	ymm5,ymm5,12
8665	vpxor	ymm5,ymm5,ymm3
8666	vpaddd	ymm1,ymm1,ymm5
8667	vpxor	ymm13,ymm13,ymm1
8668	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8669	vpaddd	ymm9,ymm9,ymm13
8670	vpxor	ymm5,ymm5,ymm9
8671	vpslld	ymm3,ymm5,7
8672	vpsrld	ymm5,ymm5,25
8673	vpxor	ymm5,ymm5,ymm3
8674	vpalignr	ymm13,ymm13,ymm13,12
8675	vpalignr	ymm9,ymm9,ymm9,8
8676	vpalignr	ymm5,ymm5,ymm5,4
; State 0, second half; the 4/8/12 lane rotations undo the earlier 12/8/4.
8677	vpaddd	ymm0,ymm0,ymm4
8678	vpxor	ymm12,ymm12,ymm0
8679	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8680	vpaddd	ymm8,ymm8,ymm12
8681	vpxor	ymm4,ymm4,ymm8
8682	vpsrld	ymm3,ymm4,20
8683	vpslld	ymm4,ymm4,12
8684	vpxor	ymm4,ymm4,ymm3
8685	vpaddd	ymm0,ymm0,ymm4
8686	vpxor	ymm12,ymm12,ymm0
8687	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8688	vpaddd	ymm8,ymm8,ymm12
8689	vpxor	ymm4,ymm4,ymm8
8690	vpslld	ymm3,ymm4,7
8691	vpsrld	ymm4,ymm4,25
8692	vpxor	ymm4,ymm4,ymm3
8693	vpalignr	ymm12,ymm12,ymm12,4
8694	vpalignr	ymm8,ymm8,ymm8,8
8695	vpalignr	ymm4,ymm4,ymm4,12
; State 1, second half.
8696	vpaddd	ymm1,ymm1,ymm5
8697	vpxor	ymm13,ymm13,ymm1
8698	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8699	vpaddd	ymm9,ymm9,ymm13
8700	vpxor	ymm5,ymm5,ymm9
8701	vpsrld	ymm3,ymm5,20
8702	vpslld	ymm5,ymm5,12
8703	vpxor	ymm5,ymm5,ymm3
8704	vpaddd	ymm1,ymm1,ymm5
8705	vpxor	ymm13,ymm13,ymm1
8706	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8707	vpaddd	ymm9,ymm9,ymm13
8708	vpxor	ymm5,ymm5,ymm9
8709	vpslld	ymm3,ymm5,7
8710	vpsrld	ymm5,ymm5,25
8711	vpxor	ymm5,ymm5,ymm3
8712	vpalignr	ymm13,ymm13,ymm13,4
8713	vpalignr	ymm9,ymm9,ymm9,8
8714	vpalignr	ymm5,ymm5,ymm5,12
8715
8716	dec	r10
8717	jne	NEAR $L$seal_avx2_192_rounds
; Rounds done: add back the starting rows kept in ymm2, ymm6, ymm10, ymm11
; and ymm15.
8718	vpaddd	ymm0,ymm0,ymm2
8719	vpaddd	ymm1,ymm1,ymm2
8720	vpaddd	ymm4,ymm4,ymm6
8721	vpaddd	ymm5,ymm5,ymm6
8722	vpaddd	ymm8,ymm8,ymm10
8723	vpaddd	ymm9,ymm9,ymm10
8724	vpaddd	ymm12,ymm12,ymm11
8725	vpaddd	ymm13,ymm13,ymm15
; ymm3 = low lanes of ymm0 and ymm4, i.e. the first 32 keystream bytes.
8726	vperm2i128	ymm3,ymm4,ymm0,0x02
8727
; $L$clamp masks the low 16 bytes and passes the high 16 bytes unchanged;
; the result is stored at [rbp+160].
8728	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
8729	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
8730
; Regroup the remaining lanes so the keystream is consumed in the order
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5.
8731	vperm2i128	ymm0,ymm4,ymm0,0x13
8732	vperm2i128	ymm4,ymm12,ymm8,0x13
8733	vperm2i128	ymm8,ymm5,ymm1,0x02
8734	vperm2i128	ymm12,ymm13,ymm9,0x02
8735	vperm2i128	ymm1,ymm5,ymm1,0x13
8736	vperm2i128	ymm5,ymm13,ymm9,0x13
; $L$seal_avx2_short: common entry for the short paths once the keystream is
; queued in ymm registers.  Calls poly_hash_ad_internal (defined outside this
; view) and clears rcx so the hash-remainder loop below has nothing to do.
8737$L$seal_avx2_short:
; Register-to-itself move: has no effect on r8 or on flags.
; NOTE(review): presumably emitted by the generator when source and
; destination happen to be the same register - confirm against the Perl.
8738	mov	r8,r8
8739	call	poly_hash_ad_internal
8740	xor	rcx,rcx
; $L$seal_avx2_short_hash_remainder: while rcx >= 16, hash the 16 bytes at
; [rdi], then subtract 16 from rcx and advance rdi by 16.  Leaves for
; $L$seal_avx2_short_loop once fewer than 16 bytes remain in rcx.
; Accumulator is r12:r11:r10; multiplier qwords are at [rbp+160], [rbp+168].
8741$L$seal_avx2_short_hash_remainder:
8742	cmp	rcx,16
8743	jb	NEAR $L$seal_avx2_short_loop
; acc += 16 bytes at [rdi], plus 1 in the third limb (the 2^128 bit)
8744	add	r10,QWORD[((0+0))+rdi]
8745	adc	r11,QWORD[((8+0))+rdi]
8746	adc	r12,1
; acc * [rbp+160] -> partial product in r15:r14:r13
8747	mov	rax,QWORD[((0+160+0))+rbp]
8748	mov	r15,rax
8749	mul	r10
8750	mov	r13,rax
8751	mov	r14,rdx
8752	mov	rax,QWORD[((0+160+0))+rbp]
8753	mul	r11
8754	imul	r15,r12
8755	add	r14,rax
8756	adc	r15,rdx
; acc * [rbp+168], added one limb higher -> full product in r9:r15:r14:r13
8757	mov	rax,QWORD[((8+160+0))+rbp]
8758	mov	r9,rax
8759	mul	r10
8760	add	r14,rax
8761	adc	rdx,0
8762	mov	r10,rdx
8763	mov	rax,QWORD[((8+160+0))+rbp]
8764	mul	r11
8765	add	r15,rax
8766	adc	rdx,0
8767	imul	r9,r12
8768	add	r15,r10
8769	adc	r9,rdx
; Reduce: keep the low 130 bits in r12:r11:r10 and add the bits above them
; back in twice, as (hi & ~3) and as (hi >> 2), i.e. 5 * (hi >> 2).
8770	mov	r10,r13
8771	mov	r11,r14
8772	mov	r12,r15
8773	and	r12,3
8774	mov	r13,r15
8775	and	r13,-4
8776	mov	r14,r9
8777	shrd	r15,r9,2
8778	shr	r9,2
8779	add	r15,r13
8780	adc	r9,r14
8781	add	r10,r15
8782	adc	r11,r9
8783	adc	r12,0
8784
8785	sub	rcx,16
8786	add	rdi,16
8787	jmp	NEAR $L$seal_avx2_short_hash_remainder
; $L$seal_avx2_short_loop: while rbx >= 32, XOR 32 bytes at [rsi] with the
; keystream in ymm0, store the result at [rdi], hash those 32 stored bytes as
; two 16-byte blocks, and shift the queued keystream registers down by one.
; rbx drops by 32 and rsi / rdi advance by 32 per pass.
8788$L$seal_avx2_short_loop:
8789	cmp	rbx,32
8790	jb	NEAR $L$seal_avx2_short_tail
8791	sub	rbx,32
8792
8793	vpxor	ymm0,ymm0,YMMWORD[rsi]
8794	vmovdqu	YMMWORD[rdi],ymm0
8795	lea	rsi,[32+rsi]
8796
; Hash the first 16 stored bytes: accumulate (with the 2^128 bit), multiply
; by the qwords at [rbp+160] / [rbp+168], then reduce.
8797	add	r10,QWORD[((0+0))+rdi]
8798	adc	r11,QWORD[((8+0))+rdi]
8799	adc	r12,1
8800	mov	rax,QWORD[((0+160+0))+rbp]
8801	mov	r15,rax
8802	mul	r10
8803	mov	r13,rax
8804	mov	r14,rdx
8805	mov	rax,QWORD[((0+160+0))+rbp]
8806	mul	r11
8807	imul	r15,r12
8808	add	r14,rax
8809	adc	r15,rdx
8810	mov	rax,QWORD[((8+160+0))+rbp]
8811	mov	r9,rax
8812	mul	r10
8813	add	r14,rax
8814	adc	rdx,0
8815	mov	r10,rdx
8816	mov	rax,QWORD[((8+160+0))+rbp]
8817	mul	r11
8818	add	r15,rax
8819	adc	rdx,0
8820	imul	r9,r12
8821	add	r15,r10
8822	adc	r9,rdx
; Reduce: low 130 bits stay in r12:r11:r10, higher bits are added back as
; (hi & ~3) + (hi >> 2).
8823	mov	r10,r13
8824	mov	r11,r14
8825	mov	r12,r15
8826	and	r12,3
8827	mov	r13,r15
8828	and	r13,-4
8829	mov	r14,r9
8830	shrd	r15,r9,2
8831	shr	r9,2
8832	add	r15,r13
8833	adc	r9,r14
8834	add	r10,r15
8835	adc	r11,r9
8836	adc	r12,0
; Hash the second 16 stored bytes, at [rdi+16].
8837	add	r10,QWORD[((0+16))+rdi]
8838	adc	r11,QWORD[((8+16))+rdi]
8839	adc	r12,1
8840	mov	rax,QWORD[((0+160+0))+rbp]
8841	mov	r15,rax
8842	mul	r10
8843	mov	r13,rax
8844	mov	r14,rdx
8845	mov	rax,QWORD[((0+160+0))+rbp]
8846	mul	r11
8847	imul	r15,r12
8848	add	r14,rax
8849	adc	r15,rdx
8850	mov	rax,QWORD[((8+160+0))+rbp]
8851	mov	r9,rax
8852	mul	r10
8853	add	r14,rax
8854	adc	rdx,0
8855	mov	r10,rdx
8856	mov	rax,QWORD[((8+160+0))+rbp]
8857	mul	r11
8858	add	r15,rax
8859	adc	rdx,0
8860	imul	r9,r12
8861	add	r15,r10
8862	adc	r9,rdx
8863	mov	r10,r13
8864	mov	r11,r14
8865	mov	r12,r15
8866	and	r12,3
8867	mov	r13,r15
8868	and	r13,-4
8869	mov	r14,r9
8870	shrd	r15,r9,2
8871	shr	r9,2
8872	add	r15,r13
8873	adc	r9,r14
8874	add	r10,r15
8875	adc	r11,r9
8876	adc	r12,0
8877
8878	lea	rdi,[32+rdi]
8879
; Shift the keystream queue so the next 32 bytes land in ymm0:
; ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9 <- ymm13 <- ymm2 <- ymm6
8880	vmovdqa	ymm0,ymm4
8881	vmovdqa	ymm4,ymm8
8882	vmovdqa	ymm8,ymm12
8883	vmovdqa	ymm12,ymm1
8884	vmovdqa	ymm1,ymm5
8885	vmovdqa	ymm5,ymm9
8886	vmovdqa	ymm9,ymm13
8887	vmovdqa	ymm13,ymm2
8888	vmovdqa	ymm2,ymm6
8889	jmp	NEAR $L$seal_avx2_short_loop
; $L$seal_avx2_short_tail: fewer than 32 bytes remain in rbx.  If at least 16
; remain, XOR one 16-byte block from [rsi] with the low lane of ymm0, store
; it at [rdi], hash it, and move the high lane of ymm0 into xmm0 so the
; following code sees the next 16 keystream bytes there.
8890$L$seal_avx2_short_tail:
8891	cmp	rbx,16
8892	jb	NEAR $L$seal_avx2_exit
8893	sub	rbx,16
8894	vpxor	xmm3,xmm0,XMMWORD[rsi]
8895	vmovdqu	XMMWORD[rdi],xmm3
8896	lea	rsi,[16+rsi]
; Hash the 16 bytes just stored: accumulate (with the 2^128 bit), multiply
; by the qwords at [rbp+160] / [rbp+168], then reduce.
8897	add	r10,QWORD[((0+0))+rdi]
8898	adc	r11,QWORD[((8+0))+rdi]
8899	adc	r12,1
8900	mov	rax,QWORD[((0+160+0))+rbp]
8901	mov	r15,rax
8902	mul	r10
8903	mov	r13,rax
8904	mov	r14,rdx
8905	mov	rax,QWORD[((0+160+0))+rbp]
8906	mul	r11
8907	imul	r15,r12
8908	add	r14,rax
8909	adc	r15,rdx
8910	mov	rax,QWORD[((8+160+0))+rbp]
8911	mov	r9,rax
8912	mul	r10
8913	add	r14,rax
8914	adc	rdx,0
8915	mov	r10,rdx
8916	mov	rax,QWORD[((8+160+0))+rbp]
8917	mul	r11
8918	add	r15,rax
8919	adc	rdx,0
8920	imul	r9,r12
8921	add	r15,r10
8922	adc	r9,rdx
; Reduce: low 130 bits stay in r12:r11:r10, higher bits are added back as
; (hi & ~3) + (hi >> 2).
8923	mov	r10,r13
8924	mov	r11,r14
8925	mov	r12,r15
8926	and	r12,3
8927	mov	r13,r15
8928	and	r13,-4
8929	mov	r14,r9
8930	shrd	r15,r9,2
8931	shr	r9,2
8932	add	r15,r13
8933	adc	r9,r14
8934	add	r10,r15
8935	adc	r11,r9
8936	adc	r12,0
8937
8938	lea	rdi,[16+rdi]
; xmm0 = upper 128 bits of ymm0 (the next 16 keystream bytes)
8939	vextracti128	xmm0,ymm0,1
; $L$seal_avx2_exit: leave the AVX2 path.  vzeroupper clears the upper halves
; of the ymm registers before control transfers to $L$seal_sse_tail_16
; (defined outside this view).
8940$L$seal_avx2_exit:
8941	vzeroupper
8942	jmp	NEAR $L$seal_sse_tail_16
8943
8944
8945