• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifidn __OUTPUT_FORMAT__, win64
5default	rel
6%define XMMWORD
7%define YMMWORD
8%define ZMMWORD
9%define _CET_ENDBR
10
11%ifdef BORINGSSL_PREFIX
12%include "boringssl_prefix_symbols_nasm.inc"
13%endif
14section	.text code align=64
15
; gcm_init_clmul -- Win64 entry point (this file is only assembled when
; __OUTPUT_FORMAT__ is win64).
;   rcx = destination table; six 16-byte entries are written at [rcx]..[80+rcx]
;   rdx = source; 16 bytes are read from [rdx]
; Clobbers xmm0-xmm5 and flags. xmm6 is spilled to a 0x18-byte frame and
; restored before returning.
; Table layout produced here (K = the pre-processed key held in xmm2):
;   [rcx+0]  K        [rcx+16] K*K      [rcx+32] packed (lo^hi) of both
;   [rcx+48] K^3      [rcx+64] K^4      [rcx+80] packed (lo^hi) of both
; NOTE(review): presumably rdx is the GHASH key H and rcx the key-power table
; consumed by gcm_gmult_clmul/gcm_ghash_clmul -- confirm against the C prototype.
16global	gcm_init_clmul
17
18ALIGN	16
19gcm_init_clmul:
20
21$L$SEH_begin_gcm_init_clmul_1:
22_CET_ENDBR	; expands to nothing here (see the %define at the top of the file)
23$L$_init_clmul:
24	sub	rsp,0x18	; 24-byte frame: one 16-byte save slot for xmm6
25$L$SEH_prolog_gcm_init_clmul_2:
26	movaps	XMMWORD[rsp],xmm6	; xmm6 is callee-saved on Win64
27$L$SEH_prolog_gcm_init_clmul_3:
28	movdqu	xmm2,XMMWORD[rdx]	; unaligned 16-byte load of the source value
29	pshufd	xmm2,xmm2,78	; 78 = 01_00_11_10b: swap the two 64-bit halves
30
31
; Shift the 128-bit value left by one bit; xmm5 becomes an all-ones mask when
; the top bit (about to be shifted out) was set.
32	pshufd	xmm4,xmm2,255	; broadcast the top dword
33	movdqa	xmm3,xmm2
34	psllq	xmm2,1
35	pxor	xmm5,xmm5
36	psrlq	xmm3,63	; bit 63 of each qword = carry out of that qword
37	pcmpgtd	xmm5,xmm4	; 0 > dword  <=>  sign bit of top dword set
38	pslldq	xmm3,8	; move low-qword carry into the high qword
39	por	xmm2,xmm3
40
41
42	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]	; keep the constant only if the mask is set
43	pxor	xmm2,xmm5	; conditional XOR with the polynomial constant
44
45
; First multiply: xmm0 = K * K (three carry-less multiplies, Karatsuba style).
46	pshufd	xmm6,xmm2,78
47	movdqa	xmm0,xmm2
48	pxor	xmm6,xmm2	; xmm6 = K.lo ^ K.hi in both halves (reused by every multiply below)
49	movdqa	xmm1,xmm0
50	pshufd	xmm3,xmm0,78
51	pxor	xmm3,xmm0	; xmm3 = lo ^ hi of the other operand
52DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00  (lo * lo)
53DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11  (hi * hi)
54DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00  ((lo^hi) * (lo^hi))
55	pxor	xmm3,xmm0
56	pxor	xmm3,xmm1	; xmm3 = middle term
57
58	movdqa	xmm4,xmm3
59	psrldq	xmm3,8
60	pslldq	xmm4,8
61	pxor	xmm1,xmm3	; fold the middle term into the 256-bit product xmm1:xmm0
62	pxor	xmm0,xmm4
63
; Reduction, phase 1 (left shifts by 5, 1 and 57 combined with XORs).
64	movdqa	xmm4,xmm0
65	movdqa	xmm3,xmm0
66	psllq	xmm0,5
67	pxor	xmm3,xmm0
68	psllq	xmm0,1
69	pxor	xmm0,xmm3
70	psllq	xmm0,57
71	movdqa	xmm3,xmm0
72	pslldq	xmm0,8
73	psrldq	xmm3,8
74	pxor	xmm0,xmm4
75	pxor	xmm1,xmm3
76
77
; Reduction, phase 2 (right shifts by 1, 5 and 1); the 128-bit result ends up in xmm0.
78	movdqa	xmm4,xmm0
79	psrlq	xmm0,1
80	pxor	xmm1,xmm4
81	pxor	xmm4,xmm0
82	psrlq	xmm0,5
83	pxor	xmm0,xmm4
84	psrlq	xmm0,1
85	pxor	xmm0,xmm1
; Store K, K*K and the packed (lo^hi) words of both.
86	pshufd	xmm3,xmm2,78
87	pshufd	xmm4,xmm0,78
88	pxor	xmm3,xmm2
89	movdqu	XMMWORD[rcx],xmm2
90	pxor	xmm4,xmm0
91	movdqu	XMMWORD[16+rcx],xmm0
92DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
93	movdqu	XMMWORD[32+rcx],xmm4
; Second multiply: xmm0 = (K*K) * K.
94	movdqa	xmm1,xmm0
95	pshufd	xmm3,xmm0,78
96	pxor	xmm3,xmm0
97DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
98DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
99DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
100	pxor	xmm3,xmm0
101	pxor	xmm3,xmm1
102
103	movdqa	xmm4,xmm3
104	psrldq	xmm3,8
105	pslldq	xmm4,8
106	pxor	xmm1,xmm3
107	pxor	xmm0,xmm4
108
109	movdqa	xmm4,xmm0
110	movdqa	xmm3,xmm0
111	psllq	xmm0,5
112	pxor	xmm3,xmm0
113	psllq	xmm0,1
114	pxor	xmm0,xmm3
115	psllq	xmm0,57
116	movdqa	xmm3,xmm0
117	pslldq	xmm0,8
118	psrldq	xmm3,8
119	pxor	xmm0,xmm4
120	pxor	xmm1,xmm3
121
122
123	movdqa	xmm4,xmm0
124	psrlq	xmm0,1
125	pxor	xmm1,xmm4
126	pxor	xmm4,xmm0
127	psrlq	xmm0,5
128	pxor	xmm0,xmm4
129	psrlq	xmm0,1
130	pxor	xmm0,xmm1
131	movdqa	xmm5,xmm0	; keep K^3 for the stores below
; Third multiply: xmm0 = K^3 * K.
132	movdqa	xmm1,xmm0
133	pshufd	xmm3,xmm0,78
134	pxor	xmm3,xmm0
135DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
136DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
137DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
138	pxor	xmm3,xmm0
139	pxor	xmm3,xmm1
140
141	movdqa	xmm4,xmm3
142	psrldq	xmm3,8
143	pslldq	xmm4,8
144	pxor	xmm1,xmm3
145	pxor	xmm0,xmm4
146
147	movdqa	xmm4,xmm0
148	movdqa	xmm3,xmm0
149	psllq	xmm0,5
150	pxor	xmm3,xmm0
151	psllq	xmm0,1
152	pxor	xmm0,xmm3
153	psllq	xmm0,57
154	movdqa	xmm3,xmm0
155	pslldq	xmm0,8
156	psrldq	xmm3,8
157	pxor	xmm0,xmm4
158	pxor	xmm1,xmm3
159
160
161	movdqa	xmm4,xmm0
162	psrlq	xmm0,1
163	pxor	xmm1,xmm4
164	pxor	xmm4,xmm0
165	psrlq	xmm0,5
166	pxor	xmm0,xmm4
167	psrlq	xmm0,1
168	pxor	xmm0,xmm1
; Store K^3, K^4 and the packed (lo^hi) words of both.
169	pshufd	xmm3,xmm5,78
170	pshufd	xmm4,xmm0,78
171	pxor	xmm3,xmm5
172	movdqu	XMMWORD[48+rcx],xmm5
173	pxor	xmm4,xmm0
174	movdqu	XMMWORD[64+rcx],xmm0
175DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
176	movdqu	XMMWORD[80+rcx],xmm4
177	movaps	xmm6,XMMWORD[rsp]	; restore callee-saved xmm6
178	lea	rsp,[24+rsp]	; release the 0x18-byte frame
179	ret
180
181$L$SEH_end_gcm_init_clmul_4:
182
; gcm_gmult_clmul -- multiply one 16-byte value in place by the first table entry.
;   rcx = 16-byte value, read and written in place
;   rdx = table; [rdx] is the multiplier, [32+rdx] supplies its (lo^hi) word
; The value is byte-reversed with $L$bswap_mask on load and again on store.
; Uses only xmm0-xmm5 and never touches rsp, so it has no frame and no entry
; in the unwind tables.
; NOTE(review): presumably rcx is the GHASH accumulator Xi and rdx the table
; written by gcm_init_clmul -- confirm against the C prototype.
183global	gcm_gmult_clmul
184
185ALIGN	16
186gcm_gmult_clmul:
187
188_CET_ENDBR
189$L$_gmult_clmul:	; gcm_gmult_avx jumps here
190	movdqu	xmm0,XMMWORD[rcx]
191	movdqa	xmm5,XMMWORD[$L$bswap_mask]
192	movdqu	xmm2,XMMWORD[rdx]
193	movdqu	xmm4,XMMWORD[32+rdx]
194DB	102,15,56,0,197	; pshufb xmm0,xmm5  (reverse byte order)
195	movdqa	xmm1,xmm0
196	pshufd	xmm3,xmm0,78
197	pxor	xmm3,xmm0	; xmm3 = lo ^ hi of the value
198DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00  (lo * lo)
199DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11  (hi * hi)
200DB	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00  (middle product)
201	pxor	xmm3,xmm0
202	pxor	xmm3,xmm1
203
204	movdqa	xmm4,xmm3
205	psrldq	xmm3,8
206	pslldq	xmm4,8
207	pxor	xmm1,xmm3	; fold the middle term into xmm1:xmm0
208	pxor	xmm0,xmm4
209
; Reduction, phase 1.
210	movdqa	xmm4,xmm0
211	movdqa	xmm3,xmm0
212	psllq	xmm0,5
213	pxor	xmm3,xmm0
214	psllq	xmm0,1
215	pxor	xmm0,xmm3
216	psllq	xmm0,57
217	movdqa	xmm3,xmm0
218	pslldq	xmm0,8
219	psrldq	xmm3,8
220	pxor	xmm0,xmm4
221	pxor	xmm1,xmm3
222
223
; Reduction, phase 2; the result is left in xmm0.
224	movdqa	xmm4,xmm0
225	psrlq	xmm0,1
226	pxor	xmm1,xmm4
227	pxor	xmm4,xmm0
228	psrlq	xmm0,5
229	pxor	xmm0,xmm4
230	psrlq	xmm0,1
231	pxor	xmm0,xmm1
232DB	102,15,56,0,197	; pshufb xmm0,xmm5  (back to memory byte order)
233	movdqu	XMMWORD[rcx],xmm0
234	ret
235
236
; gcm_ghash_clmul -- fold a buffer of 16-byte blocks into a 16-byte accumulator.
;   rcx = 16-byte accumulator, loaded at entry and stored at $L$done
;   rdx = table; entries at +0, +16, +32, +48, +64 and +80 are read
;   r8  = input pointer, advanced as blocks are consumed
;   r9  = remaining length in bytes, consumed in steps of 0x10/0x20/0x40
; Frame: 168 bytes; xmm6..xmm15 are saved at [rsp+0]..[rsp+144] and restored
; at $L$done. rax is used first as a frame base and later as a constant.
; NOTE(review): the code assumes r9 is a non-zero multiple of 16 (the first
; "sub r9,0x10" would wrap on 0) -- confirm callers guarantee this.
237global	gcm_ghash_clmul
238
239ALIGN	32
240gcm_ghash_clmul:
241
242$L$SEH_begin_gcm_ghash_clmul_1:
243_CET_ENDBR
244$L$_ghash_clmul:
245	lea	rax,[((-136))+rsp]
246	lea	rsp,[((-32))+rax]	; rsp = entry rsp - 168
247$L$SEH_prolog_gcm_ghash_clmul_2:
248	movaps	XMMWORD[(-32)+rax],xmm6
249$L$SEH_prolog_gcm_ghash_clmul_3:
250	movaps	XMMWORD[(-16)+rax],xmm7
251$L$SEH_prolog_gcm_ghash_clmul_4:
252	movaps	XMMWORD[rax],xmm8
253$L$SEH_prolog_gcm_ghash_clmul_5:
254	movaps	XMMWORD[16+rax],xmm9
255$L$SEH_prolog_gcm_ghash_clmul_6:
256	movaps	XMMWORD[32+rax],xmm10
257$L$SEH_prolog_gcm_ghash_clmul_7:
258	movaps	XMMWORD[48+rax],xmm11
259$L$SEH_prolog_gcm_ghash_clmul_8:
260	movaps	XMMWORD[64+rax],xmm12
261$L$SEH_prolog_gcm_ghash_clmul_9:
262	movaps	XMMWORD[80+rax],xmm13
263$L$SEH_prolog_gcm_ghash_clmul_10:
264	movaps	XMMWORD[96+rax],xmm14
265$L$SEH_prolog_gcm_ghash_clmul_11:
266	movaps	XMMWORD[112+rax],xmm15
267$L$SEH_prolog_gcm_ghash_clmul_12:
268	movdqa	xmm10,XMMWORD[$L$bswap_mask]	; xmm10 = byte-reversal mask for the whole function
269
270	movdqu	xmm0,XMMWORD[rcx]	; xmm0 = running accumulator
271	movdqu	xmm2,XMMWORD[rdx]
272	movdqu	xmm7,XMMWORD[32+rdx]
273DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
274
275	sub	r9,0x10
276	jz	NEAR $L$odd_tail	; exactly one block
277
278	movdqu	xmm6,XMMWORD[16+rdx]
279	cmp	r9,0x30
280	jb	NEAR $L$skip4x	; fewer than four blocks in total
281
282	sub	r9,0x30
283	mov	rax,0xA040608020C0E000	; 8-entry byte table, looked up with pshufb in $L$mod4_loop
284	movdqu	xmm14,XMMWORD[48+rdx]
285	movdqu	xmm15,XMMWORD[64+rdx]
286
287
288
289
; Set up the first group of four blocks (input at [r8]..[48+r8]).
290	movdqu	xmm3,XMMWORD[48+r8]
291	movdqu	xmm11,XMMWORD[32+r8]
292DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
293DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
294	movdqa	xmm5,xmm3
295	pshufd	xmm4,xmm3,78
296	pxor	xmm4,xmm3
297DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
298DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
299DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
300
301	movdqa	xmm13,xmm11
302	pshufd	xmm12,xmm11,78
303	pxor	xmm12,xmm11
304DB	102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
305DB	102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
306DB	102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
307	xorps	xmm3,xmm11
308	xorps	xmm5,xmm13
309	movups	xmm7,XMMWORD[80+rdx]
310	xorps	xmm4,xmm12
311
312	movdqu	xmm11,XMMWORD[16+r8]
313	movdqu	xmm8,XMMWORD[r8]
314DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
315DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
316	movdqa	xmm13,xmm11
317	pshufd	xmm12,xmm11,78
318	pxor	xmm0,xmm8	; accumulator ^= first block of the group
319	pxor	xmm12,xmm11
320DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
321	movdqa	xmm1,xmm0
322	pshufd	xmm8,xmm0,78
323	pxor	xmm8,xmm0
324DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
325DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
326	xorps	xmm3,xmm11
327	xorps	xmm5,xmm13
328
329	lea	r8,[64+r8]
330	sub	r9,0x40
331	jc	NEAR $L$tail4x	; no further full group of four
332
333	jmp	NEAR $L$mod4_loop
334ALIGN	32
; Main loop: 64 bytes (four blocks) per iteration. The multiplies for the
; next group are interleaved with the reduction of the previous one.
335$L$mod4_loop:
336DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
337	xorps	xmm4,xmm12
338	movdqu	xmm11,XMMWORD[48+r8]
339DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
340DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
341	xorps	xmm0,xmm3
342	movdqu	xmm3,XMMWORD[32+r8]
343	movdqa	xmm13,xmm11
344DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
345	pshufd	xmm12,xmm11,78
346	xorps	xmm1,xmm5
347	pxor	xmm12,xmm11
348DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
349	movups	xmm7,XMMWORD[32+rdx]
350	xorps	xmm8,xmm4
351DB	102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
352	pshufd	xmm4,xmm3,78
353
354	pxor	xmm8,xmm0
355	movdqa	xmm5,xmm3
356	pxor	xmm8,xmm1
357	pxor	xmm4,xmm3
358	movdqa	xmm9,xmm8
359DB	102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
360	pslldq	xmm8,8
361	psrldq	xmm9,8
362	pxor	xmm0,xmm8
363	movdqa	xmm8,XMMWORD[$L$7_mask]
364	pxor	xmm1,xmm9
365DB	102,76,15,110,200	; movq xmm9,rax  (the lookup table loaded above)
366
367	pand	xmm8,xmm0	; low 3 bits of each qword of xmm0 become pshufb indices
368DB	102,69,15,56,0,200	; pshufb xmm9,xmm8  (table lookup)
369	pxor	xmm9,xmm0
370DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
371	psllq	xmm9,57
372	movdqa	xmm8,xmm9
373	pslldq	xmm9,8
374DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
375	psrldq	xmm8,8
376	pxor	xmm0,xmm9
377	pxor	xmm1,xmm8
378	movdqu	xmm8,XMMWORD[r8]
379
380	movdqa	xmm9,xmm0
381	psrlq	xmm0,1
382DB	102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
383	xorps	xmm3,xmm11
384	movdqu	xmm11,XMMWORD[16+r8]
385DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
386DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
387	xorps	xmm5,xmm13
388	movups	xmm7,XMMWORD[80+rdx]
389DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
390	pxor	xmm1,xmm9
391	pxor	xmm9,xmm0
392	psrlq	xmm0,5
393
394	movdqa	xmm13,xmm11
395	pxor	xmm4,xmm12
396	pshufd	xmm12,xmm11,78
397	pxor	xmm0,xmm9
398	pxor	xmm1,xmm8
399	pxor	xmm12,xmm11
400DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
401	psrlq	xmm0,1
402	pxor	xmm0,xmm1
403	movdqa	xmm1,xmm0
404DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
405	xorps	xmm3,xmm11
406	pshufd	xmm8,xmm0,78
407	pxor	xmm8,xmm0
408
409DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
410	xorps	xmm5,xmm13
411
412	lea	r8,[64+r8]
413	sub	r9,0x40
414	jnc	NEAR $L$mod4_loop
415
; Finish the last group of four: remaining multiplies, then a full reduction.
416$L$tail4x:
417DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
418DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
419DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
420	xorps	xmm4,xmm12
421	xorps	xmm0,xmm3
422	xorps	xmm1,xmm5
423	pxor	xmm1,xmm0
424	pxor	xmm8,xmm4
425
426	pxor	xmm8,xmm1
427	pxor	xmm1,xmm0
428
429	movdqa	xmm9,xmm8
430	psrldq	xmm8,8
431	pslldq	xmm9,8
432	pxor	xmm1,xmm8
433	pxor	xmm0,xmm9
434
435	movdqa	xmm4,xmm0
436	movdqa	xmm3,xmm0
437	psllq	xmm0,5
438	pxor	xmm3,xmm0
439	psllq	xmm0,1
440	pxor	xmm0,xmm3
441	psllq	xmm0,57
442	movdqa	xmm3,xmm0
443	pslldq	xmm0,8
444	psrldq	xmm3,8
445	pxor	xmm0,xmm4
446	pxor	xmm1,xmm3
447
448
449	movdqa	xmm4,xmm0
450	psrlq	xmm0,1
451	pxor	xmm1,xmm4
452	pxor	xmm4,xmm0
453	psrlq	xmm0,5
454	pxor	xmm0,xmm4
455	psrlq	xmm0,1
456	pxor	xmm0,xmm1
457	add	r9,0x40	; undo the loop's last subtraction; r9 = bytes still to process
458	jz	NEAR $L$done
459	movdqu	xmm7,XMMWORD[32+rdx]	; xmm7 was overwritten with [80+rdx] in the 4x path
460	sub	r9,0x10
461	jz	NEAR $L$odd_tail
; Two-blocks-at-a-time path: set up the first pair.
462$L$skip4x:
463
464
465
466
467
468	movdqu	xmm8,XMMWORD[r8]
469	movdqu	xmm3,XMMWORD[16+r8]
470DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
471DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
472	pxor	xmm0,xmm8
473
474	movdqa	xmm5,xmm3
475	pshufd	xmm4,xmm3,78
476	pxor	xmm4,xmm3
477DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
478DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
479DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
480
481	lea	r8,[32+r8]
482	nop
483	sub	r9,0x20
484	jbe	NEAR $L$even_tail
485	nop
486	jmp	NEAR $L$mod_loop
487
488ALIGN	32
; Loop: 32 bytes (two blocks) per iteration.
489$L$mod_loop:
490	movdqa	xmm1,xmm0
491	movdqa	xmm8,xmm4
492	pshufd	xmm4,xmm0,78
493	pxor	xmm4,xmm0
494
495DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
496DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
497DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
498
499	pxor	xmm0,xmm3
500	pxor	xmm1,xmm5
501	movdqu	xmm9,XMMWORD[r8]
502	pxor	xmm8,xmm0
503DB	102,69,15,56,0,202	; pshufb xmm9,xmm10
504	movdqu	xmm3,XMMWORD[16+r8]
505
506	pxor	xmm8,xmm1
507	pxor	xmm1,xmm9
508	pxor	xmm4,xmm8
509DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
510	movdqa	xmm8,xmm4
511	psrldq	xmm8,8
512	pslldq	xmm4,8
513	pxor	xmm1,xmm8
514	pxor	xmm0,xmm4
515
516	movdqa	xmm5,xmm3
517
518	movdqa	xmm9,xmm0
519	movdqa	xmm8,xmm0
520	psllq	xmm0,5
521	pxor	xmm8,xmm0
522DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
523	psllq	xmm0,1
524	pxor	xmm0,xmm8
525	psllq	xmm0,57
526	movdqa	xmm8,xmm0
527	pslldq	xmm0,8
528	psrldq	xmm8,8
529	pxor	xmm0,xmm9
530	pshufd	xmm4,xmm5,78
531	pxor	xmm1,xmm8
532	pxor	xmm4,xmm5
533
534	movdqa	xmm9,xmm0
535	psrlq	xmm0,1
536DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
537	pxor	xmm1,xmm9
538	pxor	xmm9,xmm0
539	psrlq	xmm0,5
540	pxor	xmm0,xmm9
541	lea	r8,[32+r8]
542	psrlq	xmm0,1
543DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
544	pxor	xmm0,xmm1
545
546	sub	r9,0x20
547	ja	NEAR $L$mod_loop
548
; Finish the last pair: remaining multiplies, then a full reduction.
549$L$even_tail:
550	movdqa	xmm1,xmm0
551	movdqa	xmm8,xmm4
552	pshufd	xmm4,xmm0,78
553	pxor	xmm4,xmm0
554
555DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
556DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
557DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
558
559	pxor	xmm0,xmm3
560	pxor	xmm1,xmm5
561	pxor	xmm8,xmm0
562	pxor	xmm8,xmm1
563	pxor	xmm4,xmm8
564	movdqa	xmm8,xmm4
565	psrldq	xmm8,8
566	pslldq	xmm4,8
567	pxor	xmm1,xmm8
568	pxor	xmm0,xmm4
569
570	movdqa	xmm4,xmm0
571	movdqa	xmm3,xmm0
572	psllq	xmm0,5
573	pxor	xmm3,xmm0
574	psllq	xmm0,1
575	pxor	xmm0,xmm3
576	psllq	xmm0,57
577	movdqa	xmm3,xmm0
578	pslldq	xmm0,8
579	psrldq	xmm3,8
580	pxor	xmm0,xmm4
581	pxor	xmm1,xmm3
582
583
584	movdqa	xmm4,xmm0
585	psrlq	xmm0,1
586	pxor	xmm1,xmm4
587	pxor	xmm4,xmm0
588	psrlq	xmm0,5
589	pxor	xmm0,xmm4
590	psrlq	xmm0,1
591	pxor	xmm0,xmm1
; r9 is zero here only when one more block is pending (the subtraction
; before $L$even_tail hit exactly zero); non-zero means the input is exhausted.
592	test	r9,r9
593	jnz	NEAR $L$done
594
; Single remaining block at [r8].
595$L$odd_tail:
596	movdqu	xmm8,XMMWORD[r8]
597DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
598	pxor	xmm0,xmm8
599	movdqa	xmm1,xmm0
600	pshufd	xmm3,xmm0,78
601	pxor	xmm3,xmm0
602DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
603DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
604DB	102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
605	pxor	xmm3,xmm0
606	pxor	xmm3,xmm1
607
608	movdqa	xmm4,xmm3
609	psrldq	xmm3,8
610	pslldq	xmm4,8
611	pxor	xmm1,xmm3
612	pxor	xmm0,xmm4
613
614	movdqa	xmm4,xmm0
615	movdqa	xmm3,xmm0
616	psllq	xmm0,5
617	pxor	xmm3,xmm0
618	psllq	xmm0,1
619	pxor	xmm0,xmm3
620	psllq	xmm0,57
621	movdqa	xmm3,xmm0
622	pslldq	xmm0,8
623	psrldq	xmm3,8
624	pxor	xmm0,xmm4
625	pxor	xmm1,xmm3
626
627
628	movdqa	xmm4,xmm0
629	psrlq	xmm0,1
630	pxor	xmm1,xmm4
631	pxor	xmm4,xmm0
632	psrlq	xmm0,5
633	pxor	xmm0,xmm4
634	psrlq	xmm0,1
635	pxor	xmm0,xmm1
; Common exit: store the accumulator and restore the callee-saved registers.
636$L$done:
637DB	102,65,15,56,0,194	; pshufb xmm0,xmm10  (back to memory byte order)
638	movdqu	XMMWORD[rcx],xmm0
639	movaps	xmm6,XMMWORD[rsp]
640	movaps	xmm7,XMMWORD[16+rsp]
641	movaps	xmm8,XMMWORD[32+rsp]
642	movaps	xmm9,XMMWORD[48+rsp]
643	movaps	xmm10,XMMWORD[64+rsp]
644	movaps	xmm11,XMMWORD[80+rsp]
645	movaps	xmm12,XMMWORD[96+rsp]
646	movaps	xmm13,XMMWORD[112+rsp]
647	movaps	xmm14,XMMWORD[128+rsp]
648	movaps	xmm15,XMMWORD[144+rsp]
649	lea	rsp,[168+rsp]	; release the 168-byte frame
650	ret
651
652$L$SEH_end_gcm_ghash_clmul_13:
653
; gcm_init_avx -- AVX-encoded variant of the table setup.
;   rcx = destination table; 4 iterations x 48 bytes = 192 bytes are written
;   rdx = source; 16 bytes are read from [rdx]
; Each iteration stores two consecutive powers of the pre-processed key K at
; [rcx] and [16+rcx]; the third slot of each 48-byte group is filled one step
; later through [(-16)+rcx] with the packed (lo^hi) words of that pair.
; Clobbers xmm0-xmm5, r10, rcx and flags; xmm6 is saved and restored.
; NOTE(review): presumably rdx is the GHASH key H and the table is the one
; read by gcm_ghash_avx -- confirm against the C prototype.
654global	gcm_init_avx
655
656ALIGN	32
657gcm_init_avx:
658
659_CET_ENDBR
660$L$SEH_begin_gcm_init_avx_1:
661	sub	rsp,0x18	; 24-byte frame: one 16-byte save slot for xmm6
662$L$SEH_prolog_gcm_init_avx_2:
663	movaps	XMMWORD[rsp],xmm6
664$L$SEH_prolog_gcm_init_avx_3:
665	vzeroupper
666
667	vmovdqu	xmm2,XMMWORD[rdx]
668	vpshufd	xmm2,xmm2,78	; swap the two 64-bit halves
669
670
; Shift left by one bit across 128 bits, then conditionally XOR the
; polynomial constant when the bit shifted out was set.
671	vpshufd	xmm4,xmm2,255	; broadcast the top dword
672	vpsrlq	xmm3,xmm2,63
673	vpsllq	xmm2,xmm2,1
674	vpxor	xmm5,xmm5,xmm5
675	vpcmpgtd	xmm5,xmm5,xmm4	; all-ones when the top dword is negative
676	vpslldq	xmm3,xmm3,8
677	vpor	xmm2,xmm2,xmm3
678
679
680	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
681	vpxor	xmm2,xmm2,xmm5
682
683	vpunpckhqdq	xmm6,xmm2,xmm2
684	vmovdqa	xmm0,xmm2
685	vpxor	xmm6,xmm6,xmm2	; xmm6 = K.lo ^ K.hi, reused by every multiply
686	mov	r10,4	; four iterations, two powers each
687	jmp	NEAR $L$init_start_avx
688ALIGN	32
; Loop top: store the previous pair's packed (lo^hi) word, then multiply by K
; once more so that xmm0 holds the next odd power.
689$L$init_loop_avx:
690	vpalignr	xmm5,xmm4,xmm3,8
691	vmovdqu	XMMWORD[(-16)+rcx],xmm5	; rcx was already advanced by 48
692	vpunpckhqdq	xmm3,xmm0,xmm0
693	vpxor	xmm3,xmm3,xmm0
694	vpclmulqdq	xmm1,xmm0,xmm2,0x11
695	vpclmulqdq	xmm0,xmm0,xmm2,0x00
696	vpclmulqdq	xmm3,xmm3,xmm6,0x00
697	vpxor	xmm4,xmm1,xmm0
698	vpxor	xmm3,xmm3,xmm4
699
700	vpslldq	xmm4,xmm3,8
701	vpsrldq	xmm3,xmm3,8
702	vpxor	xmm0,xmm0,xmm4
703	vpxor	xmm1,xmm1,xmm3
704	vpsllq	xmm3,xmm0,57
705	vpsllq	xmm4,xmm0,62
706	vpxor	xmm4,xmm4,xmm3
707	vpsllq	xmm3,xmm0,63
708	vpxor	xmm4,xmm4,xmm3
709	vpslldq	xmm3,xmm4,8
710	vpsrldq	xmm4,xmm4,8
711	vpxor	xmm0,xmm0,xmm3
712	vpxor	xmm1,xmm1,xmm4
713
714	vpsrlq	xmm4,xmm0,1
715	vpxor	xmm1,xmm1,xmm0
716	vpxor	xmm0,xmm0,xmm4
717	vpsrlq	xmm4,xmm4,5
718	vpxor	xmm0,xmm0,xmm4
719	vpsrlq	xmm0,xmm0,1
720	vpxor	xmm0,xmm0,xmm1
; Entry point of the first iteration: xmm0 = current power, multiply by K
; again to get the next one, then store both.
721$L$init_start_avx:
722	vmovdqa	xmm5,xmm0	; keep the current power for the store below
723	vpunpckhqdq	xmm3,xmm0,xmm0
724	vpxor	xmm3,xmm3,xmm0
725	vpclmulqdq	xmm1,xmm0,xmm2,0x11
726	vpclmulqdq	xmm0,xmm0,xmm2,0x00
727	vpclmulqdq	xmm3,xmm3,xmm6,0x00
728	vpxor	xmm4,xmm1,xmm0
729	vpxor	xmm3,xmm3,xmm4
730
731	vpslldq	xmm4,xmm3,8
732	vpsrldq	xmm3,xmm3,8
733	vpxor	xmm0,xmm0,xmm4
734	vpxor	xmm1,xmm1,xmm3
735	vpsllq	xmm3,xmm0,57
736	vpsllq	xmm4,xmm0,62
737	vpxor	xmm4,xmm4,xmm3
738	vpsllq	xmm3,xmm0,63
739	vpxor	xmm4,xmm4,xmm3
740	vpslldq	xmm3,xmm4,8
741	vpsrldq	xmm4,xmm4,8
742	vpxor	xmm0,xmm0,xmm3
743	vpxor	xmm1,xmm1,xmm4
744
745	vpsrlq	xmm4,xmm0,1
746	vpxor	xmm1,xmm1,xmm0
747	vpxor	xmm0,xmm0,xmm4
748	vpsrlq	xmm4,xmm4,5
749	vpxor	xmm0,xmm0,xmm4
750	vpsrlq	xmm0,xmm0,1
751	vpxor	xmm0,xmm0,xmm1
752	vpshufd	xmm3,xmm5,78
753	vpshufd	xmm4,xmm0,78
754	vpxor	xmm3,xmm3,xmm5	; xmm3 = lo^hi of the lower power
755	vmovdqu	XMMWORD[rcx],xmm5
756	vpxor	xmm4,xmm4,xmm0	; xmm4 = lo^hi of the higher power
757	vmovdqu	XMMWORD[16+rcx],xmm0
758	lea	rcx,[48+rcx]	; next 48-byte group
759	sub	r10,1
760	jnz	NEAR $L$init_loop_avx
761
; Final packed word: note the operand order is swapped relative to the
; vpalignr at the top of the loop.
762	vpalignr	xmm5,xmm3,xmm4,8
763	vmovdqu	XMMWORD[(-16)+rcx],xmm5
764
765	vzeroupper
766	movaps	xmm6,XMMWORD[rsp]
767	lea	rsp,[24+rsp]
768	ret
769$L$SEH_end_gcm_init_avx_4:
770
771
; gcm_gmult_avx -- no separate AVX implementation: tail-jumps into the body of
; gcm_gmult_clmul, so arguments (rcx, rdx) and the return address pass through
; unchanged.
772global	gcm_gmult_avx
773
774ALIGN	32
775gcm_gmult_avx:
776
777_CET_ENDBR
778	jmp	NEAR $L$_gmult_clmul
779
780
; gcm_ghash_avx -- AVX-encoded variant of gcm_ghash_clmul.
;   rcx = 16-byte accumulator, loaded at entry and stored before returning
;   rdx = table; biased by +64 below, so [((N-64))+rdx] addresses table+N
;         (offsets 0..176 are read as 16-byte entries, plus a qword at 184)
;   r8  = input pointer
;   r9  = remaining length in bytes
; Inputs of 0x80 bytes or more are consumed eight blocks at a time; any
; remainder (up to seven blocks) is handled by $L$short_avx, working backwards
; from the end of the buffer.
; Frame: 168 bytes; xmm6..xmm15 saved at [rsp+0]..[rsp+144].
; NOTE(review): the code assumes r9 is a non-zero multiple of 16 -- confirm
; callers guarantee this.
781global	gcm_ghash_avx
782
783ALIGN	32
784gcm_ghash_avx:
785
786_CET_ENDBR
787$L$SEH_begin_gcm_ghash_avx_1:
788	lea	rax,[((-136))+rsp]
789	lea	rsp,[((-32))+rax]	; rsp = entry rsp - 168
790$L$SEH_prolog_gcm_ghash_avx_2:
791	movaps	XMMWORD[(-32)+rax],xmm6
792$L$SEH_prolog_gcm_ghash_avx_3:
793	movaps	XMMWORD[(-16)+rax],xmm7
794$L$SEH_prolog_gcm_ghash_avx_4:
795	movaps	XMMWORD[rax],xmm8
796$L$SEH_prolog_gcm_ghash_avx_5:
797	movaps	XMMWORD[16+rax],xmm9
798$L$SEH_prolog_gcm_ghash_avx_6:
799	movaps	XMMWORD[32+rax],xmm10
800$L$SEH_prolog_gcm_ghash_avx_7:
801	movaps	XMMWORD[48+rax],xmm11
802$L$SEH_prolog_gcm_ghash_avx_8:
803	movaps	XMMWORD[64+rax],xmm12
804$L$SEH_prolog_gcm_ghash_avx_9:
805	movaps	XMMWORD[80+rax],xmm13
806$L$SEH_prolog_gcm_ghash_avx_10:
807	movaps	XMMWORD[96+rax],xmm14
808$L$SEH_prolog_gcm_ghash_avx_11:
809	movaps	XMMWORD[112+rax],xmm15
810$L$SEH_prolog_gcm_ghash_avx_12:
811	vzeroupper
812
813	vmovdqu	xmm10,XMMWORD[rcx]	; xmm10 = running accumulator
814	lea	r10,[$L$0x1c2_polynomial]	; r10 -> reduction constant
815	lea	rdx,[64+rdx]	; bias the table pointer; see header
816	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]	; xmm13 = byte-reversal mask
817	vpshufb	xmm10,xmm10,xmm13
818	cmp	r9,0x80
819	jb	NEAR $L$short_avx	; fewer than eight blocks
820	sub	r9,0x80
821
; First group of eight blocks: blocks are loaded from [112+r8] down to [r8]
; and multiplied by successive table entries; partial products accumulate in
; xmm0/xmm3 (low), xmm1/xmm4 (high) and xmm2/xmm5 (middle).
822	vmovdqu	xmm14,XMMWORD[112+r8]
823	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
824	vpshufb	xmm14,xmm14,xmm13
825	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
826
827	vpunpckhqdq	xmm9,xmm14,xmm14
828	vmovdqu	xmm15,XMMWORD[96+r8]
829	vpclmulqdq	xmm0,xmm14,xmm6,0x00
830	vpxor	xmm9,xmm9,xmm14
831	vpshufb	xmm15,xmm15,xmm13
832	vpclmulqdq	xmm1,xmm14,xmm6,0x11
833	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
834	vpunpckhqdq	xmm8,xmm15,xmm15
835	vmovdqu	xmm14,XMMWORD[80+r8]
836	vpclmulqdq	xmm2,xmm9,xmm7,0x00
837	vpxor	xmm8,xmm8,xmm15
838
839	vpshufb	xmm14,xmm14,xmm13
840	vpclmulqdq	xmm3,xmm15,xmm6,0x00
841	vpunpckhqdq	xmm9,xmm14,xmm14
842	vpclmulqdq	xmm4,xmm15,xmm6,0x11
843	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
844	vpxor	xmm9,xmm9,xmm14
845	vmovdqu	xmm15,XMMWORD[64+r8]
846	vpclmulqdq	xmm5,xmm8,xmm7,0x10
847	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
848
849	vpshufb	xmm15,xmm15,xmm13
850	vpxor	xmm3,xmm3,xmm0
851	vpclmulqdq	xmm0,xmm14,xmm6,0x00
852	vpxor	xmm4,xmm4,xmm1
853	vpunpckhqdq	xmm8,xmm15,xmm15
854	vpclmulqdq	xmm1,xmm14,xmm6,0x11
855	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
856	vpxor	xmm5,xmm5,xmm2
857	vpclmulqdq	xmm2,xmm9,xmm7,0x00
858	vpxor	xmm8,xmm8,xmm15
859
860	vmovdqu	xmm14,XMMWORD[48+r8]
861	vpxor	xmm0,xmm0,xmm3
862	vpclmulqdq	xmm3,xmm15,xmm6,0x00
863	vpxor	xmm1,xmm1,xmm4
864	vpshufb	xmm14,xmm14,xmm13
865	vpclmulqdq	xmm4,xmm15,xmm6,0x11
866	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
867	vpxor	xmm2,xmm2,xmm5
868	vpunpckhqdq	xmm9,xmm14,xmm14
869	vpclmulqdq	xmm5,xmm8,xmm7,0x10
870	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
871	vpxor	xmm9,xmm9,xmm14
872
873	vmovdqu	xmm15,XMMWORD[32+r8]
874	vpxor	xmm3,xmm3,xmm0
875	vpclmulqdq	xmm0,xmm14,xmm6,0x00
876	vpxor	xmm4,xmm4,xmm1
877	vpshufb	xmm15,xmm15,xmm13
878	vpclmulqdq	xmm1,xmm14,xmm6,0x11
879	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
880	vpxor	xmm5,xmm5,xmm2
881	vpunpckhqdq	xmm8,xmm15,xmm15
882	vpclmulqdq	xmm2,xmm9,xmm7,0x00
883	vpxor	xmm8,xmm8,xmm15
884
885	vmovdqu	xmm14,XMMWORD[16+r8]
886	vpxor	xmm0,xmm0,xmm3
887	vpclmulqdq	xmm3,xmm15,xmm6,0x00
888	vpxor	xmm1,xmm1,xmm4
889	vpshufb	xmm14,xmm14,xmm13
890	vpclmulqdq	xmm4,xmm15,xmm6,0x11
891	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
892	vpxor	xmm2,xmm2,xmm5
893	vpunpckhqdq	xmm9,xmm14,xmm14
894	vpclmulqdq	xmm5,xmm8,xmm7,0x10
895	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
896	vpxor	xmm9,xmm9,xmm14
897
898	vmovdqu	xmm15,XMMWORD[r8]
899	vpxor	xmm3,xmm3,xmm0
900	vpclmulqdq	xmm0,xmm14,xmm6,0x00
901	vpxor	xmm4,xmm4,xmm1
902	vpshufb	xmm15,xmm15,xmm13
903	vpclmulqdq	xmm1,xmm14,xmm6,0x11
904	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
905	vpxor	xmm5,xmm5,xmm2
906	vpclmulqdq	xmm2,xmm9,xmm7,0x10
907
908	lea	r8,[128+r8]
909	cmp	r9,0x80
910	jb	NEAR $L$tail_avx	; only one full group of eight
911
912	vpxor	xmm15,xmm15,xmm10	; fold the accumulator into the first block of the group
913	sub	r9,0x80
914	jmp	NEAR $L$oop8x_avx
915
916ALIGN	32
; Main loop: 128 bytes (eight blocks) per iteration. The reduction of the
; previous group (the two vpclmulqdq against [r10]) is interleaved with the
; multiplies for the next group.
917$L$oop8x_avx:
918	vpunpckhqdq	xmm8,xmm15,xmm15
919	vmovdqu	xmm14,XMMWORD[112+r8]
920	vpxor	xmm3,xmm3,xmm0
921	vpxor	xmm8,xmm8,xmm15
922	vpclmulqdq	xmm10,xmm15,xmm6,0x00
923	vpshufb	xmm14,xmm14,xmm13
924	vpxor	xmm4,xmm4,xmm1
925	vpclmulqdq	xmm11,xmm15,xmm6,0x11
926	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
927	vpunpckhqdq	xmm9,xmm14,xmm14
928	vpxor	xmm5,xmm5,xmm2
929	vpclmulqdq	xmm12,xmm8,xmm7,0x00
930	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
931	vpxor	xmm9,xmm9,xmm14
932
933	vmovdqu	xmm15,XMMWORD[96+r8]
934	vpclmulqdq	xmm0,xmm14,xmm6,0x00
935	vpxor	xmm10,xmm10,xmm3
936	vpshufb	xmm15,xmm15,xmm13
937	vpclmulqdq	xmm1,xmm14,xmm6,0x11
938	vxorps	xmm11,xmm11,xmm4
939	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
940	vpunpckhqdq	xmm8,xmm15,xmm15
941	vpclmulqdq	xmm2,xmm9,xmm7,0x00
942	vpxor	xmm12,xmm12,xmm5
943	vxorps	xmm8,xmm8,xmm15
944
945	vmovdqu	xmm14,XMMWORD[80+r8]
946	vpxor	xmm12,xmm12,xmm10
947	vpclmulqdq	xmm3,xmm15,xmm6,0x00
948	vpxor	xmm12,xmm12,xmm11
949	vpslldq	xmm9,xmm12,8
950	vpxor	xmm3,xmm3,xmm0
951	vpclmulqdq	xmm4,xmm15,xmm6,0x11
952	vpsrldq	xmm12,xmm12,8
953	vpxor	xmm10,xmm10,xmm9
954	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
955	vpshufb	xmm14,xmm14,xmm13
956	vxorps	xmm11,xmm11,xmm12
957	vpxor	xmm4,xmm4,xmm1
958	vpunpckhqdq	xmm9,xmm14,xmm14
959	vpclmulqdq	xmm5,xmm8,xmm7,0x10
960	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
961	vpxor	xmm9,xmm9,xmm14
962	vpxor	xmm5,xmm5,xmm2
963
964	vmovdqu	xmm15,XMMWORD[64+r8]
965	vpalignr	xmm12,xmm10,xmm10,8
966	vpclmulqdq	xmm0,xmm14,xmm6,0x00
967	vpshufb	xmm15,xmm15,xmm13
968	vpxor	xmm0,xmm0,xmm3
969	vpclmulqdq	xmm1,xmm14,xmm6,0x11
970	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
971	vpunpckhqdq	xmm8,xmm15,xmm15
972	vpxor	xmm1,xmm1,xmm4
973	vpclmulqdq	xmm2,xmm9,xmm7,0x00
974	vxorps	xmm8,xmm8,xmm15
975	vpxor	xmm2,xmm2,xmm5
976
977	vmovdqu	xmm14,XMMWORD[48+r8]
978	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10	; reduction step 1
979	vpclmulqdq	xmm3,xmm15,xmm6,0x00
980	vpshufb	xmm14,xmm14,xmm13
981	vpxor	xmm3,xmm3,xmm0
982	vpclmulqdq	xmm4,xmm15,xmm6,0x11
983	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
984	vpunpckhqdq	xmm9,xmm14,xmm14
985	vpxor	xmm4,xmm4,xmm1
986	vpclmulqdq	xmm5,xmm8,xmm7,0x10
987	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
988	vpxor	xmm9,xmm9,xmm14
989	vpxor	xmm5,xmm5,xmm2
990
991	vmovdqu	xmm15,XMMWORD[32+r8]
992	vpclmulqdq	xmm0,xmm14,xmm6,0x00
993	vpshufb	xmm15,xmm15,xmm13
994	vpxor	xmm0,xmm0,xmm3
995	vpclmulqdq	xmm1,xmm14,xmm6,0x11
996	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
997	vpunpckhqdq	xmm8,xmm15,xmm15
998	vpxor	xmm1,xmm1,xmm4
999	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1000	vpxor	xmm8,xmm8,xmm15
1001	vpxor	xmm2,xmm2,xmm5
1002	vxorps	xmm10,xmm10,xmm12
1003
1004	vmovdqu	xmm14,XMMWORD[16+r8]
1005	vpalignr	xmm12,xmm10,xmm10,8
1006	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1007	vpshufb	xmm14,xmm14,xmm13
1008	vpxor	xmm3,xmm3,xmm0
1009	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1010	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1011	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10	; reduction step 2
1012	vxorps	xmm12,xmm12,xmm11
1013	vpunpckhqdq	xmm9,xmm14,xmm14
1014	vpxor	xmm4,xmm4,xmm1
1015	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1016	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
1017	vpxor	xmm9,xmm9,xmm14
1018	vpxor	xmm5,xmm5,xmm2
1019
1020	vmovdqu	xmm15,XMMWORD[r8]
1021	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1022	vpshufb	xmm15,xmm15,xmm13
1023	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1024	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
1025	vpxor	xmm15,xmm15,xmm12
1026	vpclmulqdq	xmm2,xmm9,xmm7,0x10
1027	vpxor	xmm15,xmm15,xmm10	; the reduced previous result is now folded into xmm15
1028
1029	lea	r8,[128+r8]
1030	sub	r9,0x80
1031	jnc	NEAR $L$oop8x_avx
1032
1033	add	r9,0x80	; undo the last subtraction; r9 = leftover bytes
1034	jmp	NEAR $L$tail_no_xor_avx	; accumulator already folded into xmm15
1035
1036ALIGN	32
; Fewer than eight blocks remain. Blocks are taken from the END of the
; remaining input ([-16+r8+r9] first) and multiplied by successive table
; entries; r9 is decremented by 0x10 per block until it reaches zero.
1037$L$short_avx:
1038	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
1039	lea	r8,[r9*1+r8]	; r8 -> end of the remaining input
1040	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1041	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1042	vpshufb	xmm15,xmm14,xmm13
1043
; Copying xmm0..2 into xmm3..5 makes the next "vpxor xmm3,xmm3,xmm0" (and
; its siblings) produce zero, i.e. it clears the accumulators whatever
; xmm0..2 currently hold.
1044	vmovdqa	xmm3,xmm0
1045	vmovdqa	xmm4,xmm1
1046	vmovdqa	xmm5,xmm2
1047	sub	r9,0x10
1048	jz	NEAR $L$tail_avx
1049
1050	vpunpckhqdq	xmm8,xmm15,xmm15
1051	vpxor	xmm3,xmm3,xmm0
1052	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1053	vpxor	xmm8,xmm8,xmm15
1054	vmovdqu	xmm14,XMMWORD[((-32))+r8]
1055	vpxor	xmm4,xmm4,xmm1
1056	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1057	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1058	vpshufb	xmm15,xmm14,xmm13
1059	vpxor	xmm5,xmm5,xmm2
1060	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1061	vpsrldq	xmm7,xmm7,8	; bring the high qword of the packed word into the low lane
1062	sub	r9,0x10
1063	jz	NEAR $L$tail_avx
1064
1065	vpunpckhqdq	xmm8,xmm15,xmm15
1066	vpxor	xmm3,xmm3,xmm0
1067	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1068	vpxor	xmm8,xmm8,xmm15
1069	vmovdqu	xmm14,XMMWORD[((-48))+r8]
1070	vpxor	xmm4,xmm4,xmm1
1071	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1072	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1073	vpshufb	xmm15,xmm14,xmm13
1074	vpxor	xmm5,xmm5,xmm2
1075	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1076	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1077	sub	r9,0x10
1078	jz	NEAR $L$tail_avx
1079
1080	vpunpckhqdq	xmm8,xmm15,xmm15
1081	vpxor	xmm3,xmm3,xmm0
1082	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1083	vpxor	xmm8,xmm8,xmm15
1084	vmovdqu	xmm14,XMMWORD[((-64))+r8]
1085	vpxor	xmm4,xmm4,xmm1
1086	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1087	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1088	vpshufb	xmm15,xmm14,xmm13
1089	vpxor	xmm5,xmm5,xmm2
1090	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1091	vpsrldq	xmm7,xmm7,8
1092	sub	r9,0x10
1093	jz	NEAR $L$tail_avx
1094
1095	vpunpckhqdq	xmm8,xmm15,xmm15
1096	vpxor	xmm3,xmm3,xmm0
1097	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1098	vpxor	xmm8,xmm8,xmm15
1099	vmovdqu	xmm14,XMMWORD[((-80))+r8]
1100	vpxor	xmm4,xmm4,xmm1
1101	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1102	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1103	vpshufb	xmm15,xmm14,xmm13
1104	vpxor	xmm5,xmm5,xmm2
1105	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1106	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1107	sub	r9,0x10
1108	jz	NEAR $L$tail_avx
1109
1110	vpunpckhqdq	xmm8,xmm15,xmm15
1111	vpxor	xmm3,xmm3,xmm0
1112	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1113	vpxor	xmm8,xmm8,xmm15
1114	vmovdqu	xmm14,XMMWORD[((-96))+r8]
1115	vpxor	xmm4,xmm4,xmm1
1116	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1117	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1118	vpshufb	xmm15,xmm14,xmm13
1119	vpxor	xmm5,xmm5,xmm2
1120	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1121	vpsrldq	xmm7,xmm7,8
1122	sub	r9,0x10
1123	jz	NEAR $L$tail_avx
1124
1125	vpunpckhqdq	xmm8,xmm15,xmm15
1126	vpxor	xmm3,xmm3,xmm0
1127	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1128	vpxor	xmm8,xmm8,xmm15
1129	vmovdqu	xmm14,XMMWORD[((-112))+r8]
1130	vpxor	xmm4,xmm4,xmm1
1131	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1132	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1133	vpshufb	xmm15,xmm14,xmm13
1134	vpxor	xmm5,xmm5,xmm2
1135	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1136	vmovq	xmm7,QWORD[((184-64))+rdx]	; only the high qword of the entry at table+176 is needed
1137	sub	r9,0x10
1138	jmp	NEAR $L$tail_avx	; seventh block: this path handles at most seven
1139
1140ALIGN	32
; Final block of a group: fold in the accumulator, finish the multiplies,
; combine the three partial products and reduce to 128 bits in xmm10.
1141$L$tail_avx:
1142	vpxor	xmm15,xmm15,xmm10
1143$L$tail_no_xor_avx:
1144	vpunpckhqdq	xmm8,xmm15,xmm15
1145	vpxor	xmm3,xmm3,xmm0
1146	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1147	vpxor	xmm8,xmm8,xmm15
1148	vpxor	xmm4,xmm4,xmm1
1149	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1150	vpxor	xmm5,xmm5,xmm2
1151	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1152
1153	vmovdqu	xmm12,XMMWORD[r10]	; reduction constant
1154
1155	vpxor	xmm10,xmm3,xmm0	; xmm10 = low 128 bits of the sum
1156	vpxor	xmm11,xmm4,xmm1	; xmm11 = high 128 bits of the sum
1157	vpxor	xmm5,xmm5,xmm2
1158
1159	vpxor	xmm5,xmm5,xmm10
1160	vpxor	xmm5,xmm5,xmm11	; xmm5 = middle term
1161	vpslldq	xmm9,xmm5,8
1162	vpsrldq	xmm5,xmm5,8
1163	vpxor	xmm10,xmm10,xmm9
1164	vpxor	xmm11,xmm11,xmm5
1165
1166	vpclmulqdq	xmm9,xmm10,xmm12,0x10	; reduction step 1
1167	vpalignr	xmm10,xmm10,xmm10,8
1168	vpxor	xmm10,xmm10,xmm9
1169
1170	vpclmulqdq	xmm9,xmm10,xmm12,0x10	; reduction step 2
1171	vpalignr	xmm10,xmm10,xmm10,8
1172	vpxor	xmm10,xmm10,xmm11
1173	vpxor	xmm10,xmm10,xmm9
1174
1175	cmp	r9,0
1176	jne	NEAR $L$short_avx	; leftover blocks after the 8x path
1177
1178	vpshufb	xmm10,xmm10,xmm13	; back to memory byte order
1179	vmovdqu	XMMWORD[rcx],xmm10
1180	vzeroupper
1181	movaps	xmm6,XMMWORD[rsp]
1182	movaps	xmm7,XMMWORD[16+rsp]
1183	movaps	xmm8,XMMWORD[32+rsp]
1184	movaps	xmm9,XMMWORD[48+rsp]
1185	movaps	xmm10,XMMWORD[64+rsp]
1186	movaps	xmm11,XMMWORD[80+rsp]
1187	movaps	xmm12,XMMWORD[96+rsp]
1188	movaps	xmm13,XMMWORD[112+rsp]
1189	movaps	xmm14,XMMWORD[128+rsp]
1190	movaps	xmm15,XMMWORD[144+rsp]
1191	lea	rsp,[168+rsp]	; release the 168-byte frame
1192	ret
1193
1194$L$SEH_end_gcm_ghash_avx_13:
1195
; Read-only constants shared by the routines above.
1196section	.rdata rdata align=8
1197ALIGN	64
; pshufb control that reverses the order of all 16 bytes.
1198$L$bswap_mask:
1199	DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 128-bit constant with byte 0 = 0x01 and byte 15 = 0xc2; used by the key
; pre-processing in gcm_init_* and as the reduction constant in gcm_ghash_avx.
1200$L$0x1c2_polynomial:
1201	DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; Keeps the low three bits of each qword; used in $L$mod4_loop to build
; pshufb indices.
1202$L$7_mask:
1203	DD	7,0,7,0
1204ALIGN	64
1205
; NUL-terminated ASCII banner:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1206	DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1207	DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1208	DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1209	DB	114,103,62,0
1210ALIGN	64
1211section	.text
1212
; Win64 function table: one {begin, end, unwind-info} triple of image-relative
; addresses for each routine that sets up a stack frame. gcm_gmult_clmul and
; gcm_gmult_avx have no entry here; they never modify rsp or save registers.
1213section	.pdata rdata align=4
1214ALIGN	4
1215	DD	$L$SEH_begin_gcm_init_clmul_1 wrt ..imagebase
1216	DD	$L$SEH_end_gcm_init_clmul_4 wrt ..imagebase
1217	DD	$L$SEH_info_gcm_init_clmul_0 wrt ..imagebase
1218
1219	DD	$L$SEH_begin_gcm_ghash_clmul_1 wrt ..imagebase
1220	DD	$L$SEH_end_gcm_ghash_clmul_13 wrt ..imagebase
1221	DD	$L$SEH_info_gcm_ghash_clmul_0 wrt ..imagebase
1222
1223	DD	$L$SEH_begin_gcm_init_avx_1 wrt ..imagebase
1224	DD	$L$SEH_end_gcm_init_avx_4 wrt ..imagebase
1225	DD	$L$SEH_info_gcm_init_avx_0 wrt ..imagebase
1226
1227	DD	$L$SEH_begin_gcm_ghash_avx_1 wrt ..imagebase
1228	DD	$L$SEH_end_gcm_ghash_avx_13 wrt ..imagebase
1229	DD	$L$SEH_info_gcm_ghash_avx_0 wrt ..imagebase
1230
1231
; Win64 unwind descriptors, one per .pdata entry. Each starts with a 4-byte
; header (version/flags, prologue size, number of unwind-code slots, frame
; register) followed by unwind codes listed in reverse prologue order. Every
; code begins with a prologue offset byte and an (info << 4 | opcode) byte.
; Opcode meanings below follow the Microsoft x64 unwind format:
;   op 8 = save XMM register `info` at 16 * <following word> from rsp (2 slots)
;   op 2 = small stack allocation of info*8 + 8 bytes (1 slot)
;   op 1 = large stack allocation of 8 * <following word> bytes (2 slots)
1232section	.xdata rdata align=8
1233ALIGN	4
1234$L$SEH_info_gcm_init_clmul_0:
1235	DB	1	; version 1, no flags
1236	DB	$L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1	; prologue size
1237	DB	3	; unwind-code slots
1238	DB	0	; no frame register
1239	DB	$L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1
1240	DB	104	; 0x68: save xmm6 ...
1241	DW	0	; ... at rsp+0
1242	DB	$L$SEH_prolog_gcm_init_clmul_2-$L$SEH_begin_gcm_init_clmul_1
1243	DB	34	; 0x22: small alloc, 2*8+8 = 24 bytes (matches sub rsp,0x18)
1244
1245$L$SEH_info_gcm_ghash_clmul_0:
1246	DB	1	; version 1, no flags
1247	DB	$L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1	; prologue size
1248	DB	22	; unwind-code slots: 10 XMM saves * 2 + 2 for the allocation
1249	DB	0	; no frame register
1250	DB	$L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1
1251	DB	248	; 0xF8: save xmm15 at rsp+144
1252	DW	9
1253	DB	$L$SEH_prolog_gcm_ghash_clmul_11-$L$SEH_begin_gcm_ghash_clmul_1
1254	DB	232	; 0xE8: save xmm14 at rsp+128
1255	DW	8
1256	DB	$L$SEH_prolog_gcm_ghash_clmul_10-$L$SEH_begin_gcm_ghash_clmul_1
1257	DB	216	; 0xD8: save xmm13 at rsp+112
1258	DW	7
1259	DB	$L$SEH_prolog_gcm_ghash_clmul_9-$L$SEH_begin_gcm_ghash_clmul_1
1260	DB	200	; 0xC8: save xmm12 at rsp+96
1261	DW	6
1262	DB	$L$SEH_prolog_gcm_ghash_clmul_8-$L$SEH_begin_gcm_ghash_clmul_1
1263	DB	184	; 0xB8: save xmm11 at rsp+80
1264	DW	5
1265	DB	$L$SEH_prolog_gcm_ghash_clmul_7-$L$SEH_begin_gcm_ghash_clmul_1
1266	DB	168	; 0xA8: save xmm10 at rsp+64
1267	DW	4
1268	DB	$L$SEH_prolog_gcm_ghash_clmul_6-$L$SEH_begin_gcm_ghash_clmul_1
1269	DB	152	; 0x98: save xmm9 at rsp+48
1270	DW	3
1271	DB	$L$SEH_prolog_gcm_ghash_clmul_5-$L$SEH_begin_gcm_ghash_clmul_1
1272	DB	136	; 0x88: save xmm8 at rsp+32
1273	DW	2
1274	DB	$L$SEH_prolog_gcm_ghash_clmul_4-$L$SEH_begin_gcm_ghash_clmul_1
1275	DB	120	; 0x78: save xmm7 at rsp+16
1276	DW	1
1277	DB	$L$SEH_prolog_gcm_ghash_clmul_3-$L$SEH_begin_gcm_ghash_clmul_1
1278	DB	104	; 0x68: save xmm6 at rsp+0
1279	DW	0
1280	DB	$L$SEH_prolog_gcm_ghash_clmul_2-$L$SEH_begin_gcm_ghash_clmul_1
1281	DB	1	; large alloc ...
1282	DW	21	; ... of 21*8 = 168 bytes (matches the prologue's two lea instructions)
1283
1284$L$SEH_info_gcm_init_avx_0:
1285	DB	1	; version 1, no flags
1286	DB	$L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1	; prologue size
1287	DB	3	; unwind-code slots
1288	DB	0	; no frame register
1289	DB	$L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1
1290	DB	104	; 0x68: save xmm6 ...
1291	DW	0	; ... at rsp+0
1292	DB	$L$SEH_prolog_gcm_init_avx_2-$L$SEH_begin_gcm_init_avx_1
1293	DB	34	; 0x22: small alloc, 24 bytes
1294
1295$L$SEH_info_gcm_ghash_avx_0:
1296	DB	1	; version 1, no flags
1297	DB	$L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1	; prologue size
1298	DB	22	; unwind-code slots: 10 XMM saves * 2 + 2 for the allocation
1299	DB	0	; no frame register
1300	DB	$L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1
1301	DB	248	; 0xF8: save xmm15 at rsp+144
1302	DW	9
1303	DB	$L$SEH_prolog_gcm_ghash_avx_11-$L$SEH_begin_gcm_ghash_avx_1
1304	DB	232	; 0xE8: save xmm14 at rsp+128
1305	DW	8
1306	DB	$L$SEH_prolog_gcm_ghash_avx_10-$L$SEH_begin_gcm_ghash_avx_1
1307	DB	216	; 0xD8: save xmm13 at rsp+112
1308	DW	7
1309	DB	$L$SEH_prolog_gcm_ghash_avx_9-$L$SEH_begin_gcm_ghash_avx_1
1310	DB	200	; 0xC8: save xmm12 at rsp+96
1311	DW	6
1312	DB	$L$SEH_prolog_gcm_ghash_avx_8-$L$SEH_begin_gcm_ghash_avx_1
1313	DB	184	; 0xB8: save xmm11 at rsp+80
1314	DW	5
1315	DB	$L$SEH_prolog_gcm_ghash_avx_7-$L$SEH_begin_gcm_ghash_avx_1
1316	DB	168	; 0xA8: save xmm10 at rsp+64
1317	DW	4
1318	DB	$L$SEH_prolog_gcm_ghash_avx_6-$L$SEH_begin_gcm_ghash_avx_1
1319	DB	152	; 0x98: save xmm9 at rsp+48
1320	DW	3
1321	DB	$L$SEH_prolog_gcm_ghash_avx_5-$L$SEH_begin_gcm_ghash_avx_1
1322	DB	136	; 0x88: save xmm8 at rsp+32
1323	DW	2
1324	DB	$L$SEH_prolog_gcm_ghash_avx_4-$L$SEH_begin_gcm_ghash_avx_1
1325	DB	120	; 0x78: save xmm7 at rsp+16
1326	DW	1
1327	DB	$L$SEH_prolog_gcm_ghash_avx_3-$L$SEH_begin_gcm_ghash_avx_1
1328	DB	104	; 0x68: save xmm6 at rsp+0
1329	DW	0
1330	DB	$L$SEH_prolog_gcm_ghash_avx_2-$L$SEH_begin_gcm_ghash_avx_1
1331	DB	1	; large alloc ...
1332	DW	21	; ... of 21*8 = 168 bytes
1333%else
1334; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
1335ret
1336%endif
1337