• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Assembler setup. "default rel" makes label-based memory references
; RIP-relative unless an instruction says otherwise.
4default	rel
; The operand-size keywords are defined as empty macros, so an operand
; written as XMMWORD[rdx] expands to a plain [rdx] memory reference.
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
; Project-provided include; its contents are not visible from this file
; (from the name it presumably applies a prefix to exported symbols).
9%include "ring_core_generated/prefix_symbols_nasm.inc"
10section	.text code align=64
11
; External symbol; gcm_ghash_clmul reads the dword at offset 4 from it to
; choose between its 4-block and 2-block code paths.
12EXTERN	OPENSSL_ia32cap_P
; ---------------------------------------------------------------------
; gcm_init_clmul -- precompute a 96-byte key table (SSE/PCLMULQDQ path).
;
; Registers as used by the code (Win64 argument order):
;   rcx = output table; 16-byte entries are stored at offsets 0..80
;   rdx = 16-byte input value (unaligned load)
;
; Table layout written here ("H" = the preprocessed value held in xmm2):
;   [rcx+ 0] = H                [rcx+48] = H^3
;   [rcx+16] = H^2              [rcx+64] = H^4
;   [rcx+32] = { H.lo^H.hi , H^2.lo^H^2.hi }   (low qword, high qword)
;   [rcx+80] = { H^3.lo^H^3.hi , H^4.lo^H^4.hi }
; "Power" means repeated carry-less multiply + reduction as coded below
; (presumably multiplication in the GHASH field -- see the ident string
; at the end of the file).
;
; Stack: 24 bytes, used only to preserve xmm6. xmm0-xmm5 are clobbered.
; ---------------------------------------------------------------------
13global	gcm_init_clmul
14
15ALIGN	16
16gcm_init_clmul:
17
18$L$_init_clmul:
19$L$SEH_begin_gcm_init_clmul:
20
; Prologue emitted as raw bytes so its length and offsets are fixed; they
; are described by the unwind codes in $L$SEH_info_gcm_init_clmul:
;   sub    rsp,0x18
;   movaps [rsp],xmm6
21DB	0x48,0x83,0xec,0x18
22DB	0x0f,0x29,0x34,0x24
23	movdqu	xmm2,XMMWORD[rdx]
; Swap the two 64-bit halves of the input.
24	pshufd	xmm2,xmm2,78
25
26
; 128-bit left shift by one bit: psllq shifts each qword, and the bit that
; falls out of the low qword (psrlq 63) is moved into the high qword.
; xmm4 = top dword broadcast; pcmpgtd against zero turns its sign bit into
; an all-ones/all-zeros mask in xmm5.
27	pshufd	xmm4,xmm2,255
28	movdqa	xmm3,xmm2
29	psllq	xmm2,1
30	pxor	xmm5,xmm5
31	psrlq	xmm3,63
32	pcmpgtd	xmm5,xmm4
33	pslldq	xmm3,8
34	por	xmm2,xmm3
35
36
; Conditionally xor in the 0x1c2 constant: applied only when the bit
; shifted out of the top was set (mask in xmm5).
37	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
38	pxor	xmm2,xmm5
39
40
; xmm6 = H.lo^H.hi in both halves (operand for the middle product).
; Multiply xmm0 (= H) by H using three carry-less products:
;   xmm0 = lo*lo, xmm1 = hi*hi, xmm3 = (lo^hi)*(lo^hi)
41	pshufd	xmm6,xmm2,78
42	movdqa	xmm0,xmm2
43	pxor	xmm6,xmm2
44	movdqa	xmm1,xmm0
45	pshufd	xmm3,xmm0,78
46	pxor	xmm3,xmm0
; pclmulqdq xmm0,xmm2,0x00 / xmm1,xmm2,0x11 / xmm3,xmm6,0x00
47DB	102,15,58,68,194,0
48DB	102,15,58,68,202,17
49DB	102,15,58,68,222,0
50	pxor	xmm3,xmm0
51	pxor	xmm3,xmm1
52
; Fold the middle term into the 256-bit product held in xmm1:xmm0.
53	movdqa	xmm4,xmm3
54	psrldq	xmm3,8
55	pslldq	xmm4,8
56	pxor	xmm1,xmm3
57	pxor	xmm0,xmm4
58
; Reduction, phase 1: the shift chain 5,1,57 builds
; (x<<57)^(x<<62)^(x<<63) per qword, which is then split across
; xmm0 (shifted up 8 bytes) and xmm1 (shifted down 8 bytes).
59	movdqa	xmm4,xmm0
60	movdqa	xmm3,xmm0
61	psllq	xmm0,5
62	pxor	xmm3,xmm0
63	psllq	xmm0,1
64	pxor	xmm0,xmm3
65	psllq	xmm0,57
66	movdqa	xmm3,xmm0
67	pslldq	xmm0,8
68	psrldq	xmm3,8
69	pxor	xmm0,xmm4
70	pxor	xmm1,xmm3
71
72
; Reduction, phase 2: the shift chain 1,5,1 builds
; (x>>1)^(x>>2)^(x>>7) per qword; combined with x and the high half in
; xmm1 this leaves the reduced 128-bit result in xmm0 (= H^2 here).
73	movdqa	xmm4,xmm0
74	psrlq	xmm0,1
75	pxor	xmm1,xmm4
76	pxor	xmm4,xmm0
77	psrlq	xmm0,5
78	pxor	xmm0,xmm4
79	psrlq	xmm0,1
80	pxor	xmm0,xmm1
; Store H and H^2, plus the packed lo^hi halves of both at offset 32.
81	pshufd	xmm3,xmm2,78
82	pshufd	xmm4,xmm0,78
83	pxor	xmm3,xmm2
84	movdqu	XMMWORD[rcx],xmm2
85	pxor	xmm4,xmm0
86	movdqu	XMMWORD[16+rcx],xmm0
; palignr xmm4,xmm3,8
87DB	102,15,58,15,227,8
88	movdqu	XMMWORD[32+rcx],xmm4
; Second multiply: xmm0 = H^2 * H (same three-product + reduction shape).
89	movdqa	xmm1,xmm0
90	pshufd	xmm3,xmm0,78
91	pxor	xmm3,xmm0
92DB	102,15,58,68,194,0
93DB	102,15,58,68,202,17
94DB	102,15,58,68,222,0
95	pxor	xmm3,xmm0
96	pxor	xmm3,xmm1
97
98	movdqa	xmm4,xmm3
99	psrldq	xmm3,8
100	pslldq	xmm4,8
101	pxor	xmm1,xmm3
102	pxor	xmm0,xmm4
103
104	movdqa	xmm4,xmm0
105	movdqa	xmm3,xmm0
106	psllq	xmm0,5
107	pxor	xmm3,xmm0
108	psllq	xmm0,1
109	pxor	xmm0,xmm3
110	psllq	xmm0,57
111	movdqa	xmm3,xmm0
112	pslldq	xmm0,8
113	psrldq	xmm3,8
114	pxor	xmm0,xmm4
115	pxor	xmm1,xmm3
116
117
118	movdqa	xmm4,xmm0
119	psrlq	xmm0,1
120	pxor	xmm1,xmm4
121	pxor	xmm4,xmm0
122	psrlq	xmm0,5
123	pxor	xmm0,xmm4
124	psrlq	xmm0,1
125	pxor	xmm0,xmm1
; Keep H^3 in xmm5, then multiply by H once more to get H^4 in xmm0.
126	movdqa	xmm5,xmm0
127	movdqa	xmm1,xmm0
128	pshufd	xmm3,xmm0,78
129	pxor	xmm3,xmm0
130DB	102,15,58,68,194,0
131DB	102,15,58,68,202,17
132DB	102,15,58,68,222,0
133	pxor	xmm3,xmm0
134	pxor	xmm3,xmm1
135
136	movdqa	xmm4,xmm3
137	psrldq	xmm3,8
138	pslldq	xmm4,8
139	pxor	xmm1,xmm3
140	pxor	xmm0,xmm4
141
142	movdqa	xmm4,xmm0
143	movdqa	xmm3,xmm0
144	psllq	xmm0,5
145	pxor	xmm3,xmm0
146	psllq	xmm0,1
147	pxor	xmm0,xmm3
148	psllq	xmm0,57
149	movdqa	xmm3,xmm0
150	pslldq	xmm0,8
151	psrldq	xmm3,8
152	pxor	xmm0,xmm4
153	pxor	xmm1,xmm3
154
155
156	movdqa	xmm4,xmm0
157	psrlq	xmm0,1
158	pxor	xmm1,xmm4
159	pxor	xmm4,xmm0
160	psrlq	xmm0,5
161	pxor	xmm0,xmm4
162	psrlq	xmm0,1
163	pxor	xmm0,xmm1
; Store H^3, H^4 and their packed lo^hi halves (offsets 48, 64, 80).
164	pshufd	xmm3,xmm5,78
165	pshufd	xmm4,xmm0,78
166	pxor	xmm3,xmm5
167	movdqu	XMMWORD[48+rcx],xmm5
168	pxor	xmm4,xmm0
169	movdqu	XMMWORD[64+rcx],xmm0
; palignr xmm4,xmm3,8
170DB	102,15,58,15,227,8
171	movdqu	XMMWORD[80+rcx],xmm4
; Epilogue: restore xmm6 and release the 24-byte frame.
172	movaps	xmm6,XMMWORD[rsp]
173	lea	rsp,[24+rsp]
174$L$SEH_end_gcm_init_clmul:
175	DB	0F3h,0C3h		;repret
176
177
; ---------------------------------------------------------------------
; gcm_gmult_clmul -- multiply one 16-byte block by the table's first entry.
;
; Registers as used by the code (Win64 argument order):
;   rcx = 16-byte block; read at entry and overwritten with the result
;   rdx = key table; the entries at offsets 0 and 32 are read
;
; The block is byte-reversed with $L$bswap_mask after loading and again
; before storing. Only xmm0-xmm5 are written and rsp is not modified, so
; the function has no prologue and no .pdata entry.
; ---------------------------------------------------------------------
178global	gcm_gmult_clmul
179
180ALIGN	16
181gcm_gmult_clmul:
182
183$L$_gmult_clmul:
184	movdqu	xmm0,XMMWORD[rcx]
; Aligned load: $L$bswap_mask sits directly after an ALIGN 64.
185	movdqa	xmm5,XMMWORD[$L$bswap_mask]
186	movdqu	xmm2,XMMWORD[rdx]
187	movdqu	xmm4,XMMWORD[32+rdx]
; pshufb xmm0,xmm5  (reverse the 16 bytes of the block)
188DB	102,15,56,0,197
; Three carry-less products: xmm0 = lo*lo, xmm1 = hi*hi, and
; xmm3 = (block.lo^block.hi) * low qword of the offset-32 table entry.
189	movdqa	xmm1,xmm0
190	pshufd	xmm3,xmm0,78
191	pxor	xmm3,xmm0
; pclmulqdq xmm0,xmm2,0x00 / xmm1,xmm2,0x11 / xmm3,xmm4,0x00
192DB	102,15,58,68,194,0
193DB	102,15,58,68,202,17
194DB	102,15,58,68,220,0
195	pxor	xmm3,xmm0
196	pxor	xmm3,xmm1
197
; Fold the middle term into the 256-bit product xmm1:xmm0.
198	movdqa	xmm4,xmm3
199	psrldq	xmm3,8
200	pslldq	xmm4,8
201	pxor	xmm1,xmm3
202	pxor	xmm0,xmm4
203
; Reduction, phase 1: (x<<57)^(x<<62)^(x<<63) per qword via shifts 5,1,57.
204	movdqa	xmm4,xmm0
205	movdqa	xmm3,xmm0
206	psllq	xmm0,5
207	pxor	xmm3,xmm0
208	psllq	xmm0,1
209	pxor	xmm0,xmm3
210	psllq	xmm0,57
211	movdqa	xmm3,xmm0
212	pslldq	xmm0,8
213	psrldq	xmm3,8
214	pxor	xmm0,xmm4
215	pxor	xmm1,xmm3
216
217
; Reduction, phase 2: (x>>1)^(x>>2)^(x>>7) per qword via shifts 1,5,1,
; then xor with the high half; reduced result ends up in xmm0.
218	movdqa	xmm4,xmm0
219	psrlq	xmm0,1
220	pxor	xmm1,xmm4
221	pxor	xmm4,xmm0
222	psrlq	xmm0,5
223	pxor	xmm0,xmm4
224	psrlq	xmm0,1
225	pxor	xmm0,xmm1
; pshufb xmm0,xmm5  (back to memory byte order), then store the result.
226DB	102,15,56,0,197
227	movdqu	XMMWORD[rcx],xmm0
228	DB	0F3h,0C3h		;repret
229
230
; ---------------------------------------------------------------------
; gcm_ghash_clmul -- fold a buffer of 16-byte blocks into the hash state
; (SSE/PCLMULQDQ path).
;
; Registers as used by the code (Win64 argument order):
;   rcx = 16-byte hash state; loaded at entry, stored back at $L$done
;   rdx = key table; entries at offsets 0,16,32,48,64,80 are read
;   r8  = input pointer, advanced as blocks are consumed
;   r9  = input length in bytes, consumed in steps of 16/32/64
;         (assumes a non-zero multiple of 16 -- TODO confirm with callers)
;
; Paths:
;   4 blocks per iteration ($L$mod4_loop / $L$tail4x) when at least 64
;     bytes are available and the capability test below allows it;
;   2 blocks per iteration ($L$skip4x / $L$mod_loop / $L$even_tail);
;   1 final block ($L$odd_tail).
;
; Stack: 168-byte frame holding xmm6-xmm15, restored before return.
; xmm10 holds the byte-swap mask for the whole function.
; ---------------------------------------------------------------------
231global	gcm_ghash_clmul
232
233ALIGN	32
234gcm_ghash_clmul:
235
236$L$_ghash_clmul:
; rax = entry rsp - 136: base address for the xmm8-xmm15 save slots.
; rsp itself is unchanged here, so this sits before the SEH begin label.
237	lea	rax,[((-136))+rsp]
238$L$SEH_begin_gcm_ghash_clmul:
239
; Prologue emitted as raw bytes; instruction end offsets must match the
; unwind codes in $L$SEH_info_gcm_ghash_clmul:
;   lea    rsp,[rax-0x20]        ; rsp = entry rsp - 168
;   movaps [rax-0x20],xmm6       ; = [rsp+0]
;   movaps [rax-0x10],xmm7       ; = [rsp+16]
;   movaps [rax+0x00],xmm8       ; = [rsp+32]
;   movaps [rax+0x10],xmm9  ...  movaps [rax+0x70],xmm15  ; up to [rsp+144]
240DB	0x48,0x8d,0x60,0xe0
241DB	0x0f,0x29,0x70,0xe0
242DB	0x0f,0x29,0x78,0xf0
243DB	0x44,0x0f,0x29,0x00
244DB	0x44,0x0f,0x29,0x48,0x10
245DB	0x44,0x0f,0x29,0x50,0x20
246DB	0x44,0x0f,0x29,0x58,0x30
247DB	0x44,0x0f,0x29,0x60,0x40
248DB	0x44,0x0f,0x29,0x68,0x50
249DB	0x44,0x0f,0x29,0x70,0x60
250DB	0x44,0x0f,0x29,0x78,0x70
251	movdqa	xmm10,XMMWORD[$L$bswap_mask]
252
; xmm0 = hash state, xmm2 = table[0], xmm7 = table[32].
253	movdqu	xmm0,XMMWORD[rcx]
254	movdqu	xmm2,XMMWORD[rdx]
255	movdqu	xmm7,XMMWORD[32+rdx]
; pshufb xmm0,xmm10  (byte-reverse the state)
256DB	102,65,15,56,0,194
257
; r9 = length - 16; exactly one block goes straight to $L$odd_tail.
258	sub	r9,0x10
259	jz	NEAR $L$odd_tail
260
; xmm6 = table[16]. rax is reused: first the address of
; OPENSSL_ia32cap_P, then eax = the dword at offset 4 from it.
261	movdqu	xmm6,XMMWORD[16+rdx]
262	lea	rax,[OPENSSL_ia32cap_P]
263	mov	eax,DWORD[4+rax]
; Fewer than 64 bytes in total -> 2-block path.
264	cmp	r9,0x30
265	jb	NEAR $L$skip4x
266
; 71303168 = 0x04400000 (bits 22 and 26); 4194304 = 0x00400000 (bit 22).
; The 4-block path is skipped when bit 22 is set and bit 26 is clear.
; NOTE(review): in CPUID.1:ECX those positions are MOVBE and XSAVE;
; presumably a CPU-model heuristic -- confirm against the generator.
267	and	eax,71303168
268	cmp	eax,4194304
269	je	NEAR $L$skip4x
270
; 4-block setup: r9 = length - 64. rax becomes an 8-byte lookup table
; that the loop moves into xmm9 and indexes with pshufb.
; xmm14 = table[48], xmm15 = table[64].
271	sub	r9,0x30
272	mov	rax,0xA040608020C0E000
273	movdqu	xmm14,XMMWORD[48+rdx]
274	movdqu	xmm15,XMMWORD[64+rdx]
275
276
277
278
; Blocks 4 and 3 of the group (offsets 48 and 32): byte-swap, then three
; products each against table[0] / table[16], middle terms via xmm7.
279	movdqu	xmm3,XMMWORD[48+r8]
280	movdqu	xmm11,XMMWORD[32+r8]
; pshufb xmm3,xmm10 / pshufb xmm11,xmm10
281DB	102,65,15,56,0,218
282DB	102,69,15,56,0,218
283	movdqa	xmm5,xmm3
284	pshufd	xmm4,xmm3,78
285	pxor	xmm4,xmm3
; pclmulqdq xmm3,xmm2,0x00 / xmm5,xmm2,0x11 / xmm4,xmm7,0x00
286DB	102,15,58,68,218,0
287DB	102,15,58,68,234,17
288DB	102,15,58,68,231,0
289
290	movdqa	xmm13,xmm11
291	pshufd	xmm12,xmm11,78
292	pxor	xmm12,xmm11
; pclmulqdq xmm11,xmm6,0x00 / xmm13,xmm6,0x11 / xmm12,xmm7,0x10
293DB	102,68,15,58,68,222,0
294DB	102,68,15,58,68,238,17
295DB	102,68,15,58,68,231,16
296	xorps	xmm3,xmm11
297	xorps	xmm5,xmm13
; xmm7 now switches to table[80] for the remaining two blocks.
298	movups	xmm7,XMMWORD[80+rdx]
299	xorps	xmm4,xmm12
300
; Blocks 2 and 1 (offsets 16 and 0). The first block is xor'ed into the
; running hash (xmm0) before being multiplied.
301	movdqu	xmm11,XMMWORD[16+r8]
302	movdqu	xmm8,XMMWORD[r8]
; pshufb xmm11,xmm10 / pshufb xmm8,xmm10
303DB	102,69,15,56,0,218
304DB	102,69,15,56,0,194
305	movdqa	xmm13,xmm11
306	pshufd	xmm12,xmm11,78
307	pxor	xmm0,xmm8
308	pxor	xmm12,xmm11
; pclmulqdq xmm11,xmm14,0x00
309DB	102,69,15,58,68,222,0
310	movdqa	xmm1,xmm0
311	pshufd	xmm8,xmm0,78
312	pxor	xmm8,xmm0
; pclmulqdq xmm13,xmm14,0x11 / xmm12,xmm7,0x00
313DB	102,69,15,58,68,238,17
314DB	102,68,15,58,68,231,0
315	xorps	xmm3,xmm11
316	xorps	xmm5,xmm13
317
; Advance past the group; if fewer than 64 bytes remain, finish in
; $L$tail4x instead of entering the loop.
318	lea	r8,[64+r8]
319	sub	r9,0x40
320	jc	NEAR $L$tail4x
321
322	jmp	NEAR $L$mod4_loop
323ALIGN	32
; Main 4-block loop. The products for the hash-carrying block, the
; reduction of the accumulated result, and the loading, byte-swapping and
; multiplication of the next four blocks are interleaved.
324$L$mod4_loop:
; pclmulqdq xmm0,xmm15,0x00
325DB	102,65,15,58,68,199,0
326	xorps	xmm4,xmm12
327	movdqu	xmm11,XMMWORD[48+r8]
328DB	102,69,15,56,0,218
; pclmulqdq xmm1,xmm15,0x11
329DB	102,65,15,58,68,207,17
330	xorps	xmm0,xmm3
331	movdqu	xmm3,XMMWORD[32+r8]
332	movdqa	xmm13,xmm11
; pclmulqdq xmm8,xmm7,0x10
333DB	102,68,15,58,68,199,16
334	pshufd	xmm12,xmm11,78
335	xorps	xmm1,xmm5
336	pxor	xmm12,xmm11
337DB	102,65,15,56,0,218
338	movups	xmm7,XMMWORD[32+rdx]
339	xorps	xmm8,xmm4
340DB	102,68,15,58,68,218,0
341	pshufd	xmm4,xmm3,78
342
343	pxor	xmm8,xmm0
344	movdqa	xmm5,xmm3
345	pxor	xmm8,xmm1
346	pxor	xmm4,xmm3
347	movdqa	xmm9,xmm8
348DB	102,68,15,58,68,234,17
349	pslldq	xmm8,8
350	psrldq	xmm9,8
351	pxor	xmm0,xmm8
; Reduction phase 1 in table form: the low 3 bits of each qword of xmm0
; (masked with $L$7_mask) select bytes from the constant held in rax.
352	movdqa	xmm8,XMMWORD[$L$7_mask]
353	pxor	xmm1,xmm9
; movq xmm9,rax
354DB	102,76,15,110,200
355
356	pand	xmm8,xmm0
; pshufb xmm9,xmm8
357DB	102,69,15,56,0,200
358	pxor	xmm9,xmm0
359DB	102,68,15,58,68,231,0
360	psllq	xmm9,57
361	movdqa	xmm8,xmm9
362	pslldq	xmm9,8
363DB	102,15,58,68,222,0
364	psrldq	xmm8,8
365	pxor	xmm0,xmm9
366	pxor	xmm1,xmm8
367	movdqu	xmm8,XMMWORD[r8]
368
; Reduction phase 2 (right shifts 1,5,1), interleaved with the next group.
369	movdqa	xmm9,xmm0
370	psrlq	xmm0,1
371DB	102,15,58,68,238,17
372	xorps	xmm3,xmm11
373	movdqu	xmm11,XMMWORD[16+r8]
374DB	102,69,15,56,0,218
375DB	102,15,58,68,231,16
376	xorps	xmm5,xmm13
377	movups	xmm7,XMMWORD[80+rdx]
378DB	102,69,15,56,0,194
379	pxor	xmm1,xmm9
380	pxor	xmm9,xmm0
381	psrlq	xmm0,5
382
383	movdqa	xmm13,xmm11
384	pxor	xmm4,xmm12
385	pshufd	xmm12,xmm11,78
386	pxor	xmm0,xmm9
; xmm8 is the next group's first block: it is folded into the high half
; before the final xor so the reduced hash comes out already combined.
387	pxor	xmm1,xmm8
388	pxor	xmm12,xmm11
389DB	102,69,15,58,68,222,0
390	psrlq	xmm0,1
391	pxor	xmm0,xmm1
392	movdqa	xmm1,xmm0
393DB	102,69,15,58,68,238,17
394	xorps	xmm3,xmm11
395	pshufd	xmm8,xmm0,78
396	pxor	xmm8,xmm0
397
398DB	102,68,15,58,68,231,0
399	xorps	xmm5,xmm13
400
401	lea	r8,[64+r8]
402	sub	r9,0x40
403	jnc	NEAR $L$mod4_loop
404
; Finish the last 4-block group: remaining three products for the
; hash-carrying block, sum all partial products, then a full reduction.
405$L$tail4x:
; pclmulqdq xmm0,xmm15,0x00 / xmm1,xmm15,0x11 / xmm8,xmm7,0x10
406DB	102,65,15,58,68,199,0
407DB	102,65,15,58,68,207,17
408DB	102,68,15,58,68,199,16
409	xorps	xmm4,xmm12
410	xorps	xmm0,xmm3
411	xorps	xmm1,xmm5
412	pxor	xmm1,xmm0
413	pxor	xmm8,xmm4
414
415	pxor	xmm8,xmm1
416	pxor	xmm1,xmm0
417
418	movdqa	xmm9,xmm8
419	psrldq	xmm8,8
420	pslldq	xmm9,8
421	pxor	xmm1,xmm8
422	pxor	xmm0,xmm9
423
424	movdqa	xmm4,xmm0
425	movdqa	xmm3,xmm0
426	psllq	xmm0,5
427	pxor	xmm3,xmm0
428	psllq	xmm0,1
429	pxor	xmm0,xmm3
430	psllq	xmm0,57
431	movdqa	xmm3,xmm0
432	pslldq	xmm0,8
433	psrldq	xmm3,8
434	pxor	xmm0,xmm4
435	pxor	xmm1,xmm3
436
437
438	movdqa	xmm4,xmm0
439	psrlq	xmm0,1
440	pxor	xmm1,xmm4
441	pxor	xmm4,xmm0
442	psrlq	xmm0,5
443	pxor	xmm0,xmm4
444	psrlq	xmm0,1
445	pxor	xmm0,xmm1
; Undo the last subtraction: r9 = bytes still unprocessed (0..48).
; Reload xmm7 = table[32] for the 1- and 2-block paths.
446	add	r9,0x40
447	jz	NEAR $L$done
448	movdqu	xmm7,XMMWORD[32+rdx]
449	sub	r9,0x10
450	jz	NEAR $L$odd_tail
; 2-block path. On entry r9 = remaining bytes - 16.
451$L$skip4x:
452
453
454
455
456
; Load two blocks; the first is xor'ed into the hash, the second is
; multiplied by table[0] right away.
457	movdqu	xmm8,XMMWORD[r8]
458	movdqu	xmm3,XMMWORD[16+r8]
; pshufb xmm8,xmm10 / pshufb xmm3,xmm10
459DB	102,69,15,56,0,194
460DB	102,65,15,56,0,218
461	pxor	xmm0,xmm8
462
463	movdqa	xmm5,xmm3
464	pshufd	xmm4,xmm3,78
465	pxor	xmm4,xmm3
; pclmulqdq xmm3,xmm2,0x00 / xmm5,xmm2,0x11 / xmm4,xmm7,0x00
466DB	102,15,58,68,218,0
467DB	102,15,58,68,234,17
468DB	102,15,58,68,231,0
469
; r9 <= 0 after this subtraction means no further pair is available.
470	lea	r8,[32+r8]
471	nop
472	sub	r9,0x20
473	jbe	NEAR $L$even_tail
474	nop
475	jmp	NEAR $L$mod_loop
476
477ALIGN	32
; 2-block loop: multiply the hash-carrying block by table[16], reduce,
; and at the same time load/multiply the next pair.
478$L$mod_loop:
479	movdqa	xmm1,xmm0
480	movdqa	xmm8,xmm4
481	pshufd	xmm4,xmm0,78
482	pxor	xmm4,xmm0
483
; pclmulqdq xmm0,xmm6,0x00 / xmm1,xmm6,0x11 / xmm4,xmm7,0x10
484DB	102,15,58,68,198,0
485DB	102,15,58,68,206,17
486DB	102,15,58,68,231,16
487
488	pxor	xmm0,xmm3
489	pxor	xmm1,xmm5
490	movdqu	xmm9,XMMWORD[r8]
491	pxor	xmm8,xmm0
; pshufb xmm9,xmm10
492DB	102,69,15,56,0,202
493	movdqu	xmm3,XMMWORD[16+r8]
494
495	pxor	xmm8,xmm1
; Next pair's first block is folded into the high half ahead of reduction.
496	pxor	xmm1,xmm9
497	pxor	xmm4,xmm8
; pshufb xmm3,xmm10
498DB	102,65,15,56,0,218
499	movdqa	xmm8,xmm4
500	psrldq	xmm8,8
501	pslldq	xmm4,8
502	pxor	xmm1,xmm8
503	pxor	xmm0,xmm4
504
505	movdqa	xmm5,xmm3
506
507	movdqa	xmm9,xmm0
508	movdqa	xmm8,xmm0
509	psllq	xmm0,5
510	pxor	xmm8,xmm0
; pclmulqdq xmm3,xmm2,0x00
511DB	102,15,58,68,218,0
512	psllq	xmm0,1
513	pxor	xmm0,xmm8
514	psllq	xmm0,57
515	movdqa	xmm8,xmm0
516	pslldq	xmm0,8
517	psrldq	xmm8,8
518	pxor	xmm0,xmm9
519	pshufd	xmm4,xmm5,78
520	pxor	xmm1,xmm8
521	pxor	xmm4,xmm5
522
523	movdqa	xmm9,xmm0
524	psrlq	xmm0,1
; pclmulqdq xmm5,xmm2,0x11
525DB	102,15,58,68,234,17
526	pxor	xmm1,xmm9
527	pxor	xmm9,xmm0
528	psrlq	xmm0,5
529	pxor	xmm0,xmm9
530	lea	r8,[32+r8]
531	psrlq	xmm0,1
; pclmulqdq xmm4,xmm7,0x00
532DB	102,15,58,68,231,0
533	pxor	xmm0,xmm1
534
535	sub	r9,0x20
536	ja	NEAR $L$mod_loop
537
; Finish the last pair: products for the hash-carrying block, sum with
; the already-computed products of the second block, full reduction.
538$L$even_tail:
539	movdqa	xmm1,xmm0
540	movdqa	xmm8,xmm4
541	pshufd	xmm4,xmm0,78
542	pxor	xmm4,xmm0
543
; pclmulqdq xmm0,xmm6,0x00 / xmm1,xmm6,0x11 / xmm4,xmm7,0x10
544DB	102,15,58,68,198,0
545DB	102,15,58,68,206,17
546DB	102,15,58,68,231,16
547
548	pxor	xmm0,xmm3
549	pxor	xmm1,xmm5
550	pxor	xmm8,xmm0
551	pxor	xmm8,xmm1
552	pxor	xmm4,xmm8
553	movdqa	xmm8,xmm4
554	psrldq	xmm8,8
555	pslldq	xmm4,8
556	pxor	xmm1,xmm8
557	pxor	xmm0,xmm4
558
559	movdqa	xmm4,xmm0
560	movdqa	xmm3,xmm0
561	psllq	xmm0,5
562	pxor	xmm3,xmm0
563	psllq	xmm0,1
564	pxor	xmm0,xmm3
565	psllq	xmm0,57
566	movdqa	xmm3,xmm0
567	pslldq	xmm0,8
568	psrldq	xmm3,8
569	pxor	xmm0,xmm4
570	pxor	xmm1,xmm3
571
572
573	movdqa	xmm4,xmm0
574	psrlq	xmm0,1
575	pxor	xmm1,xmm4
576	pxor	xmm4,xmm0
577	psrlq	xmm0,5
578	pxor	xmm0,xmm4
579	psrlq	xmm0,1
580	pxor	xmm0,xmm1
; r9 == 0 here means exactly one more block is left (the counter runs 16
; behind); any non-zero value means the input is exhausted.
581	test	r9,r9
582	jnz	NEAR $L$done
583
; Single trailing block: xor into the hash, multiply by table[0]
; (middle product via xmm7 = table[32]), reduce.
584$L$odd_tail:
585	movdqu	xmm8,XMMWORD[r8]
; pshufb xmm8,xmm10
586DB	102,69,15,56,0,194
587	pxor	xmm0,xmm8
588	movdqa	xmm1,xmm0
589	pshufd	xmm3,xmm0,78
590	pxor	xmm3,xmm0
; pclmulqdq xmm0,xmm2,0x00 / xmm1,xmm2,0x11 / xmm3,xmm7,0x00
591DB	102,15,58,68,194,0
592DB	102,15,58,68,202,17
593DB	102,15,58,68,223,0
594	pxor	xmm3,xmm0
595	pxor	xmm3,xmm1
596
597	movdqa	xmm4,xmm3
598	psrldq	xmm3,8
599	pslldq	xmm4,8
600	pxor	xmm1,xmm3
601	pxor	xmm0,xmm4
602
603	movdqa	xmm4,xmm0
604	movdqa	xmm3,xmm0
605	psllq	xmm0,5
606	pxor	xmm3,xmm0
607	psllq	xmm0,1
608	pxor	xmm0,xmm3
609	psllq	xmm0,57
610	movdqa	xmm3,xmm0
611	pslldq	xmm0,8
612	psrldq	xmm3,8
613	pxor	xmm0,xmm4
614	pxor	xmm1,xmm3
615
616
617	movdqa	xmm4,xmm0
618	psrlq	xmm0,1
619	pxor	xmm1,xmm4
620	pxor	xmm4,xmm0
621	psrlq	xmm0,5
622	pxor	xmm0,xmm4
623	psrlq	xmm0,1
624	pxor	xmm0,xmm1
; Byte-reverse the hash back to memory order, store it, restore
; xmm6-xmm15 from the frame and release the 168 bytes.
625$L$done:
; pshufb xmm0,xmm10
626DB	102,65,15,56,0,194
627	movdqu	XMMWORD[rcx],xmm0
628	movaps	xmm6,XMMWORD[rsp]
629	movaps	xmm7,XMMWORD[16+rsp]
630	movaps	xmm8,XMMWORD[32+rsp]
631	movaps	xmm9,XMMWORD[48+rsp]
632	movaps	xmm10,XMMWORD[64+rsp]
633	movaps	xmm11,XMMWORD[80+rsp]
634	movaps	xmm12,XMMWORD[96+rsp]
635	movaps	xmm13,XMMWORD[112+rsp]
636	movaps	xmm14,XMMWORD[128+rsp]
637	movaps	xmm15,XMMWORD[144+rsp]
638	lea	rsp,[168+rsp]
639$L$SEH_end_gcm_ghash_clmul:
640	DB	0F3h,0C3h		;repret
641
642
; ---------------------------------------------------------------------
; gcm_init_avx -- precompute a 192-byte key table (AVX-encoded path).
;
; Registers as used by the code (Win64 argument order):
;   rcx = output table; advanced by 48 per loop pass (4 passes)
;   rdx = 16-byte input value (unaligned load)
;
; Each pass stores two consecutive powers of the preprocessed value H
; (held in xmm2) at [rcx+0] and [rcx+16]; the 16 bytes after them receive
; the packed lo^hi halves of that pair, written one step later through
; the [(-16)+rcx] stores. The four passes therefore cover H^1..H^8.
; "Power" means repeated carry-less multiply + reduction as coded below.
;
; Stack: 24 bytes to preserve xmm6 (same prologue bytes as
; gcm_init_clmul). r10 is the pass counter. vzeroupper is issued on entry
; and before return.
; ---------------------------------------------------------------------
643global	gcm_init_avx
644
645ALIGN	32
646gcm_init_avx:
647
648$L$SEH_begin_gcm_init_avx:
649
; Prologue as raw bytes:  sub rsp,0x18  /  movaps [rsp],xmm6
650DB	0x48,0x83,0xec,0x18
651DB	0x0f,0x29,0x34,0x24
652	vzeroupper
653
654	vmovdqu	xmm2,XMMWORD[rdx]
; Swap the two 64-bit halves of the input.
655	vpshufd	xmm2,xmm2,78
656
657
; 128-bit left shift by one bit (carry moved from low to high qword);
; xmm5 becomes an all-ones mask when the original top bit was set.
658	vpshufd	xmm4,xmm2,255
659	vpsrlq	xmm3,xmm2,63
660	vpsllq	xmm2,xmm2,1
661	vpxor	xmm5,xmm5,xmm5
662	vpcmpgtd	xmm5,xmm5,xmm4
663	vpslldq	xmm3,xmm3,8
664	vpor	xmm2,xmm2,xmm3
665
666
; Conditionally xor in the 0x1c2 constant under that mask.
667	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
668	vpxor	xmm2,xmm2,xmm5
669
; xmm6 = H.hi^H.lo (low qword is what the middle product uses);
; xmm0 = running power, starting at H. Four passes.
670	vpunpckhqdq	xmm6,xmm2,xmm2
671	vmovdqa	xmm0,xmm2
672	vpxor	xmm6,xmm6,xmm2
673	mov	r10,4
674	jmp	NEAR $L$init_start_avx
675ALIGN	32
; Loop body entered from the bottom: first store the previous pass's
; packed lo^hi halves (rcx has already moved on by 48), then multiply the
; running power by H once.
676$L$init_loop_avx:
677	vpalignr	xmm5,xmm4,xmm3,8
678	vmovdqu	XMMWORD[(-16)+rcx],xmm5
679	vpunpckhqdq	xmm3,xmm0,xmm0
680	vpxor	xmm3,xmm3,xmm0
; Three products: xmm1 = hi*hi, xmm0 = lo*lo, xmm3 = (lo^hi)*(H.lo^H.hi).
681	vpclmulqdq	xmm1,xmm0,xmm2,0x11
682	vpclmulqdq	xmm0,xmm0,xmm2,0x00
683	vpclmulqdq	xmm3,xmm3,xmm6,0x00
684	vpxor	xmm4,xmm1,xmm0
685	vpxor	xmm3,xmm3,xmm4
686
; Fold the middle term into xmm1:xmm0, then reduce:
; phase 1 uses left shifts 57/62/63, phase 2 right shifts 1,5,1.
687	vpslldq	xmm4,xmm3,8
688	vpsrldq	xmm3,xmm3,8
689	vpxor	xmm0,xmm0,xmm4
690	vpxor	xmm1,xmm1,xmm3
691	vpsllq	xmm3,xmm0,57
692	vpsllq	xmm4,xmm0,62
693	vpxor	xmm4,xmm4,xmm3
694	vpsllq	xmm3,xmm0,63
695	vpxor	xmm4,xmm4,xmm3
696	vpslldq	xmm3,xmm4,8
697	vpsrldq	xmm4,xmm4,8
698	vpxor	xmm0,xmm0,xmm3
699	vpxor	xmm1,xmm1,xmm4
700
701	vpsrlq	xmm4,xmm0,1
702	vpxor	xmm1,xmm1,xmm0
703	vpxor	xmm0,xmm0,xmm4
704	vpsrlq	xmm4,xmm4,5
705	vpxor	xmm0,xmm0,xmm4
706	vpsrlq	xmm0,xmm0,1
707	vpxor	xmm0,xmm0,xmm1
; Entry point of each pass: remember the current power in xmm5, multiply
; once more by H, then store both powers.
708$L$init_start_avx:
709	vmovdqa	xmm5,xmm0
710	vpunpckhqdq	xmm3,xmm0,xmm0
711	vpxor	xmm3,xmm3,xmm0
712	vpclmulqdq	xmm1,xmm0,xmm2,0x11
713	vpclmulqdq	xmm0,xmm0,xmm2,0x00
714	vpclmulqdq	xmm3,xmm3,xmm6,0x00
715	vpxor	xmm4,xmm1,xmm0
716	vpxor	xmm3,xmm3,xmm4
717
718	vpslldq	xmm4,xmm3,8
719	vpsrldq	xmm3,xmm3,8
720	vpxor	xmm0,xmm0,xmm4
721	vpxor	xmm1,xmm1,xmm3
722	vpsllq	xmm3,xmm0,57
723	vpsllq	xmm4,xmm0,62
724	vpxor	xmm4,xmm4,xmm3
725	vpsllq	xmm3,xmm0,63
726	vpxor	xmm4,xmm4,xmm3
727	vpslldq	xmm3,xmm4,8
728	vpsrldq	xmm4,xmm4,8
729	vpxor	xmm0,xmm0,xmm3
730	vpxor	xmm1,xmm1,xmm4
731
732	vpsrlq	xmm4,xmm0,1
733	vpxor	xmm1,xmm1,xmm0
734	vpxor	xmm0,xmm0,xmm4
735	vpsrlq	xmm4,xmm4,5
736	vpxor	xmm0,xmm0,xmm4
737	vpsrlq	xmm0,xmm0,1
738	vpxor	xmm0,xmm0,xmm1
; xmm3 / xmm4 = lo^hi of the odd / even power; they are packed and stored
; by the next vpalignr (top of the loop, or the final one below).
739	vpshufd	xmm3,xmm5,78
740	vpshufd	xmm4,xmm0,78
741	vpxor	xmm3,xmm3,xmm5
742	vmovdqu	XMMWORD[rcx],xmm5
743	vpxor	xmm4,xmm4,xmm0
744	vmovdqu	XMMWORD[16+rcx],xmm0
745	lea	rcx,[48+rcx]
746	sub	r10,1
747	jnz	NEAR $L$init_loop_avx
748
; Final packed entry. Note the source operands are in the opposite order
; to the vpalignr at the top of the loop, so the two qwords of this last
; entry are swapped relative to the earlier ones.
749	vpalignr	xmm5,xmm3,xmm4,8
750	vmovdqu	XMMWORD[(-16)+rcx],xmm5
751
752	vzeroupper
753	movaps	xmm6,XMMWORD[rsp]
754	lea	rsp,[24+rsp]
755$L$SEH_end_gcm_init_avx:
756	DB	0F3h,0C3h		;repret
757
758
; ---------------------------------------------------------------------
; gcm_ghash_avx -- fold a buffer of 16-byte blocks into the hash state
; (AVX-encoded path, up to 8 blocks per reduction).
;
; Registers as used by the code (Win64 argument order):
;   rcx = 16-byte hash state; loaded at entry, stored back before return
;   rdx = key table; advanced by 64 at entry, so table offset N is written
;         below as ((N-64))+rdx
;   r8  = input pointer
;   r9  = input length in bytes
;         (assumes a non-zero multiple of 16 -- TODO confirm with callers)
;
; Long-lived registers: xmm10 = running hash (byte-reversed),
; xmm13 = byte-swap mask, r10 = address of $L$0x1c2_polynomial.
;
; Paths: 8 blocks at a time while r9 >= 0x80 ($L$oop8x_avx); otherwise
; $L$short_avx handles up to 7 blocks, walking backwards from the end of
; the buffer. Both finish in $L$tail_avx, which performs the reduction.
;
; Stack: 168-byte frame holding xmm6-xmm15 (same prologue bytes as
; gcm_ghash_clmul). vzeroupper is issued on entry and before return.
; ---------------------------------------------------------------------
759global	gcm_ghash_avx
760
761ALIGN	32
762gcm_ghash_avx:
763
; rax = entry rsp - 136: base address for the xmm8-xmm15 save slots.
764	lea	rax,[((-136))+rsp]
765$L$SEH_begin_gcm_ghash_avx:
766
; Prologue as raw bytes (see $L$SEH_info_gcm_ghash_clmul):
;   lea    rsp,[rax-0x20]        ; rsp = entry rsp - 168
;   movaps [rax-0x20],xmm6  /  movaps [rax-0x10],xmm7
;   movaps [rax+0x00],xmm8  ...  movaps [rax+0x70],xmm15
767DB	0x48,0x8d,0x60,0xe0
768DB	0x0f,0x29,0x70,0xe0
769DB	0x0f,0x29,0x78,0xf0
770DB	0x44,0x0f,0x29,0x00
771DB	0x44,0x0f,0x29,0x48,0x10
772DB	0x44,0x0f,0x29,0x50,0x20
773DB	0x44,0x0f,0x29,0x58,0x30
774DB	0x44,0x0f,0x29,0x60,0x40
775DB	0x44,0x0f,0x29,0x68,0x50
776DB	0x44,0x0f,0x29,0x70,0x60
777DB	0x44,0x0f,0x29,0x78,0x70
778	vzeroupper
779
780	vmovdqu	xmm10,XMMWORD[rcx]
781	lea	r10,[$L$0x1c2_polynomial]
; Bias the table pointer by +64 (keeps the displacements below small).
782	lea	rdx,[64+rdx]
783	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
784	vpshufb	xmm10,xmm10,xmm13
; Fewer than 128 bytes -> short path.
785	cmp	r9,0x80
786	jb	NEAR $L$short_avx
787	sub	r9,0x80
788
; First 8-block group. Blocks are taken from the highest address down:
; the block at 112+r8 pairs with table offset 0, 96+r8 with offset 16,
; and so on up to the block at r8 with offset 160. Offsets 32, 80, 128
; and 176 (xmm7) supply the operands for the middle products.
; Accumulators: xmm0/xmm3 = lo*lo, xmm1/xmm4 = hi*hi, xmm2/xmm5 = middle.
789	vmovdqu	xmm14,XMMWORD[112+r8]
790	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
791	vpshufb	xmm14,xmm14,xmm13
792	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
793
794	vpunpckhqdq	xmm9,xmm14,xmm14
795	vmovdqu	xmm15,XMMWORD[96+r8]
796	vpclmulqdq	xmm0,xmm14,xmm6,0x00
797	vpxor	xmm9,xmm9,xmm14
798	vpshufb	xmm15,xmm15,xmm13
799	vpclmulqdq	xmm1,xmm14,xmm6,0x11
800	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
801	vpunpckhqdq	xmm8,xmm15,xmm15
802	vmovdqu	xmm14,XMMWORD[80+r8]
803	vpclmulqdq	xmm2,xmm9,xmm7,0x00
804	vpxor	xmm8,xmm8,xmm15
805
806	vpshufb	xmm14,xmm14,xmm13
807	vpclmulqdq	xmm3,xmm15,xmm6,0x00
808	vpunpckhqdq	xmm9,xmm14,xmm14
809	vpclmulqdq	xmm4,xmm15,xmm6,0x11
810	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
811	vpxor	xmm9,xmm9,xmm14
812	vmovdqu	xmm15,XMMWORD[64+r8]
813	vpclmulqdq	xmm5,xmm8,xmm7,0x10
814	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
815
816	vpshufb	xmm15,xmm15,xmm13
817	vpxor	xmm3,xmm3,xmm0
818	vpclmulqdq	xmm0,xmm14,xmm6,0x00
819	vpxor	xmm4,xmm4,xmm1
820	vpunpckhqdq	xmm8,xmm15,xmm15
821	vpclmulqdq	xmm1,xmm14,xmm6,0x11
822	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
823	vpxor	xmm5,xmm5,xmm2
824	vpclmulqdq	xmm2,xmm9,xmm7,0x00
825	vpxor	xmm8,xmm8,xmm15
826
827	vmovdqu	xmm14,XMMWORD[48+r8]
828	vpxor	xmm0,xmm0,xmm3
829	vpclmulqdq	xmm3,xmm15,xmm6,0x00
830	vpxor	xmm1,xmm1,xmm4
831	vpshufb	xmm14,xmm14,xmm13
832	vpclmulqdq	xmm4,xmm15,xmm6,0x11
833	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
834	vpxor	xmm2,xmm2,xmm5
835	vpunpckhqdq	xmm9,xmm14,xmm14
836	vpclmulqdq	xmm5,xmm8,xmm7,0x10
837	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
838	vpxor	xmm9,xmm9,xmm14
839
840	vmovdqu	xmm15,XMMWORD[32+r8]
841	vpxor	xmm3,xmm3,xmm0
842	vpclmulqdq	xmm0,xmm14,xmm6,0x00
843	vpxor	xmm4,xmm4,xmm1
844	vpshufb	xmm15,xmm15,xmm13
845	vpclmulqdq	xmm1,xmm14,xmm6,0x11
846	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
847	vpxor	xmm5,xmm5,xmm2
848	vpunpckhqdq	xmm8,xmm15,xmm15
849	vpclmulqdq	xmm2,xmm9,xmm7,0x00
850	vpxor	xmm8,xmm8,xmm15
851
852	vmovdqu	xmm14,XMMWORD[16+r8]
853	vpxor	xmm0,xmm0,xmm3
854	vpclmulqdq	xmm3,xmm15,xmm6,0x00
855	vpxor	xmm1,xmm1,xmm4
856	vpshufb	xmm14,xmm14,xmm13
857	vpclmulqdq	xmm4,xmm15,xmm6,0x11
858	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
859	vpxor	xmm2,xmm2,xmm5
860	vpunpckhqdq	xmm9,xmm14,xmm14
861	vpclmulqdq	xmm5,xmm8,xmm7,0x10
862	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
863	vpxor	xmm9,xmm9,xmm14
864
; The group's first block (xmm15) is only loaded and byte-swapped here;
; it is multiplied by table offset 160 (xmm6) later, after the running
; hash has been xor'ed into it (in the loop or in $L$tail_avx).
865	vmovdqu	xmm15,XMMWORD[r8]
866	vpxor	xmm3,xmm3,xmm0
867	vpclmulqdq	xmm0,xmm14,xmm6,0x00
868	vpxor	xmm4,xmm4,xmm1
869	vpshufb	xmm15,xmm15,xmm13
870	vpclmulqdq	xmm1,xmm14,xmm6,0x11
871	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
872	vpxor	xmm5,xmm5,xmm2
873	vpclmulqdq	xmm2,xmm9,xmm7,0x10
874
; No further full group -> finish in $L$tail_avx (which does the xor).
875	lea	r8,[128+r8]
876	cmp	r9,0x80
877	jb	NEAR $L$tail_avx
878
879	vpxor	xmm15,xmm15,xmm10
880	sub	r9,0x80
881	jmp	NEAR $L$oop8x_avx
882
883ALIGN	32
; 8-block loop. On entry xmm15 = previous group's first block xor'ed with
; the running hash. Its three products go to xmm10/xmm11/xmm12, which are
; summed with the previous group's accumulators and reduced using the
; constant at [r10], interleaved with the products for the next 8 blocks.
884$L$oop8x_avx:
885	vpunpckhqdq	xmm8,xmm15,xmm15
886	vmovdqu	xmm14,XMMWORD[112+r8]
887	vpxor	xmm3,xmm3,xmm0
888	vpxor	xmm8,xmm8,xmm15
889	vpclmulqdq	xmm10,xmm15,xmm6,0x00
890	vpshufb	xmm14,xmm14,xmm13
891	vpxor	xmm4,xmm4,xmm1
892	vpclmulqdq	xmm11,xmm15,xmm6,0x11
893	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
894	vpunpckhqdq	xmm9,xmm14,xmm14
895	vpxor	xmm5,xmm5,xmm2
896	vpclmulqdq	xmm12,xmm8,xmm7,0x00
897	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
898	vpxor	xmm9,xmm9,xmm14
899
900	vmovdqu	xmm15,XMMWORD[96+r8]
901	vpclmulqdq	xmm0,xmm14,xmm6,0x00
902	vpxor	xmm10,xmm10,xmm3
903	vpshufb	xmm15,xmm15,xmm13
904	vpclmulqdq	xmm1,xmm14,xmm6,0x11
905	vxorps	xmm11,xmm11,xmm4
906	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
907	vpunpckhqdq	xmm8,xmm15,xmm15
908	vpclmulqdq	xmm2,xmm9,xmm7,0x00
909	vpxor	xmm12,xmm12,xmm5
910	vxorps	xmm8,xmm8,xmm15
911
; Fold the summed middle term (xmm12) into xmm11:xmm10.
912	vmovdqu	xmm14,XMMWORD[80+r8]
913	vpxor	xmm12,xmm12,xmm10
914	vpclmulqdq	xmm3,xmm15,xmm6,0x00
915	vpxor	xmm12,xmm12,xmm11
916	vpslldq	xmm9,xmm12,8
917	vpxor	xmm3,xmm3,xmm0
918	vpclmulqdq	xmm4,xmm15,xmm6,0x11
919	vpsrldq	xmm12,xmm12,8
920	vpxor	xmm10,xmm10,xmm9
921	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
922	vpshufb	xmm14,xmm14,xmm13
923	vxorps	xmm11,xmm11,xmm12
924	vpxor	xmm4,xmm4,xmm1
925	vpunpckhqdq	xmm9,xmm14,xmm14
926	vpclmulqdq	xmm5,xmm8,xmm7,0x10
927	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
928	vpxor	xmm9,xmm9,xmm14
929	vpxor	xmm5,xmm5,xmm2
930
; Reduction step 1: xmm12 = xmm10 with halves swapped; a few lines below
; xmm10 = low qword of xmm10 times the high qword of the constant.
931	vmovdqu	xmm15,XMMWORD[64+r8]
932	vpalignr	xmm12,xmm10,xmm10,8
933	vpclmulqdq	xmm0,xmm14,xmm6,0x00
934	vpshufb	xmm15,xmm15,xmm13
935	vpxor	xmm0,xmm0,xmm3
936	vpclmulqdq	xmm1,xmm14,xmm6,0x11
937	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
938	vpunpckhqdq	xmm8,xmm15,xmm15
939	vpxor	xmm1,xmm1,xmm4
940	vpclmulqdq	xmm2,xmm9,xmm7,0x00
941	vxorps	xmm8,xmm8,xmm15
942	vpxor	xmm2,xmm2,xmm5
943
944	vmovdqu	xmm14,XMMWORD[48+r8]
945	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
946	vpclmulqdq	xmm3,xmm15,xmm6,0x00
947	vpshufb	xmm14,xmm14,xmm13
948	vpxor	xmm3,xmm3,xmm0
949	vpclmulqdq	xmm4,xmm15,xmm6,0x11
950	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
951	vpunpckhqdq	xmm9,xmm14,xmm14
952	vpxor	xmm4,xmm4,xmm1
953	vpclmulqdq	xmm5,xmm8,xmm7,0x10
954	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
955	vpxor	xmm9,xmm9,xmm14
956	vpxor	xmm5,xmm5,xmm2
957
958	vmovdqu	xmm15,XMMWORD[32+r8]
959	vpclmulqdq	xmm0,xmm14,xmm6,0x00
960	vpshufb	xmm15,xmm15,xmm13
961	vpxor	xmm0,xmm0,xmm3
962	vpclmulqdq	xmm1,xmm14,xmm6,0x11
963	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
964	vpunpckhqdq	xmm8,xmm15,xmm15
965	vpxor	xmm1,xmm1,xmm4
966	vpclmulqdq	xmm2,xmm9,xmm7,0x00
967	vpxor	xmm8,xmm8,xmm15
968	vpxor	xmm2,xmm2,xmm5
969	vxorps	xmm10,xmm10,xmm12
970
; Reduction step 2: same swap-and-multiply pattern, then the high half
; (xmm11) is folded in via xmm12.
971	vmovdqu	xmm14,XMMWORD[16+r8]
972	vpalignr	xmm12,xmm10,xmm10,8
973	vpclmulqdq	xmm3,xmm15,xmm6,0x00
974	vpshufb	xmm14,xmm14,xmm13
975	vpxor	xmm3,xmm3,xmm0
976	vpclmulqdq	xmm4,xmm15,xmm6,0x11
977	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
978	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
979	vxorps	xmm12,xmm12,xmm11
980	vpunpckhqdq	xmm9,xmm14,xmm14
981	vpxor	xmm4,xmm4,xmm1
982	vpclmulqdq	xmm5,xmm8,xmm7,0x10
983	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
984	vpxor	xmm9,xmm9,xmm14
985	vpxor	xmm5,xmm5,xmm2
986
; New first block: xor'ed with both parts of the reduced hash
; (xmm12 and xmm10), ready for the next pass or for $L$tail_no_xor_avx.
987	vmovdqu	xmm15,XMMWORD[r8]
988	vpclmulqdq	xmm0,xmm14,xmm6,0x00
989	vpshufb	xmm15,xmm15,xmm13
990	vpclmulqdq	xmm1,xmm14,xmm6,0x11
991	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
992	vpxor	xmm15,xmm15,xmm12
993	vpclmulqdq	xmm2,xmm9,xmm7,0x10
994	vpxor	xmm15,xmm15,xmm10
995
996	lea	r8,[128+r8]
997	sub	r9,0x80
998	jnc	NEAR $L$oop8x_avx
999
; Undo the last subtraction (r9 = leftover bytes). The hash is already
; folded into xmm15, so skip the xor at $L$tail_avx.
1000	add	r9,0x80
1001	jmp	NEAR $L$tail_no_xor_avx
1002
1003ALIGN	32
; Short path: up to 7 blocks, walking backwards from the end of the
; buffer. r8 is moved to the end, so blocks are addressed as -32+r8,
; -48+r8, ... and paired with increasing table offsets.
1004$L$short_avx:
1005	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
1006	lea	r8,[r9*1+r8]
1007	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1008	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1009	vpshufb	xmm15,xmm14,xmm13
1010
; Seed each accumulator with a copy of its partner register, so the first
; "vpxor xmm3,xmm3,xmm0" (and likewise xmm4/xmm1, xmm5/xmm2) cancels to
; zero regardless of what the registers held.
1011	vmovdqa	xmm3,xmm0
1012	vmovdqa	xmm4,xmm1
1013	vmovdqa	xmm5,xmm2
1014	sub	r9,0x10
1015	jz	NEAR $L$tail_avx
1016
; Each step below: three products for the block in xmm15, load and
; byte-swap the next block further back, advance to the next table entry.
; xmm7 is alternately shifted down 8 bytes (to expose its high qword) and
; reloaded from the next mixed table entry.
1017	vpunpckhqdq	xmm8,xmm15,xmm15
1018	vpxor	xmm3,xmm3,xmm0
1019	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1020	vpxor	xmm8,xmm8,xmm15
1021	vmovdqu	xmm14,XMMWORD[((-32))+r8]
1022	vpxor	xmm4,xmm4,xmm1
1023	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1024	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1025	vpshufb	xmm15,xmm14,xmm13
1026	vpxor	xmm5,xmm5,xmm2
1027	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1028	vpsrldq	xmm7,xmm7,8
1029	sub	r9,0x10
1030	jz	NEAR $L$tail_avx
1031
1032	vpunpckhqdq	xmm8,xmm15,xmm15
1033	vpxor	xmm3,xmm3,xmm0
1034	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1035	vpxor	xmm8,xmm8,xmm15
1036	vmovdqu	xmm14,XMMWORD[((-48))+r8]
1037	vpxor	xmm4,xmm4,xmm1
1038	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1039	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1040	vpshufb	xmm15,xmm14,xmm13
1041	vpxor	xmm5,xmm5,xmm2
1042	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1043	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1044	sub	r9,0x10
1045	jz	NEAR $L$tail_avx
1046
1047	vpunpckhqdq	xmm8,xmm15,xmm15
1048	vpxor	xmm3,xmm3,xmm0
1049	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1050	vpxor	xmm8,xmm8,xmm15
1051	vmovdqu	xmm14,XMMWORD[((-64))+r8]
1052	vpxor	xmm4,xmm4,xmm1
1053	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1054	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1055	vpshufb	xmm15,xmm14,xmm13
1056	vpxor	xmm5,xmm5,xmm2
1057	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1058	vpsrldq	xmm7,xmm7,8
1059	sub	r9,0x10
1060	jz	NEAR $L$tail_avx
1061
1062	vpunpckhqdq	xmm8,xmm15,xmm15
1063	vpxor	xmm3,xmm3,xmm0
1064	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1065	vpxor	xmm8,xmm8,xmm15
1066	vmovdqu	xmm14,XMMWORD[((-80))+r8]
1067	vpxor	xmm4,xmm4,xmm1
1068	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1069	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1070	vpshufb	xmm15,xmm14,xmm13
1071	vpxor	xmm5,xmm5,xmm2
1072	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1073	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1074	sub	r9,0x10
1075	jz	NEAR $L$tail_avx
1076
1077	vpunpckhqdq	xmm8,xmm15,xmm15
1078	vpxor	xmm3,xmm3,xmm0
1079	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1080	vpxor	xmm8,xmm8,xmm15
1081	vmovdqu	xmm14,XMMWORD[((-96))+r8]
1082	vpxor	xmm4,xmm4,xmm1
1083	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1084	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1085	vpshufb	xmm15,xmm14,xmm13
1086	vpxor	xmm5,xmm5,xmm2
1087	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1088	vpsrldq	xmm7,xmm7,8
1089	sub	r9,0x10
1090	jz	NEAR $L$tail_avx
1091
1092	vpunpckhqdq	xmm8,xmm15,xmm15
1093	vpxor	xmm3,xmm3,xmm0
1094	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1095	vpxor	xmm8,xmm8,xmm15
1096	vmovdqu	xmm14,XMMWORD[((-112))+r8]
1097	vpxor	xmm4,xmm4,xmm1
1098	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1099	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1100	vpshufb	xmm15,xmm14,xmm13
1101	vpxor	xmm5,xmm5,xmm2
1102	vpclmulqdq	xmm2,xmm8,xmm7,0x00
; Seventh block: load only the high qword of table offset 176 into the
; low qword of xmm7, then always go to the tail.
1103	vmovq	xmm7,QWORD[((184-64))+rdx]
1104	sub	r9,0x10
1105	jmp	NEAR $L$tail_avx
1106
1107ALIGN	32
; Tail: xor the running hash into the earliest pending block, do its
; three products, sum all accumulators, and reduce to 128 bits in xmm10.
1108$L$tail_avx:
1109	vpxor	xmm15,xmm15,xmm10
1110$L$tail_no_xor_avx:
1111	vpunpckhqdq	xmm8,xmm15,xmm15
1112	vpxor	xmm3,xmm3,xmm0
1113	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1114	vpxor	xmm8,xmm8,xmm15
1115	vpxor	xmm4,xmm4,xmm1
1116	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1117	vpxor	xmm5,xmm5,xmm2
1118	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1119
1120	vmovdqu	xmm12,XMMWORD[r10]
1121
; xmm10 = lo*lo sum, xmm11 = hi*hi sum, xmm5 = middle sum.
1122	vpxor	xmm10,xmm3,xmm0
1123	vpxor	xmm11,xmm4,xmm1
1124	vpxor	xmm5,xmm5,xmm2
1125
; Fold the middle term into the 256-bit value xmm11:xmm10.
1126	vpxor	xmm5,xmm5,xmm10
1127	vpxor	xmm5,xmm5,xmm11
1128	vpslldq	xmm9,xmm5,8
1129	vpsrldq	xmm5,xmm5,8
1130	vpxor	xmm10,xmm10,xmm9
1131	vpxor	xmm11,xmm11,xmm5
1132
; Two reduction steps: multiply the low qword by the constant's high
; qword, swap halves, xor; the second step also folds in xmm11.
1133	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1134	vpalignr	xmm10,xmm10,xmm10,8
1135	vpxor	xmm10,xmm10,xmm9
1136
1137	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1138	vpalignr	xmm10,xmm10,xmm10,8
1139	vpxor	xmm10,xmm10,xmm11
1140	vpxor	xmm10,xmm10,xmm9
1141
; Bytes left over after the 8-block loop are handled by another trip
; through the short path.
1142	cmp	r9,0
1143	jne	NEAR $L$short_avx
1144
; Byte-reverse the hash back to memory order, store it, restore
; xmm6-xmm15 and release the 168-byte frame.
1145	vpshufb	xmm10,xmm10,xmm13
1146	vmovdqu	XMMWORD[rcx],xmm10
1147	vzeroupper
1148	movaps	xmm6,XMMWORD[rsp]
1149	movaps	xmm7,XMMWORD[16+rsp]
1150	movaps	xmm8,XMMWORD[32+rsp]
1151	movaps	xmm9,XMMWORD[48+rsp]
1152	movaps	xmm10,XMMWORD[64+rsp]
1153	movaps	xmm11,XMMWORD[80+rsp]
1154	movaps	xmm12,XMMWORD[96+rsp]
1155	movaps	xmm13,XMMWORD[112+rsp]
1156	movaps	xmm14,XMMWORD[128+rsp]
1157	movaps	xmm15,XMMWORD[144+rsp]
1158	lea	rsp,[168+rsp]
1159$L$SEH_end_gcm_ghash_avx:
1160	DB	0F3h,0C3h		;repret
1161
1162
; Read-only constants placed in .text. The 64-byte alignment makes the
; aligned (movdqa) loads of $L$bswap_mask and $L$7_mask valid.
1163ALIGN	64
; pshufb control that reverses the order of all 16 bytes.
1164$L$bswap_mask:
1165DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 128-bit constant with byte 0 = 0x01 and byte 15 = 0xc2 (little-endian
; value 0xc2000000_00000000_00000000_00000001). Used for the conditional
; xor in the init routines and as the multiplier in the AVX reduction.
1166$L$0x1c2_polynomial:
1167DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; Each qword = 7: isolates the low three bits of each half for the
; table-driven reduction in $L$mod4_loop.
1168$L$7_mask:
1169	DD	7,0,7,0
1170ALIGN	64
1171
; NUL-terminated ASCII ident string:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1172DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1173DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1174DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1175DB	114,103,62,0
1176ALIGN	64
; Windows x64 function table. Each entry is three image-relative
; addresses: function begin, function end, unwind info.
; gcm_gmult_clmul has no entry: it does not adjust rsp or write any of
; xmm6-xmm15.
1177section	.pdata rdata align=4
1178ALIGN	4
1179	DD	$L$SEH_begin_gcm_init_clmul wrt ..imagebase
1180	DD	$L$SEH_end_gcm_init_clmul wrt ..imagebase
1181	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
1182
1183	DD	$L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
1184	DD	$L$SEH_end_gcm_ghash_clmul wrt ..imagebase
1185	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; gcm_init_avx reuses the gcm_init_clmul unwind info: both prologues are
; the same two DB-encoded instructions.
1186	DD	$L$SEH_begin_gcm_init_avx wrt ..imagebase
1187	DD	$L$SEH_end_gcm_init_avx wrt ..imagebase
1188	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
1189
; gcm_ghash_avx reuses the gcm_ghash_clmul unwind info: both prologues
; are the same eleven DB-encoded instructions after the begin label.
1190	DD	$L$SEH_begin_gcm_ghash_avx wrt ..imagebase
1191	DD	$L$SEH_end_gcm_ghash_avx wrt ..imagebase
1192	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; Windows x64 unwind information. Layout per record: a 4-byte header
; (version/flags, prologue size, number of code slots, frame register)
; followed by 2-byte unwind codes {prologue offset, op | info<<4}, with
; extra operand slots where the op needs them. Codes are listed from the
; last prologue instruction back to the first.
1193section	.xdata rdata align=8
1194ALIGN	8
; init routines: version 1, 8-byte prologue, 3 code slots, no frame reg.
;   offset 0x08: save xmm6 at [rsp+0]     (op 8 = SAVE_XMM128, reg 6)
;   offset 0x04: allocate 24 bytes        (op 2 = ALLOC_SMALL, info 2)
1195$L$SEH_info_gcm_init_clmul:
1196DB	0x01,0x08,0x03,0x00
1197DB	0x08,0x68,0x00,0x00
1198DB	0x04,0x22,0x00,0x00
; ghash routines: version 1, 0x33-byte prologue, 0x16 (22) code slots.
;   offsets 0x33..0x08: save xmm15..xmm6; the third byte is the stack
;     offset divided by 16 (9 -> [rsp+144] for xmm15 ... 0 -> [rsp+0]
;     for xmm6)
;   offset 0x04: allocate 0x15*8 = 168 bytes (op 1 = ALLOC_LARGE)
1199$L$SEH_info_gcm_ghash_clmul:
1200DB	0x01,0x33,0x16,0x00
1201DB	0x33,0xf8,0x09,0x00
1202DB	0x2e,0xe8,0x08,0x00
1203DB	0x29,0xd8,0x07,0x00
1204DB	0x24,0xc8,0x06,0x00
1205DB	0x1f,0xb8,0x05,0x00
1206DB	0x1a,0xa8,0x04,0x00
1207DB	0x15,0x98,0x03,0x00
1208DB	0x10,0x88,0x02,0x00
1209DB	0x0c,0x78,0x01,0x00
1210DB	0x08,0x68,0x00,0x00
1211DB	0x04,0x01,0x15,0x00
1211DB	0x04,0x01,0x15,0x00
1212