• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; File-wide assembler setup.
; `default rel` makes bare [symbol] memory operands RIP-relative.
4default	rel
; XMMWORD/YMMWORD/ZMMWORD are defined with empty bodies, so the size tags
; written in front of memory operands throughout this file expand to nothing.
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
; The symbol-prefix include is pulled in only when BORINGSSL_PREFIX is defined.
9%ifdef BORINGSSL_PREFIX
10%include "boringssl_prefix_symbols_nasm.inc"
11%endif
12section	.text code align=64
13
; External symbol; in the part of the file visible here it is only read as a
; dword at offset 4 (inside gcm_ghash_clmul).
14EXTERN	OPENSSL_ia32cap_P
15
; ---------------------------------------------------------------------------
; gcm_gmult_4bit  (Win64 entry: arguments arrive in rcx and rdx)
;   rcx -> 16-byte block; its bytes are consumed from index 15 down to 0 and
;          the block is overwritten with the 16-byte result.
;   rdx -> table of 16-byte entries, addressed below as [rsi + nibble*16].
; Presumably the GCM "Xi = Xi * H" multiply using a 4-bit table (going by the
; name; the table contents are produced outside this view).
; Also reads $L$rem_4bit (qword-indexed, defined outside this view).
; Working registers: r9:r8 = 128-bit accumulator (r9 is the upper qword),
; rax/rbx = byte offsets of the next two table entries, rcx = byte index,
; rdx = the four bits shifted out of r8, r10 = carry from r9 into r8.
; ---------------------------------------------------------------------------
16global	gcm_gmult_4bit
17
18ALIGN	16
19gcm_gmult_4bit:
; rdi/rsi are parked in the 8- and 16-byte slots above the return address and
; then loaded with the two arguments; they are reloaded from those slots at
; $L$gmult_epilogue.
20	mov	QWORD[8+rsp],rdi	;WIN64 prologue
21	mov	QWORD[16+rsp],rsi
22	mov	rax,rsp
23$L$SEH_begin_gcm_gmult_4bit:
24	mov	rdi,rcx
25	mov	rsi,rdx
26
27
28
; Six pushes (48 bytes) plus 280 bytes of frame; the epilogue recomputes the
; pre-push stack pointer as rsp+280+48.
29	push	rbx
30
31	push	rbp
32
33	push	r12
34
35	push	r13
36
37	push	r14
38
39	push	r15
40
41	sub	rsp,280
42
43$L$gmult_prologue:
44
; Split the last input byte: al = low nibble << 4, bl = high nibble << 4
; (after the `and bl,0xf0` below). Both are byte offsets into the table.
45	movzx	r8,BYTE[15+rdi]
46	lea	r11,[$L$rem_4bit]
47	xor	rax,rax
48	xor	rbx,rbx
49	mov	al,r8b
50	mov	bl,r8b
51	shl	al,4
52	mov	rcx,14
53	mov	r8,QWORD[8+rax*1+rsi]
54	mov	r9,QWORD[rax*1+rsi]
55	and	bl,0xf0
56	mov	rdx,r8
57	jmp	NEAR $L$oop1
58
59ALIGN	16
; Each half of the loop shifts r9:r8 right by four bits, xors in one table
; entry, and xors $L$rem_4bit[bits shifted out] into r9. The first half uses
; the entry at offset rbx and fetches the next input byte; the second half
; uses the entry at offset rax. rcx walks the input bytes 14..0 and the loop
; exits through `js` once it goes negative.
60$L$oop1:
61	shr	r8,4
62	and	rdx,0xf
63	mov	r10,r9
64	mov	al,BYTE[rcx*1+rdi]
65	shr	r9,4
66	xor	r8,QWORD[8+rbx*1+rsi]
67	shl	r10,60
68	xor	r9,QWORD[rbx*1+rsi]
69	mov	bl,al
70	xor	r9,QWORD[rdx*8+r11]
71	mov	rdx,r8
72	shl	al,4
73	xor	r8,r10
74	dec	rcx
75	js	NEAR $L$break1
76
77	shr	r8,4
78	and	rdx,0xf
79	mov	r10,r9
80	shr	r9,4
81	xor	r8,QWORD[8+rax*1+rsi]
82	shl	r10,60
83	xor	r9,QWORD[rax*1+rsi]
84	and	bl,0xf0
85	xor	r9,QWORD[rdx*8+r11]
86	mov	rdx,r8
87	xor	r8,r10
88	jmp	NEAR $L$oop1
89
90ALIGN	16
; Tail for input byte 0: the same two steps once more (offset rax, then
; offset rbx), without fetching another byte.
91$L$break1:
92	shr	r8,4
93	and	rdx,0xf
94	mov	r10,r9
95	shr	r9,4
96	xor	r8,QWORD[8+rax*1+rsi]
97	shl	r10,60
98	xor	r9,QWORD[rax*1+rsi]
99	and	bl,0xf0
100	xor	r9,QWORD[rdx*8+r11]
101	mov	rdx,r8
102	xor	r8,r10
103
104	shr	r8,4
105	and	rdx,0xf
106	mov	r10,r9
107	shr	r9,4
108	xor	r8,QWORD[8+rbx*1+rsi]
109	shl	r10,60
110	xor	r9,QWORD[rbx*1+rsi]
111	xor	r8,r10
112	xor	r9,QWORD[rdx*8+r11]
113
; Byte-swap both halves and write the result back over the input block.
114	bswap	r8
115	bswap	r9
116	mov	QWORD[8+rdi],r8
117	mov	QWORD[rdi],r9
118
; rsi = stack pointer as it was before the six pushes. Only rbx is reloaded:
; the body above never writes rbp or r12-r15, so their saved copies are
; simply dropped when rsp is reset.
119	lea	rsi,[((280+48))+rsp]
120
121	mov	rbx,QWORD[((-8))+rsi]
122
123	lea	rsp,[rsi]
124
125$L$gmult_epilogue:
126	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
127	mov	rsi,QWORD[16+rsp]
128	DB	0F3h,0C3h		;repret
129
130$L$SEH_end_gcm_gmult_4bit:
; ---------------------------------------------------------------------------
; gcm_ghash_4bit  (Win64 entry: arguments arrive in rcx, rdx, r8, r9)
;   rcx -> 16-byte running state; read on entry, updated for every block and
;          holding the final result on return.
;   rdx -> table of sixteen 16-byte entries (256 bytes are read).
;   r8  -> input bytes.
;   r9  =  input length in bytes. Blocks are 16 bytes; the loop tests its
;          bound only after processing a block, so a zero length still
;          consumes one block.
; Presumably the table-driven GHASH update over whole blocks (going by the
; name). Also reads $L$rem_8bit (word-indexed, defined outside this view).
;
; Stack frame (280 bytes below the six saved registers):
;   [rsp+0   .. rsp+15 ]  per-entry byte: low byte of the entry's second
;                         qword, shifted left by 4
;   [rsp+16  .. rsp+143]  per-entry qword: (second qword >> 4) | (first << 60)
;   [rsp+144 .. rsp+271]  per-entry qword: first qword >> 4
; rbp points at rsp+144, so the last two arrays are [rbp-128+8*i], [rbp+8*i].
;
; Change from the generated output: the generator's final
; `mov edx,DWORD[-4+rdi]` has been removed (see the note near the end of
; $L$outer_loop). The same change belongs in the Perl generator, otherwise
; regeneration will bring the instruction back.
; ---------------------------------------------------------------------------
131global	gcm_ghash_4bit
132
133ALIGN	16
134gcm_ghash_4bit:
135	mov	QWORD[8+rsp],rdi	;WIN64 prologue
136	mov	QWORD[16+rsp],rsi
137	mov	rax,rsp
138$L$SEH_begin_gcm_ghash_4bit:
139	mov	rdi,rcx
140	mov	rsi,rdx
141	mov	rdx,r8
142	mov	rcx,r9
143
144
145
146	push	rbx
147
148	push	rbp
149
150	push	r12
151
152	push	r13
153
154	push	r14
155
156	push	r15
157
158	sub	rsp,280
159
160$L$ghash_prologue:
; r14 = input cursor, r15 = length (turned into an end pointer further down).
161	mov	r14,rdx
162	mov	r15,rcx
; rsi is biased by +128 while the stack tables are built; every table
; displacement below carries a matching -128, and the bias is removed by the
; `add rsi,-128` after the last store.
163	sub	rsi,-128
164	lea	rbp,[((16+128))+rsp]
165	xor	edx,edx
; Sixteen interleaved copies of the same step, one per table entry i:
;   BYTE [rsp+i]        = (low byte of second qword) << 4
;   QWORD[rbp+8*i]      = first qword >> 4
;   QWORD[rbp-128+8*i]  = (second qword >> 4) | (first qword << 60)
; r8/rax and r9/rbx alternate as the working pair so that consecutive entries
; overlap.
166	mov	r8,QWORD[((0+0-128))+rsi]
167	mov	rax,QWORD[((0+8-128))+rsi]
168	mov	dl,al
169	shr	rax,4
170	mov	r10,r8
171	shr	r8,4
172	mov	r9,QWORD[((16+0-128))+rsi]
173	shl	dl,4
174	mov	rbx,QWORD[((16+8-128))+rsi]
175	shl	r10,60
176	mov	BYTE[rsp],dl
177	or	rax,r10
178	mov	dl,bl
179	shr	rbx,4
180	mov	r10,r9
181	shr	r9,4
182	mov	QWORD[rbp],r8
183	mov	r8,QWORD[((32+0-128))+rsi]
184	shl	dl,4
185	mov	QWORD[((0-128))+rbp],rax
186	mov	rax,QWORD[((32+8-128))+rsi]
187	shl	r10,60
188	mov	BYTE[1+rsp],dl
189	or	rbx,r10
190	mov	dl,al
191	shr	rax,4
192	mov	r10,r8
193	shr	r8,4
194	mov	QWORD[8+rbp],r9
195	mov	r9,QWORD[((48+0-128))+rsi]
196	shl	dl,4
197	mov	QWORD[((8-128))+rbp],rbx
198	mov	rbx,QWORD[((48+8-128))+rsi]
199	shl	r10,60
200	mov	BYTE[2+rsp],dl
201	or	rax,r10
202	mov	dl,bl
203	shr	rbx,4
204	mov	r10,r9
205	shr	r9,4
206	mov	QWORD[16+rbp],r8
207	mov	r8,QWORD[((64+0-128))+rsi]
208	shl	dl,4
209	mov	QWORD[((16-128))+rbp],rax
210	mov	rax,QWORD[((64+8-128))+rsi]
211	shl	r10,60
212	mov	BYTE[3+rsp],dl
213	or	rbx,r10
214	mov	dl,al
215	shr	rax,4
216	mov	r10,r8
217	shr	r8,4
218	mov	QWORD[24+rbp],r9
219	mov	r9,QWORD[((80+0-128))+rsi]
220	shl	dl,4
221	mov	QWORD[((24-128))+rbp],rbx
222	mov	rbx,QWORD[((80+8-128))+rsi]
223	shl	r10,60
224	mov	BYTE[4+rsp],dl
225	or	rax,r10
226	mov	dl,bl
227	shr	rbx,4
228	mov	r10,r9
229	shr	r9,4
230	mov	QWORD[32+rbp],r8
231	mov	r8,QWORD[((96+0-128))+rsi]
232	shl	dl,4
233	mov	QWORD[((32-128))+rbp],rax
234	mov	rax,QWORD[((96+8-128))+rsi]
235	shl	r10,60
236	mov	BYTE[5+rsp],dl
237	or	rbx,r10
238	mov	dl,al
239	shr	rax,4
240	mov	r10,r8
241	shr	r8,4
242	mov	QWORD[40+rbp],r9
243	mov	r9,QWORD[((112+0-128))+rsi]
244	shl	dl,4
245	mov	QWORD[((40-128))+rbp],rbx
246	mov	rbx,QWORD[((112+8-128))+rsi]
247	shl	r10,60
248	mov	BYTE[6+rsp],dl
249	or	rax,r10
250	mov	dl,bl
251	shr	rbx,4
252	mov	r10,r9
253	shr	r9,4
254	mov	QWORD[48+rbp],r8
255	mov	r8,QWORD[((128+0-128))+rsi]
256	shl	dl,4
257	mov	QWORD[((48-128))+rbp],rax
258	mov	rax,QWORD[((128+8-128))+rsi]
259	shl	r10,60
260	mov	BYTE[7+rsp],dl
261	or	rbx,r10
262	mov	dl,al
263	shr	rax,4
264	mov	r10,r8
265	shr	r8,4
266	mov	QWORD[56+rbp],r9
267	mov	r9,QWORD[((144+0-128))+rsi]
268	shl	dl,4
269	mov	QWORD[((56-128))+rbp],rbx
270	mov	rbx,QWORD[((144+8-128))+rsi]
271	shl	r10,60
272	mov	BYTE[8+rsp],dl
273	or	rax,r10
274	mov	dl,bl
275	shr	rbx,4
276	mov	r10,r9
277	shr	r9,4
278	mov	QWORD[64+rbp],r8
279	mov	r8,QWORD[((160+0-128))+rsi]
280	shl	dl,4
281	mov	QWORD[((64-128))+rbp],rax
282	mov	rax,QWORD[((160+8-128))+rsi]
283	shl	r10,60
284	mov	BYTE[9+rsp],dl
285	or	rbx,r10
286	mov	dl,al
287	shr	rax,4
288	mov	r10,r8
289	shr	r8,4
290	mov	QWORD[72+rbp],r9
291	mov	r9,QWORD[((176+0-128))+rsi]
292	shl	dl,4
293	mov	QWORD[((72-128))+rbp],rbx
294	mov	rbx,QWORD[((176+8-128))+rsi]
295	shl	r10,60
296	mov	BYTE[10+rsp],dl
297	or	rax,r10
298	mov	dl,bl
299	shr	rbx,4
300	mov	r10,r9
301	shr	r9,4
302	mov	QWORD[80+rbp],r8
303	mov	r8,QWORD[((192+0-128))+rsi]
304	shl	dl,4
305	mov	QWORD[((80-128))+rbp],rax
306	mov	rax,QWORD[((192+8-128))+rsi]
307	shl	r10,60
308	mov	BYTE[11+rsp],dl
309	or	rbx,r10
310	mov	dl,al
311	shr	rax,4
312	mov	r10,r8
313	shr	r8,4
314	mov	QWORD[88+rbp],r9
315	mov	r9,QWORD[((208+0-128))+rsi]
316	shl	dl,4
317	mov	QWORD[((88-128))+rbp],rbx
318	mov	rbx,QWORD[((208+8-128))+rsi]
319	shl	r10,60
320	mov	BYTE[12+rsp],dl
321	or	rax,r10
322	mov	dl,bl
323	shr	rbx,4
324	mov	r10,r9
325	shr	r9,4
326	mov	QWORD[96+rbp],r8
327	mov	r8,QWORD[((224+0-128))+rsi]
328	shl	dl,4
329	mov	QWORD[((96-128))+rbp],rax
330	mov	rax,QWORD[((224+8-128))+rsi]
331	shl	r10,60
332	mov	BYTE[13+rsp],dl
333	or	rbx,r10
334	mov	dl,al
335	shr	rax,4
336	mov	r10,r8
337	shr	r8,4
338	mov	QWORD[104+rbp],r9
339	mov	r9,QWORD[((240+0-128))+rsi]
340	shl	dl,4
341	mov	QWORD[((104-128))+rbp],rbx
342	mov	rbx,QWORD[((240+8-128))+rsi]
343	shl	r10,60
344	mov	BYTE[14+rsp],dl
345	or	rax,r10
346	mov	dl,bl
347	shr	rbx,4
348	mov	r10,r9
349	shr	r9,4
350	mov	QWORD[112+rbp],r8
351	shl	dl,4
352	mov	QWORD[((112-128))+rbp],rax
353	shl	r10,60
354	mov	BYTE[15+rsp],dl
355	or	rbx,r10
356	mov	QWORD[120+rbp],r9
357	mov	QWORD[((120-128))+rbp],rbx
; Undo the +128 bias, load the running state into r9:r8, and turn r15 into
; the end-of-input pointer.
358	add	rsi,-128
359	mov	r8,QWORD[8+rdi]
360	mov	r9,QWORD[rdi]
361	add	r15,r14
362	lea	r11,[$L$rem_8bit]
363	jmp	NEAR $L$outer_loop
364ALIGN	16
; One 16-byte input block per pass. The block is xored into the state, the
; result is written back to [rdi] so it can be re-read four bytes at a time
; into edx (offsets 12, 8, 4, 0), and `rol edx,8` then exposes one byte per
; step. Every step folds r9:r8 right by eight bits using the caller's table
; (offset rax), the two shifted on-stack tables (index rbx or rcx), the byte
; table at [rsp], and a 16-bit $L$rem_8bit entry shifted into the top of r9.
; r12/r13 and ebx/ecx alternate roles from one step to the next.
365$L$outer_loop:
366	xor	r9,QWORD[r14]
367	mov	rdx,QWORD[8+r14]
368	lea	r14,[16+r14]
369	xor	rdx,r8
370	mov	QWORD[rdi],r9
371	mov	QWORD[8+rdi],rdx
372	shr	rdx,32
373	xor	rax,rax
374	rol	edx,8
375	mov	al,dl
376	movzx	ebx,dl
377	shl	al,4
378	shr	ebx,4
379	rol	edx,8
380	mov	r8,QWORD[8+rax*1+rsi]
381	mov	r9,QWORD[rax*1+rsi]
382	mov	al,dl
383	movzx	ecx,dl
384	shl	al,4
385	movzx	r12,BYTE[rbx*1+rsp]
386	shr	ecx,4
387	xor	r12,r8
388	mov	r10,r9
389	shr	r8,8
390	movzx	r12,r12b
391	shr	r9,8
392	xor	r8,QWORD[((-128))+rbx*8+rbp]
393	shl	r10,56
394	xor	r9,QWORD[rbx*8+rbp]
395	rol	edx,8
396	xor	r8,QWORD[8+rax*1+rsi]
397	xor	r9,QWORD[rax*1+rsi]
398	mov	al,dl
399	xor	r8,r10
400	movzx	r12,WORD[r12*2+r11]
401	movzx	ebx,dl
402	shl	al,4
403	movzx	r13,BYTE[rcx*1+rsp]
404	shr	ebx,4
405	shl	r12,48
406	xor	r13,r8
407	mov	r10,r9
408	xor	r9,r12
409	shr	r8,8
410	movzx	r13,r13b
411	shr	r9,8
412	xor	r8,QWORD[((-128))+rcx*8+rbp]
413	shl	r10,56
414	xor	r9,QWORD[rcx*8+rbp]
415	rol	edx,8
416	xor	r8,QWORD[8+rax*1+rsi]
417	xor	r9,QWORD[rax*1+rsi]
418	mov	al,dl
419	xor	r8,r10
420	movzx	r13,WORD[r13*2+r11]
421	movzx	ecx,dl
422	shl	al,4
423	movzx	r12,BYTE[rbx*1+rsp]
424	shr	ecx,4
425	shl	r13,48
426	xor	r12,r8
427	mov	r10,r9
428	xor	r9,r13
429	shr	r8,8
430	movzx	r12,r12b
431	mov	edx,DWORD[8+rdi]
432	shr	r9,8
433	xor	r8,QWORD[((-128))+rbx*8+rbp]
434	shl	r10,56
435	xor	r9,QWORD[rbx*8+rbp]
436	rol	edx,8
437	xor	r8,QWORD[8+rax*1+rsi]
438	xor	r9,QWORD[rax*1+rsi]
439	mov	al,dl
440	xor	r8,r10
441	movzx	r12,WORD[r12*2+r11]
442	movzx	ebx,dl
443	shl	al,4
444	movzx	r13,BYTE[rcx*1+rsp]
445	shr	ebx,4
446	shl	r12,48
447	xor	r13,r8
448	mov	r10,r9
449	xor	r9,r12
450	shr	r8,8
451	movzx	r13,r13b
452	shr	r9,8
453	xor	r8,QWORD[((-128))+rcx*8+rbp]
454	shl	r10,56
455	xor	r9,QWORD[rcx*8+rbp]
456	rol	edx,8
457	xor	r8,QWORD[8+rax*1+rsi]
458	xor	r9,QWORD[rax*1+rsi]
459	mov	al,dl
460	xor	r8,r10
461	movzx	r13,WORD[r13*2+r11]
462	movzx	ecx,dl
463	shl	al,4
464	movzx	r12,BYTE[rbx*1+rsp]
465	shr	ecx,4
466	shl	r13,48
467	xor	r12,r8
468	mov	r10,r9
469	xor	r9,r13
470	shr	r8,8
471	movzx	r12,r12b
472	shr	r9,8
473	xor	r8,QWORD[((-128))+rbx*8+rbp]
474	shl	r10,56
475	xor	r9,QWORD[rbx*8+rbp]
476	rol	edx,8
477	xor	r8,QWORD[8+rax*1+rsi]
478	xor	r9,QWORD[rax*1+rsi]
479	mov	al,dl
480	xor	r8,r10
481	movzx	r12,WORD[r12*2+r11]
482	movzx	ebx,dl
483	shl	al,4
484	movzx	r13,BYTE[rcx*1+rsp]
485	shr	ebx,4
486	shl	r12,48
487	xor	r13,r8
488	mov	r10,r9
489	xor	r9,r12
490	shr	r8,8
491	movzx	r13,r13b
492	shr	r9,8
493	xor	r8,QWORD[((-128))+rcx*8+rbp]
494	shl	r10,56
495	xor	r9,QWORD[rcx*8+rbp]
496	rol	edx,8
497	xor	r8,QWORD[8+rax*1+rsi]
498	xor	r9,QWORD[rax*1+rsi]
499	mov	al,dl
500	xor	r8,r10
501	movzx	r13,WORD[r13*2+r11]
502	movzx	ecx,dl
503	shl	al,4
504	movzx	r12,BYTE[rbx*1+rsp]
505	shr	ecx,4
506	shl	r13,48
507	xor	r12,r8
508	mov	r10,r9
509	xor	r9,r13
510	shr	r8,8
511	movzx	r12,r12b
512	mov	edx,DWORD[4+rdi]
513	shr	r9,8
514	xor	r8,QWORD[((-128))+rbx*8+rbp]
515	shl	r10,56
516	xor	r9,QWORD[rbx*8+rbp]
517	rol	edx,8
518	xor	r8,QWORD[8+rax*1+rsi]
519	xor	r9,QWORD[rax*1+rsi]
520	mov	al,dl
521	xor	r8,r10
522	movzx	r12,WORD[r12*2+r11]
523	movzx	ebx,dl
524	shl	al,4
525	movzx	r13,BYTE[rcx*1+rsp]
526	shr	ebx,4
527	shl	r12,48
528	xor	r13,r8
529	mov	r10,r9
530	xor	r9,r12
531	shr	r8,8
532	movzx	r13,r13b
533	shr	r9,8
534	xor	r8,QWORD[((-128))+rcx*8+rbp]
535	shl	r10,56
536	xor	r9,QWORD[rcx*8+rbp]
537	rol	edx,8
538	xor	r8,QWORD[8+rax*1+rsi]
539	xor	r9,QWORD[rax*1+rsi]
540	mov	al,dl
541	xor	r8,r10
542	movzx	r13,WORD[r13*2+r11]
543	movzx	ecx,dl
544	shl	al,4
545	movzx	r12,BYTE[rbx*1+rsp]
546	shr	ecx,4
547	shl	r13,48
548	xor	r12,r8
549	mov	r10,r9
550	xor	r9,r13
551	shr	r8,8
552	movzx	r12,r12b
553	shr	r9,8
554	xor	r8,QWORD[((-128))+rbx*8+rbp]
555	shl	r10,56
556	xor	r9,QWORD[rbx*8+rbp]
557	rol	edx,8
558	xor	r8,QWORD[8+rax*1+rsi]
559	xor	r9,QWORD[rax*1+rsi]
560	mov	al,dl
561	xor	r8,r10
562	movzx	r12,WORD[r12*2+r11]
563	movzx	ebx,dl
564	shl	al,4
565	movzx	r13,BYTE[rcx*1+rsp]
566	shr	ebx,4
567	shl	r12,48
568	xor	r13,r8
569	mov	r10,r9
570	xor	r9,r12
571	shr	r8,8
572	movzx	r13,r13b
573	shr	r9,8
574	xor	r8,QWORD[((-128))+rcx*8+rbp]
575	shl	r10,56
576	xor	r9,QWORD[rcx*8+rbp]
577	rol	edx,8
578	xor	r8,QWORD[8+rax*1+rsi]
579	xor	r9,QWORD[rax*1+rsi]
580	mov	al,dl
581	xor	r8,r10
582	movzx	r13,WORD[r13*2+r11]
583	movzx	ecx,dl
584	shl	al,4
585	movzx	r12,BYTE[rbx*1+rsp]
586	shr	ecx,4
587	shl	r13,48
588	xor	r12,r8
589	mov	r10,r9
590	xor	r9,r13
591	shr	r8,8
592	movzx	r12,r12b
593	mov	edx,DWORD[rdi]
594	shr	r9,8
595	xor	r8,QWORD[((-128))+rbx*8+rbp]
596	shl	r10,56
597	xor	r9,QWORD[rbx*8+rbp]
598	rol	edx,8
599	xor	r8,QWORD[8+rax*1+rsi]
600	xor	r9,QWORD[rax*1+rsi]
601	mov	al,dl
602	xor	r8,r10
603	movzx	r12,WORD[r12*2+r11]
604	movzx	ebx,dl
605	shl	al,4
606	movzx	r13,BYTE[rcx*1+rsp]
607	shr	ebx,4
608	shl	r12,48
609	xor	r13,r8
610	mov	r10,r9
611	xor	r9,r12
612	shr	r8,8
613	movzx	r13,r13b
614	shr	r9,8
615	xor	r8,QWORD[((-128))+rcx*8+rbp]
616	shl	r10,56
617	xor	r9,QWORD[rcx*8+rbp]
618	rol	edx,8
619	xor	r8,QWORD[8+rax*1+rsi]
620	xor	r9,QWORD[rax*1+rsi]
621	mov	al,dl
622	xor	r8,r10
623	movzx	r13,WORD[r13*2+r11]
624	movzx	ecx,dl
625	shl	al,4
626	movzx	r12,BYTE[rbx*1+rsp]
627	shr	ecx,4
628	shl	r13,48
629	xor	r12,r8
630	mov	r10,r9
631	xor	r9,r13
632	shr	r8,8
633	movzx	r12,r12b
634	shr	r9,8
635	xor	r8,QWORD[((-128))+rbx*8+rbp]
636	shl	r10,56
637	xor	r9,QWORD[rbx*8+rbp]
638	rol	edx,8
639	xor	r8,QWORD[8+rax*1+rsi]
640	xor	r9,QWORD[rax*1+rsi]
641	mov	al,dl
642	xor	r8,r10
643	movzx	r12,WORD[r12*2+r11]
644	movzx	ebx,dl
645	shl	al,4
646	movzx	r13,BYTE[rcx*1+rsp]
647	shr	ebx,4
648	shl	r12,48
649	xor	r13,r8
650	mov	r10,r9
651	xor	r9,r12
652	shr	r8,8
653	movzx	r13,r13b
654	shr	r9,8
655	xor	r8,QWORD[((-128))+rcx*8+rbp]
656	shl	r10,56
657	xor	r9,QWORD[rcx*8+rbp]
658	rol	edx,8
659	xor	r8,QWORD[8+rax*1+rsi]
660	xor	r9,QWORD[rax*1+rsi]
661	mov	al,dl
662	xor	r8,r10
663	movzx	r13,WORD[r13*2+r11]
664	movzx	ecx,dl
665	shl	al,4
666	movzx	r12,BYTE[rbx*1+rsp]
; Last byte: its high nibble is kept in place (ecx &= 0xf0) so it can be
; used as a byte offset into the caller's table for the final 4-bit step.
667	and	ecx,240
668	shl	r13,48
669	xor	r12,r8
670	mov	r10,r9
671	xor	r9,r13
672	shr	r8,8
673	movzx	r12,r12b
; The generated code had `mov edx,DWORD[-4+rdi]` at this point (viewer line
; 674). It has been removed: nothing reads edx/dl/rdx between here and the
; `mov rdx,QWORD[8+r14]` at the top of $L$outer_loop (or the function exit),
; so the load was dead, and it read four bytes below the caller's state
; pointer. `mov` does not affect flags, so the remaining code is unchanged.
675	shr	r9,8
676	xor	r8,QWORD[((-128))+rbx*8+rbp]
677	shl	r10,56
678	xor	r9,QWORD[rbx*8+rbp]
679	movzx	r12,WORD[r12*2+r11]
680	xor	r8,QWORD[8+rax*1+rsi]
681	xor	r9,QWORD[rax*1+rsi]
682	shl	r12,48
683	xor	r8,r10
684	xor	r9,r12
; Final step folds by four bits only, using the caller's table at offset rcx
; and $L$rem_8bit indexed by (low nibble of r8) << 4.
685	movzx	r13,r8b
686	shr	r8,4
687	mov	r10,r9
688	shl	r13b,4
689	shr	r9,4
690	xor	r8,QWORD[8+rcx*1+rsi]
691	movzx	r13,WORD[r13*2+r11]
692	shl	r10,60
693	xor	r9,QWORD[rcx*1+rsi]
694	xor	r8,r10
695	shl	r13,48
696	bswap	r8
697	xor	r9,r13
698	bswap	r9
; Unsigned compare of the cursor against the end pointer; the state stays in
; r9:r8 across iterations and is stored once after the last block.
699	cmp	r14,r15
700	jb	NEAR $L$outer_loop
701	mov	QWORD[8+rdi],r8
702	mov	QWORD[rdi],r9
703
; rsi = stack pointer as it was before the six pushes; reload all six saved
; registers relative to it, then drop the frame.
704	lea	rsi,[((280+48))+rsp]
705
706	mov	r15,QWORD[((-48))+rsi]
707
708	mov	r14,QWORD[((-40))+rsi]
709
710	mov	r13,QWORD[((-32))+rsi]
711
712	mov	r12,QWORD[((-24))+rsi]
713
714	mov	rbp,QWORD[((-16))+rsi]
715
716	mov	rbx,QWORD[((-8))+rsi]
717
718	lea	rsp,[rsi]
719
720$L$ghash_epilogue:
721	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
722	mov	rsi,QWORD[16+rsp]
723	DB	0F3h,0C3h		;repret
724
725$L$SEH_end_gcm_ghash_4bit:
; ---------------------------------------------------------------------------
; gcm_init_clmul  (Win64: arguments used directly in rcx and rdx)
;   rcx -> output table; 96 bytes are written (offsets 0..95).
;   rdx -> 16-byte input value.
; The input is qword-swapped, shifted left by one bit across 128 bits and
; conditionally xored with $L$0x1c2_polynomial (defined outside this view);
; call the result V (xmm2). V is then multiplied by itself three times with
; carry-less multiplies, each followed by a shift/xor folding sequence
; (presumably the field reduction), giving V^2, V^3 and V^4.
; Table layout written here:
;   [rcx+0]  V      [rcx+16] V^2   [rcx+32] xor-of-halves of V and V^2
;   [rcx+48] V^3    [rcx+64] V^4   [rcx+80] xor-of-halves of V^3 and V^4
; DB-encoded instructions (bytes in decimal):
;   102,15,58,68,<modrm>,<imm>  = PCLMULQDQ xmm,xmm,imm
;   102,15,58,15,<modrm>,<imm>  = PALIGNR   xmm,xmm,imm
; xmm6 is callee-saved under the Win64 ABI, hence the 24-byte save area.
; ---------------------------------------------------------------------------
726global	gcm_init_clmul
727
728ALIGN	16
729gcm_init_clmul:
730
731$L$_init_clmul:
732$L$SEH_begin_gcm_init_clmul:
733
; 48 83 EC 18 = sub rsp,0x18 ; 0F 29 34 24 = movaps [rsp],xmm6
734DB	0x48,0x83,0xec,0x18
735DB	0x0f,0x29,0x34,0x24
736	movdqu	xmm2,XMMWORD[rdx]
737	pshufd	xmm2,xmm2,78
738
739
; 128-bit shift left by one: psllq handles each qword, psrlq 63 + pslldq 8
; carries the low qword's top bit into the high qword. xmm5 becomes all-ones
; when the top dword of the pre-shift value is negative, selecting the
; polynomial constant for the conditional xor.
740	pshufd	xmm4,xmm2,255
741	movdqa	xmm3,xmm2
742	psllq	xmm2,1
743	pxor	xmm5,xmm5
744	psrlq	xmm3,63
745	pcmpgtd	xmm5,xmm4
746	pslldq	xmm3,8
747	por	xmm2,xmm3
748
749
750	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
751	pxor	xmm2,xmm5
752
753
; xmm6 = (high qword of V) xor (low qword of V), reused by every multiply.
754	pshufd	xmm6,xmm2,78
755	movdqa	xmm0,xmm2
756	pxor	xmm6,xmm2
; --- multiply #1: xmm0 = V * V --------------------------------------------
; Three carry-less products (low*low, high*high, and the xor-of-halves pair)
; are combined into a 256-bit value in xmm1:xmm0.
757	movdqa	xmm1,xmm0
758	pshufd	xmm3,xmm0,78
759	pxor	xmm3,xmm0
760DB	102,15,58,68,194,0
761DB	102,15,58,68,202,17
762DB	102,15,58,68,222,0
763	pxor	xmm3,xmm0
764	pxor	xmm3,xmm1
765
766	movdqa	xmm4,xmm3
767	psrldq	xmm3,8
768	pslldq	xmm4,8
769	pxor	xmm1,xmm3
770	pxor	xmm0,xmm4
771
; Folding of xmm1:xmm0 back into xmm0 (left-shift phase, then right-shift
; phase).
772	movdqa	xmm4,xmm0
773	movdqa	xmm3,xmm0
774	psllq	xmm0,5
775	pxor	xmm3,xmm0
776	psllq	xmm0,1
777	pxor	xmm0,xmm3
778	psllq	xmm0,57
779	movdqa	xmm3,xmm0
780	pslldq	xmm0,8
781	psrldq	xmm3,8
782	pxor	xmm0,xmm4
783	pxor	xmm1,xmm3
784
785
786	movdqa	xmm4,xmm0
787	psrlq	xmm0,1
788	pxor	xmm1,xmm4
789	pxor	xmm4,xmm0
790	psrlq	xmm0,5
791	pxor	xmm0,xmm4
792	psrlq	xmm0,1
793	pxor	xmm0,xmm1
; Store V, V^2 and the packed xor-of-halves of both.
794	pshufd	xmm3,xmm2,78
795	pshufd	xmm4,xmm0,78
796	pxor	xmm3,xmm2
797	movdqu	XMMWORD[rcx],xmm2
798	pxor	xmm4,xmm0
799	movdqu	XMMWORD[16+rcx],xmm0
800DB	102,15,58,15,227,8
801	movdqu	XMMWORD[32+rcx],xmm4
; --- multiply #2: xmm0 = V^2 * V ------------------------------------------
802	movdqa	xmm1,xmm0
803	pshufd	xmm3,xmm0,78
804	pxor	xmm3,xmm0
805DB	102,15,58,68,194,0
806DB	102,15,58,68,202,17
807DB	102,15,58,68,222,0
808	pxor	xmm3,xmm0
809	pxor	xmm3,xmm1
810
811	movdqa	xmm4,xmm3
812	psrldq	xmm3,8
813	pslldq	xmm4,8
814	pxor	xmm1,xmm3
815	pxor	xmm0,xmm4
816
817	movdqa	xmm4,xmm0
818	movdqa	xmm3,xmm0
819	psllq	xmm0,5
820	pxor	xmm3,xmm0
821	psllq	xmm0,1
822	pxor	xmm0,xmm3
823	psllq	xmm0,57
824	movdqa	xmm3,xmm0
825	pslldq	xmm0,8
826	psrldq	xmm3,8
827	pxor	xmm0,xmm4
828	pxor	xmm1,xmm3
829
830
831	movdqa	xmm4,xmm0
832	psrlq	xmm0,1
833	pxor	xmm1,xmm4
834	pxor	xmm4,xmm0
835	psrlq	xmm0,5
836	pxor	xmm0,xmm4
837	psrlq	xmm0,1
838	pxor	xmm0,xmm1
; Keep V^3 in xmm5 for the store below.
839	movdqa	xmm5,xmm0
; --- multiply #3: xmm0 = V^3 * V ------------------------------------------
840	movdqa	xmm1,xmm0
841	pshufd	xmm3,xmm0,78
842	pxor	xmm3,xmm0
843DB	102,15,58,68,194,0
844DB	102,15,58,68,202,17
845DB	102,15,58,68,222,0
846	pxor	xmm3,xmm0
847	pxor	xmm3,xmm1
848
849	movdqa	xmm4,xmm3
850	psrldq	xmm3,8
851	pslldq	xmm4,8
852	pxor	xmm1,xmm3
853	pxor	xmm0,xmm4
854
855	movdqa	xmm4,xmm0
856	movdqa	xmm3,xmm0
857	psllq	xmm0,5
858	pxor	xmm3,xmm0
859	psllq	xmm0,1
860	pxor	xmm0,xmm3
861	psllq	xmm0,57
862	movdqa	xmm3,xmm0
863	pslldq	xmm0,8
864	psrldq	xmm3,8
865	pxor	xmm0,xmm4
866	pxor	xmm1,xmm3
867
868
869	movdqa	xmm4,xmm0
870	psrlq	xmm0,1
871	pxor	xmm1,xmm4
872	pxor	xmm4,xmm0
873	psrlq	xmm0,5
874	pxor	xmm0,xmm4
875	psrlq	xmm0,1
876	pxor	xmm0,xmm1
; Store V^3, V^4 and the packed xor-of-halves of both.
877	pshufd	xmm3,xmm5,78
878	pshufd	xmm4,xmm0,78
879	pxor	xmm3,xmm5
880	movdqu	XMMWORD[48+rcx],xmm5
881	pxor	xmm4,xmm0
882	movdqu	XMMWORD[64+rcx],xmm0
883DB	102,15,58,15,227,8
884	movdqu	XMMWORD[80+rcx],xmm4
; Restore xmm6 and release the 24-byte save area.
885	movaps	xmm6,XMMWORD[rsp]
886	lea	rsp,[24+rsp]
887$L$SEH_end_gcm_init_clmul:
888	DB	0F3h,0C3h		;repret
889
890
; ---------------------------------------------------------------------------
; gcm_gmult_clmul  (Win64: arguments used directly in rcx and rdx)
;   rcx -> 16-byte block, read and overwritten with the result.
;   rdx -> table; the 16 bytes at offset 0 and at offset 32 are read.
; Byte-reverses the block with $L$bswap_mask (defined outside this view),
; multiplies it by the table value with three carry-less multiplies, folds
; the 256-bit product back to 128 bits, byte-reverses again and stores.
; Uses only xmm0-xmm5 and no stack. $L$_gmult_clmul is also the target of
; the jump in gcm_gmult_avx.
; DB-encoded instructions (bytes in decimal):
;   102,15,56,0,197             = PSHUFB    xmm0,xmm5
;   102,15,58,68,<modrm>,<imm>  = PCLMULQDQ xmm,xmm,imm
; ---------------------------------------------------------------------------
891global	gcm_gmult_clmul
892
893ALIGN	16
894gcm_gmult_clmul:
895
896$L$_gmult_clmul:
897	movdqu	xmm0,XMMWORD[rcx]
898	movdqa	xmm5,XMMWORD[$L$bswap_mask]
899	movdqu	xmm2,XMMWORD[rdx]
900	movdqu	xmm4,XMMWORD[32+rdx]
901DB	102,15,56,0,197
; xmm3 = xor of the block's two qwords; it is multiplied by the low qword of
; the value loaded from [32+rdx].
902	movdqa	xmm1,xmm0
903	pshufd	xmm3,xmm0,78
904	pxor	xmm3,xmm0
905DB	102,15,58,68,194,0
906DB	102,15,58,68,202,17
907DB	102,15,58,68,220,0
908	pxor	xmm3,xmm0
909	pxor	xmm3,xmm1
910
911	movdqa	xmm4,xmm3
912	psrldq	xmm3,8
913	pslldq	xmm4,8
914	pxor	xmm1,xmm3
915	pxor	xmm0,xmm4
916
; Folding of xmm1:xmm0 back into xmm0 (left-shift phase, then right-shift
; phase).
917	movdqa	xmm4,xmm0
918	movdqa	xmm3,xmm0
919	psllq	xmm0,5
920	pxor	xmm3,xmm0
921	psllq	xmm0,1
922	pxor	xmm0,xmm3
923	psllq	xmm0,57
924	movdqa	xmm3,xmm0
925	pslldq	xmm0,8
926	psrldq	xmm3,8
927	pxor	xmm0,xmm4
928	pxor	xmm1,xmm3
929
930
931	movdqa	xmm4,xmm0
932	psrlq	xmm0,1
933	pxor	xmm1,xmm4
934	pxor	xmm4,xmm0
935	psrlq	xmm0,5
936	pxor	xmm0,xmm4
937	psrlq	xmm0,1
938	pxor	xmm0,xmm1
939DB	102,15,56,0,197
940	movdqu	XMMWORD[rcx],xmm0
941	DB	0F3h,0C3h		;repret
942
943
; ---------------------------------------------------------------------------
; gcm_ghash_clmul  (Win64: arguments used directly in rcx, rdx, r8, r9)
;   rcx -> 16-byte running state, read on entry and written at $L$done.
;   rdx -> table; offsets 0,16,32 are always candidates, 48,64,80 are read
;          only on the four-block path.
;   r8  -> input bytes.
;   r9  =  input length in bytes. The code subtracts 16 before its first
;          test, so a zero length is not handled here - presumably the caller
;          guarantees a non-zero multiple of 16 (TODO confirm).
; Paths:
;   length == 16                     -> $L$odd_tail (single block)
;   length >= 64 and CPU check passes -> four blocks per $L$mod4_loop pass,
;                                       then $L$tail4x
;   otherwise                        -> two blocks per $L$mod_loop pass,
;                                       then $L$even_tail, then $L$odd_tail
;                                       if one block is left
; Stack: 168 bytes; xmm6..xmm15 (callee-saved on Win64) live at
; [rsp+0], [rsp+16], ... [rsp+144] and are reloaded at $L$done.
; External data read: $L$bswap_mask, $L$7_mask, OPENSSL_ia32cap_P.
; DB-encoded instructions (bytes in decimal; a 65/68/69/76 byte following
; 102 is a REX prefix that selects xmm8-xmm15 and/or a 64-bit operand):
;   102,[rex,]15,56,0,<modrm>            = PSHUFB    xmm,xmm
;   102,[rex,]15,58,68,<modrm>,<imm>     = PCLMULQDQ xmm,xmm,imm
;   102,76,15,110,200                    = MOVQ      xmm9,rax
; ---------------------------------------------------------------------------
944global	gcm_ghash_clmul
945
946ALIGN	32
947gcm_ghash_clmul:
948
949$L$_ghash_clmul:
950	lea	rax,[((-136))+rsp]
951$L$SEH_begin_gcm_ghash_clmul:
952
; 48 8D 60 E0 = lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
; The following movaps stores place xmm6,xmm7 at [rax-0x20],[rax-0x10] and
; xmm8..xmm15 at [rax+0x00]..[rax+0x70].
953DB	0x48,0x8d,0x60,0xe0
954DB	0x0f,0x29,0x70,0xe0
955DB	0x0f,0x29,0x78,0xf0
956DB	0x44,0x0f,0x29,0x00
957DB	0x44,0x0f,0x29,0x48,0x10
958DB	0x44,0x0f,0x29,0x50,0x20
959DB	0x44,0x0f,0x29,0x58,0x30
960DB	0x44,0x0f,0x29,0x60,0x40
961DB	0x44,0x0f,0x29,0x68,0x50
962DB	0x44,0x0f,0x29,0x70,0x60
963DB	0x44,0x0f,0x29,0x78,0x70
964	movdqa	xmm10,XMMWORD[$L$bswap_mask]
965
; xmm0 = byte-reversed state, xmm2 = table[0], xmm7 = table[32].
966	movdqu	xmm0,XMMWORD[rcx]
967	movdqu	xmm2,XMMWORD[rdx]
968	movdqu	xmm7,XMMWORD[32+rdx]
969DB	102,65,15,56,0,194
970
971	sub	r9,0x10
972	jz	NEAR $L$odd_tail
973
; xmm6 = table[16]. The four-block path is taken only when at least 64
; bytes were supplied (r9 holds length-16 here) and the dword at
; OPENSSL_ia32cap_P+4, masked with 0x04400000 (bits 22 and 26), is not
; exactly 0x00400000. NOTE(review): presumably a CPU-model heuristic; the
; bit meanings are defined where OPENSSL_ia32cap_P is filled in - confirm.
974	movdqu	xmm6,XMMWORD[16+rdx]
975	lea	rax,[OPENSSL_ia32cap_P]
976	mov	eax,DWORD[4+rax]
977	cmp	r9,0x30
978	jb	NEAR $L$skip4x
979
980	and	eax,71303168
981	cmp	eax,4194304
982	je	NEAR $L$skip4x
983
; rax holds a 64-bit constant that $L$mod4_loop moves into xmm9 and uses as a
; PSHUFB lookup table indexed by (xmm0 & $L$7_mask).
; xmm14 = table[48], xmm15 = table[64].
984	sub	r9,0x30
985	mov	rax,0xA040608020C0E000
986	movdqu	xmm14,XMMWORD[48+rdx]
987	movdqu	xmm15,XMMWORD[64+rdx]
988
989
990
991
; First group of four blocks: blocks at +48 and +32 are multiplied by
; table[0] and table[16], the block at +16 by table[48], and the block at +0
; is xored into the state (multiplied by table[64] at the top of the loop or
; in $L$tail4x).
992	movdqu	xmm3,XMMWORD[48+r8]
993	movdqu	xmm11,XMMWORD[32+r8]
994DB	102,65,15,56,0,218
995DB	102,69,15,56,0,218
996	movdqa	xmm5,xmm3
997	pshufd	xmm4,xmm3,78
998	pxor	xmm4,xmm3
999DB	102,15,58,68,218,0
1000DB	102,15,58,68,234,17
1001DB	102,15,58,68,231,0
1002
1003	movdqa	xmm13,xmm11
1004	pshufd	xmm12,xmm11,78
1005	pxor	xmm12,xmm11
1006DB	102,68,15,58,68,222,0
1007DB	102,68,15,58,68,238,17
1008DB	102,68,15,58,68,231,16
1009	xorps	xmm3,xmm11
1010	xorps	xmm5,xmm13
1011	movups	xmm7,XMMWORD[80+rdx]
1012	xorps	xmm4,xmm12
1013
1014	movdqu	xmm11,XMMWORD[16+r8]
1015	movdqu	xmm8,XMMWORD[r8]
1016DB	102,69,15,56,0,218
1017DB	102,69,15,56,0,194
1018	movdqa	xmm13,xmm11
1019	pshufd	xmm12,xmm11,78
1020	pxor	xmm0,xmm8
1021	pxor	xmm12,xmm11
1022DB	102,69,15,58,68,222,0
1023	movdqa	xmm1,xmm0
1024	pshufd	xmm8,xmm0,78
1025	pxor	xmm8,xmm0
1026DB	102,69,15,58,68,238,17
1027DB	102,68,15,58,68,231,0
1028	xorps	xmm3,xmm11
1029	xorps	xmm5,xmm13
1030
; r9 goes below zero (carry set) when fewer than 64 further bytes remain.
1031	lea	r8,[64+r8]
1032	sub	r9,0x40
1033	jc	NEAR $L$tail4x
1034
1035	jmp	NEAR $L$mod4_loop
1036ALIGN	32
; Steady state: finishes and folds the previous group of four blocks while
; loading, byte-reversing and starting the multiplies for the next group.
; xmm7 alternates between table[32] and table[80] inside the body.
1037$L$mod4_loop:
1038DB	102,65,15,58,68,199,0
1039	xorps	xmm4,xmm12
1040	movdqu	xmm11,XMMWORD[48+r8]
1041DB	102,69,15,56,0,218
1042DB	102,65,15,58,68,207,17
1043	xorps	xmm0,xmm3
1044	movdqu	xmm3,XMMWORD[32+r8]
1045	movdqa	xmm13,xmm11
1046DB	102,68,15,58,68,199,16
1047	pshufd	xmm12,xmm11,78
1048	xorps	xmm1,xmm5
1049	pxor	xmm12,xmm11
1050DB	102,65,15,56,0,218
1051	movups	xmm7,XMMWORD[32+rdx]
1052	xorps	xmm8,xmm4
1053DB	102,68,15,58,68,218,0
1054	pshufd	xmm4,xmm3,78
1055
1056	pxor	xmm8,xmm0
1057	movdqa	xmm5,xmm3
1058	pxor	xmm8,xmm1
1059	pxor	xmm4,xmm3
1060	movdqa	xmm9,xmm8
1061DB	102,68,15,58,68,234,17
1062	pslldq	xmm8,8
1063	psrldq	xmm9,8
1064	pxor	xmm0,xmm8
1065	movdqa	xmm8,XMMWORD[$L$7_mask]
1066	pxor	xmm1,xmm9
1067DB	102,76,15,110,200
1068
1069	pand	xmm8,xmm0
1070DB	102,69,15,56,0,200
1071	pxor	xmm9,xmm0
1072DB	102,68,15,58,68,231,0
1073	psllq	xmm9,57
1074	movdqa	xmm8,xmm9
1075	pslldq	xmm9,8
1076DB	102,15,58,68,222,0
1077	psrldq	xmm8,8
1078	pxor	xmm0,xmm9
1079	pxor	xmm1,xmm8
1080	movdqu	xmm8,XMMWORD[r8]
1081
1082	movdqa	xmm9,xmm0
1083	psrlq	xmm0,1
1084DB	102,15,58,68,238,17
1085	xorps	xmm3,xmm11
1086	movdqu	xmm11,XMMWORD[16+r8]
1087DB	102,69,15,56,0,218
1088DB	102,15,58,68,231,16
1089	xorps	xmm5,xmm13
1090	movups	xmm7,XMMWORD[80+rdx]
1091DB	102,69,15,56,0,194
1092	pxor	xmm1,xmm9
1093	pxor	xmm9,xmm0
1094	psrlq	xmm0,5
1095
1096	movdqa	xmm13,xmm11
1097	pxor	xmm4,xmm12
1098	pshufd	xmm12,xmm11,78
1099	pxor	xmm0,xmm9
1100	pxor	xmm1,xmm8
1101	pxor	xmm12,xmm11
1102DB	102,69,15,58,68,222,0
1103	psrlq	xmm0,1
1104	pxor	xmm0,xmm1
1105	movdqa	xmm1,xmm0
1106DB	102,69,15,58,68,238,17
1107	xorps	xmm3,xmm11
1108	pshufd	xmm8,xmm0,78
1109	pxor	xmm8,xmm0
1110
1111DB	102,68,15,58,68,231,0
1112	xorps	xmm5,xmm13
1113
1114	lea	r8,[64+r8]
1115	sub	r9,0x40
1116	jnc	NEAR $L$mod4_loop
1117
; Finish the last group of four: final multiply by table[64], combine the
; partial products and fold to 128 bits.
1118$L$tail4x:
1119DB	102,65,15,58,68,199,0
1120DB	102,65,15,58,68,207,17
1121DB	102,68,15,58,68,199,16
1122	xorps	xmm4,xmm12
1123	xorps	xmm0,xmm3
1124	xorps	xmm1,xmm5
1125	pxor	xmm1,xmm0
1126	pxor	xmm8,xmm4
1127
1128	pxor	xmm8,xmm1
1129	pxor	xmm1,xmm0
1130
1131	movdqa	xmm9,xmm8
1132	psrldq	xmm8,8
1133	pslldq	xmm9,8
1134	pxor	xmm1,xmm8
1135	pxor	xmm0,xmm9
1136
1137	movdqa	xmm4,xmm0
1138	movdqa	xmm3,xmm0
1139	psllq	xmm0,5
1140	pxor	xmm3,xmm0
1141	psllq	xmm0,1
1142	pxor	xmm0,xmm3
1143	psllq	xmm0,57
1144	movdqa	xmm3,xmm0
1145	pslldq	xmm0,8
1146	psrldq	xmm3,8
1147	pxor	xmm0,xmm4
1148	pxor	xmm1,xmm3
1149
1150
1151	movdqa	xmm4,xmm0
1152	psrlq	xmm0,1
1153	pxor	xmm1,xmm4
1154	pxor	xmm4,xmm0
1155	psrlq	xmm0,5
1156	pxor	xmm0,xmm4
1157	psrlq	xmm0,1
1158	pxor	xmm0,xmm1
; Undo the last subtraction: r9 = bytes still unprocessed (0..48). xmm7 is
; reloaded with table[32] because the four-block path left table[80] in it.
1159	add	r9,0x40
1160	jz	NEAR $L$done
1161	movdqu	xmm7,XMMWORD[32+rdx]
1162	sub	r9,0x10
1163	jz	NEAR $L$odd_tail
; Two-block path. On arrival r9 = (bytes remaining) - 16.
1164$L$skip4x:
1165
1166
1167
1168
1169
1170	movdqu	xmm8,XMMWORD[r8]
1171	movdqu	xmm3,XMMWORD[16+r8]
1172DB	102,69,15,56,0,194
1173DB	102,65,15,56,0,218
1174	pxor	xmm0,xmm8
1175
1176	movdqa	xmm5,xmm3
1177	pshufd	xmm4,xmm3,78
1178	pxor	xmm4,xmm3
1179DB	102,15,58,68,218,0
1180DB	102,15,58,68,234,17
1181DB	102,15,58,68,231,0
1182
; `jbe` leaves for $L$even_tail when at most one further block remains
; (r9 becomes zero or wraps below zero).
1183	lea	r8,[32+r8]
1184	nop
1185	sub	r9,0x20
1186	jbe	NEAR $L$even_tail
1187	nop
1188	jmp	NEAR $L$mod_loop
1189
1190ALIGN	32
; Steady state of the two-block path: multiply (state xor first block) by
; table[16], add the second block's product with table[0], fold, and start
; the next pair in the same pass.
1191$L$mod_loop:
1192	movdqa	xmm1,xmm0
1193	movdqa	xmm8,xmm4
1194	pshufd	xmm4,xmm0,78
1195	pxor	xmm4,xmm0
1196
1197DB	102,15,58,68,198,0
1198DB	102,15,58,68,206,17
1199DB	102,15,58,68,231,16
1200
1201	pxor	xmm0,xmm3
1202	pxor	xmm1,xmm5
1203	movdqu	xmm9,XMMWORD[r8]
1204	pxor	xmm8,xmm0
1205DB	102,69,15,56,0,202
1206	movdqu	xmm3,XMMWORD[16+r8]
1207
1208	pxor	xmm8,xmm1
1209	pxor	xmm1,xmm9
1210	pxor	xmm4,xmm8
1211DB	102,65,15,56,0,218
1212	movdqa	xmm8,xmm4
1213	psrldq	xmm8,8
1214	pslldq	xmm4,8
1215	pxor	xmm1,xmm8
1216	pxor	xmm0,xmm4
1217
1218	movdqa	xmm5,xmm3
1219
1220	movdqa	xmm9,xmm0
1221	movdqa	xmm8,xmm0
1222	psllq	xmm0,5
1223	pxor	xmm8,xmm0
1224DB	102,15,58,68,218,0
1225	psllq	xmm0,1
1226	pxor	xmm0,xmm8
1227	psllq	xmm0,57
1228	movdqa	xmm8,xmm0
1229	pslldq	xmm0,8
1230	psrldq	xmm8,8
1231	pxor	xmm0,xmm9
1232	pshufd	xmm4,xmm5,78
1233	pxor	xmm1,xmm8
1234	pxor	xmm4,xmm5
1235
1236	movdqa	xmm9,xmm0
1237	psrlq	xmm0,1
1238DB	102,15,58,68,234,17
1239	pxor	xmm1,xmm9
1240	pxor	xmm9,xmm0
1241	psrlq	xmm0,5
1242	pxor	xmm0,xmm9
1243	lea	r8,[32+r8]
1244	psrlq	xmm0,1
1245DB	102,15,58,68,231,0
1246	pxor	xmm0,xmm1
1247
1248	sub	r9,0x20
1249	ja	NEAR $L$mod_loop
1250
; Finish the pending pair without loading further input.
1251$L$even_tail:
1252	movdqa	xmm1,xmm0
1253	movdqa	xmm8,xmm4
1254	pshufd	xmm4,xmm0,78
1255	pxor	xmm4,xmm0
1256
1257DB	102,15,58,68,198,0
1258DB	102,15,58,68,206,17
1259DB	102,15,58,68,231,16
1260
1261	pxor	xmm0,xmm3
1262	pxor	xmm1,xmm5
1263	pxor	xmm8,xmm0
1264	pxor	xmm8,xmm1
1265	pxor	xmm4,xmm8
1266	movdqa	xmm8,xmm4
1267	psrldq	xmm8,8
1268	pslldq	xmm4,8
1269	pxor	xmm1,xmm8
1270	pxor	xmm0,xmm4
1271
1272	movdqa	xmm4,xmm0
1273	movdqa	xmm3,xmm0
1274	psllq	xmm0,5
1275	pxor	xmm3,xmm0
1276	psllq	xmm0,1
1277	pxor	xmm0,xmm3
1278	psllq	xmm0,57
1279	movdqa	xmm3,xmm0
1280	pslldq	xmm0,8
1281	psrldq	xmm3,8
1282	pxor	xmm0,xmm4
1283	pxor	xmm1,xmm3
1284
1285
1286	movdqa	xmm4,xmm0
1287	psrlq	xmm0,1
1288	pxor	xmm1,xmm4
1289	pxor	xmm4,xmm0
1290	psrlq	xmm0,5
1291	pxor	xmm0,xmm4
1292	psrlq	xmm0,1
1293	pxor	xmm0,xmm1
; r9 == 0 here means exactly one block is left; any other value means the
; input is exhausted.
1294	test	r9,r9
1295	jnz	NEAR $L$done
1296
; Single remaining block: xor into the state and multiply by table[0], with
; table[32] (xmm7) supplying the xor-of-halves operand.
1297$L$odd_tail:
1298	movdqu	xmm8,XMMWORD[r8]
1299DB	102,69,15,56,0,194
1300	pxor	xmm0,xmm8
1301	movdqa	xmm1,xmm0
1302	pshufd	xmm3,xmm0,78
1303	pxor	xmm3,xmm0
1304DB	102,15,58,68,194,0
1305DB	102,15,58,68,202,17
1306DB	102,15,58,68,223,0
1307	pxor	xmm3,xmm0
1308	pxor	xmm3,xmm1
1309
1310	movdqa	xmm4,xmm3
1311	psrldq	xmm3,8
1312	pslldq	xmm4,8
1313	pxor	xmm1,xmm3
1314	pxor	xmm0,xmm4
1315
1316	movdqa	xmm4,xmm0
1317	movdqa	xmm3,xmm0
1318	psllq	xmm0,5
1319	pxor	xmm3,xmm0
1320	psllq	xmm0,1
1321	pxor	xmm0,xmm3
1322	psllq	xmm0,57
1323	movdqa	xmm3,xmm0
1324	pslldq	xmm0,8
1325	psrldq	xmm3,8
1326	pxor	xmm0,xmm4
1327	pxor	xmm1,xmm3
1328
1329
1330	movdqa	xmm4,xmm0
1331	psrlq	xmm0,1
1332	pxor	xmm1,xmm4
1333	pxor	xmm4,xmm0
1334	psrlq	xmm0,5
1335	pxor	xmm0,xmm4
1336	psrlq	xmm0,1
1337	pxor	xmm0,xmm1
; Byte-reverse the state back, store it, restore xmm6-xmm15 and release the
; 168-byte frame.
1338$L$done:
1339DB	102,65,15,56,0,194
1340	movdqu	XMMWORD[rcx],xmm0
1341	movaps	xmm6,XMMWORD[rsp]
1342	movaps	xmm7,XMMWORD[16+rsp]
1343	movaps	xmm8,XMMWORD[32+rsp]
1344	movaps	xmm9,XMMWORD[48+rsp]
1345	movaps	xmm10,XMMWORD[64+rsp]
1346	movaps	xmm11,XMMWORD[80+rsp]
1347	movaps	xmm12,XMMWORD[96+rsp]
1348	movaps	xmm13,XMMWORD[112+rsp]
1349	movaps	xmm14,XMMWORD[128+rsp]
1350	movaps	xmm15,XMMWORD[144+rsp]
1351	lea	rsp,[168+rsp]
1352$L$SEH_end_gcm_ghash_clmul:
1353	DB	0F3h,0C3h		;repret
1354
1355
; ---------------------------------------------------------------------------
; gcm_init_avx  (Win64: arguments used directly in rcx and rdx)
;   rcx -> output table; four 48-byte groups (192 bytes) are written.
;   rdx -> 16-byte input value.
; The input is preprocessed into V (xmm2) exactly as the visible SSE code in
; gcm_init_clmul does: qword swap, 128-bit shift left by one, conditional xor
; with $L$0x1c2_polynomial. xmm0 then runs through successive products with
; V. Each pass of $L$init_start_avx stores the current value at [rcx], the
; next product at [rcx+16], and advances rcx by 48; the packed xor-of-halves
; of that pair is written to [rcx-16] (offset 32 of the group just finished)
; either at the top of $L$init_loop_avx or after the loop.
; r10 counts the four groups. xmm6 is callee-saved on Win64, hence the
; 24-byte save area.
; ---------------------------------------------------------------------------
1356global	gcm_init_avx
1357
1358ALIGN	32
1359gcm_init_avx:
1360
1361$L$SEH_begin_gcm_init_avx:
1362
; 48 83 EC 18 = sub rsp,0x18 ; 0F 29 34 24 = movaps [rsp],xmm6
1363DB	0x48,0x83,0xec,0x18
1364DB	0x0f,0x29,0x34,0x24
1365	vzeroupper
1366
1367	vmovdqu	xmm2,XMMWORD[rdx]
1368	vpshufd	xmm2,xmm2,78
1369
1370
1371	vpshufd	xmm4,xmm2,255
1372	vpsrlq	xmm3,xmm2,63
1373	vpsllq	xmm2,xmm2,1
1374	vpxor	xmm5,xmm5,xmm5
1375	vpcmpgtd	xmm5,xmm5,xmm4
1376	vpslldq	xmm3,xmm3,8
1377	vpor	xmm2,xmm2,xmm3
1378
1379
1380	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
1381	vpxor	xmm2,xmm2,xmm5
1382
; xmm6 = (high qword of V) xor (low qword of V), reused by every multiply.
1383	vpunpckhqdq	xmm6,xmm2,xmm2
1384	vmovdqa	xmm0,xmm2
1385	vpxor	xmm6,xmm6,xmm2
1386	mov	r10,4
1387	jmp	NEAR $L$init_start_avx
1388ALIGN	32
; Entered for groups 2..4: finish the previous group (store its packed
; xor-of-halves), then advance xmm0 by one more product with V before
; falling into $L$init_start_avx.
1389$L$init_loop_avx:
1390	vpalignr	xmm5,xmm4,xmm3,8
1391	vmovdqu	XMMWORD[(-16)+rcx],xmm5
1392	vpunpckhqdq	xmm3,xmm0,xmm0
1393	vpxor	xmm3,xmm3,xmm0
1394	vpclmulqdq	xmm1,xmm0,xmm2,0x11
1395	vpclmulqdq	xmm0,xmm0,xmm2,0x00
1396	vpclmulqdq	xmm3,xmm3,xmm6,0x00
1397	vpxor	xmm4,xmm1,xmm0
1398	vpxor	xmm3,xmm3,xmm4
1399
1400	vpslldq	xmm4,xmm3,8
1401	vpsrldq	xmm3,xmm3,8
1402	vpxor	xmm0,xmm0,xmm4
1403	vpxor	xmm1,xmm1,xmm3
1404	vpsllq	xmm3,xmm0,57
1405	vpsllq	xmm4,xmm0,62
1406	vpxor	xmm4,xmm4,xmm3
1407	vpsllq	xmm3,xmm0,63
1408	vpxor	xmm4,xmm4,xmm3
1409	vpslldq	xmm3,xmm4,8
1410	vpsrldq	xmm4,xmm4,8
1411	vpxor	xmm0,xmm0,xmm3
1412	vpxor	xmm1,xmm1,xmm4
1413
1414	vpsrlq	xmm4,xmm0,1
1415	vpxor	xmm1,xmm1,xmm0
1416	vpxor	xmm0,xmm0,xmm4
1417	vpsrlq	xmm4,xmm4,5
1418	vpxor	xmm0,xmm0,xmm4
1419	vpsrlq	xmm0,xmm0,1
1420	vpxor	xmm0,xmm0,xmm1
; xmm5 keeps the current value; xmm0 becomes the next product with V. Both
; are stored, along with their xor-of-halves in xmm3/xmm4 for the packed
; store that follows.
1421$L$init_start_avx:
1422	vmovdqa	xmm5,xmm0
1423	vpunpckhqdq	xmm3,xmm0,xmm0
1424	vpxor	xmm3,xmm3,xmm0
1425	vpclmulqdq	xmm1,xmm0,xmm2,0x11
1426	vpclmulqdq	xmm0,xmm0,xmm2,0x00
1427	vpclmulqdq	xmm3,xmm3,xmm6,0x00
1428	vpxor	xmm4,xmm1,xmm0
1429	vpxor	xmm3,xmm3,xmm4
1430
1431	vpslldq	xmm4,xmm3,8
1432	vpsrldq	xmm3,xmm3,8
1433	vpxor	xmm0,xmm0,xmm4
1434	vpxor	xmm1,xmm1,xmm3
1435	vpsllq	xmm3,xmm0,57
1436	vpsllq	xmm4,xmm0,62
1437	vpxor	xmm4,xmm4,xmm3
1438	vpsllq	xmm3,xmm0,63
1439	vpxor	xmm4,xmm4,xmm3
1440	vpslldq	xmm3,xmm4,8
1441	vpsrldq	xmm4,xmm4,8
1442	vpxor	xmm0,xmm0,xmm3
1443	vpxor	xmm1,xmm1,xmm4
1444
1445	vpsrlq	xmm4,xmm0,1
1446	vpxor	xmm1,xmm1,xmm0
1447	vpxor	xmm0,xmm0,xmm4
1448	vpsrlq	xmm4,xmm4,5
1449	vpxor	xmm0,xmm0,xmm4
1450	vpsrlq	xmm0,xmm0,1
1451	vpxor	xmm0,xmm0,xmm1
1452	vpshufd	xmm3,xmm5,78
1453	vpshufd	xmm4,xmm0,78
1454	vpxor	xmm3,xmm3,xmm5
1455	vmovdqu	XMMWORD[rcx],xmm5
1456	vpxor	xmm4,xmm4,xmm0
1457	vmovdqu	XMMWORD[16+rcx],xmm0
1458	lea	rcx,[48+rcx]
1459	sub	r10,1
1460	jnz	NEAR $L$init_loop_avx
1461
; NOTE(review): the source operands here (xmm3,xmm4) are swapped relative to
; the in-loop `vpalignr xmm5,xmm4,xmm3,8`, so the last group's packed value
; has its two halves in the opposite order. This looks deliberate - confirm
; against how gcm_ghash_avx consumes the final table slot.
1462	vpalignr	xmm5,xmm3,xmm4,8
1463	vmovdqu	XMMWORD[(-16)+rcx],xmm5
1464
; Restore xmm6 and release the 24-byte save area.
1465	vzeroupper
1466	movaps	xmm6,XMMWORD[rsp]
1467	lea	rsp,[24+rsp]
1468$L$SEH_end_gcm_init_avx:
1469	DB	0F3h,0C3h		;repret
1470
1471
; ---------------------------------------------------------------------------
; gcm_gmult_avx
; No body of its own: it jumps straight to $L$_gmult_clmul, the entry label
; of gcm_gmult_clmul, leaving all argument registers untouched, so it takes
; the same arguments and produces the same result as that routine.
; ---------------------------------------------------------------------------
1472global	gcm_gmult_avx
1473
1474ALIGN	32
1475gcm_gmult_avx:
1476
1477	jmp	NEAR $L$_gmult_clmul
1478
1479
1480global	gcm_ghash_avx
1481
1482ALIGN	32
1483gcm_ghash_avx:
1484
1485	lea	rax,[((-136))+rsp]
1486$L$SEH_begin_gcm_ghash_avx:
1487
1488DB	0x48,0x8d,0x60,0xe0
1489DB	0x0f,0x29,0x70,0xe0
1490DB	0x0f,0x29,0x78,0xf0
1491DB	0x44,0x0f,0x29,0x00
1492DB	0x44,0x0f,0x29,0x48,0x10
1493DB	0x44,0x0f,0x29,0x50,0x20
1494DB	0x44,0x0f,0x29,0x58,0x30
1495DB	0x44,0x0f,0x29,0x60,0x40
1496DB	0x44,0x0f,0x29,0x68,0x50
1497DB	0x44,0x0f,0x29,0x70,0x60
1498DB	0x44,0x0f,0x29,0x78,0x70
1499	vzeroupper
1500
1501	vmovdqu	xmm10,XMMWORD[rcx]
1502	lea	r10,[$L$0x1c2_polynomial]
1503	lea	rdx,[64+rdx]
1504	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
1505	vpshufb	xmm10,xmm10,xmm13
1506	cmp	r9,0x80
1507	jb	NEAR $L$short_avx
1508	sub	r9,0x80
1509
1510	vmovdqu	xmm14,XMMWORD[112+r8]
1511	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1512	vpshufb	xmm14,xmm14,xmm13
1513	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1514
1515	vpunpckhqdq	xmm9,xmm14,xmm14
1516	vmovdqu	xmm15,XMMWORD[96+r8]
1517	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1518	vpxor	xmm9,xmm9,xmm14
1519	vpshufb	xmm15,xmm15,xmm13
1520	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1521	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1522	vpunpckhqdq	xmm8,xmm15,xmm15
1523	vmovdqu	xmm14,XMMWORD[80+r8]
1524	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1525	vpxor	xmm8,xmm8,xmm15
1526
1527	vpshufb	xmm14,xmm14,xmm13
1528	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1529	vpunpckhqdq	xmm9,xmm14,xmm14
1530	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1531	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1532	vpxor	xmm9,xmm9,xmm14
1533	vmovdqu	xmm15,XMMWORD[64+r8]
1534	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1535	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1536
1537	vpshufb	xmm15,xmm15,xmm13
1538	vpxor	xmm3,xmm3,xmm0
1539	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1540	vpxor	xmm4,xmm4,xmm1
1541	vpunpckhqdq	xmm8,xmm15,xmm15
1542	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1543	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1544	vpxor	xmm5,xmm5,xmm2
1545	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1546	vpxor	xmm8,xmm8,xmm15
1547
1548	vmovdqu	xmm14,XMMWORD[48+r8]
1549	vpxor	xmm0,xmm0,xmm3
1550	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1551	vpxor	xmm1,xmm1,xmm4
1552	vpshufb	xmm14,xmm14,xmm13
1553	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1554	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1555	vpxor	xmm2,xmm2,xmm5
1556	vpunpckhqdq	xmm9,xmm14,xmm14
1557	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1558	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1559	vpxor	xmm9,xmm9,xmm14
1560
1561	vmovdqu	xmm15,XMMWORD[32+r8]
1562	vpxor	xmm3,xmm3,xmm0
1563	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1564	vpxor	xmm4,xmm4,xmm1
1565	vpshufb	xmm15,xmm15,xmm13
1566	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1567	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1568	vpxor	xmm5,xmm5,xmm2
1569	vpunpckhqdq	xmm8,xmm15,xmm15
1570	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1571	vpxor	xmm8,xmm8,xmm15
1572
1573	vmovdqu	xmm14,XMMWORD[16+r8]
1574	vpxor	xmm0,xmm0,xmm3
1575	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1576	vpxor	xmm1,xmm1,xmm4
1577	vpshufb	xmm14,xmm14,xmm13
1578	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1579	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1580	vpxor	xmm2,xmm2,xmm5
1581	vpunpckhqdq	xmm9,xmm14,xmm14
1582	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1583	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
1584	vpxor	xmm9,xmm9,xmm14
1585
1586	vmovdqu	xmm15,XMMWORD[r8]
1587	vpxor	xmm3,xmm3,xmm0
1588	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1589	vpxor	xmm4,xmm4,xmm1
1590	vpshufb	xmm15,xmm15,xmm13
1591	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1592	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
1593	vpxor	xmm5,xmm5,xmm2
1594	vpclmulqdq	xmm2,xmm9,xmm7,0x10
1595
1596	lea	r8,[128+r8]
1597	cmp	r9,0x80
1598	jb	NEAR $L$tail_avx
1599
1600	vpxor	xmm15,xmm15,xmm10
1601	sub	r9,0x80
1602	jmp	NEAR $L$oop8x_avx
1603
1604ALIGN	32
1605$L$oop8x_avx:
1606	vpunpckhqdq	xmm8,xmm15,xmm15
1607	vmovdqu	xmm14,XMMWORD[112+r8]
1608	vpxor	xmm3,xmm3,xmm0
1609	vpxor	xmm8,xmm8,xmm15
1610	vpclmulqdq	xmm10,xmm15,xmm6,0x00
1611	vpshufb	xmm14,xmm14,xmm13
1612	vpxor	xmm4,xmm4,xmm1
1613	vpclmulqdq	xmm11,xmm15,xmm6,0x11
1614	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1615	vpunpckhqdq	xmm9,xmm14,xmm14
1616	vpxor	xmm5,xmm5,xmm2
1617	vpclmulqdq	xmm12,xmm8,xmm7,0x00
1618	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1619	vpxor	xmm9,xmm9,xmm14
1620
1621	vmovdqu	xmm15,XMMWORD[96+r8]
1622	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1623	vpxor	xmm10,xmm10,xmm3
1624	vpshufb	xmm15,xmm15,xmm13
1625	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1626	vxorps	xmm11,xmm11,xmm4
1627	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1628	vpunpckhqdq	xmm8,xmm15,xmm15
1629	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1630	vpxor	xmm12,xmm12,xmm5
1631	vxorps	xmm8,xmm8,xmm15
1632
1633	vmovdqu	xmm14,XMMWORD[80+r8]
1634	vpxor	xmm12,xmm12,xmm10
1635	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1636	vpxor	xmm12,xmm12,xmm11
1637	vpslldq	xmm9,xmm12,8
1638	vpxor	xmm3,xmm3,xmm0
1639	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1640	vpsrldq	xmm12,xmm12,8
1641	vpxor	xmm10,xmm10,xmm9
1642	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1643	vpshufb	xmm14,xmm14,xmm13
1644	vxorps	xmm11,xmm11,xmm12
1645	vpxor	xmm4,xmm4,xmm1
1646	vpunpckhqdq	xmm9,xmm14,xmm14
1647	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1648	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1649	vpxor	xmm9,xmm9,xmm14
1650	vpxor	xmm5,xmm5,xmm2
1651
1652	vmovdqu	xmm15,XMMWORD[64+r8]
1653	vpalignr	xmm12,xmm10,xmm10,8
1654	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1655	vpshufb	xmm15,xmm15,xmm13
1656	vpxor	xmm0,xmm0,xmm3
1657	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1658	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1659	vpunpckhqdq	xmm8,xmm15,xmm15
1660	vpxor	xmm1,xmm1,xmm4
1661	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1662	vxorps	xmm8,xmm8,xmm15
1663	vpxor	xmm2,xmm2,xmm5
1664
1665	vmovdqu	xmm14,XMMWORD[48+r8]
1666	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
1667	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1668	vpshufb	xmm14,xmm14,xmm13
1669	vpxor	xmm3,xmm3,xmm0
1670	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1671	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1672	vpunpckhqdq	xmm9,xmm14,xmm14
1673	vpxor	xmm4,xmm4,xmm1
1674	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1675	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1676	vpxor	xmm9,xmm9,xmm14
1677	vpxor	xmm5,xmm5,xmm2
1678
1679	vmovdqu	xmm15,XMMWORD[32+r8]
1680	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1681	vpshufb	xmm15,xmm15,xmm13
1682	vpxor	xmm0,xmm0,xmm3
1683	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1684	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1685	vpunpckhqdq	xmm8,xmm15,xmm15
1686	vpxor	xmm1,xmm1,xmm4
1687	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1688	vpxor	xmm8,xmm8,xmm15
1689	vpxor	xmm2,xmm2,xmm5
1690	vxorps	xmm10,xmm10,xmm12
1691
1692	vmovdqu	xmm14,XMMWORD[16+r8]
1693	vpalignr	xmm12,xmm10,xmm10,8
1694	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1695	vpshufb	xmm14,xmm14,xmm13
1696	vpxor	xmm3,xmm3,xmm0
1697	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1698	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1699	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
1700	vxorps	xmm12,xmm12,xmm11
1701	vpunpckhqdq	xmm9,xmm14,xmm14
1702	vpxor	xmm4,xmm4,xmm1
1703	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1704	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
1705	vpxor	xmm9,xmm9,xmm14
1706	vpxor	xmm5,xmm5,xmm2
1707
1708	vmovdqu	xmm15,XMMWORD[r8]
1709	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1710	vpshufb	xmm15,xmm15,xmm13
1711	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1712	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
1713	vpxor	xmm15,xmm15,xmm12
1714	vpclmulqdq	xmm2,xmm9,xmm7,0x10
1715	vpxor	xmm15,xmm15,xmm10
1716
1717	lea	r8,[128+r8]
1718	sub	r9,0x80
1719	jnc	NEAR $L$oop8x_avx
1720
1721	add	r9,0x80
1722	jmp	NEAR $L$tail_no_xor_avx
1723
1724ALIGN	32
1725$L$short_avx:
1726	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
1727	lea	r8,[r9*1+r8]
1728	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1729	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1730	vpshufb	xmm15,xmm14,xmm13
1731
1732	vmovdqa	xmm3,xmm0
1733	vmovdqa	xmm4,xmm1
1734	vmovdqa	xmm5,xmm2
1735	sub	r9,0x10
1736	jz	NEAR $L$tail_avx
1737
1738	vpunpckhqdq	xmm8,xmm15,xmm15
1739	vpxor	xmm3,xmm3,xmm0
1740	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1741	vpxor	xmm8,xmm8,xmm15
1742	vmovdqu	xmm14,XMMWORD[((-32))+r8]
1743	vpxor	xmm4,xmm4,xmm1
1744	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1745	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1746	vpshufb	xmm15,xmm14,xmm13
1747	vpxor	xmm5,xmm5,xmm2
1748	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1749	vpsrldq	xmm7,xmm7,8
1750	sub	r9,0x10
1751	jz	NEAR $L$tail_avx
1752
1753	vpunpckhqdq	xmm8,xmm15,xmm15
1754	vpxor	xmm3,xmm3,xmm0
1755	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1756	vpxor	xmm8,xmm8,xmm15
1757	vmovdqu	xmm14,XMMWORD[((-48))+r8]
1758	vpxor	xmm4,xmm4,xmm1
1759	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1760	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1761	vpshufb	xmm15,xmm14,xmm13
1762	vpxor	xmm5,xmm5,xmm2
1763	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1764	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1765	sub	r9,0x10
1766	jz	NEAR $L$tail_avx
1767
1768	vpunpckhqdq	xmm8,xmm15,xmm15
1769	vpxor	xmm3,xmm3,xmm0
1770	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1771	vpxor	xmm8,xmm8,xmm15
1772	vmovdqu	xmm14,XMMWORD[((-64))+r8]
1773	vpxor	xmm4,xmm4,xmm1
1774	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1775	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1776	vpshufb	xmm15,xmm14,xmm13
1777	vpxor	xmm5,xmm5,xmm2
1778	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1779	vpsrldq	xmm7,xmm7,8
1780	sub	r9,0x10
1781	jz	NEAR $L$tail_avx
1782
1783	vpunpckhqdq	xmm8,xmm15,xmm15
1784	vpxor	xmm3,xmm3,xmm0
1785	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1786	vpxor	xmm8,xmm8,xmm15
1787	vmovdqu	xmm14,XMMWORD[((-80))+r8]
1788	vpxor	xmm4,xmm4,xmm1
1789	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1790	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1791	vpshufb	xmm15,xmm14,xmm13
1792	vpxor	xmm5,xmm5,xmm2
1793	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1794	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1795	sub	r9,0x10
1796	jz	NEAR $L$tail_avx
1797
1798	vpunpckhqdq	xmm8,xmm15,xmm15
1799	vpxor	xmm3,xmm3,xmm0
1800	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1801	vpxor	xmm8,xmm8,xmm15
1802	vmovdqu	xmm14,XMMWORD[((-96))+r8]
1803	vpxor	xmm4,xmm4,xmm1
1804	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1805	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1806	vpshufb	xmm15,xmm14,xmm13
1807	vpxor	xmm5,xmm5,xmm2
1808	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1809	vpsrldq	xmm7,xmm7,8
1810	sub	r9,0x10
1811	jz	NEAR $L$tail_avx
1812
1813	vpunpckhqdq	xmm8,xmm15,xmm15
1814	vpxor	xmm3,xmm3,xmm0
1815	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1816	vpxor	xmm8,xmm8,xmm15
1817	vmovdqu	xmm14,XMMWORD[((-112))+r8]
1818	vpxor	xmm4,xmm4,xmm1
1819	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1820	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1821	vpshufb	xmm15,xmm14,xmm13
1822	vpxor	xmm5,xmm5,xmm2
1823	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1824	vmovq	xmm7,QWORD[((184-64))+rdx]
1825	sub	r9,0x10
1826	jmp	NEAR $L$tail_avx
1827
1828ALIGN	32
1829$L$tail_avx:
1830	vpxor	xmm15,xmm15,xmm10
1831$L$tail_no_xor_avx:
1832	vpunpckhqdq	xmm8,xmm15,xmm15
1833	vpxor	xmm3,xmm3,xmm0
1834	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1835	vpxor	xmm8,xmm8,xmm15
1836	vpxor	xmm4,xmm4,xmm1
1837	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1838	vpxor	xmm5,xmm5,xmm2
1839	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1840
1841	vmovdqu	xmm12,XMMWORD[r10]
1842
1843	vpxor	xmm10,xmm3,xmm0
1844	vpxor	xmm11,xmm4,xmm1
1845	vpxor	xmm5,xmm5,xmm2
1846
1847	vpxor	xmm5,xmm5,xmm10
1848	vpxor	xmm5,xmm5,xmm11
1849	vpslldq	xmm9,xmm5,8
1850	vpsrldq	xmm5,xmm5,8
1851	vpxor	xmm10,xmm10,xmm9
1852	vpxor	xmm11,xmm11,xmm5
1853
1854	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1855	vpalignr	xmm10,xmm10,xmm10,8
1856	vpxor	xmm10,xmm10,xmm9
1857
1858	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1859	vpalignr	xmm10,xmm10,xmm10,8
1860	vpxor	xmm10,xmm10,xmm11
1861	vpxor	xmm10,xmm10,xmm9
1862
1863	cmp	r9,0
1864	jne	NEAR $L$short_avx
1865
1866	vpshufb	xmm10,xmm10,xmm13
1867	vmovdqu	XMMWORD[rcx],xmm10
1868	vzeroupper
1869	movaps	xmm6,XMMWORD[rsp]
1870	movaps	xmm7,XMMWORD[16+rsp]
1871	movaps	xmm8,XMMWORD[32+rsp]
1872	movaps	xmm9,XMMWORD[48+rsp]
1873	movaps	xmm10,XMMWORD[64+rsp]
1874	movaps	xmm11,XMMWORD[80+rsp]
1875	movaps	xmm12,XMMWORD[96+rsp]
1876	movaps	xmm13,XMMWORD[112+rsp]
1877	movaps	xmm14,XMMWORD[128+rsp]
1878	movaps	xmm15,XMMWORD[144+rsp]
1879	lea	rsp,[168+rsp]
1880$L$SEH_end_gcm_ghash_avx:
1881	DB	0F3h,0C3h		;repret
1882
1883
; ----------------------------------------------------------------------
; Constant tables for the GHASH routines. They are emitted right after the
; code, in the section that is current at this point of the file.
; ----------------------------------------------------------------------
1884ALIGN	64
; Sixteen byte indices counting down from 15 to 0. Used as a byte-shuffle
; control this reverses the byte order of a 128-bit vector.
; NOTE(review): presumably the mask the routines keep in a register for
; (v)pshufb; the load itself is outside this excerpt.
1885$L$bswap_mask:
1886DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 128-bit little-endian constant: lowest byte 0x01, highest byte 0xc2,
; every byte in between zero.
1887$L$0x1c2_polynomial:
1888DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; Two qwords, each equal to 7 (written as dword pairs low,high).
1889$L$7_mask:
1890	DD	7,0,7,0
; Two qwords: 7 and 450 (450 = 0x1c2).
1891$L$7_mask_poly:
1892	DD	7,0,450,0
1893ALIGN	64
1894
; 16 qword entries, each written as (low dword = 0, high dword = value).
; Every non-zero high dword is a 16-bit constant shifted left by 16, e.g.
; 471859200 = 0x1C20 << 16, so the constant sits in the top 16 bits of the
; qword. gcm_gmult_4bit loads this table's address into r11 and reads it as
; QWORD[rdx*8+r11] with rdx masked to 0..15.
1895$L$rem_4bit:
1896	DD	0,0,0,471859200,0,943718400,0,610271232
1897	DD	0,1887436800,0,1822425088,0,1220542464,0,1423966208
1898	DD	0,3774873600,0,4246732800,0,3644850176,0,3311403008
1899	DD	0,2441084928,0,2376073216,0,2847932416,0,3051356160
1900
; 256 16-bit entries (32 rows of 8). Entries 16, 32, 48, ... repeat the
; 16-bit constants that $L$rem_4bit stores shifted (0x1C20, 0x3840, 0x2460,
; ...). NOTE(review): no reader of this table is visible in this excerpt;
; presumably it is the byte-indexed reduction table of the 4-bit GHASH
; code -- confirm against gcm_ghash_4bit.
1901$L$rem_8bit:
1902	DW	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1903	DW	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1904	DW	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1905	DW	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1906	DW	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1907	DW	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1908	DW	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1909	DW	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1910	DW	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1911	DW	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1912	DW	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1913	DW	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1914	DW	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1915	DW	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1916	DW	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1917	DW	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1918	DW	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1919	DW	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1920	DW	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1921	DW	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1922	DW	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1923	DW	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1924	DW	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1925	DW	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1926	DW	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1927	DW	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1928	DW	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1929	DW	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1930	DW	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1931	DW	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1932	DW	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1933	DW	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1934
; NUL-terminated ASCII identification string:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1935DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1936DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1937DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1938DB	114,103,62,0
1939ALIGN	64
; Import-table slot for RtlVirtualUnwind; se_handler calls through it.
1940EXTERN	__imp_RtlVirtualUnwind
1941
1942ALIGN	16
;-----------------------------------------------------------------------
; se_handler -- exception handler named by the .xdata records of
;               gcm_gmult_4bit and gcm_ghash_4bit.
; ABI:   Microsoft x64.
; In:    r8, r9 are the only argument registers read.
;        NOTE(review): per the Win64 language-specific handler signature
;        these should be r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*; the field
;        names in the comments below assume the winnt.h layouts -- confirm.
; Out:   eax = 1 (ExceptionContinueSearch in EXCEPTION_DISPOSITION, under
;        the same assumption).
; Saves and restores rsi, rdi, rbx, rbp, r12-r15 and rflags.
;
; Purpose: if the faulting Rip lies between the prologue and epilogue labels
; stored in HandlerData, rebuild the caller's rsp and non-volatile registers
; from the known frame (6 pushes + 280 bytes) into the context, then let
; RtlVirtualUnwind continue the unwind.
;-----------------------------------------------------------------------
1943se_handler:
1944	push	rsi
1945	push	rdi
1946	push	rbx
1947	push	rbp
1948	push	r12
1949	push	r13
1950	push	r14
1951	push	r15
1952	pushfq
; 64 bytes: 32 bytes of shadow space plus four stack-argument slots for the
; RtlVirtualUnwind call. With the 9 pushes above (72 bytes) and the return
; address, rsp is 16-byte aligned at the call, given ABI-aligned entry.
1953	sub	rsp,64
1954
; rax = context->Rax, rbx = context->Rip (offsets 120 / 248).
1955	mov	rax,QWORD[120+r8]
1956	mov	rbx,QWORD[248+r8]
1957
; rsi = disp->ImageBase, r11 = disp->HandlerData (offsets 8 / 56).
1958	mov	rsi,QWORD[8+r9]
1959	mov	r11,QWORD[56+r9]
1960
; HandlerData[0] is the image-relative address of the prologue-end label.
; Rip below it means the frame is not built yet: context->Rax is used as the
; base (the prologue in this file copies rsp to rax before pushing).
1961	mov	r10d,DWORD[r11]
1962	lea	r10,[r10*1+rsi]
1963	cmp	rbx,r10
1964	jb	NEAR $L$in_prologue
1965
; Past the prologue: start from context->Rsp instead (offset 152).
1966	mov	rax,QWORD[152+r8]
1967
; HandlerData[1] is the image-relative address of the epilogue label.
; Rip at or beyond it means the frame is already torn down.
1968	mov	r10d,DWORD[4+r11]
1969	lea	r10,[r10*1+rsi]
1970	cmp	rbx,r10
1971	jae	NEAR $L$in_prologue
1972
; Function body: step over the 280-byte local area and the 6 pushed
; registers (48 bytes) so rax equals the value of rsp at function entry.
1973	lea	rax,[((48+280))+rax]
1974
; Reload the pushed registers in the order the prologue pushed them
; (rbx first, r15 last) ...
1975	mov	rbx,QWORD[((-8))+rax]
1976	mov	rbp,QWORD[((-16))+rax]
1977	mov	r12,QWORD[((-24))+rax]
1978	mov	r13,QWORD[((-32))+rax]
1979	mov	r14,QWORD[((-40))+rax]
1980	mov	r15,QWORD[((-48))+rax]
; ... and store them into context->Rbx, Rbp, R12, R13, R14, R15.
1981	mov	QWORD[144+r8],rbx
1982	mov	QWORD[160+r8],rbp
1983	mov	QWORD[216+r8],r12
1984	mov	QWORD[224+r8],r13
1985	mov	QWORD[232+r8],r14
1986	mov	QWORD[240+r8],r15
1987
; Common tail. rax = rsp at function entry. The WIN64 prologue stored the
; caller's rdi at [8+rsp] and rsi at [16+rsp]; fetch them from there and
; record Rsp, Rsi, Rdi in the context (offsets 152 / 168 / 176).
1988$L$in_prologue:
1989	mov	rdi,QWORD[8+rax]
1990	mov	rsi,QWORD[16+rax]
1991	mov	QWORD[152+r8],rax
1992	mov	QWORD[168+r8],rsi
1993	mov	QWORD[176+r8],rdi
1994
; Copy the patched context over disp->ContextRecord (offset 40):
; 154 qwords = 1232 bytes. The DD emits bytes FC F3 48 A5, i.e.
; "cld" followed by "rep movsq".
1995	mov	rdi,QWORD[40+r9]
1996	mov	rsi,r8
1997	mov	ecx,154
1998	DD	0xa548f3fc
1999
; Set up the 8 arguments of RtlVirtualUnwind from the dispatcher context:
;   rcx = 0 (handler type), rdx = ImageBase, r8 = ControlPc,
;   r9 = FunctionEntry, [32+rsp] = ContextRecord, [40+rsp] = &HandlerData,
;   [48+rsp] = &EstablisherFrame, [56+rsp] = 0 (NULL).
; r12 is used as scratch; it is restored by the pops below.
2000	mov	rsi,r9
2001	xor	rcx,rcx
2002	mov	rdx,QWORD[8+rsi]
2003	mov	r8,QWORD[rsi]
2004	mov	r9,QWORD[16+rsi]
2005	mov	r10,QWORD[40+rsi]
2006	lea	r11,[56+rsi]
2007	lea	r12,[24+rsi]
2008	mov	QWORD[32+rsp],r10
2009	mov	QWORD[40+rsp],r11
2010	mov	QWORD[48+rsp],r12
2011	mov	QWORD[56+rsp],rcx
2012	call	QWORD[__imp_RtlVirtualUnwind]
2013
; Return 1 and undo the prologue in reverse order.
2014	mov	eax,1
2015	add	rsp,64
2016	popfq
2017	pop	r15
2018	pop	r14
2019	pop	r13
2020	pop	r12
2021	pop	rbp
2022	pop	rbx
2023	pop	rdi
2024	pop	rsi
2025	DB	0F3h,0C3h		;repret
2026
2027
; ----------------------------------------------------------------------
; Win64 function table. One record per function, three image-relative
; dwords each: start label, end label, unwind-info label in .xdata.
; ----------------------------------------------------------------------
2028section	.pdata rdata align=4
2029ALIGN	4
; gcm_gmult_4bit
2030	DD	$L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
2031	DD	$L$SEH_end_gcm_gmult_4bit wrt ..imagebase
2032	DD	$L$SEH_info_gcm_gmult_4bit wrt ..imagebase
2033
; gcm_ghash_4bit
2034	DD	$L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
2035	DD	$L$SEH_end_gcm_ghash_4bit wrt ..imagebase
2036	DD	$L$SEH_info_gcm_ghash_4bit wrt ..imagebase
2037
; gcm_init_clmul
2038	DD	$L$SEH_begin_gcm_init_clmul wrt ..imagebase
2039	DD	$L$SEH_end_gcm_init_clmul wrt ..imagebase
2040	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
2041
; gcm_ghash_clmul
2042	DD	$L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
2043	DD	$L$SEH_end_gcm_ghash_clmul wrt ..imagebase
2044	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; gcm_init_avx -- points at the unwind info of gcm_init_clmul.
; NOTE(review): this is only valid if both prologues are identical; the
; prologues are outside this excerpt.
2045	DD	$L$SEH_begin_gcm_init_avx wrt ..imagebase
2046	DD	$L$SEH_end_gcm_init_avx wrt ..imagebase
2047	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
2048
; gcm_ghash_avx -- points at the unwind info of gcm_ghash_clmul. Its
; epilogue restores xmm6..xmm15 from [rsp]..[144+rsp] and releases 168
; bytes, which is the frame that unwind info describes.
2049	DD	$L$SEH_begin_gcm_ghash_avx wrt ..imagebase
2050	DD	$L$SEH_end_gcm_ghash_avx wrt ..imagebase
2051	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; ----------------------------------------------------------------------
; Unwind information referenced from the .pdata records.
; NOTE(review): the byte-level decoding below follows the documented Win64
; UNWIND_INFO / UNWIND_CODE encoding; confirm against that reference before
; editing any byte.
; ----------------------------------------------------------------------
2052section	.xdata rdata align=8
2053ALIGN	8
; Handler-based records for the two 4-bit routines.
; First byte 9 = version 1 with the "has exception handler" flag; no unwind
; codes follow. Next comes the handler address (se_handler), then two
; image-relative labels that se_handler reads as HandlerData[0] (end of
; prologue) and HandlerData[1] (start of epilogue).
2054$L$SEH_info_gcm_gmult_4bit:
2055DB	9,0,0,0
2056	DD	se_handler wrt ..imagebase
2057	DD	$L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
2058$L$SEH_info_gcm_ghash_4bit:
2059DB	9,0,0,0
2060	DD	se_handler wrt ..imagebase
2061	DD	$L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
; Table-driven record, also used for gcm_init_avx.
; Header: version 1, prologue size 8 bytes, 3 unwind-code slots, no frame
; register.
2062$L$SEH_info_gcm_init_clmul:
2063DB	0x01,0x08,0x03,0x00
; At prologue offset 8: save xmm6 at [rsp + 0*16] (2 slots).
2064DB	0x08,0x68,0x00,0x00
; At prologue offset 4: small stack allocation, 2*8+8 = 24 bytes (1 slot),
; followed by 2 bytes of padding.
2065DB	0x04,0x22,0x00,0x00
; Table-driven record, also used for gcm_ghash_avx.
; Header: version 1, prologue size 0x33 bytes, 0x16 = 22 unwind-code slots,
; no frame register.
2066$L$SEH_info_gcm_ghash_clmul:
2067DB	0x01,0x33,0x16,0x00
; Ten xmm saves, listed from the last prologue instruction to the first.
; Each is (prologue offset, op|reg, slot offset in 16-byte units):
; xmm15 at [rsp+144] down to xmm6 at [rsp+0].
2068DB	0x33,0xf8,0x09,0x00
2069DB	0x2e,0xe8,0x08,0x00
2070DB	0x29,0xd8,0x07,0x00
2071DB	0x24,0xc8,0x06,0x00
2072DB	0x1f,0xb8,0x05,0x00
2073DB	0x1a,0xa8,0x04,0x00
2074DB	0x15,0x98,0x03,0x00
2075DB	0x10,0x88,0x02,0x00
2076DB	0x0c,0x78,0x01,0x00
2077DB	0x08,0x68,0x00,0x00
; At prologue offset 4: large stack allocation, 0x15*8 = 168 bytes.
2078DB	0x04,0x01,0x15,0x00
2079