• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1default	rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section	.text code align=64
6
7EXTERN	OPENSSL_ia32cap_P
8
;-----------------------------------------------------------------------
; gcm_gmult_4bit -- table-driven 128-bit multiply, 4 bits at a time.
; ABI:  Win64 entry; the first two arguments (rcx, rdx) are moved into
;       rdi/rsi after the caller's rdi/rsi are parked in the home slots.
; In:   rdi = 16-byte value, read byte by byte from offset 15 down to 0
;             and overwritten with the result
;       rsi = table of 16-byte entries, indexed by (nibble * 16)
; Uses: r8:r9 as the running 128-bit value (r8 = qword at +8, r9 = qword
;       at +0 of an entry), r11 = $L$rem_4bit, rcx = byte index (14..0),
;       al/bl = byte offsets derived from the two nibbles of each byte.
; Note: rbx, rbp and r12 are pushed, but only rbx is written by the body
;       and only rbx is reloaded at exit.
;-----------------------------------------------------------------------
9global	gcm_gmult_4bit
10
11ALIGN	16
12gcm_gmult_4bit:
13	mov	QWORD[8+rsp],rdi	;WIN64 prologue
14	mov	QWORD[16+rsp],rsi
15	mov	rax,rsp	; rax = entry rsp; se_handler reads it while still in the prologue
16$L$SEH_begin_gcm_gmult_4bit:
17	mov	rdi,rcx
18	mov	rsi,rdx
19
20
21	push	rbx
22	push	rbp
23	push	r12
24$L$gmult_prologue:
25
26	movzx	r8,BYTE[15+rdi]	; start with the last byte of the input
27	lea	r11,[$L$rem_4bit]
28	xor	rax,rax
29	xor	rbx,rbx
30	mov	al,r8b
31	mov	bl,r8b
32	shl	al,4	; al = low nibble * 16 (byte offset of a table entry)
33	mov	rcx,14	; next byte index to fetch
34	mov	r8,QWORD[8+rax*1+rsi]
35	mov	r9,QWORD[rax*1+rsi]
36	and	bl,0xf0	; bl = high nibble * 16
37	mov	rdx,r8
38	jmp	NEAR $L$oop1
39
; Each pass shifts r9:r8 right by 4 bits twice: once folding in the table
; entry for the pending high nibble (rbx), once for the low nibble of the
; freshly loaded byte (rax). The 4 bits shifted out of r8 (kept in rdx)
; select a $L$rem_4bit qword that is xored into r9.
40ALIGN	16
41$L$oop1:
42	shr	r8,4
43	and	rdx,0xf
44	mov	r10,r9
45	mov	al,BYTE[rcx*1+rdi]
46	shr	r9,4
47	xor	r8,QWORD[8+rbx*1+rsi]
48	shl	r10,60	; low 4 bits of r9 become the top 4 bits of r8
49	xor	r9,QWORD[rbx*1+rsi]
50	mov	bl,al
51	xor	r9,QWORD[rdx*8+r11]
52	mov	rdx,r8
53	shl	al,4
54	xor	r8,r10
55	dec	rcx
56	js	NEAR $L$break1	; byte 0 has just been loaded: finish outside the loop
57
58	shr	r8,4
59	and	rdx,0xf
60	mov	r10,r9
61	shr	r9,4
62	xor	r8,QWORD[8+rax*1+rsi]
63	shl	r10,60
64	xor	r9,QWORD[rax*1+rsi]
65	and	bl,0xf0
66	xor	r9,QWORD[rdx*8+r11]
67	mov	rdx,r8
68	xor	r8,r10
69	jmp	NEAR $L$oop1
70
; Tail: process the two nibbles of byte 0 (low nibble via rax, then high
; nibble via rbx) without fetching another byte.
71ALIGN	16
72$L$break1:
73	shr	r8,4
74	and	rdx,0xf
75	mov	r10,r9
76	shr	r9,4
77	xor	r8,QWORD[8+rax*1+rsi]
78	shl	r10,60
79	xor	r9,QWORD[rax*1+rsi]
80	and	bl,0xf0
81	xor	r9,QWORD[rdx*8+r11]
82	mov	rdx,r8
83	xor	r8,r10
84
85	shr	r8,4
86	and	rdx,0xf
87	mov	r10,r9
88	shr	r9,4
89	xor	r8,QWORD[8+rbx*1+rsi]
90	shl	r10,60
91	xor	r9,QWORD[rbx*1+rsi]
92	xor	r8,r10
93	xor	r9,QWORD[rdx*8+r11]
94
95	bswap	r8	; result is stored byte-reversed within each qword
96	bswap	r9
97	mov	QWORD[8+rdi],r8
98	mov	QWORD[rdi],r9
99
100	mov	rbx,QWORD[16+rsp]	; rbx was the first of the three pushes
101	lea	rsp,[24+rsp]	; drop the three pushed qwords
102$L$gmult_epilogue:
103	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
104	mov	rsi,QWORD[16+rsp]
105	DB	0F3h,0C3h		;repret
106$L$SEH_end_gcm_gmult_4bit:
;-----------------------------------------------------------------------
; gcm_ghash_4bit -- table-driven hash update over a run of 16-byte blocks.
; ABI:  Win64 entry; arguments rcx, rdx, r8, r9 are moved to rdi, rsi,
;       rdx, rcx after the caller's rdi/rsi are parked in the home slots.
; In:   rdi = 16-byte state (read at entry, rewritten each block)
;       rsi = table of sixteen 16-byte entries
;       r14 = input pointer (from r8), r15 = byte count (from r9); r15 is
;             turned into an end pointer (r14 + count)
; Frame: 6 pushes (rbx, rbp, r12-r15) + 280 bytes of locals:
;       [rsp+0 .. rsp+15]          per-entry byte: (low byte of qword +8) << 4
;       [rbp-128 .. rbp-1]         qword +8 of each entry, shifted right 4
;       [rbp .. rbp+127]           qword +0 of each entry, shifted right 4
;       with rbp = rsp + 16 + 128.
; The outer loop is a do/while: it always runs at least once and repeats
; while r14 < r15 (unsigned).
;-----------------------------------------------------------------------
107global	gcm_ghash_4bit
108
109ALIGN	16
110gcm_ghash_4bit:
111	mov	QWORD[8+rsp],rdi	;WIN64 prologue
112	mov	QWORD[16+rsp],rsi
113	mov	rax,rsp	; rax = entry rsp; se_handler reads it while still in the prologue
114$L$SEH_begin_gcm_ghash_4bit:
115	mov	rdi,rcx
116	mov	rsi,rdx
117	mov	rdx,r8
118	mov	rcx,r9
119
120
121	push	rbx
122	push	rbp
123	push	r12
124	push	r13
125	push	r14
126	push	r15
127	sub	rsp,280
128$L$ghash_prologue:
129	mov	r14,rdx
130	mov	r15,rcx
131	sub	rsi,-128	; bias rsi by +128; table offsets below are written as (k-128)
132	lea	rbp,[((16+128))+rsp]
133	xor	edx,edx
; Build the shifted copy of the table. For entry i (i = 0..15):
;   BYTE [rsp+i]         = low byte of qword(+8) << 4
;   QWORD[rbp+8*i]       = qword(+0) >> 4
;   QWORD[rbp+8*i-128]   = (qword(+8) >> 4) | (qword(+0) << 60)
; The loads/stores of consecutive entries are interleaved, alternating
; between the register sets (r8, rax) and (r9, rbx).
134	mov	r8,QWORD[((0+0-128))+rsi]
135	mov	rax,QWORD[((0+8-128))+rsi]
136	mov	dl,al
137	shr	rax,4
138	mov	r10,r8
139	shr	r8,4
140	mov	r9,QWORD[((16+0-128))+rsi]
141	shl	dl,4
142	mov	rbx,QWORD[((16+8-128))+rsi]
143	shl	r10,60
144	mov	BYTE[rsp],dl
145	or	rax,r10
146	mov	dl,bl
147	shr	rbx,4
148	mov	r10,r9
149	shr	r9,4
150	mov	QWORD[rbp],r8
151	mov	r8,QWORD[((32+0-128))+rsi]
152	shl	dl,4
153	mov	QWORD[((0-128))+rbp],rax
154	mov	rax,QWORD[((32+8-128))+rsi]
155	shl	r10,60
156	mov	BYTE[1+rsp],dl
157	or	rbx,r10
158	mov	dl,al
159	shr	rax,4
160	mov	r10,r8
161	shr	r8,4
162	mov	QWORD[8+rbp],r9
163	mov	r9,QWORD[((48+0-128))+rsi]
164	shl	dl,4
165	mov	QWORD[((8-128))+rbp],rbx
166	mov	rbx,QWORD[((48+8-128))+rsi]
167	shl	r10,60
168	mov	BYTE[2+rsp],dl
169	or	rax,r10
170	mov	dl,bl
171	shr	rbx,4
172	mov	r10,r9
173	shr	r9,4
174	mov	QWORD[16+rbp],r8
175	mov	r8,QWORD[((64+0-128))+rsi]
176	shl	dl,4
177	mov	QWORD[((16-128))+rbp],rax
178	mov	rax,QWORD[((64+8-128))+rsi]
179	shl	r10,60
180	mov	BYTE[3+rsp],dl
181	or	rbx,r10
182	mov	dl,al
183	shr	rax,4
184	mov	r10,r8
185	shr	r8,4
186	mov	QWORD[24+rbp],r9
187	mov	r9,QWORD[((80+0-128))+rsi]
188	shl	dl,4
189	mov	QWORD[((24-128))+rbp],rbx
190	mov	rbx,QWORD[((80+8-128))+rsi]
191	shl	r10,60
192	mov	BYTE[4+rsp],dl
193	or	rax,r10
194	mov	dl,bl
195	shr	rbx,4
196	mov	r10,r9
197	shr	r9,4
198	mov	QWORD[32+rbp],r8
199	mov	r8,QWORD[((96+0-128))+rsi]
200	shl	dl,4
201	mov	QWORD[((32-128))+rbp],rax
202	mov	rax,QWORD[((96+8-128))+rsi]
203	shl	r10,60
204	mov	BYTE[5+rsp],dl
205	or	rbx,r10
206	mov	dl,al
207	shr	rax,4
208	mov	r10,r8
209	shr	r8,4
210	mov	QWORD[40+rbp],r9
211	mov	r9,QWORD[((112+0-128))+rsi]
212	shl	dl,4
213	mov	QWORD[((40-128))+rbp],rbx
214	mov	rbx,QWORD[((112+8-128))+rsi]
215	shl	r10,60
216	mov	BYTE[6+rsp],dl
217	or	rax,r10
218	mov	dl,bl
219	shr	rbx,4
220	mov	r10,r9
221	shr	r9,4
222	mov	QWORD[48+rbp],r8
223	mov	r8,QWORD[((128+0-128))+rsi]
224	shl	dl,4
225	mov	QWORD[((48-128))+rbp],rax
226	mov	rax,QWORD[((128+8-128))+rsi]
227	shl	r10,60
228	mov	BYTE[7+rsp],dl
229	or	rbx,r10
230	mov	dl,al
231	shr	rax,4
232	mov	r10,r8
233	shr	r8,4
234	mov	QWORD[56+rbp],r9
235	mov	r9,QWORD[((144+0-128))+rsi]
236	shl	dl,4
237	mov	QWORD[((56-128))+rbp],rbx
238	mov	rbx,QWORD[((144+8-128))+rsi]
239	shl	r10,60
240	mov	BYTE[8+rsp],dl
241	or	rax,r10
242	mov	dl,bl
243	shr	rbx,4
244	mov	r10,r9
245	shr	r9,4
246	mov	QWORD[64+rbp],r8
247	mov	r8,QWORD[((160+0-128))+rsi]
248	shl	dl,4
249	mov	QWORD[((64-128))+rbp],rax
250	mov	rax,QWORD[((160+8-128))+rsi]
251	shl	r10,60
252	mov	BYTE[9+rsp],dl
253	or	rbx,r10
254	mov	dl,al
255	shr	rax,4
256	mov	r10,r8
257	shr	r8,4
258	mov	QWORD[72+rbp],r9
259	mov	r9,QWORD[((176+0-128))+rsi]
260	shl	dl,4
261	mov	QWORD[((72-128))+rbp],rbx
262	mov	rbx,QWORD[((176+8-128))+rsi]
263	shl	r10,60
264	mov	BYTE[10+rsp],dl
265	or	rax,r10
266	mov	dl,bl
267	shr	rbx,4
268	mov	r10,r9
269	shr	r9,4
270	mov	QWORD[80+rbp],r8
271	mov	r8,QWORD[((192+0-128))+rsi]
272	shl	dl,4
273	mov	QWORD[((80-128))+rbp],rax
274	mov	rax,QWORD[((192+8-128))+rsi]
275	shl	r10,60
276	mov	BYTE[11+rsp],dl
277	or	rbx,r10
278	mov	dl,al
279	shr	rax,4
280	mov	r10,r8
281	shr	r8,4
282	mov	QWORD[88+rbp],r9
283	mov	r9,QWORD[((208+0-128))+rsi]
284	shl	dl,4
285	mov	QWORD[((88-128))+rbp],rbx
286	mov	rbx,QWORD[((208+8-128))+rsi]
287	shl	r10,60
288	mov	BYTE[12+rsp],dl
289	or	rax,r10
290	mov	dl,bl
291	shr	rbx,4
292	mov	r10,r9
293	shr	r9,4
294	mov	QWORD[96+rbp],r8
295	mov	r8,QWORD[((224+0-128))+rsi]
296	shl	dl,4
297	mov	QWORD[((96-128))+rbp],rax
298	mov	rax,QWORD[((224+8-128))+rsi]
299	shl	r10,60
300	mov	BYTE[13+rsp],dl
301	or	rbx,r10
302	mov	dl,al
303	shr	rax,4
304	mov	r10,r8
305	shr	r8,4
306	mov	QWORD[104+rbp],r9
307	mov	r9,QWORD[((240+0-128))+rsi]
308	shl	dl,4
309	mov	QWORD[((104-128))+rbp],rbx
310	mov	rbx,QWORD[((240+8-128))+rsi]
311	shl	r10,60
312	mov	BYTE[14+rsp],dl
313	or	rax,r10
314	mov	dl,bl
315	shr	rbx,4
316	mov	r10,r9
317	shr	r9,4
318	mov	QWORD[112+rbp],r8
319	shl	dl,4
320	mov	QWORD[((112-128))+rbp],rax
321	shl	r10,60
322	mov	BYTE[15+rsp],dl
323	or	rbx,r10
324	mov	QWORD[120+rbp],r9
325	mov	QWORD[((120-128))+rbp],rbx
326	add	rsi,-128	; undo the +128 bias: rsi points at the table again
327	mov	r8,QWORD[8+rdi]
328	mov	r9,QWORD[rdi]
329	add	r15,r14	; r15 = end-of-input pointer
330	lea	r11,[$L$rem_8bit]
331	jmp	NEAR $L$outer_loop
; Per block: xor the next 16 input bytes into the state, store it back to
; [rdi], then consume it one byte at a time (via rol edx,8 on 32-bit
; pieces reloaded from [rdi+8], [rdi+4], [rdi]). Every step shifts r9:r8
; right by 8 bits, xors in the original entry for the low nibble (rsi+rax)
; and the pre-shifted entry for the high nibble (rbp+8*idx), and xors
; ($L$rem_8bit[byte] << 48) into r9. Steps alternate between the register
; pairs (rbx, r12) and (rcx, r13).
332ALIGN	16
333$L$outer_loop:
334	xor	r9,QWORD[r14]
335	mov	rdx,QWORD[8+r14]
336	lea	r14,[16+r14]
337	xor	rdx,r8
338	mov	QWORD[rdi],r9
339	mov	QWORD[8+rdi],rdx
340	shr	rdx,32
341	xor	rax,rax
342	rol	edx,8
343	mov	al,dl
344	movzx	ebx,dl
345	shl	al,4
346	shr	ebx,4
347	rol	edx,8
348	mov	r8,QWORD[8+rax*1+rsi]
349	mov	r9,QWORD[rax*1+rsi]
350	mov	al,dl
351	movzx	ecx,dl
352	shl	al,4
353	movzx	r12,BYTE[rbx*1+rsp]
354	shr	ecx,4
355	xor	r12,r8
356	mov	r10,r9
357	shr	r8,8
358	movzx	r12,r12b
359	shr	r9,8
360	xor	r8,QWORD[((-128))+rbx*8+rbp]
361	shl	r10,56
362	xor	r9,QWORD[rbx*8+rbp]
363	rol	edx,8
364	xor	r8,QWORD[8+rax*1+rsi]
365	xor	r9,QWORD[rax*1+rsi]
366	mov	al,dl
367	xor	r8,r10
368	movzx	r12,WORD[r12*2+r11]
369	movzx	ebx,dl
370	shl	al,4
371	movzx	r13,BYTE[rcx*1+rsp]
372	shr	ebx,4
373	shl	r12,48
374	xor	r13,r8
375	mov	r10,r9
376	xor	r9,r12
377	shr	r8,8
378	movzx	r13,r13b
379	shr	r9,8
380	xor	r8,QWORD[((-128))+rcx*8+rbp]
381	shl	r10,56
382	xor	r9,QWORD[rcx*8+rbp]
383	rol	edx,8
384	xor	r8,QWORD[8+rax*1+rsi]
385	xor	r9,QWORD[rax*1+rsi]
386	mov	al,dl
387	xor	r8,r10
388	movzx	r13,WORD[r13*2+r11]
389	movzx	ecx,dl
390	shl	al,4
391	movzx	r12,BYTE[rbx*1+rsp]
392	shr	ecx,4
393	shl	r13,48
394	xor	r12,r8
395	mov	r10,r9
396	xor	r9,r13
397	shr	r8,8
398	movzx	r12,r12b
399	mov	edx,DWORD[8+rdi]	; next 4 state bytes
400	shr	r9,8
401	xor	r8,QWORD[((-128))+rbx*8+rbp]
402	shl	r10,56
403	xor	r9,QWORD[rbx*8+rbp]
404	rol	edx,8
405	xor	r8,QWORD[8+rax*1+rsi]
406	xor	r9,QWORD[rax*1+rsi]
407	mov	al,dl
408	xor	r8,r10
409	movzx	r12,WORD[r12*2+r11]
410	movzx	ebx,dl
411	shl	al,4
412	movzx	r13,BYTE[rcx*1+rsp]
413	shr	ebx,4
414	shl	r12,48
415	xor	r13,r8
416	mov	r10,r9
417	xor	r9,r12
418	shr	r8,8
419	movzx	r13,r13b
420	shr	r9,8
421	xor	r8,QWORD[((-128))+rcx*8+rbp]
422	shl	r10,56
423	xor	r9,QWORD[rcx*8+rbp]
424	rol	edx,8
425	xor	r8,QWORD[8+rax*1+rsi]
426	xor	r9,QWORD[rax*1+rsi]
427	mov	al,dl
428	xor	r8,r10
429	movzx	r13,WORD[r13*2+r11]
430	movzx	ecx,dl
431	shl	al,4
432	movzx	r12,BYTE[rbx*1+rsp]
433	shr	ecx,4
434	shl	r13,48
435	xor	r12,r8
436	mov	r10,r9
437	xor	r9,r13
438	shr	r8,8
439	movzx	r12,r12b
440	shr	r9,8
441	xor	r8,QWORD[((-128))+rbx*8+rbp]
442	shl	r10,56
443	xor	r9,QWORD[rbx*8+rbp]
444	rol	edx,8
445	xor	r8,QWORD[8+rax*1+rsi]
446	xor	r9,QWORD[rax*1+rsi]
447	mov	al,dl
448	xor	r8,r10
449	movzx	r12,WORD[r12*2+r11]
450	movzx	ebx,dl
451	shl	al,4
452	movzx	r13,BYTE[rcx*1+rsp]
453	shr	ebx,4
454	shl	r12,48
455	xor	r13,r8
456	mov	r10,r9
457	xor	r9,r12
458	shr	r8,8
459	movzx	r13,r13b
460	shr	r9,8
461	xor	r8,QWORD[((-128))+rcx*8+rbp]
462	shl	r10,56
463	xor	r9,QWORD[rcx*8+rbp]
464	rol	edx,8
465	xor	r8,QWORD[8+rax*1+rsi]
466	xor	r9,QWORD[rax*1+rsi]
467	mov	al,dl
468	xor	r8,r10
469	movzx	r13,WORD[r13*2+r11]
470	movzx	ecx,dl
471	shl	al,4
472	movzx	r12,BYTE[rbx*1+rsp]
473	shr	ecx,4
474	shl	r13,48
475	xor	r12,r8
476	mov	r10,r9
477	xor	r9,r13
478	shr	r8,8
479	movzx	r12,r12b
480	mov	edx,DWORD[4+rdi]	; next 4 state bytes
481	shr	r9,8
482	xor	r8,QWORD[((-128))+rbx*8+rbp]
483	shl	r10,56
484	xor	r9,QWORD[rbx*8+rbp]
485	rol	edx,8
486	xor	r8,QWORD[8+rax*1+rsi]
487	xor	r9,QWORD[rax*1+rsi]
488	mov	al,dl
489	xor	r8,r10
490	movzx	r12,WORD[r12*2+r11]
491	movzx	ebx,dl
492	shl	al,4
493	movzx	r13,BYTE[rcx*1+rsp]
494	shr	ebx,4
495	shl	r12,48
496	xor	r13,r8
497	mov	r10,r9
498	xor	r9,r12
499	shr	r8,8
500	movzx	r13,r13b
501	shr	r9,8
502	xor	r8,QWORD[((-128))+rcx*8+rbp]
503	shl	r10,56
504	xor	r9,QWORD[rcx*8+rbp]
505	rol	edx,8
506	xor	r8,QWORD[8+rax*1+rsi]
507	xor	r9,QWORD[rax*1+rsi]
508	mov	al,dl
509	xor	r8,r10
510	movzx	r13,WORD[r13*2+r11]
511	movzx	ecx,dl
512	shl	al,4
513	movzx	r12,BYTE[rbx*1+rsp]
514	shr	ecx,4
515	shl	r13,48
516	xor	r12,r8
517	mov	r10,r9
518	xor	r9,r13
519	shr	r8,8
520	movzx	r12,r12b
521	shr	r9,8
522	xor	r8,QWORD[((-128))+rbx*8+rbp]
523	shl	r10,56
524	xor	r9,QWORD[rbx*8+rbp]
525	rol	edx,8
526	xor	r8,QWORD[8+rax*1+rsi]
527	xor	r9,QWORD[rax*1+rsi]
528	mov	al,dl
529	xor	r8,r10
530	movzx	r12,WORD[r12*2+r11]
531	movzx	ebx,dl
532	shl	al,4
533	movzx	r13,BYTE[rcx*1+rsp]
534	shr	ebx,4
535	shl	r12,48
536	xor	r13,r8
537	mov	r10,r9
538	xor	r9,r12
539	shr	r8,8
540	movzx	r13,r13b
541	shr	r9,8
542	xor	r8,QWORD[((-128))+rcx*8+rbp]
543	shl	r10,56
544	xor	r9,QWORD[rcx*8+rbp]
545	rol	edx,8
546	xor	r8,QWORD[8+rax*1+rsi]
547	xor	r9,QWORD[rax*1+rsi]
548	mov	al,dl
549	xor	r8,r10
550	movzx	r13,WORD[r13*2+r11]
551	movzx	ecx,dl
552	shl	al,4
553	movzx	r12,BYTE[rbx*1+rsp]
554	shr	ecx,4
555	shl	r13,48
556	xor	r12,r8
557	mov	r10,r9
558	xor	r9,r13
559	shr	r8,8
560	movzx	r12,r12b
561	mov	edx,DWORD[rdi]	; last 4 state bytes
562	shr	r9,8
563	xor	r8,QWORD[((-128))+rbx*8+rbp]
564	shl	r10,56
565	xor	r9,QWORD[rbx*8+rbp]
566	rol	edx,8
567	xor	r8,QWORD[8+rax*1+rsi]
568	xor	r9,QWORD[rax*1+rsi]
569	mov	al,dl
570	xor	r8,r10
571	movzx	r12,WORD[r12*2+r11]
572	movzx	ebx,dl
573	shl	al,4
574	movzx	r13,BYTE[rcx*1+rsp]
575	shr	ebx,4
576	shl	r12,48
577	xor	r13,r8
578	mov	r10,r9
579	xor	r9,r12
580	shr	r8,8
581	movzx	r13,r13b
582	shr	r9,8
583	xor	r8,QWORD[((-128))+rcx*8+rbp]
584	shl	r10,56
585	xor	r9,QWORD[rcx*8+rbp]
586	rol	edx,8
587	xor	r8,QWORD[8+rax*1+rsi]
588	xor	r9,QWORD[rax*1+rsi]
589	mov	al,dl
590	xor	r8,r10
591	movzx	r13,WORD[r13*2+r11]
592	movzx	ecx,dl
593	shl	al,4
594	movzx	r12,BYTE[rbx*1+rsp]
595	shr	ecx,4
596	shl	r13,48
597	xor	r12,r8
598	mov	r10,r9
599	xor	r9,r13
600	shr	r8,8
601	movzx	r12,r12b
602	shr	r9,8
603	xor	r8,QWORD[((-128))+rbx*8+rbp]
604	shl	r10,56
605	xor	r9,QWORD[rbx*8+rbp]
606	rol	edx,8
607	xor	r8,QWORD[8+rax*1+rsi]
608	xor	r9,QWORD[rax*1+rsi]
609	mov	al,dl
610	xor	r8,r10
611	movzx	r12,WORD[r12*2+r11]
612	movzx	ebx,dl
613	shl	al,4
614	movzx	r13,BYTE[rcx*1+rsp]
615	shr	ebx,4
616	shl	r12,48
617	xor	r13,r8
618	mov	r10,r9
619	xor	r9,r12
620	shr	r8,8
621	movzx	r13,r13b
622	shr	r9,8
623	xor	r8,QWORD[((-128))+rcx*8+rbp]
624	shl	r10,56
625	xor	r9,QWORD[rcx*8+rbp]
626	rol	edx,8
627	xor	r8,QWORD[8+rax*1+rsi]
628	xor	r9,QWORD[rax*1+rsi]
629	mov	al,dl
630	xor	r8,r10
631	movzx	r13,WORD[r13*2+r11]
632	movzx	ecx,dl
633	shl	al,4
634	movzx	r12,BYTE[rbx*1+rsp]
635	and	ecx,240	; final byte: keep the high nibble as a *16 byte offset into rsi
636	shl	r13,48
637	xor	r12,r8
638	mov	r10,r9
639	xor	r9,r13
640	shr	r8,8
641	movzx	r12,r12b
642	mov	edx,DWORD[((-4))+rdi]	; loads 4 bytes below the state; edx is not read again before being overwritten
643	shr	r9,8
644	xor	r8,QWORD[((-128))+rbx*8+rbp]
645	shl	r10,56
646	xor	r9,QWORD[rbx*8+rbp]
647	movzx	r12,WORD[r12*2+r11]
648	xor	r8,QWORD[8+rax*1+rsi]
649	xor	r9,QWORD[rax*1+rsi]
650	shl	r12,48
651	xor	r8,r10
652	xor	r9,r12
; Last nibble: a 4-bit shift using the unshifted table entry at rsi+rcx;
; the $L$rem_8bit index is (low nibble of r8) << 4.
653	movzx	r13,r8b
654	shr	r8,4
655	mov	r10,r9
656	shl	r13b,4
657	shr	r9,4
658	xor	r8,QWORD[8+rcx*1+rsi]
659	movzx	r13,WORD[r13*2+r11]
660	shl	r10,60
661	xor	r9,QWORD[rcx*1+rsi]
662	xor	r8,r10
663	shl	r13,48
664	bswap	r8
665	xor	r9,r13
666	bswap	r9
667	cmp	r14,r15
668	jb	NEAR $L$outer_loop	; more input: r8/r9 carry the state into the next block
669	mov	QWORD[8+rdi],r8
670	mov	QWORD[rdi],r9
671
; Reload the six pushed registers from above the 280-byte local area.
672	lea	rsi,[280+rsp]
673	mov	r15,QWORD[rsi]
674	mov	r14,QWORD[8+rsi]
675	mov	r13,QWORD[16+rsi]
676	mov	r12,QWORD[24+rsi]
677	mov	rbp,QWORD[32+rsi]
678	mov	rbx,QWORD[40+rsi]
679	lea	rsp,[48+rsi]
680$L$ghash_epilogue:
681	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
682	mov	rsi,QWORD[16+rsp]
683	DB	0F3h,0C3h		;repret
684$L$SEH_end_gcm_ghash_4bit:
;-----------------------------------------------------------------------
; gcm_init_clmul -- precompute a 96-byte table with carry-less multiplies.
; ABI:  Win64, arguments used in place: rcx = destination table,
;       rdx = 16-byte source value (loaded unaligned).
; Out:  six 16-byte stores at [rcx+0], +16, +32, +48, +64, +80.
;       [rcx+0] holds the adjusted input, +16/+48/+64 hold successive
;       products of it, and +32/+80 hold the xor-folded halves of the
;       neighbouring pairs (built with palignr).
; Saves/restores xmm6 on the stack (24-byte frame); this prologue is
; described by $L$SEH_info_gcm_init_clmul.
; The raw DB sequences are SSE instructions emitted as bytes:
;   102,15,58,68,modrm,imm = pclmulqdq   102,15,58,15,modrm,imm = palignr
;-----------------------------------------------------------------------
685global	gcm_init_clmul
686
687ALIGN	16
688gcm_init_clmul:
689$L$_init_clmul:
690$L$SEH_begin_gcm_init_clmul:
691
692DB	0x48,0x83,0xec,0x18	; sub rsp,0x18
693DB	0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6
694	movdqu	xmm2,XMMWORD[rdx]
695	pshufd	xmm2,xmm2,78	; swap the two 64-bit halves
696
697
; Shift the 128-bit value left by one bit; if its top bit was set
; (pcmpgtd against the broadcast top dword), xor in $L$0x1c2_polynomial.
698	pshufd	xmm4,xmm2,255
699	movdqa	xmm3,xmm2
700	psllq	xmm2,1
701	pxor	xmm5,xmm5
702	psrlq	xmm3,63
703	pcmpgtd	xmm5,xmm4
704	pslldq	xmm3,8
705	por	xmm2,xmm3
706
707
708	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
709	pxor	xmm2,xmm5
710
711
; First product: xmm0 = xmm2 * xmm2 using three carry-less multiplies
; (low*low, high*high, folded*folded), followed by the shift/xor
; sequence that brings the 256-bit product back to 128 bits.
712	pshufd	xmm6,xmm2,78
713	movdqa	xmm0,xmm2
714	pxor	xmm6,xmm2	; xmm6 = hi^lo of xmm2, reused by every multiply below
715	movdqa	xmm1,xmm0
716	pshufd	xmm3,xmm0,78
717	pxor	xmm3,xmm0
718DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
719DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
720DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
721	pxor	xmm3,xmm0
722	pxor	xmm3,xmm1
723
724	movdqa	xmm4,xmm3
725	psrldq	xmm3,8
726	pslldq	xmm4,8
727	pxor	xmm1,xmm3
728	pxor	xmm0,xmm4
729
730	movdqa	xmm4,xmm0
731	movdqa	xmm3,xmm0
732	psllq	xmm0,5
733	pxor	xmm3,xmm0
734	psllq	xmm0,1
735	pxor	xmm0,xmm3
736	psllq	xmm0,57
737	movdqa	xmm3,xmm0
738	pslldq	xmm0,8
739	psrldq	xmm3,8
740	pxor	xmm0,xmm4
741	pxor	xmm1,xmm3
742
743
744	movdqa	xmm4,xmm0
745	psrlq	xmm0,1
746	pxor	xmm1,xmm4
747	pxor	xmm4,xmm0
748	psrlq	xmm0,5
749	pxor	xmm0,xmm4
750	psrlq	xmm0,1
751	pxor	xmm0,xmm1
752	pshufd	xmm3,xmm2,78
753	pshufd	xmm4,xmm0,78
754	pxor	xmm3,xmm2
755	movdqu	XMMWORD[rcx],xmm2
756	pxor	xmm4,xmm0
757	movdqu	XMMWORD[16+rcx],xmm0
758DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8 -- pack both folded halves into one register
759	movdqu	XMMWORD[32+rcx],xmm4
; Second product: xmm0 = xmm0 * xmm2 (kept in xmm5 for the store at +48).
760	movdqa	xmm1,xmm0
761	pshufd	xmm3,xmm0,78
762	pxor	xmm3,xmm0
763DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
764DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
765DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
766	pxor	xmm3,xmm0
767	pxor	xmm3,xmm1
768
769	movdqa	xmm4,xmm3
770	psrldq	xmm3,8
771	pslldq	xmm4,8
772	pxor	xmm1,xmm3
773	pxor	xmm0,xmm4
774
775	movdqa	xmm4,xmm0
776	movdqa	xmm3,xmm0
777	psllq	xmm0,5
778	pxor	xmm3,xmm0
779	psllq	xmm0,1
780	pxor	xmm0,xmm3
781	psllq	xmm0,57
782	movdqa	xmm3,xmm0
783	pslldq	xmm0,8
784	psrldq	xmm3,8
785	pxor	xmm0,xmm4
786	pxor	xmm1,xmm3
787
788
789	movdqa	xmm4,xmm0
790	psrlq	xmm0,1
791	pxor	xmm1,xmm4
792	pxor	xmm4,xmm0
793	psrlq	xmm0,5
794	pxor	xmm0,xmm4
795	psrlq	xmm0,1
796	pxor	xmm0,xmm1
797	movdqa	xmm5,xmm0
; Third product: xmm0 = xmm0 * xmm2 (stored at +64).
798	movdqa	xmm1,xmm0
799	pshufd	xmm3,xmm0,78
800	pxor	xmm3,xmm0
801DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
802DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
803DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
804	pxor	xmm3,xmm0
805	pxor	xmm3,xmm1
806
807	movdqa	xmm4,xmm3
808	psrldq	xmm3,8
809	pslldq	xmm4,8
810	pxor	xmm1,xmm3
811	pxor	xmm0,xmm4
812
813	movdqa	xmm4,xmm0
814	movdqa	xmm3,xmm0
815	psllq	xmm0,5
816	pxor	xmm3,xmm0
817	psllq	xmm0,1
818	pxor	xmm0,xmm3
819	psllq	xmm0,57
820	movdqa	xmm3,xmm0
821	pslldq	xmm0,8
822	psrldq	xmm3,8
823	pxor	xmm0,xmm4
824	pxor	xmm1,xmm3
825
826
827	movdqa	xmm4,xmm0
828	psrlq	xmm0,1
829	pxor	xmm1,xmm4
830	pxor	xmm4,xmm0
831	psrlq	xmm0,5
832	pxor	xmm0,xmm4
833	psrlq	xmm0,1
834	pxor	xmm0,xmm1
835	pshufd	xmm3,xmm5,78
836	pshufd	xmm4,xmm0,78
837	pxor	xmm3,xmm5
838	movdqu	XMMWORD[48+rcx],xmm5
839	pxor	xmm4,xmm0
840	movdqu	XMMWORD[64+rcx],xmm0
841DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
842	movdqu	XMMWORD[80+rcx],xmm4
843	movaps	xmm6,XMMWORD[rsp]
844	lea	rsp,[24+rsp]
845$L$SEH_end_gcm_init_clmul:
846	DB	0F3h,0C3h		;repret
847
;-----------------------------------------------------------------------
; gcm_gmult_clmul -- one 128-bit carry-less multiply of the value at [rcx]
; by the table entry at [rdx], result written back to [rcx].
; ABI:  Win64, arguments used in place: rcx = 16-byte value (in/out),
;       rdx = table; entries at [rdx] and [rdx+32] are read.
; Uses only xmm0-xmm5; no stack frame and no unwind record.
; The value is byte-reversed with pshufb ($L$bswap_mask) on load and
; again before the store.
;-----------------------------------------------------------------------
848global	gcm_gmult_clmul
849
850ALIGN	16
851gcm_gmult_clmul:
852$L$_gmult_clmul:
853	movdqu	xmm0,XMMWORD[rcx]
854	movdqa	xmm5,XMMWORD[$L$bswap_mask]
855	movdqu	xmm2,XMMWORD[rdx]
856	movdqu	xmm4,XMMWORD[32+rdx]
857DB	102,15,56,0,197	; pshufb xmm0,xmm5
858	movdqa	xmm1,xmm0
859	pshufd	xmm3,xmm0,78
860	pxor	xmm3,xmm0	; xmm3 = hi^lo of the input
861DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00 (low * low)
862DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11 (high * high)
863DB	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00 (folded * folded)
864	pxor	xmm3,xmm0
865	pxor	xmm3,xmm1
866
; Distribute the middle term across the low (xmm0) and high (xmm1) halves.
867	movdqa	xmm4,xmm3
868	psrldq	xmm3,8
869	pslldq	xmm4,8
870	pxor	xmm1,xmm3
871	pxor	xmm0,xmm4
872
; Two-phase shift/xor sequence that collapses xmm1:xmm0 into xmm0.
873	movdqa	xmm4,xmm0
874	movdqa	xmm3,xmm0
875	psllq	xmm0,5
876	pxor	xmm3,xmm0
877	psllq	xmm0,1
878	pxor	xmm0,xmm3
879	psllq	xmm0,57
880	movdqa	xmm3,xmm0
881	pslldq	xmm0,8
882	psrldq	xmm3,8
883	pxor	xmm0,xmm4
884	pxor	xmm1,xmm3
885
886
887	movdqa	xmm4,xmm0
888	psrlq	xmm0,1
889	pxor	xmm1,xmm4
890	pxor	xmm4,xmm0
891	psrlq	xmm0,5
892	pxor	xmm0,xmm4
893	psrlq	xmm0,1
894	pxor	xmm0,xmm1
895DB	102,15,56,0,197	; pshufb xmm0,xmm5
896	movdqu	XMMWORD[rcx],xmm0
897	DB	0F3h,0C3h		;repret
898
;-----------------------------------------------------------------------
; gcm_ghash_clmul -- carry-less-multiply hash update over 16-byte blocks.
; ABI:  Win64, arguments used in place:
;       rcx = 16-byte state (in/out), rdx = table (as written by
;       gcm_init_clmul: entries at +0, +16, +32, +48, +64, +80 are read),
;       r8 = input pointer, r9 = input length in bytes.
;       The code subtracts 0x10 from r9 before any other test, so a
;       length of at least 16 is assumed -- TODO confirm against callers.
; Frame: 168 bytes holding xmm6-xmm15 (Win64 callee-saved), described by
;       $L$SEH_info_gcm_ghash_clmul.
; Paths: 4 blocks per iteration ($L$mod4_loop / $L$tail4x) when at least
;       64 bytes are available and the OPENSSL_ia32cap_P test below does
;       not veto it; otherwise 2 blocks per iteration ($L$mod_loop /
;       $L$even_tail) with a single-block $L$odd_tail.
; xmm10 holds $L$bswap_mask throughout; xmm0 is the running state.
; DB sequences: 102,[rex,]15,56,0,modrm = pshufb;
;               102,[rex,]15,58,68,modrm,imm = pclmulqdq.
;-----------------------------------------------------------------------
899global	gcm_ghash_clmul
900
901ALIGN	32
902gcm_ghash_clmul:
903$L$_ghash_clmul:
904	lea	rax,[((-136))+rsp]
905$L$SEH_begin_gcm_ghash_clmul:
906
907DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
908DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
909DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
910DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
911DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
912DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
913DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
914DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
915DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
916DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
917DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
918	movdqa	xmm10,XMMWORD[$L$bswap_mask]
919
920	movdqu	xmm0,XMMWORD[rcx]
921	movdqu	xmm2,XMMWORD[rdx]
922	movdqu	xmm7,XMMWORD[32+rdx]
923DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
924
925	sub	r9,0x10
926	jz	NEAR $L$odd_tail	; exactly one block
927
928	movdqu	xmm6,XMMWORD[16+rdx]
929	mov	eax,DWORD[((OPENSSL_ia32cap_P+4))]
930	cmp	r9,0x30
931	jb	NEAR $L$skip4x	; fewer than 64 bytes in total
932
; Skip the 4x path when bit 22 is set and bit 26 is clear in the second
; capability dword (mask 0x04400000, match 0x00400000).
; NOTE(review): presumably a CPU-model heuristic -- confirm against the
; OPENSSL_ia32cap documentation.
933	and	eax,71303168
934	cmp	eax,4194304
935	je	NEAR $L$skip4x
936
937	sub	r9,0x30
938	mov	rax,0xA040608020C0E000	; 8-entry byte table, used as a pshufb source in $L$mod4_loop
939	movdqu	xmm14,XMMWORD[48+rdx]
940	movdqu	xmm15,XMMWORD[64+rdx]
941
942
943
944
; Prime the 4x pipeline with the first four blocks: blocks 3 and 2 are
; multiplied by the [rdx+0]/[rdx+16] entries, block 1 by [rdx+48], and
; block 0 is xored into the state (multiplied by [rdx+64] in the loop).
945	movdqu	xmm3,XMMWORD[48+r8]
946	movdqu	xmm11,XMMWORD[32+r8]
947DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
948DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
949	movdqa	xmm5,xmm3
950	pshufd	xmm4,xmm3,78
951	pxor	xmm4,xmm3
952DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
953DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
954DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
955
956	movdqa	xmm13,xmm11
957	pshufd	xmm12,xmm11,78
958	pxor	xmm12,xmm11
959DB	102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
960DB	102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
961DB	102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
962	xorps	xmm3,xmm11
963	xorps	xmm5,xmm13
964	movups	xmm7,XMMWORD[80+rdx]
965	xorps	xmm4,xmm12
966
967	movdqu	xmm11,XMMWORD[16+r8]
968	movdqu	xmm8,XMMWORD[r8]
969DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
970DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
971	movdqa	xmm13,xmm11
972	pshufd	xmm12,xmm11,78
973	pxor	xmm0,xmm8
974	pxor	xmm12,xmm11
975DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
976	movdqa	xmm1,xmm0
977	pshufd	xmm8,xmm0,78
978	pxor	xmm8,xmm0
979DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
980DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
981	xorps	xmm3,xmm11
982	xorps	xmm5,xmm13
983
984	lea	r8,[64+r8]
985	sub	r9,0x40
986	jc	NEAR $L$tail4x	; no further full group of four blocks
987
988	jmp	NEAR $L$mod4_loop
; Steady state: finish the pending state multiply and its reduction
; while loading, byte-swapping and multiplying the next four blocks.
989ALIGN	32
990$L$mod4_loop:
991DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
992	xorps	xmm4,xmm12
993	movdqu	xmm11,XMMWORD[48+r8]
994DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
995DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
996	xorps	xmm0,xmm3
997	movdqu	xmm3,XMMWORD[32+r8]
998	movdqa	xmm13,xmm11
999DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
1000	pshufd	xmm12,xmm11,78
1001	xorps	xmm1,xmm5
1002	pxor	xmm12,xmm11
1003DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
1004	movups	xmm7,XMMWORD[32+rdx]
1005	xorps	xmm8,xmm4
1006DB	102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
1007	pshufd	xmm4,xmm3,78
1008
1009	pxor	xmm8,xmm0
1010	movdqa	xmm5,xmm3
1011	pxor	xmm8,xmm1
1012	pxor	xmm4,xmm3
1013	movdqa	xmm9,xmm8
1014DB	102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
1015	pslldq	xmm8,8
1016	psrldq	xmm9,8
1017	pxor	xmm0,xmm8
1018	movdqa	xmm8,XMMWORD[$L$7_mask]
1019	pxor	xmm1,xmm9
1020DB	102,76,15,110,200	; movq xmm9,rax (the 8-byte lookup table)
1021
1022	pand	xmm8,xmm0
1023DB	102,69,15,56,0,200	; pshufb xmm9,xmm8 -- table lookup indexed by (xmm0 & 7)
1024	pxor	xmm9,xmm0
1025DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
1026	psllq	xmm9,57
1027	movdqa	xmm8,xmm9
1028	pslldq	xmm9,8
1029DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
1030	psrldq	xmm8,8
1031	pxor	xmm0,xmm9
1032	pxor	xmm1,xmm8
1033	movdqu	xmm8,XMMWORD[r8]
1034
1035	movdqa	xmm9,xmm0
1036	psrlq	xmm0,1
1037DB	102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
1038	xorps	xmm3,xmm11
1039	movdqu	xmm11,XMMWORD[16+r8]
1040DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
1041DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
1042	xorps	xmm5,xmm13
1043	movups	xmm7,XMMWORD[80+rdx]
1044DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
1045	pxor	xmm1,xmm9
1046	pxor	xmm9,xmm0
1047	psrlq	xmm0,5
1048
1049	movdqa	xmm13,xmm11
1050	pxor	xmm4,xmm12
1051	pshufd	xmm12,xmm11,78
1052	pxor	xmm0,xmm9
1053	pxor	xmm1,xmm8
1054	pxor	xmm12,xmm11
1055DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
1056	psrlq	xmm0,1
1057	pxor	xmm0,xmm1
1058	movdqa	xmm1,xmm0
1059DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
1060	xorps	xmm3,xmm11
1061	pshufd	xmm8,xmm0,78
1062	pxor	xmm8,xmm0
1063
1064DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
1065	xorps	xmm5,xmm13
1066
1067	lea	r8,[64+r8]
1068	sub	r9,0x40
1069	jnc	NEAR $L$mod4_loop
1070
; Drain the pipeline: last state multiply, combine, and reduce.
1071$L$tail4x:
1072DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
1073DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
1074DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
1075	xorps	xmm4,xmm12
1076	xorps	xmm0,xmm3
1077	xorps	xmm1,xmm5
1078	pxor	xmm1,xmm0
1079	pxor	xmm8,xmm4
1080
1081	pxor	xmm8,xmm1
1082	pxor	xmm1,xmm0
1083
1084	movdqa	xmm9,xmm8
1085	psrldq	xmm8,8
1086	pslldq	xmm9,8
1087	pxor	xmm1,xmm8
1088	pxor	xmm0,xmm9
1089
1090	movdqa	xmm4,xmm0
1091	movdqa	xmm3,xmm0
1092	psllq	xmm0,5
1093	pxor	xmm3,xmm0
1094	psllq	xmm0,1
1095	pxor	xmm0,xmm3
1096	psllq	xmm0,57
1097	movdqa	xmm3,xmm0
1098	pslldq	xmm0,8
1099	psrldq	xmm3,8
1100	pxor	xmm0,xmm4
1101	pxor	xmm1,xmm3
1102
1103
1104	movdqa	xmm4,xmm0
1105	psrlq	xmm0,1
1106	pxor	xmm1,xmm4
1107	pxor	xmm4,xmm0
1108	psrlq	xmm0,5
1109	pxor	xmm0,xmm4
1110	psrlq	xmm0,1
1111	pxor	xmm0,xmm1
1112	add	r9,0x40	; r9 = bytes still unprocessed (0..0x30)
1113	jz	NEAR $L$done
1114	movdqu	xmm7,XMMWORD[32+rdx]	; restore the [rdx+32] entry expected by the 2x/1x paths
1115	sub	r9,0x10
1116	jz	NEAR $L$odd_tail
1117$L$skip4x:
1118
1119
1120
1121
1122
; 2x path: xor block 0 into the state, start multiplying block 1 by the
; [rdx+0] entry; the state itself is multiplied by [rdx+16] in the loop.
1123	movdqu	xmm8,XMMWORD[r8]
1124	movdqu	xmm3,XMMWORD[16+r8]
1125DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
1126DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
1127	pxor	xmm0,xmm8
1128
1129	movdqa	xmm5,xmm3
1130	pshufd	xmm4,xmm3,78
1131	pxor	xmm4,xmm3
1132DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
1133DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
1134DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
1135
1136	lea	r8,[32+r8]
1137	nop
1138	sub	r9,0x20
1139	jbe	NEAR $L$even_tail
1140	nop
1141	jmp	NEAR $L$mod_loop
1142
1143ALIGN	32
1144$L$mod_loop:
1145	movdqa	xmm1,xmm0
1146	movdqa	xmm8,xmm4
1147	pshufd	xmm4,xmm0,78
1148	pxor	xmm4,xmm0
1149
1150DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
1151DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
1152DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
1153
1154	pxor	xmm0,xmm3
1155	pxor	xmm1,xmm5
1156	movdqu	xmm9,XMMWORD[r8]
1157	pxor	xmm8,xmm0
1158DB	102,69,15,56,0,202	; pshufb xmm9,xmm10
1159	movdqu	xmm3,XMMWORD[16+r8]
1160
1161	pxor	xmm8,xmm1
1162	pxor	xmm1,xmm9	; fold the next even block into the high half ahead of the reduction
1163	pxor	xmm4,xmm8
1164DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
1165	movdqa	xmm8,xmm4
1166	psrldq	xmm8,8
1167	pslldq	xmm4,8
1168	pxor	xmm1,xmm8
1169	pxor	xmm0,xmm4
1170
1171	movdqa	xmm5,xmm3
1172
1173	movdqa	xmm9,xmm0
1174	movdqa	xmm8,xmm0
1175	psllq	xmm0,5
1176	pxor	xmm8,xmm0
1177DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
1178	psllq	xmm0,1
1179	pxor	xmm0,xmm8
1180	psllq	xmm0,57
1181	movdqa	xmm8,xmm0
1182	pslldq	xmm0,8
1183	psrldq	xmm8,8
1184	pxor	xmm0,xmm9
1185	pshufd	xmm4,xmm5,78
1186	pxor	xmm1,xmm8
1187	pxor	xmm4,xmm5
1188
1189	movdqa	xmm9,xmm0
1190	psrlq	xmm0,1
1191DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
1192	pxor	xmm1,xmm9
1193	pxor	xmm9,xmm0
1194	psrlq	xmm0,5
1195	pxor	xmm0,xmm9
1196	lea	r8,[32+r8]
1197	psrlq	xmm0,1
1198DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
1199	pxor	xmm0,xmm1
1200
1201	sub	r9,0x20
1202	ja	NEAR $L$mod_loop
1203
1204$L$even_tail:
1205	movdqa	xmm1,xmm0
1206	movdqa	xmm8,xmm4
1207	pshufd	xmm4,xmm0,78
1208	pxor	xmm4,xmm0
1209
1210DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
1211DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
1212DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
1213
1214	pxor	xmm0,xmm3
1215	pxor	xmm1,xmm5
1216	pxor	xmm8,xmm0
1217	pxor	xmm8,xmm1
1218	pxor	xmm4,xmm8
1219	movdqa	xmm8,xmm4
1220	psrldq	xmm8,8
1221	pslldq	xmm4,8
1222	pxor	xmm1,xmm8
1223	pxor	xmm0,xmm4
1224
1225	movdqa	xmm4,xmm0
1226	movdqa	xmm3,xmm0
1227	psllq	xmm0,5
1228	pxor	xmm3,xmm0
1229	psllq	xmm0,1
1230	pxor	xmm0,xmm3
1231	psllq	xmm0,57
1232	movdqa	xmm3,xmm0
1233	pslldq	xmm0,8
1234	psrldq	xmm3,8
1235	pxor	xmm0,xmm4
1236	pxor	xmm1,xmm3
1237
1238
1239	movdqa	xmm4,xmm0
1240	psrlq	xmm0,1
1241	pxor	xmm1,xmm4
1242	pxor	xmm4,xmm0
1243	psrlq	xmm0,5
1244	pxor	xmm0,xmm4
1245	psrlq	xmm0,1
1246	pxor	xmm0,xmm1
1247	test	r9,r9
1248	jnz	NEAR $L$done	; r9 == 0 here means one more block remains (r9 is biased by -0x10)
1249
; Single remaining block: same multiply/reduce as gcm_gmult_clmul.
1250$L$odd_tail:
1251	movdqu	xmm8,XMMWORD[r8]
1252DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
1253	pxor	xmm0,xmm8
1254	movdqa	xmm1,xmm0
1255	pshufd	xmm3,xmm0,78
1256	pxor	xmm3,xmm0
1257DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
1258DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
1259DB	102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
1260	pxor	xmm3,xmm0
1261	pxor	xmm3,xmm1
1262
1263	movdqa	xmm4,xmm3
1264	psrldq	xmm3,8
1265	pslldq	xmm4,8
1266	pxor	xmm1,xmm3
1267	pxor	xmm0,xmm4
1268
1269	movdqa	xmm4,xmm0
1270	movdqa	xmm3,xmm0
1271	psllq	xmm0,5
1272	pxor	xmm3,xmm0
1273	psllq	xmm0,1
1274	pxor	xmm0,xmm3
1275	psllq	xmm0,57
1276	movdqa	xmm3,xmm0
1277	pslldq	xmm0,8
1278	psrldq	xmm3,8
1279	pxor	xmm0,xmm4
1280	pxor	xmm1,xmm3
1281
1282
1283	movdqa	xmm4,xmm0
1284	psrlq	xmm0,1
1285	pxor	xmm1,xmm4
1286	pxor	xmm4,xmm0
1287	psrlq	xmm0,5
1288	pxor	xmm0,xmm4
1289	psrlq	xmm0,1
1290	pxor	xmm0,xmm1
1291$L$done:
1292DB	102,65,15,56,0,194	; pshufb xmm0,xmm10 -- back to memory byte order
1293	movdqu	XMMWORD[rcx],xmm0
1294	movaps	xmm6,XMMWORD[rsp]
1295	movaps	xmm7,XMMWORD[16+rsp]
1296	movaps	xmm8,XMMWORD[32+rsp]
1297	movaps	xmm9,XMMWORD[48+rsp]
1298	movaps	xmm10,XMMWORD[64+rsp]
1299	movaps	xmm11,XMMWORD[80+rsp]
1300	movaps	xmm12,XMMWORD[96+rsp]
1301	movaps	xmm13,XMMWORD[112+rsp]
1302	movaps	xmm14,XMMWORD[128+rsp]
1303	movaps	xmm15,XMMWORD[144+rsp]
1304	lea	rsp,[168+rsp]
1305$L$SEH_end_gcm_ghash_clmul:
1306	DB	0F3h,0C3h		;repret
1307
; gcm_init_avx -- in this build the AVX entry point is only an alias: it
; tail-jumps to the CLMUL implementation ($L$_init_clmul), so arguments
; and results are exactly those of gcm_init_clmul.
1308global	gcm_init_avx
1309
1310ALIGN	32
1311gcm_init_avx:
1312	jmp	NEAR $L$_init_clmul
1313
; gcm_gmult_avx -- alias entry point: tail-jumps to $L$_gmult_clmul, so
; arguments and results are exactly those of gcm_gmult_clmul.
1314global	gcm_gmult_avx
1315
1316ALIGN	32
1317gcm_gmult_avx:
1318	jmp	NEAR $L$_gmult_clmul
1319
; gcm_ghash_avx -- alias entry point: tail-jumps to $L$_ghash_clmul, so
; arguments and results are exactly those of gcm_ghash_clmul.
1320global	gcm_ghash_avx
1321
1322ALIGN	32
1323gcm_ghash_avx:
1324	jmp	NEAR $L$_ghash_clmul
1325
; 16-byte constants for the CLMUL code paths. Note that they live in the
; code section (the only preceding section directive is .text).
1326ALIGN	64
; pshufb control that reverses the byte order of a 128-bit register.
1327$L$bswap_mask:
1328DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; Conditionally xored into the shifted input by gcm_init_clmul: low byte
; 0x01, top byte 0xc2.
1329$L$0x1c2_polynomial:
1330DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; 7 in the low dword of each 64-bit lane; masks the pshufb index in
; $L$mod4_loop.
1331$L$7_mask:
1332	DD	7,0,7,0
; 7 in the low lane, 450 (0x1c2) in the high lane. Not referenced by any
; code visible in this file.
1333$L$7_mask_poly:
1334	DD	7,0,450,0
; $L$rem_4bit -- sixteen 8-byte entries, each written as a (low dword,
; high dword) pair with a zero low dword. gcm_gmult_4bit indexes it with
; the 4 bits shifted out of the low qword ([rdx*8+r11]) and xors the
; entry into the high qword. Entry 1's high dword is 0x1C200000, i.e.
; 0x1C2 << 20 (0x1C2 << 52 as a qword).
1335ALIGN	64
1336
1337$L$rem_4bit:
1338	DD	0,0,0,471859200,0,943718400,0,610271232
1339	DD	0,1887436800,0,1822425088,0,1220542464,0,1423966208
1340	DD	0,3774873600,0,4246732800,0,3644850176,0,3311403008
1341	DD	0,2441084928,0,2376073216,0,2847932416,0,3051356160
1342
; $L$rem_8bit -- 256 16-bit entries (32 rows of 8). gcm_ghash_4bit loads
; an entry with movzx WORD[idx*2+r11], shifts it left by 48 and xors it
; into the high qword of the running value; idx is the byte shifted out
; of the low qword combined with the per-entry byte kept at [rsp+i].
; Entry 1 is 0x01C2 and entry 2 is 0x0384 (= 0x01C2 << 1).
1343$L$rem_8bit:
1344	DW	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1345	DW	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1346	DW	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1347	DW	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1348	DW	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1349	DW	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1350	DW	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1351	DW	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1352	DW	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1353	DW	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1354	DW	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1355	DW	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1356	DW	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1357	DW	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1358	DW	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1359	DW	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1360	DW	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1361	DW	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1362	DW	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1363	DW	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1364	DW	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1365	DW	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1366	DW	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1367	DW	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1368	DW	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1369	DW	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1370	DW	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1371	DW	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1372	DW	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1373	DW	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1374	DW	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1375	DW	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1376
; NUL-terminated ASCII identification string embedded in the object:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1377DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1378DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1379DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1380DB	114,103,62,0
1381ALIGN	64
1382EXTERN	__imp_RtlVirtualUnwind
1383
ALIGN	16
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by
; gcm_gmult_4bit and gcm_ghash_4bit (named in their .xdata records).
;
; In:   r8 = CONTEXT record of the faulting frame
;       r9 = DISPATCHER_CONTEXT
;       HandlerData (disp+56) = { prologue-label RVA, epilogue-label RVA }
; Out:  eax = 1 (ExceptionContinueSearch) after the CONTEXT has been
;       rewound to the caller's frame and RtlVirtualUnwind has been
;       invoked on it.
;
; Rip before the prologue label or at/after the epilogue label: nothing
; is on the stack that matters (or it has been popped already) and the
; entry rsp is taken from context->Rax / context->Rsp respectively.
; Rip inside the body: the frame is undone according to the function:
;   gcm_gmult_4bit : 3 pushes            -> 24 bytes, rbx/rbp/r12
;   gcm_ghash_4bit : 6 pushes + 280 B    -> 328 bytes, rbx/rbp/r12-r15
; The two are told apart by comparing HandlerData[0] with
; $L$ghash_prologue. Previously the 24-byte layout was applied to both,
; which produced a bogus Rsp and register set for faults raised inside
; gcm_ghash_4bit.
;-----------------------------------------------------------------------
se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64			; 32-byte shadow space + 4 stack arguments

	mov	rax,QWORD[120+r8]	; context->Rax (entry rsp while in the prologue)
	mov	rbx,QWORD[248+r8]	; context->Rip

	mov	rsi,QWORD[8+r9]		; disp->ImageBase
	mov	r11,QWORD[56+r9]	; disp->HandlerData

	mov	r10d,DWORD[r11]		; HandlerData[0]
	lea	r10,[r10*1+rsi]		; address of the prologue label
	cmp	rbx,r10
	jb	NEAR $L$in_prologue	; fault before the frame was built

	mov	rax,QWORD[152+r8]	; context->Rsp

	mov	r10d,DWORD[4+r11]	; HandlerData[1]
	lea	r10,[r10*1+rsi]		; address of the epilogue label
	cmp	rbx,r10
	jae	NEAR $L$in_prologue	; fault after the frame was torn down

	mov	r10d,DWORD[r11]		; which function's body are we in?
	lea	r10,[r10*1+rsi]
	lea	rdi,[$L$ghash_prologue]	; rdi is scratch until $L$in_prologue
	cmp	r10,rdi
	je	NEAR $L$in_ghash_body

	; gcm_gmult_4bit body: push rbx / rbp / r12
	lea	rax,[24+rax]

	mov	rbx,QWORD[((-8))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	QWORD[144+r8],rbx	; context->Rbx
	mov	QWORD[160+r8],rbp	; context->Rbp
	mov	QWORD[216+r8],r12	; context->R12
	jmp	NEAR $L$in_prologue

$L$in_ghash_body:
	; gcm_ghash_4bit body: push rbx / rbp / r12-r15, then sub rsp,280
	lea	rax,[((48+280))+rax]

	mov	rbx,QWORD[((-8))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	QWORD[144+r8],rbx	; context->Rbx
	mov	QWORD[160+r8],rbp	; context->Rbp
	mov	QWORD[216+r8],r12	; context->R12
	mov	QWORD[224+r8],r13	; context->R13
	mov	QWORD[232+r8],r14	; context->R14
	mov	QWORD[240+r8],r15	; context->R15

$L$in_prologue:
	; rax = rsp at function entry; the WIN64 prologue parked the caller's
	; rdi/rsi in the home slots just above the return address.
	mov	rdi,QWORD[8+rax]
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax	; context->Rsp
	mov	QWORD[168+r8],rsi	; context->Rsi
	mov	QWORD[176+r8],rdi	; context->Rdi

	mov	rdi,QWORD[40+r9]	; disp->ContextRecord (destination)
	mov	rsi,r8			; adjusted context (source)
	mov	ecx,154			; qword count (154*8 = 1232 bytes)
	DD	0xa548f3fc		; cld; rep movsq

	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
	;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
	mov	rsi,r9
	xor	rcx,rcx			; arg1 = 0
	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
	mov	r8,QWORD[rsi]		; arg3 = disp->ControlPc
	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
	lea	r11,[56+rsi]		; &disp->HandlerData
	lea	r12,[24+rsi]		; &disp->EstablisherFrame
	mov	QWORD[32+rsp],r10	; arg5
	mov	QWORD[40+rsp],r11	; arg6
	mov	QWORD[48+rsp],r12	; arg7
	mov	QWORD[56+rsp],rcx	; arg8 = NULL
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1			; ExceptionContinueSearch
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret
1462
1463
; .pdata -- one 12-byte function-table record per function that has a
; stack frame: image-relative begin address, end address, and address of
; its unwind information in .xdata. gcm_gmult_clmul and the three *_avx
; jump stubs have no record here.
1464section	.pdata rdata align=4
1465ALIGN	4
1466	DD	$L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
1467	DD	$L$SEH_end_gcm_gmult_4bit wrt ..imagebase
1468	DD	$L$SEH_info_gcm_gmult_4bit wrt ..imagebase
1469
1470	DD	$L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
1471	DD	$L$SEH_end_gcm_ghash_4bit wrt ..imagebase
1472	DD	$L$SEH_info_gcm_ghash_4bit wrt ..imagebase
1473
1474	DD	$L$SEH_begin_gcm_init_clmul wrt ..imagebase
1475	DD	$L$SEH_end_gcm_init_clmul wrt ..imagebase
1476	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
1477
1478	DD	$L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
1479	DD	$L$SEH_end_gcm_ghash_clmul wrt ..imagebase
1480	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; .xdata -- unwind information referenced from .pdata.
; The two *_4bit functions use a handler-based record: header byte 9
; (version 1, handler flag set), no unwind codes, then the handler's
; image-relative address and two dwords of handler data (prologue-label
; and epilogue-label addresses) that se_handler reads.
; The two *_clmul functions use plain unwind codes that mirror their
; byte-encoded prologues.
1481section	.xdata rdata align=8
1482ALIGN	8
1483$L$SEH_info_gcm_gmult_4bit:
1484DB	9,0,0,0
1485	DD	se_handler wrt ..imagebase
1486	DD	$L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
1487$L$SEH_info_gcm_ghash_4bit:
1488DB	9,0,0,0
1489	DD	se_handler wrt ..imagebase
1490	DD	$L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
1491$L$SEH_info_gcm_init_clmul:
1492DB	0x01,0x08,0x03,0x00	; version 1, prologue size 8, 3 code slots
1493DB	0x08,0x68,0x00,0x00	; at offset 8: xmm6 saved at [rsp+0]
1494DB	0x04,0x22,0x00,0x00	; at offset 4: small stack allocation of 24 bytes; 2 bytes padding
1495$L$SEH_info_gcm_ghash_clmul:
1496DB	0x01,0x33,0x16,0x00	; version 1, prologue size 0x33, 0x16 code slots
1497DB	0x33,0xf8,0x09,0x00	; xmm15 saved at [rsp+9*16]
1498DB	0x2e,0xe8,0x08,0x00	; xmm14 saved at [rsp+8*16]
1499DB	0x29,0xd8,0x07,0x00	; xmm13 saved at [rsp+7*16]
1500DB	0x24,0xc8,0x06,0x00	; xmm12 saved at [rsp+6*16]
1501DB	0x1f,0xb8,0x05,0x00	; xmm11 saved at [rsp+5*16]
1502DB	0x1a,0xa8,0x04,0x00	; xmm10 saved at [rsp+4*16]
1503DB	0x15,0x98,0x03,0x00	; xmm9 saved at [rsp+3*16]
1504DB	0x10,0x88,0x02,0x00	; xmm8 saved at [rsp+2*16]
1505DB	0x0c,0x78,0x01,0x00	; xmm7 saved at [rsp+1*16]
1506DB	0x08,0x68,0x00,0x00	; xmm6 saved at [rsp+0]
1507DB	0x04,0x01,0x15,0x00	; large stack allocation of 0x15*8 = 168 bytes
1508