; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; NASM syntax. Memory operands without an explicit override are RIP-relative.
default	rel
; Operands in this file carry XMMWORD/YMMWORD/ZMMWORD size tags. Defining
; them as empty macros lets NASM see plain memory operands.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64

;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x -- file-internal helper (not exported).
;
; Each pass of $L$oop6x runs six counter blocks through the AES rounds,
; XORs them with 96 bytes read from [rdi], and interleaves carry-less
; multiplications (vpclmulqdq) of six previously staged 16-byte values
; against the table read through r9, accumulating into xmm8.
;
; Register contract, as established by the two callers in this file:
;   rdi   source pointer, advanced by 96 per pass
;   rsi   destination pointer, advanced by 96 per pass
;   rdx   remaining count of 16-byte blocks
;   rcx   round-key pointer biased by +128 (keys read at (16*i-128)+rcx)
;   r8    address the next counter block is written to / reloaded from
;   r9    multiplier table pointer (entries read at (k-32)+r9)
;   r10   running byte count, +0x60 per pass
;   r11   address of $L$bswap_mask (constants read at fixed offsets)
;   r14   address of the next 96 bytes to stage for multiplication
;   r15   limit compared against r14 before r14 is advanced
;   ebx   DWORD loaded from bytes 12..15 of the counter block
;   ebp   round count (only compared against 11)
;   xmm1  current counter block, xmm7 first staged value, xmm8 accumulator
;
; Stack slots are written as (N+8)+rsp: the callers fill N+rsp and the
; call instruction pushed an 8-byte return address in between.
;
; On return xmm9..xmm14 hold the last six output blocks, which the callers
; store at -96..-16+rsi. Clobbers r12, r13, xmm0-xmm15 and flags.
;-----------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_ghash_6x:

	vmovdqu	xmm2,XMMWORD[32+r11]	; $L$one_msb: 1 in byte 15
	sub	rdx,6
	vpxor	xmm4,xmm4,xmm4
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]	; round key 0
	; Derive counters +1..+5 by bumping byte 15 only.
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpaddb	xmm12,xmm11,xmm2
	vpaddb	xmm13,xmm12,xmm2
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm9,xmm1,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm4	; zero the carried-over partial product
	jmp	NEAR $L$oop6x

ALIGN	32
$L$oop6x:
	; 100663296 = 6<<24: the top byte of ebx is byte 15 of the counter
	; block. A carry means the byte-wise vpaddb increments would wrap, so
	; the counters are rebuilt with 32-bit adds in $L$handle_ctr32.
	add	ebx,100663296
	jc	NEAR $L$handle_ctr32
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddb	xmm1,xmm14,xmm2	; counter for the following pass
	vpxor	xmm10,xmm10,xmm15
	vpxor	xmm11,xmm11,xmm15

$L$resume_ctr32:
	vmovdqu	XMMWORD[r8],xmm1	; publish next counter block
	vpclmulqdq	xmm5,xmm7,xmm3,0x10
	vpxor	xmm12,xmm12,xmm15
	vmovups	xmm2,XMMWORD[((16-128))+rcx]
	vpclmulqdq	xmm6,xmm7,xmm3,0x01

	; r12 becomes 0x60 when r15 >= r14 (unsigned), else 0. The flags set
	; by cmp survive until setnc because the vector ops in between do not
	; write flags.
	xor	r12,r12
	cmp	r15,r14

	vaesenc	xmm9,xmm9,xmm2
	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
	vpxor	xmm13,xmm13,xmm15
	vpclmulqdq	xmm1,xmm7,xmm3,0x00
	vaesenc	xmm10,xmm10,xmm2
	vpxor	xmm14,xmm14,xmm15
	setnc	r12b
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vaesenc	xmm11,xmm11,xmm2
	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
	neg	r12
	vaesenc	xmm12,xmm12,xmm2
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm3,0x00
	vpxor	xmm8,xmm8,xmm4
	vaesenc	xmm13,xmm13,xmm2
	vpxor	xmm4,xmm1,xmm5
	and	r12,0x60
	vmovups	xmm15,XMMWORD[((32-128))+rcx]
	vpclmulqdq	xmm1,xmm0,xmm3,0x10
	vaesenc	xmm14,xmm14,xmm2

	vpclmulqdq	xmm2,xmm0,xmm3,0x01
	lea	r14,[r12*1+r14]	; advance staging pointer by 0 or 0x60
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpclmulqdq	xmm3,xmm0,xmm3,0x11
	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
	vaesenc	xmm10,xmm10,xmm15
	; movbe loads 8 bytes byte-reversed. Each pair below stages one
	; 16-byte block from [r14] into a stack slot for the next pass.
	movbe	r13,QWORD[88+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[80+r14]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((32+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((40+8))+rsp],r12
	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((48-128))+rcx]
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm0,xmm5,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm5,0x10
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm7,xmm7,xmm3
	vpclmulqdq	xmm3,xmm0,xmm5,0x01
	vaesenc	xmm11,xmm11,xmm15
	vpclmulqdq	xmm5,xmm0,xmm5,0x11
	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm4,xmm4,xmm1
	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((64-128))+rcx]
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm1,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm1,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[72+r14]
	vpxor	xmm7,xmm7,xmm5
	vpclmulqdq	xmm5,xmm0,xmm1,0x01
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[64+r14]
	vpclmulqdq	xmm1,xmm0,xmm1,0x11
	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((48+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((56+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((80-128))+rcx]
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm2,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm2,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[56+r14]
	vpxor	xmm7,xmm7,xmm1
	vpclmulqdq	xmm1,xmm0,xmm2,0x01
	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[48+r14]
	vpclmulqdq	xmm2,xmm0,xmm2,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((64+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((72+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm3
	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((96-128))+rcx]
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm8,xmm3,0x10
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm8,xmm3,0x01
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[40+r14]
	vpxor	xmm7,xmm7,xmm2
	vpclmulqdq	xmm2,xmm8,xmm3,0x00
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[32+r14]
	vpclmulqdq	xmm8,xmm8,xmm3,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((80+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((88+8))+rsp],r12
	vpxor	xmm6,xmm6,xmm5
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm6,xmm6,xmm1

	vmovups	xmm15,XMMWORD[((112-128))+rcx]
	vpslldq	xmm5,xmm6,8
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm3,XMMWORD[16+r11]	; $L$poly constant

	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm7,xmm7,xmm8
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm4,xmm4,xmm5
	movbe	r13,QWORD[24+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[16+r14]
	vpalignr	xmm0,xmm4,xmm4,8
	vpclmulqdq	xmm4,xmm4,xmm3,0x10
	mov	QWORD[((96+8))+rsp],r13
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((104+8))+rsp],r12
	vaesenc	xmm13,xmm13,xmm15
	vmovups	xmm1,XMMWORD[((128-128))+rcx]
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vmovups	xmm15,XMMWORD[((144-128))+rcx]
	vaesenc	xmm10,xmm10,xmm1
	vpsrldq	xmm6,xmm6,8
	vaesenc	xmm11,xmm11,xmm1
	vpxor	xmm7,xmm7,xmm6
	vaesenc	xmm12,xmm12,xmm1
	vpxor	xmm4,xmm4,xmm0
	movbe	r13,QWORD[8+r14]
	vaesenc	xmm13,xmm13,xmm1
	movbe	r12,QWORD[r14]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((160-128))+rcx]
	; Fewer than 11 rounds: go straight to the tail. Otherwise four more
	; rounds always run; this code has no separate exit for a 12-round
	; key, so only 10- and 14-round schedules are handled.
	cmp	ebp,11
	jb	NEAR $L$enc_tail

	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((176-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((192-128))+rcx]

	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((208-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((224-128))+rcx]
	jmp	NEAR $L$enc_tail

; Slow path: rebuild the six counters with 32-bit adds on a byte-swapped
; copy, using $L$bswap_mask ([r11]), $L$two_lsb (48+r11) and $L$one_lsb
; (64+r11), then swap back and rejoin the main loop.
ALIGN	32
$L$handle_ctr32:
	vmovdqu	xmm0,XMMWORD[r11]
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
	vpaddd	xmm11,xmm6,xmm5
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm15
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm15
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpshufb	xmm14,xmm14,xmm0
	vpshufb	xmm1,xmm1,xmm0
	jmp	NEAR $L$resume_ctr32

; Final rounds. xmm1 (last round key) is XORed with each source block
; first, so vaesenclast yields state ^ key ^ source in one step. The
; counters for the next pass are prepared alongside in xmm0/xmm5/xmm6/
; xmm7/xmm3.
ALIGN	32
$L$enc_tail:
	vaesenc	xmm9,xmm9,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm7
	vpalignr	xmm8,xmm4,xmm4,8
	vaesenc	xmm10,xmm10,xmm15
	vpclmulqdq	xmm4,xmm4,xmm3,0x10
	vpxor	xmm2,xmm1,XMMWORD[rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
	vmovdqu	xmm1,XMMWORD[r8]	; reload the counter saved at $L$resume_ctr32

	vaesenclast	xmm9,xmm9,xmm2
	vmovdqu	xmm2,XMMWORD[32+r11]
	vaesenclast	xmm10,xmm10,xmm0
	vpaddb	xmm0,xmm1,xmm2
	mov	QWORD[((112+8))+rsp],r13
	lea	rdi,[96+rdi]
	vaesenclast	xmm11,xmm11,xmm5
	vpaddb	xmm5,xmm0,xmm2
	mov	QWORD[((120+8))+rsp],r12
	lea	rsi,[96+rsi]
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
	vaesenclast	xmm12,xmm12,xmm6
	vpaddb	xmm6,xmm5,xmm2
	vaesenclast	xmm13,xmm13,xmm7
	vpaddb	xmm7,xmm6,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vpaddb	xmm3,xmm7,xmm2

	add	r10,0x60
	sub	rdx,0x6
	jc	NEAR $L$6x_done	; borrow: fewer than six blocks remained

	; Store this pass's output and rotate the prepared counters into place.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpxor	xmm9,xmm1,xmm15
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovdqa	xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovdqa	xmm11,xmm5
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovdqa	xmm12,xmm6
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovdqa	xmm13,xmm7
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vmovdqa	xmm14,xmm3
	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]
	jmp	NEAR $L$oop6x

$L$6x_done:
	; Fold the outstanding partial results into the accumulator.
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpxor	xmm8,xmm8,xmm4

	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; GFp_aesni_gcm_decrypt -- exported; entered with the Microsoft x64
; calling convention and remapped to the register layout the helpers use.
;
; Arguments as consumed below (C types are not visible here -- confirm
; against the C prototype):
;   rcx       arg 1  source pointer                         -> rdi
;   rdx       arg 2  destination pointer                    -> rsi
;   r8        arg 3  length in bytes                        -> rdx
;   r9        arg 4  key schedule; DWORD at +240 -> ebp     -> rcx
;   [40+rsp]  arg 5  16-byte counter block (updated)        -> r8
;   [48+rsp]  arg 6  16-byte accumulator, table from +32    -> r9
;
; Returns rax = number of bytes processed: 0 when length < 0x60,
; otherwise a multiple of 0x60.
;
; The staged values fed to the multiplications are read from the SOURCE
; buffer (r14 = rdi).
;-----------------------------------------------------------------------
global	GFp_aesni_gcm_decrypt

ALIGN	32
GFp_aesni_gcm_decrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_GFp_aesni_gcm_decrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	xor	r10,r10	; return value if we bail out below

	cmp	rdx,0x60
	jb	NEAR $L$gcm_dec_abort

	lea	rax,[rsp]	; rax = frame base used by the epilogue and the SEH handler

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	; Save the callee-saved xmm6-xmm15 at fixed offsets below rax.
	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]	; counter block
	add	rsp,-128
	mov	ebx,DWORD[12+r8]
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	vmovdqu	xmm8,XMMWORD[r9]	; accumulator
	and	rsp,-128	; 128-byte aligned scratch area
	vmovdqu	xmm0,XMMWORD[r11]
	lea	rcx,[128+rcx]	; bias expected by the helpers
	lea	r9,[((32+32))+r9]
	mov	ebp,DWORD[((240-128))+rcx]
	vpshufb	xmm8,xmm8,xmm0

	; Compare bits 7..11 of (key-128) and rsp. If the stack sits less than
	; 768 bytes above the key in that window, move rsp down by the
	; difference. NOTE(review): presumably this avoids stack/key-schedule
	; aliasing, per the label name -- confirm against the Perl source.
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$dec_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$dec_no_key_aliasing
	sub	rsp,r15
$L$dec_no_key_aliasing:

	; Stage the first six source blocks, byte-swapped: block 5 in xmm7,
	; blocks 4..0 at 48..112+rsp.
	vmovdqu	xmm7,XMMWORD[80+rdi]
	lea	r14,[rdi]
	vmovdqu	xmm4,XMMWORD[64+rdi]

	lea	r15,[((-192))+rdx*1+rdi]	; limit for advancing r14

	vmovdqu	xmm5,XMMWORD[48+rdi]
	shr	rdx,4	; bytes -> 16-byte blocks
	xor	r10,r10
	vmovdqu	xmm6,XMMWORD[32+rdi]
	vpshufb	xmm7,xmm7,xmm0
	vmovdqu	xmm2,XMMWORD[16+rdi]
	vpshufb	xmm4,xmm4,xmm0
	vmovdqu	xmm3,XMMWORD[rdi]
	vpshufb	xmm5,xmm5,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm4
	vpshufb	xmm6,xmm6,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm2,xmm2,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm6
	vpshufb	xmm3,xmm3,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vmovdqu	XMMWORD[112+rsp],xmm3

	call	_aesni_ctr32_ghash_6x

	; The helper returns with the last six output blocks still in registers.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovups	XMMWORD[(-16)+rsi],xmm14

	; Swap the accumulator back and store it where it was loaded from
	; (r9 was advanced by 64 above).
	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]

$L$gcm_dec_abort:
	mov	rax,r10
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_GFp_aesni_gcm_decrypt:

;-----------------------------------------------------------------------
; _aesni_ctr32_6x -- file-internal helper (not exported).
;
; Runs six consecutive counter blocks through the AES rounds, XORs them
; with 96 bytes from [rdi], writes 96 bytes to [rsi], and advances both
; pointers by 96.
;
; In:  xmm1 = current counter block, xmm0 = byte-swap mask (only used on
;      the $L$handle_ctr32_2 path), ebx = DWORD from counter bytes 12..15,
;      ebp = round count, rcx = round keys biased by +128,
;      r11 = address of $L$bswap_mask.
; Out: xmm1 = counter for the next call; xmm9..xmm14 = the six blocks
;      just written.
; Clobbers r12, r13, xmm2-xmm6, xmm8, xmm15, flags.
;-----------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_6x:

	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]	; round key 0
	vmovdqu	xmm2,XMMWORD[32+r11]	; $L$one_msb
	lea	r13,[((-1))+rbp]	; loop count = rounds - 1
	vmovups	xmm15,XMMWORD[((16-128))+rcx]
	lea	r12,[((32-128))+rcx]
	vpxor	xmm9,xmm1,xmm4
	; 100663296 = 6<<24. A carry means byte 15 of the counter would wrap
	; during the six byte-wise increments, so take the 32-bit add path.
	add	ebx,100663296
	jc	NEAR $L$handle_ctr32_2
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpxor	xmm10,xmm10,xmm4
	vpaddb	xmm12,xmm11,xmm2
	vpxor	xmm11,xmm11,xmm4
	vpaddb	xmm13,xmm12,xmm2
	vpxor	xmm12,xmm12,xmm4
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm13,xmm13,xmm4
	vpaddb	xmm1,xmm14,xmm2
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32

; One AES round per iteration across all six blocks. r12 walks the key
; schedule.
ALIGN	16
$L$oop_ctr32:
	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15
	vmovups	xmm15,XMMWORD[r12]
	lea	r12,[16+r12]
	dec	r13d
	jnz	NEAR $L$oop_ctr32

	; xmm3 = last round key. It is XORed with each source block so that
	; vaesenclast yields state ^ key ^ source in one step.
	vmovdqu	xmm3,XMMWORD[r12]
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm4,xmm3,XMMWORD[rdi]
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
	lea	rdi,[96+rdi]

	vaesenclast	xmm9,xmm9,xmm4
	vaesenclast	xmm10,xmm10,xmm5
	vaesenclast	xmm11,xmm11,xmm6
	vaesenclast	xmm12,xmm12,xmm8
	vaesenclast	xmm13,xmm13,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vmovups	XMMWORD[rsi],xmm9
	vmovups	XMMWORD[16+rsi],xmm10
	vmovups	XMMWORD[32+rsi],xmm11
	vmovups	XMMWORD[48+rsi],xmm12
	vmovups	XMMWORD[64+rsi],xmm13
	vmovups	XMMWORD[80+rsi],xmm14
	lea	rsi,[96+rsi]

	DB	0F3h,0C3h		;repret
; Slow path: byte-swap the counter (mask in xmm0), build +1..+6 with
; 32-bit adds using $L$one_lsb (64+r11) and $L$two_lsb (48+r11), swap
; back, apply round key 0, and join the round loop.
ALIGN	32
$L$handle_ctr32_2:
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
	vpaddd	xmm11,xmm6,xmm5
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm4
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm4
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpxor	xmm12,xmm12,xmm4
	vpshufb	xmm14,xmm14,xmm0
	vpxor	xmm13,xmm13,xmm4
	vpshufb	xmm1,xmm1,xmm0
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32



;-----------------------------------------------------------------------
; GFp_aesni_gcm_encrypt -- exported; entered with the Microsoft x64
; calling convention and remapped to the register layout the helpers use.
;
; Arguments as consumed below (C types are not visible here -- confirm
; against the C prototype):
;   rcx       arg 1  source pointer                         -> rdi
;   rdx       arg 2  destination pointer                    -> rsi
;   r8        arg 3  length in bytes                        -> rdx
;   r9        arg 4  key schedule; DWORD at +240 -> ebp     -> rcx
;   [40+rsp]  arg 5  16-byte counter block (updated)        -> r8
;   [48+rsp]  arg 6  16-byte accumulator, table from +32    -> r9
;
; Returns rax = number of bytes processed: 0 when length < 0x60*3,
; otherwise a multiple of 0x60.
;
; The staged values fed to the multiplications are read from the
; DESTINATION buffer (r14 = rsi), i.e. from data this function has
; already written.
;-----------------------------------------------------------------------
global	GFp_aesni_gcm_encrypt

ALIGN	32
GFp_aesni_gcm_encrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_GFp_aesni_gcm_encrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	xor	r10,r10	; return value if we bail out below

	cmp	rdx,0x60*3
	jb	NEAR $L$gcm_enc_abort

	lea	rax,[rsp]	; rax = frame base used by the epilogue and the SEH handler

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	; Save the callee-saved xmm6-xmm15 at fixed offsets below rax.
	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]	; counter block
	add	rsp,-128
	mov	ebx,DWORD[12+r8]
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	lea	rcx,[128+rcx]	; bias expected by the helpers
	vmovdqu	xmm0,XMMWORD[r11]
	and	rsp,-128	; 128-byte aligned scratch area
	mov	ebp,DWORD[((240-128))+rcx]

	; Compare bits 7..11 of (key-128) and rsp. If the stack sits less than
	; 768 bytes above the key in that window, move rsp down by the
	; difference. NOTE(review): presumably this avoids stack/key-schedule
	; aliasing, per the label name -- confirm against the Perl source.
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$enc_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$enc_no_key_aliasing
	sub	rsp,r15
$L$enc_no_key_aliasing:

	lea	r14,[rsi]

	lea	r15,[((-192))+rdx*1+rsi]	; limit for advancing r14

	shr	rdx,4	; bytes -> 16-byte blocks

	; First 96 bytes: process, then stage the six results byte-swapped
	; (block 0 at 112+rsp ... block 4 at 48+rsp, block 5 in xmm7).
	call	_aesni_ctr32_6x
	vpshufb	xmm8,xmm9,xmm0
	vpshufb	xmm2,xmm10,xmm0
	vmovdqu	XMMWORD[112+rsp],xmm8
	vpshufb	xmm4,xmm11,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vpshufb	xmm5,xmm12,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm4
	vpshufb	xmm6,xmm13,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm7,xmm14,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm6

	; Second 96 bytes.
	call	_aesni_ctr32_6x

	vmovdqu	xmm8,XMMWORD[r9]	; accumulator
	lea	r9,[((32+32))+r9]
	sub	rdx,12	; twelve blocks already done
	mov	r10,0x60*2
	vpshufb	xmm8,xmm8,xmm0

	call	_aesni_ctr32_ghash_6x

	; Tail: store the helper's last six output blocks, then fold the
	; values still staged at 32..112+rsp, followed by those six blocks
	; (byte-swapped in xmm9..xmm14), into the accumulator xmm8. Reduction
	; steps multiply by the $L$poly constant at 16+r11.
	vmovdqu	xmm7,XMMWORD[32+rsp]
	vmovdqu	xmm0,XMMWORD[r11]
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm7,xmm7
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpshufb	xmm9,xmm9,xmm0
	vpxor	xmm1,xmm1,xmm7
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vpshufb	xmm10,xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vpshufb	xmm11,xmm11,xmm0
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vpshufb	xmm12,xmm12,xmm0
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vpshufb	xmm13,xmm13,xmm0
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vpshufb	xmm14,xmm14,xmm0
	vmovdqu	XMMWORD[16+rsp],xmm9
	vmovdqu	xmm6,XMMWORD[48+rsp]
	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpunpckhqdq	xmm2,xmm6,xmm6
	vpclmulqdq	xmm5,xmm7,xmm3,0x00
	vpxor	xmm2,xmm2,xmm6
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vpclmulqdq	xmm1,xmm1,xmm15,0x00

	vmovdqu	xmm9,XMMWORD[64+rsp]
	vpclmulqdq	xmm4,xmm6,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm5,xmm9,xmm9
	vpclmulqdq	xmm6,xmm6,xmm0,0x11
	vpxor	xmm5,xmm5,xmm9
	vpxor	xmm6,xmm6,xmm7
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vmovdqu	xmm1,XMMWORD[80+rsp]
	vpclmulqdq	xmm7,xmm9,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm7,xmm7,xmm4
	vpunpckhqdq	xmm4,xmm1,xmm1
	vpclmulqdq	xmm9,xmm9,xmm3,0x11
	vpxor	xmm4,xmm4,xmm1
	vpxor	xmm9,xmm9,xmm6
	vpclmulqdq	xmm5,xmm5,xmm15,0x00
	vpxor	xmm5,xmm5,xmm2

	vmovdqu	xmm2,XMMWORD[96+rsp]
	vpclmulqdq	xmm6,xmm1,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm6,xmm6,xmm7
	vpunpckhqdq	xmm7,xmm2,xmm2
	vpclmulqdq	xmm1,xmm1,xmm0,0x11
	vpxor	xmm7,xmm7,xmm2
	vpxor	xmm1,xmm1,xmm9
	vpclmulqdq	xmm4,xmm4,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm4,xmm4,xmm5

	vpxor	xmm8,xmm8,XMMWORD[112+rsp]
	vpclmulqdq	xmm5,xmm2,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm2,xmm2,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm2,xmm2,xmm1
	vpclmulqdq	xmm7,xmm7,xmm15,0x00
	vpxor	xmm4,xmm7,xmm4

	vpclmulqdq	xmm6,xmm8,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm14,xmm14
	vpclmulqdq	xmm8,xmm8,xmm0,0x11
	vpxor	xmm1,xmm1,xmm14
	vpxor	xmm5,xmm6,xmm5
	vpclmulqdq	xmm9,xmm9,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vpxor	xmm7,xmm8,xmm2
	vpxor	xmm6,xmm9,xmm4

	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpxor	xmm9,xmm7,xmm5
	vpclmulqdq	xmm4,xmm14,xmm3,0x00
	vpxor	xmm6,xmm6,xmm9
	vpunpckhqdq	xmm2,xmm13,xmm13
	vpclmulqdq	xmm14,xmm14,xmm3,0x11
	vpxor	xmm2,xmm2,xmm13
	vpslldq	xmm9,xmm6,8
	vpclmulqdq	xmm1,xmm1,xmm15,0x00
	vpxor	xmm8,xmm5,xmm9
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm7,xmm7,xmm6

	vpclmulqdq	xmm5,xmm13,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm9,xmm12,xmm12
	vpclmulqdq	xmm13,xmm13,xmm0,0x11
	vpxor	xmm9,xmm9,xmm12
	vpxor	xmm13,xmm13,xmm14
	vpalignr	xmm14,xmm8,xmm8,8
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm4,xmm12,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm1,xmm11,xmm11
	vpclmulqdq	xmm12,xmm12,xmm3,0x11
	vpxor	xmm1,xmm1,xmm11
	vpxor	xmm12,xmm12,xmm13
	vxorps	xmm7,xmm7,XMMWORD[16+rsp]
	vpclmulqdq	xmm9,xmm9,xmm15,0x00
	vpxor	xmm9,xmm9,xmm2

	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm5,xmm11,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm2,xmm10,xmm10
	vpclmulqdq	xmm11,xmm11,xmm0,0x11
	vpxor	xmm2,xmm2,xmm10
	vpalignr	xmm14,xmm8,xmm8,8
	vpxor	xmm11,xmm11,xmm12
	vpclmulqdq	xmm1,xmm1,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm1,xmm1,xmm9

	vxorps	xmm14,xmm14,xmm7
	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm4,xmm10,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpclmulqdq	xmm10,xmm10,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm10,xmm10,xmm11
	vpclmulqdq	xmm2,xmm2,xmm15,0x00
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm5,xmm8,xmm0,0x00
	vpclmulqdq	xmm7,xmm8,xmm0,0x11
	vpxor	xmm5,xmm5,xmm4
	vpclmulqdq	xmm6,xmm9,xmm15,0x10
	vpxor	xmm7,xmm7,xmm10
	vpxor	xmm6,xmm6,xmm2

	vpxor	xmm4,xmm7,xmm5
	vpxor	xmm6,xmm6,xmm4
	vpslldq	xmm1,xmm6,8
	vmovdqu	xmm3,XMMWORD[16+r11]
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm8,xmm5,xmm1
	vpxor	xmm7,xmm7,xmm6

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm8,xmm8,xmm2

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm2,xmm2,xmm7
	vpxor	xmm8,xmm8,xmm2
	; Swap the accumulator back and store it where it was loaded from
	; (r9 was advanced by 64 above).
	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]

$L$gcm_enc_abort:
	mov	rax,r10
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_GFp_aesni_gcm_encrypt:
; Constant table. The routines above keep the address of $L$bswap_mask in
; r11 and reach the other entries by fixed offset:
;   +0  $L$bswap_mask   +16 $L$poly   +32 $L$one_msb
;   +48 $L$two_lsb      +64 $L$one_lsb
; The layout must therefore stay contiguous and in this order.
ALIGN	64
$L$bswap_mask:
DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; NUL-terminated ASCII: "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN	64
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; gcm_se_handler -- exception handler referenced from the .xdata records
; of both exported functions.
;
; In: r8 = context record, r9 = dispatcher context. rcx and rdx are not
;     read here.
; NOTE(review): the numeric offsets below appear to be the Windows
; CONTEXT / DISPATCHER_CONTEXT field offsets (e.g. 120=Rax, 152=Rsp,
; 248=Rip, 512=Xmm6; 8=ImageBase, 40=ContextRecord, 56=HandlerData) --
; confirm against the Windows SDK headers.
;
; The two DWORDs at HandlerData are image-relative addresses of the
; function's body label and abort label. If the faulting address lies
; between them, the saved integer and xmm registers are recovered from
; the frame whose base the function keeps in rax. The context is then
; copied out and RtlVirtualUnwind is called. Returns eax = 1.
;-----------------------------------------------------------------------
ALIGN	16
gcm_se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

	mov	rax,QWORD[120+r8]
	mov	rbx,QWORD[248+r8]

	mov	rsi,QWORD[8+r9]
	mov	r11,QWORD[56+r9]

	; Before the body label: nothing saved yet, rax (original rsp) is the frame.
	mov	r10d,DWORD[r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail

	mov	rax,QWORD[152+r8]

	; At or past the abort label: the frame is already torn down, use the
	; context's stack pointer.
	mov	r10d,DWORD[4+r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail

	mov	rax,QWORD[120+r8]

	; Inside the body: reload the six pushed registers from below the
	; frame base and write them into the context record.
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	mov	QWORD[240+r8],r15
	mov	QWORD[232+r8],r14
	mov	QWORD[224+r8],r13
	mov	QWORD[216+r8],r12
	mov	QWORD[160+r8],rbp
	mov	QWORD[144+r8],rbx

	; Copy the ten saved xmm registers (20 qwords) into the context record.
	lea	rsi,[((-216))+rax]
	lea	rdi,[512+r8]
	mov	ecx,20
	DD	0xa548f3fc		; bytes FC F3 48 A5 = cld; rep movsq

$L$common_seh_tail:
	; Recover rdi/rsi from the slots the function prologue saved them in.
	mov	rdi,QWORD[8+rax]
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax
	mov	QWORD[168+r8],rsi
	mov	QWORD[176+r8],rdi

	; Copy the whole context record (154 qwords) to the address at [40+r9].
	mov	rdi,QWORD[40+r9]
	mov	rsi,r8
	mov	ecx,154
	DD	0xa548f3fc		; bytes FC F3 48 A5 = cld; rep movsq

	; Marshal arguments for RtlVirtualUnwind: four in rcx/rdx/r8/r9, the
	; rest in stack slots above the 32-byte shadow area.
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD[8+rsi]
	mov	r8,QWORD[rsi]
	mov	r9,QWORD[16+rsi]
	mov	r10,QWORD[40+rsi]
	lea	r11,[56+rsi]
	lea	r12,[24+rsi]
	mov	QWORD[32+rsp],r10
	mov	QWORD[40+rsp],r11
	mov	QWORD[48+rsp],r12
	mov	QWORD[56+rsp],rcx
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret


; Function table: one record of three image-relative DWORDs per exported
; function (begin label, end label, address of its .xdata record).
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_GFp_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_end_GFp_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_gcm_dec_info wrt ..imagebase

	DD	$L$SEH_begin_GFp_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_end_GFp_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_GFp_gcm_enc_info wrt ..imagebase
; Each record: a 4-byte header, the handler address, then the two
; image-relative label addresses gcm_se_handler reads as its handler data
; (body start, abort/epilogue label).
; NOTE(review): header byte 9 presumably encodes unwind version 1 with the
; exception-handler flag -- confirm against the UNWIND_INFO documentation.
section	.xdata rdata align=8
ALIGN	8
$L$SEH_gcm_dec_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_GFp_gcm_enc_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase
