• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
9%include "ring_core_generated/prefix_symbols_nasm.inc"
10section	.text code align=64
11
12
13
; ---------------------------------------------------------------------------
; _aesni_ctr32_ghash_6x -- file-local helper (not declared global), reached by
; `call` from aesni_gcm_decrypt and aesni_gcm_encrypt.
;
; Every pass of $L$oop6x pushes six counter blocks (xmm9..xmm14) through the
; AES rounds while interleaving vpclmulqdq multiplications of six 16-byte
; values that were staged, byte-reversed, by the caller or by the previous pass.
;
; Register use as visible in this file:
;   rdx   16-byte block count (callers do `shr rdx,4`); reduced by 6 on entry
;         and once per pass, the loop ends when the subtraction borrows.
;   rdi   source pointer, rsi destination pointer; both advance by 96 per pass.
;   rcx   key schedule pointer + 128 (set by the callers); round key N is read
;         at ((16*N)-128)+rcx.
;   ebp   dword the callers read from offset 240 of the key schedule; only
;         compared against 11 here to choose between a 10-round and a
;         14-round path.
;   ebx   last dword of the counter block, used only to detect byte-15 overflow.
;   r8    counter block in memory; the next counter is written there each pass.
;   r9    multiplier table pointer (callers add 64 to their argument first).
;   r10   running byte total, +0x60 per pass.
;   r11   address of $L$bswap_mask; the other constants are at fixed offsets.
;   r14   read pointer for the movbe loads; r15 is the bound it is compared to.
;   xmm8  hash accumulator in and out; xmm7 is the first staged value.
; Stack slots are written as (N+8)+rsp: the +8 skips the return address pushed
; by `call`, so (48+8)+rsp here is the caller's 48+rsp.
; On return the six blocks of the final pass are left in xmm9..xmm14; this
; routine does not store them (both callers do so right after the call).
; ---------------------------------------------------------------------------
14ALIGN	32
15_aesni_ctr32_ghash_6x:
16
; xmm2 = $L$one_msb (+32 from $L$bswap_mask): vpaddb with it adds 1 to byte 15
; only. xmm10..xmm14 = counter+1..counter+5, xmm9 = counter ^ round key 0.
17	vmovdqu	xmm2,XMMWORD[32+r11]
18	sub	rdx,6
19	vpxor	xmm4,xmm4,xmm4
20	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
21	vpaddb	xmm10,xmm1,xmm2
22	vpaddb	xmm11,xmm10,xmm2
23	vpaddb	xmm12,xmm11,xmm2
24	vpaddb	xmm13,xmm12,xmm2
25	vpaddb	xmm14,xmm13,xmm2
26	vpxor	xmm9,xmm1,xmm15
; Zero the stack slot that is XORed into xmm8 further down.
27	vmovdqu	XMMWORD[(16+8)+rsp],xmm4
28	jmp	NEAR $L$oop6x
29
30ALIGN	32
31$L$oop6x:
; 100663296 = 6<<24. ebx was loaded from bytes 12..15 of the counter block, so
; this adds 6 to byte 15; a carry means the byte-wise vpaddb increments would
; wrap and the slow path must build the counters instead.
32	add	ebx,100663296
33	jc	NEAR $L$handle_ctr32
34	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
35	vpaddb	xmm1,xmm14,xmm2
36	vpxor	xmm10,xmm10,xmm15
37	vpxor	xmm11,xmm11,xmm15
38
39$L$resume_ctr32:
; xmm1 = counter for the next pass; it is parked at [r8] and reloaded in
; $L$enc_tail because xmm1 is reused as a temporary below.
40	vmovdqu	XMMWORD[r8],xmm1
41	vpclmulqdq	xmm5,xmm7,xmm3,0x10
42	vpxor	xmm12,xmm12,xmm15
43	vmovups	xmm2,XMMWORD[((16-128))+rcx]
44	vpclmulqdq	xmm6,xmm7,xmm3,0x01
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
; r12 = (r15 >= r14) ? 0x60 : 0, computed via cmp / setnc / neg / and below.
; The flags from cmp stay live until setnc because only vector instructions
; (which leave EFLAGS alone) sit in between. r14 then advances by r12, so it
; stops moving once it has passed r15.
62	xor	r12,r12
63	cmp	r15,r14
64
65	vaesenc	xmm9,xmm9,xmm2
66	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
67	vpxor	xmm13,xmm13,xmm15
68	vpclmulqdq	xmm1,xmm7,xmm3,0x00
69	vaesenc	xmm10,xmm10,xmm2
70	vpxor	xmm14,xmm14,xmm15
71	setnc	r12b
72	vpclmulqdq	xmm7,xmm7,xmm3,0x11
73	vaesenc	xmm11,xmm11,xmm2
74	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
75	neg	r12
76	vaesenc	xmm12,xmm12,xmm2
77	vpxor	xmm6,xmm6,xmm5
78	vpclmulqdq	xmm5,xmm0,xmm3,0x00
79	vpxor	xmm8,xmm8,xmm4
80	vaesenc	xmm13,xmm13,xmm2
81	vpxor	xmm4,xmm1,xmm5
82	and	r12,0x60
83	vmovups	xmm15,XMMWORD[((32-128))+rcx]
84	vpclmulqdq	xmm1,xmm0,xmm3,0x10
85	vaesenc	xmm14,xmm14,xmm2
86
87	vpclmulqdq	xmm2,xmm0,xmm3,0x01
88	lea	r14,[r12*1+r14]
89	vaesenc	xmm9,xmm9,xmm15
90	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
91	vpclmulqdq	xmm3,xmm0,xmm3,0x11
92	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
93	vaesenc	xmm10,xmm10,xmm15
; Each movbe pair below loads one 16-byte block from [r14] byte-reversed
; (high qword into the low stack qword and vice versa) and stages it in the
; stack slots that the NEXT pass reads as multiplication inputs.
94	movbe	r13,QWORD[88+r14]
95	vaesenc	xmm11,xmm11,xmm15
96	movbe	r12,QWORD[80+r14]
97	vaesenc	xmm12,xmm12,xmm15
98	mov	QWORD[((32+8))+rsp],r13
99	vaesenc	xmm13,xmm13,xmm15
100	mov	QWORD[((40+8))+rsp],r12
101	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
102	vaesenc	xmm14,xmm14,xmm15
103
104	vmovups	xmm15,XMMWORD[((48-128))+rcx]
105	vpxor	xmm6,xmm6,xmm1
106	vpclmulqdq	xmm1,xmm0,xmm5,0x00
107	vaesenc	xmm9,xmm9,xmm15
108	vpxor	xmm6,xmm6,xmm2
109	vpclmulqdq	xmm2,xmm0,xmm5,0x10
110	vaesenc	xmm10,xmm10,xmm15
111	vpxor	xmm7,xmm7,xmm3
112	vpclmulqdq	xmm3,xmm0,xmm5,0x01
113	vaesenc	xmm11,xmm11,xmm15
114	vpclmulqdq	xmm5,xmm0,xmm5,0x11
115	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
116	vaesenc	xmm12,xmm12,xmm15
117	vaesenc	xmm13,xmm13,xmm15
118	vpxor	xmm4,xmm4,xmm1
119	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
120	vaesenc	xmm14,xmm14,xmm15
121
122	vmovups	xmm15,XMMWORD[((64-128))+rcx]
123	vpxor	xmm6,xmm6,xmm2
124	vpclmulqdq	xmm2,xmm0,xmm1,0x00
125	vaesenc	xmm9,xmm9,xmm15
126	vpxor	xmm6,xmm6,xmm3
127	vpclmulqdq	xmm3,xmm0,xmm1,0x10
128	vaesenc	xmm10,xmm10,xmm15
129	movbe	r13,QWORD[72+r14]
130	vpxor	xmm7,xmm7,xmm5
131	vpclmulqdq	xmm5,xmm0,xmm1,0x01
132	vaesenc	xmm11,xmm11,xmm15
133	movbe	r12,QWORD[64+r14]
134	vpclmulqdq	xmm1,xmm0,xmm1,0x11
135	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
136	vaesenc	xmm12,xmm12,xmm15
137	mov	QWORD[((48+8))+rsp],r13
138	vaesenc	xmm13,xmm13,xmm15
139	mov	QWORD[((56+8))+rsp],r12
140	vpxor	xmm4,xmm4,xmm2
141	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
142	vaesenc	xmm14,xmm14,xmm15
143
144	vmovups	xmm15,XMMWORD[((80-128))+rcx]
145	vpxor	xmm6,xmm6,xmm3
146	vpclmulqdq	xmm3,xmm0,xmm2,0x00
147	vaesenc	xmm9,xmm9,xmm15
148	vpxor	xmm6,xmm6,xmm5
149	vpclmulqdq	xmm5,xmm0,xmm2,0x10
150	vaesenc	xmm10,xmm10,xmm15
151	movbe	r13,QWORD[56+r14]
152	vpxor	xmm7,xmm7,xmm1
153	vpclmulqdq	xmm1,xmm0,xmm2,0x01
; The accumulator xmm8 is folded with the last staged value before the final
; set of four multiplications, which take xmm8 as their input.
154	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]
155	vaesenc	xmm11,xmm11,xmm15
156	movbe	r12,QWORD[48+r14]
157	vpclmulqdq	xmm2,xmm0,xmm2,0x11
158	vaesenc	xmm12,xmm12,xmm15
159	mov	QWORD[((64+8))+rsp],r13
160	vaesenc	xmm13,xmm13,xmm15
161	mov	QWORD[((72+8))+rsp],r12
162	vpxor	xmm4,xmm4,xmm3
163	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
164	vaesenc	xmm14,xmm14,xmm15
165
166	vmovups	xmm15,XMMWORD[((96-128))+rcx]
167	vpxor	xmm6,xmm6,xmm5
168	vpclmulqdq	xmm5,xmm8,xmm3,0x10
169	vaesenc	xmm9,xmm9,xmm15
170	vpxor	xmm6,xmm6,xmm1
171	vpclmulqdq	xmm1,xmm8,xmm3,0x01
172	vaesenc	xmm10,xmm10,xmm15
173	movbe	r13,QWORD[40+r14]
174	vpxor	xmm7,xmm7,xmm2
175	vpclmulqdq	xmm2,xmm8,xmm3,0x00
176	vaesenc	xmm11,xmm11,xmm15
177	movbe	r12,QWORD[32+r14]
178	vpclmulqdq	xmm8,xmm8,xmm3,0x11
179	vaesenc	xmm12,xmm12,xmm15
180	mov	QWORD[((80+8))+rsp],r13
181	vaesenc	xmm13,xmm13,xmm15
182	mov	QWORD[((88+8))+rsp],r12
183	vpxor	xmm6,xmm6,xmm5
184	vaesenc	xmm14,xmm14,xmm15
185	vpxor	xmm6,xmm6,xmm1
186
; xmm6 holds the summed cross products; its two halves are shifted into the
; low (xmm4) and high (xmm7) sums. xmm3 = $L$poly (+16 from $L$bswap_mask) is
; the constant multiplier for the vpalignr/vpclmulqdq folding steps.
187	vmovups	xmm15,XMMWORD[((112-128))+rcx]
188	vpslldq	xmm5,xmm6,8
189	vpxor	xmm4,xmm4,xmm2
190	vmovdqu	xmm3,XMMWORD[16+r11]
191
192	vaesenc	xmm9,xmm9,xmm15
193	vpxor	xmm7,xmm7,xmm8
194	vaesenc	xmm10,xmm10,xmm15
195	vpxor	xmm4,xmm4,xmm5
196	movbe	r13,QWORD[24+r14]
197	vaesenc	xmm11,xmm11,xmm15
198	movbe	r12,QWORD[16+r14]
199	vpalignr	xmm0,xmm4,xmm4,8
200	vpclmulqdq	xmm4,xmm4,xmm3,0x10
201	mov	QWORD[((96+8))+rsp],r13
202	vaesenc	xmm12,xmm12,xmm15
203	mov	QWORD[((104+8))+rsp],r12
204	vaesenc	xmm13,xmm13,xmm15
205	vmovups	xmm1,XMMWORD[((128-128))+rcx]
206	vaesenc	xmm14,xmm14,xmm15
207
208	vaesenc	xmm9,xmm9,xmm1
209	vmovups	xmm15,XMMWORD[((144-128))+rcx]
210	vaesenc	xmm10,xmm10,xmm1
211	vpsrldq	xmm6,xmm6,8
212	vaesenc	xmm11,xmm11,xmm1
213	vpxor	xmm7,xmm7,xmm6
214	vaesenc	xmm12,xmm12,xmm1
215	vpxor	xmm4,xmm4,xmm0
; r13/r12 from these two loads are written to the stack in $L$enc_tail.
216	movbe	r13,QWORD[8+r14]
217	vaesenc	xmm13,xmm13,xmm1
218	movbe	r12,QWORD[r14]
219	vaesenc	xmm14,xmm14,xmm1
220	vmovups	xmm1,XMMWORD[((160-128))+rcx]
; ebp < 11: finish with round keys 9 (xmm15) and 10 (xmm1) in $L$enc_tail.
; Otherwise run four more rounds and finish with round keys 13 and 14.
; NOTE(review): there is no separate exit for a 12-round schedule, so every
; ebp >= 11 takes the 14-round path -- confirm callers never pass such a key.
221	cmp	ebp,11
222	jb	NEAR $L$enc_tail
223
224	vaesenc	xmm9,xmm9,xmm15
225	vaesenc	xmm10,xmm10,xmm15
226	vaesenc	xmm11,xmm11,xmm15
227	vaesenc	xmm12,xmm12,xmm15
228	vaesenc	xmm13,xmm13,xmm15
229	vaesenc	xmm14,xmm14,xmm15
230
231	vaesenc	xmm9,xmm9,xmm1
232	vaesenc	xmm10,xmm10,xmm1
233	vaesenc	xmm11,xmm11,xmm1
234	vaesenc	xmm12,xmm12,xmm1
235	vaesenc	xmm13,xmm13,xmm1
236	vmovups	xmm15,XMMWORD[((176-128))+rcx]
237	vaesenc	xmm14,xmm14,xmm1
238	vmovups	xmm1,XMMWORD[((192-128))+rcx]
239
240
241	vaesenc	xmm9,xmm9,xmm15
242	vaesenc	xmm10,xmm10,xmm15
243	vaesenc	xmm11,xmm11,xmm15
244	vaesenc	xmm12,xmm12,xmm15
245	vaesenc	xmm13,xmm13,xmm15
246	vaesenc	xmm14,xmm14,xmm15
247
248	vaesenc	xmm9,xmm9,xmm1
249	vaesenc	xmm10,xmm10,xmm1
250	vaesenc	xmm11,xmm11,xmm1
251	vaesenc	xmm12,xmm12,xmm1
252	vaesenc	xmm13,xmm13,xmm1
253	vmovups	xmm15,XMMWORD[((208-128))+rcx]
254	vaesenc	xmm14,xmm14,xmm1
255	vmovups	xmm1,XMMWORD[((224-128))+rcx]
256	jmp	NEAR $L$enc_tail
257
258ALIGN	32
259$L$handle_ctr32:
; Slow path taken when byte 15 of the counter would overflow. The counter in
; xmm1 is byte-reversed with $L$bswap_mask, incremented with 32-bit lane adds
; ($L$one_lsb at +64 gives +1, $L$two_lsb at +48 gives +2 steps) and reversed
; back. xmm10/xmm11 also get the round-key-0 XOR that the fast path performs
; before $L$resume_ctr32; xmm12..xmm14 are XORed after the jump. xmm1 ends up
; as counter+6 for the next pass.
260	vmovdqu	xmm0,XMMWORD[r11]
261	vpshufb	xmm6,xmm1,xmm0
262	vmovdqu	xmm5,XMMWORD[48+r11]
263	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
264	vpaddd	xmm11,xmm6,xmm5
265	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
266	vpaddd	xmm12,xmm10,xmm5
267	vpshufb	xmm10,xmm10,xmm0
268	vpaddd	xmm13,xmm11,xmm5
269	vpshufb	xmm11,xmm11,xmm0
270	vpxor	xmm10,xmm10,xmm15
271	vpaddd	xmm14,xmm12,xmm5
272	vpshufb	xmm12,xmm12,xmm0
273	vpxor	xmm11,xmm11,xmm15
274	vpaddd	xmm1,xmm13,xmm5
275	vpshufb	xmm13,xmm13,xmm0
276	vpshufb	xmm14,xmm14,xmm0
277	vpshufb	xmm1,xmm1,xmm0
278	jmp	NEAR $L$resume_ctr32
279
280ALIGN	32
281$L$enc_tail:
; On entry xmm15 = second-to-last round key, xmm1 = last round key. The last
; round key is XORed with the six source blocks at [rdi] first, so each
; vaesenclast yields the final-round result already XORed with its source block.
; The high sum xmm7 is parked in the (16+8)+rsp slot for the next pass.
282	vaesenc	xmm9,xmm9,xmm15
283	vmovdqu	XMMWORD[(16+8)+rsp],xmm7
284	vpalignr	xmm8,xmm4,xmm4,8
285	vaesenc	xmm10,xmm10,xmm15
286	vpclmulqdq	xmm4,xmm4,xmm3,0x10
287	vpxor	xmm2,xmm1,XMMWORD[rdi]
288	vaesenc	xmm11,xmm11,xmm15
289	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
290	vaesenc	xmm12,xmm12,xmm15
291	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
292	vaesenc	xmm13,xmm13,xmm15
293	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
294	vaesenc	xmm14,xmm14,xmm15
295	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
296	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
; Reload the next-pass counter stored at $L$resume_ctr32 and derive the five
; following counters (xmm0, xmm5, xmm6, xmm7, xmm3) with byte-15 increments.
297	vmovdqu	xmm1,XMMWORD[r8]
298
299	vaesenclast	xmm9,xmm9,xmm2
300	vmovdqu	xmm2,XMMWORD[32+r11]
301	vaesenclast	xmm10,xmm10,xmm0
302	vpaddb	xmm0,xmm1,xmm2
303	mov	QWORD[((112+8))+rsp],r13
304	lea	rdi,[96+rdi]
305	vaesenclast	xmm11,xmm11,xmm5
306	vpaddb	xmm5,xmm0,xmm2
307	mov	QWORD[((120+8))+rsp],r12
308	lea	rsi,[96+rsi]
309	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
310	vaesenclast	xmm12,xmm12,xmm6
311	vpaddb	xmm6,xmm5,xmm2
312	vaesenclast	xmm13,xmm13,xmm7
313	vpaddb	xmm7,xmm6,xmm2
314	vaesenclast	xmm14,xmm14,xmm3
315	vpaddb	xmm3,xmm7,xmm2
316
; Account for the 0x60 bytes just produced; leave the loop when fewer than six
; blocks remain (the subtraction borrows).
317	add	r10,0x60
318	sub	rdx,0x6
319	jc	NEAR $L$6x_done
320
; Not done: store the six results, move the prepared counters into
; xmm9..xmm14 (xmm9 already XORed with round key 0) and reload xmm7 with the
; first value staged by the movbe stores above.
321	vmovups	XMMWORD[(-96)+rsi],xmm9
322	vpxor	xmm9,xmm1,xmm15
323	vmovups	XMMWORD[(-80)+rsi],xmm10
324	vmovdqa	xmm10,xmm0
325	vmovups	XMMWORD[(-64)+rsi],xmm11
326	vmovdqa	xmm11,xmm5
327	vmovups	XMMWORD[(-48)+rsi],xmm12
328	vmovdqa	xmm12,xmm6
329	vmovups	XMMWORD[(-32)+rsi],xmm13
330	vmovdqa	xmm13,xmm7
331	vmovups	XMMWORD[(-16)+rsi],xmm14
332	vmovdqa	xmm14,xmm3
333	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]
334	jmp	NEAR $L$oop6x
335
336$L$6x_done:
; Fold the parked high sum and the last folding product into xmm8, which is
; the accumulator value handed back to the caller.
337	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
338	vpxor	xmm8,xmm8,xmm4
339
340	DB	0F3h,0C3h		;repret
341
342
; ---------------------------------------------------------------------------
; aesni_gcm_decrypt -- exported entry point, Win64 calling convention.
;
; The six incoming arguments (rcx, rdx, r8, r9 and two stack slots) are moved
; into rdi, rsi, rdx, rcx, r8, r9 right after entry. As used below:
;   rdi = source buffer, consumed in 16-byte blocks
;   rsi = destination buffer
;   rdx = length in bytes (turned into a block count by `shr rdx,4`)
;   rcx = key schedule; the dword at offset 240 is loaded into ebp
;   r8  = 16-byte counter block, updated in place by the helper
;   r9  = 16-byte hash value read at entry and written back at exit; the
;         helper reads multiplier table entries at higher offsets from it
; Returns rax = number of bytes processed (r10): 0 when rdx < 0x60, otherwise
; 0x60 for every six-block pass completed by _aesni_ctr32_ghash_6x.
;
; The frame layout (six pushes, then ten XMM saves at -216..-72 from the
; entry rsp kept in rax) is also read by gcm_se_handler, so the offsets in
; both places must stay in step.
; ---------------------------------------------------------------------------
343global	aesni_gcm_decrypt
344
345ALIGN	32
346aesni_gcm_decrypt:
347	mov	QWORD[8+rsp],rdi	;WIN64 prologue
348	mov	QWORD[16+rsp],rsi
349	mov	rax,rsp
350$L$SEH_begin_aesni_gcm_decrypt:
351	mov	rdi,rcx
352	mov	rsi,rdx
353	mov	rdx,r8
354	mov	rcx,r9
355	mov	r8,QWORD[40+rsp]
356	mov	r9,QWORD[48+rsp]
357
358
359
360	xor	r10,r10
361
362
363
; Fewer than 0x60 bytes (six blocks): nothing is processed, return 0.
364	cmp	rdx,0x60
365	jb	NEAR $L$gcm_dec_abort
366
367	lea	rax,[rsp]
368
369	push	rbx
370
371	push	rbp
372
373	push	r12
374
375	push	r13
376
377	push	r14
378
379	push	r15
380
; Save xmm6..xmm15 (callee-saved in the Microsoft x64 convention) below the
; six pushed registers.
381	lea	rsp,[((-168))+rsp]
382	movaps	XMMWORD[(-216)+rax],xmm6
383	movaps	XMMWORD[(-200)+rax],xmm7
384	movaps	XMMWORD[(-184)+rax],xmm8
385	movaps	XMMWORD[(-168)+rax],xmm9
386	movaps	XMMWORD[(-152)+rax],xmm10
387	movaps	XMMWORD[(-136)+rax],xmm11
388	movaps	XMMWORD[(-120)+rax],xmm12
389	movaps	XMMWORD[(-104)+rax],xmm13
390	movaps	XMMWORD[(-88)+rax],xmm14
391	movaps	XMMWORD[(-72)+rax],xmm15
392$L$gcm_dec_body:
393	vzeroupper
394
; xmm1 = counter block, ebx = its last dword, r11 = constant table,
; xmm8 = byte-reversed hash value, xmm0 = byte-reversal mask. rsp is lowered
; by at least 128 and aligned to 128; rcx/r9 are biased by +128 / +64 to match
; the helper's addressing.
395	vmovdqu	xmm1,XMMWORD[r8]
396	add	rsp,-128
397	mov	ebx,DWORD[12+r8]
398	lea	r11,[$L$bswap_mask]
399	lea	r14,[((-128))+rcx]
400	mov	r15,0xf80
401	vmovdqu	xmm8,XMMWORD[r9]
402	and	rsp,-128
403	vmovdqu	xmm0,XMMWORD[r11]
404	lea	rcx,[128+rcx]
405	lea	r9,[((32+32))+r9]
406	mov	ebp,DWORD[((240-128))+rcx]
407	vpshufb	xmm8,xmm8,xmm0
408
; Compare address bits 7..11 (mask 0xf80) of the stack pointer and of the key
; pointer; when rsp's bits are 0..767 above the key's, lower rsp by that
; difference. NOTE(review): presumably this keeps the stack scratch area from
; aliasing the key schedule in the cache -- confirm against the generator script.
409	and	r14,r15
410	and	r15,rsp
411	sub	r15,r14
412	jc	NEAR $L$dec_no_key_aliasing
413	cmp	r15,768
414	jnc	NEAR $L$dec_no_key_aliasing
415	sub	rsp,r15
416$L$dec_no_key_aliasing:
417
; Stage the first six source blocks, byte-reversed, as the helper's first
; set of multiplication inputs: the block at offset 80 stays in xmm7, the
; others go to 48..112+rsp. r14 = source pointer for the helper's movbe
; loads, r15 = source + length - 192 is the bound r14 is compared against.
418	vmovdqu	xmm7,XMMWORD[80+rdi]
419	lea	r14,[rdi]
420	vmovdqu	xmm4,XMMWORD[64+rdi]
421
422
423
424
425
426
427
428	lea	r15,[((-192))+rdx*1+rdi]
429
430	vmovdqu	xmm5,XMMWORD[48+rdi]
431	shr	rdx,4
432	xor	r10,r10
433	vmovdqu	xmm6,XMMWORD[32+rdi]
434	vpshufb	xmm7,xmm7,xmm0
435	vmovdqu	xmm2,XMMWORD[16+rdi]
436	vpshufb	xmm4,xmm4,xmm0
437	vmovdqu	xmm3,XMMWORD[rdi]
438	vpshufb	xmm5,xmm5,xmm0
439	vmovdqu	XMMWORD[48+rsp],xmm4
440	vpshufb	xmm6,xmm6,xmm0
441	vmovdqu	XMMWORD[64+rsp],xmm5
442	vpshufb	xmm2,xmm2,xmm0
443	vmovdqu	XMMWORD[80+rsp],xmm6
444	vpshufb	xmm3,xmm3,xmm0
445	vmovdqu	XMMWORD[96+rsp],xmm2
446	vmovdqu	XMMWORD[112+rsp],xmm3
447
448	call	_aesni_ctr32_ghash_6x
449
; The helper returns with its last six output blocks still in registers and
; rsi already advanced past them; store them here.
450	vmovups	XMMWORD[(-96)+rsi],xmm9
451	vmovups	XMMWORD[(-80)+rsi],xmm10
452	vmovups	XMMWORD[(-64)+rsi],xmm11
453	vmovups	XMMWORD[(-48)+rsi],xmm12
454	vmovups	XMMWORD[(-32)+rsi],xmm13
455	vmovups	XMMWORD[(-16)+rsi],xmm14
456
; Byte-reverse the accumulator back and write it to the caller's hash value
; (r9 was advanced by 64 above, hence -64).
457	vpshufb	xmm8,xmm8,XMMWORD[r11]
458	vmovdqu	XMMWORD[(-64)+r9],xmm8
459
460	vzeroupper
461	movaps	xmm6,XMMWORD[((-216))+rax]
462	movaps	xmm7,XMMWORD[((-200))+rax]
463	movaps	xmm8,XMMWORD[((-184))+rax]
464	movaps	xmm9,XMMWORD[((-168))+rax]
465	movaps	xmm10,XMMWORD[((-152))+rax]
466	movaps	xmm11,XMMWORD[((-136))+rax]
467	movaps	xmm12,XMMWORD[((-120))+rax]
468	movaps	xmm13,XMMWORD[((-104))+rax]
469	movaps	xmm14,XMMWORD[((-88))+rax]
470	movaps	xmm15,XMMWORD[((-72))+rax]
471	mov	r15,QWORD[((-48))+rax]
472
473	mov	r14,QWORD[((-40))+rax]
474
475	mov	r13,QWORD[((-32))+rax]
476
477	mov	r12,QWORD[((-24))+rax]
478
479	mov	rbp,QWORD[((-16))+rax]
480
481	mov	rbx,QWORD[((-8))+rax]
482
; rax still holds the entry rsp; restoring it discards the whole frame.
483	lea	rsp,[rax]
484
485$L$gcm_dec_abort:
486	mov	rax,r10
487	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
488	mov	rsi,QWORD[16+rsp]
489	DB	0F3h,0C3h		;repret
490
491$L$SEH_end_aesni_gcm_decrypt:
492
; ---------------------------------------------------------------------------
; _aesni_ctr32_6x -- file-local helper, called twice by aesni_gcm_encrypt.
; Runs six consecutive counter blocks through AES, XORs the result with 96
; bytes at [rdi] and stores 96 bytes at [rsi]. No multiplications happen here.
;
; In:  xmm1 = current counter block, ebx = its last dword,
;      xmm0 = byte-reversal mask (used only on the overflow path),
;      rcx = key schedule + 128, ebp = dword from offset 240 of the key,
;      r11 = address of $L$bswap_mask, rdi = source, rsi = destination.
; Out: xmm9..xmm14 = the six stored blocks, xmm1 = counter for the next call,
;      rdi and rsi advanced by 96, ebx advanced by 6<<24.
; Clobbers r12, r13, xmm2..xmm6, xmm8, xmm15. rdx and r10 are not touched;
; the caller adjusts them itself.
; Round count: the loop runs ebp-1 times, followed by one vaesenc and one
; vaesenclast, i.e. ebp+1 round keys after the initial key-0 XOR.
; ---------------------------------------------------------------------------
493ALIGN	32
494_aesni_ctr32_6x:
495
; xmm4 = round key 0, xmm2 = $L$one_msb (byte-15 increment), xmm15 = round
; key 1, r12 -> round key 2, r13 = loop trip count.
496	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]
497	vmovdqu	xmm2,XMMWORD[32+r11]
498	lea	r13,[((-1))+rbp]
499	vmovups	xmm15,XMMWORD[((16-128))+rcx]
500	lea	r12,[((32-128))+rcx]
501	vpxor	xmm9,xmm1,xmm4
; 100663296 = 6<<24: add 6 to byte 15 of the counter as seen through ebx; a
; carry means the byte-wise increments below would wrap.
502	add	ebx,100663296
503	jc	NEAR $L$handle_ctr32_2
504	vpaddb	xmm10,xmm1,xmm2
505	vpaddb	xmm11,xmm10,xmm2
506	vpxor	xmm10,xmm10,xmm4
507	vpaddb	xmm12,xmm11,xmm2
508	vpxor	xmm11,xmm11,xmm4
509	vpaddb	xmm13,xmm12,xmm2
510	vpxor	xmm12,xmm12,xmm4
511	vpaddb	xmm14,xmm13,xmm2
512	vpxor	xmm13,xmm13,xmm4
513	vpaddb	xmm1,xmm14,xmm2
514	vpxor	xmm14,xmm14,xmm4
515	jmp	NEAR $L$oop_ctr32
516
517ALIGN	16
518$L$oop_ctr32:
; One AES round on all six blocks, then fetch the next round key.
519	vaesenc	xmm9,xmm9,xmm15
520	vaesenc	xmm10,xmm10,xmm15
521	vaesenc	xmm11,xmm11,xmm15
522	vaesenc	xmm12,xmm12,xmm15
523	vaesenc	xmm13,xmm13,xmm15
524	vaesenc	xmm14,xmm14,xmm15
525	vmovups	xmm15,XMMWORD[r12]
526	lea	r12,[16+r12]
527	dec	r13d
528	jnz	NEAR $L$oop_ctr32
529
; xmm3 = last round key. It is XORed with each source block so that
; vaesenclast yields the final-round result already XORed with the source.
530	vmovdqu	xmm3,XMMWORD[r12]
531	vaesenc	xmm9,xmm9,xmm15
532	vpxor	xmm4,xmm3,XMMWORD[rdi]
533	vaesenc	xmm10,xmm10,xmm15
534	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
535	vaesenc	xmm11,xmm11,xmm15
536	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
537	vaesenc	xmm12,xmm12,xmm15
538	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
539	vaesenc	xmm13,xmm13,xmm15
540	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
541	vaesenc	xmm14,xmm14,xmm15
542	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
543	lea	rdi,[96+rdi]
544
545	vaesenclast	xmm9,xmm9,xmm4
546	vaesenclast	xmm10,xmm10,xmm5
547	vaesenclast	xmm11,xmm11,xmm6
548	vaesenclast	xmm12,xmm12,xmm8
549	vaesenclast	xmm13,xmm13,xmm2
550	vaesenclast	xmm14,xmm14,xmm3
551	vmovups	XMMWORD[rsi],xmm9
552	vmovups	XMMWORD[16+rsi],xmm10
553	vmovups	XMMWORD[32+rsi],xmm11
554	vmovups	XMMWORD[48+rsi],xmm12
555	vmovups	XMMWORD[64+rsi],xmm13
556	vmovups	XMMWORD[80+rsi],xmm14
557	lea	rsi,[96+rsi]
558
559	DB	0F3h,0C3h		;repret
560ALIGN	32
561$L$handle_ctr32_2:
; Overflow path: byte-reverse the counter with xmm0, increment with 32-bit
; lane adds ($L$one_lsb at +64, $L$two_lsb at +48), reverse back and apply the
; round-key-0 XOR to xmm10..xmm14 (xmm9 was handled before the branch).
; xmm1 becomes counter+6 and is left without the key XOR.
562	vpshufb	xmm6,xmm1,xmm0
563	vmovdqu	xmm5,XMMWORD[48+r11]
564	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
565	vpaddd	xmm11,xmm6,xmm5
566	vpaddd	xmm12,xmm10,xmm5
567	vpshufb	xmm10,xmm10,xmm0
568	vpaddd	xmm13,xmm11,xmm5
569	vpshufb	xmm11,xmm11,xmm0
570	vpxor	xmm10,xmm10,xmm4
571	vpaddd	xmm14,xmm12,xmm5
572	vpshufb	xmm12,xmm12,xmm0
573	vpxor	xmm11,xmm11,xmm4
574	vpaddd	xmm1,xmm13,xmm5
575	vpshufb	xmm13,xmm13,xmm0
576	vpxor	xmm12,xmm12,xmm4
577	vpshufb	xmm14,xmm14,xmm0
578	vpxor	xmm13,xmm13,xmm4
579	vpshufb	xmm1,xmm1,xmm0
580	vpxor	xmm14,xmm14,xmm4
581	jmp	NEAR $L$oop_ctr32
582
583
584
; ---------------------------------------------------------------------------
; aesni_gcm_encrypt -- exported entry point, Win64 calling convention.
;
; Arguments are remapped exactly as in aesni_gcm_decrypt and used the same way:
;   rdi = source, rsi = destination, rdx = length in bytes,
;   rcx = key schedule, r8 = 16-byte counter block,
;   r9  = 16-byte hash value (in/out) followed by the multiplier table.
; Returns rax = number of bytes processed (r10): 0 when rdx < 0x60*3,
; otherwise 0x60*2 for the two leading _aesni_ctr32_6x calls plus 0x60 for
; every pass completed by _aesni_ctr32_ghash_6x.
;
; Flow: two plain six-block CTR calls produce the first twelve output blocks;
; the first six are byte-reversed and staged as multiplication inputs. The
; combined helper then processes further blocks while reading already-written
; output through r14. After it returns, the code below absorbs the staged
; values that are still pending plus the final six output blocks, reduces, and
; stores the hash value.
; The frame layout is shared with gcm_se_handler (see the offsets there).
; ---------------------------------------------------------------------------
585global	aesni_gcm_encrypt
586
587ALIGN	32
588aesni_gcm_encrypt:
589	mov	QWORD[8+rsp],rdi	;WIN64 prologue
590	mov	QWORD[16+rsp],rsi
591	mov	rax,rsp
592$L$SEH_begin_aesni_gcm_encrypt:
593	mov	rdi,rcx
594	mov	rsi,rdx
595	mov	rdx,r8
596	mov	rcx,r9
597	mov	r8,QWORD[40+rsp]
598	mov	r9,QWORD[48+rsp]
599
600
601
602	xor	r10,r10
603
604
605
606
; Fewer than 0x60*3 bytes (eighteen blocks): nothing is processed, return 0.
607	cmp	rdx,0x60*3
608	jb	NEAR $L$gcm_enc_abort
609
610	lea	rax,[rsp]
611
612	push	rbx
613
614	push	rbp
615
616	push	r12
617
618	push	r13
619
620	push	r14
621
622	push	r15
623
; Save xmm6..xmm15 (callee-saved in the Microsoft x64 convention).
624	lea	rsp,[((-168))+rsp]
625	movaps	XMMWORD[(-216)+rax],xmm6
626	movaps	XMMWORD[(-200)+rax],xmm7
627	movaps	XMMWORD[(-184)+rax],xmm8
628	movaps	XMMWORD[(-168)+rax],xmm9
629	movaps	XMMWORD[(-152)+rax],xmm10
630	movaps	XMMWORD[(-136)+rax],xmm11
631	movaps	XMMWORD[(-120)+rax],xmm12
632	movaps	XMMWORD[(-104)+rax],xmm13
633	movaps	XMMWORD[(-88)+rax],xmm14
634	movaps	XMMWORD[(-72)+rax],xmm15
635$L$gcm_enc_body:
636	vzeroupper
637
; xmm1 = counter block, ebx = its last dword, r11 = constant table,
; xmm0 = byte-reversal mask, rcx biased by +128, ebp = dword at key offset 240.
638	vmovdqu	xmm1,XMMWORD[r8]
639	add	rsp,-128
640	mov	ebx,DWORD[12+r8]
641	lea	r11,[$L$bswap_mask]
642	lea	r14,[((-128))+rcx]
643	mov	r15,0xf80
644	lea	rcx,[128+rcx]
645	vmovdqu	xmm0,XMMWORD[r11]
646	and	rsp,-128
647	mov	ebp,DWORD[((240-128))+rcx]
648
; Same stack-versus-key address check as in aesni_gcm_decrypt: if bits 7..11
; of rsp are 0..767 above those of the key pointer, lower rsp by the difference.
649	and	r14,r15
650	and	r15,rsp
651	sub	r15,r14
652	jc	NEAR $L$enc_no_key_aliasing
653	cmp	r15,768
654	jnc	NEAR $L$enc_no_key_aliasing
655	sub	rsp,r15
656$L$enc_no_key_aliasing:
657
; r14 = destination buffer: the combined helper reads its multiplication
; inputs from output that has already been written. r15 = destination +
; length - 192 is the bound r14 is compared against.
658	lea	r14,[rsi]
659
660
661
662
663
664
665
666
667	lea	r15,[((-192))+rdx*1+rsi]
668
669	shr	rdx,4
670
; First six blocks: byte-reverse the results and stage them at 48..112+rsp
; (the sixth, xmm7, stays in a register).
671	call	_aesni_ctr32_6x
672	vpshufb	xmm8,xmm9,xmm0
673	vpshufb	xmm2,xmm10,xmm0
674	vmovdqu	XMMWORD[112+rsp],xmm8
675	vpshufb	xmm4,xmm11,xmm0
676	vmovdqu	XMMWORD[96+rsp],xmm2
677	vpshufb	xmm5,xmm12,xmm0
678	vmovdqu	XMMWORD[80+rsp],xmm4
679	vpshufb	xmm6,xmm13,xmm0
680	vmovdqu	XMMWORD[64+rsp],xmm5
681	vpshufb	xmm7,xmm14,xmm0
682	vmovdqu	XMMWORD[48+rsp],xmm6
683
; Second six blocks. They are written to the destination only; the helper
; picks them up from there via r14.
684	call	_aesni_ctr32_6x
685
; Load and byte-reverse the hash value, bias r9 by +64, and account for the
; twelve blocks already produced (rdx -= 12, r10 = 0x60*2).
686	vmovdqu	xmm8,XMMWORD[r9]
687	lea	r9,[((32+32))+r9]
688	sub	rdx,12
689	mov	r10,0x60*2
690	vpshufb	xmm8,xmm8,xmm0
691
692	call	_aesni_ctr32_ghash_6x
; First tail pass: multiply the six values still staged on the stack
; (32..112+rsp, the last one XORed with xmm8) by table entries at r9. In
; parallel, store the helper's final six output blocks and byte-reverse them
; in registers for the second tail pass.
; NOTE(review): the vpunpckhqdq/vpxor pairs followed by three multiplies per
; value look like the Karatsuba form, with the entries at (32-32), (80-32) and
; (128-32)+r9 serving as the middle-term multipliers -- confirm against the
; table setup code, which is not in this file.
693	vmovdqu	xmm7,XMMWORD[32+rsp]
694	vmovdqu	xmm0,XMMWORD[r11]
695	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
696	vpunpckhqdq	xmm1,xmm7,xmm7
697	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
698	vmovups	XMMWORD[(-96)+rsi],xmm9
699	vpshufb	xmm9,xmm9,xmm0
700	vpxor	xmm1,xmm1,xmm7
701	vmovups	XMMWORD[(-80)+rsi],xmm10
702	vpshufb	xmm10,xmm10,xmm0
703	vmovups	XMMWORD[(-64)+rsi],xmm11
704	vpshufb	xmm11,xmm11,xmm0
705	vmovups	XMMWORD[(-48)+rsi],xmm12
706	vpshufb	xmm12,xmm12,xmm0
707	vmovups	XMMWORD[(-32)+rsi],xmm13
708	vpshufb	xmm13,xmm13,xmm0
709	vmovups	XMMWORD[(-16)+rsi],xmm14
710	vpshufb	xmm14,xmm14,xmm0
; The byte-reversed first block of the final six is parked at 16+rsp; it is
; XORed into xmm7 further down.
711	vmovdqu	XMMWORD[16+rsp],xmm9
712	vmovdqu	xmm6,XMMWORD[48+rsp]
713	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
714	vpunpckhqdq	xmm2,xmm6,xmm6
715	vpclmulqdq	xmm5,xmm7,xmm3,0x00
716	vpxor	xmm2,xmm2,xmm6
717	vpclmulqdq	xmm7,xmm7,xmm3,0x11
718	vpclmulqdq	xmm1,xmm1,xmm15,0x00
719
720	vmovdqu	xmm9,XMMWORD[64+rsp]
721	vpclmulqdq	xmm4,xmm6,xmm0,0x00
722	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
723	vpxor	xmm4,xmm4,xmm5
724	vpunpckhqdq	xmm5,xmm9,xmm9
725	vpclmulqdq	xmm6,xmm6,xmm0,0x11
726	vpxor	xmm5,xmm5,xmm9
727	vpxor	xmm6,xmm6,xmm7
728	vpclmulqdq	xmm2,xmm2,xmm15,0x10
729	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
730	vpxor	xmm2,xmm2,xmm1
731
732	vmovdqu	xmm1,XMMWORD[80+rsp]
733	vpclmulqdq	xmm7,xmm9,xmm3,0x00
734	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
735	vpxor	xmm7,xmm7,xmm4
736	vpunpckhqdq	xmm4,xmm1,xmm1
737	vpclmulqdq	xmm9,xmm9,xmm3,0x11
738	vpxor	xmm4,xmm4,xmm1
739	vpxor	xmm9,xmm9,xmm6
740	vpclmulqdq	xmm5,xmm5,xmm15,0x00
741	vpxor	xmm5,xmm5,xmm2
742
743	vmovdqu	xmm2,XMMWORD[96+rsp]
744	vpclmulqdq	xmm6,xmm1,xmm0,0x00
745	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
746	vpxor	xmm6,xmm6,xmm7
747	vpunpckhqdq	xmm7,xmm2,xmm2
748	vpclmulqdq	xmm1,xmm1,xmm0,0x11
749	vpxor	xmm7,xmm7,xmm2
750	vpxor	xmm1,xmm1,xmm9
751	vpclmulqdq	xmm4,xmm4,xmm15,0x10
752	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
753	vpxor	xmm4,xmm4,xmm5
754
; The running accumulator is folded into the last staged value here.
755	vpxor	xmm8,xmm8,XMMWORD[112+rsp]
756	vpclmulqdq	xmm5,xmm2,xmm3,0x00
757	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
758	vpunpckhqdq	xmm9,xmm8,xmm8
759	vpxor	xmm5,xmm5,xmm6
760	vpclmulqdq	xmm2,xmm2,xmm3,0x11
761	vpxor	xmm9,xmm9,xmm8
762	vpxor	xmm2,xmm2,xmm1
763	vpclmulqdq	xmm7,xmm7,xmm15,0x00
764	vpxor	xmm4,xmm7,xmm4
765
; From here the second tail pass (inputs xmm14 down to xmm10, then xmm8) is
; started while the first pass is still being combined and reduced.
766	vpclmulqdq	xmm6,xmm8,xmm0,0x00
767	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
768	vpunpckhqdq	xmm1,xmm14,xmm14
769	vpclmulqdq	xmm8,xmm8,xmm0,0x11
770	vpxor	xmm1,xmm1,xmm14
771	vpxor	xmm5,xmm6,xmm5
772	vpclmulqdq	xmm9,xmm9,xmm15,0x10
773	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
774	vpxor	xmm7,xmm8,xmm2
775	vpxor	xmm6,xmm9,xmm4
776
; First pass: middle ^= low ^ high, then split the middle term into the low
; (xmm8) and high (xmm7) sums.
777	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
778	vpxor	xmm9,xmm7,xmm5
779	vpclmulqdq	xmm4,xmm14,xmm3,0x00
780	vpxor	xmm6,xmm6,xmm9
781	vpunpckhqdq	xmm2,xmm13,xmm13
782	vpclmulqdq	xmm14,xmm14,xmm3,0x11
783	vpxor	xmm2,xmm2,xmm13
784	vpslldq	xmm9,xmm6,8
785	vpclmulqdq	xmm1,xmm1,xmm15,0x00
786	vpxor	xmm8,xmm5,xmm9
787	vpsrldq	xmm6,xmm6,8
788	vpxor	xmm7,xmm7,xmm6
789
790	vpclmulqdq	xmm5,xmm13,xmm0,0x00
791	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
792	vpxor	xmm5,xmm5,xmm4
793	vpunpckhqdq	xmm9,xmm12,xmm12
794	vpclmulqdq	xmm13,xmm13,xmm0,0x11
795	vpxor	xmm9,xmm9,xmm12
796	vpxor	xmm13,xmm13,xmm14
797	vpalignr	xmm14,xmm8,xmm8,8
798	vpclmulqdq	xmm2,xmm2,xmm15,0x10
799	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
800	vpxor	xmm2,xmm2,xmm1
801
802	vpclmulqdq	xmm4,xmm12,xmm3,0x00
803	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
804	vpxor	xmm4,xmm4,xmm5
805	vpunpckhqdq	xmm1,xmm11,xmm11
806	vpclmulqdq	xmm12,xmm12,xmm3,0x11
807	vpxor	xmm1,xmm1,xmm11
808	vpxor	xmm12,xmm12,xmm13
; Fold in the block parked at 16+rsp above.
809	vxorps	xmm7,xmm7,XMMWORD[16+rsp]
810	vpclmulqdq	xmm9,xmm9,xmm15,0x00
811	vpxor	xmm9,xmm9,xmm2
812
; Two folding steps with $L$poly (+16 from $L$bswap_mask) reduce the first
; pass; the result in xmm8 becomes the last input of the second pass.
813	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
814	vxorps	xmm8,xmm8,xmm14
815
816	vpclmulqdq	xmm5,xmm11,xmm0,0x00
817	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
818	vpxor	xmm5,xmm5,xmm4
819	vpunpckhqdq	xmm2,xmm10,xmm10
820	vpclmulqdq	xmm11,xmm11,xmm0,0x11
821	vpxor	xmm2,xmm2,xmm10
822	vpalignr	xmm14,xmm8,xmm8,8
823	vpxor	xmm11,xmm11,xmm12
824	vpclmulqdq	xmm1,xmm1,xmm15,0x10
825	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
826	vpxor	xmm1,xmm1,xmm9
827
828	vxorps	xmm14,xmm14,xmm7
829	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
830	vxorps	xmm8,xmm8,xmm14
831
832	vpclmulqdq	xmm4,xmm10,xmm3,0x00
833	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
834	vpxor	xmm4,xmm4,xmm5
835	vpunpckhqdq	xmm9,xmm8,xmm8
836	vpclmulqdq	xmm10,xmm10,xmm3,0x11
837	vpxor	xmm9,xmm9,xmm8
838	vpxor	xmm10,xmm10,xmm11
839	vpclmulqdq	xmm2,xmm2,xmm15,0x00
840	vpxor	xmm2,xmm2,xmm1
841
842	vpclmulqdq	xmm5,xmm8,xmm0,0x00
843	vpclmulqdq	xmm7,xmm8,xmm0,0x11
844	vpxor	xmm5,xmm5,xmm4
845	vpclmulqdq	xmm6,xmm9,xmm15,0x10
846	vpxor	xmm7,xmm7,xmm10
847	vpxor	xmm6,xmm6,xmm2
848
; Second pass: combine the low (xmm5), high (xmm7) and middle (xmm6) sums,
; then reduce with two folding steps against $L$poly (xmm3).
849	vpxor	xmm4,xmm7,xmm5
850	vpxor	xmm6,xmm6,xmm4
851	vpslldq	xmm1,xmm6,8
852	vmovdqu	xmm3,XMMWORD[16+r11]
853	vpsrldq	xmm6,xmm6,8
854	vpxor	xmm8,xmm5,xmm1
855	vpxor	xmm7,xmm7,xmm6
856
857	vpalignr	xmm2,xmm8,xmm8,8
858	vpclmulqdq	xmm8,xmm8,xmm3,0x10
859	vpxor	xmm8,xmm8,xmm2
860
861	vpalignr	xmm2,xmm8,xmm8,8
862	vpclmulqdq	xmm8,xmm8,xmm3,0x10
863	vpxor	xmm2,xmm2,xmm7
864	vpxor	xmm8,xmm8,xmm2
; Byte-reverse the result and write it back to the caller's hash value
; (r9 was advanced by 64 above, hence -64).
865	vpshufb	xmm8,xmm8,XMMWORD[r11]
866	vmovdqu	XMMWORD[(-64)+r9],xmm8
867
868	vzeroupper
869	movaps	xmm6,XMMWORD[((-216))+rax]
870	movaps	xmm7,XMMWORD[((-200))+rax]
871	movaps	xmm8,XMMWORD[((-184))+rax]
872	movaps	xmm9,XMMWORD[((-168))+rax]
873	movaps	xmm10,XMMWORD[((-152))+rax]
874	movaps	xmm11,XMMWORD[((-136))+rax]
875	movaps	xmm12,XMMWORD[((-120))+rax]
876	movaps	xmm13,XMMWORD[((-104))+rax]
877	movaps	xmm14,XMMWORD[((-88))+rax]
878	movaps	xmm15,XMMWORD[((-72))+rax]
879	mov	r15,QWORD[((-48))+rax]
880
881	mov	r14,QWORD[((-40))+rax]
882
883	mov	r13,QWORD[((-32))+rax]
884
885	mov	r12,QWORD[((-24))+rax]
886
887	mov	rbp,QWORD[((-16))+rax]
888
889	mov	rbx,QWORD[((-8))+rax]
890
; rax still holds the entry rsp; restoring it discards the whole frame.
891	lea	rsp,[rax]
892
893$L$gcm_enc_abort:
894	mov	rax,r10
895	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
896	mov	rsi,QWORD[16+rsp]
897	DB	0F3h,0C3h		;repret
898
899$L$SEH_end_aesni_gcm_encrypt:
; ---------------------------------------------------------------------------
; Constant table. The code keeps the address of $L$bswap_mask in r11 and
; reaches the other entries by fixed offsets, so the order and the 16-byte
; size of each entry must not change:
;   +0   $L$bswap_mask  vpshufb control that reverses the 16 bytes of a block
;   +16  $L$poly        multiplier for the vpclmulqdq folding steps (0xc2 in
;                       byte 15)
;   +32  $L$one_msb     vpaddb operand: adds 1 to byte 15 of a counter block
;   +48  $L$two_lsb     vpaddd operand: adds 2 to the low dword (used after
;                       byte reversal on the counter-overflow paths)
;   +64  $L$one_lsb     vpaddd operand: adds 1 to the low dword
; The trailing bytes are the NUL-terminated ASCII string
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>".
; ---------------------------------------------------------------------------
900ALIGN	64
901$L$bswap_mask:
902DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
903$L$poly:
904DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
905$L$one_msb:
906DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
907$L$two_lsb:
908DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
909$L$one_lsb:
910DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
911DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
912DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
913DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
914DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
915ALIGN	64
; ---------------------------------------------------------------------------
; gcm_se_handler -- exception handler named in the .xdata records of
; aesni_gcm_decrypt and aesni_gcm_encrypt.
;
; It rebuilds the caller's register state from the frame those functions
; create, so that unwinding can continue past them, then calls
; RtlVirtualUnwind and returns 1.
;
; Uses r8 and r9 as pointers to two OS-provided records. NOTE(review): the
; field names in the comments below assume r8 = CONTEXT* and
; r9 = DISPATCHER_CONTEXT* with the public Windows x64 layouts; this file
; only shows the raw offsets -- confirm against winnt.h.
; The two dwords read through r11 are the image-relative addresses listed
; after the handler in the .xdata record: the function's body label and its
; abort/epilogue label.
; ---------------------------------------------------------------------------
916EXTERN	__imp_RtlVirtualUnwind
917
918ALIGN	16
919gcm_se_handler:
920	push	rsi
921	push	rdi
922	push	rbx
923	push	rbp
924	push	r12
925	push	r13
926	push	r14
927	push	r15
928	pushfq
929	sub	rsp,64
930
; rax = saved Rax, rbx = saved Rip (presumed field names, see header).
931	mov	rax,QWORD[120+r8]
932	mov	rbx,QWORD[248+r8]
933
; rsi = image base, r11 = handler data (presumed field names).
934	mov	rsi,QWORD[8+r9]
935	mov	r11,QWORD[56+r9]
936
; Fault address below the body label: still in the prologue, nothing has been
; pushed yet. Both functions execute `mov rax,rsp` before their
; $L$SEH_begin label, so the saved Rax is used as the frame base.
937	mov	r10d,DWORD[r11]
938	lea	r10,[r10*1+rsi]
939	cmp	rbx,r10
940	jb	NEAR $L$common_seh_tail
941
; Fault address at or past the abort label: the frame is already torn down,
; so the saved Rsp is the frame base.
942	mov	rax,QWORD[152+r8]
943
944	mov	r10d,DWORD[4+r11]
945	lea	r10,[r10*1+rsi]
946	cmp	rbx,r10
947	jae	NEAR $L$common_seh_tail
948
; Inside the body: the functions keep their entry rsp in rax. Recover the six
; pushed registers from just below it and write them into the record at r8.
949	mov	rax,QWORD[120+r8]
950
951	mov	r15,QWORD[((-48))+rax]
952	mov	r14,QWORD[((-40))+rax]
953	mov	r13,QWORD[((-32))+rax]
954	mov	r12,QWORD[((-24))+rax]
955	mov	rbp,QWORD[((-16))+rax]
956	mov	rbx,QWORD[((-8))+rax]
957	mov	QWORD[240+r8],r15
958	mov	QWORD[232+r8],r14
959	mov	QWORD[224+r8],r13
960	mov	QWORD[216+r8],r12
961	mov	QWORD[160+r8],rbp
962	mov	QWORD[144+r8],rbx
963
; Copy 20 qwords (the ten XMM save slots at -216+rax) to offset 512 of the
; record at r8. The DD below encodes `cld` followed by `rep movsq`
; (bytes FC F3 48 A5).
964	lea	rsi,[((-216))+rax]
965	lea	rdi,[512+r8]
966	mov	ecx,20
967	DD	0xa548f3fc
968
969$L$common_seh_tail:
; rax = rsp at function entry. The prologue stored rdi/rsi at 8+rsp and
; 16+rsp; fetch them and record the restored Rsp, Rsi and Rdi (presumed
; field names).
970	mov	rdi,QWORD[8+rax]
971	mov	rsi,QWORD[16+rax]
972	mov	QWORD[152+r8],rax
973	mov	QWORD[168+r8],rsi
974	mov	QWORD[176+r8],rdi
975
; Copy 154 qwords from the record at r8 to the buffer whose address is stored
; at 40+r9 (again `cld` / `rep movsq`).
976	mov	rdi,QWORD[40+r9]
977	mov	rsi,r8
978	mov	ecx,154
979	DD	0xa548f3fc
980
; Call RtlVirtualUnwind with rcx = 0, rdx/r8/r9 and the first stack argument
; loaded from the record at rsi (offsets 8, 0, 16, 40), the next two stack
; arguments pointing at its offsets 56 and 24, and a final 0.
981	mov	rsi,r9
982	xor	rcx,rcx
983	mov	rdx,QWORD[8+rsi]
984	mov	r8,QWORD[rsi]
985	mov	r9,QWORD[16+rsi]
986	mov	r10,QWORD[40+rsi]
987	lea	r11,[56+rsi]
988	lea	r12,[24+rsi]
989	mov	QWORD[32+rsp],r10
990	mov	QWORD[40+rsp],r11
991	mov	QWORD[48+rsp],r12
992	mov	QWORD[56+rsp],rcx
993	call	QWORD[__imp_RtlVirtualUnwind]
994
; Return value 1. NOTE(review): presumably ExceptionContinueSearch -- confirm.
995	mov	eax,1
996	add	rsp,64
997	popfq
998	pop	r15
999	pop	r14
1000	pop	r13
1001	pop	r12
1002	pop	rbp
1003	pop	rbx
1004	pop	rdi
1005	pop	rsi
1006	DB	0F3h,0C3h		;repret
1007
1008
; ---------------------------------------------------------------------------
; Windows x64 unwind tables.
; .pdata: one record of three image-relative addresses per exported
; function -- start label, end label, and its .xdata record.
; .xdata: a four-byte header, the image-relative address of gcm_se_handler,
; then two image-relative addresses that the handler reads as its data:
; the function's body label and its abort/epilogue label.
; NOTE(review): the header byte 9 presumably encodes unwind-info version 1
; with the exception-handler flag set -- confirm against the UNWIND_INFO docs.
; ---------------------------------------------------------------------------
1009section	.pdata rdata align=4
1010ALIGN	4
1011	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
1012	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
1013	DD	$L$SEH_gcm_dec_info wrt ..imagebase
1014
1015	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
1016	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
1017	DD	$L$SEH_gcm_enc_info wrt ..imagebase
1018section	.xdata rdata align=8
1019ALIGN	8
1020$L$SEH_gcm_dec_info:
1021DB	9,0,0,0
1022	DD	gcm_se_handler wrt ..imagebase
1023	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
1024$L$SEH_gcm_enc_info:
1025DB	9,0,0,0
1026	DD	gcm_se_handler wrt ..imagebase
1027	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase
1028