• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
9%ifdef BORINGSSL_PREFIX
10%include "boringssl_prefix_symbols_nasm.inc"
11%endif
12section	.text code align=64
13
14
15
; _aesni_ctr32_ghash_6x -- file-local helper, reached only through `call`
; from aesni_gcm_decrypt and aesni_gcm_encrypt.
;
; Each pass of $L$oop6x runs the AES rounds for six counter blocks
; (xmm9..xmm14) interleaved with vpclmulqdq multiplies over six
; byte-reversed 16-byte blocks, then XORs the six keystream blocks with 96
; bytes read from rdi.
;
; Register roles, as set up by the two callers in this file:
;   rdi   data XORed with the keystream; advanced by 96 per pass
;   rsi   destination; advanced by 96 per pass, results written at rsi-96..rsi-1
;   rdx   remaining count in 16-byte units (callers do `shr rdx,4`)
;   rcx   key-schedule pointer biased by +128 (callers do `lea rcx,[128+rcx]`)
;   ebp   dword the callers load from offset 240 of the key schedule
;   r8    16-byte counter block in memory; the next counter is stored there
;   ebx   dword the callers load from [12+r8]; used only to detect wrap
;   r9    caller's sixth argument biased by +64; table entries are read
;         from r9-32 .. r9+80
;   r10   running byte count, += 0x60 per pass
;   r11   address of $L$bswap_mask (the constants that follow it are reached
;         as 16/32/48/64+r11)
;   r14   address of the next six blocks to byte-reverse for hashing
;   r15   limit compared against r14 (see the setnc sequence below)
;   xmm1  current counter block
;   xmm7  byte-reversed block multiplied first in the pass
;   xmm8  hash accumulator
; Stack: the "+8" in every rsp displacement skips the return address, so
; [(48+8)+rsp] here is the caller's [48+rsp]. [rsp+24..rsp+135] is used.
;
; On return: xmm9..xmm14 hold the last six result blocks (NOT yet stored;
; both callers store them at rsi-96..rsi-1), xmm8 holds the accumulator,
; r10 the byte count. rax is not touched.
16ALIGN	32
17_aesni_ctr32_ghash_6x:
18
; xmm2 = $L$one_msb: a byte-wise add of it bumps only byte 15 of a block.
19	vmovdqu	xmm2,XMMWORD[32+r11]
20	sub	rdx,6
21	vpxor	xmm4,xmm4,xmm4
; xmm15 = first 16 bytes of the key schedule (rcx carries a +128 bias).
22	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
23	vpaddb	xmm10,xmm1,xmm2
24	vpaddb	xmm11,xmm10,xmm2
25	vpaddb	xmm12,xmm11,xmm2
26	vpaddb	xmm13,xmm12,xmm2
27	vpaddb	xmm14,xmm13,xmm2
28	vpxor	xmm9,xmm1,xmm15
; Zero the 16-byte slot that is folded into xmm8 at line 92 of each pass.
29	vmovdqu	XMMWORD[(16+8)+rsp],xmm4
30	jmp	NEAR $L$oop6x
31
32ALIGN	32
33$L$oop6x:
; 100663296 = 6 << 24. ebx was loaded as a little-endian dword from the last
; four bytes of the counter block, so byte 15 sits in its top byte; a carry
; means the byte-wise vpaddb increments above would have wrapped.
34	add	ebx,100663296
35	jc	NEAR $L$handle_ctr32
36	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
37	vpaddb	xmm1,xmm14,xmm2
38	vpxor	xmm10,xmm10,xmm15
39	vpxor	xmm11,xmm11,xmm15
40
41$L$resume_ctr32:
; Publish the counter for the following pass; it is reloaded in $L$enc_tail.
42	vmovdqu	XMMWORD[r8],xmm1
43	vpclmulqdq	xmm5,xmm7,xmm3,0x10
44	vpxor	xmm12,xmm12,xmm15
45	vmovups	xmm2,XMMWORD[((16-128))+rcx]
46	vpclmulqdq	xmm6,xmm7,xmm3,0x01
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
; Lines 64-90 compute, without a branch, r14 += 0x60 when r15 >= r14
; (unsigned) and leave r14 unchanged otherwise: setnc gives 0/1, neg turns
; that into 0/-1, and the `and` turns it into 0/0x60. The SIMD instructions
; in between do not modify the flags set by cmp.
64	xor	r12,r12
65	cmp	r15,r14
66
67	vaesenc	xmm9,xmm9,xmm2
68	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
69	vpxor	xmm13,xmm13,xmm15
70	vpclmulqdq	xmm1,xmm7,xmm3,0x00
71	vaesenc	xmm10,xmm10,xmm2
72	vpxor	xmm14,xmm14,xmm15
73	setnc	r12b
74	vpclmulqdq	xmm7,xmm7,xmm3,0x11
75	vaesenc	xmm11,xmm11,xmm2
76	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
77	neg	r12
78	vaesenc	xmm12,xmm12,xmm2
79	vpxor	xmm6,xmm6,xmm5
80	vpclmulqdq	xmm5,xmm0,xmm3,0x00
81	vpxor	xmm8,xmm8,xmm4
82	vaesenc	xmm13,xmm13,xmm2
83	vpxor	xmm4,xmm1,xmm5
84	and	r12,0x60
85	vmovups	xmm15,XMMWORD[((32-128))+rcx]
86	vpclmulqdq	xmm1,xmm0,xmm3,0x10
87	vaesenc	xmm14,xmm14,xmm2
88
89	vpclmulqdq	xmm2,xmm0,xmm3,0x01
90	lea	r14,[r12*1+r14]
91	vaesenc	xmm9,xmm9,xmm15
92	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
93	vpclmulqdq	xmm3,xmm0,xmm3,0x11
94	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
95	vaesenc	xmm10,xmm10,xmm15
; From here to $L$enc_tail, movbe pairs read the 96 bytes at r14 as
; byte-reversed qwords and park them in [rsp+40 .. rsp+135]; the block at
; r14+80 lands lowest, the block at r14+0 highest. Each slot is rewritten
; only after the multiply that consumed its previous contents has loaded it.
96	movbe	r13,QWORD[88+r14]
97	vaesenc	xmm11,xmm11,xmm15
98	movbe	r12,QWORD[80+r14]
99	vaesenc	xmm12,xmm12,xmm15
100	mov	QWORD[((32+8))+rsp],r13
101	vaesenc	xmm13,xmm13,xmm15
102	mov	QWORD[((40+8))+rsp],r12
103	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
104	vaesenc	xmm14,xmm14,xmm15
105
106	vmovups	xmm15,XMMWORD[((48-128))+rcx]
107	vpxor	xmm6,xmm6,xmm1
108	vpclmulqdq	xmm1,xmm0,xmm5,0x00
109	vaesenc	xmm9,xmm9,xmm15
110	vpxor	xmm6,xmm6,xmm2
111	vpclmulqdq	xmm2,xmm0,xmm5,0x10
112	vaesenc	xmm10,xmm10,xmm15
113	vpxor	xmm7,xmm7,xmm3
114	vpclmulqdq	xmm3,xmm0,xmm5,0x01
115	vaesenc	xmm11,xmm11,xmm15
116	vpclmulqdq	xmm5,xmm0,xmm5,0x11
117	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
118	vaesenc	xmm12,xmm12,xmm15
119	vaesenc	xmm13,xmm13,xmm15
120	vpxor	xmm4,xmm4,xmm1
121	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
122	vaesenc	xmm14,xmm14,xmm15
123
124	vmovups	xmm15,XMMWORD[((64-128))+rcx]
125	vpxor	xmm6,xmm6,xmm2
126	vpclmulqdq	xmm2,xmm0,xmm1,0x00
127	vaesenc	xmm9,xmm9,xmm15
128	vpxor	xmm6,xmm6,xmm3
129	vpclmulqdq	xmm3,xmm0,xmm1,0x10
130	vaesenc	xmm10,xmm10,xmm15
131	movbe	r13,QWORD[72+r14]
132	vpxor	xmm7,xmm7,xmm5
133	vpclmulqdq	xmm5,xmm0,xmm1,0x01
134	vaesenc	xmm11,xmm11,xmm15
135	movbe	r12,QWORD[64+r14]
136	vpclmulqdq	xmm1,xmm0,xmm1,0x11
137	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
138	vaesenc	xmm12,xmm12,xmm15
139	mov	QWORD[((48+8))+rsp],r13
140	vaesenc	xmm13,xmm13,xmm15
141	mov	QWORD[((56+8))+rsp],r12
142	vpxor	xmm4,xmm4,xmm2
143	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
144	vaesenc	xmm14,xmm14,xmm15
145
146	vmovups	xmm15,XMMWORD[((80-128))+rcx]
147	vpxor	xmm6,xmm6,xmm3
148	vpclmulqdq	xmm3,xmm0,xmm2,0x00
149	vaesenc	xmm9,xmm9,xmm15
150	vpxor	xmm6,xmm6,xmm5
151	vpclmulqdq	xmm5,xmm0,xmm2,0x10
152	vaesenc	xmm10,xmm10,xmm15
153	movbe	r13,QWORD[56+r14]
154	vpxor	xmm7,xmm7,xmm1
155	vpclmulqdq	xmm1,xmm0,xmm2,0x01
; The accumulator is XORed into the last of the six blocks before that
; block is multiplied (lines 170-180).
156	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]
157	vaesenc	xmm11,xmm11,xmm15
158	movbe	r12,QWORD[48+r14]
159	vpclmulqdq	xmm2,xmm0,xmm2,0x11
160	vaesenc	xmm12,xmm12,xmm15
161	mov	QWORD[((64+8))+rsp],r13
162	vaesenc	xmm13,xmm13,xmm15
163	mov	QWORD[((72+8))+rsp],r12
164	vpxor	xmm4,xmm4,xmm3
165	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
166	vaesenc	xmm14,xmm14,xmm15
167
168	vmovups	xmm15,XMMWORD[((96-128))+rcx]
169	vpxor	xmm6,xmm6,xmm5
170	vpclmulqdq	xmm5,xmm8,xmm3,0x10
171	vaesenc	xmm9,xmm9,xmm15
172	vpxor	xmm6,xmm6,xmm1
173	vpclmulqdq	xmm1,xmm8,xmm3,0x01
174	vaesenc	xmm10,xmm10,xmm15
175	movbe	r13,QWORD[40+r14]
176	vpxor	xmm7,xmm7,xmm2
177	vpclmulqdq	xmm2,xmm8,xmm3,0x00
178	vaesenc	xmm11,xmm11,xmm15
179	movbe	r12,QWORD[32+r14]
180	vpclmulqdq	xmm8,xmm8,xmm3,0x11
181	vaesenc	xmm12,xmm12,xmm15
182	mov	QWORD[((80+8))+rsp],r13
183	vaesenc	xmm13,xmm13,xmm15
184	mov	QWORD[((88+8))+rsp],r12
185	vpxor	xmm6,xmm6,xmm5
186	vaesenc	xmm14,xmm14,xmm15
187	vpxor	xmm6,xmm6,xmm1
188
189	vmovups	xmm15,XMMWORD[((112-128))+rcx]
190	vpslldq	xmm5,xmm6,8
191	vpxor	xmm4,xmm4,xmm2
; xmm3 = $L$poly, the constant multiplied in by both vpalignr/vpclmulqdq
; folding steps (here and at the top of $L$enc_tail).
192	vmovdqu	xmm3,XMMWORD[16+r11]
193
194	vaesenc	xmm9,xmm9,xmm15
195	vpxor	xmm7,xmm7,xmm8
196	vaesenc	xmm10,xmm10,xmm15
197	vpxor	xmm4,xmm4,xmm5
198	movbe	r13,QWORD[24+r14]
199	vaesenc	xmm11,xmm11,xmm15
200	movbe	r12,QWORD[16+r14]
201	vpalignr	xmm0,xmm4,xmm4,8
202	vpclmulqdq	xmm4,xmm4,xmm3,0x10
203	mov	QWORD[((96+8))+rsp],r13
204	vaesenc	xmm12,xmm12,xmm15
205	mov	QWORD[((104+8))+rsp],r12
206	vaesenc	xmm13,xmm13,xmm15
207	vmovups	xmm1,XMMWORD[((128-128))+rcx]
208	vaesenc	xmm14,xmm14,xmm15
209
210	vaesenc	xmm9,xmm9,xmm1
211	vmovups	xmm15,XMMWORD[((144-128))+rcx]
212	vaesenc	xmm10,xmm10,xmm1
213	vpsrldq	xmm6,xmm6,8
214	vaesenc	xmm11,xmm11,xmm1
215	vpxor	xmm7,xmm7,xmm6
216	vaesenc	xmm12,xmm12,xmm1
217	vpxor	xmm4,xmm4,xmm0
218	movbe	r13,QWORD[8+r14]
219	vaesenc	xmm13,xmm13,xmm1
220	movbe	r12,QWORD[r14]
221	vaesenc	xmm14,xmm14,xmm1
222	vmovups	xmm1,XMMWORD[((160-128))+rcx]
; Dispatch on ebp: below 11 goes straight to the tail, exactly 11 runs two
; more rounds first, anything larger runs four more. The `je` at line 241
; reuses the flags from this cmp; nothing in between writes flags.
; NOTE(review): presumably the three cases are the three AES key sizes --
; confirm how the key schedule's round field is populated.
223	cmp	ebp,11
224	jb	NEAR $L$enc_tail
225
226	vaesenc	xmm9,xmm9,xmm15
227	vaesenc	xmm10,xmm10,xmm15
228	vaesenc	xmm11,xmm11,xmm15
229	vaesenc	xmm12,xmm12,xmm15
230	vaesenc	xmm13,xmm13,xmm15
231	vaesenc	xmm14,xmm14,xmm15
232
233	vaesenc	xmm9,xmm9,xmm1
234	vaesenc	xmm10,xmm10,xmm1
235	vaesenc	xmm11,xmm11,xmm1
236	vaesenc	xmm12,xmm12,xmm1
237	vaesenc	xmm13,xmm13,xmm1
238	vmovups	xmm15,XMMWORD[((176-128))+rcx]
239	vaesenc	xmm14,xmm14,xmm1
240	vmovups	xmm1,XMMWORD[((192-128))+rcx]
241	je	NEAR $L$enc_tail
242
243	vaesenc	xmm9,xmm9,xmm15
244	vaesenc	xmm10,xmm10,xmm15
245	vaesenc	xmm11,xmm11,xmm15
246	vaesenc	xmm12,xmm12,xmm15
247	vaesenc	xmm13,xmm13,xmm15
248	vaesenc	xmm14,xmm14,xmm15
249
250	vaesenc	xmm9,xmm9,xmm1
251	vaesenc	xmm10,xmm10,xmm1
252	vaesenc	xmm11,xmm11,xmm1
253	vaesenc	xmm12,xmm12,xmm1
254	vaesenc	xmm13,xmm13,xmm1
255	vmovups	xmm15,XMMWORD[((208-128))+rcx]
256	vaesenc	xmm14,xmm14,xmm1
257	vmovups	xmm1,XMMWORD[((224-128))+rcx]
258	jmp	NEAR $L$enc_tail
259
260ALIGN	32
261$L$handle_ctr32:
; Wrap path: byte-reverse the counter with $L$bswap_mask, step it with
; 32-bit adds of $L$one_lsb (64+r11) and $L$two_lsb (48+r11), then reverse
; each result back. Rejoins at $L$resume_ctr32 with xmm10/xmm11 already
; XORed with xmm15, exactly as the fast path leaves them.
262	vmovdqu	xmm0,XMMWORD[r11]
263	vpshufb	xmm6,xmm1,xmm0
264	vmovdqu	xmm5,XMMWORD[48+r11]
265	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
266	vpaddd	xmm11,xmm6,xmm5
267	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
268	vpaddd	xmm12,xmm10,xmm5
269	vpshufb	xmm10,xmm10,xmm0
270	vpaddd	xmm13,xmm11,xmm5
271	vpshufb	xmm11,xmm11,xmm0
272	vpxor	xmm10,xmm10,xmm15
273	vpaddd	xmm14,xmm12,xmm5
274	vpshufb	xmm12,xmm12,xmm0
275	vpxor	xmm11,xmm11,xmm15
276	vpaddd	xmm1,xmm13,xmm5
277	vpshufb	xmm13,xmm13,xmm0
278	vpshufb	xmm14,xmm14,xmm0
279	vpshufb	xmm1,xmm1,xmm0
280	jmp	NEAR $L$resume_ctr32
281
282ALIGN	32
283$L$enc_tail:
; xmm1 holds the final round key here. It is XORed with each of the six
; 16-byte blocks at rdi so that vaesenclast yields keystream XOR data in
; one step.
284	vaesenc	xmm9,xmm9,xmm15
285	vmovdqu	XMMWORD[(16+8)+rsp],xmm7
286	vpalignr	xmm8,xmm4,xmm4,8
287	vaesenc	xmm10,xmm10,xmm15
288	vpclmulqdq	xmm4,xmm4,xmm3,0x10
289	vpxor	xmm2,xmm1,XMMWORD[rdi]
290	vaesenc	xmm11,xmm11,xmm15
291	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
292	vaesenc	xmm12,xmm12,xmm15
293	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
294	vaesenc	xmm13,xmm13,xmm15
295	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
296	vaesenc	xmm14,xmm14,xmm15
297	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
298	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
; Reload the counter stored at $L$resume_ctr32 and pre-compute the next
; pass's counter blocks into xmm0/xmm5/xmm6/xmm7/xmm3.
299	vmovdqu	xmm1,XMMWORD[r8]
300
301	vaesenclast	xmm9,xmm9,xmm2
302	vmovdqu	xmm2,XMMWORD[32+r11]
303	vaesenclast	xmm10,xmm10,xmm0
304	vpaddb	xmm0,xmm1,xmm2
305	mov	QWORD[((112+8))+rsp],r13
306	lea	rdi,[96+rdi]
307	vaesenclast	xmm11,xmm11,xmm5
308	vpaddb	xmm5,xmm0,xmm2
309	mov	QWORD[((120+8))+rsp],r12
310	lea	rsi,[96+rsi]
311	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
312	vaesenclast	xmm12,xmm12,xmm6
313	vpaddb	xmm6,xmm5,xmm2
314	vaesenclast	xmm13,xmm13,xmm7
315	vpaddb	xmm7,xmm6,xmm2
316	vaesenclast	xmm14,xmm14,xmm3
317	vpaddb	xmm3,xmm7,xmm2
318
; Account for the 96 bytes just produced; a borrow from rdx ends the loop
; with the six results still in registers.
319	add	r10,0x60
320	sub	rdx,0x6
321	jc	NEAR $L$6x_done
322
323	vmovups	XMMWORD[(-96)+rsi],xmm9
324	vpxor	xmm9,xmm1,xmm15
325	vmovups	XMMWORD[(-80)+rsi],xmm10
326	vmovdqa	xmm10,xmm0
327	vmovups	XMMWORD[(-64)+rsi],xmm11
328	vmovdqa	xmm11,xmm5
329	vmovups	XMMWORD[(-48)+rsi],xmm12
330	vmovdqa	xmm12,xmm6
331	vmovups	XMMWORD[(-32)+rsi],xmm13
332	vmovdqa	xmm13,xmm7
333	vmovups	XMMWORD[(-16)+rsi],xmm14
334	vmovdqa	xmm14,xmm3
; Next pass's first multiplicand: the byte-reversed block written by the
; movbe pair at lines 96-102.
335	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]
336	jmp	NEAR $L$oop6x
337
338$L$6x_done:
; Fold the two halves still pending from the last pass into xmm8.
339	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
340	vpxor	xmm8,xmm8,xmm4
341
342	DB	0F3h,0C3h		;repret
343
344
; aesni_gcm_decrypt -- exported entry point, Microsoft x64 calling convention.
;
; The six incoming arguments (rcx, rdx, r8, r9, [rsp+40], [rsp+48]) are
; moved into rdi, rsi, rdx, rcx, r8, r9 so the body can share code written
; for that register assignment. As used below:
;   rdi  data that is both hashed and XORed with the keystream
;   rsi  destination buffer
;   rdx  length in bytes
;   rcx  key schedule; a dword at offset 240 is loaded into ebp
;   r8   16-byte counter block (read here, updated by the helper)
;   r9   hash state: the first 16 bytes are read and written back; the
;        helper reads table entries starting 32 bytes in
; Returns in rax the byte count accumulated in r10 by the helper, or 0
; without touching any buffer when rdx < 0x60.
; Callee-saved rbx, rbp, r12-r15, rdi, rsi and xmm6-xmm15 are preserved.
345global	aesni_gcm_decrypt
346
347ALIGN	32
348aesni_gcm_decrypt:
349	mov	QWORD[8+rsp],rdi	;WIN64 prologue
350	mov	QWORD[16+rsp],rsi
351	mov	rax,rsp
352$L$SEH_begin_aesni_gcm_decrypt:
353	mov	rdi,rcx
354	mov	rsi,rdx
355	mov	rdx,r8
356	mov	rcx,r9
357	mov	r8,QWORD[40+rsp]
358	mov	r9,QWORD[48+rsp]
359
360
361
362	xor	r10,r10
363
364
365
; Fewer than 96 bytes: return 0 (r10) via the abort label.
366	cmp	rdx,0x60
367	jb	NEAR $L$gcm_dec_abort
368
; rax = rsp at entry; it is the frame base for everything saved below and
; is not modified again until the epilogue.
369	lea	rax,[rsp]
370
371	push	rbx
372
373	push	rbp
374
375	push	r12
376
377	push	r13
378
379	push	r14
380
381	push	r15
382
; Six pushes (48 bytes) plus 168 puts rsp at rax-216, where the xmm6-xmm15
; save area begins.
383	lea	rsp,[((-168))+rsp]
384	movaps	XMMWORD[(-216)+rax],xmm6
385	movaps	XMMWORD[(-200)+rax],xmm7
386	movaps	XMMWORD[(-184)+rax],xmm8
387	movaps	XMMWORD[(-168)+rax],xmm9
388	movaps	XMMWORD[(-152)+rax],xmm10
389	movaps	XMMWORD[(-136)+rax],xmm11
390	movaps	XMMWORD[(-120)+rax],xmm12
391	movaps	XMMWORD[(-104)+rax],xmm13
392	movaps	XMMWORD[(-88)+rax],xmm14
393	movaps	XMMWORD[(-72)+rax],xmm15
394$L$gcm_dec_body:
395	vzeroupper
396
; Set up the helper's registers (xmm1 = counter, ebx = its last dword,
; r11 = constants, xmm8 = byte-reversed hash state, rcx and r9 biased by
; +128 and +64) while carving out a 128-byte-aligned scratch area.
397	vmovdqu	xmm1,XMMWORD[r8]
398	add	rsp,-128
399	mov	ebx,DWORD[12+r8]
400	lea	r11,[$L$bswap_mask]
401	lea	r14,[((-128))+rcx]
402	mov	r15,0xf80
403	vmovdqu	xmm8,XMMWORD[r9]
404	and	rsp,-128
405	vmovdqu	xmm0,XMMWORD[r11]
406	lea	rcx,[128+rcx]
407	lea	r9,[((32+32))+r9]
408	mov	ebp,DWORD[((240-128))+rcx]
409	vpshufb	xmm8,xmm8,xmm0
410
; Compare address bits 7..11 of rsp with those of (key schedule - 128). If
; rsp's are 0..767 above, lower rsp by that difference so both share the
; same bits 7..11.
; NOTE(review): presumably this keeps the scratch area from aliasing the
; key schedule in the cache -- confirm against the generating Perl source.
411	and	r14,r15
412	and	r15,rsp
413	sub	r15,r14
414	jc	NEAR $L$dec_no_key_aliasing
415	cmp	r15,768
416	jnc	NEAR $L$dec_no_key_aliasing
417	sub	rsp,r15
418$L$dec_no_key_aliasing:
419
; Byte-reverse the first six blocks at rdi for hashing: the block at rdi+80
; stays in xmm7, the others go to [rsp+48 .. rsp+127], which the helper
; reads at the same addresses through its +8 displacements. r14 starts at
; rdi, so it is the data at rdi (not the output) that gets hashed.
420	vmovdqu	xmm7,XMMWORD[80+rdi]
421	lea	r14,[rdi]
422	vmovdqu	xmm4,XMMWORD[64+rdi]
423
424
425
426
427
428
429
; r15 = rdi + length - 192: the limit the helper compares r14 against.
430	lea	r15,[((-192))+rdx*1+rdi]
431
432	vmovdqu	xmm5,XMMWORD[48+rdi]
; rdx becomes a count of 16-byte blocks.
433	shr	rdx,4
434	xor	r10,r10
435	vmovdqu	xmm6,XMMWORD[32+rdi]
436	vpshufb	xmm7,xmm7,xmm0
437	vmovdqu	xmm2,XMMWORD[16+rdi]
438	vpshufb	xmm4,xmm4,xmm0
439	vmovdqu	xmm3,XMMWORD[rdi]
440	vpshufb	xmm5,xmm5,xmm0
441	vmovdqu	XMMWORD[48+rsp],xmm4
442	vpshufb	xmm6,xmm6,xmm0
443	vmovdqu	XMMWORD[64+rsp],xmm5
444	vpshufb	xmm2,xmm2,xmm0
445	vmovdqu	XMMWORD[80+rsp],xmm6
446	vpshufb	xmm3,xmm3,xmm0
447	vmovdqu	XMMWORD[96+rsp],xmm2
448	vmovdqu	XMMWORD[112+rsp],xmm3
449
450	call	_aesni_ctr32_ghash_6x
451
; The helper returns with its last six output blocks still in registers.
452	vmovups	XMMWORD[(-96)+rsi],xmm9
453	vmovups	XMMWORD[(-80)+rsi],xmm10
454	vmovups	XMMWORD[(-64)+rsi],xmm11
455	vmovups	XMMWORD[(-48)+rsi],xmm12
456	vmovups	XMMWORD[(-32)+rsi],xmm13
457	vmovups	XMMWORD[(-16)+rsi],xmm14
458
; Byte-reverse the accumulator and write it back; r9 carries a +64 bias, so
; [-64+r9] is the first 16 bytes of the caller's hash state.
459	vpshufb	xmm8,xmm8,XMMWORD[r11]
460	vmovdqu	XMMWORD[(-64)+r9],xmm8
461
462	vzeroupper
463	movaps	xmm6,XMMWORD[((-216))+rax]
464	movaps	xmm7,XMMWORD[((-200))+rax]
465	movaps	xmm8,XMMWORD[((-184))+rax]
466	movaps	xmm9,XMMWORD[((-168))+rax]
467	movaps	xmm10,XMMWORD[((-152))+rax]
468	movaps	xmm11,XMMWORD[((-136))+rax]
469	movaps	xmm12,XMMWORD[((-120))+rax]
470	movaps	xmm13,XMMWORD[((-104))+rax]
471	movaps	xmm14,XMMWORD[((-88))+rax]
472	movaps	xmm15,XMMWORD[((-72))+rax]
; The pushed registers are reloaded relative to rax because rsp was
; realigned above and no longer points at them.
473	mov	r15,QWORD[((-48))+rax]
474
475	mov	r14,QWORD[((-40))+rax]
476
477	mov	r13,QWORD[((-32))+rax]
478
479	mov	r12,QWORD[((-24))+rax]
480
481	mov	rbp,QWORD[((-16))+rax]
482
483	mov	rbx,QWORD[((-8))+rax]
484
485	lea	rsp,[rax]
486
487$L$gcm_dec_abort:
; Shared exit: rsp is the entry value on both paths, so rdi/rsi come back
; from the slots written by the prologue.
488	mov	rax,r10
489	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
490	mov	rsi,QWORD[16+rsp]
491	DB	0F3h,0C3h		;repret
492
493$L$SEH_end_aesni_gcm_decrypt:
494
; _aesni_ctr32_6x -- file-local helper, called twice by aesni_gcm_encrypt.
;
; Encrypts six consecutive counter blocks and XORs them with the 96 bytes
; at rdi, writing the 96-byte result to rsi. No hashing is done here.
;
; In:   xmm1 = current counter block, ebx = its last dword (wrap detection),
;       xmm0 = $L$bswap_mask (only read on the wrap path),
;       rcx  = key schedule + 128, ebp = round count, r11 = $L$bswap_mask
; Out:  xmm9..xmm14 = the six result blocks (also stored at the old rsi),
;       xmm1 = counter for the next call, rdi and rsi each advanced by 96
; Uses: r12, r13, xmm2-xmm6, xmm8, xmm15, flags
495ALIGN	32
496_aesni_ctr32_6x:
497
498	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]
499	vmovdqu	xmm2,XMMWORD[32+r11]
; r13 = ebp-1 iterations of $L$oop_ctr32; one more vaesenc and the
; vaesenclast follow the loop.
500	lea	r13,[((-1))+rbp]
501	vmovups	xmm15,XMMWORD[((16-128))+rcx]
502	lea	r12,[((32-128))+rcx]
503	vpxor	xmm9,xmm1,xmm4
; 100663296 = 6 << 24: a carry means byte 15 of the counter would wrap
; within these six blocks, so take the 32-bit-add path instead.
504	add	ebx,100663296
505	jc	NEAR $L$handle_ctr32_2
506	vpaddb	xmm10,xmm1,xmm2
507	vpaddb	xmm11,xmm10,xmm2
508	vpxor	xmm10,xmm10,xmm4
509	vpaddb	xmm12,xmm11,xmm2
510	vpxor	xmm11,xmm11,xmm4
511	vpaddb	xmm13,xmm12,xmm2
512	vpxor	xmm12,xmm12,xmm4
513	vpaddb	xmm14,xmm13,xmm2
514	vpxor	xmm13,xmm13,xmm4
515	vpaddb	xmm1,xmm14,xmm2
516	vpxor	xmm14,xmm14,xmm4
517	jmp	NEAR $L$oop_ctr32
518
519ALIGN	16
520$L$oop_ctr32:
; One AES round across all six blocks, then fetch the next round key.
521	vaesenc	xmm9,xmm9,xmm15
522	vaesenc	xmm10,xmm10,xmm15
523	vaesenc	xmm11,xmm11,xmm15
524	vaesenc	xmm12,xmm12,xmm15
525	vaesenc	xmm13,xmm13,xmm15
526	vaesenc	xmm14,xmm14,xmm15
527	vmovups	xmm15,XMMWORD[r12]
528	lea	r12,[16+r12]
529	dec	r13d
530	jnz	NEAR $L$oop_ctr32
531
; xmm3 = last round key; XOR it with each data block so that vaesenclast
; produces keystream XOR data directly.
532	vmovdqu	xmm3,XMMWORD[r12]
533	vaesenc	xmm9,xmm9,xmm15
534	vpxor	xmm4,xmm3,XMMWORD[rdi]
535	vaesenc	xmm10,xmm10,xmm15
536	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
537	vaesenc	xmm11,xmm11,xmm15
538	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
539	vaesenc	xmm12,xmm12,xmm15
540	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
541	vaesenc	xmm13,xmm13,xmm15
542	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
543	vaesenc	xmm14,xmm14,xmm15
544	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
545	lea	rdi,[96+rdi]
546
547	vaesenclast	xmm9,xmm9,xmm4
548	vaesenclast	xmm10,xmm10,xmm5
549	vaesenclast	xmm11,xmm11,xmm6
550	vaesenclast	xmm12,xmm12,xmm8
551	vaesenclast	xmm13,xmm13,xmm2
552	vaesenclast	xmm14,xmm14,xmm3
553	vmovups	XMMWORD[rsi],xmm9
554	vmovups	XMMWORD[16+rsi],xmm10
555	vmovups	XMMWORD[32+rsi],xmm11
556	vmovups	XMMWORD[48+rsi],xmm12
557	vmovups	XMMWORD[64+rsi],xmm13
558	vmovups	XMMWORD[80+rsi],xmm14
559	lea	rsi,[96+rsi]
560
561	DB	0F3h,0C3h		;repret
562ALIGN	32
563$L$handle_ctr32_2:
; Wrap path: byte-reverse the counter with xmm0, step it with 32-bit adds
; of $L$one_lsb (64+r11) and $L$two_lsb (48+r11), reverse each result back
; and XOR in xmm4 (first round key) before joining the round loop.
564	vpshufb	xmm6,xmm1,xmm0
565	vmovdqu	xmm5,XMMWORD[48+r11]
566	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
567	vpaddd	xmm11,xmm6,xmm5
568	vpaddd	xmm12,xmm10,xmm5
569	vpshufb	xmm10,xmm10,xmm0
570	vpaddd	xmm13,xmm11,xmm5
571	vpshufb	xmm11,xmm11,xmm0
572	vpxor	xmm10,xmm10,xmm4
573	vpaddd	xmm14,xmm12,xmm5
574	vpshufb	xmm12,xmm12,xmm0
575	vpxor	xmm11,xmm11,xmm4
576	vpaddd	xmm1,xmm13,xmm5
577	vpshufb	xmm13,xmm13,xmm0
578	vpxor	xmm12,xmm12,xmm4
579	vpshufb	xmm14,xmm14,xmm0
580	vpxor	xmm13,xmm13,xmm4
581	vpshufb	xmm1,xmm1,xmm0
582	vpxor	xmm14,xmm14,xmm4
583	jmp	NEAR $L$oop_ctr32
584
585
586
; aesni_gcm_encrypt -- exported entry point, Microsoft x64 calling convention.
;
; Arguments arrive in rcx, rdx, r8, r9, [rsp+40], [rsp+48] and are moved
; into rdi, rsi, rdx, rcx, r8, r9. As used below:
;   rdi  data XORed with the keystream
;   rsi  destination buffer; its contents are what gets hashed (r14 = rsi)
;   rdx  length in bytes
;   rcx  key schedule; a dword at offset 240 is loaded into ebp
;   r8   16-byte counter block
;   r9   hash state: the first 16 bytes are read and written back; table
;        entries starting 32 bytes in are read
; Returns in rax the byte count accumulated in r10, or 0 without touching
; any buffer when rdx < 0x60*3 (288).
; Callee-saved rbx, rbp, r12-r15, rdi, rsi and xmm6-xmm15 are preserved.
587global	aesni_gcm_encrypt
588
589ALIGN	32
590aesni_gcm_encrypt:
591	mov	QWORD[8+rsp],rdi	;WIN64 prologue
592	mov	QWORD[16+rsp],rsi
593	mov	rax,rsp
594$L$SEH_begin_aesni_gcm_encrypt:
595	mov	rdi,rcx
596	mov	rsi,rdx
597	mov	rdx,r8
598	mov	rcx,r9
599	mov	r8,QWORD[40+rsp]
600	mov	r9,QWORD[48+rsp]
601
602
603
; Builds without NDEBUG and without BORINGSSL_FIPS set byte 2 of the
; external BORINGSSL_function_hit array each time this function is entered.
604%ifndef NDEBUG
605%ifndef BORINGSSL_FIPS
606EXTERN	BORINGSSL_function_hit
607	mov	BYTE[((BORINGSSL_function_hit+2))],1
608%endif
609%endif
610	xor	r10,r10
611
612
613
614
; Below 288 bytes (three groups of six blocks): return 0 via the abort label.
615	cmp	rdx,0x60*3
616	jb	NEAR $L$gcm_enc_abort
617
; rax = rsp at entry; frame base for the saves below, untouched until the
; epilogue.
618	lea	rax,[rsp]
619
620	push	rbx
621
622	push	rbp
623
624	push	r12
625
626	push	r13
627
628	push	r14
629
630	push	r15
631
632	lea	rsp,[((-168))+rsp]
633	movaps	XMMWORD[(-216)+rax],xmm6
634	movaps	XMMWORD[(-200)+rax],xmm7
635	movaps	XMMWORD[(-184)+rax],xmm8
636	movaps	XMMWORD[(-168)+rax],xmm9
637	movaps	XMMWORD[(-152)+rax],xmm10
638	movaps	XMMWORD[(-136)+rax],xmm11
639	movaps	XMMWORD[(-120)+rax],xmm12
640	movaps	XMMWORD[(-104)+rax],xmm13
641	movaps	XMMWORD[(-88)+rax],xmm14
642	movaps	XMMWORD[(-72)+rax],xmm15
643$L$gcm_enc_body:
644	vzeroupper
645
646	vmovdqu	xmm1,XMMWORD[r8]
647	add	rsp,-128
648	mov	ebx,DWORD[12+r8]
649	lea	r11,[$L$bswap_mask]
650	lea	r14,[((-128))+rcx]
651	mov	r15,0xf80
652	lea	rcx,[128+rcx]
653	vmovdqu	xmm0,XMMWORD[r11]
654	and	rsp,-128
655	mov	ebp,DWORD[((240-128))+rcx]
656
; Same adjustment as in aesni_gcm_decrypt: if bits 7..11 of rsp are 0..767
; above those of (key schedule - 128), lower rsp by the difference.
; NOTE(review): presumably a cache-aliasing avoidance measure -- confirm
; against the generating Perl source.
657	and	r14,r15
658	and	r15,rsp
659	sub	r15,r14
660	jc	NEAR $L$enc_no_key_aliasing
661	cmp	r15,768
662	jnc	NEAR $L$enc_no_key_aliasing
663	sub	rsp,r15
664$L$enc_no_key_aliasing:
665
; Hashing reads from the destination buffer, i.e. from what this function
; has already written.
666	lea	r14,[rsi]
667
668
669
670
671
672
673
674
; r15 = rsi + length - 192: the limit the hashing helper compares r14 to.
675	lea	r15,[((-192))+rdx*1+rsi]
676
677	shr	rdx,4
678
; First group of six blocks: produce them, then byte-reverse them into the
; layout _aesni_ctr32_ghash_6x expects (first block at [112+rsp], sixth in
; xmm7).
679	call	_aesni_ctr32_6x
680	vpshufb	xmm8,xmm9,xmm0
681	vpshufb	xmm2,xmm10,xmm0
682	vmovdqu	XMMWORD[112+rsp],xmm8
683	vpshufb	xmm4,xmm11,xmm0
684	vmovdqu	XMMWORD[96+rsp],xmm2
685	vpshufb	xmm5,xmm12,xmm0
686	vmovdqu	XMMWORD[80+rsp],xmm4
687	vpshufb	xmm6,xmm13,xmm0
688	vmovdqu	XMMWORD[64+rsp],xmm5
689	vpshufb	xmm7,xmm14,xmm0
690	vmovdqu	XMMWORD[48+rsp],xmm6
691
; Second group: written to the destination only; the helper below picks it
; up from memory through r14.
692	call	_aesni_ctr32_6x
693
; Twelve blocks (0x60*2 bytes) are already done, hence the rdx and r10
; adjustments before entering the combined loop.
694	vmovdqu	xmm8,XMMWORD[r9]
695	lea	r9,[((32+32))+r9]
696	sub	rdx,12
697	mov	r10,0x60*2
698	vpshufb	xmm8,xmm8,xmm0
699
700	call	_aesni_ctr32_ghash_6x
; The helper has produced all output but has not yet hashed the last twelve
; blocks. Lines 701-872 hash them: first the six byte-reversed blocks the
; helper left at [32+rsp]..[127+rsp], then the six blocks returned in
; xmm9..xmm14, which are stored to rsi and byte-reversed here.
701	vmovdqu	xmm7,XMMWORD[32+rsp]
702	vmovdqu	xmm0,XMMWORD[r11]
703	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
704	vpunpckhqdq	xmm1,xmm7,xmm7
705	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
706	vmovups	XMMWORD[(-96)+rsi],xmm9
707	vpshufb	xmm9,xmm9,xmm0
708	vpxor	xmm1,xmm1,xmm7
709	vmovups	XMMWORD[(-80)+rsi],xmm10
710	vpshufb	xmm10,xmm10,xmm0
711	vmovups	XMMWORD[(-64)+rsi],xmm11
712	vpshufb	xmm11,xmm11,xmm0
713	vmovups	XMMWORD[(-48)+rsi],xmm12
714	vpshufb	xmm12,xmm12,xmm0
715	vmovups	XMMWORD[(-32)+rsi],xmm13
716	vpshufb	xmm13,xmm13,xmm0
717	vmovups	XMMWORD[(-16)+rsi],xmm14
718	vpshufb	xmm14,xmm14,xmm0
; Park byte-reversed xmm9; it is XORed back in at line 817.
719	vmovdqu	XMMWORD[16+rsp],xmm9
720	vmovdqu	xmm6,XMMWORD[48+rsp]
721	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
722	vpunpckhqdq	xmm2,xmm6,xmm6
723	vpclmulqdq	xmm5,xmm7,xmm3,0x00
724	vpxor	xmm2,xmm2,xmm6
725	vpclmulqdq	xmm7,xmm7,xmm3,0x11
726	vpclmulqdq	xmm1,xmm1,xmm15,0x00
727
728	vmovdqu	xmm9,XMMWORD[64+rsp]
729	vpclmulqdq	xmm4,xmm6,xmm0,0x00
730	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
731	vpxor	xmm4,xmm4,xmm5
732	vpunpckhqdq	xmm5,xmm9,xmm9
733	vpclmulqdq	xmm6,xmm6,xmm0,0x11
734	vpxor	xmm5,xmm5,xmm9
735	vpxor	xmm6,xmm6,xmm7
736	vpclmulqdq	xmm2,xmm2,xmm15,0x10
737	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
738	vpxor	xmm2,xmm2,xmm1
739
740	vmovdqu	xmm1,XMMWORD[80+rsp]
741	vpclmulqdq	xmm7,xmm9,xmm3,0x00
742	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
743	vpxor	xmm7,xmm7,xmm4
744	vpunpckhqdq	xmm4,xmm1,xmm1
745	vpclmulqdq	xmm9,xmm9,xmm3,0x11
746	vpxor	xmm4,xmm4,xmm1
747	vpxor	xmm9,xmm9,xmm6
748	vpclmulqdq	xmm5,xmm5,xmm15,0x00
749	vpxor	xmm5,xmm5,xmm2
750
751	vmovdqu	xmm2,XMMWORD[96+rsp]
752	vpclmulqdq	xmm6,xmm1,xmm0,0x00
753	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
754	vpxor	xmm6,xmm6,xmm7
755	vpunpckhqdq	xmm7,xmm2,xmm2
756	vpclmulqdq	xmm1,xmm1,xmm0,0x11
757	vpxor	xmm7,xmm7,xmm2
758	vpxor	xmm1,xmm1,xmm9
759	vpclmulqdq	xmm4,xmm4,xmm15,0x10
760	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
761	vpxor	xmm4,xmm4,xmm5
762
; The running accumulator joins the last block of this group of six.
763	vpxor	xmm8,xmm8,XMMWORD[112+rsp]
764	vpclmulqdq	xmm5,xmm2,xmm3,0x00
765	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
766	vpunpckhqdq	xmm9,xmm8,xmm8
767	vpxor	xmm5,xmm5,xmm6
768	vpclmulqdq	xmm2,xmm2,xmm3,0x11
769	vpxor	xmm9,xmm9,xmm8
770	vpxor	xmm2,xmm2,xmm1
771	vpclmulqdq	xmm7,xmm7,xmm15,0x00
772	vpxor	xmm4,xmm7,xmm4
773
; From here the multiplies for the second group (xmm14 first) overlap with
; combining and reducing the first group's result.
774	vpclmulqdq	xmm6,xmm8,xmm0,0x00
775	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
776	vpunpckhqdq	xmm1,xmm14,xmm14
777	vpclmulqdq	xmm8,xmm8,xmm0,0x11
778	vpxor	xmm1,xmm1,xmm14
779	vpxor	xmm5,xmm6,xmm5
780	vpclmulqdq	xmm9,xmm9,xmm15,0x10
781	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
782	vpxor	xmm7,xmm8,xmm2
783	vpxor	xmm6,xmm9,xmm4
784
785	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
786	vpxor	xmm9,xmm7,xmm5
787	vpclmulqdq	xmm4,xmm14,xmm3,0x00
788	vpxor	xmm6,xmm6,xmm9
789	vpunpckhqdq	xmm2,xmm13,xmm13
790	vpclmulqdq	xmm14,xmm14,xmm3,0x11
791	vpxor	xmm2,xmm2,xmm13
792	vpslldq	xmm9,xmm6,8
793	vpclmulqdq	xmm1,xmm1,xmm15,0x00
794	vpxor	xmm8,xmm5,xmm9
795	vpsrldq	xmm6,xmm6,8
796	vpxor	xmm7,xmm7,xmm6
797
798	vpclmulqdq	xmm5,xmm13,xmm0,0x00
799	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
800	vpxor	xmm5,xmm5,xmm4
801	vpunpckhqdq	xmm9,xmm12,xmm12
802	vpclmulqdq	xmm13,xmm13,xmm0,0x11
803	vpxor	xmm9,xmm9,xmm12
804	vpxor	xmm13,xmm13,xmm14
805	vpalignr	xmm14,xmm8,xmm8,8
806	vpclmulqdq	xmm2,xmm2,xmm15,0x10
807	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
808	vpxor	xmm2,xmm2,xmm1
809
810	vpclmulqdq	xmm4,xmm12,xmm3,0x00
811	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
812	vpxor	xmm4,xmm4,xmm5
813	vpunpckhqdq	xmm1,xmm11,xmm11
814	vpclmulqdq	xmm12,xmm12,xmm3,0x11
815	vpxor	xmm1,xmm1,xmm11
816	vpxor	xmm12,xmm12,xmm13
; Byte-reversed xmm9 saved at line 719 rejoins here.
817	vxorps	xmm7,xmm7,XMMWORD[16+rsp]
818	vpclmulqdq	xmm9,xmm9,xmm15,0x00
819	vpxor	xmm9,xmm9,xmm2
820
; Two folding steps, each multiplying by $L$poly (16+r11).
821	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
822	vxorps	xmm8,xmm8,xmm14
823
824	vpclmulqdq	xmm5,xmm11,xmm0,0x00
825	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
826	vpxor	xmm5,xmm5,xmm4
827	vpunpckhqdq	xmm2,xmm10,xmm10
828	vpclmulqdq	xmm11,xmm11,xmm0,0x11
829	vpxor	xmm2,xmm2,xmm10
830	vpalignr	xmm14,xmm8,xmm8,8
831	vpxor	xmm11,xmm11,xmm12
832	vpclmulqdq	xmm1,xmm1,xmm15,0x10
833	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
834	vpxor	xmm1,xmm1,xmm9
835
836	vxorps	xmm14,xmm14,xmm7
837	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
838	vxorps	xmm8,xmm8,xmm14
839
840	vpclmulqdq	xmm4,xmm10,xmm3,0x00
841	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
842	vpxor	xmm4,xmm4,xmm5
843	vpunpckhqdq	xmm9,xmm8,xmm8
844	vpclmulqdq	xmm10,xmm10,xmm3,0x11
845	vpxor	xmm9,xmm9,xmm8
846	vpxor	xmm10,xmm10,xmm11
847	vpclmulqdq	xmm2,xmm2,xmm15,0x00
848	vpxor	xmm2,xmm2,xmm1
849
850	vpclmulqdq	xmm5,xmm8,xmm0,0x00
851	vpclmulqdq	xmm7,xmm8,xmm0,0x11
852	vpxor	xmm5,xmm5,xmm4
853	vpclmulqdq	xmm6,xmm9,xmm15,0x10
854	vpxor	xmm7,xmm7,xmm10
855	vpxor	xmm6,xmm6,xmm2
856
; Combine the partial products of the second group and fold twice more
; with $L$poly to get the final 128-bit value in xmm8.
857	vpxor	xmm4,xmm7,xmm5
858	vpxor	xmm6,xmm6,xmm4
859	vpslldq	xmm1,xmm6,8
860	vmovdqu	xmm3,XMMWORD[16+r11]
861	vpsrldq	xmm6,xmm6,8
862	vpxor	xmm8,xmm5,xmm1
863	vpxor	xmm7,xmm7,xmm6
864
865	vpalignr	xmm2,xmm8,xmm8,8
866	vpclmulqdq	xmm8,xmm8,xmm3,0x10
867	vpxor	xmm8,xmm8,xmm2
868
869	vpalignr	xmm2,xmm8,xmm8,8
870	vpclmulqdq	xmm8,xmm8,xmm3,0x10
871	vpxor	xmm2,xmm2,xmm7
872	vpxor	xmm8,xmm8,xmm2
; Byte-reverse and write back; r9 carries a +64 bias, so [-64+r9] is the
; first 16 bytes of the caller's hash state.
873	vpshufb	xmm8,xmm8,XMMWORD[r11]
874	vmovdqu	XMMWORD[(-64)+r9],xmm8
875
876	vzeroupper
877	movaps	xmm6,XMMWORD[((-216))+rax]
878	movaps	xmm7,XMMWORD[((-200))+rax]
879	movaps	xmm8,XMMWORD[((-184))+rax]
880	movaps	xmm9,XMMWORD[((-168))+rax]
881	movaps	xmm10,XMMWORD[((-152))+rax]
882	movaps	xmm11,XMMWORD[((-136))+rax]
883	movaps	xmm12,XMMWORD[((-120))+rax]
884	movaps	xmm13,XMMWORD[((-104))+rax]
885	movaps	xmm14,XMMWORD[((-88))+rax]
886	movaps	xmm15,XMMWORD[((-72))+rax]
887	mov	r15,QWORD[((-48))+rax]
888
889	mov	r14,QWORD[((-40))+rax]
890
891	mov	r13,QWORD[((-32))+rax]
892
893	mov	r12,QWORD[((-24))+rax]
894
895	mov	rbp,QWORD[((-16))+rax]
896
897	mov	rbx,QWORD[((-8))+rax]
898
899	lea	rsp,[rax]
900
901$L$gcm_enc_abort:
; Shared exit: rsp is the entry value on both paths.
902	mov	rax,r10
903	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
904	mov	rsi,QWORD[16+rsp]
905	DB	0F3h,0C3h		;repret
906
907$L$SEH_end_aesni_gcm_encrypt:
; Constant table. The code loads the address of $L$bswap_mask into r11 and
; reaches the other entries by fixed displacement, so the order and the
; 16-byte size of every entry must not change:
;   r11+0   $L$bswap_mask  vpshufb control that reverses all 16 bytes
;   r11+16  $L$poly        constant used by the vpclmulqdq folding steps
;   r11+32  $L$one_msb     1 in byte 15; added with vpaddb on the fast path
;   r11+48  $L$two_lsb     2 in byte 0; added with vpaddd on the wrap path
;   r11+64  $L$one_lsb     1 in byte 0; added with vpaddd on the wrap path
; These live in the .text section opened at the top of the file.
908ALIGN	64
909$L$bswap_mask:
910DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
911$L$poly:
912DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
913$L$one_msb:
914DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
915$L$two_lsb:
916DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
917$L$one_lsb:
918DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; NUL-terminated ASCII banner:
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
919DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
920DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
921DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
922DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
923ALIGN	64
; gcm_se_handler -- exception handler named by both unwind-info records in
; .xdata at the end of this file.
;
; Inputs used: r8 and r9. NOTE(review): the displacements below match the
; Windows x64 CONTEXT (r8) and DISPATCHER_CONTEXT (r9) layouts -- Rax=120,
; Rbx=144, Rsp=152, Rbp=160, Rsi=168, Rdi=176, R12..R15=216..240, Rip=248,
; Xmm6=512; ImageBase=8, ContextRecord=40, HandlerData=56 -- confirm
; against winnt.h before relying on the field names used in the comments.
;
; Behaviour: works out which stack pointer and callee-saved register values
; the faulting function's caller expects, patches them into the context
; record, copies that record to the dispatcher's context, calls
; RtlVirtualUnwind, and returns 1 in eax.
924EXTERN	__imp_RtlVirtualUnwind
925
926ALIGN	16
927gcm_se_handler:
928	push	rsi
929	push	rdi
930	push	rbx
931	push	rbp
932	push	r12
933	push	r13
934	push	r14
935	push	r15
936	pushfq
937	sub	rsp,64
938
939	mov	rax,QWORD[120+r8]
940	mov	rbx,QWORD[248+r8]
941
942	mov	rsi,QWORD[8+r9]
943	mov	r11,QWORD[56+r9]
944
; HandlerData[0] is the image-relative address of the function's *_body
; label. A faulting address below it means nothing has been saved yet and
; the function's rax (copied from rsp at entry) is the frame base.
945	mov	r10d,DWORD[r11]
946	lea	r10,[r10*1+rsi]
947	cmp	rbx,r10
948	jb	NEAR $L$common_seh_tail
949
950	mov	rax,QWORD[152+r8]
951
; HandlerData[1] is the *_abort label. At or beyond it the epilogue has
; already restored everything, so the current rsp is the frame base.
952	mov	r10d,DWORD[4+r11]
953	lea	r10,[r10*1+rsi]
954	cmp	rbx,r10
955	jae	NEAR $L$common_seh_tail
956
; In between: the function body keeps its entry rsp in rax, and the saved
; registers sit at the same negative offsets the prologue used.
957	mov	rax,QWORD[120+r8]
958
959	mov	r15,QWORD[((-48))+rax]
960	mov	r14,QWORD[((-40))+rax]
961	mov	r13,QWORD[((-32))+rax]
962	mov	r12,QWORD[((-24))+rax]
963	mov	rbp,QWORD[((-16))+rax]
964	mov	rbx,QWORD[((-8))+rax]
965	mov	QWORD[240+r8],r15
966	mov	QWORD[232+r8],r14
967	mov	QWORD[224+r8],r13
968	mov	QWORD[216+r8],r12
969	mov	QWORD[160+r8],rbp
970	mov	QWORD[144+r8],rbx
971
; Copy the ten saved xmm registers (20 qwords) from the frame into the
; context record. The dword below is the byte sequence FC F3 48 A5, which
; decodes as `cld` followed by `rep movsq`.
972	lea	rsi,[((-216))+rax]
973	lea	rdi,[512+r8]
974	mov	ecx,20
975	DD	0xa548f3fc
976
977$L$common_seh_tail:
; rax is the function's entry rsp here, so [8+rax] and [16+rax] are the
; slots where both prologues stored rdi and rsi.
978	mov	rdi,QWORD[8+rax]
979	mov	rsi,QWORD[16+rax]
980	mov	QWORD[152+r8],rax
981	mov	QWORD[168+r8],rsi
982	mov	QWORD[176+r8],rdi
983
; Copy 154 qwords (1232 bytes) from the patched record at r8 to the record
; pointed to by [40+r9]; same cld / rep movsq encoding as above.
984	mov	rdi,QWORD[40+r9]
985	mov	rsi,r8
986	mov	ecx,154
987	DD	0xa548f3fc
988
; Marshal the RtlVirtualUnwind call: four register arguments (rcx = 0, rdx,
; r8, r9 from the record at rsi) and four stack arguments at [32+rsp] ..
; [56+rsp], above the 32 bytes left free at the bottom of the 64-byte area.
989	mov	rsi,r9
990	xor	rcx,rcx
991	mov	rdx,QWORD[8+rsi]
992	mov	r8,QWORD[rsi]
993	mov	r9,QWORD[16+rsi]
994	mov	r10,QWORD[40+rsi]
995	lea	r11,[56+rsi]
996	lea	r12,[24+rsi]
997	mov	QWORD[32+rsp],r10
998	mov	QWORD[40+rsp],r11
999	mov	QWORD[48+rsp],r12
1000	mov	QWORD[56+rsp],rcx
1001	call	QWORD[__imp_RtlVirtualUnwind]
1002
; NOTE(review): 1 is presumably ExceptionContinueSearch -- confirm.
1003	mov	eax,1
1004	add	rsp,64
1005	popfq
1006	pop	r15
1007	pop	r14
1008	pop	r13
1009	pop	r12
1010	pop	rbp
1011	pop	rbx
1012	pop	rdi
1013	pop	rsi
1014	DB	0F3h,0C3h		;repret
1015
1016
; Exception-handling tables for the two exported functions.
;
; .pdata holds one three-dword record per function: image-relative start
; address, end address, and the address of its unwind-info record in .xdata.
1017section	.pdata rdata align=4
1018ALIGN	4
1019	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
1020	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
1021	DD	$L$SEH_gcm_dec_info wrt ..imagebase
1022
1023	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
1024	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
1025	DD	$L$SEH_gcm_enc_info wrt ..imagebase
; .xdata: each record is a four-byte header, the handler's address, then two
; dwords that gcm_se_handler reads as its handler data (the *_body and
; *_abort labels bounding the region where the frame is fully set up).
; NOTE(review): header byte 9 is presumably UNWIND_INFO version 1 with the
; exception-handler flag set, and the three zero bytes mean no unwind codes
; -- confirm against the Microsoft x64 unwind-data documentation.
1026section	.xdata rdata align=8
1027ALIGN	8
1028$L$SEH_gcm_dec_info:
1029DB	9,0,0,0
1030	DD	gcm_se_handler wrt ..imagebase
1031	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
1032$L$SEH_gcm_enc_info:
1033DB	9,0,0,0
1034	DD	gcm_se_handler wrt ..imagebase
1035	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase
1036