• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
9%ifdef BORINGSSL_PREFIX
10%include "boringssl_prefix_symbols_nasm.inc"
11%endif
12section	.text code align=64
13
14
15
16ALIGN	32
; _aesni_ctr32_ghash_6x -- internal helper (no `global`), reached by `call`
; from aesni_gcm_decrypt and aesni_gcm_encrypt.
; Every pass of $L$oop6x pushes six counter blocks (xmm9-xmm14) through the
; vaesenc rounds and, interleaved with that, multiplies six previously queued,
; byte-reversed blocks by entries of the table addressed through r9 using
; vpclmulqdq, folding the result into xmm8.
; Register roles, as established by the two callers in this file:
;   rdi / rsi : input / output pointers, each advanced by 96 per pass
;   rdx       : remaining length in 16-byte units (callers do `shr rdx,4`)
;   r10       : byte total, +0x60 per pass (callers return it in rax)
;   rcx       : key schedule + 128; ebp = DWORD at key schedule + 240
;   r8        : counter block in memory; xmm1 = its value; ebx = its last dword
;   r9        : sixth argument + 64 (presumably hash state followed by a table
;               of hash-key powers -- TODO confirm against the C caller)
;   r11       : address of $L$bswap_mask
;   r14 / r15 : pointer the movbe loads read from / limit for advancing it
;   xmm7 and the caller's stack slots 48..112(rsp) : the queued blocks
; All stack offsets below are written as N+8 because the `call` that got us
; here pushed an 8-byte return address on top of the caller's frame.
17_aesni_ctr32_ghash_6x:
18
; xmm2 = 16 bytes at r11+32, i.e. $L$one_msb (a 1 in the last byte only).
19	vmovdqu	xmm2,XMMWORD[32+r11]
; Claim the first six blocks from the remaining count.
20	sub	rdx,6
21	vpxor	xmm4,xmm4,xmm4
; xmm15 = first 16 bytes of the key schedule (rcx is biased by +128).
22	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
; xmm10..xmm14 = counter+1 .. counter+5 via byte-wise adds on the last byte.
23	vpaddb	xmm10,xmm1,xmm2
24	vpaddb	xmm11,xmm10,xmm2
25	vpaddb	xmm12,xmm11,xmm2
26	vpaddb	xmm13,xmm12,xmm2
27	vpaddb	xmm14,xmm13,xmm2
; xmm9 = current counter block XOR first round key.
28	vpxor	xmm9,xmm1,xmm15
; Zero the stack slot that is XORed into xmm8 on each pass.
29	vmovdqu	XMMWORD[(16+8)+rsp],xmm4
30	jmp	NEAR $L$oop6x
31
32ALIGN	32
33$L$oop6x:
; ebx mirrors the last dword of the counter block. 100663296 = 6<<24, so this
; adds 6 to the byte that is last in memory order. CF set means that byte
; wrapped, so the byte-wise vpaddb shortcut is invalid for this group and
; $L$handle_ctr32 rebuilds the counters with 32-bit adds instead.
34	add	ebx,100663296
35	jc	NEAR $L$handle_ctr32
; xmm3 = first 16-byte table entry (at r9-32).
36	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
; xmm1 = counter for the start of the next pass.
37	vpaddb	xmm1,xmm14,xmm2
38	vpxor	xmm10,xmm10,xmm15
39	vpxor	xmm11,xmm11,xmm15
40
41$L$resume_ctr32:
; Park the next pass's counter in the caller-supplied block; $L$enc_tail
; reloads it from there.
42	vmovdqu	XMMWORD[r8],xmm1
; Start multiplying the queued block in xmm7 by xmm3; the immediate selects
; which 64-bit halves of the two operands are multiplied.
43	vpclmulqdq	xmm5,xmm7,xmm3,0x10
44	vpxor	xmm12,xmm12,xmm15
45	vmovups	xmm2,XMMWORD[((16-128))+rcx]
46	vpclmulqdq	xmm6,xmm7,xmm3,0x01
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
; r12 = (r15 >= r14, unsigned) ? 0x60 : 0, built from the CF of this cmp via
; setnc/neg/and below (the AVX instructions in between leave flags alone).
; It is then added to r14, so r14 stops advancing once it has passed r15.
64	xor	r12,r12
65	cmp	r15,r14
66
67	vaesenc	xmm9,xmm9,xmm2
68	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
69	vpxor	xmm13,xmm13,xmm15
70	vpclmulqdq	xmm1,xmm7,xmm3,0x00
71	vaesenc	xmm10,xmm10,xmm2
72	vpxor	xmm14,xmm14,xmm15
73	setnc	r12b
74	vpclmulqdq	xmm7,xmm7,xmm3,0x11
75	vaesenc	xmm11,xmm11,xmm2
76	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
77	neg	r12
78	vaesenc	xmm12,xmm12,xmm2
79	vpxor	xmm6,xmm6,xmm5
80	vpclmulqdq	xmm5,xmm0,xmm3,0x00
81	vpxor	xmm8,xmm8,xmm4
82	vaesenc	xmm13,xmm13,xmm2
83	vpxor	xmm4,xmm1,xmm5
84	and	r12,0x60
85	vmovups	xmm15,XMMWORD[((32-128))+rcx]
86	vpclmulqdq	xmm1,xmm0,xmm3,0x10
87	vaesenc	xmm14,xmm14,xmm2
88
89	vpclmulqdq	xmm2,xmm0,xmm3,0x01
90	lea	r14,[r12*1+r14]
91	vaesenc	xmm9,xmm9,xmm15
92	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
93	vpclmulqdq	xmm3,xmm0,xmm3,0x11
94	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
95	vaesenc	xmm10,xmm10,xmm15
; movbe is a byte-reversing load. Over the rest of the pass the 96 bytes at
; r14 are copied, 8 bytes at a time and byte-reversed, into the stack slots
; 32+8 .. 120+8(rsp), which queues them for the next pass's multiplications.
96	movbe	r13,QWORD[88+r14]
97	vaesenc	xmm11,xmm11,xmm15
98	movbe	r12,QWORD[80+r14]
99	vaesenc	xmm12,xmm12,xmm15
100	mov	QWORD[((32+8))+rsp],r13
101	vaesenc	xmm13,xmm13,xmm15
102	mov	QWORD[((40+8))+rsp],r12
103	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
104	vaesenc	xmm14,xmm14,xmm15
105
; Same pattern for each following round: load the next round key into xmm15,
; apply it to all six blocks, and slot one queued-block multiplication and its
; accumulating XORs (into xmm4, xmm6, xmm7) between the vaesenc instructions.
106	vmovups	xmm15,XMMWORD[((48-128))+rcx]
107	vpxor	xmm6,xmm6,xmm1
108	vpclmulqdq	xmm1,xmm0,xmm5,0x00
109	vaesenc	xmm9,xmm9,xmm15
110	vpxor	xmm6,xmm6,xmm2
111	vpclmulqdq	xmm2,xmm0,xmm5,0x10
112	vaesenc	xmm10,xmm10,xmm15
113	vpxor	xmm7,xmm7,xmm3
114	vpclmulqdq	xmm3,xmm0,xmm5,0x01
115	vaesenc	xmm11,xmm11,xmm15
116	vpclmulqdq	xmm5,xmm0,xmm5,0x11
117	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
118	vaesenc	xmm12,xmm12,xmm15
119	vaesenc	xmm13,xmm13,xmm15
120	vpxor	xmm4,xmm4,xmm1
121	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
122	vaesenc	xmm14,xmm14,xmm15
123
124	vmovups	xmm15,XMMWORD[((64-128))+rcx]
125	vpxor	xmm6,xmm6,xmm2
126	vpclmulqdq	xmm2,xmm0,xmm1,0x00
127	vaesenc	xmm9,xmm9,xmm15
128	vpxor	xmm6,xmm6,xmm3
129	vpclmulqdq	xmm3,xmm0,xmm1,0x10
130	vaesenc	xmm10,xmm10,xmm15
131	movbe	r13,QWORD[72+r14]
132	vpxor	xmm7,xmm7,xmm5
133	vpclmulqdq	xmm5,xmm0,xmm1,0x01
134	vaesenc	xmm11,xmm11,xmm15
135	movbe	r12,QWORD[64+r14]
136	vpclmulqdq	xmm1,xmm0,xmm1,0x11
137	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
138	vaesenc	xmm12,xmm12,xmm15
139	mov	QWORD[((48+8))+rsp],r13
140	vaesenc	xmm13,xmm13,xmm15
141	mov	QWORD[((56+8))+rsp],r12
142	vpxor	xmm4,xmm4,xmm2
143	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
144	vaesenc	xmm14,xmm14,xmm15
145
146	vmovups	xmm15,XMMWORD[((80-128))+rcx]
147	vpxor	xmm6,xmm6,xmm3
148	vpclmulqdq	xmm3,xmm0,xmm2,0x00
149	vaesenc	xmm9,xmm9,xmm15
150	vpxor	xmm6,xmm6,xmm5
151	vpclmulqdq	xmm5,xmm0,xmm2,0x10
152	vaesenc	xmm10,xmm10,xmm15
153	movbe	r13,QWORD[56+r14]
154	vpxor	xmm7,xmm7,xmm1
155	vpclmulqdq	xmm1,xmm0,xmm2,0x01
; The slot at 112+8(rsp) holds the oldest queued block; it is combined with
; the running value in xmm8 before xmm8 is multiplied by the last table entry.
156	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]
157	vaesenc	xmm11,xmm11,xmm15
158	movbe	r12,QWORD[48+r14]
159	vpclmulqdq	xmm2,xmm0,xmm2,0x11
160	vaesenc	xmm12,xmm12,xmm15
161	mov	QWORD[((64+8))+rsp],r13
162	vaesenc	xmm13,xmm13,xmm15
163	mov	QWORD[((72+8))+rsp],r12
164	vpxor	xmm4,xmm4,xmm3
165	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
166	vaesenc	xmm14,xmm14,xmm15
167
168	vmovups	xmm15,XMMWORD[((96-128))+rcx]
169	vpxor	xmm6,xmm6,xmm5
170	vpclmulqdq	xmm5,xmm8,xmm3,0x10
171	vaesenc	xmm9,xmm9,xmm15
172	vpxor	xmm6,xmm6,xmm1
173	vpclmulqdq	xmm1,xmm8,xmm3,0x01
174	vaesenc	xmm10,xmm10,xmm15
175	movbe	r13,QWORD[40+r14]
176	vpxor	xmm7,xmm7,xmm2
177	vpclmulqdq	xmm2,xmm8,xmm3,0x00
178	vaesenc	xmm11,xmm11,xmm15
179	movbe	r12,QWORD[32+r14]
180	vpclmulqdq	xmm8,xmm8,xmm3,0x11
181	vaesenc	xmm12,xmm12,xmm15
182	mov	QWORD[((80+8))+rsp],r13
183	vaesenc	xmm13,xmm13,xmm15
184	mov	QWORD[((88+8))+rsp],r12
185	vpxor	xmm6,xmm6,xmm5
186	vaesenc	xmm14,xmm14,xmm15
187	vpxor	xmm6,xmm6,xmm1
188
189	vmovups	xmm15,XMMWORD[((112-128))+rcx]
; xmm6 holds the summed cross products; its low half is shifted up into xmm4
; here and its high half is shifted down into xmm7 further below.
190	vpslldq	xmm5,xmm6,8
191	vpxor	xmm4,xmm4,xmm2
; xmm3 = 16 bytes at r11+16, i.e. $L$poly, used by the vpalignr/vpclmulqdq
; 0x10 pairs that follow (here and again in $L$enc_tail).
192	vmovdqu	xmm3,XMMWORD[16+r11]
193
194	vaesenc	xmm9,xmm9,xmm15
195	vpxor	xmm7,xmm7,xmm8
196	vaesenc	xmm10,xmm10,xmm15
197	vpxor	xmm4,xmm4,xmm5
198	movbe	r13,QWORD[24+r14]
199	vaesenc	xmm11,xmm11,xmm15
200	movbe	r12,QWORD[16+r14]
201	vpalignr	xmm0,xmm4,xmm4,8
202	vpclmulqdq	xmm4,xmm4,xmm3,0x10
203	mov	QWORD[((96+8))+rsp],r13
204	vaesenc	xmm12,xmm12,xmm15
205	mov	QWORD[((104+8))+rsp],r12
206	vaesenc	xmm13,xmm13,xmm15
207	vmovups	xmm1,XMMWORD[((128-128))+rcx]
208	vaesenc	xmm14,xmm14,xmm15
209
210	vaesenc	xmm9,xmm9,xmm1
211	vmovups	xmm15,XMMWORD[((144-128))+rcx]
212	vaesenc	xmm10,xmm10,xmm1
213	vpsrldq	xmm6,xmm6,8
214	vaesenc	xmm11,xmm11,xmm1
215	vpxor	xmm7,xmm7,xmm6
216	vaesenc	xmm12,xmm12,xmm1
217	vpxor	xmm4,xmm4,xmm0
218	movbe	r13,QWORD[8+r14]
219	vaesenc	xmm13,xmm13,xmm1
220	movbe	r12,QWORD[r14]
221	vaesenc	xmm14,xmm14,xmm1
222	vmovups	xmm1,XMMWORD[((160-128))+rcx]
; ebp is the DWORD the callers read from key schedule + 240. Below 11 goes
; straight to the tail; exactly 11 runs two more rounds first (the `je`
; further down reuses these flags, which no instruction in between changes);
; anything larger runs four more rounds.
223	cmp	ebp,11
224	jb	NEAR $L$enc_tail
225
226	vaesenc	xmm9,xmm9,xmm15
227	vaesenc	xmm10,xmm10,xmm15
228	vaesenc	xmm11,xmm11,xmm15
229	vaesenc	xmm12,xmm12,xmm15
230	vaesenc	xmm13,xmm13,xmm15
231	vaesenc	xmm14,xmm14,xmm15
232
233	vaesenc	xmm9,xmm9,xmm1
234	vaesenc	xmm10,xmm10,xmm1
235	vaesenc	xmm11,xmm11,xmm1
236	vaesenc	xmm12,xmm12,xmm1
237	vaesenc	xmm13,xmm13,xmm1
238	vmovups	xmm15,XMMWORD[((176-128))+rcx]
239	vaesenc	xmm14,xmm14,xmm1
240	vmovups	xmm1,XMMWORD[((192-128))+rcx]
241	je	NEAR $L$enc_tail
242
243	vaesenc	xmm9,xmm9,xmm15
244	vaesenc	xmm10,xmm10,xmm15
245	vaesenc	xmm11,xmm11,xmm15
246	vaesenc	xmm12,xmm12,xmm15
247	vaesenc	xmm13,xmm13,xmm15
248	vaesenc	xmm14,xmm14,xmm15
249
250	vaesenc	xmm9,xmm9,xmm1
251	vaesenc	xmm10,xmm10,xmm1
252	vaesenc	xmm11,xmm11,xmm1
253	vaesenc	xmm12,xmm12,xmm1
254	vaesenc	xmm13,xmm13,xmm1
255	vmovups	xmm15,XMMWORD[((208-128))+rcx]
256	vaesenc	xmm14,xmm14,xmm1
257	vmovups	xmm1,XMMWORD[((224-128))+rcx]
258	jmp	NEAR $L$enc_tail
259
260ALIGN	32
; Slow path for a group whose counter's last byte wraps: byte-reverse the
; counter with the mask at r11 ($L$bswap_mask), build counter+1 .. counter+6
; with 32-bit adds of $L$one_lsb (r11+64) and $L$two_lsb (r11+48), and
; byte-reverse the results back. Also redoes the work skipped in $L$oop6x
; (loading xmm3, XORing xmm10/xmm11 with the first round key).
261$L$handle_ctr32:
262	vmovdqu	xmm0,XMMWORD[r11]
263	vpshufb	xmm6,xmm1,xmm0
264	vmovdqu	xmm5,XMMWORD[48+r11]
265	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
266	vpaddd	xmm11,xmm6,xmm5
267	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
268	vpaddd	xmm12,xmm10,xmm5
269	vpshufb	xmm10,xmm10,xmm0
270	vpaddd	xmm13,xmm11,xmm5
271	vpshufb	xmm11,xmm11,xmm0
272	vpxor	xmm10,xmm10,xmm15
273	vpaddd	xmm14,xmm12,xmm5
274	vpshufb	xmm12,xmm12,xmm0
275	vpxor	xmm11,xmm11,xmm15
276	vpaddd	xmm1,xmm13,xmm5
277	vpshufb	xmm13,xmm13,xmm0
278	vpshufb	xmm14,xmm14,xmm0
279	vpshufb	xmm1,xmm1,xmm0
280	jmp	NEAR $L$resume_ctr32
281
282ALIGN	32
; Final rounds. On entry xmm15 is the second-to-last round key and xmm1 the
; last one. Each input block at rdi is XORed with xmm1 first and that value is
; used as the vaesenclast round key; since vaesenclast XORs its key operand
; into the result, xmm9-xmm14 end up as keystream XOR input.
283$L$enc_tail:
284	vaesenc	xmm9,xmm9,xmm15
; Save xmm7 to the 16+8(rsp) slot; it is XORed into xmm8 on the next pass or
; in $L$6x_done.
285	vmovdqu	XMMWORD[(16+8)+rsp],xmm7
286	vpalignr	xmm8,xmm4,xmm4,8
287	vaesenc	xmm10,xmm10,xmm15
288	vpclmulqdq	xmm4,xmm4,xmm3,0x10
289	vpxor	xmm2,xmm1,XMMWORD[rdi]
290	vaesenc	xmm11,xmm11,xmm15
291	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
292	vaesenc	xmm12,xmm12,xmm15
293	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
294	vaesenc	xmm13,xmm13,xmm15
295	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
296	vaesenc	xmm14,xmm14,xmm15
297	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
298	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
; Reload the counter saved at $L$resume_ctr32 and precompute the next group's
; counters (xmm1, xmm0, xmm5, xmm6, xmm7, xmm3) with the byte-wise increment.
299	vmovdqu	xmm1,XMMWORD[r8]
300
301	vaesenclast	xmm9,xmm9,xmm2
302	vmovdqu	xmm2,XMMWORD[32+r11]
303	vaesenclast	xmm10,xmm10,xmm0
304	vpaddb	xmm0,xmm1,xmm2
305	mov	QWORD[((112+8))+rsp],r13
306	lea	rdi,[96+rdi]
307	vaesenclast	xmm11,xmm11,xmm5
308	vpaddb	xmm5,xmm0,xmm2
309	mov	QWORD[((120+8))+rsp],r12
310	lea	rsi,[96+rsi]
311	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
312	vaesenclast	xmm12,xmm12,xmm6
313	vpaddb	xmm6,xmm5,xmm2
314	vaesenclast	xmm13,xmm13,xmm7
315	vpaddb	xmm7,xmm6,xmm2
316	vaesenclast	xmm14,xmm14,xmm3
317	vpaddb	xmm3,xmm7,xmm2
318
; Account for the 96 bytes just produced, then claim six more blocks; a
; borrow means fewer than six remain and the loop ends.
319	add	r10,0x60
320	sub	rdx,0x6
321	jc	NEAR $L$6x_done
322
; Store this pass's output (rsi already points past it) and move the
; precomputed counters into xmm9-xmm14 for the next pass.
323	vmovups	XMMWORD[(-96)+rsi],xmm9
324	vpxor	xmm9,xmm1,xmm15
325	vmovups	XMMWORD[(-80)+rsi],xmm10
326	vmovdqa	xmm10,xmm0
327	vmovups	XMMWORD[(-64)+rsi],xmm11
328	vmovdqa	xmm11,xmm5
329	vmovups	XMMWORD[(-48)+rsi],xmm12
330	vmovdqa	xmm12,xmm6
331	vmovups	XMMWORD[(-32)+rsi],xmm13
332	vmovdqa	xmm13,xmm7
333	vmovups	XMMWORD[(-16)+rsi],xmm14
334	vmovdqa	xmm14,xmm3
; xmm7 = the byte-reversed block queued at 32+8(rsp) during this pass.
335	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]
336	jmp	NEAR $L$oop6x
337
; Exit: fold the two pending terms into xmm8. The last six output blocks are
; left in xmm9-xmm14; both callers store them after the call returns.
338$L$6x_done:
339	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
340	vpxor	xmm8,xmm8,xmm4
341
342	DB	0F3h,0C3h		;repret
343
344
345global	aesni_gcm_decrypt
346
347ALIGN	32
; aesni_gcm_decrypt -- exported entry point.
; Arguments arrive in rcx, rdx, r8, r9 plus the stack slots at 40(rsp) and
; 48(rsp), and are moved straight into rdi, rsi, rdx, rcx, r8, r9. From then on:
;   rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule,
;   r8  = 16-byte counter block, r9 = sixth argument (presumably hash state
;         followed by the hash-key table -- TODO confirm against the C caller).
; Returns in rax the byte total kept in r10: 0 if the length is below 0x60,
; otherwise 0x60 for every pass made by _aesni_ctr32_ghash_6x.
; In this direction the data being hashed is the input itself (r14 = rdi).
348aesni_gcm_decrypt:
; rdi and rsi are saved in the two slots just above the return address and
; restored from there at $L$gcm_dec_abort.
349	mov	QWORD[8+rsp],rdi	;WIN64 prologue
350	mov	QWORD[16+rsp],rsi
351	mov	rax,rsp
352$L$SEH_begin_aesni_gcm_decrypt:
353	mov	rdi,rcx
354	mov	rsi,rdx
355	mov	rdx,r8
356	mov	rcx,r9
357	mov	r8,QWORD[40+rsp]
358	mov	r9,QWORD[48+rsp]
359
360
361
362	xor	r10,r10
363
364
365
; Fewer than 96 bytes: nothing is processed and 0 is returned.
366	cmp	rdx,0x60
367	jb	NEAR $L$gcm_dec_abort
368
; rax keeps the entry rsp for the whole function; the epilogue and
; gcm_se_handler address every saved register relative to it.
369	lea	rax,[rsp]
370
371	push	rbx
372
373	push	rbp
374
375	push	r12
376
377	push	r13
378
379	push	r14
380
381	push	r15
382
; Six pushes (48 bytes) plus 168 puts rsp at rax-216, where the ten
; non-volatile XMM registers are saved (10 * 16 = 160 bytes).
383	lea	rsp,[((-168))+rsp]
384	movaps	XMMWORD[(-216)+rax],xmm6
385	movaps	XMMWORD[(-200)+rax],xmm7
386	movaps	XMMWORD[(-184)+rax],xmm8
387	movaps	XMMWORD[(-168)+rax],xmm9
388	movaps	XMMWORD[(-152)+rax],xmm10
389	movaps	XMMWORD[(-136)+rax],xmm11
390	movaps	XMMWORD[(-120)+rax],xmm12
391	movaps	XMMWORD[(-104)+rax],xmm13
392	movaps	XMMWORD[(-88)+rax],xmm14
393	movaps	XMMWORD[(-72)+rax],xmm15
394$L$gcm_dec_body:
395	vzeroupper
396
; xmm1 = counter block, ebx = its last four bytes as a little-endian dword.
397	vmovdqu	xmm1,XMMWORD[r8]
; Reserve 128 bytes of scratch and (two instructions on) align rsp to 128.
398	add	rsp,-128
399	mov	ebx,DWORD[12+r8]
400	lea	r11,[$L$bswap_mask]
401	lea	r14,[((-128))+rcx]
402	mov	r15,0xf80
; xmm8 = 16 bytes at the sixth argument, byte-reversed below with xmm0.
403	vmovdqu	xmm8,XMMWORD[r9]
404	and	rsp,-128
405	vmovdqu	xmm0,XMMWORD[r11]
; Bias rcx by +128 and r9 by +64; the helper's displacements assume this.
406	lea	rcx,[128+rcx]
407	lea	r9,[((32+32))+r9]
; ebp = DWORD at key schedule + 240; selects the number of AES rounds.
408	mov	ebp,DWORD[((240-128))+rcx]
409	vpshufb	xmm8,xmm8,xmm0
410
; Compare bits 7..11 of rsp with those of (key schedule - 128). When rsp's
; value is at or above the key's by less than 768, rsp is lowered by the
; difference; going by the label name this keeps the scratch frame from
; aliasing the key schedule (the intended cache effect is not visible here).
411	and	r14,r15
412	and	r15,rsp
413	sub	r15,r14
414	jc	NEAR $L$dec_no_key_aliasing
415	cmp	r15,768
416	jnc	NEAR $L$dec_no_key_aliasing
417	sub	rsp,r15
418$L$dec_no_key_aliasing:
419
; Queue the first six input blocks, byte-reversed: the block at rdi+80 stays
; in xmm7, those at rdi+64 .. rdi+0 go to 48(rsp) .. 112(rsp).
420	vmovdqu	xmm7,XMMWORD[80+rdi]
; r14 = where the helper's movbe loads read the data to hash next.
421	lea	r14,[rdi]
422	vmovdqu	xmm4,XMMWORD[64+rdi]
423
424
425
426
427
428
429
; r15 = input + length - 192: the helper advances r14 only while r14 <= r15.
430	lea	r15,[((-192))+rdx*1+rdi]
431
432	vmovdqu	xmm5,XMMWORD[48+rdi]
; Convert the byte length into a count of 16-byte blocks.
433	shr	rdx,4
434	xor	r10,r10
435	vmovdqu	xmm6,XMMWORD[32+rdi]
436	vpshufb	xmm7,xmm7,xmm0
437	vmovdqu	xmm2,XMMWORD[16+rdi]
438	vpshufb	xmm4,xmm4,xmm0
439	vmovdqu	xmm3,XMMWORD[rdi]
440	vpshufb	xmm5,xmm5,xmm0
441	vmovdqu	XMMWORD[48+rsp],xmm4
442	vpshufb	xmm6,xmm6,xmm0
443	vmovdqu	XMMWORD[64+rsp],xmm5
444	vpshufb	xmm2,xmm2,xmm0
445	vmovdqu	XMMWORD[80+rsp],xmm6
446	vpshufb	xmm3,xmm3,xmm0
447	vmovdqu	XMMWORD[96+rsp],xmm2
448	vmovdqu	XMMWORD[112+rsp],xmm3
449
450	call	_aesni_ctr32_ghash_6x
451
; The helper returns with its last six output blocks still in registers and
; rsi already advanced past their destination.
452	vmovups	XMMWORD[(-96)+rsi],xmm9
453	vmovups	XMMWORD[(-80)+rsi],xmm10
454	vmovups	XMMWORD[(-64)+rsi],xmm11
455	vmovups	XMMWORD[(-48)+rsi],xmm12
456	vmovups	XMMWORD[(-32)+rsi],xmm13
457	vmovups	XMMWORD[(-16)+rsi],xmm14
458
; Byte-reverse xmm8 back and store it where it was loaded from (r9 was
; advanced by 64 above, hence the -64).
459	vpshufb	xmm8,xmm8,XMMWORD[r11]
460	vmovdqu	XMMWORD[(-64)+r9],xmm8
461
; Epilogue: restore the XMM and general-purpose registers saved in the
; prologue, all addressed from rax (the entry rsp), then reset rsp.
462	vzeroupper
463	movaps	xmm6,XMMWORD[((-216))+rax]
464	movaps	xmm7,XMMWORD[((-200))+rax]
465	movaps	xmm8,XMMWORD[((-184))+rax]
466	movaps	xmm9,XMMWORD[((-168))+rax]
467	movaps	xmm10,XMMWORD[((-152))+rax]
468	movaps	xmm11,XMMWORD[((-136))+rax]
469	movaps	xmm12,XMMWORD[((-120))+rax]
470	movaps	xmm13,XMMWORD[((-104))+rax]
471	movaps	xmm14,XMMWORD[((-88))+rax]
472	movaps	xmm15,XMMWORD[((-72))+rax]
473	mov	r15,QWORD[((-48))+rax]
474
475	mov	r14,QWORD[((-40))+rax]
476
477	mov	r13,QWORD[((-32))+rax]
478
479	mov	r12,QWORD[((-24))+rax]
480
481	mov	rbp,QWORD[((-16))+rax]
482
483	mov	rbx,QWORD[((-8))+rax]
484
485	lea	rsp,[rax]
486
; Common exit, also used by the short-input path: rsp equals its entry value
; here, so the saved rdi/rsi can be read back from 8(rsp)/16(rsp).
487$L$gcm_dec_abort:
488	mov	rax,r10
489	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
490	mov	rsi,QWORD[16+rsp]
491	DB	0F3h,0C3h		;repret
492
493$L$SEH_end_aesni_gcm_decrypt:
494
495ALIGN	32
; _aesni_ctr32_6x -- internal helper (no `global`), called twice by
; aesni_gcm_encrypt. It produces one group of six output blocks with no
; hashing: six consecutive counter blocks starting at xmm1 are encrypted,
; XORed with the 96 bytes at rdi and written to rsi.
; Inputs : rdi = input, rsi = output, rcx = key schedule + 128,
;          ebp = DWORD from key schedule + 240, xmm1 = counter block,
;          ebx = its last dword, r11 = $L$bswap_mask, and xmm0 = the mask
;          itself (only used on the $L$handle_ctr32_2 path).
; Outputs: rdi and rsi advanced by 96, xmm1 = counter for the next group,
;          xmm9-xmm14 = the six output blocks just stored, ebx advanced.
; Clobbers r12, r13, xmm2-xmm6, xmm8, xmm15 and the flags.
496_aesni_ctr32_6x:
497
; xmm4 = first round key, xmm2 = $L$one_msb, xmm15 = second round key.
498	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]
499	vmovdqu	xmm2,XMMWORD[32+r11]
; r13 = ebp - 1 = trip count of $L$oop_ctr32; r12 walks the round keys
; starting at the third one.
500	lea	r13,[((-1))+rbp]
501	vmovups	xmm15,XMMWORD[((16-128))+rcx]
502	lea	r12,[((32-128))+rcx]
503	vpxor	xmm9,xmm1,xmm4
; Add 6 to the counter's last byte (100663296 = 6<<24 on the little-endian
; dword in ebx). CF means that byte wraps inside this group, so the counters
; must be built with 32-bit adds at $L$handle_ctr32_2 instead.
504	add	ebx,100663296
505	jc	NEAR $L$handle_ctr32_2
; Fast path: counter+1 .. counter+5 by byte-wise adds, each XORed with the
; first round key; xmm1 becomes counter+6 for the next call.
506	vpaddb	xmm10,xmm1,xmm2
507	vpaddb	xmm11,xmm10,xmm2
508	vpxor	xmm10,xmm10,xmm4
509	vpaddb	xmm12,xmm11,xmm2
510	vpxor	xmm11,xmm11,xmm4
511	vpaddb	xmm13,xmm12,xmm2
512	vpxor	xmm12,xmm12,xmm4
513	vpaddb	xmm14,xmm13,xmm2
514	vpxor	xmm13,xmm13,xmm4
515	vpaddb	xmm1,xmm14,xmm2
516	vpxor	xmm14,xmm14,xmm4
517	jmp	NEAR $L$oop_ctr32
518
519ALIGN	16
; One AES round on all six blocks per trip, then fetch the next round key.
520$L$oop_ctr32:
521	vaesenc	xmm9,xmm9,xmm15
522	vaesenc	xmm10,xmm10,xmm15
523	vaesenc	xmm11,xmm11,xmm15
524	vaesenc	xmm12,xmm12,xmm15
525	vaesenc	xmm13,xmm13,xmm15
526	vaesenc	xmm14,xmm14,xmm15
527	vmovups	xmm15,XMMWORD[r12]
528	lea	r12,[16+r12]
529	dec	r13d
530	jnz	NEAR $L$oop_ctr32
531
; r12 now points at the last round key (xmm3). One more vaesenc with xmm15,
; then vaesenclast with (last round key XOR input block) as the key operand,
; which yields keystream XOR input in a single step.
532	vmovdqu	xmm3,XMMWORD[r12]
533	vaesenc	xmm9,xmm9,xmm15
534	vpxor	xmm4,xmm3,XMMWORD[rdi]
535	vaesenc	xmm10,xmm10,xmm15
536	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
537	vaesenc	xmm11,xmm11,xmm15
538	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
539	vaesenc	xmm12,xmm12,xmm15
540	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
541	vaesenc	xmm13,xmm13,xmm15
542	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
543	vaesenc	xmm14,xmm14,xmm15
544	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
545	lea	rdi,[96+rdi]
546
547	vaesenclast	xmm9,xmm9,xmm4
548	vaesenclast	xmm10,xmm10,xmm5
549	vaesenclast	xmm11,xmm11,xmm6
550	vaesenclast	xmm12,xmm12,xmm8
551	vaesenclast	xmm13,xmm13,xmm2
552	vaesenclast	xmm14,xmm14,xmm3
; Write the 96 output bytes and advance the output pointer.
553	vmovups	XMMWORD[rsi],xmm9
554	vmovups	XMMWORD[16+rsi],xmm10
555	vmovups	XMMWORD[32+rsi],xmm11
556	vmovups	XMMWORD[48+rsi],xmm12
557	vmovups	XMMWORD[64+rsi],xmm13
558	vmovups	XMMWORD[80+rsi],xmm14
559	lea	rsi,[96+rsi]
560
561	DB	0F3h,0C3h		;repret
562ALIGN	32
; Slow path: byte-reverse the counter with xmm0, add $L$one_lsb (r11+64) and
; $L$two_lsb (r11+48) as 32-bit lanes to get counter+1 .. counter+6,
; byte-reverse back, and XOR each block with the first round key (xmm4).
563$L$handle_ctr32_2:
564	vpshufb	xmm6,xmm1,xmm0
565	vmovdqu	xmm5,XMMWORD[48+r11]
566	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
567	vpaddd	xmm11,xmm6,xmm5
568	vpaddd	xmm12,xmm10,xmm5
569	vpshufb	xmm10,xmm10,xmm0
570	vpaddd	xmm13,xmm11,xmm5
571	vpshufb	xmm11,xmm11,xmm0
572	vpxor	xmm10,xmm10,xmm4
573	vpaddd	xmm14,xmm12,xmm5
574	vpshufb	xmm12,xmm12,xmm0
575	vpxor	xmm11,xmm11,xmm4
576	vpaddd	xmm1,xmm13,xmm5
577	vpshufb	xmm13,xmm13,xmm0
578	vpxor	xmm12,xmm12,xmm4
579	vpshufb	xmm14,xmm14,xmm0
580	vpxor	xmm13,xmm13,xmm4
581	vpshufb	xmm1,xmm1,xmm0
582	vpxor	xmm14,xmm14,xmm4
583	jmp	NEAR $L$oop_ctr32
584
585
586
587global	aesni_gcm_encrypt
588
589ALIGN	32
; aesni_gcm_encrypt -- exported entry point.
; Arguments arrive in rcx, rdx, r8, r9 plus the stack slots at 40(rsp) and
; 48(rsp), and are moved straight into rdi, rsi, rdx, rcx, r8, r9. From then on:
;   rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule,
;   r8  = 16-byte counter block, r9 = sixth argument (presumably hash state
;         followed by the hash-key table -- TODO confirm against the C caller).
; Returns in rax the byte total kept in r10: 0 if the length is below
; 0x60*3 (288), otherwise 0xC0 for the two priming groups plus 0x60 for every
; pass made by _aesni_ctr32_ghash_6x.
; In this direction the data being hashed is the output (r14 = rsi), so two
; groups are produced by _aesni_ctr32_6x before the interleaved loop starts,
; and the last two groups are hashed by straight-line code after it returns.
590aesni_gcm_encrypt:
; rdi and rsi are saved in the two slots just above the return address and
; restored from there at $L$gcm_enc_abort.
591	mov	QWORD[8+rsp],rdi	;WIN64 prologue
592	mov	QWORD[16+rsp],rsi
593	mov	rax,rsp
594$L$SEH_begin_aesni_gcm_encrypt:
595	mov	rdi,rcx
596	mov	rsi,rdx
597	mov	rdx,r8
598	mov	rcx,r9
599	mov	r8,QWORD[40+rsp]
600	mov	r9,QWORD[48+rsp]
601
602
603
; Test-only hook: sets the byte at BORINGSSL_function_hit+2 to 1 when this
; function is entered.
604%ifdef BORINGSSL_DISPATCH_TEST
605EXTERN	BORINGSSL_function_hit
606	mov	BYTE[((BORINGSSL_function_hit+2))],1
607%endif
608	xor	r10,r10
609
610
611
612
; Fewer than 288 bytes: nothing is processed and 0 is returned.
613	cmp	rdx,0x60*3
614	jb	NEAR $L$gcm_enc_abort
615
; rax keeps the entry rsp for the whole function; the epilogue and
; gcm_se_handler address every saved register relative to it.
616	lea	rax,[rsp]
617
618	push	rbx
619
620	push	rbp
621
622	push	r12
623
624	push	r13
625
626	push	r14
627
628	push	r15
629
; Six pushes (48 bytes) plus 168 puts rsp at rax-216, where the ten
; non-volatile XMM registers are saved.
630	lea	rsp,[((-168))+rsp]
631	movaps	XMMWORD[(-216)+rax],xmm6
632	movaps	XMMWORD[(-200)+rax],xmm7
633	movaps	XMMWORD[(-184)+rax],xmm8
634	movaps	XMMWORD[(-168)+rax],xmm9
635	movaps	XMMWORD[(-152)+rax],xmm10
636	movaps	XMMWORD[(-136)+rax],xmm11
637	movaps	XMMWORD[(-120)+rax],xmm12
638	movaps	XMMWORD[(-104)+rax],xmm13
639	movaps	XMMWORD[(-88)+rax],xmm14
640	movaps	XMMWORD[(-72)+rax],xmm15
641$L$gcm_enc_body:
642	vzeroupper
643
; xmm1 = counter block, ebx = its last four bytes as a little-endian dword.
644	vmovdqu	xmm1,XMMWORD[r8]
; Reserve 128 bytes of scratch and (a few instructions on) align rsp to 128.
645	add	rsp,-128
646	mov	ebx,DWORD[12+r8]
647	lea	r11,[$L$bswap_mask]
648	lea	r14,[((-128))+rcx]
649	mov	r15,0xf80
; Bias rcx by +128; the helpers' round-key displacements assume this.
650	lea	rcx,[128+rcx]
651	vmovdqu	xmm0,XMMWORD[r11]
652	and	rsp,-128
; ebp = DWORD at key schedule + 240; selects the number of AES rounds.
653	mov	ebp,DWORD[((240-128))+rcx]
654
; Compare bits 7..11 of rsp with those of (key schedule - 128). When rsp's
; value is at or above the key's by less than 768, rsp is lowered by the
; difference; going by the label name this keeps the scratch frame from
; aliasing the key schedule (the intended cache effect is not visible here).
655	and	r14,r15
656	and	r15,rsp
657	sub	r15,r14
658	jc	NEAR $L$enc_no_key_aliasing
659	cmp	r15,768
660	jnc	NEAR $L$enc_no_key_aliasing
661	sub	rsp,r15
662$L$enc_no_key_aliasing:
663
; r14 = where the hashing helper's movbe loads read from: the output buffer.
664	lea	r14,[rsi]
665
666
667
668
669
670
671
672
; r15 = output + length - 192: the helper advances r14 only while r14 <= r15.
673	lea	r15,[((-192))+rdx*1+rsi]
674
; Convert the byte length into a count of 16-byte blocks.
675	shr	rdx,4
676
; First priming group. Its six output blocks come back in xmm9-xmm14; queue
; them byte-reversed: five on the stack at 112 .. 48(rsp), the sixth in xmm7.
677	call	_aesni_ctr32_6x
678	vpshufb	xmm8,xmm9,xmm0
679	vpshufb	xmm2,xmm10,xmm0
680	vmovdqu	XMMWORD[112+rsp],xmm8
681	vpshufb	xmm4,xmm11,xmm0
682	vmovdqu	XMMWORD[96+rsp],xmm2
683	vpshufb	xmm5,xmm12,xmm0
684	vmovdqu	XMMWORD[80+rsp],xmm4
685	vpshufb	xmm6,xmm13,xmm0
686	vmovdqu	XMMWORD[64+rsp],xmm5
687	vpshufb	xmm7,xmm14,xmm0
688	vmovdqu	XMMWORD[48+rsp],xmm6
689
; Second priming group; the hashing helper picks it up from memory through
; r14 during its first pass.
690	call	_aesni_ctr32_6x
691
; xmm8 = 16 bytes at the sixth argument (byte-reversed below); r9 is biased
; by +64 for the helper's table displacements. Twelve blocks (0xC0 bytes)
; are already done.
692	vmovdqu	xmm8,XMMWORD[r9]
693	lea	r9,[((32+32))+r9]
694	sub	rdx,12
695	mov	r10,0x60*2
696	vpshufb	xmm8,xmm8,xmm0
697
698	call	_aesni_ctr32_ghash_6x
; Tail. Still unhashed at this point: the six byte-reversed blocks queued at
; 32 .. 112(rsp) and the final six output blocks in xmm9-xmm14. The code
; below stores the final six to rsi-96 .. rsi-16, byte-reverses them, and
; folds both groups into xmm8. For each block the high and low halves are
; XORed (vpunpckhqdq + vpxor) so that three vpclmulqdq per block suffice
; (Karatsuba-style); the multiplier entries are read from the table at r9.
699	vmovdqu	xmm7,XMMWORD[32+rsp]
700	vmovdqu	xmm0,XMMWORD[r11]
701	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
702	vpunpckhqdq	xmm1,xmm7,xmm7
703	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
704	vmovups	XMMWORD[(-96)+rsi],xmm9
705	vpshufb	xmm9,xmm9,xmm0
706	vpxor	xmm1,xmm1,xmm7
707	vmovups	XMMWORD[(-80)+rsi],xmm10
708	vpshufb	xmm10,xmm10,xmm0
709	vmovups	XMMWORD[(-64)+rsi],xmm11
710	vpshufb	xmm11,xmm11,xmm0
711	vmovups	XMMWORD[(-48)+rsi],xmm12
712	vpshufb	xmm12,xmm12,xmm0
713	vmovups	XMMWORD[(-32)+rsi],xmm13
714	vpshufb	xmm13,xmm13,xmm0
715	vmovups	XMMWORD[(-16)+rsi],xmm14
716	vpshufb	xmm14,xmm14,xmm0
; Park the byte-reversed first block of the final group at 16(rsp); xmm9 is
; reused as a temporary and the block is XORed back in further down.
717	vmovdqu	XMMWORD[16+rsp],xmm9
718	vmovdqu	xmm6,XMMWORD[48+rsp]
719	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
720	vpunpckhqdq	xmm2,xmm6,xmm6
721	vpclmulqdq	xmm5,xmm7,xmm3,0x00
722	vpxor	xmm2,xmm2,xmm6
723	vpclmulqdq	xmm7,xmm7,xmm3,0x11
724	vpclmulqdq	xmm1,xmm1,xmm15,0x00
725
726	vmovdqu	xmm9,XMMWORD[64+rsp]
727	vpclmulqdq	xmm4,xmm6,xmm0,0x00
728	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
729	vpxor	xmm4,xmm4,xmm5
730	vpunpckhqdq	xmm5,xmm9,xmm9
731	vpclmulqdq	xmm6,xmm6,xmm0,0x11
732	vpxor	xmm5,xmm5,xmm9
733	vpxor	xmm6,xmm6,xmm7
734	vpclmulqdq	xmm2,xmm2,xmm15,0x10
735	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
736	vpxor	xmm2,xmm2,xmm1
737
738	vmovdqu	xmm1,XMMWORD[80+rsp]
739	vpclmulqdq	xmm7,xmm9,xmm3,0x00
740	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
741	vpxor	xmm7,xmm7,xmm4
742	vpunpckhqdq	xmm4,xmm1,xmm1
743	vpclmulqdq	xmm9,xmm9,xmm3,0x11
744	vpxor	xmm4,xmm4,xmm1
745	vpxor	xmm9,xmm9,xmm6
746	vpclmulqdq	xmm5,xmm5,xmm15,0x00
747	vpxor	xmm5,xmm5,xmm2
748
749	vmovdqu	xmm2,XMMWORD[96+rsp]
750	vpclmulqdq	xmm6,xmm1,xmm0,0x00
751	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
752	vpxor	xmm6,xmm6,xmm7
753	vpunpckhqdq	xmm7,xmm2,xmm2
754	vpclmulqdq	xmm1,xmm1,xmm0,0x11
755	vpxor	xmm7,xmm7,xmm2
756	vpxor	xmm1,xmm1,xmm9
757	vpclmulqdq	xmm4,xmm4,xmm15,0x10
758	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
759	vpxor	xmm4,xmm4,xmm5
760
; The oldest queued block (112(rsp)) is combined with the running value in
; xmm8 before the multiplication by the last table entry used.
761	vpxor	xmm8,xmm8,XMMWORD[112+rsp]
762	vpclmulqdq	xmm5,xmm2,xmm3,0x00
763	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
764	vpunpckhqdq	xmm9,xmm8,xmm8
765	vpxor	xmm5,xmm5,xmm6
766	vpclmulqdq	xmm2,xmm2,xmm3,0x11
767	vpxor	xmm9,xmm9,xmm8
768	vpxor	xmm2,xmm2,xmm1
769	vpclmulqdq	xmm7,xmm7,xmm15,0x00
770	vpxor	xmm4,xmm7,xmm4
771
; From here the products of the final group (xmm14 first, xmm10 last) are
; started while the first group's sums are combined and reduced.
772	vpclmulqdq	xmm6,xmm8,xmm0,0x00
773	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
774	vpunpckhqdq	xmm1,xmm14,xmm14
775	vpclmulqdq	xmm8,xmm8,xmm0,0x11
776	vpxor	xmm1,xmm1,xmm14
777	vpxor	xmm5,xmm6,xmm5
778	vpclmulqdq	xmm9,xmm9,xmm15,0x10
779	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
780	vpxor	xmm7,xmm8,xmm2
781	vpxor	xmm6,xmm9,xmm4
782
783	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
784	vpxor	xmm9,xmm7,xmm5
785	vpclmulqdq	xmm4,xmm14,xmm3,0x00
786	vpxor	xmm6,xmm6,xmm9
787	vpunpckhqdq	xmm2,xmm13,xmm13
788	vpclmulqdq	xmm14,xmm14,xmm3,0x11
789	vpxor	xmm2,xmm2,xmm13
790	vpslldq	xmm9,xmm6,8
791	vpclmulqdq	xmm1,xmm1,xmm15,0x00
792	vpxor	xmm8,xmm5,xmm9
793	vpsrldq	xmm6,xmm6,8
794	vpxor	xmm7,xmm7,xmm6
795
796	vpclmulqdq	xmm5,xmm13,xmm0,0x00
797	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
798	vpxor	xmm5,xmm5,xmm4
799	vpunpckhqdq	xmm9,xmm12,xmm12
800	vpclmulqdq	xmm13,xmm13,xmm0,0x11
801	vpxor	xmm9,xmm9,xmm12
802	vpxor	xmm13,xmm13,xmm14
803	vpalignr	xmm14,xmm8,xmm8,8
804	vpclmulqdq	xmm2,xmm2,xmm15,0x10
805	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
806	vpxor	xmm2,xmm2,xmm1
807
808	vpclmulqdq	xmm4,xmm12,xmm3,0x00
809	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
810	vpxor	xmm4,xmm4,xmm5
811	vpunpckhqdq	xmm1,xmm11,xmm11
812	vpclmulqdq	xmm12,xmm12,xmm3,0x11
813	vpxor	xmm1,xmm1,xmm11
814	vpxor	xmm12,xmm12,xmm13
; Bring back the block parked at 16(rsp): it joins the first group's result
; before that value is multiplied as the first block of the final group.
815	vxorps	xmm7,xmm7,XMMWORD[16+rsp]
816	vpclmulqdq	xmm9,xmm9,xmm15,0x00
817	vpxor	xmm9,xmm9,xmm2
818
; First of two reduction steps against the constant at r11+16 ($L$poly).
819	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
820	vxorps	xmm8,xmm8,xmm14
821
822	vpclmulqdq	xmm5,xmm11,xmm0,0x00
823	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
824	vpxor	xmm5,xmm5,xmm4
825	vpunpckhqdq	xmm2,xmm10,xmm10
826	vpclmulqdq	xmm11,xmm11,xmm0,0x11
827	vpxor	xmm2,xmm2,xmm10
828	vpalignr	xmm14,xmm8,xmm8,8
829	vpxor	xmm11,xmm11,xmm12
830	vpclmulqdq	xmm1,xmm1,xmm15,0x10
831	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
832	vpxor	xmm1,xmm1,xmm9
833
; Second reduction step; xmm8 now carries the first group's result.
834	vxorps	xmm14,xmm14,xmm7
835	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10
836	vxorps	xmm8,xmm8,xmm14
837
838	vpclmulqdq	xmm4,xmm10,xmm3,0x00
839	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
840	vpxor	xmm4,xmm4,xmm5
841	vpunpckhqdq	xmm9,xmm8,xmm8
842	vpclmulqdq	xmm10,xmm10,xmm3,0x11
843	vpxor	xmm9,xmm9,xmm8
844	vpxor	xmm10,xmm10,xmm11
845	vpclmulqdq	xmm2,xmm2,xmm15,0x00
846	vpxor	xmm2,xmm2,xmm1
847
848	vpclmulqdq	xmm5,xmm8,xmm0,0x00
849	vpclmulqdq	xmm7,xmm8,xmm0,0x11
850	vpxor	xmm5,xmm5,xmm4
851	vpclmulqdq	xmm6,xmm9,xmm15,0x10
852	vpxor	xmm7,xmm7,xmm10
853	vpxor	xmm6,xmm6,xmm2
854
; Combine the low (xmm5), high (xmm7) and middle (xmm6) sums of the final
; group, then reduce in two steps with the constant at r11+16.
855	vpxor	xmm4,xmm7,xmm5
856	vpxor	xmm6,xmm6,xmm4
857	vpslldq	xmm1,xmm6,8
858	vmovdqu	xmm3,XMMWORD[16+r11]
859	vpsrldq	xmm6,xmm6,8
860	vpxor	xmm8,xmm5,xmm1
861	vpxor	xmm7,xmm7,xmm6
862
863	vpalignr	xmm2,xmm8,xmm8,8
864	vpclmulqdq	xmm8,xmm8,xmm3,0x10
865	vpxor	xmm8,xmm8,xmm2
866
867	vpalignr	xmm2,xmm8,xmm8,8
868	vpclmulqdq	xmm8,xmm8,xmm3,0x10
869	vpxor	xmm2,xmm2,xmm7
870	vpxor	xmm8,xmm8,xmm2
; Byte-reverse xmm8 back and store it where it was loaded from (r9 was
; advanced by 64 above, hence the -64).
871	vpshufb	xmm8,xmm8,XMMWORD[r11]
872	vmovdqu	XMMWORD[(-64)+r9],xmm8
873
; Epilogue: restore the XMM and general-purpose registers saved in the
; prologue, all addressed from rax (the entry rsp), then reset rsp.
874	vzeroupper
875	movaps	xmm6,XMMWORD[((-216))+rax]
876	movaps	xmm7,XMMWORD[((-200))+rax]
877	movaps	xmm8,XMMWORD[((-184))+rax]
878	movaps	xmm9,XMMWORD[((-168))+rax]
879	movaps	xmm10,XMMWORD[((-152))+rax]
880	movaps	xmm11,XMMWORD[((-136))+rax]
881	movaps	xmm12,XMMWORD[((-120))+rax]
882	movaps	xmm13,XMMWORD[((-104))+rax]
883	movaps	xmm14,XMMWORD[((-88))+rax]
884	movaps	xmm15,XMMWORD[((-72))+rax]
885	mov	r15,QWORD[((-48))+rax]
886
887	mov	r14,QWORD[((-40))+rax]
888
889	mov	r13,QWORD[((-32))+rax]
890
891	mov	r12,QWORD[((-24))+rax]
892
893	mov	rbp,QWORD[((-16))+rax]
894
895	mov	rbx,QWORD[((-8))+rax]
896
897	lea	rsp,[rax]
898
; Common exit, also used by the short-input path: rsp equals its entry value
; here, so the saved rdi/rsi can be read back from 8(rsp)/16(rsp).
899$L$gcm_enc_abort:
900	mov	rax,r10
901	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
902	mov	rsi,QWORD[16+rsp]
903	DB	0F3h,0C3h		;repret
904
905$L$SEH_end_aesni_gcm_encrypt:
906ALIGN	64
; Constant table. The code keeps r11 pointing at $L$bswap_mask and reaches the
; other entries by displacement: +16 $L$poly, +32 $L$one_msb, +48 $L$two_lsb,
; +64 $L$one_lsb. The order and the 16-byte size of each entry therefore
; must not change.
; vpshufb mask that reverses the 16 bytes of a register.
907$L$bswap_mask:
908DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; Constant used as the vpclmulqdq operand in the reduction steps (0xc2 in the
; last byte, zero elsewhere).
909$L$poly:
910DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; 1 in the last byte: added with vpaddb to step a counter block that is
; kept in its memory byte order.
911$L$one_msb:
912DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
; 2 and 1 in the first byte: added with vpaddd to a byte-reversed counter on
; the $L$handle_ctr32 / $L$handle_ctr32_2 paths.
913$L$two_lsb:
914DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
915$L$one_lsb:
916DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; NUL-terminated ASCII banner:
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
917DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
918DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
919DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
920DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
921ALIGN	64
922EXTERN	__imp_RtlVirtualUnwind
923
924ALIGN	16
; gcm_se_handler -- exception handler named by the .xdata records of
; aesni_gcm_decrypt and aesni_gcm_encrypt. It rebuilds the caller's register
; state in the context record at r8, using the dispatcher record at r9, then
; calls RtlVirtualUnwind and returns 1.
; NOTE(review): the numeric displacements below are taken to be fields of the
; Win64 CONTEXT (120 Rax, 144 Rbx, 152 Rsp, 160 Rbp, 168 Rsi, 176 Rdi,
; 216-240 R12-R15, 248 Rip, 512 Xmm6) and DISPATCHER_CONTEXT (0 ControlPc,
; 8 ImageBase, 16 FunctionEntry, 24 EstablisherFrame, 40 ContextRecord,
; 56 HandlerData) structures -- confirm against winnt.h.
925gcm_se_handler:
926	push	rsi
927	push	rdi
928	push	rbx
929	push	rbp
930	push	r12
931	push	r13
932	push	r14
933	push	r15
934	pushfq
; 64 bytes of outgoing-argument space for the RtlVirtualUnwind call below.
935	sub	rsp,64
936
; rax = value at context+120, rbx = value at context+248 (the faulting
; instruction address that is compared against the labels below).
937	mov	rax,QWORD[120+r8]
938	mov	rbx,QWORD[248+r8]
939
; rsi = image base; r11 = handler data, i.e. the two label RVAs stored after
; the handler RVA in .xdata (body label, then abort label).
940	mov	rsi,QWORD[8+r9]
941	mov	r11,QWORD[56+r9]
942
; Fault before the body label: the frame is not fully built yet. Both
; prologues execute `mov rax,rsp` first, so the context's rax value is used as
; the frame base.
943	mov	r10d,DWORD[r11]
944	lea	r10,[r10*1+rsi]
945	cmp	rbx,r10
946	jb	NEAR $L$common_seh_tail
947
; Fault at or after the abort label: registers and rsp are already restored,
; so the context's rsp value (context+152) is the frame base.
948	mov	rax,QWORD[152+r8]
949
950	mov	r10d,DWORD[4+r11]
951	lea	r10,[r10*1+rsi]
952	cmp	rbx,r10
953	jae	NEAR $L$common_seh_tail
954
; Fault inside the body: rax again holds the entry rsp. Copy the six saved
; general-purpose registers from just below it into the context record.
955	mov	rax,QWORD[120+r8]
956
957	mov	r15,QWORD[((-48))+rax]
958	mov	r14,QWORD[((-40))+rax]
959	mov	r13,QWORD[((-32))+rax]
960	mov	r12,QWORD[((-24))+rax]
961	mov	rbp,QWORD[((-16))+rax]
962	mov	rbx,QWORD[((-8))+rax]
963	mov	QWORD[240+r8],r15
964	mov	QWORD[232+r8],r14
965	mov	QWORD[224+r8],r13
966	mov	QWORD[216+r8],r12
967	mov	QWORD[160+r8],rbp
968	mov	QWORD[144+r8],rbx
969
; Copy the saved xmm6-xmm15 (20 qwords = 160 bytes at rax-216) to context+512.
; The DD encodes the bytes FC F3 48 A5: `cld` followed by `rep movsq`.
970	lea	rsi,[((-216))+rax]
971	lea	rdi,[512+r8]
972	mov	ecx,20
973	DD	0xa548f3fc
974
975$L$common_seh_tail:
; The prologues stored the caller's rdi/rsi at 8 and 16 above the entry rsp;
; publish them and the frame base into the context record.
976	mov	rdi,QWORD[8+rax]
977	mov	rsi,QWORD[16+rax]
978	mov	QWORD[152+r8],rax
979	mov	QWORD[168+r8],rsi
980	mov	QWORD[176+r8],rdi
981
; Copy the whole context record (154 qwords) to the buffer whose pointer is
; at r9+40, again with `cld; rep movsq`.
982	mov	rdi,QWORD[40+r9]
983	mov	rsi,r8
984	mov	ecx,154
985	DD	0xa548f3fc
986
; Call RtlVirtualUnwind: rcx = 0, rdx = [r9+8], r8 = [r9], r9 = [r9+16], then
; on the stack [r9+40], the addresses r9+56 and r9+24, and a final 0.
987	mov	rsi,r9
988	xor	rcx,rcx
989	mov	rdx,QWORD[8+rsi]
990	mov	r8,QWORD[rsi]
991	mov	r9,QWORD[16+rsi]
992	mov	r10,QWORD[40+rsi]
993	lea	r11,[56+rsi]
994	lea	r12,[24+rsi]
995	mov	QWORD[32+rsp],r10
996	mov	QWORD[40+rsp],r11
997	mov	QWORD[48+rsp],r12
998	mov	QWORD[56+rsp],rcx
999	call	QWORD[__imp_RtlVirtualUnwind]
1000
; Return 1 (presumably ExceptionContinueSearch -- confirm) after undoing the
; handler's own pushes in reverse order.
1001	mov	eax,1
1002	add	rsp,64
1003	popfq
1004	pop	r15
1005	pop	r14
1006	pop	r13
1007	pop	r12
1008	pop	rbp
1009	pop	rbx
1010	pop	rdi
1011	pop	rsi
1012	DB	0F3h,0C3h		;repret
1013
1014
; Exception tables for the two exported functions.
; .pdata: one record per function, three image-relative addresses each:
; start label, end label, and the matching .xdata record.
1015section	.pdata rdata align=4
1016ALIGN	4
1017	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
1018	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
1019	DD	$L$SEH_gcm_dec_info wrt ..imagebase
1020
1021	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
1022	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
1023	DD	$L$SEH_gcm_enc_info wrt ..imagebase
; .xdata: per function, a 4-byte header, the handler's address, and two label
; addresses that gcm_se_handler reads as DWORD[r11] and DWORD[4+r11] (start of
; the body and start of the exit sequence).
; NOTE(review): header byte 9 presumably encodes UNWIND_INFO version 1 with
; the exception-handler flag set and no unwind codes -- confirm against the
; Windows x64 unwind-data documentation.
1024section	.xdata rdata align=8
1025ALIGN	8
1026$L$SEH_gcm_dec_info:
1027DB	9,0,0,0
1028	DD	gcm_se_handler wrt ..imagebase
1029	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
1030$L$SEH_gcm_enc_info:
1031DB	9,0,0,0
1032	DD	gcm_se_handler wrt ..imagebase
1033	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase
1034