• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11#if defined(BORINGSSL_PREFIX)
12#include <boringssl_prefix_symbols_asm.h>
13#endif
14.text
15.extern	OPENSSL_ia32cap_P
16.hidden OPENSSL_ia32cap_P
# aes_hw_encrypt: encrypt a single 16-byte block with AES-NI.
# Register roles as used below:
#   %rdi  address of the 16-byte input block (read)
#   %rsi  address of the 16-byte output block (written)
#   %rdx  address of the key schedule: 16-byte round keys starting at
#         offset 0 and a 32-bit loop count at offset 240
# Clobbers %eax, %rdx, %xmm0-%xmm2 and flags. All three XMM registers are
# zeroed before returning.
17.globl	aes_hw_encrypt
18.hidden aes_hw_encrypt
19.type	aes_hw_encrypt,@function
20.align	16
21aes_hw_encrypt:
22.cfi_startproc
23#ifdef BORINGSSL_DISPATCH_TEST
24.extern	BORINGSSL_function_hit
25.hidden BORINGSSL_function_hit
26	movb	$1,BORINGSSL_function_hit+1(%rip)	# test builds only: set byte 1 of the hit table
27#endif
28	movups	(%rdi),%xmm2	# xmm2 = input block
29	movl	240(%rdx),%eax	# eax = number of aesenc iterations
30	movups	(%rdx),%xmm0	# xmm0 = round key 0
31	movups	16(%rdx),%xmm1	# xmm1 = round key 1
32	leaq	32(%rdx),%rdx	# rdx -> round key 2
33	xorps	%xmm0,%xmm2	# initial key addition
34.Loop_enc1_1:
35.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
36	decl	%eax	# sets ZF for the jnz below (movups/leaq leave flags alone)
37	movups	(%rdx),%xmm1	# fetch the next round key
38	leaq	16(%rdx),%rdx
39	jnz	.Loop_enc1_1
40.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
41	pxor	%xmm0,%xmm0	# wipe key material from the registers
42	pxor	%xmm1,%xmm1
43	movups	%xmm2,(%rsi)	# store the result block
44	pxor	%xmm2,%xmm2	# wipe the result from the register
45	.byte	0xf3,0xc3	# repz ret
46.cfi_endproc
47.size	aes_hw_encrypt,.-aes_hw_encrypt
48
# aes_hw_decrypt: decrypt a single 16-byte block with AES-NI.
# Register roles as used below:
#   %rdi  address of the 16-byte input block (read)
#   %rsi  address of the 16-byte output block (written)
#   %rdx  address of the key schedule: 16-byte round keys starting at
#         offset 0 and a 32-bit loop count at offset 240
# Clobbers %eax, %rdx, %xmm0-%xmm2 and flags. All three XMM registers are
# zeroed before returning.
49.globl	aes_hw_decrypt
50.hidden aes_hw_decrypt
51.type	aes_hw_decrypt,@function
52.align	16
53aes_hw_decrypt:
54.cfi_startproc
55	movups	(%rdi),%xmm2	# xmm2 = input block
56	movl	240(%rdx),%eax	# eax = number of aesdec iterations
57	movups	(%rdx),%xmm0	# xmm0 = round key 0
58	movups	16(%rdx),%xmm1	# xmm1 = round key 1
59	leaq	32(%rdx),%rdx	# rdx -> round key 2
60	xorps	%xmm0,%xmm2	# initial key addition
61.Loop_dec1_2:
62.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
63	decl	%eax	# sets ZF for the jnz below (movups/leaq leave flags alone)
64	movups	(%rdx),%xmm1	# fetch the next round key
65	leaq	16(%rdx),%rdx
66	jnz	.Loop_dec1_2
67.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
68	pxor	%xmm0,%xmm0	# wipe key material from the registers
69	pxor	%xmm1,%xmm1
70	movups	%xmm2,(%rsi)	# store the result block
71	pxor	%xmm2,%xmm2	# wipe the result from the register
72	.byte	0xf3,0xc3	# repz ret
73.cfi_endproc
74.size	aes_hw_decrypt, .-aes_hw_decrypt
# _aesni_encrypt2: file-local helper that encrypts two blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2,%xmm3  the two blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# Round keys alternate between %xmm1 and %xmm0. %rcx is moved past the
# keys and %rax becomes a negative byte offset that climbs to zero in
# steps of 32, i.e. two round keys per loop iteration.
75.type	_aesni_encrypt2,@function
76.align	16
77_aesni_encrypt2:
78.cfi_startproc
79	movups	(%rcx),%xmm0	# round key 0
80	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
81	movups	16(%rcx),%xmm1	# round key 1
82	xorps	%xmm0,%xmm2	# initial key addition
83	xorps	%xmm0,%xmm3
84	movups	32(%rcx),%xmm0	# round key 2
85	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
86	negq	%rax
87	addq	$16,%rax	# rax = 16 - 16*count
88
89.Lenc_loop2:
90.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
91.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
92	movups	(%rcx,%rax,1),%xmm1	# next odd round key
93	addq	$32,%rax	# sets ZF for the jnz below
94.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
95.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
96	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
97	jnz	.Lenc_loop2
98
99.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
100.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
101.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
102.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
103	.byte	0xf3,0xc3	# repz ret
104.cfi_endproc
105.size	_aesni_encrypt2,.-_aesni_encrypt2
# _aesni_decrypt2: file-local helper that decrypts two blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2,%xmm3  the two blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# Round keys alternate between %xmm1 and %xmm0. %rcx is moved past the
# keys and %rax becomes a negative byte offset that climbs to zero in
# steps of 32, i.e. two round keys per loop iteration.
106.type	_aesni_decrypt2,@function
107.align	16
108_aesni_decrypt2:
109.cfi_startproc
110	movups	(%rcx),%xmm0	# round key 0
111	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
112	movups	16(%rcx),%xmm1	# round key 1
113	xorps	%xmm0,%xmm2	# initial key addition
114	xorps	%xmm0,%xmm3
115	movups	32(%rcx),%xmm0	# round key 2
116	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
117	negq	%rax
118	addq	$16,%rax	# rax = 16 - 16*count
119
120.Ldec_loop2:
121.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
122.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
123	movups	(%rcx,%rax,1),%xmm1	# next odd round key
124	addq	$32,%rax	# sets ZF for the jnz below
125.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
126.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
127	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
128	jnz	.Ldec_loop2
129
130.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
131.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
132.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
133.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
134	.byte	0xf3,0xc3	# repz ret
135.cfi_endproc
136.size	_aesni_decrypt2,.-_aesni_decrypt2
# _aesni_encrypt3: file-local helper that encrypts three blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm4  the three blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# Round keys alternate between %xmm1 and %xmm0. %rax is a negative byte
# offset from the adjusted %rcx that climbs to zero in steps of 32.
137.type	_aesni_encrypt3,@function
138.align	16
139_aesni_encrypt3:
140.cfi_startproc
141	movups	(%rcx),%xmm0	# round key 0
142	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
143	movups	16(%rcx),%xmm1	# round key 1
144	xorps	%xmm0,%xmm2	# initial key addition
145	xorps	%xmm0,%xmm3
146	xorps	%xmm0,%xmm4
147	movups	32(%rcx),%xmm0	# round key 2
148	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
149	negq	%rax
150	addq	$16,%rax	# rax = 16 - 16*count
151
152.Lenc_loop3:
153.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
154.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
155.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
156	movups	(%rcx,%rax,1),%xmm1	# next odd round key
157	addq	$32,%rax	# sets ZF for the jnz below
158.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
159.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
160.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
161	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
162	jnz	.Lenc_loop3
163
164.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
165.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
166.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
167.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
168.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
169.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
170	.byte	0xf3,0xc3	# repz ret
171.cfi_endproc
172.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3: file-local helper that decrypts three blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm4  the three blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# Round keys alternate between %xmm1 and %xmm0. %rax is a negative byte
# offset from the adjusted %rcx that climbs to zero in steps of 32.
173.type	_aesni_decrypt3,@function
174.align	16
175_aesni_decrypt3:
176.cfi_startproc
177	movups	(%rcx),%xmm0	# round key 0
178	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
179	movups	16(%rcx),%xmm1	# round key 1
180	xorps	%xmm0,%xmm2	# initial key addition
181	xorps	%xmm0,%xmm3
182	xorps	%xmm0,%xmm4
183	movups	32(%rcx),%xmm0	# round key 2
184	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
185	negq	%rax
186	addq	$16,%rax	# rax = 16 - 16*count
187
188.Ldec_loop3:
189.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
190.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
191.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
192	movups	(%rcx,%rax,1),%xmm1	# next odd round key
193	addq	$32,%rax	# sets ZF for the jnz below
194.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
195.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
196.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
197	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
198	jnz	.Ldec_loop3
199
200.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
201.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
202.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
203.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
204.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
205.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
206	.byte	0xf3,0xc3	# repz ret
207.cfi_endproc
208.size	_aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4: file-local helper that encrypts four blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm5  the four blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# Round keys alternate between %xmm1 and %xmm0. %rax is a negative byte
# offset from the adjusted %rcx that climbs to zero in steps of 32.
209.type	_aesni_encrypt4,@function
210.align	16
211_aesni_encrypt4:
212.cfi_startproc
213	movups	(%rcx),%xmm0	# round key 0
214	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
215	movups	16(%rcx),%xmm1	# round key 1
216	xorps	%xmm0,%xmm2	# initial key addition
217	xorps	%xmm0,%xmm3
218	xorps	%xmm0,%xmm4
219	xorps	%xmm0,%xmm5
220	movups	32(%rcx),%xmm0	# round key 2
221	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
222	negq	%rax
223.byte	0x0f,0x1f,0x00	# 3-byte nop: nopl (%rax)
224	addq	$16,%rax	# rax = 16 - 16*count
225
226.Lenc_loop4:
227.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
228.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
229.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
230.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
231	movups	(%rcx,%rax,1),%xmm1	# next odd round key
232	addq	$32,%rax	# sets ZF for the jnz below
233.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
234.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
235.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
236.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
237	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
238	jnz	.Lenc_loop4
239
240.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
241.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
242.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
243.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
244.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
245.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
246.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
247.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
248	.byte	0xf3,0xc3	# repz ret
249.cfi_endproc
250.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4: file-local helper that decrypts four blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm5  the four blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# Round keys alternate between %xmm1 and %xmm0. %rax is a negative byte
# offset from the adjusted %rcx that climbs to zero in steps of 32.
251.type	_aesni_decrypt4,@function
252.align	16
253_aesni_decrypt4:
254.cfi_startproc
255	movups	(%rcx),%xmm0	# round key 0
256	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
257	movups	16(%rcx),%xmm1	# round key 1
258	xorps	%xmm0,%xmm2	# initial key addition
259	xorps	%xmm0,%xmm3
260	xorps	%xmm0,%xmm4
261	xorps	%xmm0,%xmm5
262	movups	32(%rcx),%xmm0	# round key 2
263	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
264	negq	%rax
265.byte	0x0f,0x1f,0x00	# 3-byte nop: nopl (%rax)
266	addq	$16,%rax	# rax = 16 - 16*count
267
268.Ldec_loop4:
269.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
270.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
271.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
272.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
273	movups	(%rcx,%rax,1),%xmm1	# next odd round key
274	addq	$32,%rax	# sets ZF for the jnz below
275.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
276.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
277.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
278.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
279	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
280	jnz	.Ldec_loop4
281
282.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
283.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
284.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
285.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
286.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
287.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
288.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
289.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
290	.byte	0xf3,0xc3	# repz ret
291.cfi_endproc
292.size	_aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6: file-local helper that encrypts six blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm7  the six blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# The initial key addition is interleaved with the first aesenc round of
# %xmm2-%xmm4, and the code then jumps into the middle of the loop so that
# %xmm5-%xmm7 catch up. The label .Lenc_loop6 is also called directly from
# aes_hw_ctr32_encrypt_blocks, so the layout from that label to the return
# must stay exactly as it is.
293.type	_aesni_encrypt6,@function
294.align	16
295_aesni_encrypt6:
296.cfi_startproc
297	movups	(%rcx),%xmm0	# round key 0
298	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
299	movups	16(%rcx),%xmm1	# round key 1
300	xorps	%xmm0,%xmm2	# initial key addition, interleaved with round 1 below
301	pxor	%xmm0,%xmm3
302	pxor	%xmm0,%xmm4
303.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
304	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
305	negq	%rax	# rax = -16*count
306.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
307	pxor	%xmm0,%xmm5
308	pxor	%xmm0,%xmm6
309.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
310	pxor	%xmm0,%xmm7
311	movups	(%rcx,%rax,1),%xmm0	# round key 2 (address is key + 32)
312	addq	$16,%rax
313	jmp	.Lenc_loop6_enter	# xmm2-xmm4 already had their first round
314.align	16
315.Lenc_loop6:
316.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
317.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
318.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
319.Lenc_loop6_enter:
320.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
321.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
322.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
323	movups	(%rcx,%rax,1),%xmm1	# next odd round key
324	addq	$32,%rax	# sets ZF for the jnz below
325.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
326.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
327.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
328.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
329.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
330.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
331	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
332	jnz	.Lenc_loop6
333
334.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
335.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
336.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
337.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
338.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
339.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
340.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
341.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
342.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
343.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
344.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
345.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
346	.byte	0xf3,0xc3	# repz ret
347.cfi_endproc
348.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6: file-local helper that decrypts six blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm7  the six blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# The initial key addition is interleaved with the first aesdec round of
# %xmm2-%xmm4, and the code then jumps into the middle of the loop so that
# %xmm5-%xmm7 catch up.
349.type	_aesni_decrypt6,@function
350.align	16
351_aesni_decrypt6:
352.cfi_startproc
353	movups	(%rcx),%xmm0	# round key 0
354	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
355	movups	16(%rcx),%xmm1	# round key 1
356	xorps	%xmm0,%xmm2	# initial key addition, interleaved with round 1 below
357	pxor	%xmm0,%xmm3
358	pxor	%xmm0,%xmm4
359.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
360	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
361	negq	%rax	# rax = -16*count
362.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
363	pxor	%xmm0,%xmm5
364	pxor	%xmm0,%xmm6
365.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
366	pxor	%xmm0,%xmm7
367	movups	(%rcx,%rax,1),%xmm0	# round key 2 (address is key + 32)
368	addq	$16,%rax
369	jmp	.Ldec_loop6_enter	# xmm2-xmm4 already had their first round
370.align	16
371.Ldec_loop6:
372.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
373.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
374.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
375.Ldec_loop6_enter:
376.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
377.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
378.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
379	movups	(%rcx,%rax,1),%xmm1	# next odd round key
380	addq	$32,%rax	# sets ZF for the jnz below
381.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
382.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
383.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
384.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
385.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
386.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
387	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
388	jnz	.Ldec_loop6
389
390.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
391.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
392.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
393.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
394.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
395.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
396.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
397.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
398.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
399.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
400.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
401.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
402	.byte	0xf3,0xc3	# repz ret
403.cfi_endproc
404.size	_aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8: file-local helper that encrypts eight blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm9  the eight blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# The initial key addition is interleaved with the first aesenc round of
# %xmm2 and %xmm3, and the code then jumps to .Lenc_loop8_inner so that
# %xmm4-%xmm9 catch up. The label .Lenc_loop8_enter is also called directly
# from aes_hw_ctr32_encrypt_blocks, so the layout from that label to the
# return must stay exactly as it is.
405.type	_aesni_encrypt8,@function
406.align	16
407_aesni_encrypt8:
408.cfi_startproc
409	movups	(%rcx),%xmm0	# round key 0
410	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
411	movups	16(%rcx),%xmm1	# round key 1
412	xorps	%xmm0,%xmm2	# initial key addition, interleaved with round 1 below
413	xorps	%xmm0,%xmm3
414	pxor	%xmm0,%xmm4
415	pxor	%xmm0,%xmm5
416	pxor	%xmm0,%xmm6
417	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
418	negq	%rax	# rax = -16*count
419.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
420	pxor	%xmm0,%xmm7
421	pxor	%xmm0,%xmm8
422.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
423	pxor	%xmm0,%xmm9
424	movups	(%rcx,%rax,1),%xmm0	# round key 2 (address is key + 32)
425	addq	$16,%rax
426	jmp	.Lenc_loop8_inner	# xmm2 and xmm3 already had their first round
427.align	16
428.Lenc_loop8:
429.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
430.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
431.Lenc_loop8_inner:
432.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
433.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
434.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
435.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
436.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
437.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
438.Lenc_loop8_enter:
439	movups	(%rcx,%rax,1),%xmm1	# next odd round key
440	addq	$32,%rax	# sets ZF for the jnz below
441.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
442.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
443.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
444.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
445.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
446.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
447.byte	102,68,15,56,220,192	# aesenc %xmm0,%xmm8
448.byte	102,68,15,56,220,200	# aesenc %xmm0,%xmm9
449	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
450	jnz	.Lenc_loop8
451
452.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
453.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
454.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
455.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
456.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
457.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
458.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
459.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
460.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
461.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
462.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
463.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
464.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
465.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
466.byte	102,68,15,56,221,192	# aesenclast %xmm0,%xmm8
467.byte	102,68,15,56,221,200	# aesenclast %xmm0,%xmm9
468	.byte	0xf3,0xc3	# repz ret
469.cfi_endproc
470.size	_aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8: file-local helper that decrypts eight blocks in parallel.
# On entry:
#   %rcx         key schedule pointer
#   %eax         loop count (callers in this file load it from offset 240
#                of the key schedule)
#   %xmm2-%xmm9  the eight blocks, transformed in place
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
# The initial key addition is interleaved with the first aesdec round of
# %xmm2 and %xmm3, and the code then jumps to .Ldec_loop8_inner so that
# %xmm4-%xmm9 catch up.
471.type	_aesni_decrypt8,@function
472.align	16
473_aesni_decrypt8:
474.cfi_startproc
475	movups	(%rcx),%xmm0	# round key 0
476	shll	$4,%eax	# count * 16 bytes; the 32-bit write also clears the top of rax
477	movups	16(%rcx),%xmm1	# round key 1
478	xorps	%xmm0,%xmm2	# initial key addition, interleaved with round 1 below
479	xorps	%xmm0,%xmm3
480	pxor	%xmm0,%xmm4
481	pxor	%xmm0,%xmm5
482	pxor	%xmm0,%xmm6
483	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
484	negq	%rax	# rax = -16*count
485.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
486	pxor	%xmm0,%xmm7
487	pxor	%xmm0,%xmm8
488.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
489	pxor	%xmm0,%xmm9
490	movups	(%rcx,%rax,1),%xmm0	# round key 2 (address is key + 32)
491	addq	$16,%rax
492	jmp	.Ldec_loop8_inner	# xmm2 and xmm3 already had their first round
493.align	16
494.Ldec_loop8:
495.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
496.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
497.Ldec_loop8_inner:
498.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
499.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
500.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
501.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
502.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
503.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
504.Ldec_loop8_enter:
505	movups	(%rcx,%rax,1),%xmm1	# next odd round key
506	addq	$32,%rax	# sets ZF for the jnz below
507.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
508.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
509.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
510.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
511.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
512.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
513.byte	102,68,15,56,222,192	# aesdec %xmm0,%xmm8
514.byte	102,68,15,56,222,200	# aesdec %xmm0,%xmm9
515	movups	-16(%rcx,%rax,1),%xmm0	# next even round key
516	jnz	.Ldec_loop8
517
518.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
519.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
520.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
521.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
522.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
523.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
524.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
525.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
526.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
527.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
528.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
529.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
530.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
531.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
532.byte	102,68,15,56,223,192	# aesdeclast %xmm0,%xmm8
533.byte	102,68,15,56,223,200	# aesdeclast %xmm0,%xmm9
534	.byte	0xf3,0xc3	# repz ret
535.cfi_endproc
536.size	_aesni_decrypt8,.-_aesni_decrypt8
# aes_hw_ecb_encrypt: transform a buffer one 16-byte block at a time, each
# block independently of the others, in either direction.
# Register roles as used below:
#   %rdi  input pointer (advanced by the 8-block loop)
#   %rsi  output pointer (advanced by the 8-block loop)
#   %rdx  length in bytes; rounded down to a multiple of 16 on entry
#   %rcx  key schedule pointer (32-bit loop count at offset 240)
#   %r8d  direction: non-zero takes the encrypt path, zero the decrypt path
# %r11 and %r10d hold copies of the key pointer and loop count because the
# _aesni_* helpers overwrite %rcx and %eax.
# Full groups of 8 blocks (0x80 bytes) go through the 8-block helpers; the
# remaining 1 to 7 blocks are dispatched to a matching tail case.
# Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9 and flags.
# The decrypt path zeroes every data register after storing it; the
# encrypt path does not. Both paths zero %xmm0 and %xmm1 at .Lecb_ret.
537.globl	aes_hw_ecb_encrypt
538.hidden aes_hw_ecb_encrypt
539.type	aes_hw_ecb_encrypt,@function
540.align	16
541aes_hw_ecb_encrypt:
542.cfi_startproc
543	andq	$-16,%rdx	# drop any partial trailing block
544	jz	.Lecb_ret	# nothing to do
545
546	movl	240(%rcx),%eax	# eax = loop count from the key schedule
547	movups	(%rcx),%xmm0
548	movq	%rcx,%r11	# r11 = saved key pointer
549	movl	%eax,%r10d	# r10d = saved loop count
550	testl	%r8d,%r8d
551	jz	.Lecb_decrypt
552
# Encrypt path.
553	cmpq	$0x80,%rdx
554	jb	.Lecb_enc_tail	# fewer than 8 blocks in total
555
556	movdqu	(%rdi),%xmm2	# load the first 8 input blocks
557	movdqu	16(%rdi),%xmm3
558	movdqu	32(%rdi),%xmm4
559	movdqu	48(%rdi),%xmm5
560	movdqu	64(%rdi),%xmm6
561	movdqu	80(%rdi),%xmm7
562	movdqu	96(%rdi),%xmm8
563	movdqu	112(%rdi),%xmm9
564	leaq	128(%rdi),%rdi
565	subq	$0x80,%rdx
566	jmp	.Lecb_enc_loop8_enter
567.align	16
# Loop body: store the 8 results of the previous helper call while loading
# the next 8 inputs, and restore %rcx/%eax for the next helper call.
568.Lecb_enc_loop8:
569	movups	%xmm2,(%rsi)
570	movq	%r11,%rcx
571	movdqu	(%rdi),%xmm2
572	movl	%r10d,%eax
573	movups	%xmm3,16(%rsi)
574	movdqu	16(%rdi),%xmm3
575	movups	%xmm4,32(%rsi)
576	movdqu	32(%rdi),%xmm4
577	movups	%xmm5,48(%rsi)
578	movdqu	48(%rdi),%xmm5
579	movups	%xmm6,64(%rsi)
580	movdqu	64(%rdi),%xmm6
581	movups	%xmm7,80(%rsi)
582	movdqu	80(%rdi),%xmm7
583	movups	%xmm8,96(%rsi)
584	movdqu	96(%rdi),%xmm8
585	movups	%xmm9,112(%rsi)
586	leaq	128(%rsi),%rsi
587	movdqu	112(%rdi),%xmm9
588	leaq	128(%rdi),%rdi
589.Lecb_enc_loop8_enter:
590
591	call	_aesni_encrypt8
592
593	subq	$0x80,%rdx
594	jnc	.Lecb_enc_loop8	# another full group of 8 blocks remains
595
596	movups	%xmm2,(%rsi)	# store the last full group
597	movq	%r11,%rcx	# restore key pointer for the tail
598	movups	%xmm3,16(%rsi)
599	movl	%r10d,%eax	# restore loop count for the tail
600	movups	%xmm4,32(%rsi)
601	movups	%xmm5,48(%rsi)
602	movups	%xmm6,64(%rsi)
603	movups	%xmm7,80(%rsi)
604	movups	%xmm8,96(%rsi)
605	movups	%xmm9,112(%rsi)
606	leaq	128(%rsi),%rsi
607	addq	$0x80,%rdx	# undo the last subtraction: rdx = bytes left (0..0x70)
608	jz	.Lecb_ret
609
# Tail dispatch for 1 to 7 remaining blocks. The movups loads do not touch
# the flags, so each jb/je pair tests the same cmpq.
610.Lecb_enc_tail:
611	movups	(%rdi),%xmm2
612	cmpq	$0x20,%rdx
613	jb	.Lecb_enc_one
614	movups	16(%rdi),%xmm3
615	je	.Lecb_enc_two
616	movups	32(%rdi),%xmm4
617	cmpq	$0x40,%rdx
618	jb	.Lecb_enc_three
619	movups	48(%rdi),%xmm5
620	je	.Lecb_enc_four
621	movups	64(%rdi),%xmm6
622	cmpq	$0x60,%rdx
623	jb	.Lecb_enc_five
624	movups	80(%rdi),%xmm7
625	je	.Lecb_enc_six
626	movdqu	96(%rdi),%xmm8	# seven blocks left
627	xorps	%xmm9,%xmm9	# zero filler for the unused eighth lane
628	call	_aesni_encrypt8
629	movups	%xmm2,(%rsi)	# only the seven real results are stored
630	movups	%xmm3,16(%rsi)
631	movups	%xmm4,32(%rsi)
632	movups	%xmm5,48(%rsi)
633	movups	%xmm6,64(%rsi)
634	movups	%xmm7,80(%rsi)
635	movups	%xmm8,96(%rsi)
636	jmp	.Lecb_ret
637.align	16
# One block: inline single-block loop, %eax counts the aesenc iterations.
638.Lecb_enc_one:
639	movups	(%rcx),%xmm0
640	movups	16(%rcx),%xmm1
641	leaq	32(%rcx),%rcx
642	xorps	%xmm0,%xmm2
643.Loop_enc1_3:
644.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
645	decl	%eax
646	movups	(%rcx),%xmm1
647	leaq	16(%rcx),%rcx
648	jnz	.Loop_enc1_3
649.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
650	movups	%xmm2,(%rsi)
651	jmp	.Lecb_ret
652.align	16
653.Lecb_enc_two:
654	call	_aesni_encrypt2
655	movups	%xmm2,(%rsi)
656	movups	%xmm3,16(%rsi)
657	jmp	.Lecb_ret
658.align	16
659.Lecb_enc_three:
660	call	_aesni_encrypt3
661	movups	%xmm2,(%rsi)
662	movups	%xmm3,16(%rsi)
663	movups	%xmm4,32(%rsi)
664	jmp	.Lecb_ret
665.align	16
666.Lecb_enc_four:
667	call	_aesni_encrypt4
668	movups	%xmm2,(%rsi)
669	movups	%xmm3,16(%rsi)
670	movups	%xmm4,32(%rsi)
671	movups	%xmm5,48(%rsi)
672	jmp	.Lecb_ret
673.align	16
674.Lecb_enc_five:
675	xorps	%xmm7,%xmm7	# zero filler for the unused sixth lane
676	call	_aesni_encrypt6
677	movups	%xmm2,(%rsi)	# only the five real results are stored
678	movups	%xmm3,16(%rsi)
679	movups	%xmm4,32(%rsi)
680	movups	%xmm5,48(%rsi)
681	movups	%xmm6,64(%rsi)
682	jmp	.Lecb_ret
683.align	16
684.Lecb_enc_six:
685	call	_aesni_encrypt6
686	movups	%xmm2,(%rsi)
687	movups	%xmm3,16(%rsi)
688	movups	%xmm4,32(%rsi)
689	movups	%xmm5,48(%rsi)
690	movups	%xmm6,64(%rsi)
691	movups	%xmm7,80(%rsi)
692	jmp	.Lecb_ret
693
694.align	16
# Decrypt path: same structure as the encrypt path, using the decrypt
# helpers, plus a pxor that zeroes each data register once it is stored.
695.Lecb_decrypt:
696	cmpq	$0x80,%rdx
697	jb	.Lecb_dec_tail	# fewer than 8 blocks in total
698
699	movdqu	(%rdi),%xmm2	# load the first 8 input blocks
700	movdqu	16(%rdi),%xmm3
701	movdqu	32(%rdi),%xmm4
702	movdqu	48(%rdi),%xmm5
703	movdqu	64(%rdi),%xmm6
704	movdqu	80(%rdi),%xmm7
705	movdqu	96(%rdi),%xmm8
706	movdqu	112(%rdi),%xmm9
707	leaq	128(%rdi),%rdi
708	subq	$0x80,%rdx
709	jmp	.Lecb_dec_loop8_enter
710.align	16
# Loop body: store the 8 results of the previous helper call while loading
# the next 8 inputs, and restore %rcx/%eax for the next helper call.
711.Lecb_dec_loop8:
712	movups	%xmm2,(%rsi)
713	movq	%r11,%rcx
714	movdqu	(%rdi),%xmm2
715	movl	%r10d,%eax
716	movups	%xmm3,16(%rsi)
717	movdqu	16(%rdi),%xmm3
718	movups	%xmm4,32(%rsi)
719	movdqu	32(%rdi),%xmm4
720	movups	%xmm5,48(%rsi)
721	movdqu	48(%rdi),%xmm5
722	movups	%xmm6,64(%rsi)
723	movdqu	64(%rdi),%xmm6
724	movups	%xmm7,80(%rsi)
725	movdqu	80(%rdi),%xmm7
726	movups	%xmm8,96(%rsi)
727	movdqu	96(%rdi),%xmm8
728	movups	%xmm9,112(%rsi)
729	leaq	128(%rsi),%rsi
730	movdqu	112(%rdi),%xmm9
731	leaq	128(%rdi),%rdi
732.Lecb_dec_loop8_enter:
733
734	call	_aesni_decrypt8
735
736	movups	(%r11),%xmm0	# reload the first 16 bytes of the key schedule
737	subq	$0x80,%rdx
738	jnc	.Lecb_dec_loop8	# another full group of 8 blocks remains
739
740	movups	%xmm2,(%rsi)	# store the last full group, zeroing each register
741	pxor	%xmm2,%xmm2
742	movq	%r11,%rcx	# restore key pointer for the tail
743	movups	%xmm3,16(%rsi)
744	pxor	%xmm3,%xmm3
745	movl	%r10d,%eax	# restore loop count for the tail
746	movups	%xmm4,32(%rsi)
747	pxor	%xmm4,%xmm4
748	movups	%xmm5,48(%rsi)
749	pxor	%xmm5,%xmm5
750	movups	%xmm6,64(%rsi)
751	pxor	%xmm6,%xmm6
752	movups	%xmm7,80(%rsi)
753	pxor	%xmm7,%xmm7
754	movups	%xmm8,96(%rsi)
755	pxor	%xmm8,%xmm8
756	movups	%xmm9,112(%rsi)
757	pxor	%xmm9,%xmm9
758	leaq	128(%rsi),%rsi
759	addq	$0x80,%rdx	# undo the last subtraction: rdx = bytes left (0..0x70)
760	jz	.Lecb_ret
761
# Tail dispatch for 1 to 7 remaining blocks. The movups loads do not touch
# the flags, so each jb/je pair tests the same cmpq.
762.Lecb_dec_tail:
763	movups	(%rdi),%xmm2
764	cmpq	$0x20,%rdx
765	jb	.Lecb_dec_one
766	movups	16(%rdi),%xmm3
767	je	.Lecb_dec_two
768	movups	32(%rdi),%xmm4
769	cmpq	$0x40,%rdx
770	jb	.Lecb_dec_three
771	movups	48(%rdi),%xmm5
772	je	.Lecb_dec_four
773	movups	64(%rdi),%xmm6
774	cmpq	$0x60,%rdx
775	jb	.Lecb_dec_five
776	movups	80(%rdi),%xmm7
777	je	.Lecb_dec_six
778	movups	96(%rdi),%xmm8	# seven blocks left
779	movups	(%rcx),%xmm0
780	xorps	%xmm9,%xmm9	# zero filler for the unused eighth lane
781	call	_aesni_decrypt8
782	movups	%xmm2,(%rsi)	# only the seven real results are stored
783	pxor	%xmm2,%xmm2
784	movups	%xmm3,16(%rsi)
785	pxor	%xmm3,%xmm3
786	movups	%xmm4,32(%rsi)
787	pxor	%xmm4,%xmm4
788	movups	%xmm5,48(%rsi)
789	pxor	%xmm5,%xmm5
790	movups	%xmm6,64(%rsi)
791	pxor	%xmm6,%xmm6
792	movups	%xmm7,80(%rsi)
793	pxor	%xmm7,%xmm7
794	movups	%xmm8,96(%rsi)
795	pxor	%xmm8,%xmm8
796	pxor	%xmm9,%xmm9
797	jmp	.Lecb_ret
798.align	16
# One block: inline single-block loop, %eax counts the aesdec iterations.
799.Lecb_dec_one:
800	movups	(%rcx),%xmm0
801	movups	16(%rcx),%xmm1
802	leaq	32(%rcx),%rcx
803	xorps	%xmm0,%xmm2
804.Loop_dec1_4:
805.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
806	decl	%eax
807	movups	(%rcx),%xmm1
808	leaq	16(%rcx),%rcx
809	jnz	.Loop_dec1_4
810.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
811	movups	%xmm2,(%rsi)
812	pxor	%xmm2,%xmm2
813	jmp	.Lecb_ret
814.align	16
815.Lecb_dec_two:
816	call	_aesni_decrypt2
817	movups	%xmm2,(%rsi)
818	pxor	%xmm2,%xmm2
819	movups	%xmm3,16(%rsi)
820	pxor	%xmm3,%xmm3
821	jmp	.Lecb_ret
822.align	16
823.Lecb_dec_three:
824	call	_aesni_decrypt3
825	movups	%xmm2,(%rsi)
826	pxor	%xmm2,%xmm2
827	movups	%xmm3,16(%rsi)
828	pxor	%xmm3,%xmm3
829	movups	%xmm4,32(%rsi)
830	pxor	%xmm4,%xmm4
831	jmp	.Lecb_ret
832.align	16
833.Lecb_dec_four:
834	call	_aesni_decrypt4
835	movups	%xmm2,(%rsi)
836	pxor	%xmm2,%xmm2
837	movups	%xmm3,16(%rsi)
838	pxor	%xmm3,%xmm3
839	movups	%xmm4,32(%rsi)
840	pxor	%xmm4,%xmm4
841	movups	%xmm5,48(%rsi)
842	pxor	%xmm5,%xmm5
843	jmp	.Lecb_ret
844.align	16
845.Lecb_dec_five:
846	xorps	%xmm7,%xmm7	# zero filler for the unused sixth lane
847	call	_aesni_decrypt6
848	movups	%xmm2,(%rsi)	# only the five real results are stored
849	pxor	%xmm2,%xmm2
850	movups	%xmm3,16(%rsi)
851	pxor	%xmm3,%xmm3
852	movups	%xmm4,32(%rsi)
853	pxor	%xmm4,%xmm4
854	movups	%xmm5,48(%rsi)
855	pxor	%xmm5,%xmm5
856	movups	%xmm6,64(%rsi)
857	pxor	%xmm6,%xmm6
858	pxor	%xmm7,%xmm7
859	jmp	.Lecb_ret
860.align	16
861.Lecb_dec_six:
862	call	_aesni_decrypt6
863	movups	%xmm2,(%rsi)
864	pxor	%xmm2,%xmm2
865	movups	%xmm3,16(%rsi)
866	pxor	%xmm3,%xmm3
867	movups	%xmm4,32(%rsi)
868	pxor	%xmm4,%xmm4
869	movups	%xmm5,48(%rsi)
870	pxor	%xmm5,%xmm5
871	movups	%xmm6,64(%rsi)
872	pxor	%xmm6,%xmm6
873	movups	%xmm7,80(%rsi)
874	pxor	%xmm7,%xmm7
875
# Common exit (the six-block decrypt case falls through to here): zero the
# two round-key registers and return.
876.Lecb_ret:
877	xorps	%xmm0,%xmm0
878	pxor	%xmm1,%xmm1
879	.byte	0xf3,0xc3	# repz ret
880.cfi_endproc
881.size	aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
882.globl	aes_hw_ctr32_encrypt_blocks
883.hidden aes_hw_ctr32_encrypt_blocks
884.type	aes_hw_ctr32_encrypt_blocks,@function
885.align	16
886aes_hw_ctr32_encrypt_blocks:
887.cfi_startproc
888#ifdef BORINGSSL_DISPATCH_TEST
889	movb	$1,BORINGSSL_function_hit(%rip)
890#endif
891	cmpq	$1,%rdx
892	jne	.Lctr32_bulk
893
894
895
896	movups	(%r8),%xmm2
897	movups	(%rdi),%xmm3
898	movl	240(%rcx),%edx
899	movups	(%rcx),%xmm0
900	movups	16(%rcx),%xmm1
901	leaq	32(%rcx),%rcx
902	xorps	%xmm0,%xmm2
903.Loop_enc1_5:
904.byte	102,15,56,220,209
905	decl	%edx
906	movups	(%rcx),%xmm1
907	leaq	16(%rcx),%rcx
908	jnz	.Loop_enc1_5
909.byte	102,15,56,221,209
910	pxor	%xmm0,%xmm0
911	pxor	%xmm1,%xmm1
912	xorps	%xmm3,%xmm2
913	pxor	%xmm3,%xmm3
914	movups	%xmm2,(%rsi)
915	xorps	%xmm2,%xmm2
916	jmp	.Lctr32_epilogue
917
918.align	16
919.Lctr32_bulk:
920	leaq	(%rsp),%r11
921.cfi_def_cfa_register	%r11
922	pushq	%rbp
923.cfi_offset	%rbp,-16
924	subq	$128,%rsp
925	andq	$-16,%rsp
926
927
928
929
930	movdqu	(%r8),%xmm2
931	movdqu	(%rcx),%xmm0
932	movl	12(%r8),%r8d
933	pxor	%xmm0,%xmm2
934	movl	12(%rcx),%ebp
935	movdqa	%xmm2,0(%rsp)
936	bswapl	%r8d
937	movdqa	%xmm2,%xmm3
938	movdqa	%xmm2,%xmm4
939	movdqa	%xmm2,%xmm5
940	movdqa	%xmm2,64(%rsp)
941	movdqa	%xmm2,80(%rsp)
942	movdqa	%xmm2,96(%rsp)
943	movq	%rdx,%r10
944	movdqa	%xmm2,112(%rsp)
945
946	leaq	1(%r8),%rax
947	leaq	2(%r8),%rdx
948	bswapl	%eax
949	bswapl	%edx
950	xorl	%ebp,%eax
951	xorl	%ebp,%edx
952.byte	102,15,58,34,216,3
953	leaq	3(%r8),%rax
954	movdqa	%xmm3,16(%rsp)
955.byte	102,15,58,34,226,3
956	bswapl	%eax
957	movq	%r10,%rdx
958	leaq	4(%r8),%r10
959	movdqa	%xmm4,32(%rsp)
960	xorl	%ebp,%eax
961	bswapl	%r10d
962.byte	102,15,58,34,232,3
963	xorl	%ebp,%r10d
964	movdqa	%xmm5,48(%rsp)
965	leaq	5(%r8),%r9
966	movl	%r10d,64+12(%rsp)
967	bswapl	%r9d
968	leaq	6(%r8),%r10
969	movl	240(%rcx),%eax
970	xorl	%ebp,%r9d
971	bswapl	%r10d
972	movl	%r9d,80+12(%rsp)
973	xorl	%ebp,%r10d
974	leaq	7(%r8),%r9
975	movl	%r10d,96+12(%rsp)
976	bswapl	%r9d
977	leaq	OPENSSL_ia32cap_P(%rip),%r10
978	movl	4(%r10),%r10d
979	xorl	%ebp,%r9d
980	andl	$71303168,%r10d
981	movl	%r9d,112+12(%rsp)
982
983	movups	16(%rcx),%xmm1
984
985	movdqa	64(%rsp),%xmm6
986	movdqa	80(%rsp),%xmm7
987
988	cmpq	$8,%rdx
989	jb	.Lctr32_tail
990
991	subq	$6,%rdx
992	cmpl	$4194304,%r10d
993	je	.Lctr32_6x
994
995	leaq	128(%rcx),%rcx
996	subq	$2,%rdx
997	jmp	.Lctr32_loop8
998
999.align	16
1000.Lctr32_6x:
1001	shll	$4,%eax
1002	movl	$48,%r10d
1003	bswapl	%ebp
1004	leaq	32(%rcx,%rax,1),%rcx
1005	subq	%rax,%r10
1006	jmp	.Lctr32_loop6
1007
1008.align	16
1009.Lctr32_loop6:
1010	addl	$6,%r8d
1011	movups	-48(%rcx,%r10,1),%xmm0
1012.byte	102,15,56,220,209
1013	movl	%r8d,%eax
1014	xorl	%ebp,%eax
1015.byte	102,15,56,220,217
1016.byte	0x0f,0x38,0xf1,0x44,0x24,12
1017	leal	1(%r8),%eax
1018.byte	102,15,56,220,225
1019	xorl	%ebp,%eax
1020.byte	0x0f,0x38,0xf1,0x44,0x24,28
1021.byte	102,15,56,220,233
1022	leal	2(%r8),%eax
1023	xorl	%ebp,%eax
1024.byte	102,15,56,220,241
1025.byte	0x0f,0x38,0xf1,0x44,0x24,44
1026	leal	3(%r8),%eax
1027.byte	102,15,56,220,249
1028	movups	-32(%rcx,%r10,1),%xmm1
1029	xorl	%ebp,%eax
1030
1031.byte	102,15,56,220,208
1032.byte	0x0f,0x38,0xf1,0x44,0x24,60
1033	leal	4(%r8),%eax
1034.byte	102,15,56,220,216
1035	xorl	%ebp,%eax
1036.byte	0x0f,0x38,0xf1,0x44,0x24,76
1037.byte	102,15,56,220,224
1038	leal	5(%r8),%eax
1039	xorl	%ebp,%eax
1040.byte	102,15,56,220,232
1041.byte	0x0f,0x38,0xf1,0x44,0x24,92
1042	movq	%r10,%rax
1043.byte	102,15,56,220,240
1044.byte	102,15,56,220,248
1045	movups	-16(%rcx,%r10,1),%xmm0
1046
1047	call	.Lenc_loop6
1048
1049	movdqu	(%rdi),%xmm8
1050	movdqu	16(%rdi),%xmm9
1051	movdqu	32(%rdi),%xmm10
1052	movdqu	48(%rdi),%xmm11
1053	movdqu	64(%rdi),%xmm12
1054	movdqu	80(%rdi),%xmm13
1055	leaq	96(%rdi),%rdi
1056	movups	-64(%rcx,%r10,1),%xmm1
1057	pxor	%xmm2,%xmm8
1058	movaps	0(%rsp),%xmm2
1059	pxor	%xmm3,%xmm9
1060	movaps	16(%rsp),%xmm3
1061	pxor	%xmm4,%xmm10
1062	movaps	32(%rsp),%xmm4
1063	pxor	%xmm5,%xmm11
1064	movaps	48(%rsp),%xmm5
1065	pxor	%xmm6,%xmm12
1066	movaps	64(%rsp),%xmm6
1067	pxor	%xmm7,%xmm13
1068	movaps	80(%rsp),%xmm7
1069	movdqu	%xmm8,(%rsi)
1070	movdqu	%xmm9,16(%rsi)
1071	movdqu	%xmm10,32(%rsi)
1072	movdqu	%xmm11,48(%rsi)
1073	movdqu	%xmm12,64(%rsi)
1074	movdqu	%xmm13,80(%rsi)
1075	leaq	96(%rsi),%rsi
1076
1077	subq	$6,%rdx
1078	jnc	.Lctr32_loop6
1079
1080	addq	$6,%rdx
1081	jz	.Lctr32_done
1082
1083	leal	-48(%r10),%eax
1084	leaq	-80(%rcx,%r10,1),%rcx
1085	negl	%eax
1086	shrl	$4,%eax
1087	jmp	.Lctr32_tail
1088
1089.align	32
1090.Lctr32_loop8:
1091	addl	$8,%r8d
1092	movdqa	96(%rsp),%xmm8
1093.byte	102,15,56,220,209
1094	movl	%r8d,%r9d
1095	movdqa	112(%rsp),%xmm9
1096.byte	102,15,56,220,217
1097	bswapl	%r9d
1098	movups	32-128(%rcx),%xmm0
1099.byte	102,15,56,220,225
1100	xorl	%ebp,%r9d
1101	nop
1102.byte	102,15,56,220,233
1103	movl	%r9d,0+12(%rsp)
1104	leaq	1(%r8),%r9
1105.byte	102,15,56,220,241
1106.byte	102,15,56,220,249
1107.byte	102,68,15,56,220,193
1108.byte	102,68,15,56,220,201
1109	movups	48-128(%rcx),%xmm1
1110	bswapl	%r9d
1111.byte	102,15,56,220,208
1112.byte	102,15,56,220,216
1113	xorl	%ebp,%r9d
1114.byte	0x66,0x90
1115.byte	102,15,56,220,224
1116.byte	102,15,56,220,232
1117	movl	%r9d,16+12(%rsp)
1118	leaq	2(%r8),%r9
1119.byte	102,15,56,220,240
1120.byte	102,15,56,220,248
1121.byte	102,68,15,56,220,192
1122.byte	102,68,15,56,220,200
1123	movups	64-128(%rcx),%xmm0
1124	bswapl	%r9d
1125.byte	102,15,56,220,209
1126.byte	102,15,56,220,217
1127	xorl	%ebp,%r9d
1128.byte	0x66,0x90
1129.byte	102,15,56,220,225
1130.byte	102,15,56,220,233
1131	movl	%r9d,32+12(%rsp)
1132	leaq	3(%r8),%r9
1133.byte	102,15,56,220,241
1134.byte	102,15,56,220,249
1135.byte	102,68,15,56,220,193
1136.byte	102,68,15,56,220,201
1137	movups	80-128(%rcx),%xmm1
1138	bswapl	%r9d
1139.byte	102,15,56,220,208
1140.byte	102,15,56,220,216
1141	xorl	%ebp,%r9d
1142.byte	0x66,0x90
1143.byte	102,15,56,220,224
1144.byte	102,15,56,220,232
1145	movl	%r9d,48+12(%rsp)
1146	leaq	4(%r8),%r9
1147.byte	102,15,56,220,240
1148.byte	102,15,56,220,248
1149.byte	102,68,15,56,220,192
1150.byte	102,68,15,56,220,200
1151	movups	96-128(%rcx),%xmm0
1152	bswapl	%r9d
1153.byte	102,15,56,220,209
1154.byte	102,15,56,220,217
1155	xorl	%ebp,%r9d
1156.byte	0x66,0x90
1157.byte	102,15,56,220,225
1158.byte	102,15,56,220,233
1159	movl	%r9d,64+12(%rsp)
1160	leaq	5(%r8),%r9
1161.byte	102,15,56,220,241
1162.byte	102,15,56,220,249
1163.byte	102,68,15,56,220,193
1164.byte	102,68,15,56,220,201
1165	movups	112-128(%rcx),%xmm1
1166	bswapl	%r9d
1167.byte	102,15,56,220,208
1168.byte	102,15,56,220,216
1169	xorl	%ebp,%r9d
1170.byte	0x66,0x90
1171.byte	102,15,56,220,224
1172.byte	102,15,56,220,232
1173	movl	%r9d,80+12(%rsp)
1174	leaq	6(%r8),%r9
1175.byte	102,15,56,220,240
1176.byte	102,15,56,220,248
1177.byte	102,68,15,56,220,192
1178.byte	102,68,15,56,220,200
1179	movups	128-128(%rcx),%xmm0
1180	bswapl	%r9d
1181.byte	102,15,56,220,209
1182.byte	102,15,56,220,217
1183	xorl	%ebp,%r9d
1184.byte	0x66,0x90
1185.byte	102,15,56,220,225
1186.byte	102,15,56,220,233
1187	movl	%r9d,96+12(%rsp)
1188	leaq	7(%r8),%r9
1189.byte	102,15,56,220,241
1190.byte	102,15,56,220,249
1191.byte	102,68,15,56,220,193
1192.byte	102,68,15,56,220,201
1193	movups	144-128(%rcx),%xmm1
1194	bswapl	%r9d
1195.byte	102,15,56,220,208
1196.byte	102,15,56,220,216
1197.byte	102,15,56,220,224
1198	xorl	%ebp,%r9d
1199	movdqu	0(%rdi),%xmm10
1200.byte	102,15,56,220,232
1201	movl	%r9d,112+12(%rsp)
1202	cmpl	$11,%eax
1203.byte	102,15,56,220,240
1204.byte	102,15,56,220,248
1205.byte	102,68,15,56,220,192
1206.byte	102,68,15,56,220,200
1207	movups	160-128(%rcx),%xmm0
1208
1209	jb	.Lctr32_enc_done
1210
1211.byte	102,15,56,220,209
1212.byte	102,15,56,220,217
1213.byte	102,15,56,220,225
1214.byte	102,15,56,220,233
1215.byte	102,15,56,220,241
1216.byte	102,15,56,220,249
1217.byte	102,68,15,56,220,193
1218.byte	102,68,15,56,220,201
1219	movups	176-128(%rcx),%xmm1
1220
1221.byte	102,15,56,220,208
1222.byte	102,15,56,220,216
1223.byte	102,15,56,220,224
1224.byte	102,15,56,220,232
1225.byte	102,15,56,220,240
1226.byte	102,15,56,220,248
1227.byte	102,68,15,56,220,192
1228.byte	102,68,15,56,220,200
1229	movups	192-128(%rcx),%xmm0
1230	je	.Lctr32_enc_done
1231
1232.byte	102,15,56,220,209
1233.byte	102,15,56,220,217
1234.byte	102,15,56,220,225
1235.byte	102,15,56,220,233
1236.byte	102,15,56,220,241
1237.byte	102,15,56,220,249
1238.byte	102,68,15,56,220,193
1239.byte	102,68,15,56,220,201
1240	movups	208-128(%rcx),%xmm1
1241
1242.byte	102,15,56,220,208
1243.byte	102,15,56,220,216
1244.byte	102,15,56,220,224
1245.byte	102,15,56,220,232
1246.byte	102,15,56,220,240
1247.byte	102,15,56,220,248
1248.byte	102,68,15,56,220,192
1249.byte	102,68,15,56,220,200
1250	movups	224-128(%rcx),%xmm0
1251	jmp	.Lctr32_enc_done
1252
1253.align	16
1254.Lctr32_enc_done:
1255	movdqu	16(%rdi),%xmm11
1256	pxor	%xmm0,%xmm10
1257	movdqu	32(%rdi),%xmm12
1258	pxor	%xmm0,%xmm11
1259	movdqu	48(%rdi),%xmm13
1260	pxor	%xmm0,%xmm12
1261	movdqu	64(%rdi),%xmm14
1262	pxor	%xmm0,%xmm13
1263	movdqu	80(%rdi),%xmm15
1264	pxor	%xmm0,%xmm14
1265	pxor	%xmm0,%xmm15
1266.byte	102,15,56,220,209
1267.byte	102,15,56,220,217
1268.byte	102,15,56,220,225
1269.byte	102,15,56,220,233
1270.byte	102,15,56,220,241
1271.byte	102,15,56,220,249
1272.byte	102,68,15,56,220,193
1273.byte	102,68,15,56,220,201
1274	movdqu	96(%rdi),%xmm1
1275	leaq	128(%rdi),%rdi
1276
1277.byte	102,65,15,56,221,210
1278	pxor	%xmm0,%xmm1
1279	movdqu	112-128(%rdi),%xmm10
1280.byte	102,65,15,56,221,219
1281	pxor	%xmm0,%xmm10
1282	movdqa	0(%rsp),%xmm11
1283.byte	102,65,15,56,221,228
1284.byte	102,65,15,56,221,237
1285	movdqa	16(%rsp),%xmm12
1286	movdqa	32(%rsp),%xmm13
1287.byte	102,65,15,56,221,246
1288.byte	102,65,15,56,221,255
1289	movdqa	48(%rsp),%xmm14
1290	movdqa	64(%rsp),%xmm15
1291.byte	102,68,15,56,221,193
1292	movdqa	80(%rsp),%xmm0
1293	movups	16-128(%rcx),%xmm1
1294.byte	102,69,15,56,221,202
1295
1296	movups	%xmm2,(%rsi)
1297	movdqa	%xmm11,%xmm2
1298	movups	%xmm3,16(%rsi)
1299	movdqa	%xmm12,%xmm3
1300	movups	%xmm4,32(%rsi)
1301	movdqa	%xmm13,%xmm4
1302	movups	%xmm5,48(%rsi)
1303	movdqa	%xmm14,%xmm5
1304	movups	%xmm6,64(%rsi)
1305	movdqa	%xmm15,%xmm6
1306	movups	%xmm7,80(%rsi)
1307	movdqa	%xmm0,%xmm7
1308	movups	%xmm8,96(%rsi)
1309	movups	%xmm9,112(%rsi)
1310	leaq	128(%rsi),%rsi
1311
1312	subq	$8,%rdx
1313	jnc	.Lctr32_loop8
1314
1315	addq	$8,%rdx
1316	jz	.Lctr32_done
1317	leaq	-128(%rcx),%rcx
1318
1319.Lctr32_tail:
1320
1321
1322	leaq	16(%rcx),%rcx
1323	cmpq	$4,%rdx
1324	jb	.Lctr32_loop3
1325	je	.Lctr32_loop4
1326
1327
1328	shll	$4,%eax
1329	movdqa	96(%rsp),%xmm8
1330	pxor	%xmm9,%xmm9
1331
1332	movups	16(%rcx),%xmm0
1333.byte	102,15,56,220,209
1334.byte	102,15,56,220,217
1335	leaq	32-16(%rcx,%rax,1),%rcx
1336	negq	%rax
1337.byte	102,15,56,220,225
1338	addq	$16,%rax
1339	movups	(%rdi),%xmm10
1340.byte	102,15,56,220,233
1341.byte	102,15,56,220,241
1342	movups	16(%rdi),%xmm11
1343	movups	32(%rdi),%xmm12
1344.byte	102,15,56,220,249
1345.byte	102,68,15,56,220,193
1346
1347	call	.Lenc_loop8_enter
1348
1349	movdqu	48(%rdi),%xmm13
1350	pxor	%xmm10,%xmm2
1351	movdqu	64(%rdi),%xmm10
1352	pxor	%xmm11,%xmm3
1353	movdqu	%xmm2,(%rsi)
1354	pxor	%xmm12,%xmm4
1355	movdqu	%xmm3,16(%rsi)
1356	pxor	%xmm13,%xmm5
1357	movdqu	%xmm4,32(%rsi)
1358	pxor	%xmm10,%xmm6
1359	movdqu	%xmm5,48(%rsi)
1360	movdqu	%xmm6,64(%rsi)
1361	cmpq	$6,%rdx
1362	jb	.Lctr32_done
1363
1364	movups	80(%rdi),%xmm11
1365	xorps	%xmm11,%xmm7
1366	movups	%xmm7,80(%rsi)
1367	je	.Lctr32_done
1368
1369	movups	96(%rdi),%xmm12
1370	xorps	%xmm12,%xmm8
1371	movups	%xmm8,96(%rsi)
1372	jmp	.Lctr32_done
1373
1374.align	32
1375.Lctr32_loop4:
1376.byte	102,15,56,220,209
1377	leaq	16(%rcx),%rcx
1378	decl	%eax
1379.byte	102,15,56,220,217
1380.byte	102,15,56,220,225
1381.byte	102,15,56,220,233
1382	movups	(%rcx),%xmm1
1383	jnz	.Lctr32_loop4
1384.byte	102,15,56,221,209
1385.byte	102,15,56,221,217
1386	movups	(%rdi),%xmm10
1387	movups	16(%rdi),%xmm11
1388.byte	102,15,56,221,225
1389.byte	102,15,56,221,233
1390	movups	32(%rdi),%xmm12
1391	movups	48(%rdi),%xmm13
1392
1393	xorps	%xmm10,%xmm2
1394	movups	%xmm2,(%rsi)
1395	xorps	%xmm11,%xmm3
1396	movups	%xmm3,16(%rsi)
1397	pxor	%xmm12,%xmm4
1398	movdqu	%xmm4,32(%rsi)
1399	pxor	%xmm13,%xmm5
1400	movdqu	%xmm5,48(%rsi)
1401	jmp	.Lctr32_done
1402
1403.align	32
1404.Lctr32_loop3:
1405.byte	102,15,56,220,209
1406	leaq	16(%rcx),%rcx
1407	decl	%eax
1408.byte	102,15,56,220,217
1409.byte	102,15,56,220,225
1410	movups	(%rcx),%xmm1
1411	jnz	.Lctr32_loop3
1412.byte	102,15,56,221,209
1413.byte	102,15,56,221,217
1414.byte	102,15,56,221,225
1415
1416	movups	(%rdi),%xmm10
1417	xorps	%xmm10,%xmm2
1418	movups	%xmm2,(%rsi)
1419	cmpq	$2,%rdx
1420	jb	.Lctr32_done
1421
1422	movups	16(%rdi),%xmm11
1423	xorps	%xmm11,%xmm3
1424	movups	%xmm3,16(%rsi)
1425	je	.Lctr32_done
1426
1427	movups	32(%rdi),%xmm12
1428	xorps	%xmm12,%xmm4
1429	movups	%xmm4,32(%rsi)
1430
1431.Lctr32_done:
1432	xorps	%xmm0,%xmm0
1433	xorl	%ebp,%ebp
1434	pxor	%xmm1,%xmm1
1435	pxor	%xmm2,%xmm2
1436	pxor	%xmm3,%xmm3
1437	pxor	%xmm4,%xmm4
1438	pxor	%xmm5,%xmm5
1439	pxor	%xmm6,%xmm6
1440	pxor	%xmm7,%xmm7
1441	movaps	%xmm0,0(%rsp)
1442	pxor	%xmm8,%xmm8
1443	movaps	%xmm0,16(%rsp)
1444	pxor	%xmm9,%xmm9
1445	movaps	%xmm0,32(%rsp)
1446	pxor	%xmm10,%xmm10
1447	movaps	%xmm0,48(%rsp)
1448	pxor	%xmm11,%xmm11
1449	movaps	%xmm0,64(%rsp)
1450	pxor	%xmm12,%xmm12
1451	movaps	%xmm0,80(%rsp)
1452	pxor	%xmm13,%xmm13
1453	movaps	%xmm0,96(%rsp)
1454	pxor	%xmm14,%xmm14
1455	movaps	%xmm0,112(%rsp)
1456	pxor	%xmm15,%xmm15
1457	movq	-8(%r11),%rbp
1458.cfi_restore	%rbp
1459	leaq	(%r11),%rsp
1460.cfi_def_cfa_register	%rsp
1461.Lctr32_epilogue:
1462	.byte	0xf3,0xc3
1463.cfi_endproc
1464.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
# aes_hw_cbc_encrypt: CBC-mode encryption and decryption using AES-NI.
# Register roles, as used by the code below:
#   %rdi = input pointer        %rsi = output pointer
#   %rdx = length in bytes      %rcx = key schedule (round counter at offset 240)
#   %r8  = 16-byte IV, read on entry and rewritten with the next chaining value
#   %r9d = direction flag: non-zero encrypts, zero decrypts
# Encoded instructions used here:
#   .byte 102,15,56,220,... = aesenc      .byte 102,15,56,221,... = aesenclast
#   .byte 102,15,56,222,... = aesdec      .byte 102,15,56,223,... = aesdeclast
#   (a 65/68/69 byte after 102 is a REX prefix selecting xmm8-xmm15)
#   .byte 0xf3,0xc3 = rep ret
#   .long 0x9066A4F3 = rep movsb + 2-byte nop, .long 0x9066AAF3 = rep stosb + 2-byte nop
# The _aesni_decrypt2/3/4/6/8 helpers are called here but defined outside this block.
1465.globl	aes_hw_cbc_encrypt
1466.hidden aes_hw_cbc_encrypt
1467.type	aes_hw_cbc_encrypt,@function
1468.align	16
1469aes_hw_cbc_encrypt:
1470.cfi_startproc
1471	testq	%rdx,%rdx	# zero length: nothing to do
1472	jz	.Lcbc_ret
1473
1474	movl	240(%rcx),%r10d	# r10d = round counter from the key schedule
1475	movq	%rcx,%r11	# r11 = key base, used to reset rcx after each block
1476	testl	%r9d,%r9d
1477	jz	.Lcbc_decrypt	# r9d == 0 selects decryption
1478
1479	movups	(%r8),%xmm2	# xmm2 = IV (running chaining value)
1480	movl	%r10d,%eax
1481	cmpq	$16,%rdx
1482	jb	.Lcbc_enc_tail	# fewer than 16 bytes: pad first
1483	subq	$16,%rdx
1484	jmp	.Lcbc_enc_loop
1485.align	16
# Encrypt one block per iteration; xmm2 carries the previous ciphertext block.
1486.Lcbc_enc_loop:
1487	movups	(%rdi),%xmm3	# xmm3 = next input block
1488	leaq	16(%rdi),%rdi
1489
1490	movups	(%rcx),%xmm0	# xmm0 = round key 0
1491	movups	16(%rcx),%xmm1
1492	xorps	%xmm0,%xmm3
1493	leaq	32(%rcx),%rcx
1494	xorps	%xmm3,%xmm2	# xmm2 = chaining value ^ input ^ round key 0
1495.Loop_enc1_6:
1496.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
1497	decl	%eax
1498	movups	(%rcx),%xmm1
1499	leaq	16(%rcx),%rcx
1500	jnz	.Loop_enc1_6
1501.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
1502	movl	%r10d,%eax	# reset round counter and key pointer
1503	movq	%r11,%rcx
1504	movups	%xmm2,0(%rsi)
1505	leaq	16(%rsi),%rsi
1506	subq	$16,%rdx
1507	jnc	.Lcbc_enc_loop	# no borrow: another full block remains
1508	addq	$16,%rdx
1509	jnz	.Lcbc_enc_tail	# 1..15 leftover bytes
1510	pxor	%xmm0,%xmm0
1511	pxor	%xmm1,%xmm1
1512	movups	%xmm2,(%r8)	# last ciphertext block becomes the stored IV
1513	pxor	%xmm2,%xmm2
1514	pxor	%xmm3,%xmm3
1515	jmp	.Lcbc_ret
1516
# Partial final block: copy the leftover input bytes to the output buffer,
# zero-fill up to 16 bytes, then run the loop once more on that block in place.
1517.Lcbc_enc_tail:
1518	movq	%rdx,%rcx	# rcx = leftover byte count
1519	xchgq	%rdi,%rsi	# rdi = output, rsi = input for the string copy
1520.long	0x9066A4F3
1521	movl	$16,%ecx
1522	subq	%rdx,%rcx	# rcx = 16 - leftover
1523	xorl	%eax,%eax	# al = 0, the fill byte for rep stosb
1524.long	0x9066AAF3
1525	leaq	-16(%rdi),%rdi	# back to the start of the padded block
1526	movl	%r10d,%eax
1527	movq	%rdi,%rsi	# encrypt in place: input = output
1528	movq	%r11,%rcx
1529	xorq	%rdx,%rdx	# rdx = 0 so the loop exits after this block
1530	jmp	.Lcbc_enc_loop
1531
1532.align	16
1533.Lcbc_decrypt:
1534	cmpq	$16,%rdx
1535	jne	.Lcbc_decrypt_bulk	# only a length of exactly 16 uses the short path
1536
1537
1538
1539	movdqu	(%rdi),%xmm2
1540	movdqu	(%r8),%xmm3	# xmm3 = IV
1541	movdqa	%xmm2,%xmm4	# xmm4 = ciphertext copy, stored as the next IV
1542	movups	(%rcx),%xmm0
1543	movups	16(%rcx),%xmm1
1544	leaq	32(%rcx),%rcx
1545	xorps	%xmm0,%xmm2
1546.Loop_dec1_7:
1547.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1548	decl	%r10d
1549	movups	(%rcx),%xmm1
1550	leaq	16(%rcx),%rcx
1551	jnz	.Loop_dec1_7
1552.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
1553	pxor	%xmm0,%xmm0
1554	pxor	%xmm1,%xmm1
1555	movdqu	%xmm4,(%r8)
1556	xorps	%xmm3,%xmm2	# undo CBC chaining with the old IV
1557	pxor	%xmm3,%xmm3
1558	movups	%xmm2,(%rsi)
1559	pxor	%xmm2,%xmm2
1560	jmp	.Lcbc_ret
1561.align	16
# Bulk decryption. Sets up a 16-byte-aligned scratch slot at (%rsp); r11 keeps
# the entry stack pointer and the saved rbp sits at -8(%r11).
1562.Lcbc_decrypt_bulk:
1563	leaq	(%rsp),%r11
1564.cfi_def_cfa_register	%r11
1565	pushq	%rbp
1566.cfi_offset	%rbp,-16
1567	subq	$16,%rsp
1568	andq	$-16,%rsp
1569	movq	%rcx,%rbp	# rbp = key base
1570	movups	(%r8),%xmm10	# xmm10 = IV / previous ciphertext block
1571	movl	%r10d,%eax
1572	cmpq	$0x50,%rdx
1573	jbe	.Lcbc_dec_tail	# at most 0x50 bytes: handled by the tail code
1574
1575	movups	(%rcx),%xmm0
1576	movdqu	0(%rdi),%xmm2	# load six blocks; xmm11-xmm15 keep ciphertext copies
1577	movdqu	16(%rdi),%xmm3
1578	movdqa	%xmm2,%xmm11
1579	movdqu	32(%rdi),%xmm4
1580	movdqa	%xmm3,%xmm12
1581	movdqu	48(%rdi),%xmm5
1582	movdqa	%xmm4,%xmm13
1583	movdqu	64(%rdi),%xmm6
1584	movdqa	%xmm5,%xmm14
1585	movdqu	80(%rdi),%xmm7
1586	movdqa	%xmm6,%xmm15
1587	leaq	OPENSSL_ia32cap_P(%rip),%r9
1588	movl	4(%r9),%r9d	# second 32-bit word of the capability vector
1589	cmpq	$0x70,%rdx
1590	jbe	.Lcbc_dec_six_or_seven
1591
1592	andl	$71303168,%r9d	# mask 0x4400000: keep bits 22 and 26
1593	subq	$0x50,%rdx
1594	cmpl	$4194304,%r9d	# only bit 22 set: use the six-block loop
1595	je	.Lcbc_dec_loop6_enter	# NOTE(review): presumably a CPU-model heuristic (MOVBE without XSAVE) - confirm against the perlasm source
1596	subq	$0x20,%rdx
1597	leaq	112(%rcx),%rcx	# bias rcx; the loop below addresses round keys as N-112(%rcx)
1598	jmp	.Lcbc_dec_loop8_enter
1599.align	16
# Eight-block loop. Each pass stores seven outputs and advances rsi by 112; the
# eighth output (xmm9) is stored at the top of the next pass.
1600.Lcbc_dec_loop8:
1601	movups	%xmm9,(%rsi)
1602	leaq	16(%rsi),%rsi
1603.Lcbc_dec_loop8_enter:
1604	movdqu	96(%rdi),%xmm8
1605	pxor	%xmm0,%xmm2
1606	movdqu	112(%rdi),%xmm9
1607	pxor	%xmm0,%xmm3
1608	movups	16-112(%rcx),%xmm1
1609	pxor	%xmm0,%xmm4
1610	movq	$-1,%rbp
1611	cmpq	$0x70,%rdx	# CF = (rdx < 0x70); consumed by adcq below
1612	pxor	%xmm0,%xmm5
1613	pxor	%xmm0,%xmm6
1614	pxor	%xmm0,%xmm7
1615	pxor	%xmm0,%xmm8
1616
1617.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2 (lanes xmm3..xmm9 follow)
1618	pxor	%xmm0,%xmm9
1619	movups	32-112(%rcx),%xmm0
1620.byte	102,15,56,222,217
1621.byte	102,15,56,222,225
1622.byte	102,15,56,222,233
1623.byte	102,15,56,222,241
1624.byte	102,15,56,222,249
1625.byte	102,68,15,56,222,193
1626	adcq	$0,%rbp	# rbp = 0 if rdx < 0x70, else -1
1627	andq	$128,%rbp
1628.byte	102,68,15,56,222,201
1629	addq	%rdi,%rbp	# rbp = rdi + 128 when rdx >= 0x70, else rdi (branch-free)
1630	movups	48-112(%rcx),%xmm1
1631.byte	102,15,56,222,208
1632.byte	102,15,56,222,216
1633.byte	102,15,56,222,224
1634.byte	102,15,56,222,232
1635.byte	102,15,56,222,240
1636.byte	102,15,56,222,248
1637.byte	102,68,15,56,222,192
1638.byte	102,68,15,56,222,200
1639	movups	64-112(%rcx),%xmm0
1640	nop
1641.byte	102,15,56,222,209
1642.byte	102,15,56,222,217
1643.byte	102,15,56,222,225
1644.byte	102,15,56,222,233
1645.byte	102,15,56,222,241
1646.byte	102,15,56,222,249
1647.byte	102,68,15,56,222,193
1648.byte	102,68,15,56,222,201
1649	movups	80-112(%rcx),%xmm1
1650	nop
1651.byte	102,15,56,222,208
1652.byte	102,15,56,222,216
1653.byte	102,15,56,222,224
1654.byte	102,15,56,222,232
1655.byte	102,15,56,222,240
1656.byte	102,15,56,222,248
1657.byte	102,68,15,56,222,192
1658.byte	102,68,15,56,222,200
1659	movups	96-112(%rcx),%xmm0
1660	nop
1661.byte	102,15,56,222,209
1662.byte	102,15,56,222,217
1663.byte	102,15,56,222,225
1664.byte	102,15,56,222,233
1665.byte	102,15,56,222,241
1666.byte	102,15,56,222,249
1667.byte	102,68,15,56,222,193
1668.byte	102,68,15,56,222,201
1669	movups	112-112(%rcx),%xmm1
1670	nop
1671.byte	102,15,56,222,208
1672.byte	102,15,56,222,216
1673.byte	102,15,56,222,224
1674.byte	102,15,56,222,232
1675.byte	102,15,56,222,240
1676.byte	102,15,56,222,248
1677.byte	102,68,15,56,222,192
1678.byte	102,68,15,56,222,200
1679	movups	128-112(%rcx),%xmm0
1680	nop
1681.byte	102,15,56,222,209
1682.byte	102,15,56,222,217
1683.byte	102,15,56,222,225
1684.byte	102,15,56,222,233
1685.byte	102,15,56,222,241
1686.byte	102,15,56,222,249
1687.byte	102,68,15,56,222,193
1688.byte	102,68,15,56,222,201
1689	movups	144-112(%rcx),%xmm1
1690	cmpl	$11,%eax	# flags feed the jb and je below; movups and aesdec leave flags alone
1691.byte	102,15,56,222,208
1692.byte	102,15,56,222,216
1693.byte	102,15,56,222,224
1694.byte	102,15,56,222,232
1695.byte	102,15,56,222,240
1696.byte	102,15,56,222,248
1697.byte	102,68,15,56,222,192
1698.byte	102,68,15,56,222,200
1699	movups	160-112(%rcx),%xmm0
1700	jb	.Lcbc_dec_done	# round counter below 11: xmm0 already holds the last round key
1701.byte	102,15,56,222,209
1702.byte	102,15,56,222,217
1703.byte	102,15,56,222,225
1704.byte	102,15,56,222,233
1705.byte	102,15,56,222,241
1706.byte	102,15,56,222,249
1707.byte	102,68,15,56,222,193
1708.byte	102,68,15,56,222,201
1709	movups	176-112(%rcx),%xmm1
1710	nop
1711.byte	102,15,56,222,208
1712.byte	102,15,56,222,216
1713.byte	102,15,56,222,224
1714.byte	102,15,56,222,232
1715.byte	102,15,56,222,240
1716.byte	102,15,56,222,248
1717.byte	102,68,15,56,222,192
1718.byte	102,68,15,56,222,200
1719	movups	192-112(%rcx),%xmm0
1720	je	.Lcbc_dec_done	# round counter equal to 11
1721.byte	102,15,56,222,209
1722.byte	102,15,56,222,217
1723.byte	102,15,56,222,225
1724.byte	102,15,56,222,233
1725.byte	102,15,56,222,241
1726.byte	102,15,56,222,249
1727.byte	102,68,15,56,222,193
1728.byte	102,68,15,56,222,201
1729	movups	208-112(%rcx),%xmm1
1730	nop
1731.byte	102,15,56,222,208
1732.byte	102,15,56,222,216
1733.byte	102,15,56,222,224
1734.byte	102,15,56,222,232
1735.byte	102,15,56,222,240
1736.byte	102,15,56,222,248
1737.byte	102,68,15,56,222,192
1738.byte	102,68,15,56,222,200
1739	movups	224-112(%rcx),%xmm0
1740	jmp	.Lcbc_dec_done
1741.align	16
# Final rounds for the eight lanes: one more aesdec with xmm1, then aesdeclast.
# xmm0 (last round key) is first xored into the saved previous-ciphertext
# values so that each aesdeclast applies the last key and the CBC xor at once.
1742.Lcbc_dec_done:
1743.byte	102,15,56,222,209
1744.byte	102,15,56,222,217
1745	pxor	%xmm0,%xmm10
1746	pxor	%xmm0,%xmm11
1747.byte	102,15,56,222,225
1748.byte	102,15,56,222,233
1749	pxor	%xmm0,%xmm12
1750	pxor	%xmm0,%xmm13
1751.byte	102,15,56,222,241
1752.byte	102,15,56,222,249
1753	pxor	%xmm0,%xmm14
1754	pxor	%xmm0,%xmm15
1755.byte	102,68,15,56,222,193
1756.byte	102,68,15,56,222,201
1757	movdqu	80(%rdi),%xmm1
1758
1759.byte	102,65,15,56,223,210	# aesdeclast %xmm10,%xmm2
1760	movdqu	96(%rdi),%xmm10
1761	pxor	%xmm0,%xmm1
1762.byte	102,65,15,56,223,219	# aesdeclast %xmm11,%xmm3
1763	pxor	%xmm0,%xmm10
1764	movdqu	112(%rdi),%xmm0	# eighth ciphertext block, becomes xmm10 (next IV) below
1765.byte	102,65,15,56,223,228
1766	leaq	128(%rdi),%rdi
1767	movdqu	0(%rbp),%xmm11	# preload six input blocks from rbp for the next pass
1768.byte	102,65,15,56,223,237
1769.byte	102,65,15,56,223,246
1770	movdqu	16(%rbp),%xmm12
1771	movdqu	32(%rbp),%xmm13
1772.byte	102,65,15,56,223,255
1773.byte	102,68,15,56,223,193
1774	movdqu	48(%rbp),%xmm14
1775	movdqu	64(%rbp),%xmm15
1776.byte	102,69,15,56,223,202	# aesdeclast %xmm10,%xmm9
1777	movdqa	%xmm0,%xmm10
1778	movdqu	80(%rbp),%xmm1
1779	movups	-112(%rcx),%xmm0	# reload round key 0
1780
1781	movups	%xmm2,(%rsi)
1782	movdqa	%xmm11,%xmm2
1783	movups	%xmm3,16(%rsi)
1784	movdqa	%xmm12,%xmm3
1785	movups	%xmm4,32(%rsi)
1786	movdqa	%xmm13,%xmm4
1787	movups	%xmm5,48(%rsi)
1788	movdqa	%xmm14,%xmm5
1789	movups	%xmm6,64(%rsi)
1790	movdqa	%xmm15,%xmm6
1791	movups	%xmm7,80(%rsi)
1792	movdqa	%xmm1,%xmm7
1793	movups	%xmm8,96(%rsi)
1794	leaq	112(%rsi),%rsi
1795
1796	subq	$0x80,%rdx
1797	ja	.Lcbc_dec_loop8
1798
1799	movaps	%xmm9,%xmm2	# pending eighth output
1800	leaq	-112(%rcx),%rcx	# remove the key-pointer bias
1801	addq	$0x70,%rdx
1802	jle	.Lcbc_dec_clear_tail_collected	# nothing left: xmm2 is the final block
1803	movups	%xmm9,(%rsi)
1804	leaq	16(%rsi),%rsi
1805	cmpq	$0x50,%rdx
1806	jbe	.Lcbc_dec_tail
1807
1808	movaps	%xmm11,%xmm2	# restore the preloaded first block into xmm2
1809.Lcbc_dec_six_or_seven:
1810	cmpq	$0x60,%rdx
1811	ja	.Lcbc_dec_seven
1812
1813	movaps	%xmm7,%xmm8	# keep the sixth ciphertext block for the next IV
1814	call	_aesni_decrypt6
1815	pxor	%xmm10,%xmm2
1816	movaps	%xmm8,%xmm10
1817	pxor	%xmm11,%xmm3
1818	movdqu	%xmm2,(%rsi)
1819	pxor	%xmm12,%xmm4
1820	movdqu	%xmm3,16(%rsi)
1821	pxor	%xmm3,%xmm3
1822	pxor	%xmm13,%xmm5
1823	movdqu	%xmm4,32(%rsi)
1824	pxor	%xmm4,%xmm4
1825	pxor	%xmm14,%xmm6
1826	movdqu	%xmm5,48(%rsi)
1827	pxor	%xmm5,%xmm5
1828	pxor	%xmm15,%xmm7
1829	movdqu	%xmm6,64(%rsi)
1830	pxor	%xmm6,%xmm6
1831	leaq	80(%rsi),%rsi
1832	movdqa	%xmm7,%xmm2	# last block is handed to the common tail in xmm2
1833	pxor	%xmm7,%xmm7
1834	jmp	.Lcbc_dec_tail_collected
1835
1836.align	16
1837.Lcbc_dec_seven:
1838	movups	96(%rdi),%xmm8	# seventh block; the eighth lane (xmm9) is zeroed
1839	xorps	%xmm9,%xmm9
1840	call	_aesni_decrypt8
1841	movups	80(%rdi),%xmm9	# reload sixth ciphertext block for chaining
1842	pxor	%xmm10,%xmm2
1843	movups	96(%rdi),%xmm10	# seventh ciphertext block becomes the next IV
1844	pxor	%xmm11,%xmm3
1845	movdqu	%xmm2,(%rsi)
1846	pxor	%xmm12,%xmm4
1847	movdqu	%xmm3,16(%rsi)
1848	pxor	%xmm3,%xmm3
1849	pxor	%xmm13,%xmm5
1850	movdqu	%xmm4,32(%rsi)
1851	pxor	%xmm4,%xmm4
1852	pxor	%xmm14,%xmm6
1853	movdqu	%xmm5,48(%rsi)
1854	pxor	%xmm5,%xmm5
1855	pxor	%xmm15,%xmm7
1856	movdqu	%xmm6,64(%rsi)
1857	pxor	%xmm6,%xmm6
1858	pxor	%xmm9,%xmm8
1859	movdqu	%xmm7,80(%rsi)
1860	pxor	%xmm7,%xmm7
1861	leaq	96(%rsi),%rsi
1862	movdqa	%xmm8,%xmm2
1863	pxor	%xmm8,%xmm8
1864	pxor	%xmm9,%xmm9
1865	jmp	.Lcbc_dec_tail_collected
1866
1867.align	16
# Six-block loop. Each pass stores five outputs and advances rsi by 80; the
# sixth output (xmm7) is stored at the top of the next pass.
1868.Lcbc_dec_loop6:
1869	movups	%xmm7,(%rsi)
1870	leaq	16(%rsi),%rsi
1871	movdqu	0(%rdi),%xmm2
1872	movdqu	16(%rdi),%xmm3
1873	movdqa	%xmm2,%xmm11
1874	movdqu	32(%rdi),%xmm4
1875	movdqa	%xmm3,%xmm12
1876	movdqu	48(%rdi),%xmm5
1877	movdqa	%xmm4,%xmm13
1878	movdqu	64(%rdi),%xmm6
1879	movdqa	%xmm5,%xmm14
1880	movdqu	80(%rdi),%xmm7
1881	movdqa	%xmm6,%xmm15
1882.Lcbc_dec_loop6_enter:
1883	leaq	96(%rdi),%rdi
1884	movdqa	%xmm7,%xmm8	# keep the sixth ciphertext block for the next IV
1885
1886	call	_aesni_decrypt6
1887
1888	pxor	%xmm10,%xmm2
1889	movdqa	%xmm8,%xmm10
1890	pxor	%xmm11,%xmm3
1891	movdqu	%xmm2,(%rsi)
1892	pxor	%xmm12,%xmm4
1893	movdqu	%xmm3,16(%rsi)
1894	pxor	%xmm13,%xmm5
1895	movdqu	%xmm4,32(%rsi)
1896	pxor	%xmm14,%xmm6
1897	movq	%rbp,%rcx	# reset key pointer and round counter after the call
1898	movdqu	%xmm5,48(%rsi)
1899	pxor	%xmm15,%xmm7
1900	movl	%r10d,%eax
1901	movdqu	%xmm6,64(%rsi)
1902	leaq	80(%rsi),%rsi
1903	subq	$0x60,%rdx
1904	ja	.Lcbc_dec_loop6
1905
1906	movdqa	%xmm7,%xmm2	# pending sixth output
1907	addq	$0x50,%rdx
1908	jle	.Lcbc_dec_clear_tail_collected
1909	movups	%xmm7,(%rsi)
1910	leaq	16(%rsi),%rsi
1911
# Tail: load blocks one at a time, subtracting 16 from rdx after each, and
# dispatch on how many were loaded. xmm11-xmm15 receive ciphertext copies.
1912.Lcbc_dec_tail:
1913	movups	(%rdi),%xmm2
1914	subq	$0x10,%rdx
1915	jbe	.Lcbc_dec_one
1916
1917	movups	16(%rdi),%xmm3
1918	movaps	%xmm2,%xmm11
1919	subq	$0x10,%rdx
1920	jbe	.Lcbc_dec_two
1921
1922	movups	32(%rdi),%xmm4
1923	movaps	%xmm3,%xmm12
1924	subq	$0x10,%rdx
1925	jbe	.Lcbc_dec_three
1926
1927	movups	48(%rdi),%xmm5
1928	movaps	%xmm4,%xmm13
1929	subq	$0x10,%rdx
1930	jbe	.Lcbc_dec_four
1931
1932	movups	64(%rdi),%xmm6
1933	movaps	%xmm5,%xmm14
1934	movaps	%xmm6,%xmm15
1935	xorps	%xmm7,%xmm7	# five blocks: the sixth lane is zeroed
1936	call	_aesni_decrypt6
1937	pxor	%xmm10,%xmm2
1938	movaps	%xmm15,%xmm10
1939	pxor	%xmm11,%xmm3
1940	movdqu	%xmm2,(%rsi)
1941	pxor	%xmm12,%xmm4
1942	movdqu	%xmm3,16(%rsi)
1943	pxor	%xmm3,%xmm3
1944	pxor	%xmm13,%xmm5
1945	movdqu	%xmm4,32(%rsi)
1946	pxor	%xmm4,%xmm4
1947	pxor	%xmm14,%xmm6
1948	movdqu	%xmm5,48(%rsi)
1949	pxor	%xmm5,%xmm5
1950	leaq	64(%rsi),%rsi
1951	movdqa	%xmm6,%xmm2
1952	pxor	%xmm6,%xmm6
1953	pxor	%xmm7,%xmm7
1954	subq	$0x10,%rdx
1955	jmp	.Lcbc_dec_tail_collected
1956
1957.align	16
1958.Lcbc_dec_one:
1959	movaps	%xmm2,%xmm11	# ciphertext copy, becomes the next IV
1960	movups	(%rcx),%xmm0
1961	movups	16(%rcx),%xmm1
1962	leaq	32(%rcx),%rcx
1963	xorps	%xmm0,%xmm2
1964.Loop_dec1_8:
1965.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1966	decl	%eax
1967	movups	(%rcx),%xmm1
1968	leaq	16(%rcx),%rcx
1969	jnz	.Loop_dec1_8
1970.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
1971	xorps	%xmm10,%xmm2
1972	movaps	%xmm11,%xmm10
1973	jmp	.Lcbc_dec_tail_collected
1974.align	16
1975.Lcbc_dec_two:
1976	movaps	%xmm3,%xmm12
1977	call	_aesni_decrypt2
1978	pxor	%xmm10,%xmm2
1979	movaps	%xmm12,%xmm10
1980	pxor	%xmm11,%xmm3
1981	movdqu	%xmm2,(%rsi)
1982	movdqa	%xmm3,%xmm2
1983	pxor	%xmm3,%xmm3
1984	leaq	16(%rsi),%rsi
1985	jmp	.Lcbc_dec_tail_collected
1986.align	16
1987.Lcbc_dec_three:
1988	movaps	%xmm4,%xmm13
1989	call	_aesni_decrypt3
1990	pxor	%xmm10,%xmm2
1991	movaps	%xmm13,%xmm10
1992	pxor	%xmm11,%xmm3
1993	movdqu	%xmm2,(%rsi)
1994	pxor	%xmm12,%xmm4
1995	movdqu	%xmm3,16(%rsi)
1996	pxor	%xmm3,%xmm3
1997	movdqa	%xmm4,%xmm2
1998	pxor	%xmm4,%xmm4
1999	leaq	32(%rsi),%rsi
2000	jmp	.Lcbc_dec_tail_collected
2001.align	16
2002.Lcbc_dec_four:
2003	movaps	%xmm5,%xmm14
2004	call	_aesni_decrypt4
2005	pxor	%xmm10,%xmm2
2006	movaps	%xmm14,%xmm10
2007	pxor	%xmm11,%xmm3
2008	movdqu	%xmm2,(%rsi)
2009	pxor	%xmm12,%xmm4
2010	movdqu	%xmm3,16(%rsi)
2011	pxor	%xmm3,%xmm3
2012	pxor	%xmm13,%xmm5
2013	movdqu	%xmm4,32(%rsi)
2014	pxor	%xmm4,%xmm4
2015	movdqa	%xmm5,%xmm2
2016	pxor	%xmm5,%xmm5
2017	leaq	48(%rsi),%rsi
2018	jmp	.Lcbc_dec_tail_collected
2019
2020.align	16
# Common exit for decryption. On arrival xmm2 holds the last output block (not
# yet stored) and xmm10 the next IV. The clear_ entry first zeroes xmm3-xmm9.
2021.Lcbc_dec_clear_tail_collected:
2022	pxor	%xmm3,%xmm3
2023	pxor	%xmm4,%xmm4
2024	pxor	%xmm5,%xmm5
2025	pxor	%xmm6,%xmm6
2026	pxor	%xmm7,%xmm7
2027	pxor	%xmm8,%xmm8
2028	pxor	%xmm9,%xmm9
2029.Lcbc_dec_tail_collected:
2030	movups	%xmm10,(%r8)	# write back the next IV
2031	andq	$15,%rdx
2032	jnz	.Lcbc_dec_tail_partial	# low four bits of rdx non-zero: partial store
2033	movups	%xmm2,(%rsi)
2034	pxor	%xmm2,%xmm2
2035	jmp	.Lcbc_dec_ret
2036.align	16
# Partial store: spill xmm2 to the aligned scratch slot, copy 16 - rdx bytes of
# it to the output with rep movsb, then overwrite the scratch slot with zeros.
2037.Lcbc_dec_tail_partial:
2038	movaps	%xmm2,(%rsp)
2039	pxor	%xmm2,%xmm2
2040	movq	$16,%rcx
2041	movq	%rsi,%rdi
2042	subq	%rdx,%rcx
2043	leaq	(%rsp),%rsi
2044.long	0x9066A4F3
2045	movdqa	%xmm2,(%rsp)
2046
2047.Lcbc_dec_ret:
2048	xorps	%xmm0,%xmm0
2049	pxor	%xmm1,%xmm1
2050	movq	-8(%r11),%rbp	# restore rbp pushed in .Lcbc_decrypt_bulk
2051.cfi_restore	%rbp
2052	leaq	(%r11),%rsp	# restore the entry stack pointer
2053.cfi_def_cfa_register	%rsp
2054.Lcbc_ret:
2055	.byte	0xf3,0xc3
2056.cfi_endproc
2057.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
# aes_hw_set_decrypt_key: build a decryption key schedule.
# Calls __aesni_set_encrypt_key with the incoming argument registers untouched.
# If that call returned 0 in %eax, the round keys are then reversed in place
# and aesimc (.byte 102,15,56,219,...) is applied to every round key except the
# first and last. %rdx is the key schedule; %esi is the round counter left
# behind by __aesni_set_encrypt_key. %rax is returned exactly as that call set
# it. .byte 0x48,0x83,0xEC,0x08 encodes subq $8,%rsp; .byte 0xf3,0xc3 is rep ret.
2058.globl	aes_hw_set_decrypt_key
2059.hidden aes_hw_set_decrypt_key
2060.type	aes_hw_set_decrypt_key,@function
2061.align	16
2062aes_hw_set_decrypt_key:
2063.cfi_startproc
2064.byte	0x48,0x83,0xEC,0x08
2065.cfi_adjust_cfa_offset	8
2066	call	__aesni_set_encrypt_key
2067	shll	$4,%esi	# round counter * 16 = byte offset between round keys
2068	testl	%eax,%eax
2069	jnz	.Ldec_key_ret	# key setup failed: return its status unchanged
2070	leaq	16(%rdx,%rsi,1),%rdi	# rdi = last round key, rdx = first round key
2071
2072	movups	(%rdx),%xmm0	# swap first and last round keys without aesimc
2073	movups	(%rdi),%xmm1
2074	movups	%xmm0,(%rdi)
2075	movups	%xmm1,(%rdx)
2076	leaq	16(%rdx),%rdx
2077	leaq	-16(%rdi),%rdi
2078
# Walk inward from both ends: transform each pair with aesimc and swap them.
2079.Ldec_key_inverse:
2080	movups	(%rdx),%xmm0
2081	movups	(%rdi),%xmm1
2082.byte	102,15,56,219,192	# aesimc %xmm0,%xmm0
2083.byte	102,15,56,219,201	# aesimc %xmm1,%xmm1
2084	leaq	16(%rdx),%rdx
2085	leaq	-16(%rdi),%rdi
2086	movups	%xmm0,16(%rdi)
2087	movups	%xmm1,-16(%rdx)
2088	cmpq	%rdx,%rdi
2089	ja	.Ldec_key_inverse	# continue while the upper pointer is above the lower
2090
2091	movups	(%rdx),%xmm0	# remaining middle round key: transform only
2092.byte	102,15,56,219,192	# aesimc %xmm0,%xmm0
2093	pxor	%xmm1,%xmm1
2094	movups	%xmm0,(%rdi)
2095	pxor	%xmm0,%xmm0
2096.Ldec_key_ret:
2097	addq	$8,%rsp
2098.cfi_adjust_cfa_offset	-8
2099	.byte	0xf3,0xc3
2100.cfi_endproc
2101.LSEH_end_set_decrypt_key:
2102.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
# aes_hw_set_encrypt_key (also entered as __aesni_set_encrypt_key): expand a
# user key into an AES round-key schedule.
# Register roles, as used by the code below:
#   %rdi = user key bytes   %esi = key size in bits (128, 192 or 256)
#   %rdx = output key schedule
# Result in %rax: 0 on success, -1 if %rdi or %rdx is NULL, -2 for any other
# key size. On success the round counter (9, 11 or 13) is stored at offset 240
# of the schedule and is also left in %esi. xmm0-xmm5 are zeroed before return.
# Each key size has two implementations chosen from OPENSSL_ia32cap_P: one
# based on aeskeygenassist and an "_alt" one based on pshufb + aesenclast.
# Encoded instructions used here:
#   .byte 102,15,58,223,M,I = aeskeygenassist $I (M=200: %xmm0,%xmm1; M=202: %xmm2,%xmm1)
#   .byte 102,15,56,0,M     = pshufb (M=197: %xmm5,%xmm0; M=213: %xmm5,%xmm2)
#   .byte 102,15,56,221,M   = aesenclast (M=196: %xmm4,%xmm0; 212: %xmm4,%xmm2; 211: %xmm3,%xmm2)
#   .byte 0x48,0x83,0xEC,0x08 = subq $8,%rsp      .byte 0xf3,0xc3 = rep ret
2103.globl	aes_hw_set_encrypt_key
2104.hidden aes_hw_set_encrypt_key
2105.type	aes_hw_set_encrypt_key,@function
2106.align	16
2107aes_hw_set_encrypt_key:
2108__aesni_set_encrypt_key:
2109.cfi_startproc
2110#ifdef BORINGSSL_DISPATCH_TEST
2111	movb	$1,BORINGSSL_function_hit+3(%rip)
2112#endif
2113.byte	0x48,0x83,0xEC,0x08
2114.cfi_adjust_cfa_offset	8
2115	movq	$-1,%rax	# status returned for a NULL argument
2116	testq	%rdi,%rdi
2117	jz	.Lenc_key_ret
2118	testq	%rdx,%rdx
2119	jz	.Lenc_key_ret
2120
2121	movups	(%rdi),%xmm0	# xmm0 = first 16 bytes of the user key
2122	xorps	%xmm4,%xmm4	# xmm4 = 0, scratch for the shufps-based expansion helpers
2123	leaq	OPENSSL_ia32cap_P(%rip),%r10
2124	movl	4(%r10),%r10d	# second 32-bit word of the capability vector
2125	andl	$268437504,%r10d	# mask 0x10000800: keep bits 11 and 28
2126	leaq	16(%rdx),%rax	# rax = write cursor, starting at round key 1
2127	cmpl	$256,%esi
2128	je	.L14rounds
2129	cmpl	$192,%esi
2130	je	.L12rounds
2131	cmpl	$128,%esi
2132	jne	.Lbad_keybits
2133
2134.L10rounds:
2135	movl	$9,%esi	# round counter stored in the schedule for 128-bit keys
2136	cmpl	$268435456,%r10d	# only bit 28 set: take the _alt path
2137	je	.L10rounds_alt	# NOTE(review): presumably "AVX without XOP" - confirm against the perlasm source
2138
2139	movups	%xmm0,(%rdx)	# round key 0 = user key
2140.byte	102,15,58,223,200,1
2141	call	.Lkey_expansion_128_cold
2142.byte	102,15,58,223,200,2
2143	call	.Lkey_expansion_128
2144.byte	102,15,58,223,200,4
2145	call	.Lkey_expansion_128
2146.byte	102,15,58,223,200,8
2147	call	.Lkey_expansion_128
2148.byte	102,15,58,223,200,16
2149	call	.Lkey_expansion_128
2150.byte	102,15,58,223,200,32
2151	call	.Lkey_expansion_128
2152.byte	102,15,58,223,200,64
2153	call	.Lkey_expansion_128
2154.byte	102,15,58,223,200,128
2155	call	.Lkey_expansion_128
2156.byte	102,15,58,223,200,27
2157	call	.Lkey_expansion_128
2158.byte	102,15,58,223,200,54
2159	call	.Lkey_expansion_128
2160	movups	%xmm0,(%rax)	# final round key
2161	movl	%esi,80(%rax)	# round counter at schedule offset 240
2162	xorl	%eax,%eax	# status 0
2163	jmp	.Lenc_key_ret
2164
2165.align	16
# 128-bit _alt path: eight loop passes with the constant from .Lkey_rcon1
# (doubled by pslld each pass), then two unrolled steps starting from .Lkey_rcon1b.
2166.L10rounds_alt:
2167	movdqa	.Lkey_rotate(%rip),%xmm5
2168	movl	$8,%r10d
2169	movdqa	.Lkey_rcon1(%rip),%xmm4
2170	movdqa	%xmm0,%xmm2
2171	movdqu	%xmm0,(%rdx)
2172	jmp	.Loop_key128
2173
2174.align	16
2175.Loop_key128:
2176.byte	102,15,56,0,197
2177.byte	102,15,56,221,196
2178	pslld	$1,%xmm4
2179	leaq	16(%rax),%rax
2180
2181	movdqa	%xmm2,%xmm3	# xmm2 = previous round key xored with itself shifted by 4, 8 and 12 bytes
2182	pslldq	$4,%xmm2
2183	pxor	%xmm2,%xmm3
2184	pslldq	$4,%xmm2
2185	pxor	%xmm2,%xmm3
2186	pslldq	$4,%xmm2
2187	pxor	%xmm3,%xmm2
2188
2189	pxor	%xmm2,%xmm0
2190	movdqu	%xmm0,-16(%rax)
2191	movdqa	%xmm0,%xmm2
2192
2193	decl	%r10d
2194	jnz	.Loop_key128
2195
2196	movdqa	.Lkey_rcon1b(%rip),%xmm4
2197
2198.byte	102,15,56,0,197
2199.byte	102,15,56,221,196
2200	pslld	$1,%xmm4
2201
2202	movdqa	%xmm2,%xmm3
2203	pslldq	$4,%xmm2
2204	pxor	%xmm2,%xmm3
2205	pslldq	$4,%xmm2
2206	pxor	%xmm2,%xmm3
2207	pslldq	$4,%xmm2
2208	pxor	%xmm3,%xmm2
2209
2210	pxor	%xmm2,%xmm0
2211	movdqu	%xmm0,(%rax)
2212
2213	movdqa	%xmm0,%xmm2
2214.byte	102,15,56,0,197
2215.byte	102,15,56,221,196
2216
2217	movdqa	%xmm2,%xmm3
2218	pslldq	$4,%xmm2
2219	pxor	%xmm2,%xmm3
2220	pslldq	$4,%xmm2
2221	pxor	%xmm2,%xmm3
2222	pslldq	$4,%xmm2
2223	pxor	%xmm3,%xmm2
2224
2225	pxor	%xmm2,%xmm0
2226	movdqu	%xmm0,16(%rax)
2227
2228	movl	%esi,96(%rax)	# round counter at schedule offset 240
2229	xorl	%eax,%eax
2230	jmp	.Lenc_key_ret
2231
2232.align	16
2233.L12rounds:
2234	movq	16(%rdi),%xmm2	# xmm2 = remaining 8 bytes of the 192-bit user key
2235	movl	$11,%esi	# round counter stored in the schedule for 192-bit keys
2236	cmpl	$268435456,%r10d
2237	je	.L12rounds_alt
2238
2239	movups	%xmm0,(%rdx)
2240.byte	102,15,58,223,202,1
2241	call	.Lkey_expansion_192a_cold
2242.byte	102,15,58,223,202,2
2243	call	.Lkey_expansion_192b
2244.byte	102,15,58,223,202,4
2245	call	.Lkey_expansion_192a
2246.byte	102,15,58,223,202,8
2247	call	.Lkey_expansion_192b
2248.byte	102,15,58,223,202,16
2249	call	.Lkey_expansion_192a
2250.byte	102,15,58,223,202,32
2251	call	.Lkey_expansion_192b
2252.byte	102,15,58,223,202,64
2253	call	.Lkey_expansion_192a
2254.byte	102,15,58,223,202,128
2255	call	.Lkey_expansion_192b
2256	movups	%xmm0,(%rax)
2257	movl	%esi,48(%rax)	# round counter at schedule offset 240
2258	xorq	%rax,%rax
2259	jmp	.Lenc_key_ret
2260
2261.align	16
# 192-bit _alt path: eight passes, each writing 24 bytes of schedule.
2262.L12rounds_alt:
2263	movdqa	.Lkey_rotate192(%rip),%xmm5
2264	movdqa	.Lkey_rcon1(%rip),%xmm4
2265	movl	$8,%r10d
2266	movdqu	%xmm0,(%rdx)
2267	jmp	.Loop_key192
2268
2269.align	16
2270.Loop_key192:
2271	movq	%xmm2,0(%rax)	# store the low 8 bytes of xmm2
2272	movdqa	%xmm2,%xmm1
2273.byte	102,15,56,0,213
2274.byte	102,15,56,221,212
2275	pslld	$1,%xmm4
2276	leaq	24(%rax),%rax
2277
2278	movdqa	%xmm0,%xmm3
2279	pslldq	$4,%xmm0
2280	pxor	%xmm0,%xmm3
2281	pslldq	$4,%xmm0
2282	pxor	%xmm0,%xmm3
2283	pslldq	$4,%xmm0
2284	pxor	%xmm3,%xmm0
2285
2286	pshufd	$0xff,%xmm0,%xmm3
2287	pxor	%xmm1,%xmm3
2288	pslldq	$4,%xmm1
2289	pxor	%xmm1,%xmm3
2290
2291	pxor	%xmm2,%xmm0
2292	pxor	%xmm3,%xmm2
2293	movdqu	%xmm0,-16(%rax)
2294
2295	decl	%r10d
2296	jnz	.Loop_key192
2297
2298	movl	%esi,32(%rax)	# round counter at schedule offset 240
2299	xorl	%eax,%eax
2300	jmp	.Lenc_key_ret
2301
2302.align	16
2303.L14rounds:
2304	movups	16(%rdi),%xmm2	# xmm2 = second 16 bytes of the 256-bit user key
2305	movl	$13,%esi	# round counter stored in the schedule for 256-bit keys
2306	leaq	16(%rax),%rax	# write cursor starts at round key 2
2307	cmpl	$268435456,%r10d
2308	je	.L14rounds_alt
2309
2310	movups	%xmm0,(%rdx)
2311	movups	%xmm2,16(%rdx)
2312.byte	102,15,58,223,202,1
2313	call	.Lkey_expansion_256a_cold
2314.byte	102,15,58,223,200,1
2315	call	.Lkey_expansion_256b
2316.byte	102,15,58,223,202,2
2317	call	.Lkey_expansion_256a
2318.byte	102,15,58,223,200,2
2319	call	.Lkey_expansion_256b
2320.byte	102,15,58,223,202,4
2321	call	.Lkey_expansion_256a
2322.byte	102,15,58,223,200,4
2323	call	.Lkey_expansion_256b
2324.byte	102,15,58,223,202,8
2325	call	.Lkey_expansion_256a
2326.byte	102,15,58,223,200,8
2327	call	.Lkey_expansion_256b
2328.byte	102,15,58,223,202,16
2329	call	.Lkey_expansion_256a
2330.byte	102,15,58,223,200,16
2331	call	.Lkey_expansion_256b
2332.byte	102,15,58,223,202,32
2333	call	.Lkey_expansion_256a
2334.byte	102,15,58,223,200,32
2335	call	.Lkey_expansion_256b
2336.byte	102,15,58,223,202,64
2337	call	.Lkey_expansion_256a
2338	movups	%xmm0,(%rax)
2339	movl	%esi,16(%rax)	# round counter at schedule offset 240
2340	xorq	%rax,%rax
2341	jmp	.Lenc_key_ret
2342
2343.align	16
# 256-bit _alt path: seven passes; every pass writes one round key and all but
# the last also write a second one before advancing rax by 32.
2344.L14rounds_alt:
2345	movdqa	.Lkey_rotate(%rip),%xmm5
2346	movdqa	.Lkey_rcon1(%rip),%xmm4
2347	movl	$7,%r10d
2348	movdqu	%xmm0,0(%rdx)
2349	movdqa	%xmm2,%xmm1
2350	movdqu	%xmm2,16(%rdx)
2351	jmp	.Loop_key256
2352
2353.align	16
2354.Loop_key256:
2355.byte	102,15,56,0,213
2356.byte	102,15,56,221,212
2357
2358	movdqa	%xmm0,%xmm3
2359	pslldq	$4,%xmm0
2360	pxor	%xmm0,%xmm3
2361	pslldq	$4,%xmm0
2362	pxor	%xmm0,%xmm3
2363	pslldq	$4,%xmm0
2364	pxor	%xmm3,%xmm0
2365	pslld	$1,%xmm4
2366
2367	pxor	%xmm2,%xmm0
2368	movdqu	%xmm0,(%rax)
2369
2370	decl	%r10d
2371	jz	.Ldone_key256
2372
2373	pshufd	$0xff,%xmm0,%xmm2
2374	pxor	%xmm3,%xmm3
2375.byte	102,15,56,221,211	# aesenclast with an all-zero round key in xmm3
2376
2377	movdqa	%xmm1,%xmm3
2378	pslldq	$4,%xmm1
2379	pxor	%xmm1,%xmm3
2380	pslldq	$4,%xmm1
2381	pxor	%xmm1,%xmm3
2382	pslldq	$4,%xmm1
2383	pxor	%xmm3,%xmm1
2384
2385	pxor	%xmm1,%xmm2
2386	movdqu	%xmm2,16(%rax)
2387	leaq	32(%rax),%rax
2388	movdqa	%xmm2,%xmm1
2389
2390	jmp	.Loop_key256
2391
2392.Ldone_key256:
2393	movl	%esi,16(%rax)	# round counter at schedule offset 240
2394	xorl	%eax,%eax
2395	jmp	.Lenc_key_ret
2396
2397.align	16
2398.Lbad_keybits:
2399	movq	$-2,%rax	# status for an unsupported key size
# Common exit: zero the XMM registers used during expansion, release the
# 8 bytes reserved on entry and return the status in rax.
2400.Lenc_key_ret:
2401	pxor	%xmm0,%xmm0
2402	pxor	%xmm1,%xmm1
2403	pxor	%xmm2,%xmm2
2404	pxor	%xmm3,%xmm3
2405	pxor	%xmm4,%xmm4
2406	pxor	%xmm5,%xmm5
2407	addq	$8,%rsp
2408.cfi_adjust_cfa_offset	-8
2409	.byte	0xf3,0xc3
2410.cfi_endproc
2411.LSEH_end_set_encrypt_key:
2412
2413.align	16
# Local subroutines reached with call from the aeskeygenassist paths above.
# Each expects the aeskeygenassist result in xmm1 and the write cursor in rax.
# The plain entry points first store the current round key and advance rax;
# the _cold entry points skip that store.
2414.Lkey_expansion_128:
2415	movups	%xmm0,(%rax)
2416	leaq	16(%rax),%rax
2417.Lkey_expansion_128_cold:
2418	shufps	$16,%xmm0,%xmm4
2419	xorps	%xmm4,%xmm0
2420	shufps	$140,%xmm0,%xmm4
2421	xorps	%xmm4,%xmm0
2422	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the aeskeygenassist result
2423	xorps	%xmm1,%xmm0	# xmm0 = next round key
2424	.byte	0xf3,0xc3
2425
2426.align	16
2427.Lkey_expansion_192a:
2428	movups	%xmm0,(%rax)
2429	leaq	16(%rax),%rax
2430.Lkey_expansion_192a_cold:
2431	movaps	%xmm2,%xmm5	# xmm5 keeps xmm2 for the store in .Lkey_expansion_192b
2432.Lkey_expansion_192b_warm:
2433	shufps	$16,%xmm0,%xmm4
2434	movdqa	%xmm2,%xmm3
2435	xorps	%xmm4,%xmm0
2436	shufps	$140,%xmm0,%xmm4
2437	pslldq	$4,%xmm3
2438	xorps	%xmm4,%xmm0
2439	pshufd	$85,%xmm1,%xmm1	# broadcast dword 1 of the aeskeygenassist result
2440	pxor	%xmm3,%xmm2
2441	pxor	%xmm1,%xmm0
2442	pshufd	$255,%xmm0,%xmm3
2443	pxor	%xmm3,%xmm2
2444	.byte	0xf3,0xc3
2445
2446.align	16
# Stores 32 bytes of schedule assembled from xmm5, xmm0 and xmm2, advances rax
# by 32, then continues in .Lkey_expansion_192b_warm.
2447.Lkey_expansion_192b:
2448	movaps	%xmm0,%xmm3
2449	shufps	$68,%xmm0,%xmm5
2450	movups	%xmm5,(%rax)
2451	shufps	$78,%xmm2,%xmm3
2452	movups	%xmm3,16(%rax)
2453	leaq	32(%rax),%rax
2454	jmp	.Lkey_expansion_192b_warm
2455
2456.align	16
2457.Lkey_expansion_256a:
2458	movups	%xmm2,(%rax)
2459	leaq	16(%rax),%rax
2460.Lkey_expansion_256a_cold:
2461	shufps	$16,%xmm0,%xmm4
2462	xorps	%xmm4,%xmm0
2463	shufps	$140,%xmm0,%xmm4
2464	xorps	%xmm4,%xmm0
2465	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the aeskeygenassist result
2466	xorps	%xmm1,%xmm0
2467	.byte	0xf3,0xc3
2468
2469.align	16
2470.Lkey_expansion_256b:
2471	movups	%xmm0,(%rax)
2472	leaq	16(%rax),%rax
2473
2474	shufps	$16,%xmm2,%xmm4
2475	xorps	%xmm4,%xmm2
2476	shufps	$140,%xmm2,%xmm4
2477	xorps	%xmm4,%xmm2
2478	shufps	$170,%xmm1,%xmm1	# broadcast dword 2 of the aeskeygenassist result
2479	xorps	%xmm1,%xmm2
2480	.byte	0xf3,0xc3
2481.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
2482.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant tables, 64-byte aligned. In the key-setup code preceding this table
# only .Lkey_rotate, .Lkey_rotate192, .Lkey_rcon1 and .Lkey_rcon1b are
# referenced; the remaining tables are presumably used by routines elsewhere in
# the file (not verified from this chunk).
2483.align	64
2484.Lbswap_mask:
2485.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2486.Lincrement32:
2487.long	6,6,6,0
2488.Lincrement64:
2489.long	1,0,0,0
2490.Lxts_magic:
2491.long	0x87,0,1,0
2492.Lincrement1:
2493.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
# pshufb masks loaded into xmm5 by the _alt key-expansion paths.
2494.Lkey_rotate:
2495.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
2496.Lkey_rotate192:
2497.long	0x04070605,0x04070605,0x04070605,0x04070605
# Constants loaded into xmm4 by the _alt key-expansion paths, which use them
# as the aesenclast round key and double them with pslld $1 after each step.
2498.Lkey_rcon1:
2499.long	1,1,1,1
2500.Lkey_rcon1b:
2501.long	0x1b,0x1b,0x1b,0x1b
2502
# NUL-terminated ASCII string: AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
2503.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2504.align	64
2505#endif
2506.section	.note.GNU-stack,"",@progbits
2507