# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

# Under MemorySanitizer, force OPENSSL_NO_ASM so that the guard on the
# x86-64 section below compiles the assembly out.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
# aes_hw_encrypt: encrypt a single 16-byte block with AES-NI.
# Registers read on entry (System V AMD64 argument order):
#   %rdi  pointer to the 16-byte input block
#   %rsi  pointer to the 16-byte output block
#   %rdx  pointer to the key schedule; round keys are 16 bytes each starting
#         at offset 0 and the 32-bit loop count is read from offset 240
# Presumed C prototype (confirm against the declaring header):
#   void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
# The loop count is the number of aesenc rounds; one aesenclast follows.
# Clobbers %eax, %rdx, %xmm0-%xmm2 and flags. All three XMM registers are
# zeroed before returning.
.globl	aes_hw_encrypt
.hidden aes_hw_encrypt
.type	aes_hw_encrypt,@function
.align	16
aes_hw_encrypt:
.cfi_startproc
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
.extern	BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
	movb	$1,BORINGSSL_function_hit+1(%rip)	# debug, non-FIPS builds only: flag byte at BORINGSSL_function_hit+1
#endif
#endif
	movups	(%rdi),%xmm2	# xmm2 = input block
	movl	240(%rdx),%eax	# eax = loop count
	movups	(%rdx),%xmm0	# xmm0 = round key 0
	movups	16(%rdx),%xmm1	# xmm1 = round key 1
	leaq	32(%rdx),%rdx	# rdx -> round key 2
	xorps	%xmm0,%xmm2	# initial key addition
.Loop_enc1_1:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax	# sets ZF for the jnz below; movups and leaq leave flags untouched
	movups	(%rdx),%xmm1	# fetch the next round key
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0	# wipe round keys from registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)	# store the result
	pxor	%xmm2,%xmm2	# wipe the data register
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	aes_hw_encrypt,.-aes_hw_encrypt

# aes_hw_decrypt: decrypt a single 16-byte block with AES-NI.
# Registers read on entry (System V AMD64 argument order):
#   %rdi  pointer to the 16-byte input block
#   %rsi  pointer to the 16-byte output block
#   %rdx  pointer to the key schedule; round keys are 16 bytes each starting
#         at offset 0 and the 32-bit loop count is read from offset 240
# Presumed C prototype (confirm against the declaring header):
#   void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
# The loop count is the number of aesdec rounds; one aesdeclast follows.
# Clobbers %eax, %rdx, %xmm0-%xmm2 and flags. All three XMM registers are
# zeroed before returning.
.globl	aes_hw_decrypt
.hidden aes_hw_decrypt
.type	aes_hw_decrypt,@function
.align	16
aes_hw_decrypt:
.cfi_startproc
	movups	(%rdi),%xmm2	# xmm2 = input block
	movl	240(%rdx),%eax	# eax = loop count
	movups	(%rdx),%xmm0	# xmm0 = round key 0
	movups	16(%rdx),%xmm1	# xmm1 = round key 1
	leaq	32(%rdx),%rdx	# rdx -> round key 2
	xorps	%xmm0,%xmm2	# initial key addition
.Loop_dec1_2:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%eax	# sets ZF for the jnz below; movups and leaq leave flags untouched
	movups	(%rdx),%xmm1	# fetch the next round key
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0	# wipe round keys from registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)	# store the result
	pxor	%xmm2,%xmm2	# wipe the data register
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	aes_hw_decrypt, .-aes_hw_decrypt
# _aesni_encrypt2: encrypt two blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2, %xmm3 = input blocks
# Out:   %xmm2, %xmm3 = encrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# Round keys alternate between %xmm1 and %xmm0 so the next key loads while
# the current round executes. The loop leaves only when %rax reaches exactly
# zero in steps of 32, which requires the incoming count to be odd.
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16 (upper half zeroed by the 32-bit write)
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0	# round key 2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax
	addq	$16,%rax	# rax = 16 - count*16, a negative index that counts up to zero

.Lenc_loop2:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key; does not touch flags
	jnz	.Lenc_loop2

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2
# _aesni_decrypt2: decrypt two blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2, %xmm3 = input blocks
# Out:   %xmm2, %xmm3 = decrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# Round keys alternate between %xmm1 and %xmm0 so the next key loads while
# the current round executes. The loop leaves only when %rax reaches exactly
# zero in steps of 32, which requires the incoming count to be odd.
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16 (upper half zeroed by the 32-bit write)
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0	# round key 2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax
	addq	$16,%rax	# rax = 16 - count*16, a negative index that counts up to zero

.Ldec_loop2:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key; does not touch flags
	jnz	.Ldec_loop2

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_decrypt2,.-_aesni_decrypt2
# _aesni_encrypt3: encrypt three blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm4 = input blocks
# Out:   %xmm2-%xmm4 = encrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0	# round key 2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero

.Lenc_loop3:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Lenc_loop3

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3: decrypt three blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm4 = input blocks
# Out:   %xmm2-%xmm4 = decrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0	# round key 2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero

.Ldec_loop3:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Ldec_loop3

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4: encrypt four blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm5 = input blocks
# Out:   %xmm2-%xmm5 = encrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0	# round key 2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax
.byte	0x0f,0x1f,0x00	# nopl (%rax): 3-byte no-op, presumably padding for the loop below
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero

.Lenc_loop4:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Lenc_loop4

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4: decrypt four blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm5 = input blocks
# Out:   %xmm2-%xmm5 = decrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0	# round key 2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax
.byte	0x0f,0x1f,0x00	# nopl (%rax): 3-byte no-op, presumably padding for the loop below
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero

.Ldec_loop4:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Ldec_loop4

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6: encrypt six blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm7 = input blocks
# Out:   %xmm2-%xmm7 = encrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The first round on %xmm2-%xmm4 is interleaved with the initial key
# addition of %xmm5-%xmm7, so the loop is entered part-way through at
# .Lenc_loop6_enter. The label .Lenc_loop6 is also a call target from
# aes_hw_ctr32_encrypt_blocks later in this file.
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition, blocks 0-2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax	# rax = -count*16
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5	# initial key addition, blocks 3-5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0	# rcx+rax = key+32: round key 2
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.Lenc_loop6_enter:
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Lenc_loop6

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6: decrypt six blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm7 = input blocks
# Out:   %xmm2-%xmm7 = decrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The first round on %xmm2-%xmm4 is interleaved with the initial key
# addition of %xmm5-%xmm7, so the loop is entered part-way through at
# .Ldec_loop6_enter.
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition, blocks 0-2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax	# rax = -count*16
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm5	# initial key addition, blocks 3-5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0	# rcx+rax = key+32: round key 2
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.Ldec_loop6_enter:
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Ldec_loop6

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8: encrypt eight blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm9 = input blocks
# Out:   %xmm2-%xmm9 = encrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The first round on %xmm2/%xmm3 is interleaved with the initial key
# addition of the later blocks, so the loop is entered part-way through at
# .Lenc_loop8_inner. The label .Lenc_loop8_enter is also a call target from
# aes_hw_ctr32_encrypt_blocks later in this file.
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax	# rax = -count*16
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0	# rcx+rax = key+32: round key 2
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.Lenc_loop8_inner:
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192	# aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200	# aesenc %xmm0,%xmm9
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Lenc_loop8

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
.byte	102,68,15,56,221,192	# aesenclast %xmm0,%xmm8
.byte	102,68,15,56,221,200	# aesenclast %xmm0,%xmm9
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8: decrypt eight blocks in parallel.
# File-local helper (no .globl); it uses a private register convention
# rather than the C calling convention.
# In:    %rcx = key schedule pointer
#        %eax = loop count (callers in this file load it from 240(%rcx))
#        %xmm2-%xmm9 = input blocks
# Out:   %xmm2-%xmm9 = decrypted blocks
# Clobb: %rax, %rcx, %xmm0, %xmm1, flags
# The first round on %xmm2/%xmm3 is interleaved with the initial key
# addition of the later blocks, so the loop is entered part-way through at
# .Ldec_loop8_inner.
# The loop leaves only when %rax reaches exactly zero in steps of 32, which
# requires the incoming count to be odd.
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0	# round key 0
	shll	$4,%eax	# rax = count * 16
	movups	16(%rcx),%xmm1	# round key 1
	xorps	%xmm0,%xmm2	# initial key addition
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx	# rcx = key + 32 + count*16
	negq	%rax	# rax = -count*16
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0	# rcx+rax = key+32: round key 2
	addq	$16,%rax	# rax = 16 - count*16, counts up to zero
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.Ldec_loop8_inner:
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1	# next odd-slot round key
	addq	$32,%rax	# sets ZF for the jnz below
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
.byte	102,68,15,56,222,192	# aesdec %xmm0,%xmm8
.byte	102,68,15,56,222,200	# aesdec %xmm0,%xmm9
	movups	-16(%rcx,%rax,1),%xmm0	# next even-slot round key
	jnz	.Ldec_loop8

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
.byte	102,68,15,56,223,192	# aesdeclast %xmm0,%xmm8
.byte	102,68,15,56,223,200	# aesdeclast %xmm0,%xmm9
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_aesni_decrypt8,.-_aesni_decrypt8
# aes_hw_ecb_encrypt: ECB-mode bulk encryption or decryption with AES-NI.
# Registers read on entry (System V AMD64 argument order):
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  length in bytes; rounded down to a multiple of 16, and the
#         function returns without touching memory when that is zero
#   %rcx  key schedule pointer (32-bit loop count at offset 240)
#   %r8d  direction: non-zero encrypts, zero decrypts
# Presumed C prototype (confirm against the declaring header):
#   void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
#                           const AES_KEY *key, int enc)
# Internals: %r11 keeps the key pointer and %r10d the loop count so that
# %rcx and %eax can be reloaded after a helper call has clobbered them.
# Data is handled eight blocks at a time while at least 0x80 bytes remain;
# the last one to seven blocks are dispatched to the 1/2/3/4/6/8-way code.
# Five blocks use the 6-way helper and seven blocks use the 8-way helper,
# each with a zeroed spare register whose result is discarded.
# The decrypt path zeroes every data register after storing it; both paths
# zero %xmm0 and %xmm1 before returning.
.globl	aes_hw_ecb_encrypt
.hidden aes_hw_ecb_encrypt
.type	aes_hw_ecb_encrypt,@function
.align	16
aes_hw_ecb_encrypt:
.cfi_startproc
	andq	$-16,%rdx	# drop any partial trailing block
	jz	.Lecb_ret

	movl	240(%rcx),%eax	# eax = loop count
	movups	(%rcx),%xmm0	# xmm0 = round key 0
	movq	%rcx,%r11	# r11 = saved key pointer
	movl	%eax,%r10d	# r10d = saved loop count
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx
	jb	.Lecb_enc_tail	# fewer than eight blocks in total

	movdqu	(%rdi),%xmm2	# preload the first eight blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	# Store the previous eight results while loading the next eight inputs,
	# and restore %rcx/%eax for the helper.
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8	# no borrow: another full group of eight remains

	movups	%xmm2,(%rsi)	# flush the final group of eight
	movq	%r11,%rcx	# restore key pointer for the tail
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax	# restore loop count for the tail
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx	# undo the borrow; rdx = bytes left (0x00-0x70)
	jz	.Lecb_ret

.Lecb_enc_tail:
	# rdx is 0x10-0x70 here. Each cmpq feeds both a jb and a je; the movups
	# between them does not modify flags.
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8	# seven blocks: run the 8-way helper
	xorps	%xmm9,%xmm9	# eighth lane is a zero block; its output is not stored
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0	# round key 0
	movups	16(%rcx),%xmm1	# round key 1
	leaq	32(%rcx),%rcx	# rcx -> round key 2
	xorps	%xmm0,%xmm2	# initial key addition
.Loop_enc1_3:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax	# sets ZF for the jnz below
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7	# sixth lane is a zero block; its output is not stored
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail	# fewer than eight blocks in total

	movdqu	(%rdi),%xmm2	# preload the first eight blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	# Store the previous eight results while loading the next eight inputs,
	# and restore %rcx/%eax for the helper.
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0	# xmm0 = round key 0 again
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8	# no borrow: another full group of eight remains

	movups	%xmm2,(%rsi)	# flush the final group, zeroing each register after its store
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx	# restore key pointer for the tail
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax	# restore loop count for the tail
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx	# undo the borrow; rdx = bytes left (0x00-0x70)
	jz	.Lecb_ret

.Lecb_dec_tail:
	# rdx is 0x10-0x70 here. Each cmpq feeds both a jb and a je; the movups
	# between them does not modify flags.
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8	# seven blocks: run the 8-way helper
	movups	(%rcx),%xmm0	# xmm0 = round key 0
	xorps	%xmm9,%xmm9	# eighth lane is a zero block; its output is not stored
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9	# also wipe the unused eighth lane
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0	# round key 0
	movups	16(%rcx),%xmm1	# round key 1
	leaq	32(%rcx),%rcx	# rcx -> round key 2
	xorps	%xmm0,%xmm2	# initial key addition
.Loop_dec1_4:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%eax	# sets ZF for the jnz below
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7	# sixth lane is a zero block; its output is not stored
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7	# also wipe the unused sixth lane
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	# falls through to the common exit

.Lecb_ret:
	xorps	%xmm0,%xmm0	# wipe round keys from registers
	pxor	%xmm1,%xmm1
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
884.globl	aes_hw_ctr32_encrypt_blocks
885.hidden aes_hw_ctr32_encrypt_blocks
886.type	aes_hw_ctr32_encrypt_blocks,@function
887.align	16
888aes_hw_ctr32_encrypt_blocks:
889.cfi_startproc
890#ifndef NDEBUG
891#ifndef BORINGSSL_FIPS
892	movb	$1,BORINGSSL_function_hit(%rip)
893#endif
894#endif
895	cmpq	$1,%rdx
896	jne	.Lctr32_bulk
897
898
899
900	movups	(%r8),%xmm2
901	movups	(%rdi),%xmm3
902	movl	240(%rcx),%edx
903	movups	(%rcx),%xmm0
904	movups	16(%rcx),%xmm1
905	leaq	32(%rcx),%rcx
906	xorps	%xmm0,%xmm2
907.Loop_enc1_5:
908.byte	102,15,56,220,209
909	decl	%edx
910	movups	(%rcx),%xmm1
911	leaq	16(%rcx),%rcx
912	jnz	.Loop_enc1_5
913.byte	102,15,56,221,209
914	pxor	%xmm0,%xmm0
915	pxor	%xmm1,%xmm1
916	xorps	%xmm3,%xmm2
917	pxor	%xmm3,%xmm3
918	movups	%xmm2,(%rsi)
919	xorps	%xmm2,%xmm2
920	jmp	.Lctr32_epilogue
921
922.align	16
923.Lctr32_bulk:
924	leaq	(%rsp),%r11
925.cfi_def_cfa_register	%r11
926	pushq	%rbp
927.cfi_offset	%rbp,-16
928	subq	$128,%rsp
929	andq	$-16,%rsp
930
931
932
933
934	movdqu	(%r8),%xmm2
935	movdqu	(%rcx),%xmm0
936	movl	12(%r8),%r8d
937	pxor	%xmm0,%xmm2
938	movl	12(%rcx),%ebp
939	movdqa	%xmm2,0(%rsp)
940	bswapl	%r8d
941	movdqa	%xmm2,%xmm3
942	movdqa	%xmm2,%xmm4
943	movdqa	%xmm2,%xmm5
944	movdqa	%xmm2,64(%rsp)
945	movdqa	%xmm2,80(%rsp)
946	movdqa	%xmm2,96(%rsp)
947	movq	%rdx,%r10
948	movdqa	%xmm2,112(%rsp)
949
950	leaq	1(%r8),%rax
951	leaq	2(%r8),%rdx
952	bswapl	%eax
953	bswapl	%edx
954	xorl	%ebp,%eax
955	xorl	%ebp,%edx
956.byte	102,15,58,34,216,3
957	leaq	3(%r8),%rax
958	movdqa	%xmm3,16(%rsp)
959.byte	102,15,58,34,226,3
960	bswapl	%eax
961	movq	%r10,%rdx
962	leaq	4(%r8),%r10
963	movdqa	%xmm4,32(%rsp)
964	xorl	%ebp,%eax
965	bswapl	%r10d
966.byte	102,15,58,34,232,3
967	xorl	%ebp,%r10d
968	movdqa	%xmm5,48(%rsp)
969	leaq	5(%r8),%r9
970	movl	%r10d,64+12(%rsp)
971	bswapl	%r9d
972	leaq	6(%r8),%r10
973	movl	240(%rcx),%eax
974	xorl	%ebp,%r9d
975	bswapl	%r10d
976	movl	%r9d,80+12(%rsp)
977	xorl	%ebp,%r10d
978	leaq	7(%r8),%r9
979	movl	%r10d,96+12(%rsp)
980	bswapl	%r9d
981	leaq	OPENSSL_ia32cap_P(%rip),%r10
982	movl	4(%r10),%r10d
983	xorl	%ebp,%r9d
984	andl	$71303168,%r10d
985	movl	%r9d,112+12(%rsp)
986
987	movups	16(%rcx),%xmm1
988
989	movdqa	64(%rsp),%xmm6
990	movdqa	80(%rsp),%xmm7
991
992	cmpq	$8,%rdx
993	jb	.Lctr32_tail
994
995	subq	$6,%rdx
996	cmpl	$4194304,%r10d
997	je	.Lctr32_6x
998
999	leaq	128(%rcx),%rcx
1000	subq	$2,%rdx
1001	jmp	.Lctr32_loop8
1002
1003.align	16
1004.Lctr32_6x:
1005	shll	$4,%eax
1006	movl	$48,%r10d
1007	bswapl	%ebp
1008	leaq	32(%rcx,%rax,1),%rcx
1009	subq	%rax,%r10
1010	jmp	.Lctr32_loop6
1011
1012.align	16
1013.Lctr32_loop6:
1014	addl	$6,%r8d
1015	movups	-48(%rcx,%r10,1),%xmm0
1016.byte	102,15,56,220,209
1017	movl	%r8d,%eax
1018	xorl	%ebp,%eax
1019.byte	102,15,56,220,217
1020.byte	0x0f,0x38,0xf1,0x44,0x24,12
1021	leal	1(%r8),%eax
1022.byte	102,15,56,220,225
1023	xorl	%ebp,%eax
1024.byte	0x0f,0x38,0xf1,0x44,0x24,28
1025.byte	102,15,56,220,233
1026	leal	2(%r8),%eax
1027	xorl	%ebp,%eax
1028.byte	102,15,56,220,241
1029.byte	0x0f,0x38,0xf1,0x44,0x24,44
1030	leal	3(%r8),%eax
1031.byte	102,15,56,220,249
1032	movups	-32(%rcx,%r10,1),%xmm1
1033	xorl	%ebp,%eax
1034
1035.byte	102,15,56,220,208
1036.byte	0x0f,0x38,0xf1,0x44,0x24,60
1037	leal	4(%r8),%eax
1038.byte	102,15,56,220,216
1039	xorl	%ebp,%eax
1040.byte	0x0f,0x38,0xf1,0x44,0x24,76
1041.byte	102,15,56,220,224
1042	leal	5(%r8),%eax
1043	xorl	%ebp,%eax
1044.byte	102,15,56,220,232
1045.byte	0x0f,0x38,0xf1,0x44,0x24,92
1046	movq	%r10,%rax
1047.byte	102,15,56,220,240
1048.byte	102,15,56,220,248
1049	movups	-16(%rcx,%r10,1),%xmm0
1050
1051	call	.Lenc_loop6
1052
1053	movdqu	(%rdi),%xmm8
1054	movdqu	16(%rdi),%xmm9
1055	movdqu	32(%rdi),%xmm10
1056	movdqu	48(%rdi),%xmm11
1057	movdqu	64(%rdi),%xmm12
1058	movdqu	80(%rdi),%xmm13
1059	leaq	96(%rdi),%rdi
1060	movups	-64(%rcx,%r10,1),%xmm1
1061	pxor	%xmm2,%xmm8
1062	movaps	0(%rsp),%xmm2
1063	pxor	%xmm3,%xmm9
1064	movaps	16(%rsp),%xmm3
1065	pxor	%xmm4,%xmm10
1066	movaps	32(%rsp),%xmm4
1067	pxor	%xmm5,%xmm11
1068	movaps	48(%rsp),%xmm5
1069	pxor	%xmm6,%xmm12
1070	movaps	64(%rsp),%xmm6
1071	pxor	%xmm7,%xmm13
1072	movaps	80(%rsp),%xmm7
1073	movdqu	%xmm8,(%rsi)
1074	movdqu	%xmm9,16(%rsi)
1075	movdqu	%xmm10,32(%rsi)
1076	movdqu	%xmm11,48(%rsi)
1077	movdqu	%xmm12,64(%rsi)
1078	movdqu	%xmm13,80(%rsi)
1079	leaq	96(%rsi),%rsi
1080
1081	subq	$6,%rdx
1082	jnc	.Lctr32_loop6
1083
1084	addq	$6,%rdx
1085	jz	.Lctr32_done
1086
1087	leal	-48(%r10),%eax
1088	leaq	-80(%rcx,%r10,1),%rcx
1089	negl	%eax
1090	shrl	$4,%eax
1091	jmp	.Lctr32_tail
1092
1093.align	32
1094.Lctr32_loop8:
1095	addl	$8,%r8d
1096	movdqa	96(%rsp),%xmm8
1097.byte	102,15,56,220,209
1098	movl	%r8d,%r9d
1099	movdqa	112(%rsp),%xmm9
1100.byte	102,15,56,220,217
1101	bswapl	%r9d
1102	movups	32-128(%rcx),%xmm0
1103.byte	102,15,56,220,225
1104	xorl	%ebp,%r9d
1105	nop
1106.byte	102,15,56,220,233
1107	movl	%r9d,0+12(%rsp)
1108	leaq	1(%r8),%r9
1109.byte	102,15,56,220,241
1110.byte	102,15,56,220,249
1111.byte	102,68,15,56,220,193
1112.byte	102,68,15,56,220,201
1113	movups	48-128(%rcx),%xmm1
1114	bswapl	%r9d
1115.byte	102,15,56,220,208
1116.byte	102,15,56,220,216
1117	xorl	%ebp,%r9d
1118.byte	0x66,0x90
1119.byte	102,15,56,220,224
1120.byte	102,15,56,220,232
1121	movl	%r9d,16+12(%rsp)
1122	leaq	2(%r8),%r9
1123.byte	102,15,56,220,240
1124.byte	102,15,56,220,248
1125.byte	102,68,15,56,220,192
1126.byte	102,68,15,56,220,200
1127	movups	64-128(%rcx),%xmm0
1128	bswapl	%r9d
1129.byte	102,15,56,220,209
1130.byte	102,15,56,220,217
1131	xorl	%ebp,%r9d
1132.byte	0x66,0x90
1133.byte	102,15,56,220,225
1134.byte	102,15,56,220,233
1135	movl	%r9d,32+12(%rsp)
1136	leaq	3(%r8),%r9
1137.byte	102,15,56,220,241
1138.byte	102,15,56,220,249
1139.byte	102,68,15,56,220,193
1140.byte	102,68,15,56,220,201
1141	movups	80-128(%rcx),%xmm1
1142	bswapl	%r9d
1143.byte	102,15,56,220,208
1144.byte	102,15,56,220,216
1145	xorl	%ebp,%r9d
1146.byte	0x66,0x90
1147.byte	102,15,56,220,224
1148.byte	102,15,56,220,232
1149	movl	%r9d,48+12(%rsp)
1150	leaq	4(%r8),%r9
1151.byte	102,15,56,220,240
1152.byte	102,15,56,220,248
1153.byte	102,68,15,56,220,192
1154.byte	102,68,15,56,220,200
1155	movups	96-128(%rcx),%xmm0
1156	bswapl	%r9d
1157.byte	102,15,56,220,209
1158.byte	102,15,56,220,217
1159	xorl	%ebp,%r9d
1160.byte	0x66,0x90
1161.byte	102,15,56,220,225
1162.byte	102,15,56,220,233
1163	movl	%r9d,64+12(%rsp)
1164	leaq	5(%r8),%r9
1165.byte	102,15,56,220,241
1166.byte	102,15,56,220,249
1167.byte	102,68,15,56,220,193
1168.byte	102,68,15,56,220,201
1169	movups	112-128(%rcx),%xmm1
1170	bswapl	%r9d
1171.byte	102,15,56,220,208
1172.byte	102,15,56,220,216
1173	xorl	%ebp,%r9d
1174.byte	0x66,0x90
1175.byte	102,15,56,220,224
1176.byte	102,15,56,220,232
1177	movl	%r9d,80+12(%rsp)
1178	leaq	6(%r8),%r9
1179.byte	102,15,56,220,240
1180.byte	102,15,56,220,248
1181.byte	102,68,15,56,220,192
1182.byte	102,68,15,56,220,200
1183	movups	128-128(%rcx),%xmm0
1184	bswapl	%r9d
1185.byte	102,15,56,220,209
1186.byte	102,15,56,220,217
1187	xorl	%ebp,%r9d
1188.byte	0x66,0x90
1189.byte	102,15,56,220,225
1190.byte	102,15,56,220,233
1191	movl	%r9d,96+12(%rsp)
1192	leaq	7(%r8),%r9
1193.byte	102,15,56,220,241
1194.byte	102,15,56,220,249
1195.byte	102,68,15,56,220,193
1196.byte	102,68,15,56,220,201
1197	movups	144-128(%rcx),%xmm1
1198	bswapl	%r9d
1199.byte	102,15,56,220,208
1200.byte	102,15,56,220,216
1201.byte	102,15,56,220,224
1202	xorl	%ebp,%r9d
1203	movdqu	0(%rdi),%xmm10
1204.byte	102,15,56,220,232
1205	movl	%r9d,112+12(%rsp)
1206	cmpl	$11,%eax
1207.byte	102,15,56,220,240
1208.byte	102,15,56,220,248
1209.byte	102,68,15,56,220,192
1210.byte	102,68,15,56,220,200
1211	movups	160-128(%rcx),%xmm0
1212
1213	jb	.Lctr32_enc_done
1214
1215.byte	102,15,56,220,209
1216.byte	102,15,56,220,217
1217.byte	102,15,56,220,225
1218.byte	102,15,56,220,233
1219.byte	102,15,56,220,241
1220.byte	102,15,56,220,249
1221.byte	102,68,15,56,220,193
1222.byte	102,68,15,56,220,201
1223	movups	176-128(%rcx),%xmm1
1224
1225.byte	102,15,56,220,208
1226.byte	102,15,56,220,216
1227.byte	102,15,56,220,224
1228.byte	102,15,56,220,232
1229.byte	102,15,56,220,240
1230.byte	102,15,56,220,248
1231.byte	102,68,15,56,220,192
1232.byte	102,68,15,56,220,200
1233	movups	192-128(%rcx),%xmm0
1234	je	.Lctr32_enc_done
1235
1236.byte	102,15,56,220,209
1237.byte	102,15,56,220,217
1238.byte	102,15,56,220,225
1239.byte	102,15,56,220,233
1240.byte	102,15,56,220,241
1241.byte	102,15,56,220,249
1242.byte	102,68,15,56,220,193
1243.byte	102,68,15,56,220,201
1244	movups	208-128(%rcx),%xmm1
1245
1246.byte	102,15,56,220,208
1247.byte	102,15,56,220,216
1248.byte	102,15,56,220,224
1249.byte	102,15,56,220,232
1250.byte	102,15,56,220,240
1251.byte	102,15,56,220,248
1252.byte	102,68,15,56,220,192
1253.byte	102,68,15,56,220,200
1254	movups	224-128(%rcx),%xmm0
1255	jmp	.Lctr32_enc_done
1256
1257.align	16
1258.Lctr32_enc_done:
1259	movdqu	16(%rdi),%xmm11
1260	pxor	%xmm0,%xmm10
1261	movdqu	32(%rdi),%xmm12
1262	pxor	%xmm0,%xmm11
1263	movdqu	48(%rdi),%xmm13
1264	pxor	%xmm0,%xmm12
1265	movdqu	64(%rdi),%xmm14
1266	pxor	%xmm0,%xmm13
1267	movdqu	80(%rdi),%xmm15
1268	pxor	%xmm0,%xmm14
1269	pxor	%xmm0,%xmm15
1270.byte	102,15,56,220,209
1271.byte	102,15,56,220,217
1272.byte	102,15,56,220,225
1273.byte	102,15,56,220,233
1274.byte	102,15,56,220,241
1275.byte	102,15,56,220,249
1276.byte	102,68,15,56,220,193
1277.byte	102,68,15,56,220,201
1278	movdqu	96(%rdi),%xmm1
1279	leaq	128(%rdi),%rdi
1280
1281.byte	102,65,15,56,221,210
1282	pxor	%xmm0,%xmm1
1283	movdqu	112-128(%rdi),%xmm10
1284.byte	102,65,15,56,221,219
1285	pxor	%xmm0,%xmm10
1286	movdqa	0(%rsp),%xmm11
1287.byte	102,65,15,56,221,228
1288.byte	102,65,15,56,221,237
1289	movdqa	16(%rsp),%xmm12
1290	movdqa	32(%rsp),%xmm13
1291.byte	102,65,15,56,221,246
1292.byte	102,65,15,56,221,255
1293	movdqa	48(%rsp),%xmm14
1294	movdqa	64(%rsp),%xmm15
1295.byte	102,68,15,56,221,193
1296	movdqa	80(%rsp),%xmm0
1297	movups	16-128(%rcx),%xmm1
1298.byte	102,69,15,56,221,202
1299
1300	movups	%xmm2,(%rsi)
1301	movdqa	%xmm11,%xmm2
1302	movups	%xmm3,16(%rsi)
1303	movdqa	%xmm12,%xmm3
1304	movups	%xmm4,32(%rsi)
1305	movdqa	%xmm13,%xmm4
1306	movups	%xmm5,48(%rsi)
1307	movdqa	%xmm14,%xmm5
1308	movups	%xmm6,64(%rsi)
1309	movdqa	%xmm15,%xmm6
1310	movups	%xmm7,80(%rsi)
1311	movdqa	%xmm0,%xmm7
1312	movups	%xmm8,96(%rsi)
1313	movups	%xmm9,112(%rsi)
1314	leaq	128(%rsi),%rsi
1315
1316	subq	$8,%rdx
1317	jnc	.Lctr32_loop8
1318
1319	addq	$8,%rdx
1320	jz	.Lctr32_done
1321	leaq	-128(%rcx),%rcx
1322
1323.Lctr32_tail:
1324
1325
1326	leaq	16(%rcx),%rcx
1327	cmpq	$4,%rdx
1328	jb	.Lctr32_loop3
1329	je	.Lctr32_loop4
1330
1331
1332	shll	$4,%eax
1333	movdqa	96(%rsp),%xmm8
1334	pxor	%xmm9,%xmm9
1335
1336	movups	16(%rcx),%xmm0
1337.byte	102,15,56,220,209
1338.byte	102,15,56,220,217
1339	leaq	32-16(%rcx,%rax,1),%rcx
1340	negq	%rax
1341.byte	102,15,56,220,225
1342	addq	$16,%rax
1343	movups	(%rdi),%xmm10
1344.byte	102,15,56,220,233
1345.byte	102,15,56,220,241
1346	movups	16(%rdi),%xmm11
1347	movups	32(%rdi),%xmm12
1348.byte	102,15,56,220,249
1349.byte	102,68,15,56,220,193
1350
1351	call	.Lenc_loop8_enter
1352
1353	movdqu	48(%rdi),%xmm13
1354	pxor	%xmm10,%xmm2
1355	movdqu	64(%rdi),%xmm10
1356	pxor	%xmm11,%xmm3
1357	movdqu	%xmm2,(%rsi)
1358	pxor	%xmm12,%xmm4
1359	movdqu	%xmm3,16(%rsi)
1360	pxor	%xmm13,%xmm5
1361	movdqu	%xmm4,32(%rsi)
1362	pxor	%xmm10,%xmm6
1363	movdqu	%xmm5,48(%rsi)
1364	movdqu	%xmm6,64(%rsi)
1365	cmpq	$6,%rdx
1366	jb	.Lctr32_done
1367
1368	movups	80(%rdi),%xmm11
1369	xorps	%xmm11,%xmm7
1370	movups	%xmm7,80(%rsi)
1371	je	.Lctr32_done
1372
1373	movups	96(%rdi),%xmm12
1374	xorps	%xmm12,%xmm8
1375	movups	%xmm8,96(%rsi)
1376	jmp	.Lctr32_done
1377
1378.align	32
1379.Lctr32_loop4:
1380.byte	102,15,56,220,209
1381	leaq	16(%rcx),%rcx
1382	decl	%eax
1383.byte	102,15,56,220,217
1384.byte	102,15,56,220,225
1385.byte	102,15,56,220,233
1386	movups	(%rcx),%xmm1
1387	jnz	.Lctr32_loop4
1388.byte	102,15,56,221,209
1389.byte	102,15,56,221,217
1390	movups	(%rdi),%xmm10
1391	movups	16(%rdi),%xmm11
1392.byte	102,15,56,221,225
1393.byte	102,15,56,221,233
1394	movups	32(%rdi),%xmm12
1395	movups	48(%rdi),%xmm13
1396
1397	xorps	%xmm10,%xmm2
1398	movups	%xmm2,(%rsi)
1399	xorps	%xmm11,%xmm3
1400	movups	%xmm3,16(%rsi)
1401	pxor	%xmm12,%xmm4
1402	movdqu	%xmm4,32(%rsi)
1403	pxor	%xmm13,%xmm5
1404	movdqu	%xmm5,48(%rsi)
1405	jmp	.Lctr32_done
1406
1407.align	32
1408.Lctr32_loop3:
1409.byte	102,15,56,220,209
1410	leaq	16(%rcx),%rcx
1411	decl	%eax
1412.byte	102,15,56,220,217
1413.byte	102,15,56,220,225
1414	movups	(%rcx),%xmm1
1415	jnz	.Lctr32_loop3
1416.byte	102,15,56,221,209
1417.byte	102,15,56,221,217
1418.byte	102,15,56,221,225
1419
1420	movups	(%rdi),%xmm10
1421	xorps	%xmm10,%xmm2
1422	movups	%xmm2,(%rsi)
1423	cmpq	$2,%rdx
1424	jb	.Lctr32_done
1425
1426	movups	16(%rdi),%xmm11
1427	xorps	%xmm11,%xmm3
1428	movups	%xmm3,16(%rsi)
1429	je	.Lctr32_done
1430
1431	movups	32(%rdi),%xmm12
1432	xorps	%xmm12,%xmm4
1433	movups	%xmm4,32(%rsi)
1434
1435.Lctr32_done:
1436	xorps	%xmm0,%xmm0
1437	xorl	%ebp,%ebp
1438	pxor	%xmm1,%xmm1
1439	pxor	%xmm2,%xmm2
1440	pxor	%xmm3,%xmm3
1441	pxor	%xmm4,%xmm4
1442	pxor	%xmm5,%xmm5
1443	pxor	%xmm6,%xmm6
1444	pxor	%xmm7,%xmm7
1445	movaps	%xmm0,0(%rsp)
1446	pxor	%xmm8,%xmm8
1447	movaps	%xmm0,16(%rsp)
1448	pxor	%xmm9,%xmm9
1449	movaps	%xmm0,32(%rsp)
1450	pxor	%xmm10,%xmm10
1451	movaps	%xmm0,48(%rsp)
1452	pxor	%xmm11,%xmm11
1453	movaps	%xmm0,64(%rsp)
1454	pxor	%xmm12,%xmm12
1455	movaps	%xmm0,80(%rsp)
1456	pxor	%xmm13,%xmm13
1457	movaps	%xmm0,96(%rsp)
1458	pxor	%xmm14,%xmm14
1459	movaps	%xmm0,112(%rsp)
1460	pxor	%xmm15,%xmm15
1461	movq	-8(%r11),%rbp
1462.cfi_restore	%rbp
1463	leaq	(%r11),%rsp
1464.cfi_def_cfa_register	%rsp
1465.Lctr32_epilogue:
1466	.byte	0xf3,0xc3
1467.cfi_endproc
1468.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
# aes_hw_cbc_encrypt: AES-NI CBC encryption and decryption.
# Register use on entry, as read by the code below:
#   %rdi = input pointer, %rsi = output pointer, %rdx = length in bytes,
#   %rcx = key schedule (round counter is read from offset 240),
#   %r8  = 16-byte IV, read on entry and rewritten before returning,
#   %r9d = direction flag: zero selects decryption, nonzero encryption.
# A zero length returns immediately without touching any buffer.
# AES instructions are emitted as raw .byte sequences; the decoded
# mnemonic is noted next to the first use of each encoding.
1469.globl	aes_hw_cbc_encrypt
1470.hidden aes_hw_cbc_encrypt
1471.type	aes_hw_cbc_encrypt,@function
1472.align	16
1473aes_hw_cbc_encrypt:
1474.cfi_startproc
1475	testq	%rdx,%rdx		# nothing to do for a zero length
1476	jz	.Lcbc_ret
1477
1478	movl	240(%rcx),%r10d		# r10d = round counter kept in the key schedule
1479	movq	%rcx,%r11		# r11 = saved key pointer (rcx walks the round keys)
1480	testl	%r9d,%r9d		# direction flag: zero means decrypt
1481	jz	.Lcbc_decrypt
1482
# ---- Encryption: one block at a time, xmm2 carries the chaining value ----
1483	movups	(%r8),%xmm2		# xmm2 = IV
1484	movl	%r10d,%eax		# eax = working round counter
1485	cmpq	$16,%rdx
1486	jb	.Lcbc_enc_tail		# fewer than 16 bytes: pad first
1487	subq	$16,%rdx		# pre-subtract so a borrow marks the last full block
1488	jmp	.Lcbc_enc_loop
1489.align	16
1490.Lcbc_enc_loop:
1491	movups	(%rdi),%xmm3		# xmm3 = next plaintext block
1492	leaq	16(%rdi),%rdi
1493
1494	movups	(%rcx),%xmm0		# xmm0 = round key 0
1495	movups	16(%rcx),%xmm1		# xmm1 = round key 1
1496	xorps	%xmm0,%xmm3		# plaintext ^ round key 0
1497	leaq	32(%rcx),%rcx
1498	xorps	%xmm3,%xmm2		# chaining value ^ plaintext ^ round key 0
1499.Loop_enc1_6:
1500.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
1501	decl	%eax
1502	movups	(%rcx),%xmm1		# fetch the next round key
1503	leaq	16(%rcx),%rcx
1504	jnz	.Loop_enc1_6		# flags come from decl; movups and leaq leave them alone
1505.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
1506	movl	%r10d,%eax		# reset round counter and key pointer for the next block
1507	movq	%r11,%rcx
1508	movups	%xmm2,0(%rsi)		# emit ciphertext; xmm2 stays as the chaining value
1509	leaq	16(%rsi),%rsi
1510	subq	$16,%rdx
1511	jnc	.Lcbc_enc_loop		# no borrow: another full block was available
1512	addq	$16,%rdx		# undo the last subtraction; nonzero means 1..15 bytes remain
1513	jnz	.Lcbc_enc_tail
1514	pxor	%xmm0,%xmm0		# scrub key material from registers
1515	pxor	%xmm1,%xmm1
1516	movups	%xmm2,(%r8)		# write the last ciphertext block back as the new IV
1517	pxor	%xmm2,%xmm2
1518	pxor	%xmm3,%xmm3
1519	jmp	.Lcbc_ret
1520
# Partial final block: copy the remaining bytes to the output, zero-fill the
# rest of that 16-byte block, then encrypt it in place through the main loop.
1521.Lcbc_enc_tail:
1522	movq	%rdx,%rcx		# rcx = number of leftover bytes
1523	xchgq	%rdi,%rsi		# rdi = output, rsi = input for the string copy
1524.long	0x9066A4F3		# bytes F3 A4 66 90: rep movsb, then a 2-byte nop
1525	movl	$16,%ecx
1526	subq	%rdx,%rcx		# rcx = 16 - leftover = number of pad bytes
1527	xorl	%eax,%eax		# al = 0, the fill value
1528.long	0x9066AAF3		# bytes F3 AA 66 90: rep stosb, then a 2-byte nop
1529	leaq	-16(%rdi),%rdi		# back to the start of the padded block
1530	movl	%r10d,%eax		# restore round counter (eax was used as fill value)
1531	movq	%rdi,%rsi		# encrypt in place: input = output
1532	movq	%r11,%rcx		# restore key pointer
1533	xorq	%rdx,%rdx		# zero remaining, so the loop exits after this block
1534	jmp	.Lcbc_enc_loop
1535
# ---- Decryption ----
1536.align	16
1537.Lcbc_decrypt:
1538	cmpq	$16,%rdx
1539	jne	.Lcbc_decrypt_bulk	# anything other than exactly one block
1540
1541
1542
# Exactly one block: no stack frame needed.
1543	movdqu	(%rdi),%xmm2		# xmm2 = ciphertext block
1544	movdqu	(%r8),%xmm3		# xmm3 = IV
1545	movdqa	%xmm2,%xmm4		# keep the ciphertext: it becomes the next IV
1546	movups	(%rcx),%xmm0
1547	movups	16(%rcx),%xmm1
1548	leaq	32(%rcx),%rcx
1549	xorps	%xmm0,%xmm2		# ciphertext ^ round key 0
1550.Loop_dec1_7:
1551.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1552	decl	%r10d
1553	movups	(%rcx),%xmm1
1554	leaq	16(%rcx),%rcx
1555	jnz	.Loop_dec1_7
1556.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
1557	pxor	%xmm0,%xmm0
1558	pxor	%xmm1,%xmm1
1559	movdqu	%xmm4,(%r8)		# new IV = the ciphertext block just consumed
1560	xorps	%xmm3,%xmm2		# plaintext = decrypted block ^ old IV
1561	pxor	%xmm3,%xmm3
1562	movups	%xmm2,(%rsi)
1563	pxor	%xmm2,%xmm2
1564	jmp	.Lcbc_ret
1565.align	16
# Bulk decryption. Builds a small frame: r11 remembers the entry rsp, rbp is
# saved, and (%rsp) becomes a 16-byte aligned scratch slot used only by
# .Lcbc_dec_tail_partial.
1566.Lcbc_decrypt_bulk:
1567	leaq	(%rsp),%r11		# r11 = rsp at entry; CFA is tracked through r11 from here
1568.cfi_def_cfa_register	%r11
1569	pushq	%rbp
1570.cfi_offset	%rbp,-16
1571	subq	$16,%rsp
1572	andq	$-16,%rsp		# align the scratch slot for movaps/movdqa
1573	movq	%rcx,%rbp		# rbp = key pointer copy
1574	movups	(%r8),%xmm10		# xmm10 = IV (chaining value)
1575	movl	%r10d,%eax
1576	cmpq	$0x50,%rdx
1577	jbe	.Lcbc_dec_tail		# 80 bytes or fewer: handled by the tail code
1578
1579	movups	(%rcx),%xmm0		# xmm0 = round key 0
1580	movdqu	0(%rdi),%xmm2		# load six ciphertext blocks into xmm2..xmm7 ...
1581	movdqu	16(%rdi),%xmm3
1582	movdqa	%xmm2,%xmm11		# ... keeping copies of the first five in xmm11..xmm15 for chaining
1583	movdqu	32(%rdi),%xmm4
1584	movdqa	%xmm3,%xmm12
1585	movdqu	48(%rdi),%xmm5
1586	movdqa	%xmm4,%xmm13
1587	movdqu	64(%rdi),%xmm6
1588	movdqa	%xmm5,%xmm14
1589	movdqu	80(%rdi),%xmm7
1590	movdqa	%xmm6,%xmm15
1591	leaq	OPENSSL_ia32cap_P(%rip),%r9
1592	movl	4(%r9),%r9d		# r9d = second 32-bit word of the capability vector
1593	cmpq	$0x70,%rdx
1594	jbe	.Lcbc_dec_six_or_seven	# 112 bytes or fewer
1595
1596	andl	$71303168,%r9d		# 0x4400000: keep bits 22 and 26
1597	subq	$0x50,%rdx
1598	cmpl	$4194304,%r9d		# 0x400000: only bit 22 set selects the 6-block loop
					# NOTE(review): presumably MOVBE without XSAVE (Atom-class) - confirm
1599	je	.Lcbc_dec_loop6_enter
1600	subq	$0x20,%rdx		# 0x70 subtracted in total for the 8-block loop
1601	leaq	112(%rcx),%rcx		# bias key pointer; round keys are addressed as N-112(%rcx)
1602	jmp	.Lcbc_dec_loop8_enter
1603.align	16
# Eight blocks per iteration in xmm2..xmm9. At the loop head xmm9 still holds
# the eighth plaintext block of the previous iteration.
1604.Lcbc_dec_loop8:
1605	movups	%xmm9,(%rsi)
1606	leaq	16(%rsi),%rsi
1607.Lcbc_dec_loop8_enter:
1608	movdqu	96(%rdi),%xmm8		# seventh and eighth ciphertext blocks
1609	pxor	%xmm0,%xmm2		# whitening: xor round key 0 into every block
1610	movdqu	112(%rdi),%xmm9
1611	pxor	%xmm0,%xmm3
1612	movups	16-112(%rcx),%xmm1	# round key 1
1613	pxor	%xmm0,%xmm4
1614	movq	$-1,%rbp		# rbp stops being the key copy; see adcq below
1615	cmpq	$0x70,%rdx		# CF = 1 when fewer than 0x70 bytes remain after this chunk
1616	pxor	%xmm0,%xmm5
1617	pxor	%xmm0,%xmm6
1618	pxor	%xmm0,%xmm7
1619	pxor	%xmm0,%xmm8
1620
1621.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1622	pxor	%xmm0,%xmm9
1623	movups	32-112(%rcx),%xmm0	# round key 2
1624.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
1625.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
1626.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
1627.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
1628.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
1629.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
1630	adcq	$0,%rbp			# rbp = -1 + CF from the cmpq above (SSE ops keep flags)
1631	andq	$128,%rbp		# 128 when a further chunk follows, else 0
1632.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
1633	addq	%rdi,%rbp		# rbp = where the next chunk is preloaded from
1634	movups	48-112(%rcx),%xmm1
1635.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
1636.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
1637.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
1638.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
1639.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
1640.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
1641.byte	102,68,15,56,222,192	# aesdec %xmm0,%xmm8
1642.byte	102,68,15,56,222,200	# aesdec %xmm0,%xmm9
1643	movups	64-112(%rcx),%xmm0
1644	nop
1645.byte	102,15,56,222,209
1646.byte	102,15,56,222,217
1647.byte	102,15,56,222,225
1648.byte	102,15,56,222,233
1649.byte	102,15,56,222,241
1650.byte	102,15,56,222,249
1651.byte	102,68,15,56,222,193
1652.byte	102,68,15,56,222,201
1653	movups	80-112(%rcx),%xmm1
1654	nop
1655.byte	102,15,56,222,208
1656.byte	102,15,56,222,216
1657.byte	102,15,56,222,224
1658.byte	102,15,56,222,232
1659.byte	102,15,56,222,240
1660.byte	102,15,56,222,248
1661.byte	102,68,15,56,222,192
1662.byte	102,68,15,56,222,200
1663	movups	96-112(%rcx),%xmm0
1664	nop
1665.byte	102,15,56,222,209
1666.byte	102,15,56,222,217
1667.byte	102,15,56,222,225
1668.byte	102,15,56,222,233
1669.byte	102,15,56,222,241
1670.byte	102,15,56,222,249
1671.byte	102,68,15,56,222,193
1672.byte	102,68,15,56,222,201
1673	movups	112-112(%rcx),%xmm1
1674	nop
1675.byte	102,15,56,222,208
1676.byte	102,15,56,222,216
1677.byte	102,15,56,222,224
1678.byte	102,15,56,222,232
1679.byte	102,15,56,222,240
1680.byte	102,15,56,222,248
1681.byte	102,68,15,56,222,192
1682.byte	102,68,15,56,222,200
1683	movups	128-112(%rcx),%xmm0
1684	nop
1685.byte	102,15,56,222,209
1686.byte	102,15,56,222,217
1687.byte	102,15,56,222,225
1688.byte	102,15,56,222,233
1689.byte	102,15,56,222,241
1690.byte	102,15,56,222,249
1691.byte	102,68,15,56,222,193
1692.byte	102,68,15,56,222,201
1693	movups	144-112(%rcx),%xmm1
1694	cmpl	$11,%eax		# compare the round counter once; both branches below reuse these flags
1695.byte	102,15,56,222,208
1696.byte	102,15,56,222,216
1697.byte	102,15,56,222,224
1698.byte	102,15,56,222,232
1699.byte	102,15,56,222,240
1700.byte	102,15,56,222,248
1701.byte	102,68,15,56,222,192
1702.byte	102,68,15,56,222,200
1703	movups	160-112(%rcx),%xmm0
1704	jb	.Lcbc_dec_done		# counter below 11: shortest key schedule, finish now
1705.byte	102,15,56,222,209
1706.byte	102,15,56,222,217
1707.byte	102,15,56,222,225
1708.byte	102,15,56,222,233
1709.byte	102,15,56,222,241
1710.byte	102,15,56,222,249
1711.byte	102,68,15,56,222,193
1712.byte	102,68,15,56,222,201
1713	movups	176-112(%rcx),%xmm1
1714	nop
1715.byte	102,15,56,222,208
1716.byte	102,15,56,222,216
1717.byte	102,15,56,222,224
1718.byte	102,15,56,222,232
1719.byte	102,15,56,222,240
1720.byte	102,15,56,222,248
1721.byte	102,68,15,56,222,192
1722.byte	102,68,15,56,222,200
1723	movups	192-112(%rcx),%xmm0
1724	je	.Lcbc_dec_done		# counter equal to 11: two extra rounds were enough
1725.byte	102,15,56,222,209
1726.byte	102,15,56,222,217
1727.byte	102,15,56,222,225
1728.byte	102,15,56,222,233
1729.byte	102,15,56,222,241
1730.byte	102,15,56,222,249
1731.byte	102,68,15,56,222,193
1732.byte	102,68,15,56,222,201
1733	movups	208-112(%rcx),%xmm1
1734	nop
1735.byte	102,15,56,222,208
1736.byte	102,15,56,222,216
1737.byte	102,15,56,222,224
1738.byte	102,15,56,222,232
1739.byte	102,15,56,222,240
1740.byte	102,15,56,222,248
1741.byte	102,68,15,56,222,192
1742.byte	102,68,15,56,222,200
1743	movups	224-112(%rcx),%xmm0
1744	jmp	.Lcbc_dec_done		# jump over the alignment padding
1745.align	16
# Final rounds. xmm0 holds the last round key here. It is xored into the
# saved previous-ciphertext registers so that each aesdeclast applies the
# last round key and the CBC chaining xor in a single instruction.
1746.Lcbc_dec_done:
1747.byte	102,15,56,222,209
1748.byte	102,15,56,222,217
1749	pxor	%xmm0,%xmm10		# IV ^ last round key
1750	pxor	%xmm0,%xmm11		# ciphertext copies ^ last round key
1751.byte	102,15,56,222,225
1752.byte	102,15,56,222,233
1753	pxor	%xmm0,%xmm12
1754	pxor	%xmm0,%xmm13
1755.byte	102,15,56,222,241
1756.byte	102,15,56,222,249
1757	pxor	%xmm0,%xmm14
1758	pxor	%xmm0,%xmm15
1759.byte	102,68,15,56,222,193
1760.byte	102,68,15,56,222,201
1761	movdqu	80(%rdi),%xmm1		# sixth ciphertext block, chaining input for xmm8
1762
1763.byte	102,65,15,56,223,210	# aesdeclast %xmm10,%xmm2
1764	movdqu	96(%rdi),%xmm10		# seventh ciphertext block, chaining input for xmm9
1765	pxor	%xmm0,%xmm1
1766.byte	102,65,15,56,223,219	# aesdeclast %xmm11,%xmm3
1767	pxor	%xmm0,%xmm10
1768	movdqu	112(%rdi),%xmm0		# eighth ciphertext block: IV for the next chunk
1769.byte	102,65,15,56,223,228	# aesdeclast %xmm12,%xmm4
1770	leaq	128(%rdi),%rdi
1771	movdqu	0(%rbp),%xmm11		# preload the next chunk via rbp (see adcq above)
1772.byte	102,65,15,56,223,237	# aesdeclast %xmm13,%xmm5
1773.byte	102,65,15,56,223,246	# aesdeclast %xmm14,%xmm6
1774	movdqu	16(%rbp),%xmm12
1775	movdqu	32(%rbp),%xmm13
1776.byte	102,65,15,56,223,255	# aesdeclast %xmm15,%xmm7
1777.byte	102,68,15,56,223,193	# aesdeclast %xmm1,%xmm8
1778	movdqu	48(%rbp),%xmm14
1779	movdqu	64(%rbp),%xmm15
1780.byte	102,69,15,56,223,202	# aesdeclast %xmm10,%xmm9
1781	movdqa	%xmm0,%xmm10		# xmm10 = chaining value for the next chunk
1782	movdqu	80(%rbp),%xmm1
1783	movups	-112(%rcx),%xmm0	# reload round key 0
1784
1785	movups	%xmm2,(%rsi)		# store seven plaintext blocks while moving the
1786	movdqa	%xmm11,%xmm2		# preloaded ciphertext into the working registers
1787	movups	%xmm3,16(%rsi)
1788	movdqa	%xmm12,%xmm3
1789	movups	%xmm4,32(%rsi)
1790	movdqa	%xmm13,%xmm4
1791	movups	%xmm5,48(%rsi)
1792	movdqa	%xmm14,%xmm5
1793	movups	%xmm6,64(%rsi)
1794	movdqa	%xmm15,%xmm6
1795	movups	%xmm7,80(%rsi)
1796	movdqa	%xmm1,%xmm7
1797	movups	%xmm8,96(%rsi)
1798	leaq	112(%rsi),%rsi		# the eighth block (xmm9) is stored later
1799
1800	subq	$0x80,%rdx
1801	ja	.Lcbc_dec_loop8
1802
1803	movaps	%xmm9,%xmm2		# xmm2 = last plaintext block, not yet stored
1804	leaq	-112(%rcx),%rcx		# remove the key pointer bias
1805	addq	$0x70,%rdx		# rdx = bytes still to process
1806	jle	.Lcbc_dec_clear_tail_collected
1807	movups	%xmm9,(%rsi)
1808	leaq	16(%rsi),%rsi
1809	cmpq	$0x50,%rdx
1810	jbe	.Lcbc_dec_tail
1811
1812	movaps	%xmm11,%xmm2		# xmm2 was reused above; restore the first pending ciphertext block
# More than 0x50 bytes pending with six blocks already loaded in xmm2..xmm7.
1813.Lcbc_dec_six_or_seven:
1814	cmpq	$0x60,%rdx
1815	ja	.Lcbc_dec_seven
1816
1817	movaps	%xmm7,%xmm8		# save the sixth ciphertext block as the next IV
1818	call	_aesni_decrypt6		# defined outside this view; presumably decrypts xmm2..xmm7 in place - confirm
1819	pxor	%xmm10,%xmm2		# CBC chaining xor
1820	movaps	%xmm8,%xmm10
1821	pxor	%xmm11,%xmm3
1822	movdqu	%xmm2,(%rsi)
1823	pxor	%xmm12,%xmm4
1824	movdqu	%xmm3,16(%rsi)
1825	pxor	%xmm3,%xmm3		# scrub each register once its block is stored
1826	pxor	%xmm13,%xmm5
1827	movdqu	%xmm4,32(%rsi)
1828	pxor	%xmm4,%xmm4
1829	pxor	%xmm14,%xmm6
1830	movdqu	%xmm5,48(%rsi)
1831	pxor	%xmm5,%xmm5
1832	pxor	%xmm15,%xmm7
1833	movdqu	%xmm6,64(%rsi)
1834	pxor	%xmm6,%xmm6
1835	leaq	80(%rsi),%rsi
1836	movdqa	%xmm7,%xmm2		# last block is handed over in xmm2
1837	pxor	%xmm7,%xmm7
1838	jmp	.Lcbc_dec_tail_collected
1839
1840.align	16
1841.Lcbc_dec_seven:
1842	movups	96(%rdi),%xmm8		# seventh ciphertext block
1843	xorps	%xmm9,%xmm9		# eighth lane is unused; feed it zeros
1844	call	_aesni_decrypt8		# defined outside this view; presumably decrypts xmm2..xmm9 in place - confirm
1845	movups	80(%rdi),%xmm9		# reload ciphertext blocks needed for chaining
1846	pxor	%xmm10,%xmm2
1847	movups	96(%rdi),%xmm10		# seventh ciphertext block becomes the next IV
1848	pxor	%xmm11,%xmm3
1849	movdqu	%xmm2,(%rsi)
1850	pxor	%xmm12,%xmm4
1851	movdqu	%xmm3,16(%rsi)
1852	pxor	%xmm3,%xmm3
1853	pxor	%xmm13,%xmm5
1854	movdqu	%xmm4,32(%rsi)
1855	pxor	%xmm4,%xmm4
1856	pxor	%xmm14,%xmm6
1857	movdqu	%xmm5,48(%rsi)
1858	pxor	%xmm5,%xmm5
1859	pxor	%xmm15,%xmm7
1860	movdqu	%xmm6,64(%rsi)
1861	pxor	%xmm6,%xmm6
1862	pxor	%xmm9,%xmm8
1863	movdqu	%xmm7,80(%rsi)
1864	pxor	%xmm7,%xmm7
1865	leaq	96(%rsi),%rsi
1866	movdqa	%xmm8,%xmm2		# last block is handed over in xmm2
1867	pxor	%xmm8,%xmm8
1868	pxor	%xmm9,%xmm9
1869	jmp	.Lcbc_dec_tail_collected
1870
1871.align	16
# Six blocks per iteration. At the loop head xmm7 still holds the sixth
# plaintext block of the previous iteration.
1872.Lcbc_dec_loop6:
1873	movups	%xmm7,(%rsi)
1874	leaq	16(%rsi),%rsi
1875	movdqu	0(%rdi),%xmm2
1876	movdqu	16(%rdi),%xmm3
1877	movdqa	%xmm2,%xmm11
1878	movdqu	32(%rdi),%xmm4
1879	movdqa	%xmm3,%xmm12
1880	movdqu	48(%rdi),%xmm5
1881	movdqa	%xmm4,%xmm13
1882	movdqu	64(%rdi),%xmm6
1883	movdqa	%xmm5,%xmm14
1884	movdqu	80(%rdi),%xmm7
1885	movdqa	%xmm6,%xmm15
1886.Lcbc_dec_loop6_enter:
1887	leaq	96(%rdi),%rdi
1888	movdqa	%xmm7,%xmm8		# save the sixth ciphertext block as the next IV
1889
1890	call	_aesni_decrypt6
1891
1892	pxor	%xmm10,%xmm2
1893	movdqa	%xmm8,%xmm10
1894	pxor	%xmm11,%xmm3
1895	movdqu	%xmm2,(%rsi)
1896	pxor	%xmm12,%xmm4
1897	movdqu	%xmm3,16(%rsi)
1898	pxor	%xmm13,%xmm5
1899	movdqu	%xmm4,32(%rsi)
1900	pxor	%xmm14,%xmm6
1901	movq	%rbp,%rcx		# restore key pointer from the copy in rbp
1902	movdqu	%xmm5,48(%rsi)
1903	pxor	%xmm15,%xmm7
1904	movl	%r10d,%eax		# restore round counter
1905	movdqu	%xmm6,64(%rsi)
1906	leaq	80(%rsi),%rsi
1907	subq	$0x60,%rdx
1908	ja	.Lcbc_dec_loop6
1909
1910	movdqa	%xmm7,%xmm2		# xmm2 = last plaintext block, not yet stored
1911	addq	$0x50,%rdx		# rdx = bytes still to process
1912	jle	.Lcbc_dec_clear_tail_collected
1913	movups	%xmm7,(%rsi)
1914	leaq	16(%rsi),%rsi
1915
# Tail: up to five blocks. Each subq/jbe pair peels off one block and
# dispatches on how many are present; xmm11.. keep ciphertext copies.
1916.Lcbc_dec_tail:
1917	movups	(%rdi),%xmm2
1918	subq	$0x10,%rdx
1919	jbe	.Lcbc_dec_one
1920
1921	movups	16(%rdi),%xmm3
1922	movaps	%xmm2,%xmm11
1923	subq	$0x10,%rdx
1924	jbe	.Lcbc_dec_two
1925
1926	movups	32(%rdi),%xmm4
1927	movaps	%xmm3,%xmm12
1928	subq	$0x10,%rdx
1929	jbe	.Lcbc_dec_three
1930
1931	movups	48(%rdi),%xmm5
1932	movaps	%xmm4,%xmm13
1933	subq	$0x10,%rdx
1934	jbe	.Lcbc_dec_four
1935
1936	movups	64(%rdi),%xmm6		# five blocks: use the 6-way routine with a zero sixth lane
1937	movaps	%xmm5,%xmm14
1938	movaps	%xmm6,%xmm15		# fifth ciphertext block becomes the next IV
1939	xorps	%xmm7,%xmm7
1940	call	_aesni_decrypt6
1941	pxor	%xmm10,%xmm2
1942	movaps	%xmm15,%xmm10
1943	pxor	%xmm11,%xmm3
1944	movdqu	%xmm2,(%rsi)
1945	pxor	%xmm12,%xmm4
1946	movdqu	%xmm3,16(%rsi)
1947	pxor	%xmm3,%xmm3
1948	pxor	%xmm13,%xmm5
1949	movdqu	%xmm4,32(%rsi)
1950	pxor	%xmm4,%xmm4
1951	pxor	%xmm14,%xmm6
1952	movdqu	%xmm5,48(%rsi)
1953	pxor	%xmm5,%xmm5
1954	leaq	64(%rsi),%rsi
1955	movdqa	%xmm6,%xmm2		# last block is handed over in xmm2
1956	pxor	%xmm6,%xmm6
1957	pxor	%xmm7,%xmm7
1958	subq	$0x10,%rdx
1959	jmp	.Lcbc_dec_tail_collected
1960
1961.align	16
1962.Lcbc_dec_one:
1963	movaps	%xmm2,%xmm11		# keep the ciphertext: it becomes the next IV
1964	movups	(%rcx),%xmm0
1965	movups	16(%rcx),%xmm1
1966	leaq	32(%rcx),%rcx
1967	xorps	%xmm0,%xmm2
1968.Loop_dec1_8:
1969.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1970	decl	%eax
1971	movups	(%rcx),%xmm1
1972	leaq	16(%rcx),%rcx
1973	jnz	.Loop_dec1_8
1974.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
1975	xorps	%xmm10,%xmm2		# CBC chaining xor
1976	movaps	%xmm11,%xmm10
1977	jmp	.Lcbc_dec_tail_collected
1978.align	16
1979.Lcbc_dec_two:
1980	movaps	%xmm3,%xmm12		# second ciphertext block becomes the next IV
1981	call	_aesni_decrypt2		# defined outside this view
1982	pxor	%xmm10,%xmm2
1983	movaps	%xmm12,%xmm10
1984	pxor	%xmm11,%xmm3
1985	movdqu	%xmm2,(%rsi)
1986	movdqa	%xmm3,%xmm2
1987	pxor	%xmm3,%xmm3
1988	leaq	16(%rsi),%rsi
1989	jmp	.Lcbc_dec_tail_collected
1990.align	16
1991.Lcbc_dec_three:
1992	movaps	%xmm4,%xmm13		# third ciphertext block becomes the next IV
1993	call	_aesni_decrypt3		# defined outside this view
1994	pxor	%xmm10,%xmm2
1995	movaps	%xmm13,%xmm10
1996	pxor	%xmm11,%xmm3
1997	movdqu	%xmm2,(%rsi)
1998	pxor	%xmm12,%xmm4
1999	movdqu	%xmm3,16(%rsi)
2000	pxor	%xmm3,%xmm3
2001	movdqa	%xmm4,%xmm2
2002	pxor	%xmm4,%xmm4
2003	leaq	32(%rsi),%rsi
2004	jmp	.Lcbc_dec_tail_collected
2005.align	16
2006.Lcbc_dec_four:
2007	movaps	%xmm5,%xmm14		# fourth ciphertext block becomes the next IV
2008	call	_aesni_decrypt4		# defined outside this view
2009	pxor	%xmm10,%xmm2
2010	movaps	%xmm14,%xmm10
2011	pxor	%xmm11,%xmm3
2012	movdqu	%xmm2,(%rsi)
2013	pxor	%xmm12,%xmm4
2014	movdqu	%xmm3,16(%rsi)
2015	pxor	%xmm3,%xmm3
2016	pxor	%xmm13,%xmm5
2017	movdqu	%xmm4,32(%rsi)
2018	pxor	%xmm4,%xmm4
2019	movdqa	%xmm5,%xmm2
2020	pxor	%xmm5,%xmm5
2021	leaq	48(%rsi),%rsi
2022	jmp	.Lcbc_dec_tail_collected
2023
2024.align	16
# Entered from the 8-block and 6-block loops when no bytes remain:
# scrub the working registers those loops left populated.
2025.Lcbc_dec_clear_tail_collected:
2026	pxor	%xmm3,%xmm3
2027	pxor	%xmm4,%xmm4
2028	pxor	%xmm5,%xmm5
2029	pxor	%xmm6,%xmm6
2030	pxor	%xmm7,%xmm7
2031	pxor	%xmm8,%xmm8
2032	pxor	%xmm9,%xmm9
# Common exit of all decrypt paths: xmm10 = new IV, xmm2 = final plaintext
# block that has not been stored yet, rsi = its destination.
2033.Lcbc_dec_tail_collected:
2034	movups	%xmm10,(%r8)		# write back the IV
2035	andq	$15,%rdx		# nonzero when the length was not a multiple of 16
2036	jnz	.Lcbc_dec_tail_partial
2037	movups	%xmm2,(%rsi)
2038	pxor	%xmm2,%xmm2
2039	jmp	.Lcbc_dec_ret
2040.align	16
# Partial last block: spill it to the aligned scratch slot and copy
# 16 - (rdx & 15) bytes of it to the output with rep movsb.
2041.Lcbc_dec_tail_partial:
2042	movaps	%xmm2,(%rsp)
2043	pxor	%xmm2,%xmm2
2044	movq	$16,%rcx
2045	movq	%rsi,%rdi		# rdi = destination for the string copy
2046	subq	%rdx,%rcx		# rcx = byte count
2047	leaq	(%rsp),%rsi		# rsi = scratch slot
2048.long	0x9066A4F3		# bytes F3 A4 66 90: rep movsb, then a 2-byte nop
2049	movdqa	%xmm2,(%rsp)		# wipe the plaintext from the scratch slot (xmm2 is zero)
2050
2051.Lcbc_dec_ret:
2052	xorps	%xmm0,%xmm0		# scrub key material
2053	pxor	%xmm1,%xmm1
2054	movq	-8(%r11),%rbp		# restore rbp pushed in .Lcbc_decrypt_bulk
2055.cfi_restore	%rbp
2056	leaq	(%r11),%rsp		# drop the frame
2057.cfi_def_cfa_register	%rsp
2058.Lcbc_ret:
2059	.byte	0xf3,0xc3		# rep ret
2060.cfi_endproc
2061.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
# aes_hw_set_decrypt_key: build a decryption key schedule.
# Takes the same register arguments as __aesni_set_encrypt_key, which it
# calls first: %rdi = user key, %esi = key size in bits, %rdx = output
# key schedule. On success the schedule written by that routine is
# converted in place: the order of the round keys is reversed and aesimc
# is applied to every round key except the first and the last.
# Returns in %eax whatever __aesni_set_encrypt_key returned (0 on success).
2062.globl	aes_hw_set_decrypt_key
2063.hidden aes_hw_set_decrypt_key
2064.type	aes_hw_set_decrypt_key,@function
2065.align	16
2066aes_hw_set_decrypt_key:
2067.cfi_startproc
2068.byte	0x48,0x83,0xEC,0x08	# sub $8,%rsp
2069.cfi_adjust_cfa_offset	8
2070	call	__aesni_set_encrypt_key
2071	shll	$4,%esi			# esi = round counter left by the callee, times 16
2072	testl	%eax,%eax
2073	jnz	.Ldec_key_ret		# key setup failed: pass the error through
2074	leaq	16(%rdx,%rsi,1),%rdi	# rdi = address of the last round key
2075
2076	movups	(%rdx),%xmm0		# swap first and last round keys unchanged
2077	movups	(%rdi),%xmm1
2078	movups	%xmm0,(%rdi)
2079	movups	%xmm1,(%rdx)
2080	leaq	16(%rdx),%rdx
2081	leaq	-16(%rdi),%rdi
2082
# Walk inward from both ends, swapping pairs and applying aesimc to each.
2083.Ldec_key_inverse:
2084	movups	(%rdx),%xmm0
2085	movups	(%rdi),%xmm1
2086.byte	102,15,56,219,192	# aesimc %xmm0,%xmm0
2087.byte	102,15,56,219,201	# aesimc %xmm1,%xmm1
2088	leaq	16(%rdx),%rdx
2089	leaq	-16(%rdi),%rdi
2090	movups	%xmm0,16(%rdi)
2091	movups	%xmm1,-16(%rdx)
2092	cmpq	%rdx,%rdi
2093	ja	.Ldec_key_inverse	# continue while the two pointers have not met
2094
2095	movups	(%rdx),%xmm0		# middle round key: transformed in place
2096.byte	102,15,56,219,192	# aesimc %xmm0,%xmm0
2097	pxor	%xmm1,%xmm1		# scrub key material
2098	movups	%xmm0,(%rdi)
2099	pxor	%xmm0,%xmm0
2100.Ldec_key_ret:
2101	addq	$8,%rsp
2102.cfi_adjust_cfa_offset	-8
2103	.byte	0xf3,0xc3		# rep ret
2104.cfi_endproc
2105.LSEH_end_set_decrypt_key:
2106.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
# aes_hw_set_encrypt_key / __aesni_set_encrypt_key: AES key expansion.
# Register use on entry, as read by the code below:
#   %rdi = user key bytes, %esi = key size in bits (128, 192 or 256),
#   %rdx = output key schedule.
# Returns in %rax: 0 on success, -1 when %rdi or %rdx is null, -2 when the
# key size is not one of the three accepted values.
# On success the round keys are written from offset 0 of the schedule and
# the value 9, 11 or 13 (for 128, 192, 256 bits) is stored at offset 240.
# %rdx is left unmodified and %esi holds that stored value on the success
# paths; aes_hw_set_decrypt_key relies on both.
# Two implementations exist per key size: one based on aeskeygenassist
# with the .Lkey_expansion_* helpers, and an alternative (*_alt) based on
# pshufb + aesenclast, selected from OPENSSL_ia32cap_P.
2107.globl	aes_hw_set_encrypt_key
2108.hidden aes_hw_set_encrypt_key
2109.type	aes_hw_set_encrypt_key,@function
2110.align	16
2111aes_hw_set_encrypt_key:
2112__aesni_set_encrypt_key:
2113.cfi_startproc
2114#ifndef NDEBUG
2115#ifndef BORINGSSL_FIPS
2116	movb	$1,BORINGSSL_function_hit+3(%rip)
2117#endif
2118#endif
2119.byte	0x48,0x83,0xEC,0x08	# sub $8,%rsp
2120.cfi_adjust_cfa_offset	8
2121	movq	$-1,%rax		# default return value for null arguments
2122	testq	%rdi,%rdi
2123	jz	.Lenc_key_ret
2124	testq	%rdx,%rdx
2125	jz	.Lenc_key_ret
2126
2127	movups	(%rdi),%xmm0		# xmm0 = first 16 bytes of the user key
2128	xorps	%xmm4,%xmm4		# xmm4 = 0, scratch for the shufps-based helpers
2129	leaq	OPENSSL_ia32cap_P(%rip),%r10
2130	movl	4(%r10),%r10d		# r10d = second 32-bit word of the capability vector
2131	andl	$268437504,%r10d	# 0x10000800: keep bits 28 and 11
					# NOTE(review): presumably AVX and XOP - confirm
2132	leaq	16(%rdx),%rax		# rax = write cursor, starting at round key 1
2133	cmpl	$256,%esi
2134	je	.L14rounds
2135	cmpl	$192,%esi
2136	je	.L12rounds
2137	cmpl	$128,%esi
2138	jne	.Lbad_keybits
2139
# ---- 128-bit key ----
2140.L10rounds:
2141	movl	$9,%esi			# value stored at offset 240 for this key size
2142	cmpl	$268435456,%r10d	# 0x10000000: only bit 28 set selects the alt path
2143	je	.L10rounds_alt
2144
2145	movups	%xmm0,(%rdx)		# round key 0 = user key
2146.byte	102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1
2147	call	.Lkey_expansion_128_cold
2148.byte	102,15,58,223,200,2	# aeskeygenassist $2,%xmm0,%xmm1 (immediate = round constant)
2149	call	.Lkey_expansion_128
2150.byte	102,15,58,223,200,4
2151	call	.Lkey_expansion_128
2152.byte	102,15,58,223,200,8
2153	call	.Lkey_expansion_128
2154.byte	102,15,58,223,200,16
2155	call	.Lkey_expansion_128
2156.byte	102,15,58,223,200,32
2157	call	.Lkey_expansion_128
2158.byte	102,15,58,223,200,64
2159	call	.Lkey_expansion_128
2160.byte	102,15,58,223,200,128
2161	call	.Lkey_expansion_128
2162.byte	102,15,58,223,200,27
2163	call	.Lkey_expansion_128
2164.byte	102,15,58,223,200,54
2165	call	.Lkey_expansion_128
2166	movups	%xmm0,(%rax)		# last round key; rax = schedule + 160 here
2167	movl	%esi,80(%rax)		# schedule + 240
2168	xorl	%eax,%eax		# return 0
2169	jmp	.Lenc_key_ret
2170
2171.align	16
2172.L10rounds_alt:
2173	movdqa	.Lkey_rotate(%rip),%xmm5	# pshufb mask
2174	movl	$8,%r10d		# eight loop iterations, then two unrolled rounds
2175	movdqa	.Lkey_rcon1(%rip),%xmm4	# round constant, doubled each iteration
2176	movdqa	%xmm0,%xmm2		# xmm2 = previous round key
2177	movdqu	%xmm0,(%rdx)
2178	jmp	.Loop_key128
2179
2180.align	16
2181.Loop_key128:
2182.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0: rotate and broadcast the top dword
2183.byte	102,15,56,221,196	# aesenclast %xmm4,%xmm0: byte substitution, then xor the round constant
2184	pslld	$1,%xmm4		# next round constant
2185	leaq	16(%rax),%rax
2186
2187	movdqa	%xmm2,%xmm3		# xmm2 = running xor of the words of the previous key
2188	pslldq	$4,%xmm2
2189	pxor	%xmm2,%xmm3
2190	pslldq	$4,%xmm2
2191	pxor	%xmm2,%xmm3
2192	pslldq	$4,%xmm2
2193	pxor	%xmm3,%xmm2
2194
2195	pxor	%xmm2,%xmm0		# xmm0 = new round key
2196	movdqu	%xmm0,-16(%rax)
2197	movdqa	%xmm0,%xmm2
2198
2199	decl	%r10d
2200	jnz	.Loop_key128
2201
2202	movdqa	.Lkey_rcon1b(%rip),%xmm4	# round constant 0x1b for the next round
2203
2204.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0
2205.byte	102,15,56,221,196	# aesenclast %xmm4,%xmm0
2206	pslld	$1,%xmm4		# 0x36 for the final round
2207
2208	movdqa	%xmm2,%xmm3
2209	pslldq	$4,%xmm2
2210	pxor	%xmm2,%xmm3
2211	pslldq	$4,%xmm2
2212	pxor	%xmm2,%xmm3
2213	pslldq	$4,%xmm2
2214	pxor	%xmm3,%xmm2
2215
2216	pxor	%xmm2,%xmm0
2217	movdqu	%xmm0,(%rax)
2218
2219	movdqa	%xmm0,%xmm2
2220.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0
2221.byte	102,15,56,221,196	# aesenclast %xmm4,%xmm0
2222
2223	movdqa	%xmm2,%xmm3
2224	pslldq	$4,%xmm2
2225	pxor	%xmm2,%xmm3
2226	pslldq	$4,%xmm2
2227	pxor	%xmm2,%xmm3
2228	pslldq	$4,%xmm2
2229	pxor	%xmm3,%xmm2
2230
2231	pxor	%xmm2,%xmm0
2232	movdqu	%xmm0,16(%rax)		# last round key
2233
2234	movl	%esi,96(%rax)		# schedule + 240 (rax = schedule + 144 here)
2235	xorl	%eax,%eax		# return 0
2236	jmp	.Lenc_key_ret
2237
# ---- 192-bit key ----
2238.align	16
2239.L12rounds:
2240	movq	16(%rdi),%xmm2		# low qword of xmm2 = key bytes 16..23
2241	movl	$11,%esi		# value stored at offset 240 for this key size
2242	cmpl	$268435456,%r10d
2243	je	.L12rounds_alt
2244
2245	movups	%xmm0,(%rdx)
2246.byte	102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1
2247	call	.Lkey_expansion_192a_cold
2248.byte	102,15,58,223,202,2
2249	call	.Lkey_expansion_192b
2250.byte	102,15,58,223,202,4
2251	call	.Lkey_expansion_192a
2252.byte	102,15,58,223,202,8
2253	call	.Lkey_expansion_192b
2254.byte	102,15,58,223,202,16
2255	call	.Lkey_expansion_192a
2256.byte	102,15,58,223,202,32
2257	call	.Lkey_expansion_192b
2258.byte	102,15,58,223,202,64
2259	call	.Lkey_expansion_192a
2260.byte	102,15,58,223,202,128
2261	call	.Lkey_expansion_192b
2262	movups	%xmm0,(%rax)		# last round key; rax = schedule + 192 here
2263	movl	%esi,48(%rax)		# schedule + 240
2264	xorq	%rax,%rax		# return 0
2265	jmp	.Lenc_key_ret
2266
2267.align	16
2268.L12rounds_alt:
2269	movdqa	.Lkey_rotate192(%rip),%xmm5
2270	movdqa	.Lkey_rcon1(%rip),%xmm4
2271	movl	$8,%r10d		# eight iterations of 24 bytes each
2272	movdqu	%xmm0,(%rdx)
2273	jmp	.Loop_key192
2274
2275.align	16
2276.Loop_key192:
2277	movq	%xmm2,0(%rax)		# store the 8-byte half that completes the previous 24 bytes
2278	movdqa	%xmm2,%xmm1
2279.byte	102,15,56,0,213		# pshufb %xmm5,%xmm2
2280.byte	102,15,56,221,212	# aesenclast %xmm4,%xmm2
2281	pslld	$1,%xmm4		# next round constant
2282	leaq	24(%rax),%rax
2283
2284	movdqa	%xmm0,%xmm3
2285	pslldq	$4,%xmm0
2286	pxor	%xmm0,%xmm3
2287	pslldq	$4,%xmm0
2288	pxor	%xmm0,%xmm3
2289	pslldq	$4,%xmm0
2290	pxor	%xmm3,%xmm0
2291
2292	pshufd	$0xff,%xmm0,%xmm3	# broadcast the top dword
2293	pxor	%xmm1,%xmm3
2294	pslldq	$4,%xmm1
2295	pxor	%xmm1,%xmm3
2296
2297	pxor	%xmm2,%xmm0
2298	pxor	%xmm3,%xmm2
2299	movdqu	%xmm0,-16(%rax)
2300
2301	decl	%r10d
2302	jnz	.Loop_key192
2303
2304	movl	%esi,32(%rax)		# schedule + 240 (rax = schedule + 208 here)
2305	xorl	%eax,%eax		# return 0
2306	jmp	.Lenc_key_ret
2307
# ---- 256-bit key ----
2308.align	16
2309.L14rounds:
2310	movups	16(%rdi),%xmm2		# xmm2 = second 16 bytes of the user key
2311	movl	$13,%esi		# value stored at offset 240 for this key size
2312	leaq	16(%rax),%rax		# write cursor starts at round key 2
2313	cmpl	$268435456,%r10d
2314	je	.L14rounds_alt
2315
2316	movups	%xmm0,(%rdx)		# round keys 0 and 1 = user key
2317	movups	%xmm2,16(%rdx)
2318.byte	102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1
2319	call	.Lkey_expansion_256a_cold
2320.byte	102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1
2321	call	.Lkey_expansion_256b
2322.byte	102,15,58,223,202,2
2323	call	.Lkey_expansion_256a
2324.byte	102,15,58,223,200,2
2325	call	.Lkey_expansion_256b
2326.byte	102,15,58,223,202,4
2327	call	.Lkey_expansion_256a
2328.byte	102,15,58,223,200,4
2329	call	.Lkey_expansion_256b
2330.byte	102,15,58,223,202,8
2331	call	.Lkey_expansion_256a
2332.byte	102,15,58,223,200,8
2333	call	.Lkey_expansion_256b
2334.byte	102,15,58,223,202,16
2335	call	.Lkey_expansion_256a
2336.byte	102,15,58,223,200,16
2337	call	.Lkey_expansion_256b
2338.byte	102,15,58,223,202,32
2339	call	.Lkey_expansion_256a
2340.byte	102,15,58,223,200,32
2341	call	.Lkey_expansion_256b
2342.byte	102,15,58,223,202,64
2343	call	.Lkey_expansion_256a
2344	movups	%xmm0,(%rax)		# last round key; rax = schedule + 224 here
2345	movl	%esi,16(%rax)		# schedule + 240
2346	xorq	%rax,%rax		# return 0
2347	jmp	.Lenc_key_ret
2348
2349.align	16
2350.L14rounds_alt:
2351	movdqa	.Lkey_rotate(%rip),%xmm5
2352	movdqa	.Lkey_rcon1(%rip),%xmm4
2353	movl	$7,%r10d		# seven even round keys; the loop exits before the seventh odd one
2354	movdqu	%xmm0,0(%rdx)
2355	movdqa	%xmm2,%xmm1
2356	movdqu	%xmm2,16(%rdx)
2357	jmp	.Loop_key256
2358
2359.align	16
2360.Loop_key256:
2361.byte	102,15,56,0,213		# pshufb %xmm5,%xmm2
2362.byte	102,15,56,221,212	# aesenclast %xmm4,%xmm2
2363
2364	movdqa	%xmm0,%xmm3
2365	pslldq	$4,%xmm0
2366	pxor	%xmm0,%xmm3
2367	pslldq	$4,%xmm0
2368	pxor	%xmm0,%xmm3
2369	pslldq	$4,%xmm0
2370	pxor	%xmm3,%xmm0
2371	pslld	$1,%xmm4		# next round constant
2372
2373	pxor	%xmm2,%xmm0		# xmm0 = new even round key
2374	movdqu	%xmm0,(%rax)
2375
2376	decl	%r10d
2377	jz	.Ldone_key256
2378
2379	pshufd	$0xff,%xmm0,%xmm2	# broadcast the top dword, no rotation for this half
2380	pxor	%xmm3,%xmm3		# zero operand: no round constant for this half
2381.byte	102,15,56,221,211	# aesenclast %xmm3,%xmm2
2382
2383	movdqa	%xmm1,%xmm3
2384	pslldq	$4,%xmm1
2385	pxor	%xmm1,%xmm3
2386	pslldq	$4,%xmm1
2387	pxor	%xmm1,%xmm3
2388	pslldq	$4,%xmm1
2389	pxor	%xmm3,%xmm1
2390
2391	pxor	%xmm1,%xmm2		# xmm2 = new odd round key
2392	movdqu	%xmm2,16(%rax)
2393	leaq	32(%rax),%rax
2394	movdqa	%xmm2,%xmm1
2395
2396	jmp	.Loop_key256
2397
2398.Ldone_key256:
2399	movl	%esi,16(%rax)		# schedule + 240 (rax = schedule + 224 here)
2400	xorl	%eax,%eax		# return 0
2401	jmp	.Lenc_key_ret
2402
2403.align	16
2404.Lbad_keybits:
2405	movq	$-2,%rax		# unsupported key size
# Common exit: scrub every xmm register that may hold key material.
2406.Lenc_key_ret:
2407	pxor	%xmm0,%xmm0
2408	pxor	%xmm1,%xmm1
2409	pxor	%xmm2,%xmm2
2410	pxor	%xmm3,%xmm3
2411	pxor	%xmm4,%xmm4
2412	pxor	%xmm5,%xmm5
2413	addq	$8,%rsp
2414.cfi_adjust_cfa_offset	-8
2415	.byte	0xf3,0xc3		# rep ret
2416.cfi_endproc
2417.LSEH_end_set_encrypt_key:
2418
# Local helpers for the aeskeygenassist-based key expansion above. They are
# reached with call from aes_hw_set_encrypt_key and share its registers:
#   %rax  = write cursor into the key schedule,
#   %xmm0 = current round key (128/256: even keys), %xmm2 = second key half,
#   %xmm1 = aeskeygenassist result prepared by the caller,
#   %xmm4 = scratch whose low dword must be zero on entry (it stays zero),
#   %xmm5 = half-key carried between the 192a and 192b helpers.
# Entry points named *_cold skip the store of the previous round key.
2419.align	16
2420.Lkey_expansion_128:
2421	movups	%xmm0,(%rax)		# store the previous round key
2422	leaq	16(%rax),%rax
2423.Lkey_expansion_128_cold:
2424	shufps	$16,%xmm0,%xmm4		# the two shufps/xorps pairs turn xmm0 into the
2425	xorps	%xmm4,%xmm0		# running xor of its four dwords
2426	shufps	$140,%xmm0,%xmm4
2427	xorps	%xmm4,%xmm0
2428	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the keygenassist result
2429	xorps	%xmm1,%xmm0		# xmm0 = next round key
2430	.byte	0xf3,0xc3		# rep ret
2431
2432.align	16
2433.Lkey_expansion_192a:
2434	movups	%xmm0,(%rax)
2435	leaq	16(%rax),%rax
2436.Lkey_expansion_192a_cold:
2437	movaps	%xmm2,%xmm5		# remember the current half-key for the next 192b store
2438.Lkey_expansion_192b_warm:
2439	shufps	$16,%xmm0,%xmm4
2440	movdqa	%xmm2,%xmm3
2441	xorps	%xmm4,%xmm0
2442	shufps	$140,%xmm0,%xmm4
2443	pslldq	$4,%xmm3
2444	xorps	%xmm4,%xmm0
2445	pshufd	$85,%xmm1,%xmm1		# broadcast dword 1 of the keygenassist result
2446	pxor	%xmm3,%xmm2
2447	pxor	%xmm1,%xmm0
2448	pshufd	$255,%xmm0,%xmm3	# broadcast the top dword of the new xmm0
2449	pxor	%xmm3,%xmm2
2450	.byte	0xf3,0xc3		# rep ret
2451
2452.align	16
2453.Lkey_expansion_192b:
2454	movaps	%xmm0,%xmm3
2455	shufps	$68,%xmm0,%xmm5		# xmm5 = saved half-key followed by the low half of xmm0
2456	movups	%xmm5,(%rax)
2457	shufps	$78,%xmm2,%xmm3		# xmm3 = high half of xmm0 followed by the low half of xmm2
2458	movups	%xmm3,16(%rax)
2459	leaq	32(%rax),%rax		# two round keys written
2460	jmp	.Lkey_expansion_192b_warm
2461
2462.align	16
2463.Lkey_expansion_256a:
2464	movups	%xmm2,(%rax)		# store the previous odd round key
2465	leaq	16(%rax),%rax
2466.Lkey_expansion_256a_cold:
2467	shufps	$16,%xmm0,%xmm4
2468	xorps	%xmm4,%xmm0
2469	shufps	$140,%xmm0,%xmm4
2470	xorps	%xmm4,%xmm0
2471	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the keygenassist result
2472	xorps	%xmm1,%xmm0		# xmm0 = next even round key
2473	.byte	0xf3,0xc3		# rep ret
2474
2475.align	16
2476.Lkey_expansion_256b:
2477	movups	%xmm0,(%rax)		# store the previous even round key
2478	leaq	16(%rax),%rax
2479
2480	shufps	$16,%xmm2,%xmm4
2481	xorps	%xmm4,%xmm2
2482	shufps	$140,%xmm2,%xmm4
2483	xorps	%xmm4,%xmm2
2484	shufps	$170,%xmm1,%xmm1	# broadcast dword 2 of the keygenassist result
2485	xorps	%xmm1,%xmm2		# xmm2 = next odd round key
2486	.byte	0xf3,0xc3		# rep ret
2487.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
2488.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant pool, 64-byte aligned; each table is 16 bytes. Only the
# .Lkey_* tables are referenced by the code visible in this part of the
# file; the others are presumably used by routines earlier in the file.
2489.align	64
2490.Lbswap_mask:
2491.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	# byte indices in descending order
2492.Lincrement32:
2493.long	6,6,6,0
2494.Lincrement64:
2495.long	1,0,0,0
2496.Lxts_magic:
2497.long	0x87,0,1,0
2498.Lincrement1:
2499.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2500.Lkey_rotate:
2501.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	# pshufb mask: bytes 13,14,15,12 into every dword
2502.Lkey_rotate192:
2503.long	0x04070605,0x04070605,0x04070605,0x04070605	# pshufb mask: bytes 5,6,7,4 into every dword
2504.Lkey_rcon1:
2505.long	1,1,1,1			# initial round constant, doubled with pslld in the alt loops
2506.Lkey_rcon1b:
2507.long	0x1b,0x1b,0x1b,0x1b	# round constant loaded after the eight-iteration 128-bit alt loop
2508
# NUL-terminated ASCII: AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
2509.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2510.align	64
2511#endif
2512