• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
# Build guards. When the compiler reports the memory_sanitizer feature,
# OPENSSL_NO_ASM is defined here, which in turn disables everything inside the
# x86-64 conditional that follows.
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
# Everything below is assembled only for x86-64 targets with assembly enabled.
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11#include "ring_core_generated/prefix_symbols_asm.h"
12.text
# CPU capability words, defined elsewhere; the routines below read the 32-bit
# word at byte offset 4 to choose between code paths.
13.extern	OPENSSL_ia32cap_P
14.hidden OPENSSL_ia32cap_P
# aes_hw_encrypt: encrypt a single 16-byte block with the AES-NI instructions.
#
# Registers read on entry (the C prototype is not visible here; by System V
# argument order these would be the 1st, 2nd and 3rd arguments):
#   %rdi  input block, 16 bytes, loaded unaligned
#   %rsi  output block, 16 bytes, stored unaligned
#   %rdx  key schedule: round keys from offset 0, 32-bit round-loop count at 240
# Clobbers %eax, %rdx and flags. %xmm0-%xmm2 are zeroed before returning, so
# neither round keys nor the block remain in them.
15.globl	aes_hw_encrypt
16.hidden aes_hw_encrypt
17.type	aes_hw_encrypt,@function
18.align	16
19aes_hw_encrypt:
20.cfi_startproc
21	movups	(%rdi),%xmm2	# xmm2 = input block
22	movl	240(%rdx),%eax	# eax = number of aesenc rounds to run
23	movups	(%rdx),%xmm0	# xmm0 = round key 0
24	movups	16(%rdx),%xmm1	# xmm1 = round key 1
25	leaq	32(%rdx),%rdx	# rdx -> round key 2
26	xorps	%xmm0,%xmm2	# initial whitening with round key 0
# One aesenc per iteration; the next round key is fetched while looping.
# decl sets the flags consumed by jnz (movups and leaq leave flags alone).
27.Loop_enc1_1:
28.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
29	decl	%eax
30	movups	(%rdx),%xmm1
31	leaq	16(%rdx),%rdx
32	jnz	.Loop_enc1_1
33.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
34	pxor	%xmm0,%xmm0	# wipe round keys from registers
35	pxor	%xmm1,%xmm1
36	movups	%xmm2,(%rsi)	# store the encrypted block
37	pxor	%xmm2,%xmm2	# wipe the block as well
38	.byte	0xf3,0xc3	# rep ret
39.cfi_endproc
40.size	aes_hw_encrypt,.-aes_hw_encrypt
# _aesni_encrypt2: file-local helper that encrypts two blocks in parallel.
# Register contract as read from the code (not a C-callable interface):
#   in:  %rcx = key schedule, %eax = round-loop count, %xmm2,%xmm3 = blocks
#   out: %xmm2,%xmm3 encrypted in place; %rax = 0;
#        %rcx = key + 32 + 16*count; %xmm0,%xmm1 still hold round keys.
# No call to this label is visible in this file.
41.type	_aesni_encrypt2,@function
42.align	16
43_aesni_encrypt2:
44.cfi_startproc
45	movups	(%rcx),%xmm0	# round key 0
46	shll	$4,%eax	# eax = 16 * count (byte span of the looped keys)
47	movups	16(%rcx),%xmm1	# round key 1
48	xorps	%xmm0,%xmm2	# whitening
49	xorps	%xmm0,%xmm3
50	movups	32(%rcx),%xmm0	# round key 2
51	leaq	32(%rcx,%rax,1),%rcx	# rcx -> end of the key schedule region used below
52	negq	%rax
53	addq	$16,%rax	# rax = negative byte index, counts up to zero
54
# Two rounds per iteration, alternating between the keys in xmm1 and xmm0.
# addq sets the flags tested by jnz; the intervening .byte/movups do not.
55.Lenc_loop2:
56.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
57.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
58	movups	(%rcx,%rax,1),%xmm1
59	addq	$32,%rax
60.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
61.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
62	movups	-16(%rcx,%rax,1),%xmm0
63	jnz	.Lenc_loop2
64
# Final aesenc with xmm1, then aesenclast with the last key in xmm0.
65.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
66.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
67.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
68.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
69	.byte	0xf3,0xc3	# rep ret
70.cfi_endproc
71.size	_aesni_encrypt2,.-_aesni_encrypt2
# _aesni_encrypt3: file-local helper that encrypts three blocks in parallel.
# Register contract as read from the code (not a C-callable interface):
#   in:  %rcx = key schedule, %eax = round-loop count, %xmm2-%xmm4 = blocks
#   out: %xmm2-%xmm4 encrypted in place; %rax = 0;
#        %rcx = key + 32 + 16*count; %xmm0,%xmm1 still hold round keys.
# No call to this label is visible in this file.
72.type	_aesni_encrypt3,@function
73.align	16
74_aesni_encrypt3:
75.cfi_startproc
76	movups	(%rcx),%xmm0	# round key 0
77	shll	$4,%eax	# eax = 16 * count
78	movups	16(%rcx),%xmm1	# round key 1
79	xorps	%xmm0,%xmm2	# whitening
80	xorps	%xmm0,%xmm3
81	xorps	%xmm0,%xmm4
82	movups	32(%rcx),%xmm0	# round key 2
83	leaq	32(%rcx,%rax,1),%rcx
84	negq	%rax
85	addq	$16,%rax	# negative byte index, counts up to zero
86
# Two rounds per iteration, alternating between xmm1 and xmm0.
87.Lenc_loop3:
88.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
89.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
90.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
91	movups	(%rcx,%rax,1),%xmm1
92	addq	$32,%rax
93.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
94.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
95.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
96	movups	-16(%rcx,%rax,1),%xmm0
97	jnz	.Lenc_loop3
98
# Final aesenc with xmm1, then aesenclast with the last key in xmm0.
99.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
100.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
101.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
102.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
103.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
104.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
105	.byte	0xf3,0xc3	# rep ret
106.cfi_endproc
107.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_encrypt4: file-local helper that encrypts four blocks in parallel.
# Register contract as read from the code (not a C-callable interface):
#   in:  %rcx = key schedule, %eax = round-loop count, %xmm2-%xmm5 = blocks
#   out: %xmm2-%xmm5 encrypted in place; %rax = 0;
#        %rcx = key + 32 + 16*count; %xmm0,%xmm1 still hold round keys.
# No call to this label is visible in this file.
108.type	_aesni_encrypt4,@function
109.align	16
110_aesni_encrypt4:
111.cfi_startproc
112	movups	(%rcx),%xmm0	# round key 0
113	shll	$4,%eax	# eax = 16 * count
114	movups	16(%rcx),%xmm1	# round key 1
115	xorps	%xmm0,%xmm2	# whitening
116	xorps	%xmm0,%xmm3
117	xorps	%xmm0,%xmm4
118	xorps	%xmm0,%xmm5
119	movups	32(%rcx),%xmm0	# round key 2
120	leaq	32(%rcx,%rax,1),%rcx
121	negq	%rax
122.byte	0x0f,0x1f,0x00	# 3-byte nop, padding only
123	addq	$16,%rax	# negative byte index, counts up to zero
124
# Two rounds per iteration, alternating between xmm1 and xmm0.
125.Lenc_loop4:
126.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
127.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
128.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
129.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
130	movups	(%rcx,%rax,1),%xmm1
131	addq	$32,%rax
132.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
133.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
134.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
135.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
136	movups	-16(%rcx,%rax,1),%xmm0
137	jnz	.Lenc_loop4
138
# Final aesenc with xmm1, then aesenclast with the last key in xmm0.
139.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
140.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
141.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
142.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
143.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
144.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
145.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
146.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
147	.byte	0xf3,0xc3	# rep ret
148.cfi_endproc
149.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_encrypt6: file-local helper that encrypts six blocks in parallel.
# Register contract as read from the code (not a C-callable interface):
#   in:  %rcx = key schedule, %eax = round-loop count, %xmm2-%xmm7 = blocks
#   out: %xmm2-%xmm7 encrypted in place; %rax = 0;
#        %rcx = key + 32 + 16*count; %xmm0,%xmm1 still hold round keys.
# The whitening XORs are interleaved with the first aesenc round, so the loop
# is entered in the middle at .Lenc_loop6_enter.
# aes_hw_ctr32_encrypt_blocks calls .Lenc_loop6 directly, having already set up
# %rcx, %rax, %xmm0 and %xmm1 itself.
150.type	_aesni_encrypt6,@function
151.align	16
152_aesni_encrypt6:
153.cfi_startproc
154	movups	(%rcx),%xmm0	# round key 0
155	shll	$4,%eax	# eax = 16 * count
156	movups	16(%rcx),%xmm1	# round key 1
157	xorps	%xmm0,%xmm2	# whitening, blocks 0-2
158	pxor	%xmm0,%xmm3
159	pxor	%xmm0,%xmm4
160.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
161	leaq	32(%rcx,%rax,1),%rcx
162	negq	%rax	# rax = -16 * count
163.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
164	pxor	%xmm0,%xmm5	# whitening, blocks 3-5
165	pxor	%xmm0,%xmm6
166.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
167	pxor	%xmm0,%xmm7
168	movups	(%rcx,%rax,1),%xmm0	# round key 2 (address equals key + 32)
169	addq	$16,%rax
170	jmp	.Lenc_loop6_enter
171.align	16
# Two rounds per iteration, alternating between xmm1 and xmm0.
172.Lenc_loop6:
173.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
174.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
175.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
176.Lenc_loop6_enter:
177.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
178.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
179.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
180	movups	(%rcx,%rax,1),%xmm1
181	addq	$32,%rax	# sets ZF for the jnz below
182.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
183.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
184.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
185.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
186.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
187.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
188	movups	-16(%rcx,%rax,1),%xmm0
189	jnz	.Lenc_loop6
190
# Final aesenc with xmm1, then aesenclast with the last key in xmm0.
191.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
192.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
193.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
194.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
195.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
196.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
197.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
198.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
199.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
200.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
201.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
202.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
203	.byte	0xf3,0xc3	# rep ret
204.cfi_endproc
205.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_encrypt8: file-local helper that encrypts eight blocks in parallel.
# Register contract as read from the code (not a C-callable interface):
#   in:  %rcx = key schedule, %eax = round-loop count, %xmm2-%xmm9 = blocks
#   out: %xmm2-%xmm9 encrypted in place; %rax = 0;
#        %rcx = key + 32 + 16*count; %xmm0,%xmm1 still hold round keys.
# The whitening XORs are interleaved with the first aesenc round, so the loop
# is entered in the middle at .Lenc_loop8_inner.
# aes_hw_ctr32_encrypt_blocks calls .Lenc_loop8_enter directly, after doing the
# first round on its own and setting up %rcx, %rax, %xmm0 and %xmm1.
206.type	_aesni_encrypt8,@function
207.align	16
208_aesni_encrypt8:
209.cfi_startproc
210	movups	(%rcx),%xmm0	# round key 0
211	shll	$4,%eax	# eax = 16 * count
212	movups	16(%rcx),%xmm1	# round key 1
213	xorps	%xmm0,%xmm2	# whitening, blocks 0-4
214	xorps	%xmm0,%xmm3
215	pxor	%xmm0,%xmm4
216	pxor	%xmm0,%xmm5
217	pxor	%xmm0,%xmm6
218	leaq	32(%rcx,%rax,1),%rcx
219	negq	%rax	# rax = -16 * count
220.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
221	pxor	%xmm0,%xmm7	# whitening, blocks 5-7
222	pxor	%xmm0,%xmm8
223.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
224	pxor	%xmm0,%xmm9
225	movups	(%rcx,%rax,1),%xmm0	# round key 2 (address equals key + 32)
226	addq	$16,%rax
227	jmp	.Lenc_loop8_inner
228.align	16
# Two rounds per iteration, alternating between xmm1 and xmm0.
229.Lenc_loop8:
230.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
231.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
232.Lenc_loop8_inner:
233.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
234.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
235.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
236.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
237.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
238.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
239.Lenc_loop8_enter:
240	movups	(%rcx,%rax,1),%xmm1
241	addq	$32,%rax	# sets ZF for the jnz below
242.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
243.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
244.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
245.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
246.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
247.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
248.byte	102,68,15,56,220,192	# aesenc %xmm0,%xmm8
249.byte	102,68,15,56,220,200	# aesenc %xmm0,%xmm9
250	movups	-16(%rcx,%rax,1),%xmm0
251	jnz	.Lenc_loop8
252
# Final aesenc with xmm1, then aesenclast with the last key in xmm0.
253.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
254.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
255.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
256.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
257.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
258.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
259.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
260.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
261.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
262.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
263.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
264.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
265.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
266.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
267.byte	102,68,15,56,221,192	# aesenclast %xmm0,%xmm8
268.byte	102,68,15,56,221,200	# aesenclast %xmm0,%xmm9
269	.byte	0xf3,0xc3	# rep ret
270.cfi_endproc
271.size	_aesni_encrypt8,.-_aesni_encrypt8
# aes_hw_ctr32_encrypt_blocks: AES in counter mode over whole 16-byte blocks,
# incrementing only the last 32 bits of the counter block (big-endian).
#
# Registers read on entry (the C prototype is not visible here; by System V
# argument order these would be arguments 1 to 5):
#   %rdi  input data
#   %rsi  output data
#   %rdx  number of 16-byte blocks
#   %rcx  key schedule (round keys from offset 0, round-loop count at 240)
#   %r8   16-byte initial counter block; bytes 12-15 are the 32-bit counter
# The counter block at (%r8) is only read, never written back.
#
# Structure: one block is handled inline without a stack frame. Otherwise a
# 128-byte, 16-byte-aligned scratch area is carved from the stack to hold eight
# counter blocks that are already XORed with round key 0, and the data is
# processed 8 blocks at a time (or 6 at a time on one CPU-capability
# combination), followed by a tail for the remaining 1 to 7 blocks.
#
# NOTE(review): a block count of 0 is not special-cased. It takes the bulk
# path, and the tail code still loads one block from (%rdi) and stores one
# block to (%rsi). Callers presumably never pass 0 - confirm.
#
# Key to the .byte-encoded instructions used throughout:
#   102,15,56,220,M      aesenc      src,dst  with M = 192 + 8*dst + src
#   102,15,56,221,M      aesenclast  src,dst  with M = 192 + 8*dst + src
#   a 68 after the 102 adds 8 to dst, a 65 adds 8 to src, a 69 adds 8 to both
#   102,15,58,34,M,3     pinsrd $3 (insert a 32-bit register into dword 3)
#   0x0f,0x38,0xf1,0x44,0x24,D   movbe %eax,D(%rsp) (byte-swapping store)
#   0x66,0x90            two-byte nop, padding only
#   0xf3,0xc3            rep ret
272.globl	aes_hw_ctr32_encrypt_blocks
273.hidden aes_hw_ctr32_encrypt_blocks
274.type	aes_hw_ctr32_encrypt_blocks,@function
275.align	16
276aes_hw_ctr32_encrypt_blocks:
277.cfi_startproc
278	cmpq	$1,%rdx
279	jne	.Lctr32_bulk
280
281
282
# Exactly one block: encrypt the counter block and XOR it into the input.
# %edx is reused as the round counter here.
283	movups	(%r8),%xmm2	# xmm2 = counter block
284	movups	(%rdi),%xmm3	# xmm3 = input block
285	movl	240(%rcx),%edx
286	movups	(%rcx),%xmm0
287	movups	16(%rcx),%xmm1
288	leaq	32(%rcx),%rcx
289	xorps	%xmm0,%xmm2
290.Loop_enc1_2:
291.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
292	decl	%edx
293	movups	(%rcx),%xmm1
294	leaq	16(%rcx),%rcx
295	jnz	.Loop_enc1_2
296.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
297	pxor	%xmm0,%xmm0	# wipe round keys
298	pxor	%xmm1,%xmm1
299	xorps	%xmm3,%xmm2	# output = keystream XOR input
300	pxor	%xmm3,%xmm3
301	movups	%xmm2,(%rsi)
302	xorps	%xmm2,%xmm2
303	jmp	.Lctr32_epilogue
304
305.align	16
# Bulk path. %r11 keeps the entry value of %rsp for the epilogue; %rbp is saved
# because it is used below to hold the last dword of round key 0.
306.Lctr32_bulk:
307	leaq	(%rsp),%r11
308.cfi_def_cfa_register	%r11
309	pushq	%rbp
310.cfi_offset	%rbp,-16
311	subq	$128,%rsp	# scratch for eight 16-byte counter blocks
312	andq	$-16,%rsp	# align so movdqa/movaps on the scratch are legal
313
314
315
316
# Build the eight counter blocks, each pre-XORed with round key 0.
# Slot i lives at 16*i(%rsp); only dword 3 (offset 16*i+12) differs per slot.
317	movdqu	(%r8),%xmm2	# counter block
318	movdqu	(%rcx),%xmm0	# round key 0
319	movl	12(%r8),%r8d	# r8d = counter dword as stored (big-endian)
320	pxor	%xmm0,%xmm2	# xmm2 = counter block XOR round key 0
321	movl	12(%rcx),%ebp	# ebp = dword 3 of round key 0
322	movdqa	%xmm2,0(%rsp)
323	bswapl	%r8d	# r8d = counter as a native integer
324	movdqa	%xmm2,%xmm3
325	movdqa	%xmm2,%xmm4
326	movdqa	%xmm2,%xmm5
327	movdqa	%xmm2,64(%rsp)
328	movdqa	%xmm2,80(%rsp)
329	movdqa	%xmm2,96(%rsp)
330	movq	%rdx,%r10	# park the block count; rdx is a temporary below
331	movdqa	%xmm2,112(%rsp)
332
333	leaq	1(%r8),%rax
334	leaq	2(%r8),%rdx
335	bswapl	%eax
336	bswapl	%edx
337	xorl	%ebp,%eax	# eax = bswap(counter+1) XOR key dword
338	xorl	%ebp,%edx	# edx = bswap(counter+2) XOR key dword
339.byte	102,15,58,34,216,3	# pinsrd $3,%eax,%xmm3
340	leaq	3(%r8),%rax
341	movdqa	%xmm3,16(%rsp)
342.byte	102,15,58,34,226,3	# pinsrd $3,%edx,%xmm4
343	bswapl	%eax
344	movq	%r10,%rdx	# restore the block count
345	leaq	4(%r8),%r10
346	movdqa	%xmm4,32(%rsp)
347	xorl	%ebp,%eax
348	bswapl	%r10d
349.byte	102,15,58,34,232,3	# pinsrd $3,%eax,%xmm5
350	xorl	%ebp,%r10d
351	movdqa	%xmm5,48(%rsp)
352	leaq	5(%r8),%r9
353	movl	%r10d,64+12(%rsp)	# slot 4 counter dword
354	bswapl	%r9d
355	leaq	6(%r8),%r10
356	movl	240(%rcx),%eax	# eax = round-loop count
357	xorl	%ebp,%r9d
358	bswapl	%r10d
359	movl	%r9d,80+12(%rsp)	# slot 5 counter dword
360	xorl	%ebp,%r10d
361	leaq	7(%r8),%r9
362	movl	%r10d,96+12(%rsp)	# slot 6 counter dword
363	bswapl	%r9d
# Capability probe: keep bits 22 and 26 (mask 0x04400000) of the 32-bit word at
# OPENSSL_ia32cap_P+4. Presumably these are the MOVBE and XSAVE bits of CPUID
# leaf 1 ECX - confirm against the OPENSSL_ia32cap_P layout.
364	leaq	OPENSSL_ia32cap_P(%rip),%r10
365	movl	4(%r10),%r10d
366	xorl	%ebp,%r9d
367	andl	$71303168,%r10d
368	movl	%r9d,112+12(%rsp)	# slot 7 counter dword
369
370	movups	16(%rcx),%xmm1	# xmm1 = round key 1
371
372	movdqa	64(%rsp),%xmm6	# xmm6,xmm7 = counter blocks 4 and 5
373	movdqa	80(%rsp),%xmm7
374
375	cmpq	$8,%rdx
376	jb	.Lctr32_tail	# fewer than 8 blocks: tail only
377
# With only bit 22 of the two probed bits set, use the 6-block loop (which
# relies on movbe); otherwise use the 8-block loop. In both loops %rdx is kept
# biased by one group so that the subtract at the loop bottom borrows exactly
# when fewer than a full group would remain.
378	subq	$6,%rdx
379	cmpl	$4194304,%r10d
380	je	.Lctr32_6x
381
382	leaq	128(%rcx),%rcx	# bias the key pointer; loop8 addresses keys as N-128(%rcx)
383	subq	$2,%rdx
384	jmp	.Lctr32_loop8
385
386.align	16
# Setup for the 6-block loop:
#   %rcx = key + 32 + 16*count, %r10 = 48 - 16*count, so that
#   -48(%rcx,%r10) is round key 2 and -64(%rcx,%r10) is round key 1.
#   %ebp is byte-swapped so it can be XORed into a native-order counter before
#   movbe swaps the result on its way to memory.
387.Lctr32_6x:
388	shll	$4,%eax
389	movl	$48,%r10d
390	bswapl	%ebp
391	leaq	32(%rcx,%rax,1),%rcx
392	subq	%rax,%r10
393	jmp	.Lctr32_loop6
394
395.align	16
# 6 blocks per iteration. The first three rounds are done here, interleaved
# with writing the counter dwords for the NEXT group of six into slots 0-5;
# the remaining rounds are finished by calling into _aesni_encrypt6.
396.Lctr32_loop6:
397	addl	$6,%r8d
398	movups	-48(%rcx,%r10,1),%xmm0	# round key 2
399.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
400	movl	%r8d,%eax
401	xorl	%ebp,%eax
402.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
403.byte	0x0f,0x38,0xf1,0x44,0x24,12	# movbe %eax,12(%rsp)
404	leal	1(%r8),%eax
405.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
406	xorl	%ebp,%eax
407.byte	0x0f,0x38,0xf1,0x44,0x24,28	# movbe %eax,28(%rsp)
408.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
409	leal	2(%r8),%eax
410	xorl	%ebp,%eax
411.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
412.byte	0x0f,0x38,0xf1,0x44,0x24,44	# movbe %eax,44(%rsp)
413	leal	3(%r8),%eax
414.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
415	movups	-32(%rcx,%r10,1),%xmm1	# round key 3
416	xorl	%ebp,%eax
417
418.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
419.byte	0x0f,0x38,0xf1,0x44,0x24,60	# movbe %eax,60(%rsp)
420	leal	4(%r8),%eax
421.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
422	xorl	%ebp,%eax
423.byte	0x0f,0x38,0xf1,0x44,0x24,76	# movbe %eax,76(%rsp)
424.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
425	leal	5(%r8),%eax
426	xorl	%ebp,%eax
427.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
428.byte	0x0f,0x38,0xf1,0x44,0x24,92	# movbe %eax,92(%rsp)
429	movq	%r10,%rax	# rax = loop index expected by .Lenc_loop6
430.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
431.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
432	movups	-16(%rcx,%r10,1),%xmm0	# round key 4
433
434	call	.Lenc_loop6
435
# XOR the six keystream blocks into the input, store the result, and reload
# xmm2-xmm7 with the next six counter blocks from the scratch area.
436	movdqu	(%rdi),%xmm8
437	movdqu	16(%rdi),%xmm9
438	movdqu	32(%rdi),%xmm10
439	movdqu	48(%rdi),%xmm11
440	movdqu	64(%rdi),%xmm12
441	movdqu	80(%rdi),%xmm13
442	leaq	96(%rdi),%rdi
443	movups	-64(%rcx,%r10,1),%xmm1	# round key 1 for the next iteration
444	pxor	%xmm2,%xmm8
445	movaps	0(%rsp),%xmm2
446	pxor	%xmm3,%xmm9
447	movaps	16(%rsp),%xmm3
448	pxor	%xmm4,%xmm10
449	movaps	32(%rsp),%xmm4
450	pxor	%xmm5,%xmm11
451	movaps	48(%rsp),%xmm5
452	pxor	%xmm6,%xmm12
453	movaps	64(%rsp),%xmm6
454	pxor	%xmm7,%xmm13
455	movaps	80(%rsp),%xmm7
456	movdqu	%xmm8,(%rsi)
457	movdqu	%xmm9,16(%rsi)
458	movdqu	%xmm10,32(%rsi)
459	movdqu	%xmm11,48(%rsi)
460	movdqu	%xmm12,64(%rsi)
461	movdqu	%xmm13,80(%rsi)
462	leaq	96(%rsi),%rsi
463
464	subq	$6,%rdx
465	jnc	.Lctr32_loop6	# no borrow: at least six more blocks
466
467	addq	$6,%rdx	# rdx = blocks still to do (0 to 5)
468	jz	.Lctr32_done
469
# Convert back to the register state the tail expects:
# %eax = round-loop count, %rcx = start of the key schedule.
470	leal	-48(%r10),%eax
471	leaq	-80(%rcx,%r10,1),%rcx
472	negl	%eax
473	shrl	$4,%eax
474	jmp	.Lctr32_tail
475
476.align	32
# 8 blocks per iteration, fully unrolled. %rcx is key + 128 here, so round key
# k is at 16*k-128(%rcx). While the rounds run, the counter dwords for the NEXT
# group of eight are computed in %r9d and written to slots 0-7.
477.Lctr32_loop8:
478	addl	$8,%r8d
479	movdqa	96(%rsp),%xmm8	# counter blocks 6 and 7 of the current group
480.byte	102,15,56,220,209
481	movl	%r8d,%r9d
482	movdqa	112(%rsp),%xmm9
483.byte	102,15,56,220,217
484	bswapl	%r9d
485	movups	32-128(%rcx),%xmm0	# round key 2
486.byte	102,15,56,220,225
487	xorl	%ebp,%r9d
488	nop
489.byte	102,15,56,220,233
490	movl	%r9d,0+12(%rsp)
491	leaq	1(%r8),%r9
492.byte	102,15,56,220,241
493.byte	102,15,56,220,249
494.byte	102,68,15,56,220,193
495.byte	102,68,15,56,220,201
496	movups	48-128(%rcx),%xmm1	# round key 3
497	bswapl	%r9d
498.byte	102,15,56,220,208
499.byte	102,15,56,220,216
500	xorl	%ebp,%r9d
501.byte	0x66,0x90
502.byte	102,15,56,220,224
503.byte	102,15,56,220,232
504	movl	%r9d,16+12(%rsp)
505	leaq	2(%r8),%r9
506.byte	102,15,56,220,240
507.byte	102,15,56,220,248
508.byte	102,68,15,56,220,192
509.byte	102,68,15,56,220,200
510	movups	64-128(%rcx),%xmm0	# round key 4
511	bswapl	%r9d
512.byte	102,15,56,220,209
513.byte	102,15,56,220,217
514	xorl	%ebp,%r9d
515.byte	0x66,0x90
516.byte	102,15,56,220,225
517.byte	102,15,56,220,233
518	movl	%r9d,32+12(%rsp)
519	leaq	3(%r8),%r9
520.byte	102,15,56,220,241
521.byte	102,15,56,220,249
522.byte	102,68,15,56,220,193
523.byte	102,68,15,56,220,201
524	movups	80-128(%rcx),%xmm1	# round key 5
525	bswapl	%r9d
526.byte	102,15,56,220,208
527.byte	102,15,56,220,216
528	xorl	%ebp,%r9d
529.byte	0x66,0x90
530.byte	102,15,56,220,224
531.byte	102,15,56,220,232
532	movl	%r9d,48+12(%rsp)
533	leaq	4(%r8),%r9
534.byte	102,15,56,220,240
535.byte	102,15,56,220,248
536.byte	102,68,15,56,220,192
537.byte	102,68,15,56,220,200
538	movups	96-128(%rcx),%xmm0	# round key 6
539	bswapl	%r9d
540.byte	102,15,56,220,209
541.byte	102,15,56,220,217
542	xorl	%ebp,%r9d
543.byte	0x66,0x90
544.byte	102,15,56,220,225
545.byte	102,15,56,220,233
546	movl	%r9d,64+12(%rsp)
547	leaq	5(%r8),%r9
548.byte	102,15,56,220,241
549.byte	102,15,56,220,249
550.byte	102,68,15,56,220,193
551.byte	102,68,15,56,220,201
552	movups	112-128(%rcx),%xmm1	# round key 7
553	bswapl	%r9d
554.byte	102,15,56,220,208
555.byte	102,15,56,220,216
556	xorl	%ebp,%r9d
557.byte	0x66,0x90
558.byte	102,15,56,220,224
559.byte	102,15,56,220,232
560	movl	%r9d,80+12(%rsp)
561	leaq	6(%r8),%r9
562.byte	102,15,56,220,240
563.byte	102,15,56,220,248
564.byte	102,68,15,56,220,192
565.byte	102,68,15,56,220,200
566	movups	128-128(%rcx),%xmm0	# round key 8
567	bswapl	%r9d
568.byte	102,15,56,220,209
569.byte	102,15,56,220,217
570	xorl	%ebp,%r9d
571.byte	0x66,0x90
572.byte	102,15,56,220,225
573.byte	102,15,56,220,233
574	movl	%r9d,96+12(%rsp)
575	leaq	7(%r8),%r9
576.byte	102,15,56,220,241
577.byte	102,15,56,220,249
578.byte	102,68,15,56,220,193
579.byte	102,68,15,56,220,201
580	movups	144-128(%rcx),%xmm1	# round key 9
581	bswapl	%r9d
582.byte	102,15,56,220,208
583.byte	102,15,56,220,216
584.byte	102,15,56,220,224
585	xorl	%ebp,%r9d
586	movdqu	0(%rdi),%xmm10	# start fetching input: block 0
587.byte	102,15,56,220,232
588	movl	%r9d,112+12(%rsp)
# The flags from this compare are consumed by the jb a few lines down; the
# .byte-encoded aesenc and the movups in between do not modify flags.
589	cmpl	$11,%eax
590.byte	102,15,56,220,240
591.byte	102,15,56,220,248
592.byte	102,68,15,56,220,192
593.byte	102,68,15,56,220,200
594	movups	160-128(%rcx),%xmm0	# round key 10
595
596	jb	.Lctr32_enc_done	# count below 11: xmm0 is already the last round key
597
# Longer key schedule: four more rounds, after which xmm1 and xmm0 hold round
# keys 13 and 14. (The key setup in this file only ever stores a count of 9 or
# 13, so there is no separate exit for an intermediate length.)
598.byte	102,15,56,220,209
599.byte	102,15,56,220,217
600.byte	102,15,56,220,225
601.byte	102,15,56,220,233
602.byte	102,15,56,220,241
603.byte	102,15,56,220,249
604.byte	102,68,15,56,220,193
605.byte	102,68,15,56,220,201
606	movups	176-128(%rcx),%xmm1	# round key 11
607
608.byte	102,15,56,220,208
609.byte	102,15,56,220,216
610.byte	102,15,56,220,224
611.byte	102,15,56,220,232
612.byte	102,15,56,220,240
613.byte	102,15,56,220,248
614.byte	102,68,15,56,220,192
615.byte	102,68,15,56,220,200
616	movups	192-128(%rcx),%xmm0	# round key 12
617
618
619
620.byte	102,15,56,220,209
621.byte	102,15,56,220,217
622.byte	102,15,56,220,225
623.byte	102,15,56,220,233
624.byte	102,15,56,220,241
625.byte	102,15,56,220,249
626.byte	102,68,15,56,220,193
627.byte	102,68,15,56,220,201
628	movups	208-128(%rcx),%xmm1	# round key 13
629
630.byte	102,15,56,220,208
631.byte	102,15,56,220,216
632.byte	102,15,56,220,224
633.byte	102,15,56,220,232
634.byte	102,15,56,220,240
635.byte	102,15,56,220,248
636.byte	102,68,15,56,220,192
637.byte	102,68,15,56,220,200
638	movups	224-128(%rcx),%xmm0	# round key 14
639	jmp	.Lctr32_enc_done
640
641.align	16
# Last two rounds. Each input block is first XORed with the last round key
# (xmm0) and the result is used as the key operand of aesenclast, so the
# aesenclast output is already keystream XOR input, ready to store.
642.Lctr32_enc_done:
643	movdqu	16(%rdi),%xmm11
644	pxor	%xmm0,%xmm10
645	movdqu	32(%rdi),%xmm12
646	pxor	%xmm0,%xmm11
647	movdqu	48(%rdi),%xmm13
648	pxor	%xmm0,%xmm12
649	movdqu	64(%rdi),%xmm14
650	pxor	%xmm0,%xmm13
651	movdqu	80(%rdi),%xmm15
652	pxor	%xmm0,%xmm14
653	pxor	%xmm0,%xmm15
654.byte	102,15,56,220,209	# final aesenc round with xmm1 on all eight blocks
655.byte	102,15,56,220,217
656.byte	102,15,56,220,225
657.byte	102,15,56,220,233
658.byte	102,15,56,220,241
659.byte	102,15,56,220,249
660.byte	102,68,15,56,220,193
661.byte	102,68,15,56,220,201
662	movdqu	96(%rdi),%xmm1	# input block 6 (xmm1 is free now)
663	leaq	128(%rdi),%rdi
664
# The loads into xmm11-xmm15 and xmm0 fetch the next group of counter blocks
# (slots 0-5) once the corresponding input register has been consumed.
665.byte	102,65,15,56,221,210	# aesenclast %xmm10,%xmm2
666	pxor	%xmm0,%xmm1
667	movdqu	112-128(%rdi),%xmm10	# input block 7
668.byte	102,65,15,56,221,219	# aesenclast %xmm11,%xmm3
669	pxor	%xmm0,%xmm10
670	movdqa	0(%rsp),%xmm11
671.byte	102,65,15,56,221,228	# aesenclast %xmm12,%xmm4
672.byte	102,65,15,56,221,237	# aesenclast %xmm13,%xmm5
673	movdqa	16(%rsp),%xmm12
674	movdqa	32(%rsp),%xmm13
675.byte	102,65,15,56,221,246	# aesenclast %xmm14,%xmm6
676.byte	102,65,15,56,221,255	# aesenclast %xmm15,%xmm7
677	movdqa	48(%rsp),%xmm14
678	movdqa	64(%rsp),%xmm15
679.byte	102,68,15,56,221,193	# aesenclast %xmm1,%xmm8
680	movdqa	80(%rsp),%xmm0
681	movups	16-128(%rcx),%xmm1	# round key 1 for the next iteration
682.byte	102,69,15,56,221,202	# aesenclast %xmm10,%xmm9
683
# Store eight output blocks and move the next counter blocks into xmm2-xmm7.
684	movups	%xmm2,(%rsi)
685	movdqa	%xmm11,%xmm2
686	movups	%xmm3,16(%rsi)
687	movdqa	%xmm12,%xmm3
688	movups	%xmm4,32(%rsi)
689	movdqa	%xmm13,%xmm4
690	movups	%xmm5,48(%rsi)
691	movdqa	%xmm14,%xmm5
692	movups	%xmm6,64(%rsi)
693	movdqa	%xmm15,%xmm6
694	movups	%xmm7,80(%rsi)
695	movdqa	%xmm0,%xmm7
696	movups	%xmm8,96(%rsi)
697	movups	%xmm9,112(%rsi)
698	leaq	128(%rsi),%rsi
699
700	subq	$8,%rdx
701	jnc	.Lctr32_loop8	# no borrow: at least eight more blocks
702
703	addq	$8,%rdx	# rdx = blocks still to do (0 to 7)
704	jz	.Lctr32_done
705	leaq	-128(%rcx),%rcx	# undo the key pointer bias
706
# Tail. State on entry from every path: %rdx = remaining blocks,
# %eax = round-loop count, %rcx = start of key schedule, %xmm1 = round key 1,
# %xmm2-%xmm7 = counter blocks 0-5 already XORed with round key 0.
707.Lctr32_tail:
708
709
710	leaq	16(%rcx),%rcx	# rcx -> round key 1
711	cmpq	$4,%rdx
712	jb	.Lctr32_loop3	# fewer than 4 blocks
713	je	.Lctr32_loop4	# exactly 4 blocks
714
715
# 5 to 7 blocks: run the 8-wide helper on seven counter blocks plus a zeroed
# xmm9 whose result is never stored. The first round is done inline here and
# the rest by entering _aesni_encrypt8 at .Lenc_loop8_enter.
716	shll	$4,%eax
717	movdqa	96(%rsp),%xmm8	# counter block 6
718	pxor	%xmm9,%xmm9
719
720	movups	16(%rcx),%xmm0	# round key 2
721.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
722.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
723	leaq	32-16(%rcx,%rax,1),%rcx	# rcx = key + 32 + 16*count
724	negq	%rax
725.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
726	addq	$16,%rax
727	movups	(%rdi),%xmm10
728.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
729.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
730	movups	16(%rdi),%xmm11
731	movups	32(%rdi),%xmm12
732.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
733.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
734
735	call	.Lenc_loop8_enter
736
737	movdqu	48(%rdi),%xmm13
738	pxor	%xmm10,%xmm2
739	movdqu	64(%rdi),%xmm10
740	pxor	%xmm11,%xmm3
741	movdqu	%xmm2,(%rsi)
742	pxor	%xmm12,%xmm4
743	movdqu	%xmm3,16(%rsi)
744	pxor	%xmm13,%xmm5
745	movdqu	%xmm4,32(%rsi)
746	pxor	%xmm10,%xmm6
747	movdqu	%xmm5,48(%rsi)
748	movdqu	%xmm6,64(%rsi)
# The flags from this compare serve both the jb here and the je after the
# sixth block; the movups/xorps in between leave flags untouched.
749	cmpq	$6,%rdx
750	jb	.Lctr32_done	# exactly 5 blocks
751
752	movups	80(%rdi),%xmm11
753	xorps	%xmm11,%xmm7
754	movups	%xmm7,80(%rsi)
755	je	.Lctr32_done	# exactly 6 blocks
756
757	movups	96(%rdi),%xmm12
758	xorps	%xmm12,%xmm8
759	movups	%xmm8,96(%rsi)
760	jmp	.Lctr32_done
761
762.align	32
# Exactly 4 blocks: one round per iteration on xmm2-xmm5. decl sets the flags
# for jnz; the instructions in between do not modify them.
763.Lctr32_loop4:
764.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
765	leaq	16(%rcx),%rcx
766	decl	%eax
767.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
768.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
769.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
770	movups	(%rcx),%xmm1
771	jnz	.Lctr32_loop4
772.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
773.byte	102,15,56,221,217	# aesenclast %xmm1,%xmm3
774	movups	(%rdi),%xmm10
775	movups	16(%rdi),%xmm11
776.byte	102,15,56,221,225	# aesenclast %xmm1,%xmm4
777.byte	102,15,56,221,233	# aesenclast %xmm1,%xmm5
778	movups	32(%rdi),%xmm12
779	movups	48(%rdi),%xmm13
780
781	xorps	%xmm10,%xmm2
782	movups	%xmm2,(%rsi)
783	xorps	%xmm11,%xmm3
784	movups	%xmm3,16(%rsi)
785	pxor	%xmm12,%xmm4
786	movdqu	%xmm4,32(%rsi)
787	pxor	%xmm13,%xmm5
788	movdqu	%xmm5,48(%rsi)
789	jmp	.Lctr32_done
790
791.align	32
# Fewer than 4 blocks: three keystream blocks are always computed, and the
# first one is always stored; the second and third only as %rdx requires.
792.Lctr32_loop3:
793.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
794	leaq	16(%rcx),%rcx
795	decl	%eax
796.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
797.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
798	movups	(%rcx),%xmm1
799	jnz	.Lctr32_loop3
800.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
801.byte	102,15,56,221,217	# aesenclast %xmm1,%xmm3
802.byte	102,15,56,221,225	# aesenclast %xmm1,%xmm4
803
804	movups	(%rdi),%xmm10
805	xorps	%xmm10,%xmm2
806	movups	%xmm2,(%rsi)
807	cmpq	$2,%rdx
808	jb	.Lctr32_done	# count below 2
809
810	movups	16(%rdi),%xmm11
811	xorps	%xmm11,%xmm3
812	movups	%xmm3,16(%rsi)
813	je	.Lctr32_done	# exactly 2 blocks (flags still from the cmpq above)
814
815	movups	32(%rdi),%xmm12
816	xorps	%xmm12,%xmm4
817	movups	%xmm4,32(%rsi)
818
# Common exit for the bulk path: zero every xmm register, %ebp and the whole
# 128-byte scratch area, then restore %rbp and %rsp from the frame set up at
# .Lctr32_bulk.
819.Lctr32_done:
820	xorps	%xmm0,%xmm0
821	xorl	%ebp,%ebp
822	pxor	%xmm1,%xmm1
823	pxor	%xmm2,%xmm2
824	pxor	%xmm3,%xmm3
825	pxor	%xmm4,%xmm4
826	pxor	%xmm5,%xmm5
827	pxor	%xmm6,%xmm6
828	pxor	%xmm7,%xmm7
829	movaps	%xmm0,0(%rsp)
830	pxor	%xmm8,%xmm8
831	movaps	%xmm0,16(%rsp)
832	pxor	%xmm9,%xmm9
833	movaps	%xmm0,32(%rsp)
834	pxor	%xmm10,%xmm10
835	movaps	%xmm0,48(%rsp)
836	pxor	%xmm11,%xmm11
837	movaps	%xmm0,64(%rsp)
838	pxor	%xmm12,%xmm12
839	movaps	%xmm0,80(%rsp)
840	pxor	%xmm13,%xmm13
841	movaps	%xmm0,96(%rsp)
842	pxor	%xmm14,%xmm14
843	movaps	%xmm0,112(%rsp)
844	pxor	%xmm15,%xmm15
845	movq	-8(%r11),%rbp	# the pushq at .Lctr32_bulk stored rbp just below r11
846.cfi_restore	%rbp
847	leaq	(%r11),%rsp
848.cfi_def_cfa_register	%rsp
# The single-block path jumps straight here; it never built a frame.
849.Lctr32_epilogue:
850	.byte	0xf3,0xc3	# rep ret
851.cfi_endproc
852.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
# aes_hw_set_encrypt_key / __aesni_set_encrypt_key (two labels, one body):
# expand a 128-bit or 256-bit AES key into an encryption key schedule.
#
# Registers read on entry (the C prototype is not visible here; by System V
# argument order these would be arguments 1 to 3):
#   %rdi  user key bytes (16 or 32 bytes, loaded unaligned)
#   %esi  key length in bits; only 128 and 256 are accepted
#   %rdx  output key schedule: round keys from offset 0, and the round-loop
#         count (9 for 128-bit, 13 for 256-bit) as a 32-bit value at offset 240
# Returns in %rax: 0 on success, -1 when %rdi or %rdx is null, -2 for any other
# key length. %xmm0-%xmm5 are zeroed on every return path.
#
# Two implementations exist per key size. The default one uses aeskeygenassist
# plus the .Lkey_expansion_* helpers at the end of this block; the _alt one
# uses pshufb + aesenclast and is chosen from OPENSSL_ia32cap_P bits.
#
# Key to the .byte-encoded instructions:
#   102,15,58,223,200,N   aeskeygenassist $N,%xmm0,%xmm1
#   102,15,58,223,202,N   aeskeygenassist $N,%xmm2,%xmm1
#   102,15,56,0,197       pshufb %xmm5,%xmm0
#   102,15,56,0,213       pshufb %xmm5,%xmm2
#   102,15,56,221,196     aesenclast %xmm4,%xmm0
#   102,15,56,221,212     aesenclast %xmm4,%xmm2
#   102,15,56,221,211     aesenclast %xmm3,%xmm2
853.globl	aes_hw_set_encrypt_key
854.hidden aes_hw_set_encrypt_key
855.type	aes_hw_set_encrypt_key,@function
856.align	16
857aes_hw_set_encrypt_key:
858__aesni_set_encrypt_key:
859.cfi_startproc
860.byte	0x48,0x83,0xEC,0x08	# subq $8,%rsp (released at .Lenc_key_ret)
861.cfi_adjust_cfa_offset	8
862	movq	$-1,%rax	# default result: null-pointer error
863	testq	%rdi,%rdi
864	jz	.Lenc_key_ret
865	testq	%rdx,%rdx
866	jz	.Lenc_key_ret
867
868	movups	(%rdi),%xmm0	# xmm0 = first 16 key bytes = round key 0
869	xorps	%xmm4,%xmm4	# xmm4 starts as zero; the helpers build on that
# Capability probe: keep bits 11 and 28 (mask 0x10000800) of the 32-bit word at
# OPENSSL_ia32cap_P+4. Presumably XOP and AVX - confirm against the
# OPENSSL_ia32cap_P layout. The _alt paths run when only bit 28 is set.
870	leaq	OPENSSL_ia32cap_P(%rip),%r10
871	movl	4(%r10),%r10d
872	andl	$268437504,%r10d
873	leaq	16(%rdx),%rax	# rax = write cursor, starts at round key 1
874	cmpl	$256,%esi
875	je	.L14rounds
876
877	cmpl	$128,%esi
878	jne	.Lbad_keybits
879
# 128-bit key, default path. Each aeskeygenassist immediate is the round
# constant: 1,2,4,...,128,27,54. The first call enters at the _cold label,
# which skips the store because round key 0 was just written directly.
880.L10rounds:
881	movl	$9,%esi	# round-loop count to record in the schedule
882	cmpl	$268435456,%r10d
883	je	.L10rounds_alt
884
885	movups	%xmm0,(%rdx)	# round key 0
886.byte	102,15,58,223,200,1
887	call	.Lkey_expansion_128_cold
888.byte	102,15,58,223,200,2
889	call	.Lkey_expansion_128
890.byte	102,15,58,223,200,4
891	call	.Lkey_expansion_128
892.byte	102,15,58,223,200,8
893	call	.Lkey_expansion_128
894.byte	102,15,58,223,200,16
895	call	.Lkey_expansion_128
896.byte	102,15,58,223,200,32
897	call	.Lkey_expansion_128
898.byte	102,15,58,223,200,64
899	call	.Lkey_expansion_128
900.byte	102,15,58,223,200,128
901	call	.Lkey_expansion_128
902.byte	102,15,58,223,200,27
903	call	.Lkey_expansion_128
904.byte	102,15,58,223,200,54
905	call	.Lkey_expansion_128
906	movups	%xmm0,(%rax)	# round key 10 (rax = key + 160 here)
907	movl	%esi,80(%rax)	# count goes to key + 240
908	xorl	%eax,%eax	# success
909	jmp	.Lenc_key_ret
910
911.align	16
# 128-bit key, alternative path. pshufb with .Lkey_rotate broadcasts the
# byte-rotated last dword of the previous round key into all four lanes;
# aesenclast against xmm4 then applies the S-box and XORs in the round constant
# held in xmm4 (with four identical columns the row shift changes nothing).
# pslld doubles the round constant for the next round.
912.L10rounds_alt:
913	movdqa	.Lkey_rotate(%rip),%xmm5
914	movl	$8,%r10d	# the loop produces round keys 1 to 8
915	movdqa	.Lkey_rcon1(%rip),%xmm4
916	movdqa	%xmm0,%xmm2
917	movdqu	%xmm0,(%rdx)	# round key 0
918	jmp	.Loop_key128
919
920.align	16
921.Loop_key128:
922.byte	102,15,56,0,197
923.byte	102,15,56,221,196
924	pslld	$1,%xmm4
925	leaq	16(%rax),%rax
926
# xmm2 = previous key XOR its copies shifted left by 4, 8 and 12 bytes.
927	movdqa	%xmm2,%xmm3
928	pslldq	$4,%xmm2
929	pxor	%xmm2,%xmm3
930	pslldq	$4,%xmm2
931	pxor	%xmm2,%xmm3
932	pslldq	$4,%xmm2
933	pxor	%xmm3,%xmm2
934
935	pxor	%xmm2,%xmm0	# xmm0 = new round key
936	movdqu	%xmm0,-16(%rax)
937	movdqa	%xmm0,%xmm2
938
939	decl	%r10d
940	jnz	.Loop_key128
941
# Round keys 9 and 10 need the constants 0x1b and 0x36, which the doubling
# above cannot produce from 0x80, so the constant is reloaded.
942	movdqa	.Lkey_rcon1b(%rip),%xmm4
943
944.byte	102,15,56,0,197
945.byte	102,15,56,221,196
946	pslld	$1,%xmm4
947
948	movdqa	%xmm2,%xmm3
949	pslldq	$4,%xmm2
950	pxor	%xmm2,%xmm3
951	pslldq	$4,%xmm2
952	pxor	%xmm2,%xmm3
953	pslldq	$4,%xmm2
954	pxor	%xmm3,%xmm2
955
956	pxor	%xmm2,%xmm0
957	movdqu	%xmm0,(%rax)	# round key 9
958
959	movdqa	%xmm0,%xmm2
960.byte	102,15,56,0,197
961.byte	102,15,56,221,196
962
963	movdqa	%xmm2,%xmm3
964	pslldq	$4,%xmm2
965	pxor	%xmm2,%xmm3
966	pslldq	$4,%xmm2
967	pxor	%xmm2,%xmm3
968	pslldq	$4,%xmm2
969	pxor	%xmm3,%xmm2
970
971	pxor	%xmm2,%xmm0
972	movdqu	%xmm0,16(%rax)	# round key 10
973
974	movl	%esi,96(%rax)	# count goes to key + 240 (rax = key + 144 here)
975	xorl	%eax,%eax	# success
976	jmp	.Lenc_key_ret
977
978
979
980.align	16
# 256-bit key, default path. xmm0 and xmm2 hold the two most recent round keys.
# The 256a helper derives the next even-numbered key into xmm0, the 256b helper
# the next odd-numbered key into xmm2; each helper first stores the key
# produced by the previous call.
981.L14rounds:
982	movups	16(%rdi),%xmm2	# second 16 key bytes = round key 1
983	movl	$13,%esi	# round-loop count to record in the schedule
984	leaq	16(%rax),%rax	# write cursor -> round key 2
985	cmpl	$268435456,%r10d
986	je	.L14rounds_alt
987
988	movups	%xmm0,(%rdx)
989	movups	%xmm2,16(%rdx)
990.byte	102,15,58,223,202,1
991	call	.Lkey_expansion_256a_cold
992.byte	102,15,58,223,200,1
993	call	.Lkey_expansion_256b
994.byte	102,15,58,223,202,2
995	call	.Lkey_expansion_256a
996.byte	102,15,58,223,200,2
997	call	.Lkey_expansion_256b
998.byte	102,15,58,223,202,4
999	call	.Lkey_expansion_256a
1000.byte	102,15,58,223,200,4
1001	call	.Lkey_expansion_256b
1002.byte	102,15,58,223,202,8
1003	call	.Lkey_expansion_256a
1004.byte	102,15,58,223,200,8
1005	call	.Lkey_expansion_256b
1006.byte	102,15,58,223,202,16
1007	call	.Lkey_expansion_256a
1008.byte	102,15,58,223,200,16
1009	call	.Lkey_expansion_256b
1010.byte	102,15,58,223,202,32
1011	call	.Lkey_expansion_256a
1012.byte	102,15,58,223,200,32
1013	call	.Lkey_expansion_256b
1014.byte	102,15,58,223,202,64
1015	call	.Lkey_expansion_256a
1016	movups	%xmm0,(%rax)	# round key 14 (rax = key + 224 here)
1017	movl	%esi,16(%rax)	# count goes to key + 240
1018	xorq	%rax,%rax	# success
1019	jmp	.Lenc_key_ret
1020
1021.align	16
# 256-bit key, alternative path. Each loop pass writes one even-numbered key at
# (%rax) and, unless it is the last pass, one odd-numbered key at 16(%rax).
1022.L14rounds_alt:
1023	movdqa	.Lkey_rotate(%rip),%xmm5
1024	movdqa	.Lkey_rcon1(%rip),%xmm4
1025	movl	$7,%r10d	# seven passes: round keys 2 to 14
1026	movdqu	%xmm0,0(%rdx)
1027	movdqa	%xmm2,%xmm1
1028	movdqu	%xmm2,16(%rdx)
1029	jmp	.Loop_key256
1030
1031.align	16
1032.Loop_key256:
# Even-numbered key: rotate + S-box + round constant on the latest odd key.
1033.byte	102,15,56,0,213
1034.byte	102,15,56,221,212
1035
1036	movdqa	%xmm0,%xmm3
1037	pslldq	$4,%xmm0
1038	pxor	%xmm0,%xmm3
1039	pslldq	$4,%xmm0
1040	pxor	%xmm0,%xmm3
1041	pslldq	$4,%xmm0
1042	pxor	%xmm3,%xmm0
1043	pslld	$1,%xmm4	# double the round constant
1044
1045	pxor	%xmm2,%xmm0
1046	movdqu	%xmm0,(%rax)
1047
1048	decl	%r10d
1049	jz	.Ldone_key256
1050
# Odd-numbered key: broadcast dword 3 of the key just written, then aesenclast
# against a zeroed xmm3, so no rotation and no round constant are applied.
1051	pshufd	$0xff,%xmm0,%xmm2
1052	pxor	%xmm3,%xmm3
1053.byte	102,15,56,221,211
1054
1055	movdqa	%xmm1,%xmm3
1056	pslldq	$4,%xmm1
1057	pxor	%xmm1,%xmm3
1058	pslldq	$4,%xmm1
1059	pxor	%xmm1,%xmm3
1060	pslldq	$4,%xmm1
1061	pxor	%xmm3,%xmm1
1062
1063	pxor	%xmm1,%xmm2
1064	movdqu	%xmm2,16(%rax)
1065	leaq	32(%rax),%rax
1066	movdqa	%xmm2,%xmm1
1067
1068	jmp	.Loop_key256
1069
1070.Ldone_key256:
1071	movl	%esi,16(%rax)	# count goes to key + 240 (rax = key + 224 here)
1072	xorl	%eax,%eax	# success
1073	jmp	.Lenc_key_ret
1074
1075.align	16
1076.Lbad_keybits:
1077	movq	$-2,%rax	# unsupported key length
# Common exit: wipe the xmm registers that held key material and release the
# 8 bytes of stack taken on entry. %rax already holds the result.
1078.Lenc_key_ret:
1079	pxor	%xmm0,%xmm0
1080	pxor	%xmm1,%xmm1
1081	pxor	%xmm2,%xmm2
1082	pxor	%xmm3,%xmm3
1083	pxor	%xmm4,%xmm4
1084	pxor	%xmm5,%xmm5
1085	addq	$8,%rsp
1086.cfi_adjust_cfa_offset	-8
1087	.byte	0xf3,0xc3	# rep ret
1088.cfi_endproc
# This label is not referenced anywhere in the visible file.
1089.LSEH_end_set_encrypt_key:
1090
# Helper subroutines, reached only by call from the code above. They sit after
# .cfi_endproc but before the .size directives, so they count as part of the
# function body. Register contract: %rax = write cursor, %xmm1 = result of the
# preceding aeskeygenassist, %xmm4 = scratch that is zero on the first call.
1091.align	16
# Store the current key at (%rax), advance the cursor, then derive the next
# 128-bit round key into xmm0. The _cold entry skips the store.
1092.Lkey_expansion_128:
1093	movups	%xmm0,(%rax)
1094	leaq	16(%rax),%rax
1095.Lkey_expansion_128_cold:
1096	shufps	$16,%xmm0,%xmm4
1097	xorps	%xmm4,%xmm0
1098	shufps	$140,%xmm0,%xmm4
1099	xorps	%xmm4,%xmm0
1100	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the keygenassist result
1101	xorps	%xmm1,%xmm0
1102	.byte	0xf3,0xc3	# rep ret
1103
# The three 192-bit helpers below are never called in the visible part of this
# file; the entry code above accepts only 128 and 256 bits.
1104.align	16
1105.Lkey_expansion_192a:
1106	movups	%xmm0,(%rax)
1107	leaq	16(%rax),%rax
1108.Lkey_expansion_192a_cold:
1109	movaps	%xmm2,%xmm5
1110.Lkey_expansion_192b_warm:
1111	shufps	$16,%xmm0,%xmm4
1112	movdqa	%xmm2,%xmm3
1113	xorps	%xmm4,%xmm0
1114	shufps	$140,%xmm0,%xmm4
1115	pslldq	$4,%xmm3
1116	xorps	%xmm4,%xmm0
1117	pshufd	$85,%xmm1,%xmm1	# broadcast dword 1 of the keygenassist result
1118	pxor	%xmm3,%xmm2
1119	pxor	%xmm1,%xmm0
1120	pshufd	$255,%xmm0,%xmm3
1121	pxor	%xmm3,%xmm2
1122	.byte	0xf3,0xc3	# rep ret
1123
1124.align	16
1125.Lkey_expansion_192b:
1126	movaps	%xmm0,%xmm3
1127	shufps	$68,%xmm0,%xmm5
1128	movups	%xmm5,(%rax)
1129	shufps	$78,%xmm2,%xmm3
1130	movups	%xmm3,16(%rax)
1131	leaq	32(%rax),%rax
1132	jmp	.Lkey_expansion_192b_warm
1133
1134.align	16
# Store xmm2 (latest odd-numbered key) and derive the next even-numbered key
# into xmm0. The _cold entry skips the store.
1135.Lkey_expansion_256a:
1136	movups	%xmm2,(%rax)
1137	leaq	16(%rax),%rax
1138.Lkey_expansion_256a_cold:
1139	shufps	$16,%xmm0,%xmm4
1140	xorps	%xmm4,%xmm0
1141	shufps	$140,%xmm0,%xmm4
1142	xorps	%xmm4,%xmm0
1143	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the keygenassist result
1144	xorps	%xmm1,%xmm0
1145	.byte	0xf3,0xc3	# rep ret
1146
1147.align	16
# Store xmm0 (latest even-numbered key) and derive the next odd-numbered key
# into xmm2.
1148.Lkey_expansion_256b:
1149	movups	%xmm0,(%rax)
1150	leaq	16(%rax),%rax
1151
1152	shufps	$16,%xmm2,%xmm4
1153	xorps	%xmm4,%xmm2
1154	shufps	$140,%xmm2,%xmm4
1155	xorps	%xmm4,%xmm2
1156	shufps	$170,%xmm1,%xmm1	# broadcast dword 2 of the keygenassist result
1157	xorps	%xmm1,%xmm2
1158	.byte	0xf3,0xc3	# rep ret
1159.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
1160.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant tables, emitted into the current section right after the code and
# aligned to 64 bytes. Only .Lkey_rotate, .Lkey_rcon1 and .Lkey_rcon1b are
# referenced by code in the visible part of this file (the _alt key schedule
# paths); the other labels have no visible users here.
1161.align	64
# Byte indices 15 down to 0: as a pshufb mask this would reverse 16 bytes.
1162.Lbswap_mask:
1163.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1164.Lincrement32:
1165.long	6,6,6,0
1166.Lincrement64:
1167.long	1,0,0,0
1168.Lincrement1:
1169.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
# pshufb mask selecting source bytes 13,14,15,12 into every dword, i.e. the
# last dword of the source rotated by one byte and broadcast to all lanes.
1170.Lkey_rotate:
1171.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
# Same pattern for source bytes 5,6,7,4.
1172.Lkey_rotate192:
1173.long	0x04070605,0x04070605,0x04070605,0x04070605
# Initial round constant, replicated in all four dwords; the key schedule
# doubles it with pslld after each use.
1174.Lkey_rcon1:
1175.long	1,1,1,1
# Round constant 0x1b, reloaded by the 128-bit _alt path before round key 9.
1176.Lkey_rcon1b:
1177.long	0x1b,0x1b,0x1b,0x1b
1178
# NUL-terminated ASCII identification string:
# AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
1179.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1180.align	64
1181#endif
1182.section	.note.GNU-stack,"",@progbits
1183