• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#if defined(__i386__)
2.file	"src/crypto/aes/asm/aesni-x86.S"
3.text
// _aesni_encrypt: encrypt a single 16-byte block (i386 stack arguments).
//   4(%esp)  = pointer to the 16-byte input block
//   8(%esp)  = pointer to the 16-byte output block
//   12(%esp) = pointer to the key schedule; the dword at offset 240 is used
//              as the loop count (presumably the AES round count -- confirm
//              against the key-setup code).
// Uses %eax, %ecx, %edx and %xmm0-%xmm2; the three XMM registers are zeroed
// before returning so no key or data material is left in them.
4.globl	_aesni_encrypt
5.private_extern	_aesni_encrypt
6.align	4
7_aesni_encrypt:
8L_aesni_encrypt_begin:
9	movl	4(%esp),%eax
10	movl	12(%esp),%edx
11	movups	(%eax),%xmm2
12	movl	240(%edx),%ecx
13	movl	8(%esp),%eax
14	movups	(%edx),%xmm0
15	movups	16(%edx),%xmm1
16	leal	32(%edx),%edx
// Whitening: XOR the block with the first round key.
17	xorps	%xmm0,%xmm2
18L000enc1_loop_1:
// .byte sequence encodes: aesenc %xmm1,%xmm2
19.byte	102,15,56,220,209
20	decl	%ecx
21	movups	(%edx),%xmm1
22	leal	16(%edx),%edx
23	jnz	L000enc1_loop_1
// .byte sequence encodes: aesenclast %xmm1,%xmm2
24.byte	102,15,56,221,209
// Scrub round keys from registers, store the result, then scrub the result.
25	pxor	%xmm0,%xmm0
26	pxor	%xmm1,%xmm1
27	movups	%xmm2,(%eax)
28	pxor	%xmm2,%xmm2
29	ret
// _aesni_decrypt: decrypt a single 16-byte block (i386 stack arguments).
//   4(%esp)  = pointer to the 16-byte input block
//   8(%esp)  = pointer to the 16-byte output block
//   12(%esp) = pointer to the key schedule; the dword at offset 240 is used
//              as the loop count (presumably the AES round count -- confirm).
// Same structure as the single-block encrypt routine, but with the
// aesdec/aesdeclast encodings. %xmm0-%xmm2 are zeroed before returning.
30.globl	_aesni_decrypt
31.private_extern	_aesni_decrypt
32.align	4
33_aesni_decrypt:
34L_aesni_decrypt_begin:
35	movl	4(%esp),%eax
36	movl	12(%esp),%edx
37	movups	(%eax),%xmm2
38	movl	240(%edx),%ecx
39	movl	8(%esp),%eax
40	movups	(%edx),%xmm0
41	movups	16(%edx),%xmm1
42	leal	32(%edx),%edx
// Whitening: XOR the block with the first round key.
43	xorps	%xmm0,%xmm2
44L001dec1_loop_2:
// .byte sequence encodes: aesdec %xmm1,%xmm2
45.byte	102,15,56,222,209
46	decl	%ecx
47	movups	(%edx),%xmm1
48	leal	16(%edx),%edx
49	jnz	L001dec1_loop_2
// .byte sequence encodes: aesdeclast %xmm1,%xmm2
50.byte	102,15,56,223,209
// Scrub round keys from registers, store the result, then scrub the result.
51	pxor	%xmm0,%xmm0
52	pxor	%xmm1,%xmm1
53	movups	%xmm2,(%eax)
54	pxor	%xmm2,%xmm2
55	ret
// __aesni_encrypt2: internal helper, encrypts two blocks in parallel.
// Register-based interface (no stack arguments are read):
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2, %xmm3 = input blocks
//   Out: %xmm2, %xmm3 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// %ecx is scaled by 16 and negated so that (%edx,%ecx,1) walks the round
// keys with a negative index that counts up to zero; the addl that advances
// the index also sets ZF for the loop-closing jnz.
56.private_extern	__aesni_encrypt2
57.align	4
58__aesni_encrypt2:
59	movups	(%edx),%xmm0
60	shll	$4,%ecx
61	movups	16(%edx),%xmm1
62	xorps	%xmm0,%xmm2
63	pxor	%xmm0,%xmm3
64	movups	32(%edx),%xmm0
65	leal	32(%edx,%ecx,1),%edx
66	negl	%ecx
67	addl	$16,%ecx
68L002enc2_loop:
// Two rounds per iteration: aesenc with %xmm1, then aesenc with %xmm0,
// applied to %xmm2 and %xmm3.
69.byte	102,15,56,220,209
70.byte	102,15,56,220,217
71	movups	(%edx,%ecx,1),%xmm1
72	addl	$32,%ecx
73.byte	102,15,56,220,208
74.byte	102,15,56,220,216
75	movups	-16(%edx,%ecx,1),%xmm0
76	jnz	L002enc2_loop
// Final aesenc (%xmm1) followed by aesenclast (%xmm0) on both lanes.
77.byte	102,15,56,220,209
78.byte	102,15,56,220,217
79.byte	102,15,56,221,208
80.byte	102,15,56,221,216
81	ret
// __aesni_decrypt2: internal helper, decrypts two blocks in parallel.
// Register-based interface (no stack arguments are read):
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2, %xmm3 = input blocks
//   Out: %xmm2, %xmm3 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// Round keys are addressed with a negative index in %ecx that counts up to
// zero; the addl inside the loop provides ZF for the jnz.
82.private_extern	__aesni_decrypt2
83.align	4
84__aesni_decrypt2:
85	movups	(%edx),%xmm0
86	shll	$4,%ecx
87	movups	16(%edx),%xmm1
88	xorps	%xmm0,%xmm2
89	pxor	%xmm0,%xmm3
90	movups	32(%edx),%xmm0
91	leal	32(%edx,%ecx,1),%edx
92	negl	%ecx
93	addl	$16,%ecx
94L003dec2_loop:
// Two rounds per iteration: aesdec with %xmm1, then aesdec with %xmm0,
// applied to %xmm2 and %xmm3.
95.byte	102,15,56,222,209
96.byte	102,15,56,222,217
97	movups	(%edx,%ecx,1),%xmm1
98	addl	$32,%ecx
99.byte	102,15,56,222,208
100.byte	102,15,56,222,216
101	movups	-16(%edx,%ecx,1),%xmm0
102	jnz	L003dec2_loop
// Final aesdec (%xmm1) followed by aesdeclast (%xmm0) on both lanes.
103.byte	102,15,56,222,209
104.byte	102,15,56,222,217
105.byte	102,15,56,223,208
106.byte	102,15,56,223,216
107	ret
// __aesni_encrypt3: internal helper, encrypts three blocks in parallel.
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2-%xmm4 = input blocks
//   Out: %xmm2-%xmm4 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// Round keys are addressed with a negative index in %ecx that counts up to
// zero; the addl inside the loop provides ZF for the jnz.
108.private_extern	__aesni_encrypt3
109.align	4
110__aesni_encrypt3:
111	movups	(%edx),%xmm0
112	shll	$4,%ecx
113	movups	16(%edx),%xmm1
114	xorps	%xmm0,%xmm2
115	pxor	%xmm0,%xmm3
116	pxor	%xmm0,%xmm4
117	movups	32(%edx),%xmm0
118	leal	32(%edx,%ecx,1),%edx
119	negl	%ecx
120	addl	$16,%ecx
121L004enc3_loop:
// aesenc with %xmm1 on %xmm2..%xmm4, then aesenc with %xmm0 on the same lanes.
122.byte	102,15,56,220,209
123.byte	102,15,56,220,217
124.byte	102,15,56,220,225
125	movups	(%edx,%ecx,1),%xmm1
126	addl	$32,%ecx
127.byte	102,15,56,220,208
128.byte	102,15,56,220,216
129.byte	102,15,56,220,224
130	movups	-16(%edx,%ecx,1),%xmm0
131	jnz	L004enc3_loop
// Final aesenc (%xmm1) followed by aesenclast (%xmm0) on all three lanes.
132.byte	102,15,56,220,209
133.byte	102,15,56,220,217
134.byte	102,15,56,220,225
135.byte	102,15,56,221,208
136.byte	102,15,56,221,216
137.byte	102,15,56,221,224
138	ret
// __aesni_decrypt3: internal helper, decrypts three blocks in parallel.
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2-%xmm4 = input blocks
//   Out: %xmm2-%xmm4 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// Round keys are addressed with a negative index in %ecx that counts up to
// zero; the addl inside the loop provides ZF for the jnz.
139.private_extern	__aesni_decrypt3
140.align	4
141__aesni_decrypt3:
142	movups	(%edx),%xmm0
143	shll	$4,%ecx
144	movups	16(%edx),%xmm1
145	xorps	%xmm0,%xmm2
146	pxor	%xmm0,%xmm3
147	pxor	%xmm0,%xmm4
148	movups	32(%edx),%xmm0
149	leal	32(%edx,%ecx,1),%edx
150	negl	%ecx
151	addl	$16,%ecx
152L005dec3_loop:
// aesdec with %xmm1 on %xmm2..%xmm4, then aesdec with %xmm0 on the same lanes.
153.byte	102,15,56,222,209
154.byte	102,15,56,222,217
155.byte	102,15,56,222,225
156	movups	(%edx,%ecx,1),%xmm1
157	addl	$32,%ecx
158.byte	102,15,56,222,208
159.byte	102,15,56,222,216
160.byte	102,15,56,222,224
161	movups	-16(%edx,%ecx,1),%xmm0
162	jnz	L005dec3_loop
// Final aesdec (%xmm1) followed by aesdeclast (%xmm0) on all three lanes.
163.byte	102,15,56,222,209
164.byte	102,15,56,222,217
165.byte	102,15,56,222,225
166.byte	102,15,56,223,208
167.byte	102,15,56,223,216
168.byte	102,15,56,223,224
169	ret
// __aesni_encrypt4: internal helper, encrypts four blocks in parallel.
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2-%xmm5 = input blocks
//   Out: %xmm2-%xmm5 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// Round keys are addressed with a negative index in %ecx that counts up to
// zero; the addl inside the loop provides ZF for the jnz.
170.private_extern	__aesni_encrypt4
171.align	4
172__aesni_encrypt4:
173	movups	(%edx),%xmm0
174	movups	16(%edx),%xmm1
175	shll	$4,%ecx
176	xorps	%xmm0,%xmm2
177	pxor	%xmm0,%xmm3
178	pxor	%xmm0,%xmm4
179	pxor	%xmm0,%xmm5
180	movups	32(%edx),%xmm0
181	leal	32(%edx,%ecx,1),%edx
182	negl	%ecx
// .byte sequence encodes a 4-byte NOP (nopl 0(%eax)); presumably padding so
// the loop label lands on a preferred boundary.
183.byte	15,31,64,0
184	addl	$16,%ecx
185L006enc4_loop:
// aesenc with %xmm1 on %xmm2..%xmm5, then aesenc with %xmm0 on the same lanes.
186.byte	102,15,56,220,209
187.byte	102,15,56,220,217
188.byte	102,15,56,220,225
189.byte	102,15,56,220,233
190	movups	(%edx,%ecx,1),%xmm1
191	addl	$32,%ecx
192.byte	102,15,56,220,208
193.byte	102,15,56,220,216
194.byte	102,15,56,220,224
195.byte	102,15,56,220,232
196	movups	-16(%edx,%ecx,1),%xmm0
197	jnz	L006enc4_loop
// Final aesenc (%xmm1) followed by aesenclast (%xmm0) on all four lanes.
198.byte	102,15,56,220,209
199.byte	102,15,56,220,217
200.byte	102,15,56,220,225
201.byte	102,15,56,220,233
202.byte	102,15,56,221,208
203.byte	102,15,56,221,216
204.byte	102,15,56,221,224
205.byte	102,15,56,221,232
206	ret
// __aesni_decrypt4: internal helper, decrypts four blocks in parallel.
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2-%xmm5 = input blocks
//   Out: %xmm2-%xmm5 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// Round keys are addressed with a negative index in %ecx that counts up to
// zero; the addl inside the loop provides ZF for the jnz.
207.private_extern	__aesni_decrypt4
208.align	4
209__aesni_decrypt4:
210	movups	(%edx),%xmm0
211	movups	16(%edx),%xmm1
212	shll	$4,%ecx
213	xorps	%xmm0,%xmm2
214	pxor	%xmm0,%xmm3
215	pxor	%xmm0,%xmm4
216	pxor	%xmm0,%xmm5
217	movups	32(%edx),%xmm0
218	leal	32(%edx,%ecx,1),%edx
219	negl	%ecx
// .byte sequence encodes a 4-byte NOP (nopl 0(%eax)); presumably padding so
// the loop label lands on a preferred boundary.
220.byte	15,31,64,0
221	addl	$16,%ecx
222L007dec4_loop:
// aesdec with %xmm1 on %xmm2..%xmm5, then aesdec with %xmm0 on the same lanes.
223.byte	102,15,56,222,209
224.byte	102,15,56,222,217
225.byte	102,15,56,222,225
226.byte	102,15,56,222,233
227	movups	(%edx,%ecx,1),%xmm1
228	addl	$32,%ecx
229.byte	102,15,56,222,208
230.byte	102,15,56,222,216
231.byte	102,15,56,222,224
232.byte	102,15,56,222,232
233	movups	-16(%edx,%ecx,1),%xmm0
234	jnz	L007dec4_loop
// Final aesdec (%xmm1) followed by aesdeclast (%xmm0) on all four lanes.
235.byte	102,15,56,222,209
236.byte	102,15,56,222,217
237.byte	102,15,56,222,225
238.byte	102,15,56,222,233
239.byte	102,15,56,223,208
240.byte	102,15,56,223,216
241.byte	102,15,56,223,224
242.byte	102,15,56,223,232
243	ret
// __aesni_encrypt6: internal helper, encrypts six blocks in parallel.
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2-%xmm7 = input blocks
//   Out: %xmm2-%xmm7 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// The whitening XORs are interleaved with the first aesenc round, so the
// function jumps into the middle of the loop body to finish that round.
// L_aesni_encrypt6_enter is a second entry point: callers in this file that
// have already applied whitening and the first round to all six lanes call
// it directly with %edx/%ecx set up as biased base / negative index.
244.private_extern	__aesni_encrypt6
245.align	4
246__aesni_encrypt6:
247	movups	(%edx),%xmm0
248	shll	$4,%ecx
249	movups	16(%edx),%xmm1
250	xorps	%xmm0,%xmm2
251	pxor	%xmm0,%xmm3
252	pxor	%xmm0,%xmm4
// aesenc %xmm1,%xmm2 -- first round starts while later lanes are still
// being whitened.
253.byte	102,15,56,220,209
254	pxor	%xmm0,%xmm5
255	pxor	%xmm0,%xmm6
256.byte	102,15,56,220,217
257	leal	32(%edx,%ecx,1),%edx
258	negl	%ecx
259.byte	102,15,56,220,225
260	pxor	%xmm0,%xmm7
261	movups	(%edx,%ecx,1),%xmm0
262	addl	$16,%ecx
263	jmp	L008_aesni_encrypt6_inner
264.align	4,0x90
265L009enc6_loop:
// aesenc with %xmm1 on %xmm2..%xmm4 ...
266.byte	102,15,56,220,209
267.byte	102,15,56,220,217
268.byte	102,15,56,220,225
269L008_aesni_encrypt6_inner:
// ... and on %xmm5..%xmm7.
270.byte	102,15,56,220,233
271.byte	102,15,56,220,241
272.byte	102,15,56,220,249
273L_aesni_encrypt6_enter:
274	movups	(%edx,%ecx,1),%xmm1
275	addl	$32,%ecx
// aesenc with %xmm0 on all six lanes.
276.byte	102,15,56,220,208
277.byte	102,15,56,220,216
278.byte	102,15,56,220,224
279.byte	102,15,56,220,232
280.byte	102,15,56,220,240
281.byte	102,15,56,220,248
282	movups	-16(%edx,%ecx,1),%xmm0
283	jnz	L009enc6_loop
// Final aesenc (%xmm1) followed by aesenclast (%xmm0) on all six lanes.
284.byte	102,15,56,220,209
285.byte	102,15,56,220,217
286.byte	102,15,56,220,225
287.byte	102,15,56,220,233
288.byte	102,15,56,220,241
289.byte	102,15,56,220,249
290.byte	102,15,56,221,208
291.byte	102,15,56,221,216
292.byte	102,15,56,221,224
293.byte	102,15,56,221,232
294.byte	102,15,56,221,240
295.byte	102,15,56,221,248
296	ret
// __aesni_decrypt6: internal helper, decrypts six blocks in parallel.
//   In:  %edx = key schedule pointer, %ecx = loop count from 240(key),
//        %xmm2-%xmm7 = input blocks
//   Out: %xmm2-%xmm7 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
// The whitening XORs are interleaved with the first aesdec round, so the
// function jumps into the middle of the loop body to finish that round.
// L_aesni_decrypt6_enter is a mid-body label after the first round; it
// mirrors the entry label of the six-block encrypt helper.
297.private_extern	__aesni_decrypt6
298.align	4
299__aesni_decrypt6:
300	movups	(%edx),%xmm0
301	shll	$4,%ecx
302	movups	16(%edx),%xmm1
303	xorps	%xmm0,%xmm2
304	pxor	%xmm0,%xmm3
305	pxor	%xmm0,%xmm4
// aesdec %xmm1,%xmm2 -- first round starts while later lanes are still
// being whitened.
306.byte	102,15,56,222,209
307	pxor	%xmm0,%xmm5
308	pxor	%xmm0,%xmm6
309.byte	102,15,56,222,217
310	leal	32(%edx,%ecx,1),%edx
311	negl	%ecx
312.byte	102,15,56,222,225
313	pxor	%xmm0,%xmm7
314	movups	(%edx,%ecx,1),%xmm0
315	addl	$16,%ecx
316	jmp	L010_aesni_decrypt6_inner
317.align	4,0x90
318L011dec6_loop:
// aesdec with %xmm1 on %xmm2..%xmm4 ...
319.byte	102,15,56,222,209
320.byte	102,15,56,222,217
321.byte	102,15,56,222,225
322L010_aesni_decrypt6_inner:
// ... and on %xmm5..%xmm7.
323.byte	102,15,56,222,233
324.byte	102,15,56,222,241
325.byte	102,15,56,222,249
326L_aesni_decrypt6_enter:
327	movups	(%edx,%ecx,1),%xmm1
328	addl	$32,%ecx
// aesdec with %xmm0 on all six lanes.
329.byte	102,15,56,222,208
330.byte	102,15,56,222,216
331.byte	102,15,56,222,224
332.byte	102,15,56,222,232
333.byte	102,15,56,222,240
334.byte	102,15,56,222,248
335	movups	-16(%edx,%ecx,1),%xmm0
336	jnz	L011dec6_loop
// Final aesdec (%xmm1) followed by aesdeclast (%xmm0) on all six lanes.
337.byte	102,15,56,222,209
338.byte	102,15,56,222,217
339.byte	102,15,56,222,225
340.byte	102,15,56,222,233
341.byte	102,15,56,222,241
342.byte	102,15,56,222,249
343.byte	102,15,56,223,208
344.byte	102,15,56,223,216
345.byte	102,15,56,223,224
346.byte	102,15,56,223,232
347.byte	102,15,56,223,240
348.byte	102,15,56,223,248
349	ret
// _aesni_ecb_encrypt: process a buffer block-by-block with no chaining.
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> %esi = input pointer
//   24(%esp) -> %edi = output pointer
//   28(%esp) -> %eax = length in bytes; rounded down to a multiple of 16,
//                      and the function returns immediately if that is zero
//   32(%esp) -> %edx = key schedule pointer
//   36(%esp) -> %ebx = direction flag: non-zero selects the encrypt path,
//                      zero selects the decrypt path
// %ebp/%ebx keep the key pointer and loop count across helper calls, which
// clobber %edx/%ecx. Data is handled six blocks at a time, then a tail of
// 1..5 blocks is dispatched to the matching N-block helper.
// All eight XMM registers are zeroed before returning.
350.globl	_aesni_ecb_encrypt
351.private_extern	_aesni_ecb_encrypt
352.align	4
353_aesni_ecb_encrypt:
354L_aesni_ecb_encrypt_begin:
355	pushl	%ebp
356	pushl	%ebx
357	pushl	%esi
358	pushl	%edi
359	movl	20(%esp),%esi
360	movl	24(%esp),%edi
361	movl	28(%esp),%eax
362	movl	32(%esp),%edx
363	movl	36(%esp),%ebx
364	andl	$-16,%eax
365	jz	L012ecb_ret
366	movl	240(%edx),%ecx
367	testl	%ebx,%ebx
368	jz	L013ecb_decrypt
// ---- encrypt path ----
369	movl	%edx,%ebp
370	movl	%ecx,%ebx
371	cmpl	$96,%eax
372	jb	L014ecb_enc_tail
// Preload the first six blocks; %eax is kept biased by -96 inside the loop
// so the borrow from subl decides when fewer than six blocks remain.
373	movdqu	(%esi),%xmm2
374	movdqu	16(%esi),%xmm3
375	movdqu	32(%esi),%xmm4
376	movdqu	48(%esi),%xmm5
377	movdqu	64(%esi),%xmm6
378	movdqu	80(%esi),%xmm7
379	leal	96(%esi),%esi
380	subl	$96,%eax
381	jmp	L015ecb_enc_loop6_enter
382.align	4,0x90
383L016ecb_enc_loop6:
// Store the previous six results while loading the next six inputs.
384	movups	%xmm2,(%edi)
385	movdqu	(%esi),%xmm2
386	movups	%xmm3,16(%edi)
387	movdqu	16(%esi),%xmm3
388	movups	%xmm4,32(%edi)
389	movdqu	32(%esi),%xmm4
390	movups	%xmm5,48(%edi)
391	movdqu	48(%esi),%xmm5
392	movups	%xmm6,64(%edi)
393	movdqu	64(%esi),%xmm6
394	movups	%xmm7,80(%edi)
395	leal	96(%edi),%edi
396	movdqu	80(%esi),%xmm7
397	leal	96(%esi),%esi
398L015ecb_enc_loop6_enter:
399	call	__aesni_encrypt6
// Restore key pointer and loop count clobbered by the helper.
400	movl	%ebp,%edx
401	movl	%ebx,%ecx
402	subl	$96,%eax
403	jnc	L016ecb_enc_loop6
404	movups	%xmm2,(%edi)
405	movups	%xmm3,16(%edi)
406	movups	%xmm4,32(%edi)
407	movups	%xmm5,48(%edi)
408	movups	%xmm6,64(%edi)
409	movups	%xmm7,80(%edi)
410	leal	96(%edi),%edi
// Undo the -96 bias; zero means nothing is left.
411	addl	$96,%eax
412	jz	L012ecb_ret
413L014ecb_enc_tail:
// 16..80 bytes remain: load blocks incrementally and branch on the count.
// The movups loads between cmpl and the conditional jumps do not alter
// flags, so each cmpl serves both the jb and the je that follow it.
414	movups	(%esi),%xmm2
415	cmpl	$32,%eax
416	jb	L017ecb_enc_one
417	movups	16(%esi),%xmm3
418	je	L018ecb_enc_two
419	movups	32(%esi),%xmm4
420	cmpl	$64,%eax
421	jb	L019ecb_enc_three
422	movups	48(%esi),%xmm5
423	je	L020ecb_enc_four
// Five blocks: run the six-block helper with a zeroed sixth lane and store
// only the first five results.
424	movups	64(%esi),%xmm6
425	xorps	%xmm7,%xmm7
426	call	__aesni_encrypt6
427	movups	%xmm2,(%edi)
428	movups	%xmm3,16(%edi)
429	movups	%xmm4,32(%edi)
430	movups	%xmm5,48(%edi)
431	movups	%xmm6,64(%edi)
432	jmp	L012ecb_ret
433.align	4,0x90
434L017ecb_enc_one:
// Single block: inlined one-block loop (aesenc ... aesenclast on %xmm2).
435	movups	(%edx),%xmm0
436	movups	16(%edx),%xmm1
437	leal	32(%edx),%edx
438	xorps	%xmm0,%xmm2
439L021enc1_loop_3:
440.byte	102,15,56,220,209
441	decl	%ecx
442	movups	(%edx),%xmm1
443	leal	16(%edx),%edx
444	jnz	L021enc1_loop_3
445.byte	102,15,56,221,209
446	movups	%xmm2,(%edi)
447	jmp	L012ecb_ret
448.align	4,0x90
449L018ecb_enc_two:
450	call	__aesni_encrypt2
451	movups	%xmm2,(%edi)
452	movups	%xmm3,16(%edi)
453	jmp	L012ecb_ret
454.align	4,0x90
455L019ecb_enc_three:
456	call	__aesni_encrypt3
457	movups	%xmm2,(%edi)
458	movups	%xmm3,16(%edi)
459	movups	%xmm4,32(%edi)
460	jmp	L012ecb_ret
461.align	4,0x90
462L020ecb_enc_four:
463	call	__aesni_encrypt4
464	movups	%xmm2,(%edi)
465	movups	%xmm3,16(%edi)
466	movups	%xmm4,32(%edi)
467	movups	%xmm5,48(%edi)
468	jmp	L012ecb_ret
469.align	4,0x90
470L013ecb_decrypt:
// ---- decrypt path: same structure as the encrypt path above ----
471	movl	%edx,%ebp
472	movl	%ecx,%ebx
473	cmpl	$96,%eax
474	jb	L022ecb_dec_tail
475	movdqu	(%esi),%xmm2
476	movdqu	16(%esi),%xmm3
477	movdqu	32(%esi),%xmm4
478	movdqu	48(%esi),%xmm5
479	movdqu	64(%esi),%xmm6
480	movdqu	80(%esi),%xmm7
481	leal	96(%esi),%esi
482	subl	$96,%eax
483	jmp	L023ecb_dec_loop6_enter
484.align	4,0x90
485L024ecb_dec_loop6:
// Store the previous six results while loading the next six inputs.
486	movups	%xmm2,(%edi)
487	movdqu	(%esi),%xmm2
488	movups	%xmm3,16(%edi)
489	movdqu	16(%esi),%xmm3
490	movups	%xmm4,32(%edi)
491	movdqu	32(%esi),%xmm4
492	movups	%xmm5,48(%edi)
493	movdqu	48(%esi),%xmm5
494	movups	%xmm6,64(%edi)
495	movdqu	64(%esi),%xmm6
496	movups	%xmm7,80(%edi)
497	leal	96(%edi),%edi
498	movdqu	80(%esi),%xmm7
499	leal	96(%esi),%esi
500L023ecb_dec_loop6_enter:
501	call	__aesni_decrypt6
// Restore key pointer and loop count clobbered by the helper.
502	movl	%ebp,%edx
503	movl	%ebx,%ecx
504	subl	$96,%eax
505	jnc	L024ecb_dec_loop6
506	movups	%xmm2,(%edi)
507	movups	%xmm3,16(%edi)
508	movups	%xmm4,32(%edi)
509	movups	%xmm5,48(%edi)
510	movups	%xmm6,64(%edi)
511	movups	%xmm7,80(%edi)
512	leal	96(%edi),%edi
513	addl	$96,%eax
514	jz	L012ecb_ret
515L022ecb_dec_tail:
// 16..80 bytes remain: dispatch on the remaining block count.
516	movups	(%esi),%xmm2
517	cmpl	$32,%eax
518	jb	L025ecb_dec_one
519	movups	16(%esi),%xmm3
520	je	L026ecb_dec_two
521	movups	32(%esi),%xmm4
522	cmpl	$64,%eax
523	jb	L027ecb_dec_three
524	movups	48(%esi),%xmm5
525	je	L028ecb_dec_four
// Five blocks: six-block helper with a zeroed sixth lane.
526	movups	64(%esi),%xmm6
527	xorps	%xmm7,%xmm7
528	call	__aesni_decrypt6
529	movups	%xmm2,(%edi)
530	movups	%xmm3,16(%edi)
531	movups	%xmm4,32(%edi)
532	movups	%xmm5,48(%edi)
533	movups	%xmm6,64(%edi)
534	jmp	L012ecb_ret
535.align	4,0x90
536L025ecb_dec_one:
// Single block: inlined one-block loop (aesdec ... aesdeclast on %xmm2).
537	movups	(%edx),%xmm0
538	movups	16(%edx),%xmm1
539	leal	32(%edx),%edx
540	xorps	%xmm0,%xmm2
541L029dec1_loop_4:
542.byte	102,15,56,222,209
543	decl	%ecx
544	movups	(%edx),%xmm1
545	leal	16(%edx),%edx
546	jnz	L029dec1_loop_4
547.byte	102,15,56,223,209
548	movups	%xmm2,(%edi)
549	jmp	L012ecb_ret
550.align	4,0x90
551L026ecb_dec_two:
552	call	__aesni_decrypt2
553	movups	%xmm2,(%edi)
554	movups	%xmm3,16(%edi)
555	jmp	L012ecb_ret
556.align	4,0x90
557L027ecb_dec_three:
558	call	__aesni_decrypt3
559	movups	%xmm2,(%edi)
560	movups	%xmm3,16(%edi)
561	movups	%xmm4,32(%edi)
562	jmp	L012ecb_ret
563.align	4,0x90
564L028ecb_dec_four:
565	call	__aesni_decrypt4
566	movups	%xmm2,(%edi)
567	movups	%xmm3,16(%edi)
568	movups	%xmm4,32(%edi)
569	movups	%xmm5,48(%edi)
570L012ecb_ret:
// Common exit: scrub all XMM registers, restore callee-saved registers.
571	pxor	%xmm0,%xmm0
572	pxor	%xmm1,%xmm1
573	pxor	%xmm2,%xmm2
574	pxor	%xmm3,%xmm3
575	pxor	%xmm4,%xmm4
576	pxor	%xmm5,%xmm5
577	pxor	%xmm6,%xmm6
578	pxor	%xmm7,%xmm7
579	popl	%edi
580	popl	%esi
581	popl	%ebx
582	popl	%ebp
583	ret
// _aesni_ccm64_encrypt_blocks: per block, encrypts a counter block and a
// running MAC block together (two lanes), XORs the counter keystream into
// the data, and folds the input block into the MAC.
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> %esi = input pointer
//   24(%esp) -> %edi = output pointer
//   28(%esp) -> %eax = number of 16-byte blocks (decremented once per block;
//                      there is no zero check, so callers must pass >= 1)
//   32(%esp) -> %edx = key schedule pointer
//   36(%esp) -> %ebx = pointer to the 16-byte counter block (read only)
//   40(%esp) -> %ecx = pointer to the 16-byte MAC block (read, then written
//                      back on exit)
// Local frame (16-byte aligned):
//   0..15(%esp)  = pshufb mask 0x0f,0x0e,...,0x00 (reverses all 16 bytes)
//   16..31(%esp) = 64-bit constant 1 in the low qword, used with paddq
//   48(%esp)     = caller's %esp
// All eight XMM registers are zeroed before returning.
584.globl	_aesni_ccm64_encrypt_blocks
585.private_extern	_aesni_ccm64_encrypt_blocks
586.align	4
587_aesni_ccm64_encrypt_blocks:
588L_aesni_ccm64_encrypt_blocks_begin:
589	pushl	%ebp
590	pushl	%ebx
591	pushl	%esi
592	pushl	%edi
593	movl	20(%esp),%esi
594	movl	24(%esp),%edi
595	movl	28(%esp),%eax
596	movl	32(%esp),%edx
597	movl	36(%esp),%ebx
598	movl	40(%esp),%ecx
599	movl	%esp,%ebp
600	subl	$60,%esp
601	andl	$-16,%esp
602	movl	%ebp,48(%esp)
// %xmm7 = counter block, %xmm3 = MAC block.
603	movdqu	(%ebx),%xmm7
604	movdqu	(%ecx),%xmm3
605	movl	240(%edx),%ecx
// Byte-reversal mask: 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203.
606	movl	$202182159,(%esp)
607	movl	$134810123,4(%esp)
608	movl	$67438087,8(%esp)
609	movl	$66051,12(%esp)
610	movl	$1,%ebx
611	xorl	%ebp,%ebp
612	movl	%ebx,16(%esp)
613	movl	%ebp,20(%esp)
614	movl	%ebp,24(%esp)
615	movl	%ebp,28(%esp)
// %ebp = key schedule base, %edx = biased end pointer, %ebx = negative
// starting index for the round-key walk (16 - 16*count).
616	shll	$4,%ecx
617	movl	$16,%ebx
618	leal	(%edx),%ebp
619	movdqa	(%esp),%xmm5
620	movdqa	%xmm7,%xmm2
621	leal	32(%edx,%ecx,1),%edx
622	subl	%ecx,%ebx
// pshufb %xmm5,%xmm7: keep the counter byte-reversed in %xmm7 so paddq can
// increment it; %xmm2 holds the counter in its original byte order.
623.byte	102,15,56,0,253
624L030ccm64_enc_outer:
// %xmm6 = input block. Lane %xmm2 = counter ^ rk0; lane %xmm3 = MAC ^ input
// ^ rk0 (the input is folded into the MAC before encryption).
625	movups	(%ebp),%xmm0
626	movl	%ebx,%ecx
627	movups	(%esi),%xmm6
628	xorps	%xmm0,%xmm2
629	movups	16(%ebp),%xmm1
630	xorps	%xmm6,%xmm0
631	xorps	%xmm0,%xmm3
632	movups	32(%ebp),%xmm0
633L031ccm64_enc2_loop:
// aesenc on %xmm2 and %xmm3, two rounds per iteration.
634.byte	102,15,56,220,209
635.byte	102,15,56,220,217
636	movups	(%edx,%ecx,1),%xmm1
637	addl	$32,%ecx
638.byte	102,15,56,220,208
639.byte	102,15,56,220,216
640	movups	-16(%edx,%ecx,1),%xmm0
641	jnz	L031ccm64_enc2_loop
642.byte	102,15,56,220,209
643.byte	102,15,56,220,217
// Advance the byte-reversed counter by one.
644	paddq	16(%esp),%xmm7
// decl sets ZF for the jnz at the bottom; nothing in between alters flags.
645	decl	%eax
// aesenclast on both lanes.
646.byte	102,15,56,221,208
647.byte	102,15,56,221,216
648	leal	16(%esi),%esi
// output = input ^ encrypted counter.
649	xorps	%xmm2,%xmm6
650	movdqa	%xmm7,%xmm2
651	movups	%xmm6,(%edi)
// pshufb %xmm5,%xmm2: next counter block back in original byte order.
652.byte	102,15,56,0,213
653	leal	16(%edi),%edi
654	jnz	L030ccm64_enc_outer
// Restore caller's %esp, then write the final MAC through argument 6.
655	movl	48(%esp),%esp
656	movl	40(%esp),%edi
657	movups	%xmm3,(%edi)
658	pxor	%xmm0,%xmm0
659	pxor	%xmm1,%xmm1
660	pxor	%xmm2,%xmm2
661	pxor	%xmm3,%xmm3
662	pxor	%xmm4,%xmm4
663	pxor	%xmm5,%xmm5
664	pxor	%xmm6,%xmm6
665	pxor	%xmm7,%xmm7
666	popl	%edi
667	popl	%esi
668	popl	%ebx
669	popl	%ebp
670	ret
// _aesni_ccm64_decrypt_blocks: counterpart of the CCM64 encrypt routine.
// The MAC is computed over the recovered output, so the keystream for a
// block must exist before that block can be folded into the MAC. The first
// counter block is therefore encrypted on its own up front, and the last
// MAC update is done on its own at the end.
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> %esi = input pointer
//   24(%esp) -> %edi = output pointer
//   28(%esp) -> %eax = number of 16-byte blocks (no zero check; callers
//                      must pass >= 1)
//   32(%esp) -> %edx = key schedule pointer
//   36(%esp) -> %ebx = pointer to the 16-byte counter block (read only)
//   40(%esp) -> %ecx = pointer to the 16-byte MAC block (read, then written
//                      back on exit)
// Local frame (16-byte aligned):
//   0..15(%esp)  = pshufb mask 0x0f,0x0e,...,0x00 (reverses all 16 bytes)
//   16..31(%esp) = 64-bit constant 1 in the low qword, used with paddq
//   48(%esp)     = caller's %esp
// Only aesenc/aesenclast encodings are used here. All eight XMM registers
// are zeroed before returning.
671.globl	_aesni_ccm64_decrypt_blocks
672.private_extern	_aesni_ccm64_decrypt_blocks
673.align	4
674_aesni_ccm64_decrypt_blocks:
675L_aesni_ccm64_decrypt_blocks_begin:
676	pushl	%ebp
677	pushl	%ebx
678	pushl	%esi
679	pushl	%edi
680	movl	20(%esp),%esi
681	movl	24(%esp),%edi
682	movl	28(%esp),%eax
683	movl	32(%esp),%edx
684	movl	36(%esp),%ebx
685	movl	40(%esp),%ecx
686	movl	%esp,%ebp
687	subl	$60,%esp
688	andl	$-16,%esp
689	movl	%ebp,48(%esp)
// %xmm7 = counter block, %xmm3 = MAC block.
690	movdqu	(%ebx),%xmm7
691	movdqu	(%ecx),%xmm3
692	movl	240(%edx),%ecx
// Byte-reversal mask: 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203.
693	movl	$202182159,(%esp)
694	movl	$134810123,4(%esp)
695	movl	$67438087,8(%esp)
696	movl	$66051,12(%esp)
697	movl	$1,%ebx
698	xorl	%ebp,%ebp
699	movl	%ebx,16(%esp)
700	movl	%ebp,20(%esp)
701	movl	%ebp,24(%esp)
702	movl	%ebp,28(%esp)
703	movdqa	(%esp),%xmm5
704	movdqa	%xmm7,%xmm2
705	movl	%edx,%ebp
706	movl	%ecx,%ebx
// pshufb %xmm5,%xmm7: keep the counter byte-reversed in %xmm7 for paddq.
707.byte	102,15,56,0,253
// Encrypt the first counter block alone (single-lane loop on %xmm2).
708	movups	(%edx),%xmm0
709	movups	16(%edx),%xmm1
710	leal	32(%edx),%edx
711	xorps	%xmm0,%xmm2
712L032enc1_loop_5:
713.byte	102,15,56,220,209
714	decl	%ecx
715	movups	(%edx),%xmm1
716	leal	16(%edx),%edx
717	jnz	L032enc1_loop_5
718.byte	102,15,56,221,209
// Set up the two-lane loop: %edx = biased end pointer, %ebx = negative
// starting index (16 - 16*count); load the first input block into %xmm6
// and advance the counter.
719	shll	$4,%ebx
720	movl	$16,%ecx
721	movups	(%esi),%xmm6
722	paddq	16(%esp),%xmm7
723	leal	16(%esi),%esi
724	subl	%ebx,%ecx
725	leal	32(%ebp,%ebx,1),%edx
726	movl	%ecx,%ebx
727	jmp	L033ccm64_dec_outer
728.align	4,0x90
729L033ccm64_dec_outer:
// output = input ^ encrypted counter; %xmm6 then holds the output block.
730	xorps	%xmm2,%xmm6
731	movdqa	%xmm7,%xmm2
732	movups	%xmm6,(%edi)
733	leal	16(%edi),%edi
// pshufb %xmm5,%xmm2: next counter block in original byte order.
734.byte	102,15,56,0,213
735	subl	$1,%eax
736	jz	L034ccm64_dec_break
// More blocks follow: fold the output block into the MAC (%xmm3) and
// encrypt the MAC together with the next counter block.
737	movups	(%ebp),%xmm0
738	movl	%ebx,%ecx
739	movups	16(%ebp),%xmm1
740	xorps	%xmm0,%xmm6
741	xorps	%xmm0,%xmm2
742	xorps	%xmm6,%xmm3
743	movups	32(%ebp),%xmm0
744L035ccm64_dec2_loop:
// aesenc on %xmm2 and %xmm3, two rounds per iteration.
745.byte	102,15,56,220,209
746.byte	102,15,56,220,217
747	movups	(%edx,%ecx,1),%xmm1
748	addl	$32,%ecx
749.byte	102,15,56,220,208
750.byte	102,15,56,220,216
751	movups	-16(%edx,%ecx,1),%xmm0
752	jnz	L035ccm64_dec2_loop
// Load the next input block and advance the counter while the final
// rounds complete.
753	movups	(%esi),%xmm6
754	paddq	16(%esp),%xmm7
755.byte	102,15,56,220,209
756.byte	102,15,56,220,217
757.byte	102,15,56,221,208
758.byte	102,15,56,221,216
759	leal	16(%esi),%esi
760	jmp	L033ccm64_dec_outer
761.align	4,0x90
762L034ccm64_dec_break:
// Last block: fold the final output block into the MAC and encrypt the MAC
// alone (single-lane loop on %xmm3).
763	movl	240(%ebp),%ecx
764	movl	%ebp,%edx
765	movups	(%edx),%xmm0
766	movups	16(%edx),%xmm1
767	xorps	%xmm0,%xmm6
768	leal	32(%edx),%edx
769	xorps	%xmm6,%xmm3
770L036enc1_loop_6:
// aesenc %xmm1,%xmm3
771.byte	102,15,56,220,217
772	decl	%ecx
773	movups	(%edx),%xmm1
774	leal	16(%edx),%edx
775	jnz	L036enc1_loop_6
// aesenclast %xmm1,%xmm3
776.byte	102,15,56,221,217
// Restore caller's %esp, then write the final MAC through argument 6.
777	movl	48(%esp),%esp
778	movl	40(%esp),%edi
779	movups	%xmm3,(%edi)
780	pxor	%xmm0,%xmm0
781	pxor	%xmm1,%xmm1
782	pxor	%xmm2,%xmm2
783	pxor	%xmm3,%xmm3
784	pxor	%xmm4,%xmm4
785	pxor	%xmm5,%xmm5
786	pxor	%xmm6,%xmm6
787	pxor	%xmm7,%xmm7
788	popl	%edi
789	popl	%esi
790	popl	%ebx
791	popl	%ebp
792	ret
// _aesni_ctr32_encrypt_blocks: counter mode with a 32-bit counter held in
// the last dword of the counter block and treated as big-endian (it is
// extracted, bswap'ed and incremented with 32-bit arithmetic).
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> %esi = input pointer
//   24(%esp) -> %edi = output pointer
//   28(%esp) -> %eax = number of 16-byte blocks
//   32(%esp) -> %edx = key schedule pointer
//   36(%esp) -> %ebx = pointer to the 16-byte counter block (read only)
// Local frame (16-byte aligned):
//   0..15(%esp)  = pshufb mask 0x0f,0x0e,...,0x00 (reverses all 16 bytes)
//   16..31(%esp) = dwords {6,6,6,0}: per-iteration counter increment
//   32..47(%esp) = counter block with the counter dword cleared, XORed with
//                  round key 0 (six-block loop only)
//   48..63(%esp) = host-order counters n, n+1, n+2
//   64..79(%esp) = host-order counters n+3, n+4, n+5
//   80(%esp)     = caller's %esp
// XMM registers and the stack slots at 32/48/64 are zeroed before returning.
// NOTE(review): a block count of zero is not special-cased here; confirm
// callers never pass 0.
793.globl	_aesni_ctr32_encrypt_blocks
794.private_extern	_aesni_ctr32_encrypt_blocks
795.align	4
796_aesni_ctr32_encrypt_blocks:
797L_aesni_ctr32_encrypt_blocks_begin:
798	pushl	%ebp
799	pushl	%ebx
800	pushl	%esi
801	pushl	%edi
802	movl	20(%esp),%esi
803	movl	24(%esp),%edi
804	movl	28(%esp),%eax
805	movl	32(%esp),%edx
806	movl	36(%esp),%ebx
807	movl	%esp,%ebp
808	subl	$88,%esp
809	andl	$-16,%esp
810	movl	%ebp,80(%esp)
// Exactly one block: skip all counter-vector setup.
811	cmpl	$1,%eax
812	je	L037ctr32_one_shortcut
813	movdqu	(%ebx),%xmm7
// Byte-reversal mask: 0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203.
814	movl	$202182159,(%esp)
815	movl	$134810123,4(%esp)
816	movl	$67438087,8(%esp)
817	movl	$66051,12(%esp)
818	movl	$6,%ecx
819	xorl	%ebp,%ebp
820	movl	%ecx,16(%esp)
821	movl	%ecx,20(%esp)
822	movl	%ecx,24(%esp)
823	movl	%ebp,28(%esp)
// pextrd $3,%xmm7,%ebx : pull the counter dword out of the block
// pinsrd $3,%ebp,%xmm7 : and clear it in %xmm7 (%ebp is zero here)
824.byte	102,15,58,22,251,3
825.byte	102,15,58,34,253,3
826	movl	240(%edx),%ecx
827	bswap	%ebx
828	pxor	%xmm0,%xmm0
829	pxor	%xmm1,%xmm1
830	movdqa	(%esp),%xmm2
// Build %xmm0 = {n, n+1, n+2, 0} and %xmm1 = {n+3, n+4, n+5, 0} with
// interleaved pinsrd from %ebx and %ebp.
831.byte	102,15,58,34,195,0
832	leal	3(%ebx),%ebp
833.byte	102,15,58,34,205,0
834	incl	%ebx
835.byte	102,15,58,34,195,1
836	incl	%ebp
837.byte	102,15,58,34,205,1
838	incl	%ebx
839.byte	102,15,58,34,195,2
840	incl	%ebp
841.byte	102,15,58,34,205,2
// Save host-order counters, then pshufb %xmm2 to byte-reverse each vector.
842	movdqa	%xmm0,48(%esp)
843.byte	102,15,56,0,194
844	movdqu	(%edx),%xmm6
845	movdqa	%xmm1,64(%esp)
846.byte	102,15,56,0,202
// pshufd broadcasts one reversed counter into the top dword of each lane
// (the lower dwords are taken from the zero dword of the source).
847	pshufd	$192,%xmm0,%xmm2
848	pshufd	$128,%xmm0,%xmm3
849	cmpl	$6,%eax
850	jb	L038ctr32_tail
// Six-block loop setup: fold round key 0 into the counter-less block, and
// prepare %ebp = key base, %edx = biased end pointer, %ebx = negative index.
851	pxor	%xmm6,%xmm7
852	shll	$4,%ecx
853	movl	$16,%ebx
854	movdqa	%xmm7,32(%esp)
855	movl	%edx,%ebp
856	subl	%ecx,%ebx
857	leal	32(%edx,%ecx,1),%edx
858	subl	$6,%eax
859	jmp	L039ctr32_loop6
860.align	4,0x90
861L039ctr32_loop6:
// Assemble six counter blocks (already whitened via 32(%esp)) and run the
// first aesenc round inline before calling into the shared six-lane loop.
862	pshufd	$64,%xmm0,%xmm4
863	movdqa	32(%esp),%xmm0
864	pshufd	$192,%xmm1,%xmm5
865	pxor	%xmm0,%xmm2
866	pshufd	$128,%xmm1,%xmm6
867	pxor	%xmm0,%xmm3
868	pshufd	$64,%xmm1,%xmm7
869	movups	16(%ebp),%xmm1
870	pxor	%xmm0,%xmm4
871	pxor	%xmm0,%xmm5
872.byte	102,15,56,220,209
873	pxor	%xmm0,%xmm6
874	pxor	%xmm0,%xmm7
875.byte	102,15,56,220,217
876	movups	32(%ebp),%xmm0
877	movl	%ebx,%ecx
878.byte	102,15,56,220,225
879.byte	102,15,56,220,233
880.byte	102,15,56,220,241
881.byte	102,15,56,220,249
882	call	L_aesni_encrypt6_enter
// XOR keystream with input and store, interleaved with advancing both
// counter vectors by 6 and re-deriving the next %xmm2/%xmm3.
883	movups	(%esi),%xmm1
884	movups	16(%esi),%xmm0
885	xorps	%xmm1,%xmm2
886	movups	32(%esi),%xmm1
887	xorps	%xmm0,%xmm3
888	movups	%xmm2,(%edi)
889	movdqa	16(%esp),%xmm0
890	xorps	%xmm1,%xmm4
891	movdqa	64(%esp),%xmm1
892	movups	%xmm3,16(%edi)
893	movups	%xmm4,32(%edi)
894	paddd	%xmm0,%xmm1
895	paddd	48(%esp),%xmm0
896	movdqa	(%esp),%xmm2
897	movups	48(%esi),%xmm3
898	movups	64(%esi),%xmm4
899	xorps	%xmm3,%xmm5
900	movups	80(%esi),%xmm3
901	leal	96(%esi),%esi
902	movdqa	%xmm0,48(%esp)
903.byte	102,15,56,0,194
904	xorps	%xmm4,%xmm6
905	movups	%xmm5,48(%edi)
906	xorps	%xmm3,%xmm7
907	movdqa	%xmm1,64(%esp)
908.byte	102,15,56,0,202
909	movups	%xmm6,64(%edi)
910	pshufd	$192,%xmm0,%xmm2
911	movups	%xmm7,80(%edi)
912	leal	96(%edi),%edi
913	pshufd	$128,%xmm0,%xmm3
914	subl	$6,%eax
915	jnc	L039ctr32_loop6
// Undo the -6 bias; zero means done. Otherwise recover the un-whitened
// counter-less block in %xmm7 (XOR round key 0 back out) for the tail.
916	addl	$6,%eax
917	jz	L040ctr32_ret
918	movdqu	(%ebp),%xmm7
919	movl	%ebp,%edx
920	pxor	32(%esp),%xmm7
921	movl	240(%ebp),%ecx
922L038ctr32_tail:
// 1..5 blocks remain: OR each counter dword into the counter-less block
// and dispatch on the count. The SSE instructions between cmpl and the
// conditional jumps do not alter flags.
923	por	%xmm7,%xmm2
924	cmpl	$2,%eax
925	jb	L041ctr32_one
926	pshufd	$64,%xmm0,%xmm4
927	por	%xmm7,%xmm3
928	je	L042ctr32_two
929	pshufd	$192,%xmm1,%xmm5
930	por	%xmm7,%xmm4
931	cmpl	$4,%eax
932	jb	L043ctr32_three
933	pshufd	$128,%xmm1,%xmm6
934	por	%xmm7,%xmm5
935	je	L044ctr32_four
// Five blocks: use the six-lane helper (%xmm7 is encrypted as a sixth lane
// but its result is not stored).
936	por	%xmm7,%xmm6
937	call	__aesni_encrypt6
938	movups	(%esi),%xmm1
939	movups	16(%esi),%xmm0
940	xorps	%xmm1,%xmm2
941	movups	32(%esi),%xmm1
942	xorps	%xmm0,%xmm3
943	movups	48(%esi),%xmm0
944	xorps	%xmm1,%xmm4
945	movups	64(%esi),%xmm1
946	xorps	%xmm0,%xmm5
947	movups	%xmm2,(%edi)
948	xorps	%xmm1,%xmm6
949	movups	%xmm3,16(%edi)
950	movups	%xmm4,32(%edi)
951	movups	%xmm5,48(%edi)
952	movups	%xmm6,64(%edi)
953	jmp	L040ctr32_ret
954.align	4,0x90
955L037ctr32_one_shortcut:
// Single-block call: use the caller's counter block as-is.
956	movups	(%ebx),%xmm2
957	movl	240(%edx),%ecx
958L041ctr32_one:
// Encrypt one counter block (%xmm2) and XOR it with one input block.
959	movups	(%edx),%xmm0
960	movups	16(%edx),%xmm1
961	leal	32(%edx),%edx
962	xorps	%xmm0,%xmm2
963L045enc1_loop_7:
964.byte	102,15,56,220,209
965	decl	%ecx
966	movups	(%edx),%xmm1
967	leal	16(%edx),%edx
968	jnz	L045enc1_loop_7
969.byte	102,15,56,221,209
970	movups	(%esi),%xmm6
971	xorps	%xmm2,%xmm6
972	movups	%xmm6,(%edi)
973	jmp	L040ctr32_ret
974.align	4,0x90
975L042ctr32_two:
976	call	__aesni_encrypt2
977	movups	(%esi),%xmm5
978	movups	16(%esi),%xmm6
979	xorps	%xmm5,%xmm2
980	xorps	%xmm6,%xmm3
981	movups	%xmm2,(%edi)
982	movups	%xmm3,16(%edi)
983	jmp	L040ctr32_ret
984.align	4,0x90
985L043ctr32_three:
986	call	__aesni_encrypt3
987	movups	(%esi),%xmm5
988	movups	16(%esi),%xmm6
989	xorps	%xmm5,%xmm2
990	movups	32(%esi),%xmm7
991	xorps	%xmm6,%xmm3
992	movups	%xmm2,(%edi)
993	xorps	%xmm7,%xmm4
994	movups	%xmm3,16(%edi)
995	movups	%xmm4,32(%edi)
996	jmp	L040ctr32_ret
997.align	4,0x90
998L044ctr32_four:
999	call	__aesni_encrypt4
1000	movups	(%esi),%xmm6
1001	movups	16(%esi),%xmm7
1002	movups	32(%esi),%xmm1
1003	xorps	%xmm6,%xmm2
1004	movups	48(%esi),%xmm0
1005	xorps	%xmm7,%xmm3
1006	movups	%xmm2,(%edi)
1007	xorps	%xmm1,%xmm4
1008	movups	%xmm3,16(%edi)
1009	xorps	%xmm0,%xmm5
1010	movups	%xmm4,32(%edi)
1011	movups	%xmm5,48(%edi)
1012L040ctr32_ret:
// Common exit: scrub XMM registers and the counter/IV stack slots, then
// restore the caller's %esp and callee-saved registers.
1013	pxor	%xmm0,%xmm0
1014	pxor	%xmm1,%xmm1
1015	pxor	%xmm2,%xmm2
1016	pxor	%xmm3,%xmm3
1017	pxor	%xmm4,%xmm4
1018	movdqa	%xmm0,32(%esp)
1019	pxor	%xmm5,%xmm5
1020	movdqa	%xmm0,48(%esp)
1021	pxor	%xmm6,%xmm6
1022	movdqa	%xmm0,64(%esp)
1023	pxor	%xmm7,%xmm7
1024	movl	80(%esp),%esp
1025	popl	%edi
1026	popl	%esi
1027	popl	%ebx
1028	popl	%ebp
1029	ret
// _aesni_xts_encrypt: tweaked block encryption with ciphertext stealing for
// a trailing partial block.
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> %esi = input pointer
//   24(%esp) -> %edi = output pointer
//   28(%esp) -> %eax = length in bytes (low 4 bits = partial-block size)
//   32(%esp) -> %edx = key schedule used for the data blocks
//   36(%esp)         = key schedule used only to encrypt the initial tweak
//   40(%esp)         = pointer to the 16-byte block encrypted to form the
//                      initial tweak
// Local frame (16-byte aligned):
//   0..95(%esp)   = up to six per-block tweaks
//   96..111(%esp) = dwords {0x87, 0, 1, 0}: carry/reduction mask used when
//                   doubling the tweak
//   112(%esp)     = original length, later the partial-block byte count
//   116(%esp)     = caller's %esp
// Tweak doubling pattern used throughout: pcmpgtd against zero captures the
// sign bit of each dword, pshufd $19 rotates those into position, pand with
// the mask keeps only the two relevant carries, paddq doubles both qwords,
// and pxor applies the carries.
// XMM registers and the tweak slots are zeroed before returning.
// NOTE(review): the stealing path reads -16(%edi); this presumably relies on
// callers passing at least 16 bytes -- confirm.
1030.globl	_aesni_xts_encrypt
1031.private_extern	_aesni_xts_encrypt
1032.align	4
1033_aesni_xts_encrypt:
1034L_aesni_xts_encrypt_begin:
1035	pushl	%ebp
1036	pushl	%ebx
1037	pushl	%esi
1038	pushl	%edi
// Compute the initial tweak: encrypt the block at argument 6 with the key
// schedule at argument 5 (single-lane loop on %xmm2).
1039	movl	36(%esp),%edx
1040	movl	40(%esp),%esi
1041	movl	240(%edx),%ecx
1042	movups	(%esi),%xmm2
1043	movups	(%edx),%xmm0
1044	movups	16(%edx),%xmm1
1045	leal	32(%edx),%edx
1046	xorps	%xmm0,%xmm2
1047L046enc1_loop_8:
1048.byte	102,15,56,220,209
1049	decl	%ecx
1050	movups	(%edx),%xmm1
1051	leal	16(%edx),%edx
1052	jnz	L046enc1_loop_8
1053.byte	102,15,56,221,209
// Load the data arguments and build the aligned local frame.
1054	movl	20(%esp),%esi
1055	movl	24(%esp),%edi
1056	movl	28(%esp),%eax
1057	movl	32(%esp),%edx
1058	movl	%esp,%ebp
1059	subl	$120,%esp
1060	movl	240(%edx),%ecx
1061	andl	$-16,%esp
1062	movl	$135,96(%esp)
1063	movl	$0,100(%esp)
1064	movl	$1,104(%esp)
1065	movl	$0,108(%esp)
1066	movl	%eax,112(%esp)
1067	movl	%ebp,116(%esp)
// %xmm1 = current tweak, %xmm0 = its per-dword sign mask, %xmm3 = mask.
1068	movdqa	%xmm2,%xmm1
1069	pxor	%xmm0,%xmm0
1070	movdqa	96(%esp),%xmm3
1071	pcmpgtd	%xmm1,%xmm0
1072	andl	$-16,%eax
1073	movl	%edx,%ebp
1074	movl	%ecx,%ebx
1075	subl	$96,%eax
1076	jc	L047xts_enc_short
// Six-block loop setup: %edx = biased end pointer, %ebx = negative index.
1077	shll	$4,%ecx
1078	movl	$16,%ebx
1079	subl	%ecx,%ebx
1080	leal	32(%edx,%ecx,1),%edx
1081	jmp	L048xts_enc_loop6
1082.align	4,0x90
1083L048xts_enc_loop6:
// Generate tweaks 0..4 into 0..64(%esp), doubling after each store.
1084	pshufd	$19,%xmm0,%xmm2
1085	pxor	%xmm0,%xmm0
1086	movdqa	%xmm1,(%esp)
1087	paddq	%xmm1,%xmm1
1088	pand	%xmm3,%xmm2
1089	pcmpgtd	%xmm1,%xmm0
1090	pxor	%xmm2,%xmm1
1091	pshufd	$19,%xmm0,%xmm2
1092	pxor	%xmm0,%xmm0
1093	movdqa	%xmm1,16(%esp)
1094	paddq	%xmm1,%xmm1
1095	pand	%xmm3,%xmm2
1096	pcmpgtd	%xmm1,%xmm0
1097	pxor	%xmm2,%xmm1
1098	pshufd	$19,%xmm0,%xmm2
1099	pxor	%xmm0,%xmm0
1100	movdqa	%xmm1,32(%esp)
1101	paddq	%xmm1,%xmm1
1102	pand	%xmm3,%xmm2
1103	pcmpgtd	%xmm1,%xmm0
1104	pxor	%xmm2,%xmm1
1105	pshufd	$19,%xmm0,%xmm2
1106	pxor	%xmm0,%xmm0
1107	movdqa	%xmm1,48(%esp)
1108	paddq	%xmm1,%xmm1
1109	pand	%xmm3,%xmm2
1110	pcmpgtd	%xmm1,%xmm0
1111	pxor	%xmm2,%xmm1
// Tweak 5 is formed in %xmm7 while the input blocks are loaded and XORed
// with round key 0 and their tweaks.
1112	pshufd	$19,%xmm0,%xmm7
1113	movdqa	%xmm1,64(%esp)
1114	paddq	%xmm1,%xmm1
1115	movups	(%ebp),%xmm0
1116	pand	%xmm3,%xmm7
1117	movups	(%esi),%xmm2
1118	pxor	%xmm1,%xmm7
1119	movl	%ebx,%ecx
1120	movdqu	16(%esi),%xmm3
1121	xorps	%xmm0,%xmm2
1122	movdqu	32(%esi),%xmm4
1123	pxor	%xmm0,%xmm3
1124	movdqu	48(%esi),%xmm5
1125	pxor	%xmm0,%xmm4
1126	movdqu	64(%esi),%xmm6
1127	pxor	%xmm0,%xmm5
1128	movdqu	80(%esi),%xmm1
1129	pxor	%xmm0,%xmm6
1130	leal	96(%esi),%esi
1131	pxor	(%esp),%xmm2
1132	movdqa	%xmm7,80(%esp)
1133	pxor	%xmm1,%xmm7
1134	movups	16(%ebp),%xmm1
1135	pxor	16(%esp),%xmm3
1136	pxor	32(%esp),%xmm4
// First aesenc round is issued inline, interleaved with the remaining XORs.
1137.byte	102,15,56,220,209
1138	pxor	48(%esp),%xmm5
1139	pxor	64(%esp),%xmm6
1140.byte	102,15,56,220,217
1141	pxor	%xmm0,%xmm7
1142	movups	32(%ebp),%xmm0
1143.byte	102,15,56,220,225
1144.byte	102,15,56,220,233
1145.byte	102,15,56,220,241
1146.byte	102,15,56,220,249
1147	call	L_aesni_encrypt6_enter
// Post-whiten with the same tweaks, store six blocks, and derive the tweak
// for the next iteration from tweak 5.
1148	movdqa	80(%esp),%xmm1
1149	pxor	%xmm0,%xmm0
1150	xorps	(%esp),%xmm2
1151	pcmpgtd	%xmm1,%xmm0
1152	xorps	16(%esp),%xmm3
1153	movups	%xmm2,(%edi)
1154	xorps	32(%esp),%xmm4
1155	movups	%xmm3,16(%edi)
1156	xorps	48(%esp),%xmm5
1157	movups	%xmm4,32(%edi)
1158	xorps	64(%esp),%xmm6
1159	movups	%xmm5,48(%edi)
1160	xorps	%xmm1,%xmm7
1161	movups	%xmm6,64(%edi)
1162	pshufd	$19,%xmm0,%xmm2
1163	movups	%xmm7,80(%edi)
1164	leal	96(%edi),%edi
1165	movdqa	96(%esp),%xmm3
1166	pxor	%xmm0,%xmm0
1167	paddq	%xmm1,%xmm1
1168	pand	%xmm3,%xmm2
1169	pcmpgtd	%xmm1,%xmm0
1170	pxor	%xmm2,%xmm1
1171	subl	$96,%eax
1172	jnc	L048xts_enc_loop6
// Restore plain key pointer / loop count for the short paths.
1173	movl	240(%ebp),%ecx
1174	movl	%ebp,%edx
1175	movl	%ecx,%ebx
1176L047xts_enc_short:
// Undo the -96 bias. 0 whole blocks left -> done6x; otherwise 1..5 blocks.
// Tweaks are generated one at a time; SSE instructions between cmpl and the
// conditional jumps do not alter flags.
1177	addl	$96,%eax
1178	jz	L049xts_enc_done6x
1179	movdqa	%xmm1,%xmm5
1180	cmpl	$32,%eax
1181	jb	L050xts_enc_one
1182	pshufd	$19,%xmm0,%xmm2
1183	pxor	%xmm0,%xmm0
1184	paddq	%xmm1,%xmm1
1185	pand	%xmm3,%xmm2
1186	pcmpgtd	%xmm1,%xmm0
1187	pxor	%xmm2,%xmm1
1188	je	L051xts_enc_two
1189	pshufd	$19,%xmm0,%xmm2
1190	pxor	%xmm0,%xmm0
1191	movdqa	%xmm1,%xmm6
1192	paddq	%xmm1,%xmm1
1193	pand	%xmm3,%xmm2
1194	pcmpgtd	%xmm1,%xmm0
1195	pxor	%xmm2,%xmm1
1196	cmpl	$64,%eax
1197	jb	L052xts_enc_three
1198	pshufd	$19,%xmm0,%xmm2
1199	pxor	%xmm0,%xmm0
1200	movdqa	%xmm1,%xmm7
1201	paddq	%xmm1,%xmm1
1202	pand	%xmm3,%xmm2
1203	pcmpgtd	%xmm1,%xmm0
1204	pxor	%xmm2,%xmm1
1205	movdqa	%xmm5,(%esp)
1206	movdqa	%xmm6,16(%esp)
1207	je	L053xts_enc_four
// Five blocks: tweaks 0..4 live at 0..64(%esp); the six-lane helper is used
// and only five results are stored.
1208	movdqa	%xmm7,32(%esp)
1209	pshufd	$19,%xmm0,%xmm7
1210	movdqa	%xmm1,48(%esp)
1211	paddq	%xmm1,%xmm1
1212	pand	%xmm3,%xmm7
1213	pxor	%xmm1,%xmm7
1214	movdqu	(%esi),%xmm2
1215	movdqu	16(%esi),%xmm3
1216	movdqu	32(%esi),%xmm4
1217	pxor	(%esp),%xmm2
1218	movdqu	48(%esi),%xmm5
1219	pxor	16(%esp),%xmm3
1220	movdqu	64(%esi),%xmm6
1221	pxor	32(%esp),%xmm4
1222	leal	80(%esi),%esi
1223	pxor	48(%esp),%xmm5
1224	movdqa	%xmm7,64(%esp)
1225	pxor	%xmm7,%xmm6
1226	call	__aesni_encrypt6
1227	movaps	64(%esp),%xmm1
1228	xorps	(%esp),%xmm2
1229	xorps	16(%esp),%xmm3
1230	xorps	32(%esp),%xmm4
1231	movups	%xmm2,(%edi)
1232	xorps	48(%esp),%xmm5
1233	movups	%xmm3,16(%edi)
1234	xorps	%xmm1,%xmm6
1235	movups	%xmm4,32(%edi)
1236	movups	%xmm5,48(%edi)
1237	movups	%xmm6,64(%edi)
1238	leal	80(%edi),%edi
1239	jmp	L054xts_enc_done
1240.align	4,0x90
1241L050xts_enc_one:
// One block with tweak %xmm5; leaves the last-used tweak in %xmm1.
1242	movups	(%esi),%xmm2
1243	leal	16(%esi),%esi
1244	xorps	%xmm5,%xmm2
1245	movups	(%edx),%xmm0
1246	movups	16(%edx),%xmm1
1247	leal	32(%edx),%edx
1248	xorps	%xmm0,%xmm2
1249L055enc1_loop_9:
1250.byte	102,15,56,220,209
1251	decl	%ecx
1252	movups	(%edx),%xmm1
1253	leal	16(%edx),%edx
1254	jnz	L055enc1_loop_9
1255.byte	102,15,56,221,209
1256	xorps	%xmm5,%xmm2
1257	movups	%xmm2,(%edi)
1258	leal	16(%edi),%edi
1259	movdqa	%xmm5,%xmm1
1260	jmp	L054xts_enc_done
1261.align	4,0x90
1262L051xts_enc_two:
// Two blocks with tweaks %xmm5, %xmm6.
1263	movaps	%xmm1,%xmm6
1264	movups	(%esi),%xmm2
1265	movups	16(%esi),%xmm3
1266	leal	32(%esi),%esi
1267	xorps	%xmm5,%xmm2
1268	xorps	%xmm6,%xmm3
1269	call	__aesni_encrypt2
1270	xorps	%xmm5,%xmm2
1271	xorps	%xmm6,%xmm3
1272	movups	%xmm2,(%edi)
1273	movups	%xmm3,16(%edi)
1274	leal	32(%edi),%edi
1275	movdqa	%xmm6,%xmm1
1276	jmp	L054xts_enc_done
1277.align	4,0x90
1278L052xts_enc_three:
// Three blocks with tweaks %xmm5, %xmm6, %xmm7.
1279	movaps	%xmm1,%xmm7
1280	movups	(%esi),%xmm2
1281	movups	16(%esi),%xmm3
1282	movups	32(%esi),%xmm4
1283	leal	48(%esi),%esi
1284	xorps	%xmm5,%xmm2
1285	xorps	%xmm6,%xmm3
1286	xorps	%xmm7,%xmm4
1287	call	__aesni_encrypt3
1288	xorps	%xmm5,%xmm2
1289	xorps	%xmm6,%xmm3
1290	xorps	%xmm7,%xmm4
1291	movups	%xmm2,(%edi)
1292	movups	%xmm3,16(%edi)
1293	movups	%xmm4,32(%edi)
1294	leal	48(%edi),%edi
1295	movdqa	%xmm7,%xmm1
1296	jmp	L054xts_enc_done
1297.align	4,0x90
1298L053xts_enc_four:
// Four blocks with tweaks (%esp), 16(%esp), %xmm7, %xmm6.
1299	movaps	%xmm1,%xmm6
1300	movups	(%esi),%xmm2
1301	movups	16(%esi),%xmm3
1302	movups	32(%esi),%xmm4
1303	xorps	(%esp),%xmm2
1304	movups	48(%esi),%xmm5
1305	leal	64(%esi),%esi
1306	xorps	16(%esp),%xmm3
1307	xorps	%xmm7,%xmm4
1308	xorps	%xmm6,%xmm5
1309	call	__aesni_encrypt4
1310	xorps	(%esp),%xmm2
1311	xorps	16(%esp),%xmm3
1312	xorps	%xmm7,%xmm4
1313	movups	%xmm2,(%edi)
1314	xorps	%xmm6,%xmm5
1315	movups	%xmm3,16(%edi)
1316	movups	%xmm4,32(%edi)
1317	movups	%xmm5,48(%edi)
1318	leal	64(%edi),%edi
1319	movdqa	%xmm6,%xmm1
1320	jmp	L054xts_enc_done
1321.align	4,0x90
1322L049xts_enc_done6x:
// Whole input consumed by the six-block loop: %xmm1 already holds the next
// unused tweak, so it is used directly for stealing.
1323	movl	112(%esp),%eax
1324	andl	$15,%eax
1325	jz	L056xts_enc_ret
1326	movdqa	%xmm1,%xmm5
1327	movl	%eax,112(%esp)
1328	jmp	L057xts_enc_steal
1329.align	4,0x90
1330L054xts_enc_done:
// After a short path %xmm1 is the last tweak used; if a partial block
// remains, double it once more to get the stealing tweak in %xmm5.
1331	movl	112(%esp),%eax
1332	pxor	%xmm0,%xmm0
1333	andl	$15,%eax
1334	jz	L056xts_enc_ret
1335	pcmpgtd	%xmm1,%xmm0
1336	movl	%eax,112(%esp)
1337	pshufd	$19,%xmm0,%xmm5
1338	paddq	%xmm1,%xmm1
1339	pand	96(%esp),%xmm5
1340	pxor	%xmm1,%xmm5
1341L057xts_enc_steal:
// Ciphertext stealing, one byte per iteration: the trailing input byte
// replaces the corresponding byte of the last full output block, and the
// byte it displaced becomes the partial output.
1342	movzbl	(%esi),%ecx
1343	movzbl	-16(%edi),%edx
1344	leal	1(%esi),%esi
1345	movb	%cl,-16(%edi)
1346	movb	%dl,(%edi)
1347	leal	1(%edi),%edi
1348	subl	$1,%eax
1349	jnz	L057xts_enc_steal
// Rewind %edi and re-encrypt the patched last full block with tweak %xmm5.
1350	subl	112(%esp),%edi
1351	movl	%ebp,%edx
1352	movl	%ebx,%ecx
1353	movups	-16(%edi),%xmm2
1354	xorps	%xmm5,%xmm2
1355	movups	(%edx),%xmm0
1356	movups	16(%edx),%xmm1
1357	leal	32(%edx),%edx
1358	xorps	%xmm0,%xmm2
1359L058enc1_loop_10:
1360.byte	102,15,56,220,209
1361	decl	%ecx
1362	movups	(%edx),%xmm1
1363	leal	16(%edx),%edx
1364	jnz	L058enc1_loop_10
1365.byte	102,15,56,221,209
1366	xorps	%xmm5,%xmm2
1367	movups	%xmm2,-16(%edi)
1368L056xts_enc_ret:
// Common exit: scrub XMM registers and the six tweak slots, then restore
// the caller's %esp and callee-saved registers.
1369	pxor	%xmm0,%xmm0
1370	pxor	%xmm1,%xmm1
1371	pxor	%xmm2,%xmm2
1372	movdqa	%xmm0,(%esp)
1373	pxor	%xmm3,%xmm3
1374	movdqa	%xmm0,16(%esp)
1375	pxor	%xmm4,%xmm4
1376	movdqa	%xmm0,32(%esp)
1377	pxor	%xmm5,%xmm5
1378	movdqa	%xmm0,48(%esp)
1379	pxor	%xmm6,%xmm6
1380	movdqa	%xmm0,64(%esp)
1381	pxor	%xmm7,%xmm7
1382	movdqa	%xmm0,80(%esp)
1383	movl	116(%esp),%esp
1384	popl	%edi
1385	popl	%esi
1386	popl	%ebx
1387	popl	%ebp
1388	ret
/*
 * _aesni_xts_decrypt -- XTS-style decryption built on AES-NI instructions
 * (32-bit x86, arguments on the stack).
 *
 * Stack arguments as read below, after the four register pushes:
 *   20(%esp)  source pointer            (walked through %esi)
 *   24(%esp)  destination pointer       (walked through %edi)
 *   28(%esp)  length in bytes
 *   32(%esp)  key schedule used to decrypt the data, round count at +240
 *   36(%esp)  key schedule used to encrypt the initial tweak
 *   40(%esp)  pointer to the 16-byte block that becomes the initial tweak
 * NOTE(review): the C prototype is not visible in this file; the roles
 * above are read off the code -- confirm against the declaring header.
 *
 * Local frame (%esp rounded down to a 16-byte boundary):
 *   0..95(%esp)    six 16-byte tweak slots
 *   96..111(%esp)  constant dwords {135, 0, 1, 0} for the tweak carry fold
 *   112(%esp)      byte length (less 16 when not a multiple of 16); later
 *                  overwritten with the number of trailing bytes
 *   116(%esp)      %esp value to restore on exit
 *
 * The pushed %ebp/%ebx/%esi/%edi are popped on exit; %xmm0-%xmm7 and the
 * six tweak slots are zeroed before returning.
 */
1389.globl	_aesni_xts_decrypt
1390.private_extern	_aesni_xts_decrypt
1391.align	4
1392_aesni_xts_decrypt:
1393L_aesni_xts_decrypt_begin:
1394	pushl	%ebp
1395	pushl	%ebx
1396	pushl	%esi
1397	pushl	%edi
	/* Encrypt the 16 bytes at arg 6 with the schedule at arg 5; the result */
	/* (left in %xmm2) is the first tweak. Note this is an ENCRYPT loop.    */
1398	movl	36(%esp),%edx
1399	movl	40(%esp),%esi
1400	movl	240(%edx),%ecx
1401	movups	(%esi),%xmm2
1402	movups	(%edx),%xmm0
1403	movups	16(%edx),%xmm1
1404	leal	32(%edx),%edx
1405	xorps	%xmm0,%xmm2
1406L059enc1_loop_11:
	/* .byte sequence encodes: aesenc %xmm1,%xmm2 */
1407.byte	102,15,56,220,209
1408	decl	%ecx
1409	movups	(%edx),%xmm1
1410	leal	16(%edx),%edx
1411	jnz	L059enc1_loop_11
	/* .byte sequence encodes: aesenclast %xmm1,%xmm2 */
1412.byte	102,15,56,221,209
	/* Load the data arguments and build the aligned local frame. */
1413	movl	20(%esp),%esi
1414	movl	24(%esp),%edi
1415	movl	28(%esp),%eax
1416	movl	32(%esp),%edx
1417	movl	%esp,%ebp
1418	subl	$120,%esp
1419	andl	$-16,%esp
	/* If the length is not a multiple of 16, hold back one whole block */
	/* (ebx = 16, else 0) so it can be handled together with the tail.  */
1420	xorl	%ebx,%ebx
1421	testl	$15,%eax
1422	setnz	%bl
1423	shll	$4,%ebx
1424	subl	%ebx,%eax
1425	movl	$135,96(%esp)
1426	movl	$0,100(%esp)
1427	movl	$1,104(%esp)
1428	movl	$0,108(%esp)
1429	movl	%eax,112(%esp)
1430	movl	%ebp,116(%esp)
	/* From here: %ebp = data key schedule, %ebx = its round count,     */
	/* %xmm1 = current tweak, %xmm3 = carry-fold constant, %xmm0 = per- */
	/* dword sign mask of the tweak (input to the next tweak update).   */
1431	movl	240(%edx),%ecx
1432	movl	%edx,%ebp
1433	movl	%ecx,%ebx
1434	movdqa	%xmm2,%xmm1
1435	pxor	%xmm0,%xmm0
1436	movdqa	96(%esp),%xmm3
1437	pcmpgtd	%xmm1,%xmm0
	/* eax = whole-block byte count minus 96; borrow means fewer than six */
	/* blocks remain, so skip the six-block loop.                          */
1438	andl	$-16,%eax
1439	subl	$96,%eax
1440	jc	L060xts_dec_short
	/* Six-block loop setup: ecx = rounds*16, ebx = 16 - rounds*16, and   */
	/* edx = schedule + 32 + rounds*16.                                    */
	/* NOTE(review): L_aesni_decrypt6_enter is defined outside this view;  */
	/* presumably it indexes round keys as (%edx,%ecx) with ecx counting   */
	/* up from the negative offset in ebx -- confirm there.                */
1441	shll	$4,%ecx
1442	movl	$16,%ebx
1443	subl	%ecx,%ebx
1444	leal	32(%edx,%ecx,1),%edx
1445	jmp	L061xts_dec_loop6
1446.align	4,0x90
1447L061xts_dec_loop6:
	/* Tweak update idiom, repeated throughout this function:             */
	/*   pshufd $19 rearranges the sign mask so the carry out of each     */
	/*   64-bit half lines up with the constant, pand keeps 135 (low      */
	/*   dword) and 1 (third dword), paddq doubles both halves, and pxor  */
	/*   folds the carries back in, i.e. the 128-bit tweak is doubled     */
	/*   with reduction constant 135 (0x87).                              */
	/* Five successive tweaks are parked in 0..64(%esp).                  */
1448	pshufd	$19,%xmm0,%xmm2
1449	pxor	%xmm0,%xmm0
1450	movdqa	%xmm1,(%esp)
1451	paddq	%xmm1,%xmm1
1452	pand	%xmm3,%xmm2
1453	pcmpgtd	%xmm1,%xmm0
1454	pxor	%xmm2,%xmm1
1455	pshufd	$19,%xmm0,%xmm2
1456	pxor	%xmm0,%xmm0
1457	movdqa	%xmm1,16(%esp)
1458	paddq	%xmm1,%xmm1
1459	pand	%xmm3,%xmm2
1460	pcmpgtd	%xmm1,%xmm0
1461	pxor	%xmm2,%xmm1
1462	pshufd	$19,%xmm0,%xmm2
1463	pxor	%xmm0,%xmm0
1464	movdqa	%xmm1,32(%esp)
1465	paddq	%xmm1,%xmm1
1466	pand	%xmm3,%xmm2
1467	pcmpgtd	%xmm1,%xmm0
1468	pxor	%xmm2,%xmm1
1469	pshufd	$19,%xmm0,%xmm2
1470	pxor	%xmm0,%xmm0
1471	movdqa	%xmm1,48(%esp)
1472	paddq	%xmm1,%xmm1
1473	pand	%xmm3,%xmm2
1474	pcmpgtd	%xmm1,%xmm0
1475	pxor	%xmm2,%xmm1
	/* Sixth tweak is built in %xmm7 (saved to 80(%esp) below) while the */
	/* six input blocks are loaded and xored with round key 0 (%xmm0)    */
	/* and their tweaks. %xmm3 is reused for data from here on.          */
1476	pshufd	$19,%xmm0,%xmm7
1477	movdqa	%xmm1,64(%esp)
1478	paddq	%xmm1,%xmm1
1479	movups	(%ebp),%xmm0
1480	pand	%xmm3,%xmm7
1481	movups	(%esi),%xmm2
1482	pxor	%xmm1,%xmm7
1483	movl	%ebx,%ecx
1484	movdqu	16(%esi),%xmm3
1485	xorps	%xmm0,%xmm2
1486	movdqu	32(%esi),%xmm4
1487	pxor	%xmm0,%xmm3
1488	movdqu	48(%esi),%xmm5
1489	pxor	%xmm0,%xmm4
1490	movdqu	64(%esi),%xmm6
1491	pxor	%xmm0,%xmm5
1492	movdqu	80(%esi),%xmm1
1493	pxor	%xmm0,%xmm6
1494	leal	96(%esi),%esi
1495	pxor	(%esp),%xmm2
1496	movdqa	%xmm7,80(%esp)
1497	pxor	%xmm1,%xmm7
1498	movups	16(%ebp),%xmm1
1499	pxor	16(%esp),%xmm3
1500	pxor	32(%esp),%xmm4
	/* First decryption round is issued inline, interleaved with the    */
	/* remaining tweak xors. The .byte lines encode aesdec %xmm1,%xmmN   */
	/* for N = 2,3,4,5,6,7 in that order.                                */
1501.byte	102,15,56,222,209
1502	pxor	48(%esp),%xmm5
1503	pxor	64(%esp),%xmm6
1504.byte	102,15,56,222,217
1505	pxor	%xmm0,%xmm7
1506	movups	32(%ebp),%xmm0
1507.byte	102,15,56,222,225
1508.byte	102,15,56,222,233
1509.byte	102,15,56,222,241
1510.byte	102,15,56,222,249
1511	call	L_aesni_decrypt6_enter
	/* Undo the tweak masking, store six output blocks, and advance the */
	/* tweak once more (reloading the constant into %xmm3).             */
1512	movdqa	80(%esp),%xmm1
1513	pxor	%xmm0,%xmm0
1514	xorps	(%esp),%xmm2
1515	pcmpgtd	%xmm1,%xmm0
1516	xorps	16(%esp),%xmm3
1517	movups	%xmm2,(%edi)
1518	xorps	32(%esp),%xmm4
1519	movups	%xmm3,16(%edi)
1520	xorps	48(%esp),%xmm5
1521	movups	%xmm4,32(%edi)
1522	xorps	64(%esp),%xmm6
1523	movups	%xmm5,48(%edi)
1524	xorps	%xmm1,%xmm7
1525	movups	%xmm6,64(%edi)
1526	pshufd	$19,%xmm0,%xmm2
1527	movups	%xmm7,80(%edi)
1528	leal	96(%edi),%edi
1529	movdqa	96(%esp),%xmm3
1530	pxor	%xmm0,%xmm0
1531	paddq	%xmm1,%xmm1
1532	pand	%xmm3,%xmm2
1533	pcmpgtd	%xmm1,%xmm0
1534	pxor	%xmm2,%xmm1
1535	subl	$96,%eax
1536	jnc	L061xts_dec_loop6
	/* Loop finished: restore edx = schedule base, ecx = ebx = rounds. */
1537	movl	240(%ebp),%ecx
1538	movl	%ebp,%edx
1539	movl	%ecx,%ebx
1540L060xts_dec_short:
	/* eax = bytes of whole blocks still to do (0, 16, ..., 80).         */
	/* The cmpl results below are consumed by je/jb several instructions */
	/* later; the SSE instructions in between do not modify EFLAGS.      */
	/* Tweaks are handed to the per-count paths in %xmm5, %xmm6, %xmm7   */
	/* and %xmm1, in order of use.                                       */
1541	addl	$96,%eax
1542	jz	L062xts_dec_done6x
1543	movdqa	%xmm1,%xmm5
1544	cmpl	$32,%eax
1545	jb	L063xts_dec_one
1546	pshufd	$19,%xmm0,%xmm2
1547	pxor	%xmm0,%xmm0
1548	paddq	%xmm1,%xmm1
1549	pand	%xmm3,%xmm2
1550	pcmpgtd	%xmm1,%xmm0
1551	pxor	%xmm2,%xmm1
1552	je	L064xts_dec_two
1553	pshufd	$19,%xmm0,%xmm2
1554	pxor	%xmm0,%xmm0
1555	movdqa	%xmm1,%xmm6
1556	paddq	%xmm1,%xmm1
1557	pand	%xmm3,%xmm2
1558	pcmpgtd	%xmm1,%xmm0
1559	pxor	%xmm2,%xmm1
1560	cmpl	$64,%eax
1561	jb	L065xts_dec_three
1562	pshufd	$19,%xmm0,%xmm2
1563	pxor	%xmm0,%xmm0
1564	movdqa	%xmm1,%xmm7
1565	paddq	%xmm1,%xmm1
1566	pand	%xmm3,%xmm2
1567	pcmpgtd	%xmm1,%xmm0
1568	pxor	%xmm2,%xmm1
1569	movdqa	%xmm5,(%esp)
1570	movdqa	%xmm6,16(%esp)
1571	je	L066xts_dec_four
	/* Five blocks: tweaks live in 0..64(%esp); the fifth is built in */
	/* %xmm7. Only five of the six lanes carry real data.             */
1572	movdqa	%xmm7,32(%esp)
1573	pshufd	$19,%xmm0,%xmm7
1574	movdqa	%xmm1,48(%esp)
1575	paddq	%xmm1,%xmm1
1576	pand	%xmm3,%xmm7
1577	pxor	%xmm1,%xmm7
1578	movdqu	(%esi),%xmm2
1579	movdqu	16(%esi),%xmm3
1580	movdqu	32(%esi),%xmm4
1581	pxor	(%esp),%xmm2
1582	movdqu	48(%esi),%xmm5
1583	pxor	16(%esp),%xmm3
1584	movdqu	64(%esi),%xmm6
1585	pxor	32(%esp),%xmm4
1586	leal	80(%esi),%esi
1587	pxor	48(%esp),%xmm5
1588	movdqa	%xmm7,64(%esp)
1589	pxor	%xmm7,%xmm6
1590	call	__aesni_decrypt6
	/* %xmm1 = last tweak used, as expected by L067xts_dec_done. */
1591	movaps	64(%esp),%xmm1
1592	xorps	(%esp),%xmm2
1593	xorps	16(%esp),%xmm3
1594	xorps	32(%esp),%xmm4
1595	movups	%xmm2,(%edi)
1596	xorps	48(%esp),%xmm5
1597	movups	%xmm3,16(%edi)
1598	xorps	%xmm1,%xmm6
1599	movups	%xmm4,32(%edi)
1600	movups	%xmm5,48(%edi)
1601	movups	%xmm6,64(%edi)
1602	leal	80(%edi),%edi
1603	jmp	L067xts_dec_done
1604.align	4,0x90
1605L063xts_dec_one:
	/* One block, tweak in %xmm5; edx/ecx = schedule base / rounds. */
1606	movups	(%esi),%xmm2
1607	leal	16(%esi),%esi
1608	xorps	%xmm5,%xmm2
1609	movups	(%edx),%xmm0
1610	movups	16(%edx),%xmm1
1611	leal	32(%edx),%edx
1612	xorps	%xmm0,%xmm2
1613L068dec1_loop_12:
	/* .byte sequence encodes: aesdec %xmm1,%xmm2 */
1614.byte	102,15,56,222,209
1615	decl	%ecx
1616	movups	(%edx),%xmm1
1617	leal	16(%edx),%edx
1618	jnz	L068dec1_loop_12
	/* .byte sequence encodes: aesdeclast %xmm1,%xmm2 */
1619.byte	102,15,56,223,209
1620	xorps	%xmm5,%xmm2
1621	movups	%xmm2,(%edi)
1622	leal	16(%edi),%edi
1623	movdqa	%xmm5,%xmm1
1624	jmp	L067xts_dec_done
1625.align	4,0x90
1626L064xts_dec_two:
	/* Two blocks, tweaks %xmm5 and %xmm6 (= %xmm1 on entry). */
1627	movaps	%xmm1,%xmm6
1628	movups	(%esi),%xmm2
1629	movups	16(%esi),%xmm3
1630	leal	32(%esi),%esi
1631	xorps	%xmm5,%xmm2
1632	xorps	%xmm6,%xmm3
1633	call	__aesni_decrypt2
1634	xorps	%xmm5,%xmm2
1635	xorps	%xmm6,%xmm3
1636	movups	%xmm2,(%edi)
1637	movups	%xmm3,16(%edi)
1638	leal	32(%edi),%edi
1639	movdqa	%xmm6,%xmm1
1640	jmp	L067xts_dec_done
1641.align	4,0x90
1642L065xts_dec_three:
	/* Three blocks, tweaks %xmm5, %xmm6 and %xmm7 (= %xmm1 on entry). */
1643	movaps	%xmm1,%xmm7
1644	movups	(%esi),%xmm2
1645	movups	16(%esi),%xmm3
1646	movups	32(%esi),%xmm4
1647	leal	48(%esi),%esi
1648	xorps	%xmm5,%xmm2
1649	xorps	%xmm6,%xmm3
1650	xorps	%xmm7,%xmm4
1651	call	__aesni_decrypt3
1652	xorps	%xmm5,%xmm2
1653	xorps	%xmm6,%xmm3
1654	xorps	%xmm7,%xmm4
1655	movups	%xmm2,(%edi)
1656	movups	%xmm3,16(%edi)
1657	movups	%xmm4,32(%edi)
1658	leal	48(%edi),%edi
1659	movdqa	%xmm7,%xmm1
1660	jmp	L067xts_dec_done
1661.align	4,0x90
1662L066xts_dec_four:
	/* Four blocks: first two tweaks were parked at 0/16(%esp) because */
	/* %xmm5 is needed for data; third is %xmm7, fourth %xmm6 (= %xmm1). */
1663	movaps	%xmm1,%xmm6
1664	movups	(%esi),%xmm2
1665	movups	16(%esi),%xmm3
1666	movups	32(%esi),%xmm4
1667	xorps	(%esp),%xmm2
1668	movups	48(%esi),%xmm5
1669	leal	64(%esi),%esi
1670	xorps	16(%esp),%xmm3
1671	xorps	%xmm7,%xmm4
1672	xorps	%xmm6,%xmm5
1673	call	__aesni_decrypt4
1674	xorps	(%esp),%xmm2
1675	xorps	16(%esp),%xmm3
1676	xorps	%xmm7,%xmm4
1677	movups	%xmm2,(%edi)
1678	xorps	%xmm6,%xmm5
1679	movups	%xmm3,16(%edi)
1680	movups	%xmm4,32(%edi)
1681	movups	%xmm5,48(%edi)
1682	leal	64(%edi),%edi
1683	movdqa	%xmm6,%xmm1
1684	jmp	L067xts_dec_done
1685.align	4,0x90
1686L062xts_dec_done6x:
	/* No whole blocks left after the six-block loop. %xmm1 is already */
	/* the next unused tweak and %xmm0 its sign mask, so the extra     */
	/* tweak step done in L067xts_dec_done is skipped.                 */
1687	movl	112(%esp),%eax
1688	andl	$15,%eax
1689	jz	L069xts_dec_ret
1690	movl	%eax,112(%esp)
1691	jmp	L070xts_dec_only_one_more
1692.align	4,0x90
1693L067xts_dec_done:
	/* %xmm1 = last tweak used. If no trailing bytes remain we are    */
	/* finished; otherwise step the tweak once and fall into the tail */
	/* handling with eax = 112(%esp) = number of trailing bytes.      */
1694	movl	112(%esp),%eax
1695	pxor	%xmm0,%xmm0
1696	andl	$15,%eax
1697	jz	L069xts_dec_ret
1698	pcmpgtd	%xmm1,%xmm0
1699	movl	%eax,112(%esp)
1700	pshufd	$19,%xmm0,%xmm2
1701	pxor	%xmm0,%xmm0
1702	movdqa	96(%esp),%xmm3
1703	paddq	%xmm1,%xmm1
1704	pand	%xmm3,%xmm2
1705	pcmpgtd	%xmm1,%xmm0
1706	pxor	%xmm2,%xmm1
1707L070xts_dec_only_one_more:
	/* Tail handling (length not a multiple of 16):                     */
	/*   %xmm6 = current tweak, %xmm5 = the tweak after it.             */
	/* The held-back whole block is decrypted with the LATER tweak      */
	/* (%xmm5) and written to (%edi).                                   */
1708	pshufd	$19,%xmm0,%xmm5
1709	movdqa	%xmm1,%xmm6
1710	paddq	%xmm1,%xmm1
1711	pand	%xmm3,%xmm5
1712	pxor	%xmm1,%xmm5
1713	movl	%ebp,%edx
1714	movl	%ebx,%ecx
1715	movups	(%esi),%xmm2
1716	xorps	%xmm5,%xmm2
1717	movups	(%edx),%xmm0
1718	movups	16(%edx),%xmm1
1719	leal	32(%edx),%edx
1720	xorps	%xmm0,%xmm2
1721L071dec1_loop_13:
	/* .byte sequence encodes: aesdec %xmm1,%xmm2 */
1722.byte	102,15,56,222,209
1723	decl	%ecx
1724	movups	(%edx),%xmm1
1725	leal	16(%edx),%edx
1726	jnz	L071dec1_loop_13
	/* .byte sequence encodes: aesdeclast %xmm1,%xmm2 */
1727.byte	102,15,56,223,209
1728	xorps	%xmm5,%xmm2
1729	movups	%xmm2,(%edi)
1730L072xts_dec_steal:
	/* Byte-wise swap, eax iterations: the trailing input byte at       */
	/* 16(%esi) replaces the byte at (%edi), and the displaced byte is  */
	/* moved to 16(%edi), which becomes the trailing output.            */
1731	movzbl	16(%esi),%ecx
1732	movzbl	(%edi),%edx
1733	leal	1(%esi),%esi
1734	movb	%cl,(%edi)
1735	movb	%dl,16(%edi)
1736	leal	1(%edi),%edi
1737	subl	$1,%eax
1738	jnz	L072xts_dec_steal
	/* Rewind %edi by the tail length and decrypt the recombined block */
	/* in place with the EARLIER tweak (%xmm6).                        */
1739	subl	112(%esp),%edi
1740	movl	%ebp,%edx
1741	movl	%ebx,%ecx
1742	movups	(%edi),%xmm2
1743	xorps	%xmm6,%xmm2
1744	movups	(%edx),%xmm0
1745	movups	16(%edx),%xmm1
1746	leal	32(%edx),%edx
1747	xorps	%xmm0,%xmm2
1748L073dec1_loop_14:
	/* .byte sequence encodes: aesdec %xmm1,%xmm2 */
1749.byte	102,15,56,222,209
1750	decl	%ecx
1751	movups	(%edx),%xmm1
1752	leal	16(%edx),%edx
1753	jnz	L073dec1_loop_14
	/* .byte sequence encodes: aesdeclast %xmm1,%xmm2 */
1754.byte	102,15,56,223,209
1755	xorps	%xmm6,%xmm2
1756	movups	%xmm2,(%edi)
1757L069xts_dec_ret:
	/* Wipe all eight XMM registers and the six tweak slots, then */
	/* restore the caller stack pointer and saved registers.      */
1758	pxor	%xmm0,%xmm0
1759	pxor	%xmm1,%xmm1
1760	pxor	%xmm2,%xmm2
1761	movdqa	%xmm0,(%esp)
1762	pxor	%xmm3,%xmm3
1763	movdqa	%xmm0,16(%esp)
1764	pxor	%xmm4,%xmm4
1765	movdqa	%xmm0,32(%esp)
1766	pxor	%xmm5,%xmm5
1767	movdqa	%xmm0,48(%esp)
1768	pxor	%xmm6,%xmm6
1769	movdqa	%xmm0,64(%esp)
1770	pxor	%xmm7,%xmm7
1771	movdqa	%xmm0,80(%esp)
1772	movl	116(%esp),%esp
1773	popl	%edi
1774	popl	%esi
1775	popl	%ebx
1776	popl	%ebp
1777	ret
/*
 * _aesni_cbc_encrypt -- CBC-mode encryption or decryption built on AES-NI
 * instructions (32-bit x86, arguments on the stack).
 *
 * Stack arguments as read below, after the four register pushes:
 *   20(%esp)  source pointer            (walked through %esi)
 *   24(%esp)  destination pointer       (walked through %edi)
 *   28(%esp)  length in bytes; zero returns immediately
 *   32(%esp)  key schedule, round count at +240
 *   36(%esp)  pointer to the 16-byte chaining value; read on entry and
 *             rewritten with the final chaining value on exit
 *   40(%esp)  direction flag: zero selects decryption, non-zero encryption
 * NOTE(review): the C prototype is not visible in this file; the roles
 * above are read off the code -- confirm against the declaring header.
 *
 * Local frame (%esp switched to a 16-byte aligned area below the pushes):
 *   0..15(%esp)  16-byte scratch (chaining value / partial-block staging)
 *   16(%esp)     %esp value to restore on exit
 *
 * Register roles in the main paths: %eax = remaining length,
 * %ebp = key schedule base, %ebx = round count, %edx/%ecx = working copies
 * of those two, %xmm7 = chaining value.
 *
 * Change versus the previous revision: the partial-tail decrypt path now
 * overwrites the scratch slot with zero after copying the tail out.
 * Previously it re-stored %xmm2, leaving decrypted bytes on the stack.
 */
1778.globl	_aesni_cbc_encrypt
1779.private_extern	_aesni_cbc_encrypt
1780.align	4
1781_aesni_cbc_encrypt:
1782L_aesni_cbc_encrypt_begin:
1783	pushl	%ebp
1784	pushl	%ebx
1785	pushl	%esi
1786	pushl	%edi
	/* Argument loads are interleaved with computing the aligned frame */
	/* address in %ebx; %esp itself is only switched after the length  */
	/* check so the early-out can pop directly.                        */
1787	movl	20(%esp),%esi
1788	movl	%esp,%ebx
1789	movl	24(%esp),%edi
1790	subl	$24,%ebx
1791	movl	28(%esp),%eax
1792	andl	$-16,%ebx
1793	movl	32(%esp),%edx
1794	movl	36(%esp),%ebp
1795	testl	%eax,%eax
1796	jz	L074cbc_abort
	/* The direction test sets EFLAGS here; the je at the end of this */
	/* group consumes it (xchgl and the moves do not modify EFLAGS).  */
1797	cmpl	$0,40(%esp)
1798	xchgl	%esp,%ebx
1799	movups	(%ebp),%xmm7
1800	movl	240(%edx),%ecx
1801	movl	%edx,%ebp
1802	movl	%ebx,16(%esp)
1803	movl	%ecx,%ebx
1804	je	L075cbc_decrypt
	/* ---- Encryption: one block at a time, %xmm2 = running ciphertext. */
1805	movaps	%xmm7,%xmm2
1806	cmpl	$16,%eax
1807	jb	L076cbc_enc_tail
1808	subl	$16,%eax
1809	jmp	L077cbc_enc_loop
1810.align	4,0x90
1811L077cbc_enc_loop:
	/* input ^ round key 0 ^ previous ciphertext, then the AES rounds. */
1812	movups	(%esi),%xmm7
1813	leal	16(%esi),%esi
1814	movups	(%edx),%xmm0
1815	movups	16(%edx),%xmm1
1816	xorps	%xmm0,%xmm7
1817	leal	32(%edx),%edx
1818	xorps	%xmm7,%xmm2
1819L078enc1_loop_15:
	/* .byte sequence encodes: aesenc %xmm1,%xmm2 */
1820.byte	102,15,56,220,209
1821	decl	%ecx
1822	movups	(%edx),%xmm1
1823	leal	16(%edx),%edx
1824	jnz	L078enc1_loop_15
	/* .byte sequence encodes: aesenclast %xmm1,%xmm2 */
1825.byte	102,15,56,221,209
1826	movl	%ebx,%ecx
1827	movl	%ebp,%edx
1828	movups	%xmm2,(%edi)
1829	leal	16(%edi),%edi
1830	subl	$16,%eax
1831	jnc	L077cbc_enc_loop
	/* eax is now (remaining - 16); a non-zero remainder is a partial */
	/* final block. Otherwise the last ciphertext becomes the new     */
	/* chaining value in %xmm7.                                       */
1832	addl	$16,%eax
1833	jnz	L076cbc_enc_tail
1834	movaps	%xmm2,%xmm7
1835	pxor	%xmm2,%xmm2
1836	jmp	L079cbc_ret
1837L076cbc_enc_tail:
	/* Partial final block: copy the eax remaining input bytes to the  */
	/* output, zero-fill up to 16 bytes, then run the loop once more   */
	/* with the padded output block as its input.                      */
1838	movl	%eax,%ecx
	/* .long encodes: mov %esi,%esi ; rep movsb */
1839.long	2767451785
1840	movl	$16,%ecx
1841	subl	%eax,%ecx
1842	xorl	%eax,%eax
	/* .long encodes: mov %esi,%esi ; rep stosb   (al = 0) */
1843.long	2868115081
1844	leal	-16(%edi),%edi
1845	movl	%ebx,%ecx
1846	movl	%edi,%esi
1847	movl	%ebp,%edx
1848	jmp	L077cbc_enc_loop
1849.align	4,0x90
1850L075cbc_decrypt:
	/* ---- Decryption: six blocks per iteration while more than 80 */
	/* bytes remain, then a 1..5 block tail.                        */
1851	cmpl	$80,%eax
1852	jbe	L080cbc_dec_tail
1853	movaps	%xmm7,(%esp)
1854	subl	$80,%eax
1855	jmp	L081cbc_dec_loop6_enter
1856.align	4,0x90
1857L082cbc_dec_loop6:
	/* Deferred from the previous iteration: park the new chaining   */
	/* value and store the sixth output block.                       */
1858	movaps	%xmm0,(%esp)
1859	movups	%xmm7,(%edi)
1860	leal	16(%edi),%edi
1861L081cbc_dec_loop6_enter:
1862	movdqu	(%esi),%xmm2
1863	movdqu	16(%esi),%xmm3
1864	movdqu	32(%esi),%xmm4
1865	movdqu	48(%esi),%xmm5
1866	movdqu	64(%esi),%xmm6
1867	movdqu	80(%esi),%xmm7
1868	call	__aesni_decrypt6
	/* Re-read the input blocks to apply the chaining xor: block 0 uses */
	/* the value parked at (%esp), block N uses input block N-1. The    */
	/* sixth input block ends up in %xmm0 as the next chaining value.   */
1869	movups	(%esi),%xmm1
1870	movups	16(%esi),%xmm0
1871	xorps	(%esp),%xmm2
1872	xorps	%xmm1,%xmm3
1873	movups	32(%esi),%xmm1
1874	xorps	%xmm0,%xmm4
1875	movups	48(%esi),%xmm0
1876	xorps	%xmm1,%xmm5
1877	movups	64(%esi),%xmm1
1878	xorps	%xmm0,%xmm6
1879	movups	80(%esi),%xmm0
1880	xorps	%xmm1,%xmm7
1881	movups	%xmm2,(%edi)
1882	movups	%xmm3,16(%edi)
1883	leal	96(%esi),%esi
1884	movups	%xmm4,32(%edi)
1885	movl	%ebx,%ecx
1886	movups	%xmm5,48(%edi)
1887	movl	%ebp,%edx
1888	movups	%xmm6,64(%edi)
1889	leal	80(%edi),%edi
1890	subl	$96,%eax
1891	ja	L082cbc_dec_loop6
	/* Loop exit: %xmm2 = last (not yet stored) output block, %xmm7 =  */
	/* chaining value. Signed test: if nothing beyond this block       */
	/* remains, go store it via the collected-tail path.               */
1892	movaps	%xmm7,%xmm2
1893	movaps	%xmm0,%xmm7
1894	addl	$80,%eax
1895	jle	L083cbc_dec_clear_tail_collected
1896	movups	%xmm2,(%edi)
1897	leal	16(%edi),%edi
1898L080cbc_dec_tail:
	/* Dispatch on remaining length; copies of input blocks are kept */
	/* in %xmm6/%xmm5 for the chaining xor of the short paths.       */
1899	movups	(%esi),%xmm2
1900	movaps	%xmm2,%xmm6
1901	cmpl	$16,%eax
1902	jbe	L084cbc_dec_one
1903	movups	16(%esi),%xmm3
1904	movaps	%xmm3,%xmm5
1905	cmpl	$32,%eax
1906	jbe	L085cbc_dec_two
1907	movups	32(%esi),%xmm4
1908	cmpl	$48,%eax
1909	jbe	L086cbc_dec_three
1910	movups	48(%esi),%xmm5
1911	cmpl	$64,%eax
1912	jbe	L087cbc_dec_four
	/* Five blocks: use the six-lane routine with a zeroed sixth lane; */
	/* the chaining value is parked at (%esp) meanwhile.               */
1913	movups	64(%esi),%xmm6
1914	movaps	%xmm7,(%esp)
1915	movups	(%esi),%xmm2
1916	xorps	%xmm7,%xmm7
1917	call	__aesni_decrypt6
1918	movups	(%esi),%xmm1
1919	movups	16(%esi),%xmm0
1920	xorps	(%esp),%xmm2
1921	xorps	%xmm1,%xmm3
1922	movups	32(%esi),%xmm1
1923	xorps	%xmm0,%xmm4
1924	movups	48(%esi),%xmm0
1925	xorps	%xmm1,%xmm5
1926	movups	64(%esi),%xmm7
1927	xorps	%xmm0,%xmm6
1928	movups	%xmm2,(%edi)
1929	movups	%xmm3,16(%edi)
1930	pxor	%xmm3,%xmm3
1931	movups	%xmm4,32(%edi)
1932	pxor	%xmm4,%xmm4
1933	movups	%xmm5,48(%edi)
1934	pxor	%xmm5,%xmm5
1935	leal	64(%edi),%edi
1936	movaps	%xmm6,%xmm2
1937	pxor	%xmm6,%xmm6
1938	subl	$80,%eax
1939	jmp	L088cbc_dec_tail_collected
1940.align	4,0x90
1941L084cbc_dec_one:
	/* One block: result stays in %xmm2, input copy becomes chaining value. */
1942	movups	(%edx),%xmm0
1943	movups	16(%edx),%xmm1
1944	leal	32(%edx),%edx
1945	xorps	%xmm0,%xmm2
1946L089dec1_loop_16:
	/* .byte sequence encodes: aesdec %xmm1,%xmm2 */
1947.byte	102,15,56,222,209
1948	decl	%ecx
1949	movups	(%edx),%xmm1
1950	leal	16(%edx),%edx
1951	jnz	L089dec1_loop_16
	/* .byte sequence encodes: aesdeclast %xmm1,%xmm2 */
1952.byte	102,15,56,223,209
1953	xorps	%xmm7,%xmm2
1954	movaps	%xmm6,%xmm7
1955	subl	$16,%eax
1956	jmp	L088cbc_dec_tail_collected
1957.align	4,0x90
1958L085cbc_dec_two:
	/* Two blocks: first is stored, second is left in %xmm2. */
1959	call	__aesni_decrypt2
1960	xorps	%xmm7,%xmm2
1961	xorps	%xmm6,%xmm3
1962	movups	%xmm2,(%edi)
1963	movaps	%xmm3,%xmm2
1964	pxor	%xmm3,%xmm3
1965	leal	16(%edi),%edi
1966	movaps	%xmm5,%xmm7
1967	subl	$32,%eax
1968	jmp	L088cbc_dec_tail_collected
1969.align	4,0x90
1970L086cbc_dec_three:
	/* Three blocks: first two stored, third left in %xmm2; the third */
	/* input block is re-read as the new chaining value.              */
1971	call	__aesni_decrypt3
1972	xorps	%xmm7,%xmm2
1973	xorps	%xmm6,%xmm3
1974	xorps	%xmm5,%xmm4
1975	movups	%xmm2,(%edi)
1976	movaps	%xmm4,%xmm2
1977	pxor	%xmm4,%xmm4
1978	movups	%xmm3,16(%edi)
1979	pxor	%xmm3,%xmm3
1980	leal	32(%edi),%edi
1981	movups	32(%esi),%xmm7
1982	subl	$48,%eax
1983	jmp	L088cbc_dec_tail_collected
1984.align	4,0x90
1985L087cbc_dec_four:
	/* Four blocks: first three stored, fourth left in %xmm2; the */
	/* fourth input block is re-read as the new chaining value.   */
1986	call	__aesni_decrypt4
1987	movups	16(%esi),%xmm1
1988	movups	32(%esi),%xmm0
1989	xorps	%xmm7,%xmm2
1990	movups	48(%esi),%xmm7
1991	xorps	%xmm6,%xmm3
1992	movups	%xmm2,(%edi)
1993	xorps	%xmm1,%xmm4
1994	movups	%xmm3,16(%edi)
1995	pxor	%xmm3,%xmm3
1996	xorps	%xmm0,%xmm5
1997	movups	%xmm4,32(%edi)
1998	pxor	%xmm4,%xmm4
1999	leal	48(%edi),%edi
2000	movaps	%xmm5,%xmm2
2001	pxor	%xmm5,%xmm5
2002	subl	$64,%eax
2003	jmp	L088cbc_dec_tail_collected
2004.align	4,0x90
2005L083cbc_dec_clear_tail_collected:
	/* Entry from the six-block loop: scrub the data lanes first. */
2006	pxor	%xmm3,%xmm3
2007	pxor	%xmm4,%xmm4
2008	pxor	%xmm5,%xmm5
2009	pxor	%xmm6,%xmm6
2010L088cbc_dec_tail_collected:
	/* %xmm2 = final output block not yet stored. If the low four bits */
	/* of eax are zero it is stored whole; otherwise take the partial  */
	/* path.                                                           */
2011	andl	$15,%eax
2012	jnz	L090cbc_dec_tail_partial
2013	movups	%xmm2,(%edi)
2014	pxor	%xmm0,%xmm0
2015	jmp	L079cbc_ret
2016.align	4,0x90
2017L090cbc_dec_tail_partial:
	/* Stage the block in the scratch slot and copy ecx = 16 - eax bytes */
	/* of it to the destination.                                         */
	/* NOTE(review): the copy count is 16 minus the masked remainder,    */
	/* exactly as in the original code -- confirm this is the intended   */
	/* count for lengths that are not a multiple of 16.                  */
2018	movaps	%xmm2,(%esp)
2019	pxor	%xmm0,%xmm0
2020	movl	$16,%ecx
2021	movl	%esp,%esi
2022	subl	%eax,%ecx
	/* .long encodes: mov %esi,%esi ; rep movsb */
2023.long	2767451785
	/* Wipe the staged plaintext from the scratch slot with the zeroed */
	/* %xmm0. The slot is 16-byte aligned and the saved %esp sits at   */
	/* 16(%esp), so nothing else is disturbed.                         */
2024	movdqa	%xmm0,(%esp)
2025L079cbc_ret:
	/* Back on the caller stack: write the chaining value through the */
	/* pointer argument and clear the remaining working registers.    */
2026	movl	16(%esp),%esp
2027	movl	36(%esp),%ebp
2028	pxor	%xmm2,%xmm2
2029	pxor	%xmm1,%xmm1
2030	movups	%xmm7,(%ebp)
2031	pxor	%xmm7,%xmm7
2032L074cbc_abort:
2033	popl	%edi
2034	popl	%esi
2035	popl	%ebx
2036	popl	%ebp
2037	ret
/*
 * __aesni_set_encrypt_key -- internal AES key expansion using AES-NI.
 * Register-based calling convention (not callable directly from C):
 *   In:   %eax = pointer to the user key bytes
 *         %ecx = key size in bits (128, 192 or 256)
 *         %edx = pointer to the key schedule to fill
 *   Out:  %eax = 0 on success, -1 if either pointer is null,
 *                -2 if the bit count is not one of the three above
 *         %ecx = round-count value written into the schedule (9, 11 or 13)
 *                on success; the decrypt-key wrapper relies on this
 *   The round-count value is stored at what works out to offset 240 from
 *   the start of the schedule in every path.
 *   Clobbers %edx and %xmm0-%xmm5 (zeroed on the success path); %ebp and
 *   %ebx are saved and restored.
 *
 * Each key size has two implementations, selected by masking the dword at
 * offset 4 of OPENSSL_ia32cap_P with 0x10000800 and comparing against
 * 0x10000000: the default one uses aeskeygenassist, the "_alt" one uses
 * pshufb + aesenclast with constants from Lkey_const.
 * NOTE(review): the masked bits are presumably CPU feature flags (AVX and
 * XOP) -- confirm against the OPENSSL_ia32cap_P documentation.
 */
2038.private_extern	__aesni_set_encrypt_key
2039.align	4
2040__aesni_set_encrypt_key:
2041	pushl	%ebp
2042	pushl	%ebx
2043	testl	%eax,%eax
2044	jz	L091bad_pointer
2045	testl	%edx,%edx
2046	jz	L091bad_pointer
	/* Position-independent addressing: call/pop yields the address of  */
	/* L092pic, from which %ebx is pointed at Lkey_const and %ebp at    */
	/* the capability vector via its non-lazy pointer.                  */
2047	call	L092pic
2048L092pic:
2049	popl	%ebx
2050	leal	Lkey_const-L092pic(%ebx),%ebx
2051	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
	/* %xmm0 = first 16 key bytes, %xmm4 = 0 (scratch for the shufps  */
	/* helpers), %edx advanced past round key 0.                      */
2052	movups	(%eax),%xmm0
2053	xorps	%xmm4,%xmm4
2054	movl	4(%ebp),%ebp
2055	leal	16(%edx),%edx
2056	andl	$268437504,%ebp
2057	cmpl	$256,%ecx
2058	je	L09314rounds
2059	cmpl	$192,%ecx
2060	je	L09412rounds
2061	cmpl	$128,%ecx
2062	jne	L095bad_keybits
2063.align	4,0x90
2064L09610rounds:
	/* ---- 128-bit key. Each .byte line below encodes                 */
	/* aeskeygenassist $imm,%xmm0,%xmm1 with imm = the last byte       */
	/* (1,2,4,...,128,27,54); the helper then derives the next round   */
	/* key into %xmm0.                                                 */
2065	cmpl	$268435456,%ebp
2066	je	L09710rounds_alt
2067	movl	$9,%ecx
2068	movups	%xmm0,-16(%edx)
2069.byte	102,15,58,223,200,1
2070	call	L098key_128_cold
2071.byte	102,15,58,223,200,2
2072	call	L099key_128
2073.byte	102,15,58,223,200,4
2074	call	L099key_128
2075.byte	102,15,58,223,200,8
2076	call	L099key_128
2077.byte	102,15,58,223,200,16
2078	call	L099key_128
2079.byte	102,15,58,223,200,32
2080	call	L099key_128
2081.byte	102,15,58,223,200,64
2082	call	L099key_128
2083.byte	102,15,58,223,200,128
2084	call	L099key_128
2085.byte	102,15,58,223,200,27
2086	call	L099key_128
2087.byte	102,15,58,223,200,54
2088	call	L099key_128
	/* Store the final round key, then the round-count value after it. */
2089	movups	%xmm0,(%edx)
2090	movl	%ecx,80(%edx)
2091	jmp	L100good_key
2092.align	4,0x90
2093L099key_128:
	/* Helper: store the current round key and advance %edx, then fall */
	/* into the "cold" part, which combines %xmm0 with the             */
	/* aeskeygenassist result in %xmm1 (using %xmm4 as scratch) to     */
	/* form the next round key in %xmm0.                               */
2094	movups	%xmm0,(%edx)
2095	leal	16(%edx),%edx
2096L098key_128_cold:
2097	shufps	$16,%xmm0,%xmm4
2098	xorps	%xmm4,%xmm0
2099	shufps	$140,%xmm0,%xmm4
2100	xorps	%xmm4,%xmm0
2101	shufps	$255,%xmm1,%xmm1
2102	xorps	%xmm1,%xmm0
2103	ret
2104.align	4,0x90
2105L09710rounds_alt:
	/* Alternative 128-bit path: %xmm5 = byte-shuffle mask from         */
	/* Lkey_const+0, %xmm4 = per-round constant starting at the {1,..}  */
	/* row (Lkey_const+32) and shifted left by one bit each iteration.  */
	/* Eight loop iterations, then two unrolled rounds that start from  */
	/* the {27,..} row (Lkey_const+48).                                 */
2106	movdqa	(%ebx),%xmm5
2107	movl	$8,%ecx
2108	movdqa	32(%ebx),%xmm4
2109	movdqa	%xmm0,%xmm2
2110	movdqu	%xmm0,-16(%edx)
2111L101loop_key128:
	/* .byte lines encode: pshufb %xmm5,%xmm0 ; aesenclast %xmm4,%xmm0 */
2112.byte	102,15,56,0,197
2113.byte	102,15,56,221,196
2114	pslld	$1,%xmm4
2115	leal	16(%edx),%edx
2116	movdqa	%xmm2,%xmm3
2117	pslldq	$4,%xmm2
2118	pxor	%xmm2,%xmm3
2119	pslldq	$4,%xmm2
2120	pxor	%xmm2,%xmm3
2121	pslldq	$4,%xmm2
2122	pxor	%xmm3,%xmm2
2123	pxor	%xmm2,%xmm0
2124	movdqu	%xmm0,-16(%edx)
2125	movdqa	%xmm0,%xmm2
2126	decl	%ecx
2127	jnz	L101loop_key128
2128	movdqa	48(%ebx),%xmm4
	/* .byte lines encode: pshufb %xmm5,%xmm0 ; aesenclast %xmm4,%xmm0 */
2129.byte	102,15,56,0,197
2130.byte	102,15,56,221,196
2131	pslld	$1,%xmm4
2132	movdqa	%xmm2,%xmm3
2133	pslldq	$4,%xmm2
2134	pxor	%xmm2,%xmm3
2135	pslldq	$4,%xmm2
2136	pxor	%xmm2,%xmm3
2137	pslldq	$4,%xmm2
2138	pxor	%xmm3,%xmm2
2139	pxor	%xmm2,%xmm0
2140	movdqu	%xmm0,(%edx)
2141	movdqa	%xmm0,%xmm2
	/* .byte lines encode: pshufb %xmm5,%xmm0 ; aesenclast %xmm4,%xmm0 */
2142.byte	102,15,56,0,197
2143.byte	102,15,56,221,196
2144	movdqa	%xmm2,%xmm3
2145	pslldq	$4,%xmm2
2146	pxor	%xmm2,%xmm3
2147	pslldq	$4,%xmm2
2148	pxor	%xmm2,%xmm3
2149	pslldq	$4,%xmm2
2150	pxor	%xmm3,%xmm2
2151	pxor	%xmm2,%xmm0
2152	movdqu	%xmm0,16(%edx)
2153	movl	$9,%ecx
2154	movl	%ecx,96(%edx)
2155	jmp	L100good_key
2156.align	4,0x90
2157L09412rounds:
	/* ---- 192-bit key: %xmm2 low half = key bytes 16..23. Each .byte */
	/* line encodes aeskeygenassist $imm,%xmm2,%xmm1. The "a" and "b"  */
	/* helpers alternate because round keys straddle the 24-byte       */
	/* expansion step.                                                 */
2158	movq	16(%eax),%xmm2
2159	cmpl	$268435456,%ebp
2160	je	L10212rounds_alt
2161	movl	$11,%ecx
2162	movups	%xmm0,-16(%edx)
2163.byte	102,15,58,223,202,1
2164	call	L103key_192a_cold
2165.byte	102,15,58,223,202,2
2166	call	L104key_192b
2167.byte	102,15,58,223,202,4
2168	call	L105key_192a
2169.byte	102,15,58,223,202,8
2170	call	L104key_192b
2171.byte	102,15,58,223,202,16
2172	call	L105key_192a
2173.byte	102,15,58,223,202,32
2174	call	L104key_192b
2175.byte	102,15,58,223,202,64
2176	call	L105key_192a
2177.byte	102,15,58,223,202,128
2178	call	L104key_192b
2179	movups	%xmm0,(%edx)
2180	movl	%ecx,48(%edx)
2181	jmp	L100good_key
2182.align	4,0x90
2183L105key_192a:
	/* Helper "a": store %xmm0 as a round key, advance 16 bytes, keep a */
	/* copy of %xmm2 in %xmm5 for helper "b", then run the shared       */
	/* expansion step.                                                  */
2184	movups	%xmm0,(%edx)
2185	leal	16(%edx),%edx
2186.align	4,0x90
2187L103key_192a_cold:
2188	movaps	%xmm2,%xmm5
2189L106key_192b_warm:
	/* Shared expansion step: updates %xmm0 and %xmm2 from the */
	/* aeskeygenassist result in %xmm1.                        */
2190	shufps	$16,%xmm0,%xmm4
2191	movdqa	%xmm2,%xmm3
2192	xorps	%xmm4,%xmm0
2193	shufps	$140,%xmm0,%xmm4
2194	pslldq	$4,%xmm3
2195	xorps	%xmm4,%xmm0
2196	pshufd	$85,%xmm1,%xmm1
2197	pxor	%xmm3,%xmm2
2198	pxor	%xmm1,%xmm0
2199	pshufd	$255,%xmm0,%xmm3
2200	pxor	%xmm3,%xmm2
2201	ret
2202.align	4,0x90
2203L104key_192b:
	/* Helper "b": assemble and store two round keys from %xmm5, %xmm0 */
	/* and %xmm2, advance 32 bytes, then jump to the shared step (its  */
	/* ret returns to this helper's caller).                           */
2204	movaps	%xmm0,%xmm3
2205	shufps	$68,%xmm0,%xmm5
2206	movups	%xmm5,(%edx)
2207	shufps	$78,%xmm2,%xmm3
2208	movups	%xmm3,16(%edx)
2209	leal	32(%edx),%edx
2210	jmp	L106key_192b_warm
2211.align	4,0x90
2212L10212rounds_alt:
	/* Alternative 192-bit path: shuffle mask from Lkey_const+16,     */
	/* round constant from Lkey_const+32; eight iterations, each      */
	/* writing 24 bytes of schedule.                                  */
2213	movdqa	16(%ebx),%xmm5
2214	movdqa	32(%ebx),%xmm4
2215	movl	$8,%ecx
2216	movdqu	%xmm0,-16(%edx)
2217L107loop_key192:
2218	movq	%xmm2,(%edx)
2219	movdqa	%xmm2,%xmm1
	/* .byte lines encode: pshufb %xmm5,%xmm2 ; aesenclast %xmm4,%xmm2 */
2220.byte	102,15,56,0,213
2221.byte	102,15,56,221,212
2222	pslld	$1,%xmm4
2223	leal	24(%edx),%edx
2224	movdqa	%xmm0,%xmm3
2225	pslldq	$4,%xmm0
2226	pxor	%xmm0,%xmm3
2227	pslldq	$4,%xmm0
2228	pxor	%xmm0,%xmm3
2229	pslldq	$4,%xmm0
2230	pxor	%xmm3,%xmm0
2231	pshufd	$255,%xmm0,%xmm3
2232	pxor	%xmm1,%xmm3
2233	pslldq	$4,%xmm1
2234	pxor	%xmm1,%xmm3
2235	pxor	%xmm2,%xmm0
2236	pxor	%xmm3,%xmm2
2237	movdqu	%xmm0,-16(%edx)
2238	decl	%ecx
2239	jnz	L107loop_key192
2240	movl	$11,%ecx
2241	movl	%ecx,32(%edx)
2242	jmp	L100good_key
2243.align	4,0x90
2244L09314rounds:
	/* ---- 256-bit key: %xmm2 = key bytes 16..31, %edx advanced past */
	/* the first two round keys. The .byte lines alternate between    */
	/* aeskeygenassist $imm,%xmm2,%xmm1 (before the "a" helper) and   */
	/* aeskeygenassist $imm,%xmm0,%xmm1 (before the "b" helper).      */
2245	movups	16(%eax),%xmm2
2246	leal	16(%edx),%edx
2247	cmpl	$268435456,%ebp
2248	je	L10814rounds_alt
2249	movl	$13,%ecx
2250	movups	%xmm0,-32(%edx)
2251	movups	%xmm2,-16(%edx)
2252.byte	102,15,58,223,202,1
2253	call	L109key_256a_cold
2254.byte	102,15,58,223,200,1
2255	call	L110key_256b
2256.byte	102,15,58,223,202,2
2257	call	L111key_256a
2258.byte	102,15,58,223,200,2
2259	call	L110key_256b
2260.byte	102,15,58,223,202,4
2261	call	L111key_256a
2262.byte	102,15,58,223,200,4
2263	call	L110key_256b
2264.byte	102,15,58,223,202,8
2265	call	L111key_256a
2266.byte	102,15,58,223,200,8
2267	call	L110key_256b
2268.byte	102,15,58,223,202,16
2269	call	L111key_256a
2270.byte	102,15,58,223,200,16
2271	call	L110key_256b
2272.byte	102,15,58,223,202,32
2273	call	L111key_256a
2274.byte	102,15,58,223,200,32
2275	call	L110key_256b
2276.byte	102,15,58,223,202,64
2277	call	L111key_256a
2278	movups	%xmm0,(%edx)
2279	movl	%ecx,16(%edx)
2280	xorl	%eax,%eax
2281	jmp	L100good_key
2282.align	4,0x90
2283L111key_256a:
	/* Helper "a": store %xmm2, advance, then update %xmm0. */
2284	movups	%xmm2,(%edx)
2285	leal	16(%edx),%edx
2286L109key_256a_cold:
2287	shufps	$16,%xmm0,%xmm4
2288	xorps	%xmm4,%xmm0
2289	shufps	$140,%xmm0,%xmm4
2290	xorps	%xmm4,%xmm0
2291	shufps	$255,%xmm1,%xmm1
2292	xorps	%xmm1,%xmm0
2293	ret
2294.align	4,0x90
2295L110key_256b:
	/* Helper "b": store %xmm0, advance, then update %xmm2. */
2296	movups	%xmm0,(%edx)
2297	leal	16(%edx),%edx
2298	shufps	$16,%xmm2,%xmm4
2299	xorps	%xmm4,%xmm2
2300	shufps	$140,%xmm2,%xmm4
2301	xorps	%xmm4,%xmm2
2302	shufps	$170,%xmm1,%xmm1
2303	xorps	%xmm1,%xmm2
2304	ret
2305.align	4,0x90
2306L10814rounds_alt:
	/* Alternative 256-bit path: seven iterations; each writes one    */
	/* round key at (%edx) and, except on the last, a second one at   */
	/* 16(%edx) before advancing 32 bytes.                            */
2307	movdqa	(%ebx),%xmm5
2308	movdqa	32(%ebx),%xmm4
2309	movl	$7,%ecx
2310	movdqu	%xmm0,-32(%edx)
2311	movdqa	%xmm2,%xmm1
2312	movdqu	%xmm2,-16(%edx)
2313L112loop_key256:
	/* .byte lines encode: pshufb %xmm5,%xmm2 ; aesenclast %xmm4,%xmm2 */
2314.byte	102,15,56,0,213
2315.byte	102,15,56,221,212
2316	movdqa	%xmm0,%xmm3
2317	pslldq	$4,%xmm0
2318	pxor	%xmm0,%xmm3
2319	pslldq	$4,%xmm0
2320	pxor	%xmm0,%xmm3
2321	pslldq	$4,%xmm0
2322	pxor	%xmm3,%xmm0
2323	pslld	$1,%xmm4
2324	pxor	%xmm2,%xmm0
2325	movdqu	%xmm0,(%edx)
2326	decl	%ecx
2327	jz	L113done_key256
2328	pshufd	$255,%xmm0,%xmm2
2329	pxor	%xmm3,%xmm3
	/* .byte line encodes: aesenclast %xmm3,%xmm2   (%xmm3 = 0 here) */
2330.byte	102,15,56,221,211
2331	movdqa	%xmm1,%xmm3
2332	pslldq	$4,%xmm1
2333	pxor	%xmm1,%xmm3
2334	pslldq	$4,%xmm1
2335	pxor	%xmm1,%xmm3
2336	pslldq	$4,%xmm1
2337	pxor	%xmm3,%xmm1
2338	pxor	%xmm1,%xmm2
2339	movdqu	%xmm2,16(%edx)
2340	leal	32(%edx),%edx
2341	movdqa	%xmm2,%xmm1
2342	jmp	L112loop_key256
2343L113done_key256:
2344	movl	$13,%ecx
2345	movl	%ecx,16(%edx)
2346L100good_key:
	/* Success: scrub key material from %xmm0-%xmm5 and return 0. */
2347	pxor	%xmm0,%xmm0
2348	pxor	%xmm1,%xmm1
2349	pxor	%xmm2,%xmm2
2350	pxor	%xmm3,%xmm3
2351	pxor	%xmm4,%xmm4
2352	pxor	%xmm5,%xmm5
2353	xorl	%eax,%eax
2354	popl	%ebx
2355	popl	%ebp
2356	ret
2357.align	2,0x90
2358L091bad_pointer:
	/* Null key or schedule pointer: return -1. */
2359	movl	$-1,%eax
2360	popl	%ebx
2361	popl	%ebp
2362	ret
2363.align	2,0x90
2364L095bad_keybits:
	/* Unsupported bit count: clear the key bytes already loaded into */
	/* %xmm0 and return -2.                                           */
2365	pxor	%xmm0,%xmm0
2366	movl	$-2,%eax
2367	popl	%ebx
2368	popl	%ebp
2369	ret
/*
 * _aesni_set_encrypt_key -- stack-argument entry point for key expansion.
 * Loads its three stack arguments into the registers expected by
 * __aesni_set_encrypt_key (4(%esp) -> %eax user key, 8(%esp) -> %ecx bit
 * count, 12(%esp) -> %edx key schedule) and returns that routine's %eax
 * result unchanged (0, -1 or -2).
 */
2370.globl	_aesni_set_encrypt_key
2371.private_extern	_aesni_set_encrypt_key
2372.align	4
2373_aesni_set_encrypt_key:
2374L_aesni_set_encrypt_key_begin:
2375	movl	4(%esp),%eax
2376	movl	8(%esp),%ecx
2377	movl	12(%esp),%edx
2378	call	__aesni_set_encrypt_key
2379	ret
/*
 * _aesni_set_decrypt_key -- build a decryption key schedule.
 * Stack arguments: 4(%esp) user key, 8(%esp) bit count, 12(%esp) key
 * schedule. Runs the encryption key expansion first; if that returns
 * non-zero in %eax the error code is returned as is. Otherwise the round
 * keys are reversed in place, with aesimc applied to every round key
 * except the first and last, and 0 is returned. %xmm0/%xmm1 are zeroed on
 * the success path.
 */
2380.globl	_aesni_set_decrypt_key
2381.private_extern	_aesni_set_decrypt_key
2382.align	4
2383_aesni_set_decrypt_key:
2384L_aesni_set_decrypt_key_begin:
2385	movl	4(%esp),%eax
2386	movl	8(%esp),%ecx
2387	movl	12(%esp),%edx
2388	call	__aesni_set_encrypt_key
	/* On success %ecx holds the round-count value; scaled by 16 it is */
	/* the byte distance used to locate the last round key. The shll   */
	/* result flags are overwritten by testl before the branch.        */
2389	movl	12(%esp),%edx
2390	shll	$4,%ecx
2391	testl	%eax,%eax
2392	jnz	L114dec_key_ret
	/* %edx -> first round key, %eax -> last round key; swap them */
	/* untransformed, then step both pointers inward.             */
2393	leal	16(%edx,%ecx,1),%eax
2394	movups	(%edx),%xmm0
2395	movups	(%eax),%xmm1
2396	movups	%xmm0,(%eax)
2397	movups	%xmm1,(%edx)
2398	leal	16(%edx),%edx
2399	leal	-16(%eax),%eax
2400L115dec_key_inverse:
	/* Swap the pair at (%edx)/(%eax), applying aesimc to each, and  */
	/* keep walking inward while %eax is still above %edx (unsigned). */
2401	movups	(%edx),%xmm0
2402	movups	(%eax),%xmm1
	/* .byte lines encode: aesimc %xmm0,%xmm0 ; aesimc %xmm1,%xmm1 */
2403.byte	102,15,56,219,192
2404.byte	102,15,56,219,201
2405	leal	16(%edx),%edx
2406	leal	-16(%eax),%eax
2407	movups	%xmm0,16(%eax)
2408	movups	%xmm1,-16(%edx)
2409	cmpl	%edx,%eax
2410	ja	L115dec_key_inverse
	/* Pointers have met: transform the middle round key in place. */
2411	movups	(%edx),%xmm0
	/* .byte line encodes: aesimc %xmm0,%xmm0 */
2412.byte	102,15,56,219,192
2413	movups	%xmm0,(%edx)
2414	pxor	%xmm0,%xmm0
2415	pxor	%xmm1,%xmm1
2416	xorl	%eax,%eax
2417L114dec_key_ret:
2418	ret
/*
 * Lkey_const -- constant table used by the alternative key-expansion paths
 * of __aesni_set_encrypt_key (addressed there relative to %ebx):
 *   +0   four copies of 0x0C0F0E0D: pshufb mask, 128- and 256-bit paths
 *   +16  four copies of 0x04070605: pshufb mask, 192-bit path
 *   +32  {1,1,1,1}: initial round constant, shifted left once per round
 *   +48  {27,27,27,27}: round constant loaded for the last two 128-bit
 *        rounds
 * The .byte rows that follow are the NUL-terminated ASCII string
 * "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".
 * The table is aligned to 64 bytes (.align 6 is a power-of-two exponent
 * in this assembler dialect).
 */
2419.align	6,0x90
2420Lkey_const:
2421.long	202313229,202313229,202313229,202313229
2422.long	67569157,67569157,67569157,67569157
2423.long	1,1,1,1
2424.long	27,27,27,27
2425.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
2426.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
2427.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
2428.byte	115,108,46,111,114,103,62,0
/*
 * Non-lazy symbol pointer for _OPENSSL_ia32cap_P; __aesni_set_encrypt_key
 * loads the capability vector address through this slot. The .long 0 is
 * the placeholder for that address.
 */
2429.section __IMPORT,__pointers,non_lazy_symbol_pointers
2430L_OPENSSL_ia32cap_P$non_lazy_ptr:
2431.indirect_symbol	_OPENSSL_ia32cap_P
2432.long	0
2433#endif
2434