# AES-NI primitives for 32-bit x86 (AT&T syntax, Mach-O `_`-prefixed symbols).
# NOTE(review): this file appears to be machine-generated (OpenSSL
# aesni-x86.pl perlasm style) and was recovered from a line-numbered web
# view; the fused per-line numbers and viewer chrome have been removed.
.text
#----------------------------------------------------------------------
# void aesni_encrypt(const unsigned char *in, unsigned char *out,
#                    const AES_KEY *key);   — presumed signature; cdecl,
# args at 4/8/12(%esp).  Encrypts one 16-byte block in place of out.
# 240(%edx) is read as the round count (AES_KEY.rounds — TODO confirm).
#----------------------------------------------------------------------
.globl	_aesni_encrypt
.align	4
_aesni_encrypt:
L_aesni_encrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32 (CET/IBT landing pad)
#endif

	movl	4(%esp),%eax		# %eax = in
	movl	12(%esp),%edx		# %edx = key schedule
	movups	(%eax),%xmm2		# load plaintext block
	movl	240(%edx),%ecx		# %ecx = round count
	movl	8(%esp),%eax		# %eax = out
	movups	(%edx),%xmm0		# round key 0
	movups	16(%edx),%xmm1		# round key 1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# whitening xor
L000enc1_loop_1:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1		# next round key
	leal	16(%edx),%edx
	jnz	L000enc1_loop_1
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub key material from registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)		# store ciphertext
	pxor	%xmm2,%xmm2
	ret
#----------------------------------------------------------------------
# void aesni_decrypt(const unsigned char *in, unsigned char *out,
#                    const AES_KEY *key);   — presumed signature; cdecl.
# Mirror of _aesni_encrypt using aesdec/aesdeclast.
#----------------------------------------------------------------------
.globl	_aesni_decrypt
.align	4
_aesni_decrypt:
L_aesni_decrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movl	4(%esp),%eax		# %eax = in
	movl	12(%esp),%edx		# %edx = key schedule
	movups	(%eax),%xmm2
	movl	240(%edx),%ecx		# round count
	movl	8(%esp),%eax		# %eax = out
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# whitening xor
L001dec1_loop_2:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L001dec1_loop_2
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)
	pxor	%xmm2,%xmm2
	ret
#----------------------------------------------------------------------
# Internal helper (not C ABI): encrypt 2 blocks in parallel.
# In:  %xmm2,%xmm3 = blocks; %edx = key schedule; %ecx = rounds.
# Out: %xmm2,%xmm3 encrypted.  Clobbers %xmm0,%xmm1,%ecx,%edx,flags.
#----------------------------------------------------------------------
.align	4
__aesni_encrypt2:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx			# rounds*16 = key-schedule byte span
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx	# point %edx past last round key
	negl	%ecx
	addl	$16,%ecx		# negative index counts up to zero
L002enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L002enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	ret
#----------------------------------------------------------------------
# Internal helper: decrypt 2 blocks in parallel (mirror of encrypt2).
# In:  %xmm2,%xmm3 = blocks; %edx = key schedule; %ecx = rounds.
#----------------------------------------------------------------------
.align	4
__aesni_decrypt2:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L003dec2_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L003dec2_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
	ret
#----------------------------------------------------------------------
# Internal helper: encrypt 3 blocks in parallel.
# In:  %xmm2..%xmm4 = blocks; %edx = key schedule; %ecx = rounds.
#----------------------------------------------------------------------
.align	4
__aesni_encrypt3:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L004enc3_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L004enc3_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
	ret
#----------------------------------------------------------------------
# Internal helper: decrypt 3 blocks in parallel (mirror of encrypt3).
# In:  %xmm2..%xmm4 = blocks; %edx = key schedule; %ecx = rounds.
#----------------------------------------------------------------------
.align	4
__aesni_decrypt3:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L005dec3_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L005dec3_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
	ret
#----------------------------------------------------------------------
# Internal helper: encrypt 4 blocks in parallel.
# In:  %xmm2..%xmm5 = blocks; %edx = key schedule; %ecx = rounds.
#----------------------------------------------------------------------
.align	4
__aesni_encrypt4:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			# nopl 0(%eax): pads loop entry alignment
	addl	$16,%ecx
L006enc4_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L006enc4_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
	ret
#----------------------------------------------------------------------
# Internal helper: decrypt 4 blocks in parallel (mirror of encrypt4).
# In:  %xmm2..%xmm5 = blocks; %edx = key schedule; %ecx = rounds.
#----------------------------------------------------------------------
.align	4
__aesni_decrypt4:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			# nopl 0(%eax): alignment padding
	addl	$16,%ecx
L007dec4_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L007dec4_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
	ret
#----------------------------------------------------------------------
# Internal helper: encrypt 6 blocks in parallel.
# In:  %xmm2..%xmm7 = blocks; %edx = key schedule; %ecx = rounds.
# L_aesni_encrypt6_enter is an alternate entry used by callers that have
# already issued the first round themselves (see ctr32/xts below).
#----------------------------------------------------------------------
.align	4
__aesni_encrypt6:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2 (overlap xor with round 1)
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	L008_aesni_encrypt6_inner
.align	4,0x90
L009enc6_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
L008_aesni_encrypt6_inner:
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
L_aesni_encrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L009enc6_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240		# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248		# aesenclast %xmm0,%xmm7
	ret
#----------------------------------------------------------------------
# Internal helper: decrypt 6 blocks in parallel (mirror of encrypt6).
# In:  %xmm2..%xmm7 = blocks; %edx = key schedule; %ecx = rounds.
#----------------------------------------------------------------------
.align	4
__aesni_decrypt6:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	L010_aesni_decrypt6_inner
.align	4,0x90
L011dec6_loop:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
L010_aesni_decrypt6_inner:
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
L_aesni_decrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240		# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248		# aesdec %xmm0,%xmm7
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L011dec6_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240		# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248		# aesdeclast %xmm0,%xmm7
	ret
#----------------------------------------------------------------------
# void aesni_ecb_encrypt(const unsigned char *in, unsigned char *out,
#                        size_t len, const AES_KEY *key, int enc);
# — presumed signature; cdecl, args at 20..36(%esp) after 4 pushes.
# len is truncated to a multiple of 16.  enc != 0 selects encryption.
# Processes 6 blocks per iteration, with 1..4-block tail paths.
#----------------------------------------------------------------------
.globl	_aesni_ecb_encrypt
.align	4
_aesni_ecb_encrypt:
L_aesni_ecb_encrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# %esi = in
	movl	24(%esp),%edi		# %edi = out
	movl	28(%esp),%eax		# %eax = len
	movl	32(%esp),%edx		# %edx = key
	movl	36(%esp),%ebx		# %ebx = enc flag
	andl	$-16,%eax		# round len down to 16-byte multiple
	jz	L012ecb_ret
	movl	240(%edx),%ecx		# round count
	testl	%ebx,%ebx
	jz	L013ecb_decrypt
	movl	%edx,%ebp		# stash key/rounds across helper calls
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	L014ecb_enc_tail
	movdqu	(%esi),%xmm2		# preload first 6 blocks
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L015ecb_enc_loop6_enter
.align	4,0x90
L016ecb_enc_loop6:
	movups	%xmm2,(%edi)		# store previous batch, load next
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L015ecb_enc_loop6_enter:
	call	__aesni_encrypt6
	movl	%ebp,%edx		# restore key/rounds clobbered by helper
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L016ecb_enc_loop6
	movups	%xmm2,(%edi)		# flush last full batch
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax		# %eax = remaining 0..80 bytes
	jz	L012ecb_ret
L014ecb_enc_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L017ecb_enc_one
	movups	16(%esi),%xmm3
	je	L018ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L019ecb_enc_three
	movups	48(%esi),%xmm5
	je	L020ecb_enc_four
	movups	64(%esi),%xmm6		# 5 blocks: run the 6-wide helper
	xorps	%xmm7,%xmm7		# with a zero dummy 6th block
	call	__aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L017ecb_enc_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L021enc1_loop_3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L021enc1_loop_3
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L018ecb_enc_two:
	call	__aesni_encrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L019ecb_enc_three:
	call	__aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L020ecb_enc_four:
	call	__aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L013ecb_decrypt:
	movl	%edx,%ebp		# decrypt side: same structure as encrypt
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	L022ecb_dec_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L023ecb_dec_loop6_enter
.align	4,0x90
L024ecb_dec_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L023ecb_dec_loop6_enter:
	call	__aesni_decrypt6
	movl	%ebp,%edx
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L024ecb_dec_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	L012ecb_ret
L022ecb_dec_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L025ecb_dec_one
	movups	16(%esi),%xmm3
	je	L026ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L027ecb_dec_three
	movups	48(%esi),%xmm5
	je	L028ecb_dec_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7
	call	__aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L025ecb_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L029dec1_loop_4:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L029dec1_loop_4
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L026ecb_dec_two:
	call	__aesni_decrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L027ecb_dec_three:
	call	__aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L028ecb_dec_four:
	call	__aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L012ecb_ret:
	pxor	%xmm0,%xmm0		# scrub all xmm state before return
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
#----------------------------------------------------------------------
# CCM64 encrypt: interleaves CTR encryption of the payload with CBC-MAC
# accumulation (%xmm3).  Presumed args (cdecl, after 4 pushes):
# 20=in, 24=out, 28=blocks, 32=key, 36=ivec, 40=cmac — TODO confirm.
# Builds a byte-swap mask and a 64-bit counter increment on the
# 16-aligned stack frame; saved %esp kept at 48(%esp).
#----------------------------------------------------------------------
.globl	_aesni_ccm64_encrypt_blocks
.align	4
_aesni_ccm64_encrypt_blocks:
L_aesni_ccm64_encrypt_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp		# 16-align scratch frame
	movl	%ebp,48(%esp)		# save original %esp
	movdqu	(%ebx),%xmm7		# counter block (ivec)
	movdqu	(%ecx),%xmm3		# CBC-MAC accumulator
	movl	240(%edx),%ecx		# round count
	movl	$202182159,(%esp)	# 0..15: byte-swap shuffle mask
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)		# 16..31: counter increment = 1
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shll	$4,%ecx
	movl	$16,%ebx
	leal	(%edx),%ebp		# %ebp = key base
	movdqa	(%esp),%xmm5		# %xmm5 = shuffle mask
	movdqa	%xmm7,%xmm2
	leal	32(%edx,%ecx,1),%edx	# end of key schedule
	subl	%ecx,%ebx		# negative round-key index
.byte	102,15,56,0,253			# pshufb %xmm5,%xmm7 (byte-swap counter)
L030ccm64_enc_outer:
	movups	(%ebp),%xmm0
	movl	%ebx,%ecx
	movups	(%esi),%xmm6		# next input block
	xorps	%xmm0,%xmm2		# whiten counter
	movups	16(%ebp),%xmm1
	xorps	%xmm6,%xmm0
	xorps	%xmm0,%xmm3		# fold input into CBC-MAC (whitened)
	movups	32(%ebp),%xmm0
L031ccm64_enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2 (CTR stream)
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3 (CBC-MAC)
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L031ccm64_enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	paddq	16(%esp),%xmm7		# bump 64-bit counter
	decl	%eax
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6		# ciphertext = keystream ^ input
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2 (re-swap counter)
	leal	16(%edi),%edi
	jnz	L030ccm64_enc_outer
	movl	48(%esp),%esp		# restore caller stack
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# write back CBC-MAC
	pxor	%xmm0,%xmm0		# scrub xmm state
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
#----------------------------------------------------------------------
# CCM64 decrypt: decrypts payload in CTR mode, then folds the recovered
# plaintext into the CBC-MAC (%xmm3).  Same presumed argument layout as
# the encrypt variant above — TODO confirm against the C prototype.
#----------------------------------------------------------------------
.globl	_aesni_ccm64_decrypt_blocks
.align	4
_aesni_ccm64_decrypt_blocks:
L_aesni_ccm64_decrypt_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp
	movl	%ebp,48(%esp)		# save original %esp
	movdqu	(%ebx),%xmm7		# counter block
	movdqu	(%ecx),%xmm3		# CBC-MAC accumulator
	movl	240(%edx),%ecx
	movl	$202182159,(%esp)	# byte-swap mask
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx			# counter increment
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	movdqa	(%esp),%xmm5
	movdqa	%xmm7,%xmm2
	movl	%edx,%ebp
	movl	%ecx,%ebx
.byte	102,15,56,0,253			# pshufb %xmm5,%xmm7
	movups	(%edx),%xmm0		# encrypt first counter block inline
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L032enc1_loop_5:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L032enc1_loop_5
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	shll	$4,%ebx
	movl	$16,%ecx
	movups	(%esi),%xmm6		# first ciphertext block
	paddq	16(%esp),%xmm7
	leal	16(%esi),%esi
	subl	%ebx,%ecx
	leal	32(%ebp,%ebx,1),%edx
	movl	%ecx,%ebx
	jmp	L033ccm64_dec_outer
.align	4,0x90
L033ccm64_dec_outer:
	xorps	%xmm2,%xmm6		# plaintext = keystream ^ ciphertext
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
	subl	$1,%eax
	jz	L034ccm64_dec_break
	movups	(%ebp),%xmm0
	movl	%ebx,%ecx
	movups	16(%ebp),%xmm1
	xorps	%xmm0,%xmm6
	xorps	%xmm0,%xmm2
	xorps	%xmm6,%xmm3		# fold plaintext into CBC-MAC
	movups	32(%ebp),%xmm0
L035ccm64_dec2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L035ccm64_dec2_loop
	movups	(%esi),%xmm6		# prefetch next ciphertext block
	paddq	16(%esp),%xmm7		# bump counter
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	leal	16(%esi),%esi
	jmp	L033ccm64_dec_outer
.align	4,0x90
L034ccm64_dec_break:
	movl	240(%ebp),%ecx		# final MAC pass over last plaintext
	movl	%ebp,%edx
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%edx),%edx
	xorps	%xmm6,%xmm3
L036enc1_loop_6:
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L036enc1_loop_6
.byte	102,15,56,221,217		# aesenclast %xmm1,%xmm3
	movl	48(%esp),%esp		# restore caller stack
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# write back CBC-MAC
	pxor	%xmm0,%xmm0		# scrub xmm state
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
#----------------------------------------------------------------------
# CTR mode with 32-bit counter.  Presumed args (cdecl, after 4 pushes):
# 20=in, 24=out, 28=blocks, 32=key, 36=ivec — TODO confirm.
# Keeps byte-swapped counters in %xmm0/%xmm1 halves (built with
# pinsrd/pextrd) and processes 6 blocks per iteration; 1..5-block tail.
# Scratch frame: 0..15 swap mask, 16..31 increment, 32..47 whitened IV,
# 48..79 counter state, 80 = saved %esp.
#----------------------------------------------------------------------
.globl	_aesni_ctr32_encrypt_blocks
.align	4
_aesni_ctr32_encrypt_blocks:
L_aesni_ctr32_encrypt_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$88,%esp
	andl	$-16,%esp
	movl	%ebp,80(%esp)		# save original %esp
	cmpl	$1,%eax
	je	L037ctr32_one_shortcut
	movdqu	(%ebx),%xmm7		# load IV/counter
	movl	$202182159,(%esp)	# byte-swap mask
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$6,%ecx			# per-iteration counter stride = 6
	xorl	%ebp,%ebp
	movl	%ecx,16(%esp)
	movl	%ecx,20(%esp)
	movl	%ecx,24(%esp)
	movl	%ebp,28(%esp)
.byte	102,15,58,22,251,3		# pextrd $3,%xmm7,%ebx (counter word)
.byte	102,15,58,34,253,3		# pinsrd $3,%ebp,%xmm7
	movl	240(%edx),%ecx		# round count
	bswap	%ebx
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqa	(%esp),%xmm2		# swap mask
.byte	102,15,58,34,195,0		# pinsrd $0,%ebx,%xmm0
	leal	3(%ebx),%ebp
.byte	102,15,58,34,205,0		# pinsrd $0,%ebp,%xmm1
	incl	%ebx
.byte	102,15,58,34,195,1		# pinsrd $1,%ebx,%xmm0
	incl	%ebp
.byte	102,15,58,34,205,1		# pinsrd $1,%ebp,%xmm1
	incl	%ebx
.byte	102,15,58,34,195,2		# pinsrd $2,%ebx,%xmm0
	incl	%ebp
.byte	102,15,58,34,205,2		# pinsrd $2,%ebp,%xmm1
	movdqa	%xmm0,48(%esp)		# counters n,n+1,n+2
.byte	102,15,56,0,194			# pshufb %xmm2,%xmm0
	movdqu	(%edx),%xmm6		# round key 0
	movdqa	%xmm1,64(%esp)		# counters n+3,n+4,n+5
.byte	102,15,56,0,202			# pshufb %xmm2,%xmm1
	pshufd	$192,%xmm0,%xmm2	# broadcast counters into blocks
	pshufd	$128,%xmm0,%xmm3
	cmpl	$6,%eax
	jb	L038ctr32_tail
	pxor	%xmm6,%xmm7		# pre-whiten IV body
	shll	$4,%ecx
	movl	$16,%ebx
	movdqa	%xmm7,32(%esp)
	movl	%edx,%ebp
	subl	%ecx,%ebx
	leal	32(%edx,%ecx,1),%edx
	subl	$6,%eax
	jmp	L039ctr32_loop6
.align	4,0x90
L039ctr32_loop6:
	pshufd	$64,%xmm0,%xmm4
	movdqa	32(%esp),%xmm0		# whitened IV
	pshufd	$192,%xmm1,%xmm5
	pxor	%xmm0,%xmm2
	pshufd	$128,%xmm1,%xmm6
	pxor	%xmm0,%xmm3
	pshufd	$64,%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2 (round 1 inline)
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,217
	movups	32(%ebp),%xmm0
	movl	%ebx,%ecx
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	call	L_aesni_encrypt6_enter	# finish remaining rounds
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2		# xor keystream with input
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	%xmm2,(%edi)
	movdqa	16(%esp),%xmm0		# increment (6,6,6,0)
	xorps	%xmm1,%xmm4
	movdqa	64(%esp),%xmm1
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	paddd	%xmm0,%xmm1		# advance both counter triples
	paddd	48(%esp),%xmm0
	movdqa	(%esp),%xmm2
	movups	48(%esi),%xmm3
	movups	64(%esi),%xmm4
	xorps	%xmm3,%xmm5
	movups	80(%esi),%xmm3
	leal	96(%esi),%esi
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194			# pshufb %xmm2,%xmm0
	xorps	%xmm4,%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm3,%xmm7
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202			# pshufb %xmm2,%xmm1
	movups	%xmm6,64(%edi)
	pshufd	$192,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	pshufd	$128,%xmm0,%xmm3
	subl	$6,%eax
	jnc	L039ctr32_loop6
	addl	$6,%eax			# %eax = 0..5 remaining blocks
	jz	L040ctr32_ret
	movdqu	(%ebp),%xmm7
	movl	%ebp,%edx
	pxor	32(%esp),%xmm7		# recover plain round key 0
	movl	240(%ebp),%ecx
L038ctr32_tail:
	por	%xmm7,%xmm2
	cmpl	$2,%eax
	jb	L041ctr32_one
	pshufd	$64,%xmm0,%xmm4
	por	%xmm7,%xmm3
	je	L042ctr32_two
	pshufd	$192,%xmm1,%xmm5
	por	%xmm7,%xmm4
	cmpl	$4,%eax
	jb	L043ctr32_three
	pshufd	$128,%xmm1,%xmm6
	por	%xmm7,%xmm5
	je	L044ctr32_four
	por	%xmm7,%xmm6		# 5 blocks via 6-wide helper
	call	__aesni_encrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm4
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm5
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L037ctr32_one_shortcut:
	movups	(%ebx),%xmm2		# single block: use IV directly
	movl	240(%edx),%ecx
L041ctr32_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L045enc1_loop_7:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L045enc1_loop_7
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	(%esi),%xmm6
	xorps	%xmm2,%xmm6
	movups	%xmm6,(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L042ctr32_two:
	call	__aesni_encrypt2
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L043ctr32_three:
	call	__aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	movups	32(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm7,%xmm4
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L044ctr32_four:
	call	__aesni_encrypt4
	movups	(%esi),%xmm6
	movups	16(%esi),%xmm7
	movups	32(%esi),%xmm1
	xorps	%xmm6,%xmm2
	movups	48(%esi),%xmm0
	xorps	%xmm7,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L040ctr32_ret:
	pxor	%xmm0,%xmm0		# scrub registers and key-bearing stack
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)		# clear whitened IV slot
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movl	80(%esp),%esp		# restore caller stack
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
#----------------------------------------------------------------------
# XTS encrypt.  Presumed args (cdecl, after 4 pushes): 20=in, 24=out,
# 28=len, 32=key1 (data key), 36=key2 (tweak key), 40=iv — TODO confirm.
# First encrypts the IV with key2 to form the initial tweak (%xmm1),
# then processes 6 blocks per iteration; tweaks are advanced by the
# GF(2^128) doubling trick (pshufd/pand with the 0x87 poly at 96(%esp)).
# Handles a partial final block with ciphertext stealing.
# Frame: 0..79 tweak slots, 96 poly, 112 len, 116 saved %esp.
#----------------------------------------------------------------------
.globl	_aesni_xts_encrypt
.align	4
_aesni_xts_encrypt:
L_aesni_xts_encrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	36(%esp),%edx		# %edx = key2 (tweak key)
	movl	40(%esp),%esi		# %esi = iv
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L046enc1_loop_8:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2: tweak = E_k2(iv)
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L046enc1_loop_8
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movl	20(%esp),%esi		# %esi = in
	movl	24(%esp),%edi		# %edi = out
	movl	28(%esp),%eax		# %eax = len
	movl	32(%esp),%edx		# %edx = key1
	movl	%esp,%ebp
	subl	$120,%esp
	movl	240(%edx),%ecx
	andl	$-16,%esp
	movl	$135,96(%esp)		# GF(2^128) reduction poly 0x87
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)		# stash len
	movl	%ebp,116(%esp)		# stash original %esp
	movdqa	%xmm2,%xmm1		# %xmm1 = current tweak
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	pcmpgtd	%xmm1,%xmm0		# carry mask for tweak doubling
	andl	$-16,%eax
	movl	%edx,%ebp
	movl	%ecx,%ebx
	subl	$96,%eax
	jc	L047xts_enc_short
	shll	$4,%ecx
	movl	$16,%ebx
	subl	%ecx,%ebx
	leal	32(%edx,%ecx,1),%edx
	jmp	L048xts_enc_loop6
.align	4,0x90
L048xts_enc_loop6:
	pshufd	$19,%xmm0,%xmm2		# derive 6 consecutive tweaks,
	pxor	%xmm0,%xmm0		# spilling them to 0..79(%esp)
	movdqa	%xmm1,(%esp)
	paddq	%xmm1,%xmm1		# tweak <<= 1
	pand	%xmm3,%xmm2		# carry ? poly : 0
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1		# fold in reduction
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0		# round key 0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2		# load 6 plaintext blocks,
	pxor	%xmm1,%xmm7		# whitening them with round key 0
	movl	%ebx,%ecx
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2		# xor in per-block tweaks
	movdqa	%xmm7,80(%esp)
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2 (round 1 inline)
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm7
	movups	32(%ebp),%xmm0
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	call	L_aesni_encrypt6_enter	# finish remaining rounds
	movdqa	80(%esp),%xmm1
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2		# un-tweak ciphertext blocks
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3		# reload poly, advance tweak
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	L048xts_enc_loop6
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movl	%ecx,%ebx
L047xts_enc_short:
	addl	$96,%eax		# %eax = remaining whole-block bytes
	jz	L049xts_enc_done6x
	movdqa	%xmm1,%xmm5		# %xmm5..%xmm7 = tail tweaks
	cmpl	$32,%eax
	jb	L050xts_enc_one
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	L051xts_enc_two
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	L052xts_enc_three
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	L053xts_enc_four
	movdqa	%xmm7,32(%esp)		# 5-block tail
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	__aesni_encrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	L054xts_enc_done
.align	4,0x90
L050xts_enc_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2		# tweak in
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L055enc1_loop_9:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L055enc1_loop_9
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	xorps	%xmm5,%xmm2		# tweak out
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1
	jmp	L054xts_enc_done
.align	4,0x90
L051xts_enc_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	__aesni_encrypt2
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	L054xts_enc_done
.align	4,0x90
L052xts_enc_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	__aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	L054xts_enc_done
.align	4,0x90
L053xts_enc_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	__aesni_encrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	L054xts_enc_done
.align	4,0x90
L049xts_enc_done6x:
	movl	112(%esp),%eax		# residual bytes (len mod 16)
	andl	$15,%eax
	jz	L056xts_enc_ret
	movdqa	%xmm1,%xmm5
	movl	%eax,112(%esp)
	jmp	L057xts_enc_steal
.align	4,0x90
L054xts_enc_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	L056xts_enc_ret
	pcmpgtd	%xmm1,%xmm0		# advance tweak once more for steal
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm5
	paddq	%xmm1,%xmm1
	pand	96(%esp),%xmm5
	pxor	%xmm1,%xmm5
L057xts_enc_steal:
	movzbl	(%esi),%ecx		# ciphertext stealing: swap the
	movzbl	-16(%edi),%edx		# trailing bytes with the last
	leal	1(%esi),%esi		# full ciphertext block
	movb	%cl,-16(%edi)
	movb	%dl,(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	L057xts_enc_steal
	subl	112(%esp),%edi
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	-16(%edi),%xmm2		# re-encrypt the stolen block
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L058enc1_loop_10:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L058enc1_loop_10
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	xorps	%xmm5,%xmm2
	movups	%xmm2,-16(%edi)
L056xts_enc_ret:
	pxor	%xmm0,%xmm0		# scrub registers and tweak slots
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movdqa	%xmm0,(%esp)
	pxor	%xmm3,%xmm3
	movdqa	%xmm0,16(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm0,80(%esp)
	movl	116(%esp),%esp		# restore caller stack
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# _aesni_xts_decrypt — AES-XTS decryption using AES-NI, 32-bit x86.
# NOTE(review): argument layout inferred from the loads below (offsets are
# relative to %esp after the four pushes): 20=in, 24=out, 28=len(bytes),
# 32=key1 (data key), 36=key2 (tweak key), 40=iv — confirm against the C
# prototype of aesni_xts_decrypt().
# The IV is first ECB-encrypted with key2 to form the initial tweak; each
# data block is XORed with its tweak, AES-decrypted with key1, and XORed
# again; tweaks advance by multiplication by x in GF(2^128) (poly 0x87).
1447.globl	_aesni_xts_decrypt
1448.align	4
1449_aesni_xts_decrypt:
1450L_aesni_xts_decrypt_begin:
1451	%ifdef __CET__

1452
1453.byte	243,15,30,251
1454	%endif

1455
1456	pushl	%ebp
1457	pushl	%ebx
1458	pushl	%esi
1459	pushl	%edi
# Encrypt the IV with key2 (%edx) to produce the initial tweak in %xmm2.
1460	movl	36(%esp),%edx
1461	movl	40(%esp),%esi
1462	movl	240(%edx),%ecx
1463	movups	(%esi),%xmm2
1464	movups	(%edx),%xmm0
1465	movups	16(%edx),%xmm1
1466	leal	32(%edx),%edx
1467	xorps	%xmm0,%xmm2
1468L059enc1_loop_11:
1469.byte	102,15,56,220,209
1470	decl	%ecx
1471	movups	(%edx),%xmm1
1472	leal	16(%edx),%edx
1473	jnz	L059enc1_loop_11
1474.byte	102,15,56,221,209
# Load the real arguments and carve a 120-byte, 16-aligned scratch frame.
# Frame layout: 0..80(%esp) six tweak slots, 96(%esp) GF(2^128) reduction
# constant (0x87 in the low dword), 112(%esp) tail-byte count for
# ciphertext stealing, 116(%esp) saved original %esp.
1475	movl	20(%esp),%esi
1476	movl	24(%esp),%edi
1477	movl	28(%esp),%eax
1478	movl	32(%esp),%edx
1479	movl	%esp,%ebp
1480	subl	$120,%esp
1481	andl	$-16,%esp
# If len is not a multiple of 16, hold back one extra whole block for the
# ciphertext-stealing path (the last full block must be decrypted with the
# *next* tweak).
1482	xorl	%ebx,%ebx
1483	testl	$15,%eax
1484	setnz	%bl
1485	shll	$4,%ebx
1486	subl	%ebx,%eax
1487	movl	$135,96(%esp)
1488	movl	$0,100(%esp)
1489	movl	$1,104(%esp)
1490	movl	$0,108(%esp)
1491	movl	%eax,112(%esp)
1492	movl	%ebp,116(%esp)
# %ebp = key1 schedule, %ebx = saved round count (240(key) = rounds).
1493	movl	240(%edx),%ecx
1494	movl	%edx,%ebp
1495	movl	%ecx,%ebx
1496	movdqa	%xmm2,%xmm1
1497	pxor	%xmm0,%xmm0
1498	movdqa	96(%esp),%xmm3
1499	pcmpgtd	%xmm1,%xmm0
1500	andl	$-16,%eax
1501	subl	$96,%eax
1502	jc	L060xts_dec_short
1503	shll	$4,%ecx
1504	movl	$16,%ebx
1505	subl	%ecx,%ebx
1506	leal	32(%edx,%ecx,1),%edx
1507	jmp	L061xts_dec_loop6
1508.align	4,0x90
# Main loop: derive six consecutive tweaks (each step is a carry-less
# doubling in GF(2^128): pcmpgtd/pshufd extract the sign, paddq shifts left,
# pand+pxor fold in the 0x87 polynomial), stash them at 0..80(%esp),
# whiten six blocks, decrypt them, then un-whiten and store.
1509L061xts_dec_loop6:
1510	pshufd	$19,%xmm0,%xmm2
1511	pxor	%xmm0,%xmm0
1512	movdqa	%xmm1,(%esp)
1513	paddq	%xmm1,%xmm1
1514	pand	%xmm3,%xmm2
1515	pcmpgtd	%xmm1,%xmm0
1516	pxor	%xmm2,%xmm1
1517	pshufd	$19,%xmm0,%xmm2
1518	pxor	%xmm0,%xmm0
1519	movdqa	%xmm1,16(%esp)
1520	paddq	%xmm1,%xmm1
1521	pand	%xmm3,%xmm2
1522	pcmpgtd	%xmm1,%xmm0
1523	pxor	%xmm2,%xmm1
1524	pshufd	$19,%xmm0,%xmm2
1525	pxor	%xmm0,%xmm0
1526	movdqa	%xmm1,32(%esp)
1527	paddq	%xmm1,%xmm1
1528	pand	%xmm3,%xmm2
1529	pcmpgtd	%xmm1,%xmm0
1530	pxor	%xmm2,%xmm1
1531	pshufd	$19,%xmm0,%xmm2
1532	pxor	%xmm0,%xmm0
1533	movdqa	%xmm1,48(%esp)
1534	paddq	%xmm1,%xmm1
1535	pand	%xmm3,%xmm2
1536	pcmpgtd	%xmm1,%xmm0
1537	pxor	%xmm2,%xmm1
1538	pshufd	$19,%xmm0,%xmm7
1539	movdqa	%xmm1,64(%esp)
1540	paddq	%xmm1,%xmm1
1541	movups	(%ebp),%xmm0
1542	pand	%xmm3,%xmm7
1543	movups	(%esi),%xmm2
1544	pxor	%xmm1,%xmm7
# Load six ciphertext blocks, XOR in round key 0 and the six tweaks, then
# run the first two AES rounds inline before tail-calling the shared
# six-lane decrypt body.
1545	movl	%ebx,%ecx
1546	movdqu	16(%esi),%xmm3
1547	xorps	%xmm0,%xmm2
1548	movdqu	32(%esi),%xmm4
1549	pxor	%xmm0,%xmm3
1550	movdqu	48(%esi),%xmm5
1551	pxor	%xmm0,%xmm4
1552	movdqu	64(%esi),%xmm6
1553	pxor	%xmm0,%xmm5
1554	movdqu	80(%esi),%xmm1
1555	pxor	%xmm0,%xmm6
1556	leal	96(%esi),%esi
1557	pxor	(%esp),%xmm2
1558	movdqa	%xmm7,80(%esp)
1559	pxor	%xmm1,%xmm7
1560	movups	16(%ebp),%xmm1
1561	pxor	16(%esp),%xmm3
1562	pxor	32(%esp),%xmm4
1563.byte	102,15,56,222,209
1564	pxor	48(%esp),%xmm5
1565	pxor	64(%esp),%xmm6
1566.byte	102,15,56,222,217
1567	pxor	%xmm0,%xmm7
1568	movups	32(%ebp),%xmm0
1569.byte	102,15,56,222,225
1570.byte	102,15,56,222,233
1571.byte	102,15,56,222,241
1572.byte	102,15,56,222,249
1573	call	L_aesni_decrypt6_enter
# Un-whiten with the saved tweaks, write out six plaintext blocks, and
# compute the next tweak for the following iteration.
1574	movdqa	80(%esp),%xmm1
1575	pxor	%xmm0,%xmm0
1576	xorps	(%esp),%xmm2
1577	pcmpgtd	%xmm1,%xmm0
1578	xorps	16(%esp),%xmm3
1579	movups	%xmm2,(%edi)
1580	xorps	32(%esp),%xmm4
1581	movups	%xmm3,16(%edi)
1582	xorps	48(%esp),%xmm5
1583	movups	%xmm4,32(%edi)
1584	xorps	64(%esp),%xmm6
1585	movups	%xmm5,48(%edi)
1586	xorps	%xmm1,%xmm7
1587	movups	%xmm6,64(%edi)
1588	pshufd	$19,%xmm0,%xmm2
1589	movups	%xmm7,80(%edi)
1590	leal	96(%edi),%edi
1591	movdqa	96(%esp),%xmm3
1592	pxor	%xmm0,%xmm0
1593	paddq	%xmm1,%xmm1
1594	pand	%xmm3,%xmm2
1595	pcmpgtd	%xmm1,%xmm0
1596	pxor	%xmm2,%xmm1
1597	subl	$96,%eax
1598	jnc	L061xts_dec_loop6
1599	movl	240(%ebp),%ecx
1600	movl	%ebp,%edx
1601	movl	%ecx,%ebx
# Fewer than 96 bytes left: recover the true remainder and dispatch on the
# number of whole blocks (1..5), deriving just the tweaks that are needed.
1602L060xts_dec_short:
1603	addl	$96,%eax
1604	jz	L062xts_dec_done6x
1605	movdqa	%xmm1,%xmm5
1606	cmpl	$32,%eax
1607	jb	L063xts_dec_one
1608	pshufd	$19,%xmm0,%xmm2
1609	pxor	%xmm0,%xmm0
1610	paddq	%xmm1,%xmm1
1611	pand	%xmm3,%xmm2
1612	pcmpgtd	%xmm1,%xmm0
1613	pxor	%xmm2,%xmm1
1614	je	L064xts_dec_two
1615	pshufd	$19,%xmm0,%xmm2
1616	pxor	%xmm0,%xmm0
1617	movdqa	%xmm1,%xmm6
1618	paddq	%xmm1,%xmm1
1619	pand	%xmm3,%xmm2
1620	pcmpgtd	%xmm1,%xmm0
1621	pxor	%xmm2,%xmm1
1622	cmpl	$64,%eax
1623	jb	L065xts_dec_three
1624	pshufd	$19,%xmm0,%xmm2
1625	pxor	%xmm0,%xmm0
1626	movdqa	%xmm1,%xmm7
1627	paddq	%xmm1,%xmm1
1628	pand	%xmm3,%xmm2
1629	pcmpgtd	%xmm1,%xmm0
1630	pxor	%xmm2,%xmm1
1631	movdqa	%xmm5,(%esp)
1632	movdqa	%xmm6,16(%esp)
1633	je	L066xts_dec_four
# Five blocks remain: tweaks live in (%esp)..64(%esp).
1634	movdqa	%xmm7,32(%esp)
1635	pshufd	$19,%xmm0,%xmm7
1636	movdqa	%xmm1,48(%esp)
1637	paddq	%xmm1,%xmm1
1638	pand	%xmm3,%xmm7
1639	pxor	%xmm1,%xmm7
1640	movdqu	(%esi),%xmm2
1641	movdqu	16(%esi),%xmm3
1642	movdqu	32(%esi),%xmm4
1643	pxor	(%esp),%xmm2
1644	movdqu	48(%esi),%xmm5
1645	pxor	16(%esp),%xmm3
1646	movdqu	64(%esi),%xmm6
1647	pxor	32(%esp),%xmm4
1648	leal	80(%esi),%esi
1649	pxor	48(%esp),%xmm5
1650	movdqa	%xmm7,64(%esp)
1651	pxor	%xmm7,%xmm6
1652	call	__aesni_decrypt6
1653	movaps	64(%esp),%xmm1
1654	xorps	(%esp),%xmm2
1655	xorps	16(%esp),%xmm3
1656	xorps	32(%esp),%xmm4
1657	movups	%xmm2,(%edi)
1658	xorps	48(%esp),%xmm5
1659	movups	%xmm3,16(%edi)
1660	xorps	%xmm1,%xmm6
1661	movups	%xmm4,32(%edi)
1662	movups	%xmm5,48(%edi)
1663	movups	%xmm6,64(%edi)
1664	leal	80(%edi),%edi
1665	jmp	L067xts_dec_done
1666.align	4,0x90
# One block: tweak in %xmm5; single-lane AES-NI decrypt loop.
1667L063xts_dec_one:
1668	movups	(%esi),%xmm2
1669	leal	16(%esi),%esi
1670	xorps	%xmm5,%xmm2
1671	movups	(%edx),%xmm0
1672	movups	16(%edx),%xmm1
1673	leal	32(%edx),%edx
1674	xorps	%xmm0,%xmm2
1675L068dec1_loop_12:
1676.byte	102,15,56,222,209
1677	decl	%ecx
1678	movups	(%edx),%xmm1
1679	leal	16(%edx),%edx
1680	jnz	L068dec1_loop_12
1681.byte	102,15,56,223,209
1682	xorps	%xmm5,%xmm2
1683	movups	%xmm2,(%edi)
1684	leal	16(%edi),%edi
1685	movdqa	%xmm5,%xmm1
1686	jmp	L067xts_dec_done
1687.align	4,0x90
# Two blocks: tweaks in %xmm5/%xmm6.
1688L064xts_dec_two:
1689	movaps	%xmm1,%xmm6
1690	movups	(%esi),%xmm2
1691	movups	16(%esi),%xmm3
1692	leal	32(%esi),%esi
1693	xorps	%xmm5,%xmm2
1694	xorps	%xmm6,%xmm3
1695	call	__aesni_decrypt2
1696	xorps	%xmm5,%xmm2
1697	xorps	%xmm6,%xmm3
1698	movups	%xmm2,(%edi)
1699	movups	%xmm3,16(%edi)
1700	leal	32(%edi),%edi
1701	movdqa	%xmm6,%xmm1
1702	jmp	L067xts_dec_done
1703.align	4,0x90
# Three blocks: tweaks in %xmm5/%xmm6/%xmm7.
1704L065xts_dec_three:
1705	movaps	%xmm1,%xmm7
1706	movups	(%esi),%xmm2
1707	movups	16(%esi),%xmm3
1708	movups	32(%esi),%xmm4
1709	leal	48(%esi),%esi
1710	xorps	%xmm5,%xmm2
1711	xorps	%xmm6,%xmm3
1712	xorps	%xmm7,%xmm4
1713	call	__aesni_decrypt3
1714	xorps	%xmm5,%xmm2
1715	xorps	%xmm6,%xmm3
1716	xorps	%xmm7,%xmm4
1717	movups	%xmm2,(%edi)
1718	movups	%xmm3,16(%edi)
1719	movups	%xmm4,32(%edi)
1720	leal	48(%edi),%edi
1721	movdqa	%xmm7,%xmm1
1722	jmp	L067xts_dec_done
1723.align	4,0x90
# Four blocks: first two tweaks spilled to (%esp)/16(%esp), last two in
# %xmm7/%xmm6 (current tweak saved to %xmm6 above).
1724L066xts_dec_four:
1725	movaps	%xmm1,%xmm6
1726	movups	(%esi),%xmm2
1727	movups	16(%esi),%xmm3
1728	movups	32(%esi),%xmm4
1729	xorps	(%esp),%xmm2
1730	movups	48(%esi),%xmm5
1731	leal	64(%esi),%esi
1732	xorps	16(%esp),%xmm3
1733	xorps	%xmm7,%xmm4
1734	xorps	%xmm6,%xmm5
1735	call	__aesni_decrypt4
1736	xorps	(%esp),%xmm2
1737	xorps	16(%esp),%xmm3
1738	xorps	%xmm7,%xmm4
1739	movups	%xmm2,(%edi)
1740	xorps	%xmm6,%xmm5
1741	movups	%xmm3,16(%edi)
1742	movups	%xmm4,32(%edi)
1743	movups	%xmm5,48(%edi)
1744	leal	64(%edi),%edi
1745	movdqa	%xmm6,%xmm1
1746	jmp	L067xts_dec_done
1747.align	4,0x90
# Length was an exact multiple of 96: only check for a stolen-byte tail.
1748L062xts_dec_done6x:
1749	movl	112(%esp),%eax
1750	andl	$15,%eax
1751	jz	L069xts_dec_ret
1752	movl	%eax,112(%esp)
1753	jmp	L070xts_dec_only_one_more
1754.align	4,0x90
# Done with whole blocks; if len had a ragged tail, advance the tweak once
# more before handling ciphertext stealing.
1755L067xts_dec_done:
1756	movl	112(%esp),%eax
1757	pxor	%xmm0,%xmm0
1758	andl	$15,%eax
1759	jz	L069xts_dec_ret
1760	pcmpgtd	%xmm1,%xmm0
1761	movl	%eax,112(%esp)
1762	pshufd	$19,%xmm0,%xmm2
1763	pxor	%xmm0,%xmm0
1764	movdqa	96(%esp),%xmm3
1765	paddq	%xmm1,%xmm1
1766	pand	%xmm3,%xmm2
1767	pcmpgtd	%xmm1,%xmm0
1768	pxor	%xmm2,%xmm1
# Ciphertext stealing: decrypt the held-back block with tweak n+1 (%xmm5),
# keeping tweak n in %xmm6 for the final partial block.
1769L070xts_dec_only_one_more:
1770	pshufd	$19,%xmm0,%xmm5
1771	movdqa	%xmm1,%xmm6
1772	paddq	%xmm1,%xmm1
1773	pand	%xmm3,%xmm5
1774	pxor	%xmm1,%xmm5
1775	movl	%ebp,%edx
1776	movl	%ebx,%ecx
1777	movups	(%esi),%xmm2
1778	xorps	%xmm5,%xmm2
1779	movups	(%edx),%xmm0
1780	movups	16(%edx),%xmm1
1781	leal	32(%edx),%edx
1782	xorps	%xmm0,%xmm2
1783L071dec1_loop_13:
1784.byte	102,15,56,222,209
1785	decl	%ecx
1786	movups	(%edx),%xmm1
1787	leal	16(%edx),%edx
1788	jnz	L071dec1_loop_13
1789.byte	102,15,56,223,209
1790	xorps	%xmm5,%xmm2
1791	movups	%xmm2,(%edi)
# Swap the tail bytes: copy remaining input bytes over the output block and
# move the displaced output bytes to the final partial position.
1792L072xts_dec_steal:
1793	movzbl	16(%esi),%ecx
1794	movzbl	(%edi),%edx
1795	leal	1(%esi),%esi
1796	movb	%cl,(%edi)
1797	movb	%dl,16(%edi)
1798	leal	1(%edi),%edi
1799	subl	$1,%eax
1800	jnz	L072xts_dec_steal
# Re-decrypt the reassembled block with tweak n (%xmm6).
1801	subl	112(%esp),%edi
1802	movl	%ebp,%edx
1803	movl	%ebx,%ecx
1804	movups	(%edi),%xmm2
1805	xorps	%xmm6,%xmm2
1806	movups	(%edx),%xmm0
1807	movups	16(%edx),%xmm1
1808	leal	32(%edx),%edx
1809	xorps	%xmm0,%xmm2
1810L073dec1_loop_14:
1811.byte	102,15,56,222,209
1812	decl	%ecx
1813	movups	(%edx),%xmm1
1814	leal	16(%edx),%edx
1815	jnz	L073dec1_loop_14
1816.byte	102,15,56,223,209
1817	xorps	%xmm6,%xmm2
1818	movups	%xmm2,(%edi)
# Scrub all XMM registers and the tweak scratch area (key/tweak material),
# restore the caller's stack pointer and callee-saved registers.
1819L069xts_dec_ret:
1820	pxor	%xmm0,%xmm0
1821	pxor	%xmm1,%xmm1
1822	pxor	%xmm2,%xmm2
1823	movdqa	%xmm0,(%esp)
1824	pxor	%xmm3,%xmm3
1825	movdqa	%xmm0,16(%esp)
1826	pxor	%xmm4,%xmm4
1827	movdqa	%xmm0,32(%esp)
1828	pxor	%xmm5,%xmm5
1829	movdqa	%xmm0,48(%esp)
1830	pxor	%xmm6,%xmm6
1831	movdqa	%xmm0,64(%esp)
1832	pxor	%xmm7,%xmm7
1833	movdqa	%xmm0,80(%esp)
1834	movl	116(%esp),%esp
1835	popl	%edi
1836	popl	%esi
1837	popl	%ebx
1838	popl	%ebp
1839	ret
# _aesni_ocb_encrypt — AES-OCB encryption using AES-NI, 32-bit x86.
# NOTE(review): argument layout inferred from the loads below (offsets are
# relative to %esp after the four pushes): 20=in, 24=out, 28=blocks,
# 32=key, 36=starting block index, 40=offset (running Offset_i, read and
# written back), 44=L_ table of precomputed offsets, 48=checksum (read and
# written back) — confirm against the C prototype of aesni_ocb_encrypt().
# Throughout: %xmm0 = running offset, %xmm1 = running checksum (sum of all
# plaintext blocks), %ebx = L_ table, %ebp = block counter; the offset for
# block i is advanced by L_[ntz(i)] (hence the bsfl instructions).
1840.globl	_aesni_ocb_encrypt
1841.align	4
1842_aesni_ocb_encrypt:
1843L_aesni_ocb_encrypt_begin:
1844	%ifdef __CET__

1845
1846.byte	243,15,30,251
1847	%endif

1848
1849	pushl	%ebp
1850	pushl	%ebx
1851	pushl	%esi
1852	pushl	%edi
1853	movl	40(%esp),%ecx
1854	movl	48(%esp),%ebx
1855	movl	20(%esp),%esi
1856	movl	24(%esp),%edi
1857	movl	28(%esp),%eax
1858	movl	32(%esp),%edx
1859	movdqu	(%ecx),%xmm0
1860	movl	36(%esp),%ebp
1861	movdqu	(%ebx),%xmm1
1862	movl	44(%esp),%ebx
# Build a 132-byte 16-aligned scratch frame. 120(%esp)=out-in delta,
# 124(%esp)=input address of the last full 6-block group, 128(%esp)=saved
# original %esp; 112/116(%esp) are filled in at L074odd.
1863	movl	%esp,%ecx
1864	subl	$132,%esp
1865	andl	$-16,%esp
1866	subl	%esi,%edi
1867	shll	$4,%eax
1868	leal	-96(%esi,%eax,1),%eax
1869	movl	%edi,120(%esp)
1870	movl	%eax,124(%esp)
1871	movl	%ecx,128(%esp)
1872	movl	240(%edx),%ecx
# If the starting block index is even, process one block up front so the
# grand loop always starts on an odd index (its ntz pattern for i+1, i+3,
# i+5 then only needs three bsfl lookups per six blocks).
1873	testl	$1,%ebp
1874	jnz	L074odd
1875	bsfl	%ebp,%eax
1876	addl	$1,%ebp
1877	shll	$4,%eax
1878	movdqu	(%ebx,%eax,1),%xmm7
1879	movl	%edx,%eax
1880	movdqu	(%esi),%xmm2
1881	leal	16(%esi),%esi
1882	pxor	%xmm0,%xmm7
1883	pxor	%xmm2,%xmm1
1884	pxor	%xmm7,%xmm2
1885	movdqa	%xmm1,%xmm6
1886	movups	(%edx),%xmm0
1887	movups	16(%edx),%xmm1
1888	leal	32(%edx),%edx
1889	xorps	%xmm0,%xmm2
1890L075enc1_loop_15:
1891.byte	102,15,56,220,209
1892	decl	%ecx
1893	movups	(%edx),%xmm1
1894	leal	16(%edx),%edx
1895	jnz	L075enc1_loop_15
1896.byte	102,15,56,221,209
1897	xorps	%xmm7,%xmm2
1898	movdqa	%xmm7,%xmm0
1899	movdqa	%xmm6,%xmm1
1900	movups	%xmm2,-16(%edi,%esi,1)
1901	movl	240(%eax),%ecx
1902	movl	%eax,%edx
1903	movl	124(%esp),%eax
# 112(%esp)=key pointer, 116(%esp)=16-16*rounds (used to locate the first
# round keys via negative offsets from the biased key pointer in %edx).
1904L074odd:
1905	shll	$4,%ecx
1906	movl	$16,%edi
1907	subl	%ecx,%edi
1908	movl	%edx,112(%esp)
1909	leal	32(%edx,%ecx,1),%edx
1910	movl	%edi,116(%esp)
1911	cmpl	%eax,%esi
1912	ja	L076short
1913	jmp	L077grandloop
1914.align	5,0x90
# Grand loop: six blocks per iteration. Compute the six chained offsets
# (offset ^= L_[ntz(i)]) into 0..80(%esp), accumulate the plaintext
# checksum into %xmm1 (saved at 96(%esp)), whiten, encrypt six lanes,
# un-whiten and store.
1915L077grandloop:
1916	leal	1(%ebp),%ecx
1917	leal	3(%ebp),%eax
1918	leal	5(%ebp),%edi
1919	addl	$6,%ebp
1920	bsfl	%ecx,%ecx
1921	bsfl	%eax,%eax
1922	bsfl	%edi,%edi
1923	shll	$4,%ecx
1924	shll	$4,%eax
1925	shll	$4,%edi
1926	movdqu	(%ebx),%xmm2
1927	movdqu	(%ebx,%ecx,1),%xmm3
1928	movl	116(%esp),%ecx
1929	movdqa	%xmm2,%xmm4
1930	movdqu	(%ebx,%eax,1),%xmm5
1931	movdqa	%xmm2,%xmm6
1932	movdqu	(%ebx,%edi,1),%xmm7
1933	pxor	%xmm0,%xmm2
1934	pxor	%xmm2,%xmm3
1935	movdqa	%xmm2,(%esp)
1936	pxor	%xmm3,%xmm4
1937	movdqa	%xmm3,16(%esp)
1938	pxor	%xmm4,%xmm5
1939	movdqa	%xmm4,32(%esp)
1940	pxor	%xmm5,%xmm6
1941	movdqa	%xmm5,48(%esp)
1942	pxor	%xmm6,%xmm7
1943	movdqa	%xmm6,64(%esp)
1944	movdqa	%xmm7,80(%esp)
1945	movups	-48(%edx,%ecx,1),%xmm0
1946	movdqu	(%esi),%xmm2
1947	movdqu	16(%esi),%xmm3
1948	movdqu	32(%esi),%xmm4
1949	movdqu	48(%esi),%xmm5
1950	movdqu	64(%esi),%xmm6
1951	movdqu	80(%esi),%xmm7
1952	leal	96(%esi),%esi
# checksum ^= each plaintext block; also fold in round key 0 (%xmm0).
1953	pxor	%xmm2,%xmm1
1954	pxor	%xmm0,%xmm2
1955	pxor	%xmm3,%xmm1
1956	pxor	%xmm0,%xmm3
1957	pxor	%xmm4,%xmm1
1958	pxor	%xmm0,%xmm4
1959	pxor	%xmm5,%xmm1
1960	pxor	%xmm0,%xmm5
1961	pxor	%xmm6,%xmm1
1962	pxor	%xmm0,%xmm6
1963	pxor	%xmm7,%xmm1
1964	pxor	%xmm0,%xmm7
1965	movdqa	%xmm1,96(%esp)
1966	movups	-32(%edx,%ecx,1),%xmm1
1967	pxor	(%esp),%xmm2
1968	pxor	16(%esp),%xmm3
1969	pxor	32(%esp),%xmm4
1970	pxor	48(%esp),%xmm5
1971	pxor	64(%esp),%xmm6
1972	pxor	80(%esp),%xmm7
1973	movups	-16(%edx,%ecx,1),%xmm0
1974.byte	102,15,56,220,209
1975.byte	102,15,56,220,217
1976.byte	102,15,56,220,225
1977.byte	102,15,56,220,233
1978.byte	102,15,56,220,241
1979.byte	102,15,56,220,249
1980	movl	120(%esp),%edi
1981	movl	124(%esp),%eax
1982	call	L_aesni_encrypt6_enter
# Un-whiten with the saved offsets; restore checksum to %xmm1 and store
# six ciphertext blocks at out = in + delta.
1983	movdqa	80(%esp),%xmm0
1984	pxor	(%esp),%xmm2
1985	pxor	16(%esp),%xmm3
1986	pxor	32(%esp),%xmm4
1987	pxor	48(%esp),%xmm5
1988	pxor	64(%esp),%xmm6
1989	pxor	%xmm0,%xmm7
1990	movdqa	96(%esp),%xmm1
1991	movdqu	%xmm2,-96(%edi,%esi,1)
1992	movdqu	%xmm3,-80(%edi,%esi,1)
1993	movdqu	%xmm4,-64(%edi,%esi,1)
1994	movdqu	%xmm5,-48(%edi,%esi,1)
1995	movdqu	%xmm6,-32(%edi,%esi,1)
1996	movdqu	%xmm7,-16(%edi,%esi,1)
1997	cmpl	%eax,%esi
1998	jbe	L077grandloop
# Tail: 0..5 blocks remain; dispatch on the exact count.
1999L076short:
2000	addl	$96,%eax
2001	subl	%esi,%eax
2002	jz	L078done
2003	cmpl	$32,%eax
2004	jb	L079one
2005	je	L080two
2006	cmpl	$64,%eax
2007	jb	L081three
2008	je	L082four
# Five blocks: same scheme as the grand loop with the sixth lane zeroed.
2009	leal	1(%ebp),%ecx
2010	leal	3(%ebp),%eax
2011	bsfl	%ecx,%ecx
2012	bsfl	%eax,%eax
2013	shll	$4,%ecx
2014	shll	$4,%eax
2015	movdqu	(%ebx),%xmm2
2016	movdqu	(%ebx,%ecx,1),%xmm3
2017	movl	116(%esp),%ecx
2018	movdqa	%xmm2,%xmm4
2019	movdqu	(%ebx,%eax,1),%xmm5
2020	movdqa	%xmm2,%xmm6
2021	pxor	%xmm0,%xmm2
2022	pxor	%xmm2,%xmm3
2023	movdqa	%xmm2,(%esp)
2024	pxor	%xmm3,%xmm4
2025	movdqa	%xmm3,16(%esp)
2026	pxor	%xmm4,%xmm5
2027	movdqa	%xmm4,32(%esp)
2028	pxor	%xmm5,%xmm6
2029	movdqa	%xmm5,48(%esp)
2030	pxor	%xmm6,%xmm7
2031	movdqa	%xmm6,64(%esp)
2032	movups	-48(%edx,%ecx,1),%xmm0
2033	movdqu	(%esi),%xmm2
2034	movdqu	16(%esi),%xmm3
2035	movdqu	32(%esi),%xmm4
2036	movdqu	48(%esi),%xmm5
2037	movdqu	64(%esi),%xmm6
2038	pxor	%xmm7,%xmm7
2039	pxor	%xmm2,%xmm1
2040	pxor	%xmm0,%xmm2
2041	pxor	%xmm3,%xmm1
2042	pxor	%xmm0,%xmm3
2043	pxor	%xmm4,%xmm1
2044	pxor	%xmm0,%xmm4
2045	pxor	%xmm5,%xmm1
2046	pxor	%xmm0,%xmm5
2047	pxor	%xmm6,%xmm1
2048	pxor	%xmm0,%xmm6
2049	movdqa	%xmm1,96(%esp)
2050	movups	-32(%edx,%ecx,1),%xmm1
2051	pxor	(%esp),%xmm2
2052	pxor	16(%esp),%xmm3
2053	pxor	32(%esp),%xmm4
2054	pxor	48(%esp),%xmm5
2055	pxor	64(%esp),%xmm6
2056	movups	-16(%edx,%ecx,1),%xmm0
2057.byte	102,15,56,220,209
2058.byte	102,15,56,220,217
2059.byte	102,15,56,220,225
2060.byte	102,15,56,220,233
2061.byte	102,15,56,220,241
2062.byte	102,15,56,220,249
2063	movl	120(%esp),%edi
2064	call	L_aesni_encrypt6_enter
2065	movdqa	64(%esp),%xmm0
2066	pxor	(%esp),%xmm2
2067	pxor	16(%esp),%xmm3
2068	pxor	32(%esp),%xmm4
2069	pxor	48(%esp),%xmm5
2070	pxor	%xmm0,%xmm6
2071	movdqa	96(%esp),%xmm1
2072	movdqu	%xmm2,(%edi,%esi,1)
2073	movdqu	%xmm3,16(%edi,%esi,1)
2074	movdqu	%xmm4,32(%edi,%esi,1)
2075	movdqu	%xmm5,48(%edi,%esi,1)
2076	movdqu	%xmm6,64(%edi,%esi,1)
2077	jmp	L078done
2078.align	4,0x90
# One block left.
2079L079one:
2080	movdqu	(%ebx),%xmm7
2081	movl	112(%esp),%edx
2082	movdqu	(%esi),%xmm2
2083	movl	240(%edx),%ecx
2084	pxor	%xmm0,%xmm7
2085	pxor	%xmm2,%xmm1
2086	pxor	%xmm7,%xmm2
2087	movdqa	%xmm1,%xmm6
2088	movl	120(%esp),%edi
2089	movups	(%edx),%xmm0
2090	movups	16(%edx),%xmm1
2091	leal	32(%edx),%edx
2092	xorps	%xmm0,%xmm2
2093L083enc1_loop_16:
2094.byte	102,15,56,220,209
2095	decl	%ecx
2096	movups	(%edx),%xmm1
2097	leal	16(%edx),%edx
2098	jnz	L083enc1_loop_16
2099.byte	102,15,56,221,209
2100	xorps	%xmm7,%xmm2
2101	movdqa	%xmm7,%xmm0
2102	movdqa	%xmm6,%xmm1
2103	movups	%xmm2,(%edi,%esi,1)
2104	jmp	L078done
2105.align	4,0x90
# Two blocks left.
2106L080two:
2107	leal	1(%ebp),%ecx
2108	movl	112(%esp),%edx
2109	bsfl	%ecx,%ecx
2110	shll	$4,%ecx
2111	movdqu	(%ebx),%xmm6
2112	movdqu	(%ebx,%ecx,1),%xmm7
2113	movdqu	(%esi),%xmm2
2114	movdqu	16(%esi),%xmm3
2115	movl	240(%edx),%ecx
2116	pxor	%xmm0,%xmm6
2117	pxor	%xmm6,%xmm7
2118	pxor	%xmm2,%xmm1
2119	pxor	%xmm6,%xmm2
2120	pxor	%xmm3,%xmm1
2121	pxor	%xmm7,%xmm3
2122	movdqa	%xmm1,%xmm5
2123	movl	120(%esp),%edi
2124	call	__aesni_encrypt2
2125	xorps	%xmm6,%xmm2
2126	xorps	%xmm7,%xmm3
2127	movdqa	%xmm7,%xmm0
2128	movdqa	%xmm5,%xmm1
2129	movups	%xmm2,(%edi,%esi,1)
2130	movups	%xmm3,16(%edi,%esi,1)
2131	jmp	L078done
2132.align	4,0x90
# Three blocks left.
2133L081three:
2134	leal	1(%ebp),%ecx
2135	movl	112(%esp),%edx
2136	bsfl	%ecx,%ecx
2137	shll	$4,%ecx
2138	movdqu	(%ebx),%xmm5
2139	movdqu	(%ebx,%ecx,1),%xmm6
2140	movdqa	%xmm5,%xmm7
2141	movdqu	(%esi),%xmm2
2142	movdqu	16(%esi),%xmm3
2143	movdqu	32(%esi),%xmm4
2144	movl	240(%edx),%ecx
2145	pxor	%xmm0,%xmm5
2146	pxor	%xmm5,%xmm6
2147	pxor	%xmm6,%xmm7
2148	pxor	%xmm2,%xmm1
2149	pxor	%xmm5,%xmm2
2150	pxor	%xmm3,%xmm1
2151	pxor	%xmm6,%xmm3
2152	pxor	%xmm4,%xmm1
2153	pxor	%xmm7,%xmm4
2154	movdqa	%xmm1,96(%esp)
2155	movl	120(%esp),%edi
2156	call	__aesni_encrypt3
2157	xorps	%xmm5,%xmm2
2158	xorps	%xmm6,%xmm3
2159	xorps	%xmm7,%xmm4
2160	movdqa	%xmm7,%xmm0
2161	movdqa	96(%esp),%xmm1
2162	movups	%xmm2,(%edi,%esi,1)
2163	movups	%xmm3,16(%edi,%esi,1)
2164	movups	%xmm4,32(%edi,%esi,1)
2165	jmp	L078done
2166.align	4,0x90
# Four blocks left: first two offsets spilled to (%esp)/16(%esp).
2167L082four:
2168	leal	1(%ebp),%ecx
2169	leal	3(%ebp),%eax
2170	bsfl	%ecx,%ecx
2171	bsfl	%eax,%eax
2172	movl	112(%esp),%edx
2173	shll	$4,%ecx
2174	shll	$4,%eax
2175	movdqu	(%ebx),%xmm4
2176	movdqu	(%ebx,%ecx,1),%xmm5
2177	movdqa	%xmm4,%xmm6
2178	movdqu	(%ebx,%eax,1),%xmm7
2179	pxor	%xmm0,%xmm4
2180	movdqu	(%esi),%xmm2
2181	pxor	%xmm4,%xmm5
2182	movdqu	16(%esi),%xmm3
2183	pxor	%xmm5,%xmm6
2184	movdqa	%xmm4,(%esp)
2185	pxor	%xmm6,%xmm7
2186	movdqa	%xmm5,16(%esp)
2187	movdqu	32(%esi),%xmm4
2188	movdqu	48(%esi),%xmm5
2189	movl	240(%edx),%ecx
2190	pxor	%xmm2,%xmm1
2191	pxor	(%esp),%xmm2
2192	pxor	%xmm3,%xmm1
2193	pxor	16(%esp),%xmm3
2194	pxor	%xmm4,%xmm1
2195	pxor	%xmm6,%xmm4
2196	pxor	%xmm5,%xmm1
2197	pxor	%xmm7,%xmm5
2198	movdqa	%xmm1,96(%esp)
2199	movl	120(%esp),%edi
2200	call	__aesni_encrypt4
2201	xorps	(%esp),%xmm2
2202	xorps	16(%esp),%xmm3
2203	xorps	%xmm6,%xmm4
2204	movups	%xmm2,(%edi,%esi,1)
2205	xorps	%xmm7,%xmm5
2206	movups	%xmm3,16(%edi,%esi,1)
2207	movdqa	%xmm7,%xmm0
2208	movups	%xmm4,32(%edi,%esi,1)
2209	movdqa	96(%esp),%xmm1
2210	movups	%xmm5,48(%edi,%esi,1)
# Scrub the scratch frame, restore %esp, write back the final offset
# (%xmm0) and checksum (%xmm1), clear remaining XMM state and return.
2211L078done:
2212	movl	128(%esp),%edx
2213	pxor	%xmm2,%xmm2
2214	pxor	%xmm3,%xmm3
2215	movdqa	%xmm2,(%esp)
2216	pxor	%xmm4,%xmm4
2217	movdqa	%xmm2,16(%esp)
2218	pxor	%xmm5,%xmm5
2219	movdqa	%xmm2,32(%esp)
2220	pxor	%xmm6,%xmm6
2221	movdqa	%xmm2,48(%esp)
2222	pxor	%xmm7,%xmm7
2223	movdqa	%xmm2,64(%esp)
2224	movdqa	%xmm2,80(%esp)
2225	movdqa	%xmm2,96(%esp)
2226	leal	(%edx),%esp
2227	movl	40(%esp),%ecx
2228	movl	48(%esp),%ebx
2229	movdqu	%xmm0,(%ecx)
2230	pxor	%xmm0,%xmm0
2231	movdqu	%xmm1,(%ebx)
2232	pxor	%xmm1,%xmm1
2233	popl	%edi
2234	popl	%esi
2235	popl	%ebx
2236	popl	%ebp
2237	ret
# _aesni_ocb_decrypt — AES-OCB decryption using AES-NI, 32-bit x86.
# Mirror of _aesni_ocb_encrypt with two differences: the cipher direction
# is aesdec (opcode bytes 102,15,56,222/223), and the checksum (%xmm1) is
# accumulated over the *plaintext outputs* after decryption rather than
# over the inputs.
# NOTE(review): argument layout inferred from the loads below (offsets are
# relative to %esp after the four pushes): 20=in, 24=out, 28=blocks,
# 32=key, 36=starting block index, 40=offset ptr, 44=L_ table,
# 48=checksum ptr — confirm against the C prototype.
2238.globl	_aesni_ocb_decrypt
2239.align	4
2240_aesni_ocb_decrypt:
2241L_aesni_ocb_decrypt_begin:
2242	%ifdef __CET__

2243
2244.byte	243,15,30,251
2245	%endif

2246
2247	pushl	%ebp
2248	pushl	%ebx
2249	pushl	%esi
2250	pushl	%edi
2251	movl	40(%esp),%ecx
2252	movl	48(%esp),%ebx
2253	movl	20(%esp),%esi
2254	movl	24(%esp),%edi
2255	movl	28(%esp),%eax
2256	movl	32(%esp),%edx
2257	movdqu	(%ecx),%xmm0
2258	movl	36(%esp),%ebp
2259	movdqu	(%ebx),%xmm1
2260	movl	44(%esp),%ebx
# 132-byte aligned scratch frame: 120(%esp)=out-in delta, 124(%esp)=input
# address of last full 6-block group, 128(%esp)=saved %esp.
2261	movl	%esp,%ecx
2262	subl	$132,%esp
2263	andl	$-16,%esp
2264	subl	%esi,%edi
2265	shll	$4,%eax
2266	leal	-96(%esi,%eax,1),%eax
2267	movl	%edi,120(%esp)
2268	movl	%eax,124(%esp)
2269	movl	%ecx,128(%esp)
2270	movl	240(%edx),%ecx
# Align the block index to odd by decrypting one block up front (checksum
# is XORed with the decrypted output via %xmm1 below).
2271	testl	$1,%ebp
2272	jnz	L084odd
2273	bsfl	%ebp,%eax
2274	addl	$1,%ebp
2275	shll	$4,%eax
2276	movdqu	(%ebx,%eax,1),%xmm7
2277	movl	%edx,%eax
2278	movdqu	(%esi),%xmm2
2279	leal	16(%esi),%esi
2280	pxor	%xmm0,%xmm7
2281	pxor	%xmm7,%xmm2
2282	movdqa	%xmm1,%xmm6
2283	movups	(%edx),%xmm0
2284	movups	16(%edx),%xmm1
2285	leal	32(%edx),%edx
2286	xorps	%xmm0,%xmm2
2287L085dec1_loop_17:
2288.byte	102,15,56,222,209
2289	decl	%ecx
2290	movups	(%edx),%xmm1
2291	leal	16(%edx),%edx
2292	jnz	L085dec1_loop_17
2293.byte	102,15,56,223,209
2294	xorps	%xmm7,%xmm2
2295	movaps	%xmm6,%xmm1
2296	movdqa	%xmm7,%xmm0
2297	xorps	%xmm2,%xmm1
2298	movups	%xmm2,-16(%edi,%esi,1)
2299	movl	240(%eax),%ecx
2300	movl	%eax,%edx
2301	movl	124(%esp),%eax
# 112(%esp)=key pointer, 116(%esp)=16-16*rounds (negative-offset bias used
# to address the first round keys from the advanced key pointer in %edx).
2302L084odd:
2303	shll	$4,%ecx
2304	movl	$16,%edi
2305	subl	%ecx,%edi
2306	movl	%edx,112(%esp)
2307	leal	32(%edx,%ecx,1),%edx
2308	movl	%edi,116(%esp)
2309	cmpl	%eax,%esi
2310	ja	L086short
2311	jmp	L087grandloop
2312.align	5,0x90
# Grand loop: derive six chained offsets (offset ^= L_[ntz(i)]) into
# 0..80(%esp), whiten six ciphertext blocks, decrypt, un-whiten, XOR each
# plaintext into the checksum, and store.
2313L087grandloop:
2314	leal	1(%ebp),%ecx
2315	leal	3(%ebp),%eax
2316	leal	5(%ebp),%edi
2317	addl	$6,%ebp
2318	bsfl	%ecx,%ecx
2319	bsfl	%eax,%eax
2320	bsfl	%edi,%edi
2321	shll	$4,%ecx
2322	shll	$4,%eax
2323	shll	$4,%edi
2324	movdqu	(%ebx),%xmm2
2325	movdqu	(%ebx,%ecx,1),%xmm3
2326	movl	116(%esp),%ecx
2327	movdqa	%xmm2,%xmm4
2328	movdqu	(%ebx,%eax,1),%xmm5
2329	movdqa	%xmm2,%xmm6
2330	movdqu	(%ebx,%edi,1),%xmm7
2331	pxor	%xmm0,%xmm2
2332	pxor	%xmm2,%xmm3
2333	movdqa	%xmm2,(%esp)
2334	pxor	%xmm3,%xmm4
2335	movdqa	%xmm3,16(%esp)
2336	pxor	%xmm4,%xmm5
2337	movdqa	%xmm4,32(%esp)
2338	pxor	%xmm5,%xmm6
2339	movdqa	%xmm5,48(%esp)
2340	pxor	%xmm6,%xmm7
2341	movdqa	%xmm6,64(%esp)
2342	movdqa	%xmm7,80(%esp)
2343	movups	-48(%edx,%ecx,1),%xmm0
2344	movdqu	(%esi),%xmm2
2345	movdqu	16(%esi),%xmm3
2346	movdqu	32(%esi),%xmm4
2347	movdqu	48(%esi),%xmm5
2348	movdqu	64(%esi),%xmm6
2349	movdqu	80(%esi),%xmm7
2350	leal	96(%esi),%esi
# Checksum parked at 96(%esp); fold round key 0 into all six lanes.
2351	movdqa	%xmm1,96(%esp)
2352	pxor	%xmm0,%xmm2
2353	pxor	%xmm0,%xmm3
2354	pxor	%xmm0,%xmm4
2355	pxor	%xmm0,%xmm5
2356	pxor	%xmm0,%xmm6
2357	pxor	%xmm0,%xmm7
2358	movups	-32(%edx,%ecx,1),%xmm1
2359	pxor	(%esp),%xmm2
2360	pxor	16(%esp),%xmm3
2361	pxor	32(%esp),%xmm4
2362	pxor	48(%esp),%xmm5
2363	pxor	64(%esp),%xmm6
2364	pxor	80(%esp),%xmm7
2365	movups	-16(%edx,%ecx,1),%xmm0
2366.byte	102,15,56,222,209
2367.byte	102,15,56,222,217
2368.byte	102,15,56,222,225
2369.byte	102,15,56,222,233
2370.byte	102,15,56,222,241
2371.byte	102,15,56,222,249
2372	movl	120(%esp),%edi
2373	movl	124(%esp),%eax
2374	call	L_aesni_decrypt6_enter
# Un-whiten, accumulate plaintext checksum, and store at out = in + delta.
2375	movdqa	80(%esp),%xmm0
2376	pxor	(%esp),%xmm2
2377	movdqa	96(%esp),%xmm1
2378	pxor	16(%esp),%xmm3
2379	pxor	32(%esp),%xmm4
2380	pxor	48(%esp),%xmm5
2381	pxor	64(%esp),%xmm6
2382	pxor	%xmm0,%xmm7
2383	pxor	%xmm2,%xmm1
2384	movdqu	%xmm2,-96(%edi,%esi,1)
2385	pxor	%xmm3,%xmm1
2386	movdqu	%xmm3,-80(%edi,%esi,1)
2387	pxor	%xmm4,%xmm1
2388	movdqu	%xmm4,-64(%edi,%esi,1)
2389	pxor	%xmm5,%xmm1
2390	movdqu	%xmm5,-48(%edi,%esi,1)
2391	pxor	%xmm6,%xmm1
2392	movdqu	%xmm6,-32(%edi,%esi,1)
2393	pxor	%xmm7,%xmm1
2394	movdqu	%xmm7,-16(%edi,%esi,1)
2395	cmpl	%eax,%esi
2396	jbe	L087grandloop
# Tail: 0..5 blocks remain; dispatch on the exact count.
2397L086short:
2398	addl	$96,%eax
2399	subl	%esi,%eax
2400	jz	L088done
2401	cmpl	$32,%eax
2402	jb	L089one
2403	je	L090two
2404	cmpl	$64,%eax
2405	jb	L091three
2406	je	L092four
# Five blocks: sixth lane zeroed.
2407	leal	1(%ebp),%ecx
2408	leal	3(%ebp),%eax
2409	bsfl	%ecx,%ecx
2410	bsfl	%eax,%eax
2411	shll	$4,%ecx
2412	shll	$4,%eax
2413	movdqu	(%ebx),%xmm2
2414	movdqu	(%ebx,%ecx,1),%xmm3
2415	movl	116(%esp),%ecx
2416	movdqa	%xmm2,%xmm4
2417	movdqu	(%ebx,%eax,1),%xmm5
2418	movdqa	%xmm2,%xmm6
2419	pxor	%xmm0,%xmm2
2420	pxor	%xmm2,%xmm3
2421	movdqa	%xmm2,(%esp)
2422	pxor	%xmm3,%xmm4
2423	movdqa	%xmm3,16(%esp)
2424	pxor	%xmm4,%xmm5
2425	movdqa	%xmm4,32(%esp)
2426	pxor	%xmm5,%xmm6
2427	movdqa	%xmm5,48(%esp)
2428	pxor	%xmm6,%xmm7
2429	movdqa	%xmm6,64(%esp)
2430	movups	-48(%edx,%ecx,1),%xmm0
2431	movdqu	(%esi),%xmm2
2432	movdqu	16(%esi),%xmm3
2433	movdqu	32(%esi),%xmm4
2434	movdqu	48(%esi),%xmm5
2435	movdqu	64(%esi),%xmm6
2436	pxor	%xmm7,%xmm7
2437	movdqa	%xmm1,96(%esp)
2438	pxor	%xmm0,%xmm2
2439	pxor	%xmm0,%xmm3
2440	pxor	%xmm0,%xmm4
2441	pxor	%xmm0,%xmm5
2442	pxor	%xmm0,%xmm6
2443	movups	-32(%edx,%ecx,1),%xmm1
2444	pxor	(%esp),%xmm2
2445	pxor	16(%esp),%xmm3
2446	pxor	32(%esp),%xmm4
2447	pxor	48(%esp),%xmm5
2448	pxor	64(%esp),%xmm6
2449	movups	-16(%edx,%ecx,1),%xmm0
2450.byte	102,15,56,222,209
2451.byte	102,15,56,222,217
2452.byte	102,15,56,222,225
2453.byte	102,15,56,222,233
2454.byte	102,15,56,222,241
2455.byte	102,15,56,222,249
2456	movl	120(%esp),%edi
2457	call	L_aesni_decrypt6_enter
2458	movdqa	64(%esp),%xmm0
2459	pxor	(%esp),%xmm2
2460	movdqa	96(%esp),%xmm1
2461	pxor	16(%esp),%xmm3
2462	pxor	32(%esp),%xmm4
2463	pxor	48(%esp),%xmm5
2464	pxor	%xmm0,%xmm6
2465	pxor	%xmm2,%xmm1
2466	movdqu	%xmm2,(%edi,%esi,1)
2467	pxor	%xmm3,%xmm1
2468	movdqu	%xmm3,16(%edi,%esi,1)
2469	pxor	%xmm4,%xmm1
2470	movdqu	%xmm4,32(%edi,%esi,1)
2471	pxor	%xmm5,%xmm1
2472	movdqu	%xmm5,48(%edi,%esi,1)
2473	pxor	%xmm6,%xmm1
2474	movdqu	%xmm6,64(%edi,%esi,1)
2475	jmp	L088done
2476.align	4,0x90
# One block left.
2477L089one:
2478	movdqu	(%ebx),%xmm7
2479	movl	112(%esp),%edx
2480	movdqu	(%esi),%xmm2
2481	movl	240(%edx),%ecx
2482	pxor	%xmm0,%xmm7
2483	pxor	%xmm7,%xmm2
2484	movdqa	%xmm1,%xmm6
2485	movl	120(%esp),%edi
2486	movups	(%edx),%xmm0
2487	movups	16(%edx),%xmm1
2488	leal	32(%edx),%edx
2489	xorps	%xmm0,%xmm2
2490L093dec1_loop_18:
2491.byte	102,15,56,222,209
2492	decl	%ecx
2493	movups	(%edx),%xmm1
2494	leal	16(%edx),%edx
2495	jnz	L093dec1_loop_18
2496.byte	102,15,56,223,209
2497	xorps	%xmm7,%xmm2
2498	movaps	%xmm6,%xmm1
2499	movdqa	%xmm7,%xmm0
2500	xorps	%xmm2,%xmm1
2501	movups	%xmm2,(%edi,%esi,1)
2502	jmp	L088done
2503.align	4,0x90
# Two blocks left.
2504L090two:
2505	leal	1(%ebp),%ecx
2506	movl	112(%esp),%edx
2507	bsfl	%ecx,%ecx
2508	shll	$4,%ecx
2509	movdqu	(%ebx),%xmm6
2510	movdqu	(%ebx,%ecx,1),%xmm7
2511	movdqu	(%esi),%xmm2
2512	movdqu	16(%esi),%xmm3
2513	movl	240(%edx),%ecx
2514	movdqa	%xmm1,%xmm5
2515	pxor	%xmm0,%xmm6
2516	pxor	%xmm6,%xmm7
2517	pxor	%xmm6,%xmm2
2518	pxor	%xmm7,%xmm3
2519	movl	120(%esp),%edi
2520	call	__aesni_decrypt2
2521	xorps	%xmm6,%xmm2
2522	xorps	%xmm7,%xmm3
2523	movdqa	%xmm7,%xmm0
2524	xorps	%xmm2,%xmm5
2525	movups	%xmm2,(%edi,%esi,1)
2526	xorps	%xmm3,%xmm5
2527	movups	%xmm3,16(%edi,%esi,1)
2528	movaps	%xmm5,%xmm1
2529	jmp	L088done
2530.align	4,0x90
# Three blocks left.
2531L091three:
2532	leal	1(%ebp),%ecx
2533	movl	112(%esp),%edx
2534	bsfl	%ecx,%ecx
2535	shll	$4,%ecx
2536	movdqu	(%ebx),%xmm5
2537	movdqu	(%ebx,%ecx,1),%xmm6
2538	movdqa	%xmm5,%xmm7
2539	movdqu	(%esi),%xmm2
2540	movdqu	16(%esi),%xmm3
2541	movdqu	32(%esi),%xmm4
2542	movl	240(%edx),%ecx
2543	movdqa	%xmm1,96(%esp)
2544	pxor	%xmm0,%xmm5
2545	pxor	%xmm5,%xmm6
2546	pxor	%xmm6,%xmm7
2547	pxor	%xmm5,%xmm2
2548	pxor	%xmm6,%xmm3
2549	pxor	%xmm7,%xmm4
2550	movl	120(%esp),%edi
2551	call	__aesni_decrypt3
2552	movdqa	96(%esp),%xmm1
2553	xorps	%xmm5,%xmm2
2554	xorps	%xmm6,%xmm3
2555	xorps	%xmm7,%xmm4
2556	movups	%xmm2,(%edi,%esi,1)
2557	pxor	%xmm2,%xmm1
2558	movdqa	%xmm7,%xmm0
2559	movups	%xmm3,16(%edi,%esi,1)
2560	pxor	%xmm3,%xmm1
2561	movups	%xmm4,32(%edi,%esi,1)
2562	pxor	%xmm4,%xmm1
2563	jmp	L088done
2564.align	4,0x90
# Four blocks left: first two offsets spilled to (%esp)/16(%esp).
2565L092four:
2566	leal	1(%ebp),%ecx
2567	leal	3(%ebp),%eax
2568	bsfl	%ecx,%ecx
2569	bsfl	%eax,%eax
2570	movl	112(%esp),%edx
2571	shll	$4,%ecx
2572	shll	$4,%eax
2573	movdqu	(%ebx),%xmm4
2574	movdqu	(%ebx,%ecx,1),%xmm5
2575	movdqa	%xmm4,%xmm6
2576	movdqu	(%ebx,%eax,1),%xmm7
2577	pxor	%xmm0,%xmm4
2578	movdqu	(%esi),%xmm2
2579	pxor	%xmm4,%xmm5
2580	movdqu	16(%esi),%xmm3
2581	pxor	%xmm5,%xmm6
2582	movdqa	%xmm4,(%esp)
2583	pxor	%xmm6,%xmm7
2584	movdqa	%xmm5,16(%esp)
2585	movdqu	32(%esi),%xmm4
2586	movdqu	48(%esi),%xmm5
2587	movl	240(%edx),%ecx
2588	movdqa	%xmm1,96(%esp)
2589	pxor	(%esp),%xmm2
2590	pxor	16(%esp),%xmm3
2591	pxor	%xmm6,%xmm4
2592	pxor	%xmm7,%xmm5
2593	movl	120(%esp),%edi
2594	call	__aesni_decrypt4
2595	movdqa	96(%esp),%xmm1
2596	xorps	(%esp),%xmm2
2597	xorps	16(%esp),%xmm3
2598	xorps	%xmm6,%xmm4
2599	movups	%xmm2,(%edi,%esi,1)
2600	pxor	%xmm2,%xmm1
2601	xorps	%xmm7,%xmm5
2602	movups	%xmm3,16(%edi,%esi,1)
2603	pxor	%xmm3,%xmm1
2604	movdqa	%xmm7,%xmm0
2605	movups	%xmm4,32(%edi,%esi,1)
2606	pxor	%xmm4,%xmm1
2607	movups	%xmm5,48(%edi,%esi,1)
2608	pxor	%xmm5,%xmm1
# Scrub the scratch frame, restore %esp, write back the final offset
# (%xmm0) and checksum (%xmm1), clear remaining XMM state and return.
2609L088done:
2610	movl	128(%esp),%edx
2611	pxor	%xmm2,%xmm2
2612	pxor	%xmm3,%xmm3
2613	movdqa	%xmm2,(%esp)
2614	pxor	%xmm4,%xmm4
2615	movdqa	%xmm2,16(%esp)
2616	pxor	%xmm5,%xmm5
2617	movdqa	%xmm2,32(%esp)
2618	pxor	%xmm6,%xmm6
2619	movdqa	%xmm2,48(%esp)
2620	pxor	%xmm7,%xmm7
2621	movdqa	%xmm2,64(%esp)
2622	movdqa	%xmm2,80(%esp)
2623	movdqa	%xmm2,96(%esp)
2624	leal	(%edx),%esp
2625	movl	40(%esp),%ecx
2626	movl	48(%esp),%ebx
2627	movdqu	%xmm0,(%ecx)
2628	pxor	%xmm0,%xmm0
2629	movdqu	%xmm1,(%ebx)
2630	pxor	%xmm1,%xmm1
2631	popl	%edi
2632	popl	%esi
2633	popl	%ebx
2634	popl	%ebp
2635	ret
2636.globl	_aesni_cbc_encrypt
2637.align	4
2638_aesni_cbc_encrypt:
2639L_aesni_cbc_encrypt_begin:
2640	%ifdef __CET__
2641
2642.byte	243,15,30,251
2643	%endif
2644
2645	pushl	%ebp
2646	pushl	%ebx
2647	pushl	%esi
2648	pushl	%edi
2649	movl	20(%esp),%esi
2650	movl	%esp,%ebx
2651	movl	24(%esp),%edi
2652	subl	$24,%ebx
2653	movl	28(%esp),%eax
2654	andl	$-16,%ebx
2655	movl	32(%esp),%edx
2656	movl	36(%esp),%ebp
2657	testl	%eax,%eax
2658	jz	L094cbc_abort
2659	cmpl	$0,40(%esp)
2660	xchgl	%esp,%ebx
2661	movups	(%ebp),%xmm7
2662	movl	240(%edx),%ecx
2663	movl	%edx,%ebp
2664	movl	%ebx,16(%esp)
2665	movl	%ecx,%ebx
2666	je	L095cbc_decrypt
2667	movaps	%xmm7,%xmm2
2668	cmpl	$16,%eax
2669	jb	L096cbc_enc_tail
2670	subl	$16,%eax
2671	jmp	L097cbc_enc_loop
2672.align	4,0x90
2673L097cbc_enc_loop:
2674	movups	(%esi),%xmm7
2675	leal	16(%esi),%esi
2676	movups	(%edx),%xmm0
2677	movups	16(%edx),%xmm1
2678	xorps	%xmm0,%xmm7
2679	leal	32(%edx),%edx
2680	xorps	%xmm7,%xmm2
2681L098enc1_loop_19:
2682.byte	102,15,56,220,209
2683	decl	%ecx
2684	movups	(%edx),%xmm1
2685	leal	16(%edx),%edx
2686	jnz	L098enc1_loop_19
2687.byte	102,15,56,221,209
2688	movl	%ebx,%ecx
2689	movl	%ebp,%edx
2690	movups	%xmm2,(%edi)
2691	leal	16(%edi),%edi
2692	subl	$16,%eax
2693	jnc	L097cbc_enc_loop
2694	addl	$16,%eax
2695	jnz	L096cbc_enc_tail
2696	movaps	%xmm2,%xmm7
2697	pxor	%xmm2,%xmm2
2698	jmp	L099cbc_ret
2699L096cbc_enc_tail:
2700	movl	%eax,%ecx
2701.long	2767451785
2702	movl	$16,%ecx
2703	subl	%eax,%ecx
2704	xorl	%eax,%eax
2705.long	2868115081
2706	leal	-16(%edi),%edi
2707	movl	%ebx,%ecx
2708	movl	%edi,%esi
2709	movl	%ebp,%edx
2710	jmp	L097cbc_enc_loop
2711.align	4,0x90
2712L095cbc_decrypt:
2713	cmpl	$80,%eax
2714	jbe	L100cbc_dec_tail
2715	movaps	%xmm7,(%esp)
2716	subl	$80,%eax
2717	jmp	L101cbc_dec_loop6_enter
2718.align	4,0x90
2719L102cbc_dec_loop6:
2720	movaps	%xmm0,(%esp)
2721	movups	%xmm7,(%edi)
2722	leal	16(%edi),%edi
2723L101cbc_dec_loop6_enter:
2724	movdqu	(%esi),%xmm2
2725	movdqu	16(%esi),%xmm3
2726	movdqu	32(%esi),%xmm4
2727	movdqu	48(%esi),%xmm5
2728	movdqu	64(%esi),%xmm6
2729	movdqu	80(%esi),%xmm7
2730	call	__aesni_decrypt6
2731	movups	(%esi),%xmm1
2732	movups	16(%esi),%xmm0
2733	xorps	(%esp),%xmm2
2734	xorps	%xmm1,%xmm3
2735	movups	32(%esi),%xmm1
2736	xorps	%xmm0,%xmm4
2737	movups	48(%esi),%xmm0
2738	xorps	%xmm1,%xmm5
2739	movups	64(%esi),%xmm1
2740	xorps	%xmm0,%xmm6
2741	movups	80(%esi),%xmm0
2742	xorps	%xmm1,%xmm7
2743	movups	%xmm2,(%edi)
2744	movups	%xmm3,16(%edi)
2745	leal	96(%esi),%esi
2746	movups	%xmm4,32(%edi)
2747	movl	%ebx,%ecx
2748	movups	%xmm5,48(%edi)
2749	movl	%ebp,%edx
2750	movups	%xmm6,64(%edi)
2751	leal	80(%edi),%edi
2752	subl	$96,%eax
2753	ja	L102cbc_dec_loop6
2754	movaps	%xmm7,%xmm2
2755	movaps	%xmm0,%xmm7
2756	addl	$80,%eax
2757	jle	L103cbc_dec_clear_tail_collected
2758	movups	%xmm2,(%edi)
2759	leal	16(%edi),%edi
2760L100cbc_dec_tail:
2761	movups	(%esi),%xmm2
2762	movaps	%xmm2,%xmm6
2763	cmpl	$16,%eax
2764	jbe	L104cbc_dec_one
2765	movups	16(%esi),%xmm3
2766	movaps	%xmm3,%xmm5
2767	cmpl	$32,%eax
2768	jbe	L105cbc_dec_two
2769	movups	32(%esi),%xmm4
2770	cmpl	$48,%eax
2771	jbe	L106cbc_dec_three
2772	movups	48(%esi),%xmm5
2773	cmpl	$64,%eax
2774	jbe	L107cbc_dec_four
2775	movups	64(%esi),%xmm6
2776	movaps	%xmm7,(%esp)
2777	movups	(%esi),%xmm2
2778	xorps	%xmm7,%xmm7
2779	call	__aesni_decrypt6
2780	movups	(%esi),%xmm1
2781	movups	16(%esi),%xmm0
2782	xorps	(%esp),%xmm2
2783	xorps	%xmm1,%xmm3
2784	movups	32(%esi),%xmm1
2785	xorps	%xmm0,%xmm4
2786	movups	48(%esi),%xmm0
2787	xorps	%xmm1,%xmm5
2788	movups	64(%esi),%xmm7
2789	xorps	%xmm0,%xmm6
2790	movups	%xmm2,(%edi)
2791	movups	%xmm3,16(%edi)
2792	pxor	%xmm3,%xmm3
2793	movups	%xmm4,32(%edi)
2794	pxor	%xmm4,%xmm4
2795	movups	%xmm5,48(%edi)
2796	pxor	%xmm5,%xmm5
2797	leal	64(%edi),%edi
2798	movaps	%xmm6,%xmm2
2799	pxor	%xmm6,%xmm6
2800	subl	$80,%eax
2801	jmp	L108cbc_dec_tail_collected
2802.align	4,0x90
2803L104cbc_dec_one:
2804	movups	(%edx),%xmm0
2805	movups	16(%edx),%xmm1
2806	leal	32(%edx),%edx
2807	xorps	%xmm0,%xmm2
2808L109dec1_loop_20:
2809.byte	102,15,56,222,209
2810	decl	%ecx
2811	movups	(%edx),%xmm1
2812	leal	16(%edx),%edx
2813	jnz	L109dec1_loop_20
2814.byte	102,15,56,223,209
2815	xorps	%xmm7,%xmm2
2816	movaps	%xmm6,%xmm7
2817	subl	$16,%eax
2818	jmp	L108cbc_dec_tail_collected
2819.align	4,0x90
2820L105cbc_dec_two:
2821	call	__aesni_decrypt2
2822	xorps	%xmm7,%xmm2
2823	xorps	%xmm6,%xmm3
2824	movups	%xmm2,(%edi)
2825	movaps	%xmm3,%xmm2
2826	pxor	%xmm3,%xmm3
2827	leal	16(%edi),%edi
2828	movaps	%xmm5,%xmm7
2829	subl	$32,%eax
2830	jmp	L108cbc_dec_tail_collected
2831.align	4,0x90
2832L106cbc_dec_three:
2833	call	__aesni_decrypt3
2834	xorps	%xmm7,%xmm2
2835	xorps	%xmm6,%xmm3
2836	xorps	%xmm5,%xmm4
2837	movups	%xmm2,(%edi)
2838	movaps	%xmm4,%xmm2
2839	pxor	%xmm4,%xmm4
2840	movups	%xmm3,16(%edi)
2841	pxor	%xmm3,%xmm3
2842	leal	32(%edi),%edi
2843	movups	32(%esi),%xmm7
2844	subl	$48,%eax
2845	jmp	L108cbc_dec_tail_collected
2846.align	4,0x90
2847L107cbc_dec_four:
2848	call	__aesni_decrypt4
2849	movups	16(%esi),%xmm1
2850	movups	32(%esi),%xmm0
2851	xorps	%xmm7,%xmm2
2852	movups	48(%esi),%xmm7
2853	xorps	%xmm6,%xmm3
2854	movups	%xmm2,(%edi)
2855	xorps	%xmm1,%xmm4
2856	movups	%xmm3,16(%edi)
2857	pxor	%xmm3,%xmm3
2858	xorps	%xmm0,%xmm5
2859	movups	%xmm4,32(%edi)
2860	pxor	%xmm4,%xmm4
2861	leal	48(%edi),%edi
2862	movaps	%xmm5,%xmm2
2863	pxor	%xmm5,%xmm5
2864	subl	$64,%eax
2865	jmp	L108cbc_dec_tail_collected
2866.align	4,0x90
2867L103cbc_dec_clear_tail_collected:
2868	pxor	%xmm3,%xmm3
2869	pxor	%xmm4,%xmm4
2870	pxor	%xmm5,%xmm5
2871	pxor	%xmm6,%xmm6
2872L108cbc_dec_tail_collected:
2873	andl	$15,%eax
2874	jnz	L110cbc_dec_tail_partial
2875	movups	%xmm2,(%edi)
2876	pxor	%xmm0,%xmm0
2877	jmp	L099cbc_ret
2878.align	4,0x90
2879L110cbc_dec_tail_partial:
2880	movaps	%xmm2,(%esp)
2881	pxor	%xmm0,%xmm0
2882	movl	$16,%ecx
2883	movl	%esp,%esi
2884	subl	%eax,%ecx
2885.long	2767451785
2886	movdqa	%xmm2,(%esp)
2887L099cbc_ret:
2888	movl	16(%esp),%esp
2889	movl	36(%esp),%ebp
2890	pxor	%xmm2,%xmm2
2891	pxor	%xmm1,%xmm1
2892	movups	%xmm7,(%ebp)
2893	pxor	%xmm7,%xmm7
2894L094cbc_abort:
2895	popl	%edi
2896	popl	%esi
2897	popl	%ebx
2898	popl	%ebp
2899	ret
.align	4
/*
 * Internal AES-NI key-expansion core, reached only via the two public
 * wrappers below, which load a private register convention:
 *   %eax = user key pointer, %ecx = key size in bits (128/192/256),
 *   %edx = output AES_KEY schedule pointer.
 * Returns in %eax: 0 on success, -1 on NULL pointer, -2 on bad key size.
 * %ecx holds the round count (9/11/13) on the success path.
 */
__aesni_set_encrypt_key:
#ifdef __CET__

.byte	243,15,30,251	/* endbr32: CET indirect-branch landing pad */
#endif

	pushl	%ebp
	pushl	%ebx
	testl	%eax,%eax	/* NULL user key? */
	jz	L111bad_pointer
	testl	%edx,%edx	/* NULL schedule? */
	jz	L111bad_pointer
	/* establish PIC base in %ebx, then locate the constant pool */
	call	L112pic
L112pic:
	popl	%ebx
	leal	Lkey_const-L112pic(%ebx),%ebx
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
	movups	(%eax),%xmm0	/* load first 128 bits of user key */
	xorps	%xmm4,%xmm4
	movl	4(%ebp),%ebp	/* second OPENSSL_ia32cap_P word */
	leal	16(%edx),%edx
	andl	$268437504,%ebp	/* mask capability bits 28 and 11 */
	cmpl	$256,%ecx
	je	L11314rounds
	cmpl	$192,%ecx
	je	L11412rounds
	cmpl	$128,%ecx
	jne	L115bad_keybits
.align	4,0x90
L11610rounds:
	/* AES-128: 10 rounds; pick alternate schedule if only bit 28 is set */
	cmpl	$268435456,%ebp
	je	L11710rounds_alt
	movl	$9,%ecx
	movups	%xmm0,-16(%edx)
	/* aeskeygenassist $rcon,%xmm0,%xmm1 for rcon = 1,2,4,...,0x36 */
.byte	102,15,58,223,200,1
	call	L118key_128_cold
.byte	102,15,58,223,200,2
	call	L119key_128
.byte	102,15,58,223,200,4
	call	L119key_128
.byte	102,15,58,223,200,8
	call	L119key_128
.byte	102,15,58,223,200,16
	call	L119key_128
.byte	102,15,58,223,200,32
	call	L119key_128
.byte	102,15,58,223,200,64
	call	L119key_128
.byte	102,15,58,223,200,128
	call	L119key_128
.byte	102,15,58,223,200,27
	call	L119key_128
.byte	102,15,58,223,200,54
	call	L119key_128
	movups	%xmm0,(%edx)
	movl	%ecx,80(%edx)	/* store round count into the schedule */
	jmp	L120good_key
.align	4,0x90
L119key_128:
	/* emit previous round key, then mix in the keygenassist result */
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
L118key_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	/* broadcast rotated/substituted word */
	xorps	%xmm1,%xmm0
	ret
.align	4,0x90
L11710rounds_alt:
	/* alternate AES-128 schedule built from pshufb + aesenclast,
	   using the Lkey_const masks/round-constants in %xmm5/%xmm4 */
	movdqa	(%ebx),%xmm5
	movl	$8,%ecx
	movdqa	32(%ebx),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,-16(%edx)
L121loop_key128:
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
	pslld	$1,%xmm4	/* double the round constant */
	leal	16(%edx),%edx
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%edx)
	movdqa	%xmm0,%xmm2
	decl	%ecx
	jnz	L121loop_key128
	movdqa	48(%ebx),%xmm4	/* switch to the 0x1b round constants */
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%edx)
	movl	$9,%ecx
	movl	%ecx,96(%edx)
	jmp	L120good_key
.align	4,0x90
L11412rounds:
	/* AES-192: 12 rounds; key words 4-5 arrive in %xmm2 */
	movq	16(%eax),%xmm2
	cmpl	$268435456,%ebp
	je	L12212rounds_alt
	movl	$11,%ecx
	movups	%xmm0,-16(%edx)
	/* aeskeygenassist $rcon,%xmm2,%xmm1 for rcon = 1..128 */
.byte	102,15,58,223,202,1
	call	L123key_192a_cold
.byte	102,15,58,223,202,2
	call	L124key_192b
.byte	102,15,58,223,202,4
	call	L125key_192a
.byte	102,15,58,223,202,8
	call	L124key_192b
.byte	102,15,58,223,202,16
	call	L125key_192a
.byte	102,15,58,223,202,32
	call	L124key_192b
.byte	102,15,58,223,202,64
	call	L125key_192a
.byte	102,15,58,223,202,128
	call	L124key_192b
	movups	%xmm0,(%edx)
	movl	%ecx,48(%edx)	/* store round count */
	jmp	L120good_key
.align	4,0x90
L125key_192a:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.align	4,0x90
L123key_192a_cold:
	movaps	%xmm2,%xmm5
L126key_192b_warm:
	/* common 192-bit expansion step: fold keygenassist word into
	   %xmm0 and propagate into the 64-bit tail in %xmm2 */
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	ret
.align	4,0x90
L124key_192b:
	/* write out two schedule entries assembled from %xmm5/%xmm0/%xmm2 */
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%edx)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%edx)
	leal	32(%edx),%edx
	jmp	L126key_192b_warm
.align	4,0x90
L12212rounds_alt:
	/* alternate AES-192 schedule (pshufb + aesenclast) */
	movdqa	16(%ebx),%xmm5
	movdqa	32(%ebx),%xmm4
	movl	$8,%ecx
	movdqu	%xmm0,-16(%edx)
L127loop_key192:
	movq	%xmm2,(%edx)
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
	pslld	$1,%xmm4
	leal	24(%edx),%edx
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%edx)
	decl	%ecx
	jnz	L127loop_key192
	movl	$11,%ecx
	movl	%ecx,32(%edx)
	jmp	L120good_key
.align	4,0x90
L11314rounds:
	/* AES-256: 14 rounds; upper key half arrives in %xmm2 */
	movups	16(%eax),%xmm2
	leal	16(%edx),%edx
	cmpl	$268435456,%ebp
	je	L12814rounds_alt
	movl	$13,%ecx
	movups	%xmm0,-32(%edx)
	movups	%xmm2,-16(%edx)
	/* alternate aeskeygenassist on %xmm2 (256a) and %xmm0 (256b) */
.byte	102,15,58,223,202,1
	call	L129key_256a_cold
.byte	102,15,58,223,200,1
	call	L130key_256b
.byte	102,15,58,223,202,2
	call	L131key_256a
.byte	102,15,58,223,200,2
	call	L130key_256b
.byte	102,15,58,223,202,4
	call	L131key_256a
.byte	102,15,58,223,200,4
	call	L130key_256b
.byte	102,15,58,223,202,8
	call	L131key_256a
.byte	102,15,58,223,200,8
	call	L130key_256b
.byte	102,15,58,223,202,16
	call	L131key_256a
.byte	102,15,58,223,200,16
	call	L130key_256b
.byte	102,15,58,223,202,32
	call	L131key_256a
.byte	102,15,58,223,200,32
	call	L130key_256b
.byte	102,15,58,223,202,64
	call	L131key_256a
	movups	%xmm0,(%edx)
	movl	%ecx,16(%edx)	/* store round count */
	xorl	%eax,%eax
	jmp	L120good_key
.align	4,0x90
L131key_256a:
	movups	%xmm2,(%edx)
	leal	16(%edx),%edx
L129key_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret
.align	4,0x90
L130key_256b:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1
	xorps	%xmm1,%xmm2
	ret
.align	4,0x90
L12814rounds_alt:
	/* alternate AES-256 schedule (pshufb + aesenclast) */
	movdqa	(%ebx),%xmm5
	movdqa	32(%ebx),%xmm4
	movl	$7,%ecx
	movdqu	%xmm0,-32(%edx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,-16(%edx)
L132loop_key256:
.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	decl	%ecx
	jz	L133done_key256
	pshufd	$255,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211	/* aesenclast %xmm3,%xmm2 (zero rcon) */
	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1
	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%edx)
	leal	32(%edx),%edx
	movdqa	%xmm2,%xmm1
	jmp	L132loop_key256
L133done_key256:
	movl	$13,%ecx
	movl	%ecx,16(%edx)
L120good_key:
	/* scrub key material from registers before returning success */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorl	%eax,%eax
	popl	%ebx
	popl	%ebp
	ret
.align	2,0x90
L111bad_pointer:
	movl	$-1,%eax	/* NULL argument */
	popl	%ebx
	popl	%ebp
	ret
.align	2,0x90
L115bad_keybits:
	pxor	%xmm0,%xmm0
	movl	$-2,%eax	/* unsupported key size */
	popl	%ebx
	popl	%ebp
	ret
.globl	_aesni_set_encrypt_key
.align	4
/*
 * int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key);
 * cdecl wrapper: loads the three stack arguments into the internal
 * register convention (%eax=userKey, %ecx=bits, %edx=key) and tail-calls
 * the shared expansion core.  Return value comes back in %eax.
 */
_aesni_set_encrypt_key:
L_aesni_set_encrypt_key_begin:
#ifdef __CET__

.byte	243,15,30,251	/* endbr32: CET indirect-branch landing pad */
#endif

	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	__aesni_set_encrypt_key
	ret
.globl	_aesni_set_decrypt_key
.align	4
/*
 * int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key);
 * Builds the encryption schedule first, then converts it in place for
 * the Equivalent Inverse Cipher: the first and last round keys are
 * swapped, every interior round key is run through AESIMC, and pairs of
 * keys are exchanged end-to-end.  On return from the expansion core
 * %ecx holds the round count, so %ecx<<4 is the byte offset of the
 * final round key.  Returns the core's status in %eax (0 on success).
 */
_aesni_set_decrypt_key:
L_aesni_set_decrypt_key_begin:
#ifdef __CET__

.byte	243,15,30,251	/* endbr32: CET indirect-branch landing pad */
#endif

	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	__aesni_set_encrypt_key
	movl	12(%esp),%edx
	shll	$4,%ecx	/* rounds*16 = offset of last round key */
	testl	%eax,%eax
	jnz	L134dec_key_ret	/* propagate expansion failure */
	leal	16(%edx,%ecx,1),%eax	/* %eax -> last key, %edx -> first */
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
	movups	%xmm0,(%eax)	/* swap outermost pair untouched */
	movups	%xmm1,(%edx)
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
L135dec_key_inverse:
	/* walk inward from both ends, applying InvMixColumns and swapping */
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
.byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
.byte	102,15,56,219,201	/* aesimc %xmm1,%xmm1 */
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
	movups	%xmm0,16(%eax)
	movups	%xmm1,-16(%edx)
	cmpl	%edx,%eax
	ja	L135dec_key_inverse
	movups	(%edx),%xmm0	/* middle key: AESIMC in place */
.byte	102,15,56,219,192
	movups	%xmm0,(%edx)
	pxor	%xmm0,%xmm0	/* scrub key material */
	pxor	%xmm1,%xmm1
	xorl	%eax,%eax
L134dec_key_ret:
	ret
.align	6,0x90
Lkey_const:
/* Constant pool for the *_alt key-schedule paths, addressed PIC-relative
   through %ebx in __aesni_set_encrypt_key. */
.long	202313229,202313229,202313229,202313229	/* 0x0c0f0e0d byte-shuffle mask (pshufb) */
.long	67569157,67569157,67569157,67569157	/* 0x04070605 byte-shuffle mask (192-bit path) */
.long	1,1,1,1	/* initial AES round constant, doubled via pslld $1 */
.long	27,27,27,27	/* 0x1b round constant for the late rounds */
/* attribution string: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte	115,108,46,111,114,103,62,0
/* Mach-O non-lazy pointer: indirect reference to the CPU-capability
   vector, dereferenced during key setup to choose the schedule path. */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
.comm	_OPENSSL_ia32cap_P,16,2
3308