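; Home | Line# | Scopes# | Navigate# | Raw | Download
; (navigation residue from the code browser this listing was extracted from; it is not
; part of the assembly source. Each line below also starts with the browser's own
; line number fused onto the text.)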
1default	rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section	.text code align=64
6
7EXTERN	OPENSSL_ia32cap_P
8global	aesni_encrypt
9
10ALIGN	16
; aesni_encrypt -- encrypt a single 16-byte block with AES-NI.
; Register arguments as used below (Win64 order):
;   rcx = pointer to the 16-byte input block (unaligned load)
;   rdx = pointer to the 16-byte output block (unaligned store)
;   r8  = pointer to the key schedule; the dword at offset 240 is the number of
;         aesenc rounds executed before the final aesenclast
; Clobbers: eax, r8, xmm0-xmm2, flags.  xmm0-xmm2 are zeroed before returning
; (presumably so no key/data material is left in registers -- confirm intent).
11aesni_encrypt:
12	movups	xmm2,XMMWORD[rcx]	; xmm2 = input block
13	mov	eax,DWORD[240+r8]	; eax = aesenc loop count
14	movups	xmm0,XMMWORD[r8]	; xmm0 = first 16 bytes of the key schedule
15	movups	xmm1,XMMWORD[16+r8]	; xmm1 = next round key
16	lea	r8,[32+r8]		; r8 walks the remaining round keys
17	xorps	xmm2,xmm0		; initial key XOR
18$L$oop_enc1_1:
19DB	102,15,56,220,209		; aesenc xmm2,xmm1
20	dec	eax			; sets ZF for the jnz below (movups/lea leave flags alone)
21	movups	xmm1,XMMWORD[r8]	; fetch the following round key
22	lea	r8,[16+r8]
23	jnz	NEAR $L$oop_enc1_1
24DB	102,15,56,221,209		; aesenclast xmm2,xmm1
25	pxor	xmm0,xmm0		; wipe round-key registers
26	pxor	xmm1,xmm1
27	movups	XMMWORD[rdx],xmm2	; store the result block
28	pxor	xmm2,xmm2		; wipe the data register
29	DB	0F3h,0C3h		;repret
30
31
32global	aesni_decrypt
33
34ALIGN	16
; aesni_decrypt -- decrypt a single 16-byte block with AES-NI.
; Register arguments as used below (Win64 order):
;   rcx = pointer to the 16-byte input block (unaligned load)
;   rdx = pointer to the 16-byte output block (unaligned store)
;   r8  = pointer to the key schedule; the dword at offset 240 is the number of
;         aesdec rounds executed before the final aesdeclast
; Clobbers: eax, r8, xmm0-xmm2, flags.  xmm0-xmm2 are zeroed before returning.
35aesni_decrypt:
36	movups	xmm2,XMMWORD[rcx]	; xmm2 = input block
37	mov	eax,DWORD[240+r8]	; eax = aesdec loop count
38	movups	xmm0,XMMWORD[r8]	; xmm0 = first 16 bytes of the key schedule
39	movups	xmm1,XMMWORD[16+r8]	; xmm1 = next round key
40	lea	r8,[32+r8]		; r8 walks the remaining round keys
41	xorps	xmm2,xmm0		; initial key XOR
42$L$oop_dec1_2:
43DB	102,15,56,222,209		; aesdec xmm2,xmm1
44	dec	eax			; sets ZF for the jnz below (movups/lea leave flags alone)
45	movups	xmm1,XMMWORD[r8]	; fetch the following round key
46	lea	r8,[16+r8]
47	jnz	NEAR $L$oop_dec1_2
48DB	102,15,56,223,209		; aesdeclast xmm2,xmm1
49	pxor	xmm0,xmm0		; wipe round-key registers
50	pxor	xmm1,xmm1
51	movups	XMMWORD[rdx],xmm2	; store the result block
52	pxor	xmm2,xmm2		; wipe the data register
53	DB	0F3h,0C3h		;repret
54
55
56ALIGN	16
; _aesni_encrypt2 -- internal helper: encrypt the two blocks held in xmm2 and xmm3.
;   In:  rcx = key schedule pointer
;        eax = loop count (callers in this file take it from offset 240 of the key schedule)
;        xmm2, xmm3 = input blocks
;   Out: xmm2, xmm3 = encrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; NOTE: the round loop exits only when the offset in rax lands exactly on zero.
; rax starts at 16-16*count and advances by 32, so the count must be odd.
57_aesni_encrypt2:
58	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
59	shl	eax,4			; eax = count*16 (32-bit write also clears the top half of rax)
60	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key at +16
61	xorps	xmm2,xmm0		; initial key XOR on both blocks
62	xorps	xmm3,xmm0
63	movups	xmm0,XMMWORD[32+rcx]	; xmm0 = round key at +32
64	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
65	neg	rax
66	add	rax,16			; rax = 16 - 16*count, a negative offset that counts up to 0
67
68$L$enc_loop2:
69DB	102,15,56,220,209		; aesenc xmm2,xmm1
70DB	102,15,56,220,217		; aesenc xmm3,xmm1
71	movups	xmm1,XMMWORD[rax*1+rcx]	; next round key into xmm1
72	add	rax,32			; ZF from here feeds the jnz (movups does not touch flags)
73DB	102,15,56,220,208		; aesenc xmm2,xmm0
74DB	102,15,56,220,216		; aesenc xmm3,xmm0
75	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; next round key into xmm0
76	jnz	NEAR $L$enc_loop2
77
78DB	102,15,56,220,209		; aesenc xmm2,xmm1
79DB	102,15,56,220,217		; aesenc xmm3,xmm1
80DB	102,15,56,221,208		; aesenclast xmm2,xmm0
81DB	102,15,56,221,216		; aesenclast xmm3,xmm0
82	DB	0F3h,0C3h		;repret
83
84
85ALIGN	16
; _aesni_decrypt2 -- internal helper: decrypt the two blocks held in xmm2 and xmm3.
;   In:  rcx = key schedule pointer
;        eax = loop count (callers in this file take it from offset 240 of the key schedule)
;        xmm2, xmm3 = input blocks
;   Out: xmm2, xmm3 = decrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; NOTE: the round loop exits only when the offset in rax lands exactly on zero.
; rax starts at 16-16*count and advances by 32, so the count must be odd.
86_aesni_decrypt2:
87	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
88	shl	eax,4			; eax = count*16 (32-bit write also clears the top half of rax)
89	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key at +16
90	xorps	xmm2,xmm0		; initial key XOR on both blocks
91	xorps	xmm3,xmm0
92	movups	xmm0,XMMWORD[32+rcx]	; xmm0 = round key at +32
93	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
94	neg	rax
95	add	rax,16			; rax = 16 - 16*count, a negative offset that counts up to 0
96
97$L$dec_loop2:
98DB	102,15,56,222,209		; aesdec xmm2,xmm1
99DB	102,15,56,222,217		; aesdec xmm3,xmm1
100	movups	xmm1,XMMWORD[rax*1+rcx]	; next round key into xmm1
101	add	rax,32			; ZF from here feeds the jnz (movups does not touch flags)
102DB	102,15,56,222,208		; aesdec xmm2,xmm0
103DB	102,15,56,222,216		; aesdec xmm3,xmm0
104	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; next round key into xmm0
105	jnz	NEAR $L$dec_loop2
106
107DB	102,15,56,222,209		; aesdec xmm2,xmm1
108DB	102,15,56,222,217		; aesdec xmm3,xmm1
109DB	102,15,56,223,208		; aesdeclast xmm2,xmm0
110DB	102,15,56,223,216		; aesdeclast xmm3,xmm0
111	DB	0F3h,0C3h		;repret
112
113
114ALIGN	16
; _aesni_encrypt3 -- internal helper: encrypt the three blocks held in xmm2-xmm4.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm4 = input blocks
;   Out: xmm2-xmm4 = encrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; Round keys alternate between xmm1 and xmm0; rax is a negative byte offset from
; rcx that reaches zero when the loop is done (so the count must be odd).
115_aesni_encrypt3:
116	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
117	shl	eax,4			; eax = count*16
118	movups	xmm1,XMMWORD[16+rcx]
119	xorps	xmm2,xmm0		; initial key XOR on all three blocks
120	xorps	xmm3,xmm0
121	xorps	xmm4,xmm0
122	movups	xmm0,XMMWORD[32+rcx]
123	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
124	neg	rax
125	add	rax,16			; rax = 16 - 16*count
126
127$L$enc_loop3:
; aesenc xmm2..xmm4 with the round key in xmm1
128DB	102,15,56,220,209
129DB	102,15,56,220,217
130DB	102,15,56,220,225
131	movups	xmm1,XMMWORD[rax*1+rcx]
132	add	rax,32			; sets ZF for the jnz below
; aesenc xmm2..xmm4 with the round key in xmm0
133DB	102,15,56,220,208
134DB	102,15,56,220,216
135DB	102,15,56,220,224
136	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
137	jnz	NEAR $L$enc_loop3
138
; last aesenc round (key in xmm1), then aesenclast (key in xmm0)
139DB	102,15,56,220,209
140DB	102,15,56,220,217
141DB	102,15,56,220,225
142DB	102,15,56,221,208
143DB	102,15,56,221,216
144DB	102,15,56,221,224
145	DB	0F3h,0C3h		;repret
146
147
148ALIGN	16
; _aesni_decrypt3 -- internal helper: decrypt the three blocks held in xmm2-xmm4.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm4 = input blocks
;   Out: xmm2-xmm4 = decrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; Round keys alternate between xmm1 and xmm0; rax is a negative byte offset from
; rcx that reaches zero when the loop is done (so the count must be odd).
149_aesni_decrypt3:
150	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
151	shl	eax,4			; eax = count*16
152	movups	xmm1,XMMWORD[16+rcx]
153	xorps	xmm2,xmm0		; initial key XOR on all three blocks
154	xorps	xmm3,xmm0
155	xorps	xmm4,xmm0
156	movups	xmm0,XMMWORD[32+rcx]
157	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
158	neg	rax
159	add	rax,16			; rax = 16 - 16*count
160
161$L$dec_loop3:
; aesdec xmm2..xmm4 with the round key in xmm1
162DB	102,15,56,222,209
163DB	102,15,56,222,217
164DB	102,15,56,222,225
165	movups	xmm1,XMMWORD[rax*1+rcx]
166	add	rax,32			; sets ZF for the jnz below
; aesdec xmm2..xmm4 with the round key in xmm0
167DB	102,15,56,222,208
168DB	102,15,56,222,216
169DB	102,15,56,222,224
170	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
171	jnz	NEAR $L$dec_loop3
172
; last aesdec round (key in xmm1), then aesdeclast (key in xmm0)
173DB	102,15,56,222,209
174DB	102,15,56,222,217
175DB	102,15,56,222,225
176DB	102,15,56,223,208
177DB	102,15,56,223,216
178DB	102,15,56,223,224
179	DB	0F3h,0C3h		;repret
180
181
182ALIGN	16
; _aesni_encrypt4 -- internal helper: encrypt the four blocks held in xmm2-xmm5.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm5 = input blocks
;   Out: xmm2-xmm5 = encrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; Round keys alternate between xmm1 and xmm0; rax is a negative byte offset from
; rcx that reaches zero when the loop is done (so the count must be odd).
183_aesni_encrypt4:
184	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
185	shl	eax,4			; eax = count*16
186	movups	xmm1,XMMWORD[16+rcx]
187	xorps	xmm2,xmm0		; initial key XOR on all four blocks
188	xorps	xmm3,xmm0
189	xorps	xmm4,xmm0
190	xorps	xmm5,xmm0
191	movups	xmm0,XMMWORD[32+rcx]
192	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
193	neg	rax
194DB	0x0f,0x1f,0x00			; 3-byte NOP (nop dword [rax]); padding only
195	add	rax,16			; rax = 16 - 16*count
196
197$L$enc_loop4:
; aesenc xmm2..xmm5 with the round key in xmm1
198DB	102,15,56,220,209
199DB	102,15,56,220,217
200DB	102,15,56,220,225
201DB	102,15,56,220,233
202	movups	xmm1,XMMWORD[rax*1+rcx]
203	add	rax,32			; sets ZF for the jnz below
; aesenc xmm2..xmm5 with the round key in xmm0
204DB	102,15,56,220,208
205DB	102,15,56,220,216
206DB	102,15,56,220,224
207DB	102,15,56,220,232
208	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
209	jnz	NEAR $L$enc_loop4
210
; last aesenc round (key in xmm1), then aesenclast (key in xmm0)
211DB	102,15,56,220,209
212DB	102,15,56,220,217
213DB	102,15,56,220,225
214DB	102,15,56,220,233
215DB	102,15,56,221,208
216DB	102,15,56,221,216
217DB	102,15,56,221,224
218DB	102,15,56,221,232
219	DB	0F3h,0C3h		;repret
220
221
222ALIGN	16
; _aesni_decrypt4 -- internal helper: decrypt the four blocks held in xmm2-xmm5.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm5 = input blocks
;   Out: xmm2-xmm5 = decrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; Round keys alternate between xmm1 and xmm0; rax is a negative byte offset from
; rcx that reaches zero when the loop is done (so the count must be odd).
223_aesni_decrypt4:
224	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
225	shl	eax,4			; eax = count*16
226	movups	xmm1,XMMWORD[16+rcx]
227	xorps	xmm2,xmm0		; initial key XOR on all four blocks
228	xorps	xmm3,xmm0
229	xorps	xmm4,xmm0
230	xorps	xmm5,xmm0
231	movups	xmm0,XMMWORD[32+rcx]
232	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
233	neg	rax
234DB	0x0f,0x1f,0x00			; 3-byte NOP (nop dword [rax]); padding only
235	add	rax,16			; rax = 16 - 16*count
236
237$L$dec_loop4:
; aesdec xmm2..xmm5 with the round key in xmm1
238DB	102,15,56,222,209
239DB	102,15,56,222,217
240DB	102,15,56,222,225
241DB	102,15,56,222,233
242	movups	xmm1,XMMWORD[rax*1+rcx]
243	add	rax,32			; sets ZF for the jnz below
; aesdec xmm2..xmm5 with the round key in xmm0
244DB	102,15,56,222,208
245DB	102,15,56,222,216
246DB	102,15,56,222,224
247DB	102,15,56,222,232
248	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
249	jnz	NEAR $L$dec_loop4
250
; last aesdec round (key in xmm1), then aesdeclast (key in xmm0)
251DB	102,15,56,222,209
252DB	102,15,56,222,217
253DB	102,15,56,222,225
254DB	102,15,56,222,233
255DB	102,15,56,223,208
256DB	102,15,56,223,216
257DB	102,15,56,223,224
258DB	102,15,56,223,232
259	DB	0F3h,0C3h		;repret
260
261
262ALIGN	16
; _aesni_encrypt6 -- internal helper: encrypt the six blocks held in xmm2-xmm7.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm7 = input blocks
;   Out: xmm2-xmm7 = encrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; The first aesenc round on xmm2-xmm4 is interleaved with the initial key XOR of
; xmm5-xmm7, so the loop is entered at $L$enc_loop6_enter to finish that round.
; $L$enc_loop6 is also a call target from aesni_ctr32_encrypt_blocks in this file.
263_aesni_encrypt6:
264	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
265	shl	eax,4			; eax = count*16
266	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key at +16
267	xorps	xmm2,xmm0
268	pxor	xmm3,xmm0
269	pxor	xmm4,xmm0
270DB	102,15,56,220,209		; aesenc xmm2,xmm1
271	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
272	neg	rax			; rax = -16*count
273DB	102,15,56,220,217		; aesenc xmm3,xmm1
274	pxor	xmm5,xmm0
275	pxor	xmm6,xmm0
276DB	102,15,56,220,225		; aesenc xmm4,xmm1
277	pxor	xmm7,xmm0
278	movups	xmm0,XMMWORD[rax*1+rcx]	; xmm0 = round key at key+32
279	add	rax,16			; rax = 16 - 16*count
280	jmp	NEAR $L$enc_loop6_enter
281ALIGN	16
282$L$enc_loop6:
; aesenc xmm2..xmm4 with the round key in xmm1
283DB	102,15,56,220,209
284DB	102,15,56,220,217
285DB	102,15,56,220,225
286$L$enc_loop6_enter:
; aesenc xmm5..xmm7 with the round key in xmm1
287DB	102,15,56,220,233
288DB	102,15,56,220,241
289DB	102,15,56,220,249
290	movups	xmm1,XMMWORD[rax*1+rcx]
291	add	rax,32			; sets ZF for the jnz below
; aesenc xmm2..xmm7 with the round key in xmm0
292DB	102,15,56,220,208
293DB	102,15,56,220,216
294DB	102,15,56,220,224
295DB	102,15,56,220,232
296DB	102,15,56,220,240
297DB	102,15,56,220,248
298	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
299	jnz	NEAR $L$enc_loop6
300
; last aesenc round (key in xmm1), then aesenclast (key in xmm0), on xmm2..xmm7
301DB	102,15,56,220,209
302DB	102,15,56,220,217
303DB	102,15,56,220,225
304DB	102,15,56,220,233
305DB	102,15,56,220,241
306DB	102,15,56,220,249
307DB	102,15,56,221,208
308DB	102,15,56,221,216
309DB	102,15,56,221,224
310DB	102,15,56,221,232
311DB	102,15,56,221,240
312DB	102,15,56,221,248
313	DB	0F3h,0C3h		;repret
314
315
316ALIGN	16
; _aesni_decrypt6 -- internal helper: decrypt the six blocks held in xmm2-xmm7.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm7 = input blocks
;   Out: xmm2-xmm7 = decrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; The first aesdec round on xmm2-xmm4 is interleaved with the initial key XOR of
; xmm5-xmm7, so the loop is entered at $L$dec_loop6_enter to finish that round.
317_aesni_decrypt6:
318	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
319	shl	eax,4			; eax = count*16
320	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key at +16
321	xorps	xmm2,xmm0
322	pxor	xmm3,xmm0
323	pxor	xmm4,xmm0
324DB	102,15,56,222,209		; aesdec xmm2,xmm1
325	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
326	neg	rax			; rax = -16*count
327DB	102,15,56,222,217		; aesdec xmm3,xmm1
328	pxor	xmm5,xmm0
329	pxor	xmm6,xmm0
330DB	102,15,56,222,225		; aesdec xmm4,xmm1
331	pxor	xmm7,xmm0
332	movups	xmm0,XMMWORD[rax*1+rcx]	; xmm0 = round key at key+32
333	add	rax,16			; rax = 16 - 16*count
334	jmp	NEAR $L$dec_loop6_enter
335ALIGN	16
336$L$dec_loop6:
; aesdec xmm2..xmm4 with the round key in xmm1
337DB	102,15,56,222,209
338DB	102,15,56,222,217
339DB	102,15,56,222,225
340$L$dec_loop6_enter:
; aesdec xmm5..xmm7 with the round key in xmm1
341DB	102,15,56,222,233
342DB	102,15,56,222,241
343DB	102,15,56,222,249
344	movups	xmm1,XMMWORD[rax*1+rcx]
345	add	rax,32			; sets ZF for the jnz below
; aesdec xmm2..xmm7 with the round key in xmm0
346DB	102,15,56,222,208
347DB	102,15,56,222,216
348DB	102,15,56,222,224
349DB	102,15,56,222,232
350DB	102,15,56,222,240
351DB	102,15,56,222,248
352	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
353	jnz	NEAR $L$dec_loop6
354
; last aesdec round (key in xmm1), then aesdeclast (key in xmm0), on xmm2..xmm7
355DB	102,15,56,222,209
356DB	102,15,56,222,217
357DB	102,15,56,222,225
358DB	102,15,56,222,233
359DB	102,15,56,222,241
360DB	102,15,56,222,249
361DB	102,15,56,223,208
362DB	102,15,56,223,216
363DB	102,15,56,223,224
364DB	102,15,56,223,232
365DB	102,15,56,223,240
366DB	102,15,56,223,248
367	DB	0F3h,0C3h		;repret
368
369
370ALIGN	16
; _aesni_encrypt8 -- internal helper: encrypt the eight blocks held in xmm2-xmm9.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm9 = input blocks
;   Out: xmm2-xmm9 = encrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; The first aesenc round on xmm2/xmm3 is interleaved with the initial key XOR of
; xmm7-xmm9, so the loop is entered at $L$enc_loop8_inner to finish that round.
; Opcodes with the extra byte 68 (0x44, a REX prefix) address xmm8/xmm9.
371_aesni_encrypt8:
372	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
373	shl	eax,4			; eax = count*16
374	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key at +16
375	xorps	xmm2,xmm0
376	xorps	xmm3,xmm0
377	pxor	xmm4,xmm0
378	pxor	xmm5,xmm0
379	pxor	xmm6,xmm0
380	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
381	neg	rax			; rax = -16*count
382DB	102,15,56,220,209		; aesenc xmm2,xmm1
383	pxor	xmm7,xmm0
384	pxor	xmm8,xmm0
385DB	102,15,56,220,217		; aesenc xmm3,xmm1
386	pxor	xmm9,xmm0
387	movups	xmm0,XMMWORD[rax*1+rcx]	; xmm0 = round key at key+32
388	add	rax,16			; rax = 16 - 16*count
389	jmp	NEAR $L$enc_loop8_inner
390ALIGN	16
391$L$enc_loop8:
; aesenc xmm2, xmm3 with the round key in xmm1
392DB	102,15,56,220,209
393DB	102,15,56,220,217
394$L$enc_loop8_inner:
; aesenc xmm4..xmm9 with the round key in xmm1
395DB	102,15,56,220,225
396DB	102,15,56,220,233
397DB	102,15,56,220,241
398DB	102,15,56,220,249
399DB	102,68,15,56,220,193
400DB	102,68,15,56,220,201
; secondary entry label; no reference to it appears in the visible part of this file
401$L$enc_loop8_enter:
402	movups	xmm1,XMMWORD[rax*1+rcx]
403	add	rax,32			; sets ZF for the jnz below
; aesenc xmm2..xmm9 with the round key in xmm0
404DB	102,15,56,220,208
405DB	102,15,56,220,216
406DB	102,15,56,220,224
407DB	102,15,56,220,232
408DB	102,15,56,220,240
409DB	102,15,56,220,248
410DB	102,68,15,56,220,192
411DB	102,68,15,56,220,200
412	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
413	jnz	NEAR $L$enc_loop8
414
; last aesenc round (key in xmm1), then aesenclast (key in xmm0), on xmm2..xmm9
415DB	102,15,56,220,209
416DB	102,15,56,220,217
417DB	102,15,56,220,225
418DB	102,15,56,220,233
419DB	102,15,56,220,241
420DB	102,15,56,220,249
421DB	102,68,15,56,220,193
422DB	102,68,15,56,220,201
423DB	102,15,56,221,208
424DB	102,15,56,221,216
425DB	102,15,56,221,224
426DB	102,15,56,221,232
427DB	102,15,56,221,240
428DB	102,15,56,221,248
429DB	102,68,15,56,221,192
430DB	102,68,15,56,221,200
431	DB	0F3h,0C3h		;repret
432
433
434ALIGN	16
; _aesni_decrypt8 -- internal helper: decrypt the eight blocks held in xmm2-xmm9.
;   In:  rcx = key schedule pointer, eax = loop count, xmm2-xmm9 = input blocks
;   Out: xmm2-xmm9 = decrypted blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags.
; The first aesdec round on xmm2/xmm3 is interleaved with the initial key XOR of
; xmm7-xmm9, so the loop is entered at $L$dec_loop8_inner to finish that round.
; Opcodes with the extra byte 68 (0x44, a REX prefix) address xmm8/xmm9.
435_aesni_decrypt8:
436	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the key schedule
437	shl	eax,4			; eax = count*16
438	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key at +16
439	xorps	xmm2,xmm0
440	xorps	xmm3,xmm0
441	pxor	xmm4,xmm0
442	pxor	xmm5,xmm0
443	pxor	xmm6,xmm0
444	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
445	neg	rax			; rax = -16*count
446DB	102,15,56,222,209		; aesdec xmm2,xmm1
447	pxor	xmm7,xmm0
448	pxor	xmm8,xmm0
449DB	102,15,56,222,217		; aesdec xmm3,xmm1
450	pxor	xmm9,xmm0
451	movups	xmm0,XMMWORD[rax*1+rcx]	; xmm0 = round key at key+32
452	add	rax,16			; rax = 16 - 16*count
453	jmp	NEAR $L$dec_loop8_inner
454ALIGN	16
455$L$dec_loop8:
; aesdec xmm2, xmm3 with the round key in xmm1
456DB	102,15,56,222,209
457DB	102,15,56,222,217
458$L$dec_loop8_inner:
; aesdec xmm4..xmm9 with the round key in xmm1
459DB	102,15,56,222,225
460DB	102,15,56,222,233
461DB	102,15,56,222,241
462DB	102,15,56,222,249
463DB	102,68,15,56,222,193
464DB	102,68,15,56,222,201
; secondary entry label; no reference to it appears in the visible part of this file
465$L$dec_loop8_enter:
466	movups	xmm1,XMMWORD[rax*1+rcx]
467	add	rax,32			; sets ZF for the jnz below
; aesdec xmm2..xmm9 with the round key in xmm0
468DB	102,15,56,222,208
469DB	102,15,56,222,216
470DB	102,15,56,222,224
471DB	102,15,56,222,232
472DB	102,15,56,222,240
473DB	102,15,56,222,248
474DB	102,68,15,56,222,192
475DB	102,68,15,56,222,200
476	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
477	jnz	NEAR $L$dec_loop8
478
; last aesdec round (key in xmm1), then aesdeclast (key in xmm0), on xmm2..xmm9
479DB	102,15,56,222,209
480DB	102,15,56,222,217
481DB	102,15,56,222,225
482DB	102,15,56,222,233
483DB	102,15,56,222,241
484DB	102,15,56,222,249
485DB	102,68,15,56,222,193
486DB	102,68,15,56,222,201
487DB	102,15,56,223,208
488DB	102,15,56,223,216
489DB	102,15,56,223,224
490DB	102,15,56,223,232
491DB	102,15,56,223,240
492DB	102,15,56,223,248
493DB	102,68,15,56,223,192
494DB	102,68,15,56,223,200
495	DB	0F3h,0C3h		;repret
496
497global	aesni_ecb_encrypt
498
499ALIGN	16
; aesni_ecb_encrypt -- ECB-mode encrypt or decrypt of whole 16-byte blocks.
; Win64 arguments, remapped in the prologue to the registers the body uses:
;   arg1 rcx      -> rdi : input pointer
;   arg2 rdx      -> rsi : output pointer
;   arg3 r8       -> rdx : length in bytes (rounded down to a multiple of 16;
;                          nothing is done if the result is zero)
;   arg4 r9       -> rcx : key schedule pointer (loop count read from offset 240)
;   arg5 [40+rsp] -> r8  : direction flag; non-zero = encrypt, zero = decrypt
; rdi/rsi are saved in the caller-provided slots at [8+rsp]/[16+rsp] and xmm6-xmm9
; in an 88-byte local frame; all are restored on exit and the xmm save slots are
; overwritten with zeros.  r10d/r11 keep the loop count and key pointer so that
; eax/rcx can be reloaded after each helper call (the helpers clobber them).
; Data is processed 8 blocks (0x80 bytes) at a time; a tail of 1..7 blocks is
; dispatched to the matching helper.
500aesni_ecb_encrypt:
501	mov	QWORD[8+rsp],rdi	;WIN64 prologue
502	mov	QWORD[16+rsp],rsi
503	mov	rax,rsp			; rax = rsp at entry (presumably for the SEH handler -- confirm)
504$L$SEH_begin_aesni_ecb_encrypt:
505	mov	rdi,rcx
506	mov	rsi,rdx
507	mov	rdx,r8
508	mov	rcx,r9
509	mov	r8,QWORD[40+rsp]	; fifth argument, read from the stack
510
511
512	lea	rsp,[((-88))+rsp]	; local frame; movaps below needs rsp 16-byte aligned here
513	movaps	XMMWORD[rsp],xmm6
514	movaps	XMMWORD[16+rsp],xmm7
515	movaps	XMMWORD[32+rsp],xmm8
516	movaps	XMMWORD[48+rsp],xmm9
517$L$ecb_enc_body:
518	and	rdx,-16			; drop any partial trailing block
519	jz	NEAR $L$ecb_ret		; less than one block: nothing to do
520
521	mov	eax,DWORD[240+rcx]	; eax = loop count for the helpers
522	movups	xmm0,XMMWORD[rcx]
523	mov	r11,rcx			; r11 = saved key schedule pointer
524	mov	r10d,eax		; r10d = saved loop count
525	test	r8d,r8d
526	jz	NEAR $L$ecb_decrypt	; flag == 0 selects the decrypt path
527
528	cmp	rdx,0x80
529	jb	NEAR $L$ecb_enc_tail	; fewer than 8 blocks
530
; load the first 8 blocks, then enter the loop at the helper call
531	movdqu	xmm2,XMMWORD[rdi]
532	movdqu	xmm3,XMMWORD[16+rdi]
533	movdqu	xmm4,XMMWORD[32+rdi]
534	movdqu	xmm5,XMMWORD[48+rdi]
535	movdqu	xmm6,XMMWORD[64+rdi]
536	movdqu	xmm7,XMMWORD[80+rdi]
537	movdqu	xmm8,XMMWORD[96+rdi]
538	movdqu	xmm9,XMMWORD[112+rdi]
539	lea	rdi,[128+rdi]
540	sub	rdx,0x80		; rdx = bytes left after the 8 blocks in flight
541	jmp	NEAR $L$ecb_enc_loop8_enter
542ALIGN	16
; loop body: store the previous 8 results while loading the next 8 inputs
543$L$ecb_enc_loop8:
544	movups	XMMWORD[rsi],xmm2
545	mov	rcx,r11			; restore key pointer for the next helper call
546	movdqu	xmm2,XMMWORD[rdi]
547	mov	eax,r10d		; restore loop count for the next helper call
548	movups	XMMWORD[16+rsi],xmm3
549	movdqu	xmm3,XMMWORD[16+rdi]
550	movups	XMMWORD[32+rsi],xmm4
551	movdqu	xmm4,XMMWORD[32+rdi]
552	movups	XMMWORD[48+rsi],xmm5
553	movdqu	xmm5,XMMWORD[48+rdi]
554	movups	XMMWORD[64+rsi],xmm6
555	movdqu	xmm6,XMMWORD[64+rdi]
556	movups	XMMWORD[80+rsi],xmm7
557	movdqu	xmm7,XMMWORD[80+rdi]
558	movups	XMMWORD[96+rsi],xmm8
559	movdqu	xmm8,XMMWORD[96+rdi]
560	movups	XMMWORD[112+rsi],xmm9
561	lea	rsi,[128+rsi]
562	movdqu	xmm9,XMMWORD[112+rdi]
563	lea	rdi,[128+rdi]
564$L$ecb_enc_loop8_enter:
565
566	call	_aesni_encrypt8
567
568	sub	rdx,0x80
569	jnc	NEAR $L$ecb_enc_loop8	; no borrow: at least 8 more blocks remain
570
; flush the last 8 results of the bulk loop
571	movups	XMMWORD[rsi],xmm2
572	mov	rcx,r11			; restore key pointer for the tail
573	movups	XMMWORD[16+rsi],xmm3
574	mov	eax,r10d		; restore loop count for the tail
575	movups	XMMWORD[32+rsi],xmm4
576	movups	XMMWORD[48+rsi],xmm5
577	movups	XMMWORD[64+rsi],xmm6
578	movups	XMMWORD[80+rsi],xmm7
579	movups	XMMWORD[96+rsi],xmm8
580	movups	XMMWORD[112+rsi],xmm9
581	lea	rsi,[128+rsi]
582	add	rdx,0x80		; undo the last subtraction: rdx = remaining bytes (0..0x70)
583	jz	NEAR $L$ecb_ret
584
; tail dispatch on 1..7 remaining blocks.  Each cmp feeds both the jb and the je
; that follows it; the movups in between does not modify flags.
585$L$ecb_enc_tail:
586	movups	xmm2,XMMWORD[rdi]
587	cmp	rdx,0x20
588	jb	NEAR $L$ecb_enc_one
589	movups	xmm3,XMMWORD[16+rdi]
590	je	NEAR $L$ecb_enc_two
591	movups	xmm4,XMMWORD[32+rdi]
592	cmp	rdx,0x40
593	jb	NEAR $L$ecb_enc_three
594	movups	xmm5,XMMWORD[48+rdi]
595	je	NEAR $L$ecb_enc_four
596	movups	xmm6,XMMWORD[64+rdi]
597	cmp	rdx,0x60
598	jb	NEAR $L$ecb_enc_five
599	movups	xmm7,XMMWORD[80+rdi]
600	je	NEAR $L$ecb_enc_six
; seven blocks: run the 8-block helper with a zeroed dummy eighth block
601	movdqu	xmm8,XMMWORD[96+rdi]
602	xorps	xmm9,xmm9
603	call	_aesni_encrypt8
604	movups	XMMWORD[rsi],xmm2
605	movups	XMMWORD[16+rsi],xmm3
606	movups	XMMWORD[32+rsi],xmm4
607	movups	XMMWORD[48+rsi],xmm5
608	movups	XMMWORD[64+rsi],xmm6
609	movups	XMMWORD[80+rsi],xmm7
610	movups	XMMWORD[96+rsi],xmm8
611	jmp	NEAR $L$ecb_ret
612ALIGN	16
; one block: inline single-block encrypt loop
613$L$ecb_enc_one:
614	movups	xmm0,XMMWORD[rcx]
615	movups	xmm1,XMMWORD[16+rcx]
616	lea	rcx,[32+rcx]
617	xorps	xmm2,xmm0
618$L$oop_enc1_3:
619DB	102,15,56,220,209		; aesenc xmm2,xmm1
620	dec	eax
621	movups	xmm1,XMMWORD[rcx]
622	lea	rcx,[16+rcx]
623	jnz	NEAR $L$oop_enc1_3
624DB	102,15,56,221,209		; aesenclast xmm2,xmm1
625	movups	XMMWORD[rsi],xmm2
626	jmp	NEAR $L$ecb_ret
627ALIGN	16
628$L$ecb_enc_two:
629	call	_aesni_encrypt2
630	movups	XMMWORD[rsi],xmm2
631	movups	XMMWORD[16+rsi],xmm3
632	jmp	NEAR $L$ecb_ret
633ALIGN	16
634$L$ecb_enc_three:
635	call	_aesni_encrypt3
636	movups	XMMWORD[rsi],xmm2
637	movups	XMMWORD[16+rsi],xmm3
638	movups	XMMWORD[32+rsi],xmm4
639	jmp	NEAR $L$ecb_ret
640ALIGN	16
641$L$ecb_enc_four:
642	call	_aesni_encrypt4
643	movups	XMMWORD[rsi],xmm2
644	movups	XMMWORD[16+rsi],xmm3
645	movups	XMMWORD[32+rsi],xmm4
646	movups	XMMWORD[48+rsi],xmm5
647	jmp	NEAR $L$ecb_ret
648ALIGN	16
; five blocks: run the 6-block helper with a zeroed dummy sixth block
649$L$ecb_enc_five:
650	xorps	xmm7,xmm7
651	call	_aesni_encrypt6
652	movups	XMMWORD[rsi],xmm2
653	movups	XMMWORD[16+rsi],xmm3
654	movups	XMMWORD[32+rsi],xmm4
655	movups	XMMWORD[48+rsi],xmm5
656	movups	XMMWORD[64+rsi],xmm6
657	jmp	NEAR $L$ecb_ret
658ALIGN	16
659$L$ecb_enc_six:
660	call	_aesni_encrypt6
661	movups	XMMWORD[rsi],xmm2
662	movups	XMMWORD[16+rsi],xmm3
663	movups	XMMWORD[32+rsi],xmm4
664	movups	XMMWORD[48+rsi],xmm5
665	movups	XMMWORD[64+rsi],xmm6
666	movups	XMMWORD[80+rsi],xmm7
667	jmp	NEAR $L$ecb_ret
668
669ALIGN	16
; ---- decrypt path: same structure as above, using the _aesni_decryptN helpers.
; Unlike the encrypt path, result registers are zeroed right after being stored
; (presumably to avoid leaving plaintext in registers -- confirm intent).
670$L$ecb_decrypt:
671	cmp	rdx,0x80
672	jb	NEAR $L$ecb_dec_tail	; fewer than 8 blocks
673
674	movdqu	xmm2,XMMWORD[rdi]
675	movdqu	xmm3,XMMWORD[16+rdi]
676	movdqu	xmm4,XMMWORD[32+rdi]
677	movdqu	xmm5,XMMWORD[48+rdi]
678	movdqu	xmm6,XMMWORD[64+rdi]
679	movdqu	xmm7,XMMWORD[80+rdi]
680	movdqu	xmm8,XMMWORD[96+rdi]
681	movdqu	xmm9,XMMWORD[112+rdi]
682	lea	rdi,[128+rdi]
683	sub	rdx,0x80		; rdx = bytes left after the 8 blocks in flight
684	jmp	NEAR $L$ecb_dec_loop8_enter
685ALIGN	16
; loop body: store the previous 8 results while loading the next 8 inputs
686$L$ecb_dec_loop8:
687	movups	XMMWORD[rsi],xmm2
688	mov	rcx,r11			; restore key pointer for the next helper call
689	movdqu	xmm2,XMMWORD[rdi]
690	mov	eax,r10d		; restore loop count for the next helper call
691	movups	XMMWORD[16+rsi],xmm3
692	movdqu	xmm3,XMMWORD[16+rdi]
693	movups	XMMWORD[32+rsi],xmm4
694	movdqu	xmm4,XMMWORD[32+rdi]
695	movups	XMMWORD[48+rsi],xmm5
696	movdqu	xmm5,XMMWORD[48+rdi]
697	movups	XMMWORD[64+rsi],xmm6
698	movdqu	xmm6,XMMWORD[64+rdi]
699	movups	XMMWORD[80+rsi],xmm7
700	movdqu	xmm7,XMMWORD[80+rdi]
701	movups	XMMWORD[96+rsi],xmm8
702	movdqu	xmm8,XMMWORD[96+rdi]
703	movups	XMMWORD[112+rsi],xmm9
704	lea	rsi,[128+rsi]
705	movdqu	xmm9,XMMWORD[112+rdi]
706	lea	rdi,[128+rdi]
707$L$ecb_dec_loop8_enter:
708
709	call	_aesni_decrypt8
710
711	movups	xmm0,XMMWORD[r11]	; reload the first 16 bytes of the key schedule into xmm0
712	sub	rdx,0x80
713	jnc	NEAR $L$ecb_dec_loop8	; no borrow: at least 8 more blocks remain
714
; flush the last 8 results of the bulk loop, clearing each register once stored
715	movups	XMMWORD[rsi],xmm2
716	pxor	xmm2,xmm2
717	mov	rcx,r11			; restore key pointer for the tail
718	movups	XMMWORD[16+rsi],xmm3
719	pxor	xmm3,xmm3
720	mov	eax,r10d		; restore loop count for the tail
721	movups	XMMWORD[32+rsi],xmm4
722	pxor	xmm4,xmm4
723	movups	XMMWORD[48+rsi],xmm5
724	pxor	xmm5,xmm5
725	movups	XMMWORD[64+rsi],xmm6
726	pxor	xmm6,xmm6
727	movups	XMMWORD[80+rsi],xmm7
728	pxor	xmm7,xmm7
729	movups	XMMWORD[96+rsi],xmm8
730	pxor	xmm8,xmm8
731	movups	XMMWORD[112+rsi],xmm9
732	pxor	xmm9,xmm9
733	lea	rsi,[128+rsi]
734	add	rdx,0x80		; undo the last subtraction: rdx = remaining bytes (0..0x70)
735	jz	NEAR $L$ecb_ret
736
; tail dispatch on 1..7 remaining blocks (flags from each cmp survive the movups)
737$L$ecb_dec_tail:
738	movups	xmm2,XMMWORD[rdi]
739	cmp	rdx,0x20
740	jb	NEAR $L$ecb_dec_one
741	movups	xmm3,XMMWORD[16+rdi]
742	je	NEAR $L$ecb_dec_two
743	movups	xmm4,XMMWORD[32+rdi]
744	cmp	rdx,0x40
745	jb	NEAR $L$ecb_dec_three
746	movups	xmm5,XMMWORD[48+rdi]
747	je	NEAR $L$ecb_dec_four
748	movups	xmm6,XMMWORD[64+rdi]
749	cmp	rdx,0x60
750	jb	NEAR $L$ecb_dec_five
751	movups	xmm7,XMMWORD[80+rdi]
752	je	NEAR $L$ecb_dec_six
; seven blocks: run the 8-block helper with a zeroed dummy eighth block
753	movups	xmm8,XMMWORD[96+rdi]
754	movups	xmm0,XMMWORD[rcx]
755	xorps	xmm9,xmm9
756	call	_aesni_decrypt8
757	movups	XMMWORD[rsi],xmm2
758	pxor	xmm2,xmm2
759	movups	XMMWORD[16+rsi],xmm3
760	pxor	xmm3,xmm3
761	movups	XMMWORD[32+rsi],xmm4
762	pxor	xmm4,xmm4
763	movups	XMMWORD[48+rsi],xmm5
764	pxor	xmm5,xmm5
765	movups	XMMWORD[64+rsi],xmm6
766	pxor	xmm6,xmm6
767	movups	XMMWORD[80+rsi],xmm7
768	pxor	xmm7,xmm7
769	movups	XMMWORD[96+rsi],xmm8
770	pxor	xmm8,xmm8
771	pxor	xmm9,xmm9
772	jmp	NEAR $L$ecb_ret
773ALIGN	16
; one block: inline single-block decrypt loop
774$L$ecb_dec_one:
775	movups	xmm0,XMMWORD[rcx]
776	movups	xmm1,XMMWORD[16+rcx]
777	lea	rcx,[32+rcx]
778	xorps	xmm2,xmm0
779$L$oop_dec1_4:
780DB	102,15,56,222,209		; aesdec xmm2,xmm1
781	dec	eax
782	movups	xmm1,XMMWORD[rcx]
783	lea	rcx,[16+rcx]
784	jnz	NEAR $L$oop_dec1_4
785DB	102,15,56,223,209		; aesdeclast xmm2,xmm1
786	movups	XMMWORD[rsi],xmm2
787	pxor	xmm2,xmm2
788	jmp	NEAR $L$ecb_ret
789ALIGN	16
790$L$ecb_dec_two:
791	call	_aesni_decrypt2
792	movups	XMMWORD[rsi],xmm2
793	pxor	xmm2,xmm2
794	movups	XMMWORD[16+rsi],xmm3
795	pxor	xmm3,xmm3
796	jmp	NEAR $L$ecb_ret
797ALIGN	16
798$L$ecb_dec_three:
799	call	_aesni_decrypt3
800	movups	XMMWORD[rsi],xmm2
801	pxor	xmm2,xmm2
802	movups	XMMWORD[16+rsi],xmm3
803	pxor	xmm3,xmm3
804	movups	XMMWORD[32+rsi],xmm4
805	pxor	xmm4,xmm4
806	jmp	NEAR $L$ecb_ret
807ALIGN	16
808$L$ecb_dec_four:
809	call	_aesni_decrypt4
810	movups	XMMWORD[rsi],xmm2
811	pxor	xmm2,xmm2
812	movups	XMMWORD[16+rsi],xmm3
813	pxor	xmm3,xmm3
814	movups	XMMWORD[32+rsi],xmm4
815	pxor	xmm4,xmm4
816	movups	XMMWORD[48+rsi],xmm5
817	pxor	xmm5,xmm5
818	jmp	NEAR $L$ecb_ret
819ALIGN	16
; five blocks: run the 6-block helper with a zeroed dummy sixth block
820$L$ecb_dec_five:
821	xorps	xmm7,xmm7
822	call	_aesni_decrypt6
823	movups	XMMWORD[rsi],xmm2
824	pxor	xmm2,xmm2
825	movups	XMMWORD[16+rsi],xmm3
826	pxor	xmm3,xmm3
827	movups	XMMWORD[32+rsi],xmm4
828	pxor	xmm4,xmm4
829	movups	XMMWORD[48+rsi],xmm5
830	pxor	xmm5,xmm5
831	movups	XMMWORD[64+rsi],xmm6
832	pxor	xmm6,xmm6
833	pxor	xmm7,xmm7
834	jmp	NEAR $L$ecb_ret
835ALIGN	16
; six blocks: falls through into $L$ecb_ret
836$L$ecb_dec_six:
837	call	_aesni_decrypt6
838	movups	XMMWORD[rsi],xmm2
839	pxor	xmm2,xmm2
840	movups	XMMWORD[16+rsi],xmm3
841	pxor	xmm3,xmm3
842	movups	XMMWORD[32+rsi],xmm4
843	pxor	xmm4,xmm4
844	movups	XMMWORD[48+rsi],xmm5
845	pxor	xmm5,xmm5
846	movups	XMMWORD[64+rsi],xmm6
847	pxor	xmm6,xmm6
848	movups	XMMWORD[80+rsi],xmm7
849	pxor	xmm7,xmm7
850
; common exit: clear xmm0/xmm1, restore xmm6-xmm9 and zero their stack save slots
851$L$ecb_ret:
852	xorps	xmm0,xmm0
853	pxor	xmm1,xmm1
854	movaps	xmm6,XMMWORD[rsp]
855	movaps	XMMWORD[rsp],xmm0
856	movaps	xmm7,XMMWORD[16+rsp]
857	movaps	XMMWORD[16+rsp],xmm0
858	movaps	xmm8,XMMWORD[32+rsp]
859	movaps	XMMWORD[32+rsp],xmm0
860	movaps	xmm9,XMMWORD[48+rsp]
861	movaps	XMMWORD[48+rsp],xmm0
862	lea	rsp,[88+rsp]		; release the local frame
863$L$ecb_enc_ret:
864	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
865	mov	rsi,QWORD[16+rsp]
866	DB	0F3h,0C3h		;repret
867$L$SEH_end_aesni_ecb_encrypt:
868global	aesni_ccm64_encrypt_blocks
869
870ALIGN	16
; aesni_ccm64_encrypt_blocks -- for each 16-byte input block:
;   out   = in XOR E(counter block)
;   state = E(state XOR in)          (state is the 16 bytes at the sixth argument)
; Both encryptions run side by side in xmm2 (counter) and xmm3 (state).
; Win64 arguments, remapped in the prologue to the registers the body uses:
;   arg1 rcx      -> rdi : input pointer
;   arg2 rdx      -> rsi : output pointer
;   arg3 r8       -> rdx : number of 16-byte blocks; the count is tested only after
;                          a block has been processed, so it must be at least 1
;   arg4 r9       -> rcx : key schedule pointer (loop count read from offset 240)
;   arg5 [40+rsp] -> r8  : pointer to the 16-byte counter block (read only here)
;   arg6 [48+rsp] -> r9  : pointer to the 16-byte state (read at entry, written at exit)
; xmm6-xmm9 are saved/restored via an 88-byte frame whose slots are zeroed on exit.
; $L$increment64 and $L$bswap_mask are constants defined outside this part of the file.
871aesni_ccm64_encrypt_blocks:
872	mov	QWORD[8+rsp],rdi	;WIN64 prologue
873	mov	QWORD[16+rsp],rsi
874	mov	rax,rsp
875$L$SEH_begin_aesni_ccm64_encrypt_blocks:
876	mov	rdi,rcx
877	mov	rsi,rdx
878	mov	rdx,r8
879	mov	rcx,r9
880	mov	r8,QWORD[40+rsp]	; fifth argument
881	mov	r9,QWORD[48+rsp]	; sixth argument
882
883
884	lea	rsp,[((-88))+rsp]
885	movaps	XMMWORD[rsp],xmm6
886	movaps	XMMWORD[16+rsp],xmm7
887	movaps	XMMWORD[32+rsp],xmm8
888	movaps	XMMWORD[48+rsp],xmm9
889$L$ccm64_enc_body:
890	mov	eax,DWORD[240+rcx]	; eax = loop count
891	movdqu	xmm6,XMMWORD[r8]	; xmm6 = counter block
892	movdqa	xmm9,XMMWORD[$L$increment64]	; xmm9 = value added to the counter per block
893	movdqa	xmm7,XMMWORD[$L$bswap_mask]	; xmm7 = pshufb mask
894
895	shl	eax,4			; eax = count*16
896	mov	r10d,16
897	lea	r11,[rcx]		; r11 = key schedule base
898	movdqu	xmm3,XMMWORD[r9]	; xmm3 = running state
899	movdqa	xmm2,xmm6		; xmm2 = counter block as stored in memory
900	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
901DB	102,15,56,0,247			; pshufb xmm6,xmm7: xmm6 now holds the shuffled form used for paddq
902	sub	r10,rax			; r10 = 16 - 16*count, initial round-key offset
903	jmp	NEAR $L$ccm64_enc_outer
904ALIGN	16
905$L$ccm64_enc_outer:
906	movups	xmm0,XMMWORD[r11]	; xmm0 = first 16 bytes of the key schedule
907	mov	rax,r10			; reset the round-key offset
908	movups	xmm8,XMMWORD[rdi]	; xmm8 = input block
909
910	xorps	xmm2,xmm0		; counter lane: initial key XOR
911	movups	xmm1,XMMWORD[16+r11]
912	xorps	xmm0,xmm8		; xmm0 = key0 XOR input
913	xorps	xmm3,xmm0		; state lane: state XOR input XOR key0
914	movups	xmm0,XMMWORD[32+r11]
915
916$L$ccm64_enc2_loop:
917DB	102,15,56,220,209		; aesenc xmm2,xmm1
918DB	102,15,56,220,217		; aesenc xmm3,xmm1
919	movups	xmm1,XMMWORD[rax*1+rcx]
920	add	rax,32			; sets ZF for the jnz below
921DB	102,15,56,220,208		; aesenc xmm2,xmm0
922DB	102,15,56,220,216		; aesenc xmm3,xmm0
923	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
924	jnz	NEAR $L$ccm64_enc2_loop
925DB	102,15,56,220,209		; aesenc xmm2,xmm1
926DB	102,15,56,220,217		; aesenc xmm3,xmm1
927	paddq	xmm6,xmm9		; advance the counter
928	dec	rdx			; one block done; ZF is consumed by the jnz at the loop end
929DB	102,15,56,221,208		; aesenclast xmm2,xmm0
930DB	102,15,56,221,216		; aesenclast xmm3,xmm0
931
932	lea	rdi,[16+rdi]
933	xorps	xmm8,xmm2		; xmm8 = input XOR encrypted counter
934	movdqa	xmm2,xmm6		; next counter value
935	movups	XMMWORD[rsi],xmm8	; store the output block
936DB	102,15,56,0,215			; pshufb xmm2,xmm7: shuffle the next counter back
937	lea	rsi,[16+rsi]
938	jnz	NEAR $L$ccm64_enc_outer	; flags still from the dec rdx above
939
; done: write the state back, clear working registers, restore xmm6-xmm9
940	pxor	xmm0,xmm0
941	pxor	xmm1,xmm1
942	pxor	xmm2,xmm2
943	movups	XMMWORD[r9],xmm3	; write the updated state back
944	pxor	xmm3,xmm3
945	pxor	xmm8,xmm8
946	pxor	xmm6,xmm6
947	movaps	xmm6,XMMWORD[rsp]
948	movaps	XMMWORD[rsp],xmm0	; zero each save slot after restoring from it
949	movaps	xmm7,XMMWORD[16+rsp]
950	movaps	XMMWORD[16+rsp],xmm0
951	movaps	xmm8,XMMWORD[32+rsp]
952	movaps	XMMWORD[32+rsp],xmm0
953	movaps	xmm9,XMMWORD[48+rsp]
954	movaps	XMMWORD[48+rsp],xmm0
955	lea	rsp,[88+rsp]
956$L$ccm64_enc_ret:
957	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
958	mov	rsi,QWORD[16+rsp]
959	DB	0F3h,0C3h		;repret
960$L$SEH_end_aesni_ccm64_encrypt_blocks:
961global	aesni_ccm64_decrypt_blocks
962
963ALIGN	16
; aesni_ccm64_decrypt_blocks -- for each 16-byte input block:
;   out   = in XOR E(counter block)
;   state = E(state XOR out)         (state is the 16 bytes at the sixth argument)
; The first counter block is encrypted alone; after that, the state update for
; block i runs in the same two-lane loop as the counter encryption for block i+1.
; The state update for the final block is done alone at $L$ccm64_dec_break.
; Win64 arguments, remapped in the prologue to the registers the body uses:
;   arg1 rcx      -> rdi : input pointer
;   arg2 rdx      -> rsi : output pointer
;   arg3 r8       -> rdx : number of 16-byte blocks; one block is processed before
;                          the count is first tested, so it must be at least 1
;   arg4 r9       -> rcx : key schedule pointer (loop count read from offset 240)
;   arg5 [40+rsp] -> r8  : pointer to the 16-byte counter block (read only here)
;   arg6 [48+rsp] -> r9  : pointer to the 16-byte state (read at entry, written at exit)
; xmm6-xmm9 are saved/restored via an 88-byte frame whose slots are zeroed on exit.
; $L$increment64 and $L$bswap_mask are constants defined outside this part of the file.
964aesni_ccm64_decrypt_blocks:
965	mov	QWORD[8+rsp],rdi	;WIN64 prologue
966	mov	QWORD[16+rsp],rsi
967	mov	rax,rsp
968$L$SEH_begin_aesni_ccm64_decrypt_blocks:
969	mov	rdi,rcx
970	mov	rsi,rdx
971	mov	rdx,r8
972	mov	rcx,r9
973	mov	r8,QWORD[40+rsp]	; fifth argument
974	mov	r9,QWORD[48+rsp]	; sixth argument
975
976
977	lea	rsp,[((-88))+rsp]
978	movaps	XMMWORD[rsp],xmm6
979	movaps	XMMWORD[16+rsp],xmm7
980	movaps	XMMWORD[32+rsp],xmm8
981	movaps	XMMWORD[48+rsp],xmm9
982$L$ccm64_dec_body:
983	mov	eax,DWORD[240+rcx]	; eax = loop count
984	movups	xmm6,XMMWORD[r8]	; xmm6 = counter block
985	movdqu	xmm3,XMMWORD[r9]	; xmm3 = running state
986	movdqa	xmm9,XMMWORD[$L$increment64]	; xmm9 = value added to the counter per block
987	movdqa	xmm7,XMMWORD[$L$bswap_mask]	; xmm7 = pshufb mask
988
989	movaps	xmm2,xmm6		; xmm2 = counter block as stored in memory
990	mov	r10d,eax		; r10d = saved loop count
991	mov	r11,rcx			; r11 = key schedule base
992DB	102,15,56,0,247			; pshufb xmm6,xmm7: xmm6 now holds the shuffled form used for paddq
; encrypt the first counter block on its own
993	movups	xmm0,XMMWORD[rcx]
994	movups	xmm1,XMMWORD[16+rcx]
995	lea	rcx,[32+rcx]
996	xorps	xmm2,xmm0
997$L$oop_enc1_5:
998DB	102,15,56,220,209		; aesenc xmm2,xmm1
999	dec	eax
1000	movups	xmm1,XMMWORD[rcx]
1001	lea	rcx,[16+rcx]
1002	jnz	NEAR $L$oop_enc1_5
1003DB	102,15,56,221,209		; aesenclast xmm2,xmm1
1004	shl	r10d,4			; r10d = count*16
1005	mov	eax,16
1006	movups	xmm8,XMMWORD[rdi]	; xmm8 = first input block
1007	paddq	xmm6,xmm9		; advance the counter
1008	lea	rdi,[16+rdi]
1009	sub	rax,r10			; rax = 16 - 16*count
1010	lea	rcx,[32+r10*1+r11]	; rcx = key + 32 + 16*count
1011	mov	r10,rax			; r10 = initial round-key offset for the two-lane loop
1012	jmp	NEAR $L$ccm64_dec_outer
1013ALIGN	16
1014$L$ccm64_dec_outer:
1015	xorps	xmm8,xmm2		; xmm8 = input XOR encrypted counter = output block
1016	movdqa	xmm2,xmm6		; next counter value
1017	movups	XMMWORD[rsi],xmm8	; store the output block
1018	lea	rsi,[16+rsi]
1019DB	102,15,56,0,215			; pshufb xmm2,xmm7: shuffle the next counter back
1020
1021	sub	rdx,1
1022	jz	NEAR $L$ccm64_dec_break	; that was the last block
1023
1024	movups	xmm0,XMMWORD[r11]	; xmm0 = first 16 bytes of the key schedule
1025	mov	rax,r10			; reset the round-key offset
1026	movups	xmm1,XMMWORD[16+r11]
1027	xorps	xmm8,xmm0		; xmm8 = output XOR key0
1028	xorps	xmm2,xmm0		; counter lane: initial key XOR
1029	xorps	xmm3,xmm8		; state lane: state XOR output XOR key0
1030	movups	xmm0,XMMWORD[32+r11]
1031	jmp	NEAR $L$ccm64_dec2_loop
1032ALIGN	16
1033$L$ccm64_dec2_loop:
1034DB	102,15,56,220,209		; aesenc xmm2,xmm1
1035DB	102,15,56,220,217		; aesenc xmm3,xmm1
1036	movups	xmm1,XMMWORD[rax*1+rcx]
1037	add	rax,32			; sets ZF for the jnz below
1038DB	102,15,56,220,208		; aesenc xmm2,xmm0
1039DB	102,15,56,220,216		; aesenc xmm3,xmm0
1040	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
1041	jnz	NEAR $L$ccm64_dec2_loop
1042	movups	xmm8,XMMWORD[rdi]	; xmm8 = next input block
1043	paddq	xmm6,xmm9		; advance the counter
1044DB	102,15,56,220,209		; aesenc xmm2,xmm1
1045DB	102,15,56,220,217		; aesenc xmm3,xmm1
1046DB	102,15,56,221,208		; aesenclast xmm2,xmm0
1047DB	102,15,56,221,216		; aesenclast xmm3,xmm0
1048	lea	rdi,[16+rdi]
1049	jmp	NEAR $L$ccm64_dec_outer
1050
1051ALIGN	16
; final state update: state = E(state XOR last output block), single-lane
1052$L$ccm64_dec_break:
1053
1054	mov	eax,DWORD[240+r11]	; reload the loop count
1055	movups	xmm0,XMMWORD[r11]
1056	movups	xmm1,XMMWORD[16+r11]
1057	xorps	xmm8,xmm0		; xmm8 = last output XOR key0
1058	lea	r11,[32+r11]
1059	xorps	xmm3,xmm8
1060$L$oop_enc1_6:
1061DB	102,15,56,220,217		; aesenc xmm3,xmm1
1062	dec	eax
1063	movups	xmm1,XMMWORD[r11]
1064	lea	r11,[16+r11]
1065	jnz	NEAR $L$oop_enc1_6
1066DB	102,15,56,221,217		; aesenclast xmm3,xmm1
1067	pxor	xmm0,xmm0
1068	pxor	xmm1,xmm1
1069	pxor	xmm2,xmm2
1070	movups	XMMWORD[r9],xmm3	; write the updated state back
1071	pxor	xmm3,xmm3
1072	pxor	xmm8,xmm8
1073	pxor	xmm6,xmm6
1074	movaps	xmm6,XMMWORD[rsp]
1075	movaps	XMMWORD[rsp],xmm0	; zero each save slot after restoring from it
1076	movaps	xmm7,XMMWORD[16+rsp]
1077	movaps	XMMWORD[16+rsp],xmm0
1078	movaps	xmm8,XMMWORD[32+rsp]
1079	movaps	XMMWORD[32+rsp],xmm0
1080	movaps	xmm9,XMMWORD[48+rsp]
1081	movaps	XMMWORD[48+rsp],xmm0
1082	lea	rsp,[88+rsp]
1083$L$ccm64_dec_ret:
1084	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1085	mov	rsi,QWORD[16+rsp]
1086	DB	0F3h,0C3h		;repret
1087$L$SEH_end_aesni_ccm64_decrypt_blocks:
1088global	aesni_ctr32_encrypt_blocks
1089
1090ALIGN	16
1091aesni_ctr32_encrypt_blocks:
1092	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1093	mov	QWORD[16+rsp],rsi
1094	mov	rax,rsp
1095$L$SEH_begin_aesni_ctr32_encrypt_blocks:
1096	mov	rdi,rcx
1097	mov	rsi,rdx
1098	mov	rdx,r8
1099	mov	rcx,r9
1100	mov	r8,QWORD[40+rsp]
1101
1102
1103	cmp	rdx,1
1104	jne	NEAR $L$ctr32_bulk
1105
1106
1107
1108	movups	xmm2,XMMWORD[r8]
1109	movups	xmm3,XMMWORD[rdi]
1110	mov	edx,DWORD[240+rcx]
1111	movups	xmm0,XMMWORD[rcx]
1112	movups	xmm1,XMMWORD[16+rcx]
1113	lea	rcx,[32+rcx]
1114	xorps	xmm2,xmm0
1115$L$oop_enc1_7:
1116DB	102,15,56,220,209
1117	dec	edx
1118	movups	xmm1,XMMWORD[rcx]
1119	lea	rcx,[16+rcx]
1120	jnz	NEAR $L$oop_enc1_7
1121DB	102,15,56,221,209
1122	pxor	xmm0,xmm0
1123	pxor	xmm1,xmm1
1124	xorps	xmm2,xmm3
1125	pxor	xmm3,xmm3
1126	movups	XMMWORD[rsi],xmm2
1127	xorps	xmm2,xmm2
1128	jmp	NEAR $L$ctr32_epilogue
1129
1130ALIGN	16
1131$L$ctr32_bulk:
1132	lea	r11,[rsp]
1133	push	rbp
1134	sub	rsp,288
1135	and	rsp,-16
1136	movaps	XMMWORD[(-168)+r11],xmm6
1137	movaps	XMMWORD[(-152)+r11],xmm7
1138	movaps	XMMWORD[(-136)+r11],xmm8
1139	movaps	XMMWORD[(-120)+r11],xmm9
1140	movaps	XMMWORD[(-104)+r11],xmm10
1141	movaps	XMMWORD[(-88)+r11],xmm11
1142	movaps	XMMWORD[(-72)+r11],xmm12
1143	movaps	XMMWORD[(-56)+r11],xmm13
1144	movaps	XMMWORD[(-40)+r11],xmm14
1145	movaps	XMMWORD[(-24)+r11],xmm15
1146$L$ctr32_body:
1147
1148
1149
1150
1151	movdqu	xmm2,XMMWORD[r8]
1152	movdqu	xmm0,XMMWORD[rcx]
1153	mov	r8d,DWORD[12+r8]
1154	pxor	xmm2,xmm0
1155	mov	ebp,DWORD[12+rcx]
1156	movdqa	XMMWORD[rsp],xmm2
1157	bswap	r8d
1158	movdqa	xmm3,xmm2
1159	movdqa	xmm4,xmm2
1160	movdqa	xmm5,xmm2
1161	movdqa	XMMWORD[64+rsp],xmm2
1162	movdqa	XMMWORD[80+rsp],xmm2
1163	movdqa	XMMWORD[96+rsp],xmm2
1164	mov	r10,rdx
1165	movdqa	XMMWORD[112+rsp],xmm2
1166
1167	lea	rax,[1+r8]
1168	lea	rdx,[2+r8]
1169	bswap	eax
1170	bswap	edx
1171	xor	eax,ebp
1172	xor	edx,ebp
1173DB	102,15,58,34,216,3
1174	lea	rax,[3+r8]
1175	movdqa	XMMWORD[16+rsp],xmm3
1176DB	102,15,58,34,226,3
1177	bswap	eax
1178	mov	rdx,r10
1179	lea	r10,[4+r8]
1180	movdqa	XMMWORD[32+rsp],xmm4
1181	xor	eax,ebp
1182	bswap	r10d
1183DB	102,15,58,34,232,3
1184	xor	r10d,ebp
1185	movdqa	XMMWORD[48+rsp],xmm5
1186	lea	r9,[5+r8]
1187	mov	DWORD[((64+12))+rsp],r10d
1188	bswap	r9d
1189	lea	r10,[6+r8]
1190	mov	eax,DWORD[240+rcx]
1191	xor	r9d,ebp
1192	bswap	r10d
1193	mov	DWORD[((80+12))+rsp],r9d
1194	xor	r10d,ebp
1195	lea	r9,[7+r8]
1196	mov	DWORD[((96+12))+rsp],r10d
1197	bswap	r9d
1198	lea	r10,[OPENSSL_ia32cap_P]
1199	mov	r10d,DWORD[4+r10]
1200	xor	r9d,ebp
1201	and	r10d,71303168
1202	mov	DWORD[((112+12))+rsp],r9d
1203
1204	movups	xmm1,XMMWORD[16+rcx]
1205
1206	movdqa	xmm6,XMMWORD[64+rsp]
1207	movdqa	xmm7,XMMWORD[80+rsp]
1208
1209	cmp	rdx,8
1210	jb	NEAR $L$ctr32_tail
1211
1212	sub	rdx,6
1213	cmp	r10d,4194304
1214	je	NEAR $L$ctr32_6x
1215
1216	lea	rcx,[128+rcx]
1217	sub	rdx,2
1218	jmp	NEAR $L$ctr32_loop8
1219
1220ALIGN	16
1221$L$ctr32_6x:
1222	shl	eax,4
1223	mov	r10d,48
1224	bswap	ebp
1225	lea	rcx,[32+rax*1+rcx]
1226	sub	r10,rax
1227	jmp	NEAR $L$ctr32_loop6
1228
1229ALIGN	16
1230$L$ctr32_loop6:
1231	add	r8d,6
1232	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]
1233DB	102,15,56,220,209
1234	mov	eax,r8d
1235	xor	eax,ebp
1236DB	102,15,56,220,217
1237DB	0x0f,0x38,0xf1,0x44,0x24,12
1238	lea	eax,[1+r8]
1239DB	102,15,56,220,225
1240	xor	eax,ebp
1241DB	0x0f,0x38,0xf1,0x44,0x24,28
1242DB	102,15,56,220,233
1243	lea	eax,[2+r8]
1244	xor	eax,ebp
1245DB	102,15,56,220,241
1246DB	0x0f,0x38,0xf1,0x44,0x24,44
1247	lea	eax,[3+r8]
1248DB	102,15,56,220,249
1249	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]
1250	xor	eax,ebp
1251
1252DB	102,15,56,220,208
1253DB	0x0f,0x38,0xf1,0x44,0x24,60
1254	lea	eax,[4+r8]
1255DB	102,15,56,220,216
1256	xor	eax,ebp
1257DB	0x0f,0x38,0xf1,0x44,0x24,76
1258DB	102,15,56,220,224
1259	lea	eax,[5+r8]
1260	xor	eax,ebp
1261DB	102,15,56,220,232
1262DB	0x0f,0x38,0xf1,0x44,0x24,92
1263	mov	rax,r10
1264DB	102,15,56,220,240
1265DB	102,15,56,220,248
1266	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]
1267
1268	call	$L$enc_loop6
1269
1270	movdqu	xmm8,XMMWORD[rdi]
1271	movdqu	xmm9,XMMWORD[16+rdi]
1272	movdqu	xmm10,XMMWORD[32+rdi]
1273	movdqu	xmm11,XMMWORD[48+rdi]
1274	movdqu	xmm12,XMMWORD[64+rdi]
1275	movdqu	xmm13,XMMWORD[80+rdi]
1276	lea	rdi,[96+rdi]
1277	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]
1278	pxor	xmm8,xmm2
1279	movaps	xmm2,XMMWORD[rsp]
1280	pxor	xmm9,xmm3
1281	movaps	xmm3,XMMWORD[16+rsp]
1282	pxor	xmm10,xmm4
1283	movaps	xmm4,XMMWORD[32+rsp]
1284	pxor	xmm11,xmm5
1285	movaps	xmm5,XMMWORD[48+rsp]
1286	pxor	xmm12,xmm6
1287	movaps	xmm6,XMMWORD[64+rsp]
1288	pxor	xmm13,xmm7
1289	movaps	xmm7,XMMWORD[80+rsp]
1290	movdqu	XMMWORD[rsi],xmm8
1291	movdqu	XMMWORD[16+rsi],xmm9
1292	movdqu	XMMWORD[32+rsi],xmm10
1293	movdqu	XMMWORD[48+rsi],xmm11
1294	movdqu	XMMWORD[64+rsi],xmm12
1295	movdqu	XMMWORD[80+rsi],xmm13
1296	lea	rsi,[96+rsi]
1297
1298	sub	rdx,6
1299	jnc	NEAR $L$ctr32_loop6
1300
1301	add	rdx,6
1302	jz	NEAR $L$ctr32_done
1303
1304	lea	eax,[((-48))+r10]
1305	lea	rcx,[((-80))+r10*1+rcx]
1306	neg	eax
1307	shr	eax,4
1308	jmp	NEAR $L$ctr32_tail
1309
1310ALIGN	32
1311$L$ctr32_loop8:
1312	add	r8d,8
1313	movdqa	xmm8,XMMWORD[96+rsp]
1314DB	102,15,56,220,209
1315	mov	r9d,r8d
1316	movdqa	xmm9,XMMWORD[112+rsp]
1317DB	102,15,56,220,217
1318	bswap	r9d
1319	movups	xmm0,XMMWORD[((32-128))+rcx]
1320DB	102,15,56,220,225
1321	xor	r9d,ebp
1322	nop
1323DB	102,15,56,220,233
1324	mov	DWORD[((0+12))+rsp],r9d
1325	lea	r9,[1+r8]
1326DB	102,15,56,220,241
1327DB	102,15,56,220,249
1328DB	102,68,15,56,220,193
1329DB	102,68,15,56,220,201
1330	movups	xmm1,XMMWORD[((48-128))+rcx]
1331	bswap	r9d
1332DB	102,15,56,220,208
1333DB	102,15,56,220,216
1334	xor	r9d,ebp
1335DB	0x66,0x90
1336DB	102,15,56,220,224
1337DB	102,15,56,220,232
1338	mov	DWORD[((16+12))+rsp],r9d
1339	lea	r9,[2+r8]
1340DB	102,15,56,220,240
1341DB	102,15,56,220,248
1342DB	102,68,15,56,220,192
1343DB	102,68,15,56,220,200
1344	movups	xmm0,XMMWORD[((64-128))+rcx]
1345	bswap	r9d
1346DB	102,15,56,220,209
1347DB	102,15,56,220,217
1348	xor	r9d,ebp
1349DB	0x66,0x90
1350DB	102,15,56,220,225
1351DB	102,15,56,220,233
1352	mov	DWORD[((32+12))+rsp],r9d
1353	lea	r9,[3+r8]
1354DB	102,15,56,220,241
1355DB	102,15,56,220,249
1356DB	102,68,15,56,220,193
1357DB	102,68,15,56,220,201
1358	movups	xmm1,XMMWORD[((80-128))+rcx]
1359	bswap	r9d
1360DB	102,15,56,220,208
1361DB	102,15,56,220,216
1362	xor	r9d,ebp
1363DB	0x66,0x90
1364DB	102,15,56,220,224
1365DB	102,15,56,220,232
1366	mov	DWORD[((48+12))+rsp],r9d
1367	lea	r9,[4+r8]
1368DB	102,15,56,220,240
1369DB	102,15,56,220,248
1370DB	102,68,15,56,220,192
1371DB	102,68,15,56,220,200
1372	movups	xmm0,XMMWORD[((96-128))+rcx]
1373	bswap	r9d
1374DB	102,15,56,220,209
1375DB	102,15,56,220,217
1376	xor	r9d,ebp
1377DB	0x66,0x90
1378DB	102,15,56,220,225
1379DB	102,15,56,220,233
1380	mov	DWORD[((64+12))+rsp],r9d
1381	lea	r9,[5+r8]
1382DB	102,15,56,220,241
1383DB	102,15,56,220,249
1384DB	102,68,15,56,220,193
1385DB	102,68,15,56,220,201
1386	movups	xmm1,XMMWORD[((112-128))+rcx]
1387	bswap	r9d
1388DB	102,15,56,220,208
1389DB	102,15,56,220,216
1390	xor	r9d,ebp
1391DB	0x66,0x90
1392DB	102,15,56,220,224
1393DB	102,15,56,220,232
1394	mov	DWORD[((80+12))+rsp],r9d
1395	lea	r9,[6+r8]
1396DB	102,15,56,220,240
1397DB	102,15,56,220,248
1398DB	102,68,15,56,220,192
1399DB	102,68,15,56,220,200
1400	movups	xmm0,XMMWORD[((128-128))+rcx]
1401	bswap	r9d
1402DB	102,15,56,220,209
1403DB	102,15,56,220,217
1404	xor	r9d,ebp
1405DB	0x66,0x90
1406DB	102,15,56,220,225
1407DB	102,15,56,220,233
1408	mov	DWORD[((96+12))+rsp],r9d
1409	lea	r9,[7+r8]
1410DB	102,15,56,220,241
1411DB	102,15,56,220,249
1412DB	102,68,15,56,220,193
1413DB	102,68,15,56,220,201
1414	movups	xmm1,XMMWORD[((144-128))+rcx]
1415	bswap	r9d
1416DB	102,15,56,220,208
1417DB	102,15,56,220,216
1418DB	102,15,56,220,224
1419	xor	r9d,ebp
1420	movdqu	xmm10,XMMWORD[rdi]
1421DB	102,15,56,220,232
1422	mov	DWORD[((112+12))+rsp],r9d
1423	cmp	eax,11
1424DB	102,15,56,220,240
1425DB	102,15,56,220,248
1426DB	102,68,15,56,220,192
1427DB	102,68,15,56,220,200
1428	movups	xmm0,XMMWORD[((160-128))+rcx]
1429
1430	jb	NEAR $L$ctr32_enc_done
1431
1432DB	102,15,56,220,209
1433DB	102,15,56,220,217
1434DB	102,15,56,220,225
1435DB	102,15,56,220,233
1436DB	102,15,56,220,241
1437DB	102,15,56,220,249
1438DB	102,68,15,56,220,193
1439DB	102,68,15,56,220,201
1440	movups	xmm1,XMMWORD[((176-128))+rcx]
1441
1442DB	102,15,56,220,208
1443DB	102,15,56,220,216
1444DB	102,15,56,220,224
1445DB	102,15,56,220,232
1446DB	102,15,56,220,240
1447DB	102,15,56,220,248
1448DB	102,68,15,56,220,192
1449DB	102,68,15,56,220,200
1450	movups	xmm0,XMMWORD[((192-128))+rcx]
1451	je	NEAR $L$ctr32_enc_done
1452
1453DB	102,15,56,220,209
1454DB	102,15,56,220,217
1455DB	102,15,56,220,225
1456DB	102,15,56,220,233
1457DB	102,15,56,220,241
1458DB	102,15,56,220,249
1459DB	102,68,15,56,220,193
1460DB	102,68,15,56,220,201
1461	movups	xmm1,XMMWORD[((208-128))+rcx]
1462
1463DB	102,15,56,220,208
1464DB	102,15,56,220,216
1465DB	102,15,56,220,224
1466DB	102,15,56,220,232
1467DB	102,15,56,220,240
1468DB	102,15,56,220,248
1469DB	102,68,15,56,220,192
1470DB	102,68,15,56,220,200
1471	movups	xmm0,XMMWORD[((224-128))+rcx]
1472	jmp	NEAR $L$ctr32_enc_done
1473
1474ALIGN	16
1475$L$ctr32_enc_done:
1476	movdqu	xmm11,XMMWORD[16+rdi]
1477	pxor	xmm10,xmm0
1478	movdqu	xmm12,XMMWORD[32+rdi]
1479	pxor	xmm11,xmm0
1480	movdqu	xmm13,XMMWORD[48+rdi]
1481	pxor	xmm12,xmm0
1482	movdqu	xmm14,XMMWORD[64+rdi]
1483	pxor	xmm13,xmm0
1484	movdqu	xmm15,XMMWORD[80+rdi]
1485	pxor	xmm14,xmm0
1486	pxor	xmm15,xmm0
1487DB	102,15,56,220,209
1488DB	102,15,56,220,217
1489DB	102,15,56,220,225
1490DB	102,15,56,220,233
1491DB	102,15,56,220,241
1492DB	102,15,56,220,249
1493DB	102,68,15,56,220,193
1494DB	102,68,15,56,220,201
1495	movdqu	xmm1,XMMWORD[96+rdi]
1496	lea	rdi,[128+rdi]
1497
1498DB	102,65,15,56,221,210
1499	pxor	xmm1,xmm0
1500	movdqu	xmm10,XMMWORD[((112-128))+rdi]
1501DB	102,65,15,56,221,219
1502	pxor	xmm10,xmm0
1503	movdqa	xmm11,XMMWORD[rsp]
1504DB	102,65,15,56,221,228
1505DB	102,65,15,56,221,237
1506	movdqa	xmm12,XMMWORD[16+rsp]
1507	movdqa	xmm13,XMMWORD[32+rsp]
1508DB	102,65,15,56,221,246
1509DB	102,65,15,56,221,255
1510	movdqa	xmm14,XMMWORD[48+rsp]
1511	movdqa	xmm15,XMMWORD[64+rsp]
1512DB	102,68,15,56,221,193
1513	movdqa	xmm0,XMMWORD[80+rsp]
1514	movups	xmm1,XMMWORD[((16-128))+rcx]
1515DB	102,69,15,56,221,202
1516
1517	movups	XMMWORD[rsi],xmm2
1518	movdqa	xmm2,xmm11
1519	movups	XMMWORD[16+rsi],xmm3
1520	movdqa	xmm3,xmm12
1521	movups	XMMWORD[32+rsi],xmm4
1522	movdqa	xmm4,xmm13
1523	movups	XMMWORD[48+rsi],xmm5
1524	movdqa	xmm5,xmm14
1525	movups	XMMWORD[64+rsi],xmm6
1526	movdqa	xmm6,xmm15
1527	movups	XMMWORD[80+rsi],xmm7
1528	movdqa	xmm7,xmm0
1529	movups	XMMWORD[96+rsi],xmm8
1530	movups	XMMWORD[112+rsi],xmm9
1531	lea	rsi,[128+rsi]
1532
1533	sub	rdx,8
1534	jnc	NEAR $L$ctr32_loop8
1535
1536	add	rdx,8
1537	jz	NEAR $L$ctr32_done
1538	lea	rcx,[((-128))+rcx]
1539
1540$L$ctr32_tail:
1541
1542
1543	lea	rcx,[16+rcx]
1544	cmp	rdx,4
1545	jb	NEAR $L$ctr32_loop3
1546	je	NEAR $L$ctr32_loop4
1547
1548
1549	shl	eax,4
1550	movdqa	xmm8,XMMWORD[96+rsp]
1551	pxor	xmm9,xmm9
1552
1553	movups	xmm0,XMMWORD[16+rcx]
1554DB	102,15,56,220,209
1555DB	102,15,56,220,217
1556	lea	rcx,[((32-16))+rax*1+rcx]
1557	neg	rax
1558DB	102,15,56,220,225
1559	add	rax,16
1560	movups	xmm10,XMMWORD[rdi]
1561DB	102,15,56,220,233
1562DB	102,15,56,220,241
1563	movups	xmm11,XMMWORD[16+rdi]
1564	movups	xmm12,XMMWORD[32+rdi]
1565DB	102,15,56,220,249
1566DB	102,68,15,56,220,193
1567
1568	call	$L$enc_loop8_enter
1569
1570	movdqu	xmm13,XMMWORD[48+rdi]
1571	pxor	xmm2,xmm10
1572	movdqu	xmm10,XMMWORD[64+rdi]
1573	pxor	xmm3,xmm11
1574	movdqu	XMMWORD[rsi],xmm2
1575	pxor	xmm4,xmm12
1576	movdqu	XMMWORD[16+rsi],xmm3
1577	pxor	xmm5,xmm13
1578	movdqu	XMMWORD[32+rsi],xmm4
1579	pxor	xmm6,xmm10
1580	movdqu	XMMWORD[48+rsi],xmm5
1581	movdqu	XMMWORD[64+rsi],xmm6
1582	cmp	rdx,6
1583	jb	NEAR $L$ctr32_done
1584
1585	movups	xmm11,XMMWORD[80+rdi]
1586	xorps	xmm7,xmm11
1587	movups	XMMWORD[80+rsi],xmm7
1588	je	NEAR $L$ctr32_done
1589
1590	movups	xmm12,XMMWORD[96+rdi]
1591	xorps	xmm8,xmm12
1592	movups	XMMWORD[96+rsi],xmm8
1593	jmp	NEAR $L$ctr32_done
1594
1595ALIGN	32
1596$L$ctr32_loop4:
1597DB	102,15,56,220,209
1598	lea	rcx,[16+rcx]
1599	dec	eax
1600DB	102,15,56,220,217
1601DB	102,15,56,220,225
1602DB	102,15,56,220,233
1603	movups	xmm1,XMMWORD[rcx]
1604	jnz	NEAR $L$ctr32_loop4
1605DB	102,15,56,221,209
1606DB	102,15,56,221,217
1607	movups	xmm10,XMMWORD[rdi]
1608	movups	xmm11,XMMWORD[16+rdi]
1609DB	102,15,56,221,225
1610DB	102,15,56,221,233
1611	movups	xmm12,XMMWORD[32+rdi]
1612	movups	xmm13,XMMWORD[48+rdi]
1613
1614	xorps	xmm2,xmm10
1615	movups	XMMWORD[rsi],xmm2
1616	xorps	xmm3,xmm11
1617	movups	XMMWORD[16+rsi],xmm3
1618	pxor	xmm4,xmm12
1619	movdqu	XMMWORD[32+rsi],xmm4
1620	pxor	xmm5,xmm13
1621	movdqu	XMMWORD[48+rsi],xmm5
1622	jmp	NEAR $L$ctr32_done
1623
1624ALIGN	32
1625$L$ctr32_loop3:
1626DB	102,15,56,220,209
1627	lea	rcx,[16+rcx]
1628	dec	eax
1629DB	102,15,56,220,217
1630DB	102,15,56,220,225
1631	movups	xmm1,XMMWORD[rcx]
1632	jnz	NEAR $L$ctr32_loop3
1633DB	102,15,56,221,209
1634DB	102,15,56,221,217
1635DB	102,15,56,221,225
1636
1637	movups	xmm10,XMMWORD[rdi]
1638	xorps	xmm2,xmm10
1639	movups	XMMWORD[rsi],xmm2
1640	cmp	rdx,2
1641	jb	NEAR $L$ctr32_done
1642
1643	movups	xmm11,XMMWORD[16+rdi]
1644	xorps	xmm3,xmm11
1645	movups	XMMWORD[16+rsi],xmm3
1646	je	NEAR $L$ctr32_done
1647
1648	movups	xmm12,XMMWORD[32+rdi]
1649	xorps	xmm4,xmm12
1650	movups	XMMWORD[32+rsi],xmm4
1651
1652$L$ctr32_done:
1653	xorps	xmm0,xmm0
1654	xor	ebp,ebp
1655	pxor	xmm1,xmm1
1656	pxor	xmm2,xmm2
1657	pxor	xmm3,xmm3
1658	pxor	xmm4,xmm4
1659	pxor	xmm5,xmm5
1660	movaps	xmm6,XMMWORD[((-168))+r11]
1661	movaps	XMMWORD[(-168)+r11],xmm0
1662	movaps	xmm7,XMMWORD[((-152))+r11]
1663	movaps	XMMWORD[(-152)+r11],xmm0
1664	movaps	xmm8,XMMWORD[((-136))+r11]
1665	movaps	XMMWORD[(-136)+r11],xmm0
1666	movaps	xmm9,XMMWORD[((-120))+r11]
1667	movaps	XMMWORD[(-120)+r11],xmm0
1668	movaps	xmm10,XMMWORD[((-104))+r11]
1669	movaps	XMMWORD[(-104)+r11],xmm0
1670	movaps	xmm11,XMMWORD[((-88))+r11]
1671	movaps	XMMWORD[(-88)+r11],xmm0
1672	movaps	xmm12,XMMWORD[((-72))+r11]
1673	movaps	XMMWORD[(-72)+r11],xmm0
1674	movaps	xmm13,XMMWORD[((-56))+r11]
1675	movaps	XMMWORD[(-56)+r11],xmm0
1676	movaps	xmm14,XMMWORD[((-40))+r11]
1677	movaps	XMMWORD[(-40)+r11],xmm0
1678	movaps	xmm15,XMMWORD[((-24))+r11]
1679	movaps	XMMWORD[(-24)+r11],xmm0
1680	movaps	XMMWORD[rsp],xmm0
1681	movaps	XMMWORD[16+rsp],xmm0
1682	movaps	XMMWORD[32+rsp],xmm0
1683	movaps	XMMWORD[48+rsp],xmm0
1684	movaps	XMMWORD[64+rsp],xmm0
1685	movaps	XMMWORD[80+rsp],xmm0
1686	movaps	XMMWORD[96+rsp],xmm0
1687	movaps	XMMWORD[112+rsp],xmm0
1688	mov	rbp,QWORD[((-8))+r11]
1689	lea	rsp,[r11]
1690$L$ctr32_epilogue:
1691	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1692	mov	rsi,QWORD[16+rsp]
1693	DB	0F3h,0C3h		;repret
1694$L$SEH_end_aesni_ctr32_encrypt_blocks:
; aesni_xts_encrypt -- exported entry point, Win64 calling convention.
; The prologue spills rdi/rsi (callee-saved on Win64) into the slots at
; [8+rsp]/[16+rsp] and then moves the Win64 arguments
; (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) into rdi, rsi, rdx, rcx, r8, r9.
; As used by the code that follows:
;   rdi = input pointer            rsi = output pointer
;   rdx = length in bytes          rcx = key schedule used on the data
;   r8  = key schedule used on the tweak
;   r9  = pointer to the 16 bytes the initial tweak is derived from
1695global	aesni_xts_encrypt
1696
1697ALIGN	16
1698aesni_xts_encrypt:
1699	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1700	mov	QWORD[16+rsp],rsi
1701	mov	rax,rsp
1702$L$SEH_begin_aesni_xts_encrypt:
1703	mov	rdi,rcx
1704	mov	rsi,rdx
1705	mov	rdx,r8
1706	mov	rcx,r9
1707	mov	r8,QWORD[40+rsp]
1708	mov	r9,QWORD[48+rsp]
1709
1710
; Frame: r11 remembers rsp as it was on entry and is the anchor for the
; save area; rbp is pushed (it ends up at [r11-8]); 272 bytes are reserved
; and rsp is rounded down to 16 so the movdqa/movaps scratch accesses at
; [rsp..] are aligned.
1711	lea	r11,[rsp]
1712	push	rbp
1713	sub	rsp,272
1714	and	rsp,-16
; Save xmm6..xmm15 (callee-saved on Win64) at r11-168 .. r11-24.
1715	movaps	XMMWORD[(-168)+r11],xmm6
1716	movaps	XMMWORD[(-152)+r11],xmm7
1717	movaps	XMMWORD[(-136)+r11],xmm8
1718	movaps	XMMWORD[(-120)+r11],xmm9
1719	movaps	XMMWORD[(-104)+r11],xmm10
1720	movaps	XMMWORD[(-88)+r11],xmm11
1721	movaps	XMMWORD[(-72)+r11],xmm12
1722	movaps	XMMWORD[(-56)+r11],xmm13
1723	movaps	XMMWORD[(-40)+r11],xmm14
1724	movaps	XMMWORD[(-24)+r11],xmm15
; Body setup: derive the initial tweak, precompute the first six tweaks
; and prepare the registers used by the six-block main loop.
1725$L$xts_enc_body:
; xmm2 = 16 bytes at [r9]. eax / r10d = DWORD at offset 240 of the key
; schedules at r8 / rcx; both are used below as round-loop counters.
1726	movups	xmm2,XMMWORD[r9]
1727	mov	eax,DWORD[240+r8]
1728	mov	r10d,DWORD[240+rcx]
1729	movups	xmm0,XMMWORD[r8]
1730	movups	xmm1,XMMWORD[16+r8]
1731	lea	r8,[32+r8]
1732	xorps	xmm2,xmm0
; One-block encryption of xmm2 with the key schedule at r8.
; DB 102,15,56,220,209 encodes aesenc xmm2,xmm1 and
; DB 102,15,56,221,209 encodes aesenclast xmm2,xmm1.
1733$L$oop_enc1_8:
1734DB	102,15,56,220,209
1735	dec	eax
1736	movups	xmm1,XMMWORD[r8]
1737	lea	r8,[16+r8]
1738	jnz	NEAR $L$oop_enc1_8
1739DB	102,15,56,221,209
; xmm0 = round key 0 of the data key, rbp = copy of the key pointer,
; eax = round count, r10d = 16*round count, r9 = original length (its low
; 4 bits are examined at $L$xts_enc_done), rdx = length rounded down to a
; multiple of 16.
1740	movups	xmm0,XMMWORD[rcx]
1741	mov	rbp,rcx
1742	mov	eax,r10d
1743	shl	r10d,4
1744	mov	r9,rdx
1745	and	rdx,-16
1746
; xmm1 = key block at offset 16+16*rounds, i.e. the one a single-block
; loop like the one above would feed to aesenclast.
1747	movups	xmm1,XMMWORD[16+r10*1+rcx]
1748
; Tweak schedule. xmm15 is the running tweak; xmm8 is the constant loaded
; from $L$xts_magic (defined outside this chunk -- presumably the XTS
; reduction constant, confirm there). pshufd 0x5f replicates dwords 3 and
; 1 of the tweak into xmm9, so psrad 31 turns the top bit of each 64-bit
; half into an all-ones/all-zeros dword mask, and paddd xmm9,xmm9 moves
; the next bit into place for the following step. Each step copies the
; current tweak, doubles xmm15 per 64-bit lane (paddq) and XORs in
; (mask AND xmm8).
; Result: xmm10..xmm14 = tweaks 0..4, each already XORed with round key 0
; (xmm0); xmm15 = tweak 5 without that XOR.
; The interleaved pxor xmm1,xmm0 makes xmm1 = round key 0 XOR last key.
1749	movdqa	xmm8,XMMWORD[$L$xts_magic]
1750	movdqa	xmm15,xmm2
1751	pshufd	xmm9,xmm2,0x5f
1752	pxor	xmm1,xmm0
1753	movdqa	xmm14,xmm9
1754	paddd	xmm9,xmm9
1755	movdqa	xmm10,xmm15
1756	psrad	xmm14,31
1757	paddq	xmm15,xmm15
1758	pand	xmm14,xmm8
1759	pxor	xmm10,xmm0
1760	pxor	xmm15,xmm14
1761	movdqa	xmm14,xmm9
1762	paddd	xmm9,xmm9
1763	movdqa	xmm11,xmm15
1764	psrad	xmm14,31
1765	paddq	xmm15,xmm15
1766	pand	xmm14,xmm8
1767	pxor	xmm11,xmm0
1768	pxor	xmm15,xmm14
1769	movdqa	xmm14,xmm9
1770	paddd	xmm9,xmm9
1771	movdqa	xmm12,xmm15
1772	psrad	xmm14,31
1773	paddq	xmm15,xmm15
1774	pand	xmm14,xmm8
1775	pxor	xmm12,xmm0
1776	pxor	xmm15,xmm14
1777	movdqa	xmm14,xmm9
1778	paddd	xmm9,xmm9
1779	movdqa	xmm13,xmm15
1780	psrad	xmm14,31
1781	paddq	xmm15,xmm15
1782	pand	xmm14,xmm8
1783	pxor	xmm13,xmm0
1784	pxor	xmm15,xmm14
1785	movdqa	xmm14,xmm15
1786	psrad	xmm9,31
1787	paddq	xmm15,xmm15
1788	pand	xmm9,xmm8
1789	pxor	xmm14,xmm0
1790	pxor	xmm15,xmm9
; [96+rsp] = round key 0 XOR last round key, reloaded every main-loop pass.
1791	movaps	XMMWORD[96+rsp],xmm1
1792
; Fewer than six whole blocks: go to the tail code (eax still holds the
; round count, rcx the key pointer).
1793	sub	rdx,16*6
1794	jc	NEAR $L$xts_enc_short
1795
; Main-loop setup: rcx = key + 32 + 16*rounds; rax = r10 = 112 - 16*rounds
; is the round-key index that $L$xts_enc_loop6 steps by 32 until it hits
; zero; xmm1 = key block at offset 16; r8 = address of $L$xts_magic.
1796	mov	eax,16+96
1797	lea	rcx,[32+r10*1+rbp]
1798	sub	rax,r10
1799	movups	xmm1,XMMWORD[16+rbp]
1800	mov	r10,rax
1801	lea	r8,[$L$xts_magic]
1802	jmp	NEAR $L$xts_enc_grandloop
1803
; Main loop head: each pass handles six 16-byte blocks (96 bytes).
; State on entry (both from the setup code and from the loop-back):
;   xmm10..xmm14 = tweaks for blocks 0..4, each XORed with round key 0
;   xmm15        = tweak for block 5 (plain), xmm0 = round key 0,
;   xmm1         = key block at [16+rbp]
; Hand-encoded AES-NI: DB 102,15,56,220,<modrm> is aesenc. modrm
; 209,217,225,233,241,249 = xmm2..xmm7 with xmm1 as the round key;
; 208,216,224,232,240,248 = xmm2..xmm7 with xmm0 as the round key.
; The loads, the tweak XORs and the first two rounds are interleaved.
1804ALIGN	32
1805$L$xts_enc_grandloop:
1806	movdqu	xmm2,XMMWORD[rdi]
1807	movdqa	xmm8,xmm0
1808	movdqu	xmm3,XMMWORD[16+rdi]
1809	pxor	xmm2,xmm10
1810	movdqu	xmm4,XMMWORD[32+rdi]
1811	pxor	xmm3,xmm11
1812DB	102,15,56,220,209
1813	movdqu	xmm5,XMMWORD[48+rdi]
1814	pxor	xmm4,xmm12
1815DB	102,15,56,220,217
1816	movdqu	xmm6,XMMWORD[64+rdi]
1817	pxor	xmm5,xmm13
1818DB	102,15,56,220,225
1819	movdqu	xmm7,XMMWORD[80+rdi]
; xmm8 = round key 0 XOR xmm15 = keyed tweak for the sixth block.
1820	pxor	xmm8,xmm15
; xmm9 = round key 0 XOR last round key (stored at [96+rsp] by the setup).
1821	movdqa	xmm9,XMMWORD[96+rsp]
1822	pxor	xmm6,xmm14
1823DB	102,15,56,220,233
1824	movups	xmm0,XMMWORD[32+rbp]
1825	lea	rdi,[96+rdi]
1826	pxor	xmm7,xmm8
1827
; XORing xmm9 into a keyed tweak cancels round key 0 and leaves
; tweak XOR last round key. Those values are parked at [rsp..80+rsp] and
; later used as the memory operand of aesenclast, which then applies the
; last round key and the output tweak in one step. The [48+rsp] slot
; (xmm13) is written later, inside the $L$xts_enc_loop6 section.
1828	pxor	xmm10,xmm9
1829DB	102,15,56,220,241
1830	pxor	xmm11,xmm9
1831	movdqa	XMMWORD[rsp],xmm10
1832DB	102,15,56,220,249
1833	movups	xmm1,XMMWORD[48+rbp]
1834	pxor	xmm12,xmm9
1835
1836DB	102,15,56,220,208
1837	pxor	xmm13,xmm9
1838	movdqa	XMMWORD[16+rsp],xmm11
1839DB	102,15,56,220,216
1840	pxor	xmm14,xmm9
1841	movdqa	XMMWORD[32+rsp],xmm12
1842DB	102,15,56,220,224
1843DB	102,15,56,220,232
1844	pxor	xmm8,xmm9
1845	movdqa	XMMWORD[64+rsp],xmm14
1846DB	102,15,56,220,240
1847DB	102,15,56,220,248
1848	movups	xmm0,XMMWORD[64+rbp]
1849	movdqa	XMMWORD[80+rsp],xmm8
; xmm9 = dwords 3,3,1,1 of the running tweak, the carry source for the
; tweak updates performed after the round loop.
1850	pshufd	xmm9,xmm15,0x5f
1851	jmp	NEAR $L$xts_enc_loop6
; Middle rounds for the six blocks in xmm2..xmm7: two aesenc rounds per
; pass (first with xmm1, then with xmm0). rax is the round-key index set
; up before the main loop (112 - 16*rounds); it is stepped by 32 and the
; loop ends when it reaches zero. movups does not modify flags, so jnz
; tests the result of add rax,32.
; DB 102,15,56,220,<modrm> = aesenc (modrm as in the loop head:
; 209..249 use xmm1, 208..248 use xmm0).
1852ALIGN	32
1853$L$xts_enc_loop6:
1854DB	102,15,56,220,209
1855DB	102,15,56,220,217
1856DB	102,15,56,220,225
1857DB	102,15,56,220,233
1858DB	102,15,56,220,241
1859DB	102,15,56,220,249
1860	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
1861	add	rax,32
1862
1863DB	102,15,56,220,208
1864DB	102,15,56,220,216
1865DB	102,15,56,220,224
1866DB	102,15,56,220,232
1867DB	102,15,56,220,240
1868DB	102,15,56,220,248
1869	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
1870	jnz	NEAR $L$xts_enc_loop6
1871
; Remaining rounds, interleaved with computing the tweaks for the next six
; blocks. xmm8 = constant at [r8] ($L$xts_magic); every step doubles xmm15
; (paddq) and XORs in (carry mask AND xmm8), the mask coming from
; psrad 31 on a copy of xmm9. xmm10..xmm14 are rebuilt as
; round key 0 (loaded from [rbp]) XOR the new tweaks.
1872	movdqa	xmm8,XMMWORD[r8]
1873	movdqa	xmm14,xmm9
1874	paddd	xmm9,xmm9
1875DB	102,15,56,220,209
1876	paddq	xmm15,xmm15
1877	psrad	xmm14,31
1878DB	102,15,56,220,217
1879	pand	xmm14,xmm8
1880	movups	xmm10,XMMWORD[rbp]
1881DB	102,15,56,220,225
1882DB	102,15,56,220,233
1883DB	102,15,56,220,241
1884	pxor	xmm15,xmm14
1885	movaps	xmm11,xmm10
1886DB	102,15,56,220,249
1887	movups	xmm1,XMMWORD[((-64))+rcx]
1888
1889	movdqa	xmm14,xmm9
1890DB	102,15,56,220,208
1891	paddd	xmm9,xmm9
1892	pxor	xmm10,xmm15
1893DB	102,15,56,220,216
1894	psrad	xmm14,31
1895	paddq	xmm15,xmm15
1896DB	102,15,56,220,224
1897DB	102,15,56,220,232
1898	pand	xmm14,xmm8
1899	movaps	xmm12,xmm11
1900DB	102,15,56,220,240
1901	pxor	xmm15,xmm14
1902	movdqa	xmm14,xmm9
1903DB	102,15,56,220,248
1904	movups	xmm0,XMMWORD[((-48))+rcx]
1905
1906	paddd	xmm9,xmm9
1907DB	102,15,56,220,209
1908	pxor	xmm11,xmm15
1909	psrad	xmm14,31
1910DB	102,15,56,220,217
1911	paddq	xmm15,xmm15
1912	pand	xmm14,xmm8
1913DB	102,15,56,220,225
1914DB	102,15,56,220,233
; Deferred store: xmm13 still holds this pass's fourth
; (tweak XOR last round key) value from the loop head; it must reach
; [48+rsp] before xmm13 is overwritten two instructions below.
1915	movdqa	XMMWORD[48+rsp],xmm13
1916	pxor	xmm15,xmm14
1917DB	102,15,56,220,241
1918	movaps	xmm13,xmm12
1919	movdqa	xmm14,xmm9
1920DB	102,15,56,220,249
1921	movups	xmm1,XMMWORD[((-32))+rcx]
1922
1923	paddd	xmm9,xmm9
1924DB	102,15,56,220,208
1925	pxor	xmm12,xmm15
1926	psrad	xmm14,31
1927DB	102,15,56,220,216
1928	paddq	xmm15,xmm15
1929	pand	xmm14,xmm8
1930DB	102,15,56,220,224
1931DB	102,15,56,220,232
1932DB	102,15,56,220,240
1933	pxor	xmm15,xmm14
1934	movaps	xmm14,xmm13
1935DB	102,15,56,220,248
1936
; xmm0 is free after the last aesenc that used it, so it briefly serves
; as the carry-mask temporary before being reloaded with round key 0.
1937	movdqa	xmm0,xmm9
1938	paddd	xmm9,xmm9
1939DB	102,15,56,220,209
1940	pxor	xmm13,xmm15
1941	psrad	xmm0,31
1942DB	102,15,56,220,217
1943	paddq	xmm15,xmm15
1944	pand	xmm0,xmm8
1945DB	102,15,56,220,225
1946DB	102,15,56,220,233
1947	pxor	xmm15,xmm0
1948	movups	xmm0,XMMWORD[rbp]
1949DB	102,15,56,220,241
1950DB	102,15,56,220,249
1951	movups	xmm1,XMMWORD[16+rbp]
1952
; Final round from memory: DB 102,15,56,221,<modrm>,36,<disp8> encodes
; aesenclast xmmN,[rsp+disp8] for xmm2..xmm7 with disp 0,16,32,48,64,80,
; i.e. the (tweak XOR last round key) values parked earlier.
; The last tweak doubling for xmm15 is interleaved; rax = r10 resets the
; round-key index for the next pass.
1953	pxor	xmm14,xmm15
1954DB	102,15,56,221,84,36,0
1955	psrad	xmm9,31
1956	paddq	xmm15,xmm15
1957DB	102,15,56,221,92,36,16
1958DB	102,15,56,221,100,36,32
1959	pand	xmm9,xmm8
1960	mov	rax,r10
1961DB	102,15,56,221,108,36,48
1962DB	102,15,56,221,116,36,64
1963DB	102,15,56,221,124,36,80
1964	pxor	xmm15,xmm9
1965
; Write the six result blocks and loop while at least 96 bytes remain.
1966	lea	rsi,[96+rsi]
1967	movups	XMMWORD[(-96)+rsi],xmm2
1968	movups	XMMWORD[(-80)+rsi],xmm3
1969	movups	XMMWORD[(-64)+rsi],xmm4
1970	movups	XMMWORD[(-48)+rsi],xmm5
1971	movups	XMMWORD[(-32)+rsi],xmm6
1972	movups	XMMWORD[(-16)+rsi],xmm7
1973	sub	rdx,16*6
1974	jnc	NEAR $L$xts_enc_grandloop
1975
; Loop exit: eax = (112 - r10d) >> 4 recovers the round count from the
; saved index, and rcx points at the start of the key schedule again.
1976	mov	eax,16+96
1977	sub	eax,r10d
1978	mov	rcx,rbp
1979	shr	eax,4
1980
; Tail: fewer than six whole blocks remain. On entry eax = round count,
; rcx = key schedule, xmm0 = round key 0 (true both when jumping here from
; the setup code and when falling out of the main loop).
; r10d keeps the round count. Each pxor with xmm0 strips the round key 0
; that was folded into a tweak register, leaving the plain tweak.
; rdx + 96 is the number of bytes left in whole blocks; the compares
; dispatch on 0, 16, 32, 48, 64 bytes and fall through for 80.
1981$L$xts_enc_short:
1982
1983	mov	r10d,eax
1984	pxor	xmm10,xmm0
1985	add	rdx,16*6
1986	jz	NEAR $L$xts_enc_done
1987
1988	pxor	xmm11,xmm0
1989	cmp	rdx,0x20
1990	jb	NEAR $L$xts_enc_one
1991	pxor	xmm12,xmm0
1992	je	NEAR $L$xts_enc_two
1993
1994	pxor	xmm13,xmm0
1995	cmp	rdx,0x40
1996	jb	NEAR $L$xts_enc_three
1997	pxor	xmm14,xmm0
1998	je	NEAR $L$xts_enc_four
1999
; Five blocks: XOR the inputs with tweaks xmm10..xmm14 and clear xmm7
; before calling the six-block helper.
2000	movdqu	xmm2,XMMWORD[rdi]
2001	movdqu	xmm3,XMMWORD[16+rdi]
2002	movdqu	xmm4,XMMWORD[32+rdi]
2003	pxor	xmm2,xmm10
2004	movdqu	xmm5,XMMWORD[48+rdi]
2005	pxor	xmm3,xmm11
2006	movdqu	xmm6,XMMWORD[64+rdi]
2007	lea	rdi,[80+rdi]
2008	pxor	xmm4,xmm12
2009	pxor	xmm5,xmm13
2010	pxor	xmm6,xmm14
2011	pxor	xmm7,xmm7
2012
; NOTE(review): _aesni_encrypt6 is defined outside this chunk; this code
; relies on it leaving xmm10..xmm15 intact -- confirm at its definition.
2013	call	_aesni_encrypt6
2014
; XOR the tweaks back in and store; xmm10 = xmm15 becomes the tweak used
; by the partial-block code at $L$xts_enc_done.
2015	xorps	xmm2,xmm10
2016	movdqa	xmm10,xmm15
2017	xorps	xmm3,xmm11
2018	xorps	xmm4,xmm12
2019	movdqu	XMMWORD[rsi],xmm2
2020	xorps	xmm5,xmm13
2021	movdqu	XMMWORD[16+rsi],xmm3
2022	xorps	xmm6,xmm14
2023	movdqu	XMMWORD[32+rsi],xmm4
2024	movdqu	XMMWORD[48+rsi],xmm5
2025	movdqu	XMMWORD[64+rsi],xmm6
2026	lea	rsi,[80+rsi]
2027	jmp	NEAR $L$xts_enc_done
2028
; One remaining block: XOR with tweak xmm10, encrypt in line with the key
; schedule at rcx (eax = round count), XOR with the tweak again, store.
; DB 102,15,56,220,209 = aesenc xmm2,xmm1;
; DB 102,15,56,221,209 = aesenclast xmm2,xmm1.
2029ALIGN	16
2030$L$xts_enc_one:
2031	movups	xmm2,XMMWORD[rdi]
2032	lea	rdi,[16+rdi]
2033	xorps	xmm2,xmm10
2034	movups	xmm0,XMMWORD[rcx]
2035	movups	xmm1,XMMWORD[16+rcx]
2036	lea	rcx,[32+rcx]
2037	xorps	xmm2,xmm0
2038$L$oop_enc1_9:
2039DB	102,15,56,220,209
2040	dec	eax
2041	movups	xmm1,XMMWORD[rcx]
2042	lea	rcx,[16+rcx]
2043	jnz	NEAR $L$oop_enc1_9
2044DB	102,15,56,221,209
2045	xorps	xmm2,xmm10
; xmm10 = next tweak, consumed at $L$xts_enc_done if a partial block follows.
2046	movdqa	xmm10,xmm11
2047	movups	XMMWORD[rsi],xmm2
2048	lea	rsi,[16+rsi]
2049	jmp	NEAR $L$xts_enc_done
2050
; Two remaining blocks: XOR with tweaks xmm10/xmm11, encrypt both with
; _aesni_encrypt2 (blocks in xmm2/xmm3, key schedule in rcx, round count
; in eax), XOR with the tweaks again and store.
2051ALIGN	16
2052$L$xts_enc_two:
2053	movups	xmm2,XMMWORD[rdi]
2054	movups	xmm3,XMMWORD[16+rdi]
2055	lea	rdi,[32+rdi]
2056	xorps	xmm2,xmm10
2057	xorps	xmm3,xmm11
2058
2059	call	_aesni_encrypt2
2060
2061	xorps	xmm2,xmm10
; xmm10 = next tweak, consumed at $L$xts_enc_done if a partial block follows.
2062	movdqa	xmm10,xmm12
2063	xorps	xmm3,xmm11
2064	movups	XMMWORD[rsi],xmm2
2065	movups	XMMWORD[16+rsi],xmm3
2066	lea	rsi,[32+rsi]
2067	jmp	NEAR $L$xts_enc_done
2068
; Three remaining blocks: XOR with tweaks xmm10..xmm12, encrypt with
; _aesni_encrypt3, XOR with the tweaks again and store.
; NOTE(review): _aesni_encrypt3 is defined outside this chunk; this code
; relies on it leaving xmm10..xmm13 intact -- confirm at its definition.
2069ALIGN	16
2070$L$xts_enc_three:
2071	movups	xmm2,XMMWORD[rdi]
2072	movups	xmm3,XMMWORD[16+rdi]
2073	movups	xmm4,XMMWORD[32+rdi]
2074	lea	rdi,[48+rdi]
2075	xorps	xmm2,xmm10
2076	xorps	xmm3,xmm11
2077	xorps	xmm4,xmm12
2078
2079	call	_aesni_encrypt3
2080
2081	xorps	xmm2,xmm10
; xmm10 = next tweak, consumed at $L$xts_enc_done if a partial block follows.
2082	movdqa	xmm10,xmm13
2083	xorps	xmm3,xmm11
2084	xorps	xmm4,xmm12
2085	movups	XMMWORD[rsi],xmm2
2086	movups	XMMWORD[16+rsi],xmm3
2087	movups	XMMWORD[32+rsi],xmm4
2088	lea	rsi,[48+rsi]
2089	jmp	NEAR $L$xts_enc_done
2090
; Four remaining blocks: XOR with tweaks xmm10..xmm13, encrypt with
; _aesni_encrypt4, XOR with the tweaks again and store.
; NOTE(review): _aesni_encrypt4 is defined outside this chunk; this code
; relies on it leaving xmm10..xmm14 intact -- confirm at its definition.
2091ALIGN	16
2092$L$xts_enc_four:
2093	movups	xmm2,XMMWORD[rdi]
2094	movups	xmm3,XMMWORD[16+rdi]
2095	movups	xmm4,XMMWORD[32+rdi]
2096	xorps	xmm2,xmm10
2097	movups	xmm5,XMMWORD[48+rdi]
2098	lea	rdi,[64+rdi]
2099	xorps	xmm3,xmm11
2100	xorps	xmm4,xmm12
2101	xorps	xmm5,xmm13
2102
2103	call	_aesni_encrypt4
2104
2105	pxor	xmm2,xmm10
; xmm10 = next tweak, consumed at $L$xts_enc_done if a partial block follows.
2106	movdqa	xmm10,xmm14
2107	pxor	xmm3,xmm11
2108	pxor	xmm4,xmm12
2109	movdqu	XMMWORD[rsi],xmm2
2110	pxor	xmm5,xmm13
2111	movdqu	XMMWORD[16+rsi],xmm3
2112	movdqu	XMMWORD[32+rsi],xmm4
2113	movdqu	XMMWORD[48+rsi],xmm5
2114	lea	rsi,[64+rsi]
2115	jmp	NEAR $L$xts_enc_done
2116
; All whole blocks are written. r9 holds the original byte length; its low
; four bits are the size of a trailing partial block. If that is zero the
; function is finished, otherwise rdx counts the tail bytes.
2117ALIGN	16
2118$L$xts_enc_done:
2119	and	r9,15
2120	jz	NEAR $L$xts_enc_ret
2121	mov	rdx,r9
2122
; Byte-wise swap for the partial block (the "steal" step named by the
; label): for each tail byte i, the input byte [rdi+i] replaces byte i of
; the last block already written at [rsi-16], and the byte it replaces is
; moved out to [rsi+i].
2123$L$xts_enc_steal:
2124	movzx	eax,BYTE[rdi]
2125	movzx	ecx,BYTE[((-16))+rsi]
2126	lea	rdi,[1+rdi]
2127	mov	BYTE[((-16))+rsi],al
2128	mov	BYTE[rsi],cl
2129	lea	rsi,[1+rsi]
2130	sub	rdx,1
2131	jnz	NEAR $L$xts_enc_steal
2132
; Rewind rsi by the tail length, reload key pointer (rbp) and round count
; (r10d), then encrypt the patched block at [rsi-16] in place with tweak
; xmm10. DB ...,220,209 = aesenc xmm2,xmm1; DB ...,221,209 = aesenclast.
2133	sub	rsi,r9
2134	mov	rcx,rbp
2135	mov	eax,r10d
2136
2137	movups	xmm2,XMMWORD[((-16))+rsi]
2138	xorps	xmm2,xmm10
2139	movups	xmm0,XMMWORD[rcx]
2140	movups	xmm1,XMMWORD[16+rcx]
2141	lea	rcx,[32+rcx]
2142	xorps	xmm2,xmm0
2143$L$oop_enc1_10:
2144DB	102,15,56,220,209
2145	dec	eax
2146	movups	xmm1,XMMWORD[rcx]
2147	lea	rcx,[16+rcx]
2148	jnz	NEAR $L$oop_enc1_10
2149DB	102,15,56,221,209
2150	xorps	xmm2,xmm10
2151	movups	XMMWORD[(-16)+rsi],xmm2
2152
; Common exit. Clears xmm0..xmm5, restores xmm6..xmm15 from the save area
; below r11 and overwrites every save slot with zero (xmm0), then zeroes
; the scratch slots at [rsp..96+rsp] so no tweak or key-derived data stays
; on the stack.
2153$L$xts_enc_ret:
2154	xorps	xmm0,xmm0
2155	pxor	xmm1,xmm1
2156	pxor	xmm2,xmm2
2157	pxor	xmm3,xmm3
2158	pxor	xmm4,xmm4
2159	pxor	xmm5,xmm5
2160	movaps	xmm6,XMMWORD[((-168))+r11]
2161	movaps	XMMWORD[(-168)+r11],xmm0
2162	movaps	xmm7,XMMWORD[((-152))+r11]
2163	movaps	XMMWORD[(-152)+r11],xmm0
2164	movaps	xmm8,XMMWORD[((-136))+r11]
2165	movaps	XMMWORD[(-136)+r11],xmm0
2166	movaps	xmm9,XMMWORD[((-120))+r11]
2167	movaps	XMMWORD[(-120)+r11],xmm0
2168	movaps	xmm10,XMMWORD[((-104))+r11]
2169	movaps	XMMWORD[(-104)+r11],xmm0
2170	movaps	xmm11,XMMWORD[((-88))+r11]
2171	movaps	XMMWORD[(-88)+r11],xmm0
2172	movaps	xmm12,XMMWORD[((-72))+r11]
2173	movaps	XMMWORD[(-72)+r11],xmm0
2174	movaps	xmm13,XMMWORD[((-56))+r11]
2175	movaps	XMMWORD[(-56)+r11],xmm0
2176	movaps	xmm14,XMMWORD[((-40))+r11]
2177	movaps	XMMWORD[(-40)+r11],xmm0
2178	movaps	xmm15,XMMWORD[((-24))+r11]
2179	movaps	XMMWORD[(-24)+r11],xmm0
2180	movaps	XMMWORD[rsp],xmm0
2181	movaps	XMMWORD[16+rsp],xmm0
2182	movaps	XMMWORD[32+rsp],xmm0
2183	movaps	XMMWORD[48+rsp],xmm0
2184	movaps	XMMWORD[64+rsp],xmm0
2185	movaps	XMMWORD[80+rsp],xmm0
2186	movaps	XMMWORD[96+rsp],xmm0
; rbp was pushed right below the entry rsp, hence [r11-8]; rsp = r11
; discards the whole frame.
2187	mov	rbp,QWORD[((-8))+r11]
2188	lea	rsp,[r11]
; Restore rdi/rsi from the slots filled by the prologue and return.
; DB 0F3h,0C3h is "rep ret".
2189$L$xts_enc_epilogue:
2190	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2191	mov	rsi,QWORD[16+rsp]
2192	DB	0F3h,0C3h		;repret
2193$L$SEH_end_aesni_xts_encrypt:
; aesni_xts_decrypt -- exported entry point, Win64 calling convention.
; The prologue spills rdi/rsi (callee-saved on Win64) into the slots at
; [8+rsp]/[16+rsp] and then moves the Win64 arguments
; (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) into rdi, rsi, rdx, rcx, r8, r9.
; As used by the code that follows:
;   rdi = input pointer            rsi = output pointer
;   rdx = length in bytes          rcx = key schedule used on the data
;   r8  = key schedule used on the tweak
;   r9  = pointer to the 16 bytes the initial tweak is derived from
2194global	aesni_xts_decrypt
2195
2196ALIGN	16
2197aesni_xts_decrypt:
2198	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2199	mov	QWORD[16+rsp],rsi
2200	mov	rax,rsp
2201$L$SEH_begin_aesni_xts_decrypt:
2202	mov	rdi,rcx
2203	mov	rsi,rdx
2204	mov	rdx,r8
2205	mov	rcx,r9
2206	mov	r8,QWORD[40+rsp]
2207	mov	r9,QWORD[48+rsp]
2208
2209
; Frame: r11 remembers rsp as it was on entry and is the anchor for the
; save area; rbp is pushed (it ends up at [r11-8]); 272 bytes are reserved
; and rsp is rounded down to 16 so the movdqa/movaps scratch accesses at
; [rsp..] are aligned.
2210	lea	r11,[rsp]
2211	push	rbp
2212	sub	rsp,272
2213	and	rsp,-16
; Save xmm6..xmm15 (callee-saved on Win64) at r11-168 .. r11-24.
2214	movaps	XMMWORD[(-168)+r11],xmm6
2215	movaps	XMMWORD[(-152)+r11],xmm7
2216	movaps	XMMWORD[(-136)+r11],xmm8
2217	movaps	XMMWORD[(-120)+r11],xmm9
2218	movaps	XMMWORD[(-104)+r11],xmm10
2219	movaps	XMMWORD[(-88)+r11],xmm11
2220	movaps	XMMWORD[(-72)+r11],xmm12
2221	movaps	XMMWORD[(-56)+r11],xmm13
2222	movaps	XMMWORD[(-40)+r11],xmm14
2223	movaps	XMMWORD[(-24)+r11],xmm15
; Body setup: derive the initial tweak, adjust the length for a partial
; final block, precompute the first six tweaks and prepare the registers
; used by the six-block main loop.
2224$L$xts_dec_body:
; xmm2 = 16 bytes at [r9]. eax / r10d = DWORD at offset 240 of the key
; schedules at r8 / rcx; both are used below as round-loop counters.
2225	movups	xmm2,XMMWORD[r9]
2226	mov	eax,DWORD[240+r8]
2227	mov	r10d,DWORD[240+rcx]
2228	movups	xmm0,XMMWORD[r8]
2229	movups	xmm1,XMMWORD[16+r8]
2230	lea	r8,[32+r8]
2231	xorps	xmm2,xmm0
; The tweak is produced with the ENCRYPT rounds even on this decrypt path:
; DB 102,15,56,220,209 = aesenc xmm2,xmm1;
; DB 102,15,56,221,209 = aesenclast xmm2,xmm1.
2232$L$oop_enc1_11:
2233DB	102,15,56,220,209
2234	dec	eax
2235	movups	xmm1,XMMWORD[r8]
2236	lea	r8,[16+r8]
2237	jnz	NEAR $L$oop_enc1_11
2238DB	102,15,56,221,209
; If the length is not a multiple of 16, hold one whole block back from
; the bulk processing: rax = 16 when (rdx & 15) != 0, else 0; rdx -= rax.
2239	xor	eax,eax
2240	test	rdx,15
2241	setnz	al
2242	shl	rax,4
2243	sub	rdx,rax
2244
; xmm0 = round key 0 of the data key, rbp = copy of the key pointer,
; eax = round count, r10d = 16*round count, r9 = adjusted length (its low
; 4 bits are examined later), rdx = length rounded down to a multiple of 16.
2245	movups	xmm0,XMMWORD[rcx]
2246	mov	rbp,rcx
2247	mov	eax,r10d
2248	shl	r10d,4
2249	mov	r9,rdx
2250	and	rdx,-16
2251
; xmm1 = key block at offset 16+16*rounds (the last one in the schedule
; as walked by the single-block loops).
2252	movups	xmm1,XMMWORD[16+r10*1+rcx]
2253
; Tweak schedule. xmm15 is the running tweak; xmm8 is the constant loaded
; from $L$xts_magic (defined outside this chunk -- presumably the XTS
; reduction constant, confirm there). pshufd 0x5f replicates dwords 3 and
; 1 of the tweak into xmm9, so psrad 31 turns the top bit of each 64-bit
; half into an all-ones/all-zeros dword mask, and paddd xmm9,xmm9 moves
; the next bit into place for the following step. Each step copies the
; current tweak, doubles xmm15 per 64-bit lane (paddq) and XORs in
; (mask AND xmm8).
; Result: xmm10..xmm14 = tweaks 0..4, each already XORed with round key 0
; (xmm0); xmm15 = tweak 5 without that XOR.
; The interleaved pxor xmm1,xmm0 makes xmm1 = round key 0 XOR last key.
2254	movdqa	xmm8,XMMWORD[$L$xts_magic]
2255	movdqa	xmm15,xmm2
2256	pshufd	xmm9,xmm2,0x5f
2257	pxor	xmm1,xmm0
2258	movdqa	xmm14,xmm9
2259	paddd	xmm9,xmm9
2260	movdqa	xmm10,xmm15
2261	psrad	xmm14,31
2262	paddq	xmm15,xmm15
2263	pand	xmm14,xmm8
2264	pxor	xmm10,xmm0
2265	pxor	xmm15,xmm14
2266	movdqa	xmm14,xmm9
2267	paddd	xmm9,xmm9
2268	movdqa	xmm11,xmm15
2269	psrad	xmm14,31
2270	paddq	xmm15,xmm15
2271	pand	xmm14,xmm8
2272	pxor	xmm11,xmm0
2273	pxor	xmm15,xmm14
2274	movdqa	xmm14,xmm9
2275	paddd	xmm9,xmm9
2276	movdqa	xmm12,xmm15
2277	psrad	xmm14,31
2278	paddq	xmm15,xmm15
2279	pand	xmm14,xmm8
2280	pxor	xmm12,xmm0
2281	pxor	xmm15,xmm14
2282	movdqa	xmm14,xmm9
2283	paddd	xmm9,xmm9
2284	movdqa	xmm13,xmm15
2285	psrad	xmm14,31
2286	paddq	xmm15,xmm15
2287	pand	xmm14,xmm8
2288	pxor	xmm13,xmm0
2289	pxor	xmm15,xmm14
2290	movdqa	xmm14,xmm15
2291	psrad	xmm9,31
2292	paddq	xmm15,xmm15
2293	pand	xmm9,xmm8
2294	pxor	xmm14,xmm0
2295	pxor	xmm15,xmm9
; [96+rsp] = round key 0 XOR last round key, reloaded every main-loop pass.
2296	movaps	XMMWORD[96+rsp],xmm1
2297
; Fewer than six whole blocks: go to the tail code (eax still holds the
; round count, rcx the key pointer).
2298	sub	rdx,16*6
2299	jc	NEAR $L$xts_dec_short
2300
; Main-loop setup: rcx = key + 32 + 16*rounds; rax = r10 = 112 - 16*rounds
; is the round-key index that $L$xts_dec_loop6 steps by 32 until it hits
; zero; xmm1 = key block at offset 16; r8 = address of $L$xts_magic.
2301	mov	eax,16+96
2302	lea	rcx,[32+r10*1+rbp]
2303	sub	rax,r10
2304	movups	xmm1,XMMWORD[16+rbp]
2305	mov	r10,rax
2306	lea	r8,[$L$xts_magic]
2307	jmp	NEAR $L$xts_dec_grandloop
2308
; Main loop head: each pass handles six 16-byte blocks (96 bytes).
; State on entry (both from the setup code and from the loop-back):
;   xmm10..xmm14 = tweaks for blocks 0..4, each XORed with round key 0
;   xmm15        = tweak for block 5 (plain), xmm0 = round key 0,
;   xmm1         = key block at [16+rbp]
; Hand-encoded AES-NI: DB 102,15,56,222,<modrm> is aesdec. modrm
; 209,217,225,233,241,249 = xmm2..xmm7 with xmm1 as the round key;
; 208,216,224,232,240,248 = xmm2..xmm7 with xmm0 as the round key.
; The loads, the tweak XORs and the first two rounds are interleaved.
2309ALIGN	32
2310$L$xts_dec_grandloop:
2311	movdqu	xmm2,XMMWORD[rdi]
2312	movdqa	xmm8,xmm0
2313	movdqu	xmm3,XMMWORD[16+rdi]
2314	pxor	xmm2,xmm10
2315	movdqu	xmm4,XMMWORD[32+rdi]
2316	pxor	xmm3,xmm11
2317DB	102,15,56,222,209
2318	movdqu	xmm5,XMMWORD[48+rdi]
2319	pxor	xmm4,xmm12
2320DB	102,15,56,222,217
2321	movdqu	xmm6,XMMWORD[64+rdi]
2322	pxor	xmm5,xmm13
2323DB	102,15,56,222,225
2324	movdqu	xmm7,XMMWORD[80+rdi]
; xmm8 = round key 0 XOR xmm15 = keyed tweak for the sixth block.
2325	pxor	xmm8,xmm15
; xmm9 = round key 0 XOR last round key (stored at [96+rsp] by the setup).
2326	movdqa	xmm9,XMMWORD[96+rsp]
2327	pxor	xmm6,xmm14
2328DB	102,15,56,222,233
2329	movups	xmm0,XMMWORD[32+rbp]
2330	lea	rdi,[96+rdi]
2331	pxor	xmm7,xmm8
2332
; XORing xmm9 into a keyed tweak cancels round key 0 and leaves
; tweak XOR last round key. Those values are parked at [rsp..80+rsp] and
; later used as the memory operand of aesdeclast, which then applies the
; last round key and the output tweak in one step. The [48+rsp] slot
; (xmm13) is written later, inside the $L$xts_dec_loop6 section.
2333	pxor	xmm10,xmm9
2334DB	102,15,56,222,241
2335	pxor	xmm11,xmm9
2336	movdqa	XMMWORD[rsp],xmm10
2337DB	102,15,56,222,249
2338	movups	xmm1,XMMWORD[48+rbp]
2339	pxor	xmm12,xmm9
2340
2341DB	102,15,56,222,208
2342	pxor	xmm13,xmm9
2343	movdqa	XMMWORD[16+rsp],xmm11
2344DB	102,15,56,222,216
2345	pxor	xmm14,xmm9
2346	movdqa	XMMWORD[32+rsp],xmm12
2347DB	102,15,56,222,224
2348DB	102,15,56,222,232
2349	pxor	xmm8,xmm9
2350	movdqa	XMMWORD[64+rsp],xmm14
2351DB	102,15,56,222,240
2352DB	102,15,56,222,248
2353	movups	xmm0,XMMWORD[64+rbp]
2354	movdqa	XMMWORD[80+rsp],xmm8
; xmm9 = dwords 3,3,1,1 of the running tweak, the carry source for the
; tweak updates performed after the round loop.
2355	pshufd	xmm9,xmm15,0x5f
2356	jmp	NEAR $L$xts_dec_loop6
; Middle rounds for the six blocks in xmm2..xmm7: two aesdec rounds per
; pass (first with xmm1, then with xmm0). rax is the round-key index set
; up before the main loop (112 - 16*rounds); it is stepped by 32 and the
; loop ends when it reaches zero. movups does not modify flags, so jnz
; tests the result of add rax,32.
; DB 102,15,56,222,<modrm> = aesdec (modrm as in the loop head:
; 209..249 use xmm1, 208..248 use xmm0).
2357ALIGN	32
2358$L$xts_dec_loop6:
2359DB	102,15,56,222,209
2360DB	102,15,56,222,217
2361DB	102,15,56,222,225
2362DB	102,15,56,222,233
2363DB	102,15,56,222,241
2364DB	102,15,56,222,249
2365	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
2366	add	rax,32
2367
2368DB	102,15,56,222,208
2369DB	102,15,56,222,216
2370DB	102,15,56,222,224
2371DB	102,15,56,222,232
2372DB	102,15,56,222,240
2373DB	102,15,56,222,248
2374	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
2375	jnz	NEAR $L$xts_dec_loop6
2376
; Remaining rounds, interleaved with computing the tweaks for the next six
; blocks. xmm8 = constant at [r8] ($L$xts_magic); every step doubles xmm15
; (paddq) and XORs in (carry mask AND xmm8), the mask coming from
; psrad 31 on a copy of xmm9. xmm10..xmm14 are rebuilt as
; round key 0 (loaded from [rbp]) XOR the new tweaks.
2377	movdqa	xmm8,XMMWORD[r8]
2378	movdqa	xmm14,xmm9
2379	paddd	xmm9,xmm9
2380DB	102,15,56,222,209
2381	paddq	xmm15,xmm15
2382	psrad	xmm14,31
2383DB	102,15,56,222,217
2384	pand	xmm14,xmm8
2385	movups	xmm10,XMMWORD[rbp]
2386DB	102,15,56,222,225
2387DB	102,15,56,222,233
2388DB	102,15,56,222,241
2389	pxor	xmm15,xmm14
2390	movaps	xmm11,xmm10
2391DB	102,15,56,222,249
2392	movups	xmm1,XMMWORD[((-64))+rcx]
2393
2394	movdqa	xmm14,xmm9
2395DB	102,15,56,222,208
2396	paddd	xmm9,xmm9
2397	pxor	xmm10,xmm15
2398DB	102,15,56,222,216
2399	psrad	xmm14,31
2400	paddq	xmm15,xmm15
2401DB	102,15,56,222,224
2402DB	102,15,56,222,232
2403	pand	xmm14,xmm8
2404	movaps	xmm12,xmm11
2405DB	102,15,56,222,240
2406	pxor	xmm15,xmm14
2407	movdqa	xmm14,xmm9
2408DB	102,15,56,222,248
2409	movups	xmm0,XMMWORD[((-48))+rcx]
2410
2411	paddd	xmm9,xmm9
2412DB	102,15,56,222,209
2413	pxor	xmm11,xmm15
2414	psrad	xmm14,31
2415DB	102,15,56,222,217
2416	paddq	xmm15,xmm15
2417	pand	xmm14,xmm8
2418DB	102,15,56,222,225
2419DB	102,15,56,222,233
; Deferred store: xmm13 still holds this pass's fourth
; (tweak XOR last round key) value from the loop head; it must reach
; [48+rsp] before xmm13 is overwritten two instructions below.
2420	movdqa	XMMWORD[48+rsp],xmm13
2421	pxor	xmm15,xmm14
2422DB	102,15,56,222,241
2423	movaps	xmm13,xmm12
2424	movdqa	xmm14,xmm9
2425DB	102,15,56,222,249
2426	movups	xmm1,XMMWORD[((-32))+rcx]
2427
2428	paddd	xmm9,xmm9
2429DB	102,15,56,222,208
2430	pxor	xmm12,xmm15
2431	psrad	xmm14,31
2432DB	102,15,56,222,216
2433	paddq	xmm15,xmm15
2434	pand	xmm14,xmm8
2435DB	102,15,56,222,224
2436DB	102,15,56,222,232
2437DB	102,15,56,222,240
2438	pxor	xmm15,xmm14
2439	movaps	xmm14,xmm13
2440DB	102,15,56,222,248
2441
; xmm0 is free after the last aesdec that used it, so it briefly serves
; as the carry-mask temporary before being reloaded with round key 0.
2442	movdqa	xmm0,xmm9
2443	paddd	xmm9,xmm9
2444DB	102,15,56,222,209
2445	pxor	xmm13,xmm15
2446	psrad	xmm0,31
2447DB	102,15,56,222,217
2448	paddq	xmm15,xmm15
2449	pand	xmm0,xmm8
2450DB	102,15,56,222,225
2451DB	102,15,56,222,233
2452	pxor	xmm15,xmm0
2453	movups	xmm0,XMMWORD[rbp]
2454DB	102,15,56,222,241
2455DB	102,15,56,222,249
2456	movups	xmm1,XMMWORD[16+rbp]
2457
; Final round from memory: DB 102,15,56,223,<modrm>,36,<disp8> encodes
; aesdeclast xmmN,[rsp+disp8] for xmm2..xmm7 with disp 0,16,32,48,64,80,
; i.e. the (tweak XOR last round key) values parked earlier.
; The last tweak doubling for xmm15 is interleaved; rax = r10 resets the
; round-key index for the next pass.
2458	pxor	xmm14,xmm15
2459DB	102,15,56,223,84,36,0
2460	psrad	xmm9,31
2461	paddq	xmm15,xmm15
2462DB	102,15,56,223,92,36,16
2463DB	102,15,56,223,100,36,32
2464	pand	xmm9,xmm8
2465	mov	rax,r10
2466DB	102,15,56,223,108,36,48
2467DB	102,15,56,223,116,36,64
2468DB	102,15,56,223,124,36,80
2469	pxor	xmm15,xmm9
2470
; Write the six result blocks and loop while at least 96 bytes remain.
2471	lea	rsi,[96+rsi]
2472	movups	XMMWORD[(-96)+rsi],xmm2
2473	movups	XMMWORD[(-80)+rsi],xmm3
2474	movups	XMMWORD[(-64)+rsi],xmm4
2475	movups	XMMWORD[(-48)+rsi],xmm5
2476	movups	XMMWORD[(-32)+rsi],xmm6
2477	movups	XMMWORD[(-16)+rsi],xmm7
2478	sub	rdx,16*6
2479	jnc	NEAR $L$xts_dec_grandloop
2480
; Loop exit: eax = (112 - r10d) >> 4 recovers the round count from the
; saved index, and rcx points at the start of the key schedule again.
2481	mov	eax,16+96
2482	sub	eax,r10d
2483	mov	rcx,rbp
2484	shr	eax,4
2485
; Tail: fewer than six whole blocks remain. On entry eax = round count,
; rcx = key schedule, xmm0 = round key 0 (true both when jumping here from
; the setup code and when falling out of the main loop).
; r10d keeps the round count. Each pxor with xmm0 strips the round key 0
; that was folded into a tweak register, leaving the plain tweak. xmm10
; AND xmm11 are stripped before the zero test because the partial-block
; code at $L$xts_dec_done2 uses both of them.
; rdx + 96 is the number of bytes left in whole blocks; the compares
; dispatch on 0, 16, 32, 48, 64 bytes and fall through for 80.
2486$L$xts_dec_short:
2487
2488	mov	r10d,eax
2489	pxor	xmm10,xmm0
2490	pxor	xmm11,xmm0
2491	add	rdx,16*6
2492	jz	NEAR $L$xts_dec_done
2493
2494	pxor	xmm12,xmm0
2495	cmp	rdx,0x20
2496	jb	NEAR $L$xts_dec_one
2497	pxor	xmm13,xmm0
2498	je	NEAR $L$xts_dec_two
2499
2500	pxor	xmm14,xmm0
2501	cmp	rdx,0x40
2502	jb	NEAR $L$xts_dec_three
2503	je	NEAR $L$xts_dec_four
2504
; Five blocks: XOR the inputs with tweaks xmm10..xmm14, then call the
; six-block helper.
2505	movdqu	xmm2,XMMWORD[rdi]
2506	movdqu	xmm3,XMMWORD[16+rdi]
2507	movdqu	xmm4,XMMWORD[32+rdi]
2508	pxor	xmm2,xmm10
2509	movdqu	xmm5,XMMWORD[48+rdi]
2510	pxor	xmm3,xmm11
2511	movdqu	xmm6,XMMWORD[64+rdi]
2512	lea	rdi,[80+rdi]
2513	pxor	xmm4,xmm12
2514	pxor	xmm5,xmm13
2515	pxor	xmm6,xmm14
2516
; NOTE(review): _aesni_decrypt6 is defined outside this chunk; this code
; relies on it leaving xmm8 and xmm10..xmm15 intact -- confirm there.
2517	call	_aesni_decrypt6
2518
; XOR the tweaks back in and store. Interleaved: xmm14 = 0, then
; pcmpgtd xmm14,xmm15 builds a per-dword sign mask of the running tweak,
; and pshufd 0x13 arranges it (into xmm11) as the carry mask for one more
; tweak doubling.
2519	xorps	xmm2,xmm10
2520	xorps	xmm3,xmm11
2521	xorps	xmm4,xmm12
2522	movdqu	XMMWORD[rsi],xmm2
2523	xorps	xmm5,xmm13
2524	movdqu	XMMWORD[16+rsi],xmm3
2525	xorps	xmm6,xmm14
2526	movdqu	XMMWORD[32+rsi],xmm4
2527	pxor	xmm14,xmm14
2528	movdqu	XMMWORD[48+rsi],xmm5
2529	pcmpgtd	xmm14,xmm15
2530	movdqu	XMMWORD[64+rsi],xmm6
2531	lea	rsi,[80+rsi]
2532	pshufd	xmm11,xmm14,0x13
; No partial block (low 4 bits of r9 are zero): finished.
2533	and	r9,15
2534	jz	NEAR $L$xts_dec_ret
2535
; Partial block follows: xmm10 = current tweak, xmm11 = the tweak after
; it (xmm15 doubled, XOR (carry mask AND xmm8)).
2536	movdqa	xmm10,xmm15
2537	paddq	xmm15,xmm15
2538	pand	xmm11,xmm8
2539	pxor	xmm11,xmm15
2540	jmp	NEAR $L$xts_dec_done2
2541
; One remaining block: XOR with tweak xmm10, decrypt in line with the key
; schedule at rcx (eax = round count), XOR with the tweak again, store.
; DB 102,15,56,222,209 = aesdec xmm2,xmm1;
; DB 102,15,56,223,209 = aesdeclast xmm2,xmm1.
2542ALIGN	16
2543$L$xts_dec_one:
2544	movups	xmm2,XMMWORD[rdi]
2545	lea	rdi,[16+rdi]
2546	xorps	xmm2,xmm10
2547	movups	xmm0,XMMWORD[rcx]
2548	movups	xmm1,XMMWORD[16+rcx]
2549	lea	rcx,[32+rcx]
2550	xorps	xmm2,xmm0
2551$L$oop_dec1_12:
2552DB	102,15,56,222,209
2553	dec	eax
2554	movups	xmm1,XMMWORD[rcx]
2555	lea	rcx,[16+rcx]
2556	jnz	NEAR $L$oop_dec1_12
2557DB	102,15,56,223,209
2558	xorps	xmm2,xmm10
; xmm10/xmm11 = the next two tweaks, for the partial-block code that
; follows $L$xts_dec_done.
2559	movdqa	xmm10,xmm11
2560	movups	XMMWORD[rsi],xmm2
2561	movdqa	xmm11,xmm12
2562	lea	rsi,[16+rsi]
2563	jmp	NEAR $L$xts_dec_done
2564
; Two remaining blocks: XOR with tweaks xmm10/xmm11, decrypt both with
; _aesni_decrypt2 (blocks in xmm2/xmm3, key schedule in rcx, round count
; in eax), XOR with the tweaks again and store.
2565ALIGN	16
2566$L$xts_dec_two:
2567	movups	xmm2,XMMWORD[rdi]
2568	movups	xmm3,XMMWORD[16+rdi]
2569	lea	rdi,[32+rdi]
2570	xorps	xmm2,xmm10
2571	xorps	xmm3,xmm11
2572
2573	call	_aesni_decrypt2
2574
; xmm10/xmm11 are reloaded with the next two tweaks (xmm12/xmm13) for the
; partial-block code that follows $L$xts_dec_done.
2575	xorps	xmm2,xmm10
2576	movdqa	xmm10,xmm12
2577	xorps	xmm3,xmm11
2578	movdqa	xmm11,xmm13
2579	movups	XMMWORD[rsi],xmm2
2580	movups	XMMWORD[16+rsi],xmm3
2581	lea	rsi,[32+rsi]
2582	jmp	NEAR $L$xts_dec_done
2583
; Three remaining blocks: XOR with tweaks xmm10..xmm12, decrypt with
; _aesni_decrypt3, XOR with the tweaks again and store.
; NOTE(review): _aesni_decrypt3 is defined outside this chunk; this code
; relies on it leaving xmm10..xmm14 intact -- confirm at its definition.
2584ALIGN	16
2585$L$xts_dec_three:
2586	movups	xmm2,XMMWORD[rdi]
2587	movups	xmm3,XMMWORD[16+rdi]
2588	movups	xmm4,XMMWORD[32+rdi]
2589	lea	rdi,[48+rdi]
2590	xorps	xmm2,xmm10
2591	xorps	xmm3,xmm11
2592	xorps	xmm4,xmm12
2593
2594	call	_aesni_decrypt3
2595
; xmm10/xmm11 are reloaded with the next two tweaks (xmm13/xmm14) for the
; partial-block code that follows $L$xts_dec_done.
2596	xorps	xmm2,xmm10
2597	movdqa	xmm10,xmm13
2598	xorps	xmm3,xmm11
2599	movdqa	xmm11,xmm14
2600	xorps	xmm4,xmm12
2601	movups	XMMWORD[rsi],xmm2
2602	movups	XMMWORD[16+rsi],xmm3
2603	movups	XMMWORD[32+rsi],xmm4
2604	lea	rsi,[48+rsi]
2605	jmp	NEAR $L$xts_dec_done
2606
; Four remaining blocks: XOR with tweaks xmm10..xmm13, decrypt with
; _aesni_decrypt4, XOR with the tweaks again and store.
; NOTE(review): _aesni_decrypt4 is defined outside this chunk; this code
; relies on it leaving xmm10..xmm15 intact -- confirm at its definition.
2607ALIGN	16
2608$L$xts_dec_four:
2609	movups	xmm2,XMMWORD[rdi]
2610	movups	xmm3,XMMWORD[16+rdi]
2611	movups	xmm4,XMMWORD[32+rdi]
2612	xorps	xmm2,xmm10
2613	movups	xmm5,XMMWORD[48+rdi]
2614	lea	rdi,[64+rdi]
2615	xorps	xmm3,xmm11
2616	xorps	xmm4,xmm12
2617	xorps	xmm5,xmm13
2618
2619	call	_aesni_decrypt4
2620
; xmm10/xmm11 are reloaded with the next two tweaks (xmm14/xmm15) for the
; partial-block code that follows $L$xts_dec_done.
2621	pxor	xmm2,xmm10
2622	movdqa	xmm10,xmm14
2623	pxor	xmm3,xmm11
2624	movdqa	xmm11,xmm15
2625	pxor	xmm4,xmm12
2626	movdqu	XMMWORD[rsi],xmm2
2627	pxor	xmm5,xmm13
2628	movdqu	XMMWORD[16+rsi],xmm3
2629	movdqu	XMMWORD[32+rsi],xmm4
2630	movdqu	XMMWORD[48+rsi],xmm5
2631	lea	rsi,[64+rsi]
2632	jmp	NEAR $L$xts_dec_done
2633
; All bulk blocks are written. The low four bits of r9 are the size of a
; trailing partial block; if that is zero the function is finished.
2634ALIGN	16
2635$L$xts_dec_done:
2636	and	r9,15
2637	jz	NEAR $L$xts_dec_ret
; Partial-block handling. On entry xmm10 and xmm11 hold two consecutive
; tweaks (xmm11 is the later one); rdx = tail byte count, rcx/eax = key
; schedule and round count reloaded from rbp/r10d.
2638$L$xts_dec_done2:
2639	mov	rdx,r9
2640	mov	rcx,rbp
2641	mov	eax,r10d
2642
; First decrypt the last whole input block at [rdi] with the LATER tweak
; (xmm11) and write it to [rsi].
; DB 102,15,56,222,209 = aesdec xmm2,xmm1; 223 = aesdeclast.
2643	movups	xmm2,XMMWORD[rdi]
2644	xorps	xmm2,xmm11
2645	movups	xmm0,XMMWORD[rcx]
2646	movups	xmm1,XMMWORD[16+rcx]
2647	lea	rcx,[32+rcx]
2648	xorps	xmm2,xmm0
2649$L$oop_dec1_13:
2650DB	102,15,56,222,209
2651	dec	eax
2652	movups	xmm1,XMMWORD[rcx]
2653	lea	rcx,[16+rcx]
2654	jnz	NEAR $L$oop_dec1_13
2655DB	102,15,56,223,209
2656	xorps	xmm2,xmm11
2657	movups	XMMWORD[rsi],xmm2
2658
; Byte-wise swap (the "steal" step named by the label): for each tail
; byte i, the input byte [rdi+16+i] replaces byte i of the block just
; written at [rsi], and the byte it replaces is moved out to [rsi+16+i].
2659$L$xts_dec_steal:
2660	movzx	eax,BYTE[16+rdi]
2661	movzx	ecx,BYTE[rsi]
2662	lea	rdi,[1+rdi]
2663	mov	BYTE[rsi],al
2664	mov	BYTE[16+rsi],cl
2665	lea	rsi,[1+rsi]
2666	sub	rdx,1
2667	jnz	NEAR $L$xts_dec_steal
2668
; Rewind rsi by the tail length, reload key pointer and round count, then
; decrypt the patched block at [rsi] in place with the EARLIER tweak
; (xmm10).
2669	sub	rsi,r9
2670	mov	rcx,rbp
2671	mov	eax,r10d
2672
2673	movups	xmm2,XMMWORD[rsi]
2674	xorps	xmm2,xmm10
2675	movups	xmm0,XMMWORD[rcx]
2676	movups	xmm1,XMMWORD[16+rcx]
2677	lea	rcx,[32+rcx]
2678	xorps	xmm2,xmm0
2679$L$oop_dec1_14:
2680DB	102,15,56,222,209
2681	dec	eax
2682	movups	xmm1,XMMWORD[rcx]
2683	lea	rcx,[16+rcx]
2684	jnz	NEAR $L$oop_dec1_14
2685DB	102,15,56,223,209
2686	xorps	xmm2,xmm10
2687	movups	XMMWORD[rsi],xmm2
2688
; Common exit. Clears xmm0..xmm5, restores xmm6..xmm15 from the save area
; below r11 and overwrites every save slot with zero (xmm0), then zeroes
; the scratch slots at [rsp..96+rsp] so no tweak or key-derived data stays
; on the stack.
2689$L$xts_dec_ret:
2690	xorps	xmm0,xmm0
2691	pxor	xmm1,xmm1
2692	pxor	xmm2,xmm2
2693	pxor	xmm3,xmm3
2694	pxor	xmm4,xmm4
2695	pxor	xmm5,xmm5
2696	movaps	xmm6,XMMWORD[((-168))+r11]
2697	movaps	XMMWORD[(-168)+r11],xmm0
2698	movaps	xmm7,XMMWORD[((-152))+r11]
2699	movaps	XMMWORD[(-152)+r11],xmm0
2700	movaps	xmm8,XMMWORD[((-136))+r11]
2701	movaps	XMMWORD[(-136)+r11],xmm0
2702	movaps	xmm9,XMMWORD[((-120))+r11]
2703	movaps	XMMWORD[(-120)+r11],xmm0
2704	movaps	xmm10,XMMWORD[((-104))+r11]
2705	movaps	XMMWORD[(-104)+r11],xmm0
2706	movaps	xmm11,XMMWORD[((-88))+r11]
2707	movaps	XMMWORD[(-88)+r11],xmm0
2708	movaps	xmm12,XMMWORD[((-72))+r11]
2709	movaps	XMMWORD[(-72)+r11],xmm0
2710	movaps	xmm13,XMMWORD[((-56))+r11]
2711	movaps	XMMWORD[(-56)+r11],xmm0
2712	movaps	xmm14,XMMWORD[((-40))+r11]
2713	movaps	XMMWORD[(-40)+r11],xmm0
2714	movaps	xmm15,XMMWORD[((-24))+r11]
2715	movaps	XMMWORD[(-24)+r11],xmm0
2716	movaps	XMMWORD[rsp],xmm0
2717	movaps	XMMWORD[16+rsp],xmm0
2718	movaps	XMMWORD[32+rsp],xmm0
2719	movaps	XMMWORD[48+rsp],xmm0
2720	movaps	XMMWORD[64+rsp],xmm0
2721	movaps	XMMWORD[80+rsp],xmm0
2722	movaps	XMMWORD[96+rsp],xmm0
; rbp was pushed right below the entry rsp, hence [r11-8]; rsp = r11
; discards the whole frame.
2723	mov	rbp,QWORD[((-8))+r11]
2724	lea	rsp,[r11]
; Restore rdi/rsi from the slots filled by the prologue and return.
; DB 0F3h,0C3h is "rep ret".
2725$L$xts_dec_epilogue:
2726	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2727	mov	rsi,QWORD[16+rsp]
2728	DB	0F3h,0C3h		;repret
2729$L$SEH_end_aesni_xts_decrypt:
; ----------------------------------------------------------------------------
; aesni_ocb_encrypt: encrypt a run of whole 16-byte blocks in OCB mode with
; AES-NI, updating a running offset and a checksum in place.
;
; Entry follows the Win64 convention (rcx, rdx, r8, r9, then stack slots); the
; prologue shuffles the arguments so that the body sees:
;   rdi = input pointer   (blocks are loaded from here)
;   rsi = output pointer  (blocks are stored here)
;   rdx = number of 16-byte blocks still to process
;   rcx = key schedule; a 32-bit round count is read from offset 240
;   r8  = number of the first block (its lowest set bit selects a table entry)
;   r9  = pointer to the 16-byte running offset (loaded at entry, stored at exit)
;   rbx = 7th argument: table of 16-byte values indexed by 16*bsf(block number)
;         (presumably the OCB L_ table -- confirm against the C caller)
;   rbp = 8th argument: pointer to the 16-byte checksum (loaded at entry,
;         stored at exit)
;
; Register roles shared with the __ocb_encrypt1/4/6 helpers:
;   xmm8  = checksum accumulator
;   xmm9  = round key 0 XOR "klast" (the round key at key+16+16*rounds)
;   xmm10 = table[0]
;   xmm15 = running offset, kept XORed with klast
;   r11   = key schedule base; rcx = key+32+16*rounds;
;   rax = r10 = 48-16*rounds (negative index that the helper loops count up to 0)
;
; NOTE(review): the control flow appears to assume rdx >= 1 on entry. With
; rdx == 0 the even-block path still processes a block, and the odd-block path
; reaches $L$ocb_enc_done before any helper has loaded xmm0. Confirm that
; callers never pass zero blocks.
; ----------------------------------------------------------------------------
2730global	aesni_ocb_encrypt
2731
2732ALIGN	32
2733aesni_ocb_encrypt:
2734	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2735	mov	QWORD[16+rsp],rsi
2736	mov	rax,rsp
2737$L$SEH_begin_aesni_ocb_encrypt:
; Move the Win64 argument registers and stack slots into rdi, rsi, rdx, rcx, r8, r9.
2738	mov	rdi,rcx
2739	mov	rsi,rdx
2740	mov	rdx,r8
2741	mov	rcx,r9
2742	mov	r8,QWORD[40+rsp]
2743	mov	r9,QWORD[48+rsp]
2744
2745
2746	lea	rax,[rsp]	; rax = rsp at entry; used below to reach arguments 7 and 8
2747	push	rbx
2748	push	rbp
2749	push	r12
2750	push	r13
2751	push	r14
2752	lea	rsp,[((-160))+rsp]	; 10 x 16 bytes to save xmm6-xmm15
2753	movaps	XMMWORD[rsp],xmm6
2754	movaps	XMMWORD[16+rsp],xmm7
2755	movaps	XMMWORD[32+rsp],xmm8
2756	movaps	XMMWORD[48+rsp],xmm9
2757	movaps	XMMWORD[64+rsp],xmm10
2758	movaps	XMMWORD[80+rsp],xmm11
2759	movaps	XMMWORD[96+rsp],xmm12
2760	movaps	XMMWORD[112+rsp],xmm13
2761	movaps	XMMWORD[128+rsp],xmm14
2762	movaps	XMMWORD[144+rsp],xmm15
2763$L$ocb_enc_body:
2764	mov	rbx,QWORD[56+rax]	; rbx = 7th argument (table base)
2765	mov	rbp,QWORD[((56+8))+rax]	; rbp = 8th argument (checksum pointer)
2766
2767	mov	r10d,DWORD[240+rcx]	; r10d = round count stored in the key schedule
2768	mov	r11,rcx	; r11 = key schedule base, kept for the helpers
2769	shl	r10d,4	; r10 = 16*rounds
2770	movups	xmm9,XMMWORD[rcx]	; xmm9 = round key 0
2771	movups	xmm1,XMMWORD[16+r10*1+rcx]	; xmm1 = klast (round key at key+16+16*rounds)
2772
2773	movdqu	xmm15,XMMWORD[r9]	; xmm15 = running offset
2774	pxor	xmm9,xmm1	; xmm9 = key0 ^ klast
2775	pxor	xmm15,xmm1	; xmm15 = offset ^ klast
2776
2777	mov	eax,16+32
2778	lea	rcx,[32+r10*1+r11]	; rcx = key+32+16*rounds, base for the helpers' indexed key loads
2779	movups	xmm1,XMMWORD[16+r11]	; xmm1 = round key 1, as the helpers expect on entry
2780	sub	rax,r10	; rax = 48-16*rounds
2781	mov	r10,rax	; r10 keeps this start index so helpers can reset rax
2782
2783	movdqu	xmm10,XMMWORD[rbx]	; xmm10 = table[0]
2784	movdqu	xmm8,XMMWORD[rbp]	; xmm8 = checksum
2785
2786	test	r8,1
2787	jnz	NEAR $L$ocb_enc_odd	; odd block number: go straight to the 6-block setup
2788
; Even block number: process one block alone so the remainder starts on an odd number.
2789	bsf	r12,r8	; r12 = index of the lowest set bit of the block number
2790	add	r8,1
2791	shl	r12,4	; scale to a byte offset into the 16-byte-entry table
2792	movdqu	xmm7,XMMWORD[r12*1+rbx]	; xmm7 = table entry for this block
2793	movdqu	xmm2,XMMWORD[rdi]	; xmm2 = input block
2794	lea	rdi,[16+rdi]
2795
2796	call	__ocb_encrypt1
2797
2798	movdqa	xmm15,xmm7	; helper leaves the new offset ^ klast in xmm7
2799	movups	XMMWORD[rsi],xmm2
2800	lea	rsi,[16+rsi]
2801	sub	rdx,1
2802	jz	NEAR $L$ocb_enc_done
2803
2804$L$ocb_enc_odd:
; r8 is odd here, so r8+1, r8+3 and r8+5 are even: look up their table offsets
; for the 2nd, 4th and 6th block of the next batch. The 1st, 3rd and 5th blocks
; use table[0] (xmm10) inside the helpers.
2805	lea	r12,[1+r8]
2806	lea	r13,[3+r8]
2807	lea	r14,[5+r8]
2808	lea	r8,[6+r8]
2809	bsf	r12,r12
2810	bsf	r13,r13
2811	bsf	r14,r14
2812	shl	r12,4
2813	shl	r13,4
2814	shl	r14,4
2815
2816	sub	rdx,6
2817	jc	NEAR $L$ocb_enc_short	; fewer than 6 blocks left
2818	jmp	NEAR $L$ocb_enc_grandloop
2819
2820ALIGN	32
2821$L$ocb_enc_grandloop:
; Main loop: six blocks per iteration.
2822	movdqu	xmm2,XMMWORD[rdi]
2823	movdqu	xmm3,XMMWORD[16+rdi]
2824	movdqu	xmm4,XMMWORD[32+rdi]
2825	movdqu	xmm5,XMMWORD[48+rdi]
2826	movdqu	xmm6,XMMWORD[64+rdi]
2827	movdqu	xmm7,XMMWORD[80+rdi]
2828	lea	rdi,[96+rdi]
2829
2830	call	__ocb_encrypt6	; also leaves the 6th block's offset ^ klast in xmm15
2831
2832	movups	XMMWORD[rsi],xmm2
2833	movups	XMMWORD[16+rsi],xmm3
2834	movups	XMMWORD[32+rsi],xmm4
2835	movups	XMMWORD[48+rsi],xmm5
2836	movups	XMMWORD[64+rsi],xmm6
2837	movups	XMMWORD[80+rsi],xmm7
2838	lea	rsi,[96+rsi]
2839	sub	rdx,6
2840	jnc	NEAR $L$ocb_enc_grandloop	; loop while at least 6 blocks were still available
2841
2842$L$ocb_enc_short:
; Remainder: undo the speculative subtraction and dispatch on 0..5 blocks left.
2843	add	rdx,6
2844	jz	NEAR $L$ocb_enc_done
2845
2846	movdqu	xmm2,XMMWORD[rdi]
2847	cmp	rdx,2
2848	jb	NEAR $L$ocb_enc_one
2849	movdqu	xmm3,XMMWORD[16+rdi]	; movdqu leaves the flags from the cmp above intact
2850	je	NEAR $L$ocb_enc_two
2851
2852	movdqu	xmm4,XMMWORD[32+rdi]
2853	cmp	rdx,4
2854	jb	NEAR $L$ocb_enc_three
2855	movdqu	xmm5,XMMWORD[48+rdi]	; flags from the cmp above are still live
2856	je	NEAR $L$ocb_enc_four
2857
; Five blocks left: run the 6-block helper with a zero block in the sixth lane.
2858	movdqu	xmm6,XMMWORD[64+rdi]
2859	pxor	xmm7,xmm7	; zero sixth lane; XORing it into the checksum is a no-op
2860
2861	call	__ocb_encrypt6
2862
2863	movdqa	xmm15,xmm14	; keep the offset of the 5th (last real) block
2864	movups	XMMWORD[rsi],xmm2
2865	movups	XMMWORD[16+rsi],xmm3
2866	movups	XMMWORD[32+rsi],xmm4
2867	movups	XMMWORD[48+rsi],xmm5
2868	movups	XMMWORD[64+rsi],xmm6
2869
2870	jmp	NEAR $L$ocb_enc_done
2871
2872ALIGN	16
2873$L$ocb_enc_one:
2874	movdqa	xmm7,xmm10	; use table[0] as this block's table entry
2875
2876	call	__ocb_encrypt1
2877
2878	movdqa	xmm15,xmm7	; new offset ^ klast
2879	movups	XMMWORD[rsi],xmm2
2880	jmp	NEAR $L$ocb_enc_done
2881
2882ALIGN	16
2883$L$ocb_enc_two:
; Two blocks left: pad lanes 3 and 4 with zero and use the 4-block helper.
2884	pxor	xmm4,xmm4
2885	pxor	xmm5,xmm5
2886
2887	call	__ocb_encrypt4
2888
2889	movdqa	xmm15,xmm11	; offset of the 2nd block
2890	movups	XMMWORD[rsi],xmm2
2891	movups	XMMWORD[16+rsi],xmm3
2892
2893	jmp	NEAR $L$ocb_enc_done
2894
2895ALIGN	16
2896$L$ocb_enc_three:
; Three blocks left: pad lane 4 with zero.
2897	pxor	xmm5,xmm5
2898
2899	call	__ocb_encrypt4
2900
2901	movdqa	xmm15,xmm12	; offset of the 3rd block
2902	movups	XMMWORD[rsi],xmm2
2903	movups	XMMWORD[16+rsi],xmm3
2904	movups	XMMWORD[32+rsi],xmm4
2905
2906	jmp	NEAR $L$ocb_enc_done
2907
2908ALIGN	16
2909$L$ocb_enc_four:
2910	call	__ocb_encrypt4
2911
2912	movdqa	xmm15,xmm13	; offset of the 4th block
2913	movups	XMMWORD[rsi],xmm2
2914	movups	XMMWORD[16+rsi],xmm3
2915	movups	XMMWORD[32+rsi],xmm4
2916	movups	XMMWORD[48+rsi],xmm5
2917
2918$L$ocb_enc_done:
; xmm0 is expected to still hold klast as last loaded by a helper's round loop;
; XORing it out turns the masked offset back into the plain offset.
2919	pxor	xmm15,xmm0
2920	movdqu	XMMWORD[rbp],xmm8	; write back the checksum
2921	movdqu	XMMWORD[r9],xmm15	; write back the running offset
2922
; Clear the volatile XMM registers, restore xmm6-xmm15 and zero their save slots.
2923	xorps	xmm0,xmm0
2924	pxor	xmm1,xmm1
2925	pxor	xmm2,xmm2
2926	pxor	xmm3,xmm3
2927	pxor	xmm4,xmm4
2928	pxor	xmm5,xmm5
2929	movaps	xmm6,XMMWORD[rsp]
2930	movaps	XMMWORD[rsp],xmm0
2931	movaps	xmm7,XMMWORD[16+rsp]
2932	movaps	XMMWORD[16+rsp],xmm0
2933	movaps	xmm8,XMMWORD[32+rsp]
2934	movaps	XMMWORD[32+rsp],xmm0
2935	movaps	xmm9,XMMWORD[48+rsp]
2936	movaps	XMMWORD[48+rsp],xmm0
2937	movaps	xmm10,XMMWORD[64+rsp]
2938	movaps	XMMWORD[64+rsp],xmm0
2939	movaps	xmm11,XMMWORD[80+rsp]
2940	movaps	XMMWORD[80+rsp],xmm0
2941	movaps	xmm12,XMMWORD[96+rsp]
2942	movaps	XMMWORD[96+rsp],xmm0
2943	movaps	xmm13,XMMWORD[112+rsp]
2944	movaps	XMMWORD[112+rsp],xmm0
2945	movaps	xmm14,XMMWORD[128+rsp]
2946	movaps	XMMWORD[128+rsp],xmm0
2947	movaps	xmm15,XMMWORD[144+rsp]
2948	movaps	XMMWORD[144+rsp],xmm0
2949	lea	rax,[((160+40))+rsp]	; rax = rsp at entry (160-byte XMM area + 5 pushes)
2950$L$ocb_enc_pop:
2951	mov	r14,QWORD[((-40))+rax]
2952	mov	r13,QWORD[((-32))+rax]
2953	mov	r12,QWORD[((-24))+rax]
2954	mov	rbp,QWORD[((-16))+rax]
2955	mov	rbx,QWORD[((-8))+rax]
2956	lea	rsp,[rax]
2957$L$ocb_enc_epilogue:
2958	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2959	mov	rsi,QWORD[16+rsp]
2960	DB	0F3h,0C3h		;repret
2961$L$SEH_end_aesni_ocb_encrypt:
2962
2963
; ----------------------------------------------------------------------------
; __ocb_encrypt6: local helper that OCB-encrypts the six blocks in xmm2-xmm7.
; "klast" below means the round key at key+16+16*rounds.
;
; In:   xmm2-xmm7 = input blocks          xmm8  = checksum accumulator
;       xmm9  = round key 0 ^ klast       xmm10 = table[0]
;       xmm15 = current offset ^ klast    xmm1  = round key 1
;       rbx = table base; r12/r13/r14 = byte offsets into the table for the
;       2nd/4th/6th block; r8 = block-number counter
;       r11 = key base; rcx = key+32+16*rounds; rax = 48-16*rounds; r10 = copy of rax
; Out:  xmm2-xmm7 = encrypted blocks; xmm8 ^= the six input blocks
;       xmm11-xmm15 = offsets of blocks 2..6, each ^ klast; xmm10 = table[0] again
;       r12/r13/r14 = table offsets for the next batch; r8 += 6
;       rax = r10; xmm1 = round key 1; xmm0 = last key loaded by the loop (klast)
; The DB lines are byte-encoded AES-NI instructions; each is decoded alongside.
; ----------------------------------------------------------------------------
2964ALIGN	32
2965__ocb_encrypt6:
; Chain the six per-block offsets (still carrying round key 0) and, for each
; lane, fold the input block into the checksum before masking it.
2966	pxor	xmm15,xmm9	; (offset ^ klast) ^ (key0 ^ klast) = offset ^ key0
2967	movdqu	xmm11,XMMWORD[r12*1+rbx]
2968	movdqa	xmm12,xmm10
2969	movdqu	xmm13,XMMWORD[r13*1+rbx]
2970	movdqa	xmm14,xmm10
2971	pxor	xmm10,xmm15	; offset for block 1 (^ key0)
2972	movdqu	xmm15,XMMWORD[r14*1+rbx]
2973	pxor	xmm11,xmm10	; offset for block 2
2974	pxor	xmm8,xmm2	; checksum ^= input block 1
2975	pxor	xmm2,xmm10	; block 1 ^ offset ^ key0
2976	pxor	xmm12,xmm11	; offset for block 3
2977	pxor	xmm8,xmm3
2978	pxor	xmm3,xmm11
2979	pxor	xmm13,xmm12	; offset for block 4
2980	pxor	xmm8,xmm4
2981	pxor	xmm4,xmm12
2982	pxor	xmm14,xmm13	; offset for block 5
2983	pxor	xmm8,xmm5
2984	pxor	xmm5,xmm13
2985	pxor	xmm15,xmm14	; offset for block 6
2986	pxor	xmm8,xmm6
2987	pxor	xmm6,xmm14
2988	pxor	xmm8,xmm7
2989	pxor	xmm7,xmm15
2990	movups	xmm0,XMMWORD[32+r11]	; xmm0 = round key 2
2991
; Interleaved with the first rounds: compute the table offsets for the next batch.
2992	lea	r12,[1+r8]
2993	lea	r13,[3+r8]
2994	lea	r14,[5+r8]
2995	add	r8,6
2996	pxor	xmm10,xmm9	; swap key0 for klast in the offset: ready for aesenclast
2997	bsf	r12,r12
2998	bsf	r13,r13
2999	bsf	r14,r14
3000
3001DB	102,15,56,220,209	; aesenc xmm2,xmm1
3002DB	102,15,56,220,217	; aesenc xmm3,xmm1
3003DB	102,15,56,220,225	; aesenc xmm4,xmm1
3004DB	102,15,56,220,233	; aesenc xmm5,xmm1
3005	pxor	xmm11,xmm9
3006	pxor	xmm12,xmm9
3007DB	102,15,56,220,241	; aesenc xmm6,xmm1
3008	pxor	xmm13,xmm9
3009	pxor	xmm14,xmm9
3010DB	102,15,56,220,249	; aesenc xmm7,xmm1
3011	movups	xmm1,XMMWORD[48+r11]	; xmm1 = round key 3
3012	pxor	xmm15,xmm9
3013
3014DB	102,15,56,220,208	; aesenc xmm2,xmm0
3015DB	102,15,56,220,216	; aesenc xmm3,xmm0
3016DB	102,15,56,220,224	; aesenc xmm4,xmm0
3017DB	102,15,56,220,232	; aesenc xmm5,xmm0
3018DB	102,15,56,220,240	; aesenc xmm6,xmm0
3019DB	102,15,56,220,248	; aesenc xmm7,xmm0
3020	movups	xmm0,XMMWORD[64+r11]	; xmm0 = round key 4
3021	shl	r12,4
3022	shl	r13,4
3023	jmp	NEAR $L$ocb_enc_loop6
3024
3025ALIGN	32
3026$L$ocb_enc_loop6:
; Two rounds per iteration; rax climbs by 32 towards zero, and rcx+rax walks
; the remaining round keys.
3027DB	102,15,56,220,209	; aesenc xmm2,xmm1
3028DB	102,15,56,220,217	; aesenc xmm3,xmm1
3029DB	102,15,56,220,225	; aesenc xmm4,xmm1
3030DB	102,15,56,220,233	; aesenc xmm5,xmm1
3031DB	102,15,56,220,241	; aesenc xmm6,xmm1
3032DB	102,15,56,220,249	; aesenc xmm7,xmm1
3033	movups	xmm1,XMMWORD[rax*1+rcx]
3034	add	rax,32	; sets ZF for the jnz below; the SSE/AES ops in between leave flags alone
3035
3036DB	102,15,56,220,208	; aesenc xmm2,xmm0
3037DB	102,15,56,220,216	; aesenc xmm3,xmm0
3038DB	102,15,56,220,224	; aesenc xmm4,xmm0
3039DB	102,15,56,220,232	; aesenc xmm5,xmm0
3040DB	102,15,56,220,240	; aesenc xmm6,xmm0
3041DB	102,15,56,220,248	; aesenc xmm7,xmm0
3042	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on the final pass (rax == 0) this loads klast
3043	jnz	NEAR $L$ocb_enc_loop6
3044
3045DB	102,15,56,220,209	; aesenc xmm2,xmm1
3046DB	102,15,56,220,217	; aesenc xmm3,xmm1
3047DB	102,15,56,220,225	; aesenc xmm4,xmm1
3048DB	102,15,56,220,233	; aesenc xmm5,xmm1
3049DB	102,15,56,220,241	; aesenc xmm6,xmm1
3050DB	102,15,56,220,249	; aesenc xmm7,xmm1
3051	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next helper call
3052	shl	r14,4
3053
; Final round: the operand is offset ^ klast, so the offset is applied to the
; output in the same instruction.
3054DB	102,65,15,56,221,210	; aesenclast xmm2,xmm10
3055	movdqu	xmm10,XMMWORD[rbx]	; restore xmm10 = table[0]
3056	mov	rax,r10	; reset the round-key index
3057DB	102,65,15,56,221,219	; aesenclast xmm3,xmm11
3058DB	102,65,15,56,221,228	; aesenclast xmm4,xmm12
3059DB	102,65,15,56,221,237	; aesenclast xmm5,xmm13
3060DB	102,65,15,56,221,246	; aesenclast xmm6,xmm14
3061DB	102,65,15,56,221,255	; aesenclast xmm7,xmm15
3062	DB	0F3h,0C3h		;repret
3063
3064
3065
; ----------------------------------------------------------------------------
; __ocb_encrypt4: local helper that OCB-encrypts the four blocks in xmm2-xmm5.
; "klast" below means the round key at key+16+16*rounds.
;
; In:   xmm2-xmm5 = input blocks          xmm8  = checksum accumulator
;       xmm9  = round key 0 ^ klast       xmm10 = table[0]
;       xmm15 = current offset ^ klast    xmm1  = round key 1
;       rbx = table base; r12/r13 = byte offsets into the table for the 2nd/4th block
;       r11 = key base; rcx = key+32+16*rounds; rax = 48-16*rounds; r10 = copy of rax
; Out:  xmm2-xmm5 = encrypted blocks; xmm8 ^= the four input blocks
;       xmm10-xmm13 = offsets of blocks 1..4, each ^ klast (xmm10 is NOT
;       reloaded with table[0]); xmm15 = entry offset ^ key0
;       rax = r10; xmm1 = round key 1; xmm0 = last key loaded by the loop (klast)
; ----------------------------------------------------------------------------
3066ALIGN	32
3067__ocb_encrypt4:
3068	pxor	xmm15,xmm9	; (offset ^ klast) ^ (key0 ^ klast) = offset ^ key0
3069	movdqu	xmm11,XMMWORD[r12*1+rbx]
3070	movdqa	xmm12,xmm10
3071	movdqu	xmm13,XMMWORD[r13*1+rbx]
3072	pxor	xmm10,xmm15	; offset for block 1 (^ key0)
3073	pxor	xmm11,xmm10	; offset for block 2
3074	pxor	xmm8,xmm2	; checksum ^= input block 1
3075	pxor	xmm2,xmm10	; block 1 ^ offset ^ key0
3076	pxor	xmm12,xmm11	; offset for block 3
3077	pxor	xmm8,xmm3
3078	pxor	xmm3,xmm11
3079	pxor	xmm13,xmm12	; offset for block 4
3080	pxor	xmm8,xmm4
3081	pxor	xmm4,xmm12
3082	pxor	xmm8,xmm5
3083	pxor	xmm5,xmm13
3084	movups	xmm0,XMMWORD[32+r11]	; xmm0 = round key 2
3085
; Swap key0 for klast in each offset so it can be the aesenclast operand.
3086	pxor	xmm10,xmm9
3087	pxor	xmm11,xmm9
3088	pxor	xmm12,xmm9
3089	pxor	xmm13,xmm9
3090
3091DB	102,15,56,220,209	; aesenc xmm2,xmm1
3092DB	102,15,56,220,217	; aesenc xmm3,xmm1
3093DB	102,15,56,220,225	; aesenc xmm4,xmm1
3094DB	102,15,56,220,233	; aesenc xmm5,xmm1
3095	movups	xmm1,XMMWORD[48+r11]	; xmm1 = round key 3
3096
3097DB	102,15,56,220,208	; aesenc xmm2,xmm0
3098DB	102,15,56,220,216	; aesenc xmm3,xmm0
3099DB	102,15,56,220,224	; aesenc xmm4,xmm0
3100DB	102,15,56,220,232	; aesenc xmm5,xmm0
3101	movups	xmm0,XMMWORD[64+r11]	; xmm0 = round key 4
3102	jmp	NEAR $L$ocb_enc_loop4
3103
3104ALIGN	32
3105$L$ocb_enc_loop4:
; Two rounds per iteration; rax climbs by 32 towards zero.
3106DB	102,15,56,220,209	; aesenc xmm2,xmm1
3107DB	102,15,56,220,217	; aesenc xmm3,xmm1
3108DB	102,15,56,220,225	; aesenc xmm4,xmm1
3109DB	102,15,56,220,233	; aesenc xmm5,xmm1
3110	movups	xmm1,XMMWORD[rax*1+rcx]
3111	add	rax,32	; sets ZF for the jnz below
3112
3113DB	102,15,56,220,208	; aesenc xmm2,xmm0
3114DB	102,15,56,220,216	; aesenc xmm3,xmm0
3115DB	102,15,56,220,224	; aesenc xmm4,xmm0
3116DB	102,15,56,220,232	; aesenc xmm5,xmm0
3117	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on the final pass (rax == 0) this loads klast
3118	jnz	NEAR $L$ocb_enc_loop4
3119
3120DB	102,15,56,220,209	; aesenc xmm2,xmm1
3121DB	102,15,56,220,217	; aesenc xmm3,xmm1
3122DB	102,15,56,220,225	; aesenc xmm4,xmm1
3123DB	102,15,56,220,233	; aesenc xmm5,xmm1
3124	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next helper call
3125	mov	rax,r10	; reset the round-key index
3126
; Final round with offset ^ klast as the operand.
3127DB	102,65,15,56,221,210	; aesenclast xmm2,xmm10
3128DB	102,65,15,56,221,219	; aesenclast xmm3,xmm11
3129DB	102,65,15,56,221,228	; aesenclast xmm4,xmm12
3130DB	102,65,15,56,221,237	; aesenclast xmm5,xmm13
3131	DB	0F3h,0C3h		;repret
3132
3133
3134
; ----------------------------------------------------------------------------
; __ocb_encrypt1: local helper that OCB-encrypts the single block in xmm2.
; "klast" below means the round key at key+16+16*rounds.
;
; In:   xmm2 = input block; xmm7 = table entry for this block
;       xmm8 = checksum accumulator; xmm9 = round key 0 ^ klast
;       xmm15 = current offset ^ klast; xmm1 = round key 1
;       r11 = key base; rcx = key+32+16*rounds; rax = 48-16*rounds; r10 = copy of rax
; Out:  xmm2 = encrypted block; xmm8 ^= input block
;       xmm7 = new offset ^ klast (the caller copies it into xmm15)
;       rax = r10; xmm1 = round key 1; xmm0 = last key loaded by the loop (klast)
; ----------------------------------------------------------------------------
3135ALIGN	32
3136__ocb_encrypt1:
3137	pxor	xmm7,xmm15	; table entry ^ offset ^ klast
3138	pxor	xmm7,xmm9	; ... ^ key0 ^ klast = new offset ^ key0
3139	pxor	xmm8,xmm2	; checksum ^= input block
3140	pxor	xmm2,xmm7	; block ^ new offset ^ key0
3141	movups	xmm0,XMMWORD[32+r11]	; xmm0 = round key 2
3142
3143DB	102,15,56,220,209	; aesenc xmm2,xmm1
3144	movups	xmm1,XMMWORD[48+r11]	; xmm1 = round key 3
3145	pxor	xmm7,xmm9	; swap key0 for klast: new offset ^ klast
3146
3147DB	102,15,56,220,208	; aesenc xmm2,xmm0
3148	movups	xmm0,XMMWORD[64+r11]	; xmm0 = round key 4
3149	jmp	NEAR $L$ocb_enc_loop1
3150
3151ALIGN	32
3152$L$ocb_enc_loop1:
; Two rounds per iteration; rax climbs by 32 towards zero.
3153DB	102,15,56,220,209	; aesenc xmm2,xmm1
3154	movups	xmm1,XMMWORD[rax*1+rcx]
3155	add	rax,32	; sets ZF for the jnz below
3156
3157DB	102,15,56,220,208	; aesenc xmm2,xmm0
3158	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on the final pass (rax == 0) this loads klast
3159	jnz	NEAR $L$ocb_enc_loop1
3160
3161DB	102,15,56,220,209	; aesenc xmm2,xmm1
3162	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next helper call
3163	mov	rax,r10	; reset the round-key index
3164
3165DB	102,15,56,221,215	; aesenclast xmm2,xmm7
3166	DB	0F3h,0C3h		;repret
3167
3168
; ----------------------------------------------------------------------------
; aesni_ocb_decrypt: decrypt a run of whole 16-byte blocks in OCB mode with
; AES-NI, updating a running offset and a checksum in place. Unlike the
; encrypt path, the checksum is accumulated here from the helper outputs
; (after each call), not inside the helpers.
;
; Entry follows the Win64 convention (rcx, rdx, r8, r9, then stack slots); the
; prologue shuffles the arguments so that the body sees:
;   rdi = input pointer   (blocks are loaded from here)
;   rsi = output pointer  (blocks are stored here)
;   rdx = number of 16-byte blocks still to process
;   rcx = key schedule; a 32-bit round count is read from offset 240
;         (the helpers use AESDEC, so presumably a decryption schedule)
;   r8  = number of the first block (its lowest set bit selects a table entry)
;   r9  = pointer to the 16-byte running offset (loaded at entry, stored at exit)
;   rbx = 7th argument: table of 16-byte values indexed by 16*bsf(block number)
;         (presumably the OCB L_ table -- confirm against the C caller)
;   rbp = 8th argument: pointer to the 16-byte checksum (loaded at entry,
;         stored at exit)
;
; Register roles shared with the __ocb_decrypt1/4/6 helpers:
;   xmm8  = checksum accumulator
;   xmm9  = round key 0 XOR "klast" (the round key at key+16+16*rounds)
;   xmm10 = table[0]
;   xmm15 = running offset, kept XORed with klast
;   r11   = key schedule base; rcx = key+32+16*rounds;
;   rax = r10 = 48-16*rounds (negative index that the helper loops count up to 0)
;
; NOTE(review): the control flow appears to assume rdx >= 1 on entry. With
; rdx == 0 the even-block path still processes a block, and the odd-block path
; reaches $L$ocb_dec_done before any helper has loaded xmm0. Confirm that
; callers never pass zero blocks.
; ----------------------------------------------------------------------------
3169global	aesni_ocb_decrypt
3170
3171ALIGN	32
3172aesni_ocb_decrypt:
3173	mov	QWORD[8+rsp],rdi	;WIN64 prologue
3174	mov	QWORD[16+rsp],rsi
3175	mov	rax,rsp
3176$L$SEH_begin_aesni_ocb_decrypt:
; Move the Win64 argument registers and stack slots into rdi, rsi, rdx, rcx, r8, r9.
3177	mov	rdi,rcx
3178	mov	rsi,rdx
3179	mov	rdx,r8
3180	mov	rcx,r9
3181	mov	r8,QWORD[40+rsp]
3182	mov	r9,QWORD[48+rsp]
3183
3184
3185	lea	rax,[rsp]	; rax = rsp at entry; used below to reach arguments 7 and 8
3186	push	rbx
3187	push	rbp
3188	push	r12
3189	push	r13
3190	push	r14
3191	lea	rsp,[((-160))+rsp]	; 10 x 16 bytes to save xmm6-xmm15
3192	movaps	XMMWORD[rsp],xmm6
3193	movaps	XMMWORD[16+rsp],xmm7
3194	movaps	XMMWORD[32+rsp],xmm8
3195	movaps	XMMWORD[48+rsp],xmm9
3196	movaps	XMMWORD[64+rsp],xmm10
3197	movaps	XMMWORD[80+rsp],xmm11
3198	movaps	XMMWORD[96+rsp],xmm12
3199	movaps	XMMWORD[112+rsp],xmm13
3200	movaps	XMMWORD[128+rsp],xmm14
3201	movaps	XMMWORD[144+rsp],xmm15
3202$L$ocb_dec_body:
3203	mov	rbx,QWORD[56+rax]	; rbx = 7th argument (table base)
3204	mov	rbp,QWORD[((56+8))+rax]	; rbp = 8th argument (checksum pointer)
3205
3206	mov	r10d,DWORD[240+rcx]	; r10d = round count stored in the key schedule
3207	mov	r11,rcx	; r11 = key schedule base, kept for the helpers
3208	shl	r10d,4	; r10 = 16*rounds
3209	movups	xmm9,XMMWORD[rcx]	; xmm9 = round key 0
3210	movups	xmm1,XMMWORD[16+r10*1+rcx]	; xmm1 = klast (round key at key+16+16*rounds)
3211
3212	movdqu	xmm15,XMMWORD[r9]	; xmm15 = running offset
3213	pxor	xmm9,xmm1	; xmm9 = key0 ^ klast
3214	pxor	xmm15,xmm1	; xmm15 = offset ^ klast
3215
3216	mov	eax,16+32
3217	lea	rcx,[32+r10*1+r11]	; rcx = key+32+16*rounds, base for the helpers' indexed key loads
3218	movups	xmm1,XMMWORD[16+r11]	; xmm1 = round key 1, as the helpers expect on entry
3219	sub	rax,r10	; rax = 48-16*rounds
3220	mov	r10,rax	; r10 keeps this start index so helpers can reset rax
3221
3222	movdqu	xmm10,XMMWORD[rbx]	; xmm10 = table[0]
3223	movdqu	xmm8,XMMWORD[rbp]	; xmm8 = checksum
3224
3225	test	r8,1
3226	jnz	NEAR $L$ocb_dec_odd	; odd block number: go straight to the 6-block setup
3227
; Even block number: process one block alone so the remainder starts on an odd number.
3228	bsf	r12,r8	; r12 = index of the lowest set bit of the block number
3229	add	r8,1
3230	shl	r12,4	; scale to a byte offset into the 16-byte-entry table
3231	movdqu	xmm7,XMMWORD[r12*1+rbx]	; xmm7 = table entry for this block
3232	movdqu	xmm2,XMMWORD[rdi]	; xmm2 = input block
3233	lea	rdi,[16+rdi]
3234
3235	call	__ocb_decrypt1
3236
3237	movdqa	xmm15,xmm7	; helper leaves the new offset ^ klast in xmm7
3238	movups	XMMWORD[rsi],xmm2
3239	xorps	xmm8,xmm2	; checksum ^= decrypted block
3240	lea	rsi,[16+rsi]
3241	sub	rdx,1
3242	jz	NEAR $L$ocb_dec_done
3243
3244$L$ocb_dec_odd:
; r8 is odd here, so r8+1, r8+3 and r8+5 are even: look up their table offsets
; for the 2nd, 4th and 6th block of the next batch. The 1st, 3rd and 5th blocks
; use table[0] (xmm10) inside the helpers.
3245	lea	r12,[1+r8]
3246	lea	r13,[3+r8]
3247	lea	r14,[5+r8]
3248	lea	r8,[6+r8]
3249	bsf	r12,r12
3250	bsf	r13,r13
3251	bsf	r14,r14
3252	shl	r12,4
3253	shl	r13,4
3254	shl	r14,4
3255
3256	sub	rdx,6
3257	jc	NEAR $L$ocb_dec_short	; fewer than 6 blocks left
3258	jmp	NEAR $L$ocb_dec_grandloop
3259
3260ALIGN	32
3261$L$ocb_dec_grandloop:
; Main loop: six blocks per iteration.
3262	movdqu	xmm2,XMMWORD[rdi]
3263	movdqu	xmm3,XMMWORD[16+rdi]
3264	movdqu	xmm4,XMMWORD[32+rdi]
3265	movdqu	xmm5,XMMWORD[48+rdi]
3266	movdqu	xmm6,XMMWORD[64+rdi]
3267	movdqu	xmm7,XMMWORD[80+rdi]
3268	lea	rdi,[96+rdi]
3269
3270	call	__ocb_decrypt6	; also leaves the 6th block's offset ^ klast in xmm15
3271
; Store each decrypted block and fold it into the checksum.
3272	movups	XMMWORD[rsi],xmm2
3273	pxor	xmm8,xmm2
3274	movups	XMMWORD[16+rsi],xmm3
3275	pxor	xmm8,xmm3
3276	movups	XMMWORD[32+rsi],xmm4
3277	pxor	xmm8,xmm4
3278	movups	XMMWORD[48+rsi],xmm5
3279	pxor	xmm8,xmm5
3280	movups	XMMWORD[64+rsi],xmm6
3281	pxor	xmm8,xmm6
3282	movups	XMMWORD[80+rsi],xmm7
3283	pxor	xmm8,xmm7
3284	lea	rsi,[96+rsi]
3285	sub	rdx,6
3286	jnc	NEAR $L$ocb_dec_grandloop	; loop while at least 6 blocks were still available
3287
3288$L$ocb_dec_short:
; Remainder: undo the speculative subtraction and dispatch on 0..5 blocks left.
3289	add	rdx,6
3290	jz	NEAR $L$ocb_dec_done
3291
3292	movdqu	xmm2,XMMWORD[rdi]
3293	cmp	rdx,2
3294	jb	NEAR $L$ocb_dec_one
3295	movdqu	xmm3,XMMWORD[16+rdi]	; movdqu leaves the flags from the cmp above intact
3296	je	NEAR $L$ocb_dec_two
3297
3298	movdqu	xmm4,XMMWORD[32+rdi]
3299	cmp	rdx,4
3300	jb	NEAR $L$ocb_dec_three
3301	movdqu	xmm5,XMMWORD[48+rdi]	; flags from the cmp above are still live
3302	je	NEAR $L$ocb_dec_four
3303
; Five blocks left: run the 6-block helper with a zero block in the sixth lane;
; only the first five outputs are stored and folded into the checksum.
3304	movdqu	xmm6,XMMWORD[64+rdi]
3305	pxor	xmm7,xmm7
3306
3307	call	__ocb_decrypt6
3308
3309	movdqa	xmm15,xmm14	; keep the offset of the 5th (last real) block
3310	movups	XMMWORD[rsi],xmm2
3311	pxor	xmm8,xmm2
3312	movups	XMMWORD[16+rsi],xmm3
3313	pxor	xmm8,xmm3
3314	movups	XMMWORD[32+rsi],xmm4
3315	pxor	xmm8,xmm4
3316	movups	XMMWORD[48+rsi],xmm5
3317	pxor	xmm8,xmm5
3318	movups	XMMWORD[64+rsi],xmm6
3319	pxor	xmm8,xmm6
3320
3321	jmp	NEAR $L$ocb_dec_done
3322
3323ALIGN	16
3324$L$ocb_dec_one:
3325	movdqa	xmm7,xmm10	; use table[0] as this block's table entry
3326
3327	call	__ocb_decrypt1
3328
3329	movdqa	xmm15,xmm7	; new offset ^ klast
3330	movups	XMMWORD[rsi],xmm2
3331	xorps	xmm8,xmm2	; checksum ^= decrypted block
3332	jmp	NEAR $L$ocb_dec_done
3333
3334ALIGN	16
3335$L$ocb_dec_two:
; Two blocks left: pad lanes 3 and 4 with zero and use the 4-block helper.
3336	pxor	xmm4,xmm4
3337	pxor	xmm5,xmm5
3338
3339	call	__ocb_decrypt4
3340
3341	movdqa	xmm15,xmm11	; offset of the 2nd block
3342	movups	XMMWORD[rsi],xmm2
3343	xorps	xmm8,xmm2
3344	movups	XMMWORD[16+rsi],xmm3
3345	xorps	xmm8,xmm3
3346
3347	jmp	NEAR $L$ocb_dec_done
3348
3349ALIGN	16
3350$L$ocb_dec_three:
; Three blocks left: pad lane 4 with zero.
3351	pxor	xmm5,xmm5
3352
3353	call	__ocb_decrypt4
3354
3355	movdqa	xmm15,xmm12	; offset of the 3rd block
3356	movups	XMMWORD[rsi],xmm2
3357	xorps	xmm8,xmm2
3358	movups	XMMWORD[16+rsi],xmm3
3359	xorps	xmm8,xmm3
3360	movups	XMMWORD[32+rsi],xmm4
3361	xorps	xmm8,xmm4
3362
3363	jmp	NEAR $L$ocb_dec_done
3364
3365ALIGN	16
3366$L$ocb_dec_four:
3367	call	__ocb_decrypt4
3368
3369	movdqa	xmm15,xmm13	; offset of the 4th block
3370	movups	XMMWORD[rsi],xmm2
3371	pxor	xmm8,xmm2
3372	movups	XMMWORD[16+rsi],xmm3
3373	pxor	xmm8,xmm3
3374	movups	XMMWORD[32+rsi],xmm4
3375	pxor	xmm8,xmm4
3376	movups	XMMWORD[48+rsi],xmm5
3377	pxor	xmm8,xmm5
3378
3379$L$ocb_dec_done:
; xmm0 is expected to still hold klast as last loaded by a helper's round loop;
; XORing it out turns the masked offset back into the plain offset.
3380	pxor	xmm15,xmm0
3381	movdqu	XMMWORD[rbp],xmm8	; write back the checksum
3382	movdqu	XMMWORD[r9],xmm15	; write back the running offset
3383
; Clear the volatile XMM registers, restore xmm6-xmm15 and zero their save slots.
3384	xorps	xmm0,xmm0
3385	pxor	xmm1,xmm1
3386	pxor	xmm2,xmm2
3387	pxor	xmm3,xmm3
3388	pxor	xmm4,xmm4
3389	pxor	xmm5,xmm5
3390	movaps	xmm6,XMMWORD[rsp]
3391	movaps	XMMWORD[rsp],xmm0
3392	movaps	xmm7,XMMWORD[16+rsp]
3393	movaps	XMMWORD[16+rsp],xmm0
3394	movaps	xmm8,XMMWORD[32+rsp]
3395	movaps	XMMWORD[32+rsp],xmm0
3396	movaps	xmm9,XMMWORD[48+rsp]
3397	movaps	XMMWORD[48+rsp],xmm0
3398	movaps	xmm10,XMMWORD[64+rsp]
3399	movaps	XMMWORD[64+rsp],xmm0
3400	movaps	xmm11,XMMWORD[80+rsp]
3401	movaps	XMMWORD[80+rsp],xmm0
3402	movaps	xmm12,XMMWORD[96+rsp]
3403	movaps	XMMWORD[96+rsp],xmm0
3404	movaps	xmm13,XMMWORD[112+rsp]
3405	movaps	XMMWORD[112+rsp],xmm0
3406	movaps	xmm14,XMMWORD[128+rsp]
3407	movaps	XMMWORD[128+rsp],xmm0
3408	movaps	xmm15,XMMWORD[144+rsp]
3409	movaps	XMMWORD[144+rsp],xmm0
3410	lea	rax,[((160+40))+rsp]	; rax = rsp at entry (160-byte XMM area + 5 pushes)
3411$L$ocb_dec_pop:
3412	mov	r14,QWORD[((-40))+rax]
3413	mov	r13,QWORD[((-32))+rax]
3414	mov	r12,QWORD[((-24))+rax]
3415	mov	rbp,QWORD[((-16))+rax]
3416	mov	rbx,QWORD[((-8))+rax]
3417	lea	rsp,[rax]
3418$L$ocb_dec_epilogue:
3419	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3420	mov	rsi,QWORD[16+rsp]
3421	DB	0F3h,0C3h		;repret
3422$L$SEH_end_aesni_ocb_decrypt:
3423
3424
; ----------------------------------------------------------------------------
; __ocb_decrypt6: local helper that OCB-decrypts the six blocks in xmm2-xmm7.
; "klast" below means the round key at key+16+16*rounds. The checksum register
; is not touched here; the caller folds the outputs into it.
;
; In:   xmm2-xmm7 = input blocks
;       xmm9  = round key 0 ^ klast       xmm10 = table[0]
;       xmm15 = current offset ^ klast    xmm1  = round key 1
;       rbx = table base; r12/r13/r14 = byte offsets into the table for the
;       2nd/4th/6th block; r8 = block-number counter
;       r11 = key base; rcx = key+32+16*rounds; rax = 48-16*rounds; r10 = copy of rax
; Out:  xmm2-xmm7 = decrypted blocks
;       xmm11-xmm15 = offsets of blocks 2..6, each ^ klast; xmm10 = table[0] again
;       r12/r13/r14 = table offsets for the next batch; r8 += 6
;       rax = r10; xmm1 = round key 1; xmm0 = last key loaded by the loop (klast)
; The DB lines are byte-encoded AES-NI instructions; each is decoded alongside.
; ----------------------------------------------------------------------------
3425ALIGN	32
3426__ocb_decrypt6:
; Chain the six per-block offsets (still carrying round key 0) and mask each lane.
3427	pxor	xmm15,xmm9	; (offset ^ klast) ^ (key0 ^ klast) = offset ^ key0
3428	movdqu	xmm11,XMMWORD[r12*1+rbx]
3429	movdqa	xmm12,xmm10
3430	movdqu	xmm13,XMMWORD[r13*1+rbx]
3431	movdqa	xmm14,xmm10
3432	pxor	xmm10,xmm15	; offset for block 1 (^ key0)
3433	movdqu	xmm15,XMMWORD[r14*1+rbx]
3434	pxor	xmm11,xmm10	; offset for block 2
3435	pxor	xmm2,xmm10	; block 1 ^ offset ^ key0
3436	pxor	xmm12,xmm11	; offset for block 3
3437	pxor	xmm3,xmm11
3438	pxor	xmm13,xmm12	; offset for block 4
3439	pxor	xmm4,xmm12
3440	pxor	xmm14,xmm13	; offset for block 5
3441	pxor	xmm5,xmm13
3442	pxor	xmm15,xmm14	; offset for block 6
3443	pxor	xmm6,xmm14
3444	pxor	xmm7,xmm15
3445	movups	xmm0,XMMWORD[32+r11]	; xmm0 = round key 2
3446
; Interleaved with the first rounds: compute the table offsets for the next batch.
3447	lea	r12,[1+r8]
3448	lea	r13,[3+r8]
3449	lea	r14,[5+r8]
3450	add	r8,6
3451	pxor	xmm10,xmm9	; swap key0 for klast in the offset: ready for aesdeclast
3452	bsf	r12,r12
3453	bsf	r13,r13
3454	bsf	r14,r14
3455
3456DB	102,15,56,222,209	; aesdec xmm2,xmm1
3457DB	102,15,56,222,217	; aesdec xmm3,xmm1
3458DB	102,15,56,222,225	; aesdec xmm4,xmm1
3459DB	102,15,56,222,233	; aesdec xmm5,xmm1
3460	pxor	xmm11,xmm9
3461	pxor	xmm12,xmm9
3462DB	102,15,56,222,241	; aesdec xmm6,xmm1
3463	pxor	xmm13,xmm9
3464	pxor	xmm14,xmm9
3465DB	102,15,56,222,249	; aesdec xmm7,xmm1
3466	movups	xmm1,XMMWORD[48+r11]	; xmm1 = round key 3
3467	pxor	xmm15,xmm9
3468
3469DB	102,15,56,222,208	; aesdec xmm2,xmm0
3470DB	102,15,56,222,216	; aesdec xmm3,xmm0
3471DB	102,15,56,222,224	; aesdec xmm4,xmm0
3472DB	102,15,56,222,232	; aesdec xmm5,xmm0
3473DB	102,15,56,222,240	; aesdec xmm6,xmm0
3474DB	102,15,56,222,248	; aesdec xmm7,xmm0
3475	movups	xmm0,XMMWORD[64+r11]	; xmm0 = round key 4
3476	shl	r12,4
3477	shl	r13,4
3478	jmp	NEAR $L$ocb_dec_loop6
3479
3480ALIGN	32
3481$L$ocb_dec_loop6:
; Two rounds per iteration; rax climbs by 32 towards zero, and rcx+rax walks
; the remaining round keys.
3482DB	102,15,56,222,209	; aesdec xmm2,xmm1
3483DB	102,15,56,222,217	; aesdec xmm3,xmm1
3484DB	102,15,56,222,225	; aesdec xmm4,xmm1
3485DB	102,15,56,222,233	; aesdec xmm5,xmm1
3486DB	102,15,56,222,241	; aesdec xmm6,xmm1
3487DB	102,15,56,222,249	; aesdec xmm7,xmm1
3488	movups	xmm1,XMMWORD[rax*1+rcx]
3489	add	rax,32	; sets ZF for the jnz below; the SSE/AES ops in between leave flags alone
3490
3491DB	102,15,56,222,208	; aesdec xmm2,xmm0
3492DB	102,15,56,222,216	; aesdec xmm3,xmm0
3493DB	102,15,56,222,224	; aesdec xmm4,xmm0
3494DB	102,15,56,222,232	; aesdec xmm5,xmm0
3495DB	102,15,56,222,240	; aesdec xmm6,xmm0
3496DB	102,15,56,222,248	; aesdec xmm7,xmm0
3497	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on the final pass (rax == 0) this loads klast
3498	jnz	NEAR $L$ocb_dec_loop6
3499
3500DB	102,15,56,222,209	; aesdec xmm2,xmm1
3501DB	102,15,56,222,217	; aesdec xmm3,xmm1
3502DB	102,15,56,222,225	; aesdec xmm4,xmm1
3503DB	102,15,56,222,233	; aesdec xmm5,xmm1
3504DB	102,15,56,222,241	; aesdec xmm6,xmm1
3505DB	102,15,56,222,249	; aesdec xmm7,xmm1
3506	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next helper call
3507	shl	r14,4
3508
; Final round: the operand is offset ^ klast, so the offset is applied to the
; output in the same instruction.
3509DB	102,65,15,56,223,210	; aesdeclast xmm2,xmm10
3510	movdqu	xmm10,XMMWORD[rbx]	; restore xmm10 = table[0]
3511	mov	rax,r10	; reset the round-key index
3512DB	102,65,15,56,223,219	; aesdeclast xmm3,xmm11
3513DB	102,65,15,56,223,228	; aesdeclast xmm4,xmm12
3514DB	102,65,15,56,223,237	; aesdeclast xmm5,xmm13
3515DB	102,65,15,56,223,246	; aesdeclast xmm6,xmm14
3516DB	102,65,15,56,223,255	; aesdeclast xmm7,xmm15
3517	DB	0F3h,0C3h		;repret
3518
3519
3520
; ----------------------------------------------------------------------------
; __ocb_decrypt4: local helper that OCB-decrypts the four blocks in xmm2-xmm5.
; "klast" below means the round key at key+16+16*rounds. The checksum register
; is not touched here; the caller folds the outputs into it.
;
; In:   xmm2-xmm5 = input blocks
;       xmm9  = round key 0 ^ klast       xmm10 = table[0]
;       xmm15 = current offset ^ klast    xmm1  = round key 1
;       rbx = table base; r12/r13 = byte offsets into the table for the 2nd/4th block
;       r11 = key base; rcx = key+32+16*rounds; rax = 48-16*rounds; r10 = copy of rax
; Out:  xmm2-xmm5 = decrypted blocks
;       xmm10-xmm13 = offsets of blocks 1..4, each ^ klast (xmm10 is NOT
;       reloaded with table[0]); xmm15 = entry offset ^ key0
;       rax = r10; xmm1 = round key 1; xmm0 = last key loaded by the loop (klast)
; ----------------------------------------------------------------------------
3521ALIGN	32
3522__ocb_decrypt4:
3523	pxor	xmm15,xmm9	; (offset ^ klast) ^ (key0 ^ klast) = offset ^ key0
3524	movdqu	xmm11,XMMWORD[r12*1+rbx]
3525	movdqa	xmm12,xmm10
3526	movdqu	xmm13,XMMWORD[r13*1+rbx]
3527	pxor	xmm10,xmm15	; offset for block 1 (^ key0)
3528	pxor	xmm11,xmm10	; offset for block 2
3529	pxor	xmm2,xmm10	; block 1 ^ offset ^ key0
3530	pxor	xmm12,xmm11	; offset for block 3
3531	pxor	xmm3,xmm11
3532	pxor	xmm13,xmm12	; offset for block 4
3533	pxor	xmm4,xmm12
3534	pxor	xmm5,xmm13
3535	movups	xmm0,XMMWORD[32+r11]	; xmm0 = round key 2
3536
; Swap key0 for klast in each offset so it can be the aesdeclast operand.
3537	pxor	xmm10,xmm9
3538	pxor	xmm11,xmm9
3539	pxor	xmm12,xmm9
3540	pxor	xmm13,xmm9
3541
3542DB	102,15,56,222,209	; aesdec xmm2,xmm1
3543DB	102,15,56,222,217	; aesdec xmm3,xmm1
3544DB	102,15,56,222,225	; aesdec xmm4,xmm1
3545DB	102,15,56,222,233	; aesdec xmm5,xmm1
3546	movups	xmm1,XMMWORD[48+r11]	; xmm1 = round key 3
3547
3548DB	102,15,56,222,208	; aesdec xmm2,xmm0
3549DB	102,15,56,222,216	; aesdec xmm3,xmm0
3550DB	102,15,56,222,224	; aesdec xmm4,xmm0
3551DB	102,15,56,222,232	; aesdec xmm5,xmm0
3552	movups	xmm0,XMMWORD[64+r11]	; xmm0 = round key 4
3553	jmp	NEAR $L$ocb_dec_loop4
3554
3555ALIGN	32
3556$L$ocb_dec_loop4:
; Two rounds per iteration; rax climbs by 32 towards zero.
3557DB	102,15,56,222,209	; aesdec xmm2,xmm1
3558DB	102,15,56,222,217	; aesdec xmm3,xmm1
3559DB	102,15,56,222,225	; aesdec xmm4,xmm1
3560DB	102,15,56,222,233	; aesdec xmm5,xmm1
3561	movups	xmm1,XMMWORD[rax*1+rcx]
3562	add	rax,32	; sets ZF for the jnz below
3563
3564DB	102,15,56,222,208	; aesdec xmm2,xmm0
3565DB	102,15,56,222,216	; aesdec xmm3,xmm0
3566DB	102,15,56,222,224	; aesdec xmm4,xmm0
3567DB	102,15,56,222,232	; aesdec xmm5,xmm0
3568	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on the final pass (rax == 0) this loads klast
3569	jnz	NEAR $L$ocb_dec_loop4
3570
3571DB	102,15,56,222,209	; aesdec xmm2,xmm1
3572DB	102,15,56,222,217	; aesdec xmm3,xmm1
3573DB	102,15,56,222,225	; aesdec xmm4,xmm1
3574DB	102,15,56,222,233	; aesdec xmm5,xmm1
3575	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next helper call
3576	mov	rax,r10	; reset the round-key index
3577
; Final round with offset ^ klast as the operand.
3578DB	102,65,15,56,223,210	; aesdeclast xmm2,xmm10
3579DB	102,65,15,56,223,219	; aesdeclast xmm3,xmm11
3580DB	102,65,15,56,223,228	; aesdeclast xmm4,xmm12
3581DB	102,65,15,56,223,237	; aesdeclast xmm5,xmm13
3582	DB	0F3h,0C3h		;repret
3583
3584
3585
; ----------------------------------------------------------------------------
; __ocb_decrypt1: local helper that OCB-decrypts the single block in xmm2.
; "klast" below means the round key at key+16+16*rounds. The checksum register
; is not touched here; the caller folds the output into it.
;
; In:   xmm2 = input block; xmm7 = table entry for this block
;       xmm9 = round key 0 ^ klast; xmm15 = current offset ^ klast
;       xmm1 = round key 1
;       r11 = key base; rcx = key+32+16*rounds; rax = 48-16*rounds; r10 = copy of rax
; Out:  xmm2 = decrypted block
;       xmm7 = new offset ^ klast (the caller copies it into xmm15)
;       rax = r10; xmm1 = round key 1; xmm0 = last key loaded by the loop (klast)
; ----------------------------------------------------------------------------
3586ALIGN	32
3587__ocb_decrypt1:
3588	pxor	xmm7,xmm15	; table entry ^ offset ^ klast
3589	pxor	xmm7,xmm9	; ... ^ key0 ^ klast = new offset ^ key0
3590	pxor	xmm2,xmm7	; block ^ new offset ^ key0
3591	movups	xmm0,XMMWORD[32+r11]	; xmm0 = round key 2
3592
3593DB	102,15,56,222,209	; aesdec xmm2,xmm1
3594	movups	xmm1,XMMWORD[48+r11]	; xmm1 = round key 3
3595	pxor	xmm7,xmm9	; swap key0 for klast: new offset ^ klast
3596
3597DB	102,15,56,222,208	; aesdec xmm2,xmm0
3598	movups	xmm0,XMMWORD[64+r11]	; xmm0 = round key 4
3599	jmp	NEAR $L$ocb_dec_loop1
3600
3601ALIGN	32
3602$L$ocb_dec_loop1:
; Two rounds per iteration; rax climbs by 32 towards zero.
3603DB	102,15,56,222,209	; aesdec xmm2,xmm1
3604	movups	xmm1,XMMWORD[rax*1+rcx]
3605	add	rax,32	; sets ZF for the jnz below
3606
3607DB	102,15,56,222,208	; aesdec xmm2,xmm0
3608	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; on the final pass (rax == 0) this loads klast
3609	jnz	NEAR $L$ocb_dec_loop1
3610
3611DB	102,15,56,222,209	; aesdec xmm2,xmm1
3612	movups	xmm1,XMMWORD[16+r11]	; reload round key 1 for the next helper call
3613	mov	rax,r10	; reset the round-key index
3614
3615DB	102,15,56,223,215	; aesdeclast xmm2,xmm7
3616	DB	0F3h,0C3h		;repret
3617
3618global	aesni_cbc_encrypt
3619
3620ALIGN	16
3621aesni_cbc_encrypt:
3622	mov	QWORD[8+rsp],rdi	;WIN64 prologue
3623	mov	QWORD[16+rsp],rsi
3624	mov	rax,rsp
3625$L$SEH_begin_aesni_cbc_encrypt:
3626	mov	rdi,rcx
3627	mov	rsi,rdx
3628	mov	rdx,r8
3629	mov	rcx,r9
3630	mov	r8,QWORD[40+rsp]
3631	mov	r9,QWORD[48+rsp]
3632
3633
3634	test	rdx,rdx
3635	jz	NEAR $L$cbc_ret
3636
3637	mov	r10d,DWORD[240+rcx]
3638	mov	r11,rcx
3639	test	r9d,r9d
3640	jz	NEAR $L$cbc_decrypt
3641
3642	movups	xmm2,XMMWORD[r8]
3643	mov	eax,r10d
3644	cmp	rdx,16
3645	jb	NEAR $L$cbc_enc_tail
3646	sub	rdx,16
3647	jmp	NEAR $L$cbc_enc_loop
3648ALIGN	16
3649$L$cbc_enc_loop:
3650	movups	xmm3,XMMWORD[rdi]
3651	lea	rdi,[16+rdi]
3652
3653	movups	xmm0,XMMWORD[rcx]
3654	movups	xmm1,XMMWORD[16+rcx]
3655	xorps	xmm3,xmm0
3656	lea	rcx,[32+rcx]
3657	xorps	xmm2,xmm3
3658$L$oop_enc1_15:
3659DB	102,15,56,220,209
3660	dec	eax
3661	movups	xmm1,XMMWORD[rcx]
3662	lea	rcx,[16+rcx]
3663	jnz	NEAR $L$oop_enc1_15
3664DB	102,15,56,221,209
3665	mov	eax,r10d
3666	mov	rcx,r11
3667	movups	XMMWORD[rsi],xmm2
3668	lea	rsi,[16+rsi]
3669	sub	rdx,16
3670	jnc	NEAR $L$cbc_enc_loop
3671	add	rdx,16
3672	jnz	NEAR $L$cbc_enc_tail
3673	pxor	xmm0,xmm0
3674	pxor	xmm1,xmm1
3675	movups	XMMWORD[r8],xmm2
3676	pxor	xmm2,xmm2
3677	pxor	xmm3,xmm3
3678	jmp	NEAR $L$cbc_ret
3679
3680$L$cbc_enc_tail:
3681	mov	rcx,rdx
3682	xchg	rsi,rdi
3683	DD	0x9066A4F3
3684	mov	ecx,16
3685	sub	rcx,rdx
3686	xor	eax,eax
3687	DD	0x9066AAF3
3688	lea	rdi,[((-16))+rdi]
3689	mov	eax,r10d
3690	mov	rsi,rdi
3691	mov	rcx,r11
3692	xor	rdx,rdx
3693	jmp	NEAR $L$cbc_enc_loop
3694
3695ALIGN	16
3696$L$cbc_decrypt:
3697	cmp	rdx,16
3698	jne	NEAR $L$cbc_decrypt_bulk
3699
3700
3701
3702	movdqu	xmm2,XMMWORD[rdi]
3703	movdqu	xmm3,XMMWORD[r8]
3704	movdqa	xmm4,xmm2
3705	movups	xmm0,XMMWORD[rcx]
3706	movups	xmm1,XMMWORD[16+rcx]
3707	lea	rcx,[32+rcx]
3708	xorps	xmm2,xmm0
3709$L$oop_dec1_16:
3710DB	102,15,56,222,209
3711	dec	r10d
3712	movups	xmm1,XMMWORD[rcx]
3713	lea	rcx,[16+rcx]
3714	jnz	NEAR $L$oop_dec1_16
3715DB	102,15,56,223,209
3716	pxor	xmm0,xmm0
3717	pxor	xmm1,xmm1
3718	movdqu	XMMWORD[r8],xmm4
3719	xorps	xmm2,xmm3
3720	pxor	xmm3,xmm3
3721	movups	XMMWORD[rsi],xmm2
3722	pxor	xmm2,xmm2
3723	jmp	NEAR $L$cbc_ret
3724ALIGN	16
3725$L$cbc_decrypt_bulk:
3726	lea	r11,[rsp]
3727	push	rbp
3728	sub	rsp,176
3729	and	rsp,-16
3730	movaps	XMMWORD[16+rsp],xmm6
3731	movaps	XMMWORD[32+rsp],xmm7
3732	movaps	XMMWORD[48+rsp],xmm8
3733	movaps	XMMWORD[64+rsp],xmm9
3734	movaps	XMMWORD[80+rsp],xmm10
3735	movaps	XMMWORD[96+rsp],xmm11
3736	movaps	XMMWORD[112+rsp],xmm12
3737	movaps	XMMWORD[128+rsp],xmm13
3738	movaps	XMMWORD[144+rsp],xmm14
3739	movaps	XMMWORD[160+rsp],xmm15
3740$L$cbc_decrypt_body:
3741	mov	rbp,rcx
3742	movups	xmm10,XMMWORD[r8]
3743	mov	eax,r10d
3744	cmp	rdx,0x50
3745	jbe	NEAR $L$cbc_dec_tail
3746
3747	movups	xmm0,XMMWORD[rcx]
3748	movdqu	xmm2,XMMWORD[rdi]
3749	movdqu	xmm3,XMMWORD[16+rdi]
3750	movdqa	xmm11,xmm2
3751	movdqu	xmm4,XMMWORD[32+rdi]
3752	movdqa	xmm12,xmm3
3753	movdqu	xmm5,XMMWORD[48+rdi]
3754	movdqa	xmm13,xmm4
3755	movdqu	xmm6,XMMWORD[64+rdi]
3756	movdqa	xmm14,xmm5
3757	movdqu	xmm7,XMMWORD[80+rdi]
3758	movdqa	xmm15,xmm6
3759	lea	r9,[OPENSSL_ia32cap_P]
3760	mov	r9d,DWORD[4+r9]
3761	cmp	rdx,0x70
3762	jbe	NEAR $L$cbc_dec_six_or_seven
3763
3764	and	r9d,71303168
3765	sub	rdx,0x50
3766	cmp	r9d,4194304
3767	je	NEAR $L$cbc_dec_loop6_enter
3768	sub	rdx,0x20
3769	lea	rcx,[112+rcx]
3770	jmp	NEAR $L$cbc_dec_loop8_enter
3771ALIGN	16
3772$L$cbc_dec_loop8:
3773	movups	XMMWORD[rsi],xmm9
3774	lea	rsi,[16+rsi]
3775$L$cbc_dec_loop8_enter:
3776	movdqu	xmm8,XMMWORD[96+rdi]
3777	pxor	xmm2,xmm0
3778	movdqu	xmm9,XMMWORD[112+rdi]
3779	pxor	xmm3,xmm0
3780	movups	xmm1,XMMWORD[((16-112))+rcx]
3781	pxor	xmm4,xmm0
3782	mov	rbp,-1
3783	cmp	rdx,0x70
3784	pxor	xmm5,xmm0
3785	pxor	xmm6,xmm0
3786	pxor	xmm7,xmm0
3787	pxor	xmm8,xmm0
3788
3789DB	102,15,56,222,209
3790	pxor	xmm9,xmm0
3791	movups	xmm0,XMMWORD[((32-112))+rcx]
3792DB	102,15,56,222,217
3793DB	102,15,56,222,225
3794DB	102,15,56,222,233
3795DB	102,15,56,222,241
3796DB	102,15,56,222,249
3797DB	102,68,15,56,222,193
3798	adc	rbp,0
3799	and	rbp,128
3800DB	102,68,15,56,222,201
3801	add	rbp,rdi
3802	movups	xmm1,XMMWORD[((48-112))+rcx]
3803DB	102,15,56,222,208
3804DB	102,15,56,222,216
3805DB	102,15,56,222,224
3806DB	102,15,56,222,232
3807DB	102,15,56,222,240
3808DB	102,15,56,222,248
3809DB	102,68,15,56,222,192
3810DB	102,68,15,56,222,200
3811	movups	xmm0,XMMWORD[((64-112))+rcx]
3812	nop
3813DB	102,15,56,222,209
3814DB	102,15,56,222,217
3815DB	102,15,56,222,225
3816DB	102,15,56,222,233
3817DB	102,15,56,222,241
3818DB	102,15,56,222,249
3819DB	102,68,15,56,222,193
3820DB	102,68,15,56,222,201
3821	movups	xmm1,XMMWORD[((80-112))+rcx]
3822	nop
3823DB	102,15,56,222,208
3824DB	102,15,56,222,216
3825DB	102,15,56,222,224
3826DB	102,15,56,222,232
3827DB	102,15,56,222,240
3828DB	102,15,56,222,248
3829DB	102,68,15,56,222,192
3830DB	102,68,15,56,222,200
3831	movups	xmm0,XMMWORD[((96-112))+rcx]
3832	nop
3833DB	102,15,56,222,209
3834DB	102,15,56,222,217
3835DB	102,15,56,222,225
3836DB	102,15,56,222,233
3837DB	102,15,56,222,241
3838DB	102,15,56,222,249
3839DB	102,68,15,56,222,193
3840DB	102,68,15,56,222,201
3841	movups	xmm1,XMMWORD[((112-112))+rcx]
3842	nop
3843DB	102,15,56,222,208
3844DB	102,15,56,222,216
3845DB	102,15,56,222,224
3846DB	102,15,56,222,232
3847DB	102,15,56,222,240
3848DB	102,15,56,222,248
3849DB	102,68,15,56,222,192
3850DB	102,68,15,56,222,200
3851	movups	xmm0,XMMWORD[((128-112))+rcx]
3852	nop
3853DB	102,15,56,222,209
3854DB	102,15,56,222,217
3855DB	102,15,56,222,225
3856DB	102,15,56,222,233
3857DB	102,15,56,222,241
3858DB	102,15,56,222,249
3859DB	102,68,15,56,222,193
3860DB	102,68,15,56,222,201
3861	movups	xmm1,XMMWORD[((144-112))+rcx]
3862	cmp	eax,11
3863DB	102,15,56,222,208
3864DB	102,15,56,222,216
3865DB	102,15,56,222,224
3866DB	102,15,56,222,232
3867DB	102,15,56,222,240
3868DB	102,15,56,222,248
3869DB	102,68,15,56,222,192
3870DB	102,68,15,56,222,200
3871	movups	xmm0,XMMWORD[((160-112))+rcx]
3872	jb	NEAR $L$cbc_dec_done
3873DB	102,15,56,222,209
3874DB	102,15,56,222,217
3875DB	102,15,56,222,225
3876DB	102,15,56,222,233
3877DB	102,15,56,222,241
3878DB	102,15,56,222,249
3879DB	102,68,15,56,222,193
3880DB	102,68,15,56,222,201
3881	movups	xmm1,XMMWORD[((176-112))+rcx]
3882	nop
3883DB	102,15,56,222,208
3884DB	102,15,56,222,216
3885DB	102,15,56,222,224
3886DB	102,15,56,222,232
3887DB	102,15,56,222,240
3888DB	102,15,56,222,248
3889DB	102,68,15,56,222,192
3890DB	102,68,15,56,222,200
3891	movups	xmm0,XMMWORD[((192-112))+rcx]
3892	je	NEAR $L$cbc_dec_done
3893DB	102,15,56,222,209
3894DB	102,15,56,222,217
3895DB	102,15,56,222,225
3896DB	102,15,56,222,233
3897DB	102,15,56,222,241
3898DB	102,15,56,222,249
3899DB	102,68,15,56,222,193
3900DB	102,68,15,56,222,201
3901	movups	xmm1,XMMWORD[((208-112))+rcx]
3902	nop
3903DB	102,15,56,222,208
3904DB	102,15,56,222,216
3905DB	102,15,56,222,224
3906DB	102,15,56,222,232
3907DB	102,15,56,222,240
3908DB	102,15,56,222,248
3909DB	102,68,15,56,222,192
3910DB	102,68,15,56,222,200
3911	movups	xmm0,XMMWORD[((224-112))+rcx]
3912	jmp	NEAR $L$cbc_dec_done
3913ALIGN	16
3914$L$cbc_dec_done:
3915DB	102,15,56,222,209
3916DB	102,15,56,222,217
3917	pxor	xmm10,xmm0
3918	pxor	xmm11,xmm0
3919DB	102,15,56,222,225
3920DB	102,15,56,222,233
3921	pxor	xmm12,xmm0
3922	pxor	xmm13,xmm0
3923DB	102,15,56,222,241
3924DB	102,15,56,222,249
3925	pxor	xmm14,xmm0
3926	pxor	xmm15,xmm0
3927DB	102,68,15,56,222,193
3928DB	102,68,15,56,222,201
3929	movdqu	xmm1,XMMWORD[80+rdi]
3930
3931DB	102,65,15,56,223,210
3932	movdqu	xmm10,XMMWORD[96+rdi]
3933	pxor	xmm1,xmm0
3934DB	102,65,15,56,223,219
3935	pxor	xmm10,xmm0
3936	movdqu	xmm0,XMMWORD[112+rdi]
3937DB	102,65,15,56,223,228
3938	lea	rdi,[128+rdi]
3939	movdqu	xmm11,XMMWORD[rbp]
3940DB	102,65,15,56,223,237
3941DB	102,65,15,56,223,246
3942	movdqu	xmm12,XMMWORD[16+rbp]
3943	movdqu	xmm13,XMMWORD[32+rbp]
3944DB	102,65,15,56,223,255
3945DB	102,68,15,56,223,193
3946	movdqu	xmm14,XMMWORD[48+rbp]
3947	movdqu	xmm15,XMMWORD[64+rbp]
3948DB	102,69,15,56,223,202
3949	movdqa	xmm10,xmm0
3950	movdqu	xmm1,XMMWORD[80+rbp]
3951	movups	xmm0,XMMWORD[((-112))+rcx]
3952
3953	movups	XMMWORD[rsi],xmm2
3954	movdqa	xmm2,xmm11
3955	movups	XMMWORD[16+rsi],xmm3
3956	movdqa	xmm3,xmm12
3957	movups	XMMWORD[32+rsi],xmm4
3958	movdqa	xmm4,xmm13
3959	movups	XMMWORD[48+rsi],xmm5
3960	movdqa	xmm5,xmm14
3961	movups	XMMWORD[64+rsi],xmm6
3962	movdqa	xmm6,xmm15
3963	movups	XMMWORD[80+rsi],xmm7
3964	movdqa	xmm7,xmm1
3965	movups	XMMWORD[96+rsi],xmm8
3966	lea	rsi,[112+rsi]
3967
3968	sub	rdx,0x80
3969	ja	NEAR $L$cbc_dec_loop8
3970
3971	movaps	xmm2,xmm9
3972	lea	rcx,[((-112))+rcx]
3973	add	rdx,0x70
3974	jle	NEAR $L$cbc_dec_clear_tail_collected
3975	movups	XMMWORD[rsi],xmm9
3976	lea	rsi,[16+rsi]
3977	cmp	rdx,0x50
3978	jbe	NEAR $L$cbc_dec_tail
3979
3980	movaps	xmm2,xmm11
3981$L$cbc_dec_six_or_seven:
3982	cmp	rdx,0x60
3983	ja	NEAR $L$cbc_dec_seven
3984
3985	movaps	xmm8,xmm7
3986	call	_aesni_decrypt6
3987	pxor	xmm2,xmm10
3988	movaps	xmm10,xmm8
3989	pxor	xmm3,xmm11
3990	movdqu	XMMWORD[rsi],xmm2
3991	pxor	xmm4,xmm12
3992	movdqu	XMMWORD[16+rsi],xmm3
3993	pxor	xmm3,xmm3
3994	pxor	xmm5,xmm13
3995	movdqu	XMMWORD[32+rsi],xmm4
3996	pxor	xmm4,xmm4
3997	pxor	xmm6,xmm14
3998	movdqu	XMMWORD[48+rsi],xmm5
3999	pxor	xmm5,xmm5
4000	pxor	xmm7,xmm15
4001	movdqu	XMMWORD[64+rsi],xmm6
4002	pxor	xmm6,xmm6
4003	lea	rsi,[80+rsi]
4004	movdqa	xmm2,xmm7
4005	pxor	xmm7,xmm7
4006	jmp	NEAR $L$cbc_dec_tail_collected
4007
4008ALIGN	16
4009$L$cbc_dec_seven:
4010	movups	xmm8,XMMWORD[96+rdi]
4011	xorps	xmm9,xmm9
4012	call	_aesni_decrypt8
4013	movups	xmm9,XMMWORD[80+rdi]
4014	pxor	xmm2,xmm10
4015	movups	xmm10,XMMWORD[96+rdi]
4016	pxor	xmm3,xmm11
4017	movdqu	XMMWORD[rsi],xmm2
4018	pxor	xmm4,xmm12
4019	movdqu	XMMWORD[16+rsi],xmm3
4020	pxor	xmm3,xmm3
4021	pxor	xmm5,xmm13
4022	movdqu	XMMWORD[32+rsi],xmm4
4023	pxor	xmm4,xmm4
4024	pxor	xmm6,xmm14
4025	movdqu	XMMWORD[48+rsi],xmm5
4026	pxor	xmm5,xmm5
4027	pxor	xmm7,xmm15
4028	movdqu	XMMWORD[64+rsi],xmm6
4029	pxor	xmm6,xmm6
4030	pxor	xmm8,xmm9
4031	movdqu	XMMWORD[80+rsi],xmm7
4032	pxor	xmm7,xmm7
4033	lea	rsi,[96+rsi]
4034	movdqa	xmm2,xmm8
4035	pxor	xmm8,xmm8
4036	pxor	xmm9,xmm9
4037	jmp	NEAR $L$cbc_dec_tail_collected
4038
4039ALIGN	16
4040$L$cbc_dec_loop6:
4041	movups	XMMWORD[rsi],xmm7
4042	lea	rsi,[16+rsi]
4043	movdqu	xmm2,XMMWORD[rdi]
4044	movdqu	xmm3,XMMWORD[16+rdi]
4045	movdqa	xmm11,xmm2
4046	movdqu	xmm4,XMMWORD[32+rdi]
4047	movdqa	xmm12,xmm3
4048	movdqu	xmm5,XMMWORD[48+rdi]
4049	movdqa	xmm13,xmm4
4050	movdqu	xmm6,XMMWORD[64+rdi]
4051	movdqa	xmm14,xmm5
4052	movdqu	xmm7,XMMWORD[80+rdi]
4053	movdqa	xmm15,xmm6
4054$L$cbc_dec_loop6_enter:
4055	lea	rdi,[96+rdi]
4056	movdqa	xmm8,xmm7
4057
4058	call	_aesni_decrypt6
4059
4060	pxor	xmm2,xmm10
4061	movdqa	xmm10,xmm8
4062	pxor	xmm3,xmm11
4063	movdqu	XMMWORD[rsi],xmm2
4064	pxor	xmm4,xmm12
4065	movdqu	XMMWORD[16+rsi],xmm3
4066	pxor	xmm5,xmm13
4067	movdqu	XMMWORD[32+rsi],xmm4
4068	pxor	xmm6,xmm14
4069	mov	rcx,rbp
4070	movdqu	XMMWORD[48+rsi],xmm5
4071	pxor	xmm7,xmm15
4072	mov	eax,r10d
4073	movdqu	XMMWORD[64+rsi],xmm6
4074	lea	rsi,[80+rsi]
4075	sub	rdx,0x60
4076	ja	NEAR $L$cbc_dec_loop6
4077
4078	movdqa	xmm2,xmm7
4079	add	rdx,0x50
4080	jle	NEAR $L$cbc_dec_clear_tail_collected
4081	movups	XMMWORD[rsi],xmm7
4082	lea	rsi,[16+rsi]
4083
4084$L$cbc_dec_tail:
4085	movups	xmm2,XMMWORD[rdi]
4086	sub	rdx,0x10
4087	jbe	NEAR $L$cbc_dec_one
4088
4089	movups	xmm3,XMMWORD[16+rdi]
4090	movaps	xmm11,xmm2
4091	sub	rdx,0x10
4092	jbe	NEAR $L$cbc_dec_two
4093
4094	movups	xmm4,XMMWORD[32+rdi]
4095	movaps	xmm12,xmm3
4096	sub	rdx,0x10
4097	jbe	NEAR $L$cbc_dec_three
4098
4099	movups	xmm5,XMMWORD[48+rdi]
4100	movaps	xmm13,xmm4
4101	sub	rdx,0x10
4102	jbe	NEAR $L$cbc_dec_four
4103
4104	movups	xmm6,XMMWORD[64+rdi]
4105	movaps	xmm14,xmm5
4106	movaps	xmm15,xmm6
4107	xorps	xmm7,xmm7
4108	call	_aesni_decrypt6
4109	pxor	xmm2,xmm10
4110	movaps	xmm10,xmm15
4111	pxor	xmm3,xmm11
4112	movdqu	XMMWORD[rsi],xmm2
4113	pxor	xmm4,xmm12
4114	movdqu	XMMWORD[16+rsi],xmm3
4115	pxor	xmm3,xmm3
4116	pxor	xmm5,xmm13
4117	movdqu	XMMWORD[32+rsi],xmm4
4118	pxor	xmm4,xmm4
4119	pxor	xmm6,xmm14
4120	movdqu	XMMWORD[48+rsi],xmm5
4121	pxor	xmm5,xmm5
4122	lea	rsi,[64+rsi]
4123	movdqa	xmm2,xmm6
4124	pxor	xmm6,xmm6
4125	pxor	xmm7,xmm7
4126	sub	rdx,0x10
4127	jmp	NEAR $L$cbc_dec_tail_collected
4128
4129ALIGN	16
4130$L$cbc_dec_one:
4131	movaps	xmm11,xmm2
4132	movups	xmm0,XMMWORD[rcx]
4133	movups	xmm1,XMMWORD[16+rcx]
4134	lea	rcx,[32+rcx]
4135	xorps	xmm2,xmm0
4136$L$oop_dec1_17:
4137DB	102,15,56,222,209
4138	dec	eax
4139	movups	xmm1,XMMWORD[rcx]
4140	lea	rcx,[16+rcx]
4141	jnz	NEAR $L$oop_dec1_17
4142DB	102,15,56,223,209
4143	xorps	xmm2,xmm10
4144	movaps	xmm10,xmm11
4145	jmp	NEAR $L$cbc_dec_tail_collected
4146ALIGN	16
4147$L$cbc_dec_two:
4148	movaps	xmm12,xmm3
4149	call	_aesni_decrypt2
4150	pxor	xmm2,xmm10
4151	movaps	xmm10,xmm12
4152	pxor	xmm3,xmm11
4153	movdqu	XMMWORD[rsi],xmm2
4154	movdqa	xmm2,xmm3
4155	pxor	xmm3,xmm3
4156	lea	rsi,[16+rsi]
4157	jmp	NEAR $L$cbc_dec_tail_collected
4158ALIGN	16
4159$L$cbc_dec_three:
4160	movaps	xmm13,xmm4
4161	call	_aesni_decrypt3
4162	pxor	xmm2,xmm10
4163	movaps	xmm10,xmm13
4164	pxor	xmm3,xmm11
4165	movdqu	XMMWORD[rsi],xmm2
4166	pxor	xmm4,xmm12
4167	movdqu	XMMWORD[16+rsi],xmm3
4168	pxor	xmm3,xmm3
4169	movdqa	xmm2,xmm4
4170	pxor	xmm4,xmm4
4171	lea	rsi,[32+rsi]
4172	jmp	NEAR $L$cbc_dec_tail_collected
4173ALIGN	16
4174$L$cbc_dec_four:
4175	movaps	xmm14,xmm5
4176	call	_aesni_decrypt4
4177	pxor	xmm2,xmm10
4178	movaps	xmm10,xmm14
4179	pxor	xmm3,xmm11
4180	movdqu	XMMWORD[rsi],xmm2
4181	pxor	xmm4,xmm12
4182	movdqu	XMMWORD[16+rsi],xmm3
4183	pxor	xmm3,xmm3
4184	pxor	xmm5,xmm13
4185	movdqu	XMMWORD[32+rsi],xmm4
4186	pxor	xmm4,xmm4
4187	movdqa	xmm2,xmm5
4188	pxor	xmm5,xmm5
4189	lea	rsi,[48+rsi]
4190	jmp	NEAR $L$cbc_dec_tail_collected
4191
4192ALIGN	16
4193$L$cbc_dec_clear_tail_collected:
4194	pxor	xmm3,xmm3
4195	pxor	xmm4,xmm4
4196	pxor	xmm5,xmm5
4197$L$cbc_dec_tail_collected:
4198	movups	XMMWORD[r8],xmm10
4199	and	rdx,15
4200	jnz	NEAR $L$cbc_dec_tail_partial
4201	movups	XMMWORD[rsi],xmm2
4202	pxor	xmm2,xmm2
4203	jmp	NEAR $L$cbc_dec_ret
4204ALIGN	16
4205$L$cbc_dec_tail_partial:
4206	movaps	XMMWORD[rsp],xmm2
4207	pxor	xmm2,xmm2
4208	mov	rcx,16
4209	mov	rdi,rsi
4210	sub	rcx,rdx
4211	lea	rsi,[rsp]
4212	DD	0x9066A4F3
4213	movdqa	XMMWORD[rsp],xmm2
4214
4215$L$cbc_dec_ret:
4216	xorps	xmm0,xmm0
4217	pxor	xmm1,xmm1
4218	movaps	xmm6,XMMWORD[16+rsp]
4219	movaps	XMMWORD[16+rsp],xmm0
4220	movaps	xmm7,XMMWORD[32+rsp]
4221	movaps	XMMWORD[32+rsp],xmm0
4222	movaps	xmm8,XMMWORD[48+rsp]
4223	movaps	XMMWORD[48+rsp],xmm0
4224	movaps	xmm9,XMMWORD[64+rsp]
4225	movaps	XMMWORD[64+rsp],xmm0
4226	movaps	xmm10,XMMWORD[80+rsp]
4227	movaps	XMMWORD[80+rsp],xmm0
4228	movaps	xmm11,XMMWORD[96+rsp]
4229	movaps	XMMWORD[96+rsp],xmm0
4230	movaps	xmm12,XMMWORD[112+rsp]
4231	movaps	XMMWORD[112+rsp],xmm0
4232	movaps	xmm13,XMMWORD[128+rsp]
4233	movaps	XMMWORD[128+rsp],xmm0
4234	movaps	xmm14,XMMWORD[144+rsp]
4235	movaps	XMMWORD[144+rsp],xmm0
4236	movaps	xmm15,XMMWORD[160+rsp]
4237	movaps	XMMWORD[160+rsp],xmm0
4238	mov	rbp,QWORD[((-8))+r11]
4239	lea	rsp,[r11]
4240$L$cbc_ret:
4241	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
4242	mov	rsi,QWORD[16+rsp]
4243	DB	0F3h,0C3h		;repret
4244$L$SEH_end_aesni_cbc_encrypt:
; aesni_set_decrypt_key: build an AES decryption key schedule.
; Forwards its arguments unchanged to __aesni_set_encrypt_key, then (only when
; that call returns 0 in eax) reverses the order of the round keys stored at
; r8 and runs AESIMC over every key except the outermost pair.
; In:  same registers as __aesni_set_encrypt_key (rcx, edx, r8).
; Out: eax = status from __aesni_set_encrypt_key (0 on success).
; Uses rcx, rdx, r8 and xmm0/xmm1 as scratch; xmm0/xmm1 are zeroed on the
; success path.
4245global	aesni_set_decrypt_key
4246
4247ALIGN	16
4248aesni_set_decrypt_key:
4249DB	0x48,0x83,0xEC,0x08	; raw encoding of: sub rsp,8
4250	call	__aesni_set_encrypt_key
4251	shl	edx,4	; edx was left holding 9/11/13 by the key setup; scale to a byte offset
4252	test	eax,eax
4253	jnz	NEAR $L$dec_key_ret	; key setup failed: return its status untouched
4254	lea	rcx,[16+rdx*1+r8]	; rcx -> last round key, r8 -> first round key
4255
; Swap the first and last round keys as-is (no AESIMC on these two).
4256	movups	xmm0,XMMWORD[r8]
4257	movups	xmm1,XMMWORD[rcx]
4258	movups	XMMWORD[rcx],xmm0
4259	movups	XMMWORD[r8],xmm1
4260	lea	r8,[16+r8]
4261	lea	rcx,[((-16))+rcx]
4262
; Walk inwards from both ends: transform each pair with AESIMC and store the
; two results in swapped positions, until the pointers meet.
4263$L$dec_key_inverse:
4264	movups	xmm0,XMMWORD[r8]
4265	movups	xmm1,XMMWORD[rcx]
4266DB	102,15,56,219,192	; aesimc xmm0,xmm0
4267DB	102,15,56,219,201	; aesimc xmm1,xmm1
4268	lea	r8,[16+r8]
4269	lea	rcx,[((-16))+rcx]
4270	movups	XMMWORD[16+rcx],xmm0	; key read from the low side goes to the high slot
4271	movups	XMMWORD[(-16)+r8],xmm1	; key read from the high side goes to the low slot
4272	cmp	rcx,r8
4273	ja	NEAR $L$dec_key_inverse
4274
; The remaining middle key is transformed in place.
4275	movups	xmm0,XMMWORD[r8]
4276DB	102,15,56,219,192	; aesimc xmm0,xmm0
4277	pxor	xmm1,xmm1	; scrub key material from the scratch register
4278	movups	XMMWORD[rcx],xmm0
4279	pxor	xmm0,xmm0	; scrub key material from the scratch register
4280$L$dec_key_ret:
4281	add	rsp,8
4282	DB	0F3h,0C3h		;repret
4283$L$SEH_end_set_decrypt_key:
4284
; aesni_set_encrypt_key / __aesni_set_encrypt_key: expand a user key into an
; AES round-key schedule. Both labels name the same entry point; the
; double-underscore alias is the one called by aesni_set_decrypt_key.
; In:  rcx = pointer to the user key bytes
;      edx = key length in bits; only 128, 192 and 256 are accepted
;      r8  = pointer to the output schedule
; Out: rax = 0 on success, -1 if rcx or r8 is zero, -2 for any other length.
;      On success edx holds 9, 11 or 13 and that value is also stored at byte
;      offset 240 of the schedule.
; Scratch: rax, r10, xmm0-xmm5 (all six xmm registers are zeroed on exit).
; Each key size has two code paths: one using AESKEYGENASSIST plus the
; $L$key_expansion_* helpers, and an "_alt" path using PSHUFB + AESENCLAST.
; The "_alt" path is taken when the second dword of OPENSSL_ia32cap_P, masked
; with 0x10000800, equals 0x10000000 (bit 28 set, bit 11 clear).
; NOTE(review): the meaning of those two capability bits is not visible here;
; confirm against the OPENSSL_ia32cap documentation.
4285global	aesni_set_encrypt_key
4286
4287ALIGN	16
4288aesni_set_encrypt_key:
4289__aesni_set_encrypt_key:
4290DB	0x48,0x83,0xEC,0x08	; raw encoding of: sub rsp,8
4291	mov	rax,-1	; default status for the null-pointer checks below
4292	test	rcx,rcx
4293	jz	NEAR $L$enc_key_ret
4294	test	r8,r8
4295	jz	NEAR $L$enc_key_ret
4296
4297	movups	xmm0,XMMWORD[rcx]	; xmm0 = first 16 bytes of the user key
4298	xorps	xmm4,xmm4	; xmm4 = 0: shuffle scratch for the $L$key_expansion_* helpers
4299	lea	r10,[OPENSSL_ia32cap_P]
4300	mov	r10d,DWORD[4+r10]	; second capability dword
4301	and	r10d,268437504	; keep bits 28 and 11 (0x10000800)
4302	lea	rax,[16+r8]	; rax = write cursor, starting at the second round-key slot
4303	cmp	edx,256
4304	je	NEAR $L$14rounds
4305	cmp	edx,192
4306	je	NEAR $L$12rounds
4307	cmp	edx,128
4308	jne	NEAR $L$bad_keybits
4309
; ---- 128-bit key, AESKEYGENASSIST path --------------------------------------
4310$L$10rounds:
4311	mov	edx,9	; value later written to schedule offset 240
4312	cmp	r10d,268435456	; 0x10000000: bit 28 set and bit 11 clear
4313	je	NEAR $L$10rounds_alt
4314
4315	movups	XMMWORD[r8],xmm0	; round key 0 is the user key itself
; Ten expansion steps. Each DB is "aeskeygenassist xmm1,xmm0,imm8"; the helper
; folds xmm1 into xmm0. Every call except the first (_cold) also stores the
; previous xmm0 at [rax] and advances rax by 16.
4316DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,0x01
4317	call	$L$key_expansion_128_cold
4318DB	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,0x02
4319	call	$L$key_expansion_128
4320DB	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,0x04
4321	call	$L$key_expansion_128
4322DB	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,0x08
4323	call	$L$key_expansion_128
4324DB	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,0x10
4325	call	$L$key_expansion_128
4326DB	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,0x20
4327	call	$L$key_expansion_128
4328DB	102,15,58,223,200,64	; aeskeygenassist xmm1,xmm0,0x40
4329	call	$L$key_expansion_128
4330DB	102,15,58,223,200,128	; aeskeygenassist xmm1,xmm0,0x80
4331	call	$L$key_expansion_128
4332DB	102,15,58,223,200,27	; aeskeygenassist xmm1,xmm0,0x1b
4333	call	$L$key_expansion_128
4334DB	102,15,58,223,200,54	; aeskeygenassist xmm1,xmm0,0x36
4335	call	$L$key_expansion_128
4336	movups	XMMWORD[rax],xmm0	; final round key (rax = r8+160 here)
4337	mov	DWORD[80+rax],edx	; r8+240 <- 9
4338	xor	eax,eax	; status 0
4339	jmp	NEAR $L$enc_key_ret
4340
; ---- 128-bit key, PSHUFB/AESENCLAST path ------------------------------------
4341ALIGN	16
4342$L$10rounds_alt:
4343	movdqa	xmm5,XMMWORD[$L$key_rotate]	; xmm5 = byte-shuffle control
4344	mov	r10d,8	; eight loop iterations; two more steps are unrolled after the loop
4345	movdqa	xmm4,XMMWORD[$L$key_rcon1]	; xmm4 = 1 in every dword, doubled each iteration
4346	movdqa	xmm2,xmm0
4347	movdqu	XMMWORD[r8],xmm0	; round key 0 is the user key itself
4348	jmp	NEAR $L$oop_key128
4349
4350ALIGN	16
4351$L$oop_key128:
4352DB	102,15,56,0,197	; pshufb xmm0,xmm5
4353DB	102,15,56,221,196	; aesenclast xmm0,xmm4
4354	pslld	xmm4,1	; double the per-dword constant for the next step
4355	lea	rax,[16+rax]
4356
; xmm2 = previous key XORed with itself shifted left by 4, 8 and 12 bytes.
4357	movdqa	xmm3,xmm2
4358	pslldq	xmm2,4
4359	pxor	xmm3,xmm2
4360	pslldq	xmm2,4
4361	pxor	xmm3,xmm2
4362	pslldq	xmm2,4
4363	pxor	xmm2,xmm3
4364
4365	pxor	xmm0,xmm2	; xmm0 = next round key
4366	movdqu	XMMWORD[(-16)+rax],xmm0
4367	movdqa	xmm2,xmm0
4368
4369	dec	r10d
4370	jnz	NEAR $L$oop_key128
4371
; Ninth step: same computation with the constant reloaded from $L$key_rcon1b
; (0x1b per dword) instead of continuing the doubling.
4372	movdqa	xmm4,XMMWORD[$L$key_rcon1b]
4373
4374DB	102,15,56,0,197	; pshufb xmm0,xmm5
4375DB	102,15,56,221,196	; aesenclast xmm0,xmm4
4376	pslld	xmm4,1	; 0x1b -> 0x36 for the tenth step
4377
4378	movdqa	xmm3,xmm2
4379	pslldq	xmm2,4
4380	pxor	xmm3,xmm2
4381	pslldq	xmm2,4
4382	pxor	xmm3,xmm2
4383	pslldq	xmm2,4
4384	pxor	xmm2,xmm3
4385
4386	pxor	xmm0,xmm2
4387	movdqu	XMMWORD[rax],xmm0	; rax = r8+144 here
4388
; Tenth step.
4389	movdqa	xmm2,xmm0
4390DB	102,15,56,0,197	; pshufb xmm0,xmm5
4391DB	102,15,56,221,196	; aesenclast xmm0,xmm4
4392
4393	movdqa	xmm3,xmm2
4394	pslldq	xmm2,4
4395	pxor	xmm3,xmm2
4396	pslldq	xmm2,4
4397	pxor	xmm3,xmm2
4398	pslldq	xmm2,4
4399	pxor	xmm2,xmm3
4400
4401	pxor	xmm0,xmm2
4402	movdqu	XMMWORD[16+rax],xmm0	; final round key at r8+160
4403
4404	mov	DWORD[96+rax],edx	; r8+240 <- 9
4405	xor	eax,eax	; status 0
4406	jmp	NEAR $L$enc_key_ret
4407
; ---- 192-bit key, AESKEYGENASSIST path --------------------------------------
4408ALIGN	16
4409$L$12rounds:
4410	movq	xmm2,QWORD[16+rcx]	; xmm2 (low 8 bytes) = user key bytes 16..23
4411	mov	edx,11	; value later written to schedule offset 240
4412	cmp	r10d,268435456	; 0x10000000: bit 28 set and bit 11 clear
4413	je	NEAR $L$12rounds_alt
4414
4415	movups	XMMWORD[r8],xmm0
; Each DB is "aeskeygenassist xmm1,xmm2,imm8". The helpers alternate: 192a
; stores 16 bytes, 192b stores 32 bytes, and the first (_cold) stores nothing.
4416DB	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,0x01
4417	call	$L$key_expansion_192a_cold
4418DB	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,0x02
4419	call	$L$key_expansion_192b
4420DB	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,0x04
4421	call	$L$key_expansion_192a
4422DB	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,0x08
4423	call	$L$key_expansion_192b
4424DB	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,0x10
4425	call	$L$key_expansion_192a
4426DB	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,0x20
4427	call	$L$key_expansion_192b
4428DB	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,0x40
4429	call	$L$key_expansion_192a
4430DB	102,15,58,223,202,128	; aeskeygenassist xmm1,xmm2,0x80
4431	call	$L$key_expansion_192b
4432	movups	XMMWORD[rax],xmm0	; final round key (rax = r8+192 here)
4433	mov	DWORD[48+rax],edx	; r8+240 <- 11
4434	xor	rax,rax	; status 0
4435	jmp	NEAR $L$enc_key_ret
4436
; ---- 192-bit key, PSHUFB/AESENCLAST path ------------------------------------
4437ALIGN	16
4438$L$12rounds_alt:
4439	movdqa	xmm5,XMMWORD[$L$key_rotate192]	; xmm5 = byte-shuffle control
4440	movdqa	xmm4,XMMWORD[$L$key_rcon1]	; xmm4 = 1 in every dword, doubled each iteration
4441	mov	r10d,8	; eight iterations, 24 bytes of schedule each
4442	movdqu	XMMWORD[r8],xmm0
4443	jmp	NEAR $L$oop_key192
4444
4445ALIGN	16
4446$L$oop_key192:
4447	movq	QWORD[rax],xmm2	; store the low 8 bytes of xmm2
4448	movdqa	xmm1,xmm2
4449DB	102,15,56,0,213	; pshufb xmm2,xmm5
4450DB	102,15,56,221,212	; aesenclast xmm2,xmm4
4451	pslld	xmm4,1	; double the per-dword constant for the next step
4452	lea	rax,[24+rax]
4453
; xmm0 = xmm0 XORed with itself shifted left by 4, 8 and 12 bytes.
4454	movdqa	xmm3,xmm0
4455	pslldq	xmm0,4
4456	pxor	xmm3,xmm0
4457	pslldq	xmm0,4
4458	pxor	xmm3,xmm0
4459	pslldq	xmm0,4
4460	pxor	xmm0,xmm3
4461
4462	pshufd	xmm3,xmm0,0xff	; broadcast dword 3 of xmm0
4463	pxor	xmm3,xmm1
4464	pslldq	xmm1,4
4465	pxor	xmm3,xmm1
4466
4467	pxor	xmm0,xmm2
4468	pxor	xmm2,xmm3
4469	movdqu	XMMWORD[(-16)+rax],xmm0
4470
4471	dec	r10d
4472	jnz	NEAR $L$oop_key192
4473
4474	mov	DWORD[32+rax],edx	; rax = r8+208 here, so r8+240 <- 11
4475	xor	eax,eax	; status 0
4476	jmp	NEAR $L$enc_key_ret
4477
; ---- 256-bit key, AESKEYGENASSIST path --------------------------------------
4478ALIGN	16
4479$L$14rounds:
4480	movups	xmm2,XMMWORD[16+rcx]	; xmm2 = user key bytes 16..31
4481	mov	edx,13	; value later written to schedule offset 240
4482	lea	rax,[16+rax]	; write cursor now r8+32: the first two slots hold the user key
4483	cmp	r10d,268435456	; 0x10000000: bit 28 set and bit 11 clear
4484	je	NEAR $L$14rounds_alt
4485
4486	movups	XMMWORD[r8],xmm0
4487	movups	XMMWORD[16+r8],xmm2
; The DBs alternate "aeskeygenassist xmm1,xmm2,imm8" (before a 256a call,
; which updates xmm0) and "aeskeygenassist xmm1,xmm0,imm8" (before a 256b
; call, which updates xmm2).
4488DB	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,0x01
4489	call	$L$key_expansion_256a_cold
4490DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,0x01
4491	call	$L$key_expansion_256b
4492DB	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,0x02
4493	call	$L$key_expansion_256a
4494DB	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,0x02
4495	call	$L$key_expansion_256b
4496DB	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,0x04
4497	call	$L$key_expansion_256a
4498DB	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,0x04
4499	call	$L$key_expansion_256b
4500DB	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,0x08
4501	call	$L$key_expansion_256a
4502DB	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,0x08
4503	call	$L$key_expansion_256b
4504DB	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,0x10
4505	call	$L$key_expansion_256a
4506DB	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,0x10
4507	call	$L$key_expansion_256b
4508DB	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,0x20
4509	call	$L$key_expansion_256a
4510DB	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,0x20
4511	call	$L$key_expansion_256b
4512DB	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,0x40
4513	call	$L$key_expansion_256a
4514	movups	XMMWORD[rax],xmm0	; final round key (rax = r8+224 here)
4515	mov	DWORD[16+rax],edx	; r8+240 <- 13
4516	xor	rax,rax	; status 0
4517	jmp	NEAR $L$enc_key_ret
4518
; ---- 256-bit key, PSHUFB/AESENCLAST path ------------------------------------
4519ALIGN	16
4520$L$14rounds_alt:
4521	movdqa	xmm5,XMMWORD[$L$key_rotate]	; xmm5 = byte-shuffle control
4522	movdqa	xmm4,XMMWORD[$L$key_rcon1]	; xmm4 = 1 in every dword, doubled each iteration
4523	mov	r10d,7	; seven first-half steps; the loop exits before the seventh second half
4524	movdqu	XMMWORD[r8],xmm0
4525	movdqa	xmm1,xmm2
4526	movdqu	XMMWORD[16+r8],xmm2
4527	jmp	NEAR $L$oop_key256
4528
4529ALIGN	16
4530$L$oop_key256:
; First half: derive the next even-slot key into xmm0.
4531DB	102,15,56,0,213	; pshufb xmm2,xmm5
4532DB	102,15,56,221,212	; aesenclast xmm2,xmm4
4533
4534	movdqa	xmm3,xmm0
4535	pslldq	xmm0,4
4536	pxor	xmm3,xmm0
4537	pslldq	xmm0,4
4538	pxor	xmm3,xmm0
4539	pslldq	xmm0,4
4540	pxor	xmm0,xmm3
4541	pslld	xmm4,1	; double the per-dword constant for the next iteration
4542
4543	pxor	xmm0,xmm2
4544	movdqu	XMMWORD[rax],xmm0
4545
4546	dec	r10d
4547	jz	NEAR $L$done_key256	; last iteration stops after the first half
4548
; Second half: derive the next odd-slot key into xmm2 (no round constant).
4549	pshufd	xmm2,xmm0,0xff	; broadcast dword 3 of the key just produced
4550	pxor	xmm3,xmm3
4551DB	102,15,56,221,211	; aesenclast xmm2,xmm3 (xmm3 is zero here)
4552
4553	movdqa	xmm3,xmm1
4554	pslldq	xmm1,4
4555	pxor	xmm3,xmm1
4556	pslldq	xmm1,4
4557	pxor	xmm3,xmm1
4558	pslldq	xmm1,4
4559	pxor	xmm1,xmm3
4560
4561	pxor	xmm2,xmm1
4562	movdqu	XMMWORD[16+rax],xmm2
4563	lea	rax,[32+rax]
4564	movdqa	xmm1,xmm2
4565
4566	jmp	NEAR $L$oop_key256
4567
4568$L$done_key256:
4569	mov	DWORD[16+rax],edx	; rax = r8+224 here, so r8+240 <- 13
4570	xor	eax,eax	; status 0
4571	jmp	NEAR $L$enc_key_ret
4572
4573ALIGN	16
4574$L$bad_keybits:
4575	mov	rax,-2	; edx was not 128, 192 or 256
; Common exit: scrub every xmm register that may hold key material.
4576$L$enc_key_ret:
4577	pxor	xmm0,xmm0
4578	pxor	xmm1,xmm1
4579	pxor	xmm2,xmm2
4580	pxor	xmm3,xmm3
4581	pxor	xmm4,xmm4
4582	pxor	xmm5,xmm5
4583	add	rsp,8
4584	DB	0F3h,0C3h		;repret
4585$L$SEH_end_set_encrypt_key:
4586
; $L$key_expansion_128: one 128-bit key-expansion step, called from $L$10rounds.
; In:  xmm0 = current round key, xmm1 = AESKEYGENASSIST output for it,
;      xmm4 = shuffle scratch (zeroed by the caller before the first step),
;      rax  = schedule write cursor.
; Out: xmm0 = next round key. The main entry first stores the incoming xmm0 at
;      [rax] and advances rax by 16; the _cold entry skips that store.
; Clobbers xmm1 and xmm4.
4587ALIGN	16
4588$L$key_expansion_128:
4589	movups	XMMWORD[rax],xmm0
4590	lea	rax,[16+rax]
4591$L$key_expansion_128_cold:
4592	shufps	xmm4,xmm0,16
4593	xorps	xmm0,xmm4
4594	shufps	xmm4,xmm0,140
4595	xorps	xmm0,xmm4
4596	shufps	xmm1,xmm1,255	; broadcast dword 3 of xmm1 to all lanes
4597	xorps	xmm0,xmm1
4598	DB	0F3h,0C3h		;repret
4599
; $L$key_expansion_192a: 192-bit key-expansion step, called from $L$12rounds.
; Three entry points share one tail:
;   $L$key_expansion_192a      - store xmm0 at [rax], advance rax by 16, then
;                                fall into _cold
;   $L$key_expansion_192a_cold - save a copy of xmm2 in xmm5, then fall into
;                                _warm
;   $L$key_expansion_192b_warm - the shared computation; also the jump target
;                                of $L$key_expansion_192b
; In:  xmm0/xmm2 = current key state, xmm1 = AESKEYGENASSIST output,
;      xmm4 = shuffle scratch, rax = schedule write cursor.
; Out: xmm0 and xmm2 updated. Clobbers xmm1, xmm3, xmm4 (and xmm5 unless
;      entered at _warm).
4600ALIGN	16
4601$L$key_expansion_192a:
4602	movups	XMMWORD[rax],xmm0
4603	lea	rax,[16+rax]
4604$L$key_expansion_192a_cold:
4605	movaps	xmm5,xmm2	; xmm5 is consumed by the next $L$key_expansion_192b call
4606$L$key_expansion_192b_warm:
4607	shufps	xmm4,xmm0,16
4608	movdqa	xmm3,xmm2
4609	xorps	xmm0,xmm4
4610	shufps	xmm4,xmm0,140
4611	pslldq	xmm3,4
4612	xorps	xmm0,xmm4
4613	pshufd	xmm1,xmm1,85	; broadcast dword 1 of xmm1 to all lanes
4614	pxor	xmm2,xmm3
4615	pxor	xmm0,xmm1
4616	pshufd	xmm3,xmm0,255	; broadcast dword 3 of the updated xmm0
4617	pxor	xmm2,xmm3
4618	DB	0F3h,0C3h		;repret
4619
; $L$key_expansion_192b: 192-bit key-expansion step that emits 32 bytes.
; Builds two 16-byte blocks from xmm5/xmm0 and xmm0/xmm2 with SHUFPS, stores
; them at [rax] and [16+rax], advances rax by 32, then jumps into the shared
; tail $L$key_expansion_192b_warm (which performs the return).
; In:  xmm0, xmm2, xmm5 = key state left by the preceding 192a step,
;      rax = schedule write cursor.
; Clobbers xmm3 and xmm5, plus whatever the shared tail clobbers.
4620ALIGN	16
4621$L$key_expansion_192b:
4622	movaps	xmm3,xmm0
4623	shufps	xmm5,xmm0,68	; xmm5 = low half of xmm5 : low half of xmm0
4624	movups	XMMWORD[rax],xmm5
4625	shufps	xmm3,xmm2,78	; xmm3 = high half of xmm0 : low half of xmm2
4626	movups	XMMWORD[16+rax],xmm3
4627	lea	rax,[32+rax]
4628	jmp	NEAR $L$key_expansion_192b_warm
4629
; $L$key_expansion_256a: 256-bit key-expansion step that updates xmm0,
; called from $L$14rounds.
; In:  xmm0 = key half to update, xmm1 = AESKEYGENASSIST output,
;      xmm2 = other key half, xmm4 = shuffle scratch, rax = write cursor.
; Out: xmm0 updated. The main entry first stores xmm2 at [rax] and advances
;      rax by 16; the _cold entry skips that store.
; Clobbers xmm1 and xmm4.
4630ALIGN	16
4631$L$key_expansion_256a:
4632	movups	XMMWORD[rax],xmm2
4633	lea	rax,[16+rax]
4634$L$key_expansion_256a_cold:
4635	shufps	xmm4,xmm0,16
4636	xorps	xmm0,xmm4
4637	shufps	xmm4,xmm0,140
4638	xorps	xmm0,xmm4
4639	shufps	xmm1,xmm1,255	; broadcast dword 3 of xmm1 to all lanes
4640	xorps	xmm0,xmm1
4641	DB	0F3h,0C3h		;repret
4642
; $L$key_expansion_256b: 256-bit key-expansion step that updates xmm2,
; called from $L$14rounds.
; Stores xmm0 at [rax], advances rax by 16, then applies to xmm2 the same
; shuffle/XOR sequence that 256a applies to xmm0, but folds in dword 2 of
; xmm1 instead of dword 3.
; In:  xmm0 = key half just produced, xmm1 = AESKEYGENASSIST output,
;      xmm2 = key half to update, xmm4 = shuffle scratch, rax = write cursor.
; Out: xmm2 updated. Clobbers xmm1 and xmm4.
4643ALIGN	16
4644$L$key_expansion_256b:
4645	movups	XMMWORD[rax],xmm0
4646	lea	rax,[16+rax]
4647
4648	shufps	xmm4,xmm2,16
4649	xorps	xmm2,xmm4
4650	shufps	xmm4,xmm2,140
4651	xorps	xmm2,xmm4
4652	shufps	xmm1,xmm1,170	; broadcast dword 2 of xmm1 to all lanes
4653	xorps	xmm2,xmm1
4654	DB	0F3h,0C3h		;repret
4655
4656
; Constant tables, 16 bytes each, placed in the code section on a 64-byte
; boundary. In this part of the file only $L$key_rotate, $L$key_rotate192,
; $L$key_rcon1 and $L$key_rcon1b are referenced (by the "_alt" key-setup
; paths, via MOVDQA, so their 16-byte alignment matters); the others are used
; by code outside this view.
4657ALIGN	64
4658$L$bswap_mask:
; Byte indices 15..0 - presumably a PSHUFB control that reverses 16 bytes.
4659DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4660$L$increment32:
4661	DD	6,6,6,0
4662$L$increment64:
4663	DD	1,0,0,0
4664$L$xts_magic:
4665	DD	0x87,0,1,0
4666$L$increment1:
4667DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4668$L$key_rotate:
; As a PSHUFB control each dword selects source bytes 13,14,15,12: dword 3
; rotated by one byte and copied into all four lanes.
4669	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4670$L$key_rotate192:
; Same idea for source bytes 5,6,7,4 (dword 1).
4671	DD	0x04070605,0x04070605,0x04070605,0x04070605
4672$L$key_rcon1:
; Starting per-dword constant for the "_alt" key-setup loops (doubled by PSLLD).
4673	DD	1,1,1,1
4674$L$key_rcon1b:
; Constant loaded for the ninth step of the 128-bit "_alt" path.
4675	DD	0x1b,0x1b,0x1b,0x1b
4676
; NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
4677DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
4678DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
4679DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
4680DB	115,108,46,111,114,103,62,0
4681ALIGN	64
4682EXTERN	__imp_RtlVirtualUnwind
4683
; ecb_ccm64_se_handler: Win64 exception handler referenced by the
; $L$SEH_info_ecb, $L$SEH_info_ccm64_enc and $L$SEH_info_ccm64_dec records.
; In:  r8 = pointer to a context record, r9 = pointer to a dispatcher record.
;      NOTE(review): the field names given below follow the Win64 CONTEXT and
;      DISPATCHER_CONTEXT layouts - verify the offsets against winnt.h.
; The handler data pointed to by [56+r9] holds two image-relative addresses
; (a "body" label and a "ret" label). If the faulting address [248+r8] (Rip)
; lies between them, 8 qwords (64 bytes) are copied from the faulting frame's
; stack into the context record at offset 512 and the frame pointer in rax is
; advanced by 88. Control then continues at $L$common_seh_tail, which expects
; rax = the function's stack pointer at entry.
4684ALIGN	16
4685ecb_ccm64_se_handler:
4686	push	rsi
4687	push	rdi
4688	push	rbx
4689	push	rbp
4690	push	r12
4691	push	r13
4692	push	r14
4693	push	r15
4694	pushfq
4695	sub	rsp,64	; room for the RtlVirtualUnwind call made in $L$common_seh_tail
4696
4697	mov	rax,QWORD[120+r8]	; context offset 120 (Rax)
4698	mov	rbx,QWORD[248+r8]	; context offset 248 (Rip)
4699
4700	mov	rsi,QWORD[8+r9]	; dispatcher offset 8 (image base)
4701	mov	r11,QWORD[56+r9]	; dispatcher offset 56 (handler data pointer)
4702
4703	mov	r10d,DWORD[r11]	; first handler-data entry: body label, image-relative
4704	lea	r10,[r10*1+rsi]
4705	cmp	rbx,r10
4706	jb	NEAR $L$common_seh_tail	; fault before the body label: use the rax value loaded above
4707
4708	mov	rax,QWORD[152+r8]	; context offset 152 (Rsp)
4709
4710	mov	r10d,DWORD[4+r11]	; second handler-data entry: ret label, image-relative
4711	lea	r10,[r10*1+rsi]
4712	cmp	rbx,r10
4713	jae	NEAR $L$common_seh_tail	; fault at or after the ret label: frame already released
4714
4715	lea	rsi,[rax]
4716	lea	rdi,[512+r8]	; context offset 512 (start of the Xmm6.. save slots)
4717	mov	ecx,8
4718	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld / rep movsq: copy 8 qwords
4719	lea	rax,[88+rax]	; step over the function's 88-byte frame
4720
4721	jmp	NEAR $L$common_seh_tail
4722
4723
4724
; ctr_xts_se_handler: Win64 exception handler referenced by the
; $L$SEH_info_ctr32, $L$SEH_info_xts_enc and $L$SEH_info_xts_dec records.
; In:  r8 = pointer to a context record, r9 = pointer to a dispatcher record.
;      NOTE(review): the field names given below follow the Win64 CONTEXT and
;      DISPATCHER_CONTEXT layouts - verify the offsets against winnt.h.
; The handler data holds two image-relative addresses (body and epilogue
; labels). If the faulting address lies between them, the frame base is taken
; from context offset 208, 20 qwords (160 bytes) starting 168 bytes below it
; are copied into the context record at offset 512, and the qword 8 bytes
; below it is written to context offset 160. Control then continues at
; $L$common_seh_tail with rax = that frame base.
4725ALIGN	16
4726ctr_xts_se_handler:
4727	push	rsi
4728	push	rdi
4729	push	rbx
4730	push	rbp
4731	push	r12
4732	push	r13
4733	push	r14
4734	push	r15
4735	pushfq
4736	sub	rsp,64	; room for the RtlVirtualUnwind call made in $L$common_seh_tail
4737
4738	mov	rax,QWORD[120+r8]	; context offset 120 (Rax)
4739	mov	rbx,QWORD[248+r8]	; context offset 248 (Rip)
4740
4741	mov	rsi,QWORD[8+r9]	; dispatcher offset 8 (image base)
4742	mov	r11,QWORD[56+r9]	; dispatcher offset 56 (handler data pointer)
4743
4744	mov	r10d,DWORD[r11]	; first handler-data entry: body label, image-relative
4745	lea	r10,[r10*1+rsi]
4746	cmp	rbx,r10
4747	jb	NEAR $L$common_seh_tail	; fault before the body label
4748
4749	mov	rax,QWORD[152+r8]	; context offset 152 (Rsp)
4750
4751	mov	r10d,DWORD[4+r11]	; second handler-data entry: epilogue label, image-relative
4752	lea	r10,[r10*1+rsi]
4753	cmp	rbx,r10
4754	jae	NEAR $L$common_seh_tail	; fault at or after the epilogue label
4755
4756	mov	rax,QWORD[208+r8]	; context offset 208 (R11) - presumably the function's saved frame base
4757
4758	lea	rsi,[((-168))+rax]
4759	lea	rdi,[512+r8]	; context offset 512 (start of the Xmm6.. save slots)
4760	mov	ecx,20
4761	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld / rep movsq: copy 20 qwords
4762
4763	mov	rbp,QWORD[((-8))+rax]	; saved rbp sits just below the frame base
4764	mov	QWORD[160+r8],rbp	; context offset 160 (Rbp)
4765	jmp	NEAR $L$common_seh_tail
4766
4767
4768
; ocb_se_handler: Win64 exception handler referenced by the
; $L$SEH_info_ocb_enc and $L$SEH_info_ocb_dec records.
; In:  r8 = pointer to a context record, r9 = pointer to a dispatcher record.
;      NOTE(review): the field names given below follow the Win64 CONTEXT and
;      DISPATCHER_CONTEXT layouts - verify the offsets against winnt.h.
; The handler data holds three image-relative addresses (body, epilogue and
; "pop" labels):
;   Rip <  body                -> straight to $L$common_seh_tail
;   Rip >= epilogue            -> straight to $L$common_seh_tail
;   pop <= Rip < epilogue      -> $L$ocb_no_xmm, using the rax loaded from
;                                 context offset 120
;   body <= Rip < pop          -> copy 20 qwords from the stack into context
;                                 offset 512 and advance rax by 200, then
;                                 fall into $L$ocb_no_xmm
; $L$ocb_no_xmm reloads five saved registers from just below rax and writes
; them back into the context record before joining $L$common_seh_tail.
4769ALIGN	16
4770ocb_se_handler:
4771	push	rsi
4772	push	rdi
4773	push	rbx
4774	push	rbp
4775	push	r12
4776	push	r13
4777	push	r14
4778	push	r15
4779	pushfq
4780	sub	rsp,64	; room for the RtlVirtualUnwind call made in $L$common_seh_tail
4781
4782	mov	rax,QWORD[120+r8]	; context offset 120 (Rax)
4783	mov	rbx,QWORD[248+r8]	; context offset 248 (Rip)
4784
4785	mov	rsi,QWORD[8+r9]	; dispatcher offset 8 (image base)
4786	mov	r11,QWORD[56+r9]	; dispatcher offset 56 (handler data pointer)
4787
4788	mov	r10d,DWORD[r11]	; first handler-data entry: body label
4789	lea	r10,[r10*1+rsi]
4790	cmp	rbx,r10
4791	jb	NEAR $L$common_seh_tail
4792
4793	mov	r10d,DWORD[4+r11]	; second handler-data entry: epilogue label
4794	lea	r10,[r10*1+rsi]
4795	cmp	rbx,r10
4796	jae	NEAR $L$common_seh_tail
4797
4798	mov	r10d,DWORD[8+r11]	; third handler-data entry: pop label
4799	lea	r10,[r10*1+rsi]
4800	cmp	rbx,r10
4801	jae	NEAR $L$ocb_no_xmm	; past the pop label: skip the 20-qword copy
4802
4803	mov	rax,QWORD[152+r8]	; context offset 152 (Rsp)
4804
4805	lea	rsi,[rax]
4806	lea	rdi,[512+r8]	; context offset 512 (start of the Xmm6.. save slots)
4807	mov	ecx,20
4808	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld / rep movsq: copy 20 qwords
4809	lea	rax,[((160+40))+rax]	; step over 160 bytes of xmm saves plus 40 more bytes
4810
4811$L$ocb_no_xmm:
4812	mov	rbx,QWORD[((-8))+rax]
4813	mov	rbp,QWORD[((-16))+rax]
4814	mov	r12,QWORD[((-24))+rax]
4815	mov	r13,QWORD[((-32))+rax]
4816	mov	r14,QWORD[((-40))+rax]
4817
4818	mov	QWORD[144+r8],rbx	; context offset 144 (Rbx)
4819	mov	QWORD[160+r8],rbp	; context offset 160 (Rbp)
4820	mov	QWORD[216+r8],r12	; context offset 216 (R12)
4821	mov	QWORD[224+r8],r13	; context offset 224 (R13)
4822	mov	QWORD[232+r8],r14	; context offset 232 (R14)
4823
4824	jmp	NEAR $L$common_seh_tail
4825
4826
; cbc_se_handler: Win64 exception handler referenced by $L$SEH_info_cbc.
; In:  r8 = pointer to a context record, r9 = pointer to a dispatcher record.
;      NOTE(review): the field names given below follow the Win64 CONTEXT
;      layout - verify the offsets against winnt.h.
; Unlike the other handlers it takes its range labels directly with LEA
; instead of reading them from handler data:
;   Rip <  $L$cbc_decrypt_bulk  -> rax = context offset 152 (Rsp)
;   Rip <  $L$cbc_decrypt_body  -> rax = context offset 120 (Rax)
;   Rip >= $L$cbc_ret           -> rax = context offset 152 (Rsp)
;   otherwise                   -> copy 20 qwords from [16+Rsp] into context
;                                  offset 512 (the same stack slots that
;                                  $L$cbc_dec_ret reloads xmm6-xmm15 from),
;                                  take rax from context offset 208 and
;                                  write the qword below it to offset 160
; All cases end in $L$common_seh_tail, which this handler falls through into.
4827ALIGN	16
4828cbc_se_handler:
4829	push	rsi
4830	push	rdi
4831	push	rbx
4832	push	rbp
4833	push	r12
4834	push	r13
4835	push	r14
4836	push	r15
4837	pushfq
4838	sub	rsp,64	; room for the RtlVirtualUnwind call made in $L$common_seh_tail
4839
4840	mov	rax,QWORD[152+r8]	; context offset 152 (Rsp)
4841	mov	rbx,QWORD[248+r8]	; context offset 248 (Rip)
4842
4843	lea	r10,[$L$cbc_decrypt_bulk]
4844	cmp	rbx,r10
4845	jb	NEAR $L$common_seh_tail
4846
4847	mov	rax,QWORD[120+r8]	; context offset 120 (Rax)
4848
4849	lea	r10,[$L$cbc_decrypt_body]
4850	cmp	rbx,r10
4851	jb	NEAR $L$common_seh_tail
4852
4853	mov	rax,QWORD[152+r8]	; context offset 152 (Rsp)
4854
4855	lea	r10,[$L$cbc_ret]
4856	cmp	rbx,r10
4857	jae	NEAR $L$common_seh_tail
4858
4859	lea	rsi,[16+rax]
4860	lea	rdi,[512+r8]	; context offset 512 (start of the Xmm6.. save slots)
4861	mov	ecx,20
4862	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld / rep movsq: copy 20 qwords
4863
4864	mov	rax,QWORD[208+r8]	; context offset 208 (R11), the frame base restored by "lea rsp,[r11]"
4865
4866	mov	rbp,QWORD[((-8))+rax]	; saved rbp sits just below the frame base
4867	mov	QWORD[160+r8],rbp	; context offset 160 (Rbp)
4868
; $L$common_seh_tail: shared exit for every *_se_handler in this file.
; On entry: rax = the faulting function's stack pointer value at its entry,
;           r8  = context record, r9 = dispatcher record, and the handler
;           prologue (9 pushes + "sub rsp,64") is still on the stack.
; Steps:
;   1. Reload the rdi/rsi values saved at [8+rax] and [16+rax] (the slots the
;      ";WIN64 epilogue" sequences read) and write rax/rsi/rdi into the
;      context record.
;   2. Copy 154 qwords of the context record to the buffer at [40+r9].
;   3. Call RtlVirtualUnwind with arguments taken from the dispatcher record.
;   4. Return 1 in eax after restoring the registers pushed by the handler.
; NOTE(review): field names follow the Win64 CONTEXT / DISPATCHER_CONTEXT
; layouts, and 1 is presumably ExceptionContinueSearch - verify against
; winnt.h.
4869$L$common_seh_tail:
4870	mov	rdi,QWORD[8+rax]
4871	mov	rsi,QWORD[16+rax]
4872	mov	QWORD[152+r8],rax	; context offset 152 (Rsp)
4873	mov	QWORD[168+r8],rsi	; context offset 168 (Rsi)
4874	mov	QWORD[176+r8],rdi	; context offset 176 (Rdi)
4875
4876	mov	rdi,QWORD[40+r9]	; dispatcher offset 40 (destination context record)
4877	mov	rsi,r8
4878	mov	ecx,154	; 154 qwords = 1232 bytes
4879	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld / rep movsq
4880
; Marshal the eight RtlVirtualUnwind arguments: four in registers, four on
; the stack above the 32-byte register home area.
4881	mov	rsi,r9
4882	xor	rcx,rcx	; arg1 = 0
4883	mov	rdx,QWORD[8+rsi]	; arg2 = dispatcher offset 8
4884	mov	r8,QWORD[rsi]	; arg3 = dispatcher offset 0
4885	mov	r9,QWORD[16+rsi]	; arg4 = dispatcher offset 16
4886	mov	r10,QWORD[40+rsi]
4887	lea	r11,[56+rsi]
4888	lea	r12,[24+rsi]
4889	mov	QWORD[32+rsp],r10	; arg5 = dispatcher offset 40 (context record)
4890	mov	QWORD[40+rsp],r11	; arg6 = address of dispatcher offset 56
4891	mov	QWORD[48+rsp],r12	; arg7 = address of dispatcher offset 24
4892	mov	QWORD[56+rsp],rcx	; arg8 = 0
4893	call	QWORD[__imp_RtlVirtualUnwind]
4894
4895	mov	eax,1
4896	add	rsp,64
4897	popfq
4898	pop	r15
4899	pop	r14
4900	pop	r13
4901	pop	r12
4902	pop	rbp
4903	pop	rbx
4904	pop	rdi
4905	pop	rsi
4906	DB	0F3h,0C3h		;repret
4907
4908
; .pdata: one record per function, each made of three image-relative dwords:
; start address, end address, and the address of its unwind-info record in
; .xdata. NOTE(review): this matches the Win64 RUNTIME_FUNCTION layout -
; confirm against the x64 exception-handling documentation.
; The two key-setup functions share $L$SEH_info_key; all other entries name a
; record that installs one of the *_se_handler routines.
4909section	.pdata rdata align=4
4910ALIGN	4
4911	DD	$L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
4912	DD	$L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
4913	DD	$L$SEH_info_ecb wrt ..imagebase
4914
4915	DD	$L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
4916	DD	$L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
4917	DD	$L$SEH_info_ccm64_enc wrt ..imagebase
4918
4919	DD	$L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
4920	DD	$L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
4921	DD	$L$SEH_info_ccm64_dec wrt ..imagebase
4922
4923	DD	$L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
4924	DD	$L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
4925	DD	$L$SEH_info_ctr32 wrt ..imagebase
4926
4927	DD	$L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
4928	DD	$L$SEH_end_aesni_xts_encrypt wrt ..imagebase
4929	DD	$L$SEH_info_xts_enc wrt ..imagebase
4930
4931	DD	$L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
4932	DD	$L$SEH_end_aesni_xts_decrypt wrt ..imagebase
4933	DD	$L$SEH_info_xts_dec wrt ..imagebase
4934
4935	DD	$L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase
4936	DD	$L$SEH_end_aesni_ocb_encrypt wrt ..imagebase
4937	DD	$L$SEH_info_ocb_enc wrt ..imagebase
4938
4939	DD	$L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase
4940	DD	$L$SEH_end_aesni_ocb_decrypt wrt ..imagebase
4941	DD	$L$SEH_info_ocb_dec wrt ..imagebase
4942	DD	$L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
4943	DD	$L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
4944	DD	$L$SEH_info_cbc wrt ..imagebase
4945
; The key-setup entries use the function labels themselves as start addresses.
4946	DD	aesni_set_decrypt_key wrt ..imagebase
4947	DD	$L$SEH_end_set_decrypt_key wrt ..imagebase
4948	DD	$L$SEH_info_key wrt ..imagebase
4949
4950	DD	aesni_set_encrypt_key wrt ..imagebase
4951	DD	$L$SEH_end_set_encrypt_key wrt ..imagebase
4952	DD	$L$SEH_info_key wrt ..imagebase
; .xdata: unwind-info records named by the .pdata entries.
; Handler-based records start with the bytes 9,0,0,0, followed by the
; image-relative address of the handler and then the handler's private data
; (the image-relative labels each *_se_handler reads through [56+r9]).
; NOTE(review): the leading byte 9 is presumably "version 1 +
; UNW_FLAG_EHANDLER" with no prolog codes - confirm against the x64
; UNWIND_INFO format.
4953section	.xdata rdata align=8
4954ALIGN	8
4955$L$SEH_info_ecb:
4956DB	9,0,0,0
4957	DD	ecb_ccm64_se_handler wrt ..imagebase
4958	DD	$L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
4959$L$SEH_info_ccm64_enc:
4960DB	9,0,0,0
4961	DD	ecb_ccm64_se_handler wrt ..imagebase
4962	DD	$L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
4963$L$SEH_info_ccm64_dec:
4964DB	9,0,0,0
4965	DD	ecb_ccm64_se_handler wrt ..imagebase
4966	DD	$L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
4967$L$SEH_info_ctr32:
4968DB	9,0,0,0
4969	DD	ctr_xts_se_handler wrt ..imagebase
4970	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
4971$L$SEH_info_xts_enc:
4972DB	9,0,0,0
4973	DD	ctr_xts_se_handler wrt ..imagebase
4974	DD	$L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
4975$L$SEH_info_xts_dec:
4976DB	9,0,0,0
4977	DD	ctr_xts_se_handler wrt ..imagebase
4978	DD	$L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
; The OCB records carry a third label (read by ocb_se_handler at [8+r11])
; and a trailing zero dword.
4979$L$SEH_info_ocb_enc:
4980DB	9,0,0,0
4981	DD	ocb_se_handler wrt ..imagebase
4982	DD	$L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase
4983	DD	$L$ocb_enc_pop wrt ..imagebase
4984	DD	0
4985$L$SEH_info_ocb_dec:
4986DB	9,0,0,0
4987	DD	ocb_se_handler wrt ..imagebase
4988	DD	$L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase
4989	DD	$L$ocb_dec_pop wrt ..imagebase
4990	DD	0
; The CBC record has no handler data: cbc_se_handler takes its range labels
; directly with LEA.
4991$L$SEH_info_cbc:
4992DB	9,0,0,0
4993	DD	cbc_se_handler wrt ..imagebase
; Handler-less record shared by aesni_set_encrypt_key and
; aesni_set_decrypt_key. Second byte 0x04 is the prolog size, matching the
; 4-byte raw-encoded "sub rsp,8" at both entry points; third byte 0x01 says
; one unwind code follows. NOTE(review): the code pair 0x04,0x02 presumably
; encodes UWOP_ALLOC_SMALL of 8 bytes at prolog offset 4 - confirm against
; the x64 UNWIND_CODE format.
4994$L$SEH_info_key:
4995DB	0x01,0x04,0x01,0x00
4996DB	0x04,0x02,0x00,0x00
4997