• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
9%include "ring_core_generated/prefix_symbols_nasm.inc"
10section	.text code align=64
11
12EXTERN	OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; aes_hw_encrypt: encrypt one 16-byte block with AES-NI.
; ABI:   Microsoft x64 (arguments arrive in rcx, rdx, r8).
; In:    rcx = 16-byte input block
;        rdx = 16-byte output block
;        r8  = key schedule: 16-byte round keys from offset 0 upward and a
;              DWORD at offset 240 used as the AESENC iteration count
;              (aes_hw_set_encrypt_key in this file stores 9 or 13 there)
; Out:   ciphertext stored at [rdx]
; Clobb: eax, r8, flags; xmm0-xmm2 are zeroed before returning
; NOTE:  AES-NI opcodes are emitted as raw bytes:
;        102,15,56,220,modrm = aesenc, 102,15,56,221,modrm = aesenclast.
; NOTE(review): a count of 0 at [240+r8] is not guarded (DEC would wrap).
;-----------------------------------------------------------------------
13global	aes_hw_encrypt
14
15ALIGN	16
16aes_hw_encrypt:
17
18	movups	xmm2,XMMWORD[rcx]	; xmm2 = input block
19	mov	eax,DWORD[240+r8]	; eax = loop count from key schedule
20	movups	xmm0,XMMWORD[r8]	; xmm0 = round key 0
21	movups	xmm1,XMMWORD[16+r8]	; xmm1 = round key 1
22	lea	r8,[32+r8]	; r8 -> round key 2
23	xorps	xmm2,xmm0	; initial AddRoundKey
24$L$oop_enc1_1:
25DB	102,15,56,220,209	; aesenc xmm2,xmm1
26	dec	eax	; sets ZF for the jnz below (movups/lea leave flags alone)
27	movups	xmm1,XMMWORD[r8]	; fetch next round key
28	lea	r8,[16+r8]
29	jnz	NEAR $L$oop_enc1_1
30DB	102,15,56,221,209	; aesenclast xmm2,xmm1
31	pxor	xmm0,xmm0	; wipe round-key material from registers
32	pxor	xmm1,xmm1
33	movups	XMMWORD[rdx],xmm2	; store ciphertext
34	pxor	xmm2,xmm2	; wipe the result register as well
35	DB	0F3h,0C3h		;repret
36
37
38
;-----------------------------------------------------------------------
; _aesni_encrypt2: run two blocks through the AES rounds in parallel.
; Register-based internal helper (not declared global).
; In:    rcx = key schedule (round keys at 16-byte stride)
;        eax = round count; it is scaled by 16 and used to index round keys
;              (NOTE(review): presumably the DWORD kept at key offset 240 --
;              no caller of this label is visible in this file)
;        xmm2, xmm3 = input blocks
; Out:   xmm2, xmm3 = encrypted blocks
; Clobb: rax (0 on exit), rcx (left at key+32+16*count), xmm0, xmm1, flags
; The loop consumes two round keys per iteration; rax runs from a negative
; offset up to zero so "add rax,32" doubles as the loop test.
;-----------------------------------------------------------------------
39ALIGN	16
40_aesni_encrypt2:
41
42	movups	xmm0,XMMWORD[rcx]	; round key 0
43	shl	eax,4	; count -> byte length (also clears upper rax)
44	movups	xmm1,XMMWORD[16+rcx]	; round key 1
45	xorps	xmm2,xmm0	; initial AddRoundKey on both blocks
46	xorps	xmm3,xmm0
47	movups	xmm0,XMMWORD[32+rcx]	; round key 2
48	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
49	neg	rax
50	add	rax,16	; rax = 16 - 16*count (negative index)
51
52$L$enc_loop2:
53DB	102,15,56,220,209	; aesenc xmm2,xmm1
54DB	102,15,56,220,217	; aesenc xmm3,xmm1
55	movups	xmm1,XMMWORD[rax*1+rcx]	; next odd round key
56	add	rax,32	; ZF set when the last key pair has been loaded
57DB	102,15,56,220,208	; aesenc xmm2,xmm0
58DB	102,15,56,220,216	; aesenc xmm3,xmm0
59	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; next even round key
60	jnz	NEAR $L$enc_loop2	; flags still from the add above
61
62DB	102,15,56,220,209	; aesenc xmm2,xmm1
63DB	102,15,56,220,217	; aesenc xmm3,xmm1
64DB	102,15,56,221,208	; aesenclast xmm2,xmm0
65DB	102,15,56,221,216	; aesenclast xmm3,xmm0
66	DB	0F3h,0C3h		;repret
67
68
69
;-----------------------------------------------------------------------
; _aesni_encrypt3: run three blocks through the AES rounds in parallel.
; Register-based internal helper (not declared global).
; In:    rcx = key schedule (round keys at 16-byte stride)
;        eax = round count, scaled by 16 to index round keys
;              (NOTE(review): presumably the DWORD kept at key offset 240 --
;              no caller of this label is visible in this file)
;        xmm2-xmm4 = input blocks
; Out:   xmm2-xmm4 = encrypted blocks
; Clobb: rax (0 on exit), rcx (left at key+32+16*count), xmm0, xmm1, flags
;-----------------------------------------------------------------------
70ALIGN	16
71_aesni_encrypt3:
72
73	movups	xmm0,XMMWORD[rcx]	; round key 0
74	shl	eax,4	; count -> byte length
75	movups	xmm1,XMMWORD[16+rcx]	; round key 1
76	xorps	xmm2,xmm0	; initial AddRoundKey on all three blocks
77	xorps	xmm3,xmm0
78	xorps	xmm4,xmm0
79	movups	xmm0,XMMWORD[32+rcx]	; round key 2
80	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
81	neg	rax
82	add	rax,16	; rax = 16 - 16*count (negative index)
83
84$L$enc_loop3:
85DB	102,15,56,220,209	; aesenc xmm2,xmm1
86DB	102,15,56,220,217	; aesenc xmm3,xmm1
87DB	102,15,56,220,225	; aesenc xmm4,xmm1
88	movups	xmm1,XMMWORD[rax*1+rcx]	; next odd round key
89	add	rax,32	; loop test: reaches zero after the last key pair
90DB	102,15,56,220,208	; aesenc xmm2,xmm0
91DB	102,15,56,220,216	; aesenc xmm3,xmm0
92DB	102,15,56,220,224	; aesenc xmm4,xmm0
93	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; next even round key
94	jnz	NEAR $L$enc_loop3	; flags still from the add above
95
96DB	102,15,56,220,209	; aesenc xmm2,xmm1
97DB	102,15,56,220,217	; aesenc xmm3,xmm1
98DB	102,15,56,220,225	; aesenc xmm4,xmm1
99DB	102,15,56,221,208	; aesenclast xmm2,xmm0
100DB	102,15,56,221,216	; aesenclast xmm3,xmm0
101DB	102,15,56,221,224	; aesenclast xmm4,xmm0
102	DB	0F3h,0C3h		;repret
103
104
105
;-----------------------------------------------------------------------
; _aesni_encrypt4: run four blocks through the AES rounds in parallel.
; Register-based internal helper (not declared global).
; In:    rcx = key schedule (round keys at 16-byte stride)
;        eax = round count, scaled by 16 to index round keys
;              (NOTE(review): presumably the DWORD kept at key offset 240 --
;              no caller of this label is visible in this file)
;        xmm2-xmm5 = input blocks
; Out:   xmm2-xmm5 = encrypted blocks
; Clobb: rax (0 on exit), rcx (left at key+32+16*count), xmm0, xmm1, flags
;-----------------------------------------------------------------------
106ALIGN	16
107_aesni_encrypt4:
108
109	movups	xmm0,XMMWORD[rcx]	; round key 0
110	shl	eax,4	; count -> byte length
111	movups	xmm1,XMMWORD[16+rcx]	; round key 1
112	xorps	xmm2,xmm0	; initial AddRoundKey on all four blocks
113	xorps	xmm3,xmm0
114	xorps	xmm4,xmm0
115	xorps	xmm5,xmm0
116	movups	xmm0,XMMWORD[32+rcx]	; round key 2
117	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
118	neg	rax
119DB	0x0f,0x1f,0x00	; 3-byte nop (nop dword [rax]); padding only
120	add	rax,16	; rax = 16 - 16*count (negative index)
121
122$L$enc_loop4:
123DB	102,15,56,220,209	; aesenc xmm2,xmm1
124DB	102,15,56,220,217	; aesenc xmm3,xmm1
125DB	102,15,56,220,225	; aesenc xmm4,xmm1
126DB	102,15,56,220,233	; aesenc xmm5,xmm1
127	movups	xmm1,XMMWORD[rax*1+rcx]	; next odd round key
128	add	rax,32	; loop test: reaches zero after the last key pair
129DB	102,15,56,220,208	; aesenc xmm2,xmm0
130DB	102,15,56,220,216	; aesenc xmm3,xmm0
131DB	102,15,56,220,224	; aesenc xmm4,xmm0
132DB	102,15,56,220,232	; aesenc xmm5,xmm0
133	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; next even round key
134	jnz	NEAR $L$enc_loop4	; flags still from the add above
135
136DB	102,15,56,220,209	; aesenc xmm2,xmm1
137DB	102,15,56,220,217	; aesenc xmm3,xmm1
138DB	102,15,56,220,225	; aesenc xmm4,xmm1
139DB	102,15,56,220,233	; aesenc xmm5,xmm1
140DB	102,15,56,221,208	; aesenclast xmm2,xmm0
141DB	102,15,56,221,216	; aesenclast xmm3,xmm0
142DB	102,15,56,221,224	; aesenclast xmm4,xmm0
143DB	102,15,56,221,232	; aesenclast xmm5,xmm0
144	DB	0F3h,0C3h		;repret
145
146
147
;-----------------------------------------------------------------------
; _aesni_encrypt6: run six blocks through the AES rounds in parallel.
; Register-based internal helper (not declared global).
; In:    rcx = key schedule (round keys at 16-byte stride)
;        eax = round count, scaled by 16 to index round keys
;        xmm2-xmm7 = input blocks
; Out:   xmm2-xmm7 = encrypted blocks
; Clobb: rax (0 on exit), rcx (left at key+32+16*count), xmm0, xmm1, flags
;
; The first round is interleaved with the initial key XOR, so the loop is
; entered at $L$enc_loop6_enter with xmm2-xmm4 already one round ahead.
;
; Secondary entry: aes_hw_ctr32_encrypt_blocks does "call $L$enc_loop6"
; directly. At that entry the caller must already have set up
;   rcx = key + 32 + 16*count, rax = negative byte index,
;   xmm1/xmm0 = the next two round keys to apply,
; exactly as the prologue below would have left them.
;-----------------------------------------------------------------------
148ALIGN	16
149_aesni_encrypt6:
150
151	movups	xmm0,XMMWORD[rcx]	; round key 0
152	shl	eax,4	; count -> byte length
153	movups	xmm1,XMMWORD[16+rcx]	; round key 1
154	xorps	xmm2,xmm0	; initial AddRoundKey, interleaved with round 1
155	pxor	xmm3,xmm0
156	pxor	xmm4,xmm0
157DB	102,15,56,220,209	; aesenc xmm2,xmm1
158	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
159	neg	rax
160DB	102,15,56,220,217	; aesenc xmm3,xmm1
161	pxor	xmm5,xmm0
162	pxor	xmm6,xmm0
163DB	102,15,56,220,225	; aesenc xmm4,xmm1
164	pxor	xmm7,xmm0
165	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2 (rcx+rax = key+32 here)
166	add	rax,16
167	jmp	NEAR $L$enc_loop6_enter	; xmm2-xmm4 already had round 1
168ALIGN	16
169$L$enc_loop6:
170DB	102,15,56,220,209	; aesenc xmm2,xmm1
171DB	102,15,56,220,217	; aesenc xmm3,xmm1
172DB	102,15,56,220,225	; aesenc xmm4,xmm1
173$L$enc_loop6_enter:
174DB	102,15,56,220,233	; aesenc xmm5,xmm1
175DB	102,15,56,220,241	; aesenc xmm6,xmm1
176DB	102,15,56,220,249	; aesenc xmm7,xmm1
177	movups	xmm1,XMMWORD[rax*1+rcx]	; next odd round key
178	add	rax,32	; loop test: reaches zero after the last key pair
179DB	102,15,56,220,208	; aesenc xmm2,xmm0
180DB	102,15,56,220,216	; aesenc xmm3,xmm0
181DB	102,15,56,220,224	; aesenc xmm4,xmm0
182DB	102,15,56,220,232	; aesenc xmm5,xmm0
183DB	102,15,56,220,240	; aesenc xmm6,xmm0
184DB	102,15,56,220,248	; aesenc xmm7,xmm0
185	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; next even round key
186	jnz	NEAR $L$enc_loop6	; flags still from the add above
187
188DB	102,15,56,220,209	; aesenc xmm2,xmm1
189DB	102,15,56,220,217	; aesenc xmm3,xmm1
190DB	102,15,56,220,225	; aesenc xmm4,xmm1
191DB	102,15,56,220,233	; aesenc xmm5,xmm1
192DB	102,15,56,220,241	; aesenc xmm6,xmm1
193DB	102,15,56,220,249	; aesenc xmm7,xmm1
194DB	102,15,56,221,208	; aesenclast xmm2,xmm0
195DB	102,15,56,221,216	; aesenclast xmm3,xmm0
196DB	102,15,56,221,224	; aesenclast xmm4,xmm0
197DB	102,15,56,221,232	; aesenclast xmm5,xmm0
198DB	102,15,56,221,240	; aesenclast xmm6,xmm0
199DB	102,15,56,221,248	; aesenclast xmm7,xmm0
200	DB	0F3h,0C3h		;repret
201
202
203
;-----------------------------------------------------------------------
; _aesni_encrypt8: run eight blocks through the AES rounds in parallel.
; Register-based internal helper (not declared global).
; In:    rcx = key schedule (round keys at 16-byte stride)
;        eax = round count, scaled by 16 to index round keys
;        xmm2-xmm9 = input blocks
; Out:   xmm2-xmm9 = encrypted blocks
; Clobb: rax (0 on exit), rcx (left at key+32+16*count), xmm0, xmm1, flags
;
; Instructions that touch xmm8/xmm9 carry a REX prefix (the extra byte 68
; in the DB sequences).
;
; Secondary entry: aes_hw_ctr32_encrypt_blocks does
; "call $L$enc_loop8_enter" after applying the first round itself. At that
; entry rcx/rax must already be biased as the prologue below leaves them
; and xmm0 must hold the next round key to apply.
;-----------------------------------------------------------------------
204ALIGN	16
205_aesni_encrypt8:
206
207	movups	xmm0,XMMWORD[rcx]	; round key 0
208	shl	eax,4	; count -> byte length
209	movups	xmm1,XMMWORD[16+rcx]	; round key 1
210	xorps	xmm2,xmm0	; initial AddRoundKey, interleaved with round 1
211	xorps	xmm3,xmm0
212	pxor	xmm4,xmm0
213	pxor	xmm5,xmm0
214	pxor	xmm6,xmm0
215	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
216	neg	rax
217DB	102,15,56,220,209	; aesenc xmm2,xmm1
218	pxor	xmm7,xmm0
219	pxor	xmm8,xmm0
220DB	102,15,56,220,217	; aesenc xmm3,xmm1
221	pxor	xmm9,xmm0
222	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2 (rcx+rax = key+32 here)
223	add	rax,16
224	jmp	NEAR $L$enc_loop8_inner	; xmm2/xmm3 already had round 1
225ALIGN	16
226$L$enc_loop8:
227DB	102,15,56,220,209	; aesenc xmm2,xmm1
228DB	102,15,56,220,217	; aesenc xmm3,xmm1
229$L$enc_loop8_inner:
230DB	102,15,56,220,225	; aesenc xmm4,xmm1
231DB	102,15,56,220,233	; aesenc xmm5,xmm1
232DB	102,15,56,220,241	; aesenc xmm6,xmm1
233DB	102,15,56,220,249	; aesenc xmm7,xmm1
234DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
235DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
236$L$enc_loop8_enter:
237	movups	xmm1,XMMWORD[rax*1+rcx]	; next odd round key
238	add	rax,32	; loop test: reaches zero after the last key pair
239DB	102,15,56,220,208	; aesenc xmm2,xmm0
240DB	102,15,56,220,216	; aesenc xmm3,xmm0
241DB	102,15,56,220,224	; aesenc xmm4,xmm0
242DB	102,15,56,220,232	; aesenc xmm5,xmm0
243DB	102,15,56,220,240	; aesenc xmm6,xmm0
244DB	102,15,56,220,248	; aesenc xmm7,xmm0
245DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
246DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
247	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]	; next even round key
248	jnz	NEAR $L$enc_loop8	; flags still from the add above
249
250DB	102,15,56,220,209	; aesenc xmm2,xmm1
251DB	102,15,56,220,217	; aesenc xmm3,xmm1
252DB	102,15,56,220,225	; aesenc xmm4,xmm1
253DB	102,15,56,220,233	; aesenc xmm5,xmm1
254DB	102,15,56,220,241	; aesenc xmm6,xmm1
255DB	102,15,56,220,249	; aesenc xmm7,xmm1
256DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
257DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
258DB	102,15,56,221,208	; aesenclast xmm2,xmm0
259DB	102,15,56,221,216	; aesenclast xmm3,xmm0
260DB	102,15,56,221,224	; aesenclast xmm4,xmm0
261DB	102,15,56,221,232	; aesenclast xmm5,xmm0
262DB	102,15,56,221,240	; aesenclast xmm6,xmm0
263DB	102,15,56,221,248	; aesenclast xmm7,xmm0
264DB	102,68,15,56,221,192	; aesenclast xmm8,xmm0
265DB	102,68,15,56,221,200	; aesenclast xmm9,xmm0
266	DB	0F3h,0C3h		;repret
267
268
;-----------------------------------------------------------------------
; aes_hw_ctr32_encrypt_blocks: AES-CTR over whole 16-byte blocks. The
; counter is the big-endian 32-bit value in bytes 12..15 of the counter
; block; only that dword is incremented.
; ABI:   Microsoft x64
; In:    rcx      = input
;        rdx      = output
;        r8       = number of 16-byte blocks
;        r9       = key schedule (round keys from offset 0, count at 240)
;        [rsp+40] = pointer to the 16-byte counter block (read only here)
; Out:   output blocks written; no return value is set up in rax
; After the prologue the arguments live in rdi (in), rsi (out), rdx
; (blocks), rcx (key) and r8 (counter block).
;
; Paths:
;   blocks == 1 : single-block loop, no stack frame
;   otherwise   : $L$ctr32_bulk builds a 16-byte aligned frame, saves
;                 xmm6-xmm15 and rbp, keeps eight pre-XORed counter blocks
;                 at [rsp+0 .. rsp+127], then runs an 8-block or 6-block
;                 main loop followed by $L$ctr32_tail for the remainder.
; All key-dependent registers and stack slots are zeroed at $L$ctr32_done.
;
; Raw opcodes used below: 102,15,56,220 = aesenc, 102,15,56,221 =
; aesenclast, 102,15,58,34,modrm,3 = pinsrd xmm,r32,3,
; 0x0f,0x38,0xf1,0x44,0x24,disp = movbe [rsp+disp],eax, 0x66,0x90 = 2-byte nop.
;
; NOTE(review): a block count of 0 is not special-cased. It takes the bulk
; path and reaches $L$ctr32_loop3, which loads one input block and stores
; one output block before it compares rdx -- callers presumably must pass
; at least 1; confirm.
;-----------------------------------------------------------------------
269global	aes_hw_ctr32_encrypt_blocks
270
271ALIGN	16
272aes_hw_ctr32_encrypt_blocks:
273	mov	QWORD[8+rsp],rdi	;WIN64 prologue
274	mov	QWORD[16+rsp],rsi
275	mov	rax,rsp	; rax = entry rsp; ctr_xts_se_handler reads it back from the context
276$L$SEH_begin_aes_hw_ctr32_encrypt_blocks:
277	mov	rdi,rcx	; rdi = in
278	mov	rsi,rdx	; rsi = out
279	mov	rdx,r8	; rdx = block count
280	mov	rcx,r9	; rcx = key schedule
281	mov	r8,QWORD[40+rsp]	; r8 = counter block pointer (5th argument)
282
283
284
285	cmp	rdx,1
286	jne	NEAR $L$ctr32_bulk
287
288
289
; ---- exactly one block: encrypt the counter block as given, XOR with input
290	movups	xmm2,XMMWORD[r8]	; xmm2 = counter block
291	movups	xmm3,XMMWORD[rdi]	; xmm3 = input block
292	mov	edx,DWORD[240+rcx]	; edx = loop count (block count no longer needed)
293	movups	xmm0,XMMWORD[rcx]	; round key 0
294	movups	xmm1,XMMWORD[16+rcx]	; round key 1
295	lea	rcx,[32+rcx]
296	xorps	xmm2,xmm0
297$L$oop_enc1_2:
298DB	102,15,56,220,209	; aesenc xmm2,xmm1
299	dec	edx	; sets ZF for the jnz below
300	movups	xmm1,XMMWORD[rcx]
301	lea	rcx,[16+rcx]
302	jnz	NEAR $L$oop_enc1_2
303DB	102,15,56,221,209	; aesenclast xmm2,xmm1
304	pxor	xmm0,xmm0	; wipe key material
305	pxor	xmm1,xmm1
306	xorps	xmm2,xmm3	; keystream XOR input
307	pxor	xmm3,xmm3
308	movups	XMMWORD[rsi],xmm2
309	xorps	xmm2,xmm2
310	jmp	NEAR $L$ctr32_epilogue	; no frame was built on this path
311
312ALIGN	16
313$L$ctr32_bulk:
; ---- frame setup: r11 keeps the entry rsp; the xmm save area and the saved
; rbp are addressed relative to r11, the counter-block scratch relative to
; the realigned rsp.
314	lea	r11,[rsp]
315
316	push	rbp	; lands at [r11-8]
317
318	sub	rsp,288
319	and	rsp,-16	; 16-byte alignment for the movdqa/movaps slots below
320	movaps	XMMWORD[(-168)+r11],xmm6	; save the callee-saved xmm6-xmm15
321	movaps	XMMWORD[(-152)+r11],xmm7
322	movaps	XMMWORD[(-136)+r11],xmm8
323	movaps	XMMWORD[(-120)+r11],xmm9
324	movaps	XMMWORD[(-104)+r11],xmm10
325	movaps	XMMWORD[(-88)+r11],xmm11
326	movaps	XMMWORD[(-72)+r11],xmm12
327	movaps	XMMWORD[(-56)+r11],xmm13
328	movaps	XMMWORD[(-40)+r11],xmm14
329	movaps	XMMWORD[(-24)+r11],xmm15
330$L$ctr32_body:
331
332
333
334
; ---- build eight counter blocks, each already XORed with round key 0.
; r8d = counter in host byte order, ebp = last dword of round key 0.
; Slot i at [rsp+16*i] gets bswap(counter+i) ^ ebp in its last dword.
; rdx is borrowed as scratch, so the block count is parked in r10.
335	movdqu	xmm2,XMMWORD[r8]	; counter block
336	movdqu	xmm0,XMMWORD[rcx]	; round key 0
337	mov	r8d,DWORD[12+r8]	; big-endian counter dword
338	pxor	xmm2,xmm0
339	mov	ebp,DWORD[12+rcx]	; round key 0, dword 3
340	movdqa	XMMWORD[rsp],xmm2	; slot 0
341	bswap	r8d	; counter in host order
342	movdqa	xmm3,xmm2
343	movdqa	xmm4,xmm2
344	movdqa	xmm5,xmm2
345	movdqa	XMMWORD[64+rsp],xmm2	; slots 4-7 start as copies of slot 0
346	movdqa	XMMWORD[80+rsp],xmm2
347	movdqa	XMMWORD[96+rsp],xmm2
348	mov	r10,rdx	; park block count
349	movdqa	XMMWORD[112+rsp],xmm2
350
351	lea	rax,[1+r8]
352	lea	rdx,[2+r8]
353	bswap	eax
354	bswap	edx
355	xor	eax,ebp
356	xor	edx,ebp
357DB	102,15,58,34,216,3	; pinsrd xmm3,eax,3  (counter+1)
358	lea	rax,[3+r8]
359	movdqa	XMMWORD[16+rsp],xmm3	; slot 1
360DB	102,15,58,34,226,3	; pinsrd xmm4,edx,3  (counter+2)
361	bswap	eax
362	mov	rdx,r10	; restore block count
363	lea	r10,[4+r8]
364	movdqa	XMMWORD[32+rsp],xmm4	; slot 2
365	xor	eax,ebp
366	bswap	r10d
367DB	102,15,58,34,232,3	; pinsrd xmm5,eax,3  (counter+3)
368	xor	r10d,ebp
369	movdqa	XMMWORD[48+rsp],xmm5	; slot 3
370	lea	r9,[5+r8]
371	mov	DWORD[((64+12))+rsp],r10d	; slot 4 <- counter+4
372	bswap	r9d
373	lea	r10,[6+r8]
374	mov	eax,DWORD[240+rcx]	; eax = round count from key schedule
375	xor	r9d,ebp
376	bswap	r10d
377	mov	DWORD[((80+12))+rsp],r9d	; slot 5 <- counter+5
378	xor	r10d,ebp
379	lea	r9,[7+r8]
380	mov	DWORD[((96+12))+rsp],r10d	; slot 6 <- counter+6
381	bswap	r9d
382	lea	r10,[OPENSSL_ia32cap_P]
383	mov	r10d,DWORD[4+r10]	; second dword of the capability vector
384	xor	r9d,ebp
385	and	r10d,71303168	; mask 0x04400000 (bits 22 and 26); NOTE(review): presumably the CPUID MOVBE and XSAVE bits -- confirm
386	mov	DWORD[((112+12))+rsp],r9d	; slot 7 <- counter+7
387
388	movups	xmm1,XMMWORD[16+rcx]	; round key 1
389
390	movdqa	xmm6,XMMWORD[64+rsp]	; load slots 4 and 5
391	movdqa	xmm7,XMMWORD[80+rsp]
392
393	cmp	rdx,8
394	jb	NEAR $L$ctr32_tail	; fewer than 8 blocks: tail only
395
396	sub	rdx,6
397	cmp	r10d,4194304	; 0x00400000: only bit 22 of the mask set -> 6-block movbe loop
398	je	NEAR $L$ctr32_6x
399
400	lea	rcx,[128+rcx]	; bias key pointer; the loop uses ((N-128))+rcx displacements
401	sub	rdx,2	; rdx = blocks - 8 for the 8-block loop
402	jmp	NEAR $L$ctr32_loop8
403
404ALIGN	16
405$L$ctr32_6x:
; ---- 6-block loop setup. ebp is byte-swapped once so that the per-block
; XOR can happen before the byte-swapping movbe store.
406	shl	eax,4	; 16*count
407	mov	r10d,48
408	bswap	ebp
409	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
410	sub	r10,rax	; r10 = 48 - 16*count (negative round-key index)
411	jmp	NEAR $L$ctr32_loop6
412
413ALIGN	16
414$L$ctr32_loop6:
; Per iteration: apply two rounds to xmm2-xmm7 while rewriting stack slots
; 0-5 with the counters for the following iteration, then finish the
; remaining rounds through $L$enc_loop6.
415	add	r8d,6
416	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]	; key offset 32
417DB	102,15,56,220,209	; aesenc xmm2,xmm1
418	mov	eax,r8d
419	xor	eax,ebp
420DB	102,15,56,220,217	; aesenc xmm3,xmm1
421DB	0x0f,0x38,0xf1,0x44,0x24,12	; movbe [rsp+12],eax  (slot 0)
422	lea	eax,[1+r8]
423DB	102,15,56,220,225	; aesenc xmm4,xmm1
424	xor	eax,ebp
425DB	0x0f,0x38,0xf1,0x44,0x24,28	; movbe [rsp+28],eax  (slot 1)
426DB	102,15,56,220,233	; aesenc xmm5,xmm1
427	lea	eax,[2+r8]
428	xor	eax,ebp
429DB	102,15,56,220,241	; aesenc xmm6,xmm1
430DB	0x0f,0x38,0xf1,0x44,0x24,44	; movbe [rsp+44],eax  (slot 2)
431	lea	eax,[3+r8]
432DB	102,15,56,220,249	; aesenc xmm7,xmm1
433	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]	; key offset 48
434	xor	eax,ebp
435
436DB	102,15,56,220,208	; aesenc xmm2,xmm0
437DB	0x0f,0x38,0xf1,0x44,0x24,60	; movbe [rsp+60],eax  (slot 3)
438	lea	eax,[4+r8]
439DB	102,15,56,220,216	; aesenc xmm3,xmm0
440	xor	eax,ebp
441DB	0x0f,0x38,0xf1,0x44,0x24,76	; movbe [rsp+76],eax  (slot 4)
442DB	102,15,56,220,224	; aesenc xmm4,xmm0
443	lea	eax,[5+r8]
444	xor	eax,ebp
445DB	102,15,56,220,232	; aesenc xmm5,xmm0
446DB	0x0f,0x38,0xf1,0x44,0x24,92	; movbe [rsp+92],eax  (slot 5)
447	mov	rax,r10	; rax = negative index expected by $L$enc_loop6
448DB	102,15,56,220,240	; aesenc xmm6,xmm0
449DB	102,15,56,220,248	; aesenc xmm7,xmm0
450	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]	; key offset 64
451
452	call	$L$enc_loop6	; remaining rounds, inside _aesni_encrypt6
453
454	movdqu	xmm8,XMMWORD[rdi]	; six input blocks
455	movdqu	xmm9,XMMWORD[16+rdi]
456	movdqu	xmm10,XMMWORD[32+rdi]
457	movdqu	xmm11,XMMWORD[48+rdi]
458	movdqu	xmm12,XMMWORD[64+rdi]
459	movdqu	xmm13,XMMWORD[80+rdi]
460	lea	rdi,[96+rdi]
461	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]	; reload round key 1 (key offset 16)
462	pxor	xmm8,xmm2	; input XOR keystream, then reload next counters
463	movaps	xmm2,XMMWORD[rsp]
464	pxor	xmm9,xmm3
465	movaps	xmm3,XMMWORD[16+rsp]
466	pxor	xmm10,xmm4
467	movaps	xmm4,XMMWORD[32+rsp]
468	pxor	xmm11,xmm5
469	movaps	xmm5,XMMWORD[48+rsp]
470	pxor	xmm12,xmm6
471	movaps	xmm6,XMMWORD[64+rsp]
472	pxor	xmm13,xmm7
473	movaps	xmm7,XMMWORD[80+rsp]
474	movdqu	XMMWORD[rsi],xmm8
475	movdqu	XMMWORD[16+rsi],xmm9
476	movdqu	XMMWORD[32+rsi],xmm10
477	movdqu	XMMWORD[48+rsi],xmm11
478	movdqu	XMMWORD[64+rsi],xmm12
479	movdqu	XMMWORD[80+rsi],xmm13
480	lea	rsi,[96+rsi]
481
482	sub	rdx,6
483	jnc	NEAR $L$ctr32_loop6	; at least 6 more blocks remain
484
485	add	rdx,6	; rdx = leftover blocks (0..5)
486	jz	NEAR $L$ctr32_done
487
; undo the 6x biasing so the tail sees rcx = key and eax = round count
488	lea	eax,[((-48))+r10]	; eax = -16*count
489	lea	rcx,[((-80))+r10*1+rcx]	; rcx = key
490	neg	eax
491	shr	eax,4	; eax = count
492	jmp	NEAR $L$ctr32_tail
493
494ALIGN	32
495$L$ctr32_loop8:
; ---- 8-block loop. rcx = key + 128 throughout. Each group below applies
; one aesenc round (alternating xmm1 / xmm0) to xmm2-xmm9, loads the round
; key for key offset N via ((N-128))+rcx, and rewrites one stack slot with
; bswap(counter+i) ^ ebp for the following iteration. xmm2-xmm7 arrive
; holding counter blocks 0-5; xmm8/xmm9 are loaded from slots 6/7 here.
496	add	r8d,8
497	movdqa	xmm8,XMMWORD[96+rsp]	; slot 6
498DB	102,15,56,220,209	; aesenc xmm2,xmm1
499	mov	r9d,r8d
500	movdqa	xmm9,XMMWORD[112+rsp]	; slot 7
501DB	102,15,56,220,217	; aesenc xmm3,xmm1
502	bswap	r9d
503	movups	xmm0,XMMWORD[((32-128))+rcx]
504DB	102,15,56,220,225
505	xor	r9d,ebp
506	nop
507DB	102,15,56,220,233
508	mov	DWORD[((0+12))+rsp],r9d	; slot 0 for next iteration
509	lea	r9,[1+r8]
510DB	102,15,56,220,241
511DB	102,15,56,220,249
512DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
513DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
514	movups	xmm1,XMMWORD[((48-128))+rcx]
515	bswap	r9d
516DB	102,15,56,220,208	; aesenc xmm2,xmm0
517DB	102,15,56,220,216
518	xor	r9d,ebp
519DB	0x66,0x90	; 2-byte nop; padding only
520DB	102,15,56,220,224
521DB	102,15,56,220,232
522	mov	DWORD[((16+12))+rsp],r9d	; slot 1
523	lea	r9,[2+r8]
524DB	102,15,56,220,240
525DB	102,15,56,220,248
526DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
527DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
528	movups	xmm0,XMMWORD[((64-128))+rcx]
529	bswap	r9d
530DB	102,15,56,220,209
531DB	102,15,56,220,217
532	xor	r9d,ebp
533DB	0x66,0x90
534DB	102,15,56,220,225
535DB	102,15,56,220,233
536	mov	DWORD[((32+12))+rsp],r9d	; slot 2
537	lea	r9,[3+r8]
538DB	102,15,56,220,241
539DB	102,15,56,220,249
540DB	102,68,15,56,220,193
541DB	102,68,15,56,220,201
542	movups	xmm1,XMMWORD[((80-128))+rcx]
543	bswap	r9d
544DB	102,15,56,220,208
545DB	102,15,56,220,216
546	xor	r9d,ebp
547DB	0x66,0x90
548DB	102,15,56,220,224
549DB	102,15,56,220,232
550	mov	DWORD[((48+12))+rsp],r9d	; slot 3
551	lea	r9,[4+r8]
552DB	102,15,56,220,240
553DB	102,15,56,220,248
554DB	102,68,15,56,220,192
555DB	102,68,15,56,220,200
556	movups	xmm0,XMMWORD[((96-128))+rcx]
557	bswap	r9d
558DB	102,15,56,220,209
559DB	102,15,56,220,217
560	xor	r9d,ebp
561DB	0x66,0x90
562DB	102,15,56,220,225
563DB	102,15,56,220,233
564	mov	DWORD[((64+12))+rsp],r9d	; slot 4
565	lea	r9,[5+r8]
566DB	102,15,56,220,241
567DB	102,15,56,220,249
568DB	102,68,15,56,220,193
569DB	102,68,15,56,220,201
570	movups	xmm1,XMMWORD[((112-128))+rcx]
571	bswap	r9d
572DB	102,15,56,220,208
573DB	102,15,56,220,216
574	xor	r9d,ebp
575DB	0x66,0x90
576DB	102,15,56,220,224
577DB	102,15,56,220,232
578	mov	DWORD[((80+12))+rsp],r9d	; slot 5
579	lea	r9,[6+r8]
580DB	102,15,56,220,240
581DB	102,15,56,220,248
582DB	102,68,15,56,220,192
583DB	102,68,15,56,220,200
584	movups	xmm0,XMMWORD[((128-128))+rcx]
585	bswap	r9d
586DB	102,15,56,220,209
587DB	102,15,56,220,217
588	xor	r9d,ebp
589DB	0x66,0x90
590DB	102,15,56,220,225
591DB	102,15,56,220,233
592	mov	DWORD[((96+12))+rsp],r9d	; slot 6
593	lea	r9,[7+r8]
594DB	102,15,56,220,241
595DB	102,15,56,220,249
596DB	102,68,15,56,220,193
597DB	102,68,15,56,220,201
598	movups	xmm1,XMMWORD[((144-128))+rcx]
599	bswap	r9d
600DB	102,15,56,220,208
601DB	102,15,56,220,216
602DB	102,15,56,220,224
603	xor	r9d,ebp
604	movdqu	xmm10,XMMWORD[rdi]	; first input block, loaded early
605DB	102,15,56,220,232
606	mov	DWORD[((112+12))+rsp],r9d	; slot 7
607	cmp	eax,11	; round count vs 11; the result is consumed by the jb below
608DB	102,15,56,220,240
609DB	102,15,56,220,248
610DB	102,68,15,56,220,192
611DB	102,68,15,56,220,200
612	movups	xmm0,XMMWORD[((160-128))+rcx]
613
614	jb	NEAR $L$ctr32_enc_done	; count below 11 (9 as stored by aes_hw_set_encrypt_key): xmm0 is the last key
615
; ---- four extra rounds for the longer schedule (count 13)
616DB	102,15,56,220,209
617DB	102,15,56,220,217
618DB	102,15,56,220,225
619DB	102,15,56,220,233
620DB	102,15,56,220,241
621DB	102,15,56,220,249
622DB	102,68,15,56,220,193
623DB	102,68,15,56,220,201
624	movups	xmm1,XMMWORD[((176-128))+rcx]
625
626DB	102,15,56,220,208
627DB	102,15,56,220,216
628DB	102,15,56,220,224
629DB	102,15,56,220,232
630DB	102,15,56,220,240
631DB	102,15,56,220,248
632DB	102,68,15,56,220,192
633DB	102,68,15,56,220,200
634	movups	xmm0,XMMWORD[((192-128))+rcx]
635
636
637
638DB	102,15,56,220,209
639DB	102,15,56,220,217
640DB	102,15,56,220,225
641DB	102,15,56,220,233
642DB	102,15,56,220,241
643DB	102,15,56,220,249
644DB	102,68,15,56,220,193
645DB	102,68,15,56,220,201
646	movups	xmm1,XMMWORD[((208-128))+rcx]
647
648DB	102,15,56,220,208
649DB	102,15,56,220,216
650DB	102,15,56,220,224
651DB	102,15,56,220,232
652DB	102,15,56,220,240
653DB	102,15,56,220,248
654DB	102,68,15,56,220,192
655DB	102,68,15,56,220,200
656	movups	xmm0,XMMWORD[((224-128))+rcx]
657	jmp	NEAR $L$ctr32_enc_done
658
659ALIGN	16
660$L$ctr32_enc_done:
; ---- last two rounds. Here xmm1 = second-to-last round key, xmm0 = last
; round key. Each input block is XORed with xmm0 first and then used as the
; aesenclast operand, which folds the final AddRoundKey and the CTR XOR
; into one instruction. The counter blocks for the next iteration are
; reloaded from the stack in between.
661	movdqu	xmm11,XMMWORD[16+rdi]
662	pxor	xmm10,xmm0
663	movdqu	xmm12,XMMWORD[32+rdi]
664	pxor	xmm11,xmm0
665	movdqu	xmm13,XMMWORD[48+rdi]
666	pxor	xmm12,xmm0
667	movdqu	xmm14,XMMWORD[64+rdi]
668	pxor	xmm13,xmm0
669	movdqu	xmm15,XMMWORD[80+rdi]
670	pxor	xmm14,xmm0
671	pxor	xmm15,xmm0
672DB	102,15,56,220,209	; aesenc xmm2..xmm9 with xmm1
673DB	102,15,56,220,217
674DB	102,15,56,220,225
675DB	102,15,56,220,233
676DB	102,15,56,220,241
677DB	102,15,56,220,249
678DB	102,68,15,56,220,193
679DB	102,68,15,56,220,201
680	movdqu	xmm1,XMMWORD[96+rdi]	; xmm1 reused for input block 6
681	lea	rdi,[128+rdi]
682
683DB	102,65,15,56,221,210	; aesenclast xmm2,xmm10
684	pxor	xmm1,xmm0
685	movdqu	xmm10,XMMWORD[((112-128))+rdi]	; input block 7
686DB	102,65,15,56,221,219	; aesenclast xmm3,xmm11
687	pxor	xmm10,xmm0
688	movdqa	xmm11,XMMWORD[rsp]	; next counter block 0
689DB	102,65,15,56,221,228	; aesenclast xmm4,xmm12
690DB	102,65,15,56,221,237	; aesenclast xmm5,xmm13
691	movdqa	xmm12,XMMWORD[16+rsp]
692	movdqa	xmm13,XMMWORD[32+rsp]
693DB	102,65,15,56,221,246	; aesenclast xmm6,xmm14
694DB	102,65,15,56,221,255	; aesenclast xmm7,xmm15
695	movdqa	xmm14,XMMWORD[48+rsp]
696	movdqa	xmm15,XMMWORD[64+rsp]
697DB	102,68,15,56,221,193	; aesenclast xmm8,xmm1
698	movdqa	xmm0,XMMWORD[80+rsp]
699	movups	xmm1,XMMWORD[((16-128))+rcx]	; reload round key 1
700DB	102,69,15,56,221,202	; aesenclast xmm9,xmm10
701
702	movups	XMMWORD[rsi],xmm2	; store 8 output blocks, move next counters in
703	movdqa	xmm2,xmm11
704	movups	XMMWORD[16+rsi],xmm3
705	movdqa	xmm3,xmm12
706	movups	XMMWORD[32+rsi],xmm4
707	movdqa	xmm4,xmm13
708	movups	XMMWORD[48+rsi],xmm5
709	movdqa	xmm5,xmm14
710	movups	XMMWORD[64+rsi],xmm6
711	movdqa	xmm6,xmm15
712	movups	XMMWORD[80+rsi],xmm7
713	movdqa	xmm7,xmm0
714	movups	XMMWORD[96+rsi],xmm8
715	movups	XMMWORD[112+rsi],xmm9
716	lea	rsi,[128+rsi]
717
718	sub	rdx,8
719	jnc	NEAR $L$ctr32_loop8	; at least 8 more blocks remain
720
721	add	rdx,8	; rdx = leftover blocks (0..7)
722	jz	NEAR $L$ctr32_done
723	lea	rcx,[((-128))+rcx]	; undo the +128 key pointer bias
724
725$L$ctr32_tail:
; ---- tail: 1..7 blocks left. On entry rcx = key, eax = round count,
; xmm1 = round key 1, xmm2.. = pre-XORed counter blocks.
726
727
728	lea	rcx,[16+rcx]	; rcx -> round key 1
729	cmp	rdx,4
730	jb	NEAR $L$ctr32_loop3	; 1..3 blocks
731	je	NEAR $L$ctr32_loop4	; exactly 4 blocks
732
733
; 5..7 blocks: apply round 1 here, then reuse the 8-way loop with xmm9 as a
; zeroed dummy lane.
734	shl	eax,4
735	movdqa	xmm8,XMMWORD[96+rsp]	; counter block 6
736	pxor	xmm9,xmm9
737
738	movups	xmm0,XMMWORD[16+rcx]	; round key 2
739DB	102,15,56,220,209	; aesenc xmm2,xmm1
740DB	102,15,56,220,217	; aesenc xmm3,xmm1
741	lea	rcx,[((32-16))+rax*1+rcx]	; rcx = key + 32 + 16*count
742	neg	rax
743DB	102,15,56,220,225	; aesenc xmm4,xmm1
744	add	rax,16
745	movups	xmm10,XMMWORD[rdi]
746DB	102,15,56,220,233	; aesenc xmm5,xmm1
747DB	102,15,56,220,241	; aesenc xmm6,xmm1
748	movups	xmm11,XMMWORD[16+rdi]
749	movups	xmm12,XMMWORD[32+rdi]
750DB	102,15,56,220,249	; aesenc xmm7,xmm1
751DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
752
753	call	$L$enc_loop8_enter	; remaining rounds, inside _aesni_encrypt8
754
755	movdqu	xmm13,XMMWORD[48+rdi]
756	pxor	xmm2,xmm10
757	movdqu	xmm10,XMMWORD[64+rdi]
758	pxor	xmm3,xmm11
759	movdqu	XMMWORD[rsi],xmm2
760	pxor	xmm4,xmm12
761	movdqu	XMMWORD[16+rsi],xmm3
762	pxor	xmm5,xmm13
763	movdqu	XMMWORD[32+rsi],xmm4
764	pxor	xmm6,xmm10
765	movdqu	XMMWORD[48+rsi],xmm5
766	movdqu	XMMWORD[64+rsi],xmm6
767	cmp	rdx,6	; this result feeds both the jb and the je below
768	jb	NEAR $L$ctr32_done	; exactly 5 blocks
769
770	movups	xmm11,XMMWORD[80+rdi]
771	xorps	xmm7,xmm11
772	movups	XMMWORD[80+rsi],xmm7
773	je	NEAR $L$ctr32_done	; exactly 6 blocks (SSE moves/xors keep flags)
774
775	movups	xmm12,XMMWORD[96+rdi]
776	xorps	xmm8,xmm12
777	movups	XMMWORD[96+rsi],xmm8
778	jmp	NEAR $L$ctr32_done
779
780ALIGN	32
781$L$ctr32_loop4:
; ---- exactly 4 blocks: one round per iteration, eax counts rounds down
782DB	102,15,56,220,209	; aesenc xmm2,xmm1
783	lea	rcx,[16+rcx]
784	dec	eax	; sets ZF for the jnz below
785DB	102,15,56,220,217	; aesenc xmm3,xmm1
786DB	102,15,56,220,225	; aesenc xmm4,xmm1
787DB	102,15,56,220,233	; aesenc xmm5,xmm1
788	movups	xmm1,XMMWORD[rcx]	; next round key
789	jnz	NEAR $L$ctr32_loop4
790DB	102,15,56,221,209	; aesenclast xmm2,xmm1
791DB	102,15,56,221,217	; aesenclast xmm3,xmm1
792	movups	xmm10,XMMWORD[rdi]
793	movups	xmm11,XMMWORD[16+rdi]
794DB	102,15,56,221,225	; aesenclast xmm4,xmm1
795DB	102,15,56,221,233	; aesenclast xmm5,xmm1
796	movups	xmm12,XMMWORD[32+rdi]
797	movups	xmm13,XMMWORD[48+rdi]
798
799	xorps	xmm2,xmm10
800	movups	XMMWORD[rsi],xmm2
801	xorps	xmm3,xmm11
802	movups	XMMWORD[16+rsi],xmm3
803	pxor	xmm4,xmm12
804	movdqu	XMMWORD[32+rsi],xmm4
805	pxor	xmm5,xmm13
806	movdqu	XMMWORD[48+rsi],xmm5
807	jmp	NEAR $L$ctr32_done
808
809ALIGN	32
810$L$ctr32_loop3:
; ---- 1..3 blocks: always encrypts three lanes, stores only what rdx asks
; for (the first block is stored before rdx is examined).
811DB	102,15,56,220,209	; aesenc xmm2,xmm1
812	lea	rcx,[16+rcx]
813	dec	eax	; sets ZF for the jnz below
814DB	102,15,56,220,217	; aesenc xmm3,xmm1
815DB	102,15,56,220,225	; aesenc xmm4,xmm1
816	movups	xmm1,XMMWORD[rcx]	; next round key
817	jnz	NEAR $L$ctr32_loop3
818DB	102,15,56,221,209	; aesenclast xmm2,xmm1
819DB	102,15,56,221,217	; aesenclast xmm3,xmm1
820DB	102,15,56,221,225	; aesenclast xmm4,xmm1
821
822	movups	xmm10,XMMWORD[rdi]
823	xorps	xmm2,xmm10
824	movups	XMMWORD[rsi],xmm2
825	cmp	rdx,2	; this result feeds both the jb and the je below
826	jb	NEAR $L$ctr32_done
827
828	movups	xmm11,XMMWORD[16+rdi]
829	xorps	xmm3,xmm11
830	movups	XMMWORD[16+rsi],xmm3
831	je	NEAR $L$ctr32_done
832
833	movups	xmm12,XMMWORD[32+rdi]
834	xorps	xmm4,xmm12
835	movups	XMMWORD[32+rsi],xmm4
836
837$L$ctr32_done:
; ---- cleanup: zero working registers, restore xmm6-xmm15 and overwrite
; their save slots, zero the eight counter-block slots, restore rbp/rsp.
838	xorps	xmm0,xmm0
839	xor	ebp,ebp
840	pxor	xmm1,xmm1
841	pxor	xmm2,xmm2
842	pxor	xmm3,xmm3
843	pxor	xmm4,xmm4
844	pxor	xmm5,xmm5
845	movaps	xmm6,XMMWORD[((-168))+r11]
846	movaps	XMMWORD[(-168)+r11],xmm0
847	movaps	xmm7,XMMWORD[((-152))+r11]
848	movaps	XMMWORD[(-152)+r11],xmm0
849	movaps	xmm8,XMMWORD[((-136))+r11]
850	movaps	XMMWORD[(-136)+r11],xmm0
851	movaps	xmm9,XMMWORD[((-120))+r11]
852	movaps	XMMWORD[(-120)+r11],xmm0
853	movaps	xmm10,XMMWORD[((-104))+r11]
854	movaps	XMMWORD[(-104)+r11],xmm0
855	movaps	xmm11,XMMWORD[((-88))+r11]
856	movaps	XMMWORD[(-88)+r11],xmm0
857	movaps	xmm12,XMMWORD[((-72))+r11]
858	movaps	XMMWORD[(-72)+r11],xmm0
859	movaps	xmm13,XMMWORD[((-56))+r11]
860	movaps	XMMWORD[(-56)+r11],xmm0
861	movaps	xmm14,XMMWORD[((-40))+r11]
862	movaps	XMMWORD[(-40)+r11],xmm0
863	movaps	xmm15,XMMWORD[((-24))+r11]
864	movaps	XMMWORD[(-24)+r11],xmm0
865	movaps	XMMWORD[rsp],xmm0
866	movaps	XMMWORD[16+rsp],xmm0
867	movaps	XMMWORD[32+rsp],xmm0
868	movaps	XMMWORD[48+rsp],xmm0
869	movaps	XMMWORD[64+rsp],xmm0
870	movaps	XMMWORD[80+rsp],xmm0
871	movaps	XMMWORD[96+rsp],xmm0
872	movaps	XMMWORD[112+rsp],xmm0
873	mov	rbp,QWORD[((-8))+r11]	; rbp as pushed in $L$ctr32_bulk
874
875	lea	rsp,[r11]	; back to the entry rsp
876
877$L$ctr32_epilogue:
878	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
879	mov	rsi,QWORD[16+rsp]
880	DB	0F3h,0C3h		;repret
881
882$L$SEH_end_aes_hw_ctr32_encrypt_blocks:
;-----------------------------------------------------------------------
; aes_hw_set_encrypt_key: expand a 128- or 256-bit user key into an AES
; encryption key schedule.
; ABI:   Microsoft x64
; In:    rcx = user key bytes
;        edx = key length in bits (only 128 and 256 are accepted)
;        r8  = output key schedule
; Out:   rax =  0 on success
;        rax = -1 if rcx or r8 is null
;        rax = -2 if edx is neither 128 nor 256
;        On success round keys are written from [r8] at a 16-byte stride
;        (11 keys for 128 bits, 15 for 256) and the DWORD at [r8+240] is
;        set to 9 or 13; the encrypt routines in this file use that value
;        as their aesenc count.
; Clobb: rdx, r10, flags; xmm0-xmm5 are zeroed before returning
;
; Two implementations are selected from OPENSSL_ia32cap_P:
;   default : aeskeygenassist (raw bytes 102,15,58,223,modrm,rcon) plus
;             the $L$key_expansion_* helpers that follow this function
;   *_alt   : pshufb (102,15,56,0,modrm) + aesenclast (102,15,56,221,modrm)
;             with the $L$key_rotate / $L$key_rcon1 / $L$key_rcon1b tables
;
; The first instruction is a DB-encoded "sub rsp,8"; it is matched by the
; "add rsp,8" before the return and described by $L$SEH_info_key.
;-----------------------------------------------------------------------
883global	aes_hw_set_encrypt_key
884
885ALIGN	16
886aes_hw_set_encrypt_key:
887__aesni_set_encrypt_key:
888
889DB	0x48,0x83,0xEC,0x08	; sub rsp,8
890
891	mov	rax,-1	; default return value: null argument
892	test	rcx,rcx
893	jz	NEAR $L$enc_key_ret
894	test	r8,r8
895	jz	NEAR $L$enc_key_ret
896
897	movups	xmm0,XMMWORD[rcx]	; first 16 bytes of the user key
898	xorps	xmm4,xmm4	; xmm4 = 0, scratch for the shufps-based helpers
899	lea	r10,[OPENSSL_ia32cap_P]
900	mov	r10d,DWORD[4+r10]	; second dword of the capability vector
901	and	r10d,268437504	; mask 0x10000800 (bits 28 and 11); NOTE(review): presumably the AVX and XOP bits -- confirm
902	lea	rax,[16+r8]	; rax = write cursor, starts at round key 1
903	cmp	edx,256
904	je	NEAR $L$14rounds
905
906	cmp	edx,128
907	jne	NEAR $L$bad_keybits
908
909$L$10rounds:
910	mov	edx,9	; value stored at [r8+240]
911	cmp	r10d,268435456	; 0x10000000: only bit 28 of the mask set -> alt path
912	je	NEAR $L$10rounds_alt
913
; aeskeygenassist path: each helper call stores the previous round key at
; [rax], advances rax by 16 and leaves the new round key in xmm0. The
; "_cold" entry skips the store because round key 0 was stored just above.
914	movups	XMMWORD[r8],xmm0	; round key 0 = user key
915DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,0x01
916	call	$L$key_expansion_128_cold
917DB	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,0x02
918	call	$L$key_expansion_128
919DB	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,0x04
920	call	$L$key_expansion_128
921DB	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,0x08
922	call	$L$key_expansion_128
923DB	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,0x10
924	call	$L$key_expansion_128
925DB	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,0x20
926	call	$L$key_expansion_128
927DB	102,15,58,223,200,64	; aeskeygenassist xmm1,xmm0,0x40
928	call	$L$key_expansion_128
929DB	102,15,58,223,200,128	; aeskeygenassist xmm1,xmm0,0x80
930	call	$L$key_expansion_128
931DB	102,15,58,223,200,27	; aeskeygenassist xmm1,xmm0,0x1b
932	call	$L$key_expansion_128
933DB	102,15,58,223,200,54	; aeskeygenassist xmm1,xmm0,0x36
934	call	$L$key_expansion_128
935	movups	XMMWORD[rax],xmm0	; round key 10 (rax = r8+160 here)
936	mov	DWORD[80+rax],edx	; [r8+240] = 9
937	xor	eax,eax	; return 0
938	jmp	NEAR $L$enc_key_ret
939
940ALIGN	16
941$L$10rounds_alt:
; pshufb/aesenclast path for 128-bit keys.
; xmm5 = byte-shuffle mask, xmm4 = round constant (doubled each step),
; xmm2 = copy of the previous round key used to build the running XOR.
942	movdqa	xmm5,XMMWORD[$L$key_rotate]
943	mov	r10d,8	; eight loop iterations -> round keys 1..8
944	movdqa	xmm4,XMMWORD[$L$key_rcon1]
945	movdqa	xmm2,xmm0
946	movdqu	XMMWORD[r8],xmm0	; round key 0 = user key
947	jmp	NEAR $L$oop_key128
948
949ALIGN	16
950$L$oop_key128:
951DB	102,15,56,0,197	; pshufb xmm0,xmm5
952DB	102,15,56,221,196	; aesenclast xmm0,xmm4
953	pslld	xmm4,1	; next round constant
954	lea	rax,[16+rax]
955
956	movdqa	xmm3,xmm2	; xmm2 = prefix XOR of the previous key's dwords
957	pslldq	xmm2,4
958	pxor	xmm3,xmm2
959	pslldq	xmm2,4
960	pxor	xmm3,xmm2
961	pslldq	xmm2,4
962	pxor	xmm2,xmm3
963
964	pxor	xmm0,xmm2	; xmm0 = new round key
965	movdqu	XMMWORD[(-16)+rax],xmm0
966	movdqa	xmm2,xmm0
967
968	dec	r10d
969	jnz	NEAR $L$oop_key128
970
; round keys 9 and 10 restart the constant from $L$key_rcon1b (0x1b), since
; another doubling of the dword constant would leave the single byte range
971	movdqa	xmm4,XMMWORD[$L$key_rcon1b]
972
973DB	102,15,56,0,197	; pshufb xmm0,xmm5
974DB	102,15,56,221,196	; aesenclast xmm0,xmm4
975	pslld	xmm4,1	; 0x1b -> 0x36 for the final key
976
977	movdqa	xmm3,xmm2
978	pslldq	xmm2,4
979	pxor	xmm3,xmm2
980	pslldq	xmm2,4
981	pxor	xmm3,xmm2
982	pslldq	xmm2,4
983	pxor	xmm2,xmm3
984
985	pxor	xmm0,xmm2
986	movdqu	XMMWORD[rax],xmm0	; round key 9 (rax = r8+144 here)
987
988	movdqa	xmm2,xmm0
989DB	102,15,56,0,197	; pshufb xmm0,xmm5
990DB	102,15,56,221,196	; aesenclast xmm0,xmm4
991
992	movdqa	xmm3,xmm2
993	pslldq	xmm2,4
994	pxor	xmm3,xmm2
995	pslldq	xmm2,4
996	pxor	xmm3,xmm2
997	pslldq	xmm2,4
998	pxor	xmm2,xmm3
999
1000	pxor	xmm0,xmm2
1001	movdqu	XMMWORD[16+rax],xmm0	; round key 10
1002
1003	mov	DWORD[96+rax],edx	; [r8+240] = 9
1004	xor	eax,eax	; return 0
1005	jmp	NEAR $L$enc_key_ret
1006
1007
1008
1009ALIGN	16
1010$L$14rounds:
1011	movups	xmm2,XMMWORD[16+rcx]	; second 16 bytes of the user key
1012	mov	edx,13	; value stored at [r8+240]
1013	lea	rax,[16+rax]	; write cursor -> round key 2 (r8+32)
1014	cmp	r10d,268435456	; same capability test as the 128-bit path
1015	je	NEAR $L$14rounds_alt
1016
; aeskeygenassist path: the 256a helper derives the next even round key
; into xmm0, the 256b helper the next odd one into xmm2; each stores the
; key produced by the previous call before computing.
1017	movups	XMMWORD[r8],xmm0	; round key 0
1018	movups	XMMWORD[16+r8],xmm2	; round key 1
1019DB	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,0x01
1020	call	$L$key_expansion_256a_cold
1021DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,0x01
1022	call	$L$key_expansion_256b
1023DB	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,0x02
1024	call	$L$key_expansion_256a
1025DB	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,0x02
1026	call	$L$key_expansion_256b
1027DB	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,0x04
1028	call	$L$key_expansion_256a
1029DB	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,0x04
1030	call	$L$key_expansion_256b
1031DB	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,0x08
1032	call	$L$key_expansion_256a
1033DB	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,0x08
1034	call	$L$key_expansion_256b
1035DB	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,0x10
1036	call	$L$key_expansion_256a
1037DB	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,0x10
1038	call	$L$key_expansion_256b
1039DB	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,0x20
1040	call	$L$key_expansion_256a
1041DB	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,0x20
1042	call	$L$key_expansion_256b
1043DB	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,0x40
1044	call	$L$key_expansion_256a
1045	movups	XMMWORD[rax],xmm0	; round key 14 (rax = r8+224 here)
1046	mov	DWORD[16+rax],edx	; [r8+240] = 13
1047	xor	rax,rax	; return 0
1048	jmp	NEAR $L$enc_key_ret
1049
1050ALIGN	16
1051$L$14rounds_alt:
; pshufb/aesenclast path for 256-bit keys.
; xmm0/xmm1 carry the two most recent round keys, xmm2 is the working
; register, r10d counts the seven even round keys that get a round constant.
1052	movdqa	xmm5,XMMWORD[$L$key_rotate]
1053	movdqa	xmm4,XMMWORD[$L$key_rcon1]
1054	mov	r10d,7
1055	movdqu	XMMWORD[r8],xmm0	; round key 0
1056	movdqa	xmm1,xmm2
1057	movdqu	XMMWORD[16+r8],xmm2	; round key 1
1058	jmp	NEAR $L$oop_key256
1059
1060ALIGN	16
1061$L$oop_key256:
1062DB	102,15,56,0,213	; pshufb xmm2,xmm5
1063DB	102,15,56,221,212	; aesenclast xmm2,xmm4
1064
1065	movdqa	xmm3,xmm0	; xmm0 = prefix XOR of its own dwords
1066	pslldq	xmm0,4
1067	pxor	xmm3,xmm0
1068	pslldq	xmm0,4
1069	pxor	xmm3,xmm0
1070	pslldq	xmm0,4
1071	pxor	xmm0,xmm3
1072	pslld	xmm4,1	; next round constant
1073
1074	pxor	xmm0,xmm2	; xmm0 = next even round key
1075	movdqu	XMMWORD[rax],xmm0
1076
1077	dec	r10d
1078	jz	NEAR $L$done_key256	; the last key written is an even one
1079
1080	pshufd	xmm2,xmm0,0xff	; broadcast the last dword of the new key
1081	pxor	xmm3,xmm3
1082DB	102,15,56,221,211	; aesenclast xmm2,xmm3  (zero key: no constant added)
1083
1084	movdqa	xmm3,xmm1	; xmm1 = prefix XOR of the previous odd key
1085	pslldq	xmm1,4
1086	pxor	xmm3,xmm1
1087	pslldq	xmm1,4
1088	pxor	xmm3,xmm1
1089	pslldq	xmm1,4
1090	pxor	xmm1,xmm3
1091
1092	pxor	xmm2,xmm1	; xmm2 = next odd round key
1093	movdqu	XMMWORD[16+rax],xmm2
1094	lea	rax,[32+rax]
1095	movdqa	xmm1,xmm2
1096
1097	jmp	NEAR $L$oop_key256
1098
1099$L$done_key256:
1100	mov	DWORD[16+rax],edx	; [r8+240] = 13 (rax = r8+224 here)
1101	xor	eax,eax	; return 0
1102	jmp	NEAR $L$enc_key_ret
1103
1104ALIGN	16
1105$L$bad_keybits:
1106	mov	rax,-2	; unsupported key length
1107$L$enc_key_ret:
1108	pxor	xmm0,xmm0	; wipe key material from registers on every exit path
1109	pxor	xmm1,xmm1
1110	pxor	xmm2,xmm2
1111	pxor	xmm3,xmm3
1112	pxor	xmm4,xmm4
1113	pxor	xmm5,xmm5
1114	add	rsp,8	; undo the DB-encoded sub rsp,8
1115
1116	DB	0F3h,0C3h		;repret
1117
1118$L$SEH_end_set_encrypt_key:
1119
;-----------------------------------------------------------------------
; $L$key_expansion_128: one step of the 128-bit key schedule, reached by
; "call" from aes_hw_set_encrypt_key.
; In:    xmm0 = previous round key
;        xmm1 = aeskeygenassist result for xmm0 (dword 3 is the one used)
;        xmm4 = scratch; zeroed by the caller before the first call and
;               carried from call to call
;        rax  = write cursor into the key schedule
; Out:   xmm0 = next round key; the previous key is stored at [rax] and rax
;        advances by 16
; $L$key_expansion_128_cold is the same step without the store/advance.
; Clobb: xmm1, xmm4
;-----------------------------------------------------------------------
1120ALIGN	16
1121$L$key_expansion_128:
1122	movups	XMMWORD[rax],xmm0	; emit the key produced by the previous step
1123	lea	rax,[16+rax]
1124$L$key_expansion_128_cold:
1125	shufps	xmm4,xmm0,16	; the two shufps/xorps pairs accumulate the running
1126	xorps	xmm0,xmm4	; XOR of the previous key's dwords into xmm0
1127	shufps	xmm4,xmm0,140
1128	xorps	xmm0,xmm4
1129	shufps	xmm1,xmm1,255	; broadcast dword 3 of the keygenassist result
1130	xorps	xmm0,xmm1
1131	DB	0F3h,0C3h		;repret
1132
;-----------------------------------------------------------------------
; $L$key_expansion_192a / $L$key_expansion_192b: helper steps operating on
; a 24-byte key state held in xmm0 plus the low half of xmm2.
; NOTE(review): no call to these labels is visible in this file
; (aes_hw_set_encrypt_key accepts only 128 and 256 bits); they look like
; leftovers of a 192-bit path -- confirm before relying on them.
; In:    xmm0, xmm2 = key state, xmm1 = aeskeygenassist result (dword 1 is
;        broadcast), xmm4 = scratch, rax = write cursor
; 192a stores xmm0 at [rax] (+16) and saves xmm2 in xmm5;
; 192b stores two round keys assembled from xmm5/xmm0/xmm2 (+32) and then
; jumps into the shared tail at $L$key_expansion_192b_warm.
; Clobb: xmm1, xmm3, xmm4, xmm5
;-----------------------------------------------------------------------
1133ALIGN	16
1134$L$key_expansion_192a:
1135	movups	XMMWORD[rax],xmm0
1136	lea	rax,[16+rax]
1137$L$key_expansion_192a_cold:
1138	movaps	xmm5,xmm2	; keep the old xmm2 for the next 192b store
1139$L$key_expansion_192b_warm:
1140	shufps	xmm4,xmm0,16
1141	movdqa	xmm3,xmm2
1142	xorps	xmm0,xmm4
1143	shufps	xmm4,xmm0,140
1144	pslldq	xmm3,4
1145	xorps	xmm0,xmm4
1146	pshufd	xmm1,xmm1,85	; broadcast dword 1 of the keygenassist result
1147	pxor	xmm2,xmm3
1148	pxor	xmm0,xmm1
1149	pshufd	xmm3,xmm0,255	; broadcast dword 3 of the updated xmm0
1150	pxor	xmm2,xmm3
1151	DB	0F3h,0C3h		;repret
1152
1153ALIGN	16
1154$L$key_expansion_192b:
1155	movaps	xmm3,xmm0
1156	shufps	xmm5,xmm0,68	; low half of xmm5 followed by low half of xmm0
1157	movups	XMMWORD[rax],xmm5
1158	shufps	xmm3,xmm2,78	; high half of xmm0 followed by low half of xmm2
1159	movups	XMMWORD[16+rax],xmm3
1160	lea	rax,[32+rax]
1161	jmp	NEAR $L$key_expansion_192b_warm
1162
;-----------------------------------------------------------------------
; $L$key_expansion_256a: derive the next even-numbered round key of the
; 256-bit schedule, reached by "call" from aes_hw_set_encrypt_key.
; In:    xmm0 = round key two positions back
;        xmm2 = most recent (odd) round key, still to be stored
;        xmm1 = aeskeygenassist result for xmm2 (dword 3 is the one used)
;        xmm4 = scratch carried between calls
;        rax  = write cursor into the key schedule
; Out:   xmm0 = next round key; xmm2 is stored at [rax], rax advances by 16
; $L$key_expansion_256a_cold is the same step without the store/advance.
; Clobb: xmm1, xmm4
;-----------------------------------------------------------------------
1163ALIGN	16
1164$L$key_expansion_256a:
1165	movups	XMMWORD[rax],xmm2	; emit the key produced by the previous 256b step
1166	lea	rax,[16+rax]
1167$L$key_expansion_256a_cold:
1168	shufps	xmm4,xmm0,16	; accumulate the running XOR of xmm0's dwords
1169	xorps	xmm0,xmm4
1170	shufps	xmm4,xmm0,140
1171	xorps	xmm0,xmm4
1172	shufps	xmm1,xmm1,255	; broadcast dword 3 of the keygenassist result
1173	xorps	xmm0,xmm1
1174	DB	0F3h,0C3h		;repret
1175
;-----------------------------------------------------------------------
; $L$key_expansion_256b: derive the next odd-numbered round key of the
; 256-bit schedule, reached by "call" from aes_hw_set_encrypt_key.
; In:    xmm2 = round key two positions back
;        xmm0 = most recent (even) round key, still to be stored
;        xmm1 = aeskeygenassist result for xmm0 (dword 2 is the one used)
;        xmm4 = scratch carried between calls
;        rax  = write cursor into the key schedule
; Out:   xmm2 = next round key; xmm0 is stored at [rax], rax advances by 16
; Clobb: xmm1, xmm4
;-----------------------------------------------------------------------
1176ALIGN	16
1177$L$key_expansion_256b:
1178	movups	XMMWORD[rax],xmm0	; emit the key produced by the previous 256a step
1179	lea	rax,[16+rax]
1180
1181	shufps	xmm4,xmm2,16	; accumulate the running XOR of xmm2's dwords
1182	xorps	xmm2,xmm4
1183	shufps	xmm4,xmm2,140
1184	xorps	xmm2,xmm4
1185	shufps	xmm1,xmm1,170	; broadcast dword 2 of the keygenassist result
1186	xorps	xmm2,xmm1
1187	DB	0F3h,0C3h		;repret
1188
1189
;-----------------------------------------------------------------------
; Constant tables (64-byte aligned, kept in the code section).
; Referenced by code in this file:
;   $L$key_rotate  - pshufb mask: every dword selects source bytes
;                    13,14,15,12, i.e. the last dword rotated by one byte
;                    and broadcast to all four lanes
;   $L$key_rcon1   - round constant 1 in every dword
;   $L$key_rcon1b  - round constant 0x1b in every dword
; Not referenced by any instruction visible in this file:
;   $L$bswap_mask, $L$increment32, $L$increment64, $L$increment1,
;   $L$key_rotate192
; The trailing DB lines are the NUL-terminated ASCII banner
; "AES for Intel AES-NI, CRYPTOGAMS by " followed by the author's e-mail
; address in angle brackets.
;-----------------------------------------------------------------------
1190ALIGN	64
1191$L$bswap_mask:
1192DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1193$L$increment32:
1194	DD	6,6,6,0
1195$L$increment64:
1196	DD	1,0,0,0
1197$L$increment1:
1198DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1199$L$key_rotate:
1200	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
1201$L$key_rotate192:
1202	DD	0x04070605,0x04070605,0x04070605,0x04070605
1203$L$key_rcon1:
1204	DD	1,1,1,1
1205$L$key_rcon1b:
1206	DD	0x1b,0x1b,0x1b,0x1b
1207
1208DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
1209DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
1210DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
1211DB	115,108,46,111,114,103,62,0
1212ALIGN	64
;-----------------------------------------------------------------------
; ctr_xts_se_handler: Win64 language-specific exception handler registered
; for aes_hw_ctr32_encrypt_blocks through $L$SEH_info_ctr32.
; In (handler convention): rcx = exception record, rdx = establisher frame,
;        r8 = context record, r9 = dispatcher context.
; Out:   eax = 1
;
; It repairs the context record so that it looks as if the faulting
; function had already run its epilogue, copies it into the dispatcher's
; context and calls RtlVirtualUnwind to continue unwinding.
;
; The handler data (two image-relative labels) brackets the region in which
; the stack frame of aes_hw_ctr32_encrypt_blocks is live:
;   Rip <  first label  : frame not built; entry rsp is still in Rax
;   Rip >= second label : frame already torn down; Rsp is the entry rsp
;   otherwise           : entry rsp is in R11; xmm6-xmm15 and rbp must be
;                         recovered from the save area below it
;
; NOTE(review): the field names given for the numeric CONTEXT and
; DISPATCHER_CONTEXT offsets follow the Win64 SDK layouts; verify against
; winnt.h before changing any offset.
;-----------------------------------------------------------------------
1213EXTERN	__imp_RtlVirtualUnwind
1214
1215ALIGN	16
1216ctr_xts_se_handler:
1217	push	rsi	; preserve every register the handler touches
1218	push	rdi
1219	push	rbx
1220	push	rbp
1221	push	r12
1222	push	r13
1223	push	r14
1224	push	r15
1225	pushfq
1226	sub	rsp,64	; outgoing argument area for RtlVirtualUnwind
1227
1228	mov	rax,QWORD[120+r8]	; context Rax (entry rsp, set by "mov rax,rsp")
1229	mov	rbx,QWORD[248+r8]	; context Rip
1230
1231	mov	rsi,QWORD[8+r9]	; dispatcher ImageBase
1232	mov	r11,QWORD[56+r9]	; dispatcher HandlerData
1233
1234	mov	r10d,DWORD[r11]	; HandlerData[0]: image-relative $L$ctr32_body
1235	lea	r10,[r10*1+rsi]
1236	cmp	rbx,r10
1237	jb	NEAR $L$common_seh_tail	; fault before the frame was built
1238
1239	mov	rax,QWORD[152+r8]	; context Rsp
1240
1241	mov	r10d,DWORD[4+r11]	; HandlerData[1]: image-relative $L$ctr32_epilogue
1242	lea	r10,[r10*1+rsi]
1243	cmp	rbx,r10
1244	jae	NEAR $L$common_seh_tail	; fault after the frame was released
1245
1246	mov	rax,QWORD[208+r8]	; context R11 = entry rsp inside the body
1247
1248	lea	rsi,[((-168))+rax]	; xmm6-xmm15 save area written in $L$ctr32_bulk
1249	lea	rdi,[512+r8]	; context Xmm6
1250	mov	ecx,20	; 20 qwords = 160 bytes = ten xmm registers
1251	DD	0xa548f3fc	; bytes FC F3 48 A5: cld; rep movsq
1252
1253	mov	rbp,QWORD[((-8))+rax]	; rbp pushed in $L$ctr32_bulk
1254	mov	QWORD[160+r8],rbp	; context Rbp
1255
1256
1257$L$common_seh_tail:
1258	mov	rdi,QWORD[8+rax]	; rdi/rsi as saved by the WIN64 prologue
1259	mov	rsi,QWORD[16+rax]
1260	mov	QWORD[152+r8],rax	; context Rsp
1261	mov	QWORD[168+r8],rsi	; context Rsi
1262	mov	QWORD[176+r8],rdi	; context Rdi
1263
1264	mov	rdi,QWORD[40+r9]	; dispatcher ContextRecord (copy destination)
1265	mov	rsi,r8	; repaired context (copy source)
1266	mov	ecx,154	; 154 qwords = 1232 bytes
1267	DD	0xa548f3fc	; cld; rep movsq
1268
; RtlVirtualUnwind: four register arguments, four more on the stack
1269	mov	rsi,r9
1270	xor	rcx,rcx	; arg 1 = 0
1271	mov	rdx,QWORD[8+rsi]	; arg 2 = ImageBase
1272	mov	r8,QWORD[rsi]	; arg 3 = ControlPc
1273	mov	r9,QWORD[16+rsi]	; arg 4 = FunctionEntry
1274	mov	r10,QWORD[40+rsi]
1275	lea	r11,[56+rsi]
1276	lea	r12,[24+rsi]
1277	mov	QWORD[32+rsp],r10	; arg 5 = ContextRecord
1278	mov	QWORD[40+rsp],r11	; arg 6 = address of HandlerData
1279	mov	QWORD[48+rsp],r12	; arg 7 = address of EstablisherFrame
1280	mov	QWORD[56+rsp],rcx	; arg 8 = 0
1281	call	QWORD[__imp_RtlVirtualUnwind]
1282
1283	mov	eax,1	; NOTE(review): presumably ExceptionContinueSearch -- confirm
1284	add	rsp,64
1285	popfq
1286	pop	r15
1287	pop	r14
1288	pop	r13
1289	pop	r12
1290	pop	rbp
1291	pop	rbx
1292	pop	rdi
1293	pop	rsi
1294	DB	0F3h,0C3h		;repret
1295
1296
;-----------------------------------------------------------------------
; Win64 unwind tables.
; .pdata holds one (begin, end, unwind-info) triple of image-relative
; addresses per function:
;   aes_hw_ctr32_encrypt_blocks -> $L$SEH_info_ctr32
;   aes_hw_set_encrypt_key      -> $L$SEH_info_key
; .xdata holds the unwind-info records themselves.
; NOTE(review): the field breakdown of the raw header bytes follows the
; Microsoft x64 UNWIND_INFO format; verify there before editing.
;-----------------------------------------------------------------------
1297section	.pdata rdata align=4
1298ALIGN	4
1299	DD	$L$SEH_begin_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
1300	DD	$L$SEH_end_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
1301	DD	$L$SEH_info_ctr32 wrt ..imagebase
1302	DD	aes_hw_set_encrypt_key wrt ..imagebase
1303	DD	$L$SEH_end_set_encrypt_key wrt ..imagebase
1304	DD	$L$SEH_info_key wrt ..imagebase
1305section	.xdata rdata align=8
1306ALIGN	8
1307$L$SEH_info_ctr32:
; header byte 9 = version 1 with flag bit 0 set (a handler follows); prolog
; size 0, no unwind codes, no frame register
1308DB	9,0,0,0
1309	DD	ctr_xts_se_handler wrt ..imagebase
; handler data read by ctr_xts_se_handler as HandlerData[0] and [1]
1310	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
1311$L$SEH_info_key:
; version 1, no flags; prolog size 4 (the DB-encoded "sub rsp,8"); one
; unwind code; no frame register
1312DB	0x01,0x04,0x01,0x00
; unwind code: at prolog offset 4, operation 2 with info 0 (small stack
; allocation of 8 bytes), followed by two padding bytes
1313DB	0x04,0x02,0x00,0x00
1314