• Home
  • Raw
  • Download

Lines Matching +full:- +full:out

2 # Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
17 # Multi-buffer AES-NI procedures process several independent buffers
23 # ---------------------------
32 # pre-AVX processors, where higher interleave factor incidentally
36 # ---------------------------
53 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
54 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
55 die "can't locate x86_64-xlate.pl";
58 require "x86_64-support.pl";
64 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
65 =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
70 `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
75 `ml64 2>&1` =~ /Version ([0-9]+)\./) {
79 if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) {
83 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
85 *STDOUT=*OUT;
88 # struct { void *inp,*out; int blocks; double iv[2]; } inp[8];
102 @out=map("%xmm$_",(2..5));
146 lea -0xa8(%rsp),%rsp
154 movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler
155 movaps %xmm14,-0x58(%rax)
156 movaps %xmm15,-0x48(%rax)
166 and \$-64,%rsp
171 movdqu ($key),$zero # 0-round key
184 mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*2`($inp),$one
185 mov `$inp_elm_size*$i+0-$inp_elm_size*2`($inp),$inptr_reg
187 mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*2`($inp),$outptr_reg
191 movdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*2`($inp),@out[$i]
200 movups 0x10-0x78($key),$rndkey1
201 pxor $zero,@out[0]
202 movups 0x20-0x78($key),$rndkey0
203 pxor $zero,@out[1]
204 mov 0xf0-0x78($key),$rounds
205 pxor $zero,@out[2]
207 pxor $zero,@out[3]
209 pxor @inp[0],@out[0]
211 pxor @inp[1],@out[1]
213 pxor @inp[2],@out[2]
214 pxor @inp[3],@out[3]
226 aesenc $rndkey1,@out[0]
229 aesenc $rndkey1,@out[1]
232 aesenc $rndkey1,@out[2]
233 aesenc $rndkey1,@out[3]
234 movups 0x30-0x78($key),$rndkey1
240 aesenc $rndkey,@out[0]
241 aesenc $rndkey,@out[1]
242 aesenc $rndkey,@out[2]
245 aesenc $rndkey,@out[3]
246 movups `0x40+16*$i-0x78`($key),$rndkey
251 aesenc $rndkey0,@out[0]
254 aesenc $rndkey0,@out[1]
257 aesenc $rndkey0,@out[2]
258 aesenc $rndkey0,@out[3]
259 movups 0x80-0x78($key),$rndkey0
262 aesenc $rndkey1,@out[0]
264 movdqu -0x78($key),$zero # reload 0-round key
265 aesenc $rndkey1,@out[1]
268 aesenc $rndkey1,@out[2]
269 aesenc $rndkey1,@out[3]
270 movups 0x90-0x78($key),$rndkey1
274 aesenc $rndkey0,@out[0]
275 aesenc $rndkey0,@out[1]
276 aesenc $rndkey0,@out[2]
277 aesenc $rndkey0,@out[3]
278 movups 0xa0-0x78($key),$rndkey0
282 aesenc $rndkey1,@out[0]
283 aesenc $rndkey1,@out[1]
284 aesenc $rndkey1,@out[2]
285 aesenc $rndkey1,@out[3]
286 movups 0xb0-0x78($key),$rndkey1
288 aesenc $rndkey0,@out[0]
289 aesenc $rndkey0,@out[1]
290 aesenc $rndkey0,@out[2]
291 aesenc $rndkey0,@out[3]
292 movups 0xc0-0x78($key),$rndkey0
296 aesenc $rndkey1,@out[0]
297 aesenc $rndkey1,@out[1]
298 aesenc $rndkey1,@out[2]
299 aesenc $rndkey1,@out[3]
300 movups 0xd0-0x78($key),$rndkey1
302 aesenc $rndkey0,@out[0]
303 aesenc $rndkey0,@out[1]
304 aesenc $rndkey0,@out[2]
305 aesenc $rndkey0,@out[3]
306 movups 0xe0-0x78($key),$rndkey0
311 aesenc $rndkey1,@out[0]
312 aesenc $rndkey1,@out[1]
313 aesenc $rndkey1,@out[2]
314 aesenc $rndkey1,@out[3]
316 movdqu 0x10-0x78($key),$rndkey1
318 aesenclast $rndkey0,@out[0]
321 aesenclast $rndkey0,@out[1]
324 aesenclast $rndkey0,@out[2]
327 aesenclast $rndkey0,@out[3]
328 movdqu 0x20-0x78($key),$rndkey0
331 movups @out[0],-16(@outptr[0],$offset)
332 pxor @inp[0],@out[0]
333 movups @out[1],-16(@outptr[1],$offset)
334 pxor @inp[1],@out[1]
335 movups @out[2],-16(@outptr[2],$offset)
336 pxor @inp[2],@out[2]
337 movups @out[3],-16(@outptr[3],$offset)
338 pxor @inp[3],@out[3]
347 #pxor @inp[0],@out[0]
348 #pxor @inp[1],@out[1]
350 #movdqu @out[0],`$inp_elm_size*0+2*$ptr_size+8-$inp_elm_size*2`($inp)
351 #pxor @inp[2],@out[2]
352 #movdqu @out[1],`$inp_elm_size*1+2*$ptr_size+8-$inp_elm_size*2`($inp)
353 #pxor @inp[3],@out[3]
354 #movdqu @out[2],`$inp_elm_size*2+2*$ptr_size+8-$inp_elm_size*2`($inp) # won't fix, let caller
355 #movdqu @out[3],`$inp_elm_size*3+2*$ptr_size+8-$inp_elm_size*2`($inp) # figure this out...
364 movaps -0xd8(%rax),%xmm6
365 movaps -0xc8(%rax),%xmm7
366 movaps -0xb8(%rax),%xmm8
367 movaps -0xa8(%rax),%xmm9
368 movaps -0x98(%rax),%xmm10
369 movaps -0x88(%rax),%xmm11
370 movaps -0x78(%rax),%xmm12
371 #movaps -0x68(%rax),%xmm13
372 #movaps -0x58(%rax),%xmm14
373 #movaps -0x48(%rax),%xmm15
376 mov -48(%rax),%r15
378 mov -40(%rax),%r14
380 mov -32(%rax),%r13
382 mov -24(%rax),%r12
384 mov -16(%rax),%rbp
386 mov -8(%rax),%rbx
393 .size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
428 lea -0xa8(%rsp),%rsp
436 movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler
437 movaps %xmm14,-0x58(%rax)
438 movaps %xmm15,-0x48(%rax)
448 and \$-64,%rsp
453 movdqu ($key),$zero # 0-round key
466 mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*2`($inp),$one
467 mov `$inp_elm_size*$i+0-$inp_elm_size*2`($inp),$inptr_reg
469 mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*2`($inp),$outptr_reg
473 movdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*2`($inp),@inp[$i]
482 movups 0x10-0x78($key),$rndkey1
483 movups 0x20-0x78($key),$rndkey0
484 mov 0xf0-0x78($key),$rounds
485 movdqu (@inptr[0]),@out[0] # load inputs
486 movdqu (@inptr[1]),@out[1]
487 pxor $zero,@out[0]
488 movdqu (@inptr[2]),@out[2]
489 pxor $zero,@out[1]
490 movdqu (@inptr[3]),@out[3]
491 pxor $zero,@out[2]
492 pxor $zero,@out[3]
504 aesdec $rndkey1,@out[0]
507 aesdec $rndkey1,@out[1]
510 aesdec $rndkey1,@out[2]
511 aesdec $rndkey1,@out[3]
512 movups 0x30-0x78($key),$rndkey1
518 aesdec $rndkey,@out[0]
519 aesdec $rndkey,@out[1]
520 aesdec $rndkey,@out[2]
523 aesdec $rndkey,@out[3]
524 movups `0x40+16*$i-0x78`($key),$rndkey
529 aesdec $rndkey0,@out[0]
532 aesdec $rndkey0,@out[1]
535 aesdec $rndkey0,@out[2]
536 aesdec $rndkey0,@out[3]
537 movups 0x80-0x78($key),$rndkey0
540 aesdec $rndkey1,@out[0]
542 movdqu -0x78($key),$zero # reload 0-round key
543 aesdec $rndkey1,@out[1]
546 aesdec $rndkey1,@out[2]
547 aesdec $rndkey1,@out[3]
548 movups 0x90-0x78($key),$rndkey1
552 aesdec $rndkey0,@out[0]
553 aesdec $rndkey0,@out[1]
554 aesdec $rndkey0,@out[2]
555 aesdec $rndkey0,@out[3]
556 movups 0xa0-0x78($key),$rndkey0
560 aesdec $rndkey1,@out[0]
561 aesdec $rndkey1,@out[1]
562 aesdec $rndkey1,@out[2]
563 aesdec $rndkey1,@out[3]
564 movups 0xb0-0x78($key),$rndkey1
566 aesdec $rndkey0,@out[0]
567 aesdec $rndkey0,@out[1]
568 aesdec $rndkey0,@out[2]
569 aesdec $rndkey0,@out[3]
570 movups 0xc0-0x78($key),$rndkey0
574 aesdec $rndkey1,@out[0]
575 aesdec $rndkey1,@out[1]
576 aesdec $rndkey1,@out[2]
577 aesdec $rndkey1,@out[3]
578 movups 0xd0-0x78($key),$rndkey1
580 aesdec $rndkey0,@out[0]
581 aesdec $rndkey0,@out[1]
582 aesdec $rndkey0,@out[2]
583 aesdec $rndkey0,@out[3]
584 movups 0xe0-0x78($key),$rndkey0
589 aesdec $rndkey1,@out[0]
590 aesdec $rndkey1,@out[1]
591 aesdec $rndkey1,@out[2]
594 aesdec $rndkey1,@out[3]
595 movdqu 0x10-0x78($key),$rndkey1
598 movdqu 0x20-0x78($key),$rndkey0
600 aesdeclast @inp[0],@out[0]
601 aesdeclast @inp[1],@out[1]
602 movdqu -16(@inptr[0],$offset),@inp[0] # load next IV
603 movdqu -16(@inptr[1],$offset),@inp[1]
604 aesdeclast @inp[2],@out[2]
605 aesdeclast @inp[3],@out[3]
606 movdqu -16(@inptr[2],$offset),@inp[2]
607 movdqu -16(@inptr[3],$offset),@inp[3]
609 movups @out[0],-16(@outptr[0],$offset)
610 movdqu (@inptr[0],$offset),@out[0]
611 movups @out[1],-16(@outptr[1],$offset)
612 movdqu (@inptr[1],$offset),@out[1]
613 pxor $zero,@out[0]
614 movups @out[2],-16(@outptr[2],$offset)
615 movdqu (@inptr[2],$offset),@out[2]
616 pxor $zero,@out[1]
617 movups @out[3],-16(@outptr[3],$offset)
618 movdqu (@inptr[3],$offset),@out[3]
619 pxor $zero,@out[2]
620 pxor $zero,@out[3]
636 movaps -0xd8(%rax),%xmm6
637 movaps -0xc8(%rax),%xmm7
638 movaps -0xb8(%rax),%xmm8
639 movaps -0xa8(%rax),%xmm9
640 movaps -0x98(%rax),%xmm10
641 movaps -0x88(%rax),%xmm11
642 movaps -0x78(%rax),%xmm12
643 #movaps -0x68(%rax),%xmm13
644 #movaps -0x58(%rax),%xmm14
645 #movaps -0x48(%rax),%xmm15
648 mov -48(%rax),%r15
650 mov -40(%rax),%r14
652 mov -32(%rax),%r13
654 mov -24(%rax),%r12
656 mov -16(%rax),%rbp
658 mov -8(%rax),%rbx
665 .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
672 my @out=map("%xmm$_",(2..9));
698 lea -0xa8(%rsp),%rsp
705 movaps %xmm12,-0x78(%rax)
706 movaps %xmm13,-0x68(%rax)
707 movaps %xmm14,-0x58(%rax)
708 movaps %xmm15,-0x48(%rax)
717 # +128 off-load area for @inp[0..3]
720 and \$-128,%rsp
726 vmovdqu ($key),$zero # 0-round key
741 mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*4`($inp),$one
743 mov `$inp_elm_size*$i+0-$inp_elm_size*4`($inp),$ptr_reg
746 mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*4`($inp),$temp_reg
750 vmovdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*4`($inp),@out[$i]
761 vmovups 0x10-0x78($key),$rndkey1
762 vmovups 0x20-0x78($key),$rndkey0
763 mov 0xf0-0x78($key),$rounds
765 vpxor (@ptr[0]),$zero,@inp[0] # load inputs and xor with 0-round
770 vpxor @inp[0],@out[0],@out[0]
772 vpxor @inp[1],@out[1],@out[1]
774 vpxor @inp[2],@out[2],@out[2]
776 vpxor @inp[3],@out[3],@out[3]
778 vpxor @inp[0],@out[4],@out[4]
780 vpxor @inp[1],@out[5],@out[5]
781 vpxor @inp[2],@out[6],@out[6]
782 vpxor @inp[3],@out[7],@out[7]
791 vaesenc $rndkey,@out[0],@out[0]
798 vaesenc $rndkey,@out[1],@out[1]
800 vaesenc $rndkey,@out[2],@out[2]
803 prefetcht0 15(@ptr[$i-2]) # prefetch output
806 vaesenc $rndkey,@out[3],@out[3]
809 vaesenc $rndkey,@out[4],@out[4]
811 vaesenc $rndkey,@out[5],@out[5]
813 vaesenc $rndkey,@out[6],@out[6]
814 vpxor 16(@ptr[$i]),$zero,@inp[$i%4] # load input and xor with 0-round
816 vaesenc $rndkey,@out[7],@out[7]
817 vmovups `16*(3+$i)-0x78`($key),$rndkey
821 vmovdqu @inp[$i%4],`16*$i`($offload) # off-load
826 prefetcht0 15(@ptr[$i-2]) # prefetch output
827 prefetcht0 15(@ptr[$i-1])
831 vaesenc $rndkey1,@out[0],@out[0]
832 vaesenc $rndkey1,@out[1],@out[1]
833 vaesenc $rndkey1,@out[2],@out[2]
834 vaesenc $rndkey1,@out[3],@out[3]
835 vaesenc $rndkey1,@out[4],@out[4]
836 vaesenc $rndkey1,@out[5],@out[5]
837 vaesenc $rndkey1,@out[6],@out[6]
838 vaesenc $rndkey1,@out[7],@out[7]
839 vmovups 0xb0-0x78($key),$rndkey1
841 vaesenc $rndkey0,@out[0],@out[0]
842 vaesenc $rndkey0,@out[1],@out[1]
843 vaesenc $rndkey0,@out[2],@out[2]
844 vaesenc $rndkey0,@out[3],@out[3]
845 vaesenc $rndkey0,@out[4],@out[4]
846 vaesenc $rndkey0,@out[5],@out[5]
847 vaesenc $rndkey0,@out[6],@out[6]
848 vaesenc $rndkey0,@out[7],@out[7]
849 vmovups 0xc0-0x78($key),$rndkey0
852 vaesenc $rndkey1,@out[0],@out[0]
853 vaesenc $rndkey1,@out[1],@out[1]
854 vaesenc $rndkey1,@out[2],@out[2]
855 vaesenc $rndkey1,@out[3],@out[3]
856 vaesenc $rndkey1,@out[4],@out[4]
857 vaesenc $rndkey1,@out[5],@out[5]
858 vaesenc $rndkey1,@out[6],@out[6]
859 vaesenc $rndkey1,@out[7],@out[7]
860 vmovups 0xd0-0x78($key),$rndkey1
862 vaesenc $rndkey0,@out[0],@out[0]
863 vaesenc $rndkey0,@out[1],@out[1]
864 vaesenc $rndkey0,@out[2],@out[2]
865 vaesenc $rndkey0,@out[3],@out[3]
866 vaesenc $rndkey0,@out[4],@out[4]
867 vaesenc $rndkey0,@out[5],@out[5]
868 vaesenc $rndkey0,@out[6],@out[6]
869 vaesenc $rndkey0,@out[7],@out[7]
870 vmovups 0xe0-0x78($key),$rndkey0
873 vaesenc $rndkey1,@out[0],@out[0]
875 vaesenc $rndkey1,@out[1],@out[1]
876 vaesenc $rndkey1,@out[2],@out[2]
878 vaesenc $rndkey1,@out[3],@out[3]
879 vaesenc $rndkey1,@out[4],@out[4]
882 vaesenc $rndkey1,@out[5],@out[5]
883 mov 64(%rsp),$offset # pre-load 1st offset
884 vaesenc $rndkey1,@out[6],@out[6]
885 vaesenc $rndkey1,@out[7],@out[7]
886 vmovups 0x10-0x78($key),$rndkey1
888 vaesenclast $rndkey0,@out[0],@out[0]
891 vaesenclast $rndkey0,@out[1],@out[1]
892 vaesenclast $rndkey0,@out[2],@out[2]
894 vaesenclast $rndkey0,@out[3],@out[3]
895 vaesenclast $rndkey0,@out[4],@out[4]
897 vmovdqu -0x78($key),$zero # 0-round
898 vaesenclast $rndkey0,@out[5],@out[5]
899 vaesenclast $rndkey0,@out[6],@out[6]
901 vaesenclast $rndkey0,@out[7],@out[7]
902 vmovups 0x20-0x78($key),$rndkey0
904 vmovups @out[0],-16(@ptr[0]) # write output
906 vpxor 0x00($offload),@out[0],@out[0]
907 vmovups @out[1],-16(@ptr[1])
909 vpxor 0x10($offload),@out[1],@out[1]
910 vmovups @out[2],-16(@ptr[2])
912 vpxor 0x20($offload),@out[2],@out[2]
913 vmovups @out[3],-16(@ptr[3])
915 vpxor 0x30($offload),@out[3],@out[3]
916 vmovups @out[4],-16(@ptr[4])
918 vpxor @inp[0],@out[4],@out[4]
919 vmovups @out[5],-16(@ptr[5])
921 vpxor @inp[1],@out[5],@out[5]
922 vmovups @out[6],-16(@ptr[6])
924 vpxor @inp[2],@out[6],@out[6]
925 vmovups @out[7],-16(@ptr[7])
927 vpxor @inp[3],@out[7],@out[7]
943 movaps -0xd8(%rax),%xmm6
944 movaps -0xc8(%rax),%xmm7
945 movaps -0xb8(%rax),%xmm8
946 movaps -0xa8(%rax),%xmm9
947 movaps -0x98(%rax),%xmm10
948 movaps -0x88(%rax),%xmm11
949 movaps -0x78(%rax),%xmm12
950 movaps -0x68(%rax),%xmm13
951 movaps -0x58(%rax),%xmm14
952 movaps -0x48(%rax),%xmm15
955 mov -48(%rax),%r15
957 mov -40(%rax),%r14
959 mov -32(%rax),%r13
961 mov -24(%rax),%r12
963 mov -16(%rax),%rbp
965 mov -8(%rax),%rbx
972 .size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
995 lea -0xa8(%rsp),%rsp
1002 movaps %xmm12,-0x78(%rax)
1003 movaps %xmm13,-0x68(%rax)
1004 movaps %xmm14,-0x58(%rax)
1005 movaps %xmm15,-0x48(%rax)
1014 # +128 off-load area for @inp[0..3]
1018 and \$-256,%rsp
1025 vmovdqu ($key),$zero # 0-round key
1040 mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*4`($inp),$one
1042 mov `$inp_elm_size*$i+0-$inp_elm_size*4`($inp),$ptr_reg
1045 mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*4`($inp),$temp_reg
1049 vmovdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*4`($inp),@out[$i]
1054 vmovdqu @out[$i],`192+16*$i`(%rsp) # offload IV
1061 vmovups 0x10-0x78($key),$rndkey1
1062 vmovups 0x20-0x78($key),$rndkey0
1063 mov 0xf0-0x78($key),$rounds
1066 vmovdqu (@ptr[0]),@out[0] # load inputs
1067 vmovdqu (@ptr[1]),@out[1]
1068 vmovdqu (@ptr[2]),@out[2]
1069 vmovdqu (@ptr[3]),@out[3]
1070 vmovdqu (@ptr[4]),@out[4]
1071 vmovdqu (@ptr[5]),@out[5]
1072 vmovdqu (@ptr[6]),@out[6]
1073 vmovdqu (@ptr[7]),@out[7]
1074 vmovdqu @out[0],0x00($offload) # offload inputs
1075 vpxor $zero,@out[0],@out[0] # xor inputs with 0-round
1076 vmovdqu @out[1],0x10($offload)
1077 vpxor $zero,@out[1],@out[1]
1078 vmovdqu @out[2],0x20($offload)
1079 vpxor $zero,@out[2],@out[2]
1080 vmovdqu @out[3],0x30($offload)
1081 vpxor $zero,@out[3],@out[3]
1082 vmovdqu @out[4],0x40($offload)
1083 vpxor $zero,@out[4],@out[4]
1084 vmovdqu @out[5],0x50($offload)
1085 vpxor $zero,@out[5],@out[5]
1086 vmovdqu @out[6],0x60($offload)
1087 vpxor $zero,@out[6],@out[6]
1088 vmovdqu @out[7],0x70($offload)
1089 vpxor $zero,@out[7],@out[7]
1100 vaesdec $rndkey,@out[0],@out[0]
1107 vaesdec $rndkey,@out[1],@out[1]
1109 vaesdec $rndkey,@out[2],@out[2]
1112 prefetcht0 15(@ptr[$i-2]) # prefetch output
1115 vaesdec $rndkey,@out[3],@out[3]
1118 vaesdec $rndkey,@out[4],@out[4]
1120 vaesdec $rndkey,@out[5],@out[5]
1122 vaesdec $rndkey,@out[6],@out[6]
1125 vaesdec $rndkey,@out[7],@out[7]
1126 vmovups `16*(3+$i)-0x78`($key),$rndkey
1130 vmovdqu @inp[$i%4],`128+16*$i`(%rsp) # off-load
1135 prefetcht0 15(@ptr[$i-2]) # prefetch output
1136 prefetcht0 15(@ptr[$i-1])
1140 vaesdec $rndkey1,@out[0],@out[0]
1141 vaesdec $rndkey1,@out[1],@out[1]
1142 vaesdec $rndkey1,@out[2],@out[2]
1143 vaesdec $rndkey1,@out[3],@out[3]
1144 vaesdec $rndkey1,@out[4],@out[4]
1145 vaesdec $rndkey1,@out[5],@out[5]
1146 vaesdec $rndkey1,@out[6],@out[6]
1147 vaesdec $rndkey1,@out[7],@out[7]
1148 vmovups 0xb0-0x78($key),$rndkey1
1150 vaesdec $rndkey0,@out[0],@out[0]
1151 vaesdec $rndkey0,@out[1],@out[1]
1152 vaesdec $rndkey0,@out[2],@out[2]
1153 vaesdec $rndkey0,@out[3],@out[3]
1154 vaesdec $rndkey0,@out[4],@out[4]
1155 vaesdec $rndkey0,@out[5],@out[5]
1156 vaesdec $rndkey0,@out[6],@out[6]
1157 vaesdec $rndkey0,@out[7],@out[7]
1158 vmovups 0xc0-0x78($key),$rndkey0
1161 vaesdec $rndkey1,@out[0],@out[0]
1162 vaesdec $rndkey1,@out[1],@out[1]
1163 vaesdec $rndkey1,@out[2],@out[2]
1164 vaesdec $rndkey1,@out[3],@out[3]
1165 vaesdec $rndkey1,@out[4],@out[4]
1166 vaesdec $rndkey1,@out[5],@out[5]
1167 vaesdec $rndkey1,@out[6],@out[6]
1168 vaesdec $rndkey1,@out[7],@out[7]
1169 vmovups 0xd0-0x78($key),$rndkey1
1171 vaesdec $rndkey0,@out[0],@out[0]
1172 vaesdec $rndkey0,@out[1],@out[1]
1173 vaesdec $rndkey0,@out[2],@out[2]
1174 vaesdec $rndkey0,@out[3],@out[3]
1175 vaesdec $rndkey0,@out[4],@out[4]
1176 vaesdec $rndkey0,@out[5],@out[5]
1177 vaesdec $rndkey0,@out[6],@out[6]
1178 vaesdec $rndkey0,@out[7],@out[7]
1179 vmovups 0xe0-0x78($key),$rndkey0
1182 vaesdec $rndkey1,@out[0],@out[0]
1184 vaesdec $rndkey1,@out[1],@out[1]
1185 vaesdec $rndkey1,@out[2],@out[2]
1187 vaesdec $rndkey1,@out[3],@out[3]
1188 vaesdec $rndkey1,@out[4],@out[4]
1191 vaesdec $rndkey1,@out[5],@out[5]
1192 mov 64(%rsp),$offset # pre-load 1st offset
1193 vaesdec $rndkey1,@out[6],@out[6]
1194 vaesdec $rndkey1,@out[7],@out[7]
1195 vmovups 0x10-0x78($key),$rndkey1
1197 vaesdeclast $rndkey0,@out[0],@out[0]
1200 vaesdeclast $rndkey0,@out[1],@out[1]
1201 vpxor 0x00($offload),@out[0],@out[0] # xor with IV
1202 vaesdeclast $rndkey0,@out[2],@out[2]
1203 vpxor 0x10($offload),@out[1],@out[1]
1205 vaesdeclast $rndkey0,@out[3],@out[3]
1206 vpxor 0x20($offload),@out[2],@out[2]
1207 vaesdeclast $rndkey0,@out[4],@out[4]
1208 vpxor 0x30($offload),@out[3],@out[3]
1210 vmovdqu -0x78($key),$zero # 0-round
1211 vaesdeclast $rndkey0,@out[5],@out[5]
1212 vpxor 0x40($offload),@out[4],@out[4]
1213 vaesdeclast $rndkey0,@out[6],@out[6]
1214 vpxor 0x50($offload),@out[5],@out[5]
1216 vaesdeclast $rndkey0,@out[7],@out[7]
1217 vpxor 0x60($offload),@out[6],@out[6]
1218 vmovups 0x20-0x78($key),$rndkey0
1220 vmovups @out[0],-16(@ptr[0]) # write output
1222 vmovdqu 128+0(%rsp),@out[0]
1223 vpxor 0x70($offload),@out[7],@out[7]
1224 vmovups @out[1],-16(@ptr[1])
1226 vmovdqu @out[0],0x00($offload)
1227 vpxor $zero,@out[0],@out[0]
1228 vmovdqu 128+16(%rsp),@out[1]
1229 vmovups @out[2],-16(@ptr[2])
1231 vmovdqu @out[1],0x10($offload)
1232 vpxor $zero,@out[1],@out[1]
1233 vmovdqu 128+32(%rsp),@out[2]
1234 vmovups @out[3],-16(@ptr[3])
1236 vmovdqu @out[2],0x20($offload)
1237 vpxor $zero,@out[2],@out[2]
1238 vmovdqu 128+48(%rsp),@out[3]
1239 vmovups @out[4],-16(@ptr[4])
1241 vmovdqu @out[3],0x30($offload)
1242 vpxor $zero,@out[3],@out[3]
1244 vpxor @inp[0],$zero,@out[4]
1245 vmovups @out[5],-16(@ptr[5])
1248 vpxor @inp[1],$zero,@out[5]
1249 vmovups @out[6],-16(@ptr[6])
1252 vpxor @inp[2],$zero,@out[6]
1253 vmovups @out[7],-16(@ptr[7])
1256 vpxor @inp[3],$zero,@out[7]
1273 movaps -0xd8(%rax),%xmm6
1274 movaps -0xc8(%rax),%xmm7
1275 movaps -0xb8(%rax),%xmm8
1276 movaps -0xa8(%rax),%xmm9
1277 movaps -0x98(%rax),%xmm10
1278 movaps -0x88(%rax),%xmm11
1279 movaps -0x78(%rax),%xmm12
1280 movaps -0x68(%rax),%xmm13
1281 movaps -0x58(%rax),%xmm14
1282 movaps -0x48(%rax),%xmm15
1285 mov -48(%rax),%r15
1287 mov -40(%rax),%r14
1289 mov -32(%rax),%r13
1291 mov -24(%rax),%r12
1293 mov -16(%rax),%rbp
1295 mov -8(%rax),%rbx
1302 .size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
1316 .type se_handler,\@abi-omnipotent
1330 mov 120($context),%rax # pull context->Rax
1331 mov 248($context),%rbx # pull context->Rip
1333 mov 8($disp),%rsi # disp->ImageBase
1334 mov 56($disp),%r11 # disp->HandlerData
1338 cmp %r10,%rbx # context->Rip<.Lprologue
1341 mov 152($context),%rax # pull context->Rsp
1345 cmp %r10,%rbx # context->Rip>=.Lepilogue
1350 mov -8(%rax),%rbx
1351 mov -16(%rax),%rbp
1352 mov -24(%rax),%r12
1353 mov -32(%rax),%r13
1354 mov -40(%rax),%r14
1355 mov -48(%rax),%r15
1356 mov %rbx,144($context) # restore context->Rbx
1357 mov %rbp,160($context) # restore context->Rbp
1358 mov %r12,216($context) # restore context->R12
1359 mov %r13,224($context) # restore context->R13
1360 mov %r14,232($context) # restore context->R14
1361 mov %r15,240($context) # restore context->R15
1363 lea -56-10*16(%rax),%rsi
1371 mov %rax,152($context) # restore context->Rsp
1372 mov %rsi,168($context) # restore context->Rsi
1373 mov %rdi,176($context) # restore context->Rdi
1375 mov 40($disp),%rdi # disp->ContextRecord
1382 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1383 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1384 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1385 mov 40(%rsi),%r10 # disp->ContextRecord
1386 lea 56(%rsi),%r11 # &disp->HandlerData
1387 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1406 .size se_handler,.-se_handler
1464 if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
1472 elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) {
1484 elsif ($line=~/(aes[a-z]+)\s+([0x1-9a-fA-F]*)\(%rsp\),\s*%xmm([0-9]+)/) {
1501 $code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;