Lines Matching +full:linux +full:- +full:32 +full:- +full:no +full:- +full:asm
1 ; AesOpt.asm -- AES optimized code for x86 AES hardware instructions
2 ; 2021-12-25 : Igor Pavlov : Public domain
4 include 7zAsm.asm
18 ECHO "-- NO VAES 256"
22 ECHO "x86-64"
28 ECHO "ABI : no CDECL : FASTCALL"
33 ECHO "ABI : LINUX"
77 ; ret-ip <- (r4)
128 ; we use 32 bytes of home space in stack in WIN64-x64
129 NUM_HOME_MM_REGS equ (32 / 16)
130 ; we preserve xmm registers starting from xmm6 in WIN64-x64
134 num_save_mm_regs = num_used_mm_regs - MM_START_SAVE_REG
136 num_save_mm_regs2 = num_save_mm_regs - NUM_HOME_MM_REGS
137 ; RSP is (16*x + 8) after entering the function in WIN64-x64
150 movdqa [r4 + (i - NUM_HOME_MM_REGS) * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
169 num_low_regs = num_save_mm_regs - i
242 ; ---------- AES-CBC Decode ----------
273 add keys, 32
296 rept ways - 1
297 pxor @CatStr(xmm, %(ways_start_reg + i)), [rD + i * 16 - 16]
300 movdqa iv, [rD + ways * 16 - 16]
316 ; OP_KEY pxor, 1 * ksize_r + 32
324 sub koffs_r, 32
340 movdqa [keys - 32], iv
346 ; ---------- AVX ----------
373 ; vpxor reg, key_ymm, ymmword ptr [rD + 32 * i]
374 vpxor reg, key0_ymm, ymmword ptr [rD + 32 * i]
381 vpxor reg, reg, ymmword ptr [rD + i * 32 - 16]
387 vmovdqu ymmword ptr [rD + 32 * i], reg
391 vpxor reg, reg, ymmword ptr [rD + 32 * i]
403 db 2 + 040H + 020h * (1 - (a2) / 8) + 080h * (1 - (dest) / 8)
457 add keys, 32
465 AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 2) * 32)
468 ; sub r4, 32
470 ; lea keys2, [r4 + 32]
472 and keys2, -32
485 ; AVX__WOP_KEY AVX__CBC_START, 1 * koffs_r + 32
489 sub koffs_r, 32
497 vmovdqa iv, xmmword ptr [rD + ways * 32 - 16]
500 add rD, ways * 32
507 ; lea r4, [r4 + 1 * ksize_r + 32]
522 ; ---------- AES-CBC Encode ----------
534 add keys, 32
556 rept (CENC_NUM_REG_KEYS - 1)
564 add koffs_r, 32
575 ; movdqa [keys - 32], state
576 movdqa [keys + 1 * ksize_r - (16 * CENC_NUM_REG_KEYS) - 32], state
582 ; ---------- AES-CTR ----------
607 add keys, 32
623 ; WOP_KEY pxor, 1 * koffs_r -16
647 ; movdqa state, [keys + 1 * koffs_r - 16]
655 add koffs_r, 32
667 ; movdqa [keys - 32], iv
668 movdqa [keys + 1 * ksize_r - 16 - 32], iv
680 add keys, 32
694 AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 1) * 32)
696 lea keys2, [r4 - 32]
698 and keys2, -32
713 ; AVX__WOP_KEY AVX__CTR_START, 1 * koffs_r - 32
716 add koffs_r, 32
723 add rD, ways * 32