; AesOpt.asm -- Intel's AES.
; 2009-12-12 : Igor Pavlov : Public domain
;
; Syntax: MASM (Intel operand order).
; Register aliases (r0..r6, x3, x5, x6), REG_SIZE, MY_ASM_START, MY_PROC and
; MY_ENDP are not defined in this file; they come from 7zAsm.asm.
; NOTE(review): r4 is used here as the stack pointer (push/pop plus [r4 + ...]
; addressing of the stacked argument) -- confirm against 7zAsm.asm.
;
; Every procedure in this file is declared with 3 parameters and uses:
;   r1  = pointer to a state block, read as:
;           [r1]       16 bytes loaded into an xmm register by MY_PROLOG
;                      (stored back to the same place before returning)
;           [r1 + 16]  dword, multiplied by 32 and used as the byte size of
;                      the key area walked by ENCODE / DECODE
;           [r1 + 32]  start of the key area (16 bytes per round key)
;   rD  = data pointer, advanced by 16 bytes per processed block; accessed
;         with movdqa, so it must be 16-byte aligned
;   num = number of 16-byte blocks to process

include 7zAsm.asm

MY_ASM_START

ifndef x64
    .xmm
endif

; Third argument: a register in the x64 build, a stack slot otherwise.
; The stack slot offset accounts for the three pushes done by MY_PROLOG
; plus one more REG_SIZE slot (presumably the return address).
ifdef x64
    num     equ r8
else
    num     equ [r4 + REG_SIZE * 4]
endif

rD equ r2
rN equ r0

; MY_PROLOG reg
;   x64 build only: stores xmm6 / xmm7 at [r4 + 8] and [r4 + 24] before the pushes.
;   Pushes r3, r5, r6.
;   rN  = num
;   x6  = dword [r1 + 16] * 32
;   reg = 16 bytes at [r1]
;   r1 += 32
MY_PROLOG macro reg:req
    ifdef x64
    movdqa  [r4 + 8], xmm6
    movdqa  [r4 + 8 + 16], xmm7
    endif

    push    r3
    push    r5
    push    r6

    mov     rN, num
    mov     x6, [r1 + 16]
    shl     x6, 5

    movdqa  reg, [r1]
    add     r1, 32
endm

; MY_EPILOG
;   Undoes MY_PROLOG (pops r6, r5, r3; x64 build reloads xmm6 / xmm7)
;   and closes the procedure with MY_ENDP.
MY_EPILOG macro
    pop     r6
    pop     r5
    pop     r3

    ifdef x64
    movdqa  xmm6, [r4 + 8]
    movdqa  xmm7, [r4 + 8 + 16]
    endif

    MY_ENDP
endm

; Number of blocks handled per iteration of the unrolled loops.
ways equ 4
ways16 equ (ways * 16)

; OP_W op, op2
;   Emits "op xmm<i>, op2" for i = 0 .. ways-1.
;   op2 may reference the assembly-time variable i (for example "i * 16").
;   On exit i = ways.
OP_W macro op, op2
    i = 0
    rept ways
    op @CatStr(xmm,%i), op2
    i = i + 1
    endm
endm

; LOAD_OP op, offs
;   Applies op to xmm0 with the 16 bytes at [r1 + r3 offs] as source.
LOAD_OP macro op:req, offs:req
    op      xmm0, [r1 + r3 offs]
endm

; LOAD_OP_W op, offs
;   Loads the 16 bytes at [r1 + r3 offs] into xmm7, then applies op to
;   xmm0 .. xmm(ways-1) with xmm7 as source.  Clobbers xmm7.
LOAD_OP_W macro op:req, offs:req
    movdqa  xmm7, [r1 + r3 offs]
    OP_W    op, xmm7
endm


; ---------- AES-CBC Decode ----------

; CBC_DEC_UPDATE reg, offs
;   reg ^= xmm6, xmm6 = 16 bytes at [rD + offs], then [rD + offs] = reg.
;   The input block is read into xmm6 before it is overwritten, so xmm6
;   always carries the previous input block to the next use.
CBC_DEC_UPDATE macro reg, offs
    pxor    reg, xmm6
    movdqa  xmm6, [rD + offs]
    movdqa  [rD + offs], reg
endm

; DECODE op
;   op is LOAD_OP or LOAD_OP_W.  Applies aesdec with the keys at offsets
;   +16, then (+0, -16) repeatedly while x3 is stepped down by 32 to zero,
;   and finishes with aesdeclast at +0.  x3 must be a non-zero multiple
;   of 32 on entry; it is zero on exit.
DECODE macro op:req
    op      aesdec, +16
  @@:
    op      aesdec, +0
    op      aesdec, -16
    sub     x3, 32
    jnz     @B
    op      aesdeclast, +0
endm

; AesCbc_Decode_Intel
;   Processes num blocks at rD in place: groups of `ways` blocks in the
;   unrolled loop, then any remaining blocks one at a time.
;   xmm6 holds the chaining value (initially the 16 bytes at [r1]) and is
;   written back to that location at the end.
MY_PROC AesCbc_Decode_Intel, 3
    MY_PROLOG xmm6

    sub     x6, 32                      ; x6 = start value for the DECODE counter

    jmp     check2

  align 16
  nextBlocks2:
    mov     x3, x6
    OP_W    movdqa, [rD + i * 16]       ; xmm0..xmm3 = next `ways` input blocks
    LOAD_OP_W  pxor, +32
    DECODE  LOAD_OP_W
    OP_W    CBC_DEC_UPDATE, i * 16      ; chain, store, advance xmm6
    add     rD, ways16
  check2:
    sub     rN, ways
    jnc     nextBlocks2                 ; loop while at least `ways` blocks were left

    add     rN, ways                    ; restore the remaining count (0 .. ways-1)
    jmp     check

  nextBlock:
    mov     x3, x6
    movdqa  xmm1, [rD]                  ; keep the input block for chaining
    LOAD_OP movdqa, +32
    pxor    xmm0, xmm1
    DECODE  LOAD_OP
    pxor    xmm0, xmm6
    movdqa  [rD], xmm0
    movdqa  xmm6, xmm1
    add     rD, 16
  check:
    sub     rN, 1
    jnc     nextBlock

    movdqa  [r1 - 32], xmm6             ; r1 was advanced by 32 in MY_PROLOG
    MY_EPILOG


; ---------- AES-CBC Encode ----------

; ENCODE op
;   op is LOAD_OP or LOAD_OP_W.  Applies aesenc with the keys at offsets
;   -16, then (+0, +16) repeatedly while r3 is stepped up by 32 to zero,
;   and finishes with aesenclast at +0.  r3 must be a negative multiple
;   of 32 on entry; it is zero on exit.
ENCODE macro op:req
    op      aesenc, -16
  @@:
    op      aesenc, +0
    op      aesenc, +16
    add     r3, 32
    jnz     @B
    op      aesenclast, +0
endm

; AesCbc_Encode_Intel
;   Processes num blocks at rD in place, one block per iteration.
;   xmm0 holds the chaining value (initially the 16 bytes at [r1]) and is
;   written back to that location at the end.
MY_PROC AesCbc_Encode_Intel, 3
    MY_PROLOG xmm0

    add     r1, r6                      ; r1 = end of the key area
    neg     r6
    add     r6, 32                      ; r6 = negative start index for ENCODE

    jmp     check_e

  align 16
  nextBlock_e:
    mov     r3, r6
    pxor    xmm0, [rD]
    pxor    xmm0, [r1 + r3 - 32]
    ENCODE  LOAD_OP
    movdqa  [rD], xmm0
    add     rD, 16
  check_e:
    sub     rN, 1
    jnc     nextBlock_e

    movdqa  [r1 + r6 - 64], xmm0        ; same address MY_PROLOG loaded xmm0 from
    MY_EPILOG


; ---------- AES-CTR ----------

; XOR_UPD_1 reg, offs :  reg ^= 16 bytes at [rD + offs]
XOR_UPD_1 macro reg, offs
    pxor    reg, [rD + offs]
endm

; XOR_UPD_2 reg, offs :  [rD + offs] = reg
XOR_UPD_2 macro reg, offs
    movdqa  [rD + offs], reg
endm

; AesCtr_Code_Intel
;   Processes num blocks at rD in place: groups of `ways` blocks in the
;   unrolled loop, then any remaining blocks one at a time.
;   xmm6 holds the counter (initially the 16 bytes at [r1]); its low
;   64-bit lane is incremented by 1 before each block is generated, and
;   the final value is written back to the location it was loaded from.
;
;   The increment constant {1, 0} (two qwords) is built in xmm7 from x5
;   with movd, which zero-extends to 128 bits.  It is deliberately NOT
;   kept in scratch memory below the stack pointer: that area is not
;   reserved by this procedure and may be overwritten asynchronously.
;   r5 and (in the x64 build) xmm7 are saved and restored by
;   MY_PROLOG / MY_EPILOG.
MY_PROC AesCtr_Code_Intel, 3
    MY_PROLOG xmm6

    mov     x5, 1                       ; counter step, kept in a register

    add     r1, r6                      ; r1 = end of the key area
    neg     r6
    add     r6, 32                      ; r6 = negative start index for ENCODE

    jmp     check2_c

  align 16
  nextBlocks2_c:
    movd    xmm7, x5                    ; reload step: LOAD_OP_W clobbers xmm7

    i = 0
    rept ways
    paddq   xmm6, xmm7
    movdqa  @CatStr(xmm,%i), xmm6
    i = i + 1
    endm

    mov     r3, r6
    LOAD_OP_W  pxor, -32
    ENCODE  LOAD_OP_W
    OP_W    XOR_UPD_1, i * 16
    OP_W    XOR_UPD_2, i * 16
    add     rD, ways16
  check2_c:
    sub     rN, ways
    jnc     nextBlocks2_c               ; loop while at least `ways` blocks were left

    add     rN, ways                    ; restore the remaining count (0 .. ways-1)
    movd    xmm7, x5                    ; step for the tail loop (xmm7 stays intact there)
    jmp     check_c

  nextBlock_c:
    paddq   xmm6, xmm7
    mov     r3, r6
    movdqa  xmm0, [r1 + r3 - 32]
    pxor    xmm0, xmm6
    ENCODE  LOAD_OP
    XOR_UPD_1 xmm0, 0
    XOR_UPD_2 xmm0, 0
    add     rD, 16
  check_c:
    sub     rN, 1
    jnc     nextBlock_c

    movdqa  [r1 + r6 - 64], xmm6        ; same address MY_PROLOG loaded xmm6 from
    MY_EPILOG

end