; ----------------------------------------------------------------------------
; AES-GCM bulk routines using AES-NI, PCLMULQDQ, MOVBE and AVX encodings.
; Syntax: NASM.  Entry points use the Microsoft x64 calling convention and are
; covered by a language-specific SEH handler (see gcm_se_handler and the
; .pdata/.xdata records at the end of the file).
;
; Exported:  aesni_gcm_decrypt, aesni_gcm_encrypt
; Internal:  _aesni_ctr32_ghash_6x, _aesni_ctr32_6x, gcm_se_handler
;
; The size keywords XMMWORD/YMMWORD/ZMMWORD are defined empty, so
; "XMMWORD[...]" assembles as a plain "[...]" memory operand.
; ----------------------------------------------------------------------------
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64

; ----------------------------------------------------------------------------
; _aesni_ctr32_ghash_6x -- internal helper, reached only by "call" from the two
; exported routines below.  It has no prologue of its own and works directly on
; the caller's registers and stack frame; stack slots are addressed as
; (N+8)+rsp because the return address sits on top of the caller's slot N.
;
; Each pass of $L$oop6x runs the AES rounds on six counter blocks
; (xmm9..xmm14) while, interleaved instruction-by-instruction, it multiplies
; six previously stashed 16-byte values with operands read from the table at
; r9 and folds the products into the accumulator (xmm4 low / xmm6 middle /
; xmm7 high partial sums, final value in xmm8).
;
; Register roles, as established by the callers in this file:
;   rdi   input pointer  (XORed with the key stream; +96 per pass)
;   rsi   output pointer (+96 per pass)
;   rdx   remaining length in 16-byte blocks (callers do "shr rdx,4")
;   rcx   key-schedule pointer biased by +128 (keys read at [(N-128)+rcx])
;   ebp   dword loaded by the callers from key+240; compared with 11 below to
;         choose how many extra rounds to run -- presumably the round count
;   r8    pointer to the 16-byte counter block; the next counter is stored back
;   ebx   last dword of the counter block, used only to detect byte carry
;   r9    sixth argument biased by +64; multiplier operands are read at
;         [(N-32)+r9] -- presumably precomputed hash-key powers, TODO confirm
;   r10   running count of bytes processed (+0x60 per pass)
;   r11   -> $L$bswap_mask; the neighbouring constants are reached by offset
;   r14   pointer to the next 96 bytes to byte-reverse into stack slots 32..127
;   r15   upper limit for r14 (r14 stops advancing once it passes r15)
;   xmm1  current counter block;  xmm7  first value to multiply;  xmm8  accumulator
;
; On return xmm9..xmm14 hold the last six output blocks (NOT yet stored; the
; caller stores them at [-96+rsi]..[-16+rsi]) and xmm8 holds the accumulator.
; ----------------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_ghash_6x:
	vmovdqu	xmm2,XMMWORD[32+r11]		; xmm2 = $L$one_msb (1 in the last byte)
	sub	rdx,6
	vpxor	xmm4,xmm4,xmm4
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]	; first 16 bytes of the key schedule
	; Counter+1 .. counter+5 by byte-wise add on the last byte only.
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpaddb	xmm12,xmm11,xmm2
	vpaddb	xmm13,xmm12,xmm2
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm9,xmm1,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm4	; zero caller slot 16 (xmm4 is zero here)
	jmp	NEAR $L$oop6x

ALIGN	32
$L$oop6x:
	; 100663296 = 0x06000000: add 6 to the top byte of ebx.  ebx was loaded
	; from the last 4 bytes of the counter block, so its top byte is the
	; block's final byte.  Carry out means the byte-wise vpaddb above would
	; have wrapped, so take the 32-bit-add path instead.
	add	ebx,100663296
	jc	NEAR $L$handle_ctr32
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddb	xmm1,xmm14,xmm2			; counter for the following pass
	vpxor	xmm10,xmm10,xmm15
	vpxor	xmm11,xmm11,xmm15

$L$resume_ctr32:
	vmovdqu	XMMWORD[r8],xmm1		; write the next counter back through r8
	vpclmulqdq	xmm5,xmm7,xmm3,0x10
	vpxor	xmm12,xmm12,xmm15
	vmovups	xmm2,XMMWORD[((16-128))+rcx]
	vpclmulqdq	xmm6,xmm7,xmm3,0x01

	; r12 = (r15 >= r14, unsigned) ? 0x60 : 0.  The flags from this cmp stay
	; live until the setnc below; only vector instructions sit in between.
	xor	r12,r12
	cmp	r15,r14

	vaesenc	xmm9,xmm9,xmm2
	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
	vpxor	xmm13,xmm13,xmm15
	vpclmulqdq	xmm1,xmm7,xmm3,0x00
	vaesenc	xmm10,xmm10,xmm2
	vpxor	xmm14,xmm14,xmm15
	setnc	r12b
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vaesenc	xmm11,xmm11,xmm2
	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
	neg	r12
	vaesenc	xmm12,xmm12,xmm2
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm3,0x00
	vpxor	xmm8,xmm8,xmm4
	vaesenc	xmm13,xmm13,xmm2
	vpxor	xmm4,xmm1,xmm5
	and	r12,0x60
	vmovups	xmm15,XMMWORD[((32-128))+rcx]
	vpclmulqdq	xmm1,xmm0,xmm3,0x10
	vaesenc	xmm14,xmm14,xmm2

	vpclmulqdq	xmm2,xmm0,xmm3,0x01
	lea	r14,[r12*1+r14]			; advance r14 by 0x60 or by 0
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpclmulqdq	xmm3,xmm0,xmm3,0x11
	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
	vaesenc	xmm10,xmm10,xmm15
	; movbe pairs: load 16 bytes at r14 byte-reversed and park them in the
	; stack slots consumed by the NEXT pass (block at +80 goes to slot 32).
	movbe	r13,QWORD[88+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[80+r14]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((32+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((40+8))+rsp],r12
	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((48-128))+rcx]
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm0,xmm5,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm5,0x10
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm7,xmm7,xmm3
	vpclmulqdq	xmm3,xmm0,xmm5,0x01
	vaesenc	xmm11,xmm11,xmm15
	vpclmulqdq	xmm5,xmm0,xmm5,0x11
	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm4,xmm4,xmm1
	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((64-128))+rcx]
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm1,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm1,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[72+r14]
	vpxor	xmm7,xmm7,xmm5
	vpclmulqdq	xmm5,xmm0,xmm1,0x01
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[64+r14]
	vpclmulqdq	xmm1,xmm0,xmm1,0x11
	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((48+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((56+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((80-128))+rcx]
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm2,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm2,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[56+r14]
	vpxor	xmm7,xmm7,xmm1
	vpclmulqdq	xmm1,xmm0,xmm2,0x01
	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]	; accumulator ^= stashed value in slot 112
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[48+r14]
	vpclmulqdq	xmm2,xmm0,xmm2,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((64+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((72+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm3
	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((96-128))+rcx]
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm8,xmm3,0x10
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm8,xmm3,0x01
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[40+r14]
	vpxor	xmm7,xmm7,xmm2
	vpclmulqdq	xmm2,xmm8,xmm3,0x00
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[32+r14]
	vpclmulqdq	xmm8,xmm8,xmm3,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((80+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((88+8))+rsp],r12
	vpxor	xmm6,xmm6,xmm5
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm6,xmm6,xmm1

	; Combine the partial sums: the middle term xmm6 is split across the
	; low (xmm4) and high (xmm7) halves, then xmm4 is folded twice with the
	; $L$poly constant (second fold happens in $L$enc_tail).
	vmovups	xmm15,XMMWORD[((112-128))+rcx]
	vpslldq	xmm5,xmm6,8
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm3,XMMWORD[16+r11]		; xmm3 = $L$poly

	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm7,xmm7,xmm8
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm4,xmm4,xmm5
	movbe	r13,QWORD[24+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[16+r14]
	vpalignr	xmm0,xmm4,xmm4,8
	vpclmulqdq	xmm4,xmm4,xmm3,0x10
	mov	QWORD[((96+8))+rsp],r13
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((104+8))+rsp],r12
	vaesenc	xmm13,xmm13,xmm15
	vmovups	xmm1,XMMWORD[((128-128))+rcx]
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vmovups	xmm15,XMMWORD[((144-128))+rcx]
	vaesenc	xmm10,xmm10,xmm1
	vpsrldq	xmm6,xmm6,8
	vaesenc	xmm11,xmm11,xmm1
	vpxor	xmm7,xmm7,xmm6
	vaesenc	xmm12,xmm12,xmm1
	vpxor	xmm4,xmm4,xmm0
	movbe	r13,QWORD[8+r14]
	vaesenc	xmm13,xmm13,xmm1
	movbe	r12,QWORD[r14]			; r13:r12 are stored to slot 112 in $L$enc_tail
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((160-128))+rcx]
	; ebp < 11: finish now.  The flags from this cmp are reused by the
	; "je" further down; only flag-neutral vector instructions intervene.
	cmp	ebp,11
	jb	NEAR $L$enc_tail

	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((176-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((192-128))+rcx]
	je	NEAR $L$enc_tail		; ebp == 11 (flags from "cmp ebp,11" above)

	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((208-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((224-128))+rcx]
	jmp	NEAR $L$enc_tail

; Slow path for the counter increment: the last byte would wrap, so byte-swap
; the counter with $L$bswap_mask, add with 32-bit lanes ($L$one_lsb at
; [64+r11], $L$two_lsb at [48+r11]) and swap back.  xmm12..xmm14 are XORed with
; xmm15 after $L$resume_ctr32, exactly as on the fast path.
ALIGN	32
$L$handle_ctr32:
	vmovdqu	xmm0,XMMWORD[r11]
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
	vpaddd	xmm11,xmm6,xmm5
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm15
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm15
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpshufb	xmm14,xmm14,xmm0
	vpshufb	xmm1,xmm1,xmm0
	jmp	NEAR $L$resume_ctr32

; Last two AES rounds.  xmm15 is the penultimate round key; xmm1 (the last
; round key) is pre-XORed with the six input blocks at rdi so that
; vaesenclast yields the output block directly.  The next six counters are
; prepared in xmm1/xmm0/xmm5/xmm6/xmm7/xmm3 at the same time.
ALIGN	32
$L$enc_tail:
	vaesenc	xmm9,xmm9,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm7	; park high partial sum in caller slot 16
	vpalignr	xmm8,xmm4,xmm4,8
	vaesenc	xmm10,xmm10,xmm15
	vpclmulqdq	xmm4,xmm4,xmm3,0x10	; second fold with $L$poly (xmm3)
	vpxor	xmm2,xmm1,XMMWORD[rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
	vmovdqu	xmm1,XMMWORD[r8]		; reload the counter stored at $L$resume_ctr32

	vaesenclast	xmm9,xmm9,xmm2
	vmovdqu	xmm2,XMMWORD[32+r11]		; xmm2 = $L$one_msb again
	vaesenclast	xmm10,xmm10,xmm0
	vpaddb	xmm0,xmm1,xmm2
	mov	QWORD[((112+8))+rsp],r13
	lea	rdi,[96+rdi]
	vaesenclast	xmm11,xmm11,xmm5
	vpaddb	xmm5,xmm0,xmm2
	mov	QWORD[((120+8))+rsp],r12
	lea	rsi,[96+rsi]
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
	vaesenclast	xmm12,xmm12,xmm6
	vpaddb	xmm6,xmm5,xmm2
	vaesenclast	xmm13,xmm13,xmm7
	vpaddb	xmm7,xmm6,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vpaddb	xmm3,xmm7,xmm2

	add	r10,0x60
	sub	rdx,0x6
	jc	NEAR $L$6x_done			; borrow: fewer than 6 blocks were left

	; More to do: store this pass's six blocks and rotate the prepared
	; counters into xmm9..xmm14 for the next pass.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpxor	xmm9,xmm1,xmm15
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovdqa	xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovdqa	xmm11,xmm5
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovdqa	xmm12,xmm6
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovdqa	xmm13,xmm7
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vmovdqa	xmm14,xmm3
	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]	; first stashed value for the next pass
	jmp	NEAR $L$oop6x

$L$6x_done:
	; Final accumulator: xmm8 ^= parked high sum ^ folded low sum.
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpxor	xmm8,xmm8,xmm4

	DB	0F3h,0C3h		;repret

; ----------------------------------------------------------------------------
; aesni_gcm_decrypt (exported, Win64 ABI)
;
; Arguments arrive as rcx, rdx, r8, r9, [40+rsp], [48+rsp] and are moved into
; rdi, rsi, rdx, rcx, r8, r9 respectively.  From the way they are used below:
;   rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule,
;   r8  = 16-byte counter block, r9 = 16-byte accumulator followed by a table.
; Presumed C prototype (not visible in this file -- confirm against the header):
;   size_t aesni_gcm_decrypt(const void *in, void *out, size_t len,
;                            const void *key, uint8_t ivec[16], uint64_t *Xi);
;
; Returns in rax the byte count accumulated in r10 by the helper, or 0 without
; touching anything if len < 0x60.
; rdi/rsi (callee-saved on Win64) are kept in the caller's home space at
; [8+rsp]/[16+rsp]; rbx, rbp, r12-r15 and xmm6-xmm15 are saved in the frame and
; restored relative to rax, which holds the entry rsp throughout the body.
; ----------------------------------------------------------------------------
global	aesni_gcm_decrypt

ALIGN	32
aesni_gcm_decrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesni_gcm_decrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	xor	r10,r10				; return value if we bail out



	cmp	rdx,0x60			; need at least 96 bytes (six blocks)
	jb	NEAR $L$gcm_dec_abort

	lea	rax,[rsp]
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]		; xmm1 = counter block
	add	rsp,-128			; scratch area for the stashed blocks
	mov	ebx,DWORD[12+r8]		; last dword of the counter block
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	vmovdqu	xmm8,XMMWORD[r9]		; xmm8 = accumulator from the sixth argument
	and	rsp,-128
	vmovdqu	xmm0,XMMWORD[r11]
	lea	rcx,[128+rcx]			; bias key pointer; helper uses [(N-128)+rcx]
	lea	r9,[((32+32))+r9]		; bias table pointer; result stored at [-64+r9]
	mov	ebp,DWORD[((240-128))+rcx]	; dword at key+240
	vpshufb	xmm8,xmm8,xmm0

	; If (rsp & 0xf80) - ((key-128) & 0xf80) lies in [0,768), lower rsp by
	; that difference.  Presumably this keeps the scratch area from sharing
	; low address bits with the key schedule -- NOTE(review): confirm intent.
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$dec_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$dec_no_key_aliasing
	sub	rsp,r15
$L$dec_no_key_aliasing:

	; Byte-swap the first six input blocks: the block at +80 stays in xmm7,
	; the others go to stack slots 48..112 for the helper to consume.
	vmovdqu	xmm7,XMMWORD[80+rdi]
	lea	r14,[rdi]			; helper's r14 starts at the input
	vmovdqu	xmm4,XMMWORD[64+rdi]







	lea	r15,[((-192))+rdx*1+rdi]	; helper's limit: input + len - 192

	vmovdqu	xmm5,XMMWORD[48+rdi]
	shr	rdx,4				; bytes -> 16-byte blocks
	xor	r10,r10
	vmovdqu	xmm6,XMMWORD[32+rdi]
	vpshufb	xmm7,xmm7,xmm0
	vmovdqu	xmm2,XMMWORD[16+rdi]
	vpshufb	xmm4,xmm4,xmm0
	vmovdqu	xmm3,XMMWORD[rdi]
	vpshufb	xmm5,xmm5,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm4
	vpshufb	xmm6,xmm6,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm2,xmm2,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm6
	vpshufb	xmm3,xmm3,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vmovdqu	XMMWORD[112+rsp],xmm3

	call	_aesni_ctr32_ghash_6x

	; The helper leaves its final six output blocks in registers.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovups	XMMWORD[(-16)+rsi],xmm14

	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8		; write accumulator back (undoes the +64 bias)

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]
$L$gcm_dec_abort:
	mov	rax,r10				; bytes processed (0 on the early-out path)
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_aesni_gcm_decrypt:

; ----------------------------------------------------------------------------
; _aesni_ctr32_6x -- internal helper, called twice by aesni_gcm_encrypt.
; Runs six counter blocks through the cipher with no hashing, XORs them with
; 96 bytes at rdi and stores the result at rsi; rdi and rsi advance by 96.
;
; In:   xmm1 = counter block, ebx = its last dword, xmm0 = $L$bswap_mask,
;       r11 -> $L$bswap_mask, rcx = key schedule + 128, ebp = dword from key+240
; Out:  xmm9..xmm14 = the six output blocks (also stored), xmm1 = next counter
; Uses: r12 (round-key cursor), r13 (loop counter = ebp-1), xmm2-xmm6, xmm8, xmm15
; ----------------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_6x:
	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]
	vmovdqu	xmm2,XMMWORD[32+r11]		; xmm2 = $L$one_msb
	lea	r13,[((-1))+rbp]
	vmovups	xmm15,XMMWORD[((16-128))+rcx]
	lea	r12,[((32-128))+rcx]
	vpxor	xmm9,xmm1,xmm4
	add	ebx,100663296			; 0x06000000: +6 in the counter's last byte
	jc	NEAR $L$handle_ctr32_2		; byte wrapped -> 32-bit-add path
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpxor	xmm10,xmm10,xmm4
	vpaddb	xmm12,xmm11,xmm2
	vpxor	xmm11,xmm11,xmm4
	vpaddb	xmm13,xmm12,xmm2
	vpxor	xmm12,xmm12,xmm4
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm13,xmm13,xmm4
	vpaddb	xmm1,xmm14,xmm2
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32

; One vaesenc round per iteration across all six blocks; r13d counts down.
ALIGN	16
$L$oop_ctr32:
	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15
	vmovups	xmm15,XMMWORD[r12]
	lea	r12,[16+r12]
	dec	r13d
	jnz	NEAR $L$oop_ctr32

	; xmm3 = last round key, pre-XORed with each input block so that
	; vaesenclast produces the output block directly.
	vmovdqu	xmm3,XMMWORD[r12]
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm4,xmm3,XMMWORD[rdi]
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
	lea	rdi,[96+rdi]

	vaesenclast	xmm9,xmm9,xmm4
	vaesenclast	xmm10,xmm10,xmm5
	vaesenclast	xmm11,xmm11,xmm6
	vaesenclast	xmm12,xmm12,xmm8
	vaesenclast	xmm13,xmm13,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vmovups	XMMWORD[rsi],xmm9
	vmovups	XMMWORD[16+rsi],xmm10
	vmovups	XMMWORD[32+rsi],xmm11
	vmovups	XMMWORD[48+rsi],xmm12
	vmovups	XMMWORD[64+rsi],xmm13
	vmovups	XMMWORD[80+rsi],xmm14
	lea	rsi,[96+rsi]

	DB	0F3h,0C3h		;repret
; Same carry-safe increment as $L$handle_ctr32, but the first key is in xmm4
; and the byte-swap mask is taken from xmm0 (set by the caller).
ALIGN	32
$L$handle_ctr32_2:
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]		; $L$two_lsb
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]	; + $L$one_lsb
	vpaddd	xmm11,xmm6,xmm5
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm4
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm4
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpxor	xmm12,xmm12,xmm4
	vpshufb	xmm14,xmm14,xmm0
	vpxor	xmm13,xmm13,xmm4
	vpshufb	xmm1,xmm1,xmm0
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32


; ----------------------------------------------------------------------------
; aesni_gcm_encrypt (exported, Win64 ABI)
;
; Same argument shuffle, frame layout and return convention as
; aesni_gcm_decrypt.  Presumed C prototype (confirm against the header):
;   size_t aesni_gcm_encrypt(const void *in, void *out, size_t len,
;                            const void *key, uint8_t ivec[16], uint64_t *Xi);
;
; Returns 0 without doing anything if len < 0x60*3 (288 bytes).  Otherwise the
; first twelve blocks are produced by two _aesni_ctr32_6x calls, the stitched
; helper handles the middle, and the code after the helper call folds the
; values that are still pending into the accumulator.
; The helper's r14/r15 window is set over the OUTPUT buffer here, whereas
; aesni_gcm_decrypt sets it over the input buffer.
; ----------------------------------------------------------------------------
global	aesni_gcm_encrypt

ALIGN	32
aesni_gcm_encrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesni_gcm_encrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	xor	r10,r10				; return value if we bail out




	cmp	rdx,0x60*3			; need at least 288 bytes (18 blocks)
	jb	NEAR $L$gcm_enc_abort

	lea	rax,[rsp]
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]		; xmm1 = counter block
	add	rsp,-128
	mov	ebx,DWORD[12+r8]		; last dword of the counter block
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	lea	rcx,[128+rcx]			; bias key pointer
	vmovdqu	xmm0,XMMWORD[r11]
	and	rsp,-128
	mov	ebp,DWORD[((240-128))+rcx]	; dword at key+240

	; Same rsp adjustment relative to the key address as in
	; aesni_gcm_decrypt (see the note there).
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$enc_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$enc_no_key_aliasing
	sub	rsp,r15
$L$enc_no_key_aliasing:

	lea	r14,[rsi]			; helper's r14 starts at the output








	lea	r15,[((-192))+rdx*1+rsi]	; helper's limit: output + len - 192

	shr	rdx,4				; bytes -> 16-byte blocks

	; First six blocks: byte-swap the results; the first five go to stack
	; slots 112..48, the sixth stays in xmm7.
	call	_aesni_ctr32_6x
	vpshufb	xmm8,xmm9,xmm0
	vpshufb	xmm2,xmm10,xmm0
	vmovdqu	XMMWORD[112+rsp],xmm8
	vpshufb	xmm4,xmm11,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vpshufb	xmm5,xmm12,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm4
	vpshufb	xmm6,xmm13,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm7,xmm14,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm6

	; Second six blocks; the helper picks them up from memory through r14.
	call	_aesni_ctr32_6x

	vmovdqu	xmm8,XMMWORD[r9]		; xmm8 = accumulator from the sixth argument
	lea	r9,[((32+32))+r9]		; bias table pointer; result stored at [-64+r9]
	sub	rdx,12				; twelve blocks already produced
	mov	r10,0x60*2			; ...i.e. 192 bytes counted so far
	vpshufb	xmm8,xmm8,xmm0

	call	_aesni_ctr32_ghash_6x
	; Tail: store the helper's last six blocks while folding in what is
	; still pending -- first the values left in stack slots 32..112, then
	; the six blocks just produced (byte-swapped copies in xmm9..xmm14; the
	; copy of xmm9 is parked in slot 16).
	vmovdqu	xmm7,XMMWORD[32+rsp]
	vmovdqu	xmm0,XMMWORD[r11]
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm7,xmm7
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpshufb	xmm9,xmm9,xmm0
	vpxor	xmm1,xmm1,xmm7
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vpshufb	xmm10,xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vpshufb	xmm11,xmm11,xmm0
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vpshufb	xmm12,xmm12,xmm0
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vpshufb	xmm13,xmm13,xmm0
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vpshufb	xmm14,xmm14,xmm0
	vmovdqu	XMMWORD[16+rsp],xmm9
	vmovdqu	xmm6,XMMWORD[48+rsp]
	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpunpckhqdq	xmm2,xmm6,xmm6
	vpclmulqdq	xmm5,xmm7,xmm3,0x00
	vpxor	xmm2,xmm2,xmm6
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vpclmulqdq	xmm1,xmm1,xmm15,0x00

	vmovdqu	xmm9,XMMWORD[64+rsp]
	vpclmulqdq	xmm4,xmm6,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm5,xmm9,xmm9
	vpclmulqdq	xmm6,xmm6,xmm0,0x11
	vpxor	xmm5,xmm5,xmm9
	vpxor	xmm6,xmm6,xmm7
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vmovdqu	xmm1,XMMWORD[80+rsp]
	vpclmulqdq	xmm7,xmm9,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm7,xmm7,xmm4
	vpunpckhqdq	xmm4,xmm1,xmm1
	vpclmulqdq	xmm9,xmm9,xmm3,0x11
	vpxor	xmm4,xmm4,xmm1
	vpxor	xmm9,xmm9,xmm6
	vpclmulqdq	xmm5,xmm5,xmm15,0x00
	vpxor	xmm5,xmm5,xmm2

	vmovdqu	xmm2,XMMWORD[96+rsp]
	vpclmulqdq	xmm6,xmm1,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm6,xmm6,xmm7
	vpunpckhqdq	xmm7,xmm2,xmm2
	vpclmulqdq	xmm1,xmm1,xmm0,0x11
	vpxor	xmm7,xmm7,xmm2
	vpxor	xmm1,xmm1,xmm9
	vpclmulqdq	xmm4,xmm4,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm4,xmm4,xmm5

	vpxor	xmm8,xmm8,XMMWORD[112+rsp]	; accumulator ^= stashed value in slot 112
	vpclmulqdq	xmm5,xmm2,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm2,xmm2,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm2,xmm2,xmm1
	vpclmulqdq	xmm7,xmm7,xmm15,0x00
	vpxor	xmm4,xmm7,xmm4

	vpclmulqdq	xmm6,xmm8,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm14,xmm14
	vpclmulqdq	xmm8,xmm8,xmm0,0x11
	vpxor	xmm1,xmm1,xmm14
	vpxor	xmm5,xmm6,xmm5
	vpclmulqdq	xmm9,xmm9,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vpxor	xmm7,xmm8,xmm2
	vpxor	xmm6,xmm9,xmm4

	; First batch summed (low xmm5, high xmm7, middle xmm6); its folding
	; with $L$poly is interleaved with the multiplies for xmm14..xmm10.
	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpxor	xmm9,xmm7,xmm5
	vpclmulqdq	xmm4,xmm14,xmm3,0x00
	vpxor	xmm6,xmm6,xmm9
	vpunpckhqdq	xmm2,xmm13,xmm13
	vpclmulqdq	xmm14,xmm14,xmm3,0x11
	vpxor	xmm2,xmm2,xmm13
	vpslldq	xmm9,xmm6,8
	vpclmulqdq	xmm1,xmm1,xmm15,0x00
	vpxor	xmm8,xmm5,xmm9
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm7,xmm7,xmm6

	vpclmulqdq	xmm5,xmm13,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm9,xmm12,xmm12
	vpclmulqdq	xmm13,xmm13,xmm0,0x11
	vpxor	xmm9,xmm9,xmm12
	vpxor	xmm13,xmm13,xmm14
	vpalignr	xmm14,xmm8,xmm8,8
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm4,xmm12,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm1,xmm11,xmm11
	vpclmulqdq	xmm12,xmm12,xmm3,0x11
	vpxor	xmm1,xmm1,xmm11
	vpxor	xmm12,xmm12,xmm13
	vxorps	xmm7,xmm7,XMMWORD[16+rsp]	; fold in the parked, byte-swapped copy of xmm9
	vpclmulqdq	xmm9,xmm9,xmm15,0x00
	vpxor	xmm9,xmm9,xmm2

	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; first fold with $L$poly
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm5,xmm11,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm2,xmm10,xmm10
	vpclmulqdq	xmm11,xmm11,xmm0,0x11
	vpxor	xmm2,xmm2,xmm10
	vpalignr	xmm14,xmm8,xmm8,8
	vpxor	xmm11,xmm11,xmm12
	vpclmulqdq	xmm1,xmm1,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm1,xmm1,xmm9

	vxorps	xmm14,xmm14,xmm7
	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; second fold with $L$poly
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm4,xmm10,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpclmulqdq	xmm10,xmm10,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm10,xmm10,xmm11
	vpclmulqdq	xmm2,xmm2,xmm15,0x00
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm5,xmm8,xmm0,0x00
	vpclmulqdq	xmm7,xmm8,xmm0,0x11
	vpxor	xmm5,xmm5,xmm4
	vpclmulqdq	xmm6,xmm9,xmm15,0x10
	vpxor	xmm7,xmm7,xmm10
	vpxor	xmm6,xmm6,xmm2

	; Second batch: combine the three partial sums and fold twice with
	; $L$poly, leaving the final value in xmm8.
	vpxor	xmm4,xmm7,xmm5
	vpxor	xmm6,xmm6,xmm4
	vpslldq	xmm1,xmm6,8
	vmovdqu	xmm3,XMMWORD[16+r11]		; xmm3 = $L$poly
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm8,xmm5,xmm1
	vpxor	xmm7,xmm7,xmm6

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm8,xmm8,xmm2

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm2,xmm2,xmm7
	vpxor	xmm8,xmm8,xmm2
	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8		; write accumulator back (undoes the +64 bias)

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]
$L$gcm_enc_abort:
	mov	rax,r10				; bytes processed (0 on the early-out path)
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_aesni_gcm_encrypt:
; ----------------------------------------------------------------------------
; Constants, addressed from the code as fixed offsets from r11 = $L$bswap_mask:
;   +0  $L$bswap_mask  vpshufb mask that reverses the 16 bytes of a register
;   +16 $L$poly        constant used in the vpclmulqdq folding steps
;   +32 $L$one_msb     1 in the last byte  (used with vpaddb)
;   +48 $L$two_lsb     2 in the first byte (used with vpaddd after byte-swap)
;   +64 $L$one_lsb     1 in the first byte (used with vpaddd after byte-swap)
; The trailing bytes are the NUL-terminated ASCII string
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>".
; ----------------------------------------------------------------------------
ALIGN	64
$L$bswap_mask:
DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN	64
EXTERN	__imp_RtlVirtualUnwind

; ----------------------------------------------------------------------------
; gcm_se_handler -- SEH language-specific handler registered in .xdata for both
; exported routines.
;
; Used here as: r8 = pointer to a context record, r9 = pointer to a dispatcher
; record.  The numeric offsets appear to follow the Win64 CONTEXT layout
; (Rax=120, Rbx=144, Rsp=152, Rbp=160, Rsi=168, Rdi=176, R12..R15=216..240,
; Rip=248, Xmm6=512) and the DISPATCHER_CONTEXT layout (ControlPc=0,
; ImageBase=8, FunctionEntry=16, EstablisherFrame=24, ContextRecord=40,
; HandlerData=56) -- verify against the Windows SDK headers.
;
; HandlerData points at the two RVAs stored after the handler in .xdata: the
; "$L$gcm_*_body" label and the "$L$gcm_*_abort" label.
;   * Rip below the body label:      frame not built yet; use context Rax,
;                                    which the prologue set to the entry rsp.
;   * Rip at/after the abort label:  frame already torn down; use context Rsp.
;   * otherwise:                     recover rbx, rbp, r12-r15 and xmm6-xmm15
;                                    from the frame addressed via context Rax.
; In all cases rdi/rsi are then recovered from the home space, the context is
; copied into the dispatcher's ContextRecord, and RtlVirtualUnwind is called.
; Returns eax = 1.
; ----------------------------------------------------------------------------
ALIGN	16
gcm_se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64				; home space + stack args for the call below

	mov	rax,QWORD[120+r8]		; context Rax
	mov	rbx,QWORD[248+r8]		; context Rip

	mov	rsi,QWORD[8+r9]			; image base
	mov	r11,QWORD[56+r9]		; handler data (two RVAs)

	mov	r10d,DWORD[r11]			; RVA of the body label
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail

	mov	rax,QWORD[152+r8]		; context Rsp

	mov	r10d,DWORD[4+r11]		; RVA of the abort label
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail

	mov	rax,QWORD[120+r8]		; inside the body: rax = entry rsp

	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	mov	QWORD[240+r8],r15
	mov	QWORD[232+r8],r14
	mov	QWORD[224+r8],r13
	mov	QWORD[216+r8],r12
	mov	QWORD[160+r8],rbp
	mov	QWORD[144+r8],rbx

	lea	rsi,[((-216))+rax]		; saved xmm6..xmm15 in the frame
	lea	rdi,[512+r8]
	mov	ecx,20				; 20 qwords = ten 16-byte registers
	DD	0xa548f3fc			; bytes FC F3 48 A5 = cld; rep movsq

$L$common_seh_tail:
	mov	rdi,QWORD[8+rax]		; rdi/rsi saved by the prologue in home space
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax
	mov	QWORD[168+r8],rsi
	mov	QWORD[176+r8],rdi

	mov	rdi,QWORD[40+r9]		; destination: dispatcher's context record
	mov	rsi,r8
	mov	ecx,154				; 154 qwords = 1232 bytes
	DD	0xa548f3fc			; cld; rep movsq

	; RtlVirtualUnwind: four register arguments plus four stack arguments
	; at [32+rsp]..[56+rsp]; rcx = 0 and the last argument is 0 as well.
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD[8+rsi]
	mov	r8,QWORD[rsi]
	mov	r9,QWORD[16+rsi]
	mov	r10,QWORD[40+rsi]
	lea	r11,[56+rsi]
	lea	r12,[24+rsi]
	mov	QWORD[32+rsp],r10
	mov	QWORD[40+rsp],r11
	mov	QWORD[48+rsp],r12
	mov	QWORD[56+rsp],rcx
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1				; presumably ExceptionContinueSearch -- confirm
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret


; .pdata: one three-dword record per exported routine
; (begin RVA, end RVA, RVA of its unwind-info record in .xdata).
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_gcm_dec_info wrt ..imagebase

	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_gcm_enc_info wrt ..imagebase
; .xdata: each record is a 4-byte header (9,0,0,0), the handler RVA, then the
; two label RVAs that gcm_se_handler reads through HandlerData.
; NOTE(review): header byte 9 looks like UNWIND_INFO version 1 with the
; exception-handler flag set and no unwind codes -- confirm against the
; UNWIND_INFO documentation.
section	.xdata rdata align=8
ALIGN	8
$L$SEH_gcm_dec_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_gcm_enc_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase