; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Syntax: NASM.  ABI: Win64 (see the WIN64 prologue/epilogue in the exported
; functions and the .pdata/.xdata unwind records at the end of the file).
; One statement per line; the instruction order below is hand-scheduled and
; must not be rearranged.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%include "ring_core_generated/prefix_symbols_nasm.inc"
section	.text code align=64

;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x  (file-local helper, not declared global)
;
; Loop shared by aesni_gcm_decrypt and aesni_gcm_encrypt.  Each pass runs
; the AES rounds for six counter blocks (xmm9..xmm14) interleaved with
; carry-less multiplies (vpclmulqdq) over six blocks staged on the stack.
;
; State expected on entry, as set up by the two callers in this file:
;   rdi   source pointer; six 16-byte blocks are XORed in per pass, then +96
;   rsi   destination pointer, +96 per pass
;   rdx   number of 16-byte blocks; reduced by 6 on entry and per pass
;   rcx   key schedule + 128 (round key 0 is read from [rcx-128])
;   ebp   dword read from [key+240]; below 11 => round keys 0..10 are used,
;         otherwise round keys 0..14
;   r8    counter block; the next counter value is spilled to [r8]
;   ebx   dword at [r8+12], used only to detect counter wrap-around
;   r9    multiplier table, read at [r9-32]..[r9+80]
;         (presumably precomputed GHASH key powers -- confirm with caller)
;   r10   running byte count, +0x60 per pass
;   r11   $L$bswap_mask; $L$poly, $L$one_msb, $L$two_lsb and $L$one_lsb
;         are addressed at +16, +32, +48 and +64 from it
;   r14   cursor for the movbe loads that refill the stack staging area
;   r15   limit for r14: r14 advances by 0x60 only while r14 <= r15
;   xmm1  current counter block; xmm7, xmm8 carry multiply state
; Stack slots are written [N+8+rsp] here because the return address sits
; between this frame and the caller's [N+rsp] slots.
; On return the last six output blocks are still in xmm9..xmm14 (the
; callers store them) and the accumulator is in xmm8.
;-----------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_ghash_6x:
	vmovdqu	xmm2,XMMWORD[32+r11]		; $L$one_msb: +1 in byte 15
	sub	rdx,6
	vpxor	xmm4,xmm4,xmm4
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]	; round key 0
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpaddb	xmm12,xmm11,xmm2
	vpaddb	xmm13,xmm12,xmm2
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm9,xmm1,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm4
	jmp	NEAR $L$oop6x

ALIGN	32
$L$oop6x:
	; 100663296 = 6<<24: bump the top byte of the dword loaded from
	; [r8+12]; a carry means the byte-wise vpaddb increments would wrap.
	add	ebx,100663296
	jc	NEAR $L$handle_ctr32
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddb	xmm1,xmm14,xmm2
	vpxor	xmm10,xmm10,xmm15
	vpxor	xmm11,xmm11,xmm15

$L$resume_ctr32:
	vmovdqu	XMMWORD[r8],xmm1		; spill next counter value
	vpclmulqdq	xmm5,xmm7,xmm3,0x10
	vpxor	xmm12,xmm12,xmm15
	vmovups	xmm2,XMMWORD[((16-128))+rcx]
	vpclmulqdq	xmm6,xmm7,xmm3,0x01

	; r12 = (r15 >= r14) ? 0x60 : 0, computed branch-free below
	; (xor / cmp / setnc / neg / and) and added to r14.  The vector
	; instructions in between do not modify the flags.
	xor	r12,r12
	cmp	r15,r14

	vaesenc	xmm9,xmm9,xmm2
	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
	vpxor	xmm13,xmm13,xmm15
	vpclmulqdq	xmm1,xmm7,xmm3,0x00
	vaesenc	xmm10,xmm10,xmm2
	vpxor	xmm14,xmm14,xmm15
	setnc	r12b
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vaesenc	xmm11,xmm11,xmm2
	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
	neg	r12
	vaesenc	xmm12,xmm12,xmm2
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm3,0x00
	vpxor	xmm8,xmm8,xmm4
	vaesenc	xmm13,xmm13,xmm2
	vpxor	xmm4,xmm1,xmm5
	and	r12,0x60
	vmovups	xmm15,XMMWORD[((32-128))+rcx]
	vpclmulqdq	xmm1,xmm0,xmm3,0x10
	vaesenc	xmm14,xmm14,xmm2

	vpclmulqdq	xmm2,xmm0,xmm3,0x01
	lea	r14,[r12*1+r14]
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpclmulqdq	xmm3,xmm0,xmm3,0x11
	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
	vaesenc	xmm10,xmm10,xmm15
	; movbe loads byte-reverse 8 bytes at a time; the pairs stored below
	; refill the stack staging slots for the next pass.
	movbe	r13,QWORD[88+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[80+r14]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((32+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((40+8))+rsp],r12
	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((48-128))+rcx]
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm0,xmm5,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm5,0x10
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm7,xmm7,xmm3
	vpclmulqdq	xmm3,xmm0,xmm5,0x01
	vaesenc	xmm11,xmm11,xmm15
	vpclmulqdq	xmm5,xmm0,xmm5,0x11
	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm4,xmm4,xmm1
	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((64-128))+rcx]
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm1,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm1,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[72+r14]
	vpxor	xmm7,xmm7,xmm5
	vpclmulqdq	xmm5,xmm0,xmm1,0x01
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[64+r14]
	vpclmulqdq	xmm1,xmm0,xmm1,0x11
	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((48+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((56+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((80-128))+rcx]
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm2,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm2,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[56+r14]
	vpxor	xmm7,xmm7,xmm1
	vpclmulqdq	xmm1,xmm0,xmm2,0x01
	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[48+r14]
	vpclmulqdq	xmm2,xmm0,xmm2,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((64+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((72+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm3
	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((96-128))+rcx]
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm8,xmm3,0x10
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm8,xmm3,0x01
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[40+r14]
	vpxor	xmm7,xmm7,xmm2
	vpclmulqdq	xmm2,xmm8,xmm3,0x00
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[32+r14]
	vpclmulqdq	xmm8,xmm8,xmm3,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((80+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((88+8))+rsp],r12
	vpxor	xmm6,xmm6,xmm5
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm6,xmm6,xmm1

	vmovups	xmm15,XMMWORD[((112-128))+rcx]
	vpslldq	xmm5,xmm6,8
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm3,XMMWORD[16+r11]		; $L$poly

	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm7,xmm7,xmm8
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm4,xmm4,xmm5
	movbe	r13,QWORD[24+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[16+r14]
	vpalignr	xmm0,xmm4,xmm4,8
	vpclmulqdq	xmm4,xmm4,xmm3,0x10
	mov	QWORD[((96+8))+rsp],r13
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((104+8))+rsp],r12
	vaesenc	xmm13,xmm13,xmm15
	vmovups	xmm1,XMMWORD[((128-128))+rcx]
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vmovups	xmm15,XMMWORD[((144-128))+rcx]
	vaesenc	xmm10,xmm10,xmm1
	vpsrldq	xmm6,xmm6,8
	vaesenc	xmm11,xmm11,xmm1
	vpxor	xmm7,xmm7,xmm6
	vaesenc	xmm12,xmm12,xmm1
	vpxor	xmm4,xmm4,xmm0
	movbe	r13,QWORD[8+r14]
	vaesenc	xmm13,xmm13,xmm1
	movbe	r12,QWORD[r14]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((160-128))+rcx]
	; Round keys 1..8 are applied; 9 and 10 are loaded in xmm15/xmm1.
	; ebp < 11: finish with those two.  Otherwise apply four more
	; rounds and finish with round keys 13 and 14.
	cmp	ebp,11
	jb	NEAR $L$enc_tail

	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((176-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((192-128))+rcx]

	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((208-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((224-128))+rcx]
	jmp	NEAR $L$enc_tail

; Slow path for counter wrap-around: byte-swap the counter with
; $L$bswap_mask, step it with 32-bit adds ($L$one_lsb / $L$two_lsb),
; and swap each result back before rejoining the main loop.
ALIGN	32
$L$handle_ctr32:
	vmovdqu	xmm0,XMMWORD[r11]
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
	vpaddd	xmm11,xmm6,xmm5
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm15
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm15
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpshufb	xmm14,xmm14,xmm0
	vpshufb	xmm1,xmm1,xmm0
	jmp	NEAR $L$resume_ctr32

; Final two AES rounds.  The last round key (xmm1) is XORed with the six
; source blocks first, so vaesenclast yields keystream XOR source directly.
; The counters for the next pass are prepared in xmm0/xmm5/xmm6/xmm7/xmm3.
ALIGN	32
$L$enc_tail:
	vaesenc	xmm9,xmm9,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm7
	vpalignr	xmm8,xmm4,xmm4,8
	vaesenc	xmm10,xmm10,xmm15
	vpclmulqdq	xmm4,xmm4,xmm3,0x10
	vpxor	xmm2,xmm1,XMMWORD[rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
	vmovdqu	xmm1,XMMWORD[r8]		; reload spilled counter

	vaesenclast	xmm9,xmm9,xmm2
	vmovdqu	xmm2,XMMWORD[32+r11]
	vaesenclast	xmm10,xmm10,xmm0
	vpaddb	xmm0,xmm1,xmm2
	mov	QWORD[((112+8))+rsp],r13
	lea	rdi,[96+rdi]
	vaesenclast	xmm11,xmm11,xmm5
	vpaddb	xmm5,xmm0,xmm2
	mov	QWORD[((120+8))+rsp],r12
	lea	rsi,[96+rsi]
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
	vaesenclast	xmm12,xmm12,xmm6
	vpaddb	xmm6,xmm5,xmm2
	vaesenclast	xmm13,xmm13,xmm7
	vpaddb	xmm7,xmm6,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vpaddb	xmm3,xmm7,xmm2

	add	r10,0x60			; 96 more bytes accounted for
	sub	rdx,0x6
	jc	NEAR $L$6x_done			; borrow: that was the last pass

	; More passes follow: store this pass's output and rotate the
	; prepared counters into xmm9..xmm14.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpxor	xmm9,xmm1,xmm15
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovdqa	xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovdqa	xmm11,xmm5
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovdqa	xmm12,xmm6
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovdqa	xmm13,xmm7
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vmovdqa	xmm14,xmm3
	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]
	jmp	NEAR $L$oop6x

$L$6x_done:
	; Fold the outstanding partial results into xmm8.  The final six
	; output blocks stay in xmm9..xmm14 for the caller to store.
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpxor	xmm8,xmm8,xmm4

	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; aesni_gcm_decrypt  (exported, Win64 ABI)
;
; The six Win64 arguments (rcx, rdx, r8, r9, [rsp+40], [rsp+48]) are
; remapped to rdi, rsi, rdx, rcx, r8, r9:
;   rdi  source pointer (read 16 bytes at a time)
;   rsi  destination pointer
;   rdx  length in bytes; must be >= 0x60 or nothing is processed
;   rcx  key schedule; the dword at [rcx+240] selects the round count
;   r8   16-byte counter block
;   r9   hash state: 16 bytes at [r9] are loaded, updated and written
;        back; [r9+32] onwards is read as the multiplier table
;        (layout presumably defined by the C caller -- confirm there)
; Returns rax = number of bytes processed (0 if rdx < 0x60), as
; accumulated in r10 by _aesni_ctr32_ghash_6x in steps of 0x60.
; Saves and restores rbx, rbp, rdi, rsi, r12-r15 and xmm6-xmm15.
;-----------------------------------------------------------------------
global	aesni_gcm_decrypt

ALIGN	32
aesni_gcm_decrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesni_gcm_decrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	xor	r10,r10				; return value if we abort

	cmp	rdx,0x60
	jb	NEAR $L$gcm_dec_abort

	lea	rax,[rsp]			; rax = frame anchor for save area

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]		; counter block
	add	rsp,-128
	mov	ebx,DWORD[12+r8]
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	vmovdqu	xmm8,XMMWORD[r9]		; hash state
	and	rsp,-128
	vmovdqu	xmm0,XMMWORD[r11]
	lea	rcx,[128+rcx]			; bias key pointer by +128
	lea	r9,[((32+32))+r9]
	mov	ebp,DWORD[((240-128))+rcx]	; dword at [key+240]
	vpshufb	xmm8,xmm8,xmm0

	; Compare bits 7..11 of the frame address with those of key-128.
	; If the frame lies 0..767 bytes above the key within that window,
	; slide rsp down by the difference.
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$dec_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$dec_no_key_aliasing
	sub	rsp,r15
$L$dec_no_key_aliasing:

	; Stage the first six source blocks, byte-swapped, for the helper:
	; block 5 stays in xmm7, blocks 4..0 go to [48+rsp]..[112+rsp].
	vmovdqu	xmm7,XMMWORD[80+rdi]
	lea	r14,[rdi]			; movbe cursor = source
	vmovdqu	xmm4,XMMWORD[64+rdi]

	lea	r15,[((-192))+rdx*1+rdi]	; limit for the movbe cursor

	vmovdqu	xmm5,XMMWORD[48+rdi]
	shr	rdx,4				; bytes -> 16-byte blocks
	xor	r10,r10
	vmovdqu	xmm6,XMMWORD[32+rdi]
	vpshufb	xmm7,xmm7,xmm0
	vmovdqu	xmm2,XMMWORD[16+rdi]
	vpshufb	xmm4,xmm4,xmm0
	vmovdqu	xmm3,XMMWORD[rdi]
	vpshufb	xmm5,xmm5,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm4
	vpshufb	xmm6,xmm6,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm2,xmm2,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm6
	vpshufb	xmm3,xmm3,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vmovdqu	XMMWORD[112+rsp],xmm3

	call	_aesni_ctr32_ghash_6x

	; The helper leaves its last six output blocks in registers.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovups	XMMWORD[(-16)+rsi],xmm14

	; Swap the hash state back and store it where it was loaded from
	; (r9 was advanced by 64 above).
	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]

$L$gcm_dec_abort:
	mov	rax,r10
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aesni_gcm_decrypt:

;-----------------------------------------------------------------------
; _aesni_ctr32_6x  (file-local helper, not declared global)
;
; Runs the AES rounds on six consecutive counter blocks, XORs the result
; with 96 bytes at [rdi], stores 96 bytes at [rsi] and advances both
; pointers by 96.  No vpclmulqdq work is done here.
;
; Uses the same rcx/ebp/ebx/r11/xmm0/xmm1 state as
; _aesni_ctr32_ghash_6x.  The round loop runs ebp-1 times and two more
; round keys are applied after it, so round keys 0..ebp+1 are consumed.
; Leaves the six output blocks in xmm9..xmm14 and the next counter in xmm1.
; Clobbers r12, r13, xmm2..xmm6, xmm8, xmm15.
;-----------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_6x:
	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]	; round key 0
	vmovdqu	xmm2,XMMWORD[32+r11]		; $L$one_msb
	lea	r13,[((-1))+rbp]		; loop count
	vmovups	xmm15,XMMWORD[((16-128))+rcx]
	lea	r12,[((32-128))+rcx]		; next round key to load
	vpxor	xmm9,xmm1,xmm4
	add	ebx,100663296			; 6<<24, see $L$oop6x
	jc	NEAR $L$handle_ctr32_2
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpxor	xmm10,xmm10,xmm4
	vpaddb	xmm12,xmm11,xmm2
	vpxor	xmm11,xmm11,xmm4
	vpaddb	xmm13,xmm12,xmm2
	vpxor	xmm12,xmm12,xmm4
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm13,xmm13,xmm4
	vpaddb	xmm1,xmm14,xmm2
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32

ALIGN	16
$L$oop_ctr32:
	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15
	vmovups	xmm15,XMMWORD[r12]
	lea	r12,[16+r12]
	dec	r13d
	jnz	NEAR $L$oop_ctr32

	; xmm3 = last round key, pre-XORed with the source blocks so that
	; vaesenclast produces the final output directly.
	vmovdqu	xmm3,XMMWORD[r12]
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm4,xmm3,XMMWORD[rdi]
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
	lea	rdi,[96+rdi]

	vaesenclast	xmm9,xmm9,xmm4
	vaesenclast	xmm10,xmm10,xmm5
	vaesenclast	xmm11,xmm11,xmm6
	vaesenclast	xmm12,xmm12,xmm8
	vaesenclast	xmm13,xmm13,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vmovups	XMMWORD[rsi],xmm9
	vmovups	XMMWORD[16+rsi],xmm10
	vmovups	XMMWORD[32+rsi],xmm11
	vmovups	XMMWORD[48+rsi],xmm12
	vmovups	XMMWORD[64+rsi],xmm13
	vmovups	XMMWORD[80+rsi],xmm14
	lea	rsi,[96+rsi]

	DB	0F3h,0C3h		;repret

; Counter wrap-around path for _aesni_ctr32_6x; same technique as
; $L$handle_ctr32 (byte-swap, 32-bit add, swap back), with round key 0
; in xmm4.
ALIGN	32
$L$handle_ctr32_2:
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
	vpaddd	xmm11,xmm6,xmm5
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm4
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm4
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpxor	xmm12,xmm12,xmm4
	vpshufb	xmm14,xmm14,xmm0
	vpxor	xmm13,xmm13,xmm4
	vpshufb	xmm1,xmm1,xmm0
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32


;-----------------------------------------------------------------------
; aesni_gcm_encrypt  (exported, Win64 ABI)
;
; Same argument mapping as aesni_gcm_decrypt (rcx, rdx, r8, r9,
; [rsp+40], [rsp+48] -> rdi, rsi, rdx, rcx, r8, r9).
; Requires rdx >= 0x60*3 bytes, otherwise nothing is processed and 0 is
; returned.  Two _aesni_ctr32_6x calls produce the first twelve output
; blocks; _aesni_ctr32_ghash_6x then continues while consuming earlier
; output (its movbe cursor r14 starts at rsi).  The blocks that the loop
; has not consumed when it exits are multiplied in by the straight-line
; code after the call.
; Returns rax = r10: 0x60*2 for the two initial calls plus 0x60 per pass
; of the helper loop.
;-----------------------------------------------------------------------
global	aesni_gcm_encrypt

ALIGN	32
aesni_gcm_encrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesni_gcm_encrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	xor	r10,r10				; return value if we abort

	cmp	rdx,0x60*3
	jb	NEAR $L$gcm_enc_abort

	lea	rax,[rsp]			; rax = frame anchor for save area

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]		; counter block
	add	rsp,-128
	mov	ebx,DWORD[12+r8]
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	lea	rcx,[128+rcx]			; bias key pointer by +128
	vmovdqu	xmm0,XMMWORD[r11]
	and	rsp,-128
	mov	ebp,DWORD[((240-128))+rcx]	; dword at [key+240]

	; Same frame-vs-key offset adjustment as in aesni_gcm_decrypt.
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$enc_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$enc_no_key_aliasing
	sub	rsp,r15
$L$enc_no_key_aliasing:

	lea	r14,[rsi]			; movbe cursor = destination

	lea	r15,[((-192))+rdx*1+rsi]	; limit for the movbe cursor

	shr	rdx,4				; bytes -> 16-byte blocks

	call	_aesni_ctr32_6x
	; Byte-swap the first six output blocks and stage them for the
	; helper: blocks 0..4 to [112+rsp]..[48+rsp], block 5 kept in xmm7.
	vpshufb	xmm8,xmm9,xmm0
	vpshufb	xmm2,xmm10,xmm0
	vmovdqu	XMMWORD[112+rsp],xmm8
	vpshufb	xmm4,xmm11,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vpshufb	xmm5,xmm12,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm4
	vpshufb	xmm6,xmm13,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm7,xmm14,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm6

	call	_aesni_ctr32_6x

	vmovdqu	xmm8,XMMWORD[r9]		; hash state
	lea	r9,[((32+32))+r9]
	sub	rdx,12				; twelve blocks already done
	mov	r10,0x60*2
	vpshufb	xmm8,xmm8,xmm0

	call	_aesni_ctr32_ghash_6x
	; Store the helper's last six output blocks and byte-swap them in
	; place; then multiply in the six blocks still staged on the stack,
	; followed by these last six.
	vmovdqu	xmm7,XMMWORD[32+rsp]
	vmovdqu	xmm0,XMMWORD[r11]
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm7,xmm7
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpshufb	xmm9,xmm9,xmm0
	vpxor	xmm1,xmm1,xmm7
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vpshufb	xmm10,xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vpshufb	xmm11,xmm11,xmm0
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vpshufb	xmm12,xmm12,xmm0
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vpshufb	xmm13,xmm13,xmm0
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vpshufb	xmm14,xmm14,xmm0
	vmovdqu	XMMWORD[16+rsp],xmm9
	vmovdqu	xmm6,XMMWORD[48+rsp]
	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpunpckhqdq	xmm2,xmm6,xmm6
	vpclmulqdq	xmm5,xmm7,xmm3,0x00
	vpxor	xmm2,xmm2,xmm6
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vpclmulqdq	xmm1,xmm1,xmm15,0x00

	vmovdqu	xmm9,XMMWORD[64+rsp]
	vpclmulqdq	xmm4,xmm6,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm5,xmm9,xmm9
	vpclmulqdq	xmm6,xmm6,xmm0,0x11
	vpxor	xmm5,xmm5,xmm9
	vpxor	xmm6,xmm6,xmm7
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vmovdqu	xmm1,XMMWORD[80+rsp]
	vpclmulqdq	xmm7,xmm9,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm7,xmm7,xmm4
	vpunpckhqdq	xmm4,xmm1,xmm1
	vpclmulqdq	xmm9,xmm9,xmm3,0x11
	vpxor	xmm4,xmm4,xmm1
	vpxor	xmm9,xmm9,xmm6
	vpclmulqdq	xmm5,xmm5,xmm15,0x00
	vpxor	xmm5,xmm5,xmm2

	vmovdqu	xmm2,XMMWORD[96+rsp]
	vpclmulqdq	xmm6,xmm1,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm6,xmm6,xmm7
	vpunpckhqdq	xmm7,xmm2,xmm2
	vpclmulqdq	xmm1,xmm1,xmm0,0x11
	vpxor	xmm7,xmm7,xmm2
	vpxor	xmm1,xmm1,xmm9
	vpclmulqdq	xmm4,xmm4,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm4,xmm4,xmm5

	vpxor	xmm8,xmm8,XMMWORD[112+rsp]
	vpclmulqdq	xmm5,xmm2,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm2,xmm2,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm2,xmm2,xmm1
	vpclmulqdq	xmm7,xmm7,xmm15,0x00
	vpxor	xmm4,xmm7,xmm4

	vpclmulqdq	xmm6,xmm8,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm14,xmm14
	vpclmulqdq	xmm8,xmm8,xmm0,0x11
	vpxor	xmm1,xmm1,xmm14
	vpxor	xmm5,xmm6,xmm5
	vpclmulqdq	xmm9,xmm9,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vpxor	xmm7,xmm8,xmm2
	vpxor	xmm6,xmm9,xmm4

	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpxor	xmm9,xmm7,xmm5
	vpclmulqdq	xmm4,xmm14,xmm3,0x00
	vpxor	xmm6,xmm6,xmm9
	vpunpckhqdq	xmm2,xmm13,xmm13
	vpclmulqdq	xmm14,xmm14,xmm3,0x11
	vpxor	xmm2,xmm2,xmm13
	vpslldq	xmm9,xmm6,8
	vpclmulqdq	xmm1,xmm1,xmm15,0x00
	vpxor	xmm8,xmm5,xmm9
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm7,xmm7,xmm6

	vpclmulqdq	xmm5,xmm13,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm9,xmm12,xmm12
	vpclmulqdq	xmm13,xmm13,xmm0,0x11
	vpxor	xmm9,xmm9,xmm12
	vpxor	xmm13,xmm13,xmm14
	vpalignr	xmm14,xmm8,xmm8,8
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm4,xmm12,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm1,xmm11,xmm11
	vpclmulqdq	xmm12,xmm12,xmm3,0x11
	vpxor	xmm1,xmm1,xmm11
	vpxor	xmm12,xmm12,xmm13
	vxorps	xmm7,xmm7,XMMWORD[16+rsp]
	vpclmulqdq	xmm9,xmm9,xmm15,0x00
	vpxor	xmm9,xmm9,xmm2

	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; multiply by $L$poly
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm5,xmm11,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm2,xmm10,xmm10
	vpclmulqdq	xmm11,xmm11,xmm0,0x11
	vpxor	xmm2,xmm2,xmm10
	vpalignr	xmm14,xmm8,xmm8,8
	vpxor	xmm11,xmm11,xmm12
	vpclmulqdq	xmm1,xmm1,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm1,xmm1,xmm9

	vxorps	xmm14,xmm14,xmm7
	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; multiply by $L$poly
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm4,xmm10,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpclmulqdq	xmm10,xmm10,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm10,xmm10,xmm11
	vpclmulqdq	xmm2,xmm2,xmm15,0x00
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm5,xmm8,xmm0,0x00
	vpclmulqdq	xmm7,xmm8,xmm0,0x11
	vpxor	xmm5,xmm5,xmm4
	vpclmulqdq	xmm6,xmm9,xmm15,0x10
	vpxor	xmm7,xmm7,xmm10
	vpxor	xmm6,xmm6,xmm2

	vpxor	xmm4,xmm7,xmm5
	vpxor	xmm6,xmm6,xmm4
	vpslldq	xmm1,xmm6,8
	vmovdqu	xmm3,XMMWORD[16+r11]		; $L$poly
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm8,xmm5,xmm1
	vpxor	xmm7,xmm7,xmm6

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm8,xmm8,xmm2

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm2,xmm2,xmm7
	vpxor	xmm8,xmm8,xmm2
	; Swap the hash state back and store it where it was loaded from
	; (r9 was advanced by 64 above).
	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]

$L$gcm_enc_abort:
	mov	rax,r10
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_aesni_gcm_encrypt:

; Constant pool.  Each entry is 16 bytes and is addressed relative to
; $L$bswap_mask (kept in r11): +0 bswap_mask, +16 poly, +32 one_msb,
; +48 two_lsb, +64 one_lsb.
ALIGN	64
$L$bswap_mask:
DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; ASCII: "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN	64
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; gcm_se_handler
;
; Exception handler referenced by the .xdata records of
; aesni_gcm_decrypt and aesni_gcm_encrypt.  r8 and r9 are the context
; record and dispatcher context pointers of a Win64 language-specific
; handler; the field names in the comments below follow the Win64
; CONTEXT / DISPATCHER_CONTEXT layouts (offsets as used by the code).
;
; The handler data of each .xdata record holds two image-relative
; labels: the end of the prologue ($L$gcm_*_body) and the start of the
; epilogue tail ($L$gcm_*_abort).  Only when the faulting address lies
; in [body, abort) are the callee-saved registers recovered from the
; function's frame; in every case the caller's rsp/rsi/rdi are put into
; the context before RtlVirtualUnwind is called.
; Returns eax = 1.
;-----------------------------------------------------------------------
ALIGN	16
gcm_se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

	mov	rax,QWORD[120+r8]		; context->Rax
	mov	rbx,QWORD[248+r8]		; context->Rip

	mov	rsi,QWORD[8+r9]			; disp->ImageBase
	mov	r11,QWORD[56+r9]		; disp->HandlerData

	mov	r10d,DWORD[r11]			; HandlerData[0]: body label
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10				; fault before end of prologue?
	jb	NEAR $L$common_seh_tail

	mov	rax,QWORD[152+r8]		; context->Rsp

	mov	r10d,DWORD[4+r11]		; HandlerData[1]: abort label
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10				; fault at/after epilogue tail?
	jae	NEAR $L$common_seh_tail

	mov	rax,QWORD[120+r8]		; context->Rax = frame anchor

	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	mov	QWORD[240+r8],r15		; context->R15
	mov	QWORD[232+r8],r14		; context->R14
	mov	QWORD[224+r8],r13		; context->R13
	mov	QWORD[216+r8],r12		; context->R12
	mov	QWORD[160+r8],rbp		; context->Rbp
	mov	QWORD[144+r8],rbx		; context->Rbx

	; Copy the ten saved xmm6..xmm15 values (20 qwords) from the frame
	; into the context at offset 512.
	lea	rsi,[((-216))+rax]
	lea	rdi,[512+r8]
	mov	ecx,20
	DD	0xa548f3fc			; cld; rep movsq

$L$common_seh_tail:
	mov	rdi,QWORD[8+rax]		; rdi/rsi saved by the WIN64 prologue
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax		; context->Rsp
	mov	QWORD[168+r8],rsi		; context->Rsi
	mov	QWORD[176+r8],rdi		; context->Rdi

	mov	rdi,QWORD[40+r9]		; disp->ContextRecord
	mov	rsi,r8
	mov	ecx,154				; 154 qwords copied
	DD	0xa548f3fc			; cld; rep movsq

	; Arguments for RtlVirtualUnwind are taken from the dispatcher
	; context; arguments 5..8 go on the stack above the 32-byte
	; shadow space.
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD[8+rsi]
	mov	r8,QWORD[rsi]
	mov	r9,QWORD[16+rsi]
	mov	r10,QWORD[40+rsi]
	lea	r11,[56+rsi]
	lea	r12,[24+rsi]
	mov	QWORD[32+rsp],r10
	mov	QWORD[40+rsp],r11
	mov	QWORD[48+rsp],r12
	mov	QWORD[56+rsp],rcx
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret


; Function table entries: begin, end and unwind-info address of each
; exported function, all image-relative.
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_gcm_dec_info wrt ..imagebase

	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_gcm_enc_info wrt ..imagebase

; Unwind info: a 4-byte header, the handler address, then the two
; handler-data labels read by gcm_se_handler (body, abort).
section	.xdata rdata align=8
ALIGN	8
$L$SEH_gcm_dec_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_gcm_enc_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase