; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; NOTE(review): the one-statement-per-line layout and the commentary were
; restored/added on top of the generated output. Lasting changes presumably
; belong in the generator script - confirm before relying on this copy.
;
; Syntax : NASM, Intel operand order (destination first).
; Target : x86-64 Windows object (`wrt ..imagebase`, .pdata/.xdata records).
; The byte string stored after the constants decodes to
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>".
;
; Contents
;   _aesni_ctr32_ghash_6x   internal: 6 counter blocks of AES interleaved
;                           with carry-less multiplies (vpclmulqdq)
;   GFp_aesni_gcm_decrypt   exported entry point
;   _aesni_ctr32_6x         internal: 6 counter blocks of AES, no hashing
;   GFp_aesni_gcm_encrypt   exported entry point
;   gcm_se_handler          Win64 exception handler named in .xdata
;
; Register roles shared by the routines (set up by the exported functions):
;   rdi  source pointer, advanced by 96 per 6-block step
;   rsi  destination pointer, advanced by 96 per 6-block step
;   rdx  remaining length, converted to 16-byte blocks with `shr rdx,4`
;   rcx  key pointer + 128 (round keys are addressed as [k*16-128+rcx])
;   r8   16-byte counter block; the next counter is written back here
;   r9   hash pointer + 64 (table reads use [x-32+r9], result goes to [-64+r9])
;   r10  running byte count, returned in rax
;   r11  address of $L$bswap_mask; the other constants are at fixed offsets
;   r14  read cursor for the data that is hashed, r15 = its upper bound
;   ebx  dword at offset 12 of the counter block, as loaded by `mov`
;   ebp  dword at offset 240 of the key; the routines execute ebp+1 AES rounds
;   rax  frame anchor (rsp at entry) - not modified by the internal routines

default	rel
; These expand to nothing, so `XMMWORD[...]` assembles as a plain `[...]`.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64

;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x (internal, reached by `call`)
;
; In   : xmm1  = current counter block
;        xmm7  and the stack slots [48+8+rsp]..[112+8+rsp] = six byte-swapped
;                blocks to be multiplied (the +8 skips the return address;
;                callers store them at [48+rsp]..[112+rsp])
;        xmm8  = byte-swapped hash value
;        plus the shared registers listed in the file header
; Out  : xmm9..xmm14 = the last six result blocks, NOT yet stored (callers
;        write them to [-96+rsi]..[-16+rsi]); xmm8 = updated hash value;
;        rdi/rsi advanced; r10 += 0x60 per iteration
; Clobb: xmm0-xmm15, r12, r13, r14, rdx, ebx, flags, stack slots 16..127
;        of the caller's frame
;
; Round handling: `cmp ebp,11 / jb` leaves after the round key at offset 144
; has been loaded (10 rounds in total); every other value of ebp runs through
; the round key at offset 224 (14 rounds in total).
; NOTE(review): there is no separate exit for ebp == 11, unlike
; _aesni_ctr32_6x which loops on ebp - confirm callers never pass such a key.
;-----------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_ghash_6x:

	vmovdqu	xmm2,XMMWORD[32+r11]		; $L$one_msb: 1 in byte 15
	sub	rdx,6
	vpxor	xmm4,xmm4,xmm4
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]	; round key 0
	; Counters +1..+5 by adding 1 to byte 15 only (no carry propagation).
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpaddb	xmm12,xmm11,xmm2
	vpaddb	xmm13,xmm12,xmm2
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm9,xmm1,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm4	; zero the carried-over partial sum
	jmp	NEAR $L$oop6x

ALIGN	32
$L$oop6x:
	; 100663296 = 0x06000000 = 6 << 24. The top byte of ebx is byte 15 of
	; the counter block, so a carry here means the byte-wise increments
	; above/below would wrap; take the 32-bit add path instead.
	add	ebx,100663296
	jc	NEAR $L$handle_ctr32
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddb	xmm1,xmm14,xmm2			; counter for the next iteration
	vpxor	xmm10,xmm10,xmm15
	vpxor	xmm11,xmm11,xmm15

$L$resume_ctr32:
	vmovdqu	XMMWORD[r8],xmm1		; park next counter; xmm1 is reused below
	vpclmulqdq	xmm5,xmm7,xmm3,0x10
	vpxor	xmm12,xmm12,xmm15
	vmovups	xmm2,XMMWORD[((16-128))+rcx]
	vpclmulqdq	xmm6,xmm7,xmm3,0x01

	; r12 = (r15 >= r14) ? 0x60 : 0, computed branch-free over the next
	; few instructions (setnc / neg / and) and added to r14 below.
	xor	r12,r12
	cmp	r15,r14

	vaesenc	xmm9,xmm9,xmm2
	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]
	vpxor	xmm13,xmm13,xmm15
	vpclmulqdq	xmm1,xmm7,xmm3,0x00
	vaesenc	xmm10,xmm10,xmm2
	vpxor	xmm14,xmm14,xmm15
	setnc	r12b
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vaesenc	xmm11,xmm11,xmm2
	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
	neg	r12
	vaesenc	xmm12,xmm12,xmm2
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm3,0x00
	vpxor	xmm8,xmm8,xmm4
	vaesenc	xmm13,xmm13,xmm2
	vpxor	xmm4,xmm1,xmm5
	and	r12,0x60
	vmovups	xmm15,XMMWORD[((32-128))+rcx]
	vpclmulqdq	xmm1,xmm0,xmm3,0x10
	vaesenc	xmm14,xmm14,xmm2

	vpclmulqdq	xmm2,xmm0,xmm3,0x01
	lea	r14,[r12*1+r14]
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpclmulqdq	xmm3,xmm0,xmm3,0x11
	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
	vaesenc	xmm10,xmm10,xmm15
	; movbe pairs: byte-reversed qwords of the data at r14 are parked on
	; the stack; they are read back as multiplier inputs one iteration later.
	movbe	r13,QWORD[88+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[80+r14]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((32+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((40+8))+rsp],r12
	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((48-128))+rcx]
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm0,xmm5,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm5,0x10
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm7,xmm7,xmm3
	vpclmulqdq	xmm3,xmm0,xmm5,0x01
	vaesenc	xmm11,xmm11,xmm15
	vpclmulqdq	xmm5,xmm0,xmm5,0x11
	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm4,xmm4,xmm1
	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((64-128))+rcx]
	vpxor	xmm6,xmm6,xmm2
	vpclmulqdq	xmm2,xmm0,xmm1,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm1,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[72+r14]
	vpxor	xmm7,xmm7,xmm5
	vpclmulqdq	xmm5,xmm0,xmm1,0x01
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[64+r14]
	vpclmulqdq	xmm1,xmm0,xmm1,0x11
	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((48+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((56+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((80-128))+rcx]
	vpxor	xmm6,xmm6,xmm3
	vpclmulqdq	xmm3,xmm0,xmm2,0x00
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm0,xmm2,0x10
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[56+r14]
	vpxor	xmm7,xmm7,xmm1
	vpclmulqdq	xmm1,xmm0,xmm2,0x01
	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[48+r14]
	vpclmulqdq	xmm2,xmm0,xmm2,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((64+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((72+8))+rsp],r12
	vpxor	xmm4,xmm4,xmm3
	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
	vaesenc	xmm14,xmm14,xmm15

	vmovups	xmm15,XMMWORD[((96-128))+rcx]
	vpxor	xmm6,xmm6,xmm5
	vpclmulqdq	xmm5,xmm8,xmm3,0x10
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm6,xmm6,xmm1
	vpclmulqdq	xmm1,xmm8,xmm3,0x01
	vaesenc	xmm10,xmm10,xmm15
	movbe	r13,QWORD[40+r14]
	vpxor	xmm7,xmm7,xmm2
	vpclmulqdq	xmm2,xmm8,xmm3,0x00
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[32+r14]
	vpclmulqdq	xmm8,xmm8,xmm3,0x11
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((80+8))+rsp],r13
	vaesenc	xmm13,xmm13,xmm15
	mov	QWORD[((88+8))+rsp],r12
	vpxor	xmm6,xmm6,xmm5
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm6,xmm6,xmm1

	vmovups	xmm15,XMMWORD[((112-128))+rcx]
	vpslldq	xmm5,xmm6,8
	vpxor	xmm4,xmm4,xmm2
	vmovdqu	xmm3,XMMWORD[16+r11]		; $L$poly

	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm7,xmm7,xmm8
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm4,xmm4,xmm5
	movbe	r13,QWORD[24+r14]
	vaesenc	xmm11,xmm11,xmm15
	movbe	r12,QWORD[16+r14]
	vpalignr	xmm0,xmm4,xmm4,8
	vpclmulqdq	xmm4,xmm4,xmm3,0x10	; multiply by $L$poly (first fold)
	mov	QWORD[((96+8))+rsp],r13
	vaesenc	xmm12,xmm12,xmm15
	mov	QWORD[((104+8))+rsp],r12
	vaesenc	xmm13,xmm13,xmm15
	vmovups	xmm1,XMMWORD[((128-128))+rcx]
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vmovups	xmm15,XMMWORD[((144-128))+rcx]
	vaesenc	xmm10,xmm10,xmm1
	vpsrldq	xmm6,xmm6,8
	vaesenc	xmm11,xmm11,xmm1
	vpxor	xmm7,xmm7,xmm6
	vaesenc	xmm12,xmm12,xmm1
	vpxor	xmm4,xmm4,xmm0
	movbe	r13,QWORD[8+r14]
	vaesenc	xmm13,xmm13,xmm1
	movbe	r12,QWORD[r14]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((160-128))+rcx]
	; ebp < 11: xmm15/xmm1 already hold the last two round keys.
	cmp	ebp,11
	jb	NEAR $L$enc_tail

	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((176-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((192-128))+rcx]

	; No exit is taken here; execution always continues through the
	; round keys at offsets 176..224 (see NOTE(review) in the header).
	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15

	vaesenc	xmm9,xmm9,xmm1
	vaesenc	xmm10,xmm10,xmm1
	vaesenc	xmm11,xmm11,xmm1
	vaesenc	xmm12,xmm12,xmm1
	vaesenc	xmm13,xmm13,xmm1
	vmovups	xmm15,XMMWORD[((208-128))+rcx]
	vaesenc	xmm14,xmm14,xmm1
	vmovups	xmm1,XMMWORD[((224-128))+rcx]
	jmp	NEAR $L$enc_tail

	; Carry path: byte-reverse the counter, do 32-bit adds of +1 ($L$one_lsb)
	; and +2 steps ($L$two_lsb) to form counters +1..+6, reverse them back.
ALIGN	32
$L$handle_ctr32:
	vmovdqu	xmm0,XMMWORD[r11]		; $L$bswap_mask
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
	vpaddd	xmm11,xmm6,xmm5
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm15
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm15
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpshufb	xmm14,xmm14,xmm0
	vpshufb	xmm1,xmm1,xmm0
	jmp	NEAR $L$resume_ctr32

	; Tail: xmm15 = second-to-last round key, xmm1 = last round key.
	; The last key is XORed with the six source blocks first, so each
	; vaesenclast produces (AES output XOR source block) in one step.
ALIGN	32
$L$enc_tail:
	vaesenc	xmm9,xmm9,xmm15
	vmovdqu	XMMWORD[(16+8)+rsp],xmm7	; carried into the next iteration
	vpalignr	xmm8,xmm4,xmm4,8
	vaesenc	xmm10,xmm10,xmm15
	vpclmulqdq	xmm4,xmm4,xmm3,0x10	; multiply by $L$poly (second fold)
	vpxor	xmm2,xmm1,XMMWORD[rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm0,xmm1,XMMWORD[16+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
	vmovdqu	xmm1,XMMWORD[r8]		; reload the parked next counter

	; Next iteration's counters +1..+5 are built while the last round runs.
	vaesenclast	xmm9,xmm9,xmm2
	vmovdqu	xmm2,XMMWORD[32+r11]
	vaesenclast	xmm10,xmm10,xmm0
	vpaddb	xmm0,xmm1,xmm2
	mov	QWORD[((112+8))+rsp],r13
	lea	rdi,[96+rdi]
	vaesenclast	xmm11,xmm11,xmm5
	vpaddb	xmm5,xmm0,xmm2
	mov	QWORD[((120+8))+rsp],r12
	lea	rsi,[96+rsi]
	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]
	vaesenclast	xmm12,xmm12,xmm6
	vpaddb	xmm6,xmm5,xmm2
	vaesenclast	xmm13,xmm13,xmm7
	vpaddb	xmm7,xmm6,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vpaddb	xmm3,xmm7,xmm2

	add	r10,0x60			; 96 more bytes accounted for
	sub	rdx,0x6
	jc	NEAR $L$6x_done			; fewer than 6 blocks remain

	; Store this batch and rotate the prepared counters into xmm9..xmm14.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpxor	xmm9,xmm1,xmm15
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovdqa	xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovdqa	xmm11,xmm5
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovdqa	xmm12,xmm6
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovdqa	xmm13,xmm7
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vmovdqa	xmm14,xmm3
	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]
	jmp	NEAR $L$oop6x

$L$6x_done:
	; Fold the pending partial sums into xmm8; the last six result blocks
	; stay in xmm9..xmm14 for the caller to store.
	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
	vpxor	xmm8,xmm8,xmm4

	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; GFp_aesni_gcm_decrypt (exported, Win64 calling convention)
;
; Arguments as consumed by the code (names are descriptive only):
;   rcx       -> rdi  source pointer (blocks are read and hashed from here)
;   rdx       -> rsi  destination pointer
;   r8        -> rdx  length in bytes
;   r9        -> rcx  key: round keys at offset 0.., round field at offset 240
;   [40+rsp]  -> r8   16-byte counter block (updated in place by the loop)
;   [48+rsp]  -> r9   16-byte hash value, read at entry and rewritten at exit;
;                     multiplier table read from offset 32 upwards
;                     NOTE(review): presumably Xi followed by the GHASH key
;                     table - confirm against the C caller.
; Returns rax = number of bytes processed (grows by 0x60 per 6-block
; iteration); 0 when the length is below 0x60.
; Saves/restores rbx, rbp, rdi, rsi, r12-r15 and xmm6-xmm15.
;-----------------------------------------------------------------------
global	GFp_aesni_gcm_decrypt

ALIGN	32
GFp_aesni_gcm_decrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_GFp_aesni_gcm_decrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	xor	r10,r10

	cmp	rdx,0x60
	jb	NEAR $L$gcm_dec_abort

	lea	rax,[rsp]		; rax anchors the frame until the epilogue

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	; 168 bytes below the six pushes; xmm6..xmm15 occupy the lowest 160.
	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]
	add	rsp,-128
	mov	ebx,DWORD[12+r8]
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	vmovdqu	xmm8,XMMWORD[r9]
	and	rsp,-128		; 128-byte aligned scratch frame
	vmovdqu	xmm0,XMMWORD[r11]
	lea	rcx,[128+rcx]
	lea	r9,[((32+32))+r9]
	mov	ebp,DWORD[((240-128))+rcx]
	vpshufb	xmm8,xmm8,xmm0

	; Compare bits 7..11 of the frame address with those of (key - 128);
	; if the frame lies 0..767 bytes above in that window, lower rsp by the
	; difference. NOTE(review): judging by the label this avoids the frame
	; aliasing the key schedule - confirm the intent in the generator.
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$dec_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$dec_no_key_aliasing
	sub	rsp,r15
$L$dec_no_key_aliasing:

	; Preload the first six source blocks, byte-swapped: the block at
	; offset 80 stays in xmm7, the rest go to stack slots 48..112.
	vmovdqu	xmm7,XMMWORD[80+rdi]
	lea	r14,[rdi]
	vmovdqu	xmm4,XMMWORD[64+rdi]

	lea	r15,[((-192))+rdx*1+rdi]	; r15 = source + length - 192

	vmovdqu	xmm5,XMMWORD[48+rdi]
	shr	rdx,4			; bytes -> 16-byte blocks
	xor	r10,r10
	vmovdqu	xmm6,XMMWORD[32+rdi]
	vpshufb	xmm7,xmm7,xmm0
	vmovdqu	xmm2,XMMWORD[16+rdi]
	vpshufb	xmm4,xmm4,xmm0
	vmovdqu	xmm3,XMMWORD[rdi]
	vpshufb	xmm5,xmm5,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm4
	vpshufb	xmm6,xmm6,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm2,xmm2,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm6
	vpshufb	xmm3,xmm3,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vmovdqu	XMMWORD[112+rsp],xmm3

	call	_aesni_ctr32_ghash_6x

	; The routine returns with the final six blocks still in registers.
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vmovups	XMMWORD[(-16)+rsi],xmm14

	; Swap the hash value back and store it where it was loaded from
	; (r9 was advanced by 64 above).
	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]

$L$gcm_dec_abort:
	mov	rax,r10
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_GFp_aesni_gcm_decrypt:

;-----------------------------------------------------------------------
; _aesni_ctr32_6x (internal, reached by `call`)
;
; Produces six blocks: AES of counters xmm1, +1 .. +5, XORed with the 96
; bytes at rdi, written to rsi. No hashing.
; In   : xmm1 = counter block, xmm0 = $L$bswap_mask (used on the carry
;        path), plus rcx/r11/ebx/ebp/rdi/rsi as in the file header
; Out  : xmm9..xmm14 = the six result blocks (also stored at [rsi]);
;        xmm1 = counter + 6; rdi and rsi advanced by 96
; Clobb: xmm2-xmm6, xmm8, xmm15, r12, r13, ebx, flags
; Rounds: the loop runs ebp-1 times, followed by one vaesenc and one
;        vaesenclast, i.e. ebp+1 rounds for any ebp.
;-----------------------------------------------------------------------
ALIGN	32
_aesni_ctr32_6x:

	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]	; round key 0
	vmovdqu	xmm2,XMMWORD[32+r11]		; $L$one_msb
	lea	r13,[((-1))+rbp]		; loop count
	vmovups	xmm15,XMMWORD[((16-128))+rcx]	; round key 1
	lea	r12,[((32-128))+rcx]		; -> round key 2
	vpxor	xmm9,xmm1,xmm4
	add	ebx,100663296			; 6 << 24, same carry test as above
	jc	NEAR $L$handle_ctr32_2
	vpaddb	xmm10,xmm1,xmm2
	vpaddb	xmm11,xmm10,xmm2
	vpxor	xmm10,xmm10,xmm4
	vpaddb	xmm12,xmm11,xmm2
	vpxor	xmm11,xmm11,xmm4
	vpaddb	xmm13,xmm12,xmm2
	vpxor	xmm12,xmm12,xmm4
	vpaddb	xmm14,xmm13,xmm2
	vpxor	xmm13,xmm13,xmm4
	vpaddb	xmm1,xmm14,xmm2
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32

ALIGN	16
$L$oop_ctr32:
	vaesenc	xmm9,xmm9,xmm15
	vaesenc	xmm10,xmm10,xmm15
	vaesenc	xmm11,xmm11,xmm15
	vaesenc	xmm12,xmm12,xmm15
	vaesenc	xmm13,xmm13,xmm15
	vaesenc	xmm14,xmm14,xmm15
	vmovups	xmm15,XMMWORD[r12]
	lea	r12,[16+r12]
	dec	r13d
	jnz	NEAR $L$oop_ctr32

	; xmm15 = second-to-last round key, [r12] = last round key. The last
	; key is pre-XORed with the source so vaesenclast finishes the block.
	vmovdqu	xmm3,XMMWORD[r12]
	vaesenc	xmm9,xmm9,xmm15
	vpxor	xmm4,xmm3,XMMWORD[rdi]
	vaesenc	xmm10,xmm10,xmm15
	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
	vaesenc	xmm11,xmm11,xmm15
	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
	vaesenc	xmm12,xmm12,xmm15
	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
	vaesenc	xmm13,xmm13,xmm15
	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
	vaesenc	xmm14,xmm14,xmm15
	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
	lea	rdi,[96+rdi]

	vaesenclast	xmm9,xmm9,xmm4
	vaesenclast	xmm10,xmm10,xmm5
	vaesenclast	xmm11,xmm11,xmm6
	vaesenclast	xmm12,xmm12,xmm8
	vaesenclast	xmm13,xmm13,xmm2
	vaesenclast	xmm14,xmm14,xmm3
	vmovups	XMMWORD[rsi],xmm9
	vmovups	XMMWORD[16+rsi],xmm10
	vmovups	XMMWORD[32+rsi],xmm11
	vmovups	XMMWORD[48+rsi],xmm12
	vmovups	XMMWORD[64+rsi],xmm13
	vmovups	XMMWORD[80+rsi],xmm14
	lea	rsi,[96+rsi]

	DB	0F3h,0C3h		;repret

	; Carry path, same scheme as $L$handle_ctr32; relies on xmm0 already
	; holding $L$bswap_mask.
ALIGN	32
$L$handle_ctr32_2:
	vpshufb	xmm6,xmm1,xmm0
	vmovdqu	xmm5,XMMWORD[48+r11]
	vpaddd	xmm10,xmm6,XMMWORD[64+r11]
	vpaddd	xmm11,xmm6,xmm5
	vpaddd	xmm12,xmm10,xmm5
	vpshufb	xmm10,xmm10,xmm0
	vpaddd	xmm13,xmm11,xmm5
	vpshufb	xmm11,xmm11,xmm0
	vpxor	xmm10,xmm10,xmm4
	vpaddd	xmm14,xmm12,xmm5
	vpshufb	xmm12,xmm12,xmm0
	vpxor	xmm11,xmm11,xmm4
	vpaddd	xmm1,xmm13,xmm5
	vpshufb	xmm13,xmm13,xmm0
	vpxor	xmm12,xmm12,xmm4
	vpshufb	xmm14,xmm14,xmm0
	vpxor	xmm13,xmm13,xmm4
	vpshufb	xmm1,xmm1,xmm0
	vpxor	xmm14,xmm14,xmm4
	jmp	NEAR $L$oop_ctr32


;-----------------------------------------------------------------------
; GFp_aesni_gcm_encrypt (exported, Win64 calling convention)
;
; Same argument layout as GFp_aesni_gcm_decrypt (rcx, rdx, r8, r9,
; [40+rsp], [48+rsp]). Differences visible in the code:
;   * the minimum length is 0x60*3 bytes, otherwise 0 is returned;
;   * the first twelve blocks are produced by two _aesni_ctr32_6x calls,
;     so r10 starts at 0x60*2 before the interleaved routine runs;
;   * the data that gets hashed is read from the DESTINATION buffer
;     (r14 = rsi), and the blocks still pending when the interleaved
;     routine returns are multiplied in the straight-line code after it.
; Returns rax = number of bytes processed.
; Saves/restores rbx, rbp, rdi, rsi, r12-r15 and xmm6-xmm15.
;-----------------------------------------------------------------------
global	GFp_aesni_gcm_encrypt

ALIGN	32
GFp_aesni_gcm_encrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_GFp_aesni_gcm_encrypt:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	xor	r10,r10

	cmp	rdx,0x60*3
	jb	NEAR $L$gcm_enc_abort

	lea	rax,[rsp]		; rax anchors the frame until the epilogue

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	lea	rsp,[((-168))+rsp]
	movaps	XMMWORD[(-216)+rax],xmm6
	movaps	XMMWORD[(-200)+rax],xmm7
	movaps	XMMWORD[(-184)+rax],xmm8
	movaps	XMMWORD[(-168)+rax],xmm9
	movaps	XMMWORD[(-152)+rax],xmm10
	movaps	XMMWORD[(-136)+rax],xmm11
	movaps	XMMWORD[(-120)+rax],xmm12
	movaps	XMMWORD[(-104)+rax],xmm13
	movaps	XMMWORD[(-88)+rax],xmm14
	movaps	XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
	vzeroupper

	vmovdqu	xmm1,XMMWORD[r8]
	add	rsp,-128
	mov	ebx,DWORD[12+r8]
	lea	r11,[$L$bswap_mask]
	lea	r14,[((-128))+rcx]
	mov	r15,0xf80
	lea	rcx,[128+rcx]
	vmovdqu	xmm0,XMMWORD[r11]
	and	rsp,-128
	mov	ebp,DWORD[((240-128))+rcx]

	; Same frame-vs-key address adjustment as in the decrypt function.
	and	r14,r15
	and	r15,rsp
	sub	r15,r14
	jc	NEAR $L$enc_no_key_aliasing
	cmp	r15,768
	jnc	NEAR $L$enc_no_key_aliasing
	sub	rsp,r15
$L$enc_no_key_aliasing:

	lea	r14,[rsi]

	lea	r15,[((-192))+rdx*1+rsi]	; r15 = destination + length - 192

	shr	rdx,4			; bytes -> 16-byte blocks

	; First six blocks: byte-swap the results into the layout the
	; interleaved routine expects (xmm7 + stack slots 48..112).
	call	_aesni_ctr32_6x
	vpshufb	xmm8,xmm9,xmm0
	vpshufb	xmm2,xmm10,xmm0
	vmovdqu	XMMWORD[112+rsp],xmm8
	vpshufb	xmm4,xmm11,xmm0
	vmovdqu	XMMWORD[96+rsp],xmm2
	vpshufb	xmm5,xmm12,xmm0
	vmovdqu	XMMWORD[80+rsp],xmm4
	vpshufb	xmm6,xmm13,xmm0
	vmovdqu	XMMWORD[64+rsp],xmm5
	vpshufb	xmm7,xmm14,xmm0
	vmovdqu	XMMWORD[48+rsp],xmm6

	; Second six blocks (picked up from the destination buffer later).
	call	_aesni_ctr32_6x

	vmovdqu	xmm8,XMMWORD[r9]
	lea	r9,[((32+32))+r9]
	sub	rdx,12
	mov	r10,0x60*2
	vpshufb	xmm8,xmm8,xmm0

	call	_aesni_ctr32_ghash_6x

	; Store the final six result blocks and keep byte-swapped copies in
	; xmm9..xmm14; meanwhile start multiplying the six blocks the routine
	; left parked in stack slots 32..112.
	vmovdqu	xmm7,XMMWORD[32+rsp]
	vmovdqu	xmm0,XMMWORD[r11]
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm7,xmm7
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vmovups	XMMWORD[(-96)+rsi],xmm9
	vpshufb	xmm9,xmm9,xmm0
	vpxor	xmm1,xmm1,xmm7
	vmovups	XMMWORD[(-80)+rsi],xmm10
	vpshufb	xmm10,xmm10,xmm0
	vmovups	XMMWORD[(-64)+rsi],xmm11
	vpshufb	xmm11,xmm11,xmm0
	vmovups	XMMWORD[(-48)+rsi],xmm12
	vpshufb	xmm12,xmm12,xmm0
	vmovups	XMMWORD[(-32)+rsi],xmm13
	vpshufb	xmm13,xmm13,xmm0
	vmovups	XMMWORD[(-16)+rsi],xmm14
	vpshufb	xmm14,xmm14,xmm0
	vmovdqu	XMMWORD[16+rsp],xmm9	; swapped first block, XORed in further down
	vmovdqu	xmm6,XMMWORD[48+rsp]
	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpunpckhqdq	xmm2,xmm6,xmm6
	vpclmulqdq	xmm5,xmm7,xmm3,0x00
	vpxor	xmm2,xmm2,xmm6
	vpclmulqdq	xmm7,xmm7,xmm3,0x11
	vpclmulqdq	xmm1,xmm1,xmm15,0x00

	vmovdqu	xmm9,XMMWORD[64+rsp]
	vpclmulqdq	xmm4,xmm6,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm5,xmm9,xmm9
	vpclmulqdq	xmm6,xmm6,xmm0,0x11
	vpxor	xmm5,xmm5,xmm9
	vpxor	xmm6,xmm6,xmm7
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vmovdqu	xmm1,XMMWORD[80+rsp]
	vpclmulqdq	xmm7,xmm9,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm7,xmm7,xmm4
	vpunpckhqdq	xmm4,xmm1,xmm1
	vpclmulqdq	xmm9,xmm9,xmm3,0x11
	vpxor	xmm4,xmm4,xmm1
	vpxor	xmm9,xmm9,xmm6
	vpclmulqdq	xmm5,xmm5,xmm15,0x00
	vpxor	xmm5,xmm5,xmm2

	vmovdqu	xmm2,XMMWORD[96+rsp]
	vpclmulqdq	xmm6,xmm1,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm6,xmm6,xmm7
	vpunpckhqdq	xmm7,xmm2,xmm2
	vpclmulqdq	xmm1,xmm1,xmm0,0x11
	vpxor	xmm7,xmm7,xmm2
	vpxor	xmm1,xmm1,xmm9
	vpclmulqdq	xmm4,xmm4,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm4,xmm4,xmm5

	; The running hash value is XORed into the block from slot 112.
	vpxor	xmm8,xmm8,XMMWORD[112+rsp]
	vpclmulqdq	xmm5,xmm2,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpxor	xmm5,xmm5,xmm6
	vpclmulqdq	xmm2,xmm2,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm2,xmm2,xmm1
	vpclmulqdq	xmm7,xmm7,xmm15,0x00
	vpxor	xmm4,xmm7,xmm4

	; From here the byte-swapped final blocks (xmm14 down to xmm10) are
	; multiplied while the first group's sum is folded with $L$poly.
	vpclmulqdq	xmm6,xmm8,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
	vpunpckhqdq	xmm1,xmm14,xmm14
	vpclmulqdq	xmm8,xmm8,xmm0,0x11
	vpxor	xmm1,xmm1,xmm14
	vpxor	xmm5,xmm6,xmm5
	vpclmulqdq	xmm9,xmm9,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
	vpxor	xmm7,xmm8,xmm2
	vpxor	xmm6,xmm9,xmm4

	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
	vpxor	xmm9,xmm7,xmm5
	vpclmulqdq	xmm4,xmm14,xmm3,0x00
	vpxor	xmm6,xmm6,xmm9
	vpunpckhqdq	xmm2,xmm13,xmm13
	vpclmulqdq	xmm14,xmm14,xmm3,0x11
	vpxor	xmm2,xmm2,xmm13
	vpslldq	xmm9,xmm6,8
	vpclmulqdq	xmm1,xmm1,xmm15,0x00
	vpxor	xmm8,xmm5,xmm9
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm7,xmm7,xmm6

	vpclmulqdq	xmm5,xmm13,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm9,xmm12,xmm12
	vpclmulqdq	xmm13,xmm13,xmm0,0x11
	vpxor	xmm9,xmm9,xmm12
	vpxor	xmm13,xmm13,xmm14
	vpalignr	xmm14,xmm8,xmm8,8
	vpclmulqdq	xmm2,xmm2,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm4,xmm12,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm1,xmm11,xmm11
	vpclmulqdq	xmm12,xmm12,xmm3,0x11
	vpxor	xmm1,xmm1,xmm11
	vpxor	xmm12,xmm12,xmm13
	vxorps	xmm7,xmm7,XMMWORD[16+rsp]	; bring in the swapped first block
	vpclmulqdq	xmm9,xmm9,xmm15,0x00
	vpxor	xmm9,xmm9,xmm2

	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; fold with $L$poly
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm5,xmm11,xmm0,0x00
	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
	vpxor	xmm5,xmm5,xmm4
	vpunpckhqdq	xmm2,xmm10,xmm10
	vpclmulqdq	xmm11,xmm11,xmm0,0x11
	vpxor	xmm2,xmm2,xmm10
	vpalignr	xmm14,xmm8,xmm8,8
	vpxor	xmm11,xmm11,xmm12
	vpclmulqdq	xmm1,xmm1,xmm15,0x10
	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
	vpxor	xmm1,xmm1,xmm9

	vxorps	xmm14,xmm14,xmm7
	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; second fold with $L$poly
	vxorps	xmm8,xmm8,xmm14

	vpclmulqdq	xmm4,xmm10,xmm3,0x00
	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
	vpxor	xmm4,xmm4,xmm5
	vpunpckhqdq	xmm9,xmm8,xmm8
	vpclmulqdq	xmm10,xmm10,xmm3,0x11
	vpxor	xmm9,xmm9,xmm8
	vpxor	xmm10,xmm10,xmm11
	vpclmulqdq	xmm2,xmm2,xmm15,0x00
	vpxor	xmm2,xmm2,xmm1

	vpclmulqdq	xmm5,xmm8,xmm0,0x00
	vpclmulqdq	xmm7,xmm8,xmm0,0x11
	vpxor	xmm5,xmm5,xmm4
	vpclmulqdq	xmm6,xmm9,xmm15,0x10
	vpxor	xmm7,xmm7,xmm10
	vpxor	xmm6,xmm6,xmm2

	; Combine the three partial sums of the second group and fold twice
	; with $L$poly to obtain the final 128-bit value in xmm8.
	vpxor	xmm4,xmm7,xmm5
	vpxor	xmm6,xmm6,xmm4
	vpslldq	xmm1,xmm6,8
	vmovdqu	xmm3,XMMWORD[16+r11]
	vpsrldq	xmm6,xmm6,8
	vpxor	xmm8,xmm5,xmm1
	vpxor	xmm7,xmm7,xmm6

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm8,xmm8,xmm2

	vpalignr	xmm2,xmm8,xmm8,8
	vpclmulqdq	xmm8,xmm8,xmm3,0x10
	vpxor	xmm2,xmm2,xmm7
	vpxor	xmm8,xmm8,xmm2
	; Swap back and store at the address the hash value was loaded from.
	vpshufb	xmm8,xmm8,XMMWORD[r11]
	vmovdqu	XMMWORD[(-64)+r9],xmm8

	vzeroupper
	movaps	xmm6,XMMWORD[((-216))+rax]
	movaps	xmm7,XMMWORD[((-200))+rax]
	movaps	xmm8,XMMWORD[((-184))+rax]
	movaps	xmm9,XMMWORD[((-168))+rax]
	movaps	xmm10,XMMWORD[((-152))+rax]
	movaps	xmm11,XMMWORD[((-136))+rax]
	movaps	xmm12,XMMWORD[((-120))+rax]
	movaps	xmm13,XMMWORD[((-104))+rax]
	movaps	xmm14,XMMWORD[((-88))+rax]
	movaps	xmm15,XMMWORD[((-72))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	lea	rsp,[rax]

$L$gcm_enc_abort:
	mov	rax,r10
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_GFp_aesni_gcm_encrypt:

; Constants, addressed as fixed offsets from r11 = $L$bswap_mask:
;   +0   $L$bswap_mask  vpshufb control that reverses all 16 bytes
;   +16  $L$poly        0xc2 in the top byte, used by the folding multiplies
;   +32  $L$one_msb     1 in byte 15 (byte-wise counter increment)
;   +48  $L$two_lsb     2 in byte 0  (dword increment on reversed counter)
;   +64  $L$one_lsb     1 in byte 0  (dword increment on reversed counter)
ALIGN	64
$L$bswap_mask:
DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; ASCII: "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL
DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN	64
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; gcm_se_handler - exception handler referenced from both .xdata records.
;
; Uses r8 and r9 as pointers to two structures. NOTE(review): under the
; Win64 language-specific handler convention these are the CONTEXT record
; and the DISPATCHER_CONTEXT, and the numeric offsets below then map to
; the fields named in the comments - confirm against winnt.h.
;
; Logic: compare the faulting Rip with the two RVAs stored after the
; handler in .xdata (body label, abort label).
;   Rip <  body  : frame anchor = context Rax, nothing to restore yet
;   Rip >= abort : frame anchor = context Rsp, frame already torn down
;   otherwise    : frame anchor = context Rax; copy the saved rbx, rbp,
;                  r12-r15 and xmm6-xmm15 from the frame into the context
; Then restore rsi/rdi/rsp in the context, copy it to the dispatcher's
; context record, call RtlVirtualUnwind and return 1.
;-----------------------------------------------------------------------
ALIGN	16
gcm_se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64			; 32 bytes home space + 4 stack arguments

	mov	rax,QWORD[120+r8]	; context Rax
	mov	rbx,QWORD[248+r8]	; context Rip

	mov	rsi,QWORD[8+r9]		; image base
	mov	r11,QWORD[56+r9]	; handler data (the two RVAs in .xdata)

	mov	r10d,DWORD[r11]		; RVA of the body label
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail

	mov	rax,QWORD[152+r8]	; context Rsp

	mov	r10d,DWORD[4+r11]	; RVA of the abort label
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail

	mov	rax,QWORD[120+r8]	; inside the body rax is the frame anchor

	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	mov	QWORD[240+r8],r15
	mov	QWORD[232+r8],r14
	mov	QWORD[224+r8],r13
	mov	QWORD[216+r8],r12
	mov	QWORD[160+r8],rbp
	mov	QWORD[144+r8],rbx

	lea	rsi,[((-216))+rax]	; saved xmm6..xmm15 in the frame
	lea	rdi,[512+r8]		; context slot for xmm6
	mov	ecx,20			; 20 qwords = 10 XMM registers
	DD	0xa548f3fc		; bytes FC F3 48 A5 = cld; rep movsq

$L$common_seh_tail:
	mov	rdi,QWORD[8+rax]	; rdi/rsi as saved by the WIN64 prologue
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax	; context Rsp
	mov	QWORD[168+r8],rsi	; context Rsi
	mov	QWORD[176+r8],rdi	; context Rdi

	mov	rdi,QWORD[40+r9]	; destination: dispatcher's context record
	mov	rsi,r8
	mov	ecx,154			; 154 qwords copied
	DD	0xa548f3fc		; cld; rep movsq

	; RtlVirtualUnwind(0, [8+disp], [disp], [16+disp], [40+disp],
	;                  &disp[56], &disp[24], 0)
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD[8+rsi]
	mov	r8,QWORD[rsi]
	mov	r9,QWORD[16+rsi]
	mov	r10,QWORD[40+rsi]
	lea	r11,[56+rsi]
	lea	r12,[24+rsi]
	mov	QWORD[32+rsp],r10
	mov	QWORD[40+rsp],r11
	mov	QWORD[48+rsp],r12
	mov	QWORD[56+rsp],rcx
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1			; NOTE(review): presumably ExceptionContinueSearch
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret


; Function table: (begin RVA, end RVA, unwind-info RVA) per exported function.
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_GFp_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_end_GFp_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_gcm_dec_info wrt ..imagebase

	DD	$L$SEH_begin_GFp_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_end_GFp_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_GFp_gcm_enc_info wrt ..imagebase

; Unwind info: a 4-byte header, the handler RVA, then the two RVAs that
; gcm_se_handler reads as handler data (body label, abort label).
section	.xdata rdata align=8
ALIGN	8
$L$SEH_gcm_dec_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_GFp_gcm_enc_info:
DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase