; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Syntax: NASM (Intel operand order). Target: x86-64 Windows (Win64 calling
; convention, .pdata/.xdata unwind records).
;
; Contents:
;   _aesni_ctr32_ghash_6x  internal: 6-block AES-CTR batch interleaved with
;                          carry-less-multiply hashing of earlier blocks
;   aesni_gcm_decrypt      exported entry point
;   _aesni_ctr32_6x        internal: 6-block AES-CTR batch, no hashing
;   aesni_gcm_encrypt      exported entry point
;   gcm_se_handler         Win64 language-specific exception handler
;
; The embedded ASCII banner near the end of .text reads
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>".

default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section .text code align=64

;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x  (internal, non-standard register interface)
;
; Register roles as used by the code below and set up by the two callers:
;   rdi   input pointer; six 16-byte blocks are read per batch, then
;         rdi += 96
;   rsi   output pointer; advanced by 96 per batch, results are stored
;         at [rsi-96 .. rsi-16]
;   rdx   remaining count in 16-byte blocks (callers do `shr rdx,4`)
;   rcx   key-schedule pointer biased by +128 (round keys are addressed
;         as [(N-128)+rcx])
;   r8    pointer to the 16-byte counter block; the next counter value
;         is written back there on every batch
;   r9    caller's r9 + 64; multiplier operands are read from
;         [r9-32 .. r9+80]
;   r10   running byte count, += 0x60 per batch
;   r11   address of $L$bswap_mask (+16 $L$poly, +32 $L$one_msb,
;         +48 $L$two_lsb, +64 $L$one_lsb)
;   r14   pointer from which 96 bytes are byte-swap-loaded (movbe) onto
;         the stack each batch; advanced by 0x60 only while r15 >= r14
;   ebx   last dword of the counter block as loaded by the caller
;   ebp   dword loaded by the caller from key+240; compared with 11 to
;         pick how many AES rounds to run (presumably the round count
;         field of the key structure - confirm against the C side)
;   xmm1  current counter block
;   xmm7  byte-swapped block consumed by the first multiply of a batch
;   xmm8  hash accumulator (in/out)
;
; Stack: the callers' slots [rsp+16 .. rsp+127] are addressed here with
; an extra +8 because `call` has pushed the return address.
;
; On return xmm9..xmm14 hold the last six output blocks; they have NOT
; been stored yet - both callers store them at [rsi-96 .. rsi-16].
;-----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_ghash_6x:

    vmovdqu xmm2,XMMWORD[32+r11]           ; xmm2 = $L$one_msb (byte-wise counter increment)
    sub rdx,6
    vpxor xmm4,xmm4,xmm4
    vmovdqu xmm15,XMMWORD[((0-128))+rcx]   ; first 16 bytes of the key schedule
    vpaddb xmm10,xmm1,xmm2                 ; counter+1 .. counter+5 (last byte only)
    vpaddb xmm11,xmm10,xmm2
    vpaddb xmm12,xmm11,xmm2
    vpaddb xmm13,xmm12,xmm2
    vpaddb xmm14,xmm13,xmm2
    vpxor xmm9,xmm1,xmm15
    vmovdqu XMMWORD[(16+8)+rsp],xmm4       ; zero the scratch slot XORed into xmm8 below
    jmp NEAR $L$oop6x

ALIGN 32
$L$oop6x:
    ; 100663296 = 6 << 24. ebx was loaded from bytes 12..15 of the counter
    ; block, so the block's last byte sits in bits 24..31; a carry here
    ; means the byte-wise vpaddb increments would wrap, and the full
    ; 32-bit path at $L$handle_ctr32 is taken instead.
    add ebx,100663296
    jc NEAR $L$handle_ctr32
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpaddb xmm1,xmm14,xmm2                 ; counter for the next batch
    vpxor xmm10,xmm10,xmm15
    vpxor xmm11,xmm11,xmm15

$L$resume_ctr32:
    vmovdqu XMMWORD[r8],xmm1               ; publish next counter value
    vpclmulqdq xmm5,xmm7,xmm3,0x10
    vpxor xmm12,xmm12,xmm15
    vmovups xmm2,XMMWORD[((16-128))+rcx]
    vpclmulqdq xmm6,xmm7,xmm3,0x01

    ; r12 = (r15 >= r14) ? 0x60 : 0, computed without a branch
    ; (xor / cmp / setnc / neg / and); r14 is advanced by r12 below.
    xor r12,r12
    cmp r15,r14

    vaesenc xmm9,xmm9,xmm2
    vmovdqu xmm0,XMMWORD[((48+8))+rsp]
    vpxor xmm13,xmm13,xmm15
    vpclmulqdq xmm1,xmm7,xmm3,0x00
    vaesenc xmm10,xmm10,xmm2
    vpxor xmm14,xmm14,xmm15
    setnc r12b
    vpclmulqdq xmm7,xmm7,xmm3,0x11
    vaesenc xmm11,xmm11,xmm2
    vmovdqu xmm3,XMMWORD[((16-32))+r9]
    neg r12
    vaesenc xmm12,xmm12,xmm2
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm0,xmm3,0x00
    vpxor xmm8,xmm8,xmm4
    vaesenc xmm13,xmm13,xmm2
    vpxor xmm4,xmm1,xmm5
    and r12,0x60
    vmovups xmm15,XMMWORD[((32-128))+rcx]
    vpclmulqdq xmm1,xmm0,xmm3,0x10
    vaesenc xmm14,xmm14,xmm2

    vpclmulqdq xmm2,xmm0,xmm3,0x01
    lea r14,[r12*1+r14]
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
    vpclmulqdq xmm3,xmm0,xmm3,0x11
    vmovdqu xmm0,XMMWORD[((64+8))+rsp]
    vaesenc xmm10,xmm10,xmm15
    ; movbe = byte-swapping 64-bit load; the swapped halves are written
    ; to the stack slots that a later batch reads as hash input.
    movbe r13,QWORD[88+r14]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[80+r14]
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((32+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((40+8))+rsp],r12
    vmovdqu xmm5,XMMWORD[((48-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((48-128))+rcx]
    vpxor xmm6,xmm6,xmm1
    vpclmulqdq xmm1,xmm0,xmm5,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm2
    vpclmulqdq xmm2,xmm0,xmm5,0x10
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm7,xmm7,xmm3
    vpclmulqdq xmm3,xmm0,xmm5,0x01
    vaesenc xmm11,xmm11,xmm15
    vpclmulqdq xmm5,xmm0,xmm5,0x11
    vmovdqu xmm0,XMMWORD[((80+8))+rsp]
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm4,xmm4,xmm1
    vmovdqu xmm1,XMMWORD[((64-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((64-128))+rcx]
    vpxor xmm6,xmm6,xmm2
    vpclmulqdq xmm2,xmm0,xmm1,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm3
    vpclmulqdq xmm3,xmm0,xmm1,0x10
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[72+r14]
    vpxor xmm7,xmm7,xmm5
    vpclmulqdq xmm5,xmm0,xmm1,0x01
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[64+r14]
    vpclmulqdq xmm1,xmm0,xmm1,0x11
    vmovdqu xmm0,XMMWORD[((96+8))+rsp]
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((48+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((56+8))+rsp],r12
    vpxor xmm4,xmm4,xmm2
    vmovdqu xmm2,XMMWORD[((96-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((80-128))+rcx]
    vpxor xmm6,xmm6,xmm3
    vpclmulqdq xmm3,xmm0,xmm2,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm0,xmm2,0x10
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[56+r14]
    vpxor xmm7,xmm7,xmm1
    vpclmulqdq xmm1,xmm0,xmm2,0x01
    vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp] ; fold the accumulator into the last staged block
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[48+r14]
    vpclmulqdq xmm2,xmm0,xmm2,0x11
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((64+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((72+8))+rsp],r12
    vpxor xmm4,xmm4,xmm3
    vmovdqu xmm3,XMMWORD[((112-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((96-128))+rcx]
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm8,xmm3,0x10
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm1
    vpclmulqdq xmm1,xmm8,xmm3,0x01
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[40+r14]
    vpxor xmm7,xmm7,xmm2
    vpclmulqdq xmm2,xmm8,xmm3,0x00
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[32+r14]
    vpclmulqdq xmm8,xmm8,xmm3,0x11
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((80+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((88+8))+rsp],r12
    vpxor xmm6,xmm6,xmm5
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm6,xmm6,xmm1

    vmovups xmm15,XMMWORD[((112-128))+rcx]
    vpslldq xmm5,xmm6,8
    vpxor xmm4,xmm4,xmm2
    vmovdqu xmm3,XMMWORD[16+r11]           ; xmm3 = $L$poly

    vaesenc xmm9,xmm9,xmm15
    vpxor xmm7,xmm7,xmm8
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm4,xmm4,xmm5
    movbe r13,QWORD[24+r14]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[16+r14]
    vpalignr xmm0,xmm4,xmm4,8
    vpclmulqdq xmm4,xmm4,xmm3,0x10         ; first multiply by $L$poly
    mov QWORD[((96+8))+rsp],r13
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((104+8))+rsp],r12
    vaesenc xmm13,xmm13,xmm15
    vmovups xmm1,XMMWORD[((128-128))+rcx]
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vmovups xmm15,XMMWORD[((144-128))+rcx]
    vaesenc xmm10,xmm10,xmm1
    vpsrldq xmm6,xmm6,8
    vaesenc xmm11,xmm11,xmm1
    vpxor xmm7,xmm7,xmm6
    vaesenc xmm12,xmm12,xmm1
    vpxor xmm4,xmm4,xmm0
    movbe r13,QWORD[8+r14]
    vaesenc xmm13,xmm13,xmm1
    movbe r12,QWORD[r14]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((160-128))+rcx]
    ; Flags from this compare drive both the jb here and the je after
    ; the next two rounds (the vector instructions in between do not
    ; modify flags): ebp < 11, == 11, or > 11 selects how many extra
    ; round pairs run before $L$enc_tail.
    cmp ebp,11
    jb NEAR $L$enc_tail

    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vaesenc xmm10,xmm10,xmm1
    vaesenc xmm11,xmm11,xmm1
    vaesenc xmm12,xmm12,xmm1
    vaesenc xmm13,xmm13,xmm1
    vmovups xmm15,XMMWORD[((176-128))+rcx]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((192-128))+rcx]
    je NEAR $L$enc_tail

    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vaesenc xmm10,xmm10,xmm1
    vaesenc xmm11,xmm11,xmm1
    vaesenc xmm12,xmm12,xmm1
    vaesenc xmm13,xmm13,xmm1
    vmovups xmm15,XMMWORD[((208-128))+rcx]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((224-128))+rcx]
    jmp NEAR $L$enc_tail

; Slow path for counter generation: byte-swap the counter block, add
; with 32-bit lanes ($L$one_lsb, then steps of $L$two_lsb), and swap
; each result back. Also performs the xmm3 load and the round-0 XOR of
; xmm10/xmm11 that the fast path does before $L$resume_ctr32.
ALIGN 32
$L$handle_ctr32:
    vmovdqu xmm0,XMMWORD[r11]              ; xmm0 = $L$bswap_mask
    vpshufb xmm6,xmm1,xmm0
    vmovdqu xmm5,XMMWORD[48+r11]           ; xmm5 = $L$two_lsb
    vpaddd xmm10,xmm6,XMMWORD[64+r11]      ; + $L$one_lsb
    vpaddd xmm11,xmm6,xmm5
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpaddd xmm12,xmm10,xmm5
    vpshufb xmm10,xmm10,xmm0
    vpaddd xmm13,xmm11,xmm5
    vpshufb xmm11,xmm11,xmm0
    vpxor xmm10,xmm10,xmm15
    vpaddd xmm14,xmm12,xmm5
    vpshufb xmm12,xmm12,xmm0
    vpxor xmm11,xmm11,xmm15
    vpaddd xmm1,xmm13,xmm5
    vpshufb xmm13,xmm13,xmm0
    vpshufb xmm14,xmm14,xmm0
    vpshufb xmm1,xmm1,xmm0
    jmp NEAR $L$resume_ctr32

; Final rounds of the batch. xmm1 holds the last round key here; it is
; XORed with the six input blocks so that vaesenclast produces
; keystream XOR input directly. The counters for the next batch are
; generated in xmm0/xmm5/xmm6/xmm7/xmm3 alongside.
ALIGN 32
$L$enc_tail:
    vaesenc xmm9,xmm9,xmm15
    vmovdqu XMMWORD[(16+8)+rsp],xmm7
    vpalignr xmm8,xmm4,xmm4,8
    vaesenc xmm10,xmm10,xmm15
    vpclmulqdq xmm4,xmm4,xmm3,0x10         ; second multiply by $L$poly
    vpxor xmm2,xmm1,XMMWORD[rdi]
    vaesenc xmm11,xmm11,xmm15
    vpxor xmm0,xmm1,XMMWORD[16+rdi]
    vaesenc xmm12,xmm12,xmm15
    vpxor xmm5,xmm1,XMMWORD[32+rdi]
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm6,xmm1,XMMWORD[48+rdi]
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm7,xmm1,XMMWORD[64+rdi]
    vpxor xmm3,xmm1,XMMWORD[80+rdi]
    vmovdqu xmm1,XMMWORD[r8]               ; reload the counter stored at $L$resume_ctr32

    vaesenclast xmm9,xmm9,xmm2
    vmovdqu xmm2,XMMWORD[32+r11]           ; xmm2 = $L$one_msb again
    vaesenclast xmm10,xmm10,xmm0
    vpaddb xmm0,xmm1,xmm2
    mov QWORD[((112+8))+rsp],r13
    lea rdi,[96+rdi]
    vaesenclast xmm11,xmm11,xmm5
    vpaddb xmm5,xmm0,xmm2
    mov QWORD[((120+8))+rsp],r12
    lea rsi,[96+rsi]
    vmovdqu xmm15,XMMWORD[((0-128))+rcx]
    vaesenclast xmm12,xmm12,xmm6
    vpaddb xmm6,xmm5,xmm2
    vaesenclast xmm13,xmm13,xmm7
    vpaddb xmm7,xmm6,xmm2
    vaesenclast xmm14,xmm14,xmm3
    vpaddb xmm3,xmm7,xmm2

    add r10,0x60                           ; 96 more bytes accounted for
    sub rdx,0x6
    jc NEAR $L$6x_done                     ; fewer than six blocks left: exit without storing

    vmovups XMMWORD[(-96)+rsi],xmm9
    vpxor xmm9,xmm1,xmm15
    vmovups XMMWORD[(-80)+rsi],xmm10
    vmovdqa xmm10,xmm0
    vmovups XMMWORD[(-64)+rsi],xmm11
    vmovdqa xmm11,xmm5
    vmovups XMMWORD[(-48)+rsi],xmm12
    vmovdqa xmm12,xmm6
    vmovups XMMWORD[(-32)+rsi],xmm13
    vmovdqa xmm13,xmm7
    vmovups XMMWORD[(-16)+rsi],xmm14
    vmovdqa xmm14,xmm3
    vmovdqu xmm7,XMMWORD[((32+8))+rsp]
    jmp NEAR $L$oop6x

$L$6x_done:
    vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
    vpxor xmm8,xmm8,xmm4

    DB 0F3h,0C3h ;repret


;-----------------------------------------------------------------------
; aesni_gcm_decrypt  (exported, Win64 ABI)
;
; Six arguments arrive as rcx, rdx, r8, r9, [rsp+40], [rsp+48] and are
; moved into rdi, rsi, rdx, rcx, r8, r9. As used below:
;   rdi  input pointer          rsi  output pointer
;   rdx  length in bytes        rcx  key schedule (dword at +240 is
;                                    loaded into ebp)
;   r8   16-byte counter block  r9   16-byte hash value (read at entry,
;                                    rewritten at exit); multiplier data
;                                    is read from r9+32 onward
;                                    (presumably the precomputed hash
;                                    key table - confirm with caller)
; Returns rax = number of bytes processed (r10); 0 when rdx < 0x60.
; Only whole batches of six blocks are processed.
;-----------------------------------------------------------------------
global aesni_gcm_decrypt

ALIGN 32
aesni_gcm_decrypt:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_aesni_gcm_decrypt:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    xor r10,r10

    cmp rdx,0x60
    jb NEAR $L$gcm_dec_abort

    ; Frame: rax keeps the entry rsp. Six pushes occupy [rax-48, rax),
    ; the ten callee-saved xmm registers occupy [rax-216, rax-56).
    ; gcm_se_handler relies on exactly these offsets.
    lea rax,[rsp]

    push rbx

    push rbp

    push r12

    push r13

    push r14

    push r15

    lea rsp,[((-168))+rsp]
    movaps XMMWORD[(-216)+rax],xmm6
    movaps XMMWORD[(-200)+rax],xmm7
    movaps XMMWORD[(-184)+rax],xmm8
    movaps XMMWORD[(-168)+rax],xmm9
    movaps XMMWORD[(-152)+rax],xmm10
    movaps XMMWORD[(-136)+rax],xmm11
    movaps XMMWORD[(-120)+rax],xmm12
    movaps XMMWORD[(-104)+rax],xmm13
    movaps XMMWORD[(-88)+rax],xmm14
    movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
    vzeroupper

    vmovdqu xmm1,XMMWORD[r8]               ; xmm1 = counter block
    add rsp,-128
    mov ebx,DWORD[12+r8]                   ; ebx = last dword of the counter block
    lea r11,[$L$bswap_mask]
    lea r14,[((-128))+rcx]
    mov r15,0xf80
    vmovdqu xmm8,XMMWORD[r9]               ; xmm8 = incoming hash value
    and rsp,-128                           ; 128-byte aligned scratch area
    vmovdqu xmm0,XMMWORD[r11]
    lea rcx,[128+rcx]                      ; bias key pointer for short displacements
    lea r9,[((32+32))+r9]                  ; bias r9 by 64; original r9 is now [-64+r9]
    mov ebp,DWORD[((240-128))+rcx]         ; dword at key+240
    vpshufb xmm8,xmm8,xmm0

    ; Compare bits 7..11 of the stack address with those of (key - 128).
    ; If the stack lies 0..767 bytes above the key in that window, rsp is
    ; lowered by the difference. The label name suggests this avoids
    ; stack/key-schedule aliasing.
    and r14,r15
    and r15,rsp
    sub r15,r14
    jc NEAR $L$dec_no_key_aliasing
    cmp r15,768
    jnc NEAR $L$dec_no_key_aliasing
    sub rsp,r15
$L$dec_no_key_aliasing:

    ; Load the first six input blocks, byte-swap them, and stage them
    ; for hashing: block 5 stays in xmm7, blocks 4..0 go to
    ; [rsp+48], [rsp+64], [rsp+80], [rsp+96], [rsp+112].
    vmovdqu xmm7,XMMWORD[80+rdi]
    lea r14,[rdi]                          ; r14 = movbe source = input
    vmovdqu xmm4,XMMWORD[64+rdi]

    lea r15,[((-192))+rdx*1+rdi]           ; r15 = input + len - 192 (limit for advancing r14)

    vmovdqu xmm5,XMMWORD[48+rdi]
    shr rdx,4                              ; bytes -> 16-byte blocks
    xor r10,r10
    vmovdqu xmm6,XMMWORD[32+rdi]
    vpshufb xmm7,xmm7,xmm0
    vmovdqu xmm2,XMMWORD[16+rdi]
    vpshufb xmm4,xmm4,xmm0
    vmovdqu xmm3,XMMWORD[rdi]
    vpshufb xmm5,xmm5,xmm0
    vmovdqu XMMWORD[48+rsp],xmm4
    vpshufb xmm6,xmm6,xmm0
    vmovdqu XMMWORD[64+rsp],xmm5
    vpshufb xmm2,xmm2,xmm0
    vmovdqu XMMWORD[80+rsp],xmm6
    vpshufb xmm3,xmm3,xmm0
    vmovdqu XMMWORD[96+rsp],xmm2
    vmovdqu XMMWORD[112+rsp],xmm3

    call _aesni_ctr32_ghash_6x

    ; Store the last batch, which the helper leaves in registers.
    vmovups XMMWORD[(-96)+rsi],xmm9
    vmovups XMMWORD[(-80)+rsi],xmm10
    vmovups XMMWORD[(-64)+rsi],xmm11
    vmovups XMMWORD[(-48)+rsi],xmm12
    vmovups XMMWORD[(-32)+rsi],xmm13
    vmovups XMMWORD[(-16)+rsi],xmm14

    vpshufb xmm8,xmm8,XMMWORD[r11]
    vmovdqu XMMWORD[(-64)+r9],xmm8         ; write hash value back to the caller's r9

    vzeroupper
    movaps xmm6,XMMWORD[((-216))+rax]
    movaps xmm7,XMMWORD[((-200))+rax]
    movaps xmm8,XMMWORD[((-184))+rax]
    movaps xmm9,XMMWORD[((-168))+rax]
    movaps xmm10,XMMWORD[((-152))+rax]
    movaps xmm11,XMMWORD[((-136))+rax]
    movaps xmm12,XMMWORD[((-120))+rax]
    movaps xmm13,XMMWORD[((-104))+rax]
    movaps xmm14,XMMWORD[((-88))+rax]
    movaps xmm15,XMMWORD[((-72))+rax]
    mov r15,QWORD[((-48))+rax]

    mov r14,QWORD[((-40))+rax]

    mov r13,QWORD[((-32))+rax]

    mov r12,QWORD[((-24))+rax]

    mov rbp,QWORD[((-16))+rax]

    mov rbx,QWORD[((-8))+rax]

    lea rsp,[rax]

$L$gcm_dec_abort:
    mov rax,r10                            ; return value = bytes processed
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_aesni_gcm_decrypt:

;-----------------------------------------------------------------------
; _aesni_ctr32_6x  (internal, non-standard register interface)
;
; Encrypts six counter blocks and XORs them with 96 bytes at rdi,
; storing the result at rsi; both pointers are advanced by 96.
; In:  rcx = key schedule + 128, ebp = dword from key+240,
;      r11 = $L$bswap_mask, xmm0 = byte-swap mask (used on the slow
;      path), xmm1 = counter block, ebx = last dword of counter block
; Out: xmm9..xmm14 = the six output blocks (also stored),
;      xmm1 = counter for the next batch, ebx updated
; Clobbers r12, r13, xmm2..xmm6, xmm8, xmm15, flags.
;-----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_6x:

    vmovdqu xmm4,XMMWORD[((0-128))+rcx]
    vmovdqu xmm2,XMMWORD[32+r11]           ; xmm2 = $L$one_msb
    lea r13,[((-1))+rbp]                   ; loop count = ebp - 1
    vmovups xmm15,XMMWORD[((16-128))+rcx]
    lea r12,[((32-128))+rcx]               ; r12 walks the remaining round keys
    vpxor xmm9,xmm1,xmm4
    add ebx,100663296                      ; 6 << 24; carry => byte-wise increment would wrap
    jc NEAR $L$handle_ctr32_2
    vpaddb xmm10,xmm1,xmm2
    vpaddb xmm11,xmm10,xmm2
    vpxor xmm10,xmm10,xmm4
    vpaddb xmm12,xmm11,xmm2
    vpxor xmm11,xmm11,xmm4
    vpaddb xmm13,xmm12,xmm2
    vpxor xmm12,xmm12,xmm4
    vpaddb xmm14,xmm13,xmm2
    vpxor xmm13,xmm13,xmm4
    vpaddb xmm1,xmm14,xmm2
    vpxor xmm14,xmm14,xmm4
    jmp NEAR $L$oop_ctr32

ALIGN 16
$L$oop_ctr32:
    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15
    vmovups xmm15,XMMWORD[r12]
    lea r12,[16+r12]
    dec r13d
    jnz NEAR $L$oop_ctr32

    ; xmm3 = last round key; pre-XOR it with the input so vaesenclast
    ; yields keystream XOR input.
    vmovdqu xmm3,XMMWORD[r12]
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm4,xmm3,XMMWORD[rdi]
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm5,xmm3,XMMWORD[16+rdi]
    vaesenc xmm11,xmm11,xmm15
    vpxor xmm6,xmm3,XMMWORD[32+rdi]
    vaesenc xmm12,xmm12,xmm15
    vpxor xmm8,xmm3,XMMWORD[48+rdi]
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm2,xmm3,XMMWORD[64+rdi]
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm3,xmm3,XMMWORD[80+rdi]
    lea rdi,[96+rdi]

    vaesenclast xmm9,xmm9,xmm4
    vaesenclast xmm10,xmm10,xmm5
    vaesenclast xmm11,xmm11,xmm6
    vaesenclast xmm12,xmm12,xmm8
    vaesenclast xmm13,xmm13,xmm2
    vaesenclast xmm14,xmm14,xmm3
    vmovups XMMWORD[rsi],xmm9
    vmovups XMMWORD[16+rsi],xmm10
    vmovups XMMWORD[32+rsi],xmm11
    vmovups XMMWORD[48+rsi],xmm12
    vmovups XMMWORD[64+rsi],xmm13
    vmovups XMMWORD[80+rsi],xmm14
    lea rsi,[96+rsi]

    DB 0F3h,0C3h ;repret
; Slow path of _aesni_ctr32_6x: same 32-bit-lane counter arithmetic as
; $L$handle_ctr32, using xmm0 as the byte-swap mask and xmm4 as the
; round-0 key.
ALIGN 32
$L$handle_ctr32_2:
    vpshufb xmm6,xmm1,xmm0
    vmovdqu xmm5,XMMWORD[48+r11]           ; xmm5 = $L$two_lsb
    vpaddd xmm10,xmm6,XMMWORD[64+r11]      ; + $L$one_lsb
    vpaddd xmm11,xmm6,xmm5
    vpaddd xmm12,xmm10,xmm5
    vpshufb xmm10,xmm10,xmm0
    vpaddd xmm13,xmm11,xmm5
    vpshufb xmm11,xmm11,xmm0
    vpxor xmm10,xmm10,xmm4
    vpaddd xmm14,xmm12,xmm5
    vpshufb xmm12,xmm12,xmm0
    vpxor xmm11,xmm11,xmm4
    vpaddd xmm1,xmm13,xmm5
    vpshufb xmm13,xmm13,xmm0
    vpxor xmm12,xmm12,xmm4
    vpshufb xmm14,xmm14,xmm0
    vpxor xmm13,xmm13,xmm4
    vpshufb xmm1,xmm1,xmm0
    vpxor xmm14,xmm14,xmm4
    jmp NEAR $L$oop_ctr32


;-----------------------------------------------------------------------
; aesni_gcm_encrypt  (exported, Win64 ABI)
;
; Same six-argument register mapping as aesni_gcm_decrypt
; (rdi=in, rsi=out, rdx=len, rcx=key, r8=counter block, r9=hash value).
; Returns rax = number of bytes processed (r10); 0 when
; rdx < 0x60*3 (288 bytes).
;
; Flow: two plain _aesni_ctr32_6x batches run first so that output
; exists to hash; _aesni_ctr32_ghash_6x then handles the remaining
; batches; the code after that call hashes the blocks still pending
; (stack-staged ones plus the final six in xmm9..xmm14).
;-----------------------------------------------------------------------
global aesni_gcm_encrypt

ALIGN 32
aesni_gcm_encrypt:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_aesni_gcm_encrypt:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

; In builds with neither NDEBUG nor BORINGSSL_FIPS defined, set byte 2
; of the external BORINGSSL_function_hit array to 1 on entry.
%ifndef NDEBUG
%ifndef BORINGSSL_FIPS
EXTERN BORINGSSL_function_hit
    mov BYTE[((BORINGSSL_function_hit+2))],1
%endif
%endif
    xor r10,r10

    cmp rdx,0x60*3
    jb NEAR $L$gcm_enc_abort

    ; Same frame layout as aesni_gcm_decrypt (shared by gcm_se_handler).
    lea rax,[rsp]

    push rbx

    push rbp

    push r12

    push r13

    push r14

    push r15

    lea rsp,[((-168))+rsp]
    movaps XMMWORD[(-216)+rax],xmm6
    movaps XMMWORD[(-200)+rax],xmm7
    movaps XMMWORD[(-184)+rax],xmm8
    movaps XMMWORD[(-168)+rax],xmm9
    movaps XMMWORD[(-152)+rax],xmm10
    movaps XMMWORD[(-136)+rax],xmm11
    movaps XMMWORD[(-120)+rax],xmm12
    movaps XMMWORD[(-104)+rax],xmm13
    movaps XMMWORD[(-88)+rax],xmm14
    movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
    vzeroupper

    vmovdqu xmm1,XMMWORD[r8]               ; xmm1 = counter block
    add rsp,-128
    mov ebx,DWORD[12+r8]                   ; ebx = last dword of the counter block
    lea r11,[$L$bswap_mask]
    lea r14,[((-128))+rcx]
    mov r15,0xf80
    lea rcx,[128+rcx]                      ; bias key pointer
    vmovdqu xmm0,XMMWORD[r11]              ; xmm0 = byte-swap mask
    and rsp,-128
    mov ebp,DWORD[((240-128))+rcx]         ; dword at key+240

    ; Same stack-vs-key address adjustment as in aesni_gcm_decrypt.
    and r14,r15
    and r15,rsp
    sub r15,r14
    jc NEAR $L$enc_no_key_aliasing
    cmp r15,768
    jnc NEAR $L$enc_no_key_aliasing
    sub rsp,r15
$L$enc_no_key_aliasing:

    lea r14,[rsi]                          ; r14 = movbe source = output buffer

    lea r15,[((-192))+rdx*1+rsi]           ; r15 = output + len - 192

    shr rdx,4                              ; bytes -> 16-byte blocks

    ; First batch: byte-swap its six output blocks and stage them
    ; (block 0 -> [rsp+112] ... block 4 -> [rsp+48], block 5 in xmm7).
    call _aesni_ctr32_6x
    vpshufb xmm8,xmm9,xmm0
    vpshufb xmm2,xmm10,xmm0
    vmovdqu XMMWORD[112+rsp],xmm8
    vpshufb xmm4,xmm11,xmm0
    vmovdqu XMMWORD[96+rsp],xmm2
    vpshufb xmm5,xmm12,xmm0
    vmovdqu XMMWORD[80+rsp],xmm4
    vpshufb xmm6,xmm13,xmm0
    vmovdqu XMMWORD[64+rsp],xmm5
    vpshufb xmm7,xmm14,xmm0
    vmovdqu XMMWORD[48+rsp],xmm6

    call _aesni_ctr32_6x

    vmovdqu xmm8,XMMWORD[r9]               ; xmm8 = incoming hash value
    lea r9,[((32+32))+r9]                  ; bias r9 by 64
    sub rdx,12                             ; two batches already done
    mov r10,0x60*2                         ; ... and counted in the return value
    vpshufb xmm8,xmm8,xmm0

    call _aesni_ctr32_ghash_6x
    ; Store the final batch (left in xmm9..xmm14 by the helper) and, in
    ; the same instruction stream, start multiplying the blocks still
    ; staged at [rsp+32 .. rsp+112] by the table entries at r9-32 onward.
    vmovdqu xmm7,XMMWORD[32+rsp]
    vmovdqu xmm0,XMMWORD[r11]
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpunpckhqdq xmm1,xmm7,xmm7
    vmovdqu xmm15,XMMWORD[((32-32))+r9]
    vmovups XMMWORD[(-96)+rsi],xmm9
    vpshufb xmm9,xmm9,xmm0
    vpxor xmm1,xmm1,xmm7
    vmovups XMMWORD[(-80)+rsi],xmm10
    vpshufb xmm10,xmm10,xmm0
    vmovups XMMWORD[(-64)+rsi],xmm11
    vpshufb xmm11,xmm11,xmm0
    vmovups XMMWORD[(-48)+rsi],xmm12
    vpshufb xmm12,xmm12,xmm0
    vmovups XMMWORD[(-32)+rsi],xmm13
    vpshufb xmm13,xmm13,xmm0
    vmovups XMMWORD[(-16)+rsi],xmm14
    vpshufb xmm14,xmm14,xmm0
    vmovdqu XMMWORD[16+rsp],xmm9
    vmovdqu xmm6,XMMWORD[48+rsp]
    vmovdqu xmm0,XMMWORD[((16-32))+r9]
    vpunpckhqdq xmm2,xmm6,xmm6
    vpclmulqdq xmm5,xmm7,xmm3,0x00
    vpxor xmm2,xmm2,xmm6
    vpclmulqdq xmm7,xmm7,xmm3,0x11
    vpclmulqdq xmm1,xmm1,xmm15,0x00

    vmovdqu xmm9,XMMWORD[64+rsp]
    vpclmulqdq xmm4,xmm6,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((48-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm5,xmm9,xmm9
    vpclmulqdq xmm6,xmm6,xmm0,0x11
    vpxor xmm5,xmm5,xmm9
    vpxor xmm6,xmm6,xmm7
    vpclmulqdq xmm2,xmm2,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((80-32))+r9]
    vpxor xmm2,xmm2,xmm1

    vmovdqu xmm1,XMMWORD[80+rsp]
    vpclmulqdq xmm7,xmm9,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((64-32))+r9]
    vpxor xmm7,xmm7,xmm4
    vpunpckhqdq xmm4,xmm1,xmm1
    vpclmulqdq xmm9,xmm9,xmm3,0x11
    vpxor xmm4,xmm4,xmm1
    vpxor xmm9,xmm9,xmm6
    vpclmulqdq xmm5,xmm5,xmm15,0x00
    vpxor xmm5,xmm5,xmm2

    vmovdqu xmm2,XMMWORD[96+rsp]
    vpclmulqdq xmm6,xmm1,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((96-32))+r9]
    vpxor xmm6,xmm6,xmm7
    vpunpckhqdq xmm7,xmm2,xmm2
    vpclmulqdq xmm1,xmm1,xmm0,0x11
    vpxor xmm7,xmm7,xmm2
    vpxor xmm1,xmm1,xmm9
    vpclmulqdq xmm4,xmm4,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((128-32))+r9]
    vpxor xmm4,xmm4,xmm5

    vpxor xmm8,xmm8,XMMWORD[112+rsp]       ; fold the accumulator into the last staged block
    vpclmulqdq xmm5,xmm2,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((112-32))+r9]
    vpunpckhqdq xmm9,xmm8,xmm8
    vpxor xmm5,xmm5,xmm6
    vpclmulqdq xmm2,xmm2,xmm3,0x11
    vpxor xmm9,xmm9,xmm8
    vpxor xmm2,xmm2,xmm1
    vpclmulqdq xmm7,xmm7,xmm15,0x00
    vpxor xmm4,xmm7,xmm4

    ; From here the final six blocks (byte-swapped xmm14 down to xmm10,
    ; and xmm9 via [rsp+16]) are multiplied in, interleaved with the
    ; $L$poly folds of the previous group's result.
    vpclmulqdq xmm6,xmm8,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpunpckhqdq xmm1,xmm14,xmm14
    vpclmulqdq xmm8,xmm8,xmm0,0x11
    vpxor xmm1,xmm1,xmm14
    vpxor xmm5,xmm6,xmm5
    vpclmulqdq xmm9,xmm9,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((32-32))+r9]
    vpxor xmm7,xmm8,xmm2
    vpxor xmm6,xmm9,xmm4

    vmovdqu xmm0,XMMWORD[((16-32))+r9]
    vpxor xmm9,xmm7,xmm5
    vpclmulqdq xmm4,xmm14,xmm3,0x00
    vpxor xmm6,xmm6,xmm9
    vpunpckhqdq xmm2,xmm13,xmm13
    vpclmulqdq xmm14,xmm14,xmm3,0x11
    vpxor xmm2,xmm2,xmm13
    vpslldq xmm9,xmm6,8
    vpclmulqdq xmm1,xmm1,xmm15,0x00
    vpxor xmm8,xmm5,xmm9
    vpsrldq xmm6,xmm6,8
    vpxor xmm7,xmm7,xmm6

    vpclmulqdq xmm5,xmm13,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((48-32))+r9]
    vpxor xmm5,xmm5,xmm4
    vpunpckhqdq xmm9,xmm12,xmm12
    vpclmulqdq xmm13,xmm13,xmm0,0x11
    vpxor xmm9,xmm9,xmm12
    vpxor xmm13,xmm13,xmm14
    vpalignr xmm14,xmm8,xmm8,8
    vpclmulqdq xmm2,xmm2,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((80-32))+r9]
    vpxor xmm2,xmm2,xmm1

    vpclmulqdq xmm4,xmm12,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((64-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm1,xmm11,xmm11
    vpclmulqdq xmm12,xmm12,xmm3,0x11
    vpxor xmm1,xmm1,xmm11
    vpxor xmm12,xmm12,xmm13
    vxorps xmm7,xmm7,XMMWORD[16+rsp]       ; XOR in the byte-swapped xmm9 saved above
    vpclmulqdq xmm9,xmm9,xmm15,0x00
    vpxor xmm9,xmm9,xmm2

    vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 ; multiply by $L$poly
    vxorps xmm8,xmm8,xmm14

    vpclmulqdq xmm5,xmm11,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((96-32))+r9]
    vpxor xmm5,xmm5,xmm4
    vpunpckhqdq xmm2,xmm10,xmm10
    vpclmulqdq xmm11,xmm11,xmm0,0x11
    vpxor xmm2,xmm2,xmm10
    vpalignr xmm14,xmm8,xmm8,8
    vpxor xmm11,xmm11,xmm12
    vpclmulqdq xmm1,xmm1,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((128-32))+r9]
    vpxor xmm1,xmm1,xmm9

    vxorps xmm14,xmm14,xmm7
    vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 ; multiply by $L$poly
    vxorps xmm8,xmm8,xmm14

    vpclmulqdq xmm4,xmm10,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((112-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm9,xmm8,xmm8
    vpclmulqdq xmm10,xmm10,xmm3,0x11
    vpxor xmm9,xmm9,xmm8
    vpxor xmm10,xmm10,xmm11
    vpclmulqdq xmm2,xmm2,xmm15,0x00
    vpxor xmm2,xmm2,xmm1

    vpclmulqdq xmm5,xmm8,xmm0,0x00
    vpclmulqdq xmm7,xmm8,xmm0,0x11
    vpxor xmm5,xmm5,xmm4
    vpclmulqdq xmm6,xmm9,xmm15,0x10
    vpxor xmm7,xmm7,xmm10
    vpxor xmm6,xmm6,xmm2

    vpxor xmm4,xmm7,xmm5
    vpxor xmm6,xmm6,xmm4
    vpslldq xmm1,xmm6,8
    vmovdqu xmm3,XMMWORD[16+r11]           ; xmm3 = $L$poly
    vpsrldq xmm6,xmm6,8
    vpxor xmm8,xmm5,xmm1
    vpxor xmm7,xmm7,xmm6

    vpalignr xmm2,xmm8,xmm8,8
    vpclmulqdq xmm8,xmm8,xmm3,0x10
    vpxor xmm8,xmm8,xmm2

    vpalignr xmm2,xmm8,xmm8,8
    vpclmulqdq xmm8,xmm8,xmm3,0x10
    vpxor xmm2,xmm2,xmm7
    vpxor xmm8,xmm8,xmm2
    vpshufb xmm8,xmm8,XMMWORD[r11]
    vmovdqu XMMWORD[(-64)+r9],xmm8         ; write hash value back to the caller's r9

    vzeroupper
    movaps xmm6,XMMWORD[((-216))+rax]
    movaps xmm7,XMMWORD[((-200))+rax]
    movaps xmm8,XMMWORD[((-184))+rax]
    movaps xmm9,XMMWORD[((-168))+rax]
    movaps xmm10,XMMWORD[((-152))+rax]
    movaps xmm11,XMMWORD[((-136))+rax]
    movaps xmm12,XMMWORD[((-120))+rax]
    movaps xmm13,XMMWORD[((-104))+rax]
    movaps xmm14,XMMWORD[((-88))+rax]
    movaps xmm15,XMMWORD[((-72))+rax]
    mov r15,QWORD[((-48))+rax]

    mov r14,QWORD[((-40))+rax]

    mov r13,QWORD[((-32))+rax]

    mov r12,QWORD[((-24))+rax]

    mov rbp,QWORD[((-16))+rax]

    mov rbx,QWORD[((-8))+rax]

    lea rsp,[rax]

$L$gcm_enc_abort:
    mov rax,r10                            ; return value = bytes processed
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_aesni_gcm_encrypt:
; Constant table. Code addresses every entry as an offset from
; $L$bswap_mask (held in r11): +0 bswap_mask, +16 poly, +32 one_msb,
; +48 two_lsb, +64 one_lsb. Do not reorder.
ALIGN 64
$L$bswap_mask:
DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; ASCII, NUL-terminated:
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN 64
EXTERN __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; gcm_se_handler - exception handler registered for both entry points
; through the .xdata records at the end of the file.
;
; In: r8 = context record, r9 = dispatcher context (the third and
;     fourth Win64 arguments). Field names in the comments below follow
;     the Win64 CONTEXT / DISPATCHER_CONTEXT layouts and are not defined
;     in this file - verify against winnt.h.
;
; HandlerData holds two image-relative addresses: the function's *_body
; label and its *_abort label. Three cases:
;   Rip <  body            frame not built yet; context->Rax still
;                          holds the entry rsp (set by `mov rax,rsp`)
;   Rip >= abort           frame already torn down; use context->Rsp
;   otherwise              recover rbx/rbp/r12-r15 and xmm6-xmm15 from
;                          the frame addressed by context->Rax
; In all cases rdi/rsi are recovered from [entry rsp + 8/16], then the
; context is copied and RtlVirtualUnwind is called. Returns eax = 1.
;-----------------------------------------------------------------------
ALIGN 16
gcm_se_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64

    mov rax,QWORD[120+r8]                  ; context->Rax
    mov rbx,QWORD[248+r8]                  ; context->Rip

    mov rsi,QWORD[8+r9]                    ; disp->ImageBase
    mov r11,QWORD[56+r9]                   ; disp->HandlerData

    mov r10d,DWORD[r11]                    ; HandlerData[0] = body label RVA
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail

    mov rax,QWORD[152+r8]                  ; context->Rsp

    mov r10d,DWORD[4+r11]                  ; HandlerData[1] = abort label RVA
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail

    mov rax,QWORD[120+r8]                  ; inside the body: rax = entry rsp

    mov r15,QWORD[((-48))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r12,QWORD[((-24))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov rbx,QWORD[((-8))+rax]
    mov QWORD[240+r8],r15
    mov QWORD[232+r8],r14
    mov QWORD[224+r8],r13
    mov QWORD[216+r8],r12
    mov QWORD[160+r8],rbp
    mov QWORD[144+r8],rbx

    ; Copy the ten saved xmm registers (20 qwords = 160 bytes) from the
    ; frame into the context at offset 512.
    lea rsi,[((-216))+rax]
    lea rdi,[512+r8]
    mov ecx,20
    DD 0xa548f3fc                          ; bytes FC F3 48 A5 = cld; rep movsq

$L$common_seh_tail:
    mov rdi,QWORD[8+rax]                   ; rdi/rsi as saved by the WIN64 prologue
    mov rsi,QWORD[16+rax]
    mov QWORD[152+r8],rax
    mov QWORD[168+r8],rsi
    mov QWORD[176+r8],rdi

    mov rdi,QWORD[40+r9]                   ; disp->ContextRecord
    mov rsi,r8
    mov ecx,154                            ; 154 qwords = 1232 bytes
    DD 0xa548f3fc                          ; cld; rep movsq

    ; Marshal arguments for RtlVirtualUnwind: four in registers
    ; (rcx, rdx, r8, r9), four more at [rsp+32 .. rsp+56].
    mov rsi,r9
    xor rcx,rcx
    mov rdx,QWORD[8+rsi]
    mov r8,QWORD[rsi]
    mov r9,QWORD[16+rsi]
    mov r10,QWORD[40+rsi]
    lea r11,[56+rsi]
    lea r12,[24+rsi]
    mov QWORD[32+rsp],r10
    mov QWORD[40+rsp],r11
    mov QWORD[48+rsp],r12
    mov QWORD[56+rsp],rcx
    call QWORD[__imp_RtlVirtualUnwind]

    mov eax,1
    add rsp,64
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    DB 0F3h,0C3h ;repret


; Function table: for each entry point, image-relative begin address,
; end address, and the address of its unwind record below.
section .pdata rdata align=4
ALIGN 4
    DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
    DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
    DD $L$SEH_gcm_dec_info wrt ..imagebase

    DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
    DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
    DD $L$SEH_gcm_enc_info wrt ..imagebase
; Unwind records: a 4-byte header, the handler address, then the two
; HandlerData entries that gcm_se_handler reads (body label, abort
; label).
section .xdata rdata align=8
ALIGN 8
$L$SEH_gcm_dec_info:
DB 9,0,0,0
    DD gcm_se_handler wrt ..imagebase
    DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_gcm_enc_info:
DB 9,0,0,0
    DD gcm_se_handler wrt ..imagebase
    DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase