; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Overview (documentation only; no instruction below has been changed):
;   NASM-syntax x86-64 code. The two exported entry points,
;   aesni_gcm_decrypt and aesni_gcm_encrypt, receive their arguments in the
;   Microsoft x64 registers and immediately move them into
;   rdi, rsi, rdx, rcx, r8, r9. The identification string embedded after the
;   constant table reads
;   "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>".
;   Two internal helpers (_aesni_ctr32_ghash_6x, _aesni_ctr32_6x) are reached
;   only through `call` from the exported routines and use a private register
;   contract described at each helper.

default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section .text code align=64

;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x -- internal helper (private register contract).
;
; Each pass of $L$oop6x runs the AES rounds for six counter blocks held in
; xmm9..xmm14, interleaved with vpclmulqdq multiplications of six
; previously staged, byte-reversed blocks by table entries read through r9.
;
; Registers as used below:
;   rdi      input pointer; XORed into the final round key, += 96 per pass
;   rsi      output pointer, += 96 per pass
;   rdx      remaining 16-byte blocks (callers shift the byte length right
;            by 4 first); 6 is subtracted on entry and on every pass
;   rcx      key pointer + 128; round keys are read at [(16*i-128)+rcx]
;   r8       16-byte counter block in memory, rewritten from xmm1 each pass
;   r9       table pointer (callers add 64); entries read at [(k-32)+r9]
;   r10      running count, += 0x60 per pass
;   r11      -> $L$bswap_mask; $L$poly, $L$one_msb, $L$two_lsb, $L$one_lsb
;            follow at +16, +32, +48, +64
;   r14,r15  pointer to the data staged for the next pass / its upper bound
;   ebx      32-bit word loaded from counter-block offset 12
;   ebp      32-bit value from key offset 240; selects the number of rounds
;   xmm1     counter block for the next pass
;   xmm7     first staged block; xmm8 = running hash value
; Stack: slots are written as (n+8)+rsp because the return address pushed
; by `call` sits below the caller's frame; (n+8)+rsp here is n+rsp in the
; caller.
; On return xmm9..xmm14 hold the last six output blocks, which have NOT been
; stored; both callers store them at [(-96)+rsi]..[(-16)+rsi].
;-----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_ghash_6x:

    ; xmm2 = $L$one_msb: vpaddb with it adds 1 to byte 15 of a block.
    vmovdqu xmm2,XMMWORD[32+r11]
    sub rdx,6
    vpxor xmm4,xmm4,xmm4
    ; xmm15 = round key 0.
    vmovdqu xmm15,XMMWORD[((0-128))+rcx]
    vpaddb xmm10,xmm1,xmm2
    vpaddb xmm11,xmm10,xmm2
    vpaddb xmm12,xmm11,xmm2
    vpaddb xmm13,xmm12,xmm2
    vpaddb xmm14,xmm13,xmm2
    vpxor xmm9,xmm1,xmm15
    ; Zero the stack slot that is XORed into xmm8 on every pass.
    vmovdqu XMMWORD[(16+8)+rsp],xmm4
    jmp NEAR $L$oop6x

ALIGN 32
$L$oop6x:
    ; 100663296 = 6<<24: adds 6 to the top byte of ebx, i.e. to the byte at
    ; counter offset 15. A carry means the byte-wise vpaddb increments would
    ; wrap, so the counters are recomputed on the slow path.
    add ebx,100663296
    jc NEAR $L$handle_ctr32
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpaddb xmm1,xmm14,xmm2
    vpxor xmm10,xmm10,xmm15
    vpxor xmm11,xmm11,xmm15

$L$resume_ctr32:
    ; Publish the counter for the following pass.
    vmovdqu XMMWORD[r8],xmm1
    vpclmulqdq xmm5,xmm7,xmm3,0x10
    vpxor xmm12,xmm12,xmm15
    vmovups xmm2,XMMWORD[((16-128))+rcx]
    vpclmulqdq xmm6,xmm7,xmm3,0x01

    ; r12 = 0x60 if r15 >= r14 (unsigned), else 0. The flags from this cmp
    ; are consumed by the setnc several instructions later; nothing in
    ; between writes flags.
    xor r12,r12
    cmp r15,r14

    vaesenc xmm9,xmm9,xmm2
    vmovdqu xmm0,XMMWORD[((48+8))+rsp]
    vpxor xmm13,xmm13,xmm15
    vpclmulqdq xmm1,xmm7,xmm3,0x00
    vaesenc xmm10,xmm10,xmm2
    vpxor xmm14,xmm14,xmm15
    setnc r12b
    vpclmulqdq xmm7,xmm7,xmm3,0x11
    vaesenc xmm11,xmm11,xmm2
    vmovdqu xmm3,XMMWORD[((16-32))+r9]
    neg r12
    vaesenc xmm12,xmm12,xmm2
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm0,xmm3,0x00
    vpxor xmm8,xmm8,xmm4
    vaesenc xmm13,xmm13,xmm2
    vpxor xmm4,xmm1,xmm5
    and r12,0x60
    vmovups xmm15,XMMWORD[((32-128))+rcx]
    vpclmulqdq xmm1,xmm0,xmm3,0x10
    vaesenc xmm14,xmm14,xmm2

    vpclmulqdq xmm2,xmm0,xmm3,0x01
    ; Advance the staging pointer by 0x60 or by 0 (see r12 above).
    lea r14,[r12*1+r14]
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
    vpclmulqdq xmm3,xmm0,xmm3,0x11
    vmovdqu xmm0,XMMWORD[((64+8))+rsp]
    vaesenc xmm10,xmm10,xmm15
    ; movbe loads byte-reversed qwords from the data at r14; they are written
    ; to the stack slots that the next pass reads as its staged blocks.
    movbe r13,QWORD[88+r14]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[80+r14]
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((32+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((40+8))+rsp],r12
    vmovdqu xmm5,XMMWORD[((48-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((48-128))+rcx]
    vpxor xmm6,xmm6,xmm1
    vpclmulqdq xmm1,xmm0,xmm5,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm2
    vpclmulqdq xmm2,xmm0,xmm5,0x10
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm7,xmm7,xmm3
    vpclmulqdq xmm3,xmm0,xmm5,0x01
    vaesenc xmm11,xmm11,xmm15
    vpclmulqdq xmm5,xmm0,xmm5,0x11
    vmovdqu xmm0,XMMWORD[((80+8))+rsp]
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm4,xmm4,xmm1
    vmovdqu xmm1,XMMWORD[((64-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((64-128))+rcx]
    vpxor xmm6,xmm6,xmm2
    vpclmulqdq xmm2,xmm0,xmm1,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm3
    vpclmulqdq xmm3,xmm0,xmm1,0x10
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[72+r14]
    vpxor xmm7,xmm7,xmm5
    vpclmulqdq xmm5,xmm0,xmm1,0x01
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[64+r14]
    vpclmulqdq xmm1,xmm0,xmm1,0x11
    vmovdqu xmm0,XMMWORD[((96+8))+rsp]
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((48+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((56+8))+rsp],r12
    vpxor xmm4,xmm4,xmm2
    vmovdqu xmm2,XMMWORD[((96-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((80-128))+rcx]
    vpxor xmm6,xmm6,xmm3
    vpclmulqdq xmm3,xmm0,xmm2,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm0,xmm2,0x10
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[56+r14]
    vpxor xmm7,xmm7,xmm1
    vpclmulqdq xmm1,xmm0,xmm2,0x01
    ; Fold the last staged block into the running hash value before it is
    ; multiplied below.
    vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[48+r14]
    vpclmulqdq xmm2,xmm0,xmm2,0x11
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((64+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((72+8))+rsp],r12
    vpxor xmm4,xmm4,xmm3
    vmovdqu xmm3,XMMWORD[((112-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    vmovups xmm15,XMMWORD[((96-128))+rcx]
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm8,xmm3,0x10
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm1
    vpclmulqdq xmm1,xmm8,xmm3,0x01
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[40+r14]
    vpxor xmm7,xmm7,xmm2
    vpclmulqdq xmm2,xmm8,xmm3,0x00
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[32+r14]
    vpclmulqdq xmm8,xmm8,xmm3,0x11
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((80+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((88+8))+rsp],r12
    vpxor xmm6,xmm6,xmm5
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm6,xmm6,xmm1

    vmovups xmm15,XMMWORD[((112-128))+rcx]
    vpslldq xmm5,xmm6,8
    vpxor xmm4,xmm4,xmm2
    ; xmm3 = $L$poly, the constant used by the reduction steps.
    vmovdqu xmm3,XMMWORD[16+r11]

    vaesenc xmm9,xmm9,xmm15
    vpxor xmm7,xmm7,xmm8
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm4,xmm4,xmm5
    movbe r13,QWORD[24+r14]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[16+r14]
    vpalignr xmm0,xmm4,xmm4,8
    vpclmulqdq xmm4,xmm4,xmm3,0x10
    mov QWORD[((96+8))+rsp],r13
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((104+8))+rsp],r12
    vaesenc xmm13,xmm13,xmm15
    vmovups xmm1,XMMWORD[((128-128))+rcx]
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vmovups xmm15,XMMWORD[((144-128))+rcx]
    vaesenc xmm10,xmm10,xmm1
    vpsrldq xmm6,xmm6,8
    vaesenc xmm11,xmm11,xmm1
    vpxor xmm7,xmm7,xmm6
    vaesenc xmm12,xmm12,xmm1
    vpxor xmm4,xmm4,xmm0
    movbe r13,QWORD[8+r14]
    vaesenc xmm13,xmm13,xmm1
    movbe r12,QWORD[r14]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((160-128))+rcx]
    ; Eight rounds are done. ebp < 11: finish at $L$enc_tail now;
    ; ebp == 11: two more rounds first (the `je` below reuses these flags,
    ; which vaesenc/vmovups leave untouched); otherwise four more rounds.
    cmp ebp,11
    jb NEAR $L$enc_tail

    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vaesenc xmm10,xmm10,xmm1
    vaesenc xmm11,xmm11,xmm1
    vaesenc xmm12,xmm12,xmm1
    vaesenc xmm13,xmm13,xmm1
    vmovups xmm15,XMMWORD[((176-128))+rcx]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((192-128))+rcx]
    je NEAR $L$enc_tail

    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vaesenc xmm10,xmm10,xmm1
    vaesenc xmm11,xmm11,xmm1
    vaesenc xmm12,xmm12,xmm1
    vaesenc xmm13,xmm13,xmm1
    vmovups xmm15,XMMWORD[((208-128))+rcx]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((224-128))+rcx]
    jmp NEAR $L$enc_tail

ALIGN 32
$L$handle_ctr32:
    ; Slow path for the counter: byte-reverse the block with $L$bswap_mask,
    ; form +1 ($L$one_lsb) and then successive +2 ($L$two_lsb) values with
    ; 32-bit adds, and byte-reverse each result back. xmm1 ends up as the
    ; original counter + 6.
    vmovdqu xmm0,XMMWORD[r11]
    vpshufb xmm6,xmm1,xmm0
    vmovdqu xmm5,XMMWORD[48+r11]
    vpaddd xmm10,xmm6,XMMWORD[64+r11]
    vpaddd xmm11,xmm6,xmm5
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpaddd xmm12,xmm10,xmm5
    vpshufb xmm10,xmm10,xmm0
    vpaddd xmm13,xmm11,xmm5
    vpshufb xmm11,xmm11,xmm0
    vpxor xmm10,xmm10,xmm15
    vpaddd xmm14,xmm12,xmm5
    vpshufb xmm12,xmm12,xmm0
    vpxor xmm11,xmm11,xmm15
    vpaddd xmm1,xmm13,xmm5
    vpshufb xmm13,xmm13,xmm0
    vpshufb xmm14,xmm14,xmm0
    vpshufb xmm1,xmm1,xmm0
    jmp NEAR $L$resume_ctr32

ALIGN 32
$L$enc_tail:
    ; Last two AES rounds. The final round key (xmm1) is XORed with the six
    ; input blocks first, so vaesenclast yields output = keystream ^ input.
    vaesenc xmm9,xmm9,xmm15
    vmovdqu XMMWORD[(16+8)+rsp],xmm7
    vpalignr xmm8,xmm4,xmm4,8
    vaesenc xmm10,xmm10,xmm15
    vpclmulqdq xmm4,xmm4,xmm3,0x10
    vpxor xmm2,xmm1,XMMWORD[rdi]
    vaesenc xmm11,xmm11,xmm15
    vpxor xmm0,xmm1,XMMWORD[16+rdi]
    vaesenc xmm12,xmm12,xmm15
    vpxor xmm5,xmm1,XMMWORD[32+rdi]
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm6,xmm1,XMMWORD[48+rdi]
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm7,xmm1,XMMWORD[64+rdi]
    vpxor xmm3,xmm1,XMMWORD[80+rdi]
    ; Reload the counter published at $L$resume_ctr32 and pre-compute the
    ; next six counter blocks in xmm0, xmm5, xmm6, xmm7, xmm3.
    vmovdqu xmm1,XMMWORD[r8]

    vaesenclast xmm9,xmm9,xmm2
    vmovdqu xmm2,XMMWORD[32+r11]
    vaesenclast xmm10,xmm10,xmm0
    vpaddb xmm0,xmm1,xmm2
    mov QWORD[((112+8))+rsp],r13
    lea rdi,[96+rdi]
    vaesenclast xmm11,xmm11,xmm5
    vpaddb xmm5,xmm0,xmm2
    mov QWORD[((120+8))+rsp],r12
    lea rsi,[96+rsi]
    vmovdqu xmm15,XMMWORD[((0-128))+rcx]
    vaesenclast xmm12,xmm12,xmm6
    vpaddb xmm6,xmm5,xmm2
    vaesenclast xmm13,xmm13,xmm7
    vpaddb xmm7,xmm6,xmm2
    vaesenclast xmm14,xmm14,xmm3
    vpaddb xmm3,xmm7,xmm2

    add r10,0x60
    sub rdx,0x6
    ; Borrow => no further group of six blocks: return with the outputs
    ; still in xmm9..xmm14.
    jc NEAR $L$6x_done

    ; Store this pass's outputs and move the pre-computed counters into
    ; place for the next pass.
    vmovups XMMWORD[(-96)+rsi],xmm9
    vpxor xmm9,xmm1,xmm15
    vmovups XMMWORD[(-80)+rsi],xmm10
    vmovdqa xmm10,xmm0
    vmovups XMMWORD[(-64)+rsi],xmm11
    vmovdqa xmm11,xmm5
    vmovups XMMWORD[(-48)+rsi],xmm12
    vmovdqa xmm12,xmm6
    vmovups XMMWORD[(-32)+rsi],xmm13
    vmovdqa xmm13,xmm7
    vmovups XMMWORD[(-16)+rsi],xmm14
    vmovdqa xmm14,xmm3
    vmovdqu xmm7,XMMWORD[((32+8))+rsp]
    jmp NEAR $L$oop6x

$L$6x_done:
    vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
    vpxor xmm8,xmm8,xmm4

    DB 0F3h,0C3h ;repret


global aesni_gcm_decrypt

;-----------------------------------------------------------------------
; aesni_gcm_decrypt
; Win64 arguments, as moved by the prologue:
;   rcx      -> rdi  input buffer
;   rdx      -> rsi  output buffer
;   r8       -> rdx  length in bytes
;   r9       -> rcx  key (round keys from offset 0, 32-bit round selector
;                    at offset 240)
;   [40+rsp] -> r8   16-byte counter block, read and rewritten in place
;   [48+rsp] -> r9   hash state: the first 16 bytes are read and written
;                    back; table entries are read from offset 32 onward
; Returns in rax the count accumulated in r10 (0x60 per pass of the 6x
; loop). If the length is below 0x60 it returns 0 and touches no buffer.
; The GHASH side reads from the INPUT buffer (r14 = rdi).
; Saves/restores rbx, rbp, r12-r15, rdi, rsi and xmm6-xmm15.
;-----------------------------------------------------------------------
ALIGN 32
aesni_gcm_decrypt:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_aesni_gcm_decrypt:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    ; r10 = return value; stays 0 on the early-out path.
    xor r10,r10

    cmp rdx,0x60
    jb NEAR $L$gcm_dec_abort

    ; rax keeps the entry rsp; every save slot below and the SEH handler
    ; are addressed relative to it.
    lea rax,[rsp]

    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    ; 6 pushes (48 bytes) + 168 = 216 bytes below rax; xmm6..xmm15 occupy
    ; [-216+rax] .. [-57+rax].
    lea rsp,[((-168))+rsp]
    movaps XMMWORD[(-216)+rax],xmm6
    movaps XMMWORD[(-200)+rax],xmm7
    movaps XMMWORD[(-184)+rax],xmm8
    movaps XMMWORD[(-168)+rax],xmm9
    movaps XMMWORD[(-152)+rax],xmm10
    movaps XMMWORD[(-136)+rax],xmm11
    movaps XMMWORD[(-120)+rax],xmm12
    movaps XMMWORD[(-104)+rax],xmm13
    movaps XMMWORD[(-88)+rax],xmm14
    movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
    vzeroupper

    vmovdqu xmm1,XMMWORD[r8]
    add rsp,-128
    mov ebx,DWORD[12+r8]
    lea r11,[$L$bswap_mask]
    lea r14,[((-128))+rcx]
    mov r15,0xf80
    vmovdqu xmm8,XMMWORD[r9]
    and rsp,-128
    vmovdqu xmm0,XMMWORD[r11]
    ; Bias rcx by +128 and r9 by +64; all later offsets are relative to
    ; the biased pointers.
    lea rcx,[128+rcx]
    lea r9,[((32+32))+r9]
    mov ebp,DWORD[((240-128))+rcx]
    vpshufb xmm8,xmm8,xmm0

    ; Compare address bits selected by mask 0xf80 of the stack pointer and
    ; of (key - 128). If the stack is 0..767 bytes above the key within that
    ; window, lower rsp by the difference. Per the label name this is meant
    ; to keep the frame from aliasing the key schedule.
    and r14,r15
    and r15,rsp
    sub r15,r14
    jc NEAR $L$dec_no_key_aliasing
    cmp r15,768
    jnc NEAR $L$dec_no_key_aliasing
    sub rsp,r15
$L$dec_no_key_aliasing:

    ; Stage the first six input blocks, byte-reversed, for the first pass
    ; of the stitched loop: block 5 in xmm7, blocks 4..0 at [48..112+rsp].
    vmovdqu xmm7,XMMWORD[80+rdi]
    lea r14,[rdi]
    vmovdqu xmm4,XMMWORD[64+rdi]

    ; r15 = input + length - 192: upper bound tested against r14 in the loop.
    lea r15,[((-192))+rdx*1+rdi]

    vmovdqu xmm5,XMMWORD[48+rdi]
    ; Byte length -> count of 16-byte blocks.
    shr rdx,4
    xor r10,r10
    vmovdqu xmm6,XMMWORD[32+rdi]
    vpshufb xmm7,xmm7,xmm0
    vmovdqu xmm2,XMMWORD[16+rdi]
    vpshufb xmm4,xmm4,xmm0
    vmovdqu xmm3,XMMWORD[rdi]
    vpshufb xmm5,xmm5,xmm0
    vmovdqu XMMWORD[48+rsp],xmm4
    vpshufb xmm6,xmm6,xmm0
    vmovdqu XMMWORD[64+rsp],xmm5
    vpshufb xmm2,xmm2,xmm0
    vmovdqu XMMWORD[80+rsp],xmm6
    vpshufb xmm3,xmm3,xmm0
    vmovdqu XMMWORD[96+rsp],xmm2
    vmovdqu XMMWORD[112+rsp],xmm3

    call _aesni_ctr32_ghash_6x

    ; The helper returns with the final six output blocks unstored.
    vmovups XMMWORD[(-96)+rsi],xmm9
    vmovups XMMWORD[(-80)+rsi],xmm10
    vmovups XMMWORD[(-64)+rsi],xmm11
    vmovups XMMWORD[(-48)+rsi],xmm12
    vmovups XMMWORD[(-32)+rsi],xmm13
    vmovups XMMWORD[(-16)+rsi],xmm14

    ; Byte-reverse the hash value and write it back to the caller's state
    ; (r9 was advanced by 64 above, so -64+r9 is the original pointer).
    vpshufb xmm8,xmm8,XMMWORD[r11]
    vmovdqu XMMWORD[(-64)+r9],xmm8

    vzeroupper
    movaps xmm6,XMMWORD[((-216))+rax]
    movaps xmm7,XMMWORD[((-200))+rax]
    movaps xmm8,XMMWORD[((-184))+rax]
    movaps xmm9,XMMWORD[((-168))+rax]
    movaps xmm10,XMMWORD[((-152))+rax]
    movaps xmm11,XMMWORD[((-136))+rax]
    movaps xmm12,XMMWORD[((-120))+rax]
    movaps xmm13,XMMWORD[((-104))+rax]
    movaps xmm14,XMMWORD[((-88))+rax]
    movaps xmm15,XMMWORD[((-72))+rax]
    mov r15,QWORD[((-48))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r12,QWORD[((-24))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov rbx,QWORD[((-8))+rax]
    lea rsp,[rax]

$L$gcm_dec_abort:
    mov rax,r10
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_aesni_gcm_decrypt:

;-----------------------------------------------------------------------
; _aesni_ctr32_6x -- internal helper (private register contract).
;
; Produces six output blocks: encrypts six successive counter blocks and
; XORs the result with 96 bytes at rdi, storing to rsi. No hashing is done.
;   In:  rdi input, rsi output, rcx key pointer + 128, r11 -> $L$bswap_mask,
;        xmm0 = $L$bswap_mask contents (used on the slow counter path),
;        xmm1 counter block, ebx counter word (offset 12), ebp round selector
;   Out: rdi += 96, rsi += 96, xmm1 = counter + 6, ebx updated,
;        xmm9..xmm14 = the six output blocks (also stored at the old rsi)
;   Clobbers: r12, r13, xmm2..xmm6, xmm8, xmm15, flags
;-----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_6x:

    vmovdqu xmm4,XMMWORD[((0-128))+rcx]
    vmovdqu xmm2,XMMWORD[32+r11]
    ; The round loop below runs ebp-1 times; one more vaesenc and the
    ; vaesenclast follow it.
    lea r13,[((-1))+rbp]
    vmovups xmm15,XMMWORD[((16-128))+rcx]
    lea r12,[((32-128))+rcx]
    vpxor xmm9,xmm1,xmm4
    ; Same counter-wrap test as in $L$oop6x (100663296 = 6<<24).
    add ebx,100663296
    jc NEAR $L$handle_ctr32_2
    vpaddb xmm10,xmm1,xmm2
    vpaddb xmm11,xmm10,xmm2
    vpxor xmm10,xmm10,xmm4
    vpaddb xmm12,xmm11,xmm2
    vpxor xmm11,xmm11,xmm4
    vpaddb xmm13,xmm12,xmm2
    vpxor xmm12,xmm12,xmm4
    vpaddb xmm14,xmm13,xmm2
    vpxor xmm13,xmm13,xmm4
    vpaddb xmm1,xmm14,xmm2
    vpxor xmm14,xmm14,xmm4
    jmp NEAR $L$oop_ctr32

ALIGN 16
$L$oop_ctr32:
    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15
    vmovups xmm15,XMMWORD[r12]
    lea r12,[16+r12]
    dec r13d
    jnz NEAR $L$oop_ctr32

    ; xmm3 = last round key; XOR it with each input block so that
    ; vaesenclast directly produces keystream ^ input.
    vmovdqu xmm3,XMMWORD[r12]
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm4,xmm3,XMMWORD[rdi]
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm5,xmm3,XMMWORD[16+rdi]
    vaesenc xmm11,xmm11,xmm15
    vpxor xmm6,xmm3,XMMWORD[32+rdi]
    vaesenc xmm12,xmm12,xmm15
    vpxor xmm8,xmm3,XMMWORD[48+rdi]
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm2,xmm3,XMMWORD[64+rdi]
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm3,xmm3,XMMWORD[80+rdi]
    lea rdi,[96+rdi]

    vaesenclast xmm9,xmm9,xmm4
    vaesenclast xmm10,xmm10,xmm5
    vaesenclast xmm11,xmm11,xmm6
    vaesenclast xmm12,xmm12,xmm8
    vaesenclast xmm13,xmm13,xmm2
    vaesenclast xmm14,xmm14,xmm3
    vmovups XMMWORD[rsi],xmm9
    vmovups XMMWORD[16+rsi],xmm10
    vmovups XMMWORD[32+rsi],xmm11
    vmovups XMMWORD[48+rsi],xmm12
    vmovups XMMWORD[64+rsi],xmm13
    vmovups XMMWORD[80+rsi],xmm14
    lea rsi,[96+rsi]

    DB 0F3h,0C3h ;repret
ALIGN 32
$L$handle_ctr32_2:
    ; Slow counter path, same scheme as $L$handle_ctr32; relies on xmm0
    ; already holding the $L$bswap_mask contents.
    vpshufb xmm6,xmm1,xmm0
    vmovdqu xmm5,XMMWORD[48+r11]
    vpaddd xmm10,xmm6,XMMWORD[64+r11]
    vpaddd xmm11,xmm6,xmm5
    vpaddd xmm12,xmm10,xmm5
    vpshufb xmm10,xmm10,xmm0
    vpaddd xmm13,xmm11,xmm5
    vpshufb xmm11,xmm11,xmm0
    vpxor xmm10,xmm10,xmm4
    vpaddd xmm14,xmm12,xmm5
    vpshufb xmm12,xmm12,xmm0
    vpxor xmm11,xmm11,xmm4
    vpaddd xmm1,xmm13,xmm5
    vpshufb xmm13,xmm13,xmm0
    vpxor xmm12,xmm12,xmm4
    vpshufb xmm14,xmm14,xmm0
    vpxor xmm13,xmm13,xmm4
    vpshufb xmm1,xmm1,xmm0
    vpxor xmm14,xmm14,xmm4
    jmp NEAR $L$oop_ctr32



global aesni_gcm_encrypt

;-----------------------------------------------------------------------
; aesni_gcm_encrypt
; Same argument layout as aesni_gcm_decrypt (input, output, byte length,
; key, counter block, hash state/table), moved into rdi, rsi, rdx, rcx,
; r8, r9 by the prologue.
; Returns in rax the count accumulated in r10: 0x60*2 for the two leading
; _aesni_ctr32_6x calls plus 0x60 per pass of the stitched loop. If the
; length is below 0x60*3 it returns 0 and touches no buffer.
; The GHASH side reads from the OUTPUT buffer (r14 = rsi). The stitched
; loop stages data with movbe ahead of the pass that multiplies it, so after
; it returns the routine still has to hash the blocks left in the stack
; slots and the final six outputs in xmm9..xmm14; that is the long
; vpclmulqdq sequence after the third call.
;-----------------------------------------------------------------------
ALIGN 32
aesni_gcm_encrypt:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_aesni_gcm_encrypt:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

%ifdef BORINGSSL_DISPATCH_TEST
EXTERN BORINGSSL_function_hit
    ; Test-only build: set byte 2 of BORINGSSL_function_hit to 1.
    mov BYTE[((BORINGSSL_function_hit+2))],1
%endif
    xor r10,r10

    cmp rdx,0x60*3
    jb NEAR $L$gcm_enc_abort

    ; rax keeps the entry rsp for the save slots and the SEH handler.
    lea rax,[rsp]

    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    lea rsp,[((-168))+rsp]
    movaps XMMWORD[(-216)+rax],xmm6
    movaps XMMWORD[(-200)+rax],xmm7
    movaps XMMWORD[(-184)+rax],xmm8
    movaps XMMWORD[(-168)+rax],xmm9
    movaps XMMWORD[(-152)+rax],xmm10
    movaps XMMWORD[(-136)+rax],xmm11
    movaps XMMWORD[(-120)+rax],xmm12
    movaps XMMWORD[(-104)+rax],xmm13
    movaps XMMWORD[(-88)+rax],xmm14
    movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
    vzeroupper

    vmovdqu xmm1,XMMWORD[r8]
    add rsp,-128
    mov ebx,DWORD[12+r8]
    lea r11,[$L$bswap_mask]
    lea r14,[((-128))+rcx]
    mov r15,0xf80
    lea rcx,[128+rcx]
    vmovdqu xmm0,XMMWORD[r11]
    and rsp,-128
    mov ebp,DWORD[((240-128))+rcx]

    ; Same stack-vs-key adjustment as in aesni_gcm_decrypt.
    and r14,r15
    and r15,rsp
    sub r15,r14
    jc NEAR $L$enc_no_key_aliasing
    cmp r15,768
    jnc NEAR $L$enc_no_key_aliasing
    sub rsp,r15
$L$enc_no_key_aliasing:

    lea r14,[rsi]

    ; r15 = output + length - 192: upper bound tested against r14 in the loop.
    lea r15,[((-192))+rdx*1+rsi]

    shr rdx,4

    ; First six blocks: encrypt, then stage the byte-reversed outputs for
    ; hashing (block 5 in xmm7, blocks 4..0 at [48..112+rsp]).
    call _aesni_ctr32_6x
    vpshufb xmm8,xmm9,xmm0
    vpshufb xmm2,xmm10,xmm0
    vmovdqu XMMWORD[112+rsp],xmm8
    vpshufb xmm4,xmm11,xmm0
    vmovdqu XMMWORD[96+rsp],xmm2
    vpshufb xmm5,xmm12,xmm0
    vmovdqu XMMWORD[80+rsp],xmm4
    vpshufb xmm6,xmm13,xmm0
    vmovdqu XMMWORD[64+rsp],xmm5
    vpshufb xmm7,xmm14,xmm0
    vmovdqu XMMWORD[48+rsp],xmm6

    ; Second six blocks: encrypt only.
    call _aesni_ctr32_6x

    vmovdqu xmm8,XMMWORD[r9]
    lea r9,[((32+32))+r9]
    ; Twelve blocks are already done.
    sub rdx,12
    mov r10,0x60*2
    vpshufb xmm8,xmm8,xmm0

    call _aesni_ctr32_ghash_6x
    ; Hash the blocks still held in the stack slots while storing and
    ; byte-reversing the final six outputs.
    vmovdqu xmm7,XMMWORD[32+rsp]
    vmovdqu xmm0,XMMWORD[r11]
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpunpckhqdq xmm1,xmm7,xmm7
    vmovdqu xmm15,XMMWORD[((32-32))+r9]
    vmovups XMMWORD[(-96)+rsi],xmm9
    vpshufb xmm9,xmm9,xmm0
    vpxor xmm1,xmm1,xmm7
    vmovups XMMWORD[(-80)+rsi],xmm10
    vpshufb xmm10,xmm10,xmm0
    vmovups XMMWORD[(-64)+rsi],xmm11
    vpshufb xmm11,xmm11,xmm0
    vmovups XMMWORD[(-48)+rsi],xmm12
    vpshufb xmm12,xmm12,xmm0
    vmovups XMMWORD[(-32)+rsi],xmm13
    vpshufb xmm13,xmm13,xmm0
    vmovups XMMWORD[(-16)+rsi],xmm14
    vpshufb xmm14,xmm14,xmm0
    vmovdqu XMMWORD[16+rsp],xmm9
    vmovdqu xmm6,XMMWORD[48+rsp]
    vmovdqu xmm0,XMMWORD[((16-32))+r9]
    vpunpckhqdq xmm2,xmm6,xmm6
    vpclmulqdq xmm5,xmm7,xmm3,0x00
    vpxor xmm2,xmm2,xmm6
    vpclmulqdq xmm7,xmm7,xmm3,0x11
    vpclmulqdq xmm1,xmm1,xmm15,0x00

    vmovdqu xmm9,XMMWORD[64+rsp]
    vpclmulqdq xmm4,xmm6,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((48-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm5,xmm9,xmm9
    vpclmulqdq xmm6,xmm6,xmm0,0x11
    vpxor xmm5,xmm5,xmm9
    vpxor xmm6,xmm6,xmm7
    vpclmulqdq xmm2,xmm2,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((80-32))+r9]
    vpxor xmm2,xmm2,xmm1

    vmovdqu xmm1,XMMWORD[80+rsp]
    vpclmulqdq xmm7,xmm9,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((64-32))+r9]
    vpxor xmm7,xmm7,xmm4
    vpunpckhqdq xmm4,xmm1,xmm1
    vpclmulqdq xmm9,xmm9,xmm3,0x11
    vpxor xmm4,xmm4,xmm1
    vpxor xmm9,xmm9,xmm6
    vpclmulqdq xmm5,xmm5,xmm15,0x00
    vpxor xmm5,xmm5,xmm2

    vmovdqu xmm2,XMMWORD[96+rsp]
    vpclmulqdq xmm6,xmm1,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((96-32))+r9]
    vpxor xmm6,xmm6,xmm7
    vpunpckhqdq xmm7,xmm2,xmm2
    vpclmulqdq xmm1,xmm1,xmm0,0x11
    vpxor xmm7,xmm7,xmm2
    vpxor xmm1,xmm1,xmm9
    vpclmulqdq xmm4,xmm4,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((128-32))+r9]
    vpxor xmm4,xmm4,xmm5

    ; Fold the last staged block into the running hash value.
    vpxor xmm8,xmm8,XMMWORD[112+rsp]
    vpclmulqdq xmm5,xmm2,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((112-32))+r9]
    vpunpckhqdq xmm9,xmm8,xmm8
    vpxor xmm5,xmm5,xmm6
    vpclmulqdq xmm2,xmm2,xmm3,0x11
    vpxor xmm9,xmm9,xmm8
    vpxor xmm2,xmm2,xmm1
    vpclmulqdq xmm7,xmm7,xmm15,0x00
    vpxor xmm4,xmm7,xmm4

    ; From here the multiplications for the final six outputs
    ; (xmm14 down to xmm10, then the block saved at [16+rsp]) are
    ; interleaved with the reduction of the previous group.
    vpclmulqdq xmm6,xmm8,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpunpckhqdq xmm1,xmm14,xmm14
    vpclmulqdq xmm8,xmm8,xmm0,0x11
    vpxor xmm1,xmm1,xmm14
    vpxor xmm5,xmm6,xmm5
    vpclmulqdq xmm9,xmm9,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((32-32))+r9]
    vpxor xmm7,xmm8,xmm2
    vpxor xmm6,xmm9,xmm4

    vmovdqu xmm0,XMMWORD[((16-32))+r9]
    vpxor xmm9,xmm7,xmm5
    vpclmulqdq xmm4,xmm14,xmm3,0x00
    vpxor xmm6,xmm6,xmm9
    vpunpckhqdq xmm2,xmm13,xmm13
    vpclmulqdq xmm14,xmm14,xmm3,0x11
    vpxor xmm2,xmm2,xmm13
    vpslldq xmm9,xmm6,8
    vpclmulqdq xmm1,xmm1,xmm15,0x00
    vpxor xmm8,xmm5,xmm9
    vpsrldq xmm6,xmm6,8
    vpxor xmm7,xmm7,xmm6

    vpclmulqdq xmm5,xmm13,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((48-32))+r9]
    vpxor xmm5,xmm5,xmm4
    vpunpckhqdq xmm9,xmm12,xmm12
    vpclmulqdq xmm13,xmm13,xmm0,0x11
    vpxor xmm9,xmm9,xmm12
    vpxor xmm13,xmm13,xmm14
    vpalignr xmm14,xmm8,xmm8,8
    vpclmulqdq xmm2,xmm2,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((80-32))+r9]
    vpxor xmm2,xmm2,xmm1

    vpclmulqdq xmm4,xmm12,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((64-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm1,xmm11,xmm11
    vpclmulqdq xmm12,xmm12,xmm3,0x11
    vpxor xmm1,xmm1,xmm11
    vpxor xmm12,xmm12,xmm13
    vxorps xmm7,xmm7,XMMWORD[16+rsp]
    vpclmulqdq xmm9,xmm9,xmm15,0x00
    vpxor xmm9,xmm9,xmm2

    ; Reduction step against $L$poly ([16+r11]).
    vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
    vxorps xmm8,xmm8,xmm14

    vpclmulqdq xmm5,xmm11,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((96-32))+r9]
    vpxor xmm5,xmm5,xmm4
    vpunpckhqdq xmm2,xmm10,xmm10
    vpclmulqdq xmm11,xmm11,xmm0,0x11
    vpxor xmm2,xmm2,xmm10
    vpalignr xmm14,xmm8,xmm8,8
    vpxor xmm11,xmm11,xmm12
    vpclmulqdq xmm1,xmm1,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((128-32))+r9]
    vpxor xmm1,xmm1,xmm9

    ; Second reduction step against $L$poly.
    vxorps xmm14,xmm14,xmm7
    vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
    vxorps xmm8,xmm8,xmm14

    vpclmulqdq xmm4,xmm10,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((112-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm9,xmm8,xmm8
    vpclmulqdq xmm10,xmm10,xmm3,0x11
    vpxor xmm9,xmm9,xmm8
    vpxor xmm10,xmm10,xmm11
    vpclmulqdq xmm2,xmm2,xmm15,0x00
    vpxor xmm2,xmm2,xmm1

    vpclmulqdq xmm5,xmm8,xmm0,0x00
    vpclmulqdq xmm7,xmm8,xmm0,0x11
    vpxor xmm5,xmm5,xmm4
    vpclmulqdq xmm6,xmm9,xmm15,0x10
    vpxor xmm7,xmm7,xmm10
    vpxor xmm6,xmm6,xmm2

    vpxor xmm4,xmm7,xmm5
    vpxor xmm6,xmm6,xmm4
    vpslldq xmm1,xmm6,8
    ; xmm3 = $L$poly for the final two reduction steps.
    vmovdqu xmm3,XMMWORD[16+r11]
    vpsrldq xmm6,xmm6,8
    vpxor xmm8,xmm5,xmm1
    vpxor xmm7,xmm7,xmm6

    vpalignr xmm2,xmm8,xmm8,8
    vpclmulqdq xmm8,xmm8,xmm3,0x10
    vpxor xmm8,xmm8,xmm2

    vpalignr xmm2,xmm8,xmm8,8
    vpclmulqdq xmm8,xmm8,xmm3,0x10
    vpxor xmm2,xmm2,xmm7
    vpxor xmm8,xmm8,xmm2
    ; Byte-reverse the hash value and write it back to the caller's state
    ; (r9 was advanced by 64, so -64+r9 is the original pointer).
    vpshufb xmm8,xmm8,XMMWORD[r11]
    vmovdqu XMMWORD[(-64)+r9],xmm8

    vzeroupper
    movaps xmm6,XMMWORD[((-216))+rax]
    movaps xmm7,XMMWORD[((-200))+rax]
    movaps xmm8,XMMWORD[((-184))+rax]
    movaps xmm9,XMMWORD[((-168))+rax]
    movaps xmm10,XMMWORD[((-152))+rax]
    movaps xmm11,XMMWORD[((-136))+rax]
    movaps xmm12,XMMWORD[((-120))+rax]
    movaps xmm13,XMMWORD[((-104))+rax]
    movaps xmm14,XMMWORD[((-88))+rax]
    movaps xmm15,XMMWORD[((-72))+rax]
    mov r15,QWORD[((-48))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r12,QWORD[((-24))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov rbx,QWORD[((-8))+rax]
    lea rsp,[rax]

$L$gcm_enc_abort:
    mov rax,r10
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_aesni_gcm_encrypt:

; Constant table. The code addresses every entry relative to $L$bswap_mask
; (held in r11): +0 byte-reversal shuffle mask, +16 $L$poly, +32 $L$one_msb,
; +48 $L$two_lsb, +64 $L$one_lsb. Each entry is 16 bytes, so the order and
; size of these rows must not change.
ALIGN 64
$L$bswap_mask:
DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; ASCII: "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN 64
EXTERN __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; gcm_se_handler -- exception handler referenced from the .xdata records
; of both exported routines.
;
; In: r8 and r9 are used as two structure pointers. NOTE(review): the
; offsets used match the Win64 CONTEXT record (120=Rax, 144=Rbx, 152=Rsp,
; 160=Rbp, 168=Rsi, 176=Rdi, 216..240=R12..R15, 248=Rip, 512=Xmm6) and the
; DISPATCHER_CONTEXT record (8=ImageBase, 40=ContextRecord,
; 56=HandlerData) -- confirm against winnt.h.
;
; HandlerData holds two image-relative offsets: the "body" label and the
; epilogue ("abort") label of the faulting routine. If the faulting address
; lies in [body, abort), the routine's frame is fully set up and rax in the
; context still holds the entry rsp, so the saved integer and xmm registers
; are copied from the frame back into the context. In every case rsi/rdi
; are recovered from [16+rax]/[8+rax], the context's stack pointer slot is
; set to rax, the context is copied out, and RtlVirtualUnwind is called.
; Returns 1 in eax.
;-----------------------------------------------------------------------
ALIGN 16
gcm_se_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64

    mov rax,QWORD[120+r8]
    mov rbx,QWORD[248+r8]

    mov rsi,QWORD[8+r9]
    mov r11,QWORD[56+r9]

    ; Fault before the body label: prologue not finished, rax = entry rsp.
    mov r10d,DWORD[r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail

    mov rax,QWORD[152+r8]

    ; Fault at or after the epilogue label: registers already restored,
    ; use the context's stack pointer.
    mov r10d,DWORD[4+r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail

    mov rax,QWORD[120+r8]

    ; Recover the six pushed registers from below the entry rsp.
    mov r15,QWORD[((-48))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r12,QWORD[((-24))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov rbx,QWORD[((-8))+rax]
    mov QWORD[240+r8],r15
    mov QWORD[232+r8],r14
    mov QWORD[224+r8],r13
    mov QWORD[216+r8],r12
    mov QWORD[160+r8],rbp
    mov QWORD[144+r8],rbx

    ; Copy 20 qwords (the ten saved xmm registers) into the context.
    ; DD 0xa548f3fc encodes: cld; rep movsq.
    lea rsi,[((-216))+rax]
    lea rdi,[512+r8]
    mov ecx,20
    DD 0xa548f3fc

$L$common_seh_tail:
    mov rdi,QWORD[8+rax]
    mov rsi,QWORD[16+rax]
    mov QWORD[152+r8],rax
    mov QWORD[168+r8],rsi
    mov QWORD[176+r8],rdi

    ; Copy 154 qwords from r8 to the record at [40+r9]
    ; (cld; rep movsq). Presumably 154*8 is the CONTEXT size -- confirm.
    mov rdi,QWORD[40+r9]
    mov rsi,r8
    mov ecx,154
    DD 0xa548f3fc

    ; Set up the call to RtlVirtualUnwind: four register arguments plus four
    ; stack arguments at [32..56+rsp], all taken from the record at r9.
    mov rsi,r9
    xor rcx,rcx
    mov rdx,QWORD[8+rsi]
    mov r8,QWORD[rsi]
    mov r9,QWORD[16+rsi]
    mov r10,QWORD[40+rsi]
    lea r11,[56+rsi]
    lea r12,[24+rsi]
    mov QWORD[32+rsp],r10
    mov QWORD[40+rsp],r11
    mov QWORD[48+rsp],r12
    mov QWORD[56+rsp],rcx
    call QWORD[__imp_RtlVirtualUnwind]

    mov eax,1
    add rsp,64
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    DB 0F3h,0C3h ;repret


; One three-entry record per exported routine: image-relative start label,
; end label, and the matching record in .xdata.
section .pdata rdata align=4
ALIGN 4
    DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
    DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
    DD $L$SEH_gcm_dec_info wrt ..imagebase

    DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
    DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
    DD $L$SEH_gcm_enc_info wrt ..imagebase
; Each record: a 4-byte header, the handler (gcm_se_handler), then the two
; image-relative labels that gcm_se_handler reads through [56+r9]
; (body start, epilogue start).
section .xdata rdata align=8
ALIGN 8
$L$SEH_gcm_dec_info:
DB 9,0,0,0
    DD gcm_se_handler wrt ..imagebase
    DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_gcm_enc_info:
DB 9,0,0,0
    DD gcm_se_handler wrt ..imagebase
    DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase