; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; The comments below are explanatory annotations only; any lasting change
; belongs in the generating script named above.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__, win32
%ifidn __OUTPUT_FORMAT__,obj
section code    use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section .text   code align=64
%else
section .text   code
%endif
;extern _OPENSSL_ia32cap_P

;-----------------------------------------------------------------------
; _bn_mul_mont
;
; Word-by-word Montgomery multiplication for 32-bit x86 (the ident string
; emitted after the code reads "Montgomery Multiplication for x86,
; CRYPTOGAMS by <appro@openssl.org>").
;
; Syntax: NASM, 32-bit.  Six dword arguments are read from the stack, the
; routine ends in a plain `ret`, and ebp/ebx/esi/edi are saved/restored.
;
; Arguments (offsets at entry, i.e. before the four pushes):
;   [esp+ 4]  arg1  destination vector      (written in L$007common_tail)
;   [esp+ 8]  arg2  first multiplicand
;   [esp+12]  arg3  second multiplicand
;   [esp+16]  arg4  modulus                 (subtracted in L$016sub)
;   [esp+20]  arg5  pointer; only the dword it points at is used
;   [esp+24]  arg6  word count
; NOTE(review): these are referred to below as rp, ap, bp, np, n0 and num,
; following the customary bn_mul_mont prototype -- confirm against the C
; declaration, which is not part of this file.
;
; Returns: eax = 0 if num < 4 (signed compare; nothing is computed),
;          eax = 1 otherwise.
; Clobbers: ecx, edx, flags; mm0-mm7 on the SSE2 path (emms is executed
;           before leaving that path).
;
; Frame built below the caller's stack (offsets from the new esp):
;    0   scratch; the squaring path parks its loop bound here
;    4   rp
;    8   ap
;   12   bp; later the running bp pointer (multiply path) or the outer
;        index (squaring path)
;   16   np
;   20   n0 word (*arg5)
;   24   caller's esp, reloaded just before the epilogue
;   28   multiply path only: address one past the last bp word
;   32+  tp[], the temporary accumulator; 32 + 4*(num+2) bytes are
;        reserved before alignment
; In the comments tp[k] means the dword at [32 + 4*k + esp].
;-----------------------------------------------------------------------
global  _bn_mul_mont
align   16
_bn_mul_mont:
L$_bn_mul_mont_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        xor     eax,eax                         ; return value if we bail out
        mov     edi,DWORD [40+esp]              ; edi = num (arg6)
        cmp     edi,4
        jl      NEAR L$000just_leave            ; num < 4: return 0

        ; ---- size and place the frame -------------------------------
        lea     esi,[20+esp]                    ; esi -> arg1 slot
        lea     edx,[24+esp]                    ; edx -> arg2 slot
        add     edi,2                           ; edi = num+2
        neg     edi
        lea     ebp,[edi*4+esp-32]              ; ebp = esp - 32 - 4*(num+2)
        neg     edi                             ; edi = num+2 again
        ; Position the frame relative to edx: first make (ebp - edx) a
        ; multiple of 2048, then make bit 11 of ebp differ from bit 11 of
        ; edx.  NOTE(review): presumably a cache-aliasing mitigation --
        ; confirm against the generator's comments.
        mov     eax,ebp
        sub     eax,edx
        and     eax,2047
        sub     ebp,eax
        xor     edx,ebp
        and     edx,2048
        xor     edx,2048
        sub     ebp,edx
        and     ebp,-64                         ; 64-byte align the frame

        ; ---- move esp down to ebp, probing every 4096 bytes -----------
        mov     eax,esp
        sub     eax,ebp
        and     eax,-4096                       ; whole 4K steps between esp and ebp
        mov     edx,esp                         ; edx = caller's esp (saved at [24+esp] below)
        lea     esp,[eax*1+ebp]
        mov     eax,DWORD [esp]                 ; touch the page
        cmp     esp,ebp
        ja      NEAR L$001page_walk
        jmp     NEAR L$002page_walk_done
align   16
L$001page_walk:
        lea     esp,[esp-4096]
        mov     eax,DWORD [esp]                 ; touch the next page down
        cmp     esp,ebp
        ja      NEAR L$001page_walk
L$002page_walk_done:
        ; ---- copy the arguments into the frame ------------------------
        mov     eax,DWORD [esi]                 ; rp
        mov     ebx,DWORD [4+esi]               ; ap
        mov     ecx,DWORD [8+esi]               ; bp
        mov     ebp,DWORD [12+esi]              ; np
        mov     esi,DWORD [16+esi]              ; arg5 (pointer)
        mov     esi,DWORD [esi]                 ; n0 word
        mov     DWORD [4+esp],eax
        mov     DWORD [8+esp],ebx
        mov     DWORD [12+esp],ecx
        mov     DWORD [16+esp],ebp
        mov     DWORD [20+esp],esi
        lea     ebx,[edi-3]                     ; ebx = num-1 (edi is num+2)
        mov     DWORD [24+esp],edx              ; caller's esp

        ; ---- dispatch on bit 26 of the first capability dword ---------
        lea     eax,[_OPENSSL_ia32cap_P]
        bt      DWORD [eax],26
        jnc     NEAR L$003non_sse2

        ; =================== MMX/SSE2 path =============================
        ; mm7 = low-dword mask, mm4 = bp[i], mm5 = per-row multiplier for
        ; np, mm2/mm3 = the two running 64-bit carry accumulators.
        mov     eax,-1
        movd    mm7,eax                         ; mm7 = 0x00000000FFFFFFFF
        mov     esi,DWORD [8+esp]               ; esi = ap
        mov     edi,DWORD [12+esp]              ; edi = bp
        mov     ebp,DWORD [16+esp]              ; ebp = np
        xor     edx,edx                         ; edx = i = 0
        xor     ecx,ecx                         ; ecx = j = 0
        movd    mm4,DWORD [edi]                 ; bp[0]
        movd    mm5,DWORD [esi]                 ; ap[0]
        movd    mm3,DWORD [ebp]                 ; np[0]
        pmuludq mm5,mm4                         ; ap[0]*bp[0]
        movq    mm2,mm5
        movq    mm0,mm5
        pand    mm0,mm7                         ; low dword of the product
        pmuludq mm5,[20+esp]                    ; * n0 word (only low dwords multiply)
        pmuludq mm3,mm5                         ; np[0] * multiplier
        paddq   mm3,mm0
        movd    mm1,DWORD [4+ebp]               ; np[1]
        movd    mm0,DWORD [4+esi]               ; ap[1]
        psrlq   mm2,32                          ; keep carries only
        psrlq   mm3,32
        inc     ecx                             ; j = 1
align   16
L$0041st:
        pmuludq mm0,mm4                         ; ap[j]*bp[0]
        pmuludq mm1,mm5                         ; np[j]*multiplier
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        pand    mm0,mm7
        movd    mm1,DWORD [4+ecx*4+ebp]         ; preload np[j+1]
        paddq   mm3,mm0
        movd    mm0,DWORD [4+ecx*4+esi]         ; preload ap[j+1]
        psrlq   mm2,32
        movd    DWORD [28+ecx*4+esp],mm3        ; tp[j-1]
        psrlq   mm3,32
        lea     ecx,[1+ecx]
        cmp     ecx,ebx
        jl      NEAR L$0041st
        ; last word of the first row
        pmuludq mm0,mm4
        pmuludq mm1,mm5
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        pand    mm0,mm7
        paddq   mm3,mm0
        movd    DWORD [28+ecx*4+esp],mm3
        psrlq   mm2,32
        psrlq   mm3,32
        paddq   mm3,mm2                         ; combine both carries
        movq    [32+ebx*4+esp],mm3              ; 64-bit store: tp[num-1], tp[num]
        inc     edx                             ; i = 1
L$005outer:
        xor     ecx,ecx                         ; j = 0
        movd    mm4,DWORD [edx*4+edi]           ; bp[i]
        movd    mm5,DWORD [esi]                 ; ap[0]
        movd    mm6,DWORD [32+esp]              ; tp[0]
        movd    mm3,DWORD [ebp]                 ; np[0]
        pmuludq mm5,mm4
        paddq   mm5,mm6                         ; ap[0]*bp[i] + tp[0]
        movq    mm0,mm5
        movq    mm2,mm5
        pand    mm0,mm7
        pmuludq mm5,[20+esp]                    ; * n0 word
        pmuludq mm3,mm5
        paddq   mm3,mm0
        movd    mm6,DWORD [36+esp]              ; tp[1]
        movd    mm1,DWORD [4+ebp]               ; np[1]
        movd    mm0,DWORD [4+esi]               ; ap[1]
        psrlq   mm2,32
        psrlq   mm3,32
        paddq   mm2,mm6
        inc     ecx                             ; j = 1
        dec     ebx                             ; ebx becomes the inner countdown
L$006inner:
        pmuludq mm0,mm4
        pmuludq mm1,mm5
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        movd    mm6,DWORD [36+ecx*4+esp]        ; tp[j+1]
        pand    mm0,mm7
        movd    mm1,DWORD [4+ecx*4+ebp]         ; np[j+1]
        paddq   mm3,mm0
        movd    mm0,DWORD [4+ecx*4+esi]         ; ap[j+1]
        psrlq   mm2,32
        movd    DWORD [28+ecx*4+esp],mm3        ; tp[j-1]
        psrlq   mm3,32
        paddq   mm2,mm6
        dec     ebx                             ; sets ZF for the jnz below...
        lea     ecx,[1+ecx]                     ; ...lea leaves flags alone
        jnz     NEAR L$006inner
        mov     ebx,ecx                         ; restore ebx from the index
        pmuludq mm0,mm4
        pmuludq mm1,mm5
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        pand    mm0,mm7
        paddq   mm3,mm0
        movd    DWORD [28+ecx*4+esp],mm3
        psrlq   mm2,32
        psrlq   mm3,32
        movd    mm6,DWORD [36+ebx*4+esp]        ; previous top word
        paddq   mm3,mm2
        paddq   mm3,mm6
        movq    [32+ebx*4+esp],mm3              ; 64-bit store of the two top words
        lea     edx,[1+edx]                     ; i++
        cmp     edx,ebx
        jle     NEAR L$005outer                 ; while i <= ebx
        emms                                    ; leave MMX state
        jmp     NEAR L$007common_tail

        ; =================== integer-only path =========================
align   16
L$003non_sse2:
        mov     esi,DWORD [8+esp]               ; esi = ap
        lea     ebp,[1+ebx]                     ; ebp = num
        mov     edi,DWORD [12+esp]              ; edi = bp
        xor     ecx,ecx                         ; j = 0
        mov     edx,esi
        and     ebp,1                           ; num & 1
        sub     edx,edi                         ; ap - bp
        lea     eax,[4+ebx*4+edi]               ; eax = &bp[num]
        or      ebp,edx                         ; ZF <=> num even AND ap == bp
        mov     edi,DWORD [edi]                 ; bp[0]; mov keeps ZF
        jz      NEAR L$008bn_sqr_mont           ; dedicated squaring code
        mov     DWORD [28+esp],eax              ; end-of-bp sentinel
        mov     eax,DWORD [esi]                 ; ap[0]
        xor     edx,edx                         ; carry = 0
align   16
L$009mull:
        ; tp[j] = low(ap[j]*bp[0] + carry)
        mov     ebp,edx
        mul     edi
        add     ebp,eax
        lea     ecx,[1+ecx]                     ; j++ without touching CF
        adc     edx,0
        mov     eax,DWORD [ecx*4+esi]
        cmp     ecx,ebx
        mov     DWORD [28+ecx*4+esp],ebp
        jl      NEAR L$009mull
        mov     ebp,edx
        mul     edi
        mov     edi,DWORD [20+esp]              ; n0 word
        add     eax,ebp
        mov     esi,DWORD [16+esp]              ; esi = np
        adc     edx,0
        imul    edi,DWORD [32+esp]              ; edi = tp[0] * n0 word
        mov     DWORD [32+ebx*4+esp],eax
        xor     ecx,ecx
        mov     DWORD [36+ebx*4+esp],edx
        mov     DWORD [40+ebx*4+esp],ecx        ; clear the word above
        mov     eax,DWORD [esi]                 ; np[0]
        mul     edi
        add     eax,DWORD [32+esp]              ; only the carry out is needed
        mov     eax,DWORD [4+esi]               ; np[1]; mov keeps CF
        adc     edx,0
        inc     ecx                             ; j = 1
        jmp     NEAR L$0102ndmadd
align   16
L$0111stmadd:
        ; tp[j] = low(ap[j]*bp[i] + tp[j] + carry)
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ecx*4+esp]
        lea     ecx,[1+ecx]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [ecx*4+esi]
        adc     edx,0
        cmp     ecx,ebx
        mov     DWORD [28+ecx*4+esp],ebp
        jl      NEAR L$0111stmadd
        mov     ebp,edx
        mul     edi
        add     eax,DWORD [32+ebx*4+esp]
        mov     edi,DWORD [20+esp]              ; n0 word
        adc     edx,0
        mov     esi,DWORD [16+esp]              ; esi = np
        add     ebp,eax
        adc     edx,0
        imul    edi,DWORD [32+esp]              ; edi = tp[0] * n0 word
        xor     ecx,ecx
        add     edx,DWORD [36+ebx*4+esp]
        mov     DWORD [32+ebx*4+esp],ebp
        adc     ecx,0                           ; capture the carry out
        mov     eax,DWORD [esi]                 ; np[0]
        mov     DWORD [36+ebx*4+esp],edx
        mov     DWORD [40+ebx*4+esp],ecx
        mul     edi
        add     eax,DWORD [32+esp]              ; only the carry out is needed
        mov     eax,DWORD [4+esi]               ; np[1]; mov keeps CF
        adc     edx,0
        mov     ecx,1
align   16
L$0102ndmadd:
        ; tp[j-1] = low(np[j]*edi + tp[j] + carry): the result is stored
        ; one word lower than it was read.
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ecx*4+esp]
        lea     ecx,[1+ecx]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [ecx*4+esi]
        adc     edx,0
        cmp     ecx,ebx
        mov     DWORD [24+ecx*4+esp],ebp
        jl      NEAR L$0102ndmadd
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ebx*4+esp]
        adc     edx,0
        add     ebp,eax
        adc     edx,0
        mov     DWORD [28+ebx*4+esp],ebp
        xor     eax,eax
        mov     ecx,DWORD [12+esp]              ; running bp pointer
        add     edx,DWORD [36+ebx*4+esp]
        adc     eax,DWORD [40+ebx*4+esp]
        lea     ecx,[4+ecx]                     ; advance to the next bp word
        mov     DWORD [32+ebx*4+esp],edx
        cmp     ecx,DWORD [28+esp]              ; reached &bp[num]?
        mov     DWORD [36+ebx*4+esp],eax
        je      NEAR L$007common_tail
        mov     edi,DWORD [ecx]                 ; next bp word
        mov     esi,DWORD [8+esp]               ; esi = ap
        mov     DWORD [12+esp],ecx
        xor     ecx,ecx
        xor     edx,edx
        mov     eax,DWORD [esi]                 ; ap[0]
        jmp     NEAR L$0111stmadd

        ; =================== squaring path (ap == bp, num even) ========
align   16
L$008bn_sqr_mont:
        mov     DWORD [esp],ebx                 ; park the loop bound (num-1)
        mov     DWORD [12+esp],ecx              ; outer index = 0
        mov     eax,edi
        mul     edi                             ; ap[0]^2
        mov     DWORD [32+esp],eax              ; tp[0]
        mov     ebx,edx
        shr     edx,1
        and     ebx,1                           ; bit shifted out of edx
        inc     ecx
align   16
L$012sqr:
        ; cross products ap[j]*ap[0], doubled on the fly; ebx carries the
        ; bit shifted out of the previous word
        mov     eax,DWORD [ecx*4+esi]
        mov     ebp,edx
        mul     edi
        add     eax,ebp
        lea     ecx,[1+ecx]
        adc     edx,0
        lea     ebp,[eax*2+ebx]
        shr     eax,31
        cmp     ecx,DWORD [esp]
        mov     ebx,eax
        mov     DWORD [28+ecx*4+esp],ebp
        jl      NEAR L$012sqr
        mov     eax,DWORD [ecx*4+esi]
        mov     ebp,edx
        mul     edi
        add     eax,ebp
        mov     edi,DWORD [20+esp]              ; n0 word
        adc     edx,0
        mov     esi,DWORD [16+esp]              ; esi = np
        lea     ebp,[eax*2+ebx]
        imul    edi,DWORD [32+esp]              ; edi = tp[0] * n0 word
        shr     eax,31
        mov     DWORD [32+ecx*4+esp],ebp
        lea     ebp,[edx*2+eax]
        mov     eax,DWORD [esi]                 ; np[0]
        shr     edx,31
        mov     DWORD [36+ecx*4+esp],ebp
        mov     DWORD [40+ecx*4+esp],edx
        mul     edi
        add     eax,DWORD [32+esp]              ; only the carry out is needed
        mov     ebx,ecx
        adc     edx,0
        mov     eax,DWORD [4+esi]               ; np[1]
        mov     ecx,1
align   16
L$0133rdmadd:
        ; same operation as L$0102ndmadd, two words per iteration
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ecx*4+esp]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [4+ecx*4+esi]
        adc     edx,0
        mov     DWORD [28+ecx*4+esp],ebp
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [36+ecx*4+esp]
        lea     ecx,[2+ecx]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [ecx*4+esi]
        adc     edx,0
        cmp     ecx,ebx
        mov     DWORD [24+ecx*4+esp],ebp
        jl      NEAR L$0133rdmadd
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ebx*4+esp]
        adc     edx,0
        add     ebp,eax
        adc     edx,0
        mov     DWORD [28+ebx*4+esp],ebp
        mov     ecx,DWORD [12+esp]              ; outer index
        xor     eax,eax
        mov     esi,DWORD [8+esp]               ; esi = ap
        add     edx,DWORD [36+ebx*4+esp]
        adc     eax,DWORD [40+ebx*4+esp]
        mov     DWORD [32+ebx*4+esp],edx
        cmp     ecx,ebx
        mov     DWORD [36+ebx*4+esp],eax
        je      NEAR L$007common_tail           ; all rows done
        mov     edi,DWORD [4+ecx*4+esi]         ; next ap word
        lea     ecx,[1+ecx]
        mov     eax,edi
        mov     DWORD [12+esp],ecx              ; outer index++
        mul     edi                             ; its square
        add     eax,DWORD [32+ecx*4+esp]
        adc     edx,0
        mov     DWORD [32+ecx*4+esp],eax
        xor     ebp,ebp
        cmp     ecx,ebx
        lea     ecx,[1+ecx]                     ; lea keeps the cmp result
        je      NEAR L$014sqrlast
        mov     ebx,edx
        shr     edx,1
        and     ebx,1
align   16
L$015sqradd:
        ; doubled cross products accumulated into tp[]
        mov     eax,DWORD [ecx*4+esi]
        mov     ebp,edx
        mul     edi
        add     eax,ebp
        lea     ebp,[eax*1+eax]                 ; 2*eax, flags untouched
        adc     edx,0
        shr     eax,31
        add     ebp,DWORD [32+ecx*4+esp]
        lea     ecx,[1+ecx]
        adc     eax,0
        add     ebp,ebx
        adc     eax,0
        cmp     ecx,DWORD [esp]
        mov     DWORD [28+ecx*4+esp],ebp
        mov     ebx,eax
        jle     NEAR L$015sqradd
        mov     ebp,edx
        add     edx,edx
        shr     ebp,31
        add     edx,ebx
        adc     ebp,0
L$014sqrlast:
        mov     edi,DWORD [20+esp]              ; n0 word
        mov     esi,DWORD [16+esp]              ; esi = np
        imul    edi,DWORD [32+esp]              ; edi = tp[0] * n0 word
        add     edx,DWORD [32+ecx*4+esp]
        mov     eax,DWORD [esi]                 ; np[0]
        adc     ebp,0
        mov     DWORD [32+ecx*4+esp],edx
        mov     DWORD [36+ecx*4+esp],ebp
        mul     edi
        add     eax,DWORD [32+esp]              ; only the carry out is needed
        lea     ebx,[ecx-1]
        adc     edx,0
        mov     ecx,1
        mov     eax,DWORD [4+esi]               ; np[1]
        jmp     NEAR L$0133rdmadd

        ; =================== final reduction ===========================
align   16
L$007common_tail:
        mov     ebp,DWORD [16+esp]              ; ebp = np
        mov     edi,DWORD [4+esp]               ; edi = rp
        lea     esi,[32+esp]                    ; esi = tp
        mov     eax,DWORD [esi]                 ; tp[0]
        mov     ecx,ebx                         ; countdown from ebx
        xor     edx,edx                         ; index = 0, and CF = 0
align   16
L$016sub:
        ; rp[] = tp[] - np[] with a chained borrow.  dec, mov and lea do
        ; not modify CF, so the borrow survives from one sbb to the next.
        sbb     eax,DWORD [edx*4+ebp]
        mov     DWORD [edx*4+edi],eax
        dec     ecx
        mov     eax,DWORD [4+edx*4+esi]
        lea     edx,[1+edx]
        jge     NEAR L$016sub
        sbb     eax,0                           ; fold the borrow into the top word
        mov     edx,-1
        xor     edx,eax                         ; edx = ~eax
        jmp     NEAR L$017copy
align   16
L$017copy:
        ; Branch-free select per word: rp[k] = (tp[k] & eax) | (rp[k] & edx).
        ; Each tp word is overwritten with ecx (the exhausted L$016sub
        ; counter) once it has been read.
        mov     esi,DWORD [32+ebx*4+esp]
        mov     ebp,DWORD [ebx*4+edi]
        mov     DWORD [32+ebx*4+esp],ecx
        and     esi,eax
        and     ebp,edx
        or      ebp,esi
        mov     DWORD [ebx*4+edi],ebp
        dec     ebx
        jge     NEAR L$017copy
        mov     esp,DWORD [24+esp]              ; back to the caller's stack
        mov     eax,1                           ; success
L$000just_leave:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

; NUL-terminated ident string:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db      77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
db      112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
db      54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
db      32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
db      111,114,103,62,0
segment .bss
common  _OPENSSL_ia32cap_P 16
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif