; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

;-----------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication for 32-bit x86 (NASM syntax).
;
; Review notes (annotations only; the instruction stream is unchanged):
;
; ABI:  32-bit, all six arguments on the stack.  ebp/ebx/esi/edi are
;       pushed on entry and popped on exit.  eax, ecx, edx and (on the
;       SSE2 path) mm0-mm7 are overwritten.
;
; Arguments, by position (offsets are relative to esp after the four
;       pushes).  The short names are the ones used in the comments
;       below -- presumably they match the C prototype's rp/ap/bp/np/
;       n0/num; confirm against the caller:
;         [20+esp]  r    output vector
;         [24+esp]  a    first operand vector
;         [28+esp]  b    second operand vector
;         [32+esp]  n    modulus vector
;         [36+esp]  pointer; only the first dword behind it ("n0") is read
;         [40+esp]  num  number of 32-bit words, compared signed against 4
;
; Return: eax = 0 when num < 4 (nothing is written), otherwise eax = 1.
;
; Local frame (relative to the relocated esp):
;         [ 0+esp]  scratch (the squaring path keeps num-1 here)
;         [ 4+esp]  r
;         [ 8+esp]  a
;         [12+esp]  b  (multiply path: running pointer into b;
;                       squaring path: outer index i)
;         [16+esp]  n
;         [20+esp]  n0
;         [24+esp]  caller-frame esp, restored before returning
;         [28+esp]  multiply path only: b + 4*num (end pointer)
;         [32+esp]  tp[0..num+1], the temporary result vector
;-----------------------------------------------------------------------

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): presumably the @feat.00 marker the MS linker looks for
; (safe-SEH compatibility) -- confirm.
$@feat.00 equ 1
section	.text	code	align=64
%else
section	.text	code
%endif
;extern	_OPENSSL_ia32cap_P
global	_bn_mul_mont
align	16
_bn_mul_mont:
L$_bn_mul_mont_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax			; return value for the early exit
	mov	edi,DWORD [40+esp]	; edi = num
	cmp	edi,4
	jl	NEAR L$000just_leave	; num < 4 (signed): return 0
	lea	esi,[20+esp]		; esi = address of the argument block
	lea	edx,[24+esp]		; edx = address of the second argument slot
	; ebp = esp - 32 - 4*(num+2): candidate base of the local frame.
	add	edi,2
	neg	edi
	lea	ebp,[edi*4+esp-32]
	neg	edi			; edi = num+2 again
	; Make ebp congruent to edx modulo 2048 ...
	mov	eax,ebp
	sub	eax,edx
	and	eax,2047
	sub	ebp,eax
	; ... then, if bit 11 of ebp equals bit 11 of edx, drop ebp by 2048
	; so that the two addresses differ in that bit ...
	xor	edx,ebp
	and	edx,2048
	xor	edx,2048
	sub	ebp,edx
	and	ebp,-64			; ... and round down to a 64-byte boundary.
	; Move esp down to ebp, reading one dword at every 4096-byte step on
	; the way (NOTE(review): presumably so stack guard pages are touched
	; in order -- confirm).
	mov	eax,esp
	sub	eax,ebp
	and	eax,-4096
	mov	edx,esp			; edx = esp of the caller frame (after pushes)
	lea	esp,[eax*1+ebp]
	mov	eax,DWORD [esp]
	cmp	esp,ebp
	ja	NEAR L$001page_walk
	jmp	NEAR L$002page_walk_done
align	16
L$001page_walk:
	lea	esp,[esp-4096]
	mov	eax,DWORD [esp]
	cmp	esp,ebp
	ja	NEAR L$001page_walk
L$002page_walk_done:
	; Copy the arguments into the local frame (esi still addresses the
	; caller's argument block).
	mov	eax,DWORD [esi]		; r
	mov	ebx,DWORD [4+esi]	; a
	mov	ecx,DWORD [8+esi]	; b
	mov	ebp,DWORD [12+esi]	; n
	mov	esi,DWORD [16+esi]
	mov	esi,DWORD [esi]		; n0 = first dword behind the fifth argument
	mov	DWORD [4+esp],eax
	mov	DWORD [8+esp],ebx
	mov	DWORD [12+esp],ecx
	mov	DWORD [16+esp],ebp
	mov	DWORD [20+esp],esi
	lea	ebx,[edi-3]		; ebx = num-1 (edi holds num+2)
	mov	DWORD [24+esp],edx	; remember the caller-frame esp
	; Dispatch on bit 26 of the first dword of _OPENSSL_ia32cap_P.
	lea	eax,[_OPENSSL_ia32cap_P]
	bt	DWORD [eax],26
	jnc	NEAR L$003non_sse2

	;---------------------------------------------------------------
	; SSE2 path: 32x32->64 products via pmuludq in MMX registers.
	;   esi = a, edi = b, ebp = n, ebx = num-1
	;   mm7 = 0x00000000ffffffff (low-dword mask)
	;   mm4 = b[i], mm5 = per-row multiplier for n[],
	;   mm2 / mm3 = running 64-bit sums of the a*b and n*m columns
	;---------------------------------------------------------------
	mov	eax,-1
	movd	mm7,eax
	mov	esi,DWORD [8+esp]
	mov	edi,DWORD [12+esp]
	mov	ebp,DWORD [16+esp]
	xor	edx,edx			; edx = i (outer index)
	xor	ecx,ecx			; ecx = j (inner index)
	movd	mm4,DWORD [edi]		; b[0]
	movd	mm5,DWORD [esi]		; a[0]
	movd	mm3,DWORD [ebp]		; n[0]
	pmuludq	mm5,mm4			; a[0]*b[0]
	movq	mm2,mm5
	movq	mm0,mm5
	pand	mm0,mm7			; low dword of the product
	pmuludq	mm5,[20+esp]		; low dword * n0
	pmuludq	mm3,mm5
	paddq	mm3,mm0
	movd	mm1,DWORD [4+ebp]	; n[1]
	movd	mm0,DWORD [4+esi]	; a[1]
	psrlq	mm2,32
	psrlq	mm3,32
	inc	ecx
align	16
L$0041st:
	; First row: tp[j-1] receives the low dword of the combined column.
	pmuludq	mm0,mm4
	pmuludq	mm1,mm5
	paddq	mm2,mm0
	paddq	mm3,mm1
	movq	mm0,mm2
	pand	mm0,mm7
	movd	mm1,DWORD [4+ecx*4+ebp]
	paddq	mm3,mm0
	movd	mm0,DWORD [4+ecx*4+esi]
	psrlq	mm2,32
	movd	DWORD [28+ecx*4+esp],mm3
	psrlq	mm3,32
	lea	ecx,[1+ecx]
	cmp	ecx,ebx
	jl	NEAR L$0041st
	; Last column of the first row, then store the two top words.
	pmuludq	mm0,mm4
	pmuludq	mm1,mm5
	paddq	mm2,mm0
	paddq	mm3,mm1
	movq	mm0,mm2
	pand	mm0,mm7
	paddq	mm3,mm0
	movd	DWORD [28+ecx*4+esp],mm3
	psrlq	mm2,32
	psrlq	mm3,32
	paddq	mm3,mm2
	movq	[32+ebx*4+esp],mm3	; 64-bit store at tp[num-1]
	inc	edx			; i = 1
L$005outer:
	; Rows 1..num-1: same as the first row, but tp[] is accumulated
	; (mm6 carries the previous tp word into the column sum).
	xor	ecx,ecx
	movd	mm4,DWORD [edx*4+edi]	; b[i]
	movd	mm5,DWORD [esi]		; a[0]
	movd	mm6,DWORD [32+esp]	; tp[0]
	movd	mm3,DWORD [ebp]		; n[0]
	pmuludq	mm5,mm4
	paddq	mm5,mm6
	movq	mm0,mm5
	movq	mm2,mm5
	pand	mm0,mm7
	pmuludq	mm5,[20+esp]		; low dword * n0
	pmuludq	mm3,mm5
	paddq	mm3,mm0
	movd	mm6,DWORD [36+esp]	; tp[1]
	movd	mm1,DWORD [4+ebp]
	movd	mm0,DWORD [4+esi]
	psrlq	mm2,32
	psrlq	mm3,32
	paddq	mm2,mm6
	inc	ecx
	dec	ebx			; ebx doubles as the inner countdown
L$006inner:
	pmuludq	mm0,mm4
	pmuludq	mm1,mm5
	paddq	mm2,mm0
	paddq	mm3,mm1
	movq	mm0,mm2
	movd	mm6,DWORD [36+ecx*4+esp]
	pand	mm0,mm7
	movd	mm1,DWORD [4+ecx*4+ebp]
	paddq	mm3,mm0
	movd	mm0,DWORD [4+ecx*4+esi]
	psrlq	mm2,32
	movd	DWORD [28+ecx*4+esp],mm3
	psrlq	mm3,32
	paddq	mm2,mm6
	dec	ebx
	lea	ecx,[1+ecx]		; lea keeps the flags from dec for jnz
	jnz	NEAR L$006inner
	mov	ebx,ecx			; ebx was used up as the counter; reload it
	pmuludq	mm0,mm4
	pmuludq	mm1,mm5
	paddq	mm2,mm0
	paddq	mm3,mm1
	movq	mm0,mm2
	pand	mm0,mm7
	paddq	mm3,mm0
	movd	DWORD [28+ecx*4+esp],mm3
	psrlq	mm2,32
	psrlq	mm3,32
	movd	mm6,DWORD [36+ebx*4+esp]
	paddq	mm3,mm2
	paddq	mm3,mm6
	movq	[32+ebx*4+esp],mm3
	lea	edx,[1+edx]
	cmp	edx,ebx
	jle	NEAR L$005outer
	emms				; leave MMX state before the integer tail
	jmp	NEAR L$007common_tail

	;---------------------------------------------------------------
	; Integer path.  Chooses the squaring variant when a == b and num
	; is even (ebp = (num & 1) | (a - b) must be zero); otherwise the
	; general multiply below.
	;---------------------------------------------------------------
align	16
L$003non_sse2:
	mov	esi,DWORD [8+esp]	; a
	lea	ebp,[1+ebx]		; num
	mov	edi,DWORD [12+esp]	; b
	xor	ecx,ecx
	mov	edx,esi
	and	ebp,1
	sub	edx,edi
	lea	eax,[4+ebx*4+edi]	; eax = b + 4*num
	or	ebp,edx			; sets ZF for the jz below
	mov	edi,DWORD [edi]		; edi = b[0] (mov/lea leave flags intact)
	jz	NEAR L$008bn_sqr_mont
	mov	DWORD [28+esp],eax	; end pointer for the walk over b
	mov	eax,DWORD [esi]
	xor	edx,edx
align	16
L$009mull:
	; tp[] = a[] * b[0]
	mov	ebp,edx
	mul	edi
	add	ebp,eax
	lea	ecx,[1+ecx]
	adc	edx,0
	mov	eax,DWORD [ecx*4+esi]
	cmp	ecx,ebx
	mov	DWORD [28+ecx*4+esp],ebp
	jl	NEAR L$009mull
	mov	ebp,edx
	mul	edi
	mov	edi,DWORD [20+esp]	; n0
	add	eax,ebp
	mov	esi,DWORD [16+esp]	; esi = n
	adc	edx,0
	imul	edi,DWORD [32+esp]	; edi = n0 * tp[0]
	mov	DWORD [32+ebx*4+esp],eax
	xor	ecx,ecx
	mov	DWORD [36+ebx*4+esp],edx
	mov	DWORD [40+ebx*4+esp],ecx
	mov	eax,DWORD [esi]
	mul	edi
	add	eax,DWORD [32+esp]	; only the carry out of this add is used
	mov	eax,DWORD [4+esi]
	adc	edx,0
	inc	ecx
	jmp	NEAR L$0102ndmadd
align	16
L$0111stmadd:
	; tp[] += a[] * b[i]
	mov	ebp,edx
	mul	edi
	add	ebp,DWORD [32+ecx*4+esp]
	lea	ecx,[1+ecx]
	adc	edx,0
	add	ebp,eax
	mov	eax,DWORD [ecx*4+esi]
	adc	edx,0
	cmp	ecx,ebx
	mov	DWORD [28+ecx*4+esp],ebp
	jl	NEAR L$0111stmadd
	mov	ebp,edx
	mul	edi
	add	eax,DWORD [32+ebx*4+esp]
	mov	edi,DWORD [20+esp]	; n0
	adc	edx,0
	mov	esi,DWORD [16+esp]	; esi = n
	add	ebp,eax
	adc	edx,0
	imul	edi,DWORD [32+esp]	; edi = n0 * tp[0]
	xor	ecx,ecx
	add	edx,DWORD [36+ebx*4+esp]
	mov	DWORD [32+ebx*4+esp],ebp
	adc	ecx,0
	mov	eax,DWORD [esi]
	mov	DWORD [36+ebx*4+esp],edx
	mov	DWORD [40+ebx*4+esp],ecx
	mul	edi
	add	eax,DWORD [32+esp]	; only the carry out of this add is used
	mov	eax,DWORD [4+esi]
	adc	edx,0
	mov	ecx,1
align	16
L$0102ndmadd:
	; tp[] = (tp[] + n[] * edi) shifted down by one word
	; (results land at tp[j-2] after the index bump, i.e. one slot lower).
	mov	ebp,edx
	mul	edi
	add	ebp,DWORD [32+ecx*4+esp]
	lea	ecx,[1+ecx]
	adc	edx,0
	add	ebp,eax
	mov	eax,DWORD [ecx*4+esi]
	adc	edx,0
	cmp	ecx,ebx
	mov	DWORD [24+ecx*4+esp],ebp
	jl	NEAR L$0102ndmadd
	mov	ebp,edx
	mul	edi
	add	ebp,DWORD [32+ebx*4+esp]
	adc	edx,0
	add	ebp,eax
	adc	edx,0
	mov	DWORD [28+ebx*4+esp],ebp
	xor	eax,eax
	mov	ecx,DWORD [12+esp]	; running pointer into b
	add	edx,DWORD [36+ebx*4+esp]
	adc	eax,DWORD [40+ebx*4+esp]
	lea	ecx,[4+ecx]		; advance to the next word of b
	mov	DWORD [32+ebx*4+esp],edx
	cmp	ecx,DWORD [28+esp]	; reached b + 4*num?
	mov	DWORD [36+ebx*4+esp],eax
	je	NEAR L$007common_tail
	mov	edi,DWORD [ecx]		; edi = next b word
	mov	esi,DWORD [8+esp]	; esi = a
	mov	DWORD [12+esp],ecx
	xor	ecx,ecx
	xor	edx,edx
	mov	eax,DWORD [esi]
	jmp	NEAR L$0111stmadd

	;---------------------------------------------------------------
	; Squaring variant (a == b, num even).  Cross products a[j]*a[i]
	; are doubled as they are produced; ebx carries the bit shifted
	; out of each doubled word into the next one.
	;   [esp]    = num-1
	;   [12+esp] = i (outer index)
	;---------------------------------------------------------------
align	16
L$008bn_sqr_mont:
	mov	DWORD [esp],ebx
	mov	DWORD [12+esp],ecx	; i = 0
	mov	eax,edi
	mul	edi			; a[0]^2
	mov	DWORD [32+esp],eax	; tp[0]
	mov	ebx,edx
	shr	edx,1
	and	ebx,1
	inc	ecx
align	16
L$012sqr:
	mov	eax,DWORD [ecx*4+esi]
	mov	ebp,edx
	mul	edi
	add	eax,ebp
	lea	ecx,[1+ecx]
	adc	edx,0
	lea	ebp,[eax*2+ebx]		; doubled word plus the carried-in bit
	shr	eax,31			; bit shifted out by the doubling
	cmp	ecx,DWORD [esp]
	mov	ebx,eax
	mov	DWORD [28+ecx*4+esp],ebp
	jl	NEAR L$012sqr
	mov	eax,DWORD [ecx*4+esi]
	mov	ebp,edx
	mul	edi
	add	eax,ebp
	mov	edi,DWORD [20+esp]	; n0
	adc	edx,0
	mov	esi,DWORD [16+esp]	; esi = n
	lea	ebp,[eax*2+ebx]
	imul	edi,DWORD [32+esp]	; edi = n0 * tp[0]
	shr	eax,31
	mov	DWORD [32+ecx*4+esp],ebp
	lea	ebp,[edx*2+eax]
	mov	eax,DWORD [esi]
	shr	edx,31
	mov	DWORD [36+ecx*4+esp],ebp
	mov	DWORD [40+ecx*4+esp],edx
	mul	edi
	add	eax,DWORD [32+esp]	; only the carry out of this add is used
	mov	ebx,ecx
	adc	edx,0
	mov	eax,DWORD [4+esi]
	mov	ecx,1
align	16
L$0133rdmadd:
	; tp[] = (tp[] + n[] * edi) shifted down by one word; two words of n
	; are handled per iteration (ecx advances by 2).
	mov	ebp,edx
	mul	edi
	add	ebp,DWORD [32+ecx*4+esp]
	adc	edx,0
	add	ebp,eax
	mov	eax,DWORD [4+ecx*4+esi]
	adc	edx,0
	mov	DWORD [28+ecx*4+esp],ebp
	mov	ebp,edx
	mul	edi
	add	ebp,DWORD [36+ecx*4+esp]
	lea	ecx,[2+ecx]
	adc	edx,0
	add	ebp,eax
	mov	eax,DWORD [ecx*4+esi]
	adc	edx,0
	cmp	ecx,ebx
	mov	DWORD [24+ecx*4+esp],ebp
	jl	NEAR L$0133rdmadd
	mov	ebp,edx
	mul	edi
	add	ebp,DWORD [32+ebx*4+esp]
	adc	edx,0
	add	ebp,eax
	adc	edx,0
	mov	DWORD [28+ebx*4+esp],ebp
	mov	ecx,DWORD [12+esp]	; ecx = i
	xor	eax,eax
	mov	esi,DWORD [8+esp]	; esi = a
	add	edx,DWORD [36+ebx*4+esp]
	adc	eax,DWORD [40+ebx*4+esp]
	mov	DWORD [32+ebx*4+esp],edx
	cmp	ecx,ebx			; i == num-1: all rows done
	mov	DWORD [36+ebx*4+esp],eax
	je	NEAR L$007common_tail
	mov	edi,DWORD [4+ecx*4+esi]	; edi = a[i+1]
	lea	ecx,[1+ecx]
	mov	eax,edi
	mov	DWORD [12+esp],ecx	; i = i+1
	mul	edi			; a[i]^2
	add	eax,DWORD [32+ecx*4+esp]
	adc	edx,0
	mov	DWORD [32+ecx*4+esp],eax
	xor	ebp,ebp
	cmp	ecx,ebx
	lea	ecx,[1+ecx]		; lea keeps the flags from cmp for je
	je	NEAR L$014sqrlast
	mov	ebx,edx
	shr	edx,1
	and	ebx,1
align	16
L$015sqradd:
	; tp[j] += 2 * a[j] * a[i] for j > i
	mov	eax,DWORD [ecx*4+esi]
	mov	ebp,edx
	mul	edi
	add	eax,ebp
	lea	ebp,[eax*1+eax]		; doubled word (flags untouched)
	adc	edx,0
	shr	eax,31
	add	ebp,DWORD [32+ecx*4+esp]
	lea	ecx,[1+ecx]
	adc	eax,0
	add	ebp,ebx
	adc	eax,0
	cmp	ecx,DWORD [esp]
	mov	DWORD [28+ecx*4+esp],ebp
	mov	ebx,eax
	jle	NEAR L$015sqradd
	mov	ebp,edx
	add	edx,edx
	shr	ebp,31
	add	edx,ebx
	adc	ebp,0
L$014sqrlast:
	mov	edi,DWORD [20+esp]	; n0
	mov	esi,DWORD [16+esp]	; esi = n
	imul	edi,DWORD [32+esp]	; edi = n0 * tp[0]
	add	edx,DWORD [32+ecx*4+esp]
	mov	eax,DWORD [esi]
	adc	ebp,0
	mov	DWORD [32+ecx*4+esp],edx
	mov	DWORD [36+ecx*4+esp],ebp
	mul	edi
	add	eax,DWORD [32+esp]	; only the carry out of this add is used
	lea	ebx,[ecx-1]
	adc	edx,0
	mov	ecx,1
	mov	eax,DWORD [4+esi]
	jmp	NEAR L$0133rdmadd

	;---------------------------------------------------------------
	; Common tail, entered with ebx = num-1.
	;   1. r[] = tp[] - n[] with borrow propagation.
	;   2. Mask-select, word by word and without branching on data,
	;      between tp[] and the r[] just written; every tp word read
	;      is then overwritten.
	;---------------------------------------------------------------
align	16
L$007common_tail:
	mov	ebp,DWORD [16+esp]	; ebp = n
	mov	edi,DWORD [4+esp]	; edi = r
	lea	esi,[32+esp]		; esi = tp
	mov	eax,DWORD [esi]
	mov	ecx,ebx
	xor	edx,edx			; index = 0, and CF = 0 for the first sbb
align	16
L$016sub:
	sbb	eax,DWORD [edx*4+ebp]
	mov	DWORD [edx*4+edi],eax
	dec	ecx			; dec leaves CF alone; its SF/OF drive jge
	mov	eax,DWORD [4+edx*4+esi]
	lea	edx,[1+edx]
	jge	NEAR L$016sub
	sbb	eax,0			; top tp word minus the final borrow
	mov	edx,-1
	xor	edx,eax			; edx = ~eax
	jmp	NEAR L$017copy
align	16
L$017copy:
	; r[k] = (tp[k] & eax) | (r[k] & edx), for k = num-1 down to 0
	mov	esi,DWORD [32+ebx*4+esp]
	mov	ebp,DWORD [ebx*4+edi]
	mov	DWORD [32+ebx*4+esp],ecx	; overwrite tp[k]; ecx is -1 after the countdown above
	and	esi,eax
	and	ebp,edx
	or	ebp,esi
	mov	DWORD [ebx*4+edi],ebp
	dec	ebx
	jge	NEAR L$017copy
	mov	esp,DWORD [24+esp]	; back to the caller frame
	mov	eax,1
L$000just_leave:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; ASCII, NUL-terminated:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
db	111,114,103,62,0
segment	.bss
common	_OPENSSL_ia32cap_P 16