; Section selection depends on the NASM output format: OMF ("obj"), win32
; COFF, or anything else.
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): presumably the "@feat.00" symbol with bit 0 set marks the
; object as SafeSEH-compatible for the Microsoft linker -- confirm.
$@feat.00 equ 1
section .text code align=64
%else
section .text code
%endif
; _OPENSSL_ia32cap_P is not imported here; it is declared as a common
; symbol in .bss at the end of this file.
;extern _OPENSSL_ia32cap_P
global _bn_mul_mont
align 16
;-----------------------------------------------------------------------
; _bn_mul_mont -- word-serial Montgomery multiplication, 32-bit x86.
;
; ABI: all arguments are read from the stack; ebp, ebx, esi and edi are
;      saved on entry and restored on exit; the status is returned in eax.
; NOTE(review): the argument names below are shorthand chosen for these
;      comments; presumably the C prototype is
;      int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm with the caller.
;
; In (offsets are valid after the four pushes):
;   [20+esp] rp   destination, num words (written in the common tail)
;   [24+esp] ap   first operand, num words
;   [28+esp] bp   second operand, num words
;   [32+esp] np   modulus, num words (subtracted in the common tail)
;   [36+esp] n0   pointer; only its first dword is loaded
;   [40+esp] num  word count (compared as a signed value)
; Out:
;   eax = 0 when num < 4 (the routine returns immediately),
;   eax = 1 otherwise.
; Clobbers: eax, ecx, edx, flags; mm0-mm7 on the pmuludq path (emms is
;   executed before that path joins the common tail).
;
; Local frame (relative to esp after allocation, 64-byte aligned,
; 32 + 4*(num+2) bytes):
;   [ 0+esp] num-1 (written on the squaring path only)
;   [ 4+esp] rp
;   [ 8+esp] ap
;   [12+esp] bp; later the current bp pointer (integer multiply path)
;            or the outer index i (squaring path)
;   [16+esp] np
;   [20+esp] the dword loaded from *n0
;   [24+esp] esp value to restore on exit
;   [28+esp] &bp[num] (integer multiply path only)
;   [32+esp] tp[0 .. num+1], the running sum
;-----------------------------------------------------------------------
_bn_mul_mont:
L$_bn_mul_mont_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        xor     eax,eax                         ; return value 0 for the early exit
        mov     edi,DWORD [40+esp]              ; edi = num
        cmp     edi,4
        jl      NEAR L$000just_leave            ; num < 4 (signed): leave with eax = 0
        lea     esi,[20+esp]                    ; esi = address of the first argument slot
        lea     edx,[24+esp]                    ; edx = address of the second argument slot
        ; ebp = esp - 32 - 4*(num+2): candidate address of the local frame.
        add     edi,2
        neg     edi
        lea     ebp,[edi*4+esp-32]
        neg     edi                             ; edi = num+2 again
        ; Lower ebp until it is congruent to edx modulo 2048, then lower it
        ; by another 2048 if bit 11 of ebp still equals bit 11 of edx, and
        ; finally round down to a multiple of 64.
        ; NOTE(review): presumably this controls how the frame aliases with
        ; the argument area in the cache -- confirm.
        mov     eax,ebp
        sub     eax,edx
        and     eax,2047
        sub     ebp,eax
        xor     edx,ebp
        and     edx,2048
        xor     edx,2048
        sub     ebp,edx
        and     ebp,-64
        ; Move esp down to ebp in steps of 4096 bytes, reading one dword at
        ; every step; the walk ends with esp == ebp.
        ; NOTE(review): presumably so that stack guard pages are touched in
        ; order -- confirm.
        mov     eax,esp
        sub     eax,ebp
        and     eax,-4096
        mov     edx,esp                         ; edx = esp to restore on exit
        lea     esp,[eax*1+ebp]
        mov     eax,DWORD [esp]
        cmp     esp,ebp
        ja      NEAR L$001page_walk
        jmp     NEAR L$002page_walk_done
align 16
L$001page_walk:
        lea     esp,[esp-4096]
        mov     eax,DWORD [esp]
        cmp     esp,ebp
        ja      NEAR L$001page_walk
L$002page_walk_done:
        ; Copy the arguments into the frame (esi still points at the
        ; caller's argument slots).
        mov     eax,DWORD [esi]                 ; rp
        mov     ebx,DWORD [4+esi]               ; ap
        mov     ecx,DWORD [8+esi]               ; bp
        mov     ebp,DWORD [12+esi]              ; np
        mov     esi,DWORD [16+esi]              ; n0 (pointer)
        mov     esi,DWORD [esi]                 ; dword stored at *n0
        mov     DWORD [4+esp],eax
        mov     DWORD [8+esp],ebx
        mov     DWORD [12+esp],ecx
        mov     DWORD [16+esp],ebp
        mov     DWORD [20+esp],esi
        lea     ebx,[edi-3]                     ; ebx = num-1
        mov     DWORD [24+esp],edx              ; saved esp
        ; Choose the code path from bit 26 of the first dword of
        ; _OPENSSL_ia32cap_P.
        ; NOTE(review): going by the label name, that bit presumably
        ; reports SSE2 support -- confirm where the variable is filled in.
        lea     eax,[_OPENSSL_ia32cap_P]
        bt      DWORD [eax],26
        jnc     NEAR L$003non_sse2
        ;---------------------------------------------------------------
        ; pmuludq path (32x32->64 multiplies in MMX registers).
        ;   mm7 = 0x00000000FFFFFFFF mask, mm4 = bp[i], mm5 = m,
        ;   mm2 = running ap[j]*bp[i] column sum,
        ;   mm3 = running np[j]*m column sum;
        ;   the high dword of mm2/mm3 is the carry into the next column.
        ;   esi = ap, edi = bp, ebp = np, edx = i, ecx = j.
        ;---------------------------------------------------------------
        mov     eax,-1
        movd    mm7,eax
        mov     esi,DWORD [8+esp]
        mov     edi,DWORD [12+esp]
        mov     ebp,DWORD [16+esp]
        xor     edx,edx                         ; i = 0
        xor     ecx,ecx                         ; j = 0
        movd    mm4,DWORD [edi]                 ; bp[0]
        movd    mm5,DWORD [esi]                 ; ap[0]
        movd    mm3,DWORD [ebp]                 ; np[0]
        pmuludq mm5,mm4                         ; ap[0]*bp[0]
        movq    mm2,mm5
        movq    mm0,mm5
        pand    mm0,mm7                         ; low dword of the product
        pmuludq mm5,[20+esp]                    ; m = low dword * n0 value
        pmuludq mm3,mm5                         ; np[0]*m
        paddq   mm3,mm0
        movd    mm1,DWORD [4+ebp]               ; np[1]
        movd    mm0,DWORD [4+esi]               ; ap[1]
        psrlq   mm2,32
        psrlq   mm3,32
        inc     ecx                             ; j = 1
align 16
L$0041st:
        ; First pass (i = 0): tp[j-1] receives the low dword of
        ; ap[j]*bp[0] + np[j]*m + carries, for j = 1 .. num-2.
        pmuludq mm0,mm4
        pmuludq mm1,mm5
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        pand    mm0,mm7
        movd    mm1,DWORD [4+ecx*4+ebp]         ; np[j+1]
        paddq   mm3,mm0
        movd    mm0,DWORD [4+ecx*4+esi]         ; ap[j+1]
        psrlq   mm2,32
        movd    DWORD [28+ecx*4+esp],mm3        ; tp[j-1]
        psrlq   mm3,32
        lea     ecx,[1+ecx]
        cmp     ecx,ebx
        jl      NEAR L$0041st
        ; Last column (j = num-1) of the first pass.
        pmuludq mm0,mm4
        pmuludq mm1,mm5
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        pand    mm0,mm7
        paddq   mm3,mm0
        movd    DWORD [28+ecx*4+esp],mm3        ; tp[num-2]
        psrlq   mm2,32
        psrlq   mm3,32
        paddq   mm3,mm2
        movq    [32+ebx*4+esp],mm3              ; tp[num-1] and tp[num] in one 64-bit store
        inc     edx                             ; i = 1
L$005outer:
        ; Outer loop, i = 1 .. num-1: tp = (tp + ap*bp[i] + np*m) >> 32.
        xor     ecx,ecx
        movd    mm4,DWORD [edx*4+edi]           ; bp[i]
        movd    mm5,DWORD [esi]                 ; ap[0]
        movd    mm6,DWORD [32+esp]              ; tp[0]
        movd    mm3,DWORD [ebp]                 ; np[0]
        pmuludq mm5,mm4
        paddq   mm5,mm6                         ; ap[0]*bp[i] + tp[0]
        movq    mm0,mm5
        movq    mm2,mm5
        pand    mm0,mm7
        pmuludq mm5,[20+esp]                    ; m = low dword * n0 value
        pmuludq mm3,mm5
        paddq   mm3,mm0
        movd    mm6,DWORD [36+esp]              ; tp[1]
        movd    mm1,DWORD [4+ebp]
        movd    mm0,DWORD [4+esi]
        psrlq   mm2,32
        psrlq   mm3,32
        paddq   mm2,mm6
        inc     ecx
        dec     ebx                             ; ebx = num-2: inner iteration count
L$006inner:
        pmuludq mm0,mm4
        pmuludq mm1,mm5
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        movd    mm6,DWORD [36+ecx*4+esp]        ; tp[j+1]
        pand    mm0,mm7
        movd    mm1,DWORD [4+ecx*4+ebp]
        paddq   mm3,mm0
        movd    mm0,DWORD [4+ecx*4+esi]
        psrlq   mm2,32
        movd    DWORD [28+ecx*4+esp],mm3        ; tp[j-1]
        psrlq   mm3,32
        paddq   mm2,mm6
        dec     ebx
        lea     ecx,[1+ecx]                     ; lea keeps ZF from the dec for the jnz
        jnz     NEAR L$006inner
        mov     ebx,ecx                         ; ebx = num-1 again
        ; Last column of this outer iteration.
        pmuludq mm0,mm4
        pmuludq mm1,mm5
        paddq   mm2,mm0
        paddq   mm3,mm1
        movq    mm0,mm2
        pand    mm0,mm7
        paddq   mm3,mm0
        movd    DWORD [28+ecx*4+esp],mm3        ; tp[num-2]
        psrlq   mm2,32
        psrlq   mm3,32
        movd    mm6,DWORD [36+ebx*4+esp]        ; previous tp[num]
        paddq   mm3,mm2
        paddq   mm3,mm6
        movq    [32+ebx*4+esp],mm3              ; tp[num-1], tp[num]
        lea     edx,[1+edx]
        cmp     edx,ebx
        jle     NEAR L$005outer
        emms                                    ; leave MMX state before the integer tail
        jmp     NEAR L$007common_tail
align 16
L$003non_sse2:
        ;---------------------------------------------------------------
        ; Integer-only path.  If ap == bp and num is even, branch to the
        ; dedicated squaring code; otherwise do the general multiply.
        ;---------------------------------------------------------------
        mov     esi,DWORD [8+esp]               ; ap
        lea     ebp,[1+ebx]                     ; num
        mov     edi,DWORD [12+esp]              ; bp
        xor     ecx,ecx
        mov     edx,esi
        and     ebp,1                           ; num & 1
        sub     edx,edi                         ; ap - bp
        lea     eax,[4+ebx*4+edi]               ; &bp[num]
        or      ebp,edx                         ; zero iff ap == bp and num is even
        mov     edi,DWORD [edi]                 ; bp[0]; mov keeps ZF from the or
        jz      NEAR L$008bn_sqr_mont
        mov     DWORD [28+esp],eax              ; end-of-bp pointer
        mov     eax,DWORD [esi]                 ; ap[0]
        xor     edx,edx
align 16
L$009mull:
        ; tp[j] = low word of ap[j]*bp[0] + carry, for j = 0 .. num-2.
        mov     ebp,edx
        mul     edi
        add     ebp,eax
        lea     ecx,[1+ecx]
        adc     edx,0
        mov     eax,DWORD [ecx*4+esi]
        cmp     ecx,ebx
        mov     DWORD [28+ecx*4+esp],ebp
        jl      NEAR L$009mull
        ; Last word of ap*bp[0]; then m = n0 value * tp[0] and the first
        ; step of adding np*m (the low word of tp[0] + np[0]*m is dropped,
        ; only its carry is kept in edx).
        mov     ebp,edx
        mul     edi
        mov     edi,DWORD [20+esp]
        add     eax,ebp
        mov     esi,DWORD [16+esp]              ; esi = np
        adc     edx,0
        imul    edi,DWORD [32+esp]              ; edi = m
        mov     DWORD [32+ebx*4+esp],eax        ; tp[num-1]
        xor     ecx,ecx
        mov     DWORD [36+ebx*4+esp],edx        ; tp[num]
        mov     DWORD [40+ebx*4+esp],ecx        ; tp[num+1] = 0
        mov     eax,DWORD [esi]
        mul     edi
        add     eax,DWORD [32+esp]
        mov     eax,DWORD [4+esi]               ; np[1]; mov keeps CF for the adc
        adc     edx,0
        inc     ecx
        jmp     NEAR L$0102ndmadd
align 16
L$0111stmadd:
        ; tp[j] += ap[j]*bp[i] + carry, for j = 0 .. num-2
        ; (edi = bp[i], esi = ap).
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ecx*4+esp]
        lea     ecx,[1+ecx]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [ecx*4+esi]
        adc     edx,0
        cmp     ecx,ebx
        mov     DWORD [28+ecx*4+esp],ebp
        jl      NEAR L$0111stmadd
        ; Last word, carry into tp[num] / tp[num+1], then compute m and
        ; start the np*m pass as above.
        mov     ebp,edx
        mul     edi
        add     eax,DWORD [32+ebx*4+esp]
        mov     edi,DWORD [20+esp]
        adc     edx,0
        mov     esi,DWORD [16+esp]              ; esi = np
        add     ebp,eax
        adc     edx,0
        imul    edi,DWORD [32+esp]              ; edi = m = n0 value * tp[0]
        xor     ecx,ecx
        add     edx,DWORD [36+ebx*4+esp]
        mov     DWORD [32+ebx*4+esp],ebp
        adc     ecx,0
        mov     eax,DWORD [esi]
        mov     DWORD [36+ebx*4+esp],edx
        mov     DWORD [40+ebx*4+esp],ecx
        mul     edi
        add     eax,DWORD [32+esp]
        mov     eax,DWORD [4+esi]
        adc     edx,0
        mov     ecx,1
align 16
L$0102ndmadd:
        ; tp[j-1] = low word of tp[j] + np[j]*m + carry, for j = 1 .. num-2
        ; (the store is one word lower than the load: the sum shifts down).
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ecx*4+esp]
        lea     ecx,[1+ecx]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [ecx*4+esi]
        adc     edx,0
        cmp     ecx,ebx
        mov     DWORD [24+ecx*4+esp],ebp
        jl      NEAR L$0102ndmadd
        ; Last word, then fold the two top words down by one position.
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ebx*4+esp]
        adc     edx,0
        add     ebp,eax
        adc     edx,0
        mov     DWORD [28+ebx*4+esp],ebp        ; tp[num-2]
        xor     eax,eax
        mov     ecx,DWORD [12+esp]              ; current bp pointer
        add     edx,DWORD [36+ebx*4+esp]
        adc     eax,DWORD [40+ebx*4+esp]
        lea     ecx,[4+ecx]                     ; advance to the next bp word
        mov     DWORD [32+ebx*4+esp],edx        ; tp[num-1]
        cmp     ecx,DWORD [28+esp]              ; reached &bp[num]?
        mov     DWORD [36+ebx*4+esp],eax        ; tp[num]
        je      NEAR L$007common_tail
        mov     edi,DWORD [ecx]                 ; next bp word
        mov     esi,DWORD [8+esp]               ; esi = ap
        mov     DWORD [12+esp],ecx
        xor     ecx,ecx
        xor     edx,edx
        mov     eax,DWORD [esi]
        jmp     NEAR L$0111stmadd
align 16
L$008bn_sqr_mont:
        ;---------------------------------------------------------------
        ; Squaring path (reached only with ap == bp and num even).
        ; Each cross product ap[i]*ap[j], j > i, is computed once and
        ; doubled; the carry is kept halved in edx and the bit shifted
        ; out is passed along in ebx.
        ; Here esi = ap, edi = ap[0], ecx = 0, ebx = num-1.
        ;---------------------------------------------------------------
        mov     DWORD [esp],ebx                 ; keep num-1 in the frame
        mov     DWORD [12+esp],ecx              ; i = 0
        mov     eax,edi
        mul     edi                             ; ap[0]^2
        mov     DWORD [32+esp],eax              ; tp[0]
        mov     ebx,edx
        shr     edx,1
        and     ebx,1
        inc     ecx
align 16
L$012sqr:
        ; tp[j] = 2*(low word of ap[j]*ap[0] + carry) + ebx,
        ; for j = 1 .. num-2.
        mov     eax,DWORD [ecx*4+esi]
        mov     ebp,edx
        mul     edi
        add     eax,ebp
        lea     ecx,[1+ecx]
        adc     edx,0
        lea     ebp,[eax*2+ebx]
        shr     eax,31
        cmp     ecx,DWORD [esp]
        mov     ebx,eax
        mov     DWORD [28+ecx*4+esp],ebp
        jl      NEAR L$012sqr
        ; Last word (j = num-1), top words, then m and the start of the
        ; np*m pass.
        mov     eax,DWORD [ecx*4+esi]
        mov     ebp,edx
        mul     edi
        add     eax,ebp
        mov     edi,DWORD [20+esp]
        adc     edx,0
        mov     esi,DWORD [16+esp]              ; esi = np
        lea     ebp,[eax*2+ebx]
        imul    edi,DWORD [32+esp]              ; edi = m = n0 value * tp[0]
        shr     eax,31
        mov     DWORD [32+ecx*4+esp],ebp        ; tp[num-1]
        lea     ebp,[edx*2+eax]
        mov     eax,DWORD [esi]
        shr     edx,31
        mov     DWORD [36+ecx*4+esp],ebp        ; tp[num]
        mov     DWORD [40+ecx*4+esp],edx        ; tp[num+1]
        mul     edi
        add     eax,DWORD [32+esp]
        mov     ebx,ecx                         ; ebx = num-1
        adc     edx,0
        mov     eax,DWORD [4+esi]
        mov     ecx,1
align 16
L$0133rdmadd:
        ; Same np*m accumulate-and-shift step as L$0102ndmadd, with two
        ; words handled per iteration.
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ecx*4+esp]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [4+ecx*4+esi]
        adc     edx,0
        mov     DWORD [28+ecx*4+esp],ebp
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [36+ecx*4+esp]
        lea     ecx,[2+ecx]
        adc     edx,0
        add     ebp,eax
        mov     eax,DWORD [ecx*4+esi]
        adc     edx,0
        cmp     ecx,ebx
        mov     DWORD [24+ecx*4+esp],ebp
        jl      NEAR L$0133rdmadd
        ; Last word, then fold the two top words down by one position.
        mov     ebp,edx
        mul     edi
        add     ebp,DWORD [32+ebx*4+esp]
        adc     edx,0
        add     ebp,eax
        adc     edx,0
        mov     DWORD [28+ebx*4+esp],ebp        ; tp[num-2]
        mov     ecx,DWORD [12+esp]              ; i
        xor     eax,eax
        mov     esi,DWORD [8+esp]               ; esi = ap
        add     edx,DWORD [36+ebx*4+esp]
        adc     eax,DWORD [40+ebx*4+esp]
        mov     DWORD [32+ebx*4+esp],edx        ; tp[num-1]
        cmp     ecx,ebx                         ; i == num-1: all words done
        mov     DWORD [36+ebx*4+esp],eax        ; tp[num]
        je      NEAR L$007common_tail
        ; Next outer step: i++, add ap[i]^2 into tp[i].
        mov     edi,DWORD [4+ecx*4+esi]         ; ap[i+1]
        lea     ecx,[1+ecx]
        mov     eax,edi
        mov     DWORD [12+esp],ecx              ; store the new i
        mul     edi
        add     eax,DWORD [32+ecx*4+esp]
        adc     edx,0
        mov     DWORD [32+ecx*4+esp],eax
        xor     ebp,ebp
        cmp     ecx,ebx
        lea     ecx,[1+ecx]                     ; lea keeps ZF from the cmp for the je
        je      NEAR L$014sqrlast               ; i == num-1: no cross products left
        mov     ebx,edx
        shr     edx,1
        and     ebx,1
align 16
L$015sqradd:
        ; tp[j] += 2*(low word of ap[j]*ap[i] + carry) + ebx,
        ; for j = i+1 .. num-1 (the bound num-1 is read from [esp]).
        mov     eax,DWORD [ecx*4+esi]
        mov     ebp,edx
        mul     edi
        add     eax,ebp
        lea     ebp,[eax*1+eax]
        adc     edx,0
        shr     eax,31
        add     ebp,DWORD [32+ecx*4+esp]
        lea     ecx,[1+ecx]
        adc     eax,0
        add     ebp,ebx
        adc     eax,0
        cmp     ecx,DWORD [esp]
        mov     DWORD [28+ecx*4+esp],ebp
        mov     ebx,eax
        jle     NEAR L$015sqradd
        ; Double the remaining carry: ebp:edx = 2*edx + ebx.
        mov     ebp,edx
        add     edx,edx
        shr     ebp,31
        add     edx,ebx
        adc     ebp,0
L$014sqrlast:
        ; Add the carry into tp[num] / tp[num+1] (ecx = num here), compute
        ; m and re-enter the np*m pass.
        mov     edi,DWORD [20+esp]
        mov     esi,DWORD [16+esp]              ; esi = np
        imul    edi,DWORD [32+esp]              ; edi = m = n0 value * tp[0]
        add     edx,DWORD [32+ecx*4+esp]
        mov     eax,DWORD [esi]
        adc     ebp,0
        mov     DWORD [32+ecx*4+esp],edx
        mov     DWORD [36+ecx*4+esp],ebp
        mul     edi
        add     eax,DWORD [32+esp]
        lea     ebx,[ecx-1]                     ; ebx = num-1
        adc     edx,0
        mov     ecx,1
        mov     eax,DWORD [4+esi]
        jmp     NEAR L$0133rdmadd
align 16
L$007common_tail:
        ;---------------------------------------------------------------
        ; Final reduction, shared by all paths (ebx = num-1 on entry):
        ; rp = tp - np with borrow, then a branch-free select between
        ; that difference and tp.
        ;---------------------------------------------------------------
        mov     ebp,DWORD [16+esp]              ; np
        mov     edi,DWORD [4+esp]               ; rp
        lea     esi,[32+esp]                    ; tp
        mov     eax,DWORD [esi]                 ; tp[0]
        mov     ecx,ebx                         ; loop counter = num-1
        xor     edx,edx                         ; index 0; also clears CF for the first sbb
align 16
L$016sub:
        sbb     eax,DWORD [edx*4+ebp]
        mov     DWORD [edx*4+edi],eax           ; rp[k] = tp[k] - np[k] - borrow
        dec     ecx                             ; dec leaves CF (the borrow) untouched
        mov     eax,DWORD [4+edx*4+esi]         ; tp[k+1]
        lea     edx,[1+edx]
        jge     NEAR L$016sub                   ; runs num times; ecx ends at -1
        sbb     eax,0                           ; eax = tp[num] - final borrow
        mov     edx,-1
        xor     edx,eax                         ; edx = ~eax
        ; NOTE(review): the select below treats eax as an all-ones or
        ; all-zero mask; that presumably relies on tp being less than
        ; twice the modulus at this point -- confirm.
        jmp     NEAR L$017copy
align 16
L$017copy:
        ; For k = num-1 down to 0:
        ;   rp[k] = (tp[k] & eax) | (rp[k] & ~eax)
        ; and tp[k] is overwritten with ecx (0xFFFFFFFF, left over from the
        ; subtraction loop counter).
        mov     esi,DWORD [32+ebx*4+esp]
        mov     ebp,DWORD [ebx*4+edi]
        mov     DWORD [32+ebx*4+esp],ecx
        and     esi,eax
        and     ebp,edx
        or      ebp,esi
        mov     DWORD [ebx*4+edi],ebp
        dec     ebx
        jge     NEAR L$017copy
        mov     esp,DWORD [24+esp]              ; drop the frame: back to the post-push esp
        mov     eax,1                           ; return value 1
L$000just_leave:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
; NUL-terminated ASCII tag:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
db 111,114,103,62,0
; 16-byte common symbol; its first dword is bit-tested near the top of
; _bn_mul_mont to choose between the pmuludq and integer-only paths.
segment .bss
common _OPENSSL_ia32cap_P 16