;=======================================================================
; Carry-less (GF(2)[x]) 64x64 -> 128-bit multiplication for 32-bit x86.
;
; Syntax : NASM (Intel operand order), 32-bit code.
; ABI    : 32-bit stack-argument convention; the exported routine reads
;          five dword arguments at [esp+4]..[esp+20] and returns with a
;          plain `ret` (caller cleans up).  It preserves ebp/ebx/esi/edi.
;
; Exported:
;   _bn_GF2m_mul_2x2     - dispatcher + three implementations
; File-private helpers (register calling convention, see their headers):
;   __mul_1x1_mmx        - 32x32 -> 64 carry-less multiply, result in mm0
;   __mul_1x1_ialu       - 32x32 -> 64 carry-less multiply, result edx:eax
;=======================================================================

%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif

; Capability masks tested by the dispatcher.  The bit positions are what
; the code checks; the feature names assume _OPENSSL_ia32cap_P mirrors
; CPUID leaf 1 (word 0 = EDX, word 1 = ECX) - confirm against the code
; that fills the vector.
%define	CAP0_MMX	(1<<23)		; word 0, bit 23
%define	CAP0_FXSR	(1<<24)		; word 0, bit 24
%define	CAP1_PCLMULQDQ	(1<<1)		; word 1, bit 1

; _OPENSSL_ia32cap_P is declared with `common` at the end of this file,
; so the extern below stays commented out.
;extern	_OPENSSL_ia32cap_P

;-----------------------------------------------------------------------
; __mul_1x1_mmx - 32x32 -> 64-bit carry-less multiply (MMX accumulator)
; In:    eax = a, ebx = b
; Out:   mm0 = a (x) b  (64-bit carry-less product)
; Clobb: ebx, ecx, edx, ebp, esi, edi, mm1-mm5, flags (eax is not written)
; Stack: 36 bytes of scratch; bytes 0..31 hold an 8-entry dword table
;        tab[i] = (a with its top two bits cleared) (x) i, for i = 0..7.
;        Bits 31 and 30 of a are handled separately through mm5 / mm4 so
;        that every table entry fits in 32 bits.
; b is consumed in eleven 3-bit windows; window k is looked up in the
; table, shifted left by 3*k and XORed into mm0.
;-----------------------------------------------------------------------
align	16
__mul_1x1_mmx:
	sub	esp,36
	mov	ecx,eax
	lea	edx,[eax*1+eax]			; edx = 2a
	and	ecx,0x3fffffff			; ecx = a1 = a without bits 31,30
	lea	ebp,[edx*1+edx]			; ebp = a4 = 4a (top bits fall off)
	mov	DWORD [esp],0			; tab[0] = 0
	and	edx,0x7fffffff			; edx = a2 = 2a without bit 31
	movd	mm2,eax				; mm2 = a
	movd	mm3,ebx				; mm3 = b
	mov	DWORD [4+esp],ecx		; tab[1] = a1
	xor	ecx,edx				; ecx = a1^a2
	pxor	mm5,mm5
	pxor	mm4,mm4
	mov	DWORD [8+esp],edx		; tab[2] = a2
	xor	edx,ebp				; edx = a2^a4
	mov	DWORD [12+esp],ecx		; tab[3] = a1^a2
	pcmpgtd	mm5,mm2				; mm5 = all-ones dword if bit 31 of a set
	paddd	mm2,mm2				; mm2 = 2a, moves bit 30 into the sign bit
	xor	ecx,edx				; ecx = a1^a4
	mov	DWORD [16+esp],ebp		; tab[4] = a4
	xor	ebp,edx				; ebp = a2
	pand	mm5,mm3				; mm5 = b if bit 31 of a set, else 0
	pcmpgtd	mm4,mm2				; mm4 = all-ones dword if bit 30 of a set
	mov	DWORD [20+esp],ecx		; tab[5] = a1^a4
	xor	ebp,ecx				; ebp = a1^a2^a4
	psllq	mm5,31				; mm5 = (bit31 ? b : 0) << 31
	pand	mm4,mm3				; mm4 = b if bit 30 of a set, else 0
	mov	DWORD [24+esp],edx		; tab[6] = a2^a4
	mov	esi,7
	mov	DWORD [28+esp],ebp		; tab[7] = a1^a2^a4
	mov	ebp,esi				; ebp = 7, the window mask from here on
	and	esi,ebx				; esi = window 0
	shr	ebx,3
	mov	edi,ebp
	psllq	mm4,30				; mm4 = (bit30 ? b : 0) << 30
	and	edi,ebx				; edi = window 1
	shr	ebx,3
	movd	mm0,DWORD [esi*4+esp]		; acc  = tab[w0]
	mov	esi,ebp
	and	esi,ebx				; esi = window 2
	shr	ebx,3
	movd	mm2,DWORD [edi*4+esp]
	mov	edi,ebp
	psllq	mm2,3
	and	edi,ebx				; edi = window 3
	shr	ebx,3
	pxor	mm0,mm2				; acc ^= tab[w1] << 3
	movd	mm1,DWORD [esi*4+esp]
	mov	esi,ebp
	psllq	mm1,6
	and	esi,ebx				; esi = window 4
	shr	ebx,3
	pxor	mm0,mm1				; acc ^= tab[w2] << 6
	movd	mm2,DWORD [edi*4+esp]
	mov	edi,ebp
	psllq	mm2,9
	and	edi,ebx				; edi = window 5
	shr	ebx,3
	pxor	mm0,mm2				; acc ^= tab[w3] << 9
	movd	mm1,DWORD [esi*4+esp]
	mov	esi,ebp
	psllq	mm1,12
	and	esi,ebx				; esi = window 6
	shr	ebx,3
	pxor	mm0,mm1				; acc ^= tab[w4] << 12
	movd	mm2,DWORD [edi*4+esp]
	mov	edi,ebp
	psllq	mm2,15
	and	edi,ebx				; edi = window 7
	shr	ebx,3
	pxor	mm0,mm2				; acc ^= tab[w5] << 15
	movd	mm1,DWORD [esi*4+esp]
	mov	esi,ebp
	psllq	mm1,18
	and	esi,ebx				; esi = window 8
	shr	ebx,3
	pxor	mm0,mm1				; acc ^= tab[w6] << 18
	movd	mm2,DWORD [edi*4+esp]
	mov	edi,ebp
	psllq	mm2,21
	and	edi,ebx				; edi = window 9
	shr	ebx,3
	pxor	mm0,mm2				; acc ^= tab[w7] << 21
	movd	mm1,DWORD [esi*4+esp]
	mov	esi,ebp
	psllq	mm1,24
	and	esi,ebx				; esi = window 10 (only 2 bits of b remain)
	shr	ebx,3
	pxor	mm0,mm1				; acc ^= tab[w8] << 24
	movd	mm2,DWORD [edi*4+esp]
	pxor	mm0,mm4				; fold in the bit-30 correction
	psllq	mm2,27
	pxor	mm0,mm2				; acc ^= tab[w9] << 27
	movd	mm1,DWORD [esi*4+esp]
	pxor	mm0,mm5				; fold in the bit-31 correction
	psllq	mm1,30
	add	esp,36
	pxor	mm0,mm1				; acc ^= tab[w10] << 30
	ret

;-----------------------------------------------------------------------
; __mul_1x1_ialu - 32x32 -> 64-bit carry-less multiply (integer only)
; In:    eax = a, ebx = b
; Out:   edx:eax = a (x) b  (edx = high dword, eax = low dword)
; Clobb: ebx, ecx, ebp, esi, edi, flags
; Stack: 36 bytes of scratch; bytes 0..31 hold the same 8-entry table as
;        __mul_1x1_mmx (built from a with bits 31,30 cleared).
; Bits 31 and 30 of a are turned into all-ones/zero masks with `sar`,
; ANDed with b, and pre-loaded into edx:eax as b<<31 and b<<30.  Each
; 3-bit window k of b then contributes tab[w] << 3k: the low part via
; `shl 3k`, the part that crosses into the high dword via `shr 32-3k`.
;-----------------------------------------------------------------------
align	16
__mul_1x1_ialu:
	sub	esp,36
	mov	ecx,eax
	lea	edx,[eax*1+eax]			; edx = 2a
	lea	ebp,[eax*4]			; ebp = a4 = 4a (top bits fall off)
	and	ecx,0x3fffffff			; ecx = a1 = a without bits 31,30
	lea	edi,[eax*1+eax]			; edi = 2a, bit 30 of a now in sign bit
	sar	eax,31				; eax = -1 if bit 31 of a set, else 0
	mov	DWORD [esp],0			; tab[0] = 0
	and	edx,0x7fffffff			; edx = a2 = 2a without bit 31
	mov	DWORD [4+esp],ecx		; tab[1] = a1
	xor	ecx,edx				; ecx = a1^a2
	mov	DWORD [8+esp],edx		; tab[2] = a2
	xor	edx,ebp				; edx = a2^a4
	mov	DWORD [12+esp],ecx		; tab[3] = a1^a2
	xor	ecx,edx				; ecx = a1^a4
	mov	DWORD [16+esp],ebp		; tab[4] = a4
	xor	ebp,edx				; ebp = a2
	mov	DWORD [20+esp],ecx		; tab[5] = a1^a4
	xor	ebp,ecx				; ebp = a1^a2^a4
	sar	edi,31				; edi = -1 if bit 30 of a set, else 0
	and	eax,ebx				; eax = bit31 ? b : 0
	mov	DWORD [24+esp],edx		; tab[6] = a2^a4
	and	edi,ebx				; edi = bit30 ? b : 0
	mov	DWORD [28+esp],ebp		; tab[7] = a1^a2^a4
	mov	edx,eax
	shl	eax,31				; lo  = (bit31 ? b : 0) << 31
	mov	ecx,edi
	shr	edx,1				; hi  = (bit31 ? b : 0) >> 1
	mov	esi,7
	shl	edi,30
	and	esi,ebx				; esi = window 0
	shr	ecx,2
	xor	eax,edi				; lo ^= (bit30 ? b : 0) << 30
	shr	ebx,3
	mov	edi,7
	and	edi,ebx				; edi = window 1
	shr	ebx,3
	xor	edx,ecx				; hi ^= (bit30 ? b : 0) >> 2
	xor	eax,DWORD [esi*4+esp]		; lo ^= tab[w0]
	mov	esi,7
	and	esi,ebx				; esi = window 2
	shr	ebx,3
	mov	ebp,DWORD [edi*4+esp]		; tab[w1]
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,3
	and	edi,ebx				; edi = window 3
	shr	ecx,29
	xor	eax,ebp				; lo ^= tab[w1] << 3
	shr	ebx,3
	xor	edx,ecx				; hi ^= tab[w1] >> 29
	mov	ecx,DWORD [esi*4+esp]		; tab[w2]
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,6
	and	esi,ebx				; esi = window 4
	shr	ebp,26
	xor	eax,ecx				; lo ^= tab[w2] << 6
	shr	ebx,3
	xor	edx,ebp				; hi ^= tab[w2] >> 26
	mov	ebp,DWORD [edi*4+esp]		; tab[w3]
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,9
	and	edi,ebx				; edi = window 5
	shr	ecx,23
	xor	eax,ebp				; lo ^= tab[w3] << 9
	shr	ebx,3
	xor	edx,ecx				; hi ^= tab[w3] >> 23
	mov	ecx,DWORD [esi*4+esp]		; tab[w4]
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,12
	and	esi,ebx				; esi = window 6
	shr	ebp,20
	xor	eax,ecx				; lo ^= tab[w4] << 12
	shr	ebx,3
	xor	edx,ebp				; hi ^= tab[w4] >> 20
	mov	ebp,DWORD [edi*4+esp]		; tab[w5]
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,15
	and	edi,ebx				; edi = window 7
	shr	ecx,17
	xor	eax,ebp				; lo ^= tab[w5] << 15
	shr	ebx,3
	xor	edx,ecx				; hi ^= tab[w5] >> 17
	mov	ecx,DWORD [esi*4+esp]		; tab[w6]
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,18
	and	esi,ebx				; esi = window 8
	shr	ebp,14
	xor	eax,ecx				; lo ^= tab[w6] << 18
	shr	ebx,3
	xor	edx,ebp				; hi ^= tab[w6] >> 14
	mov	ebp,DWORD [edi*4+esp]		; tab[w7]
	mov	edi,7
	mov	ecx,ebp
	shl	ebp,21
	and	edi,ebx				; edi = window 9
	shr	ecx,11
	xor	eax,ebp				; lo ^= tab[w7] << 21
	shr	ebx,3
	xor	edx,ecx				; hi ^= tab[w7] >> 11
	mov	ecx,DWORD [esi*4+esp]		; tab[w8]
	mov	esi,7
	mov	ebp,ecx
	shl	ecx,24
	and	esi,ebx				; esi = window 10 (only 2 bits of b remain)
	shr	ebp,8
	xor	eax,ecx				; lo ^= tab[w8] << 24
	shr	ebx,3
	xor	edx,ebp				; hi ^= tab[w8] >> 8
	mov	ebp,DWORD [edi*4+esp]		; tab[w9]
	mov	ecx,ebp
	shl	ebp,27
	mov	edi,DWORD [esi*4+esp]		; tab[w10]
	shr	ecx,5
	mov	esi,edi
	xor	eax,ebp				; lo ^= tab[w9] << 27
	shl	edi,30
	xor	edx,ecx				; hi ^= tab[w9] >> 5
	shr	esi,2
	xor	eax,edi				; lo ^= tab[w10] << 30
	xor	edx,esi				; hi ^= tab[w10] >> 2
	add	esp,36
	ret

;-----------------------------------------------------------------------
; _bn_GF2m_mul_2x2 - 64x64 -> 128-bit carry-less multiply
;
; Stack arguments on entry (five dwords):
;   [esp+4]  r   pointer to a 16-byte result buffer (4 dwords)
;   [esp+8]  a1  high dword of operand a
;   [esp+12] a0  low  dword of operand a
;   [esp+16] b1  high dword of operand b
;   [esp+20] b0  low  dword of operand b
; Presumably the C prototype is
;   void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
;                        BN_ULONG b1, BN_ULONG b0);
; - confirm against the C caller.
;
; Out:   r[0..3] = (a1:a0) (x) (b1:b0), least-significant dword in r[0].
; Saves: ebp, ebx, esi, edi (pushed/popped on the MMX and integer paths;
;        untouched on the PCLMULQDQ path).
; Clobb: eax, ecx, edx, flags; xmm0 (PCLMULQDQ path); mm0-mm7 (MMX path,
;        which executes emms before returning).
;
; Dispatch on _OPENSSL_ia32cap_P:
;   word0 bit 23 clear                -> integer-only path  (L$000ialu)
;   word0 bit 24 or word1 bit 1 clear -> MMX path           (L$001mmx)
;   otherwise                         -> single PCLMULQDQ
; The MMX and integer paths use Karatsuba: three 32x32 products
;   hi = a1(x)b1, lo = a0(x)b0, mid = (a0^a1)(x)(b0^b1) ^ hi ^ lo
; combined as  result = hi<<64 ^ mid<<32 ^ lo.
;-----------------------------------------------------------------------
global	_bn_GF2m_mul_2x2
align	16
_bn_GF2m_mul_2x2:
L$_bn_GF2m_mul_2x2_begin:
	lea	edx,[_OPENSSL_ia32cap_P]
	mov	eax,DWORD [edx]			; eax = capability word 0
	mov	edx,DWORD [4+edx]		; edx = capability word 1
	test	eax,CAP0_MMX
	jz	NEAR L$000ialu
	test	eax,CAP0_FXSR
	jz	NEAR L$001mmx
	test	edx,CAP1_PCLMULQDQ
	jz	NEAR L$001mmx
	; --- PCLMULQDQ path --------------------------------------------
	movups	xmm0,[8+esp]			; xmm0 dwords = {a1, a0, b1, b0}
	shufps	xmm0,xmm0,0xB1			; swap each dword pair -> {a0, a1, b0, b1}
						; i.e. low qword = a1:a0, high qword = b1:b0
db	102,15,58,68,192,1			; pclmulqdq xmm0,xmm0,1 (high qword x low qword),
						; hand-encoded so older assemblers accept it
	mov	eax,DWORD [4+esp]		; eax = r
	movups	[eax],xmm0			; store the 128-bit product
	ret

align	16
L$001mmx:
	; --- MMX path: 4 pushes shift every argument offset by 16 ------
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	eax,DWORD [24+esp]		; a1
	mov	ebx,DWORD [32+esp]		; b1
	call	__mul_1x1_mmx
	movq	mm7,mm0				; mm7 = hi = a1(x)b1
	mov	eax,DWORD [28+esp]		; a0
	mov	ebx,DWORD [36+esp]		; b0
	call	__mul_1x1_mmx
	movq	mm6,mm0				; mm6 = lo = a0(x)b0
	mov	eax,DWORD [24+esp]
	mov	ebx,DWORD [32+esp]
	xor	eax,DWORD [28+esp]		; a1^a0
	xor	ebx,DWORD [36+esp]		; b1^b0
	call	__mul_1x1_mmx
	pxor	mm0,mm7
	mov	eax,DWORD [20+esp]		; eax = r
	pxor	mm0,mm6				; mm0 = mid
	movq	mm2,mm0
	psllq	mm0,32				; low half of  mid<<32
	pop	edi
	psrlq	mm2,32				; high half of mid<<32
	pop	esi
	pxor	mm0,mm6				; result bits   0..63
	pop	ebx
	pxor	mm2,mm7				; result bits 64..127
	movq	[eax],mm0
	pop	ebp
	movq	[8+eax],mm2
	emms					; leave the x87/MMX state clean
	ret

align	16
L$000ialu:
	; --- Integer path: 4 pushes + 20 bytes of locals = offset 36 ---
	; Locals: [esp+0],[esp+4] = lo (a0(x)b0); [esp+8],[esp+12] = hi (a1(x)b1).
	push	ebp
	push	ebx
	push	esi
	push	edi
	sub	esp,20
	mov	eax,DWORD [44+esp]		; a1
	mov	ebx,DWORD [52+esp]		; b1
	call	__mul_1x1_ialu
	mov	DWORD [8+esp],eax		; hi.lo
	mov	DWORD [12+esp],edx		; hi.hi
	mov	eax,DWORD [48+esp]		; a0
	mov	ebx,DWORD [56+esp]		; b0
	call	__mul_1x1_ialu
	mov	DWORD [esp],eax			; lo.lo
	mov	DWORD [4+esp],edx		; lo.hi
	mov	eax,DWORD [44+esp]
	mov	ebx,DWORD [52+esp]
	xor	eax,DWORD [48+esp]		; a1^a0
	xor	ebx,DWORD [56+esp]		; b1^b0
	call	__mul_1x1_ialu			; edx:eax = (a0^a1)(x)(b0^b1)
	mov	ebp,DWORD [40+esp]		; ebp = r
	mov	ebx,DWORD [esp]			; lo.lo
	mov	ecx,DWORD [4+esp]		; lo.hi
	mov	edi,DWORD [8+esp]		; hi.lo
	mov	esi,DWORD [12+esp]		; hi.hi
	; Fold the three products into r[1] (eax) and r[2] (edx); r[0] and
	; r[3] are lo.lo and hi.hi unchanged.
	xor	eax,edx
	xor	edx,ecx
	xor	eax,ebx
	mov	DWORD [ebp],ebx			; r[0]
	xor	edx,edi
	mov	DWORD [12+ebp],esi		; r[3]
	xor	eax,esi
	add	esp,20
	xor	edx,esi
	pop	edi
	xor	eax,edx
	pop	esi
	mov	DWORD [8+ebp],edx		; r[2]
	pop	ebx
	mov	DWORD [4+ebp],eax		; r[1]
	pop	ebp
	ret

; NUL-terminated identification string:
; "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
db	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
db	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
db	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
db	62,0

segment	.bss
common	_OPENSSL_ia32cap_P 16