; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.text code align=64

;-----------------------------------------------------------------------
; beeu_mod_inverse_vartime
;
; Syntax: NASM (Intel operand order).  ABI: Microsoft x64.
;
; In (Win64):  rcx = pointer to a 4-qword output buffer
;              rdx = pointer to a 4-qword input  ("a" below)
;              r8  = pointer to a 4-qword input  ("n" below)
; Out:         rax = 1 and four qwords written to the output buffer, or
;              rax = 0 with the output buffer left untouched.
;
; NOTE(review): presumably declared in C as
;   int beeu_mod_inverse_vartime(uint64_t out[4], const uint64_t a[4],
;                                const uint64_t n[4]);
; -- confirm against the C prototype.
;
; Control flow branches on operand bits (see the shift loops), so the
; running time depends on the input values.
;
; Working state (names follow the labels used in the code):
;   B = 4 qwords at [48+rsp]  (initialised from a)
;   A = 4 qwords at [16+rsp]  (initialised from n)
;   X = rdi:r11:r10:r9:r8     (5 qwords, initialised to 1)
;   Y = rbp:r15:r14:r13:r12   (5 qwords, initialised to 0)
;   [rsp] = saved output pointer
;   rdx   = pointer to n for the whole routine
;
; Shape of the computation as written below:
;   while (B != 0) {
;       strip up to 27 trailing zero bits from B; for each stripped bit
;           X = (X odd ? X + n : X) >> 1
;       strip up to 27 trailing zero bits from A; for each stripped bit
;           Y = (Y odd ? Y + n : Y) >> 1
;       if (B >= A) { B -= A; X += Y; } else { A -= B; Y += X; }
;   }
;   if (A != 1) return 0;
;   while (Y >= n) Y -= n;
;   out = n - Y; return 1;
;
; NOTE(review): this looks like a binary extended Euclidean inversion
; that maintains a*X == B and a*Y == -A (mod n); the halving step only
; makes sense for odd n -- confirm both against the generator script.
;
; Clobbers: rax, rcx, rdx, r8-r11, xmm0, xmm1, flags.  rbx, rbp, rsi, rdi
; and r12-r15 are used but restored before returning.
; Stack: 7 pushes plus an 80-byte local area.
;-----------------------------------------------------------------------
global	beeu_mod_inverse_vartime
ALIGN	32
beeu_mod_inverse_vartime:
	; Win64 treats rdi/rsi as callee-saved; park them in the caller-provided
	; slots just above the return address.
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_beeu_mod_inverse_vartime:
	; Move the Win64 argument registers into the register assignment the
	; body uses: rdi = out, rsi = a, rdx = n.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	; Loads from the 5th/6th stack-argument slots. Both registers are
	; overwritten below (mov r8,1 / xor r9,r9) before being read.
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	; Save the registers the body uses; popped in reverse order at
	; $L$beeu_finish.
	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

	push	rbx

	push	rsi

	; 80 bytes of locals: [0] out pointer, [16..47] A, [48..79] B.
	sub	rsp,80

	; rdi is reused as the top word of X, so keep the output pointer in memory.
	mov	QWORD[rsp],rdi

	; X = 1
	mov	r8,1
	xor	r9,r9
	xor	r10,r10
	xor	r11,r11
	xor	rdi,rdi

	; Y = 0
	xor	r12,r12
	xor	r13,r13
	xor	r14,r14
	xor	r15,r15
	xor	rbp,rbp

	; B = a  (32-byte copy; unaligned loads/stores)
	vmovdqu	xmm0,XMMWORD[rsi]
	vmovdqu	xmm1,XMMWORD[16+rsi]
	vmovdqu	XMMWORD[48+rsp],xmm0
	vmovdqu	XMMWORD[64+rsp],xmm1

	; A = n  (32-byte copy)
	vmovdqu	xmm0,XMMWORD[rdx]
	vmovdqu	xmm1,XMMWORD[16+rdx]
	vmovdqu	XMMWORD[16+rsp],xmm0
	vmovdqu	XMMWORD[32+rsp],xmm1

; ---- Outer loop: runs until all four words of B are zero ----
$L$beeu_loop:
	xor	rbx,rbx
	or	rbx,QWORD[48+rsp]
	or	rbx,QWORD[56+rsp]
	or	rbx,QWORD[64+rsp]
	or	rbx,QWORD[72+rsp]
	jz	NEAR $L$beeu_loop_end

	; rcx is a one-hot mask that walks up the low word of B. Its bit index
	; is the number of zero bits seen so far.
	mov	rcx,1

$L$beeu_shift_loop_XB:
	; Stop at the first set bit of B's low word.
	mov	rbx,rcx
	and	rbx,QWORD[48+rsp]
	jnz	NEAR $L$beeu_shift_loop_end_XB

	; Halve X once for this zero bit. If X is odd, add n first so the
	; low bit shifted out is zero.
	mov	rbx,1
	and	rbx,r8
	jz	NEAR $L$shift1_0
	add	r8,QWORD[rdx]
	adc	r9,QWORD[8+rdx]
	adc	r10,QWORD[16+rdx]
	adc	r11,QWORD[24+rdx]
	adc	rdi,0			; carry lands in the fifth word of X

$L$shift1_0:
	; 5-word logical right shift of X by one bit.
	shrd	r8,r9,1
	shrd	r9,r10,1
	shrd	r10,r11,1
	shrd	r11,rdi,1
	shr	rdi,1

	shl	rcx,1

	; Cap the scan at bit 27 (0x8000000 == 1 << 27). Any further zero bits
	; are left for a later pass through the outer loop.
	cmp	rcx,0x8000000
	jne	NEAR $L$beeu_shift_loop_XB

$L$beeu_shift_loop_end_XB:
	; Convert the one-hot mask to a shift count. bsf sets ZF from its
	; source, not its result, hence the explicit test for a zero count.
	bsf	rcx,rcx
	test	rcx,rcx
	jz	NEAR $L$beeu_no_shift_XB

	; B >>= cl, done in place on the stack copy. Each shrd pulls its
	; incoming high bits from the next-higher word loaded beforehand.
	mov	rax,QWORD[((8+48))+rsp]
	mov	rbx,QWORD[((16+48))+rsp]
	mov	rsi,QWORD[((24+48))+rsp]

	shrd	QWORD[((0+48))+rsp],rax,cl
	shrd	QWORD[((8+48))+rsp],rbx,cl
	shrd	QWORD[((16+48))+rsp],rsi,cl

	shr	rsi,cl
	mov	QWORD[((24+48))+rsp],rsi

$L$beeu_no_shift_XB:
	; Same procedure for the (Y, A) pair.
	mov	rcx,1

$L$beeu_shift_loop_YA:
	; Stop at the first set bit of A's low word.
	mov	rbx,rcx
	and	rbx,QWORD[16+rsp]
	jnz	NEAR $L$beeu_shift_loop_end_YA

	; Halve Y once for this zero bit, adding n first when Y is odd.
	mov	rbx,1
	and	rbx,r12
	jz	NEAR $L$shift1_1
	add	r12,QWORD[rdx]
	adc	r13,QWORD[8+rdx]
	adc	r14,QWORD[16+rdx]
	adc	r15,QWORD[24+rdx]
	adc	rbp,0			; carry lands in the fifth word of Y

$L$shift1_1:
	; 5-word logical right shift of Y by one bit.
	shrd	r12,r13,1
	shrd	r13,r14,1
	shrd	r14,r15,1
	shrd	r15,rbp,1
	shr	rbp,1

	shl	rcx,1

	; Same 27-bit cap as in the XB loop.
	cmp	rcx,0x8000000
	jne	NEAR $L$beeu_shift_loop_YA

$L$beeu_shift_loop_end_YA:
	; One-hot mask -> shift count; skip the shift when the count is zero.
	bsf	rcx,rcx
	test	rcx,rcx
	jz	NEAR $L$beeu_no_shift_YA

	; A >>= cl, in place on the stack copy.
	mov	rax,QWORD[((8+16))+rsp]
	mov	rbx,QWORD[((16+16))+rsp]
	mov	rsi,QWORD[((24+16))+rsp]

	shrd	QWORD[((0+16))+rsp],rax,cl
	shrd	QWORD[((8+16))+rsp],rbx,cl
	shrd	QWORD[((16+16))+rsp],rsi,cl

	shr	rsi,cl
	mov	QWORD[((24+16))+rsp],rsi

$L$beeu_no_shift_YA:
	; Tentatively compute B - A in rcx:rsi:rbx:rax. No borrow out of the
	; top word means B >= A (equality included) and the difference is kept.
	mov	rax,QWORD[48+rsp]
	mov	rbx,QWORD[56+rsp]
	mov	rsi,QWORD[64+rsp]
	mov	rcx,QWORD[72+rsp]
	sub	rax,QWORD[16+rsp]
	sbb	rbx,QWORD[24+rsp]
	sbb	rsi,QWORD[32+rsp]
	sbb	rcx,QWORD[40+rsp]
	jnc	NEAR $L$beeu_B_bigger_than_A

	; B < A: discard the tentative result and compute A = A - B instead.
	mov	rax,QWORD[16+rsp]
	mov	rbx,QWORD[24+rsp]
	mov	rsi,QWORD[32+rsp]
	mov	rcx,QWORD[40+rsp]
	sub	rax,QWORD[48+rsp]
	sbb	rbx,QWORD[56+rsp]
	sbb	rsi,QWORD[64+rsp]
	sbb	rcx,QWORD[72+rsp]
	mov	QWORD[16+rsp],rax
	mov	QWORD[24+rsp],rbx
	mov	QWORD[32+rsp],rsi
	mov	QWORD[40+rsp],rcx

	; Y += X  (5-word add)
	add	r12,r8
	adc	r13,r9
	adc	r14,r10
	adc	r15,r11
	adc	rbp,rdi
	jmp	NEAR $L$beeu_loop

$L$beeu_B_bigger_than_A:
	; B >= A: commit B = B - A from the registers computed above.
	mov	QWORD[48+rsp],rax
	mov	QWORD[56+rsp],rbx
	mov	QWORD[64+rsp],rsi
	mov	QWORD[72+rsp],rcx

	; X += Y  (5-word add)
	add	r8,r12
	adc	r9,r13
	adc	r10,r14
	adc	r11,r15
	adc	rdi,rbp

	jmp	NEAR $L$beeu_loop

$L$beeu_loop_end:
	; B == 0 here. Succeed only if A == 1: (A[0] - 1) OR-ed with the upper
	; three words must be zero; anything else takes the error exit.
	mov	rbx,QWORD[16+rsp]
	sub	rbx,1
	or	rbx,QWORD[24+rsp]
	or	rbx,QWORD[32+rsp]
	or	rbx,QWORD[40+rsp]

	jnz	NEAR $L$beeu_err

	; X is no longer needed; reuse r8..r11 to hold n.
	mov	r8,QWORD[rdx]
	mov	r9,QWORD[8+rdx]
	mov	r10,QWORD[16+rdx]
	mov	r11,QWORD[24+rdx]
	; rdi is reloaded from [rsp] at $L$beeu_save before it is read again.
	xor	rdi,rdi

$L$beeu_reduction_loop:
	; Snapshot Y so a subtraction that borrows can be undone. A's stack
	; slots are reused for the low four words; the rbp copy at [48+rsp]
	; is stored but not read back anywhere below.
	mov	QWORD[16+rsp],r12
	mov	QWORD[24+rsp],r13
	mov	QWORD[32+rsp],r14
	mov	QWORD[40+rsp],r15
	mov	QWORD[48+rsp],rbp

	; Y -= n across all five words.
	sub	r12,r8
	sbb	r13,r9
	sbb	r14,r10
	sbb	r15,r11
	sbb	rbp,0

	; On borrow (Y was already below n) restore the low four words and
	; leave the loop. cmovc does not modify flags, so jnc tests the same
	; carry produced by the sbb chain above.
	cmovc	r12,QWORD[16+rsp]
	cmovc	r13,QWORD[24+rsp]
	cmovc	r14,QWORD[32+rsp]
	cmovc	r15,QWORD[40+rsp]
	jnc	NEAR $L$beeu_reduction_loop

	; result = n - Y
	sub	r8,r12
	sbb	r9,r13
	sbb	r10,r14
	sbb	r11,r15

$L$beeu_save:
	; Reload the output pointer saved in the prologue and store the result.
	mov	rdi,QWORD[rsp]

	mov	QWORD[rdi],r8
	mov	QWORD[8+rdi],r9
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	; Success return value.
	mov	rax,1
	jmp	NEAR $L$beeu_finish

$L$beeu_err:
	; Failure return value; nothing has been written to the output buffer.
	xor	rax,rax

$L$beeu_finish:
	; Release the locals and restore registers in reverse push order.
	add	rsp,80

	pop	rsi

	pop	rbx

	pop	r15

	pop	r14

	pop	r13

	pop	r12

	pop	rbp

	; rsp is back at its entry value, so these are the slots written by
	; the prologue.
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret (bytes F3 C3 = rep ret)


$L$SEH_end_beeu_mod_inverse_vartime: