;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"


;void vpx_lpf_horizontal_4_mmx
;(
;    unsigned char *src_ptr,
;    int            src_pixel_step,
;    const char    *blimit,
;    const char    *limit,
;    const char    *thresh,
;    int            count
;)
; Applies the 4-tap VPx loop filter across a horizontal edge: pixels
; p3..p0 sit above src_ptr and q0..q3 at/below it, one row apart
; (src_pixel_step).  Each iteration filters 8 pixel columns with MMX;
; 'count' selects how many groups of 8 columns to process.
; Register/arg access is abstracted by the x86_abi_support.asm macros
; (arg(), GLOBAL, SHADOW_ARGS_TO_STACK), so this body is ABI-neutral.
global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
sym(vpx_lpf_horizontal_4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 32                         ; reserve 32 bytes
    %define t0  [rsp + 0]    ;__declspec(align(16)) char t0[8];
    %define t1  [rsp + 16]   ;__declspec(align(16)) char t1[8];

    mov         rsi, arg(0)              ;src_ptr
    movsxd      rax, dword ptr arg(1)    ;src_pixel_step (row pitch)

    movsxd      rcx, dword ptr arg(5)    ;count
.next8_h:
    mov         rdx, arg(3)              ;limit
    movq        mm7, [rdx]
    mov         rdi, rsi                 ; rdi points to row +1 for indirect addressing
    add         rdi, rax

    ; calculate breakout conditions:
    ; mm1 accumulates (via saturating subtract + or) a non-zero byte
    ; wherever any neighbour-difference exceeds 'limit'.
    movq        mm2, [rdi+2*rax]         ; q3
    movq        mm1, [rsi+2*rax]         ; q2
    movq        mm6, mm1                 ; q2
    psubusb     mm1, mm2                 ; q2-=q3
    psubusb     mm2, mm6                 ; q3-=q2
    por         mm1, mm2                 ; abs(q3-q2)
    psubusb     mm1, mm7                 ; non-zero iff abs(q3-q2) > limit

    movq        mm4, [rsi+rax]           ; q1
    movq        mm3, mm4                 ; q1
    psubusb     mm4, mm6                 ; q1-=q2
    psubusb     mm6, mm3                 ; q2-=q1
    por         mm4, mm6                 ; abs(q2-q1)

    psubusb     mm4, mm7
    por         mm1, mm4

    movq        mm4, [rsi]               ; q0
    movq        mm0, mm4                 ; q0
    psubusb     mm4, mm3                 ; q0-=q1
    psubusb     mm3, mm0                 ; q1-=q0
    por         mm4, mm3                 ; abs(q0-q1)
    movq        t0, mm4                  ; save abs(q0-q1) for hev test below
    psubusb     mm4, mm7
    por         mm1, mm4

    neg         rax                      ; negate pitch to deal with above border

    movq        mm2, [rsi+4*rax]         ; p3
    movq        mm4, [rdi+4*rax]         ; p2
    movq        mm5, mm4                 ; p2
    psubusb     mm4, mm2                 ; p2-=p3
    psubusb     mm2, mm5                 ; p3-=p2
    por         mm4, mm2                 ; abs(p3 - p2)
    psubusb     mm4, mm7
    por         mm1, mm4

    movq        mm4, [rsi+2*rax]         ; p1
    movq        mm3, mm4                 ; p1
    psubusb     mm4, mm5                 ; p1-=p2
    psubusb     mm5, mm3                 ; p2-=p1
    por         mm4, mm5                 ; abs(p2 - p1)
    psubusb     mm4, mm7
    por         mm1, mm4

    movq        mm2, mm3                 ; p1

    movq        mm4, [rsi+rax]           ; p0
    movq        mm5, mm4                 ; p0
    psubusb     mm4, mm3                 ; p0-=p1
    psubusb     mm3, mm5                 ; p1-=p0
    por         mm4, mm3                 ; abs(p1 - p0)
    movq        t1, mm4                  ; save abs(p1-p0) for hev test below
    psubusb     mm4, mm7
    por         mm1, mm4

    movq        mm3, [rdi]               ; q1
    movq        mm4, mm3                 ; q1
    psubusb     mm3, mm2                 ; q1-=p1
    psubusb     mm2, mm4                 ; p1-=q1
    por         mm2, mm3                 ; abs(p1-q1)
    pand        mm2, [GLOBAL(tfe)]       ; set lsb of each byte to zero
    psrlw       mm2, 1                   ; abs(p1-q1)/2

    movq        mm6, mm5                 ; p0
    movq        mm3, [rsi]               ; q0
    psubusb     mm5, mm3                 ; p0-=q0
    psubusb     mm3, mm6                 ; q0-=p0
    por         mm5, mm3                 ; abs(p0 - q0)
    paddusb     mm5, mm5                 ; abs(p0-q0)*2
    paddusb     mm5, mm2                 ; abs (p0 - q0) *2 + abs(p1-q1)/2

    mov         rdx, arg(2)              ;blimit
    movq        mm7, [rdx]               ; blimit

    psubusb     mm5, mm7                 ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit
    por         mm1, mm5
    pxor        mm5, mm5
    pcmpeqb     mm1, mm5                 ; mask mm1: 0xff where the filter applies

    ; calculate high edge variance
    mov         rdx, arg(4)              ;thresh
    movq        mm7, [rdx]
    movq        mm4, t0                  ; get abs (q1 - q0)
    psubusb     mm4, mm7
    movq        mm3, t1                  ; get abs (p1 - p0)
    psubusb     mm3, mm7
    por         mm4, mm3                 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
                                         ; NOTE: combined with por (as in the vertical
                                         ; filter below), not paddb: a wrapping byte add
                                         ; of two non-zero excesses (e.g. 0x80+0x80) can
                                         ; carry back to zero and wrongly drop the flag.

    pcmpeqb     mm4, mm5

    pcmpeqb     mm5, mm5
    pxor        mm4, mm5                 ; hev mask: 0xff where variance is high

    ; start work on filters
    movq        mm2, [rsi+2*rax]         ; p1
    movq        mm7, [rdi]               ; q1
    pxor        mm2, [GLOBAL(t80)]       ; p1 offset to convert to signed values
    pxor        mm7, [GLOBAL(t80)]       ; q1 offset to convert to signed values
    psubsb      mm2, mm7                 ; p1 - q1
    pand        mm2, mm4                 ; high var mask (hvm)(p1 - q1)
    pxor        mm6, [GLOBAL(t80)]       ; offset to convert to signed values
    pxor        mm0, [GLOBAL(t80)]       ; offset to convert to signed values
    movq        mm3, mm0                 ; q0
    psubsb      mm0, mm6                 ; q0 - p0
    paddsb      mm2, mm0                 ; 1 * (q0 - p0) + hvm(p1 - q1)
    paddsb      mm2, mm0                 ; 2 * (q0 - p0) + hvm(p1 - q1)
    paddsb      mm2, mm0                 ; 3 * (q0 - p0) + hvm(p1 - q1)
    pand        mm1, mm2                 ; mask filter values we don't care about
    movq        mm2, mm1
    paddsb      mm1, [GLOBAL(t4)]        ; 3* (q0 - p0) + hvm(p1 - q1) + 4
    paddsb      mm2, [GLOBAL(t3)]        ; 3* (q0 - p0) + hvm(p1 - q1) + 3

    ; arithmetic >>3 of each signed byte, via widening to words
    pxor        mm0, mm0
    pxor        mm5, mm5
    punpcklbw   mm0, mm2
    punpckhbw   mm5, mm2
    psraw       mm0, 11
    psraw       mm5, 11
    packsswb    mm0, mm5
    movq        mm2, mm0                 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;

    pxor        mm0, mm0                 ; 0
    movq        mm5, mm1                 ; abcdefgh
    punpcklbw   mm0, mm1                 ; e0f0g0h0
    psraw       mm0, 11                  ; sign extended shift right by 3
    pxor        mm1, mm1                 ; 0
    punpckhbw   mm1, mm5                 ; a0b0c0d0
    psraw       mm1, 11                  ; sign extended shift right by 3
    movq        mm5, mm0                 ; save results

    packsswb    mm0, mm1                 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
    paddsw      mm5, [GLOBAL(ones)]
    paddsw      mm1, [GLOBAL(ones)]
    psraw       mm5, 1                   ; partial shifted one more time for 2nd tap
    psraw       mm1, 1                   ; partial shifted one more time for 2nd tap
    packsswb    mm5, mm1                 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
    pandn       mm4, mm5                 ; high edge variance additive

    paddsb      mm6, mm2                 ; p0 += p0 add
    pxor        mm6, [GLOBAL(t80)]       ; unoffset
    movq        [rsi+rax], mm6           ; write back

    movq        mm6, [rsi+2*rax]         ; p1
    pxor        mm6, [GLOBAL(t80)]       ; reoffset
    paddsb      mm6, mm4                 ; p1 += p1 add
    pxor        mm6, [GLOBAL(t80)]       ; unoffset
    movq        [rsi+2*rax], mm6         ; write back

    psubsb      mm3, mm0                 ; q0 -= q0 add
    pxor        mm3, [GLOBAL(t80)]       ; unoffset
    movq        [rsi], mm3               ; write back

    psubsb      mm7, mm4                 ; q1 -= q1 add
    pxor        mm7, [GLOBAL(t80)]       ; unoffset
    movq        [rdi], mm7               ; write back

    add         rsi, 8                   ; next 8 columns
    neg         rax                      ; restore positive pitch
    dec         rcx
    jnz         .next8_h

    add rsp, 32
    pop rsp
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop rbp
    ret


;void vpx_lpf_vertical_4_mmx
;(
;    unsigned char *src_ptr,
;    int            src_pixel_step,
;    const char    *blimit,
;    const char    *limit,
;    const char    *thresh,
;    int            count
;)
; Same filter across a vertical edge: pixels p3..q3 are the 8 bytes
; around src_ptr within each row.  The 8x8 neighbourhood is transposed
; with punpck* so the horizontal filter math can be reused, then the
; four modified columns (p1,p0,q0,q1) are transposed back and stored.
; Filtered p/q rows are staged in the on-stack 'srct' scratch area.
global sym(vpx_lpf_vertical_4_mmx) PRIVATE
sym(vpx_lpf_vertical_4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub          rsp, 64      ; reserve 64 bytes
    %define t0   [rsp + 0]    ;__declspec(align(16)) char t0[8];
    %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[8];
    %define srct [rsp + 32]   ;__declspec(align(16)) char srct[32];

    mov         rsi, arg(0)              ;src_ptr
    movsxd      rax, dword ptr arg(1)    ;src_pixel_step (row pitch)

    lea         rsi, [rsi + rax*4 - 4]   ; centre of the first 8x8 block

    movsxd      rcx, dword ptr arg(5)    ;count
.next8_v:
    mov         rdi, rsi                 ; rdi points to row +1 for indirect addressing
    add         rdi, rax

    ;transpose (digits are row/column: '67' = row 6, column 7)
    movq        mm6, [rsi+2*rax]         ; 67 66 65 64 63 62 61 60
    movq        mm7, mm6                 ; copy of row 6

    punpckhbw   mm7, [rdi+2*rax]         ; 77 67 76 66 75 65 74 64
    punpcklbw   mm6, [rdi+2*rax]         ; 73 63 72 62 71 61 70 60

    movq        mm4, [rsi]               ; 47 46 45 44 43 42 41 40
    movq        mm5, mm4                 ; 47 46 45 44 43 42 41 40

    punpckhbw   mm5, [rsi+rax]           ; 57 47 56 46 55 45 54 44
    punpcklbw   mm4, [rsi+rax]           ; 53 43 52 42 51 41 50 40

    movq        mm3, mm5                 ; 57 47 56 46 55 45 54 44
    punpckhwd   mm5, mm7                 ; 77 67 57 47 76 66 56 46

    punpcklwd   mm3, mm7                 ; 75 65 55 45 74 64 54 44
    movq        mm2, mm4                 ; 53 43 52 42 51 41 50 40

    punpckhwd   mm4, mm6                 ; 73 63 53 43 72 62 52 42
    punpcklwd   mm2, mm6                 ; 71 61 51 41 70 60 50 40

    neg         rax
    movq        mm6, [rsi+rax*2]         ; 27 26 25 24 23 22 21 20

    movq        mm1, mm6                 ; 27 26 25 24 23 22 21 20
    punpckhbw   mm6, [rsi+rax]           ; 37 27 36 26 35 25 34 24

    punpcklbw   mm1, [rsi+rax]           ; 33 23 32 22 31 21 30 20
    movq        mm7, [rsi+rax*4]         ; 07 06 05 04 03 02 01 00

    punpckhbw   mm7, [rdi+rax*4]         ; 17 07 16 06 15 05 14 04
    movq        mm0, mm7                 ; 17 07 16 06 15 05 14 04

    punpckhwd   mm7, mm6                 ; 37 27 17 07 36 26 16 06
    punpcklwd   mm0, mm6                 ; 35 25 15 05 34 24 14 04

    movq        mm6, mm7                 ; 37 27 17 07 36 26 16 06
    punpckhdq   mm7, mm5                 ; 77 67 57 47 37 27 17 07 = q3

    punpckldq   mm6, mm5                 ; 76 66 56 46 36 26 16 06 = q2

    movq        mm5, mm6                 ; 76 66 56 46 36 26 16 06
    psubusb     mm5, mm7                 ; q2-q3

    psubusb     mm7, mm6                 ; q3-q2
    por         mm7, mm5                 ; mm7=abs (q3-q2)

    movq        mm5, mm0                 ; 35 25 15 05 34 24 14 04
    punpckhdq   mm5, mm3                 ; 75 65 55 45 35 25 15 05 = q1

    punpckldq   mm0, mm3                 ; 74 64 54 44 34 24 14 04 = q0
    movq        mm3, mm5                 ; 75 65 55 45 35 25 15 05 = q1

    psubusb     mm3, mm6                 ; q1-q2
    psubusb     mm6, mm5                 ; q2-q1

    por         mm6, mm3                 ; mm6=abs(q2-q1)
    lea         rdx, srct

    movq        [rdx+24], mm5            ; save q1
    movq        [rdx+16], mm0            ; save q0

    movq        mm3, [rsi+rax*4]         ; 07 06 05 04 03 02 01 00
    punpcklbw   mm3, [rdi+rax*4]         ; 13 03 12 02 11 01 10 00

    movq        mm0, mm3                 ; 13 03 12 02 11 01 10 00
    punpcklwd   mm0, mm1                 ; 31 21 11 01 30 20 10 00

    punpckhwd   mm3, mm1                 ; 33 23 13 03 32 22 12 02
    movq        mm1, mm0                 ; 31 21 11 01 30 20 10 00

    punpckldq   mm0, mm2                 ; 70 60 50 40 30 20 10 00  =p3
    punpckhdq   mm1, mm2                 ; 71 61 51 41 31 21 11 01  =p2

    movq        mm2, mm1                 ; 71 61 51 41 31 21 11 01  =p2
    psubusb     mm2, mm0                 ; p2-p3

    psubusb     mm0, mm1                 ; p3-p2
    por         mm0, mm2                 ; mm0=abs(p3-p2)

    movq        mm2, mm3                 ; 33 23 13 03 32 22 12 02
    punpckldq   mm2, mm4                 ; 72 62 52 42 32 22 12 02 = p1

    punpckhdq   mm3, mm4                 ; 73 63 53 43 33 23 13 03 = p0
    movq        [rdx+8], mm3             ; save p0

    movq        [rdx], mm2               ; save p1
    movq        mm5, mm2                 ; mm5 = p1

    psubusb     mm2, mm1                 ; p1-p2
    psubusb     mm1, mm5                 ; p2-p1

    por         mm1, mm2                 ; mm1=abs(p2-p1)
    mov         rdx, arg(3)              ;limit

    movq        mm4, [rdx]               ; mm4 = limit
    psubusb     mm7, mm4

    psubusb     mm0, mm4
    psubusb     mm1, mm4

    psubusb     mm6, mm4
    por         mm7, mm6

    por         mm0, mm1
    por         mm0, mm7                 ; abs(q3-q2) > limit || abs(p3-p2) > limit || abs(p2-p1) > limit || abs(q2-q1) > limit

    movq        mm1, mm5                 ; p1

    movq        mm7, mm3                 ; mm3=mm7=p0
    psubusb     mm7, mm5                 ; p0 - p1

    psubusb     mm5, mm3                 ; p1 - p0
    por         mm5, mm7                 ; abs(p1-p0)

    movq        t0, mm5                  ; save abs(p1-p0)
    lea         rdx, srct

    psubusb     mm5, mm4
    por         mm0, mm5                 ; mm0=mask

    movq        mm5, [rdx+16]            ; mm5=q0
    movq        mm7, [rdx+24]            ; mm7=q1

    movq        mm6, mm5                 ; mm6=q0
    movq        mm2, mm7                 ; q1
    psubusb     mm5, mm7                 ; q0-q1

    psubusb     mm7, mm6                 ; q1-q0
    por         mm7, mm5                 ; abs(q1-q0)

    movq        t1, mm7                  ; save abs(q1-q0)
    psubusb     mm7, mm4

    por         mm0, mm7                 ; mask

    movq        mm5, mm2                 ; q1
    psubusb     mm5, mm1                 ; q1-=p1
    psubusb     mm1, mm2                 ; p1-=q1
    por         mm5, mm1                 ; abs(p1-q1)
    pand        mm5, [GLOBAL(tfe)]       ; set lsb of each byte to zero
    psrlw       mm5, 1                   ; abs(p1-q1)/2

    mov         rdx, arg(2)              ;blimit

    movq        mm4, [rdx]               ;blimit
    movq        mm1, mm3                 ; mm1=mm3=p0

    movq        mm7, mm6                 ; mm7=mm6=q0
    psubusb     mm1, mm7                 ; p0-q0

    psubusb     mm7, mm3                 ; q0-p0
    por         mm1, mm7                 ; abs(q0-p0)
    paddusb     mm1, mm1                 ; abs(q0-p0)*2
    paddusb     mm1, mm5                 ; abs (p0 - q0) *2 + abs(p1-q1)/2

    psubusb     mm1, mm4                 ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit
    por         mm1, mm0                 ; mask

    pxor        mm0, mm0
    pcmpeqb     mm1, mm0                 ; mask mm1: 0xff where the filter applies

    ; calculate high edge variance
    mov         rdx, arg(4)              ;thresh
    movq        mm7, [rdx]

    movq        mm4, t0                  ; get abs (q1 - q0)
    psubusb     mm4, mm7

    movq        mm3, t1                  ; get abs (p1 - p0)
    psubusb     mm3, mm7

    por         mm4, mm3                 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
    pcmpeqb     mm4, mm0

    pcmpeqb     mm0, mm0
    pxor        mm4, mm0                 ; hev mask: 0xff where variance is high

    ; start work on filters
    lea         rdx, srct

    movq        mm2, [rdx]               ; p1
    movq        mm7, [rdx+24]            ; q1

    movq        mm6, [rdx+8]             ; p0
    movq        mm0, [rdx+16]            ; q0

    pxor        mm2, [GLOBAL(t80)]       ; p1 offset to convert to signed values
    pxor        mm7, [GLOBAL(t80)]       ; q1 offset to convert to signed values

    psubsb      mm2, mm7                 ; p1 - q1
    pand        mm2, mm4                 ; high var mask (hvm)(p1 - q1)

    pxor        mm6, [GLOBAL(t80)]       ; offset to convert to signed values
    pxor        mm0, [GLOBAL(t80)]       ; offset to convert to signed values

    movq        mm3, mm0                 ; q0
    psubsb      mm0, mm6                 ; q0 - p0

    paddsb      mm2, mm0                 ; 1 * (q0 - p0) + hvm(p1 - q1)
    paddsb      mm2, mm0                 ; 2 * (q0 - p0) + hvm(p1 - q1)

    paddsb      mm2, mm0                 ; 3 * (q0 - p0) + hvm(p1 - q1)
    pand        mm1, mm2                 ; mask filter values we don't care about

    movq        mm2, mm1
    paddsb      mm1, [GLOBAL(t4)]        ; 3* (q0 - p0) + hvm(p1 - q1) + 4

    paddsb      mm2, [GLOBAL(t3)]        ; 3* (q0 - p0) + hvm(p1 - q1) + 3

    ; arithmetic >>3 of each signed byte, via widening to words
    pxor        mm0, mm0
    pxor        mm5, mm5
    punpcklbw   mm0, mm2

    punpckhbw   mm5, mm2
    psraw       mm0, 11

    psraw       mm5, 11
    packsswb    mm0, mm5

    movq        mm2, mm0                 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;

    pxor        mm0, mm0                 ; 0
    movq        mm5, mm1                 ; abcdefgh

    punpcklbw   mm0, mm1                 ; e0f0g0h0
    psraw       mm0, 11                  ; sign extended shift right by 3

    pxor        mm1, mm1                 ; 0
    punpckhbw   mm1, mm5                 ; a0b0c0d0

    psraw       mm1, 11                  ; sign extended shift right by 3
    movq        mm5, mm0                 ; save results

    packsswb    mm0, mm1                 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
    paddsw      mm5, [GLOBAL(ones)]

    paddsw      mm1, [GLOBAL(ones)]
    psraw       mm5, 1                   ; partial shifted one more time for 2nd tap

    psraw       mm1, 1                   ; partial shifted one more time for 2nd tap
    packsswb    mm5, mm1                 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4

    pandn       mm4, mm5                 ; high edge variance additive

    paddsb      mm6, mm2                 ; p0 += p0 add
    pxor        mm6, [GLOBAL(t80)]       ; unoffset

    ; mm6=p0
    movq        mm1, [rdx]               ; p1
    pxor        mm1, [GLOBAL(t80)]       ; reoffset

    paddsb      mm1, mm4                 ; p1 += p1 add
    pxor        mm1, [GLOBAL(t80)]       ; unoffset
    ; mm6 = p0  mm1 = p1

    psubsb      mm3, mm0                 ; q0 -= q0 add
    pxor        mm3, [GLOBAL(t80)]       ; unoffset

    ; mm3 = q0
    psubsb      mm7, mm4                 ; q1 -= q1 add
    pxor        mm7, [GLOBAL(t80)]       ; unoffset
    ; mm7 = q1

    ; transpose and write back the four filtered columns
    ; mm1 =    72 62 52 42 32 22 12 02
    ; mm6 =    73 63 53 43 33 23 13 03
    ; mm3 =    74 64 54 44 34 24 14 04
    ; mm7 =    75 65 55 45 35 25 15 05

    movq        mm2, mm1                 ; 72 62 52 42 32 22 12 02
    punpcklbw   mm2, mm6                 ; 33 32 23 22 13 12 03 02

    movq        mm4, mm3                 ; 74 64 54 44 34 24 14 04
    punpckhbw   mm1, mm6                 ; 73 72 63 62 53 52 43 42

    punpcklbw   mm4, mm7                 ; 35 34 25 24 15 14 05 04
    punpckhbw   mm3, mm7                 ; 75 74 65 64 55 54 45 44

    movq        mm6, mm2                 ; 33 32 23 22 13 12 03 02
    punpcklwd   mm2, mm4                 ; 15 14 13 12 05 04 03 02

    punpckhwd   mm6, mm4                 ; 35 34 33 32 25 24 23 22
    movq        mm5, mm1                 ; 73 72 63 62 53 52 43 42

    punpcklwd   mm1, mm3                 ; 55 54 53 52 45 44 43 42
    punpckhwd   mm5, mm3                 ; 75 74 73 72 65 64 63 62

    ; mm2 = 15 14 13 12 05 04 03 02
    ; mm6 = 35 34 33 32 25 24 23 22
    ; mm5 = 55 54 53 52 45 44 43 42
    ; mm1 = 75 74 73 72 65 64 63 62

    movd        [rsi+rax*4+2], mm2       ; rax is still negative here
    psrlq       mm2, 32

    movd        [rdi+rax*4+2], mm2
    movd        [rsi+rax*2+2], mm6

    psrlq       mm6, 32
    movd        [rsi+rax+2], mm6

    movd        [rsi+2], mm1
    psrlq       mm1, 32

    movd        [rdi+2], mm1
    neg         rax                      ; restore positive pitch

    movd        [rdi+rax+2], mm5
    psrlq       mm5, 32

    movd        [rdi+rax*2+2], mm5

    lea         rsi, [rsi+rax*8]         ; advance to the next 8 rows
    dec         rcx
    jnz         .next8_v

    add rsp, 64
    pop rsp
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop rbp
    ret


SECTION_RODATA
align 16
tfe:
    times 8 db 0xfe
align 16
t80:
    times 8 db 0x80
align 16
t3:
    times 8 db 0x03
align 16
t4:
    times 8 db 0x04
align 16
ones:
    times 4 dw 0x0001