/* <copyright>
  This file is provided under a dual BSD/GPLv2 license. When using or
  redistributing this file, you may do so under either license.

  GPL LICENSE SUMMARY

  Copyright (c) 2017-2020 Intel Corporation. All rights reserved.

  This program is free software; you can redistribute it and/or modify
  it under the terms of version 2 of the GNU General Public License as
  published by the Free Software Foundation.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  The full GNU General Public License is included in this distribution
  in the file called LICENSE.GPL.

  Contact Information:
  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/

  BSD LICENSE

  Copyright (c) 2017-2020 Intel Corporation. All rights reserved.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
</copyright> */

// /////////////////////////////////////////////////////////////////////////
////// Intel Processor Trace Marker Functionality
////////////////////////////////////////////////////////////////////////////
//
// Syntax: GNU as, AT&T operand order.
// ABI:    arguments are taken from rdi, rsi, rdx (System V AMD64 order).
//
// The ".long 0, 1, 2, 3" GUID words, the magic words that follow them and
// every ".align" directive are part of the binary layout of this file and
// must be kept exactly where they are.
// NOTE(review): presumably a trace decoder locates the tables through these
// words - confirm before moving or re-ordering anything.

        .text
        .align  16
        .globl  __itt_pt_mark
        .globl  __itt_pt_event
        .globl  __itt_pt_mark_event
        .globl  __itt_pt_mark_threshold
        .globl  __itt_pt_byte
        .globl  __itt_pt_write

// -------------------------------------------------------------------------
// void __itt_pt_mark(unsigned char index);
//
// In:    rdi = index (only the low 8 bits are used)
// Clobb: rax, rdi, flags
//
// Performs an indirect jump to entry 'index' of __itt_pt_mark_call_table.
// Every entry is a plain 'retq', so the routine returns straight to its
// caller; the only index-dependent effect is the address of the jump target.
// -------------------------------------------------------------------------
__itt_pt_mark:
__itt_pt_mark_int:
        and     $0xff, %rdi             // index &= 0xff
        call    __itt_pt_mark_pic       // call/pop pair: rax = address of the label below
__itt_pt_mark_pic:
        popq    %rax
        // rdi = &__itt_pt_mark_call_table[index], entries are 4 bytes apart
        lea     (__itt_pt_mark_call_table - __itt_pt_mark_pic) (%rax,%rdi,4), %rdi
        jmp     *%rdi

        .long   0, 1, 2, 3      // GUID
        .long   0xfadefade

// 256 entries of 4 bytes each: a 1-byte 'retq' (the jump target) followed by
// a 3-byte 'retq $index' that is never executed and only records the entry
// number in the instruction stream.
__itt_pt_mark_call_table:
        .set    __itt_pt_mark_idx, 0
        .rept   256
        retq
        retq    $__itt_pt_mark_idx
        .set    __itt_pt_mark_idx, __itt_pt_mark_idx + 1
        .endr

        .align  16

// -------------------------------------------------------------------------
// void __itt_pt_byte(unsigned char value);
//
// In:    rdi = value (only the low 8 bits are used)
// Clobb: rcx, rdi, flags
//
// Same scheme as __itt_pt_mark with a stride of one byte: jumps to
// __itt_pt_byte_call_table + value, where every byte is a 'ret' (0xc3).
// -------------------------------------------------------------------------
__itt_pt_byte:
__itt_pt_byte_int:

        and     $0xff, %rdi             // value &= 0xff
        call    __itt_pt_byte_pic       // call/pop pair: rcx = address of the label below
__itt_pt_byte_pic:
        popq    %rcx
        lea     (__itt_pt_byte_call_table - __itt_pt_byte_pic) (%rcx,%rdi,1), %rdi
        jmp     *%rdi

        .align  4

        .long   0, 1, 2, 3      // GUID

        .long   0xfadedeaf

__itt_pt_byte_call_table:

        .fill   256,1,0xc3              // 256 x 'ret'

        .align  16

// Emits the four bytes of \reg32 through __itt_pt_byte_int, least
// significant byte first.  \reg8 must be the low byte of \reg32.
// Destroys \reg32; __itt_pt_byte_int additionally clobbers rcx and rdi.
        .macro  ITT_PT_EMIT_DWORD reg32, reg8
        mov     \reg8, %dil
        call    __itt_pt_byte_int
        .rept   3
        shr     $8, \reg32
        mov     \reg8, %dil
        call    __itt_pt_byte_int
        .endr
        .endm

// -------------------------------------------------------------------------
// __itt_pt_event
//
// In:    rdi = counter selector, handed to 'rdpmc' in rcx
// Clobb: rax, rdx, rdi, flags (rcx is saved and restored)
//
// Reads the selected performance counter and passes its value to
// __itt_pt_byte_int one byte at a time: the four bytes of eax first, then
// the four bytes of edx, each least significant byte first.
// -------------------------------------------------------------------------
__itt_pt_event:
__itt_pt_event_int:

        pushq   %rcx
        mov     %rdi, %rcx              // rdpmc takes the counter selector in ecx
        rdpmc                           // edx:eax = counter value

        xor     %edi, %edi              // start from a clean rdi before the byte moves
        ITT_PT_EMIT_DWORD %eax, %al
        ITT_PT_EMIT_DWORD %edx, %dl

        popq    %rcx
        ret

        .align  16

// -------------------------------------------------------------------------
// __itt_pt_mark_event
//
// In:    rdi = index
// Clobb: rax, rcx, rdx, rsi, rdi, flags
//
// Even index: __itt_pt_event(0) first, then __itt_pt_mark(index).
// Odd index:  __itt_pt_mark(index) first, then __itt_pt_event(0).
// -------------------------------------------------------------------------
__itt_pt_mark_event:

        test    $1, %rdi
        jnz     .Litt_pt_mark_event_odd
        mov     %rdi, %rsi              // keep index; rsi is not touched by the event path
        xor     %edi, %edi              // counter selector 0
        call    __itt_pt_event_int
        mov     %rsi, %rdi
        jmp     __itt_pt_mark_int       // tail call

.Litt_pt_mark_event_odd:
        call    __itt_pt_mark_int
        xor     %edi, %edi              // counter selector 0
        jmp     __itt_pt_event_int      // tail call


        .align  16

// -------------------------------------------------------------------------
// __itt_pt_flush (file-local, not exported)
//
// Clobb: rax, rdx
//
// Executes a chain of three indirect jumps through labels placed 1, 2 and 3
// bytes past a 16-byte boundary, then returns.
// NOTE(review): presumably this forces additional trace output after a
// marker - confirm against the decoder.
// -------------------------------------------------------------------------
__itt_pt_flush:

        call    __itt_pt_flush_pic      // call/pop pair: rdx = address of the label below
__itt_pt_flush_pic:
        popq    %rdx
        lea     (__itt_pt_mark_flush_1 - __itt_pt_flush_pic) (%rdx), %rax
        jmp     *%rax

        .align  16
        nop
__itt_pt_mark_flush_1:
        lea     (__itt_pt_mark_flush_2 - __itt_pt_flush_pic) (%rdx), %rax
        jmp     *%rax

        .align  16
        nop
        nop
__itt_pt_mark_flush_2:
        lea     (__itt_pt_mark_flush_3 - __itt_pt_flush_pic) (%rdx), %rax
        jmp     *%rax

        .align  16
        nop
        nop
        nop
__itt_pt_mark_flush_3:
        ret

        .align  16

// rdpmc selector used by __itt_pt_mark_threshold: bit 30 set, index 1.
// NOTE(review): per the Intel SDM description of RDPMC, bit 30 selects the
// fixed-function counters - confirm which event index 1 counts on the
// targeted CPUs.
        .equ    ITT_PT_RDPMC_SELECTOR, ((1 << 30) + 1)

// -------------------------------------------------------------------------
// int __itt_pt_mark_threshold(unsigned char index, unsigned long long* tmp, int threshold);
//
// In:    rdi = index
//        rsi = tmp        (storage for the counter value of the begin mark)
//        edx = threshold  (int; sign-extended to 64 bits before use, because
//                          the upper half of rdx is not defined for an int
//                          argument)
// Clobb: rax, rcx, rdx, rdi, r8, flags
//
// Even index: *tmp = counter; __itt_pt_mark(index).
// Odd index:  delta = counter - *tmp; __itt_pt_mark(index); when
//             delta >= threshold (unsigned compare) __itt_pt_flush is
//             executed as well.
//
// NOTE(review): the prototype declares an int result, but rax is never set
// to a meaningful value here - callers must not rely on it.
// -------------------------------------------------------------------------
__itt_pt_mark_threshold:
        movslq  %edx, %r8               // r8 = (long long)threshold
        xor     %edx, %edx
        xor     %eax, %eax
        mov     $ITT_PT_RDPMC_SELECTOR, %ecx
        rdpmc                           // edx:eax = counter value
        shl     $32, %rdx
        or      %rax, %rdx              // rdx = 64-bit counter value
        test    $1, %rdi
        jnz     .Litt_pt_threshold_end

        // begin mark: remember the counter
        mov     %rdx, (%rsi)
        jmp     __itt_pt_mark_int       // tail call

.Litt_pt_threshold_end:
        sub     (%rsi), %rdx            // rdx = counter - *tmp
        cmp     %r8, %rdx
        jb      __itt_pt_mark_int       // below threshold: mark only (tail call)
        call    __itt_pt_mark_int
        jmp     __itt_pt_flush          // tail call

// PTWRITE

        .align  16

// -------------------------------------------------------------------------
// void __itt_pt_write(unsigned long long value);
//
// In:    rdi = value
//
// Executes 'ptwrite' on the value.  The instruction is emitted as raw bytes
// (F3 REX.W 0F AE /4); ModRM 0xE7 = mod 11, /4, rm = rdi, i.e. the register
// that carries the first argument, like every other routine in this file.
// -------------------------------------------------------------------------

        .long   0, 1, 2, 3      // GUID

__itt_pt_write:

//      ptwrite %rdi
        .byte   0xF3, 0x48, 0x0F, 0xAE, 0xE7
        ret