//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

// Definitions of the LSC load, store and fence entry points in plain OpenCL C.
//
// Every load below is an ordinary indexed read of a global pointer and every
// store an ordinary indexed write. The L1*/L3* suffix in a function name
// (presumably an L1/L3 cache-control pair -- confirm against the declarations)
// only selects which symbol is defined; it does not change the body in this
// file. Because all variants of one element type share one body, they are
// generated from the macros below instead of being spelled out one by one.

// Invokes EMIT(T, <suffix>) once for each suffix that has a load function.
#define LSC_FOR_EACH_LOAD_POLICY(EMIT, T) \
    EMIT(T, L1UC_L3UC)                    \
    EMIT(T, L1UC_L3C)                     \
    EMIT(T, L1C_L3UC)                     \
    EMIT(T, L1C_L3C)                      \
    EMIT(T, L1S_L3UC)                     \
    EMIT(T, L1S_L3C)                      \
    EMIT(T, L1IAR_L3C)

// Invokes EMIT(T, <suffix>) once for each suffix that has a store function.
#define LSC_FOR_EACH_STORE_POLICY(EMIT, T) \
    EMIT(T, L1UC_L3UC)                     \
    EMIT(T, L1UC_L3WB)                     \
    EMIT(T, L1WT_L3UC)                     \
    EMIT(T, L1WT_L3WB)                     \
    EMIT(T, L1S_L3UC)                      \
    EMIT(T, L1S_L3WB)                      \
    EMIT(T, L1WB_L3WB)

// Defines load_<T>_to_uint_<POLICY>: reads it[offset] and converts it to uint.
#define LSC_DEFINE_LOAD_TO_UINT(T, POLICY)                      \
    uint load_##T##_to_uint_##POLICY(global T* it, int offset)  \
    {                                                           \
        return (uint)(it[offset]);                              \
    }

// Defines load_<T>_<POLICY>: returns it[offset] unchanged.
#define LSC_DEFINE_LOAD(T, POLICY)                              \
    T load_##T##_##POLICY(global T* it, int offset)             \
    {                                                           \
        return it[offset];                                      \
    }

// Defines store_<T>_from_uint_<POLICY>: converts value to T and writes it to
// it[offset].
#define LSC_DEFINE_STORE_FROM_UINT(T, POLICY)                                   \
    void store_##T##_from_uint_##POLICY(global T* it, int offset, uint value)   \
    {                                                                           \
        it[offset] = (T)(value);                                                \
    }

// Defines store_<T>_<POLICY>: writes value to it[offset] unchanged.
#define LSC_DEFINE_STORE(T, POLICY)                                 \
    void store_##T##_##POLICY(global T* it, int offset, T value)    \
    {                                                               \
        it[offset] = value;                                         \
    }

// LSC Loads
// uchar / ushort elements are returned as uint
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD_TO_UINT, uchar)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD_TO_UINT, ushort)

// uint, uint2, uint3, uint4, uint8
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, uint)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, uint2)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, uint3)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, uint4)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, uint8)

// ulong, ulong2, ulong3, ulong4, ulong8
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, ulong)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, ulong2)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, ulong3)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, ulong4)
LSC_FOR_EACH_LOAD_POLICY(LSC_DEFINE_LOAD, ulong8)

// LSC Stores
// uchar / ushort elements are written from a uint value
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE_FROM_UINT, uchar)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE_FROM_UINT, ushort)

// uint, uint2, uint3, uint4, uint8
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, uint)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, uint2)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, uint3)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, uint4)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, uint8)

// ulong, ulong2, ulong3, ulong4, ulong8
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, ulong)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, ulong2)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, ulong3)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, ulong4)
LSC_FOR_EACH_STORE_POLICY(LSC_DEFINE_STORE, ulong8)

// The generator macros are private to this file; drop them so they cannot
// leak into any source that is compiled together with it.
#undef LSC_DEFINE_STORE
#undef LSC_DEFINE_STORE_FROM_UINT
#undef LSC_DEFINE_LOAD
#undef LSC_DEFINE_LOAD_TO_UINT
#undef LSC_FOR_EACH_STORE_POLICY
#undef LSC_FOR_EACH_LOAD_POLICY

// LSC Fence support
// Each wrapper forwards to one OpenCL explicit memory fence built-in, always
// with CLK_GLOBAL_MEM_FENCE.

// Store-only fence on global memory.
void mem_fence_gpu_default()
{
    write_mem_fence(CLK_GLOBAL_MEM_FENCE);
}

// Store-only fence on global memory; same body as mem_fence_gpu_default.
void mem_fence_workgroup_default()
{
    write_mem_fence(CLK_GLOBAL_MEM_FENCE);
}

// Load-only fence on global memory.
void mem_fence_gpu_invalidate()
{
    read_mem_fence(CLK_GLOBAL_MEM_FENCE);
}

// Load-only fence on global memory; same body as mem_fence_gpu_invalidate.
// NOTE(review): "evict" mapping to a read fence mirrors the existing code --
// confirm this is the intended choice.
void mem_fence_gpu_evict()
{
    read_mem_fence(CLK_GLOBAL_MEM_FENCE);
}

// Combined load and store fence on global memory.
void mem_fence_evict_to_memory()
{
    mem_fence(CLK_GLOBAL_MEM_FENCE);
}