//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

// LSC Cache options
// Load message caching control
// Used as the cacheOpt argument of the global load and prefetch builtins.
// No explicit values are assigned, so the enumerators count up from 0 in
// declaration order.
// NOTE(review): presumably these values must match the encoding expected by
// the compiler builtins -- do not reorder or insert entries without confirming.
enum LSC_LDCC {
    LSC_LDCC_DEFAULT,
    LSC_LDCC_L1UC_L3UC,     // Override to L1 uncached and L3 uncached
    LSC_LDCC_L1UC_L3C,      // Override to L1 uncached and L3 cached
    LSC_LDCC_L1C_L3UC,      // Override to L1 cached and L3 uncached
    LSC_LDCC_L1C_L3C,       // Override to L1 cached and L3 cached
    LSC_LDCC_L1S_L3UC,      // Override to L1 streaming load and L3 uncached
    LSC_LDCC_L1S_L3C,       // Override to L1 streaming load and L3 cached
    LSC_LDCC_L1IAR_L3C,     // Override to L1 invalidate-after-read, and L3 cached
};

// Store message caching control (also used for atomics)
// Used as the cacheOpt argument of the global store builtins.
// As with LSC_LDCC, the enumerators take implicit values starting at 0.
enum LSC_STCC {
    LSC_STCC_DEFAULT,
    LSC_STCC_L1UC_L3UC,     // Override to L1 uncached and L3 uncached
    LSC_STCC_L1UC_L3WB,     // Override to L1 uncached and L3 written back
    LSC_STCC_L1WT_L3UC,     // Override to L1 written through and L3 uncached
    LSC_STCC_L1WT_L3WB,     // Override to L1 written through and L3 written back
    LSC_STCC_L1S_L3UC,      // Override to L1 streaming and L3 uncached
    LSC_STCC_L1S_L3WB,      // Override to L1 streaming and L3 written back
    LSC_STCC_L1WB_L3WB,     // Override to L1 written back and L3 written back
};

// LSC Loads
// Declarations only: these builtins have no definition in the visible part of
// this file.
// NOTE(review): the trailing //D<bits>U32 and //D<bits>V<n> tags presumably
// name the data size and the widening / vector length of the message --
// confirm against the compiler documentation.
// NOTE(review): immElemOff is presumably an immediate offset counted in
// elements of the pointee type -- confirm.

// Global address space
// Every global load takes an LSC_LDCC cache-control selector.
uint    __builtin_IB_lsc_load_global_uchar_to_uint (const __global uchar  *base, int immElemOff, enum LSC_LDCC cacheOpt); //D8U32
uint    __builtin_IB_lsc_load_global_ushort_to_uint(const __global ushort *base, int immElemOff, enum LSC_LDCC cacheOpt); //D16U32
uint    __builtin_IB_lsc_load_global_uint  (const __global uint   *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V1
uint2   __builtin_IB_lsc_load_global_uint2 (const __global uint2  *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V2
uint3   __builtin_IB_lsc_load_global_uint3 (const __global uint3  *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V3
uint4   __builtin_IB_lsc_load_global_uint4 (const __global uint4  *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V4
uint8   __builtin_IB_lsc_load_global_uint8 (const __global uint8  *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V8
ulong   __builtin_IB_lsc_load_global_ulong (const __global ulong  *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V1
ulong2  __builtin_IB_lsc_load_global_ulong2(const __global ulong2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V2
ulong3  __builtin_IB_lsc_load_global_ulong3(const __global ulong3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V3
ulong4  __builtin_IB_lsc_load_global_ulong4(const __global ulong4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V4
ulong8  __builtin_IB_lsc_load_global_ulong8(const __global ulong8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V8

// Local address space
// The local-memory loads mirror the global ones but take no cache-control
// argument.
uint    __builtin_IB_lsc_load_local_uchar_to_uint( const __local uchar  *base, int immElemOff); //D8U32
uint    __builtin_IB_lsc_load_local_ushort_to_uint(const __local ushort *base, int immElemOff); //D16U32
uint    __builtin_IB_lsc_load_local_uint  (const __local uint   *base, int immElemOff); //D32V1
uint2   __builtin_IB_lsc_load_local_uint2 (const __local uint2  *base, int immElemOff); //D32V2
uint3   __builtin_IB_lsc_load_local_uint3 (const __local uint3  *base, int immElemOff); //D32V3
uint4   __builtin_IB_lsc_load_local_uint4 (const __local uint4  *base, int immElemOff); //D32V4
uint8   __builtin_IB_lsc_load_local_uint8 (const __local uint8  *base, int immElemOff); //D32V8
ulong   __builtin_IB_lsc_load_local_ulong (const __local ulong  *base, int immElemOff); //D64V1
ulong2  __builtin_IB_lsc_load_local_ulong2(const __local ulong2 *base, int immElemOff); //D64V2
ulong3  __builtin_IB_lsc_load_local_ulong3(const __local ulong3 *base, int immElemOff); //D64V3
ulong4  __builtin_IB_lsc_load_local_ulong4(const __local ulong4 *base, int immElemOff); //D64V4
ulong8  __builtin_IB_lsc_load_local_ulong8(const __local ulong8 *base, int immElemOff); //D64V8

// LSC Stores
64 65// Global address space 66void __builtin_IB_lsc_store_global_uchar_from_uint (__global uchar *base, int immElemOff, uint val, enum LSC_STCC cacheOpt); //D8U32 67void __builtin_IB_lsc_store_global_ushort_from_uint(__global ushort *base, int immElemOff, uint val, enum LSC_STCC cacheOpt); //D16U32 68void __builtin_IB_lsc_store_global_uint (__global uint *base, int immElemOff, uint val, enum LSC_STCC cacheOpt); //D32V1 69void __builtin_IB_lsc_store_global_uint2 (__global uint2 *base, int immElemOff, uint2 val, enum LSC_STCC cacheOpt); //D32V2 70void __builtin_IB_lsc_store_global_uint3 (__global uint3 *base, int immElemOff, uint3 val, enum LSC_STCC cacheOpt); //D32V3 71void __builtin_IB_lsc_store_global_uint4 (__global uint4 *base, int immElemOff, uint4 val, enum LSC_STCC cacheOpt); //D32V4 72void __builtin_IB_lsc_store_global_uint8 (__global uint8 *base, int immElemOff, uint8 val, enum LSC_STCC cacheOpt); //D32V8 73void __builtin_IB_lsc_store_global_ulong (__global ulong *base, int immElemOff, ulong val, enum LSC_STCC cacheOpt); //D64V1 74void __builtin_IB_lsc_store_global_ulong2(__global ulong2 *base, int immElemOff, ulong2 val, enum LSC_STCC cacheOpt); //D64V2 75void __builtin_IB_lsc_store_global_ulong3(__global ulong3 *base, int immElemOff, ulong3 val, enum LSC_STCC cacheOpt); //D64V3 76void __builtin_IB_lsc_store_global_ulong4(__global ulong4 *base, int immElemOff, ulong4 val, enum LSC_STCC cacheOpt); //D64V4 77void __builtin_IB_lsc_store_global_ulong8(__global ulong8 *base, int immElemOff, ulong8 val, enum LSC_STCC cacheOpt); //D64V8 78 79// Local address space 80void __builtin_IB_lsc_store_local_uchar_from_uint (__local uchar *base, int immElemOff, uint val); //D8U32 81void __builtin_IB_lsc_store_local_ushort_from_uint(__local ushort *base, int immElemOff, uint val); //D16U32 82void __builtin_IB_lsc_store_local_uint (__local uint *base, int immElemOff, uint val); //D32V1 83void __builtin_IB_lsc_store_local_uint2 (__local uint2 *base, int immElemOff, uint2 
val); //D32V2 84void __builtin_IB_lsc_store_local_uint3 (__local uint3 *base, int immElemOff, uint3 val); //D32V3 85void __builtin_IB_lsc_store_local_uint4 (__local uint4 *base, int immElemOff, uint4 val); //D32V4 86void __builtin_IB_lsc_store_local_uint8 (__local uint8 *base, int immElemOff, uint8 val); //D32V8 87void __builtin_IB_lsc_store_local_ulong (__local ulong *base, int immElemOff, ulong val); //D64V1 88void __builtin_IB_lsc_store_local_ulong2(__local ulong2 *base, int immElemOff, ulong2 val); //D64V2 89void __builtin_IB_lsc_store_local_ulong3(__local ulong3 *base, int immElemOff, ulong3 val); //D64V3 90void __builtin_IB_lsc_store_local_ulong4(__local ulong4 *base, int immElemOff, ulong4 val); //D64V4 91void __builtin_IB_lsc_store_local_ulong8(__local ulong8 *base, int immElemOff, ulong8 val); //D64V8 92 93// LSC prefetching 94 95// LSC Pre-Fetch Load functions with CacheControls 96// Global address space 97void __builtin_IB_lsc_prefetch_global_uchar (const __global uchar *base, int immElemOff, enum LSC_LDCC cacheOpt); //D8U32 98void __builtin_IB_lsc_prefetch_global_ushort(const __global ushort *base, int immElemOff, enum LSC_LDCC cacheOpt); //D16U32 99void __builtin_IB_lsc_prefetch_global_uint (const __global uint *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V1 100void __builtin_IB_lsc_prefetch_global_uint2 (const __global uint2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V2 101void __builtin_IB_lsc_prefetch_global_uint3 (const __global uint3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V3 102void __builtin_IB_lsc_prefetch_global_uint4 (const __global uint4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V4 103void __builtin_IB_lsc_prefetch_global_uint8 (const __global uint8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V8 104void __builtin_IB_lsc_prefetch_global_ulong (const __global ulong *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V1 105void __builtin_IB_lsc_prefetch_global_ulong2(const __global ulong2 
*base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V2 106void __builtin_IB_lsc_prefetch_global_ulong3(const __global ulong3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V3 107void __builtin_IB_lsc_prefetch_global_ulong4(const __global ulong4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V4 108void __builtin_IB_lsc_prefetch_global_ulong8(const __global ulong8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V8 109 110// LSC Fence support 111 112// FS - Fence Scope 113enum LSC_FS { 114 LSC_FS_THREAD_GROUP, 115 LSC_FS_LOCAL, 116 LSC_FS_TILE, 117 LSC_FS_GPU, 118 LSC_FS_GPUs, 119 LSC_FS_SYSTEM_RELEASE, 120 LSC_FS_SYSTEM_ACQUIRE 121}; 122 123// FT - Fence Type 124enum LSC_FT { 125 LSC_FT_DEFAULT, 126 LSC_FT_EVICT, 127 LSC_FT_INVALIDATE, 128 LSC_FT_DISCARD, 129 LSC_FT_CLEAN, 130 LSC_FT_L3 131}; 132 133// LSC Fence functions 134void __builtin_IB_lsc_fence_global_untyped(enum LSC_FS scope, enum LSC_FT flushType); // Mem Port - UGM 135void __builtin_IB_lsc_fence_global_untyped_cross_tile(enum LSC_FS scope, enum LSC_FT flushType); // Mem Port - UGML 136void __builtin_IB_lsc_fence_global_typed(enum LSC_FS scope, enum LSC_FT flushType); // Mem Port - TGM 137void __builtin_IB_lsc_fence_local(); // Mem Port - SLM 138 139// Exported functions 140 141// LSC Loads 142// uchar 143uint load_uchar_to_uint_L1UC_L3UC(global uchar* it, int offset) 144{ 145 return __builtin_IB_lsc_load_global_uchar_to_uint(it, offset, LSC_LDCC_L1UC_L3UC); 146} 147 148uint load_uchar_to_uint_L1UC_L3C(global uchar* it, int offset) 149{ 150 return __builtin_IB_lsc_load_global_uchar_to_uint(it, offset, LSC_LDCC_L1UC_L3C); 151} 152 153uint load_uchar_to_uint_L1C_L3UC(global uchar* it, int offset) 154{ 155 return __builtin_IB_lsc_load_global_uchar_to_uint(it, offset, LSC_LDCC_L1C_L3UC); 156} 157 158uint load_uchar_to_uint_L1C_L3C(global uchar* it, int offset) 159{ 160 return __builtin_IB_lsc_load_global_uchar_to_uint(it, offset, LSC_LDCC_L1C_L3C); 161} 162 163uint 
load_uchar_to_uint_L1S_L3UC(global uchar* it, int offset) 164{ 165 return __builtin_IB_lsc_load_global_uchar_to_uint(it, offset, LSC_LDCC_L1S_L3UC); 166} 167 168uint load_uchar_to_uint_L1S_L3C(global uchar* it, int offset) 169{ 170 return __builtin_IB_lsc_load_global_uchar_to_uint(it, offset, LSC_LDCC_L1S_L3C); 171} 172 173uint load_uchar_to_uint_L1IAR_L3C(global uchar* it, int offset) 174{ 175 return __builtin_IB_lsc_load_global_uchar_to_uint(it, offset, LSC_LDCC_L1IAR_L3C); 176} 177 178// ushort 179uint load_ushort_to_uint_L1UC_L3UC(global ushort* it, int offset) 180{ 181 return __builtin_IB_lsc_load_global_ushort_to_uint(it, offset, LSC_LDCC_L1UC_L3UC); 182} 183 184uint load_ushort_to_uint_L1UC_L3C(global ushort* it, int offset) 185{ 186 return __builtin_IB_lsc_load_global_ushort_to_uint(it, offset, LSC_LDCC_L1UC_L3C); 187} 188 189uint load_ushort_to_uint_L1C_L3UC(global ushort* it, int offset) 190{ 191 return __builtin_IB_lsc_load_global_ushort_to_uint(it, offset, LSC_LDCC_L1C_L3UC); 192} 193 194uint load_ushort_to_uint_L1C_L3C(global ushort* it, int offset) 195{ 196 return __builtin_IB_lsc_load_global_ushort_to_uint(it, offset, LSC_LDCC_L1C_L3C); 197} 198 199uint load_ushort_to_uint_L1S_L3UC(global ushort* it, int offset) 200{ 201 return __builtin_IB_lsc_load_global_ushort_to_uint(it, offset, LSC_LDCC_L1S_L3UC); 202} 203 204uint load_ushort_to_uint_L1S_L3C(global ushort* it, int offset) 205{ 206 return __builtin_IB_lsc_load_global_ushort_to_uint(it, offset, LSC_LDCC_L1S_L3C); 207} 208 209uint load_ushort_to_uint_L1IAR_L3C(global ushort* it, int offset) 210{ 211 return __builtin_IB_lsc_load_global_ushort_to_uint(it, offset, LSC_LDCC_L1IAR_L3C); 212} 213 214// uint 215uint load_uint_L1UC_L3UC(global uint* it, int offset) 216{ 217 return __builtin_IB_lsc_load_global_uint(it, offset, LSC_LDCC_L1UC_L3UC); 218} 219 220uint load_uint_L1UC_L3C(global uint* it, int offset) 221{ 222 return __builtin_IB_lsc_load_global_uint(it, offset, LSC_LDCC_L1UC_L3C); 223} 224 
225uint load_uint_L1C_L3UC(global uint* it, int offset) 226{ 227 return __builtin_IB_lsc_load_global_uint(it, offset, LSC_LDCC_L1C_L3UC); 228} 229 230uint load_uint_L1C_L3C(global uint* it, int offset) 231{ 232 return __builtin_IB_lsc_load_global_uint(it, offset, LSC_LDCC_L1C_L3C); 233} 234 235uint load_uint_L1S_L3UC(global uint* it, int offset) 236{ 237 return __builtin_IB_lsc_load_global_uint(it, offset, LSC_LDCC_L1S_L3UC); 238} 239 240uint load_uint_L1S_L3C(global uint* it, int offset) 241{ 242 return __builtin_IB_lsc_load_global_uint(it, offset, LSC_LDCC_L1S_L3C); 243} 244 245uint load_uint_L1IAR_L3C(global uint* it, int offset) 246{ 247 return __builtin_IB_lsc_load_global_uint(it, offset, LSC_LDCC_L1IAR_L3C); 248} 249 250// uint2 251uint2 load_uint2_L1UC_L3UC(global uint2* it, int offset) 252{ 253 return __builtin_IB_lsc_load_global_uint2(it, offset, LSC_LDCC_L1UC_L3UC); 254} 255 256uint2 load_uint2_L1UC_L3C(global uint2* it, int offset) 257{ 258 return __builtin_IB_lsc_load_global_uint2(it, offset, LSC_LDCC_L1UC_L3C); 259} 260 261uint2 load_uint2_L1C_L3UC(global uint2* it, int offset) 262{ 263 return __builtin_IB_lsc_load_global_uint2(it, offset, LSC_LDCC_L1C_L3UC); 264} 265 266uint2 load_uint2_L1C_L3C(global uint2* it, int offset) 267{ 268 return __builtin_IB_lsc_load_global_uint2(it, offset, LSC_LDCC_L1C_L3C); 269} 270 271uint2 load_uint2_L1S_L3UC(global uint2* it, int offset) 272{ 273 return __builtin_IB_lsc_load_global_uint2(it, offset, LSC_LDCC_L1S_L3UC); 274} 275 276uint2 load_uint2_L1S_L3C(global uint2* it, int offset) 277{ 278 return __builtin_IB_lsc_load_global_uint2(it, offset, LSC_LDCC_L1S_L3C); 279} 280 281uint2 load_uint2_L1IAR_L3C(global uint2* it, int offset) 282{ 283 return __builtin_IB_lsc_load_global_uint2(it, offset, LSC_LDCC_L1IAR_L3C); 284} 285 286// uint3 287uint3 load_uint3_L1UC_L3UC(global uint3* it, int offset) 288{ 289 return __builtin_IB_lsc_load_global_uint3(it, offset, LSC_LDCC_L1UC_L3UC); 290} 291 292uint3 
load_uint3_L1UC_L3C(global uint3* it, int offset) 293{ 294 return __builtin_IB_lsc_load_global_uint3(it, offset, LSC_LDCC_L1UC_L3C); 295} 296 297uint3 load_uint3_L1C_L3UC(global uint3* it, int offset) 298{ 299 return __builtin_IB_lsc_load_global_uint3(it, offset, LSC_LDCC_L1C_L3UC); 300} 301 302uint3 load_uint3_L1C_L3C(global uint3* it, int offset) 303{ 304 return __builtin_IB_lsc_load_global_uint3(it, offset, LSC_LDCC_L1C_L3C); 305} 306 307uint3 load_uint3_L1S_L3UC(global uint3* it, int offset) 308{ 309 return __builtin_IB_lsc_load_global_uint3(it, offset, LSC_LDCC_L1S_L3UC); 310} 311 312uint3 load_uint3_L1S_L3C(global uint3* it, int offset) 313{ 314 return __builtin_IB_lsc_load_global_uint3(it, offset, LSC_LDCC_L1S_L3C); 315} 316 317uint3 load_uint3_L1IAR_L3C(global uint3* it, int offset) 318{ 319 return __builtin_IB_lsc_load_global_uint3(it, offset, LSC_LDCC_L1IAR_L3C); 320} 321 322// uint4 323uint4 load_uint4_L1UC_L3UC(global uint4* it, int offset) 324{ 325 return __builtin_IB_lsc_load_global_uint4(it, offset, LSC_LDCC_L1UC_L3UC); 326} 327 328uint4 load_uint4_L1UC_L3C(global uint4* it, int offset) 329{ 330 return __builtin_IB_lsc_load_global_uint4(it, offset, LSC_LDCC_L1UC_L3C); 331} 332 333uint4 load_uint4_L1C_L3UC(global uint4* it, int offset) 334{ 335 return __builtin_IB_lsc_load_global_uint4(it, offset, LSC_LDCC_L1C_L3UC); 336} 337 338uint4 load_uint4_L1C_L3C(global uint4* it, int offset) 339{ 340 return __builtin_IB_lsc_load_global_uint4(it, offset, LSC_LDCC_L1C_L3C); 341} 342 343uint4 load_uint4_L1S_L3UC(global uint4* it, int offset) 344{ 345 return __builtin_IB_lsc_load_global_uint4(it, offset, LSC_LDCC_L1S_L3UC); 346} 347 348uint4 load_uint4_L1S_L3C(global uint4* it, int offset) 349{ 350 return __builtin_IB_lsc_load_global_uint4(it, offset, LSC_LDCC_L1S_L3C); 351} 352 353uint4 load_uint4_L1IAR_L3C(global uint4* it, int offset) 354{ 355 return __builtin_IB_lsc_load_global_uint4(it, offset, LSC_LDCC_L1IAR_L3C); 356} 357 358// uint8 359uint8 
load_uint8_L1UC_L3UC(global uint8* it, int offset) 360{ 361 return __builtin_IB_lsc_load_global_uint8(it, offset, LSC_LDCC_L1UC_L3UC); 362} 363 364uint8 load_uint8_L1UC_L3C(global uint8* it, int offset) 365{ 366 return __builtin_IB_lsc_load_global_uint8(it, offset, LSC_LDCC_L1UC_L3C); 367} 368 369uint8 load_uint8_L1C_L3UC(global uint8* it, int offset) 370{ 371 return __builtin_IB_lsc_load_global_uint8(it, offset, LSC_LDCC_L1C_L3UC); 372} 373 374uint8 load_uint8_L1C_L3C(global uint8* it, int offset) 375{ 376 return __builtin_IB_lsc_load_global_uint8(it, offset, LSC_LDCC_L1C_L3C); 377} 378 379uint8 load_uint8_L1S_L3UC(global uint8* it, int offset) 380{ 381 return __builtin_IB_lsc_load_global_uint8(it, offset, LSC_LDCC_L1S_L3UC); 382} 383 384uint8 load_uint8_L1S_L3C(global uint8* it, int offset) 385{ 386 return __builtin_IB_lsc_load_global_uint8(it, offset, LSC_LDCC_L1S_L3C); 387} 388 389uint8 load_uint8_L1IAR_L3C(global uint8* it, int offset) 390{ 391 return __builtin_IB_lsc_load_global_uint8(it, offset, LSC_LDCC_L1IAR_L3C); 392} 393 394// ulong 395ulong load_ulong_L1UC_L3UC(global ulong* it, int offset) 396{ 397 return __builtin_IB_lsc_load_global_ulong(it, offset, LSC_LDCC_L1UC_L3UC); 398} 399 400ulong load_ulong_L1UC_L3C(global ulong* it, int offset) 401{ 402 return __builtin_IB_lsc_load_global_ulong(it, offset, LSC_LDCC_L1UC_L3C); 403} 404 405ulong load_ulong_L1C_L3UC(global ulong* it, int offset) 406{ 407 return __builtin_IB_lsc_load_global_ulong(it, offset, LSC_LDCC_L1C_L3UC); 408} 409 410ulong load_ulong_L1C_L3C(global ulong* it, int offset) 411{ 412 return __builtin_IB_lsc_load_global_ulong(it, offset, LSC_LDCC_L1C_L3C); 413} 414 415ulong load_ulong_L1S_L3UC(global ulong* it, int offset) 416{ 417 return __builtin_IB_lsc_load_global_ulong(it, offset, LSC_LDCC_L1S_L3UC); 418} 419 420ulong load_ulong_L1S_L3C(global ulong* it, int offset) 421{ 422 return __builtin_IB_lsc_load_global_ulong(it, offset, LSC_LDCC_L1S_L3C); 423} 424 425ulong 
load_ulong_L1IAR_L3C(global ulong* it, int offset) 426{ 427 return __builtin_IB_lsc_load_global_ulong(it, offset, LSC_LDCC_L1IAR_L3C); 428} 429 430// ulong2 431ulong2 load_ulong2_L1UC_L3UC(global ulong2* it, int offset) 432{ 433 return __builtin_IB_lsc_load_global_ulong2(it, offset, LSC_LDCC_L1UC_L3UC); 434} 435 436ulong2 load_ulong2_L1UC_L3C(global ulong2* it, int offset) 437{ 438 return __builtin_IB_lsc_load_global_ulong2(it, offset, LSC_LDCC_L1UC_L3C); 439} 440 441ulong2 load_ulong2_L1C_L3UC(global ulong2* it, int offset) 442{ 443 return __builtin_IB_lsc_load_global_ulong2(it, offset, LSC_LDCC_L1C_L3UC); 444} 445 446ulong2 load_ulong2_L1C_L3C(global ulong2* it, int offset) 447{ 448 return __builtin_IB_lsc_load_global_ulong2(it, offset, LSC_LDCC_L1C_L3C); 449} 450 451ulong2 load_ulong2_L1S_L3UC(global ulong2* it, int offset) 452{ 453 return __builtin_IB_lsc_load_global_ulong2(it, offset, LSC_LDCC_L1S_L3UC); 454} 455 456ulong2 load_ulong2_L1S_L3C(global ulong2* it, int offset) 457{ 458 return __builtin_IB_lsc_load_global_ulong2(it, offset, LSC_LDCC_L1S_L3C); 459} 460 461ulong2 load_ulong2_L1IAR_L3C(global ulong2* it, int offset) 462{ 463 return __builtin_IB_lsc_load_global_ulong2(it, offset, LSC_LDCC_L1IAR_L3C); 464} 465 466// ulong3 467ulong3 load_ulong3_L1UC_L3UC(global ulong3* it, int offset) 468{ 469 return __builtin_IB_lsc_load_global_ulong3(it, offset, LSC_LDCC_L1UC_L3UC); 470} 471 472ulong3 load_ulong3_L1UC_L3C(global ulong3* it, int offset) 473{ 474 return __builtin_IB_lsc_load_global_ulong3(it, offset, LSC_LDCC_L1UC_L3C); 475} 476 477ulong3 load_ulong3_L1C_L3UC(global ulong3* it, int offset) 478{ 479 return __builtin_IB_lsc_load_global_ulong3(it, offset, LSC_LDCC_L1C_L3UC); 480} 481 482ulong3 load_ulong3_L1C_L3C(global ulong3* it, int offset) 483{ 484 return __builtin_IB_lsc_load_global_ulong3(it, offset, LSC_LDCC_L1C_L3C); 485} 486 487ulong3 load_ulong3_L1S_L3UC(global ulong3* it, int offset) 488{ 489 return __builtin_IB_lsc_load_global_ulong3(it, 
offset, LSC_LDCC_L1S_L3UC); 490} 491 492ulong3 load_ulong3_L1S_L3C(global ulong3* it, int offset) 493{ 494 return __builtin_IB_lsc_load_global_ulong3(it, offset, LSC_LDCC_L1S_L3C); 495} 496 497ulong3 load_ulong3_L1IAR_L3C(global ulong3* it, int offset) 498{ 499 return __builtin_IB_lsc_load_global_ulong3(it, offset, LSC_LDCC_L1IAR_L3C); 500} 501 502// ulong4 503ulong4 load_ulong4_L1UC_L3UC(global ulong4* it, int offset) 504{ 505 return __builtin_IB_lsc_load_global_ulong4(it, offset, LSC_LDCC_L1UC_L3UC); 506} 507 508ulong4 load_ulong4_L1UC_L3C(global ulong4* it, int offset) 509{ 510 return __builtin_IB_lsc_load_global_ulong4(it, offset, LSC_LDCC_L1UC_L3C); 511} 512 513ulong4 load_ulong4_L1C_L3UC(global ulong4* it, int offset) 514{ 515 return __builtin_IB_lsc_load_global_ulong4(it, offset, LSC_LDCC_L1C_L3UC); 516} 517 518ulong4 load_ulong4_L1C_L3C(global ulong4* it, int offset) 519{ 520 return __builtin_IB_lsc_load_global_ulong4(it, offset, LSC_LDCC_L1C_L3C); 521} 522 523ulong4 load_ulong4_L1S_L3UC(global ulong4* it, int offset) 524{ 525 return __builtin_IB_lsc_load_global_ulong4(it, offset, LSC_LDCC_L1S_L3UC); 526} 527 528ulong4 load_ulong4_L1S_L3C(global ulong4* it, int offset) 529{ 530 return __builtin_IB_lsc_load_global_ulong4(it, offset, LSC_LDCC_L1S_L3C); 531} 532 533ulong4 load_ulong4_L1IAR_L3C(global ulong4* it, int offset) 534{ 535 return __builtin_IB_lsc_load_global_ulong4(it, offset, LSC_LDCC_L1IAR_L3C); 536} 537 538// ulong8 539ulong8 load_ulong8_L1UC_L3UC(global ulong8* it, int offset) 540{ 541 return __builtin_IB_lsc_load_global_ulong8(it, offset, LSC_LDCC_L1UC_L3UC); 542} 543 544ulong8 load_ulong8_L1UC_L3C(global ulong8* it, int offset) 545{ 546 return __builtin_IB_lsc_load_global_ulong8(it, offset, LSC_LDCC_L1UC_L3C); 547} 548 549ulong8 load_ulong8_L1C_L3UC(global ulong8* it, int offset) 550{ 551 return __builtin_IB_lsc_load_global_ulong8(it, offset, LSC_LDCC_L1C_L3UC); 552} 553 554ulong8 load_ulong8_L1C_L3C(global ulong8* it, int offset) 555{ 556 
return __builtin_IB_lsc_load_global_ulong8(it, offset, LSC_LDCC_L1C_L3C); 557} 558 559ulong8 load_ulong8_L1S_L3UC(global ulong8* it, int offset) 560{ 561 return __builtin_IB_lsc_load_global_ulong8(it, offset, LSC_LDCC_L1S_L3UC); 562} 563 564ulong8 load_ulong8_L1S_L3C(global ulong8* it, int offset) 565{ 566 return __builtin_IB_lsc_load_global_ulong8(it, offset, LSC_LDCC_L1S_L3C); 567} 568 569ulong8 load_ulong8_L1IAR_L3C(global ulong8* it, int offset) 570{ 571 return __builtin_IB_lsc_load_global_ulong8(it, offset, LSC_LDCC_L1IAR_L3C); 572} 573 574// LSC Stores 575// uchar 576void store_uchar_from_uint_L1UC_L3UC(global uchar* it, int offset, uint value) 577{ 578 __builtin_IB_lsc_store_global_uchar_from_uint(it, offset, value, LSC_STCC_L1UC_L3UC); 579} 580 581void store_uchar_from_uint_L1UC_L3WB(global uchar* it, int offset, uint value) 582{ 583 __builtin_IB_lsc_store_global_uchar_from_uint(it, offset, value, LSC_STCC_L1UC_L3WB); 584} 585 586void store_uchar_from_uint_L1WT_L3UC(global uchar* it, int offset, uint value) 587{ 588 __builtin_IB_lsc_store_global_uchar_from_uint(it, offset, value, LSC_STCC_L1WT_L3UC); 589} 590 591void store_uchar_from_uint_L1WT_L3WB(global uchar* it, int offset, uint value) 592{ 593 __builtin_IB_lsc_store_global_uchar_from_uint(it, offset, value, LSC_STCC_L1WT_L3WB); 594} 595 596void store_uchar_from_uint_L1S_L3UC(global uchar* it, int offset, uint value) 597{ 598 __builtin_IB_lsc_store_global_uchar_from_uint(it, offset, value, LSC_STCC_L1S_L3UC); 599} 600 601void store_uchar_from_uint_L1S_L3WB(global uchar* it, int offset, uint value) 602{ 603 __builtin_IB_lsc_store_global_uchar_from_uint(it, offset, value, LSC_STCC_L1S_L3WB); 604} 605 606void store_uchar_from_uint_L1WB_L3WB(global uchar* it, int offset, uint value) 607{ 608 __builtin_IB_lsc_store_global_uchar_from_uint(it, offset, value, LSC_STCC_L1WB_L3WB); 609} 610 611// ushort 612void store_ushort_from_uint_L1UC_L3UC(global ushort* it, int offset, uint value) 613{ 614 
__builtin_IB_lsc_store_global_ushort_from_uint(it, offset, value, LSC_STCC_L1UC_L3UC); 615} 616 617void store_ushort_from_uint_L1UC_L3WB(global ushort* it, int offset, uint value) 618{ 619 __builtin_IB_lsc_store_global_ushort_from_uint(it, offset, value, LSC_STCC_L1UC_L3WB); 620} 621 622void store_ushort_from_uint_L1WT_L3UC(global ushort* it, int offset, uint value) 623{ 624 __builtin_IB_lsc_store_global_ushort_from_uint(it, offset, value, LSC_STCC_L1WT_L3UC); 625} 626 627void store_ushort_from_uint_L1WT_L3WB(global ushort* it, int offset, uint value) 628{ 629 __builtin_IB_lsc_store_global_ushort_from_uint(it, offset, value, LSC_STCC_L1WT_L3WB); 630} 631 632void store_ushort_from_uint_L1S_L3UC(global ushort* it, int offset, uint value) 633{ 634 __builtin_IB_lsc_store_global_ushort_from_uint(it, offset, value, LSC_STCC_L1S_L3UC); 635} 636 637void store_ushort_from_uint_L1S_L3WB(global ushort* it, int offset, uint value) 638{ 639 __builtin_IB_lsc_store_global_ushort_from_uint(it, offset, value, LSC_STCC_L1S_L3WB); 640} 641 642void store_ushort_from_uint_L1WB_L3WB(global ushort* it, int offset, uint value) 643{ 644 __builtin_IB_lsc_store_global_ushort_from_uint(it, offset, value, LSC_STCC_L1WB_L3WB); 645} 646 647// uint 648void store_uint_L1UC_L3UC(global uint* it, int offset, uint value) 649{ 650 __builtin_IB_lsc_store_global_uint(it, offset, value, LSC_STCC_L1UC_L3UC); 651} 652 653void store_uint_L1UC_L3WB(global uint* it, int offset, uint value) 654{ 655 __builtin_IB_lsc_store_global_uint(it, offset, value, LSC_STCC_L1UC_L3WB); 656} 657 658void store_uint_L1WT_L3UC(global uint* it, int offset, uint value) 659{ 660 __builtin_IB_lsc_store_global_uint(it, offset, value, LSC_STCC_L1WT_L3UC); 661} 662 663void store_uint_L1WT_L3WB(global uint* it, int offset, uint value) 664{ 665 __builtin_IB_lsc_store_global_uint(it, offset, value, LSC_STCC_L1WT_L3WB); 666} 667 668void store_uint_L1S_L3UC(global uint* it, int offset, uint value) 669{ 670 
__builtin_IB_lsc_store_global_uint(it, offset, value, LSC_STCC_L1S_L3UC); 671} 672 673void store_uint_L1S_L3WB(global uint* it, int offset, uint value) 674{ 675 __builtin_IB_lsc_store_global_uint(it, offset, value, LSC_STCC_L1S_L3WB); 676} 677 678void store_uint_L1WB_L3WB(global uint* it, int offset, uint value) 679{ 680 __builtin_IB_lsc_store_global_uint(it, offset, value, LSC_STCC_L1WB_L3WB); 681} 682 683// uint2 684void store_uint2_L1UC_L3UC(global uint2* it, int offset, uint2 value) 685{ 686 __builtin_IB_lsc_store_global_uint2(it, offset, value, LSC_STCC_L1UC_L3UC); 687} 688 689void store_uint2_L1UC_L3WB(global uint2* it, int offset, uint2 value) 690{ 691 __builtin_IB_lsc_store_global_uint2(it, offset, value, LSC_STCC_L1UC_L3WB); 692} 693 694void store_uint2_L1WT_L3UC(global uint2* it, int offset, uint2 value) 695{ 696 __builtin_IB_lsc_store_global_uint2(it, offset, value, LSC_STCC_L1WT_L3UC); 697} 698 699void store_uint2_L1WT_L3WB(global uint2* it, int offset, uint2 value) 700{ 701 __builtin_IB_lsc_store_global_uint2(it, offset, value, LSC_STCC_L1WT_L3WB); 702} 703 704void store_uint2_L1S_L3UC(global uint2* it, int offset, uint2 value) 705{ 706 __builtin_IB_lsc_store_global_uint2(it, offset, value, LSC_STCC_L1S_L3UC); 707} 708 709void store_uint2_L1S_L3WB(global uint2* it, int offset, uint2 value) 710{ 711 __builtin_IB_lsc_store_global_uint2(it, offset, value, LSC_STCC_L1S_L3WB); 712} 713 714void store_uint2_L1WB_L3WB(global uint2* it, int offset, uint2 value) 715{ 716 __builtin_IB_lsc_store_global_uint2(it, offset, value, LSC_STCC_L1WB_L3WB); 717} 718 719// uint3 720void store_uint3_L1UC_L3UC(global uint3* it, int offset, uint3 value) 721{ 722 __builtin_IB_lsc_store_global_uint3(it, offset, value, LSC_STCC_L1UC_L3UC); 723} 724 725void store_uint3_L1UC_L3WB(global uint3* it, int offset, uint3 value) 726{ 727 __builtin_IB_lsc_store_global_uint3(it, offset, value, LSC_STCC_L1UC_L3WB); 728} 729 730void store_uint3_L1WT_L3UC(global uint3* it, int offset, uint3 
value) 731{ 732 __builtin_IB_lsc_store_global_uint3(it, offset, value, LSC_STCC_L1WT_L3UC); 733} 734 735void store_uint3_L1WT_L3WB(global uint3* it, int offset, uint3 value) 736{ 737 __builtin_IB_lsc_store_global_uint3(it, offset, value, LSC_STCC_L1WT_L3WB); 738} 739 740void store_uint3_L1S_L3UC(global uint3* it, int offset, uint3 value) 741{ 742 __builtin_IB_lsc_store_global_uint3(it, offset, value, LSC_STCC_L1S_L3UC); 743} 744 745void store_uint3_L1S_L3WB(global uint3* it, int offset, uint3 value) 746{ 747 __builtin_IB_lsc_store_global_uint3(it, offset, value, LSC_STCC_L1S_L3WB); 748} 749 750void store_uint3_L1WB_L3WB(global uint3* it, int offset, uint3 value) 751{ 752 __builtin_IB_lsc_store_global_uint3(it, offset, value, LSC_STCC_L1WB_L3WB); 753} 754 755// uint4 756void store_uint4_L1UC_L3UC(global uint4* it, int offset, uint4 value) 757{ 758 __builtin_IB_lsc_store_global_uint4(it, offset, value, LSC_STCC_L1UC_L3UC); 759} 760 761void store_uint4_L1UC_L3WB(global uint4* it, int offset, uint4 value) 762{ 763 __builtin_IB_lsc_store_global_uint4(it, offset, value, LSC_STCC_L1UC_L3WB); 764} 765 766void store_uint4_L1WT_L3UC(global uint4* it, int offset, uint4 value) 767{ 768 __builtin_IB_lsc_store_global_uint4(it, offset, value, LSC_STCC_L1WT_L3UC); 769} 770 771void store_uint4_L1WT_L3WB(global uint4* it, int offset, uint4 value) 772{ 773 __builtin_IB_lsc_store_global_uint4(it, offset, value, LSC_STCC_L1WT_L3WB); 774} 775 776void store_uint4_L1S_L3UC(global uint4* it, int offset, uint4 value) 777{ 778 __builtin_IB_lsc_store_global_uint4(it, offset, value, LSC_STCC_L1S_L3UC); 779} 780 781void store_uint4_L1S_L3WB(global uint4* it, int offset, uint4 value) 782{ 783 __builtin_IB_lsc_store_global_uint4(it, offset, value, LSC_STCC_L1S_L3WB); 784} 785 786void store_uint4_L1WB_L3WB(global uint4* it, int offset, uint4 value) 787{ 788 __builtin_IB_lsc_store_global_uint4(it, offset, value, LSC_STCC_L1WB_L3WB); 789} 790 791// uint8 792void store_uint8_L1UC_L3UC(global uint8* 
it, int offset, uint8 value)
{
    __builtin_IB_lsc_store_global_uint8(it, offset, value, LSC_STCC_L1UC_L3UC);
}

// LSC store wrappers
//
// Each store_<type>_<cacheOpt>() forwards its three arguments unchanged to
// __builtin_IB_lsc_store_global_<type>() and appends the matching
// LSC_STCC_<cacheOpt> constant as the fourth argument. Nothing is returned.
//
//   it     - pointer in the global address space, passed as the builtin's first argument
//   offset - passed as the builtin's second argument
//            (presumably an element offset, cf. 'immElemOff' on the load builtins - TODO confirm)
//   value  - data handed to the builtin as the value to store
//
// Cache-control suffixes, as described by the comments on enum LSC_STCC:
//   L1UC = L1 uncached    L1WT = L1 written through    L1S = L1 streaming
//   L3UC = L3 uncached    L3WB = L3 written back
//   NOTE(review): L1WB presumably means "L1 written back", but the enum comment for
//   LSC_STCC_L1WB_L3WB reads "L1 written through" - confirm and correct that comment.

void store_uint8_L1UC_L3WB(global uint8* it, int offset, uint8 value)
{
    __builtin_IB_lsc_store_global_uint8(it, offset, value, LSC_STCC_L1UC_L3WB);
}

void store_uint8_L1WT_L3UC(global uint8* it, int offset, uint8 value)
{
    __builtin_IB_lsc_store_global_uint8(it, offset, value, LSC_STCC_L1WT_L3UC);
}

void store_uint8_L1WT_L3WB(global uint8* it, int offset, uint8 value)
{
    __builtin_IB_lsc_store_global_uint8(it, offset, value, LSC_STCC_L1WT_L3WB);
}

void store_uint8_L1S_L3UC(global uint8* it, int offset, uint8 value)
{
    __builtin_IB_lsc_store_global_uint8(it, offset, value, LSC_STCC_L1S_L3UC);
}

void store_uint8_L1S_L3WB(global uint8* it, int offset, uint8 value)
{
    __builtin_IB_lsc_store_global_uint8(it, offset, value, LSC_STCC_L1S_L3WB);
}

void store_uint8_L1WB_L3WB(global uint8* it, int offset, uint8 value)
{
    __builtin_IB_lsc_store_global_uint8(it, offset, value, LSC_STCC_L1WB_L3WB);
}

// ulong  (forwards to __builtin_IB_lsc_store_global_ulong)
void store_ulong_L1UC_L3UC(global ulong* it, int offset, ulong value)
{
    __builtin_IB_lsc_store_global_ulong(it, offset, value, LSC_STCC_L1UC_L3UC);
}

void store_ulong_L1UC_L3WB(global ulong* it, int offset, ulong value)
{
    __builtin_IB_lsc_store_global_ulong(it, offset, value, LSC_STCC_L1UC_L3WB);
}

void store_ulong_L1WT_L3UC(global ulong* it, int offset, ulong value)
{
    __builtin_IB_lsc_store_global_ulong(it, offset, value, LSC_STCC_L1WT_L3UC);
}

void store_ulong_L1WT_L3WB(global ulong* it, int offset, ulong value)
{
    __builtin_IB_lsc_store_global_ulong(it, offset, value, LSC_STCC_L1WT_L3WB);
}

void store_ulong_L1S_L3UC(global ulong* it, int offset, ulong value)
{
    __builtin_IB_lsc_store_global_ulong(it, offset, value, LSC_STCC_L1S_L3UC);
}

void store_ulong_L1S_L3WB(global ulong* it, int offset, ulong value)
{
    __builtin_IB_lsc_store_global_ulong(it, offset, value, LSC_STCC_L1S_L3WB);
}

void store_ulong_L1WB_L3WB(global ulong* it, int offset, ulong value)
{
    __builtin_IB_lsc_store_global_ulong(it, offset, value, LSC_STCC_L1WB_L3WB);
}

// ulong2  (forwards to __builtin_IB_lsc_store_global_ulong2)
void store_ulong2_L1UC_L3UC(global ulong2* it, int offset, ulong2 value)
{
    __builtin_IB_lsc_store_global_ulong2(it, offset, value, LSC_STCC_L1UC_L3UC);
}

void store_ulong2_L1UC_L3WB(global ulong2* it, int offset, ulong2 value)
{
    __builtin_IB_lsc_store_global_ulong2(it, offset, value, LSC_STCC_L1UC_L3WB);
}

void store_ulong2_L1WT_L3UC(global ulong2* it, int offset, ulong2 value)
{
    __builtin_IB_lsc_store_global_ulong2(it, offset, value, LSC_STCC_L1WT_L3UC);
}

void store_ulong2_L1WT_L3WB(global ulong2* it, int offset, ulong2 value)
{
    __builtin_IB_lsc_store_global_ulong2(it, offset, value, LSC_STCC_L1WT_L3WB);
}

void store_ulong2_L1S_L3UC(global ulong2* it, int offset, ulong2 value)
{
    __builtin_IB_lsc_store_global_ulong2(it, offset, value, LSC_STCC_L1S_L3UC);
}

void store_ulong2_L1S_L3WB(global ulong2* it, int offset, ulong2 value)
{
    __builtin_IB_lsc_store_global_ulong2(it, offset, value, LSC_STCC_L1S_L3WB);
}

void store_ulong2_L1WB_L3WB(global ulong2* it, int offset, ulong2 value)
{
    __builtin_IB_lsc_store_global_ulong2(it, offset, value, LSC_STCC_L1WB_L3WB);
}

// ulong3  (forwards to __builtin_IB_lsc_store_global_ulong3)
void store_ulong3_L1UC_L3UC(global ulong3* it, int offset, ulong3 value)
{
    __builtin_IB_lsc_store_global_ulong3(it, offset, value, LSC_STCC_L1UC_L3UC);
}

void store_ulong3_L1UC_L3WB(global ulong3* it, int offset, ulong3 value)
{
    __builtin_IB_lsc_store_global_ulong3(it, offset, value, LSC_STCC_L1UC_L3WB);
}

void store_ulong3_L1WT_L3UC(global ulong3* it, int offset, ulong3 value)
{
    __builtin_IB_lsc_store_global_ulong3(it, offset, value, LSC_STCC_L1WT_L3UC);
}

void store_ulong3_L1WT_L3WB(global ulong3* it, int offset, ulong3 value)
{
    __builtin_IB_lsc_store_global_ulong3(it, offset, value, LSC_STCC_L1WT_L3WB);
}

void store_ulong3_L1S_L3UC(global ulong3* it, int offset, ulong3 value)
{
    __builtin_IB_lsc_store_global_ulong3(it, offset, value, LSC_STCC_L1S_L3UC);
}

void store_ulong3_L1S_L3WB(global ulong3* it, int offset, ulong3 value)
{
    __builtin_IB_lsc_store_global_ulong3(it, offset, value, LSC_STCC_L1S_L3WB);
}

void store_ulong3_L1WB_L3WB(global ulong3* it, int offset, ulong3 value)
{
    __builtin_IB_lsc_store_global_ulong3(it, offset, value, LSC_STCC_L1WB_L3WB);
}

// ulong4  (forwards to __builtin_IB_lsc_store_global_ulong4)
void store_ulong4_L1UC_L3UC(global ulong4* it, int offset, ulong4 value)
{
    __builtin_IB_lsc_store_global_ulong4(it, offset, value, LSC_STCC_L1UC_L3UC);
}

void store_ulong4_L1UC_L3WB(global ulong4* it, int offset, ulong4 value)
{
    __builtin_IB_lsc_store_global_ulong4(it, offset, value, LSC_STCC_L1UC_L3WB);
}

void store_ulong4_L1WT_L3UC(global ulong4* it, int offset, ulong4 value)
{
    __builtin_IB_lsc_store_global_ulong4(it, offset, value, LSC_STCC_L1WT_L3UC);
}

void store_ulong4_L1WT_L3WB(global ulong4* it, int offset, ulong4 value)
{
    __builtin_IB_lsc_store_global_ulong4(it, offset, value, LSC_STCC_L1WT_L3WB);
}

void store_ulong4_L1S_L3UC(global ulong4* it, int offset, ulong4 value)
{
    __builtin_IB_lsc_store_global_ulong4(it, offset, value, LSC_STCC_L1S_L3UC);
}

void store_ulong4_L1S_L3WB(global ulong4* it, int offset, ulong4 value)
{
    __builtin_IB_lsc_store_global_ulong4(it, offset, value, LSC_STCC_L1S_L3WB);
}

void store_ulong4_L1WB_L3WB(global ulong4* it, int offset, ulong4 value)
{
    __builtin_IB_lsc_store_global_ulong4(it, offset, value, LSC_STCC_L1WB_L3WB);
}

// ulong8  (forwards to __builtin_IB_lsc_store_global_ulong8)
void store_ulong8_L1UC_L3UC(global ulong8* it, int offset, ulong8 value)
{
    __builtin_IB_lsc_store_global_ulong8(it, offset, value, LSC_STCC_L1UC_L3UC);
}

void store_ulong8_L1UC_L3WB(global ulong8* it, int offset, ulong8 value)
{
    __builtin_IB_lsc_store_global_ulong8(it, offset, value, LSC_STCC_L1UC_L3WB);
}

void store_ulong8_L1WT_L3UC(global ulong8* it, int offset, ulong8 value)
{
    __builtin_IB_lsc_store_global_ulong8(it, offset, value, LSC_STCC_L1WT_L3UC);
}

void store_ulong8_L1WT_L3WB(global ulong8* it, int offset, ulong8 value)
{
    __builtin_IB_lsc_store_global_ulong8(it, offset, value, LSC_STCC_L1WT_L3WB);
}

void store_ulong8_L1S_L3UC(global ulong8* it, int offset, ulong8 value)
{
    __builtin_IB_lsc_store_global_ulong8(it, offset, value, LSC_STCC_L1S_L3UC);
}

void store_ulong8_L1S_L3WB(global ulong8* it, int offset, ulong8 value)
{
    __builtin_IB_lsc_store_global_ulong8(it, offset, value, LSC_STCC_L1S_L3WB);
}

void store_ulong8_L1WB_L3WB(global ulong8* it, int offset, ulong8 value)
{
    __builtin_IB_lsc_store_global_ulong8(it, offset, value, LSC_STCC_L1WB_L3WB);
}

// LSC Fence support
//
// Each helper below takes no arguments, returns nothing, and issues
// __builtin_IB_lsc_fence_global_untyped() with a fixed pair of LSC_FS_* / LSC_FT_* constants.

// Issues one fence with LSC_FS_TILE and LSC_FT_DEFAULT.
// NOTE(review): despite 'gpu' in the name this passes LSC_FS_TILE, not LSC_FS_GPU -
// presumably for the DG2 reason recorded in mem_fence_gpu_invalidate(); confirm.
void mem_fence_gpu_default(void)
{
    __builtin_IB_lsc_fence_global_untyped(LSC_FS_TILE, LSC_FT_DEFAULT);
}

// Issues one fence with LSC_FS_THREAD_GROUP and LSC_FT_DEFAULT.
void mem_fence_workgroup_default(void)
{
    __builtin_IB_lsc_fence_global_untyped(LSC_FS_THREAD_GROUP, LSC_FT_DEFAULT);
}

// Issues one fence with LSC_FS_TILE and LSC_FT_INVALIDATE.
void mem_fence_gpu_invalidate(void)
{
    // NOTE: 'FS_TILE' is used here to avoid DG2 HW bug where L3 is needlessly flushed on a 'GPU' scope fence
    __builtin_IB_lsc_fence_global_untyped(LSC_FS_TILE, LSC_FT_INVALIDATE);
}

// Issues one fence with LSC_FS_TILE and LSC_FT_EVICT.
// NOTE(review): also passes LSC_FS_TILE rather than LSC_FS_GPU - presumably the same
// DG2 workaround as mem_fence_gpu_invalidate(); confirm.
void mem_fence_gpu_evict(void)
{
    __builtin_IB_lsc_fence_global_untyped(LSC_FS_TILE, LSC_FT_EVICT);
}

// Issues two fences back to back, both with LSC_FS_GPU: first LSC_FT_EVICT, then LSC_FT_L3.
// The order of the two calls is kept exactly as written.
void mem_fence_evict_to_memory(void)
{
    __builtin_IB_lsc_fence_global_untyped(LSC_FS_GPU, LSC_FT_EVICT);
    __builtin_IB_lsc_fence_global_untyped(LSC_FS_GPU, LSC_FT_L3);
}