1#include <clc/clc.h> 2 3void __clc_amdgcn_s_waitcnt(unsigned flags); 4 5// s_waitcnt takes 16bit argument with a combined number of maximum allowed 6// pending operations: 7// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages 8// [7] -- undefined 9// [6:4] -- exports, GDS, and mem write 10// [3:0] -- vector memory operations 11 12// Newer clang supports __builtin_amdgcn_s_waitcnt 13#if __clang_major__ >= 5 14# define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x) 15#else 16# define __waitcnt(x) __clc_amdgcn_s_waitcnt(x) 17_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); 18#endif 19 20_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { 21 if (flags & CLK_GLOBAL_MEM_FENCE) { 22 // scalar loads are counted with LGKM but we don't know whether 23 // the compiler turned any loads to scalar 24 __waitcnt(0); 25 } else if (flags & CLK_LOCAL_MEM_FENCE) 26 __waitcnt(0xff); // LGKM is [12:8] 27} 28#undef __waitcnt 29 30// We don't have separate mechanism for read and write fences 31_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) { 32 mem_fence(flags); 33} 34 35_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) { 36 mem_fence(flags); 37} 38