1 // REQUIRES: nvptx-registered-target
2 // REQUIRES: nvptx64-registered-target
3 // RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
4 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
5
// Exercises the __builtin_ptx_read_tid_{x,y,z,w} builtins. The FileCheck
// expectations below require each one to be emitted as a call to the matching
// @llvm.ptx.read.tid.* intrinsic, in x, y, z, w order. The four results are
// summed and returned.
int read_tid(void) {

// CHECK: call i32 @llvm.ptx.read.tid.x()
// CHECK: call i32 @llvm.ptx.read.tid.y()
// CHECK: call i32 @llvm.ptx.read.tid.z()
// CHECK: call i32 @llvm.ptx.read.tid.w()

  int x = __builtin_ptx_read_tid_x();
  int y = __builtin_ptx_read_tid_y();
  int z = __builtin_ptx_read_tid_z();
  int w = __builtin_ptx_read_tid_w();

  return x + y + z + w;

}
21
// Exercises the __builtin_ptx_read_ntid_{x,y,z,w} builtins. The FileCheck
// expectations below require each one to be emitted as a call to the matching
// @llvm.ptx.read.ntid.* intrinsic, in x, y, z, w order. The four results are
// summed and returned.
int read_ntid(void) {

// CHECK: call i32 @llvm.ptx.read.ntid.x()
// CHECK: call i32 @llvm.ptx.read.ntid.y()
// CHECK: call i32 @llvm.ptx.read.ntid.z()
// CHECK: call i32 @llvm.ptx.read.ntid.w()

  int x = __builtin_ptx_read_ntid_x();
  int y = __builtin_ptx_read_ntid_y();
  int z = __builtin_ptx_read_ntid_z();
  int w = __builtin_ptx_read_ntid_w();

  return x + y + z + w;

}
37
// Exercises the __builtin_ptx_read_ctaid_{x,y,z,w} builtins. The FileCheck
// expectations below require each one to be emitted as a call to the matching
// @llvm.ptx.read.ctaid.* intrinsic, in x, y, z, w order. The four results are
// summed and returned.
int read_ctaid(void) {

// CHECK: call i32 @llvm.ptx.read.ctaid.x()
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
// CHECK: call i32 @llvm.ptx.read.ctaid.z()
// CHECK: call i32 @llvm.ptx.read.ctaid.w()

  int x = __builtin_ptx_read_ctaid_x();
  int y = __builtin_ptx_read_ctaid_y();
  int z = __builtin_ptx_read_ctaid_z();
  int w = __builtin_ptx_read_ctaid_w();

  return x + y + z + w;

}
53
// Exercises the __builtin_ptx_read_nctaid_{x,y,z,w} builtins. The FileCheck
// expectations below require each one to be emitted as a call to the matching
// @llvm.ptx.read.nctaid.* intrinsic, in x, y, z, w order. The four results are
// summed and returned.
int read_nctaid(void) {

// CHECK: call i32 @llvm.ptx.read.nctaid.x()
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
// CHECK: call i32 @llvm.ptx.read.nctaid.z()
// CHECK: call i32 @llvm.ptx.read.nctaid.w()

  int x = __builtin_ptx_read_nctaid_x();
  int y = __builtin_ptx_read_nctaid_y();
  int z = __builtin_ptx_read_nctaid_z();
  int w = __builtin_ptx_read_nctaid_w();

  return x + y + z + w;

}
69
// Exercises the laneid, warpid, nwarpid, smid, nsmid and gridid read builtins.
// The FileCheck expectations below require each one to be emitted as a call to
// the matching @llvm.ptx.read.* intrinsic, in that order. The six results are
// summed and returned.
int read_ids(void) {

// CHECK: call i32 @llvm.ptx.read.laneid()
// CHECK: call i32 @llvm.ptx.read.warpid()
// CHECK: call i32 @llvm.ptx.read.nwarpid()
// CHECK: call i32 @llvm.ptx.read.smid()
// CHECK: call i32 @llvm.ptx.read.nsmid()
// CHECK: call i32 @llvm.ptx.read.gridid()

  int a = __builtin_ptx_read_laneid();
  int b = __builtin_ptx_read_warpid();
  int c = __builtin_ptx_read_nwarpid();
  int d = __builtin_ptx_read_smid();
  int e = __builtin_ptx_read_nsmid();
  int f = __builtin_ptx_read_gridid();

  return a + b + c + d + e + f;

}
89
// Exercises the __builtin_ptx_read_lanemask_{eq,le,lt,ge,gt} builtins. The
// FileCheck expectations below require each one to be emitted as a call to the
// matching @llvm.ptx.read.lanemask.* intrinsic, in eq, le, lt, ge, gt order.
// The five results are summed and returned.
int read_lanemasks(void) {

// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
// CHECK: call i32 @llvm.ptx.read.lanemask.gt()

  int a = __builtin_ptx_read_lanemask_eq();
  int b = __builtin_ptx_read_lanemask_le();
  int c = __builtin_ptx_read_lanemask_lt();
  int d = __builtin_ptx_read_lanemask_ge();
  int e = __builtin_ptx_read_lanemask_gt();

  return a + b + c + d + e;

}
107
108
// Exercises __builtin_ptx_read_clock and __builtin_ptx_read_clock64. The
// FileCheck expectations below require an i32 call to @llvm.ptx.read.clock
// followed by an i64 call to @llvm.ptx.read.clock64. The two results are
// summed and returned.
long read_clocks(void) {

// CHECK: call i32 @llvm.ptx.read.clock()
// CHECK: call i64 @llvm.ptx.read.clock64()

  int a = __builtin_ptx_read_clock();
  // Held in a long long (at least 64 bits) so the i64 intrinsic result is not
  // implicitly narrowed on a target where `long` is narrower than 64 bits.
  long long b = __builtin_ptx_read_clock64();

  // The sum is formed at long long width; narrowing to the declared return
  // type is spelled out explicitly.
  return (long)(a + b);

}
120
// Exercises the __builtin_ptx_read_pm0 .. __builtin_ptx_read_pm3 builtins. The
// FileCheck expectations below require each one to be emitted as a call to the
// matching @llvm.ptx.read.pm* intrinsic, in pm0, pm1, pm2, pm3 order. The four
// results are summed and returned.
int read_pms(void) {

// CHECK: call i32 @llvm.ptx.read.pm0()
// CHECK: call i32 @llvm.ptx.read.pm1()
// CHECK: call i32 @llvm.ptx.read.pm2()
// CHECK: call i32 @llvm.ptx.read.pm3()

  int a = __builtin_ptx_read_pm0();
  int b = __builtin_ptx_read_pm1();
  int c = __builtin_ptx_read_pm2();
  int d = __builtin_ptx_read_pm3();

  return a + b + c + d;

}
136
// Exercises __builtin_ptx_bar_sync with the constant argument 0. The FileCheck
// expectation below requires it to be emitted as a call to
// @llvm.ptx.bar.sync with an i32 0 operand.
void sync(void) {

// CHECK: call void @llvm.ptx.bar.sync(i32 0)

  __builtin_ptx_bar_sync(0);

}
144
145
146 // NVVM intrinsics
147
// The idea is not to test every intrinsic, only to check that Clang recognizes
// the builtins defined in BuiltinsNVPTX.def.
// Calls the fmax, fmin, sqrt.rn and rcp.rn NVVM builtins, first in their float
// (_f) form and then in their double (_d) form. Each call has a FileCheck
// expectation naming the @llvm.nvvm.* intrinsic it must be emitted as; the
// expectations are listed in the same order as the calls. Results are stored
// in locals and not used further.
void nvvm_math(float f1, float f2, double d1, double d2) {
  // Float variants: expectations first, then the calls in matching order.
  // CHECK: call float @llvm.nvvm.fmax.f
  // CHECK: call float @llvm.nvvm.fmin.f
  // CHECK: call float @llvm.nvvm.sqrt.rn.f
  // CHECK: call float @llvm.nvvm.rcp.rn.f
  float max_f = __nvvm_fmax_f(f1, f2);
  float min_f = __nvvm_fmin_f(f1, f2);
  float sqrt_f = __nvvm_sqrt_rn_f(f1);
  float rcp_f = __nvvm_rcp_rn_f(f2);

  // Double variants: expectations first, then the calls in matching order.
  // CHECK: call double @llvm.nvvm.fmax.d
  // CHECK: call double @llvm.nvvm.fmin.d
  // CHECK: call double @llvm.nvvm.sqrt.rn.d
  // CHECK: call double @llvm.nvvm.rcp.rn.d
  double max_d = __nvvm_fmax_d(d1, d2);
  double min_d = __nvvm_fmin_d(d1, d2);
  double sqrt_d = __nvvm_sqrt_rn_d(d1);
  double rcp_d = __nvvm_rcp_rn_d(d2);
}
169