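; Verifies PTX lowering of the NVVM special-register and barrier intrinsics
; (llc, nvptx and nvptx64) and the !range metadata that opt's -nvvm-intr-range
; pass is expected to attach to the special-register reads.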
1; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
2; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
3; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
4; RUN:   | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
5; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
6; RUN:    -nvvm-intr-range -nvvm-intr-range-sm=30 \
7; RUN:   | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
8
; Reads %tid.x. llc must emit a mov from %tid.x; the opt runs must attach
; !range metadata, whose node id is captured here as BLK_IDX_XY.
9define ptx_device i32 @test_tid_x() {
10; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
11; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
12; CHECK: ret;
13	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
14	ret i32 %x
15}
16
; Reads %tid.y. The opt runs expect the same !range node that was captured
; as BLK_IDX_XY for tid.x.
17define ptx_device i32 @test_tid_y() {
18; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
19; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
20; CHECK: ret;
21	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
22	ret i32 %x
23}
24
; Reads %tid.z. The opt runs capture a separate !range node, BLK_IDX_Z,
; for the z component.
25define ptx_device i32 @test_tid_z() {
26; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
27; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
28; CHECK: ret;
29	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
30	ret i32 %x
31}
32
; Reads %tid.w. Only the llc output is checked; there is no range-metadata
; expectation for the w component.
33define ptx_device i32 @test_tid_w() {
34; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
35; CHECK: ret;
36	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
37	ret i32 %x
38}
39
; Reads %ntid.x. The opt runs must attach !range metadata, captured here as
; BLK_SIZE_XY.
40define ptx_device i32 @test_ntid_x() {
41; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
42; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
43; CHECK: ret;
44	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
45	ret i32 %x
46}
47
; Reads %ntid.y. The opt runs expect the same !range node that was captured
; as BLK_SIZE_XY for ntid.x.
48define ptx_device i32 @test_ntid_y() {
49; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
50; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
51; CHECK: ret;
52	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
53	ret i32 %x
54}
55
; Reads %ntid.z. The opt runs capture a separate !range node, BLK_SIZE_Z,
; for the z component.
56define ptx_device i32 @test_ntid_z() {
57; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
58; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
59; CHECK: ret;
60	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
61	ret i32 %x
62}
63
; Reads %ntid.w. Only the llc output is checked; there is no range-metadata
; expectation for the w component.
64define ptx_device i32 @test_ntid_w() {
65; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
66; CHECK: ret;
67	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
68	ret i32 %x
69}
70
; Reads %laneid. The opt runs must attach !range metadata, captured here as
; LANEID.
71define ptx_device i32 @test_laneid() {
72; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
73; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
74; CHECK: ret;
75	%x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
76	ret i32 %x
77}
78
; Reads the warp size. In the llc output the source operand is the symbol
; WARP_SZ rather than a %-prefixed register name. The opt runs capture the
; attached !range node as WARPSIZE.
79define ptx_device i32 @test_warpsize() {
80; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
81; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
82; CHECK: ret;
83	%x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
84	ret i32 %x
85}
86
; Reads %warpid. Only the llc output is checked.
87define ptx_device i32 @test_warpid() {
88; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
89; CHECK: ret;
90	%x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
91	ret i32 %x
92}
93
; Reads %nwarpid. Only the llc output is checked.
94define ptx_device i32 @test_nwarpid() {
95; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
96; CHECK: ret;
97	%x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
98	ret i32 %x
99}
100
; Reads %ctaid.y. The opt runs must attach !range metadata, captured here as
; GRID_IDX_YZ. This function precedes test_ctaid_x so that the capture
; exists before the default-SM check for ctaid.x reuses it.
101define ptx_device i32 @test_ctaid_y() {
102; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
103; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
104; CHECK: ret;
105	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
106	ret i32 %x
107}
108
; Reads %ctaid.z. The opt runs expect the same !range node that was captured
; as GRID_IDX_YZ for ctaid.y.
109define ptx_device i32 @test_ctaid_z() {
110; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
111; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
112; CHECK: ret;
113	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
114	ret i32 %x
115}
116
; Reads %ctaid.x. The expected range depends on the opt invocation: the run
; with -nvvm-intr-range-sm=30 captures a dedicated node (GRID_IDX_X), while
; the run without that option expects the node already captured as
; GRID_IDX_YZ for ctaid.y/z.
117define ptx_device i32 @test_ctaid_x() {
118; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
119; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
120; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
121; CHECK: ret;
122	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
123	ret i32 %x
124}
125
; Reads %ctaid.w. Only the llc output is checked; there is no range-metadata
; expectation for the w component.
126define ptx_device i32 @test_ctaid_w() {
127; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
128; CHECK: ret;
129	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
130	ret i32 %x
131}
132
; Reads %nctaid.y. The opt runs must attach !range metadata, captured here as
; GRID_SIZE_YZ. This function precedes test_nctaid_x so that the capture
; exists before the default-SM check for nctaid.x reuses it.
133define ptx_device i32 @test_nctaid_y() {
134; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
135; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
136; CHECK: ret;
137	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
138	ret i32 %x
139}
140
; Reads %nctaid.z. The opt runs expect the same !range node that was captured
; as GRID_SIZE_YZ for nctaid.y.
141define ptx_device i32 @test_nctaid_z() {
142; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
143; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
144; CHECK: ret;
145	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
146	ret i32 %x
147}
148
; Reads %nctaid.x. The expected range depends on the opt invocation: the run
; with -nvvm-intr-range-sm=30 captures a dedicated node (GRID_SIZE_X), while
; the run without that option expects the node already captured as
; GRID_SIZE_YZ for nctaid.y/z.
149define ptx_device i32 @test_nctaid_x() {
150; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
151; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
152; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
153; CHECK: ret;
154	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
155	ret i32 %x
156}
157
; The call here already carries !range !0 in the input. The opt runs capture
; whatever node is attached afterwards as ALREADY, and the metadata checks at
; the end of the file require that node to still be {0, 19}, i.e. the
; pre-existing range is expected to survive the pass unchanged.
; Unlike the other functions, the llc check does not also match `ret;`.
158define ptx_device i32 @test_already_has_range_md() {
159; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
160; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[ALREADY:[0-9]+]]
161	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range !0
162	ret i32 %x
163}
164
165
; Reads %nctaid.w. Only the llc output is checked; there is no range-metadata
; expectation for the w component.
166define ptx_device i32 @test_nctaid_w() {
167; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
168; CHECK: ret;
169	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
170	ret i32 %x
171}
172
; Reads %smid. Only the llc output is checked.
173define ptx_device i32 @test_smid() {
174; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
175; CHECK: ret;
176	%x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
177	ret i32 %x
178}
179
; Reads %nsmid. Only the llc output is checked.
180define ptx_device i32 @test_nsmid() {
181; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
182; CHECK: ret;
183	%x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
184	ret i32 %x
185}
186
; Reads %gridid. Only the llc output is checked.
187define ptx_device i32 @test_gridid() {
188; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
189; CHECK: ret;
190	%x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
191	ret i32 %x
192}
193
; Reads %lanemask_eq. Note the intrinsic name spells it `lanemask.eq` while
; the PTX register uses an underscore. Only the llc output is checked.
194define ptx_device i32 @test_lanemask_eq() {
195; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
196; CHECK: ret;
197	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
198	ret i32 %x
199}
200
; Reads %lanemask_le. Only the llc output is checked.
201define ptx_device i32 @test_lanemask_le() {
202; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
203; CHECK: ret;
204	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
205	ret i32 %x
206}
207
; Reads %lanemask_lt. Only the llc output is checked.
208define ptx_device i32 @test_lanemask_lt() {
209; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
210; CHECK: ret;
211	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
212	ret i32 %x
213}
214
; Reads %lanemask_ge. Only the llc output is checked.
215define ptx_device i32 @test_lanemask_ge() {
216; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
217; CHECK: ret;
218	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
219	ret i32 %x
220}
221
; Reads %lanemask_gt. Only the llc output is checked.
222define ptx_device i32 @test_lanemask_gt() {
223; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
224; CHECK: ret;
225	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
226	ret i32 %x
227}
228
; Reads the 32-bit %clock register. Only the llc output is checked.
229define ptx_device i32 @test_clock() {
230; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
231; CHECK: ret;
232	%x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
233	ret i32 %x
234}
235
; Reads %clock64. This is the only i64 read in the file, so the expected PTX
; uses mov.u64 and a %rd destination register instead of mov.u32 / %r.
236define ptx_device i64 @test_clock64() {
237; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
238; CHECK: ret;
239	%x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
240	ret i64 %x
241}
242
; Reads %pm0. Only the llc output is checked.
243define ptx_device i32 @test_pm0() {
244; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
245; CHECK: ret;
246	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
247	ret i32 %x
248}
249
; Reads %pm1. Only the llc output is checked.
250define ptx_device i32 @test_pm1() {
251; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
252; CHECK: ret;
253	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
254	ret i32 %x
255}
256
; Reads %pm2. Only the llc output is checked.
257define ptx_device i32 @test_pm2() {
258; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
259; CHECK: ret;
260	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
261	ret i32 %x
262}
263
; Reads %pm3. Only the llc output is checked.
264define ptx_device i32 @test_pm3() {
265; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
266; CHECK: ret;
267	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
268	ret i32 %x
269}
270
; Calls llvm.nvvm.bar.sync with the constant 0 and expects a `bar.sync 0`
; instruction in the llc output. This is the only non-register-read test here.
271define ptx_device void @test_bar_sync() {
272; CHECK: bar.sync 0
273; CHECK: ret;
274	call void @llvm.nvvm.bar.sync(i32 0)
275	ret void
276}
277
; Declarations of the intrinsics called by the test functions above. All of
; the sreg reads take no arguments and return i32, except clock64 (i64);
; bar.sync takes a single i32 and returns void.
278declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
279declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
280declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
281declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
282declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
283declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
284declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
285declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
286
287declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
288declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
289declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
290declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
291
292declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
293declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
294declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
295declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
296declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
297declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
298declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
299declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
300
301declare i32 @llvm.nvvm.read.ptx.sreg.smid()
302declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
303declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
304
305declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
306declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
307declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
308declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
309declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
310
311declare i32 @llvm.nvvm.read.ptx.sreg.clock()
312declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
313
314declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
315declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
316declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
317declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
318
319declare void @llvm.nvvm.bar.sync(i32 %i)
320
; !0 is the range already attached to the call in test_already_has_range_md.
; The directives below pin the contents of every !range node captured by the
; per-function checks earlier in the file. They use the -DAG form so the
; nodes may be printed in any order. Each captured node is listed exactly
; once, so no two directives have to match the same output line.
; The RANGE_30 entries apply only to the opt run that passes
; -nvvm-intr-range-sm=30; that is the only run in which GRID_IDX_X and
; GRID_SIZE_X are captured.
321!0 = !{i32 0, i32 19}
322; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
323; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
325; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
326; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
327; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
328; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
329; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
330; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
331; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
332; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
333; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}
334