; Regression test for the NVPTX special-register read intrinsics and the
; bar.sync intrinsic.
;
; The two llc invocations (32-bit and 64-bit NVPTX, sm_20) verify that each
; intrinsic call is lowered to a single move from the matching PTX special
; register. The two opt invocations run -nvvm-intr-range (default SM level and
; -nvvm-intr-range-sm=30) and verify the !range metadata attached to the calls.
; The expected range values are listed with the metadata at the end of the file.
;
; The FileCheck invocations no longer pass -allow-deprecated-dag-overlap: the
; only overlapping DAG patterns were two identical BLK_IDX_XY lines, and that
; duplicate has been removed.

; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
; RUN:   | FileCheck --check-prefix=RANGE --check-prefix=RANGE_20 %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
; RUN:    -nvvm-intr-range -nvvm-intr-range-sm=30 \
; RUN:   | FileCheck --check-prefix=RANGE --check-prefix=RANGE_30 %s

;------------------------------------------------------------------------------
; Thread index within the block (tid.*). The x and y components share one
; range node, z gets its own, and no range is checked for w.
;------------------------------------------------------------------------------

define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  ret i32 %x
}

define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
  ret i32 %x
}

define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
  ret i32 %x
}

define ptx_device i32 @test_tid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
  ret i32 %x
}

;------------------------------------------------------------------------------
; Block dimensions (ntid.*). Same grouping as tid.*: x/y share a node, z has
; its own, w is only checked for lowering.
;------------------------------------------------------------------------------

define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  ret i32 %x
}

define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
  ret i32 %x
}

define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
  ret i32 %x
}

define ptx_device i32 @test_ntid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
  ret i32 %x
}

;------------------------------------------------------------------------------
; Warp-level registers. laneid and warpsize get range metadata; warpid and
; nwarpid are only checked for lowering.
;------------------------------------------------------------------------------

define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
  ret i32 %x
}

define ptx_device i32 @test_warpsize() {
; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
  ret i32 %x
}

define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
  ret i32 %x
}

define ptx_device i32 @test_nwarpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
  ret i32 %x
}

;------------------------------------------------------------------------------
; Block index within the grid (ctaid.*). The y and z tests come first so that
; the shared y/z range variable is already bound when the x test reuses it for
; the default-SM run; with -nvvm-intr-range-sm=30 the x component is expected
; to get a separate node.
;------------------------------------------------------------------------------

define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
  ret i32 %x
}

;------------------------------------------------------------------------------
; Grid dimensions (nctaid.*). Same ordering and per-SM split as ctaid.*.
;------------------------------------------------------------------------------

define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  ret i32 %x
}

define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  ret i32 %x
}

define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  ret i32 %x
}

; The call below already carries !range !0 ({0, 19}); the expected output keeps
; that node rather than replacing it with the range used for nctaid.x above.
define ptx_device i32 @test_already_has_range_md() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[ALREADY:[0-9]+]]
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range !0
  ret i32 %x
}


define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
  ret i32 %x
}

;------------------------------------------------------------------------------
; Remaining special registers: only the llc lowering is checked for these.
;------------------------------------------------------------------------------

define ptx_device i32 @test_smid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
  ret i32 %x
}

define ptx_device i32 @test_nsmid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
  ret i32 %x
}

define ptx_device i32 @test_gridid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_eq() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_le() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_lt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_ge() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_gt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
  ret i32 %x
}

define ptx_device i32 @test_clock() {
; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
  ret i32 %x
}

; clock64 is the only 64-bit read in this file, hence mov.u64 and an %rd
; destination register in the expected output.
define ptx_device i64 @test_clock64() {
; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
; CHECK: ret;
  %x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
  ret i64 %x
}

define ptx_device i32 @test_pm0() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
  ret i32 %x
}

define ptx_device i32 @test_pm1() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
  ret i32 %x
}

define ptx_device i32 @test_pm2() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
  ret i32 %x
}

define ptx_device i32 @test_pm3() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
  ret i32 %x
}

; Barrier intrinsic with a constant barrier number of 0.
define ptx_device void @test_bar_sync() {
; CHECK: bar.sync 0
; CHECK: ret;
  call void @llvm.nvvm.bar.sync(i32 0)
  ret void
}

;------------------------------------------------------------------------------
; Intrinsic declarations.
;------------------------------------------------------------------------------

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()

declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()

declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()

declare i32 @llvm.nvvm.read.ptx.sreg.smid()
declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
declare i32 @llvm.nvvm.read.ptx.sreg.gridid()

declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()

declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()

declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
declare i32 @llvm.nvvm.read.ptx.sreg.pm3()

declare void @llvm.nvvm.bar.sync(i32 %i)

;------------------------------------------------------------------------------
; Expected range nodes. Each pair is {lower bound, exclusive upper bound}. The
; order of the nodes in the opt output is not fixed, so order-independent
; matching is used, and every pattern below must match a different node.
;------------------------------------------------------------------------------

!0 = !{i32 0, i32 19}
; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}