; Test for the NVPTX special-register read intrinsics
; (@llvm.nvvm.read.ptx.sreg.*) and @llvm.nvvm.bar.sync.
;
; Two things are verified:
;   * llc (nvptx and nvptx64, sm_20) lowers every intrinsic to a single
;     mov from the matching PTX special register (default FileCheck prefix).
;   * opt with -nvvm-intr-range attaches !range metadata to the calls.  The
;     invocation without an explicit SM version uses the _20 prefix, the one
;     with -nvvm-intr-range-sm=30 uses the _30 prefix; expectations common to
;     both use the shared range prefix.
;
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
; RUN:   | FileCheck --check-prefix=RANGE --check-prefix=RANGE_20 %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
; RUN:    -nvvm-intr-range -nvvm-intr-range-sm=30 \
; RUN:   | FileCheck --check-prefix=RANGE --check-prefix=RANGE_30 %s

; --- %tid.{x,y,z,w} ---------------------------------------------------------
; x and y are expected to share one range node (captured as BLK_IDX_XY on
; first use), z has its own node, and no range metadata is checked for w.

define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  ret i32 %x
}

define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
  ret i32 %x
}

define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
  ret i32 %x
}

define ptx_device i32 @test_tid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
  ret i32 %x
}

; --- %ntid.{x,y,z,w} --------------------------------------------------------
; Same sharing pattern as %tid: x/y share BLK_SIZE_XY, z uses BLK_SIZE_Z,
; w has no range expectation.

define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  ret i32 %x
}

define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
  ret i32 %x
}

define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
  ret i32 %x
}

define ptx_device i32 @test_ntid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
  ret i32 %x
}

; --- %laneid, WARP_SZ, %warpid, %nwarpid ------------------------------------
; Only laneid and warpsize carry a range expectation.

define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
  ret i32 %x
}

define ptx_device i32 @test_warpsize() {
; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
  ret i32 %x
}

define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
  ret i32 %x
}

define ptx_device i32 @test_nwarpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
  ret i32 %x
}

; --- %ctaid.{y,z,x,w} -------------------------------------------------------
; y and z come first so that GRID_IDX_YZ is already captured when the x test
; refers to it: without an explicit SM version x is expected to reuse the y/z
; node, while with -nvvm-intr-range-sm=30 it is expected to get its own node.

define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  ret i32 %x
}

define ptx_device i32 @test_ctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
  ret i32 %x
}

; --- %nctaid.{y,z,x,w} ------------------------------------------------------
; Mirrors the %ctaid group: y/z share GRID_SIZE_YZ, x depends on the SM
; version passed to the pass, w has no range expectation.

define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  ret i32 %x
}

define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  ret i32 %x
}

define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  ret i32 %x
}


define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
  ret i32 %x
}

; --- %smid, %nsmid, %gridid (codegen only) ----------------------------------

define ptx_device i32 @test_smid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
  ret i32 %x
}

define ptx_device i32 @test_nsmid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
  ret i32 %x
}

define ptx_device i32 @test_gridid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
  ret i32 %x
}

; --- %lanemask_{eq,le,lt,ge,gt} (codegen only) ------------------------------
; The intrinsic names use a dot (lanemask.eq) where the PTX register name
; uses an underscore (%lanemask_eq).

define ptx_device i32 @test_lanemask_eq() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_le() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_lt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_ge() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
  ret i32 %x
}

define ptx_device i32 @test_lanemask_gt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
  ret i32 %x
}

; --- %clock / %clock64 (codegen only) ---------------------------------------
; clock64 is the only i64 intrinsic here, hence mov.u64 into an %rd register.

define ptx_device i32 @test_clock() {
; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
  ret i32 %x
}

define ptx_device i64 @test_clock64() {
; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
; CHECK: ret;
  %x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
  ret i64 %x
}

; --- %pm0 .. %pm3 (codegen only) --------------------------------------------

define ptx_device i32 @test_pm0() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
  ret i32 %x
}

define ptx_device i32 @test_pm1() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
  ret i32 %x
}

define ptx_device i32 @test_pm2() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
  ret i32 %x
}

define ptx_device i32 @test_pm3() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
; CHECK: ret;
  %x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
  ret i32 %x
}

; --- bar.sync ---------------------------------------------------------------
; The constant i32 0 operand is expected to appear as the immediate in
; "bar.sync 0".

define ptx_device void @test_bar_sync() {
; CHECK: bar.sync 0
; CHECK: ret;
  call void @llvm.nvvm.bar.sync(i32 0)
  ret void
}

; Intrinsic declarations, grouped in the same families as the tests above.

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()

declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()

declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()

declare i32 @llvm.nvvm.read.ptx.sreg.smid()
declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
declare i32 @llvm.nvvm.read.ptx.sreg.gridid()

declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()

declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()

declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
declare i32 @llvm.nvvm.read.ptx.sreg.pm3()

declare void @llvm.nvvm.bar.sync(i32 %i)

; Expected contents of the range metadata nodes captured above.  The -DAG
; form is used so the nodes may be printed in any order.  The two GRID_*_X
; nodes are only expected when the pass is told the target is sm_30.
; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}