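; Checks that the NVPTX backend lowers addrspacecast between the default
; address space and address spaces 1, 3, 4 and 5 to cvta / cvta.to
; instructions, for both the 32-bit (nvptx) and 64-bit (nvptx64) targets.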
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -disable-nvptx-favor-non-generic | FileCheck %s -check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -disable-nvptx-favor-non-generic | FileCheck %s -check-prefix=PTX64


; conv1: cast an addrspace(1) pointer to the default (generic) address space
; and load an i32 through the result. The expected output is a cvta.global
; conversion (u32 on nvptx, u64 on nvptx64) followed by an unqualified ld.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv1(i32 addrspace(1)* %ptr) {
; PTX32-LABEL: conv1(
; PTX32: cvta.global.u32
; PTX32: ld.u32
; PTX64-LABEL: conv1(
; PTX64: cvta.global.u64
; PTX64: ld.u32
  %genptr = addrspacecast i32 addrspace(1)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; conv2: cast an addrspace(3) pointer to the default (generic) address space
; and load an i32 through the result. The expected output is a cvta.shared
; conversion (u32 on nvptx, u64 on nvptx64) followed by an unqualified ld.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv2(i32 addrspace(3)* %ptr) {
; PTX32-LABEL: conv2(
; PTX32: cvta.shared.u32
; PTX32: ld.u32
; PTX64-LABEL: conv2(
; PTX64: cvta.shared.u64
; PTX64: ld.u32
  %genptr = addrspacecast i32 addrspace(3)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; conv3: cast an addrspace(4) pointer to the default (generic) address space
; and load an i32 through the result. The expected output is a cvta.const
; conversion (u32 on nvptx, u64 on nvptx64) followed by an unqualified ld.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv3(i32 addrspace(4)* %ptr) {
; PTX32-LABEL: conv3(
; PTX32: cvta.const.u32
; PTX32: ld.u32
; PTX64-LABEL: conv3(
; PTX64: cvta.const.u64
; PTX64: ld.u32
  %genptr = addrspacecast i32 addrspace(4)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; conv4: cast an addrspace(5) pointer to the default (generic) address space
; and load an i32 through the result. The expected output is a cvta.local
; conversion (u32 on nvptx, u64 on nvptx64) followed by an unqualified ld.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv4(i32 addrspace(5)* %ptr) {
; PTX32-LABEL: conv4(
; PTX32: cvta.local.u32
; PTX32: ld.u32
; PTX64-LABEL: conv4(
; PTX64: cvta.local.u64
; PTX64: ld.u32
  %genptr = addrspacecast i32 addrspace(5)* %ptr to i32*
  %val = load i32, i32* %genptr
  ret i32 %val
}

; conv5: cast a default (generic) address space pointer to addrspace(1) and
; load an i32 through the result. The expected output is a cvta.to.global
; conversion (u32 on nvptx, u64 on nvptx64) followed by ld.global.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv5(i32* %ptr) {
; PTX32-LABEL: conv5(
; PTX32: cvta.to.global.u32
; PTX32: ld.global.u32
; PTX64-LABEL: conv5(
; PTX64: cvta.to.global.u64
; PTX64: ld.global.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(1)*
  %val = load i32, i32 addrspace(1)* %specptr
  ret i32 %val
}

; conv6: cast a default (generic) address space pointer to addrspace(3) and
; load an i32 through the result. The expected output is a cvta.to.shared
; conversion (u32 on nvptx, u64 on nvptx64) followed by ld.shared.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv6(i32* %ptr) {
; PTX32-LABEL: conv6(
; PTX32: cvta.to.shared.u32
; PTX32: ld.shared.u32
; PTX64-LABEL: conv6(
; PTX64: cvta.to.shared.u64
; PTX64: ld.shared.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(3)*
  %val = load i32, i32 addrspace(3)* %specptr
  ret i32 %val
}

; conv7: cast a default (generic) address space pointer to addrspace(4) and
; load an i32 through the result. The expected output is a cvta.to.const
; conversion (u32 on nvptx, u64 on nvptx64) followed by ld.const.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv7(i32* %ptr) {
; PTX32-LABEL: conv7(
; PTX32: cvta.to.const.u32
; PTX32: ld.const.u32
; PTX64-LABEL: conv7(
; PTX64: cvta.to.const.u64
; PTX64: ld.const.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %specptr
  ret i32 %val
}

; conv8: cast a default (generic) address space pointer to addrspace(5) and
; load an i32 through the result. The expected output is a cvta.to.local
; conversion (u32 on nvptx, u64 on nvptx64) followed by ld.local.u32.
; The -LABEL checks anchor on the function header so the instruction checks
; below cannot be satisfied by another function's output.
define i32 @conv8(i32* %ptr) {
; PTX32-LABEL: conv8(
; PTX32: cvta.to.local.u32
; PTX32: ld.local.u32
; PTX64-LABEL: conv8(
; PTX64: cvta.to.local.u64
; PTX64: ld.local.u32
  %specptr = addrspacecast i32* %ptr to i32 addrspace(5)*
  %val = load i32, i32 addrspace(5)* %specptr
  ret i32 %val
}
