; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64


;; i8 loads from the global, shared and local address spaces
; ld_global_i8: loads an i8 through an addrspace(1) pointer and returns it.
; The checks expect an ld.global.u8 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(1)* %ptr
  ret i8 %a
}

; ld_shared_i8: loads an i8 through an addrspace(3) pointer and returns it.
; The checks expect an ld.shared.u8 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(3)* %ptr
  ret i8 %a
}

; ld_local_i8: loads an i8 through an addrspace(5) pointer and returns it.
; The checks expect an ld.local.u8 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(5)* %ptr
  ret i8 %a
}

;; i16 loads from the global, shared and local address spaces
; ld_global_i16: loads an i16 through an addrspace(1) pointer and returns it.
; The checks expect an ld.global.u16 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(1)* %ptr
  ret i16 %a
}

; ld_shared_i16: loads an i16 through an addrspace(3) pointer and returns it.
; The checks expect an ld.shared.u16 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(3)* %ptr
  ret i16 %a
}

; ld_local_i16: loads an i16 through an addrspace(5) pointer and returns it.
; The checks expect an ld.local.u16 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(5)* %ptr
  ret i16 %a
}

;; i32 loads from the global, shared and local address spaces
; ld_global_i32: loads an i32 through an addrspace(1) pointer and returns it.
; The checks expect an ld.global.u32 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(1)* %ptr
  ret i32 %a
}

; ld_shared_i32: loads an i32 through an addrspace(3) pointer and returns it.
; The checks expect an ld.shared.u32 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(3)* %ptr
  ret i32 %a
}

; ld_local_i32: loads an i32 through an addrspace(5) pointer and returns it.
; The checks expect an ld.local.u32 followed by a ret. The address operand
; is a %r register in the -march=nvptx run and a %rl register in the
; -march=nvptx64 run; the destination is a %r register in both.
define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(5)* %ptr
  ret i32 %a
}

;; i64 loads from the global, shared and local address spaces
; ld_global_i64: loads an i64 through an addrspace(1) pointer and returns it.
; The checks expect an ld.global.u64 followed by a ret. The destination is a
; %rl register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(1)* %ptr
  ret i64 %a
}

; ld_shared_i64: loads an i64 through an addrspace(3) pointer and returns it.
; The checks expect an ld.shared.u64 followed by a ret. The destination is a
; %rl register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(3)* %ptr
  ret i64 %a
}

; ld_local_i64: loads an i64 through an addrspace(5) pointer and returns it.
; The checks expect an ld.local.u64 followed by a ret. The destination is a
; %rl register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(5)* %ptr
  ret i64 %a
}

;; f32 (float) loads from the global, shared and local address spaces
; ld_global_f32: loads a float through an addrspace(1) pointer and returns it.
; The checks expect an ld.global.f32 followed by a ret. The destination is a
; %f register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(1)* %ptr
  ret float %a
}

; ld_shared_f32: loads a float through an addrspace(3) pointer and returns it.
; The checks expect an ld.shared.f32 followed by a ret. The destination is a
; %f register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(3)* %ptr
  ret float %a
}

; ld_local_f32: loads a float through an addrspace(5) pointer and returns it.
; The checks expect an ld.local.f32 followed by a ret. The destination is a
; %f register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(5)* %ptr
  ret float %a
}

;; f64 (double) loads from the global, shared and local address spaces
; ld_global_f64: loads a double through an addrspace(1) pointer and returns it.
; The checks expect an ld.global.f64 followed by a ret. The destination is a
; %fl register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(1)* %ptr
  ret double %a
}

; ld_shared_f64: loads a double through an addrspace(3) pointer and returns it.
; The checks expect an ld.shared.f64 followed by a ret. The destination is a
; %fl register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(3)* %ptr
  ret double %a
}

; ld_local_f64: loads a double through an addrspace(5) pointer and returns it.
; The checks expect an ld.local.f64 followed by a ret. The destination is a
; %fl register in both runs; the address operand is a %r register in the
; -march=nvptx run and a %rl register in the -march=nvptx64 run.
define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(5)* %ptr
  ret double %a
}
