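; Checks NVPTX code generation for scalar loads from addrspace(1), addrspace(3)
; and addrspace(5) pointers (expected as ld.global, ld.shared and ld.local).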
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64


;; i8
; Loads an i8 through an addrspace(1) pointer and returns it.
; Expected PTX: ld.global.u8 into an %rc register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(1)* %ptr
  ret i8 %a
}

; Loads an i8 through an addrspace(3) pointer and returns it.
; Expected PTX: ld.shared.u8 into an %rc register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(3)* %ptr
  ret i8 %a
}

; Loads an i8 through an addrspace(5) pointer and returns it.
; Expected PTX: ld.local.u8 into an %rc register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(5)* %ptr
  ret i8 %a
}

;; i16
; Loads an i16 through an addrspace(1) pointer and returns it.
; Expected PTX: ld.global.u16 into an %rs register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(1)* %ptr
  ret i16 %a
}

; Loads an i16 through an addrspace(3) pointer and returns it.
; Expected PTX: ld.shared.u16 into an %rs register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(3)* %ptr
  ret i16 %a
}

; Loads an i16 through an addrspace(5) pointer and returns it.
; Expected PTX: ld.local.u16 into an %rs register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(5)* %ptr
  ret i16 %a
}

;; i32
; Loads an i32 through an addrspace(1) pointer and returns it.
; Expected PTX: ld.global.u32 into an %r register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(1)* %ptr
  ret i32 %a
}

; Loads an i32 through an addrspace(3) pointer and returns it.
; Expected PTX: ld.shared.u32 into an %r register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(3)* %ptr
  ret i32 %a
}

; Loads an i32 through an addrspace(5) pointer and returns it.
; Expected PTX: ld.local.u32 into an %r register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(5)* %ptr
  ret i32 %a
}

;; i64
; Loads an i64 through an addrspace(1) pointer and returns it.
; Expected PTX: ld.global.u64 into an %rl register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(1)* %ptr
  ret i64 %a
}

; Loads an i64 through an addrspace(3) pointer and returns it.
; Expected PTX: ld.shared.u64 into an %rl register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(3)* %ptr
  ret i64 %a
}

; Loads an i64 through an addrspace(5) pointer and returns it.
; Expected PTX: ld.local.u64 into an %rl register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(5)* %ptr
  ret i64 %a
}

;; f32
; Loads a float through an addrspace(1) pointer and returns it.
; Expected PTX: ld.global.f32 into an %f register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(1)* %ptr
  ret float %a
}

; Loads a float through an addrspace(3) pointer and returns it.
; Expected PTX: ld.shared.f32 into an %f register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(3)* %ptr
  ret float %a
}

; Loads a float through an addrspace(5) pointer and returns it.
; Expected PTX: ld.local.f32 into an %f register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(5)* %ptr
  ret float %a
}

;; f64
; Loads a double through an addrspace(1) pointer and returns it.
; Expected PTX: ld.global.f64 into an %fl register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(1)* %ptr
  ret double %a
}

; Loads a double through an addrspace(3) pointer and returns it.
; Expected PTX: ld.shared.f64 into an %fl register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(3)* %ptr
  ret double %a
}

; Loads a double through an addrspace(5) pointer and returns it.
; Expected PTX: ld.local.f64 into an %fl register, with the address held in
; %r under -march=nvptx and in %rl under -march=nvptx64, followed by ret.
define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(5)* %ptr
  ret double %a
}
