; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

;; Each function below loads one scalar through a pointer in address space
;; 1, 3 or 5 and returns it. The checks expect the load to be emitted as
;; ld.global, ld.shared or ld.local respectively, with a type suffix that
;; matches the loaded type. The only difference between the 32-bit and the
;; 64-bit expectations is the register pattern used for the address operand
;; (%r<N> versus %rl<N>).

;; i8
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(1)* %ptr
  ret i8 %a
}

define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(3)* %ptr
  ret i8 %a
}

define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i8 addrspace(5)* %ptr
  ret i8 %a
}

;; i16
define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(1)* %ptr
  ret i16 %a
}

define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(3)* %ptr
  ret i16 %a
}

define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i16 addrspace(5)* %ptr
  ret i16 %a
}

;; i32
define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(1)* %ptr
  ret i32 %a
}

define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(3)* %ptr
  ret i32 %a
}

define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i32 addrspace(5)* %ptr
  ret i32 %a
}

;; i64
define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(1)* %ptr
  ret i64 %a
}

define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(3)* %ptr
  ret i64 %a
}

define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load i64 addrspace(5)* %ptr
  ret i64 %a
}

;; f32
define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(1)* %ptr
  ret float %a
}

define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(3)* %ptr
  ret float %a
}

define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load float addrspace(5)* %ptr
  ret float %a
}

;; f64
define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(1)* %ptr
  ret double %a
}

define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(3)* %ptr
  ret double %a
}

define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
  %a = load double addrspace(5)* %ptr
  ret double %a
}