; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Verifies that a load through an address-space-qualified pointer is emitted
; as the matching state-space form of the PTX "ld" instruction. In this file
; addrspace(1) is paired with ld.global, addrspace(3) with ld.shared and
; addrspace(5) with ld.local.
;
; Every function is checked twice, once for each llc invocation above. The
; only difference between the two sets of patterns is the address operand:
; the nvptx patterns expect an %r register inside the brackets, the nvptx64
; patterns expect an %rl register.
;
; Each function's check directives are placed directly above its definition.


;;===----------------------------------------------------------------------===;;
;; i8 -- the loaded value is expected in an %r register
;;===----------------------------------------------------------------------===;;

; i8 load from addrspace(1).
; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
  %val = load i8 addrspace(1)* %ptr
  ret i8 %val
}

; i8 load from addrspace(3).
; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
  %val = load i8 addrspace(3)* %ptr
  ret i8 %val
}

; i8 load from addrspace(5).
; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
  %val = load i8 addrspace(5)* %ptr
  ret i8 %val
}


;;===----------------------------------------------------------------------===;;
;; i16 -- the loaded value is expected in an %r register
;;===----------------------------------------------------------------------===;;

; i16 load from addrspace(1).
; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
  %val = load i16 addrspace(1)* %ptr
  ret i16 %val
}

; i16 load from addrspace(3).
; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
  %val = load i16 addrspace(3)* %ptr
  ret i16 %val
}

; i16 load from addrspace(5).
; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
  %val = load i16 addrspace(5)* %ptr
  ret i16 %val
}


;;===----------------------------------------------------------------------===;;
;; i32 -- the loaded value is expected in an %r register
;;===----------------------------------------------------------------------===;;

; i32 load from addrspace(1).
; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
  %val = load i32 addrspace(1)* %ptr
  ret i32 %val
}

; i32 load from addrspace(3).
; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
  %val = load i32 addrspace(3)* %ptr
  ret i32 %val
}

; i32 load from addrspace(5).
; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
  %val = load i32 addrspace(5)* %ptr
  ret i32 %val
}


;;===----------------------------------------------------------------------===;;
;; i64 -- the loaded value is expected in an %rl register on both targets
;;===----------------------------------------------------------------------===;;

; i64 load from addrspace(1).
; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
  %val = load i64 addrspace(1)* %ptr
  ret i64 %val
}

; i64 load from addrspace(3).
; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
  %val = load i64 addrspace(3)* %ptr
  ret i64 %val
}

; i64 load from addrspace(5).
; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
  %val = load i64 addrspace(5)* %ptr
  ret i64 %val
}


;;===----------------------------------------------------------------------===;;
;; f32 -- the loaded value is expected in an %f register
;;===----------------------------------------------------------------------===;;

; float load from addrspace(1).
; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define float @ld_global_f32(float addrspace(1)* %ptr) {
  %val = load float addrspace(1)* %ptr
  ret float %val
}

; float load from addrspace(3).
; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define float @ld_shared_f32(float addrspace(3)* %ptr) {
  %val = load float addrspace(3)* %ptr
  ret float %val
}

; float load from addrspace(5).
; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define float @ld_local_f32(float addrspace(5)* %ptr) {
  %val = load float addrspace(5)* %ptr
  ret float %val
}


;;===----------------------------------------------------------------------===;;
;; f64 -- the loaded value is expected in an %fl register
;;===----------------------------------------------------------------------===;;

; double load from addrspace(1).
; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define double @ld_global_f64(double addrspace(1)* %ptr) {
  %val = load double addrspace(1)* %ptr
  ret double %val
}

; double load from addrspace(3).
; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define double @ld_shared_f64(double addrspace(3)* %ptr) {
  %val = load double addrspace(3)* %ptr
  ret double %val
}

; double load from addrspace(5).
; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
define double @ld_local_f64(double addrspace(5)* %ptr) {
  %val = load double addrspace(5)* %ptr
  ret double %val
}