; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; Exercises selection of a constant getelementptr displacement into the
; immediate offset field of FLAT and GLOBAL memory instructions.
;
; Expectations encoded by the checks below:
;   * gfx900  - flat and global accesses both carry the immediate offset.
;   * gfx1010 - global accesses carry the immediate offset, while flat
;               accesses must be printed with no trailing operand at all.
; NOTE(review): the gfx1010 flat behaviour presumably reflects a target
; restriction on flat instruction offsets - confirm against the subtarget
; feature definitions.

; Flat (generic address space) load/modify/store of the element at index 1 of
; an i32 array, i.e. a 4 byte displacement from %p.
; GCN-LABEL: flat_inst_offset:
; GFX9:  flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:4
; GFX9:  flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}} offset:4
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
; GFX10: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}{{$}}
define void @flat_inst_offset(i32* nocapture %p) {
  %addr = getelementptr inbounds i32, i32* %p, i64 1
  %old = load i32, i32* %addr, align 4
  %new = add nsw i32 %old, 1
  store i32 %new, i32* %addr, align 4
  ret void
}

; Same access pattern through an addrspace(1) pointer. Both run lines share
; the common prefix here, so both targets must emit the 4 byte offset.
; GCN-LABEL: global_inst_offset:
; GCN:   global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off offset:4
; GCN:   global_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:4
define void @global_inst_offset(i32 addrspace(1)* nocapture %p) {
  %addr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 1
  %old = load i32, i32 addrspace(1)* %addr, align 4
  %new = add nsw i32 %old, 1
  store i32 %new, i32 addrspace(1)* %addr, align 4
  ret void
}

; i16 load from element index 4 (8 byte displacement) inserted into lane 0 of
; a <2 x i16>; the checks expect the d16 form of the flat short load.
; GCN-LABEL: load_i16_lo:
; GFX9:  flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_i16_lo(i16* %arg, <2 x i16>* %out) {
  %addr = getelementptr inbounds i16, i16* %arg, i32 4
  %elt = load i16, i16* %addr, align 2
  %pair = insertelement <2 x i16> <i16 undef, i16 0>, i16 %elt, i32 0
  %sum = add <2 x i16> %pair, %pair
  store <2 x i16> %sum, <2 x i16>* %out, align 4
  ret void
}

; As load_i16_lo, but the loaded value goes into lane 1, so the checks expect
; the d16_hi form of the flat short load.
; GCN-LABEL: load_i16_hi:
; GFX9:  flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_i16_hi(i16* %arg, <2 x i16>* %out) {
  %addr = getelementptr inbounds i16, i16* %arg, i32 4
  %elt = load i16, i16* %addr, align 2
  %pair = insertelement <2 x i16> <i16 0, i16 undef>, i16 %elt, i32 1
  %sum = add <2 x i16> %pair, %pair
  store <2 x i16> %sum, <2 x i16>* %out, align 4
  ret void
}

; Half-precision counterpart of load_i16_lo: element index 4 of a half array
; (8 byte displacement) inserted into lane 0 of a <2 x half>.
; GCN-LABEL: load_half_lo:
; GFX9:  flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_half_lo(half* %arg, <2 x half>* %out) {
  %addr = getelementptr inbounds half, half* %arg, i32 4
  %elt = load half, half* %addr, align 2
  %pair = insertelement <2 x half> <half undef, half 0xH0000>, half %elt, i32 0
  %sum = fadd <2 x half> %pair, %pair
  store <2 x half> %sum, <2 x half>* %out, align 4
  ret void
}

; Half-precision counterpart of load_i16_hi: the loaded value goes into lane 1
; of the <2 x half>.
; GCN-LABEL: load_half_hi:
; GFX9:  flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}}
; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_half_hi(half* %arg, <2 x half>* %out) {
  %addr = getelementptr inbounds half, half* %arg, i32 4
  %elt = load half, half* %addr, align 2
  %pair = insertelement <2 x half> <half 0xH0000, half undef>, half %elt, i32 1
  %sum = fadd <2 x half> %pair, %pair
  store <2 x half> %sum, <2 x half>* %out, align 4
  ret void
}

; Scalar float load from element index 4 of a float array, i.e. a 16 byte
; displacement from %arg. Only the load is checked.
; GCN-LABEL: load_float_lo:
; GFX9:  flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:16{{$}}
; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}}
define amdgpu_kernel void @load_float_lo(float* %arg, float* %out) {
  %addr = getelementptr inbounds float, float* %arg, i32 4
  %elt = load float, float* %addr, align 4
  %sum = fadd float %elt, %elt
  store float %sum, float* %out, align 4
  ret void
}