; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; When a frame index offset is more than 12-bits, make sure we don't store
; it in mubuf's offset field.

; Also, make sure we use the same register for storing the scratch buffer address
; for both stores. This register is allocated by the register scavenger, so we
; should be able to reuse the same register for each scratch buffer access.

; Two 8192 x i32 scratch arrays, each written at element 0. The checks expect
; the first store to use an immediate offset (offset:4) and the second store's
; address (0x8004) to be materialized in a VGPR and used through offen.
; GCN-LABEL: {{^}}legal_offset_fi:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:4{{$}}
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8004
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offen{{$}}

define amdgpu_kernel void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
entry:
  %scratch0 = alloca [8192 x i32], addrspace(5)
  %scratch1 = alloca [8192 x i32], addrspace(5)

  %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 0
  store i32 1, i32 addrspace(5)* %scratchptr0

  %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 0
  store i32 2, i32 addrspace(5)* %scratchptr1

  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %if, label %else

if:
  %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
  %if_value = load i32, i32 addrspace(5)* %if_ptr
  br label %done

else:
  %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
  %else_value = load i32, i32 addrspace(5)* %else_ptr
  br label %done

done:
  %value = phi i32 [%if_value, %if], [%else_value, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Same layout as legal_offset_fi, but both stores use an index loaded from
; %offsets, so each store address is a frame offset plus a variable. The checks
; expect the 0x8004 base to be moved into a VGPR and added to form the second
; store's offen address.
; GCN-LABEL: {{^}}legal_offset_fi_offset:
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offen{{$}}
; This constant isn't folded, because it has multiple uses.
; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8004
; GCN-DAG: v_add_{{[iu]}}32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offen{{$}}

define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
entry:
  %scratch0 = alloca [8192 x i32], addrspace(5)
  %scratch1 = alloca [8192 x i32], addrspace(5)

  %offset0 = load i32, i32 addrspace(1)* %offsets
  %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %offset0
  store i32 %offset0, i32 addrspace(5)* %scratchptr0

  %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
  %offset1 = load i32, i32 addrspace(1)* %offsetptr1
  %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %offset1
  store i32 %offset1, i32 addrspace(5)* %scratchptr1

  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %if, label %else

if:
  %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
  %if_value = load i32, i32 addrspace(5)* %if_ptr
  br label %done

else:
  %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
  %else_value = load i32, i32 addrspace(5)* %else_ptr
  br label %done

done:
  %value = phi i32 [%if_value, %if], [%else_value, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Store to element (%offset + 4) through an inbounds GEP. The checks expect the
; constant to stay in the VALU add (16 = 4 elements * 4 bytes) and the store to
; use offen with no immediate offset.
; NOTE(review): presumably the add is kept out of the immediate field because
; %offset may be negative - confirm against the backend's offset folding rules.
; GCN-LABEL: {{^}}neg_vaddr_offset_inbounds:
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], 0 offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) {
entry:
  %array = alloca [8192 x i32], addrspace(5)
  %ptr_offset = add i32 %offset, 4
  %ptr = getelementptr inbounds [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
  store i32 0, i32 addrspace(5)* %ptr
  ret void
}

; Same as neg_vaddr_offset_inbounds, but the GEP is not marked inbounds; the
; expected instructions are identical.
; GCN-LABEL: {{^}}neg_vaddr_offset:
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], 0 offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
entry:
  %array = alloca [8192 x i32], addrspace(5)
  %ptr_offset = add i32 %offset, 4
  %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
  store i32 0, i32 addrspace(5)* %ptr
  ret void
}

; Store to constant element 4 of the array, then load from a variable index.
; The checks expect the constant store to use the immediate offset field
; (offset:20) with no VGPR address.
; GCN-LABEL: {{^}}pos_vaddr_offset:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:20
define amdgpu_kernel void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
entry:
  %array = alloca [8192 x i32], addrspace(5)
  %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 4
  store i32 0, i32 addrspace(5)* %ptr
  %load_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %offset
  %val = load i32, i32 addrspace(5)* %load_ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}