1; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=ELT16 -check-prefix=HSA -check-prefix=HSA-ELT16 -check-prefix=ALL -check-prefix=HSA_ELTGE8 %s 2; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=ELT8 -check-prefix=HSA -check-prefix=HSA-ELT8 -check-prefix=ALL -check-prefix=HSA-ELTGE8 %s 3; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=ELT4 -check-prefix=HSA -check-prefix=HSA-ELT4 -check-prefix=ALL %s 4 5 6; ALL-LABEL: {{^}}private_elt_size_v4i32: 7 8; HSA-ELT16: private_element_size = 3 9; HSA-ELT8: private_element_size = 2 10; HSA-ELT4: private_element_size = 1 11 12 13; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 14; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 15; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} 16 17; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24{{$}} 18; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 19; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 20; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40 21 22; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen 23; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen 24 25 26; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} 27; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} 28; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} 29; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} 30; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}} 31; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}} 32; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}} 33; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}} 34 35; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} 36; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} 37; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} 38; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} 39define amdgpu_kernel void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { 40entry: 41 %tid = call i32 @llvm.amdgcn.workitem.id.x() 42 %idxprom = sext i32 %tid to i64 43 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom 44 %index.load = load i32, i32 addrspace(1)* %gep.index 45 %index = and i32 %index.load, 2 46 %alloca = alloca [2 x <4 x i32>], align 16, addrspace(5) 47 %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 0 48 %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 1 49 store <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %gep0 50 store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %gep1 51 %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 %index 52 %load = load <4 x i32>, <4 x i32> addrspace(5)* %gep2 53 store <4 x i32> %load, <4 x i32> addrspace(1)* %out 54 ret void 55} 56 57; ALL-LABEL: {{^}}private_elt_size_v8i32: 58; HSA-ELT16: private_element_size = 3 59; HSA-ELT8: private_element_size = 2 60; HSA-ELT4: private_element_size = 1 61 62; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 63; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48 64; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64 65; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80 66 67; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} 68; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} 69 70 71; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 72; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40 73; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48 74; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:56 75; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:88 76; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80 77; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:72 78; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64 79 80; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen 81; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen 82 83 84; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}} 85; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}} 86; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}} 87; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}} 88; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:48{{$}} 89; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:52{{$}} 90; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:56{{$}} 91; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:60{{$}} 92; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:64{{$}} 93; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:68{{$}} 94; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:72{{$}} 95; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:76{{$}} 96; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:80{{$}} 97; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:84{{$}} 98; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:88{{$}} 99; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:92{{$}} 100 101; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} 102; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} 103; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} 104; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} 105; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:16{{$}} 106; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:20{{$}} 107; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:24{{$}} 108; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:28{{$}} 109define amdgpu_kernel void @private_elt_size_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { 110entry: 111 %tid = call i32 @llvm.amdgcn.workitem.id.x() 112 %idxprom = sext i32 %tid to i64 113 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom 114 %index.load = load i32, i32 addrspace(1)* %gep.index 115 %index = and i32 %index.load, 2 116 %alloca = alloca [2 x <8 x i32>], align 32, addrspace(5) 117 %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 0 118 %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 1 119 store <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %gep0 120 store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> addrspace(5)* %gep1 121 %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 %index 122 %load = load <8 x i32>, <8 x i32> addrspace(5)* %gep2 123 store <8 x i32> %load, <8 x i32> addrspace(1)* %out 124 ret void 125} 126 127 128; ALL-LABEL: {{^}}private_elt_size_i64: 129; HSA-ELT16: private_element_size = 3 130; HSA-ELT8: private_element_size = 2 131; HSA-ELT4: private_element_size = 1 132 133; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:1 134; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:2 135; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen 136; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]] 137 138 139; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} 140; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} 141; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} 142; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} 143 144; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} 145; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}} 146; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]] 147define amdgpu_kernel void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { 148entry: 149 %tid = call i32 @llvm.amdgcn.workitem.id.x() 150 %idxprom = sext i32 %tid to i64 151 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom 152 %index.load = load i32, i32 addrspace(1)* %gep.index 153 %index = and i32 %index.load, 2 154 %alloca = alloca [2 x i64], align 16, addrspace(5) 155 %gep0 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 0 156 %gep1 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 1 157 store i64 0, i64 addrspace(5)* %gep0 158 store i64 34359738602, i64 addrspace(5)* %gep1 159 %gep2 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 %index 160 %load = load i64, i64 addrspace(5)* %gep2 161 store i64 %load, i64 addrspace(1)* %out 162 ret void 163} 164 165; ALL-LABEL: {{^}}private_elt_size_f64: 166; HSA-ELT16: private_element_size = 3 167; HSA-ELT8: private_element_size = 2 168; HSA-ELT4: private_element_size = 1 169 170; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 171; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24 172; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen 173; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]] 174 175 176; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} 177; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} 178; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} 179; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} 180 181; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} 182; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}} 183; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]] 184define amdgpu_kernel void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { 185entry: 186 %tid = call i32 @llvm.amdgcn.workitem.id.x() 187 %idxprom = sext i32 %tid to i64 188 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom 189 %index.load = load i32, i32 addrspace(1)* %gep.index 190 %index = and i32 %index.load, 2 191 %alloca = alloca [2 x double], align 16, addrspace(5) 192 %gep0 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 0 193 %gep1 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 1 194 store double 0.0, double addrspace(5)* %gep0 195 store double 4.0, double addrspace(5)* %gep1 196 %gep2 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 %index 197 %load = load double, double addrspace(5)* %gep2 198 store double %load, double addrspace(1)* %out 199 ret void 200} 201 202; ALL-LABEL: {{^}}private_elt_size_v2i64: 203; HSA-ELT16: private_element_size = 3 204; HSA-ELT8: private_element_size = 2 205; HSA-ELT4: private_element_size = 1 206 207; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16 208; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 209; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} 210 211; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16{{$}} 212; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24 213; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40 214; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32 215 216; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen 217; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen 218 219 220; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}} 221; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}} 222; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}} 223; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}} 224; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}} 225; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}} 226; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}} 227; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}} 228 229; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} 230; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} 231; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} 232; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} 233define amdgpu_kernel void @private_elt_size_v2i64(<2 x i64> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { 234entry: 235 %tid = call i32 @llvm.amdgcn.workitem.id.x() 236 %idxprom = sext i32 %tid to i64 237 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom 238 %index.load = load i32, i32 addrspace(1)* %gep.index 239 %index = and i32 %index.load, 2 240 %alloca = alloca [2 x <2 x i64>], align 16, addrspace(5) 241 %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 0 242 %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 1 243 store <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %gep0 244 store <2 x i64> <i64 1, i64 2>, <2 x i64> addrspace(5)* %gep1 245 %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 %index 246 %load = load <2 x i64>, <2 x i64> addrspace(5)* %gep2 247 store <2 x i64> %load, <2 x i64> addrspace(1)* %out 248 ret void 249} 250 251declare i32 @llvm.amdgcn.workitem.id.x() #1 252 253attributes #0 = { nounwind } 254attributes #1 = { nounwind readnone } 255