; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Each kernel below assembles a <4 x i32> from four volatile scalar loads via
; an insertelement chain and then uses that vector more than once (whole-vector
; stores plus per-element extracts). The check lines pin the number and width
; of the buffer loads/stores that llc emits for each kernel.
; NOTE(review): presumably these guard against the build_vector being
; scalarized or duplicated when it has several users - confirm against the
; originating commit.

; Kernel 1: the vector is stored whole to %out0 and %out1, then every element
; is extracted and written to %out2 with a volatile scalar store.
; Expected: four dword loads, two dwordx4 stores, four dword stores.
; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0,
                                                                  <4 x i32> addrspace(1)* noalias %out1,
                                                                  i32 addrspace(1)* noalias %out2,
                                                                  i32 addrspace(1)* %in) {
  ; Volatile so that the four loads from the same address stay distinct.
  %elt0 = load volatile i32, i32 addrspace(1)* %in
  %elt1 = load volatile i32, i32 addrspace(1)* %in
  %elt2 = load volatile i32, i32 addrspace(1)* %in
  %elt3 = load volatile i32, i32 addrspace(1)* %in

  %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
  %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
  %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
  %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3

  ; Two whole-vector users of the same build_vector.
  store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0
  store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out1

  %extract0 = extractelement <4 x i32> %vec3, i32 0
  %extract1 = extractelement <4 x i32> %vec3, i32 1
  %extract2 = extractelement <4 x i32> %vec3, i32 2
  %extract3 = extractelement <4 x i32> %vec3, i32 3

  ; Four additional scalar users, one per element.
  store volatile i32 %extract0, i32 addrspace(1)* %out2
  store volatile i32 %extract1, i32 addrspace(1)* %out2
  store volatile i32 %extract2, i32 addrspace(1)* %out2
  store volatile i32 %extract3, i32 addrspace(1)* %out2

  ret void
}

; Kernel 2: the vector is stored whole once (to %out0); each extracted element
; additionally feeds a scalar ALU op whose result is stored to %out2.
; %out1 is not referenced by this kernel.
; Expected: four dword loads, one dwordx4 store, four dword stores.
; GCN-LABEL: {{^}}store_build_vector_multiple_extract_uses_v4i32:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword

; GCN: buffer_store_dwordx4

; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
define amdgpu_kernel void @store_build_vector_multiple_extract_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0,
                                                                          <4 x i32> addrspace(1)* noalias %out1,
                                                                          i32 addrspace(1)* noalias %out2,
                                                                          i32 addrspace(1)* %in) {
  %elt0 = load volatile i32, i32 addrspace(1)* %in
  %elt1 = load volatile i32, i32 addrspace(1)* %in
  %elt2 = load volatile i32, i32 addrspace(1)* %in
  %elt3 = load volatile i32, i32 addrspace(1)* %in

  %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
  %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
  %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
  %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3

  %extract0 = extractelement <4 x i32> %vec3, i32 0
  %extract1 = extractelement <4 x i32> %vec3, i32 1
  %extract2 = extractelement <4 x i32> %vec3, i32 2
  %extract3 = extractelement <4 x i32> %vec3, i32 3

  %op0 = add i32 %extract0, 3
  %op1 = sub i32 %extract1, 9
  %op2 = xor i32 %extract2, 1231412
  ; The mask used to be written as 258233412312, which needs 38 bits and does
  ; not fit in i32. 535374552 is that value modulo 2^32, i.e. the constant a
  ; truncating IR reader would have produced, now spelled so that it is in
  ; range for the type.
  %op3 = and i32 %extract3, 535374552

  store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0

  store volatile i32 %op0, i32 addrspace(1)* %out2
  store volatile i32 %op1, i32 addrspace(1)* %out2
  store volatile i32 %op2, i32 addrspace(1)* %out2
  store volatile i32 %op3, i32 addrspace(1)* %out2

  ret void
}

; Kernel 3: the <4 x i32> is bitcast to <2 x i64>; the bitcast value is stored
; whole to %out0 and both i64 elements are extracted and stored to %out2.
; %out1 is not referenced by this kernel.
; Expected: four dword loads, one dwordx4 store, two dwordx2 stores.
; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword

; GCN: buffer_store_dwordx4

; GCN: buffer_store_dwordx2
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64(<2 x i64> addrspace(1)* noalias %out0,
                                                                                   <4 x i32> addrspace(1)* noalias %out1,
                                                                                   i64 addrspace(1)* noalias %out2,
                                                                                   i32 addrspace(1)* %in) {
  %elt0 = load volatile i32, i32 addrspace(1)* %in
  %elt1 = load volatile i32, i32 addrspace(1)* %in
  %elt2 = load volatile i32, i32 addrspace(1)* %in
  %elt3 = load volatile i32, i32 addrspace(1)* %in

  %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
  %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
  %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
  %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3

  ; Reinterpret the four 32-bit lanes as two 64-bit lanes.
  %bc.vec3 = bitcast <4 x i32> %vec3 to <2 x i64>
  store <2 x i64> %bc.vec3, <2 x i64> addrspace(1)* %out0

  %extract0 = extractelement <2 x i64> %bc.vec3, i32 0
  %extract1 = extractelement <2 x i64> %bc.vec3, i32 1

  store volatile i64 %extract0, i64 addrspace(1)* %out2
  store volatile i64 %extract1, i64 addrspace(1)* %out2

  ret void
}