; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s

; FIXME: Broken on evergreen
; FIXME: For some reason the 8- and 16-element vectors are being stored as
; individual elements instead of 128-bit stores.


; Constant-index inserts: each kernel overwrites one fixed lane of %a and
; stores the whole vector to %out.

; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?

; NOTE(review): the five pattern lines under the label below carry no check
; prefix, so FileCheck skips them -- confirm whether that is intentional.
; SI-LABEL: @insertelement_v4f32_0:
; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
; V_MOV_B32_e32
; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4f32_1:
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4f32_2:
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4f32_3:
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @insertelement_v4i32_0:
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 999, i32 0
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Dynamic-index inserts: the lane to overwrite is the kernel argument %b.
; Patterns spelled FIXMESI are not matched under the SI check prefix.

; SI-LABEL: @dynamic_insertelement_v2f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
  store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v8f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
  store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; SI-LABEL: @dynamic_insertelement_v16f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
  store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; SI-LABEL: @dynamic_insertelement_v2i32:
; SI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
  store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4i32:
; SI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v8i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
  store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; SI-LABEL: @dynamic_insertelement_v16i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
  store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
  ret void
}


; SI-LABEL: @dynamic_insertelement_v2i16:
; FIXMESI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
  store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4i16:
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
  store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
  ret void
}


; SI-LABEL: @dynamic_insertelement_v2i8:
; FIXMESI: BUFFER_STORE_USHORT
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
  store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: @dynamic_insertelement_v4i8:
; FIXMESI: BUFFER_STORE_DWORD
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
  store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v8i8:
; FIXMESI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
  store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @dynamic_insertelement_v16i8:
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
  store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
  ret void
}

; This test requires handling INSERT_SUBREG in SIFixSGPRCopies.  Check that
; the compiler doesn't crash.
; SI-LABEL: @insert_split_bb
define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
entry:
  ; Element 0 is written before the branch, so the partially built vector is
  ; used in both successor blocks.
  %0 = insertelement <2 x i32> undef, i32 %a, i32 0
  %1 = icmp eq i32 %a, 0
  br i1 %1, label %if, label %else

if:
  ; Taken when %a == 0: element 1 comes from the first i32 at %in.
  %2 = load i32 addrspace(1)* %in
  %3 = insertelement <2 x i32> %0, i32 %2, i32 1
  br label %endif

else:
  ; Taken when %a != 0: element 1 comes from the i32 one slot past %in.
  %4 = getelementptr i32 addrspace(1)* %in, i32 1
  %5 = load i32 addrspace(1)* %4
  %6 = insertelement <2 x i32> %0, i32 %5, i32 1
  br label %endif

endif:
  ; Merge the two completed vectors and store the result to %out.
  %7 = phi <2 x i32> [%3, %if], [%6, %else]
  store <2 x i32> %7, <2 x i32> addrspace(1)* %out
  ret void
}