; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g3 | FileCheck %s --check-prefixes=ALL,G3
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | FileCheck %s --check-prefixes=ALL,G5

; Test that vectors are scalarized/lowered correctly
; (G3 has no Altivec, so <4 x ...> ops must be scalarized;
;  G5 has Altivec and should use vector instructions).

%f4 = type <4 x float>
%i4 = type <4 x i32>

; Splat of a variable float: G3 scalarizes to four fadds/stfs;
; G5 splats via vspltw and uses a single vaddfp.
define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
; G3-LABEL: splat:
; G3:       # %bb.0:
; G3-NEXT:    lfs 0, 12(4)
; G3-NEXT:    lfs 2, 8(4)
; G3-NEXT:    lfs 3, 4(4)
; G3-NEXT:    fadds 0, 0, 1
; G3-NEXT:    lfs 4, 0(4)
; G3-NEXT:    stfs 0, 12(3)
; G3-NEXT:    fadds 0, 2, 1
; G3-NEXT:    stfs 0, 8(3)
; G3-NEXT:    fadds 0, 3, 1
; G3-NEXT:    stfs 0, 4(3)
; G3-NEXT:    fadds 0, 4, 1
; G3-NEXT:    stfs 0, 0(3)
; G3-NEXT:    blr
;
; G5-LABEL: splat:
; G5:       # %bb.0:
; G5-NEXT:    stwu 1, -32(1)
; G5-NEXT:    stfs 1, 16(1)
; G5-NEXT:    addi 5, 1, 16
; G5-NEXT:    lvx 2, 0, 5
; G5-NEXT:    lvx 3, 0, 4
; G5-NEXT:    vspltw 2, 2, 0
; G5-NEXT:    vaddfp 2, 3, 2
; G5-NEXT:    stvx 2, 0, 3
; G5-NEXT:    addi 1, 1, 32
; G5-NEXT:    blr
  %tmp = insertelement %f4 undef, float %X, i32 0         ; <%f4> [#uses=1]
  %tmp2 = insertelement %f4 %tmp, float %X, i32 1         ; <%f4> [#uses=1]
  %tmp4 = insertelement %f4 %tmp2, float %X, i32 2        ; <%f4> [#uses=1]
  %tmp6 = insertelement %f4 %tmp4, float %X, i32 3        ; <%f4> [#uses=1]
  %q = load %f4, %f4* %Q                                  ; <%f4> [#uses=1]
  %R = fadd %f4 %q, %tmp6                                 ; <%f4> [#uses=1]
  store %f4 %R, %f4* %P
  ret void
}

; Splat of a variable i32: same shape as @splat but integer,
; so G5 uses vadduwm instead of vaddfp.
define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind {
; G3-LABEL: splat_i4:
; G3:       # %bb.0:
; G3-NEXT:    lwz 6, 12(4)
; G3-NEXT:    lwz 7, 8(4)
; G3-NEXT:    lwz 8, 4(4)
; G3-NEXT:    add 6, 6, 5
; G3-NEXT:    lwz 4, 0(4)
; G3-NEXT:    stw 6, 12(3)
; G3-NEXT:    add 6, 7, 5
; G3-NEXT:    stw 6, 8(3)
; G3-NEXT:    add 6, 8, 5
; G3-NEXT:    add 4, 4, 5
; G3-NEXT:    stw 6, 4(3)
; G3-NEXT:    stw 4, 0(3)
; G3-NEXT:    blr
;
; G5-LABEL: splat_i4:
; G5:       # %bb.0:
; G5-NEXT:    stwu 1, -32(1)
; G5-NEXT:    stw 5, 16(1)
; G5-NEXT:    addi 5, 1, 16
; G5-NEXT:    lvx 2, 0, 5
; G5-NEXT:    lvx 3, 0, 4
; G5-NEXT:    vspltw 2, 2, 0
; G5-NEXT:    vadduwm 2, 3, 2
; G5-NEXT:    stvx 2, 0, 3
; G5-NEXT:    addi 1, 1, 32
; G5-NEXT:    blr
  %tmp = insertelement %i4 undef, i32 %X, i32 0           ; <%i4> [#uses=1]
  %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1           ; <%i4> [#uses=1]
  %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2          ; <%i4> [#uses=1]
  %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3          ; <%i4> [#uses=1]
  %q = load %i4, %i4* %Q                                  ; <%i4> [#uses=1]
  %R = add %i4 %q, %tmp6                                  ; <%i4> [#uses=1]
  store %i4 %R, %i4* %P
  ret void
}

; Constant splat of -1: G5 should materialize it with vspltisb.
define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind {
; G3-LABEL: splat_imm_i32:
; G3:       # %bb.0:
; G3-NEXT:    lwz 5, 12(4)
; G3-NEXT:    lwz 6, 8(4)
; G3-NEXT:    lwz 7, 4(4)
; G3-NEXT:    addi 5, 5, -1
; G3-NEXT:    lwz 4, 0(4)
; G3-NEXT:    stw 5, 12(3)
; G3-NEXT:    addi 5, 6, -1
; G3-NEXT:    stw 5, 8(3)
; G3-NEXT:    addi 5, 7, -1
; G3-NEXT:    addi 4, 4, -1
; G3-NEXT:    stw 5, 4(3)
; G3-NEXT:    stw 4, 0(3)
; G3-NEXT:    blr
;
; G5-LABEL: splat_imm_i32:
; G5:       # %bb.0:
; G5-NEXT:    lvx 2, 0, 4
; G5-NEXT:    vspltisb 3, -1
; G5-NEXT:    vadduwm 2, 2, 3
; G5-NEXT:    stvx 2, 0, 3
; G5-NEXT:    blr
  %q = load %i4, %i4* %Q                                  ; <%i4> [#uses=1]
  %R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 >     ; <%i4> [#uses=1]
  store %i4 %R, %i4* %P
  ret void
}

; Constant splat of 65537 (0x00010001): representable as a
; halfword splat, so G5 should use vspltish.
define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind {
; G3-LABEL: splat_imm_i16:
; G3:       # %bb.0:
; G3-NEXT:    lwz 5, 8(4)
; G3-NEXT:    lwz 6, 0(4)
; G3-NEXT:    lwz 7, 4(4)
; G3-NEXT:    addi 5, 5, 1
; G3-NEXT:    lwz 4, 12(4)
; G3-NEXT:    addi 6, 6, 1
; G3-NEXT:    addi 7, 7, 1
; G3-NEXT:    addi 4, 4, 1
; G3-NEXT:    addis 4, 4, 1
; G3-NEXT:    stw 4, 12(3)
; G3-NEXT:    addis 4, 5, 1
; G3-NEXT:    stw 4, 8(3)
; G3-NEXT:    addis 4, 7, 1
; G3-NEXT:    stw 4, 4(3)
; G3-NEXT:    addis 4, 6, 1
; G3-NEXT:    stw 4, 0(3)
; G3-NEXT:    blr
;
; G5-LABEL: splat_imm_i16:
; G5:       # %bb.0:
; G5-NEXT:    lvx 2, 0, 4
; G5-NEXT:    vspltish 3, 1
; G5-NEXT:    vadduwm 2, 2, 3
; G5-NEXT:    stvx 2, 0, 3
; G5-NEXT:    blr
  %q = load %i4, %i4* %Q                                  ; <%i4> [#uses=1]
  %R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 > ; <%i4> [#uses=1]
  store %i4 %R, %i4* %P
  ret void
}

; Splat of a variable i16 stored as <16 x i8>: G5 should use vsplth.
define void @splat_h(i16 %tmp, <16 x i8>* %dst) nounwind {
; G3-LABEL: splat_h:
; G3:       # %bb.0:
; G3-NEXT:    sth 3, 14(4)
; G3-NEXT:    sth 3, 12(4)
; G3-NEXT:    sth 3, 10(4)
; G3-NEXT:    sth 3, 8(4)
; G3-NEXT:    sth 3, 6(4)
; G3-NEXT:    sth 3, 4(4)
; G3-NEXT:    sth 3, 2(4)
; G3-NEXT:    sth 3, 0(4)
; G3-NEXT:    blr
;
; G5-LABEL: splat_h:
; G5:       # %bb.0:
; G5-NEXT:    stwu 1, -32(1)
; G5-NEXT:    sth 3, 16(1)
; G5-NEXT:    addi 3, 1, 16
; G5-NEXT:    lvx 2, 0, 3
; G5-NEXT:    vsplth 2, 2, 0
; G5-NEXT:    stvx 2, 0, 4
; G5-NEXT:    addi 1, 1, 32
; G5-NEXT:    blr
  %tmp.upgrd.1 = insertelement <8 x i16> undef, i16 %tmp, i32 0
  %tmp72 = insertelement <8 x i16> %tmp.upgrd.1, i16 %tmp, i32 1
  %tmp73 = insertelement <8 x i16> %tmp72, i16 %tmp, i32 2
  %tmp74 = insertelement <8 x i16> %tmp73, i16 %tmp, i32 3
  %tmp75 = insertelement <8 x i16> %tmp74, i16 %tmp, i32 4
  %tmp76 = insertelement <8 x i16> %tmp75, i16 %tmp, i32 5
  %tmp77 = insertelement <8 x i16> %tmp76, i16 %tmp, i32 6
  %tmp78 = insertelement <8 x i16> %tmp77, i16 %tmp, i32 7
  %tmp78.upgrd.2 = bitcast <8 x i16> %tmp78 to <16 x i8>
  store <16 x i8> %tmp78.upgrd.2, <16 x i8>* %dst
  ret void
}

; Subtract of a constant i16-splat viewed as <16 x i8>: G5 should
; materialize the constant with vspltish and use vsububm.
define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; G3-LABEL: spltish:
; G3:       # %bb.0:
; G3-NEXT:    stwu 1, -48(1)
; G3-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
; G3-NEXT:    lbz 5, 0(4)
; G3-NEXT:    lbz 30, 15(4)
; G3-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
; G3-NEXT:    lbz 29, 13(4)
; G3-NEXT:    stw 28, 32(1) # 4-byte Folded Spill
; G3-NEXT:    lbz 28, 11(4)
; G3-NEXT:    stw 27, 28(1) # 4-byte Folded Spill
; G3-NEXT:    lbz 27, 9(4)
; G3-NEXT:    stw 24, 16(1) # 4-byte Folded Spill
; G3-NEXT:    stw 25, 20(1) # 4-byte Folded Spill
; G3-NEXT:    stw 26, 24(1) # 4-byte Folded Spill
; G3-NEXT:    lbz 6, 2(4)
; G3-NEXT:    lbz 7, 4(4)
; G3-NEXT:    lbz 8, 6(4)
; G3-NEXT:    lbz 9, 8(4)
; G3-NEXT:    lbz 10, 10(4)
; G3-NEXT:    lbz 11, 12(4)
; G3-NEXT:    lbz 12, 14(4)
; G3-NEXT:    lbz 26, 7(4)
; G3-NEXT:    lbz 25, 5(4)
; G3-NEXT:    lbz 24, 3(4)
; G3-NEXT:    lbz 4, 1(4)
; G3-NEXT:    stb 5, 0(3)
; G3-NEXT:    addi 5, 30, -15
; G3-NEXT:    stb 5, 15(3)
; G3-NEXT:    addi 5, 29, -15
; G3-NEXT:    stb 5, 13(3)
; G3-NEXT:    addi 5, 28, -15
; G3-NEXT:    stb 5, 11(3)
; G3-NEXT:    addi 5, 27, -15
; G3-NEXT:    stb 5, 9(3)
; G3-NEXT:    addi 5, 26, -15
; G3-NEXT:    stb 5, 7(3)
; G3-NEXT:    addi 5, 25, -15
; G3-NEXT:    stb 5, 5(3)
; G3-NEXT:    addi 5, 24, -15
; G3-NEXT:    addi 4, 4, -15
; G3-NEXT:    stb 12, 14(3)
; G3-NEXT:    stb 11, 12(3)
; G3-NEXT:    stb 10, 10(3)
; G3-NEXT:    stb 9, 8(3)
; G3-NEXT:    stb 8, 6(3)
; G3-NEXT:    stb 7, 4(3)
; G3-NEXT:    stb 6, 2(3)
; G3-NEXT:    stb 5, 3(3)
; G3-NEXT:    stb 4, 1(3)
; G3-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
; G3-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
; G3-NEXT:    lwz 28, 32(1) # 4-byte Folded Reload
; G3-NEXT:    lwz 27, 28(1) # 4-byte Folded Reload
; G3-NEXT:    lwz 26, 24(1) # 4-byte Folded Reload
; G3-NEXT:    lwz 25, 20(1) # 4-byte Folded Reload
; G3-NEXT:    lwz 24, 16(1) # 4-byte Folded Reload
; G3-NEXT:    addi 1, 1, 48
; G3-NEXT:    blr
;
; G5-LABEL: spltish:
; G5:       # %bb.0:
; G5-NEXT:    lvx 2, 0, 4
; G5-NEXT:    vspltish 3, 15
; G5-NEXT:    vsububm 2, 2, 3
; G5-NEXT:    stvx 2, 0, 3
; G5-NEXT:    blr
  %tmp = load <16 x i8>, <16 x i8>* %B                    ; <<16 x i8>> [#uses=1]
  %tmp.s = bitcast <16 x i8> %tmp to <16 x i8>            ; <<16 x i8>> [#uses=1]
  %tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > to <16 x i8>) ; <<16 x i8>> [#uses=1]
  %tmp4.u = bitcast <16 x i8> %tmp4 to <16 x i8>          ; <<16 x i8>> [#uses=1]
  store <16 x i8> %tmp4.u, <16 x i8>* %A
  ret void
}