; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Insert a float loaded from %br into lane 1 and the scalar argument %y into
; lane 14 of a <16 x float>.
;CHECK-LABEL: test1:
;CHECK: vinsertps
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
  %rrr = load float* %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; Same shape as test1 for <8 x double>: a loaded double goes into lane 1 and
; the scalar argument %y into lane 6.
;CHECK-LABEL: test2:
;CHECK: vinsertf32x4
;CHECK: vextractf32x4
;CHECK: vinsertf32x4
;CHECK: ret
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
  %rrr = load double* %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; Copy lane 4 of a <16 x float> into lane 1 of the same vector.
;CHECK-LABEL: test3:
;CHECK: vextractf32x4
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test3(<16 x float> %x) nounwind {
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; Integer counterpart of test3: copy lane 4 of an <8 x i64> into lane 1.
;CHECK-LABEL: test4:
;CHECK: vextracti32x4
;CHECK: vinserti32x4
;CHECK: ret
define <8 x i64> @test4(<8 x i64> %x) nounwind {
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; Extract lane 3 of a <4 x float> and return its bits as an i32.
;CHECK-LABEL: test5:
;CHECK: vextractps
;CHECK: ret
define i32 @test5(<4 x float> %x) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; Extract lane 3 of a <4 x float> and store it through %out; the check expects
; the extract to write straight to memory at (%rdi).
;CHECK-LABEL: test6:
;CHECK: vextractps {{.*}}, (%rdi)
;CHECK: ret
define void @test6(<4 x float> %x, float* %out) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, float* %out, align 4
  ret void
}

; Extract from a <16 x float> with a run-time lane index %ind.
; The label carries a trailing colon, like test1-test6, so that it anchors on
; the function's label line.
;CHECK-LABEL: test7:
;CHECK: vmovd
;CHECK: vpermps %zmm
;CHECK: ret
define float @test7(<16 x float> %x, i32 %ind) nounwind {
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; Extract from an <8 x double> with a run-time lane index %ind.
;CHECK-LABEL: test8:
;CHECK: vmovq
;CHECK: vpermpd %zmm
;CHECK: ret
define double @test8(<8 x double> %x, i32 %ind) nounwind {
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; Extract from an <8 x float> with a run-time lane index; the permute is
; expected on a ymm register.
;CHECK-LABEL: test9:
;CHECK: vmovd
;CHECK: vpermps %ymm
;CHECK: ret
define float @test9(<8 x float> %x, i32 %ind) nounwind {
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; Extract from a <16 x i32> with a run-time lane index; the result is expected
; to be moved from xmm0 into eax.
;CHECK-LABEL: test10:
;CHECK: vmovd
;CHECK: vpermd %zmm
;CHECK: vmovd %xmm0, %eax
;CHECK: ret
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; Branch on lane 4 of a <16 x i1> unsigned less-than compare result.
; The expected shifts (left by 11, then right by 15) are consistent with
; isolating bit 4 of a 16-bit mask. Two rets are expected, one per block.
;CHECK-LABEL: test11:
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: kortestw
;CHECK: je
;CHECK: ret
;CHECK: ret
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
  A:
    ret <16 x i32>%b
  B:
   %c = add <16 x i32>%b, %a
   ret <16 x i32>%c
}

; Select between two i64 scalars on lane 0 of a <16 x i64> signed less-than
; compare result.
;CHECK-LABEL: test12:
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
;CHECK: kortestw
;CHECK: ret

define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {

  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a scalar unsigned less-than compare result into lane 0 of a constant
; <16 x i1> and return the vector as an i16. In the constant, lane 1 is false
; and lanes 2-15 are true; the expected immediate 65532 is 0xFFFC.
;CHECK-LABEL: test13:
;CHECK: cmpl
;CHECK: sbbl
;CHECK: orl $65532
;CHECK: ret
define i16 @test13(i32 %a, i32 %b) {
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Check lines for test14; the function itself follows them.
;CHECK-LABEL: test14:
;CHECK: vpcmpgtq
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: kortestw
;CHECK: ret

; Select between two i64 scalars on lane 4 of an <8 x i64> signed less-than
; compare result.
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {

  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a loaded i1 into lane 10 of an undef <16 x i1> and return the vector
; as an i16.
;CHECK-LABEL: test15:
;CHECK: kshiftlw
;CHECK: kmovw
;CHECK: ret
define i16 @test15(i1 *%addr) {
  %x = load i1 * %addr, align 128
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Insert a loaded i1 into lane 10 of the <16 x i1> obtained by bitcasting the
; i16 argument %a, then return the vector as an i16.
;CHECK-LABEL: test16:
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
  %x = load i1 * %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; 8-lane variant of test16: insert a loaded i1 into the <8 x i1> obtained by
; bitcasting the i8 argument %a, then return the vector as an i8.
; The lane index must be below 8: an out-of-range index makes the
; insertelement result undefined, so lane 4 is used here.
;CHECK-LABEL: test17:
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
  %x = load i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}
