; RUN: opt < %s -instcombine -S | FileCheck %s

; A non-constant immediate byte is not expected in practice; the call must
; simply survive instcombine unchanged instead of crashing the pass.

; CHECK-LABEL: @perm2pd_non_const_imm
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
; CHECK-NEXT: ret <4 x double>
define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ret <4 x double> %res
}


; Immediate 136 (0x88) sets both zero-mask bits, so each of the next 4 tests
; (one per intrinsic variant) must fold to an all-zero vector.

; CHECK-LABEL: @perm2pd_0x88
; CHECK-NEXT: ret <4 x double> zeroinitializer
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2ps_0x88
; CHECK-NEXT: ret <8 x float> zeroinitializer
define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
  ret <8 x float> %res
}

; CHECK-LABEL: @perm2si_0x88
; CHECK-NEXT: ret <8 x i32> zeroinitializer
define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
  ret <8 x i32> %res
}

; CHECK-LABEL: @perm2i_0x88
; CHECK-NEXT: ret <4 x i64> zeroinitializer
define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
  ret <4 x i64> %res
}


; When the zero mask bits of the immediate are set, the other control bits are ignored.

; Immediate 255 (0xff): every control bit is set, including both zero-mask
; bits, and the result is still expected to be all zeros.

; CHECK-LABEL: @perm2pd_0xff
; CHECK-NEXT: ret <4 x double> zeroinitializer
define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
  ret <4 x double> %res
}


; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
;
; As the expected masks below show, bits 1:0 of the immediate pick the low half
; of the result and bits 5:4 pick the high half; selector values 0 and 1 name
; the low/high half of %a0, values 2 and 3 the low/high half of %a1.
;
; The shuffle result is matched through a FileCheck variable rather than a
; literal %1 so the checks do not depend on how opt numbers unnamed values.

; CHECK-LABEL: @perm2pd_0x00
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x01
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x02
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x03
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
  ret <4 x double> %res
}

; 0x10 selects low(%a0) then high(%a0), i.e. %a0 itself: no shuffle needed.
; CHECK-LABEL: @perm2pd_0x10
; CHECK-NEXT: ret <4 x double> %a0
define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x11
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x12
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x13
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x20
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x21
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x22
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x23
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x30
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x31
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
  ret <4 x double> %res
}

; 0x32 selects low(%a1) then high(%a1), i.e. %a1 itself: no shuffle needed.
; CHECK-LABEL: @perm2pd_0x32
; CHECK-NEXT: ret <4 x double> %a1
define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
  ret <4 x double> %res
}

; CHECK-LABEL: @perm2pd_0x33
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
  ret <4 x double> %res
}

; Confirm that a mask for 32-bit elements is also correct.

; CHECK-LABEL: @perm2ps_0x31
; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> [[RES]]
define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
  ret <8 x float> %res
}


; Confirm that the AVX2 version works the same.

; CHECK-LABEL: @perm2i_0x33
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i64> [[RES]]
define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
  ret <4 x i64> %res
}


; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.

; In each test below the expected shuffle takes one half from a real source
; vector and the other half from a constant operand whose first element is 0.0.
; The shuffle result is bound to a FileCheck variable so the checks also prove
; that the value returned is the shuffle itself.

; 0x81: low half = high half of %a0, high half taken from the constant operand.
; CHECK-LABEL: @perm2pd_0x81
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
  ret <4 x double> %res
}

; 0x83: low half = high half of %a1, high half taken from the constant operand.
; CHECK-LABEL: @perm2pd_0x83
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
  ret <4 x double> %res
}

; 0x28: low half taken from the constant operand, high half = low half of %a1.
; CHECK-LABEL: @perm2pd_0x28
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
  ret <4 x double> %res
}

; 0x08: low half taken from the constant operand, high half = low half of %a0.
; CHECK-LABEL: @perm2pd_0x08
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[RES]]
define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
  ret <4 x double> %res
}

; Check one more with the AVX2 version.

; 0x28 on the integer intrinsic: low half taken from a constant operand whose
; first element is i64 0, high half = low half of %a1. The shuffle result is
; bound to a FileCheck variable so the check also proves it is the value returned.
; CHECK-LABEL: @perm2i_0x28
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i64> [[RES]]
define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
  ret <4 x i64> %res
}

; Declarations of the intrinsics called by the tests in this file.
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone
