; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s

; Each function below builds a single shufflevector whose mask selects one
; element index of %T0 in some lanes and leaves every other lane undef, then
; returns the result. The FileCheck directives placed above each function pin
; the instruction sequence expected in the llc output for that function.

;----------------------------------------------------------------------------
; v8i16 splats (source element 0 through 7)
;----------------------------------------------------------------------------

; Element 0 requested in lanes 0 and 3.
; CHECK-LABEL: shuf_8i16_0:
; CHECK: pshuflw $0
define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

; Element 1 requested in lanes 0 and 1.
; CHECK-LABEL: shuf_8i16_1:
; CHECK: pshuflw $5
define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

; Element 2 requested in lanes 0, 3 and 5.
; CHECK-LABEL: shuf_8i16_2:
; CHECK: punpcklwd
; CHECK-NEXT: pshufd $-86
define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

; Element 3 requested in lanes 0 and 1.
; CHECK-LABEL: shuf_8i16_3:
; CHECK: pshuflw $15
define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

; Element 4 requested in lanes 0 and 4.
; CHECK-LABEL: shuf_8i16_4:
; CHECK: movhlps
define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

; Element 5 requested in lanes 0 and 3.
; CHECK-LABEL: shuf_8i16_5:
; CHECK: punpckhwd
; CHECK-NEXT: pshufd $85
define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

; Element 6 requested in lanes 0 and 1.
; CHECK-LABEL: shuf_8i16_6:
; CHECK: punpckhwd
; CHECK-NEXT: pshufd $-86
define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

; Element 7 requested in lanes 0 and 3.
; CHECK-LABEL: shuf_8i16_7:
; CHECK: punpckhwd
; CHECK-NEXT: pshufd $-1
define <8 x i16> @shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  %splat = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %splat
}

;----------------------------------------------------------------------------
; v16i8 splats (source element 0 through 2)
; Note: the numeric suffix of each function name is the source element
; index plus 8.
;----------------------------------------------------------------------------

; Element 0 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_8:
; CHECK: punpcklbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $0
define <16 x i8> @shuf_16i8_8(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i8> %splat
}

; Element 1 requested in lanes 0 and 1; all remaining lanes are undef.
; CHECK-LABEL: shuf_16i8_9:
; CHECK: punpcklbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $85
define <16 x i8> @shuf_16i8_9(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %splat
}

; Element 2 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_10:
; CHECK: punpcklbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $-86
define <16 x i8> @shuf_16i8_10(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <16 x i8> %splat
}

;----------------------------------------------------------------------------
; v16i8 splats (source element 3 through 15)
; Each function returns one shufflevector of %T0/%T1 whose mask names a
; single element index in some lanes and undef elsewhere. The numeric suffix
; of each function name is the source element index plus 8. The FileCheck
; directives above each function pin the expected llc instruction sequence.
;----------------------------------------------------------------------------

; Element 3 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_11:
; CHECK: punpcklbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $-1
define <16 x i8> @shuf_16i8_11(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i8> %splat
}

; Element 4 requested in lanes 0 and 4; all remaining lanes are undef.
; CHECK-LABEL: shuf_16i8_12:
; CHECK: pshufd $5
define <16 x i8> @shuf_16i8_12(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %splat
}

; Element 5 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_13:
; CHECK: punpcklbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: pshufd $85
define <16 x i8> @shuf_16i8_13(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %splat
}

; Element 6 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_14:
; CHECK: punpcklbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: pshufd $-86
define <16 x i8> @shuf_16i8_14(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
  ret <16 x i8> %splat
}

; Element 7 requested in lanes 0 and 3; all remaining lanes are undef.
; CHECK-LABEL: shuf_16i8_15:
; CHECK: punpcklbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: pshufd $-1
define <16 x i8> @shuf_16i8_15(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %splat
}

; Element 8 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_16:
; CHECK: punpckhbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $0
define <16 x i8> @shuf_16i8_16(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i8> %splat
}

; Element 9 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_17:
; CHECK: punpckhbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $85
define <16 x i8> @shuf_16i8_17(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
  ret <16 x i8> %splat
}

; Element 10 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_18:
; CHECK: punpckhbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $-86
define <16 x i8> @shuf_16i8_18(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
  ret <16 x i8> %splat
}

; Element 11 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_19:
; CHECK: punpckhbw
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: pshufd $-1
define <16 x i8> @shuf_16i8_19(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
  ret <16 x i8> %splat
}

; Element 12 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_20:
; CHECK: punpckhbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: pshufd $0
define <16 x i8> @shuf_16i8_20(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i8> %splat
}

; Element 13 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_21:
; CHECK: punpckhbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: pshufd $85
define <16 x i8> @shuf_16i8_21(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
  ret <16 x i8> %splat
}

; Element 14 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_22:
; CHECK: punpckhbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: pshufd $-86
define <16 x i8> @shuf_16i8_22(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
  ret <16 x i8> %splat
}

; Element 15 in every lane except lanes 1, 2 and 4, which are undef.
; CHECK-LABEL: shuf_16i8_23:
; CHECK: punpckhbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: pshufd $-1
define <16 x i8> @shuf_16i8_23(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
  %splat = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i8> %splat
}