; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AMD10H
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2

; Codegen test for the SSE4A extrq/insertq immediate forms, reached both
; through the llvm.x86.sse4a.* intrinsics and through shufflevector patterns.
; Each llc invocation above enables +sse4a together with a different baseline
; feature (+sse3, +ssse3, +avx), checked under the AMD10H, BTVER1 and BTVER2
; prefixes respectively. Expectations shared by all three use the ALL prefix.

;
; EXTRQI
;

; A length of zero is equivalent to a bit length of 64.
; The expected output is a bare retq with no extrq instruction.
define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
; ALL-LABEL: extrqi_len0_idx0:
; ALL: # %bb.0:
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
  ret <2 x i64> %1
}

; Length 8 at index 16. The expected extrq moves byte 2 of xmm0 into byte 0,
; zeroes bytes 1-7 and leaves bytes 8-15 undefined.
define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
; ALL-LABEL: extrqi_len8_idx16:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits the result is undefined.
; Length 32 at index 48 (32 + 48 > 64). An extrq is still expected, with every
; result byte shown as undefined.
define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
; ALL-LABEL: extrqi_len32_idx48:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
  ret <2 x i64> %1
}

; The shuf_* tests below shuffle %a0 against zeroinitializer, so any mask index
; that points into the second operand produces a zero element.

; Byte 0 followed by three zero bytes, remaining lanes undef. The sse3 and
; ssse3 runs expect a single extrq, the avx run expects vpmovzxbq instead.
define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_0zzzuuuuuuuuuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Bytes 0 and 1 each followed by seven zero bytes. The sse3 run expects two
; extrq plus punpcklqdq, the ssse3 run a single pshufb, the avx run vpmovzxbq.
define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_0zzzzzzz1zzzzzzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Same layout for bytes 2 and 3. The avx run expects a vpsrld by 16 ahead of
; the vpmovzxbq.
define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_2zzzzzzz3zzzzzzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrld $16, %xmm0, %xmm0
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Bytes 0-1 followed by two zero bytes, remaining lanes undef. The sse3 and
; ssse3 runs expect extrq, the avx run expects vpmovzxwq.
define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_01zzuuuuuuuuuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Byte pairs 0-1 and 2-3 each followed by six zero bytes. The sse3 run expects
; two extrq plus punpcklqdq, the ssse3 run pshufb, the avx run vpmovzxwq.
define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
; AMD10H-LABEL: shuf_01zzzzzz23zzzzzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Byte 1 followed by three zero bytes, remaining lanes undef. Every run expects
; a single extrq.
define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; 16-bit element variant. Word 1 followed by three zero words. Every run
; expects an extrq selecting bytes 2-3.
define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_1zzzuuuu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Words 1-2 followed by two zero words. Every run expects an extrq selecting
; bytes 2-5.
define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_12zzuuuu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Words 0-2 followed by one zero word. The sse3 and ssse3 runs expect extrq,
; the avx run expects vpxor plus vpblendw.
define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
; AMD10H-LABEL: shuf_012zuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_012zuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_012zuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; BTVER2-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Words 0 and 1 each followed by three zero words. The sse3 run expects two
; extrq plus punpcklqdq, the ssse3 run pshufb, the avx run vpmovzxwq.
define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
; AMD10H-LABEL: shuf_0zzz1zzz:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movdqa %xmm0, %xmm1
; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0zzz1zzz:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzz1zzz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
  ret <8 x i16> %s
}

; 32-bit element variant. Dwords 0 and 1 each followed by a zero dword. No
; extrq is expected in any run. The sse3 and ssse3 runs expect xorps plus
; unpcklps, the avx run expects vpmovzxdq.
define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; AMD10H-LABEL: shuf_0z1z:
; AMD10H: # %bb.0:
; AMD10H-NEXT: xorps %xmm1, %xmm1
; AMD10H-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0z1z:
; BTVER1: # %bb.0:
; BTVER1-NEXT: xorps %xmm1, %xmm1
; BTVER1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0z1z:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BTVER2-NEXT: retq
  %s = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %s
}

;
; INSERTQI
;

; A length of zero is equivalent to a bit length of 64.
; Length 0 at index 0. The expected output copies xmm1 into xmm0 (movaps, or
; vmovaps on the avx run) with no insertq instruction.
define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
; AMD10H-LABEL: insertqi_len0_idx0:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movaps %xmm1, %xmm0
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: insertqi_len0_idx0:
; BTVER1: # %bb.0:
; BTVER1-NEXT: movaps %xmm1, %xmm0
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: insertqi_len0_idx0:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps %xmm1, %xmm0
; BTVER2-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
  ret <2 x i64> %1
}

; Length 8 at index 16. The expected insertq replaces byte 2 of xmm0 with
; byte 0 of xmm1 and leaves bytes 8-15 undefined.
define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len8_idx16:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits the result is undefined.
; Here 32 + 48 > 64, and an insertq is still expected with every result byte
; shown as undefined.
define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len32_idx48:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
  ret <2 x i64> %1
}

; Every mask index refers to %a0, so %a1 is unused. Byte 1 of the result is a
; copy of byte 0. Every run expects an insertq whose only source is xmm0.
define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Byte 1 of the result is byte 0 of %a1 (mask index 16).
define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Byte 0 of the result is byte 0 of %a1 (mask index 16).
define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; 16-bit element variant. Word 1 of the result is word 0 of %a1 (mask index 8).
define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0823uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word 2 of the result is word 0 of %a1.
define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0183uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Word 3 of the result is word 0 of %a1.
define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0128uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Words 1-2 of the result are words 0-1 of %a1.
define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0893uuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Words 1-3 of the result are words 0-2 of %a1.
define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089Auuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Same mask as shuf_0893uuuu except that word 3 is undef. The expected insertq
; is identical to the one in that test.
define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089uuuuu:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

;
; Special Cases
;

; Out of range.
; Result byte 0 is byte 8 of %a and result byte 1 is byte 2 of %b (mask index
; 18), everything else undef. No extrq or insertq is expected in any run.
define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
; AMD10H-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AMD10H-NEXT: andpd {{.*}}(%rip), %xmm0
; AMD10H-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AMD10H-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
; AMD10H-NEXT: packuswb %xmm0, %xmm0
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: psrld $16, %xmm1
; BTVER1-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; BTVER1-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrld $16, %xmm1, %xmm1
; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER2-NEXT: retq
  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Only result bytes 1 and 2 are defined (bytes 0 and 5 of %v). No extrq or
; insertq is expected in any run. The sse3 run expects punpcklbw, pshufd and
; pshuflw, the ssse3 and avx runs expect a single pshufb / vpshufb.
define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; AMD10H-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AMD10H-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AMD10H-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER2-NEXT: retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Result bytes 1 and 3 are zero (mask index 16 reads the zeroinitializer
; operand) and result byte 2 is byte 4 of %v. The sse3 run expects a pslldq,
; psrldq, pslldq sequence, the ssse3 and avx runs a single pshufb / vpshufb.
define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; AMD10H-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AMD10H: # %bb.0:
; AMD10H-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
; AMD10H-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AMD10H-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1: # %bb.0:
; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER2-NEXT: retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Result byte 2 is byte 4 of %v and result byte 3 is zero, everything else
; undef. Every run expects a single extrq.
define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Declarations of the SSE4A intrinsics called by the extrqi_* and insertqi_*
; tests in this file.
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind