; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2

; Two llc configurations are checked: +ssse3,+sse4a (BTVER1 prefix) and
; +avx,+sse4a (BTVER2 prefix). Expectations shared by both use the common
; prefix passed to both FileCheck invocations.

;
; EXTRQI
;

; A length of zero is equivalent to a bit length of 64.
; With length 0 and index 0 the expected EXTRQ keeps bytes 0-7 of the source.
define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
; ALL-LABEL: extrqi_len0_idx0:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
  ret <2 x i64> %1
}

; Length 8 at bit index 16: the expected EXTRQ places source byte 2 in byte 0
; and zeroes bytes 1-7.
define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
; ALL-LABEL: extrqi_len8_idx16:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits the result is undefined.
define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
; ALL-LABEL: extrqi_len32_idx48:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
  ret <2 x i64> %1
}

; Byte 0 followed by three zero bytes, remaining lanes undef.
; The +ssse3 run expects EXTRQ; the +avx run expects VPMOVZXBQ.
define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Zero-extends bytes 0 and 1 to 64 bits each.
; The +ssse3 run expects two EXTRQs joined by PUNPCKLQDQ; the +avx run expects
; a single VPMOVZXBQ.
define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Zero-extends bytes 2 and 3 to 64 bits each.
; The +ssse3 run expects two EXTRQs joined by PUNPCKLQDQ; the +avx run expects
; VPSRLD by 16 followed by VPMOVZXBQ.
define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpsrld $16, %xmm0, %xmm0
; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Bytes 0-1 followed by two zero bytes, remaining lanes undef.
; The +ssse3 run expects EXTRQ; the +avx run expects VPMOVZXWQ.
define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Zero-extends byte pairs 0-1 and 2-3 to 64 bits each.
; The +ssse3 run expects two EXTRQs joined by PUNPCKLQDQ; the +avx run expects
; a single VPMOVZXWQ.
define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i8> %s
}

; Byte 1 followed by three zero bytes, remaining lanes undef.
; Both runs expect EXTRQ.
define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; i16 element 1 followed by three zero elements, remaining lanes undef.
; Both runs expect EXTRQ.
define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_1zzzuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; i16 elements 1-2 followed by two zero elements, remaining lanes undef.
; Both runs expect EXTRQ.
define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_12zzuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; i16 elements 0-2 followed by one zero element, remaining lanes undef.
; Both runs expect EXTRQ.
define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
; ALL-LABEL: shuf_012zuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Zero-extends i16 elements 0 and 1 to 64 bits each.
; The +ssse3 run expects two EXTRQs joined by PUNPCKLQDQ; the +avx run expects
; a single VPMOVZXWQ.
define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
; BTVER1-LABEL: shuf_0zzz1zzz:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    movaps %xmm0, %xmm1
; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0zzz1zzz:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BTVER2-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
  ret <8 x i16> %s
}

; Zero-extends i32 elements 0 and 1 to 64 bits each.
; Neither run expects EXTRQ here: the +ssse3 run expects PXOR + PUNPCKLDQ and
; the +avx run expects VPMOVZXDQ.
define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; BTVER1-LABEL: shuf_0z1z:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    pxor %xmm1, %xmm1
; BTVER1-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuf_0z1z:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BTVER2-NEXT:    retq
  %s = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %s
}

;
; INSERTQI
;

; A length of zero is equivalent to a bit length of 64.
define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len0_idx0:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6,7],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
  ret <2 x i64> %1
}

; Length 8 at bit index 16: the expected INSERTQ replaces byte 2 of the first
; operand with byte 0 of the second operand.
define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len8_idx16:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
  ret <2 x i64> %1
}

; If the length + index exceeds the bottom 64 bits the result is undefined.
define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: insertqi_len32_idx48:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
  ret <2 x i64> %1
}

; Byte mask <0,0,2,3,undef...> that only reads %a0 (%a1 is unused).
; Both runs expect an INSERTQ whose operands are both xmm0.
define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Byte 1 of %a0 is replaced by byte 0 of %a1; both runs expect INSERTQ.
define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Byte 0 of %a0 is replaced by byte 0 of %a1; both runs expect INSERTQ.
define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; i16 element 1 of %a0 is replaced by element 0 of %a1; both runs expect INSERTQ.
define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0823uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; i16 element 2 of %a0 is replaced by element 0 of %a1; both runs expect INSERTQ.
define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0183uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; i16 element 3 of %a0 is replaced by element 0 of %a1; both runs expect INSERTQ.
define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0128uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; i16 elements 1-2 of %a0 are replaced by elements 0-1 of %a1; both runs
; expect INSERTQ.
define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_0893uuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; i16 elements 1-3 of %a0 are replaced by elements 0-2 of %a1; both runs
; expect INSERTQ.
define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089Auuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

; Same as shuf_0893uuuu but with element 3 undef; the expected INSERTQ is
; identical to the one checked there.
define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
; ALL-LABEL: shuf_089uuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}

;
; Special Cases
;

; Out of range.
define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    psrld $16, %xmm1
; BTVER1-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER1-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpsrld $16, %xmm1, %xmm1
; BTVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; BTVER2-NEXT:    retq
  %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Mask <undef,0,5,undef...> on a single input; both runs expect a byte shuffle
; (PSHUFB / VPSHUFB) rather than EXTRQ or INSERTQ.
define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
; BTVER2-NEXT:    retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Mask <undef,zero,4,zero,undef...>; both runs expect a zeroing byte shuffle
; (PSHUFB / VPSHUFB) rather than EXTRQ.
define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER1:       # BB#0:
; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER1-NEXT:    retq
;
; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; BTVER2:       # BB#0:
; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; BTVER2-NEXT:    retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; Mask <undef,undef,4,zero,undef...>; both runs expect an EXTRQ that yields
; source bytes 2-4 followed by zeros.
define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; ALL:       # BB#0:
; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT:    retq
  %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %1
}

; SSE4A intrinsics used by the EXTRQI and INSERTQI tests in this file.
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind