; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKX64
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefix=KNL64
; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKX32
; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefix=KNL32

; This file covers three groups of shuffles, each run for skx and knl on both
; x86_64 and i386:
;   * shuffles that interleave a source vector with zeros (expand tests),
;   * shuffles that pick each lane from one of two inputs (blend tests),
;   * a single-source permute followed by a constant-mask select (PR34370).
; The assertions are autogenerated; rerun utils/update_llc_test_checks.py after
; changing any IR below instead of editing the assertions by hand.

; Expand 128 -> 256 for <4 x float> sources. The source elements keep their
; order and every other result lane is zero.
define <8 x float> @expand(<4 x float> %a) {
; SKX64-LABEL: expand:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $5, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $5, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
; KNL32-NEXT:    retl
  %res = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 5, i32 1, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %res
}

; Same as above with the zero vector as the first shuffle operand and a full
; zero/%a interleave.
define <8 x float> @expand1(<4 x float> %a ) {
; SKX64-LABEL: expand1:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $-86, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand1:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL64-NEXT:    vmovaps {{.*#+}} ymm1 = <u,0,u,1,u,2,u,3>
; KNL64-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand1:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $-86, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand1:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL32-NEXT:    vmovaps {{.*#+}} ymm1 = <u,0,u,1,u,2,u,3>
; KNL32-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL32-NEXT:    retl
  %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x float> %res
}

; Expand 128 -> 256: <2 x double> -> <4 x double>
define <4 x double> @expand2(<2 x double> %a) {
; SKX64-LABEL: expand2:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $9, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand2:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand2:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $9, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand2:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL32-NEXT:    retl
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 2, i32 1>
  ret <4 x double> %res
}

; Expand 128 -> 256: <4 x i32> -> <8 x i32>
define <8 x i32> @expand3(<4 x i32> %a ) {
; SKX64-LABEL: expand3:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $-127, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand3:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vbroadcastsd %xmm0, %ymm0
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand3:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $-127, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand3:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vbroadcastsd %xmm0, %ymm0
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
; KNL32-NEXT:    retl
  %res = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <8 x i32> <i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,i32 5>
  ret <8 x i32> %res
}

; Expand 128 -> 256: <2 x i64> -> <4 x i64>
define <4 x i64> @expand4(<2 x i64> %a ) {
; SKX64-LABEL: expand4:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $9, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand4:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand4:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $9, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand4:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL32-NEXT:    retl
  %res = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <4 x i32> <i32 2, i32 0, i32 0, i32 3>
  ret <4 x i64> %res
}

; Negative test for 128 -> 256: the mask repeats a single element of %a
; (index 4), so every target is expected to use a broadcast plus blend.
define <8 x float> @expand5(<4 x float> %a ) {
; SKX64-LABEL: expand5:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vbroadcastss %xmm0, %ymm0
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand5:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vbroadcastss %xmm0, %ymm0
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand5:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vbroadcastss %xmm0, %ymm0
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand5:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vbroadcastss %xmm0, %ymm0
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL32-NEXT:    retl
  %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
  ret <8 x float> %res
}

; Negative test for 128 -> 256: the mask is the identity over both operands
; (zero low half, %a high half), so every target is expected to use
; vxorps + vinsertf128 instead of an expand.
define <8 x float> @expand6(<4 x float> %a ) {
; SKX64-LABEL: expand6:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand6:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand6:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand6:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; KNL32-NEXT:    retl
  %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}

; Expand 256 -> 512: <8 x float> -> <16 x float>
define <16 x float> @expand7(<8 x float> %a) {
; SKX64-LABEL: expand7:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movw $1285, %ax # imm = 0x505
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand7:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movw $1285, %ax # imm = 0x505
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand7:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movw $1285, %ax # imm = 0x505
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand7:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movw $1285, %ax # imm = 0x505
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
  %res = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 2, i32 8, i32 3, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x float> %res
}

define <16 x float> @expand8(<8 x float> %a ) {
; SKX64-LABEL: expand8:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand8:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand8:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand8:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
  %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x float> %res
}

; Expand 256 -> 512: <4 x double> -> <8 x double>
define <8 x double> @expand9(<4 x double> %a) {
; SKX64-LABEL: expand9:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movb $-127, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand9:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movb $-127, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand9:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movb $-127, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand9:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movb $-127, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
  %res = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
  ret <8 x double> %res
}

; Expand 256 -> 512: <8 x i32> -> <16 x i32>
define <16 x i32> @expand10(<8 x i32> %a ) {
; SKX64-LABEL: expand10:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand10:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand10:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand10:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
  %res = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i32> %res
}

; Expand 256 -> 512: <4 x i64> -> <8 x i64>
define <8 x i64> @expand11(<4 x i64> %a) {
; SKX64-LABEL: expand11:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movb $-127, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand11:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movb $-127, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand11:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movb $-127, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand11:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movb $-127, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
  %res = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
  ret <8 x i64> %res
}

; Negative test for 256 -> 512: the mask repeats a single element of %a
; (index 8), so every target is expected to use a two-source permute.
define <16 x float> @expand12(<8 x float> %a) {
; SKX64-LABEL: expand12:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; SKX64-NEXT:    vmovaps %zmm1, %zmm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand12:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; KNL64-NEXT:    vmovaps %zmm1, %zmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand12:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; SKX32-NEXT:    vmovaps %zmm1, %zmm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand12:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; KNL32-NEXT:    vmovaps %zmm1, %zmm0
; KNL32-NEXT:    retl
  %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8,i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
  ret <16 x float> %res
}

; Negative test for 256 -> 512: identity concatenation of zero and %a, so
; every target is expected to use vxorps + vinsertf64x4.
define <16 x float> @expand13(<8 x float> %a ) {
; SKX64-LABEL: expand13:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand13:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand13:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand13:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL32-NEXT:    retl
  %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x float> %res
}

; Checks the case where one shuffle operand is a constant vector holding both
; zero and non-zero elements, and the mask selects only its zero elements
; (indices 0 and 3 of %addV).

define <8 x float> @expand14(<4 x float> %a) {
; SKX64-LABEL: expand14:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $20, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand14:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand14:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $20, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand14:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT:    retl
  %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
  %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 3, i32 3, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0>
  ret <8 x float> %res
}

; Negative test: same constant vector as above, but the mask also selects a
; non-zero element of %addV (index 1).
define <8 x float> @expand15(<4 x float> %a) {
; SKX64-LABEL: expand15:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; SKX64-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
; SKX64-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand15:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand15:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; SKX32-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
; SKX32-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand15:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT:    retl
  %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
  %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0>
  ret <8 x float> %res
}


; Shuffle-to-blend tests: every mask takes result element i from element i of
; either %A or %W, so no element changes lane.

define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi8:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
; SKX64-NEXT:    kmovq %rax, %k1
; SKX64-NEXT:    vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi8:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; KNL64-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; KNL64-NEXT:    vpblendvb %ymm4, %ymm3, %ymm1, %ymm1
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi8:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX32-NEXT:    kmovd %eax, %k0
; SKX32-NEXT:    kunpckdq %k0, %k0, %k1
; SKX32-NEXT:    vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi8:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    pushl %ebp
; KNL32-NEXT:    .cfi_def_cfa_offset 8
; KNL32-NEXT:    .cfi_offset %ebp, -8
; KNL32-NEXT:    movl %esp, %ebp
; KNL32-NEXT:    .cfi_def_cfa_register %ebp
; KNL32-NEXT:    andl $-32, %esp
; KNL32-NEXT:    subl $32, %esp
; KNL32-NEXT:    vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; KNL32-NEXT:    vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; KNL32-NEXT:    vpblendvb %ymm3, 8(%ebp), %ymm1, %ymm1
; KNL32-NEXT:    movl %ebp, %esp
; KNL32-NEXT:    popl %ebp
; KNL32-NEXT:    .cfi_def_cfa %esp, 4
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> <i32 64, i32 1, i32 66, i32 3, i32 68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63>
  ret <64 x i8> %0
}

define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi16:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi16:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7],ymm2[8],ymm0[9],ymm2[10],ymm0[11],ymm2[12],ymm0[13],ymm2[14],ymm0[15]
; KNL64-NEXT:    vpblendw {{.*#+}} ymm1 = ymm3[0],ymm1[1],ymm3[2],ymm1[3],ymm3[4],ymm1[5],ymm3[6],ymm1[7],ymm3[8],ymm1[9],ymm3[10],ymm1[11],ymm3[12],ymm1[13],ymm3[14],ymm1[15]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi16:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi16:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    pushl %ebp
; KNL32-NEXT:    .cfi_def_cfa_offset 8
; KNL32-NEXT:    .cfi_offset %ebp, -8
; KNL32-NEXT:    movl %esp, %ebp
; KNL32-NEXT:    .cfi_def_cfa_register %ebp
; KNL32-NEXT:    andl $-32, %esp
; KNL32-NEXT:    subl $32, %esp
; KNL32-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7],ymm2[8],ymm0[9],ymm2[10],ymm0[11],ymm2[12],ymm0[13],ymm2[14],ymm0[15]
; KNL32-NEXT:    vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15]
; KNL32-NEXT:    movl %ebp, %esp
; KNL32-NEXT:    popl %ebp
; KNL32-NEXT:    .cfi_def_cfa %esp, 4
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
  ret <32 x i16> %0
}

define <16 x i32> @test_mm512_mask_blend_epi32(<16 x i32> %A, <16 x i32> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi32:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi32:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi32:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi32:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <16 x i32> %A, <16 x i32> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i32> %0
}

define <8 x i64> @test_mm512_mask_blend_epi64(<8 x i64> %A, <8 x i64> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi64:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movb $-86, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi64:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movb $-86, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi64:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movb $-86, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi64:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movb $-86, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <8 x i64> %A, <8 x i64> %W, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i64> %0
}

define <16 x float> @test_mm512_mask_blend_ps(<16 x float> %A, <16 x float> %W){
; SKX64-LABEL: test_mm512_mask_blend_ps:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_ps:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_ps:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_ps:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <16 x float> %A, <16 x float> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x float> %0
}

; Unlike the other blend tests, this mask is not a strict alternation: result
; element 1 also comes from %W (index 9).
define <8 x double> @test_mm512_mask_blend_pd(<8 x double> %A, <8 x double> %W){
; SKX64-LABEL: test_mm512_mask_blend_pd:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movb $-88, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_pd:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movb $-88, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_pd:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movb $-88, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_pd:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movb $-88, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <8 x double> %A, <8 x double> %W, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x double> %0
}


define <32 x i8> @test_mm256_mask_blend_epi8(<32 x i8> %A, <32 x i8> %W){
; SKX64-LABEL: test_mm256_mask_blend_epi8:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm256_mask_blend_epi8:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL64-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm256_mask_blend_epi8:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm256_mask_blend_epi8:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL32-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <32 x i8> %A, <32 x i8> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
  ret <32 x i8> %0
}

define <16 x i8> @test_mm_mask_blend_epi8(<16 x i8> %A, <16 x i8> %W){
; SKX64-LABEL: test_mm_mask_blend_epi8:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm_mask_blend_epi8:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL64-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm_mask_blend_epi8:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm_mask_blend_epi8:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL32-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <16 x i8> %A, <16 x i8> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %0
}

; PR34370: a single-source permute of a loaded vector followed by a select
; with a constant mask against a second vector.
define <8 x float> @test_masked_permps_v8f32(<8 x float>* %vp, <8 x float> %vec2) {
; SKX64-LABEL: test_masked_permps_v8f32:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vmovaps (%rdi), %ymm2
; SKX64-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15]
; SKX64-NEXT:    vpermi2ps %ymm0, %ymm2, %ymm1
; SKX64-NEXT:    vmovaps %ymm1, %ymm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_masked_permps_v8f32:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3]
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_masked_permps_v8f32:
; SKX32:       # %bb.0:
; SKX32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX32-NEXT:    vmovaps (%eax), %ymm2
; SKX32-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15]
; SKX32-NEXT:    vpermi2ps %ymm0, %ymm2, %ymm1
; SKX32-NEXT:    vmovaps %ymm1, %ymm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_masked_permps_v8f32:
; KNL32:       # %bb.0:
; KNL32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL32-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3]
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7]
; KNL32-NEXT:    retl
  %vec = load <8 x float>, <8 x float>* %vp
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0>
  %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}

define <16 x float> @test_masked_permps_v16f32(<16 x float>* %vp, <16 x float> %vec2) {
; SKX64-LABEL: test_masked_permps_v16f32:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vmovaps (%rdi), %zmm2
; SKX64-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; SKX64-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; SKX64-NEXT:    vmovaps %zmm1, %zmm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_masked_permps_v16f32:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vmovaps (%rdi), %zmm2
; KNL64-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; KNL64-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; KNL64-NEXT:    vmovaps %zmm1, %zmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_masked_permps_v16f32:
; SKX32:       # %bb.0:
; SKX32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX32-NEXT:    vmovaps (%eax), %zmm2
; SKX32-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; SKX32-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; SKX32-NEXT:    vmovaps %zmm1, %zmm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_masked_permps_v16f32:
; KNL32:       # %bb.0:
; KNL32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL32-NEXT:    vmovaps (%eax), %zmm2
; KNL32-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; KNL32-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; KNL32-NEXT:    vmovaps %zmm1, %zmm0
; KNL32-NEXT:    retl
  %vec = load <16 x float>, <16 x float>* %vp
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 13, i32 11, i32 9, i32 14, i32 12, i32 10, i32 8, i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0>
  %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}