; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; AVX128 tests:

define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_float:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_float:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float2:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float2:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_float2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_float2:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE2-LABEL: vsel_4xi8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_4xi8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_4xi8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: vsel_4xi8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_4xi8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %vsel
}

define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; SSE2-LABEL: vsel_4xi16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_4xi16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_4xi16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: vsel_4xi16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_4xi16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %vsel
}

define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: vsel_i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: vsel_i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %vsel
}

define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
  ret <2 x double> %vsel
}

define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: vsel_i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
  ret <2 x i64> %vsel
}

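; Integer blends on sub-dword elements need SSE4.1: pblendw/pblendvb handle
; the i16/i8 cases below, while plain SSE2 falls back to and/andn/or masking
; with a constant mask and SSSE3 can use a pair of pshufbs instead (see the
; CHECK lines below).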
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
; SSE2-LABEL: vsel_8xi16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: andnps %xmm0, %xmm2
; SSE2-NEXT: orps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_8xi16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSSE3-NEXT: andps %xmm2, %xmm1
; SSSE3-NEXT: andnps %xmm0, %xmm2
; SSSE3-NEXT: orps %xmm1, %xmm2
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_8xi16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_8xi16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
  ret <8 x i16> %vsel
}

define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: andnps %xmm0, %xmm2
; SSE2-NEXT: orps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i8:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %vsel
}


; AVX256 tests:

define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
; SSE2-LABEL: vsel_float8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_float8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_float8:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
  ret <8 x float> %vsel
}

define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
; SSE2-LABEL: vsel_i328:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i328:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i328:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: vsel_i328:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_i328:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
  ret <8 x i32> %vsel
}

define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2-LABEL: vsel_double8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSE41-NEXT: movaps %xmm5, %xmm1
; SSE41-NEXT: movaps %xmm7, %xmm3
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double8:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
; AVX-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
  ret <8 x double> %vsel
}

define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2-LABEL: vsel_i648:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i648:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; SSE41-NEXT: movaps %xmm5, %xmm1
; SSE41-NEXT: movaps %xmm7, %xmm3
; SSE41-NEXT: retq
;
; AVX1-LABEL: vsel_i648:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_i648:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX2-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
  ret <8 x i64> %vsel
}

define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double4:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %vsel
}

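; testa/testb check that a compare feeding a select (max/min-style patterns)
; folds the compare result into the blend: cmppd + blendvpd on SSE4.1 and
; AVX, and and/andn/or masking on plain SSE2/SSSE3.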
define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testa:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movapd %xmm1, %xmm2
; SSE2-NEXT: cmplepd %xmm0, %xmm2
; SSE2-NEXT: andpd %xmm2, %xmm0
; SSE2-NEXT: andnpd %xmm1, %xmm2
; SSE2-NEXT: orpd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: testa:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movapd %xmm1, %xmm2
; SSSE3-NEXT: cmplepd %xmm0, %xmm2
; SSSE3-NEXT: andpd %xmm2, %xmm0
; SSSE3-NEXT: andnpd %xmm1, %xmm2
; SSSE3-NEXT: orpd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testa:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: cmplepd %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testa:
; AVX: # BB#0: # %entry
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testb:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movapd %xmm1, %xmm2
; SSE2-NEXT: cmpnlepd %xmm0, %xmm2
; SSE2-NEXT: andpd %xmm2, %xmm0
; SSE2-NEXT: andnpd %xmm1, %xmm2
; SSE2-NEXT: orpd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: testb:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movapd %xmm1, %xmm2
; SSSE3-NEXT: cmpnlepd %xmm0, %xmm2
; SSSE3-NEXT: andpd %xmm2, %xmm0
; SSSE3-NEXT: andnpd %xmm1, %xmm2
; SSSE3-NEXT: orpd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testb:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: cmpnlepd %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testb:
; AVX: # BB#0: # %entry
; AVX-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; If we can figure out a blend has a constant mask, we should emit the
; blend instruction with an immediate mask
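; (For example, the constant <false, false, true, false> mask below becomes
; a single blendpd/vblendpd with the mask folded into the immediate operand,
; rather than materializing a mask register for a variable blendvpd.)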
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE41-NEXT: movaps %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: constant_blendvpd_avx:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; AVX-NEXT: retq
entry:
  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
  ret <4 x double> %select
}

define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; SSE2-LABEL: constant_blendvps_avx:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvps_avx:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvps_avx:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: constant_blendvps_avx:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
; AVX-NEXT: retq
entry:
  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
  ret <8 x float> %select
}

define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE2-NEXT: movaps %xmm4, %xmm5
; SSE2-NEXT: andnps %xmm0, %xmm5
; SSE2-NEXT: andps %xmm4, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm5
; SSE2-NEXT: andps %xmm4, %xmm3
; SSE2-NEXT: andnps %xmm1, %xmm4
; SSE2-NEXT: orps %xmm3, %xmm4
; SSE2-NEXT: movaps %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
; SSSE3-NEXT: pshufb %xmm4, %xmm0
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
; SSSE3-NEXT: pshufb %xmm5, %xmm2
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm4, %xmm1
; SSSE3-NEXT: pshufb %xmm5, %xmm3
; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
; SSE41-NEXT: pblendvb %xmm4, %xmm2
; SSE41-NEXT: pblendvb %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm3, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
entry:
  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
  ret <32 x i8> %select
}

declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

;; 4 tests for shufflevectors that optimize to blend + immediate
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: blend_shufflevector_4xfloat:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xfloat:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xfloat:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_4xfloat:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
entry:
  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %select
}

define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: blend_shufflevector_8xfloat:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_8xfloat:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_8xfloat:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_8xfloat:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
; AVX-NEXT: retq
entry:
  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
  ret <8 x float> %select
}

define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_4xdouble:
; AVX: # BB#0: # %entry
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX-NEXT: retq
entry:
  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x double> %select
}

define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: blend_shufflevector_4xi64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xi64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xi64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: blend_shufflevector_4xi64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: blend_shufflevector_4xi64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
entry:
  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i64> %select
}

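; The blend_logic tests spell the select as and/andn/or on a sign mask
; (ashr by 31). SSE4.1/AVX can feed that mask straight to pblendvb, which
; only tests the high bit of each mask byte; SSE2/SSSE3 keep the logic ops.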
define <4 x i32> @blend_logic_v4i32(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) {
; SSE2-LABEL: blend_logic_v4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_logic_v4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: pand %xmm0, %xmm1
; SSSE3-NEXT: pandn %xmm2, %xmm0
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_logic_v4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pblendvb %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_logic_v4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %c, %0
  %2 = and <4 x i32> %a, %b.lobit
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_logic_v8i32(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) {
; SSE2-LABEL: blend_logic_v8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm5, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pandn %xmm4, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_logic_v8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pand %xmm1, %xmm3
; SSSE3-NEXT: pandn %xmm5, %xmm1
; SSSE3-NEXT: pand %xmm0, %xmm2
; SSSE3-NEXT: pandn %xmm4, %xmm0
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_logic_v8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pblendvb %xmm2, %xmm4
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendvb %xmm3, %xmm5
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: movdqa %xmm5, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: blend_logic_v8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vorps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: blend_logic_v8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT: retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %c, %0
  %2 = and <8 x i32> %a, %b.lobit
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

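; Selecting between %a and its negation on the sign of %b is a conditional
; negate: it lowers to xor+sub with the sign mask and needs no blend at all.
; Only the AVX1 v8i32 case still splits into halves and masks with and/andn/or.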
define <4 x i32> @blend_neg_logic_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: blend_neg_logic_v4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: psubd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_neg_logic_v4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_neg_logic_v4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %a, %0
  %2 = and <4 x i32> %b.lobit, %sub
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_neg_logic_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: blend_neg_logic_v8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm1
; SSE2-NEXT: psubd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_neg_logic_v8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: psrad $31, %xmm3
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: psubd %xmm2, %xmm0
; SSSE3-NEXT: pxor %xmm3, %xmm1
; SSSE3-NEXT: psubd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_neg_logic_v8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: psrad $31, %xmm3
; SSE41-NEXT: psrad $31, %xmm2
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: psubd %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm1
; SSE41-NEXT: psubd %xmm3, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: blend_neg_logic_v8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubd %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: blend_neg_logic_v8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %a, %0
  %2 = and <8 x i32> %b.lobit, %sub
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

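; Same conditional negate, but the condition arrives as a trunc to <4 x i1>,
; so only one bit per element of %c matters: psrld+pslld isolate it in the
; sign-bit position, SSE2/SSSE3 re-broadcast it with psrad and use xor+sub,
; and SSE4.1/AVX select with a variable blendvps.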
define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE2-LABEL: blend_neg_logic_v4i32_2:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: psrld $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32_2:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: psrld $31, %xmm1
; SSSE3-NEXT: pslld $31, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: psubd %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_neg_logic_v4i32_2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrld $31, %xmm1
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: psubd %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: blendvps %xmm2, %xmm3
; SSE41-NEXT: movaps %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX-NEXT: vpslld $31, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
entry:
  %0 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %1 = trunc <4 x i32> %0 to <4 x i1>
  %2 = sub nsw <4 x i32> zeroinitializer, %v
  %3 = select <4 x i1> %1, <4 x i32> %v, <4 x i32> %2
  ret <4 x i32> %3
}