; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

; Checks the x86-64 assembly that llc emits for the llvm.vector.reduce.or
; intrinsic on integer vectors with i64, i32, i16 and i8 elements, for each
; feature set named in the run lines above. Every test function takes one
; vector argument, reduces it with a single intrinsic call and returns the
; scalar result.

;
; vXi64 - OR reductions over vectors of i64 elements (2, 4, 8 and 16 lanes)
;

define i64 @test_v2i64(<2 x i64> %a0) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
  ret i64 %1
}

define i64 @test_v4i64(<4 x i64> %a0) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
  ret i64 %1
}

define i64 @test_v8i64(<8 x i64> %a0) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm3, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a0)
  ret i64 %1
}

define i64 @test_v16i64(<16 x i64> %a0) {
; SSE-LABEL: test_v16i64:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm6, %xmm2
; SSE-NEXT:    por %xmm7, %xmm3
; SSE-NEXT:    por %xmm5, %xmm3
; SSE-NEXT:    por %xmm1, %xmm3
; SSE-NEXT:    por %xmm4, %xmm2
; SSE-NEXT:    por %xmm3, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32 - OR reductions over vectors of i32 elements
;

182define i32 @test_v2i32(<2 x i32> %a0) { 183; SSE-LABEL: test_v2i32: 184; SSE: # %bb.0: 185; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 186; SSE-NEXT: por %xmm0, %xmm1 187; SSE-NEXT: movd %xmm1, %eax 188; SSE-NEXT: retq 189; 190; AVX-LABEL: test_v2i32: 191; AVX: # %bb.0: 192; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 193; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 194; AVX-NEXT: vmovd %xmm0, %eax 195; AVX-NEXT: retq 196 %1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a0) 197 ret i32 %1 198} 199 200define i32 @test_v4i32(<4 x i32> %a0) { 201; SSE-LABEL: test_v4i32: 202; SSE: # %bb.0: 203; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 204; SSE-NEXT: por %xmm0, %xmm1 205; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 206; SSE-NEXT: por %xmm1, %xmm0 207; SSE-NEXT: movd %xmm0, %eax 208; SSE-NEXT: retq 209; 210; AVX-LABEL: test_v4i32: 211; AVX: # %bb.0: 212; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 213; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 214; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 215; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 216; AVX-NEXT: vmovd %xmm0, %eax 217; AVX-NEXT: retq 218 %1 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a0) 219 ret i32 %1 220} 221 222define i32 @test_v8i32(<8 x i32> %a0) { 223; SSE-LABEL: test_v8i32: 224; SSE: # %bb.0: 225; SSE-NEXT: por %xmm1, %xmm0 226; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 227; SSE-NEXT: por %xmm0, %xmm1 228; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 229; SSE-NEXT: por %xmm1, %xmm0 230; SSE-NEXT: movd %xmm0, %eax 231; SSE-NEXT: retq 232; 233; AVX1-LABEL: test_v8i32: 234; AVX1: # %bb.0: 235; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 236; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 237; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 238; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 239; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 240; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 241; AVX1-NEXT: vmovd %xmm0, %eax 242; AVX1-NEXT: vzeroupper 243; AVX1-NEXT: retq 244; 245; AVX2-LABEL: test_v8i32: 246; AVX2: # 
%bb.0: 247; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 248; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 249; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 250; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 251; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 252; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 253; AVX2-NEXT: vmovd %xmm0, %eax 254; AVX2-NEXT: vzeroupper 255; AVX2-NEXT: retq 256; 257; AVX512-LABEL: test_v8i32: 258; AVX512: # %bb.0: 259; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 260; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 261; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 262; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 263; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 264; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 265; AVX512-NEXT: vmovd %xmm0, %eax 266; AVX512-NEXT: vzeroupper 267; AVX512-NEXT: retq 268 %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0) 269 ret i32 %1 270} 271 272define i32 @test_v16i32(<16 x i32> %a0) { 273; SSE-LABEL: test_v16i32: 274; SSE: # %bb.0: 275; SSE-NEXT: por %xmm3, %xmm1 276; SSE-NEXT: por %xmm2, %xmm1 277; SSE-NEXT: por %xmm0, %xmm1 278; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 279; SSE-NEXT: por %xmm1, %xmm0 280; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 281; SSE-NEXT: por %xmm0, %xmm1 282; SSE-NEXT: movd %xmm1, %eax 283; SSE-NEXT: retq 284; 285; AVX1-LABEL: test_v16i32: 286; AVX1: # %bb.0: 287; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 288; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 289; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 290; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 291; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 292; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] 293; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 294; AVX1-NEXT: vmovd %xmm0, %eax 295; AVX1-NEXT: vzeroupper 296; AVX1-NEXT: retq 297; 298; AVX2-LABEL: test_v16i32: 299; AVX2: # %bb.0: 300; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 301; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 302; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 303; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 
304; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 305; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 306; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 307; AVX2-NEXT: vmovd %xmm0, %eax 308; AVX2-NEXT: vzeroupper 309; AVX2-NEXT: retq 310; 311; AVX512-LABEL: test_v16i32: 312; AVX512: # %bb.0: 313; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 314; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 315; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 316; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 317; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 318; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 319; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 320; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 321; AVX512-NEXT: vmovd %xmm0, %eax 322; AVX512-NEXT: vzeroupper 323; AVX512-NEXT: retq 324 %1 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a0) 325 ret i32 %1 326} 327 328define i32 @test_v32i32(<32 x i32> %a0) { 329; SSE-LABEL: test_v32i32: 330; SSE: # %bb.0: 331; SSE-NEXT: por %xmm6, %xmm2 332; SSE-NEXT: por %xmm7, %xmm3 333; SSE-NEXT: por %xmm5, %xmm3 334; SSE-NEXT: por %xmm1, %xmm3 335; SSE-NEXT: por %xmm4, %xmm2 336; SSE-NEXT: por %xmm3, %xmm2 337; SSE-NEXT: por %xmm0, %xmm2 338; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 339; SSE-NEXT: por %xmm2, %xmm0 340; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 341; SSE-NEXT: por %xmm0, %xmm1 342; SSE-NEXT: movd %xmm1, %eax 343; SSE-NEXT: retq 344; 345; AVX1-LABEL: test_v32i32: 346; AVX1: # %bb.0: 347; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1 348; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1 349; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 350; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 351; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 352; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 353; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 354; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] 355; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 356; AVX1-NEXT: vmovd %xmm0, %eax 357; AVX1-NEXT: vzeroupper 358; AVX1-NEXT: retq 359; 360; AVX2-LABEL: test_v32i32: 361; AVX2: # %bb.0: 362; AVX2-NEXT: vpor 
%ymm3, %ymm1, %ymm1 363; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 364; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 365; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 366; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 367; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 368; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 369; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 370; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 371; AVX2-NEXT: vmovd %xmm0, %eax 372; AVX2-NEXT: vzeroupper 373; AVX2-NEXT: retq 374; 375; AVX512-LABEL: test_v32i32: 376; AVX512: # %bb.0: 377; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 378; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 379; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 380; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 381; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 382; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 383; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 384; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 385; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 386; AVX512-NEXT: vmovd %xmm0, %eax 387; AVX512-NEXT: vzeroupper 388; AVX512-NEXT: retq 389 %1 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %a0) 390 ret i32 %1 391} 392 393; 394; vXi16 395; 396 397define i16 @test_v2i16(<2 x i16> %a0) { 398; SSE-LABEL: test_v2i16: 399; SSE: # %bb.0: 400; SSE-NEXT: movdqa %xmm0, %xmm1 401; SSE-NEXT: psrld $16, %xmm1 402; SSE-NEXT: por %xmm0, %xmm1 403; SSE-NEXT: movd %xmm1, %eax 404; SSE-NEXT: # kill: def $ax killed $ax killed $eax 405; SSE-NEXT: retq 406; 407; AVX-LABEL: test_v2i16: 408; AVX: # %bb.0: 409; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 410; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 411; AVX-NEXT: vmovd %xmm0, %eax 412; AVX-NEXT: # kill: def $ax killed $ax killed $eax 413; AVX-NEXT: retq 414 %1 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a0) 415 ret i16 %1 416} 417 418define i16 @test_v4i16(<4 x i16> %a0) { 419; SSE-LABEL: test_v4i16: 420; SSE: # %bb.0: 421; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 422; SSE-NEXT: por %xmm0, %xmm1 423; SSE-NEXT: movdqa %xmm1, %xmm0 424; SSE-NEXT: psrld 
$16, %xmm0 425; SSE-NEXT: por %xmm1, %xmm0 426; SSE-NEXT: movd %xmm0, %eax 427; SSE-NEXT: # kill: def $ax killed $ax killed $eax 428; SSE-NEXT: retq 429; 430; AVX-LABEL: test_v4i16: 431; AVX: # %bb.0: 432; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 433; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 434; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 435; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 436; AVX-NEXT: vmovd %xmm0, %eax 437; AVX-NEXT: # kill: def $ax killed $ax killed $eax 438; AVX-NEXT: retq 439 %1 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a0) 440 ret i16 %1 441} 442 443define i16 @test_v8i16(<8 x i16> %a0) { 444; SSE-LABEL: test_v8i16: 445; SSE: # %bb.0: 446; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 447; SSE-NEXT: por %xmm0, %xmm1 448; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 449; SSE-NEXT: por %xmm1, %xmm0 450; SSE-NEXT: movdqa %xmm0, %xmm1 451; SSE-NEXT: psrld $16, %xmm1 452; SSE-NEXT: por %xmm0, %xmm1 453; SSE-NEXT: movd %xmm1, %eax 454; SSE-NEXT: # kill: def $ax killed $ax killed $eax 455; SSE-NEXT: retq 456; 457; AVX-LABEL: test_v8i16: 458; AVX: # %bb.0: 459; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 460; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 461; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 462; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 463; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 464; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 465; AVX-NEXT: vmovd %xmm0, %eax 466; AVX-NEXT: # kill: def $ax killed $ax killed $eax 467; AVX-NEXT: retq 468 %1 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a0) 469 ret i16 %1 470} 471 472define i16 @test_v16i16(<16 x i16> %a0) { 473; SSE-LABEL: test_v16i16: 474; SSE: # %bb.0: 475; SSE-NEXT: por %xmm1, %xmm0 476; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 477; SSE-NEXT: por %xmm0, %xmm1 478; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 479; SSE-NEXT: por %xmm1, %xmm0 480; SSE-NEXT: movdqa %xmm0, %xmm1 481; SSE-NEXT: psrld $16, %xmm1 482; SSE-NEXT: por %xmm0, %xmm1 483; SSE-NEXT: movd %xmm1, %eax 484; SSE-NEXT: # kill: def $ax 
killed $ax killed $eax 485; SSE-NEXT: retq 486; 487; AVX1-LABEL: test_v16i16: 488; AVX1: # %bb.0: 489; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 490; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 491; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 492; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 493; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 494; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 495; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 496; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 497; AVX1-NEXT: vmovd %xmm0, %eax 498; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 499; AVX1-NEXT: vzeroupper 500; AVX1-NEXT: retq 501; 502; AVX2-LABEL: test_v16i16: 503; AVX2: # %bb.0: 504; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 505; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 506; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 507; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 508; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 509; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 510; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 511; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 512; AVX2-NEXT: vmovd %xmm0, %eax 513; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 514; AVX2-NEXT: vzeroupper 515; AVX2-NEXT: retq 516; 517; AVX512-LABEL: test_v16i16: 518; AVX512: # %bb.0: 519; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 520; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 521; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 522; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 523; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 524; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 525; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 526; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 527; AVX512-NEXT: vmovd %xmm0, %eax 528; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 529; AVX512-NEXT: vzeroupper 530; AVX512-NEXT: retq 531 %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0) 532 ret i16 %1 533} 534 535define i16 @test_v32i16(<32 x i16> %a0) { 536; SSE-LABEL: test_v32i16: 537; SSE: # %bb.0: 538; SSE-NEXT: por %xmm3, %xmm1 539; SSE-NEXT: por %xmm2, %xmm1 540; SSE-NEXT: por %xmm0, 
%xmm1 541; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 542; SSE-NEXT: por %xmm1, %xmm0 543; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 544; SSE-NEXT: por %xmm0, %xmm1 545; SSE-NEXT: movdqa %xmm1, %xmm0 546; SSE-NEXT: psrld $16, %xmm0 547; SSE-NEXT: por %xmm1, %xmm0 548; SSE-NEXT: movd %xmm0, %eax 549; SSE-NEXT: # kill: def $ax killed $ax killed $eax 550; SSE-NEXT: retq 551; 552; AVX1-LABEL: test_v32i16: 553; AVX1: # %bb.0: 554; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 555; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 556; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 557; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 558; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 559; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] 560; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 561; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 562; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 563; AVX1-NEXT: vmovd %xmm0, %eax 564; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 565; AVX1-NEXT: vzeroupper 566; AVX1-NEXT: retq 567; 568; AVX2-LABEL: test_v32i16: 569; AVX2: # %bb.0: 570; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 571; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 572; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 573; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 574; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 575; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 576; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 577; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 578; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 579; AVX2-NEXT: vmovd %xmm0, %eax 580; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 581; AVX2-NEXT: vzeroupper 582; AVX2-NEXT: retq 583; 584; AVX512-LABEL: test_v32i16: 585; AVX512: # %bb.0: 586; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 587; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 588; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 589; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 590; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 591; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 592; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 593; AVX512-NEXT: vpor %xmm1, 
%xmm0, %xmm0 594; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 595; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 596; AVX512-NEXT: vmovd %xmm0, %eax 597; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 598; AVX512-NEXT: vzeroupper 599; AVX512-NEXT: retq 600 %1 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %a0) 601 ret i16 %1 602} 603 604define i16 @test_v64i16(<64 x i16> %a0) { 605; SSE-LABEL: test_v64i16: 606; SSE: # %bb.0: 607; SSE-NEXT: por %xmm6, %xmm2 608; SSE-NEXT: por %xmm7, %xmm3 609; SSE-NEXT: por %xmm5, %xmm3 610; SSE-NEXT: por %xmm1, %xmm3 611; SSE-NEXT: por %xmm4, %xmm2 612; SSE-NEXT: por %xmm3, %xmm2 613; SSE-NEXT: por %xmm0, %xmm2 614; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 615; SSE-NEXT: por %xmm2, %xmm0 616; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 617; SSE-NEXT: por %xmm0, %xmm1 618; SSE-NEXT: movdqa %xmm1, %xmm0 619; SSE-NEXT: psrld $16, %xmm0 620; SSE-NEXT: por %xmm1, %xmm0 621; SSE-NEXT: movd %xmm0, %eax 622; SSE-NEXT: # kill: def $ax killed $ax killed $eax 623; SSE-NEXT: retq 624; 625; AVX1-LABEL: test_v64i16: 626; AVX1: # %bb.0: 627; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1 628; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1 629; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 630; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 631; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 632; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 633; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 634; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] 635; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 636; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 637; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 638; AVX1-NEXT: vmovd %xmm0, %eax 639; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 640; AVX1-NEXT: vzeroupper 641; AVX1-NEXT: retq 642; 643; AVX2-LABEL: test_v64i16: 644; AVX2: # %bb.0: 645; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1 646; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 647; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 648; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 649; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 650; AVX2-NEXT: vpshufd 
{{.*#+}} xmm1 = xmm0[2,3,2,3] 651; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 652; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 653; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 654; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 655; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 656; AVX2-NEXT: vmovd %xmm0, %eax 657; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 658; AVX2-NEXT: vzeroupper 659; AVX2-NEXT: retq 660; 661; AVX512-LABEL: test_v64i16: 662; AVX512: # %bb.0: 663; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 664; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 665; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 666; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 667; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 668; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 669; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 670; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 671; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 672; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 673; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 674; AVX512-NEXT: vmovd %xmm0, %eax 675; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 676; AVX512-NEXT: vzeroupper 677; AVX512-NEXT: retq 678 %1 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %a0) 679 ret i16 %1 680} 681 682; 683; vXi8 684; 685 686define i8 @test_v2i8(<2 x i8> %a0) { 687; SSE-LABEL: test_v2i8: 688; SSE: # %bb.0: 689; SSE-NEXT: movdqa %xmm0, %xmm1 690; SSE-NEXT: psrlw $8, %xmm1 691; SSE-NEXT: por %xmm0, %xmm1 692; SSE-NEXT: movd %xmm1, %eax 693; SSE-NEXT: # kill: def $al killed $al killed $eax 694; SSE-NEXT: retq 695; 696; AVX-LABEL: test_v2i8: 697; AVX: # %bb.0: 698; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 699; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 700; AVX-NEXT: vmovd %xmm0, %eax 701; AVX-NEXT: # kill: def $al killed $al killed $eax 702; AVX-NEXT: retq 703 %1 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %a0) 704 ret i8 %1 705} 706 707define i8 @test_v4i8(<4 x i8> %a0) { 708; SSE-LABEL: test_v4i8: 709; SSE: # %bb.0: 710; SSE-NEXT: movdqa %xmm0, %xmm1 711; SSE-NEXT: psrld $16, %xmm1 712; SSE-NEXT: por 
%xmm0, %xmm1 713; SSE-NEXT: movdqa %xmm1, %xmm0 714; SSE-NEXT: psrlw $8, %xmm0 715; SSE-NEXT: por %xmm1, %xmm0 716; SSE-NEXT: movd %xmm0, %eax 717; SSE-NEXT: # kill: def $al killed $al killed $eax 718; SSE-NEXT: retq 719; 720; AVX-LABEL: test_v4i8: 721; AVX: # %bb.0: 722; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 723; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 724; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 725; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 726; AVX-NEXT: vmovd %xmm0, %eax 727; AVX-NEXT: # kill: def $al killed $al killed $eax 728; AVX-NEXT: retq 729 %1 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a0) 730 ret i8 %1 731} 732 733define i8 @test_v8i8(<8 x i8> %a0) { 734; SSE-LABEL: test_v8i8: 735; SSE: # %bb.0: 736; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 737; SSE-NEXT: por %xmm0, %xmm1 738; SSE-NEXT: movdqa %xmm1, %xmm0 739; SSE-NEXT: psrld $16, %xmm0 740; SSE-NEXT: por %xmm1, %xmm0 741; SSE-NEXT: movdqa %xmm0, %xmm1 742; SSE-NEXT: psrlw $8, %xmm1 743; SSE-NEXT: por %xmm0, %xmm1 744; SSE-NEXT: movd %xmm1, %eax 745; SSE-NEXT: # kill: def $al killed $al killed $eax 746; SSE-NEXT: retq 747; 748; AVX-LABEL: test_v8i8: 749; AVX: # %bb.0: 750; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 751; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 752; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 753; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 754; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 755; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 756; AVX-NEXT: vmovd %xmm0, %eax 757; AVX-NEXT: # kill: def $al killed $al killed $eax 758; AVX-NEXT: retq 759 %1 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a0) 760 ret i8 %1 761} 762 763define i8 @test_v16i8(<16 x i8> %a0) { 764; SSE-LABEL: test_v16i8: 765; SSE: # %bb.0: 766; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 767; SSE-NEXT: por %xmm0, %xmm1 768; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 769; SSE-NEXT: por %xmm1, %xmm0 770; SSE-NEXT: movdqa %xmm0, %xmm1 771; SSE-NEXT: psrld $16, %xmm1 772; SSE-NEXT: por %xmm0, %xmm1 773; SSE-NEXT: movdqa %xmm1, %xmm0 774; SSE-NEXT: psrlw $8, 
%xmm0 775; SSE-NEXT: por %xmm1, %xmm0 776; SSE-NEXT: movd %xmm0, %eax 777; SSE-NEXT: # kill: def $al killed $al killed $eax 778; SSE-NEXT: retq 779; 780; AVX-LABEL: test_v16i8: 781; AVX: # %bb.0: 782; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 783; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 784; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 785; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 786; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 787; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 788; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 789; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 790; AVX-NEXT: vmovd %xmm0, %eax 791; AVX-NEXT: # kill: def $al killed $al killed $eax 792; AVX-NEXT: retq 793 %1 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a0) 794 ret i8 %1 795} 796 797define i8 @test_v32i8(<32 x i8> %a0) { 798; SSE-LABEL: test_v32i8: 799; SSE: # %bb.0: 800; SSE-NEXT: por %xmm1, %xmm0 801; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 802; SSE-NEXT: por %xmm0, %xmm1 803; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 804; SSE-NEXT: por %xmm1, %xmm0 805; SSE-NEXT: movdqa %xmm0, %xmm1 806; SSE-NEXT: psrld $16, %xmm1 807; SSE-NEXT: por %xmm0, %xmm1 808; SSE-NEXT: movdqa %xmm1, %xmm0 809; SSE-NEXT: psrlw $8, %xmm0 810; SSE-NEXT: por %xmm1, %xmm0 811; SSE-NEXT: movd %xmm0, %eax 812; SSE-NEXT: # kill: def $al killed $al killed $eax 813; SSE-NEXT: retq 814; 815; AVX1-LABEL: test_v32i8: 816; AVX1: # %bb.0: 817; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 818; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 819; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 820; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 821; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 822; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 823; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 824; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 825; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 826; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 827; AVX1-NEXT: vmovd %xmm0, %eax 828; AVX1-NEXT: # kill: def $al killed $al killed $eax 829; AVX1-NEXT: vzeroupper 830; AVX1-NEXT: retq 831; 832; AVX2-LABEL: test_v32i8: 833; AVX2: 
# %bb.0: 834; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 835; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 836; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 837; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 838; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 839; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 840; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 841; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 842; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 843; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 844; AVX2-NEXT: vmovd %xmm0, %eax 845; AVX2-NEXT: # kill: def $al killed $al killed $eax 846; AVX2-NEXT: vzeroupper 847; AVX2-NEXT: retq 848; 849; AVX512-LABEL: test_v32i8: 850; AVX512: # %bb.0: 851; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 852; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 853; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 854; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 855; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 856; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 857; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 858; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 859; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 860; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 861; AVX512-NEXT: vmovd %xmm0, %eax 862; AVX512-NEXT: # kill: def $al killed $al killed $eax 863; AVX512-NEXT: vzeroupper 864; AVX512-NEXT: retq 865 %1 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a0) 866 ret i8 %1 867} 868 869define i8 @test_v64i8(<64 x i8> %a0) { 870; SSE-LABEL: test_v64i8: 871; SSE: # %bb.0: 872; SSE-NEXT: por %xmm3, %xmm1 873; SSE-NEXT: por %xmm2, %xmm1 874; SSE-NEXT: por %xmm0, %xmm1 875; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 876; SSE-NEXT: por %xmm1, %xmm0 877; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 878; SSE-NEXT: por %xmm0, %xmm1 879; SSE-NEXT: movdqa %xmm1, %xmm0 880; SSE-NEXT: psrld $16, %xmm0 881; SSE-NEXT: por %xmm1, %xmm0 882; SSE-NEXT: movdqa %xmm0, %xmm1 883; SSE-NEXT: psrlw $8, %xmm1 884; SSE-NEXT: por %xmm0, %xmm1 885; SSE-NEXT: movd %xmm1, %eax 886; SSE-NEXT: # kill: def $al killed $al killed $eax 887; SSE-NEXT: retq 888; 889; 
AVX1-LABEL: test_v64i8: 890; AVX1: # %bb.0: 891; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 892; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 893; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 894; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 895; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 896; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] 897; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 898; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 899; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 900; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 901; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 902; AVX1-NEXT: vmovd %xmm0, %eax 903; AVX1-NEXT: # kill: def $al killed $al killed $eax 904; AVX1-NEXT: vzeroupper 905; AVX1-NEXT: retq 906; 907; AVX2-LABEL: test_v64i8: 908; AVX2: # %bb.0: 909; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 910; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 911; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 912; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 913; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 914; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 915; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 916; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 917; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 918; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 919; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 920; AVX2-NEXT: vmovd %xmm0, %eax 921; AVX2-NEXT: # kill: def $al killed $al killed $eax 922; AVX2-NEXT: vzeroupper 923; AVX2-NEXT: retq 924; 925; AVX512-LABEL: test_v64i8: 926; AVX512: # %bb.0: 927; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 928; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 929; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 930; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 931; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 932; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 933; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 934; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 935; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 936; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 937; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 938; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 939; AVX512-NEXT: vmovd %xmm0, %eax 940; 
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %a0)
  ret i8 %1
}

; OR-reduce all 128 i8 lanes of %a0 to a single i8 through
; llvm.vector.reduce.or.v128i8. In every expected sequence below the argument
; registers are first ORed together; the result is then folded onto itself at
; successively narrower widths (upper-half extracts where ymm/zmm registers are
; used, 32-bit element shuffles [2,3,2,3] and [1,1,1,1], then right shifts by
; 16 and by 8 bits) and the low byte is returned in al.
; The expectations are produced by utils/update_llc_test_checks.py (see the
; first line of this file); regenerate them instead of editing them by hand.
define i8 @test_v128i8(<128 x i8> %a0) {
; SSE-LABEL: test_v128i8:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm6, %xmm2
; SSE-NEXT:    por %xmm7, %xmm3
; SSE-NEXT:    por %xmm5, %xmm3
; SSE-NEXT:    por %xmm1, %xmm3
; SSE-NEXT:    por %xmm4, %xmm2
; SSE-NEXT:    por %xmm3, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrlw $8, %xmm1
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v128i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v128i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v128i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
  ret i8 %1
}

; llvm.vector.reduce.or declarations for i64 elements (2 to 16 lanes).
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)

; llvm.vector.reduce.or declarations for i32 elements (2 to 32 lanes).
declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)

; llvm.vector.reduce.or declarations for i16 elements.
declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)

; llvm.vector.reduce.or declarations for i8 elements (2 to 128 lanes).
declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)