1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKX 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 8 9define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) { 10; GENERIC-LABEL: test_broadcasti128: 11; GENERIC: # %bb.0: 12; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00] 13; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 14; GENERIC-NEXT: retq # sched: [1:1.00] 15; 16; HASWELL-LABEL: test_broadcasti128: 17; HASWELL: # %bb.0: 18; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] 19; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 20; HASWELL-NEXT: retq # sched: [7:1.00] 21; 22; BROADWELL-LABEL: test_broadcasti128: 23; BROADWELL: # %bb.0: 24; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50] 25; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 26; BROADWELL-NEXT: retq # sched: [7:1.00] 27; 28; SKYLAKE-LABEL: test_broadcasti128: 29; SKYLAKE: # %bb.0: 30; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] 31; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, 
%ymm0 # sched: [1:0.33] 32; SKYLAKE-NEXT: retq # sched: [7:1.00] 33; 34; SKX-LABEL: test_broadcasti128: 35; SKX: # %bb.0: 36; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] 37; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 38; SKX-NEXT: retq # sched: [7:1.00] 39; 40; ZNVER1-LABEL: test_broadcasti128: 41; ZNVER1: # %bb.0: 42; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50] 43; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 44; ZNVER1-NEXT: retq # sched: [1:0.50] 45 %1 = load <4 x i32>, <4 x i32> *%a1, align 16 46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 47 %3 = add <8 x i32> %2, %a0 48 ret <8 x i32> %3 49} 50 51define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) { 52; GENERIC-LABEL: test_broadcastsd_ymm: 53; GENERIC: # %bb.0: 54; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00] 55; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 56; GENERIC-NEXT: retq # sched: [1:1.00] 57; 58; HASWELL-LABEL: test_broadcastsd_ymm: 59; HASWELL: # %bb.0: 60; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 61; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 62; HASWELL-NEXT: retq # sched: [7:1.00] 63; 64; BROADWELL-LABEL: test_broadcastsd_ymm: 65; BROADWELL: # %bb.0: 66; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 67; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 68; BROADWELL-NEXT: retq # sched: [7:1.00] 69; 70; SKYLAKE-LABEL: test_broadcastsd_ymm: 71; SKYLAKE: # %bb.0: 72; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 73; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 74; SKYLAKE-NEXT: retq # sched: [7:1.00] 75; 76; SKX-LABEL: test_broadcastsd_ymm: 77; SKX: # %bb.0: 78; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 79; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 80; SKX-NEXT: retq # sched: [7:1.00] 81; 82; ZNVER1-LABEL: 
test_broadcastsd_ymm: 83; ZNVER1: # %bb.0: 84; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25] 85; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 86; ZNVER1-NEXT: retq # sched: [1:0.50] 87 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer 88 %2 = fadd <4 x double> %1, %1 89 ret <4 x double> %2 90} 91 92define <4 x float> @test_broadcastss(<4 x float> %a0) { 93; GENERIC-LABEL: test_broadcastss: 94; GENERIC: # %bb.0: 95; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 96; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 97; GENERIC-NEXT: retq # sched: [1:1.00] 98; 99; HASWELL-LABEL: test_broadcastss: 100; HASWELL: # %bb.0: 101; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 102; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 103; HASWELL-NEXT: retq # sched: [7:1.00] 104; 105; BROADWELL-LABEL: test_broadcastss: 106; BROADWELL: # %bb.0: 107; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 108; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 109; BROADWELL-NEXT: retq # sched: [7:1.00] 110; 111; SKYLAKE-LABEL: test_broadcastss: 112; SKYLAKE: # %bb.0: 113; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 114; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 115; SKYLAKE-NEXT: retq # sched: [7:1.00] 116; 117; SKX-LABEL: test_broadcastss: 118; SKX: # %bb.0: 119; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 120; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 121; SKX-NEXT: retq # sched: [7:1.00] 122; 123; ZNVER1-LABEL: test_broadcastss: 124; ZNVER1: # %bb.0: 125; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50] 126; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 127; ZNVER1-NEXT: retq # sched: [1:0.50] 128 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer 129 %2 = fadd <4 x float> %1, %1 130 ret <4 x float> %2 131} 132 133define <8 x float> @test_broadcastss_ymm(<4 x 
float> %a0) { 134; GENERIC-LABEL: test_broadcastss_ymm: 135; GENERIC: # %bb.0: 136; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00] 137; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 138; GENERIC-NEXT: retq # sched: [1:1.00] 139; 140; HASWELL-LABEL: test_broadcastss_ymm: 141; HASWELL: # %bb.0: 142; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 143; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 144; HASWELL-NEXT: retq # sched: [7:1.00] 145; 146; BROADWELL-LABEL: test_broadcastss_ymm: 147; BROADWELL: # %bb.0: 148; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 149; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 150; BROADWELL-NEXT: retq # sched: [7:1.00] 151; 152; SKYLAKE-LABEL: test_broadcastss_ymm: 153; SKYLAKE: # %bb.0: 154; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 155; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 156; SKYLAKE-NEXT: retq # sched: [7:1.00] 157; 158; SKX-LABEL: test_broadcastss_ymm: 159; SKX: # %bb.0: 160; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 161; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 162; SKX-NEXT: retq # sched: [7:1.00] 163; 164; ZNVER1-LABEL: test_broadcastss_ymm: 165; ZNVER1: # %bb.0: 166; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25] 167; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 168; ZNVER1-NEXT: retq # sched: [1:0.50] 169 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer 170 %2 = fadd <8 x float> %1, %1 171 ret <8 x float> %2 172} 173 174define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) { 175; GENERIC-LABEL: test_extracti128: 176; GENERIC: # %bb.0: 177; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] 178; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 179; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00] 180; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 181; 
GENERIC-NEXT: vzeroupper # sched: [100:0.33] 182; GENERIC-NEXT: retq # sched: [1:1.00] 183; 184; HASWELL-LABEL: test_extracti128: 185; HASWELL: # %bb.0: 186; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] 187; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 188; HASWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 189; HASWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 190; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 191; HASWELL-NEXT: retq # sched: [7:1.00] 192; 193; BROADWELL-LABEL: test_extracti128: 194; BROADWELL: # %bb.0: 195; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] 196; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 197; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 198; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 199; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 200; BROADWELL-NEXT: retq # sched: [7:1.00] 201; 202; SKYLAKE-LABEL: test_extracti128: 203; SKYLAKE: # %bb.0: 204; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33] 205; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 206; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 207; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 208; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 209; SKYLAKE-NEXT: retq # sched: [7:1.00] 210; 211; SKX-LABEL: test_extracti128: 212; SKX: # %bb.0: 213; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33] 214; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 215; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 216; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 217; SKX-NEXT: vzeroupper # sched: [4:1.00] 218; SKX-NEXT: retq # sched: [7:1.00] 219; 220; ZNVER1-LABEL: test_extracti128: 221; ZNVER1: # %bb.0: 222; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25] 223; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 224; ZNVER1-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: 
[2:0.25] 225; ZNVER1-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:0.50] 226; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 227; ZNVER1-NEXT: retq # sched: [1:0.50] 228 %1 = add <8 x i32> %a0, %a1 229 %2 = sub <8 x i32> %a0, %a1 230 %3 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 231 %4 = shufflevector <8 x i32> %2, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 232 store <4 x i32> %3, <4 x i32> *%a2 233 ret <4 x i32> %4 234} 235 236define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) { 237; GENERIC-LABEL: test_gatherdpd: 238; GENERIC: # %bb.0: 239; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 240; GENERIC-NEXT: retq # sched: [1:1.00] 241; 242; HASWELL-LABEL: test_gatherdpd: 243; HASWELL: # %bb.0: 244; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67] 245; HASWELL-NEXT: retq # sched: [7:1.00] 246; 247; BROADWELL-LABEL: test_gatherdpd: 248; BROADWELL: # %bb.0: 249; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] 250; BROADWELL-NEXT: retq # sched: [7:1.00] 251; 252; SKYLAKE-LABEL: test_gatherdpd: 253; SKYLAKE: # %bb.0: 254; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 255; SKYLAKE-NEXT: retq # sched: [7:1.00] 256; 257; SKX-LABEL: test_gatherdpd: 258; SKX: # %bb.0: 259; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 260; SKX-NEXT: retq # sched: [7:1.00] 261; 262; ZNVER1-LABEL: test_gatherdpd: 263; ZNVER1: # %bb.0: 264; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 265; ZNVER1-NEXT: retq # sched: [1:0.50] 266 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2) 267 ret <2 x double> %1 268} 269declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly 270 271define <4 x double> @test_gatherdpd_ymm(<4 x double> 
%a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) { 272; GENERIC-LABEL: test_gatherdpd_ymm: 273; GENERIC: # %bb.0: 274; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [5:0.50] 275; GENERIC-NEXT: retq # sched: [1:1.00] 276; 277; HASWELL-LABEL: test_gatherdpd_ymm: 278; HASWELL: # %bb.0: 279; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [27:4.00] 280; HASWELL-NEXT: retq # sched: [7:1.00] 281; 282; BROADWELL-LABEL: test_gatherdpd_ymm: 283; BROADWELL: # %bb.0: 284; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00] 285; BROADWELL-NEXT: retq # sched: [7:1.00] 286; 287; SKYLAKE-LABEL: test_gatherdpd_ymm: 288; SKYLAKE: # %bb.0: 289; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] 290; SKYLAKE-NEXT: retq # sched: [7:1.00] 291; 292; SKX-LABEL: test_gatherdpd_ymm: 293; SKX: # %bb.0: 294; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] 295; SKX-NEXT: retq # sched: [7:1.00] 296; 297; ZNVER1-LABEL: test_gatherdpd_ymm: 298; ZNVER1: # %bb.0: 299; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:0.25] 300; ZNVER1-NEXT: retq # sched: [1:0.50] 301 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8) 302 ret <4 x double> %1 303} 304declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly 305 306define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) { 307; GENERIC-LABEL: test_gatherdps: 308; GENERIC: # %bb.0: 309; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 310; GENERIC-NEXT: retq # sched: [1:1.00] 311; 312; HASWELL-LABEL: test_gatherdps: 313; HASWELL: # %bb.0: 314; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67] 315; HASWELL-NEXT: retq # sched: [7:1.00] 316; 317; BROADWELL-LABEL: test_gatherdps: 318; BROADWELL: # %bb.0: 319; BROADWELL-NEXT: vgatherdps 
%xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] 320; BROADWELL-NEXT: retq # sched: [7:1.00] 321; 322; SKYLAKE-LABEL: test_gatherdps: 323; SKYLAKE: # %bb.0: 324; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 325; SKYLAKE-NEXT: retq # sched: [7:1.00] 326; 327; SKX-LABEL: test_gatherdps: 328; SKX: # %bb.0: 329; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 330; SKX-NEXT: retq # sched: [7:1.00] 331; 332; ZNVER1-LABEL: test_gatherdps: 333; ZNVER1: # %bb.0: 334; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 335; ZNVER1-NEXT: retq # sched: [1:0.50] 336 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2) 337 ret <4 x float> %1 338} 339declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly 340 341define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) { 342; GENERIC-LABEL: test_gatherdps_ymm: 343; GENERIC: # %bb.0: 344; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [5:0.50] 345; GENERIC-NEXT: retq # sched: [1:1.00] 346; 347; HASWELL-LABEL: test_gatherdps_ymm: 348; HASWELL: # %bb.0: 349; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [27:6.50] 350; HASWELL-NEXT: retq # sched: [7:1.00] 351; 352; BROADWELL-LABEL: test_gatherdps_ymm: 353; BROADWELL: # %bb.0: 354; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00] 355; BROADWELL-NEXT: retq # sched: [7:1.00] 356; 357; SKYLAKE-LABEL: test_gatherdps_ymm: 358; SKYLAKE: # %bb.0: 359; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] 360; SKYLAKE-NEXT: retq # sched: [7:1.00] 361; 362; SKX-LABEL: test_gatherdps_ymm: 363; SKX: # %bb.0: 364; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] 365; SKX-NEXT: retq # sched: [7:1.00] 366; 367; ZNVER1-LABEL: test_gatherdps_ymm: 368; ZNVER1: # %bb.0: 369; ZNVER1-NEXT: 
vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:0.25] 370; ZNVER1-NEXT: retq # sched: [1:0.50] 371 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4) 372 ret <8 x float> %1 373} 374declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly 375 376define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) { 377; GENERIC-LABEL: test_gatherqpd: 378; GENERIC: # %bb.0: 379; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 380; GENERIC-NEXT: retq # sched: [1:1.00] 381; 382; HASWELL-LABEL: test_gatherqpd: 383; HASWELL: # %bb.0: 384; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33] 385; HASWELL-NEXT: retq # sched: [7:1.00] 386; 387; BROADWELL-LABEL: test_gatherqpd: 388; BROADWELL: # %bb.0: 389; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00] 390; BROADWELL-NEXT: retq # sched: [7:1.00] 391; 392; SKYLAKE-LABEL: test_gatherqpd: 393; SKYLAKE: # %bb.0: 394; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 395; SKYLAKE-NEXT: retq # sched: [7:1.00] 396; 397; SKX-LABEL: test_gatherqpd: 398; SKX: # %bb.0: 399; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 400; SKX-NEXT: retq # sched: [7:1.00] 401; 402; ZNVER1-LABEL: test_gatherqpd: 403; ZNVER1: # %bb.0: 404; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 405; ZNVER1-NEXT: retq # sched: [1:0.50] 406 %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2) 407 ret <2 x double> %1 408} 409declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly 410 411define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) { 412; GENERIC-LABEL: test_gatherqpd_ymm: 413; GENERIC: # %bb.0: 
414; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [5:0.50] 415; GENERIC-NEXT: retq # sched: [1:1.00] 416; 417; HASWELL-LABEL: test_gatherqpd_ymm: 418; HASWELL: # %bb.0: 419; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [24:5.00] 420; HASWELL-NEXT: retq # sched: [7:1.00] 421; 422; BROADWELL-LABEL: test_gatherqpd_ymm: 423; BROADWELL: # %bb.0: 424; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00] 425; BROADWELL-NEXT: retq # sched: [7:1.00] 426; 427; SKYLAKE-LABEL: test_gatherqpd_ymm: 428; SKYLAKE: # %bb.0: 429; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] 430; SKYLAKE-NEXT: retq # sched: [7:1.00] 431; 432; SKX-LABEL: test_gatherqpd_ymm: 433; SKX: # %bb.0: 434; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] 435; SKX-NEXT: retq # sched: [7:1.00] 436; 437; ZNVER1-LABEL: test_gatherqpd_ymm: 438; ZNVER1: # %bb.0: 439; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:0.25] 440; ZNVER1-NEXT: retq # sched: [1:0.50] 441 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8) 442 ret <4 x double> %1 443} 444declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly 445 446define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) { 447; GENERIC-LABEL: test_gatherqps: 448; GENERIC: # %bb.0: 449; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 450; GENERIC-NEXT: retq # sched: [1:1.00] 451; 452; HASWELL-LABEL: test_gatherqps: 453; HASWELL: # %bb.0: 454; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67] 455; HASWELL-NEXT: retq # sched: [7:1.00] 456; 457; BROADWELL-LABEL: test_gatherqps: 458; BROADWELL: # %bb.0: 459; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00] 460; BROADWELL-NEXT: retq # sched: [7:1.00] 461; 462; 
SKYLAKE-LABEL: test_gatherqps: 463; SKYLAKE: # %bb.0: 464; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 465; SKYLAKE-NEXT: retq # sched: [7:1.00] 466; 467; SKX-LABEL: test_gatherqps: 468; SKX: # %bb.0: 469; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 470; SKX-NEXT: retq # sched: [7:1.00] 471; 472; ZNVER1-LABEL: test_gatherqps: 473; ZNVER1: # %bb.0: 474; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 475; ZNVER1-NEXT: retq # sched: [1:0.50] 476 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2) 477 ret <4 x float> %1 478} 479declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly 480 481define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) { 482; GENERIC-LABEL: test_gatherqps_ymm: 483; GENERIC: # %bb.0: 484; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50] 485; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 486; GENERIC-NEXT: retq # sched: [1:1.00] 487; 488; HASWELL-LABEL: test_gatherqps_ymm: 489; HASWELL: # %bb.0: 490; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67] 491; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 492; HASWELL-NEXT: retq # sched: [7:1.00] 493; 494; BROADWELL-LABEL: test_gatherqps_ymm: 495; BROADWELL: # %bb.0: 496; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00] 497; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 498; BROADWELL-NEXT: retq # sched: [7:1.00] 499; 500; SKYLAKE-LABEL: test_gatherqps_ymm: 501; SKYLAKE: # %bb.0: 502; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] 503; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 504; SKYLAKE-NEXT: retq # sched: [7:1.00] 505; 506; SKX-LABEL: test_gatherqps_ymm: 507; SKX: # %bb.0: 508; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] 509; SKX-NEXT: vzeroupper # 
sched: [4:1.00] 510; SKX-NEXT: retq # sched: [7:1.00] 511; 512; ZNVER1-LABEL: test_gatherqps_ymm: 513; ZNVER1: # %bb.0: 514; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:0.25] 515; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 516; ZNVER1-NEXT: retq # sched: [1:0.50] 517 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4) 518 ret <4 x float> %1 519} 520declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly 521 522define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 523; GENERIC-LABEL: test_inserti128: 524; GENERIC: # %bb.0: 525; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] 526; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 527; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 528; GENERIC-NEXT: retq # sched: [1:1.00] 529; 530; HASWELL-LABEL: test_inserti128: 531; HASWELL: # %bb.0: 532; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 533; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 534; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 535; HASWELL-NEXT: retq # sched: [7:1.00] 536; 537; BROADWELL-LABEL: test_inserti128: 538; BROADWELL: # %bb.0: 539; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 540; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50] 541; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 542; BROADWELL-NEXT: retq # sched: [7:1.00] 543; 544; SKYLAKE-LABEL: test_inserti128: 545; SKYLAKE: # %bb.0: 546; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 547; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 548; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 549; SKYLAKE-NEXT: retq # sched: [7:1.00] 550; 551; SKX-LABEL: test_inserti128: 552; SKX: # %bb.0: 553; SKX-NEXT: 
vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 554; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 555; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 556; SKX-NEXT: retq # sched: [7:1.00] 557; 558; ZNVER1-LABEL: test_inserti128: 559; ZNVER1: # %bb.0: 560; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25] 561; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] 562; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 563; ZNVER1-NEXT: retq # sched: [1:0.50] 564 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 565 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 566 %3 = load <4 x i32>, <4 x i32> *%a2, align 16 567 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 568 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 569 %6 = add <8 x i32> %2, %5 570 ret <8 x i32> %6 571} 572 573define <4 x i64> @test_movntdqa(i8* %a0) { 574; GENERIC-LABEL: test_movntdqa: 575; GENERIC: # %bb.0: 576; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 577; GENERIC-NEXT: retq # sched: [1:1.00] 578; 579; HASWELL-LABEL: test_movntdqa: 580; HASWELL: # %bb.0: 581; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 582; HASWELL-NEXT: retq # sched: [7:1.00] 583; 584; BROADWELL-LABEL: test_movntdqa: 585; BROADWELL: # %bb.0: 586; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50] 587; BROADWELL-NEXT: retq # sched: [7:1.00] 588; 589; SKYLAKE-LABEL: test_movntdqa: 590; SKYLAKE: # %bb.0: 591; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 592; SKYLAKE-NEXT: retq # sched: [7:1.00] 593; 594; SKX-LABEL: test_movntdqa: 595; SKX: # %bb.0: 596; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 597; 
SKX-NEXT: retq # sched: [7:1.00] 598; 599; ZNVER1-LABEL: test_movntdqa: 600; ZNVER1: # %bb.0: 601; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50] 602; ZNVER1-NEXT: retq # sched: [1:0.50] 603 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) 604 ret <4 x i64> %1 605} 606declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly 607 608define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 609; GENERIC-LABEL: test_mpsadbw: 610; GENERIC: # %bb.0: 611; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00] 612; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00] 613; GENERIC-NEXT: retq # sched: [1:1.00] 614; 615; HASWELL-LABEL: test_mpsadbw: 616; HASWELL: # %bb.0: 617; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] 618; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] 619; HASWELL-NEXT: retq # sched: [7:1.00] 620; 621; BROADWELL-LABEL: test_mpsadbw: 622; BROADWELL: # %bb.0: 623; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] 624; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 625; BROADWELL-NEXT: retq # sched: [7:1.00] 626; 627; SKYLAKE-LABEL: test_mpsadbw: 628; SKYLAKE: # %bb.0: 629; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] 630; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 631; SKYLAKE-NEXT: retq # sched: [7:1.00] 632; 633; SKX-LABEL: test_mpsadbw: 634; SKX: # %bb.0: 635; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] 636; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 637; SKX-NEXT: retq # sched: [7:1.00] 638; 639; ZNVER1-LABEL: test_mpsadbw: 640; ZNVER1: # %bb.0: 641; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 642; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 643; ZNVER1-NEXT: retq # sched: [1:0.50] 644 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) 645 %2 = bitcast <16 x 
i16> %1 to <32 x i8> 646 %3 = load <32 x i8>, <32 x i8> *%a2, align 32 647 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7) 648 ret <16 x i16> %4 649} 650declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone 651 652define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { 653; GENERIC-LABEL: test_pabsb: 654; GENERIC: # %bb.0: 655; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 656; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 657; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 658; GENERIC-NEXT: retq # sched: [1:1.00] 659; 660; HASWELL-LABEL: test_pabsb: 661; HASWELL: # %bb.0: 662; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 663; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 664; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 665; HASWELL-NEXT: retq # sched: [7:1.00] 666; 667; BROADWELL-LABEL: test_pabsb: 668; BROADWELL: # %bb.0: 669; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 670; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50] 671; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 672; BROADWELL-NEXT: retq # sched: [7:1.00] 673; 674; SKYLAKE-LABEL: test_pabsb: 675; SKYLAKE: # %bb.0: 676; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 677; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 678; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 679; SKYLAKE-NEXT: retq # sched: [7:1.00] 680; 681; SKX-LABEL: test_pabsb: 682; SKX: # %bb.0: 683; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 684; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 685; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 686; SKX-NEXT: retq # sched: [7:1.00] 687; 688; ZNVER1-LABEL: test_pabsb: 689; ZNVER1: # %bb.0: 690; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 691; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25] 692; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 693; ZNVER1-NEXT: retq # sched: [1:0.50] 694 %1 = 
call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) 695 %2 = load <32 x i8>, <32 x i8> *%a1, align 32 696 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2) 697 %4 = or <32 x i8> %1, %3 698 ret <32 x i8> %4 699} 700declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone 701 702define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { 703; GENERIC-LABEL: test_pabsd: 704; GENERIC: # %bb.0: 705; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 706; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 707; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 708; GENERIC-NEXT: retq # sched: [1:1.00] 709; 710; HASWELL-LABEL: test_pabsd: 711; HASWELL: # %bb.0: 712; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 713; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 714; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 715; HASWELL-NEXT: retq # sched: [7:1.00] 716; 717; BROADWELL-LABEL: test_pabsd: 718; BROADWELL: # %bb.0: 719; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 720; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50] 721; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 722; BROADWELL-NEXT: retq # sched: [7:1.00] 723; 724; SKYLAKE-LABEL: test_pabsd: 725; SKYLAKE: # %bb.0: 726; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 727; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 728; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 729; SKYLAKE-NEXT: retq # sched: [7:1.00] 730; 731; SKX-LABEL: test_pabsd: 732; SKX: # %bb.0: 733; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 734; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 735; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 736; SKX-NEXT: retq # sched: [7:1.00] 737; 738; ZNVER1-LABEL: test_pabsd: 739; ZNVER1: # %bb.0: 740; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 741; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25] 742; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 743; ZNVER1-NEXT: retq # sched: 
[1:0.50] 744 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) 745 %2 = load <8 x i32>, <8 x i32> *%a1, align 32 746 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2) 747 %4 = or <8 x i32> %1, %3 748 ret <8 x i32> %4 749} 750declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone 751 752define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { 753; GENERIC-LABEL: test_pabsw: 754; GENERIC: # %bb.0: 755; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 756; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 757; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 758; GENERIC-NEXT: retq # sched: [1:1.00] 759; 760; HASWELL-LABEL: test_pabsw: 761; HASWELL: # %bb.0: 762; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 763; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 764; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 765; HASWELL-NEXT: retq # sched: [7:1.00] 766; 767; BROADWELL-LABEL: test_pabsw: 768; BROADWELL: # %bb.0: 769; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 770; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50] 771; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 772; BROADWELL-NEXT: retq # sched: [7:1.00] 773; 774; SKYLAKE-LABEL: test_pabsw: 775; SKYLAKE: # %bb.0: 776; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 777; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 778; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 779; SKYLAKE-NEXT: retq # sched: [7:1.00] 780; 781; SKX-LABEL: test_pabsw: 782; SKX: # %bb.0: 783; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 784; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 785; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 786; SKX-NEXT: retq # sched: [7:1.00] 787; 788; ZNVER1-LABEL: test_pabsw: 789; ZNVER1: # %bb.0: 790; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 791; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25] 792; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 793; 
ZNVER1-NEXT: retq # sched: [1:0.50] 794 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) 795 %2 = load <16 x i16>, <16 x i16> *%a1, align 32 796 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2) 797 %4 = or <16 x i16> %1, %3 798 ret <16 x i16> %4 799} 800declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone 801 802define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 803; GENERIC-LABEL: test_packssdw: 804; GENERIC: # %bb.0: 805; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 806; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 807; GENERIC-NEXT: retq # sched: [1:1.00] 808; 809; HASWELL-LABEL: test_packssdw: 810; HASWELL: # %bb.0: 811; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 812; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 813; HASWELL-NEXT: retq # sched: [7:1.00] 814; 815; BROADWELL-LABEL: test_packssdw: 816; BROADWELL: # %bb.0: 817; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 818; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 819; BROADWELL-NEXT: retq # sched: [7:1.00] 820; 821; SKYLAKE-LABEL: test_packssdw: 822; SKYLAKE: # %bb.0: 823; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 824; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 825; SKYLAKE-NEXT: retq # sched: [7:1.00] 826; 827; SKX-LABEL: test_packssdw: 828; SKX: # %bb.0: 829; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 830; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 831; SKX-NEXT: retq # sched: [7:1.00] 832; 833; ZNVER1-LABEL: test_packssdw: 834; ZNVER1: # %bb.0: 835; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 836; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 837; ZNVER1-NEXT: retq # sched: [1:0.50] 838 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) 839 %2 = bitcast <16 x i16> %1 to <8 x i32> 840 %3 = load <8 x i32>, 
<8 x i32> *%a2, align 32 841 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3) 842 ret <16 x i16> %4 843} 844declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone 845 846define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 847; GENERIC-LABEL: test_packsswb: 848; GENERIC: # %bb.0: 849; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 850; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 851; GENERIC-NEXT: retq # sched: [1:1.00] 852; 853; HASWELL-LABEL: test_packsswb: 854; HASWELL: # %bb.0: 855; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 856; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 857; HASWELL-NEXT: retq # sched: [7:1.00] 858; 859; BROADWELL-LABEL: test_packsswb: 860; BROADWELL: # %bb.0: 861; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 862; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 863; BROADWELL-NEXT: retq # sched: [7:1.00] 864; 865; SKYLAKE-LABEL: test_packsswb: 866; SKYLAKE: # %bb.0: 867; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 868; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 869; SKYLAKE-NEXT: retq # sched: [7:1.00] 870; 871; SKX-LABEL: test_packsswb: 872; SKX: # %bb.0: 873; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 874; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 875; SKX-NEXT: retq # sched: [7:1.00] 876; 877; ZNVER1-LABEL: test_packsswb: 878; ZNVER1: # %bb.0: 879; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 880; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 881; ZNVER1-NEXT: retq # sched: [1:0.50] 882 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) 883 %2 = bitcast <32 x i8> %1 to <16 x i16> 884 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 885 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3) 886 ret <32 x i8> %4 
887} 888declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone 889; test_packusdw - scheduling info for vpackusdw: the intrinsic is called once on %a0/%a1 (register form) and once on the bitcast result plus a 32-byte-aligned load from %a2, which the expected output shows folded into the vpackusdw (%rdi) form. 890define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 891; GENERIC-LABEL: test_packusdw: 892; GENERIC: # %bb.0: 893; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 894; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 895; GENERIC-NEXT: retq # sched: [1:1.00] 896; 897; HASWELL-LABEL: test_packusdw: 898; HASWELL: # %bb.0: 899; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 900; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 901; HASWELL-NEXT: retq # sched: [7:1.00] 902; 903; BROADWELL-LABEL: test_packusdw: 904; BROADWELL: # %bb.0: 905; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 906; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 907; BROADWELL-NEXT: retq # sched: [7:1.00] 908; 909; SKYLAKE-LABEL: test_packusdw: 910; SKYLAKE: # %bb.0: 911; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 912; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 913; SKYLAKE-NEXT: retq # sched: [7:1.00] 914; 915; SKX-LABEL: test_packusdw: 916; SKX: # %bb.0: 917; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 918; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 919; SKX-NEXT: retq # sched: [7:1.00] 920; 921; ZNVER1-LABEL: test_packusdw: 922; ZNVER1: # %bb.0: 923; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 924; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 925; ZNVER1-NEXT: retq # sched: [1:0.50] 926 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) 927 %2 = bitcast <16 x i16> %1 to <8 x i32> 928 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 929 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3) 930 ret <16 x i16> %4 931} 932declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone 933 934define <32 x i8> 
@test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 935; GENERIC-LABEL: test_packuswb: 936; GENERIC: # %bb.0: 937; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 938; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 939; GENERIC-NEXT: retq # sched: [1:1.00] 940; 941; HASWELL-LABEL: test_packuswb: 942; HASWELL: # %bb.0: 943; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 944; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 945; HASWELL-NEXT: retq # sched: [7:1.00] 946; 947; BROADWELL-LABEL: test_packuswb: 948; BROADWELL: # %bb.0: 949; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 950; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 951; BROADWELL-NEXT: retq # sched: [7:1.00] 952; 953; SKYLAKE-LABEL: test_packuswb: 954; SKYLAKE: # %bb.0: 955; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 956; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 957; SKYLAKE-NEXT: retq # sched: [7:1.00] 958; 959; SKX-LABEL: test_packuswb: 960; SKX: # %bb.0: 961; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 962; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 963; SKX-NEXT: retq # sched: [7:1.00] 964; 965; ZNVER1-LABEL: test_packuswb: 966; ZNVER1: # %bb.0: 967; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 968; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 969; ZNVER1-NEXT: retq # sched: [1:0.50] 970 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) 971 %2 = bitcast <32 x i8> %1 to <16 x i16> 972 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 973 %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3) 974 ret <32 x i8> %4 975} 976declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone 977 978define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 979; GENERIC-LABEL: test_paddb: 980; GENERIC: # %bb.0: 981; 
GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 982; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 983; GENERIC-NEXT: retq # sched: [1:1.00] 984; 985; HASWELL-LABEL: test_paddb: 986; HASWELL: # %bb.0: 987; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 988; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 989; HASWELL-NEXT: retq # sched: [7:1.00] 990; 991; BROADWELL-LABEL: test_paddb: 992; BROADWELL: # %bb.0: 993; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 994; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 995; BROADWELL-NEXT: retq # sched: [7:1.00] 996; 997; SKYLAKE-LABEL: test_paddb: 998; SKYLAKE: # %bb.0: 999; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1000; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1001; SKYLAKE-NEXT: retq # sched: [7:1.00] 1002; 1003; SKX-LABEL: test_paddb: 1004; SKX: # %bb.0: 1005; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1006; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1007; SKX-NEXT: retq # sched: [7:1.00] 1008; 1009; ZNVER1-LABEL: test_paddb: 1010; ZNVER1: # %bb.0: 1011; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1012; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1013; ZNVER1-NEXT: retq # sched: [1:0.50] 1014 %1 = add <32 x i8> %a0, %a1 1015 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1016 %3 = add <32 x i8> %1, %2 1017 ret <32 x i8> %3 1018} 1019 1020define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 1021; GENERIC-LABEL: test_paddd: 1022; GENERIC: # %bb.0: 1023; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1024; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1025; GENERIC-NEXT: retq # sched: [1:1.00] 1026; 1027; HASWELL-LABEL: test_paddd: 1028; HASWELL: # %bb.0: 1029; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1030; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1031; HASWELL-NEXT: retq 
# sched: [7:1.00] 1032; 1033; BROADWELL-LABEL: test_paddd: 1034; BROADWELL: # %bb.0: 1035; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1036; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1037; BROADWELL-NEXT: retq # sched: [7:1.00] 1038; 1039; SKYLAKE-LABEL: test_paddd: 1040; SKYLAKE: # %bb.0: 1041; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1042; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1043; SKYLAKE-NEXT: retq # sched: [7:1.00] 1044; 1045; SKX-LABEL: test_paddd: 1046; SKX: # %bb.0: 1047; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1048; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1049; SKX-NEXT: retq # sched: [7:1.00] 1050; 1051; ZNVER1-LABEL: test_paddd: 1052; ZNVER1: # %bb.0: 1053; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1054; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1055; ZNVER1-NEXT: retq # sched: [1:0.50] 1056 %1 = add <8 x i32> %a0, %a1 1057 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 1058 %3 = add <8 x i32> %1, %2 1059 ret <8 x i32> %3 1060} 1061 1062define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 1063; GENERIC-LABEL: test_paddq: 1064; GENERIC: # %bb.0: 1065; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1066; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1067; GENERIC-NEXT: retq # sched: [1:1.00] 1068; 1069; HASWELL-LABEL: test_paddq: 1070; HASWELL: # %bb.0: 1071; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1072; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1073; HASWELL-NEXT: retq # sched: [7:1.00] 1074; 1075; BROADWELL-LABEL: test_paddq: 1076; BROADWELL: # %bb.0: 1077; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1078; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1079; BROADWELL-NEXT: retq # sched: [7:1.00] 1080; 1081; SKYLAKE-LABEL: test_paddq: 1082; SKYLAKE: # %bb.0: 1083; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, 
%ymm0 # sched: [1:0.33] 1084; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1085; SKYLAKE-NEXT: retq # sched: [7:1.00] 1086; 1087; SKX-LABEL: test_paddq: 1088; SKX: # %bb.0: 1089; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1090; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1091; SKX-NEXT: retq # sched: [7:1.00] 1092; 1093; ZNVER1-LABEL: test_paddq: 1094; ZNVER1: # %bb.0: 1095; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1096; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1097; ZNVER1-NEXT: retq # sched: [1:0.50] 1098 %1 = add <4 x i64> %a0, %a1 1099 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 1100 %3 = add <4 x i64> %1, %2 1101 ret <4 x i64> %3 1102} 1103 1104define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1105; GENERIC-LABEL: test_paddsb: 1106; GENERIC: # %bb.0: 1107; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1108; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1109; GENERIC-NEXT: retq # sched: [1:1.00] 1110; 1111; HASWELL-LABEL: test_paddsb: 1112; HASWELL: # %bb.0: 1113; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1114; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1115; HASWELL-NEXT: retq # sched: [7:1.00] 1116; 1117; BROADWELL-LABEL: test_paddsb: 1118; BROADWELL: # %bb.0: 1119; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1120; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1121; BROADWELL-NEXT: retq # sched: [7:1.00] 1122; 1123; SKYLAKE-LABEL: test_paddsb: 1124; SKYLAKE: # %bb.0: 1125; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1126; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1127; SKYLAKE-NEXT: retq # sched: [7:1.00] 1128; 1129; SKX-LABEL: test_paddsb: 1130; SKX: # %bb.0: 1131; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1132; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1133; SKX-NEXT: retq # sched: [7:1.00] 
1134; 1135; ZNVER1-LABEL: test_paddsb: 1136; ZNVER1: # %bb.0: 1137; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1138; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1139; ZNVER1-NEXT: retq # sched: [1:0.50] 1140 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) 1141 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1142 %3 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %1, <32 x i8> %2) 1143 ret <32 x i8> %3 1144} 1145declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone 1146 1147define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1148; GENERIC-LABEL: test_paddsw: 1149; GENERIC: # %bb.0: 1150; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1151; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1152; GENERIC-NEXT: retq # sched: [1:1.00] 1153; 1154; HASWELL-LABEL: test_paddsw: 1155; HASWELL: # %bb.0: 1156; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1157; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1158; HASWELL-NEXT: retq # sched: [7:1.00] 1159; 1160; BROADWELL-LABEL: test_paddsw: 1161; BROADWELL: # %bb.0: 1162; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1163; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1164; BROADWELL-NEXT: retq # sched: [7:1.00] 1165; 1166; SKYLAKE-LABEL: test_paddsw: 1167; SKYLAKE: # %bb.0: 1168; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1169; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1170; SKYLAKE-NEXT: retq # sched: [7:1.00] 1171; 1172; SKX-LABEL: test_paddsw: 1173; SKX: # %bb.0: 1174; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1175; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1176; SKX-NEXT: retq # sched: [7:1.00] 1177; 1178; ZNVER1-LABEL: test_paddsw: 1179; ZNVER1: # %bb.0: 1180; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1181; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, 
%ymm0 # sched: [8:0.50] 1182; ZNVER1-NEXT: retq # sched: [1:0.50] 1183 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) 1184 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1185 %3 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %1, <16 x i16> %2) 1186 ret <16 x i16> %3 1187} 1188declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone 1189 1190define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1191; GENERIC-LABEL: test_paddusb: 1192; GENERIC: # %bb.0: 1193; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1194; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1195; GENERIC-NEXT: retq # sched: [1:1.00] 1196; 1197; HASWELL-LABEL: test_paddusb: 1198; HASWELL: # %bb.0: 1199; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1200; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1201; HASWELL-NEXT: retq # sched: [7:1.00] 1202; 1203; BROADWELL-LABEL: test_paddusb: 1204; BROADWELL: # %bb.0: 1205; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1206; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1207; BROADWELL-NEXT: retq # sched: [7:1.00] 1208; 1209; SKYLAKE-LABEL: test_paddusb: 1210; SKYLAKE: # %bb.0: 1211; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1212; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1213; SKYLAKE-NEXT: retq # sched: [7:1.00] 1214; 1215; SKX-LABEL: test_paddusb: 1216; SKX: # %bb.0: 1217; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1218; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1219; SKX-NEXT: retq # sched: [7:1.00] 1220; 1221; ZNVER1-LABEL: test_paddusb: 1222; ZNVER1: # %bb.0: 1223; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1224; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1225; ZNVER1-NEXT: retq # sched: [1:0.50] 1226 %1 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) 
1227 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1228 %3 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %1, <32 x i8> %2) 1229 ret <32 x i8> %3 1230} 1231declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone 1232 1233define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1234; GENERIC-LABEL: test_paddusw: 1235; GENERIC: # %bb.0: 1236; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1237; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1238; GENERIC-NEXT: retq # sched: [1:1.00] 1239; 1240; HASWELL-LABEL: test_paddusw: 1241; HASWELL: # %bb.0: 1242; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1243; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1244; HASWELL-NEXT: retq # sched: [7:1.00] 1245; 1246; BROADWELL-LABEL: test_paddusw: 1247; BROADWELL: # %bb.0: 1248; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1249; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1250; BROADWELL-NEXT: retq # sched: [7:1.00] 1251; 1252; SKYLAKE-LABEL: test_paddusw: 1253; SKYLAKE: # %bb.0: 1254; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1255; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1256; SKYLAKE-NEXT: retq # sched: [7:1.00] 1257; 1258; SKX-LABEL: test_paddusw: 1259; SKX: # %bb.0: 1260; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1261; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1262; SKX-NEXT: retq # sched: [7:1.00] 1263; 1264; ZNVER1-LABEL: test_paddusw: 1265; ZNVER1: # %bb.0: 1266; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1267; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1268; ZNVER1-NEXT: retq # sched: [1:0.50] 1269 %1 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) 1270 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1271 %3 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %1, <16 x i16> %2) 1272 ret <16 
x i16> %3 1273} 1274declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone 1275; test_paddw - scheduling info for vpaddw: a plain IR add of %a0 and %a1 (register form), then an add of that result with a 32-byte-aligned load from %a2, which the expected output shows folded into the vpaddw (%rdi) form. 1276define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1277; GENERIC-LABEL: test_paddw: 1278; GENERIC: # %bb.0: 1279; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1280; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1281; GENERIC-NEXT: retq # sched: [1:1.00] 1282; 1283; HASWELL-LABEL: test_paddw: 1284; HASWELL: # %bb.0: 1285; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1286; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1287; HASWELL-NEXT: retq # sched: [7:1.00] 1288; 1289; BROADWELL-LABEL: test_paddw: 1290; BROADWELL: # %bb.0: 1291; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1292; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1293; BROADWELL-NEXT: retq # sched: [7:1.00] 1294; 1295; SKYLAKE-LABEL: test_paddw: 1296; SKYLAKE: # %bb.0: 1297; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1298; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1299; SKYLAKE-NEXT: retq # sched: [7:1.00] 1300; 1301; SKX-LABEL: test_paddw: 1302; SKX: # %bb.0: 1303; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1304; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1305; SKX-NEXT: retq # sched: [7:1.00] 1306; 1307; ZNVER1-LABEL: test_paddw: 1308; ZNVER1: # %bb.0: 1309; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1310; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1311; ZNVER1-NEXT: retq # sched: [1:0.50] 1312 %1 = add <16 x i16> %a0, %a1 1313 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1314 %3 = add <16 x i16> %1, %2 1315 ret <16 x i16> %3 1316} 1317; NOTE(review): in test_palignr below, %2 (the load from %a2) is never used - both shufflevectors take only %a0, %a1 and %1 - so the expected output has two register-form vpalignr and no memory-operand form; confirm whether %3 was meant to consume %2, and regenerate the assertions with the update script if the body changes. 1318define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1319; GENERIC-LABEL: test_palignr: 1320; GENERIC: # %bb.0: 1321; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = 
ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1322; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1323; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 1324; GENERIC-NEXT: retq # sched: [1:1.00] 1325; 1326; HASWELL-LABEL: test_palignr: 1327; HASWELL: # %bb.0: 1328; HASWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1329; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1330; HASWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 1331; HASWELL-NEXT: retq # sched: [7:1.00] 1332; 1333; BROADWELL-LABEL: test_palignr: 1334; BROADWELL: # %bb.0: 1335; BROADWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1336; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1337; BROADWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 1338; BROADWELL-NEXT: retq # sched: [7:1.00] 1339; 1340; SKYLAKE-LABEL: test_palignr: 1341; SKYLAKE: # %bb.0: 1342; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1343; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1344; SKYLAKE-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 1345; SKYLAKE-NEXT: retq # sched: [7:1.00] 1346; 1347; SKX-LABEL: test_palignr: 
1348; SKX: # %bb.0: 1349; SKX-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1350; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1351; SKX-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 1352; SKX-NEXT: retq # sched: [7:1.00] 1353; 1354; ZNVER1-LABEL: test_palignr: 1355; ZNVER1: # %bb.0: 1356; ZNVER1-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25] 1357; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:0.25] 1358; ZNVER1-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 1359; ZNVER1-NEXT: retq # sched: [1:0.50] 1360 %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48> 1361 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1362 %3 = shufflevector <32 x i8> %a0, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48> 1363 %4 = add <32 x i8> %1, %3 1364 ret <32 x i8> %4 1365} 1366 1367define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 1368; GENERIC-LABEL: test_pand: 1369; GENERIC: # %bb.0: 1370; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1371; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1372; GENERIC-NEXT: vpaddq 
%ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1373; GENERIC-NEXT: retq # sched: [1:1.00] 1374; 1375; HASWELL-LABEL: test_pand: 1376; HASWELL: # %bb.0: 1377; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1378; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1379; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1380; HASWELL-NEXT: retq # sched: [7:1.00] 1381; 1382; BROADWELL-LABEL: test_pand: 1383; BROADWELL: # %bb.0: 1384; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1385; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1386; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1387; BROADWELL-NEXT: retq # sched: [7:1.00] 1388; 1389; SKYLAKE-LABEL: test_pand: 1390; SKYLAKE: # %bb.0: 1391; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1392; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1393; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1394; SKYLAKE-NEXT: retq # sched: [7:1.00] 1395; 1396; SKX-LABEL: test_pand: 1397; SKX: # %bb.0: 1398; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1399; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1400; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1401; SKX-NEXT: retq # sched: [7:1.00] 1402; 1403; ZNVER1-LABEL: test_pand: 1404; ZNVER1: # %bb.0: 1405; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1406; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1407; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1408; ZNVER1-NEXT: retq # sched: [1:0.50] 1409 %1 = and <4 x i64> %a0, %a1 1410 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 1411 %3 = and <4 x i64> %1, %2 1412 %4 = add <4 x i64> %3, %a1 1413 ret <4 x i64> %4 1414} 1415 1416define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 1417; GENERIC-LABEL: test_pandn: 1418; GENERIC: # %bb.0: 1419; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1420; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: 
[8:0.50] 1421; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1422; GENERIC-NEXT: retq # sched: [1:1.00] 1423; 1424; HASWELL-LABEL: test_pandn: 1425; HASWELL: # %bb.0: 1426; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1427; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1428; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1429; HASWELL-NEXT: retq # sched: [7:1.00] 1430; 1431; BROADWELL-LABEL: test_pandn: 1432; BROADWELL: # %bb.0: 1433; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1434; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50] 1435; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1436; BROADWELL-NEXT: retq # sched: [7:1.00] 1437; 1438; SKYLAKE-LABEL: test_pandn: 1439; SKYLAKE: # %bb.0: 1440; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1441; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1442; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1443; SKYLAKE-NEXT: retq # sched: [7:1.00] 1444; 1445; SKX-LABEL: test_pandn: 1446; SKX: # %bb.0: 1447; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1448; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1449; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1450; SKX-NEXT: retq # sched: [7:1.00] 1451; 1452; ZNVER1-LABEL: test_pandn: 1453; ZNVER1: # %bb.0: 1454; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1455; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1456; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1457; ZNVER1-NEXT: retq # sched: [1:0.50] 1458 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1> 1459 %2 = and <4 x i64> %a1, %1 1460 %3 = load <4 x i64>, <4 x i64> *%a2, align 32 1461 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1> 1462 %5 = and <4 x i64> %3, %4 1463 %6 = add <4 x i64> %2, %5 1464 ret <4 x i64> %6 1465} 1466 1467define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1468; 
GENERIC-LABEL: test_pavgb: 1469; GENERIC: # %bb.0: 1470; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1471; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1472; GENERIC-NEXT: retq # sched: [1:1.00] 1473; 1474; HASWELL-LABEL: test_pavgb: 1475; HASWELL: # %bb.0: 1476; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1477; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1478; HASWELL-NEXT: retq # sched: [7:1.00] 1479; 1480; BROADWELL-LABEL: test_pavgb: 1481; BROADWELL: # %bb.0: 1482; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1483; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1484; BROADWELL-NEXT: retq # sched: [7:1.00] 1485; 1486; SKYLAKE-LABEL: test_pavgb: 1487; SKYLAKE: # %bb.0: 1488; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1489; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1490; SKYLAKE-NEXT: retq # sched: [7:1.00] 1491; 1492; SKX-LABEL: test_pavgb: 1493; SKX: # %bb.0: 1494; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1495; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1496; SKX-NEXT: retq # sched: [7:1.00] 1497; 1498; ZNVER1-LABEL: test_pavgb: 1499; ZNVER1: # %bb.0: 1500; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1501; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1502; ZNVER1-NEXT: retq # sched: [1:0.50] 1503 %1 = zext <32 x i8> %a0 to <32 x i16> 1504 %2 = zext <32 x i8> %a1 to <32 x i16> 1505 %3 = add <32 x i16> %1, %2 1506 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1507 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 
1, i16 1, i16 1, i16 1> 1508 %6 = trunc <32 x i16> %5 to <32 x i8> 1509 %7 = load <32 x i8>, <32 x i8> *%a2, align 32 1510 %8 = zext <32 x i8> %6 to <32 x i16> 1511 %9 = zext <32 x i8> %7 to <32 x i16> 1512 %10 = add <32 x i16> %8, %9 1513 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1514 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1515 %13 = trunc <32 x i16> %12 to <32 x i8> 1516 ret <32 x i8> %13 1517} 1518 1519define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1520; GENERIC-LABEL: test_pavgw: 1521; GENERIC: # %bb.0: 1522; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1523; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1524; GENERIC-NEXT: retq # sched: [1:1.00] 1525; 1526; HASWELL-LABEL: test_pavgw: 1527; HASWELL: # %bb.0: 1528; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1529; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1530; HASWELL-NEXT: retq # sched: [7:1.00] 1531; 1532; BROADWELL-LABEL: test_pavgw: 1533; BROADWELL: # %bb.0: 1534; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1535; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1536; BROADWELL-NEXT: retq # sched: [7:1.00] 1537; 1538; SKYLAKE-LABEL: test_pavgw: 1539; SKYLAKE: # %bb.0: 1540; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1541; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1542; SKYLAKE-NEXT: retq # sched: [7:1.00] 1543; 1544; SKX-LABEL: test_pavgw: 1545; SKX: # %bb.0: 1546; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1547; SKX-NEXT: 
vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1548; SKX-NEXT: retq # sched: [7:1.00] 1549; 1550; ZNVER1-LABEL: test_pavgw: 1551; ZNVER1: # %bb.0: 1552; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1553; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1554; ZNVER1-NEXT: retq # sched: [1:0.50] 1555 %1 = zext <16 x i16> %a0 to <16 x i32> 1556 %2 = zext <16 x i16> %a1 to <16 x i32> 1557 %3 = add <16 x i32> %1, %2 1558 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1559 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1560 %6 = trunc <16 x i32> %5 to <16 x i16> 1561 %7 = load <16 x i16>, <16 x i16> *%a2, align 32 1562 %8 = zext <16 x i16> %6 to <16 x i32> 1563 %9 = zext <16 x i16> %7 to <16 x i32> 1564 %10 = add <16 x i32> %8, %9 1565 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1566 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1567 %13 = trunc <16 x i32> %12 to <16 x i16> 1568 ret <16 x i16> %13 1569} 1570 1571define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 1572; GENERIC-LABEL: test_pblendd: 1573; GENERIC: # %bb.0: 1574; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] 1575; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1576; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1577; GENERIC-NEXT: retq # sched: [1:1.00] 1578; 1579; HASWELL-LABEL: test_pblendd: 1580; HASWELL: # %bb.0: 1581; HASWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1582; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1583; HASWELL-NEXT: vpaddd 
%xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1584; HASWELL-NEXT: retq # sched: [7:1.00] 1585; 1586; BROADWELL-LABEL: test_pblendd: 1587; BROADWELL: # %bb.0: 1588; BROADWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1589; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50] 1590; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1591; BROADWELL-NEXT: retq # sched: [7:1.00] 1592; 1593; SKYLAKE-LABEL: test_pblendd: 1594; SKYLAKE: # %bb.0: 1595; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1596; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1597; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1598; SKYLAKE-NEXT: retq # sched: [7:1.00] 1599; 1600; SKX-LABEL: test_pblendd: 1601; SKX: # %bb.0: 1602; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1603; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1604; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1605; SKX-NEXT: retq # sched: [7:1.00] 1606; 1607; ZNVER1-LABEL: test_pblendd: 1608; ZNVER1: # %bb.0: 1609; ZNVER1-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] 1610; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00] 1611; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1612; ZNVER1-NEXT: retq # sched: [1:0.50] 1613 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3> 1614 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 1615 %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1616 %4 = add <4 x i32> %1, %3 1617 ret <4 x i32> %4 1618} 1619 1620define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 1621; GENERIC-LABEL: test_pblendd_ymm: 1622; GENERIC: # %bb.0: 1623; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] 1624; 
GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1625; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1626; GENERIC-NEXT: retq # sched: [1:1.00] 1627; 1628; HASWELL-LABEL: test_pblendd_ymm: 1629; HASWELL: # %bb.0: 1630; HASWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1631; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1632; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1633; HASWELL-NEXT: retq # sched: [7:1.00] 1634; 1635; BROADWELL-LABEL: test_pblendd_ymm: 1636; BROADWELL: # %bb.0: 1637; BROADWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1638; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50] 1639; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1640; BROADWELL-NEXT: retq # sched: [7:1.00] 1641; 1642; SKYLAKE-LABEL: test_pblendd_ymm: 1643; SKYLAKE: # %bb.0: 1644; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1645; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1646; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1647; SKYLAKE-NEXT: retq # sched: [7:1.00] 1648; 1649; SKX-LABEL: test_pblendd_ymm: 1650; SKX: # %bb.0: 1651; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1652; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1653; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1654; SKX-NEXT: retq # sched: [7:1.00] 1655; 1656; ZNVER1-LABEL: test_pblendd_ymm: 1657; ZNVER1: # %bb.0: 1658; ZNVER1-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] 1659; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50] 1660; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1661; ZNVER1-NEXT: retq # sched: 
[1:0.50] 1662 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15> 1663 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 1664 %3 = shufflevector <8 x i32> %a1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7> 1665 %4 = add <8 x i32> %1, %3 1666 ret <8 x i32> %4 1667} 1668 1669define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) { 1670; GENERIC-LABEL: test_pblendvb: 1671; GENERIC: # %bb.0: 1672; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] 1673; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 1674; GENERIC-NEXT: retq # sched: [1:1.00] 1675; 1676; HASWELL-LABEL: test_pblendvb: 1677; HASWELL: # %bb.0: 1678; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 1679; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 1680; HASWELL-NEXT: retq # sched: [7:1.00] 1681; 1682; BROADWELL-LABEL: test_pblendvb: 1683; BROADWELL: # %bb.0: 1684; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 1685; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 1686; BROADWELL-NEXT: retq # sched: [7:1.00] 1687; 1688; SKYLAKE-LABEL: test_pblendvb: 1689; SKYLAKE: # %bb.0: 1690; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 1691; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] 1692; SKYLAKE-NEXT: retq # sched: [7:1.00] 1693; 1694; SKX-LABEL: test_pblendvb: 1695; SKX: # %bb.0: 1696; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 1697; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] 1698; SKX-NEXT: retq # sched: [7:1.00] 1699; 1700; ZNVER1-LABEL: test_pblendvb: 1701; ZNVER1: # %bb.0: 1702; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1703; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 1704; ZNVER1-NEXT: 
retq # sched: [1:0.50] 1705 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) 1706 %2 = load <32 x i8>, <32 x i8> *%a3, align 32 1707 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4) 1708 ret <32 x i8> %3 1709} 1710declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone 1711 1712define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1713; GENERIC-LABEL: test_pblendw: 1714; GENERIC: # %bb.0: 1715; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50] 1716; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:0.50] 1717; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1718; GENERIC-NEXT: retq # sched: [1:1.00] 1719; 1720; HASWELL-LABEL: test_pblendw: 1721; HASWELL: # %bb.0: 1722; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1723; HASWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] 1724; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1725; HASWELL-NEXT: retq # sched: [7:1.00] 1726; 1727; BROADWELL-LABEL: test_pblendw: 1728; BROADWELL: # %bb.0: 1729; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1730; BROADWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [7:1.00] 1731; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1732; BROADWELL-NEXT: retq # sched: [7:1.00] 1733; 1734; SKYLAKE-LABEL: test_pblendw: 1735; 
SKYLAKE: # %bb.0: 1736; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1737; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] 1738; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1739; SKYLAKE-NEXT: retq # sched: [7:1.00] 1740; 1741; SKX-LABEL: test_pblendw: 1742; SKX: # %bb.0: 1743; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1744; SKX-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] 1745; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1746; SKX-NEXT: retq # sched: [7:1.00] 1747; 1748; ZNVER1-LABEL: test_pblendw: 1749; ZNVER1: # %bb.0: 1750; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33] 1751; ZNVER1-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [9:0.50] 1752; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1753; ZNVER1-NEXT: retq # sched: [1:0.50] 1754 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15> 1755 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1756 %3 = shufflevector <16 x i16> %a1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 1757 %4 = add <16 x i16> %1, %3 1758 ret <16 x i16> %4 1759} 1760 1761define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { 1762; GENERIC-LABEL: test_pbroadcastb: 
1763; GENERIC: # %bb.0: 1764; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50] 1765; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50] 1766; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1767; GENERIC-NEXT: retq # sched: [1:1.00] 1768; 1769; HASWELL-LABEL: test_pbroadcastb: 1770; HASWELL: # %bb.0: 1771; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1772; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] 1773; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1774; HASWELL-NEXT: retq # sched: [7:1.00] 1775; 1776; BROADWELL-LABEL: test_pbroadcastb: 1777; BROADWELL: # %bb.0: 1778; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] 1779; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1780; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1781; BROADWELL-NEXT: retq # sched: [7:1.00] 1782; 1783; SKYLAKE-LABEL: test_pbroadcastb: 1784; SKYLAKE: # %bb.0: 1785; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1786; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] 1787; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1788; SKYLAKE-NEXT: retq # sched: [7:1.00] 1789; 1790; SKX-LABEL: test_pbroadcastb: 1791; SKX: # %bb.0: 1792; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1793; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] 1794; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1795; SKX-NEXT: retq # sched: [7:1.00] 1796; 1797; ZNVER1-LABEL: test_pbroadcastb: 1798; ZNVER1: # %bb.0: 1799; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00] 1800; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25] 1801; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1802; ZNVER1-NEXT: retq # sched: [1:0.50] 1803 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer 1804 %2 = load <16 x i8>, <16 x i8> *%a1, align 16 1805 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer 1806 %4 = 
add <16 x i8> %1, %3 1807 ret <16 x i8> %4 1808} 1809 1810define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { 1811; GENERIC-LABEL: test_pbroadcastb_ymm: 1812; GENERIC: # %bb.0: 1813; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00] 1814; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50] 1815; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1816; GENERIC-NEXT: retq # sched: [1:1.00] 1817; 1818; HASWELL-LABEL: test_pbroadcastb_ymm: 1819; HASWELL: # %bb.0: 1820; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1821; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] 1822; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1823; HASWELL-NEXT: retq # sched: [7:1.00] 1824; 1825; BROADWELL-LABEL: test_pbroadcastb_ymm: 1826; BROADWELL: # %bb.0: 1827; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] 1828; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1829; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1830; BROADWELL-NEXT: retq # sched: [7:1.00] 1831; 1832; SKYLAKE-LABEL: test_pbroadcastb_ymm: 1833; SKYLAKE: # %bb.0: 1834; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1835; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] 1836; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1837; SKYLAKE-NEXT: retq # sched: [7:1.00] 1838; 1839; SKX-LABEL: test_pbroadcastb_ymm: 1840; SKX: # %bb.0: 1841; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1842; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] 1843; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1844; SKX-NEXT: retq # sched: [7:1.00] 1845; 1846; ZNVER1-LABEL: test_pbroadcastb_ymm: 1847; ZNVER1: # %bb.0: 1848; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00] 1849; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25] 1850; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1851; ZNVER1-NEXT: retq # sched: [1:0.50] 1852 %1 = shufflevector <32 x i8> 
%a0, <32 x i8> undef, <32 x i32> zeroinitializer 1853 %2 = load <32 x i8>, <32 x i8> *%a1, align 32 1854 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer 1855 %4 = add <32 x i8> %1, %3 1856 ret <32 x i8> %4 1857} 1858 1859define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) { 1860; GENERIC-LABEL: test_pbroadcastd: 1861; GENERIC: # %bb.0: 1862; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50] 1863; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50] 1864; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1865; GENERIC-NEXT: retq # sched: [1:1.00] 1866; 1867; HASWELL-LABEL: test_pbroadcastd: 1868; HASWELL: # %bb.0: 1869; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1870; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] 1871; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1872; HASWELL-NEXT: retq # sched: [7:1.00] 1873; 1874; BROADWELL-LABEL: test_pbroadcastd: 1875; BROADWELL: # %bb.0: 1876; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1877; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50] 1878; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1879; BROADWELL-NEXT: retq # sched: [7:1.00] 1880; 1881; SKYLAKE-LABEL: test_pbroadcastd: 1882; SKYLAKE: # %bb.0: 1883; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1884; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] 1885; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1886; SKYLAKE-NEXT: retq # sched: [7:1.00] 1887; 1888; SKX-LABEL: test_pbroadcastd: 1889; SKX: # %bb.0: 1890; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1891; SKX-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] 1892; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1893; SKX-NEXT: retq # sched: [7:1.00] 1894; 1895; ZNVER1-LABEL: test_pbroadcastd: 1896; ZNVER1: # %bb.0: 1897; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50] 1898; ZNVER1-NEXT: vpbroadcastd %xmm0, 
%xmm0 # sched: [1:0.25] 1899; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1900; ZNVER1-NEXT: retq # sched: [1:0.50] 1901 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer 1902 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 1903 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer 1904 %4 = add <4 x i32> %1, %3 1905 ret <4 x i32> %4 1906} 1907 1908define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { 1909; GENERIC-LABEL: test_pbroadcastd_ymm: 1910; GENERIC: # %bb.0: 1911; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00] 1912; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1913; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1914; GENERIC-NEXT: retq # sched: [1:1.00] 1915; 1916; HASWELL-LABEL: test_pbroadcastd_ymm: 1917; HASWELL: # %bb.0: 1918; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1919; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1920; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1921; HASWELL-NEXT: retq # sched: [7:1.00] 1922; 1923; BROADWELL-LABEL: test_pbroadcastd_ymm: 1924; BROADWELL: # %bb.0: 1925; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1926; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50] 1927; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1928; BROADWELL-NEXT: retq # sched: [7:1.00] 1929; 1930; SKYLAKE-LABEL: test_pbroadcastd_ymm: 1931; SKYLAKE: # %bb.0: 1932; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1933; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1934; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1935; SKYLAKE-NEXT: retq # sched: [7:1.00] 1936; 1937; SKX-LABEL: test_pbroadcastd_ymm: 1938; SKX: # %bb.0: 1939; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1940; SKX-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1941; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1942; SKX-NEXT: retq # 
sched: [7:1.00] 1943; 1944; ZNVER1-LABEL: test_pbroadcastd_ymm: 1945; ZNVER1: # %bb.0: 1946; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50] 1947; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25] 1948; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1949; ZNVER1-NEXT: retq # sched: [1:0.50] 1950 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer 1951 %2 = load <8 x i32>, <8 x i32> *%a1, align 32 1952 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer 1953 %4 = add <8 x i32> %1, %3 1954 ret <8 x i32> %4 1955} 1956 1957define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) { 1958; GENERIC-LABEL: test_pbroadcastq: 1959; GENERIC: # %bb.0: 1960; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50] 1961; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50] 1962; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1963; GENERIC-NEXT: retq # sched: [1:1.00] 1964; 1965; HASWELL-LABEL: test_pbroadcastq: 1966; HASWELL: # %bb.0: 1967; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] 1968; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] 1969; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1970; HASWELL-NEXT: retq # sched: [7:1.00] 1971; 1972; BROADWELL-LABEL: test_pbroadcastq: 1973; BROADWELL: # %bb.0: 1974; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] 1975; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50] 1976; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1977; BROADWELL-NEXT: retq # sched: [7:1.00] 1978; 1979; SKYLAKE-LABEL: test_pbroadcastq: 1980; SKYLAKE: # %bb.0: 1981; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] 1982; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] 1983; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1984; SKYLAKE-NEXT: retq # sched: [7:1.00] 1985; 1986; SKX-LABEL: test_pbroadcastq: 1987; SKX: # %bb.0: 1988; SKX-NEXT: vpbroadcastq %xmm0, 
%xmm0 # sched: [1:1.00] 1989; SKX-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] 1990; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1991; SKX-NEXT: retq # sched: [7:1.00] 1992; 1993; ZNVER1-LABEL: test_pbroadcastq: 1994; ZNVER1: # %bb.0: 1995; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50] 1996; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25] 1997; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1998; ZNVER1-NEXT: retq # sched: [1:0.50] 1999 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer 2000 %2 = load <2 x i64>, <2 x i64> *%a1, align 16 2001 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer 2002 %4 = add <2 x i64> %1, %3 2003 ret <2 x i64> %4 2004} 2005 2006define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { 2007; GENERIC-LABEL: test_pbroadcastq_ymm: 2008; GENERIC: # %bb.0: 2009; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00] 2010; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2011; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2012; GENERIC-NEXT: retq # sched: [1:1.00] 2013; 2014; HASWELL-LABEL: test_pbroadcastq_ymm: 2015; HASWELL: # %bb.0: 2016; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2017; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2018; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2019; HASWELL-NEXT: retq # sched: [7:1.00] 2020; 2021; BROADWELL-LABEL: test_pbroadcastq_ymm: 2022; BROADWELL: # %bb.0: 2023; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2024; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50] 2025; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2026; BROADWELL-NEXT: retq # sched: [7:1.00] 2027; 2028; SKYLAKE-LABEL: test_pbroadcastq_ymm: 2029; SKYLAKE: # %bb.0: 2030; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2031; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2032; SKYLAKE-NEXT: 
vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2033; SKYLAKE-NEXT: retq # sched: [7:1.00] 2034; 2035; SKX-LABEL: test_pbroadcastq_ymm: 2036; SKX: # %bb.0: 2037; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2038; SKX-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2039; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2040; SKX-NEXT: retq # sched: [7:1.00] 2041; 2042; ZNVER1-LABEL: test_pbroadcastq_ymm: 2043; ZNVER1: # %bb.0: 2044; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50] 2045; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25] 2046; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2047; ZNVER1-NEXT: retq # sched: [1:0.50] 2048 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer 2049 %2 = load <4 x i64>, <4 x i64> *%a1, align 32 2050 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer 2051 %4 = add <4 x i64> %1, %3 2052 ret <4 x i64> %4 2053} 2054 2055define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { 2056; GENERIC-LABEL: test_pbroadcastw: 2057; GENERIC: # %bb.0: 2058; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50] 2059; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50] 2060; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2061; GENERIC-NEXT: retq # sched: [1:1.00] 2062; 2063; HASWELL-LABEL: test_pbroadcastw: 2064; HASWELL: # %bb.0: 2065; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2066; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] 2067; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2068; HASWELL-NEXT: retq # sched: [7:1.00] 2069; 2070; BROADWELL-LABEL: test_pbroadcastw: 2071; BROADWELL: # %bb.0: 2072; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] 2073; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2074; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2075; BROADWELL-NEXT: retq # sched: [7:1.00] 2076; 2077; SKYLAKE-LABEL: test_pbroadcastw: 
2078; SKYLAKE: # %bb.0: 2079; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2080; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] 2081; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2082; SKYLAKE-NEXT: retq # sched: [7:1.00] 2083; 2084; SKX-LABEL: test_pbroadcastw: 2085; SKX: # %bb.0: 2086; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2087; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] 2088; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2089; SKX-NEXT: retq # sched: [7:1.00] 2090; 2091; ZNVER1-LABEL: test_pbroadcastw: 2092; ZNVER1: # %bb.0: 2093; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00] 2094; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25] 2095; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2096; ZNVER1-NEXT: retq # sched: [1:0.50] 2097 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer 2098 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 2099 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer 2100 %4 = add <8 x i16> %1, %3 2101 ret <8 x i16> %4 2102} 2103 2104define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { 2105; GENERIC-LABEL: test_pbroadcastw_ymm: 2106; GENERIC: # %bb.0: 2107; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00] 2108; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50] 2109; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2110; GENERIC-NEXT: retq # sched: [1:1.00] 2111; 2112; HASWELL-LABEL: test_pbroadcastw_ymm: 2113; HASWELL: # %bb.0: 2114; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] 2115; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] 2116; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2117; HASWELL-NEXT: retq # sched: [7:1.00] 2118; 2119; BROADWELL-LABEL: test_pbroadcastw_ymm: 2120; BROADWELL: # %bb.0: 2121; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] 2122; BROADWELL-NEXT: vpbroadcastw %xmm0, 
%ymm0 # sched: [3:1.00] 2123; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2124; BROADWELL-NEXT: retq # sched: [7:1.00] 2125; 2126; SKYLAKE-LABEL: test_pbroadcastw_ymm: 2127; SKYLAKE: # %bb.0: 2128; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] 2129; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00] 2130; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2131; SKYLAKE-NEXT: retq # sched: [7:1.00] 2132; 2133; SKX-LABEL: test_pbroadcastw_ymm: 2134; SKX: # %bb.0: 2135; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] 2136; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00] 2137; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2138; SKX-NEXT: retq # sched: [7:1.00] 2139; 2140; ZNVER1-LABEL: test_pbroadcastw_ymm: 2141; ZNVER1: # %bb.0: 2142; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00] 2143; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25] 2144; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2145; ZNVER1-NEXT: retq # sched: [1:0.50] 2146 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer 2147 %2 = load <16 x i16>, <16 x i16> *%a1, align 32 2148 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer 2149 %4 = add <16 x i16> %1, %3 2150 ret <16 x i16> %4 2151} 2152 2153define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 2154; GENERIC-LABEL: test_pcmpeqb: 2155; GENERIC: # %bb.0: 2156; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2157; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2158; GENERIC-NEXT: retq # sched: [1:1.00] 2159; 2160; HASWELL-LABEL: test_pcmpeqb: 2161; HASWELL: # %bb.0: 2162; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2163; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2164; HASWELL-NEXT: retq # sched: [7:1.00] 2165; 2166; BROADWELL-LABEL: test_pcmpeqb: 2167; BROADWELL: # %bb.0: 2168; BROADWELL-NEXT: vpcmpeqb %ymm1, 
%ymm0, %ymm0 # sched: [1:0.50] 2169; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 2170; BROADWELL-NEXT: retq # sched: [7:1.00] 2171; 2172; SKYLAKE-LABEL: test_pcmpeqb: 2173; SKYLAKE: # %bb.0: 2174; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2175; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2176; SKYLAKE-NEXT: retq # sched: [7:1.00] 2177; 2178; SKX-LABEL: test_pcmpeqb: 2179; SKX: # %bb.0: 2180; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2181; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2182; SKX-NEXT: retq # sched: [7:1.00] 2183; 2184; ZNVER1-LABEL: test_pcmpeqb: 2185; ZNVER1: # %bb.0: 2186; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2187; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2188; ZNVER1-NEXT: retq # sched: [1:0.50] 2189 %1 = icmp eq <32 x i8> %a0, %a1 2190 %2 = sext <32 x i1> %1 to <32 x i8> 2191 %3 = load <32 x i8>, <32 x i8> *%a2, align 32 2192 %4 = icmp eq <32 x i8> %2, %3 2193 %5 = sext <32 x i1> %4 to <32 x i8> 2194 ret <32 x i8> %5 2195} 2196 2197define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 2198; GENERIC-LABEL: test_pcmpeqd: 2199; GENERIC: # %bb.0: 2200; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2201; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2202; GENERIC-NEXT: retq # sched: [1:1.00] 2203; 2204; HASWELL-LABEL: test_pcmpeqd: 2205; HASWELL: # %bb.0: 2206; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2207; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2208; HASWELL-NEXT: retq # sched: [7:1.00] 2209; 2210; BROADWELL-LABEL: test_pcmpeqd: 2211; BROADWELL: # %bb.0: 2212; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2213; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 2214; BROADWELL-NEXT: retq # sched: [7:1.00] 2215; 2216; SKYLAKE-LABEL: test_pcmpeqd: 2217; SKYLAKE: # %bb.0: 2218; SKYLAKE-NEXT: vpcmpeqd 
%ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2219; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2220; SKYLAKE-NEXT: retq # sched: [7:1.00] 2221; 2222; SKX-LABEL: test_pcmpeqd: 2223; SKX: # %bb.0: 2224; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2225; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2226; SKX-NEXT: retq # sched: [7:1.00] 2227; 2228; ZNVER1-LABEL: test_pcmpeqd: 2229; ZNVER1: # %bb.0: 2230; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2231; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2232; ZNVER1-NEXT: retq # sched: [1:0.50] 2233 %1 = icmp eq <8 x i32> %a0, %a1 2234 %2 = sext <8 x i1> %1 to <8 x i32> 2235 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 2236 %4 = icmp eq <8 x i32> %2, %3 2237 %5 = sext <8 x i1> %4 to <8 x i32> 2238 ret <8 x i32> %5 2239} 2240 2241define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 2242; GENERIC-LABEL: test_pcmpeqq: 2243; GENERIC: # %bb.0: 2244; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2245; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2246; GENERIC-NEXT: retq # sched: [1:1.00] 2247; 2248; HASWELL-LABEL: test_pcmpeqq: 2249; HASWELL: # %bb.0: 2250; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2251; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2252; HASWELL-NEXT: retq # sched: [7:1.00] 2253; 2254; BROADWELL-LABEL: test_pcmpeqq: 2255; BROADWELL: # %bb.0: 2256; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2257; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 2258; BROADWELL-NEXT: retq # sched: [7:1.00] 2259; 2260; SKYLAKE-LABEL: test_pcmpeqq: 2261; SKYLAKE: # %bb.0: 2262; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2263; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2264; SKYLAKE-NEXT: retq # sched: [7:1.00] 2265; 2266; SKX-LABEL: test_pcmpeqq: 2267; SKX: # %bb.0: 2268; SKX-NEXT: vpcmpeqq %ymm1, 
%ymm0, %ymm0 # sched: [1:0.50] 2269; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2270; SKX-NEXT: retq # sched: [7:1.00] 2271; 2272; ZNVER1-LABEL: test_pcmpeqq: 2273; ZNVER1: # %bb.0: 2274; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2275; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2276; ZNVER1-NEXT: retq # sched: [1:0.50] 2277 %1 = icmp eq <4 x i64> %a0, %a1 2278 %2 = sext <4 x i1> %1 to <4 x i64> 2279 %3 = load <4 x i64>, <4 x i64> *%a2, align 32 2280 %4 = icmp eq <4 x i64> %2, %3 2281 %5 = sext <4 x i1> %4 to <4 x i64> 2282 ret <4 x i64> %5 2283} 2284 2285define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 2286; GENERIC-LABEL: test_pcmpeqw: 2287; GENERIC: # %bb.0: 2288; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2289; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2290; GENERIC-NEXT: retq # sched: [1:1.00] 2291; 2292; HASWELL-LABEL: test_pcmpeqw: 2293; HASWELL: # %bb.0: 2294; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2295; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2296; HASWELL-NEXT: retq # sched: [7:1.00] 2297; 2298; BROADWELL-LABEL: test_pcmpeqw: 2299; BROADWELL: # %bb.0: 2300; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2301; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 2302; BROADWELL-NEXT: retq # sched: [7:1.00] 2303; 2304; SKYLAKE-LABEL: test_pcmpeqw: 2305; SKYLAKE: # %bb.0: 2306; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2307; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2308; SKYLAKE-NEXT: retq # sched: [7:1.00] 2309; 2310; SKX-LABEL: test_pcmpeqw: 2311; SKX: # %bb.0: 2312; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2313; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2314; SKX-NEXT: retq # sched: [7:1.00] 2315; 2316; ZNVER1-LABEL: test_pcmpeqw: 2317; ZNVER1: # %bb.0: 2318; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, 
%ymm0 # sched: [1:0.25] 2319; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2320; ZNVER1-NEXT: retq # sched: [1:0.50] 2321 %1 = icmp eq <16 x i16> %a0, %a1 2322 %2 = sext <16 x i1> %1 to <16 x i16> 2323 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 2324 %4 = icmp eq <16 x i16> %2, %3 2325 %5 = sext <16 x i1> %4 to <16 x i16> 2326 ret <16 x i16> %5 2327} 2328 2329define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 2330; GENERIC-LABEL: test_pcmpgtb: 2331; GENERIC: # %bb.0: 2332; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2333; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2334; GENERIC-NEXT: retq # sched: [1:1.00] 2335; 2336; HASWELL-LABEL: test_pcmpgtb: 2337; HASWELL: # %bb.0: 2338; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2339; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2340; HASWELL-NEXT: retq # sched: [7:1.00] 2341; 2342; BROADWELL-LABEL: test_pcmpgtb: 2343; BROADWELL: # %bb.0: 2344; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2345; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 2346; BROADWELL-NEXT: retq # sched: [7:1.00] 2347; 2348; SKYLAKE-LABEL: test_pcmpgtb: 2349; SKYLAKE: # %bb.0: 2350; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2351; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2352; SKYLAKE-NEXT: retq # sched: [7:1.00] 2353; 2354; SKX-LABEL: test_pcmpgtb: 2355; SKX: # %bb.0: 2356; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2357; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2358; SKX-NEXT: retq # sched: [7:1.00] 2359; 2360; ZNVER1-LABEL: test_pcmpgtb: 2361; ZNVER1: # %bb.0: 2362; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2363; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2364; ZNVER1-NEXT: retq # sched: [1:0.50] 2365 %1 = icmp sgt <32 x i8> %a0, %a1 2366 %2 = sext <32 x i1> %1 to <32 x i8> 2367 %3 = load <32 x 
i8>, <32 x i8> *%a2, align 32 2368 %4 = icmp sgt <32 x i8> %2, %3 2369 %5 = sext <32 x i1> %4 to <32 x i8> 2370 ret <32 x i8> %5 2371} 2372 2373define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 2374; GENERIC-LABEL: test_pcmpgtd: 2375; GENERIC: # %bb.0: 2376; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2377; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2378; GENERIC-NEXT: retq # sched: [1:1.00] 2379; 2380; HASWELL-LABEL: test_pcmpgtd: 2381; HASWELL: # %bb.0: 2382; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2383; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2384; HASWELL-NEXT: retq # sched: [7:1.00] 2385; 2386; BROADWELL-LABEL: test_pcmpgtd: 2387; BROADWELL: # %bb.0: 2388; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2389; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 2390; BROADWELL-NEXT: retq # sched: [7:1.00] 2391; 2392; SKYLAKE-LABEL: test_pcmpgtd: 2393; SKYLAKE: # %bb.0: 2394; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2395; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2396; SKYLAKE-NEXT: retq # sched: [7:1.00] 2397; 2398; SKX-LABEL: test_pcmpgtd: 2399; SKX: # %bb.0: 2400; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2401; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2402; SKX-NEXT: retq # sched: [7:1.00] 2403; 2404; ZNVER1-LABEL: test_pcmpgtd: 2405; ZNVER1: # %bb.0: 2406; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2407; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2408; ZNVER1-NEXT: retq # sched: [1:0.50] 2409 %1 = icmp sgt <8 x i32> %a0, %a1 2410 %2 = sext <8 x i1> %1 to <8 x i32> 2411 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 2412 %4 = icmp sgt <8 x i32> %2, %3 2413 %5 = sext <8 x i1> %4 to <8 x i32> 2414 ret <8 x i32> %5 2415} 2416 2417define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 2418; 
GENERIC-LABEL: test_pcmpgtq: 2419; GENERIC: # %bb.0: 2420; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2421; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2422; GENERIC-NEXT: retq # sched: [1:1.00] 2423; 2424; HASWELL-LABEL: test_pcmpgtq: 2425; HASWELL: # %bb.0: 2426; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 2427; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 2428; HASWELL-NEXT: retq # sched: [7:1.00] 2429; 2430; BROADWELL-LABEL: test_pcmpgtq: 2431; BROADWELL: # %bb.0: 2432; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 2433; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 2434; BROADWELL-NEXT: retq # sched: [7:1.00] 2435; 2436; SKYLAKE-LABEL: test_pcmpgtq: 2437; SKYLAKE: # %bb.0: 2438; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2439; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2440; SKYLAKE-NEXT: retq # sched: [7:1.00] 2441; 2442; SKX-LABEL: test_pcmpgtq: 2443; SKX: # %bb.0: 2444; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2445; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2446; SKX-NEXT: retq # sched: [7:1.00] 2447; 2448; ZNVER1-LABEL: test_pcmpgtq: 2449; ZNVER1: # %bb.0: 2450; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2451; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 2452; ZNVER1-NEXT: retq # sched: [1:0.50] 2453 %1 = icmp sgt <4 x i64> %a0, %a1 2454 %2 = sext <4 x i1> %1 to <4 x i64> 2455 %3 = load <4 x i64>, <4 x i64> *%a2, align 32 2456 %4 = icmp sgt <4 x i64> %2, %3 2457 %5 = sext <4 x i1> %4 to <4 x i64> 2458 ret <4 x i64> %5 2459} 2460 2461define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 2462; GENERIC-LABEL: test_pcmpgtw: 2463; GENERIC: # %bb.0: 2464; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2465; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2466; GENERIC-NEXT: retq # sched: [1:1.00] 
2467; 2468; HASWELL-LABEL: test_pcmpgtw: 2469; HASWELL: # %bb.0: 2470; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2471; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2472; HASWELL-NEXT: retq # sched: [7:1.00] 2473; 2474; BROADWELL-LABEL: test_pcmpgtw: 2475; BROADWELL: # %bb.0: 2476; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2477; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 2478; BROADWELL-NEXT: retq # sched: [7:1.00] 2479; 2480; SKYLAKE-LABEL: test_pcmpgtw: 2481; SKYLAKE: # %bb.0: 2482; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2483; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2484; SKYLAKE-NEXT: retq # sched: [7:1.00] 2485; 2486; SKX-LABEL: test_pcmpgtw: 2487; SKX: # %bb.0: 2488; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2489; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2490; SKX-NEXT: retq # sched: [7:1.00] 2491; 2492; ZNVER1-LABEL: test_pcmpgtw: 2493; ZNVER1: # %bb.0: 2494; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2495; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 2496; ZNVER1-NEXT: retq # sched: [1:0.50] 2497 %1 = icmp sgt <16 x i16> %a0, %a1 2498 %2 = sext <16 x i1> %1 to <16 x i16> 2499 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 2500 %4 = icmp sgt <16 x i16> %2, %3 2501 %5 = sext <16 x i1> %4 to <16 x i16> 2502 ret <16 x i16> %5 2503} 2504 2505define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 2506; GENERIC-LABEL: test_perm2i128: 2507; GENERIC: # %bb.0: 2508; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 2509; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] 2510; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2511; GENERIC-NEXT: retq # sched: [1:1.00] 2512; 2513; HASWELL-LABEL: test_perm2i128: 2514; HASWELL: # %bb.0: 2515; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] 
sched: [3:1.00] 2516; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 2517; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2518; HASWELL-NEXT: retq # sched: [7:1.00] 2519; 2520; BROADWELL-LABEL: test_perm2i128: 2521; BROADWELL: # %bb.0: 2522; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 2523; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00] 2524; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2525; BROADWELL-NEXT: retq # sched: [7:1.00] 2526; 2527; SKYLAKE-LABEL: test_perm2i128: 2528; SKYLAKE: # %bb.0: 2529; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 2530; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 2531; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 2532; SKYLAKE-NEXT: retq # sched: [7:1.00] 2533; 2534; SKX-LABEL: test_perm2i128: 2535; SKX: # %bb.0: 2536; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 2537; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 2538; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 2539; SKX-NEXT: retq # sched: [7:1.00] 2540; 2541; ZNVER1-LABEL: test_perm2i128: 2542; ZNVER1: # %bb.0: 2543; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25] 2544; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50] 2545; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 2546; ZNVER1-NEXT: retq # sched: [1:0.50] 2547 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 2548 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 2549 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 2550 %4 = add <4 x i64> %1, %3 2551 ret <4 x i64> %4 2552} 2553 2554define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 2555; GENERIC-LABEL: test_permd: 2556; GENERIC: # %bb.0: 2557; 
GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00] 2558; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 2559; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2560; GENERIC-NEXT: retq # sched: [1:1.00] 2561; 2562; HASWELL-LABEL: test_permd: 2563; HASWELL: # %bb.0: 2564; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2565; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2566; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2567; HASWELL-NEXT: retq # sched: [7:1.00] 2568; 2569; BROADWELL-LABEL: test_permd: 2570; BROADWELL: # %bb.0: 2571; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2572; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 2573; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2574; BROADWELL-NEXT: retq # sched: [7:1.00] 2575; 2576; SKYLAKE-LABEL: test_permd: 2577; SKYLAKE: # %bb.0: 2578; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2579; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2580; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 2581; SKYLAKE-NEXT: retq # sched: [7:1.00] 2582; 2583; SKX-LABEL: test_permd: 2584; SKX: # %bb.0: 2585; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2586; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2587; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 2588; SKX-NEXT: retq # sched: [7:1.00] 2589; 2590; ZNVER1-LABEL: test_permd: 2591; ZNVER1: # %bb.0: 2592; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25] 2593; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50] 2594; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 2595; ZNVER1-NEXT: retq # sched: [1:0.50] 2596 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0) 2597 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 2598 %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0) 2599 %4 = add <8 x i32> %1, %3 2600 ret <8 x i32> %4 2601} 
2602declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly 2603 2604define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) { 2605; GENERIC-LABEL: test_permpd: 2606; GENERIC: # %bb.0: 2607; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] 2608; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00] 2609; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2610; GENERIC-NEXT: retq # sched: [1:1.00] 2611; 2612; HASWELL-LABEL: test_permpd: 2613; HASWELL: # %bb.0: 2614; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2615; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] 2616; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2617; HASWELL-NEXT: retq # sched: [7:1.00] 2618; 2619; BROADWELL-LABEL: test_permpd: 2620; BROADWELL: # %bb.0: 2621; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2622; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00] 2623; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2624; BROADWELL-NEXT: retq # sched: [7:1.00] 2625; 2626; SKYLAKE-LABEL: test_permpd: 2627; SKYLAKE: # %bb.0: 2628; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2629; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] 2630; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2631; SKYLAKE-NEXT: retq # sched: [7:1.00] 2632; 2633; SKX-LABEL: test_permpd: 2634; SKX: # %bb.0: 2635; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2636; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] 2637; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2638; SKX-NEXT: retq # sched: [7:1.00] 2639; 2640; ZNVER1-LABEL: test_permpd: 2641; ZNVER1: # %bb.0: 2642; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50] 2643; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25] 2644; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, 
%ymm0 # sched: [3:1.00] 2645; ZNVER1-NEXT: retq # sched: [1:0.50] 2646 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3> 2647 %2 = load <4 x double>, <4 x double> *%a1, align 32 2648 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3> 2649 %4 = fadd <4 x double> %1, %3 2650 ret <4 x double> %4 2651} 2652 2653define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) { 2654; GENERIC-LABEL: test_permps: 2655; GENERIC: # %bb.0: 2656; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00] 2657; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 2658; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2659; GENERIC-NEXT: retq # sched: [1:1.00] 2660; 2661; HASWELL-LABEL: test_permps: 2662; HASWELL: # %bb.0: 2663; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2664; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2665; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2666; HASWELL-NEXT: retq # sched: [7:1.00] 2667; 2668; BROADWELL-LABEL: test_permps: 2669; BROADWELL: # %bb.0: 2670; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2671; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 2672; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2673; BROADWELL-NEXT: retq # sched: [7:1.00] 2674; 2675; SKYLAKE-LABEL: test_permps: 2676; SKYLAKE: # %bb.0: 2677; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2678; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2679; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 2680; SKYLAKE-NEXT: retq # sched: [7:1.00] 2681; 2682; SKX-LABEL: test_permps: 2683; SKX: # %bb.0: 2684; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 2685; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2686; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 2687; SKX-NEXT: retq # sched: [7:1.00] 2688; 
2689; ZNVER1-LABEL: test_permps: 2690; ZNVER1: # %bb.0: 2691; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25] 2692; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50] 2693; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2694; ZNVER1-NEXT: retq # sched: [1:0.50] 2695 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0) 2696 %2 = load <8 x float>, <8 x float> *%a2, align 32 2697 %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0) 2698 %4 = fadd <8 x float> %1, %3 2699 ret <8 x float> %4 2700} 2701declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly 2702 2703define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) { 2704; GENERIC-LABEL: test_permq: 2705; GENERIC: # %bb.0: 2706; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] 2707; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00] 2708; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2709; GENERIC-NEXT: retq # sched: [1:1.00] 2710; 2711; HASWELL-LABEL: test_permq: 2712; HASWELL: # %bb.0: 2713; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2714; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] 2715; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2716; HASWELL-NEXT: retq # sched: [7:1.00] 2717; 2718; BROADWELL-LABEL: test_permq: 2719; BROADWELL: # %bb.0: 2720; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2721; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00] 2722; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2723; BROADWELL-NEXT: retq # sched: [7:1.00] 2724; 2725; SKYLAKE-LABEL: test_permq: 2726; SKYLAKE: # %bb.0: 2727; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2728; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] 2729; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2730; SKYLAKE-NEXT: retq # sched: 
[7:1.00] 2731; 2732; SKX-LABEL: test_permq: 2733; SKX: # %bb.0: 2734; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] 2735; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] 2736; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2737; SKX-NEXT: retq # sched: [7:1.00] 2738; 2739; ZNVER1-LABEL: test_permq: 2740; ZNVER1: # %bb.0: 2741; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50] 2742; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25] 2743; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2744; ZNVER1-NEXT: retq # sched: [1:0.50] 2745 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3> 2746 %2 = load <4 x i64>, <4 x i64> *%a1, align 32 2747 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3> 2748 %4 = add <4 x i64> %1, %3 2749 ret <4 x i64> %4 2750} 2751 2752define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) { 2753; GENERIC-LABEL: test_pgatherdd: 2754; GENERIC: # %bb.0: 2755; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2756; GENERIC-NEXT: retq # sched: [1:1.00] 2757; 2758; HASWELL-LABEL: test_pgatherdd: 2759; HASWELL: # %bb.0: 2760; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67] 2761; HASWELL-NEXT: retq # sched: [7:1.00] 2762; 2763; BROADWELL-LABEL: test_pgatherdd: 2764; BROADWELL: # %bb.0: 2765; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2766; BROADWELL-NEXT: retq # sched: [7:1.00] 2767; 2768; SKYLAKE-LABEL: test_pgatherdd: 2769; SKYLAKE: # %bb.0: 2770; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2771; SKYLAKE-NEXT: retq # sched: [7:1.00] 2772; 2773; SKX-LABEL: test_pgatherdd: 2774; SKX: # %bb.0: 2775; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2776; SKX-NEXT: retq # sched: [7:1.00] 2777; 2778; ZNVER1-LABEL: test_pgatherdd: 2779; ZNVER1: # 
%bb.0: 2780; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 2781; ZNVER1-NEXT: retq # sched: [1:0.50] 2782 %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2) 2783 ret <4 x i32> %1 2784} 2785declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly 2786 2787define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) { 2788; GENERIC-LABEL: test_pgatherdd_ymm: 2789; GENERIC: # %bb.0: 2790; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] 2791; GENERIC-NEXT: retq # sched: [1:1.00] 2792; 2793; HASWELL-LABEL: test_pgatherdd_ymm: 2794; HASWELL: # %bb.0: 2795; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [27:6.50] 2796; HASWELL-NEXT: retq # sched: [7:1.00] 2797; 2798; BROADWELL-LABEL: test_pgatherdd_ymm: 2799; BROADWELL: # %bb.0: 2800; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] 2801; BROADWELL-NEXT: retq # sched: [7:1.00] 2802; 2803; SKYLAKE-LABEL: test_pgatherdd_ymm: 2804; SKYLAKE: # %bb.0: 2805; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] 2806; SKYLAKE-NEXT: retq # sched: [7:1.00] 2807; 2808; SKX-LABEL: test_pgatherdd_ymm: 2809; SKX: # %bb.0: 2810; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] 2811; SKX-NEXT: retq # sched: [7:1.00] 2812; 2813; ZNVER1-LABEL: test_pgatherdd_ymm: 2814; ZNVER1: # %bb.0: 2815; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25] 2816; ZNVER1-NEXT: retq # sched: [1:0.50] 2817 %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2) 2818 ret <8 x i32> %1 2819} 2820declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly 2821 2822define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) { 2823; GENERIC-LABEL: 
test_pgatherdq: 2824; GENERIC: # %bb.0: 2825; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2826; GENERIC-NEXT: retq # sched: [1:1.00] 2827; 2828; HASWELL-LABEL: test_pgatherdq: 2829; HASWELL: # %bb.0: 2830; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67] 2831; HASWELL-NEXT: retq # sched: [7:1.00] 2832; 2833; BROADWELL-LABEL: test_pgatherdq: 2834; BROADWELL: # %bb.0: 2835; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2836; BROADWELL-NEXT: retq # sched: [7:1.00] 2837; 2838; SKYLAKE-LABEL: test_pgatherdq: 2839; SKYLAKE: # %bb.0: 2840; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2841; SKYLAKE-NEXT: retq # sched: [7:1.00] 2842; 2843; SKX-LABEL: test_pgatherdq: 2844; SKX: # %bb.0: 2845; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2846; SKX-NEXT: retq # sched: [7:1.00] 2847; 2848; ZNVER1-LABEL: test_pgatherdq: 2849; ZNVER1: # %bb.0: 2850; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 2851; ZNVER1-NEXT: retq # sched: [1:0.50] 2852 %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2) 2853 ret <2 x i64> %1 2854} 2855declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly 2856 2857define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) { 2858; GENERIC-LABEL: test_pgatherdq_ymm: 2859; GENERIC: # %bb.0: 2860; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50] 2861; GENERIC-NEXT: retq # sched: [1:1.00] 2862; 2863; HASWELL-LABEL: test_pgatherdq_ymm: 2864; HASWELL: # %bb.0: 2865; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [27:4.00] 2866; HASWELL-NEXT: retq # sched: [7:1.00] 2867; 2868; BROADWELL-LABEL: test_pgatherdq_ymm: 2869; BROADWELL: # %bb.0: 2870; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50] 2871; BROADWELL-NEXT: 
retq # sched: [7:1.00] 2872; 2873; SKYLAKE-LABEL: test_pgatherdq_ymm: 2874; SKYLAKE: # %bb.0: 2875; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00] 2876; SKYLAKE-NEXT: retq # sched: [7:1.00] 2877; 2878; SKX-LABEL: test_pgatherdq_ymm: 2879; SKX: # %bb.0: 2880; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00] 2881; SKX-NEXT: retq # sched: [7:1.00] 2882; 2883; ZNVER1-LABEL: test_pgatherdq_ymm: 2884; ZNVER1: # %bb.0: 2885; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:0.25] 2886; ZNVER1-NEXT: retq # sched: [1:0.50] 2887 %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2) 2888 ret <4 x i64> %1 2889} 2890declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly 2891 2892define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) { 2893; GENERIC-LABEL: test_pgatherqd: 2894; GENERIC: # %bb.0: 2895; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2896; GENERIC-NEXT: retq # sched: [1:1.00] 2897; 2898; HASWELL-LABEL: test_pgatherqd: 2899; HASWELL: # %bb.0: 2900; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:5.00] 2901; HASWELL-NEXT: retq # sched: [7:1.00] 2902; 2903; BROADWELL-LABEL: test_pgatherqd: 2904; BROADWELL: # %bb.0: 2905; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2906; BROADWELL-NEXT: retq # sched: [7:1.00] 2907; 2908; SKYLAKE-LABEL: test_pgatherqd: 2909; SKYLAKE: # %bb.0: 2910; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2911; SKYLAKE-NEXT: retq # sched: [7:1.00] 2912; 2913; SKX-LABEL: test_pgatherqd: 2914; SKX: # %bb.0: 2915; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2916; SKX-NEXT: retq # sched: [7:1.00] 2917; 2918; ZNVER1-LABEL: test_pgatherqd: 2919; ZNVER1: # %bb.0: 2920; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: 
[100:0.25] 2921; ZNVER1-NEXT: retq # sched: [1:0.50] 2922 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2) 2923 ret <4 x i32> %1 2924} 2925declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly 2926 2927define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) { 2928; GENERIC-LABEL: test_pgatherqd_ymm: 2929; GENERIC: # %bb.0: 2930; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50] 2931; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 2932; GENERIC-NEXT: retq # sched: [1:1.00] 2933; 2934; HASWELL-LABEL: test_pgatherqd_ymm: 2935; HASWELL: # %bb.0: 2936; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00] 2937; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 2938; HASWELL-NEXT: retq # sched: [7:1.00] 2939; 2940; BROADWELL-LABEL: test_pgatherqd_ymm: 2941; BROADWELL: # %bb.0: 2942; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50] 2943; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 2944; BROADWELL-NEXT: retq # sched: [7:1.00] 2945; 2946; SKYLAKE-LABEL: test_pgatherqd_ymm: 2947; SKYLAKE: # %bb.0: 2948; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] 2949; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 2950; SKYLAKE-NEXT: retq # sched: [7:1.00] 2951; 2952; SKX-LABEL: test_pgatherqd_ymm: 2953; SKX: # %bb.0: 2954; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] 2955; SKX-NEXT: vzeroupper # sched: [4:1.00] 2956; SKX-NEXT: retq # sched: [7:1.00] 2957; 2958; ZNVER1-LABEL: test_pgatherqd_ymm: 2959; ZNVER1: # %bb.0: 2960; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:0.25] 2961; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2962; ZNVER1-NEXT: retq # sched: [1:0.50] 2963 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2) 2964 ret <4 x i32> %1 2965} 2966declare <4 x 
i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly 2967 2968define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) { 2969; GENERIC-LABEL: test_pgatherqq: 2970; GENERIC: # %bb.0: 2971; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2972; GENERIC-NEXT: retq # sched: [1:1.00] 2973; 2974; HASWELL-LABEL: test_pgatherqq: 2975; HASWELL: # %bb.0: 2976; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33] 2977; HASWELL-NEXT: retq # sched: [7:1.00] 2978; 2979; BROADWELL-LABEL: test_pgatherqq: 2980; BROADWELL: # %bb.0: 2981; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 2982; BROADWELL-NEXT: retq # sched: [7:1.00] 2983; 2984; SKYLAKE-LABEL: test_pgatherqq: 2985; SKYLAKE: # %bb.0: 2986; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2987; SKYLAKE-NEXT: retq # sched: [7:1.00] 2988; 2989; SKX-LABEL: test_pgatherqq: 2990; SKX: # %bb.0: 2991; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 2992; SKX-NEXT: retq # sched: [7:1.00] 2993; 2994; ZNVER1-LABEL: test_pgatherqq: 2995; ZNVER1: # %bb.0: 2996; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 2997; ZNVER1-NEXT: retq # sched: [1:0.50] 2998 %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2) 2999 ret <2 x i64> %1 3000} 3001declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly 3002 3003define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) { 3004; GENERIC-LABEL: test_pgatherqq_ymm: 3005; GENERIC: # %bb.0: 3006; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] 3007; GENERIC-NEXT: retq # sched: [1:1.00] 3008; 3009; HASWELL-LABEL: test_pgatherqq_ymm: 3010; HASWELL: # %bb.0: 3011; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [24:5.00] 3012; 
HASWELL-NEXT: retq # sched: [7:1.00] 3013; 3014; BROADWELL-LABEL: test_pgatherqq_ymm: 3015; BROADWELL: # %bb.0: 3016; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] 3017; BROADWELL-NEXT: retq # sched: [7:1.00] 3018; 3019; SKYLAKE-LABEL: test_pgatherqq_ymm: 3020; SKYLAKE: # %bb.0: 3021; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] 3022; SKYLAKE-NEXT: retq # sched: [7:1.00] 3023; 3024; SKX-LABEL: test_pgatherqq_ymm: 3025; SKX: # %bb.0: 3026; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] 3027; SKX-NEXT: retq # sched: [7:1.00] 3028; 3029; ZNVER1-LABEL: test_pgatherqq_ymm: 3030; ZNVER1: # %bb.0: 3031; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25] 3032; ZNVER1-NEXT: retq # sched: [1:0.50] 3033 %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2) 3034 ret <4 x i64> %1 3035} 3036declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly 3037 3038define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3039; GENERIC-LABEL: test_phaddd: 3040; GENERIC: # %bb.0: 3041; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3042; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3043; GENERIC-NEXT: retq # sched: [1:1.00] 3044; 3045; HASWELL-LABEL: test_phaddd: 3046; HASWELL: # %bb.0: 3047; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3048; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3049; HASWELL-NEXT: retq # sched: [7:1.00] 3050; 3051; BROADWELL-LABEL: test_phaddd: 3052; BROADWELL: # %bb.0: 3053; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3054; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3055; BROADWELL-NEXT: retq # sched: [7:1.00] 3056; 3057; SKYLAKE-LABEL: test_phaddd: 3058; SKYLAKE: # %bb.0: 3059; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 
3060; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3061; SKYLAKE-NEXT: retq # sched: [7:1.00] 3062; 3063; SKX-LABEL: test_phaddd: 3064; SKX: # %bb.0: 3065; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3066; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3067; SKX-NEXT: retq # sched: [7:1.00] 3068; 3069; ZNVER1-LABEL: test_phaddd: 3070; ZNVER1: # %bb.0: 3071; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3072; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3073; ZNVER1-NEXT: retq # sched: [1:0.50] 3074 %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) 3075 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3076 %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2) 3077 ret <8 x i32> %3 3078} 3079declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone 3080 3081define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3082; GENERIC-LABEL: test_phaddsw: 3083; GENERIC: # %bb.0: 3084; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3085; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3086; GENERIC-NEXT: retq # sched: [1:1.00] 3087; 3088; HASWELL-LABEL: test_phaddsw: 3089; HASWELL: # %bb.0: 3090; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3091; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3092; HASWELL-NEXT: retq # sched: [7:1.00] 3093; 3094; BROADWELL-LABEL: test_phaddsw: 3095; BROADWELL: # %bb.0: 3096; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3097; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3098; BROADWELL-NEXT: retq # sched: [7:1.00] 3099; 3100; SKYLAKE-LABEL: test_phaddsw: 3101; SKYLAKE: # %bb.0: 3102; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3103; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3104; SKYLAKE-NEXT: retq # sched: [7:1.00] 3105; 3106; SKX-LABEL: test_phaddsw: 
3107; SKX: # %bb.0: 3108; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3109; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3110; SKX-NEXT: retq # sched: [7:1.00] 3111; 3112; ZNVER1-LABEL: test_phaddsw: 3113; ZNVER1: # %bb.0: 3114; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3115; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3116; ZNVER1-NEXT: retq # sched: [1:0.50] 3117 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) 3118 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3119 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2) 3120 ret <16 x i16> %3 3121} 3122declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone 3123 3124define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3125; GENERIC-LABEL: test_phaddw: 3126; GENERIC: # %bb.0: 3127; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3128; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3129; GENERIC-NEXT: retq # sched: [1:1.00] 3130; 3131; HASWELL-LABEL: test_phaddw: 3132; HASWELL: # %bb.0: 3133; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3134; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3135; HASWELL-NEXT: retq # sched: [7:1.00] 3136; 3137; BROADWELL-LABEL: test_phaddw: 3138; BROADWELL: # %bb.0: 3139; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3140; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3141; BROADWELL-NEXT: retq # sched: [7:1.00] 3142; 3143; SKYLAKE-LABEL: test_phaddw: 3144; SKYLAKE: # %bb.0: 3145; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3146; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3147; SKYLAKE-NEXT: retq # sched: [7:1.00] 3148; 3149; SKX-LABEL: test_phaddw: 3150; SKX: # %bb.0: 3151; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3152; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: 
[10:2.00] 3153; SKX-NEXT: retq # sched: [7:1.00] 3154; 3155; ZNVER1-LABEL: test_phaddw: 3156; ZNVER1: # %bb.0: 3157; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3158; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3159; ZNVER1-NEXT: retq # sched: [1:0.50] 3160 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) 3161 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3162 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2) 3163 ret <16 x i16> %3 3164} 3165declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone 3166 3167define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3168; GENERIC-LABEL: test_phsubd: 3169; GENERIC: # %bb.0: 3170; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3171; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3172; GENERIC-NEXT: retq # sched: [1:1.00] 3173; 3174; HASWELL-LABEL: test_phsubd: 3175; HASWELL: # %bb.0: 3176; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3177; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3178; HASWELL-NEXT: retq # sched: [7:1.00] 3179; 3180; BROADWELL-LABEL: test_phsubd: 3181; BROADWELL: # %bb.0: 3182; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3183; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3184; BROADWELL-NEXT: retq # sched: [7:1.00] 3185; 3186; SKYLAKE-LABEL: test_phsubd: 3187; SKYLAKE: # %bb.0: 3188; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3189; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3190; SKYLAKE-NEXT: retq # sched: [7:1.00] 3191; 3192; SKX-LABEL: test_phsubd: 3193; SKX: # %bb.0: 3194; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3195; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3196; SKX-NEXT: retq # sched: [7:1.00] 3197; 3198; ZNVER1-LABEL: test_phsubd: 3199; ZNVER1: # %bb.0: 3200; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, 
%ymm0 # sched: [100:0.25] 3201; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3202; ZNVER1-NEXT: retq # sched: [1:0.50] 3203 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) 3204 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3205 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2) 3206 ret <8 x i32> %3 3207} 3208declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone 3209 3210define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3211; GENERIC-LABEL: test_phsubsw: 3212; GENERIC: # %bb.0: 3213; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3214; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3215; GENERIC-NEXT: retq # sched: [1:1.00] 3216; 3217; HASWELL-LABEL: test_phsubsw: 3218; HASWELL: # %bb.0: 3219; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3220; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3221; HASWELL-NEXT: retq # sched: [7:1.00] 3222; 3223; BROADWELL-LABEL: test_phsubsw: 3224; BROADWELL: # %bb.0: 3225; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3226; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3227; BROADWELL-NEXT: retq # sched: [7:1.00] 3228; 3229; SKYLAKE-LABEL: test_phsubsw: 3230; SKYLAKE: # %bb.0: 3231; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3232; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3233; SKYLAKE-NEXT: retq # sched: [7:1.00] 3234; 3235; SKX-LABEL: test_phsubsw: 3236; SKX: # %bb.0: 3237; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3238; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3239; SKX-NEXT: retq # sched: [7:1.00] 3240; 3241; ZNVER1-LABEL: test_phsubsw: 3242; ZNVER1: # %bb.0: 3243; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3244; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3245; ZNVER1-NEXT: retq # sched: [1:0.50] 3246 %1 
= call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) 3247 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3248 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2) 3249 ret <16 x i16> %3 3250} 3251declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone 3252 3253define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3254; GENERIC-LABEL: test_phsubw: 3255; GENERIC: # %bb.0: 3256; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3257; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3258; GENERIC-NEXT: retq # sched: [1:1.00] 3259; 3260; HASWELL-LABEL: test_phsubw: 3261; HASWELL: # %bb.0: 3262; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3263; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3264; HASWELL-NEXT: retq # sched: [7:1.00] 3265; 3266; BROADWELL-LABEL: test_phsubw: 3267; BROADWELL: # %bb.0: 3268; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3269; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3270; BROADWELL-NEXT: retq # sched: [7:1.00] 3271; 3272; SKYLAKE-LABEL: test_phsubw: 3273; SKYLAKE: # %bb.0: 3274; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3275; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3276; SKYLAKE-NEXT: retq # sched: [7:1.00] 3277; 3278; SKX-LABEL: test_phsubw: 3279; SKX: # %bb.0: 3280; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3281; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3282; SKX-NEXT: retq # sched: [7:1.00] 3283; 3284; ZNVER1-LABEL: test_phsubw: 3285; ZNVER1: # %bb.0: 3286; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3287; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3288; ZNVER1-NEXT: retq # sched: [1:0.50] 3289 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) 3290 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3291 %3 = call <16 x 
i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2) 3292 ret <16 x i16> %3 3293} 3294declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone 3295 3296define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3297; GENERIC-LABEL: test_pmaddubsw: 3298; GENERIC: # %bb.0: 3299; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3300; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3301; GENERIC-NEXT: retq # sched: [1:1.00] 3302; 3303; HASWELL-LABEL: test_pmaddubsw: 3304; HASWELL: # %bb.0: 3305; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3306; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3307; HASWELL-NEXT: retq # sched: [7:1.00] 3308; 3309; BROADWELL-LABEL: test_pmaddubsw: 3310; BROADWELL: # %bb.0: 3311; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3312; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3313; BROADWELL-NEXT: retq # sched: [7:1.00] 3314; 3315; SKYLAKE-LABEL: test_pmaddubsw: 3316; SKYLAKE: # %bb.0: 3317; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3318; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3319; SKYLAKE-NEXT: retq # sched: [7:1.00] 3320; 3321; SKX-LABEL: test_pmaddubsw: 3322; SKX: # %bb.0: 3323; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3324; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3325; SKX-NEXT: retq # sched: [7:1.00] 3326; 3327; ZNVER1-LABEL: test_pmaddubsw: 3328; ZNVER1: # %bb.0: 3329; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 3330; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3331; ZNVER1-NEXT: retq # sched: [1:0.50] 3332 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) 3333 %2 = bitcast <16 x i16> %1 to <32 x i8> 3334 %3 = load <32 x i8>, <32 x i8> *%a2, align 32 3335 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x 
i8> %3) 3336 ret <16 x i16> %4 3337} 3338declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone 3339 3340define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3341; GENERIC-LABEL: test_pmaddwd: 3342; GENERIC: # %bb.0: 3343; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3344; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3345; GENERIC-NEXT: retq # sched: [1:1.00] 3346; 3347; HASWELL-LABEL: test_pmaddwd: 3348; HASWELL: # %bb.0: 3349; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3350; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3351; HASWELL-NEXT: retq # sched: [7:1.00] 3352; 3353; BROADWELL-LABEL: test_pmaddwd: 3354; BROADWELL: # %bb.0: 3355; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3356; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3357; BROADWELL-NEXT: retq # sched: [7:1.00] 3358; 3359; SKYLAKE-LABEL: test_pmaddwd: 3360; SKYLAKE: # %bb.0: 3361; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3362; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3363; SKYLAKE-NEXT: retq # sched: [7:1.00] 3364; 3365; SKX-LABEL: test_pmaddwd: 3366; SKX: # %bb.0: 3367; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3368; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3369; SKX-NEXT: retq # sched: [7:1.00] 3370; 3371; ZNVER1-LABEL: test_pmaddwd: 3372; ZNVER1: # %bb.0: 3373; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 3374; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3375; ZNVER1-NEXT: retq # sched: [1:0.50] 3376 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) 3377 %2 = bitcast <8 x i32> %1 to <16 x i16> 3378 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 3379 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3) 3380 ret <8 x i32> %4 3381} 3382declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x 
i16>, <16 x i16>) nounwind readnone 3383 3384define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) { 3385; GENERIC-LABEL: test_pmaskmovd: 3386; GENERIC: # %bb.0: 3387; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 3388; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3389; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3390; GENERIC-NEXT: retq # sched: [1:1.00] 3391; 3392; HASWELL-LABEL: test_pmaskmovd: 3393; HASWELL: # %bb.0: 3394; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] 3395; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3396; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3397; HASWELL-NEXT: retq # sched: [7:1.00] 3398; 3399; BROADWELL-LABEL: test_pmaskmovd: 3400; BROADWELL: # %bb.0: 3401; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00] 3402; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3403; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3404; BROADWELL-NEXT: retq # sched: [7:1.00] 3405; 3406; SKYLAKE-LABEL: test_pmaskmovd: 3407; SKYLAKE: # %bb.0: 3408; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3409; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3410; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3411; SKYLAKE-NEXT: retq # sched: [7:1.00] 3412; 3413; SKX-LABEL: test_pmaskmovd: 3414; SKX: # %bb.0: 3415; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3416; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3417; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3418; SKX-NEXT: retq # sched: [7:1.00] 3419; 3420; ZNVER1-LABEL: test_pmaskmovd: 3421; ZNVER1: # %bb.0: 3422; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:0.25] 3423; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:0.25] 3424; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] 3425; ZNVER1-NEXT: retq # sched: [1:0.50] 3426 %1 = call <4 x i32> 
@llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) 3427 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) 3428 ret <4 x i32> %1 3429} 3430declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly 3431declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind 3432 3433define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) { 3434; GENERIC-LABEL: test_pmaskmovd_ymm: 3435; GENERIC: # %bb.0: 3436; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 3437; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3438; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 3439; GENERIC-NEXT: retq # sched: [1:1.00] 3440; 3441; HASWELL-LABEL: test_pmaskmovd_ymm: 3442; HASWELL: # %bb.0: 3443; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:2.00] 3444; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3445; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3446; HASWELL-NEXT: retq # sched: [7:1.00] 3447; 3448; BROADWELL-LABEL: test_pmaskmovd_ymm: 3449; BROADWELL: # %bb.0: 3450; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00] 3451; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3452; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3453; BROADWELL-NEXT: retq # sched: [7:1.00] 3454; 3455; SKYLAKE-LABEL: test_pmaskmovd_ymm: 3456; SKYLAKE: # %bb.0: 3457; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3458; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3459; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3460; SKYLAKE-NEXT: retq # sched: [7:1.00] 3461; 3462; SKX-LABEL: test_pmaskmovd_ymm: 3463; SKX: # %bb.0: 3464; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3465; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3466; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3467; SKX-NEXT: retq # sched: [7:1.00] 3468; 3469; ZNVER1-LABEL: 
test_pmaskmovd_ymm: 3470; ZNVER1: # %bb.0: 3471; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:0.25] 3472; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:0.25] 3473; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25] 3474; ZNVER1-NEXT: retq # sched: [1:0.50] 3475 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) 3476 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) 3477 ret <8 x i32> %1 3478} 3479declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly 3480declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind 3481 3482define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) { 3483; GENERIC-LABEL: test_pmaskmovq: 3484; GENERIC: # %bb.0: 3485; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 3486; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3487; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3488; GENERIC-NEXT: retq # sched: [1:1.00] 3489; 3490; HASWELL-LABEL: test_pmaskmovq: 3491; HASWELL: # %bb.0: 3492; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:2.00] 3493; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3494; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3495; HASWELL-NEXT: retq # sched: [7:1.00] 3496; 3497; BROADWELL-LABEL: test_pmaskmovq: 3498; BROADWELL: # %bb.0: 3499; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00] 3500; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3501; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3502; BROADWELL-NEXT: retq # sched: [7:1.00] 3503; 3504; SKYLAKE-LABEL: test_pmaskmovq: 3505; SKYLAKE: # %bb.0: 3506; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3507; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3508; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3509; SKYLAKE-NEXT: retq # sched: [7:1.00] 3510; 3511; SKX-LABEL: 
test_pmaskmovq: 3512; SKX: # %bb.0: 3513; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3514; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3515; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3516; SKX-NEXT: retq # sched: [7:1.00] 3517; 3518; ZNVER1-LABEL: test_pmaskmovq: 3519; ZNVER1: # %bb.0: 3520; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 3521; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:0.25] 3522; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] 3523; ZNVER1-NEXT: retq # sched: [1:0.50] 3524 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) 3525 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) 3526 ret <2 x i64> %1 3527} 3528declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly 3529declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind 3530 3531define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) { 3532; GENERIC-LABEL: test_pmaskmovq_ymm: 3533; GENERIC: # %bb.0: 3534; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 3535; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3536; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 3537; GENERIC-NEXT: retq # sched: [1:1.00] 3538; 3539; HASWELL-LABEL: test_pmaskmovq_ymm: 3540; HASWELL: # %bb.0: 3541; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:2.00] 3542; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3543; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3544; HASWELL-NEXT: retq # sched: [7:1.00] 3545; 3546; BROADWELL-LABEL: test_pmaskmovq_ymm: 3547; BROADWELL: # %bb.0: 3548; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00] 3549; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3550; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3551; BROADWELL-NEXT: retq # sched: [7:1.00] 3552; 3553; SKYLAKE-LABEL: test_pmaskmovq_ymm: 3554; 
SKYLAKE: # %bb.0: 3555; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3556; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3557; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3558; SKYLAKE-NEXT: retq # sched: [7:1.00] 3559; 3560; SKX-LABEL: test_pmaskmovq_ymm: 3561; SKX: # %bb.0: 3562; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3563; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3564; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3565; SKX-NEXT: retq # sched: [7:1.00] 3566; 3567; ZNVER1-LABEL: test_pmaskmovq_ymm: 3568; ZNVER1: # %bb.0: 3569; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50] 3570; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:0.25] 3571; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25] 3572; ZNVER1-NEXT: retq # sched: [1:0.50] 3573 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) 3574 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) 3575 ret <4 x i64> %1 3576} 3577declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly 3578declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind 3579 3580define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3581; GENERIC-LABEL: test_pmaxsb: 3582; GENERIC: # %bb.0: 3583; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3584; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3585; GENERIC-NEXT: retq # sched: [1:1.00] 3586; 3587; HASWELL-LABEL: test_pmaxsb: 3588; HASWELL: # %bb.0: 3589; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3590; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3591; HASWELL-NEXT: retq # sched: [7:1.00] 3592; 3593; BROADWELL-LABEL: test_pmaxsb: 3594; BROADWELL: # %bb.0: 3595; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3596; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3597; 
BROADWELL-NEXT: retq # sched: [7:1.00] 3598; 3599; SKYLAKE-LABEL: test_pmaxsb: 3600; SKYLAKE: # %bb.0: 3601; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3602; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3603; SKYLAKE-NEXT: retq # sched: [7:1.00] 3604; 3605; SKX-LABEL: test_pmaxsb: 3606; SKX: # %bb.0: 3607; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3608; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3609; SKX-NEXT: retq # sched: [7:1.00] 3610; 3611; ZNVER1-LABEL: test_pmaxsb: 3612; ZNVER1: # %bb.0: 3613; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3614; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3615; ZNVER1-NEXT: retq # sched: [1:0.50] 3616 %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) 3617 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 3618 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2) 3619 ret <32 x i8> %3 3620} 3621declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone 3622 3623define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3624; GENERIC-LABEL: test_pmaxsd: 3625; GENERIC: # %bb.0: 3626; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3627; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3628; GENERIC-NEXT: retq # sched: [1:1.00] 3629; 3630; HASWELL-LABEL: test_pmaxsd: 3631; HASWELL: # %bb.0: 3632; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3633; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3634; HASWELL-NEXT: retq # sched: [7:1.00] 3635; 3636; BROADWELL-LABEL: test_pmaxsd: 3637; BROADWELL: # %bb.0: 3638; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3639; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3640; BROADWELL-NEXT: retq # sched: [7:1.00] 3641; 3642; SKYLAKE-LABEL: test_pmaxsd: 3643; SKYLAKE: # %bb.0: 3644; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3645; 
SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3646; SKYLAKE-NEXT: retq # sched: [7:1.00] 3647; 3648; SKX-LABEL: test_pmaxsd: 3649; SKX: # %bb.0: 3650; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3651; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3652; SKX-NEXT: retq # sched: [7:1.00] 3653; 3654; ZNVER1-LABEL: test_pmaxsd: 3655; ZNVER1: # %bb.0: 3656; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3657; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3658; ZNVER1-NEXT: retq # sched: [1:0.50] 3659 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) 3660 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3661 %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2) 3662 ret <8 x i32> %3 3663} 3664declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone 3665 3666define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3667; GENERIC-LABEL: test_pmaxsw: 3668; GENERIC: # %bb.0: 3669; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3670; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3671; GENERIC-NEXT: retq # sched: [1:1.00] 3672; 3673; HASWELL-LABEL: test_pmaxsw: 3674; HASWELL: # %bb.0: 3675; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3676; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3677; HASWELL-NEXT: retq # sched: [7:1.00] 3678; 3679; BROADWELL-LABEL: test_pmaxsw: 3680; BROADWELL: # %bb.0: 3681; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3682; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3683; BROADWELL-NEXT: retq # sched: [7:1.00] 3684; 3685; SKYLAKE-LABEL: test_pmaxsw: 3686; SKYLAKE: # %bb.0: 3687; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3688; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3689; SKYLAKE-NEXT: retq # sched: [7:1.00] 3690; 3691; SKX-LABEL: test_pmaxsw: 3692; SKX: # %bb.0: 3693; 
SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3694; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3695; SKX-NEXT: retq # sched: [7:1.00] 3696; 3697; ZNVER1-LABEL: test_pmaxsw: 3698; ZNVER1: # %bb.0: 3699; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3700; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3701; ZNVER1-NEXT: retq # sched: [1:0.50] 3702 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) 3703 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3704 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2) 3705 ret <16 x i16> %3 3706} 3707declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone 3708 3709define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3710; GENERIC-LABEL: test_pmaxub: 3711; GENERIC: # %bb.0: 3712; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3713; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3714; GENERIC-NEXT: retq # sched: [1:1.00] 3715; 3716; HASWELL-LABEL: test_pmaxub: 3717; HASWELL: # %bb.0: 3718; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3719; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3720; HASWELL-NEXT: retq # sched: [7:1.00] 3721; 3722; BROADWELL-LABEL: test_pmaxub: 3723; BROADWELL: # %bb.0: 3724; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3725; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3726; BROADWELL-NEXT: retq # sched: [7:1.00] 3727; 3728; SKYLAKE-LABEL: test_pmaxub: 3729; SKYLAKE: # %bb.0: 3730; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3731; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3732; SKYLAKE-NEXT: retq # sched: [7:1.00] 3733; 3734; SKX-LABEL: test_pmaxub: 3735; SKX: # %bb.0: 3736; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3737; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3738; SKX-NEXT: retq # sched: [7:1.00] 3739; 
3740; ZNVER1-LABEL: test_pmaxub: 3741; ZNVER1: # %bb.0: 3742; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3743; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3744; ZNVER1-NEXT: retq # sched: [1:0.50] 3745 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) 3746 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 3747 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2) 3748 ret <32 x i8> %3 3749} 3750declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone 3751 3752define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3753; GENERIC-LABEL: test_pmaxud: 3754; GENERIC: # %bb.0: 3755; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3756; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3757; GENERIC-NEXT: retq # sched: [1:1.00] 3758; 3759; HASWELL-LABEL: test_pmaxud: 3760; HASWELL: # %bb.0: 3761; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3762; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3763; HASWELL-NEXT: retq # sched: [7:1.00] 3764; 3765; BROADWELL-LABEL: test_pmaxud: 3766; BROADWELL: # %bb.0: 3767; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3768; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3769; BROADWELL-NEXT: retq # sched: [7:1.00] 3770; 3771; SKYLAKE-LABEL: test_pmaxud: 3772; SKYLAKE: # %bb.0: 3773; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3774; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3775; SKYLAKE-NEXT: retq # sched: [7:1.00] 3776; 3777; SKX-LABEL: test_pmaxud: 3778; SKX: # %bb.0: 3779; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3780; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3781; SKX-NEXT: retq # sched: [7:1.00] 3782; 3783; ZNVER1-LABEL: test_pmaxud: 3784; ZNVER1: # %bb.0: 3785; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3786; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # 
sched: [8:0.50] 3787; ZNVER1-NEXT: retq # sched: [1:0.50] 3788 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) 3789 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3790 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2) 3791 ret <8 x i32> %3 3792} 3793declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone 3794 3795define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3796; GENERIC-LABEL: test_pmaxuw: 3797; GENERIC: # %bb.0: 3798; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3799; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3800; GENERIC-NEXT: retq # sched: [1:1.00] 3801; 3802; HASWELL-LABEL: test_pmaxuw: 3803; HASWELL: # %bb.0: 3804; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3805; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3806; HASWELL-NEXT: retq # sched: [7:1.00] 3807; 3808; BROADWELL-LABEL: test_pmaxuw: 3809; BROADWELL: # %bb.0: 3810; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3811; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3812; BROADWELL-NEXT: retq # sched: [7:1.00] 3813; 3814; SKYLAKE-LABEL: test_pmaxuw: 3815; SKYLAKE: # %bb.0: 3816; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3817; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3818; SKYLAKE-NEXT: retq # sched: [7:1.00] 3819; 3820; SKX-LABEL: test_pmaxuw: 3821; SKX: # %bb.0: 3822; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3823; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3824; SKX-NEXT: retq # sched: [7:1.00] 3825; 3826; ZNVER1-LABEL: test_pmaxuw: 3827; ZNVER1: # %bb.0: 3828; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3829; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3830; ZNVER1-NEXT: retq # sched: [1:0.50] 3831 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) 3832 %2 = load <16 x i16>, <16 x 
i16> *%a2, align 32 3833 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2) 3834 ret <16 x i16> %3 3835} 3836declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone 3837 3838define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3839; GENERIC-LABEL: test_pminsb: 3840; GENERIC: # %bb.0: 3841; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3842; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3843; GENERIC-NEXT: retq # sched: [1:1.00] 3844; 3845; HASWELL-LABEL: test_pminsb: 3846; HASWELL: # %bb.0: 3847; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3848; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3849; HASWELL-NEXT: retq # sched: [7:1.00] 3850; 3851; BROADWELL-LABEL: test_pminsb: 3852; BROADWELL: # %bb.0: 3853; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3854; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3855; BROADWELL-NEXT: retq # sched: [7:1.00] 3856; 3857; SKYLAKE-LABEL: test_pminsb: 3858; SKYLAKE: # %bb.0: 3859; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3860; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3861; SKYLAKE-NEXT: retq # sched: [7:1.00] 3862; 3863; SKX-LABEL: test_pminsb: 3864; SKX: # %bb.0: 3865; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3866; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3867; SKX-NEXT: retq # sched: [7:1.00] 3868; 3869; ZNVER1-LABEL: test_pminsb: 3870; ZNVER1: # %bb.0: 3871; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3872; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3873; ZNVER1-NEXT: retq # sched: [1:0.50] 3874 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) 3875 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 3876 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2) 3877 ret <32 x i8> %3 3878} 3879declare <32 x i8> 
@llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone 3880 3881define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3882; GENERIC-LABEL: test_pminsd: 3883; GENERIC: # %bb.0: 3884; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3885; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3886; GENERIC-NEXT: retq # sched: [1:1.00] 3887; 3888; HASWELL-LABEL: test_pminsd: 3889; HASWELL: # %bb.0: 3890; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3891; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3892; HASWELL-NEXT: retq # sched: [7:1.00] 3893; 3894; BROADWELL-LABEL: test_pminsd: 3895; BROADWELL: # %bb.0: 3896; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3897; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3898; BROADWELL-NEXT: retq # sched: [7:1.00] 3899; 3900; SKYLAKE-LABEL: test_pminsd: 3901; SKYLAKE: # %bb.0: 3902; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3903; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3904; SKYLAKE-NEXT: retq # sched: [7:1.00] 3905; 3906; SKX-LABEL: test_pminsd: 3907; SKX: # %bb.0: 3908; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3909; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3910; SKX-NEXT: retq # sched: [7:1.00] 3911; 3912; ZNVER1-LABEL: test_pminsd: 3913; ZNVER1: # %bb.0: 3914; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3915; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3916; ZNVER1-NEXT: retq # sched: [1:0.50] 3917 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) 3918 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3919 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2) 3920 ret <8 x i32> %3 3921} 3922declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone 3923 3924define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3925; GENERIC-LABEL: 
test_pminsw: 3926; GENERIC: # %bb.0: 3927; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3928; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3929; GENERIC-NEXT: retq # sched: [1:1.00] 3930; 3931; HASWELL-LABEL: test_pminsw: 3932; HASWELL: # %bb.0: 3933; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3934; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3935; HASWELL-NEXT: retq # sched: [7:1.00] 3936; 3937; BROADWELL-LABEL: test_pminsw: 3938; BROADWELL: # %bb.0: 3939; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3940; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3941; BROADWELL-NEXT: retq # sched: [7:1.00] 3942; 3943; SKYLAKE-LABEL: test_pminsw: 3944; SKYLAKE: # %bb.0: 3945; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3946; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3947; SKYLAKE-NEXT: retq # sched: [7:1.00] 3948; 3949; SKX-LABEL: test_pminsw: 3950; SKX: # %bb.0: 3951; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3952; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3953; SKX-NEXT: retq # sched: [7:1.00] 3954; 3955; ZNVER1-LABEL: test_pminsw: 3956; ZNVER1: # %bb.0: 3957; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3958; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3959; ZNVER1-NEXT: retq # sched: [1:0.50] 3960 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) 3961 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3962 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2) 3963 ret <16 x i16> %3 3964} 3965declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone 3966 3967define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3968; GENERIC-LABEL: test_pminub: 3969; GENERIC: # %bb.0: 3970; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3971; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: 
[8:0.50] 3972; GENERIC-NEXT: retq # sched: [1:1.00] 3973; 3974; HASWELL-LABEL: test_pminub: 3975; HASWELL: # %bb.0: 3976; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3977; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3978; HASWELL-NEXT: retq # sched: [7:1.00] 3979; 3980; BROADWELL-LABEL: test_pminub: 3981; BROADWELL: # %bb.0: 3982; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3983; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3984; BROADWELL-NEXT: retq # sched: [7:1.00] 3985; 3986; SKYLAKE-LABEL: test_pminub: 3987; SKYLAKE: # %bb.0: 3988; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3989; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3990; SKYLAKE-NEXT: retq # sched: [7:1.00] 3991; 3992; SKX-LABEL: test_pminub: 3993; SKX: # %bb.0: 3994; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3995; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3996; SKX-NEXT: retq # sched: [7:1.00] 3997; 3998; ZNVER1-LABEL: test_pminub: 3999; ZNVER1: # %bb.0: 4000; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4001; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4002; ZNVER1-NEXT: retq # sched: [1:0.50] 4003 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) 4004 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 4005 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2) 4006 ret <32 x i8> %3 4007} 4008declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone 4009 4010define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 4011; GENERIC-LABEL: test_pminud: 4012; GENERIC: # %bb.0: 4013; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4014; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4015; GENERIC-NEXT: retq # sched: [1:1.00] 4016; 4017; HASWELL-LABEL: test_pminud: 4018; HASWELL: # %bb.0: 4019; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: 
[1:0.50] 4020; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4021; HASWELL-NEXT: retq # sched: [7:1.00] 4022; 4023; BROADWELL-LABEL: test_pminud: 4024; BROADWELL: # %bb.0: 4025; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4026; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 4027; BROADWELL-NEXT: retq # sched: [7:1.00] 4028; 4029; SKYLAKE-LABEL: test_pminud: 4030; SKYLAKE: # %bb.0: 4031; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4032; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4033; SKYLAKE-NEXT: retq # sched: [7:1.00] 4034; 4035; SKX-LABEL: test_pminud: 4036; SKX: # %bb.0: 4037; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4038; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4039; SKX-NEXT: retq # sched: [7:1.00] 4040; 4041; ZNVER1-LABEL: test_pminud: 4042; ZNVER1: # %bb.0: 4043; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4044; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4045; ZNVER1-NEXT: retq # sched: [1:0.50] 4046 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) 4047 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 4048 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2) 4049 ret <8 x i32> %3 4050} 4051declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone 4052 4053define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4054; GENERIC-LABEL: test_pminuw: 4055; GENERIC: # %bb.0: 4056; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4057; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4058; GENERIC-NEXT: retq # sched: [1:1.00] 4059; 4060; HASWELL-LABEL: test_pminuw: 4061; HASWELL: # %bb.0: 4062; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4063; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4064; HASWELL-NEXT: retq # sched: [7:1.00] 4065; 4066; BROADWELL-LABEL: test_pminuw: 4067; 
BROADWELL: # %bb.0: 4068; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4069; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 4070; BROADWELL-NEXT: retq # sched: [7:1.00] 4071; 4072; SKYLAKE-LABEL: test_pminuw: 4073; SKYLAKE: # %bb.0: 4074; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4075; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4076; SKYLAKE-NEXT: retq # sched: [7:1.00] 4077; 4078; SKX-LABEL: test_pminuw: 4079; SKX: # %bb.0: 4080; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4081; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4082; SKX-NEXT: retq # sched: [7:1.00] 4083; 4084; ZNVER1-LABEL: test_pminuw: 4085; ZNVER1: # %bb.0: 4086; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4087; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4088; ZNVER1-NEXT: retq # sched: [1:0.50] 4089 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) 4090 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4091 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2) 4092 ret <16 x i16> %3 4093} 4094declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone 4095 4096define i32 @test_pmovmskb(<32 x i8> %a0) { 4097; GENERIC-LABEL: test_pmovmskb: 4098; GENERIC: # %bb.0: 4099; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] 4100; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4101; GENERIC-NEXT: retq # sched: [1:1.00] 4102; 4103; HASWELL-LABEL: test_pmovmskb: 4104; HASWELL: # %bb.0: 4105; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] 4106; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 4107; HASWELL-NEXT: retq # sched: [7:1.00] 4108; 4109; BROADWELL-LABEL: test_pmovmskb: 4110; BROADWELL: # %bb.0: 4111; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] 4112; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 4113; BROADWELL-NEXT: retq # sched: [7:1.00] 4114; 4115; SKYLAKE-LABEL: test_pmovmskb: 4116; SKYLAKE: # 
%bb.0: 4117; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] 4118; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 4119; SKYLAKE-NEXT: retq # sched: [7:1.00] 4120; 4121; SKX-LABEL: test_pmovmskb: 4122; SKX: # %bb.0: 4123; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] 4124; SKX-NEXT: vzeroupper # sched: [4:1.00] 4125; SKX-NEXT: retq # sched: [7:1.00] 4126; 4127; ZNVER1-LABEL: test_pmovmskb: 4128; ZNVER1: # %bb.0: 4129; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:2.00] 4130; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 4131; ZNVER1-NEXT: retq # sched: [1:0.50] 4132 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) 4133 ret i32 %1 4134} 4135declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone 4136 4137define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) { 4138; GENERIC-LABEL: test_pmovsxbd: 4139; GENERIC: # %bb.0: 4140; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00] 4141; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4142; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4143; GENERIC-NEXT: retq # sched: [1:1.00] 4144; 4145; HASWELL-LABEL: test_pmovsxbd: 4146; HASWELL: # %bb.0: 4147; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4148; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4149; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4150; HASWELL-NEXT: retq # sched: [7:1.00] 4151; 4152; BROADWELL-LABEL: test_pmovsxbd: 4153; BROADWELL: # %bb.0: 4154; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4155; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4156; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4157; BROADWELL-NEXT: retq # sched: [7:1.00] 4158; 4159; SKYLAKE-LABEL: test_pmovsxbd: 4160; SKYLAKE: # %bb.0: 4161; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4162; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4163; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4164; SKYLAKE-NEXT: retq # sched: [7:1.00] 4165; 
4166; SKX-LABEL: test_pmovsxbd: 4167; SKX: # %bb.0: 4168; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4169; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4170; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4171; SKX-NEXT: retq # sched: [7:1.00] 4172; 4173; ZNVER1-LABEL: test_pmovsxbd: 4174; ZNVER1: # %bb.0: 4175; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50] 4176; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50] 4177; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4178; ZNVER1-NEXT: retq # sched: [1:0.50] 4179 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4180 %2 = sext <8 x i8> %1 to <8 x i32> 4181 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4182 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4183 %5 = sext <8 x i8> %4 to <8 x i32> 4184 %6 = add <8 x i32> %2, %5 4185 ret <8 x i32> %6 4186} 4187 4188define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) { 4189; GENERIC-LABEL: test_pmovsxbq: 4190; GENERIC: # %bb.0: 4191; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00] 4192; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4193; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4194; GENERIC-NEXT: retq # sched: [1:1.00] 4195; 4196; HASWELL-LABEL: test_pmovsxbq: 4197; HASWELL: # %bb.0: 4198; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4199; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4200; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4201; HASWELL-NEXT: retq # sched: [7:1.00] 4202; 4203; BROADWELL-LABEL: test_pmovsxbq: 4204; BROADWELL: # %bb.0: 4205; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4206; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4207; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4208; BROADWELL-NEXT: retq # sched: [7:1.00] 4209; 4210; SKYLAKE-LABEL: 
test_pmovsxbq: 4211; SKYLAKE: # %bb.0: 4212; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4213; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4214; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4215; SKYLAKE-NEXT: retq # sched: [7:1.00] 4216; 4217; SKX-LABEL: test_pmovsxbq: 4218; SKX: # %bb.0: 4219; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4220; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4221; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4222; SKX-NEXT: retq # sched: [7:1.00] 4223; 4224; ZNVER1-LABEL: test_pmovsxbq: 4225; ZNVER1: # %bb.0: 4226; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50] 4227; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50] 4228; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4229; ZNVER1-NEXT: retq # sched: [1:0.50] 4230 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4231 %2 = sext <4 x i8> %1 to <4 x i64> 4232 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4233 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4234 %5 = sext <4 x i8> %4 to <4 x i64> 4235 %6 = add <4 x i64> %2, %5 4236 ret <4 x i64> %6 4237} 4238 4239define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) { 4240; GENERIC-LABEL: test_pmovsxbw: 4241; GENERIC: # %bb.0: 4242; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] 4243; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] 4244; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4245; GENERIC-NEXT: retq # sched: [1:1.00] 4246; 4247; HASWELL-LABEL: test_pmovsxbw: 4248; HASWELL: # %bb.0: 4249; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 4250; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] 4251; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4252; HASWELL-NEXT: retq # sched: [7:1.00] 4253; 4254; BROADWELL-LABEL: test_pmovsxbw: 4255; BROADWELL: # %bb.0: 4256; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: 
[3:1.00] 4257; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] 4258; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4259; BROADWELL-NEXT: retq # sched: [7:1.00] 4260; 4261; SKYLAKE-LABEL: test_pmovsxbw: 4262; SKYLAKE: # %bb.0: 4263; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 4264; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] 4265; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4266; SKYLAKE-NEXT: retq # sched: [7:1.00] 4267; 4268; SKX-LABEL: test_pmovsxbw: 4269; SKX: # %bb.0: 4270; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 4271; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] 4272; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4273; SKX-NEXT: retq # sched: [7:1.00] 4274; 4275; ZNVER1-LABEL: test_pmovsxbw: 4276; ZNVER1: # %bb.0: 4277; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50] 4278; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50] 4279; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4280; ZNVER1-NEXT: retq # sched: [1:0.50] 4281 %1 = sext <16 x i8> %a0 to <16 x i16> 4282 %2 = load <16 x i8>, <16 x i8> *%a1, align 16 4283 %3 = sext <16 x i8> %2 to <16 x i16> 4284 %4 = add <16 x i16> %1, %3 4285 ret <16 x i16> %4 4286} 4287 4288define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) { 4289; GENERIC-LABEL: test_pmovsxdq: 4290; GENERIC: # %bb.0: 4291; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] 4292; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] 4293; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4294; GENERIC-NEXT: retq # sched: [1:1.00] 4295; 4296; HASWELL-LABEL: test_pmovsxdq: 4297; HASWELL: # %bb.0: 4298; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4299; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] 4300; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4301; HASWELL-NEXT: retq # sched: [7:1.00] 4302; 4303; BROADWELL-LABEL: test_pmovsxdq: 4304; BROADWELL: # %bb.0: 4305; BROADWELL-NEXT: 
vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4306; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] 4307; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4308; BROADWELL-NEXT: retq # sched: [7:1.00] 4309; 4310; SKYLAKE-LABEL: test_pmovsxdq: 4311; SKYLAKE: # %bb.0: 4312; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4313; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] 4314; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4315; SKYLAKE-NEXT: retq # sched: [7:1.00] 4316; 4317; SKX-LABEL: test_pmovsxdq: 4318; SKX: # %bb.0: 4319; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4320; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] 4321; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4322; SKX-NEXT: retq # sched: [7:1.00] 4323; 4324; ZNVER1-LABEL: test_pmovsxdq: 4325; ZNVER1: # %bb.0: 4326; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50] 4327; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50] 4328; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4329; ZNVER1-NEXT: retq # sched: [1:0.50] 4330 %1 = sext <4 x i32> %a0 to <4 x i64> 4331 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 4332 %3 = sext <4 x i32> %2 to <4 x i64> 4333 %4 = add <4 x i64> %1, %3 4334 ret <4 x i64> %4 4335} 4336 4337define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) { 4338; GENERIC-LABEL: test_pmovsxwd: 4339; GENERIC: # %bb.0: 4340; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] 4341; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] 4342; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4343; GENERIC-NEXT: retq # sched: [1:1.00] 4344; 4345; HASWELL-LABEL: test_pmovsxwd: 4346; HASWELL: # %bb.0: 4347; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4348; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] 4349; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4350; HASWELL-NEXT: retq # sched: [7:1.00] 4351; 4352; BROADWELL-LABEL: test_pmovsxwd: 4353; BROADWELL: # 
%bb.0: 4354; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4355; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] 4356; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4357; BROADWELL-NEXT: retq # sched: [7:1.00] 4358; 4359; SKYLAKE-LABEL: test_pmovsxwd: 4360; SKYLAKE: # %bb.0: 4361; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4362; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] 4363; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4364; SKYLAKE-NEXT: retq # sched: [7:1.00] 4365; 4366; SKX-LABEL: test_pmovsxwd: 4367; SKX: # %bb.0: 4368; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4369; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] 4370; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4371; SKX-NEXT: retq # sched: [7:1.00] 4372; 4373; ZNVER1-LABEL: test_pmovsxwd: 4374; ZNVER1: # %bb.0: 4375; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50] 4376; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50] 4377; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4378; ZNVER1-NEXT: retq # sched: [1:0.50] 4379 %1 = sext <8 x i16> %a0 to <8 x i32> 4380 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 4381 %3 = sext <8 x i16> %2 to <8 x i32> 4382 %4 = add <8 x i32> %1, %3 4383 ret <8 x i32> %4 4384} 4385 4386define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) { 4387; GENERIC-LABEL: test_pmovsxwq: 4388; GENERIC: # %bb.0: 4389; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00] 4390; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4391; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4392; GENERIC-NEXT: retq # sched: [1:1.00] 4393; 4394; HASWELL-LABEL: test_pmovsxwq: 4395; HASWELL: # %bb.0: 4396; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4397; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4398; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4399; HASWELL-NEXT: retq # sched: [7:1.00] 4400; 4401; BROADWELL-LABEL: 
test_pmovsxwq: 4402; BROADWELL: # %bb.0: 4403; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4404; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4405; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4406; BROADWELL-NEXT: retq # sched: [7:1.00] 4407; 4408; SKYLAKE-LABEL: test_pmovsxwq: 4409; SKYLAKE: # %bb.0: 4410; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4411; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4412; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4413; SKYLAKE-NEXT: retq # sched: [7:1.00] 4414; 4415; SKX-LABEL: test_pmovsxwq: 4416; SKX: # %bb.0: 4417; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4418; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4419; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4420; SKX-NEXT: retq # sched: [7:1.00] 4421; 4422; ZNVER1-LABEL: test_pmovsxwq: 4423; ZNVER1: # %bb.0: 4424; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50] 4425; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50] 4426; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4427; ZNVER1-NEXT: retq # sched: [1:0.50] 4428 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4429 %2 = sext <4 x i16> %1 to <4 x i64> 4430 %3 = load <8 x i16>, <8 x i16> *%a1, align 16 4431 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4432 %5 = sext <4 x i16> %4 to <4 x i64> 4433 %6 = add <4 x i64> %2, %5 4434 ret <4 x i64> %6 4435} 4436 4437define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) { 4438; GENERIC-LABEL: test_pmovzxbd: 4439; GENERIC: # %bb.0: 4440; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 4441; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] 4442; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4443; GENERIC-NEXT: retq # sched: [1:1.00] 4444; 4445; HASWELL-LABEL: test_pmovzxbd: 4446; HASWELL: # %bb.0: 4447; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4448; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 4449; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4450; HASWELL-NEXT: retq # sched: [7:1.00] 4451; 4452; BROADWELL-LABEL: test_pmovzxbd: 4453; BROADWELL: # %bb.0: 4454; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4455; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00] 4456; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4457; BROADWELL-NEXT: retq # sched: [7:1.00] 4458; 4459; SKYLAKE-LABEL: test_pmovzxbd: 4460; SKYLAKE: # %bb.0: 4461; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4462; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 4463; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4464; SKYLAKE-NEXT: retq # sched: [7:1.00] 4465; 4466; SKX-LABEL: test_pmovzxbd: 4467; SKX: # %bb.0: 4468; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4469; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 4470; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4471; SKX-NEXT: retq # sched: [7:1.00] 4472; 4473; ZNVER1-LABEL: test_pmovzxbd: 4474; ZNVER1: # %bb.0: 4475; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50] 4476; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50] 4477; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4478; ZNVER1-NEXT: retq # sched: [1:0.50] 4479 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4480 %2 = zext <8 x i8> %1 to <8 x i32> 4481 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4482 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4483 %5 = zext <8 x i8> %4 to <8 x i32> 4484 %6 = add <8 x i32> %2, %5 4485 
ret <8 x i32> %6 4486} 4487 4488define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) { 4489; GENERIC-LABEL: test_pmovzxbq: 4490; GENERIC: # %bb.0: 4491; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4492; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 4493; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4494; GENERIC-NEXT: retq # sched: [1:1.00] 4495; 4496; HASWELL-LABEL: test_pmovzxbq: 4497; HASWELL: # %bb.0: 4498; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4499; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 4500; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4501; HASWELL-NEXT: retq # sched: [7:1.00] 4502; 4503; BROADWELL-LABEL: test_pmovzxbq: 4504; BROADWELL: # %bb.0: 4505; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4506; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] 4507; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4508; BROADWELL-NEXT: retq 
# sched: [7:1.00] 4509; 4510; SKYLAKE-LABEL: test_pmovzxbq: 4511; SKYLAKE: # %bb.0: 4512; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4513; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 4514; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4515; SKYLAKE-NEXT: retq # sched: [7:1.00] 4516; 4517; SKX-LABEL: test_pmovzxbq: 4518; SKX: # %bb.0: 4519; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4520; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 4521; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4522; SKX-NEXT: retq # sched: [7:1.00] 4523; 4524; ZNVER1-LABEL: test_pmovzxbq: 4525; ZNVER1: # %bb.0: 4526; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] 4527; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] 4528; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4529; ZNVER1-NEXT: retq # sched: [1:0.50] 4530 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 
4531 %2 = zext <4 x i8> %1 to <4 x i64> 4532 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4533 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4534 %5 = zext <4 x i8> %4 to <4 x i64> 4535 %6 = add <4 x i64> %2, %5 4536 ret <4 x i64> %6 4537} 4538 4539define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) { 4540; GENERIC-LABEL: test_pmovzxbw: 4541; GENERIC: # %bb.0: 4542; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4543; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] 4544; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4545; GENERIC-NEXT: retq # sched: [1:1.00] 4546; 4547; HASWELL-LABEL: test_pmovzxbw: 4548; HASWELL: # %bb.0: 4549; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4550; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 4551; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4552; HASWELL-NEXT: retq # sched: [7:1.00] 4553; 4554; BROADWELL-LABEL: test_pmovzxbw: 4555; BROADWELL: # %bb.0: 4556; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4557; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00] 4558; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4559; BROADWELL-NEXT: retq # sched: [7:1.00] 4560; 4561; SKYLAKE-LABEL: test_pmovzxbw: 4562; SKYLAKE: # %bb.0: 4563; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4564; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 4565; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4566; SKYLAKE-NEXT: retq # sched: [7:1.00] 4567; 4568; SKX-LABEL: test_pmovzxbw: 4569; SKX: # %bb.0: 4570; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4571; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 4572; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4573; SKX-NEXT: retq # sched: 
[7:1.00] 4574; 4575; ZNVER1-LABEL: test_pmovzxbw: 4576; ZNVER1: # %bb.0: 4577; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50] 4578; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50] 4579; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4580; ZNVER1-NEXT: retq # sched: [1:0.50] 4581 %1 = zext <16 x i8> %a0 to <16 x i16> 4582 %2 = load <16 x i8>, <16 x i8> *%a1, align 16 4583 %3 = zext <16 x i8> %2 to <16 x i16> 4584 %4 = add <16 x i16> %1, %3 4585 ret <16 x i16> %4 4586} 4587 4588define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) { 4589; GENERIC-LABEL: test_pmovzxdq: 4590; GENERIC: # %bb.0: 4591; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4592; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] 4593; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4594; GENERIC-NEXT: retq # sched: [1:1.00] 4595; 4596; HASWELL-LABEL: test_pmovzxdq: 4597; HASWELL: # %bb.0: 4598; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4599; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4600; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4601; HASWELL-NEXT: retq # sched: [7:1.00] 4602; 4603; BROADWELL-LABEL: test_pmovzxdq: 4604; BROADWELL: # %bb.0: 4605; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4606; BROADWELL-NEXT: vpmovzxdq 
{{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] 4607; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4608; BROADWELL-NEXT: retq # sched: [7:1.00] 4609; 4610; SKYLAKE-LABEL: test_pmovzxdq: 4611; SKYLAKE: # %bb.0: 4612; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4613; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4614; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4615; SKYLAKE-NEXT: retq # sched: [7:1.00] 4616; 4617; SKX-LABEL: test_pmovzxdq: 4618; SKX: # %bb.0: 4619; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4620; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4621; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4622; SKX-NEXT: retq # sched: [7:1.00] 4623; 4624; ZNVER1-LABEL: test_pmovzxdq: 4625; ZNVER1: # %bb.0: 4626; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] 4627; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] 4628; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4629; ZNVER1-NEXT: retq # sched: [1:0.50] 4630 %1 = zext <4 x i32> %a0 to <4 x i64> 4631 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 4632 %3 = zext <4 x i32> %2 to <4 x i64> 4633 %4 = add <4 x i64> %1, %3 4634 ret <4 x i64> %4 4635} 4636 4637define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) { 4638; GENERIC-LABEL: test_pmovzxwd: 4639; GENERIC: # %bb.0: 4640; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4641; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: 
[8:1.00] 4642; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4643; GENERIC-NEXT: retq # sched: [1:1.00] 4644; 4645; HASWELL-LABEL: test_pmovzxwd: 4646; HASWELL: # %bb.0: 4647; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4648; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] 4649; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4650; HASWELL-NEXT: retq # sched: [7:1.00] 4651; 4652; BROADWELL-LABEL: test_pmovzxwd: 4653; BROADWELL: # %bb.0: 4654; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4655; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 4656; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4657; BROADWELL-NEXT: retq # sched: [7:1.00] 4658; 4659; SKYLAKE-LABEL: test_pmovzxwd: 4660; SKYLAKE: # %bb.0: 4661; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4662; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] 4663; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4664; SKYLAKE-NEXT: retq # sched: [7:1.00] 4665; 4666; SKX-LABEL: test_pmovzxwd: 4667; SKX: # %bb.0: 4668; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4669; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] 
4670; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4671; SKX-NEXT: retq # sched: [7:1.00] 4672; 4673; ZNVER1-LABEL: test_pmovzxwd: 4674; ZNVER1: # %bb.0: 4675; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] 4676; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] 4677; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4678; ZNVER1-NEXT: retq # sched: [1:0.50] 4679 %1 = zext <8 x i16> %a0 to <8 x i32> 4680 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 4681 %3 = zext <8 x i16> %2 to <8 x i32> 4682 %4 = add <8 x i32> %1, %3 4683 ret <8 x i32> %4 4684} 4685 4686define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) { 4687; GENERIC-LABEL: test_pmovzxwq: 4688; GENERIC: # %bb.0: 4689; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4690; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] 4691; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4692; GENERIC-NEXT: retq # sched: [1:1.00] 4693; 4694; HASWELL-LABEL: test_pmovzxwq: 4695; HASWELL: # %bb.0: 4696; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4697; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 4698; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4699; HASWELL-NEXT: retq # sched: [7:1.00] 4700; 4701; BROADWELL-LABEL: test_pmovzxwq: 4702; BROADWELL: # %bb.0: 4703; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4704; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] 4705; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4706; BROADWELL-NEXT: retq # sched: [7:1.00] 4707; 4708; SKYLAKE-LABEL: test_pmovzxwq: 4709; SKYLAKE: # %bb.0: 4710; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4711; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 4712; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4713; SKYLAKE-NEXT: retq # sched: [7:1.00] 4714; 4715; SKX-LABEL: test_pmovzxwq: 4716; SKX: # %bb.0: 4717; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4718; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 4719; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4720; SKX-NEXT: retq # sched: [7:1.00] 4721; 4722; ZNVER1-LABEL: test_pmovzxwq: 4723; ZNVER1: # %bb.0: 4724; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] 4725; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] 4726; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4727; ZNVER1-NEXT: retq # sched: [1:0.50] 4728 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4729 %2 = zext <4 x i16> %1 to <4 x i64> 4730 %3 = load <8 x i16>, <8 x i16> *%a1, align 16 4731 %4 = shufflevector <8 x i16> 
%3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4732 %5 = zext <4 x i16> %4 to <4 x i64> 4733 %6 = add <4 x i64> %2, %5 4734 ret <4 x i64> %6 4735} 4736 4737define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 4738; GENERIC-LABEL: test_pmuldq: 4739; GENERIC: # %bb.0: 4740; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4741; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4742; GENERIC-NEXT: retq # sched: [1:1.00] 4743; 4744; HASWELL-LABEL: test_pmuldq: 4745; HASWELL: # %bb.0: 4746; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4747; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4748; HASWELL-NEXT: retq # sched: [7:1.00] 4749; 4750; BROADWELL-LABEL: test_pmuldq: 4751; BROADWELL: # %bb.0: 4752; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4753; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4754; BROADWELL-NEXT: retq # sched: [7:1.00] 4755; 4756; SKYLAKE-LABEL: test_pmuldq: 4757; SKYLAKE: # %bb.0: 4758; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4759; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4760; SKYLAKE-NEXT: retq # sched: [7:1.00] 4761; 4762; SKX-LABEL: test_pmuldq: 4763; SKX: # %bb.0: 4764; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4765; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4766; SKX-NEXT: retq # sched: [7:1.00] 4767; 4768; ZNVER1-LABEL: test_pmuldq: 4769; ZNVER1: # %bb.0: 4770; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4771; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4772; ZNVER1-NEXT: retq # sched: [1:0.50] 4773 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) 4774 %2 = bitcast <4 x i64> %1 to <8 x i32> 4775 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 4776 %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3) 4777 ret <4 x i64> %4 4778} 4779declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x 
i32>, <8 x i32>) nounwind readnone 4780 4781define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4782; GENERIC-LABEL: test_pmulhrsw: 4783; GENERIC: # %bb.0: 4784; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4785; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4786; GENERIC-NEXT: retq # sched: [1:1.00] 4787; 4788; HASWELL-LABEL: test_pmulhrsw: 4789; HASWELL: # %bb.0: 4790; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4791; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4792; HASWELL-NEXT: retq # sched: [7:1.00] 4793; 4794; BROADWELL-LABEL: test_pmulhrsw: 4795; BROADWELL: # %bb.0: 4796; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4797; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4798; BROADWELL-NEXT: retq # sched: [7:1.00] 4799; 4800; SKYLAKE-LABEL: test_pmulhrsw: 4801; SKYLAKE: # %bb.0: 4802; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4803; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4804; SKYLAKE-NEXT: retq # sched: [7:1.00] 4805; 4806; SKX-LABEL: test_pmulhrsw: 4807; SKX: # %bb.0: 4808; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4809; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4810; SKX-NEXT: retq # sched: [7:1.00] 4811; 4812; ZNVER1-LABEL: test_pmulhrsw: 4813; ZNVER1: # %bb.0: 4814; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4815; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4816; ZNVER1-NEXT: retq # sched: [1:0.50] 4817 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) 4818 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4819 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2) 4820 ret <16 x i16> %3 4821} 4822declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone 4823 4824define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> 
%a1, <16 x i16> *%a2) { 4825; GENERIC-LABEL: test_pmulhuw: 4826; GENERIC: # %bb.0: 4827; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4828; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4829; GENERIC-NEXT: retq # sched: [1:1.00] 4830; 4831; HASWELL-LABEL: test_pmulhuw: 4832; HASWELL: # %bb.0: 4833; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4834; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4835; HASWELL-NEXT: retq # sched: [7:1.00] 4836; 4837; BROADWELL-LABEL: test_pmulhuw: 4838; BROADWELL: # %bb.0: 4839; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4840; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4841; BROADWELL-NEXT: retq # sched: [7:1.00] 4842; 4843; SKYLAKE-LABEL: test_pmulhuw: 4844; SKYLAKE: # %bb.0: 4845; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4846; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4847; SKYLAKE-NEXT: retq # sched: [7:1.00] 4848; 4849; SKX-LABEL: test_pmulhuw: 4850; SKX: # %bb.0: 4851; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4852; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4853; SKX-NEXT: retq # sched: [7:1.00] 4854; 4855; ZNVER1-LABEL: test_pmulhuw: 4856; ZNVER1: # %bb.0: 4857; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4858; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4859; ZNVER1-NEXT: retq # sched: [1:0.50] 4860 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) 4861 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4862 %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2) 4863 ret <16 x i16> %3 4864} 4865declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone 4866 4867define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4868; GENERIC-LABEL: test_pmulhw: 4869; GENERIC: # %bb.0: 4870; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # 
sched: [5:1.00] 4871; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4872; GENERIC-NEXT: retq # sched: [1:1.00] 4873; 4874; HASWELL-LABEL: test_pmulhw: 4875; HASWELL: # %bb.0: 4876; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4877; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4878; HASWELL-NEXT: retq # sched: [7:1.00] 4879; 4880; BROADWELL-LABEL: test_pmulhw: 4881; BROADWELL: # %bb.0: 4882; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4883; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4884; BROADWELL-NEXT: retq # sched: [7:1.00] 4885; 4886; SKYLAKE-LABEL: test_pmulhw: 4887; SKYLAKE: # %bb.0: 4888; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4889; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4890; SKYLAKE-NEXT: retq # sched: [7:1.00] 4891; 4892; SKX-LABEL: test_pmulhw: 4893; SKX: # %bb.0: 4894; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4895; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4896; SKX-NEXT: retq # sched: [7:1.00] 4897; 4898; ZNVER1-LABEL: test_pmulhw: 4899; ZNVER1: # %bb.0: 4900; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4901; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4902; ZNVER1-NEXT: retq # sched: [1:0.50] 4903 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) 4904 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4905 %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2) 4906 ret <16 x i16> %3 4907} 4908declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone 4909 4910define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 4911; GENERIC-LABEL: test_pmulld: 4912; GENERIC: # %bb.0: 4913; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4914; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4915; GENERIC-NEXT: retq # sched: [1:1.00] 4916; 4917; HASWELL-LABEL: 
test_pmulld: 4918; HASWELL: # %bb.0: 4919; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] 4920; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:2.00] 4921; HASWELL-NEXT: retq # sched: [7:1.00] 4922; 4923; BROADWELL-LABEL: test_pmulld: 4924; BROADWELL: # %bb.0: 4925; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] 4926; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00] 4927; BROADWELL-NEXT: retq # sched: [7:1.00] 4928; 4929; SKYLAKE-LABEL: test_pmulld: 4930; SKYLAKE: # %bb.0: 4931; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] 4932; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] 4933; SKYLAKE-NEXT: retq # sched: [7:1.00] 4934; 4935; SKX-LABEL: test_pmulld: 4936; SKX: # %bb.0: 4937; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] 4938; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] 4939; SKX-NEXT: retq # sched: [7:1.00] 4940; 4941; ZNVER1-LABEL: test_pmulld: 4942; ZNVER1: # %bb.0: 4943; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 4944; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 4945; ZNVER1-NEXT: retq # sched: [1:0.50] 4946 %1 = mul <8 x i32> %a0, %a1 4947 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 4948 %3 = mul <8 x i32> %1, %2 4949 ret <8 x i32> %3 4950} 4951 4952define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4953; GENERIC-LABEL: test_pmullw: 4954; GENERIC: # %bb.0: 4955; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4956; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4957; GENERIC-NEXT: retq # sched: [1:1.00] 4958; 4959; HASWELL-LABEL: test_pmullw: 4960; HASWELL: # %bb.0: 4961; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4962; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4963; HASWELL-NEXT: retq # sched: [7:1.00] 4964; 4965; BROADWELL-LABEL: test_pmullw: 4966; BROADWELL: # %bb.0: 4967; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # 
sched: [5:1.00] 4968; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4969; BROADWELL-NEXT: retq # sched: [7:1.00] 4970; 4971; SKYLAKE-LABEL: test_pmullw: 4972; SKYLAKE: # %bb.0: 4973; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4974; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4975; SKYLAKE-NEXT: retq # sched: [7:1.00] 4976; 4977; SKX-LABEL: test_pmullw: 4978; SKX: # %bb.0: 4979; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4980; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4981; SKX-NEXT: retq # sched: [7:1.00] 4982; 4983; ZNVER1-LABEL: test_pmullw: 4984; ZNVER1: # %bb.0: 4985; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4986; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4987; ZNVER1-NEXT: retq # sched: [1:0.50] 4988 %1 = mul <16 x i16> %a0, %a1 4989 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4990 %3 = mul <16 x i16> %1, %2 4991 ret <16 x i16> %3 4992} 4993 4994define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 4995; GENERIC-LABEL: test_pmuludq: 4996; GENERIC: # %bb.0: 4997; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4998; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4999; GENERIC-NEXT: retq # sched: [1:1.00] 5000; 5001; HASWELL-LABEL: test_pmuludq: 5002; HASWELL: # %bb.0: 5003; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5004; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 5005; HASWELL-NEXT: retq # sched: [7:1.00] 5006; 5007; BROADWELL-LABEL: test_pmuludq: 5008; BROADWELL: # %bb.0: 5009; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5010; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5011; BROADWELL-NEXT: retq # sched: [7:1.00] 5012; 5013; SKYLAKE-LABEL: test_pmuludq: 5014; SKYLAKE: # %bb.0: 5015; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5016; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: 
[11:0.50] 5017; SKYLAKE-NEXT: retq # sched: [7:1.00] 5018; 5019; SKX-LABEL: test_pmuludq: 5020; SKX: # %bb.0: 5021; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5022; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 5023; SKX-NEXT: retq # sched: [7:1.00] 5024; 5025; ZNVER1-LABEL: test_pmuludq: 5026; ZNVER1: # %bb.0: 5027; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 5028; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5029; ZNVER1-NEXT: retq # sched: [1:0.50] 5030 %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) 5031 %2 = bitcast <4 x i64> %1 to <8 x i32> 5032 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 5033 %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3) 5034 ret <4 x i64> %4 5035} 5036declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone 5037 5038define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 5039; GENERIC-LABEL: test_por: 5040; GENERIC: # %bb.0: 5041; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5042; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5043; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5044; GENERIC-NEXT: retq # sched: [1:1.00] 5045; 5046; HASWELL-LABEL: test_por: 5047; HASWELL: # %bb.0: 5048; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5049; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5050; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5051; HASWELL-NEXT: retq # sched: [7:1.00] 5052; 5053; BROADWELL-LABEL: test_por: 5054; BROADWELL: # %bb.0: 5055; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5056; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5057; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5058; BROADWELL-NEXT: retq # sched: [7:1.00] 5059; 5060; SKYLAKE-LABEL: test_por: 5061; SKYLAKE: # %bb.0: 5062; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5063; 
SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5064; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5065; SKYLAKE-NEXT: retq # sched: [7:1.00] 5066; 5067; SKX-LABEL: test_por: 5068; SKX: # %bb.0: 5069; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5070; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5071; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5072; SKX-NEXT: retq # sched: [7:1.00] 5073; 5074; ZNVER1-LABEL: test_por: 5075; ZNVER1: # %bb.0: 5076; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5077; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5078; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5079; ZNVER1-NEXT: retq # sched: [1:0.50] 5080 %1 = or <4 x i64> %a0, %a1 5081 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 5082 %3 = or <4 x i64> %1, %2 5083 %4 = add <4 x i64> %3, %a1 5084 ret <4 x i64> %4 5085} 5086 5087define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 5088; GENERIC-LABEL: test_psadbw: 5089; GENERIC: # %bb.0: 5090; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5091; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 5092; GENERIC-NEXT: retq # sched: [1:1.00] 5093; 5094; HASWELL-LABEL: test_psadbw: 5095; HASWELL: # %bb.0: 5096; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5097; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 5098; HASWELL-NEXT: retq # sched: [7:1.00] 5099; 5100; BROADWELL-LABEL: test_psadbw: 5101; BROADWELL: # %bb.0: 5102; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5103; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5104; BROADWELL-NEXT: retq # sched: [7:1.00] 5105; 5106; SKYLAKE-LABEL: test_psadbw: 5107; SKYLAKE: # %bb.0: 5108; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5109; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 5110; SKYLAKE-NEXT: retq # sched: [7:1.00] 5111; 5112; SKX-LABEL: test_psadbw: 5113; 
SKX: # %bb.0: 5114; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5115; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 5116; SKX-NEXT: retq # sched: [7:1.00] 5117; 5118; ZNVER1-LABEL: test_psadbw: 5119; ZNVER1: # %bb.0: 5120; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5121; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 5122; ZNVER1-NEXT: retq # sched: [1:0.50] 5123 %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) 5124 %2 = bitcast <4 x i64> %1 to <32 x i8> 5125 %3 = load <32 x i8>, <32 x i8> *%a2, align 32 5126 %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3) 5127 ret <4 x i64> %4 5128} 5129declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone 5130 5131define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 5132; GENERIC-LABEL: test_pshufb: 5133; GENERIC: # %bb.0: 5134; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5135; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5136; GENERIC-NEXT: retq # sched: [1:1.00] 5137; 5138; HASWELL-LABEL: test_pshufb: 5139; HASWELL: # %bb.0: 5140; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5141; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5142; HASWELL-NEXT: retq # sched: [7:1.00] 5143; 5144; BROADWELL-LABEL: test_pshufb: 5145; BROADWELL: # %bb.0: 5146; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5147; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5148; BROADWELL-NEXT: retq # sched: [7:1.00] 5149; 5150; SKYLAKE-LABEL: test_pshufb: 5151; SKYLAKE: # %bb.0: 5152; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5153; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5154; SKYLAKE-NEXT: retq # sched: [7:1.00] 5155; 5156; SKX-LABEL: test_pshufb: 5157; SKX: # %bb.0: 5158; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5159; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: 
[8:1.00] 5160; SKX-NEXT: retq # sched: [7:1.00] 5161; 5162; ZNVER1-LABEL: test_pshufb: 5163; ZNVER1: # %bb.0: 5164; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5165; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5166; ZNVER1-NEXT: retq # sched: [1:0.50] 5167 %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) 5168 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 5169 %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2) 5170 ret <32 x i8> %3 5171} 5172declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone 5173 5174define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) { 5175; GENERIC-LABEL: test_pshufd: 5176; GENERIC: # %bb.0: 5177; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5178; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5179; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5180; GENERIC-NEXT: retq # sched: [1:1.00] 5181; 5182; HASWELL-LABEL: test_pshufd: 5183; HASWELL: # %bb.0: 5184; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5185; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5186; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5187; HASWELL-NEXT: retq # sched: [7:1.00] 5188; 5189; BROADWELL-LABEL: test_pshufd: 5190; BROADWELL: # %bb.0: 5191; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5192; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00] 5193; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5194; BROADWELL-NEXT: retq # sched: [7:1.00] 5195; 5196; SKYLAKE-LABEL: test_pshufd: 5197; SKYLAKE: # %bb.0: 5198; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5199; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5200; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5201; SKYLAKE-NEXT: 
retq # sched: [7:1.00] 5202; 5203; SKX-LABEL: test_pshufd: 5204; SKX: # %bb.0: 5205; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5206; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5207; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5208; SKX-NEXT: retq # sched: [7:1.00] 5209; 5210; ZNVER1-LABEL: test_pshufd: 5211; ZNVER1: # %bb.0: 5212; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50] 5213; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25] 5214; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5215; ZNVER1-NEXT: retq # sched: [1:0.50] 5216 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 5217 %2 = load <8 x i32>, <8 x i32> *%a1, align 32 5218 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 5219 %4 = add <8 x i32> %1, %3 5220 ret <8 x i32> %4 5221} 5222 5223define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) { 5224; GENERIC-LABEL: test_pshufhw: 5225; GENERIC: # %bb.0: 5226; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5227; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5228; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5229; GENERIC-NEXT: retq # sched: [1:1.00] 5230; 5231; HASWELL-LABEL: test_pshufhw: 5232; HASWELL: # %bb.0: 5233; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5234; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5235; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5236; HASWELL-NEXT: retq # sched: [7:1.00] 5237; 5238; BROADWELL-LABEL: test_pshufhw: 5239; BROADWELL: # %bb.0: 5240; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = 
ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5241; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00] 5242; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5243; BROADWELL-NEXT: retq # sched: [7:1.00] 5244; 5245; SKYLAKE-LABEL: test_pshufhw: 5246; SKYLAKE: # %bb.0: 5247; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5248; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5249; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5250; SKYLAKE-NEXT: retq # sched: [7:1.00] 5251; 5252; SKX-LABEL: test_pshufhw: 5253; SKX: # %bb.0: 5254; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5255; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5256; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5257; SKX-NEXT: retq # sched: [7:1.00] 5258; 5259; ZNVER1-LABEL: test_pshufhw: 5260; ZNVER1: # %bb.0: 5261; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50] 5262; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25] 5263; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5264; ZNVER1-NEXT: retq # sched: [1:0.50] 5265 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12> 5266 %2 = load <16 x i16>, <16 x i16> *%a1, align 32 5267 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14> 5268 %4 = or <16 x i16> %1, %3 5269 ret <16 x i16> %4 5270} 5271 5272define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) { 5273; GENERIC-LABEL: test_pshuflw: 5274; GENERIC: 
# %bb.0: 5275; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5276; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5277; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5278; GENERIC-NEXT: retq # sched: [1:1.00] 5279; 5280; HASWELL-LABEL: test_pshuflw: 5281; HASWELL: # %bb.0: 5282; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5283; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5284; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5285; HASWELL-NEXT: retq # sched: [7:1.00] 5286; 5287; BROADWELL-LABEL: test_pshuflw: 5288; BROADWELL: # %bb.0: 5289; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5290; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00] 5291; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5292; BROADWELL-NEXT: retq # sched: [7:1.00] 5293; 5294; SKYLAKE-LABEL: test_pshuflw: 5295; SKYLAKE: # %bb.0: 5296; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5297; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5298; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5299; SKYLAKE-NEXT: retq # sched: [7:1.00] 5300; 5301; SKX-LABEL: test_pshuflw: 5302; SKX: # %bb.0: 5303; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5304; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5305; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5306; SKX-NEXT: retq # sched: [7:1.00] 5307; 5308; ZNVER1-LABEL: test_pshuflw: 5309; ZNVER1: # %bb.0: 5310; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50] 
5311; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25] 5312; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5313; ZNVER1-NEXT: retq # sched: [1:0.50] 5314 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15> 5315 %2 = load <16 x i16>, <16 x i16> *%a1, align 32 5316 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 5317 %4 = or <16 x i16> %1, %3 5318 ret <16 x i16> %4 5319} 5320 5321define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 5322; GENERIC-LABEL: test_psignb: 5323; GENERIC: # %bb.0: 5324; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5325; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5326; GENERIC-NEXT: retq # sched: [1:1.00] 5327; 5328; HASWELL-LABEL: test_psignb: 5329; HASWELL: # %bb.0: 5330; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5331; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5332; HASWELL-NEXT: retq # sched: [7:1.00] 5333; 5334; BROADWELL-LABEL: test_psignb: 5335; BROADWELL: # %bb.0: 5336; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5337; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5338; BROADWELL-NEXT: retq # sched: [7:1.00] 5339; 5340; SKYLAKE-LABEL: test_psignb: 5341; SKYLAKE: # %bb.0: 5342; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5343; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5344; SKYLAKE-NEXT: retq # sched: [7:1.00] 5345; 5346; SKX-LABEL: test_psignb: 5347; SKX: # %bb.0: 5348; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5349; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5350; SKX-NEXT: retq # sched: [7:1.00] 5351; 5352; ZNVER1-LABEL: 
test_psignb: 5353; ZNVER1: # %bb.0: 5354; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5355; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5356; ZNVER1-NEXT: retq # sched: [1:0.50] 5357 %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) 5358 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 5359 %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2) 5360 ret <32 x i8> %3 5361} 5362declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone 5363 5364define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 5365; GENERIC-LABEL: test_psignd: 5366; GENERIC: # %bb.0: 5367; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5368; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5369; GENERIC-NEXT: retq # sched: [1:1.00] 5370; 5371; HASWELL-LABEL: test_psignd: 5372; HASWELL: # %bb.0: 5373; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5374; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5375; HASWELL-NEXT: retq # sched: [7:1.00] 5376; 5377; BROADWELL-LABEL: test_psignd: 5378; BROADWELL: # %bb.0: 5379; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5380; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5381; BROADWELL-NEXT: retq # sched: [7:1.00] 5382; 5383; SKYLAKE-LABEL: test_psignd: 5384; SKYLAKE: # %bb.0: 5385; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5386; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5387; SKYLAKE-NEXT: retq # sched: [7:1.00] 5388; 5389; SKX-LABEL: test_psignd: 5390; SKX: # %bb.0: 5391; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5392; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5393; SKX-NEXT: retq # sched: [7:1.00] 5394; 5395; ZNVER1-LABEL: test_psignd: 5396; ZNVER1: # %bb.0: 5397; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5398; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5399; 
ZNVER1-NEXT: retq # sched: [1:0.50] 5400 %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) 5401 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 5402 %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2) 5403 ret <8 x i32> %3 5404} 5405declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone 5406 5407define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 5408; GENERIC-LABEL: test_psignw: 5409; GENERIC: # %bb.0: 5410; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5411; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5412; GENERIC-NEXT: retq # sched: [1:1.00] 5413; 5414; HASWELL-LABEL: test_psignw: 5415; HASWELL: # %bb.0: 5416; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5417; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5418; HASWELL-NEXT: retq # sched: [7:1.00] 5419; 5420; BROADWELL-LABEL: test_psignw: 5421; BROADWELL: # %bb.0: 5422; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5423; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5424; BROADWELL-NEXT: retq # sched: [7:1.00] 5425; 5426; SKYLAKE-LABEL: test_psignw: 5427; SKYLAKE: # %bb.0: 5428; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5429; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5430; SKYLAKE-NEXT: retq # sched: [7:1.00] 5431; 5432; SKX-LABEL: test_psignw: 5433; SKX: # %bb.0: 5434; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5435; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5436; SKX-NEXT: retq # sched: [7:1.00] 5437; 5438; ZNVER1-LABEL: test_psignw: 5439; ZNVER1: # %bb.0: 5440; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5441; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5442; ZNVER1-NEXT: retq # sched: [1:0.50] 5443 %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) 5444 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 
5445 %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2) 5446 ret <16 x i16> %3 5447} 5448declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone 5449 5450define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5451; GENERIC-LABEL: test_pslld: 5452; GENERIC: # %bb.0: 5453; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5454; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5455; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] 5456; GENERIC-NEXT: retq # sched: [1:1.00] 5457; 5458; HASWELL-LABEL: test_pslld: 5459; HASWELL: # %bb.0: 5460; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5461; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5462; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] 5463; HASWELL-NEXT: retq # sched: [7:1.00] 5464; 5465; BROADWELL-LABEL: test_pslld: 5466; BROADWELL: # %bb.0: 5467; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5468; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5469; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] 5470; BROADWELL-NEXT: retq # sched: [7:1.00] 5471; 5472; SKYLAKE-LABEL: test_pslld: 5473; SKYLAKE: # %bb.0: 5474; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5475; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5476; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] 5477; SKYLAKE-NEXT: retq # sched: [7:1.00] 5478; 5479; SKX-LABEL: test_pslld: 5480; SKX: # %bb.0: 5481; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5482; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5483; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] 5484; SKX-NEXT: retq # sched: [7:1.00] 5485; 5486; ZNVER1-LABEL: test_pslld: 5487; ZNVER1: # %bb.0: 5488; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5489; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5490; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # 
sched: [1:0.25] 5491; ZNVER1-NEXT: retq # sched: [1:0.50] 5492 %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) 5493 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5494 %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2) 5495 %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5496 ret <8 x i32> %4 5497} 5498declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone 5499 5500define <32 x i8> @test_pslldq(<32 x i8> %a0) { 5501; GENERIC-LABEL: test_pslldq: 5502; GENERIC: # %bb.0: 5503; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5504; GENERIC-NEXT: retq # sched: [1:1.00] 5505; 5506; HASWELL-LABEL: test_pslldq: 5507; HASWELL: # %bb.0: 5508; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5509; HASWELL-NEXT: retq # sched: [7:1.00] 5510; 5511; BROADWELL-LABEL: test_pslldq: 5512; BROADWELL: # %bb.0: 5513; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5514; BROADWELL-NEXT: retq # sched: [7:1.00] 5515; 5516; SKYLAKE-LABEL: test_pslldq: 5517; SKYLAKE: # %bb.0: 5518; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5519; SKYLAKE-NEXT: retq # sched: [7:1.00] 5520; 5521; SKX-LABEL: test_pslldq: 5522; SKX: # %bb.0: 5523; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5524; SKX-NEXT: retq # sched: [7:1.00] 5525; 5526; ZNVER1-LABEL: test_pslldq: 5527; ZNVER1: # %bb.0: 5528; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = 
zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00] 5529; ZNVER1-NEXT: retq # sched: [1:0.50] 5530 %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60> 5531 ret <32 x i8> %1 5532} 5533 5534define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 5535; GENERIC-LABEL: test_psllq: 5536; GENERIC: # %bb.0: 5537; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5538; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5539; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] 5540; GENERIC-NEXT: retq # sched: [1:1.00] 5541; 5542; HASWELL-LABEL: test_psllq: 5543; HASWELL: # %bb.0: 5544; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5545; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5546; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] 5547; HASWELL-NEXT: retq # sched: [7:1.00] 5548; 5549; BROADWELL-LABEL: test_psllq: 5550; BROADWELL: # %bb.0: 5551; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5552; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5553; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] 5554; BROADWELL-NEXT: retq # sched: [7:1.00] 5555; 5556; SKYLAKE-LABEL: test_psllq: 5557; SKYLAKE: # %bb.0: 5558; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5559; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5560; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] 5561; SKYLAKE-NEXT: retq # sched: [7:1.00] 5562; 5563; SKX-LABEL: test_psllq: 5564; SKX: # %bb.0: 5565; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5566; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 
5567; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] 5568; SKX-NEXT: retq # sched: [7:1.00] 5569; 5570; ZNVER1-LABEL: test_psllq: 5571; ZNVER1: # %bb.0: 5572; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5573; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5574; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25] 5575; ZNVER1-NEXT: retq # sched: [1:0.50] 5576 %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) 5577 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 5578 %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2) 5579 %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2> 5580 ret <4 x i64> %4 5581} 5582declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone 5583 5584define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5585; GENERIC-LABEL: test_psllvd: 5586; GENERIC: # %bb.0: 5587; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5588; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5589; GENERIC-NEXT: retq # sched: [1:1.00] 5590; 5591; HASWELL-LABEL: test_psllvd: 5592; HASWELL: # %bb.0: 5593; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5594; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 5595; HASWELL-NEXT: retq # sched: [7:1.00] 5596; 5597; BROADWELL-LABEL: test_psllvd: 5598; BROADWELL: # %bb.0: 5599; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5600; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 5601; BROADWELL-NEXT: retq # sched: [7:1.00] 5602; 5603; SKYLAKE-LABEL: test_psllvd: 5604; SKYLAKE: # %bb.0: 5605; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5606; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5607; SKYLAKE-NEXT: retq # sched: [7:1.00] 5608; 5609; SKX-LABEL: test_psllvd: 5610; SKX: # %bb.0: 5611; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5612; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5613; 
SKX-NEXT: retq # sched: [7:1.00] 5614; 5615; ZNVER1-LABEL: test_psllvd: 5616; ZNVER1: # %bb.0: 5617; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5618; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 5619; ZNVER1-NEXT: retq # sched: [1:0.50] 5620 %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) 5621 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5622 %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2) 5623 ret <4 x i32> %3 5624} 5625declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone 5626 5627define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 5628; GENERIC-LABEL: test_psllvd_ymm: 5629; GENERIC: # %bb.0: 5630; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5631; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5632; GENERIC-NEXT: retq # sched: [1:1.00] 5633; 5634; HASWELL-LABEL: test_psllvd_ymm: 5635; HASWELL: # %bb.0: 5636; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5637; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 5638; HASWELL-NEXT: retq # sched: [7:1.00] 5639; 5640; BROADWELL-LABEL: test_psllvd_ymm: 5641; BROADWELL: # %bb.0: 5642; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5643; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 5644; BROADWELL-NEXT: retq # sched: [7:1.00] 5645; 5646; SKYLAKE-LABEL: test_psllvd_ymm: 5647; SKYLAKE: # %bb.0: 5648; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5649; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5650; SKYLAKE-NEXT: retq # sched: [7:1.00] 5651; 5652; SKX-LABEL: test_psllvd_ymm: 5653; SKX: # %bb.0: 5654; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5655; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5656; SKX-NEXT: retq # sched: [7:1.00] 5657; 5658; ZNVER1-LABEL: test_psllvd_ymm: 5659; ZNVER1: # %bb.0: 5660; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # 
sched: [1:0.50] 5661; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5662; ZNVER1-NEXT: retq # sched: [1:0.50] 5663 %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) 5664 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 5665 %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2) 5666 ret <8 x i32> %3 5667} 5668declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone 5669 5670define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 5671; GENERIC-LABEL: test_psllvq: 5672; GENERIC: # %bb.0: 5673; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5674; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5675; GENERIC-NEXT: retq # sched: [1:1.00] 5676; 5677; HASWELL-LABEL: test_psllvq: 5678; HASWELL: # %bb.0: 5679; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5680; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5681; HASWELL-NEXT: retq # sched: [7:1.00] 5682; 5683; BROADWELL-LABEL: test_psllvq: 5684; BROADWELL: # %bb.0: 5685; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5686; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 5687; BROADWELL-NEXT: retq # sched: [7:1.00] 5688; 5689; SKYLAKE-LABEL: test_psllvq: 5690; SKYLAKE: # %bb.0: 5691; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5692; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5693; SKYLAKE-NEXT: retq # sched: [7:1.00] 5694; 5695; SKX-LABEL: test_psllvq: 5696; SKX: # %bb.0: 5697; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5698; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5699; SKX-NEXT: retq # sched: [7:1.00] 5700; 5701; ZNVER1-LABEL: test_psllvq: 5702; ZNVER1: # %bb.0: 5703; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5704; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 5705; ZNVER1-NEXT: retq # sched: [1:0.50] 5706 %1 = call <2 x i64> 
@llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) 5707 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 5708 %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2) 5709 ret <2 x i64> %3 5710} 5711declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone 5712 5713define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 5714; GENERIC-LABEL: test_psllvq_ymm: 5715; GENERIC: # %bb.0: 5716; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5717; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5718; GENERIC-NEXT: retq # sched: [1:1.00] 5719; 5720; HASWELL-LABEL: test_psllvq_ymm: 5721; HASWELL: # %bb.0: 5722; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5723; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5724; HASWELL-NEXT: retq # sched: [7:1.00] 5725; 5726; BROADWELL-LABEL: test_psllvq_ymm: 5727; BROADWELL: # %bb.0: 5728; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5729; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5730; BROADWELL-NEXT: retq # sched: [7:1.00] 5731; 5732; SKYLAKE-LABEL: test_psllvq_ymm: 5733; SKYLAKE: # %bb.0: 5734; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5735; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5736; SKYLAKE-NEXT: retq # sched: [7:1.00] 5737; 5738; SKX-LABEL: test_psllvq_ymm: 5739; SKX: # %bb.0: 5740; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5741; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5742; SKX-NEXT: retq # sched: [7:1.00] 5743; 5744; ZNVER1-LABEL: test_psllvq_ymm: 5745; ZNVER1: # %bb.0: 5746; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5747; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5748; ZNVER1-NEXT: retq # sched: [1:0.50] 5749 %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) 5750 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 5751 %3 = call <4 x i64> 
@llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2) 5752 ret <4 x i64> %3 5753} 5754declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone 5755 5756define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 5757; GENERIC-LABEL: test_psllw: 5758; GENERIC: # %bb.0: 5759; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5760; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5761; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] 5762; GENERIC-NEXT: retq # sched: [1:1.00] 5763; 5764; HASWELL-LABEL: test_psllw: 5765; HASWELL: # %bb.0: 5766; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5767; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5768; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] 5769; HASWELL-NEXT: retq # sched: [7:1.00] 5770; 5771; BROADWELL-LABEL: test_psllw: 5772; BROADWELL: # %bb.0: 5773; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5774; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5775; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] 5776; BROADWELL-NEXT: retq # sched: [7:1.00] 5777; 5778; SKYLAKE-LABEL: test_psllw: 5779; SKYLAKE: # %bb.0: 5780; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5781; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5782; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] 5783; SKYLAKE-NEXT: retq # sched: [7:1.00] 5784; 5785; SKX-LABEL: test_psllw: 5786; SKX: # %bb.0: 5787; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5788; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5789; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] 5790; SKX-NEXT: retq # sched: [7:1.00] 5791; 5792; ZNVER1-LABEL: test_psllw: 5793; ZNVER1: # %bb.0: 5794; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5795; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5796; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25] 5797; 
ZNVER1-NEXT: retq # sched: [1:0.50] 5798 %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) 5799 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 5800 %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2) 5801 %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 5802 ret <16 x i16> %4 5803} 5804declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone 5805 5806define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5807; GENERIC-LABEL: test_psrad: 5808; GENERIC: # %bb.0: 5809; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5810; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5811; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] 5812; GENERIC-NEXT: retq # sched: [1:1.00] 5813; 5814; HASWELL-LABEL: test_psrad: 5815; HASWELL: # %bb.0: 5816; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5817; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5818; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] 5819; HASWELL-NEXT: retq # sched: [7:1.00] 5820; 5821; BROADWELL-LABEL: test_psrad: 5822; BROADWELL: # %bb.0: 5823; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5824; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5825; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] 5826; BROADWELL-NEXT: retq # sched: [7:1.00] 5827; 5828; SKYLAKE-LABEL: test_psrad: 5829; SKYLAKE: # %bb.0: 5830; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5831; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5832; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50] 5833; SKYLAKE-NEXT: retq # sched: [7:1.00] 5834; 5835; SKX-LABEL: test_psrad: 5836; SKX: # %bb.0: 5837; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5838; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5839; SKX-NEXT: vpsrad $2, 
%ymm0, %ymm0 # sched: [1:0.50] 5840; SKX-NEXT: retq # sched: [7:1.00] 5841; 5842; ZNVER1-LABEL: test_psrad: 5843; ZNVER1: # %bb.0: 5844; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5845; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5846; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25] 5847; ZNVER1-NEXT: retq # sched: [1:0.50] 5848 %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) 5849 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5850 %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2) 5851 %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5852 ret <8 x i32> %4 5853} 5854declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone 5855 5856define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5857; GENERIC-LABEL: test_psravd: 5858; GENERIC: # %bb.0: 5859; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5860; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5861; GENERIC-NEXT: retq # sched: [1:1.00] 5862; 5863; HASWELL-LABEL: test_psravd: 5864; HASWELL: # %bb.0: 5865; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5866; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 5867; HASWELL-NEXT: retq # sched: [7:1.00] 5868; 5869; BROADWELL-LABEL: test_psravd: 5870; BROADWELL: # %bb.0: 5871; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5872; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 5873; BROADWELL-NEXT: retq # sched: [7:1.00] 5874; 5875; SKYLAKE-LABEL: test_psravd: 5876; SKYLAKE: # %bb.0: 5877; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5878; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5879; SKYLAKE-NEXT: retq # sched: [7:1.00] 5880; 5881; SKX-LABEL: test_psravd: 5882; SKX: # %bb.0: 5883; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5884; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 
5885; SKX-NEXT: retq # sched: [7:1.00] 5886; 5887; ZNVER1-LABEL: test_psravd: 5888; ZNVER1: # %bb.0: 5889; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5890; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 5891; ZNVER1-NEXT: retq # sched: [1:0.50] 5892 %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) 5893 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5894 %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2) 5895 ret <4 x i32> %3 5896} 5897declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone 5898 5899define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 5900; GENERIC-LABEL: test_psravd_ymm: 5901; GENERIC: # %bb.0: 5902; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5903; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5904; GENERIC-NEXT: retq # sched: [1:1.00] 5905; 5906; HASWELL-LABEL: test_psravd_ymm: 5907; HASWELL: # %bb.0: 5908; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5909; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 5910; HASWELL-NEXT: retq # sched: [7:1.00] 5911; 5912; BROADWELL-LABEL: test_psravd_ymm: 5913; BROADWELL: # %bb.0: 5914; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5915; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 5916; BROADWELL-NEXT: retq # sched: [7:1.00] 5917; 5918; SKYLAKE-LABEL: test_psravd_ymm: 5919; SKYLAKE: # %bb.0: 5920; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5921; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5922; SKYLAKE-NEXT: retq # sched: [7:1.00] 5923; 5924; SKX-LABEL: test_psravd_ymm: 5925; SKX: # %bb.0: 5926; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5927; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5928; SKX-NEXT: retq # sched: [7:1.00] 5929; 5930; ZNVER1-LABEL: test_psravd_ymm: 5931; ZNVER1: # %bb.0: 5932; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, 
%ymm0 # sched: [1:0.50] 5933; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5934; ZNVER1-NEXT: retq # sched: [1:0.50] 5935 %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) 5936 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 5937 %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2) 5938 ret <8 x i32> %3 5939} 5940declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone 5941 5942define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 5943; GENERIC-LABEL: test_psraw: 5944; GENERIC: # %bb.0: 5945; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5946; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5947; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] 5948; GENERIC-NEXT: retq # sched: [1:1.00] 5949; 5950; HASWELL-LABEL: test_psraw: 5951; HASWELL: # %bb.0: 5952; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5953; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5954; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] 5955; HASWELL-NEXT: retq # sched: [7:1.00] 5956; 5957; BROADWELL-LABEL: test_psraw: 5958; BROADWELL: # %bb.0: 5959; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5960; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5961; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] 5962; BROADWELL-NEXT: retq # sched: [7:1.00] 5963; 5964; SKYLAKE-LABEL: test_psraw: 5965; SKYLAKE: # %bb.0: 5966; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5967; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5968; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] 5969; SKYLAKE-NEXT: retq # sched: [7:1.00] 5970; 5971; SKX-LABEL: test_psraw: 5972; SKX: # %bb.0: 5973; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5974; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5975; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] 5976; 
SKX-NEXT: retq # sched: [7:1.00] 5977; 5978; ZNVER1-LABEL: test_psraw: 5979; ZNVER1: # %bb.0: 5980; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5981; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5982; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25] 5983; ZNVER1-NEXT: retq # sched: [1:0.50] 5984 %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) 5985 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 5986 %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2) 5987 %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 5988 ret <16 x i16> %4 5989} 5990declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone 5991 5992define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5993; GENERIC-LABEL: test_psrld: 5994; GENERIC: # %bb.0: 5995; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5996; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5997; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] 5998; GENERIC-NEXT: retq # sched: [1:1.00] 5999; 6000; HASWELL-LABEL: test_psrld: 6001; HASWELL: # %bb.0: 6002; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6003; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6004; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] 6005; HASWELL-NEXT: retq # sched: [7:1.00] 6006; 6007; BROADWELL-LABEL: test_psrld: 6008; BROADWELL: # %bb.0: 6009; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6010; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 6011; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] 6012; BROADWELL-NEXT: retq # sched: [7:1.00] 6013; 6014; SKYLAKE-LABEL: test_psrld: 6015; SKYLAKE: # %bb.0: 6016; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6017; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6018; SKYLAKE-NEXT: 
vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] 6019; SKYLAKE-NEXT: retq # sched: [7:1.00] 6020; 6021; SKX-LABEL: test_psrld: 6022; SKX: # %bb.0: 6023; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6024; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6025; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] 6026; SKX-NEXT: retq # sched: [7:1.00] 6027; 6028; ZNVER1-LABEL: test_psrld: 6029; ZNVER1: # %bb.0: 6030; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 6031; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 6032; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25] 6033; ZNVER1-NEXT: retq # sched: [1:0.50] 6034 %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) 6035 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 6036 %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2) 6037 %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 6038 ret <8 x i32> %4 6039} 6040declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone 6041 6042define <32 x i8> @test_psrldq(<32 x i8> %a0) { 6043; GENERIC-LABEL: test_psrldq: 6044; GENERIC: # %bb.0: 6045; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6046; GENERIC-NEXT: retq # sched: [1:1.00] 6047; 6048; HASWELL-LABEL: test_psrldq: 6049; HASWELL: # %bb.0: 6050; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6051; HASWELL-NEXT: retq # sched: [7:1.00] 6052; 6053; BROADWELL-LABEL: test_psrldq: 6054; BROADWELL: # %bb.0: 6055; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6056; BROADWELL-NEXT: retq # sched: [7:1.00] 6057; 6058; SKYLAKE-LABEL: test_psrldq: 6059; 
SKYLAKE: # %bb.0: 6060; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6061; SKYLAKE-NEXT: retq # sched: [7:1.00] 6062; 6063; SKX-LABEL: test_psrldq: 6064; SKX: # %bb.0: 6065; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6066; SKX-NEXT: retq # sched: [7:1.00] 6067; 6068; ZNVER1-LABEL: test_psrldq: 6069; ZNVER1: # %bb.0: 6070; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00] 6071; ZNVER1-NEXT: retq # sched: [1:0.50] 6072 %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50> 6073 ret <32 x i8> %1 6074} 6075 6076define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 6077; GENERIC-LABEL: test_psrlq: 6078; GENERIC: # %bb.0: 6079; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6080; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 6081; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] 6082; GENERIC-NEXT: retq # sched: [1:1.00] 6083; 6084; HASWELL-LABEL: test_psrlq: 6085; HASWELL: # %bb.0: 6086; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6087; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6088; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] 6089; HASWELL-NEXT: retq # sched: [7:1.00] 6090; 6091; BROADWELL-LABEL: test_psrlq: 6092; BROADWELL: # %bb.0: 6093; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6094; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # 
sched: [7:1.00] 6095; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] 6096; BROADWELL-NEXT: retq # sched: [7:1.00] 6097; 6098; SKYLAKE-LABEL: test_psrlq: 6099; SKYLAKE: # %bb.0: 6100; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6101; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6102; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] 6103; SKYLAKE-NEXT: retq # sched: [7:1.00] 6104; 6105; SKX-LABEL: test_psrlq: 6106; SKX: # %bb.0: 6107; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6108; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6109; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] 6110; SKX-NEXT: retq # sched: [7:1.00] 6111; 6112; ZNVER1-LABEL: test_psrlq: 6113; ZNVER1: # %bb.0: 6114; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 6115; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 6116; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25] 6117; ZNVER1-NEXT: retq # sched: [1:0.50] 6118 %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) 6119 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 6120 %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2) 6121 %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2> 6122 ret <4 x i64> %4 6123} 6124declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone 6125 6126define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 6127; GENERIC-LABEL: test_psrlvd: 6128; GENERIC: # %bb.0: 6129; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6130; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6131; GENERIC-NEXT: retq # sched: [1:1.00] 6132; 6133; HASWELL-LABEL: test_psrlvd: 6134; HASWELL: # %bb.0: 6135; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 6136; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 6137; HASWELL-NEXT: retq # sched: [7:1.00] 6138; 6139; BROADWELL-LABEL: test_psrlvd: 6140; BROADWELL: # %bb.0: 6141; 
BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 6142; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 6143; BROADWELL-NEXT: retq # sched: [7:1.00] 6144; 6145; SKYLAKE-LABEL: test_psrlvd: 6146; SKYLAKE: # %bb.0: 6147; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6148; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6149; SKYLAKE-NEXT: retq # sched: [7:1.00] 6150; 6151; SKX-LABEL: test_psrlvd: 6152; SKX: # %bb.0: 6153; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6154; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6155; SKX-NEXT: retq # sched: [7:1.00] 6156; 6157; ZNVER1-LABEL: test_psrlvd: 6158; ZNVER1: # %bb.0: 6159; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6160; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6161; ZNVER1-NEXT: retq # sched: [1:0.50] 6162 %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) 6163 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 6164 %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2) 6165 ret <4 x i32> %3 6166} 6167declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone 6168 6169define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6170; GENERIC-LABEL: test_psrlvd_ymm: 6171; GENERIC: # %bb.0: 6172; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6173; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6174; GENERIC-NEXT: retq # sched: [1:1.00] 6175; 6176; HASWELL-LABEL: test_psrlvd_ymm: 6177; HASWELL: # %bb.0: 6178; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 6179; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 6180; HASWELL-NEXT: retq # sched: [7:1.00] 6181; 6182; BROADWELL-LABEL: test_psrlvd_ymm: 6183; BROADWELL: # %bb.0: 6184; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 6185; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 6186; BROADWELL-NEXT: retq 
# sched: [7:1.00] 6187; 6188; SKYLAKE-LABEL: test_psrlvd_ymm: 6189; SKYLAKE: # %bb.0: 6190; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6191; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6192; SKYLAKE-NEXT: retq # sched: [7:1.00] 6193; 6194; SKX-LABEL: test_psrlvd_ymm: 6195; SKX: # %bb.0: 6196; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6197; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6198; SKX-NEXT: retq # sched: [7:1.00] 6199; 6200; ZNVER1-LABEL: test_psrlvd_ymm: 6201; ZNVER1: # %bb.0: 6202; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6203; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6204; ZNVER1-NEXT: retq # sched: [1:0.50] 6205 %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) 6206 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 6207 %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2) 6208 ret <8 x i32> %3 6209} 6210declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone 6211 6212define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 6213; GENERIC-LABEL: test_psrlvq: 6214; GENERIC: # %bb.0: 6215; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6216; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6217; GENERIC-NEXT: retq # sched: [1:1.00] 6218; 6219; HASWELL-LABEL: test_psrlvq: 6220; HASWELL: # %bb.0: 6221; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6222; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6223; HASWELL-NEXT: retq # sched: [7:1.00] 6224; 6225; BROADWELL-LABEL: test_psrlvq: 6226; BROADWELL: # %bb.0: 6227; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6228; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6229; BROADWELL-NEXT: retq # sched: [7:1.00] 6230; 6231; SKYLAKE-LABEL: test_psrlvq: 6232; SKYLAKE: # %bb.0: 6233; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 
6234; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6235; SKYLAKE-NEXT: retq # sched: [7:1.00] 6236; 6237; SKX-LABEL: test_psrlvq: 6238; SKX: # %bb.0: 6239; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6240; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6241; SKX-NEXT: retq # sched: [7:1.00] 6242; 6243; ZNVER1-LABEL: test_psrlvq: 6244; ZNVER1: # %bb.0: 6245; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6246; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6247; ZNVER1-NEXT: retq # sched: [1:0.50] 6248 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) 6249 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 6250 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2) 6251 ret <2 x i64> %3 6252} 6253declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone 6254 6255define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 6256; GENERIC-LABEL: test_psrlvq_ymm: 6257; GENERIC: # %bb.0: 6258; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6259; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6260; GENERIC-NEXT: retq # sched: [1:1.00] 6261; 6262; HASWELL-LABEL: test_psrlvq_ymm: 6263; HASWELL: # %bb.0: 6264; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6265; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6266; HASWELL-NEXT: retq # sched: [7:1.00] 6267; 6268; BROADWELL-LABEL: test_psrlvq_ymm: 6269; BROADWELL: # %bb.0: 6270; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6271; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 6272; BROADWELL-NEXT: retq # sched: [7:1.00] 6273; 6274; SKYLAKE-LABEL: test_psrlvq_ymm: 6275; SKYLAKE: # %bb.0: 6276; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6277; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6278; SKYLAKE-NEXT: retq # sched: [7:1.00] 6279; 6280; SKX-LABEL: test_psrlvq_ymm: 6281; 
SKX: # %bb.0: 6282; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6283; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6284; SKX-NEXT: retq # sched: [7:1.00] 6285; 6286; ZNVER1-LABEL: test_psrlvq_ymm: 6287; ZNVER1: # %bb.0: 6288; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6289; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6290; ZNVER1-NEXT: retq # sched: [1:0.50] 6291 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) 6292 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 6293 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2) 6294 ret <4 x i64> %3 6295} 6296declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone 6297 6298define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 6299; GENERIC-LABEL: test_psrlw: 6300; GENERIC: # %bb.0: 6301; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6302; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 6303; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] 6304; GENERIC-NEXT: retq # sched: [1:1.00] 6305; 6306; HASWELL-LABEL: test_psrlw: 6307; HASWELL: # %bb.0: 6308; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6309; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6310; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] 6311; HASWELL-NEXT: retq # sched: [7:1.00] 6312; 6313; BROADWELL-LABEL: test_psrlw: 6314; BROADWELL: # %bb.0: 6315; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6316; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 6317; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] 6318; BROADWELL-NEXT: retq # sched: [7:1.00] 6319; 6320; SKYLAKE-LABEL: test_psrlw: 6321; SKYLAKE: # %bb.0: 6322; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6323; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6324; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] 
6325; SKYLAKE-NEXT: retq # sched: [7:1.00] 6326; 6327; SKX-LABEL: test_psrlw: 6328; SKX: # %bb.0: 6329; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6330; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6331; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] 6332; SKX-NEXT: retq # sched: [7:1.00] 6333; 6334; ZNVER1-LABEL: test_psrlw: 6335; ZNVER1: # %bb.0: 6336; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 6337; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 6338; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25] 6339; ZNVER1-NEXT: retq # sched: [1:0.50] 6340 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) 6341 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 6342 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2) 6343 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 6344 ret <16 x i16> %4 6345} 6346declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone 6347 6348define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6349; GENERIC-LABEL: test_psubb: 6350; GENERIC: # %bb.0: 6351; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6352; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6353; GENERIC-NEXT: retq # sched: [1:1.00] 6354; 6355; HASWELL-LABEL: test_psubb: 6356; HASWELL: # %bb.0: 6357; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6358; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6359; HASWELL-NEXT: retq # sched: [7:1.00] 6360; 6361; BROADWELL-LABEL: test_psubb: 6362; BROADWELL: # %bb.0: 6363; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6364; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6365; BROADWELL-NEXT: retq # sched: [7:1.00] 6366; 6367; SKYLAKE-LABEL: test_psubb: 6368; SKYLAKE: # %bb.0: 6369; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: 
[1:0.33] 6370; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6371; SKYLAKE-NEXT: retq # sched: [7:1.00] 6372; 6373; SKX-LABEL: test_psubb: 6374; SKX: # %bb.0: 6375; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6376; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6377; SKX-NEXT: retq # sched: [7:1.00] 6378; 6379; ZNVER1-LABEL: test_psubb: 6380; ZNVER1: # %bb.0: 6381; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6382; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6383; ZNVER1-NEXT: retq # sched: [1:0.50] 6384 %1 = sub <32 x i8> %a0, %a1 6385 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6386 %3 = sub <32 x i8> %1, %2 6387 ret <32 x i8> %3 6388} 6389 6390define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6391; GENERIC-LABEL: test_psubd: 6392; GENERIC: # %bb.0: 6393; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6394; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6395; GENERIC-NEXT: retq # sched: [1:1.00] 6396; 6397; HASWELL-LABEL: test_psubd: 6398; HASWELL: # %bb.0: 6399; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6400; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6401; HASWELL-NEXT: retq # sched: [7:1.00] 6402; 6403; BROADWELL-LABEL: test_psubd: 6404; BROADWELL: # %bb.0: 6405; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6406; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6407; BROADWELL-NEXT: retq # sched: [7:1.00] 6408; 6409; SKYLAKE-LABEL: test_psubd: 6410; SKYLAKE: # %bb.0: 6411; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6412; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6413; SKYLAKE-NEXT: retq # sched: [7:1.00] 6414; 6415; SKX-LABEL: test_psubd: 6416; SKX: # %bb.0: 6417; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6418; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6419; SKX-NEXT: retq # sched: [7:1.00] 6420; 6421; ZNVER1-LABEL: 
test_psubd: 6422; ZNVER1: # %bb.0: 6423; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6424; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6425; ZNVER1-NEXT: retq # sched: [1:0.50] 6426 %1 = sub <8 x i32> %a0, %a1 6427 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 6428 %3 = sub <8 x i32> %1, %2 6429 ret <8 x i32> %3 6430} 6431 6432define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 6433; GENERIC-LABEL: test_psubq: 6434; GENERIC: # %bb.0: 6435; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6436; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6437; GENERIC-NEXT: retq # sched: [1:1.00] 6438; 6439; HASWELL-LABEL: test_psubq: 6440; HASWELL: # %bb.0: 6441; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6442; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6443; HASWELL-NEXT: retq # sched: [7:1.00] 6444; 6445; BROADWELL-LABEL: test_psubq: 6446; BROADWELL: # %bb.0: 6447; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6448; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6449; BROADWELL-NEXT: retq # sched: [7:1.00] 6450; 6451; SKYLAKE-LABEL: test_psubq: 6452; SKYLAKE: # %bb.0: 6453; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6454; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6455; SKYLAKE-NEXT: retq # sched: [7:1.00] 6456; 6457; SKX-LABEL: test_psubq: 6458; SKX: # %bb.0: 6459; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6460; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6461; SKX-NEXT: retq # sched: [7:1.00] 6462; 6463; ZNVER1-LABEL: test_psubq: 6464; ZNVER1: # %bb.0: 6465; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6466; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6467; ZNVER1-NEXT: retq # sched: [1:0.50] 6468 %1 = sub <4 x i64> %a0, %a1 6469 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 6470 %3 = sub <4 x i64> %1, %2 6471 ret <4 x i64> %3 6472} 6473 6474define <32 
x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6475; GENERIC-LABEL: test_psubsb: 6476; GENERIC: # %bb.0: 6477; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6478; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6479; GENERIC-NEXT: retq # sched: [1:1.00] 6480; 6481; HASWELL-LABEL: test_psubsb: 6482; HASWELL: # %bb.0: 6483; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6484; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6485; HASWELL-NEXT: retq # sched: [7:1.00] 6486; 6487; BROADWELL-LABEL: test_psubsb: 6488; BROADWELL: # %bb.0: 6489; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6490; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6491; BROADWELL-NEXT: retq # sched: [7:1.00] 6492; 6493; SKYLAKE-LABEL: test_psubsb: 6494; SKYLAKE: # %bb.0: 6495; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6496; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6497; SKYLAKE-NEXT: retq # sched: [7:1.00] 6498; 6499; SKX-LABEL: test_psubsb: 6500; SKX: # %bb.0: 6501; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6502; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6503; SKX-NEXT: retq # sched: [7:1.00] 6504; 6505; ZNVER1-LABEL: test_psubsb: 6506; ZNVER1: # %bb.0: 6507; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6508; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6509; ZNVER1-NEXT: retq # sched: [1:0.50] 6510 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) 6511 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6512 %3 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %1, <32 x i8> %2) 6513 ret <32 x i8> %3 6514} 6515declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone 6516 6517define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6518; GENERIC-LABEL: test_psubsw: 6519; GENERIC: # %bb.0: 6520; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, 
%ymm0 # sched: [1:0.50] 6521; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6522; GENERIC-NEXT: retq # sched: [1:1.00] 6523; 6524; HASWELL-LABEL: test_psubsw: 6525; HASWELL: # %bb.0: 6526; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6527; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6528; HASWELL-NEXT: retq # sched: [7:1.00] 6529; 6530; BROADWELL-LABEL: test_psubsw: 6531; BROADWELL: # %bb.0: 6532; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6533; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6534; BROADWELL-NEXT: retq # sched: [7:1.00] 6535; 6536; SKYLAKE-LABEL: test_psubsw: 6537; SKYLAKE: # %bb.0: 6538; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6539; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6540; SKYLAKE-NEXT: retq # sched: [7:1.00] 6541; 6542; SKX-LABEL: test_psubsw: 6543; SKX: # %bb.0: 6544; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6545; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6546; SKX-NEXT: retq # sched: [7:1.00] 6547; 6548; ZNVER1-LABEL: test_psubsw: 6549; ZNVER1: # %bb.0: 6550; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6551; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6552; ZNVER1-NEXT: retq # sched: [1:0.50] 6553 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) 6554 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6555 %3 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %1, <16 x i16> %2) 6556 ret <16 x i16> %3 6557} 6558declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone 6559 6560define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6561; GENERIC-LABEL: test_psubusb: 6562; GENERIC: # %bb.0: 6563; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6564; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6565; GENERIC-NEXT: retq # sched: [1:1.00] 6566; 6567; 
HASWELL-LABEL: test_psubusb: 6568; HASWELL: # %bb.0: 6569; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6570; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6571; HASWELL-NEXT: retq # sched: [7:1.00] 6572; 6573; BROADWELL-LABEL: test_psubusb: 6574; BROADWELL: # %bb.0: 6575; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6576; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6577; BROADWELL-NEXT: retq # sched: [7:1.00] 6578; 6579; SKYLAKE-LABEL: test_psubusb: 6580; SKYLAKE: # %bb.0: 6581; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6582; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6583; SKYLAKE-NEXT: retq # sched: [7:1.00] 6584; 6585; SKX-LABEL: test_psubusb: 6586; SKX: # %bb.0: 6587; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6588; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6589; SKX-NEXT: retq # sched: [7:1.00] 6590; 6591; ZNVER1-LABEL: test_psubusb: 6592; ZNVER1: # %bb.0: 6593; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6594; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6595; ZNVER1-NEXT: retq # sched: [1:0.50] 6596 %1 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) 6597 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6598 %3 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %1, <32 x i8> %2) 6599 ret <32 x i8> %3 6600} 6601declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone 6602 6603define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6604; GENERIC-LABEL: test_psubusw: 6605; GENERIC: # %bb.0: 6606; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6607; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6608; GENERIC-NEXT: retq # sched: [1:1.00] 6609; 6610; HASWELL-LABEL: test_psubusw: 6611; HASWELL: # %bb.0: 6612; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6613; HASWELL-NEXT: vpsubusw 
(%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6614; HASWELL-NEXT: retq # sched: [7:1.00] 6615; 6616; BROADWELL-LABEL: test_psubusw: 6617; BROADWELL: # %bb.0: 6618; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6619; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6620; BROADWELL-NEXT: retq # sched: [7:1.00] 6621; 6622; SKYLAKE-LABEL: test_psubusw: 6623; SKYLAKE: # %bb.0: 6624; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6625; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6626; SKYLAKE-NEXT: retq # sched: [7:1.00] 6627; 6628; SKX-LABEL: test_psubusw: 6629; SKX: # %bb.0: 6630; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6631; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6632; SKX-NEXT: retq # sched: [7:1.00] 6633; 6634; ZNVER1-LABEL: test_psubusw: 6635; ZNVER1: # %bb.0: 6636; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6637; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6638; ZNVER1-NEXT: retq # sched: [1:0.50] 6639 %1 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) 6640 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6641 %3 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %1, <16 x i16> %2) 6642 ret <16 x i16> %3 6643} 6644declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone 6645 6646define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6647; GENERIC-LABEL: test_psubw: 6648; GENERIC: # %bb.0: 6649; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6650; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6651; GENERIC-NEXT: retq # sched: [1:1.00] 6652; 6653; HASWELL-LABEL: test_psubw: 6654; HASWELL: # %bb.0: 6655; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6656; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6657; HASWELL-NEXT: retq # sched: [7:1.00] 6658; 6659; BROADWELL-LABEL: test_psubw: 6660; BROADWELL: # %bb.0: 
6661; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6662; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6663; BROADWELL-NEXT: retq # sched: [7:1.00] 6664; 6665; SKYLAKE-LABEL: test_psubw: 6666; SKYLAKE: # %bb.0: 6667; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6668; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6669; SKYLAKE-NEXT: retq # sched: [7:1.00] 6670; 6671; SKX-LABEL: test_psubw: 6672; SKX: # %bb.0: 6673; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6674; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6675; SKX-NEXT: retq # sched: [7:1.00] 6676; 6677; ZNVER1-LABEL: test_psubw: 6678; ZNVER1: # %bb.0: 6679; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6680; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6681; ZNVER1-NEXT: retq # sched: [1:0.50] 6682 %1 = sub <16 x i16> %a0, %a1 6683 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6684 %3 = sub <16 x i16> %1, %2 6685 ret <16 x i16> %3 6686} 6687 6688define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6689; GENERIC-LABEL: test_punpckhbw: 6690; GENERIC: # %bb.0: 6691; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6692; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6693; GENERIC-NEXT: retq # sched: [1:1.00] 6694; 6695; HASWELL-LABEL: test_punpckhbw: 6696; HASWELL: # %bb.0: 6697; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = 
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6698; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6699; HASWELL-NEXT: retq # sched: [7:1.00] 6700; 6701; BROADWELL-LABEL: test_punpckhbw: 6702; BROADWELL: # %bb.0: 6703; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6704; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00] 6705; BROADWELL-NEXT: retq # sched: [7:1.00] 6706; 6707; SKYLAKE-LABEL: test_punpckhbw: 6708; SKYLAKE: # %bb.0: 6709; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6710; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = 
ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6711; SKYLAKE-NEXT: retq # sched: [7:1.00] 6712; 6713; SKX-LABEL: test_punpckhbw: 6714; SKX: # %bb.0: 6715; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6716; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6717; SKX-NEXT: retq # sched: [7:1.00] 6718; 6719; ZNVER1-LABEL: test_punpckhbw: 6720; ZNVER1: # %bb.0: 6721; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25] 6722; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50] 6723; ZNVER1-NEXT: retq # sched: [1:0.50] 6724 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, 
i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 6725 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6726 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 6727 ret <32 x i8> %3 6728} 6729 6730define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6731; GENERIC-LABEL: test_punpckhdq: 6732; GENERIC: # %bb.0: 6733; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6734; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6735; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6736; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6737; GENERIC-NEXT: retq # sched: [1:1.00] 6738; 6739; HASWELL-LABEL: test_punpckhdq: 6740; HASWELL: # %bb.0: 6741; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6742; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6743; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6744; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6745; HASWELL-NEXT: retq # sched: [7:1.00] 6746; 6747; BROADWELL-LABEL: test_punpckhdq: 6748; BROADWELL: # %bb.0: 6749; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6750; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] 6751; 
BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6752; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6753; BROADWELL-NEXT: retq # sched: [7:1.00] 6754; 6755; SKYLAKE-LABEL: test_punpckhdq: 6756; SKYLAKE: # %bb.0: 6757; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6758; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6759; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6760; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6761; SKYLAKE-NEXT: retq # sched: [7:1.00] 6762; 6763; SKX-LABEL: test_punpckhdq: 6764; SKX: # %bb.0: 6765; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6766; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6767; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6768; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6769; SKX-NEXT: retq # sched: [7:1.00] 6770; 6771; ZNVER1-LABEL: test_punpckhdq: 6772; ZNVER1: # %bb.0: 6773; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25] 6774; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50] 6775; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25] 6776; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6777; ZNVER1-NEXT: retq # sched: [1:0.50] 6778 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 6779 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 6780 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 6781 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 
6782 ret <8 x i32> %4 6783} 6784 6785define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 6786; GENERIC-LABEL: test_punpckhqdq: 6787; GENERIC: # %bb.0: 6788; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6789; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6790; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6791; GENERIC-NEXT: retq # sched: [1:1.00] 6792; 6793; HASWELL-LABEL: test_punpckhqdq: 6794; HASWELL: # %bb.0: 6795; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6796; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6797; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6798; HASWELL-NEXT: retq # sched: [7:1.00] 6799; 6800; BROADWELL-LABEL: test_punpckhqdq: 6801; BROADWELL: # %bb.0: 6802; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6803; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] 6804; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6805; BROADWELL-NEXT: retq # sched: [7:1.00] 6806; 6807; SKYLAKE-LABEL: test_punpckhqdq: 6808; SKYLAKE: # %bb.0: 6809; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6810; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6811; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 6812; SKYLAKE-NEXT: retq # sched: [7:1.00] 6813; 6814; SKX-LABEL: test_punpckhqdq: 6815; SKX: # %bb.0: 6816; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6817; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6818; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 6819; SKX-NEXT: retq # sched: [7:1.00] 6820; 6821; ZNVER1-LABEL: test_punpckhqdq: 6822; ZNVER1: # %bb.0: 
6823; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25] 6824; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50] 6825; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 6826; ZNVER1-NEXT: retq # sched: [1:0.50] 6827 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 6828 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 6829 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 6830 %4 = add <4 x i64> %1, %3 6831 ret <4 x i64> %4 6832} 6833 6834define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6835; GENERIC-LABEL: test_punpckhwd: 6836; GENERIC: # %bb.0: 6837; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6838; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6839; GENERIC-NEXT: retq # sched: [1:1.00] 6840; 6841; HASWELL-LABEL: test_punpckhwd: 6842; HASWELL: # %bb.0: 6843; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6844; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6845; HASWELL-NEXT: retq # sched: [7:1.00] 6846; 6847; BROADWELL-LABEL: test_punpckhwd: 6848; BROADWELL: # %bb.0: 6849; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6850; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = 
ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00] 6851; BROADWELL-NEXT: retq # sched: [7:1.00] 6852; 6853; SKYLAKE-LABEL: test_punpckhwd: 6854; SKYLAKE: # %bb.0: 6855; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6856; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6857; SKYLAKE-NEXT: retq # sched: [7:1.00] 6858; 6859; SKX-LABEL: test_punpckhwd: 6860; SKX: # %bb.0: 6861; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6862; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6863; SKX-NEXT: retq # sched: [7:1.00] 6864; 6865; ZNVER1-LABEL: test_punpckhwd: 6866; ZNVER1: # %bb.0: 6867; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25] 6868; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50] 6869; ZNVER1-NEXT: retq # sched: [1:0.50] 6870 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 6871 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6872 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, 
i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 6873 ret <16 x i16> %3 6874} 6875 6876define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6877; GENERIC-LABEL: test_punpcklbw: 6878; GENERIC: # %bb.0: 6879; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6880; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6881; GENERIC-NEXT: retq # sched: [1:1.00] 6882; 6883; HASWELL-LABEL: test_punpcklbw: 6884; HASWELL: # %bb.0: 6885; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6886; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6887; HASWELL-NEXT: retq # sched: [7:1.00] 6888; 6889; BROADWELL-LABEL: test_punpcklbw: 6890; BROADWELL: # %bb.0: 6891; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6892; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00] 6893; BROADWELL-NEXT: retq # sched: [7:1.00] 6894; 6895; SKYLAKE-LABEL: test_punpcklbw: 6896; SKYLAKE: # %bb.0: 6897; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6898; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6899; SKYLAKE-NEXT: retq # sched: [7:1.00] 6900; 6901; SKX-LABEL: test_punpcklbw: 6902; SKX: # %bb.0: 6903; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6904; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6905; SKX-NEXT: retq # sched: [7:1.00] 6906; 6907; ZNVER1-LABEL: test_punpcklbw: 6908; ZNVER1: # %bb.0: 6909; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25] 6910; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50] 6911; ZNVER1-NEXT: retq # sched: [1:0.50] 6912 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> 6913 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6914 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> 6915 ret <32 x i8> %3 6916} 6917 6918define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6919; GENERIC-LABEL: test_punpckldq: 6920; GENERIC: # %bb.0: 6921; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6922; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 6923; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6924; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6925; GENERIC-NEXT: retq # sched: [1:1.00] 6926; 6927; HASWELL-LABEL: test_punpckldq: 6928; HASWELL: # %bb.0: 6929; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6930; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 6931; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6932; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6933; HASWELL-NEXT: retq # sched: [7:1.00] 6934; 6935; BROADWELL-LABEL: test_punpckldq: 6936; BROADWELL: # %bb.0: 6937; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6938; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] 6939; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6940; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6941; BROADWELL-NEXT: retq # sched: [7:1.00] 6942; 6943; SKYLAKE-LABEL: test_punpckldq: 6944; SKYLAKE: # %bb.0: 6945; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6946; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 6947; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6948; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6949; SKYLAKE-NEXT: retq # sched: [7:1.00] 6950; 6951; SKX-LABEL: test_punpckldq: 6952; SKX: # %bb.0: 6953; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6954; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 6955; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6956; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6957; SKX-NEXT: retq # sched: [7:1.00] 6958; 6959; ZNVER1-LABEL: test_punpckldq: 6960; ZNVER1: # %bb.0: 6961; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25] 6962; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50] 6963; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25] 6964; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6965; ZNVER1-NEXT: retq # sched: [1:0.50] 6966 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 6967 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 6968 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 6969 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 6970 ret <8 x i32> %4 6971} 6972 6973define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 6974; GENERIC-LABEL: test_punpcklqdq: 6975; GENERIC: # %bb.0: 6976; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 6977; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 6978; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6979; GENERIC-NEXT: retq # sched: [1:1.00] 6980; 6981; HASWELL-LABEL: test_punpcklqdq: 6982; HASWELL: # %bb.0: 6983; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 6984; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 6985; HASWELL-NEXT: vpaddq %ymm0, 
%ymm1, %ymm0 # sched: [1:0.50] 6986; HASWELL-NEXT: retq # sched: [7:1.00] 6987; 6988; BROADWELL-LABEL: test_punpcklqdq: 6989; BROADWELL: # %bb.0: 6990; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 6991; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] 6992; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6993; BROADWELL-NEXT: retq # sched: [7:1.00] 6994; 6995; SKYLAKE-LABEL: test_punpcklqdq: 6996; SKYLAKE: # %bb.0: 6997; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 6998; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 6999; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 7000; SKYLAKE-NEXT: retq # sched: [7:1.00] 7001; 7002; SKX-LABEL: test_punpcklqdq: 7003; SKX: # %bb.0: 7004; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 7005; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] 7006; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 7007; SKX-NEXT: retq # sched: [7:1.00] 7008; 7009; ZNVER1-LABEL: test_punpcklqdq: 7010; ZNVER1: # %bb.0: 7011; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25] 7012; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50] 7013; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 7014; ZNVER1-NEXT: retq # sched: [1:0.50] 7015 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 7016 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 7017 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 7018 %4 = add <4 x i64> %1, %3 7019 ret <4 x i64> %4 7020} 7021 7022define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 7023; GENERIC-LABEL: test_punpcklwd: 7024; GENERIC: # %bb.0: 7025; GENERIC-NEXT: 
vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] 7026; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] 7027; GENERIC-NEXT: retq # sched: [1:1.00] 7028; 7029; HASWELL-LABEL: test_punpcklwd: 7030; HASWELL: # %bb.0: 7031; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] 7032; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] 7033; HASWELL-NEXT: retq # sched: [7:1.00] 7034; 7035; BROADWELL-LABEL: test_punpcklwd: 7036; BROADWELL: # %bb.0: 7037; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] 7038; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00] 7039; BROADWELL-NEXT: retq # sched: [7:1.00] 7040; 7041; SKYLAKE-LABEL: test_punpcklwd: 7042; SKYLAKE: # %bb.0: 7043; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] 7044; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] 7045; SKYLAKE-NEXT: retq # sched: [7:1.00] 7046; 7047; SKX-LABEL: test_punpcklwd: 7048; SKX: # %bb.0: 7049; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] 7050; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] 7051; SKX-NEXT: retq # sched: [7:1.00] 7052; 7053; ZNVER1-LABEL: test_punpcklwd: 7054; ZNVER1: # %bb.0: 7055; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25] 7056; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50] 7057; ZNVER1-NEXT: retq # sched: [1:0.50] 7058 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 7059 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 7060 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 7061 ret <16 x i16> %3 7062} 7063 7064define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 7065; GENERIC-LABEL: test_pxor: 7066; GENERIC: # %bb.0: 7067; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 7068; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 7069; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 7070; GENERIC-NEXT: retq # sched: [1:1.00] 7071; 7072; HASWELL-LABEL: test_pxor: 7073; HASWELL: # %bb.0: 7074; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 7075; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 7076; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 7077; HASWELL-NEXT: retq # sched: [7:1.00] 7078; 
7079; BROADWELL-LABEL: test_pxor: 7080; BROADWELL: # %bb.0: 7081; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 7082; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 7083; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 7084; BROADWELL-NEXT: retq # sched: [7:1.00] 7085; 7086; SKYLAKE-LABEL: test_pxor: 7087; SKYLAKE: # %bb.0: 7088; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 7089; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 7090; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 7091; SKYLAKE-NEXT: retq # sched: [7:1.00] 7092; 7093; SKX-LABEL: test_pxor: 7094; SKX: # %bb.0: 7095; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 7096; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 7097; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 7098; SKX-NEXT: retq # sched: [7:1.00] 7099; 7100; ZNVER1-LABEL: test_pxor: 7101; ZNVER1: # %bb.0: 7102; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 7103; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 7104; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 7105; ZNVER1-NEXT: retq # sched: [1:0.50] 7106 %1 = xor <4 x i64> %a0, %a1 7107 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 7108 %3 = xor <4 x i64> %1, %2 7109 %4 = add <4 x i64> %3, %a1 7110 ret <4 x i64> %4 7111} 7112 7113!0 = !{i32 1} 7114